[
  {
    "path": ".appveyor_msys_build.sh",
    "content": "export PATH=/c/msys64/mingw$ABI/bin:/c/projects/mpir/bin/:$PATH\ncd /c/projects/mpir\necho && echo build: ./autogen.sh\n./autogen.sh\necho && echo build: ./configure ABI=$ABI $LIB\n./configure ABI=$ABI $LIB\necho && echo build: make\nmake\n# should work but falsely requires texlive ?!?\n#echo && echo build: DISTCHECK_CONFIGURE_FLAGS=\"ABI=$ABI $LIB\" make distcheck\n#DISTCHECK_CONFIGURE_FLAGS=\"ABI=$ABI $LIB\" make distcheck\necho && echo build: make check\nmake check && make -C tests testsuite-all.log\necho && echo build: make dist\nmake dist\n"
  },
  {
    "path": ".gdbinit",
    "content": "# Copyright 1999 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\ndefine pz\nset __gmpz_dump ($)\nend\n\ndefine pq\nset __gmpz_dump ($->_mp_num)\necho /\nset __gmpz_dump ($->_mp_den)\nend\n\ndefine pf\nset __gmpf_dump ($)\nend\n\n"
  },
  {
    "path": ".gitattributes",
    "content": "# Auto detect text files and perform LF normalization\n* text=auto\n\n# Custom for Visual Studio\n*.cs     diff=csharp\n*.sln    merge=union\n*.csproj merge=union\n*.vbproj merge=union\n*.fsproj merge=union\n*.dbproj merge=union\n\n# Standard to msysgit\n*.doc\t diff=astextplain\n*.DOC\t diff=astextplain\n*.docx diff=astextplain\n*.DOCX diff=astextplain\n*.dot  diff=astextplain\n*.DOT  diff=astextplain\n*.pdf  diff=astextplain\n*.PDF\t diff=astextplain\n*.rtf\t diff=astextplain\n*.RTF\t diff=astextplain\n"
  },
  {
    "path": ".gitignore",
    "content": "#################\n## Project files\n#################\n\n/build.vc/output_params.bat\n/build.vc*/test-config.props\n\n\n#################\n## Eclipse\n#################\n\n*.pydevproject\n.project\n.metadata\nbin/\ntmp/\n*.tmp\n*.bak\n*.swp\n*~.nib\nlocal.properties\n.classpath\n.settings/\n.loadpath\n\n# External tool builders\n.externalToolBuilders/\n\n# Locally stored \"Eclipse launch configurations\"\n*.launch\n\n# CDT-specific\n.cproject\n\n# PDT-specific\n.buildpath\n\n\n#################\n## Visual Studio\n#################\n\n## Ignore Visual Studio temporary files, build results, and\n## files generated by popular Visual Studio add-ons.\n##\n## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore\n\n# User-specific files\n*.suo\n*.user\n*.userosscache\n*.sln.docstates\n\n# User-specific files (MonoDevelop/Xamarin Studio)\n*.userprefs\n\n# Build results\n[Dd]ebug/\n[Dd]ebugPublic/\n[Rr]elease/\n[Rr]eleases/\nx64/\nx86/\nbld/\n[Bb]in/\n[Oo]bj/\n[Ll]og/\n[Ww]in32/\n\n# Visual Studio 2015 cache/options directory\n.vs/\n# Uncomment if you have tasks that create the project's static files in wwwroot\n#wwwroot/\n\n# MSTest test Results\n[Tt]est[Rr]esult*/\n[Bb]uild[Ll]og.*\n\n# NUNIT\n*.VisualState.xml\nTestResult.xml\n\n# Build Results of an ATL Project\n[Dd]ebugPS/\n[Rr]eleasePS/\ndlldata.c\n\n# Benchmark Results\nBenchmarkDotNet.Artifacts/\n\n# .NET Core\nproject.lock.json\nproject.fragment.lock.json\nartifacts/\n**/Properties/launchSettings.json\n\n#*_i.c\n#*_p.c\n#*_i.h\n*.ilk\n*.meta\n*.obj\n*.pch\n*.pdb\n*.pgc\n*.pgd\n*.rsp\n*.sbr\n*.tlb\n*.tli\n*.tlh\n*.tmp\n*.tmp_proj\n*.log\n*.vspscc\n*.vssscc\n.builds\n*.pidb\n*.svclog\n*.scc\n\n# Chutzpah Test files\n_Chutzpah*\n\n# Visual C++ cache files\nipch/\n*.aps\n*.ncb\n*.opendb\n*.opensdf\n*.sdf\n*.cachefile\n*.VC.db\n*.VC.VC.opendb\n\n# Visual Studio profiler\n*.psess\n*.vsp\n*.vspx\n*.sap\n\n# TFS 2012 Local Workspace\n$tf/\n\n# Guidance Automation 
Toolkit\n*.gpState\n\n# ReSharper is a .NET coding add-in\n_ReSharper*/\n*.[Rr]e[Ss]harper\n*.DotSettings.user\n\n# JustCode is a .NET coding add-in\n.JustCode\n\n# TeamCity is a build add-in\n_TeamCity*\n\n# DotCover is a Code Coverage Tool\n*.dotCover\n\n# Visual Studio code coverage results\n*.coverage\n*.coveragexml\n\n# NCrunch\n_NCrunch_*\n.*crunch*.local.xml\nnCrunchTemp_*\n\n# MightyMoose\n*.mm.*\nAutoTest.Net/\n\n# Web workbench (sass)\n.sass-cache/\n\n# Installshield output folder\n[Ee]xpress/\n\n# DocProject is a documentation generator add-in\nDocProject/buildhelp/\nDocProject/Help/*.HxT\nDocProject/Help/*.HxC\nDocProject/Help/*.hhc\nDocProject/Help/*.hhk\nDocProject/Help/*.hhp\nDocProject/Help/Html2\nDocProject/Help/html\n\n# Click-Once directory\npublish/\n\n# Publish Web Output\n*.[Pp]ublish.xml\n*.azurePubxml\n# TODO: Comment the next line if you want to checkin your web deploy settings\n# but database connection strings (with potential passwords) will be unencrypted\n*.pubxml\n*.publishproj\n\n# Microsoft Azure Web App publish settings. 
Comment the next line if you want to\n# checkin your Azure Web App publish settings, but sensitive information contained\n# in these scripts will be unencrypted\nPublishScripts/\n\n# NuGet Packages\n*.nupkg\n# The packages folder can be ignored because of Package Restore\n**/packages/*\n# except build/, which is used as an MSBuild target.\n!**/packages/build/\n# Uncomment if necessary however generally it will be regenerated when needed\n#!**/packages/repositories.config\n# NuGet v3's project.json files produces more ignorable files\n*.nuget.props\n*.nuget.targets\n\n# Microsoft Azure Build Output\ncsx/\n*.build.csdef\n\n# Microsoft Azure Emulator\necf/\nrcf/\n\n# Windows Store app package directories and files\nAppPackages/\nBundleArtifacts/\nPackage.StoreAssociation.xml\n_pkginfo.txt\n*.appx\n\n# Visual Studio cache files\n# files ending in .cache can be ignored\n*.[Cc]ache\n# but keep track of directories ending in .cache\n!*.[Cc]ache/\n\n# Others\nClientBin/\n~$*\n*~\n*.dbmdl\n*.dbproj.schemaview\n*.jfm\n*.pfx\n*.publishsettings\norleans.codegen.cs\n\n# Since there are multiple workflows, uncomment next line to ignore bower_components\n# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)\n#bower_components/\n\n# RIA/Silverlight projects\nGenerated_Code/\n\n# Backup & report files from converting an old project file\n# to a newer Visual Studio version. 
Backup files are not needed,\n# because we have git ;-)\n_UpgradeReport_Files/\nBackup*/\nUpgradeLog*.XML\nUpgradeLog*.htm\n\n# SQL Server files\n*.mdf\n*.ldf\n*.ndf\n\n# Business Intelligence projects\n*.rdl.data\n*.bim.layout\n*.bim_*.settings\n\n# Microsoft Fakes\nFakesAssemblies/\n\n# GhostDoc plugin setting file\n*.GhostDoc.xml\n\n# Node.js Tools for Visual Studio\n.ntvs_analysis.dat\nnode_modules/\n\n# Typescript v1 declaration files\ntypings/\n\n# Visual Studio 6 build log\n*.plg\n\n# Visual Studio 6 workspace options file\n*.opt\n\n# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)\n*.vbw\n\n# Visual Studio LightSwitch build output\n**/*.HTMLClient/GeneratedArtifacts\n**/*.DesktopClient/GeneratedArtifacts\n**/*.DesktopClient/ModelManifest.xml\n**/*.Server/GeneratedArtifacts\n**/*.Server/ModelManifest.xml\n_Pvt_Extensions\n\n# Paket dependency manager\n.paket/paket.exe\npaket-files/\n\n# FAKE - F# Make\n.fake/\n\n# JetBrains Rider\n.idea/\n*.sln.iml\n\n# CodeRush\n.cr/\n\n# Python Tools for Visual Studio (PTVS)\n__pycache__/\n*.pyc\n\n# Cake - Uncomment if you are using it\n# tools/**\n# !tools/packages.config\n\n# Tabs Studio\n*.tss\n\n# Telerik's JustMock configuration file\n*.jmconfig\n\n# BizTalk build output\n*.btp.cs\n*.btm.cs\n*.odx.cs\n*.xsd.cs\n\n\n\n############\n## Windows\n############\n\n# Windows image file caches\nThumbs.db\n\n# Folder config file\nDesktop.ini\n\n\n#############\n## Python\n#############\n\n*.py[co]\n\n# Packages\n*.egg\n*.egg-info\ndist\nbuild\neggs\nparts\nbin\nvar\nsdist\ndevelop-eggs\n.installed.cfg\n\n# Installer logs\npip-log.txt\n\n# Unit test / coverage reports\n.coverage\n.tox\n\n#Translations\n*.mo\n\n#Mr Developer\n.mr.developer.cfg\n\n# Mac crap\n.DS_Store\n\n############\n# 
*nix\n############\n*.lo\n*.o\n*.a\n*.la\n.libs\nmpn/*.c\nmpn/*.as\nmpn/*.asm\nMakefile\nmpir.h\nconfig.h\nconfig.m4\n*.log\n*.in\n*.trs\nconfig.status\nlibtool\nstamp-h1\nautom4te.cache/\nyasm/YASM-VERSION-FILE\nyasm/YASM-VERSION.h\nyasm/autom4te.cache/\nyasm/.deps/\nyasm/results\nyasm/config/config.guess\nyasm/config/config.sub\nyasm/configure.gnu\nyasm/configure.lineno\nyasm/libyasm-stdint.h\nyasm/po/Makefile.in\nyasm/po/POTFILES\nyasm/stamp-h1\nyasm_mac.inc\ngmp.h\nlonglong.h\ngmp-mparam.h\ntune/sqr_basecase.c\nautoscript\ncdata\ntest-driver\ntests/fft/t-adjust\ntests/fft/t-adjust_sqrt2\ntests/fft/t-butterfly\ntests/fft/t-butterfly_lshB\ntests/fft/t-butterfly_rshB\ntests/fft/t-butterfly_sqrt2\ntests/fft/t-butterfly_twiddle\ntests/fft/t-div_2expmod_2expp1\ntests/fft/t-fft_ifft_mfa_trunc_sqrt2\ntests/fft/t-fft_ifft_negacyclic\ntests/fft/t-fft_ifft_radix2\ntests/fft/t-fft_ifft_trunc\ntests/fft/t-fft_ifft_trunc_sqrt2\ntests/fft/t-mul_2expmod_2expp1\ntests/fft/t-mul_fft_main\ntests/fft/t-mul_mfa_trunc_sqrt2\ntests/fft/t-mul_trunc_sqrt2\ntests/fft/t-mulmod_2expp1\ntests/fft/t-normmod_2expp1\ntests/fft/t-split_combine_bits\ntests/mpn/st_fat\ntests/mpn/st_instrument\ntests/mpn/t-addadd_n\ntests/mpn/t-addsub_n\ntests/mpn/t-aors_1\ntests/mpn/t-asmtype\ntests/mpn/t-dc_bdiv_q\ntests/mpn/t-dc_bdiv_q_n\ntests/mpn/t-dc_bdiv_qr\ntests/mpn/t-dc_bdiv_qr_n\ntests/mpn/t-dc_div_q\ntests/mpn/t-dc_div_qr\ntests/mpn/t-dc_div_qr_n\ntests/mpn/t-dc_divappr_q\ntests/mpn/t-divebyff\ntests/mpn/t-divebyfobm1\ntests/mpn/t-divrem_1\ntests/mpn/t-fat\ntests/mpn/t-gcdext\ntests/mpn/t-get_d\ntests/mpn/t-hgcd\ntests/mpn/t-instrument\ntests/mpn/t-inv_div_q\ntests/mpn/t-inv_div_qr\ntests/mpn/t-inv_div_qr_n\ntests/mpn/t-inv_divappr_q\ntests/mpn/t-inv_divappr_q_n\ntests/mpn/t-invert\ntests/mpn/t-iord_u\ntests/mpn/t-lorrshift1\ntests/mpn/t-matrix22\ntests/mpn/t-mp_bases\ntests/mpn/t-mullow_basecase\ntests/mpn/t-mullowhigh\ntests/mpn/t-mulmid\ntests/mpn/t-mulmod_2expm1\ntests/mpn/t-mulmod_2expp1\ntests/mp
n/t-neg\ntests/mpn/t-perfsqr\ntests/mpn/t-redc_1\ntests/mpn/t-sb_bdiv_q\ntests/mpn/t-sb_bdiv_qr\ntests/mpn/t-sb_div_q\ntests/mpn/t-sb_div_qr\ntests/mpn/t-sb_divappr_q\ntests/mpn/t-scan\ntests/mpn/t-sizeinbase\ntests/mpn/t-subadd_n\ntests/mpn/t-tdiv_q\ntests/mpn/t-tdiv_qr\ntests/mpn/t-logic\ntests/t-bswap\ntests/t-constants\ntests/t-count_zeros\ntests/t-gmpmax\ntests/t-hightomask\ntests/t-modlinv\ntests/t-parity\ntests/t-popc\ntests/t-sub\ntests/misc/t-locale\ntests/misc/t-printf\ntests/misc/t-scanf\ntests/mpf/reuse\ntests/mpf/t-add\ntests/mpf/t-cmp_d\ntests/mpf/t-cmp_si\ntests/mpf/t-conv\ntests/mpf/t-div\ntests/mpf/t-dm2exp\ntests/mpf/t-eq\ntests/mpf/t-fits\ntests/mpf/t-get_d\ntests/mpf/t-get_d_2exp\ntests/mpf/t-get_si\ntests/mpf/t-get_ui\ntests/mpf/t-gsprec\ntests/mpf/t-inp_str\ntests/mpf/t-int_p\ntests/mpf/t-mul_ui\ntests/mpf/t-muldiv\ntests/mpf/t-set\ntests/mpf/t-set_q\ntests/mpf/t-set_si\ntests/mpf/t-set_ui\ntests/mpf/t-sqrt\ntests/mpf/t-sqrt_ui\ntests/mpf/t-sub\ntests/mpf/t-trunc\ntests/mpf/t-ui_div\ntests/mpq/t-aors\ntests/mpq/t-cmp\ntests/mpq/t-cmp_si\ntests/mpq/t-cmp_ui\ntests/mpq/t-cmp_z\ntests/mpq/t-equal\ntests/mpq/t-get_d\ntests/mpq/t-get_str\ntests/mpq/t-inp_str\ntests/mpq/t-md_2exp\ntests/mpq/t-set_f\ntests/mpq/t-set_str\ntests/mpz/bit\ntests/mpz/convert\ntests/mpz/dive\ntests/mpz/dive_ui\ntests/mpz/io\ntests/mpz/logic\ntests/mpz/reuse\ntests/mpz/st_hamdist\ntests/mpz/st_popcount\ntests/mpz/t-addsub\ntests/mpz/t-aorsmul\ntests/mpz/t-bin\ntests/mpz/t-cdiv_ui\ntests/mpz/t-cmp\ntests/mpz/t-cmp_d\ntests/mpz/t-cmp_si\ntests/mpz/t-cong\ntests/mpz/t-cong_2exp\ntests/mpz/t-div_2exp\ntests/mpz/t-divis\ntests/mpz/t-divis_2exp\ntests/mpz/t-export\ntests/mpz/t-fac_ui\ntests/mpz/t-fdiv\ntests/mpz/t-fdiv_ui\ntests/mpz/t-fib_ui\ntests/mpz/t-fits\ntests/mpz/t-gcd\ntests/mpz/t-gcd_ui\ntests/mpz/t-get_d\ntests/mpz/t-get_d_2exp\ntests/mpz/t-get_si\ntests/mpz/t-get_sx\ntests/mpz/t-get_ux\ntests/mpz/t-hamdist\ntests/mpz/t-import\ntests/mpz/t-inp_str\ntests/mpz/t-io_raw\nte
sts/mpz/t-jac\ntests/mpz/t-lcm\ntests/mpz/t-likely_prime_p\ntests/mpz/t-limbs\ntests/mpz/t-lucnum_ui\ntests/mpz/t-mfac_uiui\ntests/mpz/t-mul\ntests/mpz/t-mul_i\ntests/mpz/t-next_prime_candidate\ntests/mpz/t-oddeven\ntests/mpz/t-perfpow\ntests/mpz/t-perfsqr\ntests/mpz/t-popcount\ntests/mpz/t-pow\ntests/mpz/t-powm\ntests/mpz/t-powm_ui\ntests/mpz/t-pprime_p\ntests/mpz/t-primorial_ui\ntests/mpz/t-root\ntests/mpz/t-scan\ntests/mpz/t-set_d\ntests/mpz/t-set_f\ntests/mpz/t-set_si\ntests/mpz/t-set_str\ntests/mpz/t-set_sx\ntests/mpz/t-set_ux\ntests/mpz/t-sizeinbase\ntests/mpz/t-sqrtrem\ntests/mpz/t-tdiv\ntests/mpz/t-tdiv_ui\ntests/mpz/t-trial_division\ntests/rand/t-iset\ntests/rand/t-lc2exp\ntests/rand/t-mt\ntests/rand/t-rand\ntests/rand/t-urbui\ntests/rand/t-urmui\ntests/rand/t-urndmm\naclocal.m4\ncompile\nconfigure\ndoc/mpir.info\ndoc/mpir.info-1\ndoc/mpir.info-2\ninstall-sh\nltmain.sh\nmissing\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: c\nsudo: false\nos:\n  - osx\n  - linux\ncompiler:\n  - gcc\n  - clang\naddons:\n  apt:\n    packages:\n      - yasm\n      - texinfo\nscript:\n  - if [[ \"${TRAVIS_OS_NAME}\" == \"osx\" ]]; then\n       brew update;\n       brew install yasm;\n    fi\n  # On OSX, gcc is a wrapper around clang, so use gcc from homebrew\n  - if [[ \"${TRAVIS_OS_NAME}\" == \"osx\" ]] && [[ \"${CC}\" == \"gcc\" ]]; then\n       brew install gcc;\n       brew link --overwrite gcc;\n       export CC=gcc;\n       export CXX=g++;\n    fi\n  - $CC --version\n  - ./autogen.sh\n  - ./configure && make && make check\n"
  },
  {
    "path": "AUTHORS",
    "content": "Authors of GNU MP (in chronological order of initial contribution)\n\nTorbjörn Granlund\tMain author\n\nJohn Amanatides\t\tOriginal version of mpz/pprime_p.c\nPaul Zimmermann\t\tmpn/generic/mul_fft.c, mpn/generic/dc_divrem_n.c,\n\t\t\tnew mpz/powm.c, improved Toom3 code.\nKen Weber\t\tmpn/generic/gcd.c mpn/generic/bdivmod.c\nBennet Yee\t\tmpz/jacobi.c mpz/legendre.c\nAndreas Schwab\t\tmpn/m68k/lshift.S, mpn/m68k/rshift.S\nRobert Harley\t\tmpn/generic/mul_n.c, files in mpn/arm\nLinus Nordberg\t\tRandom number framework, original autoconfery\nKent Boortz\t\tMacOS 9 port\nKevin Ryde\t\tMost x86 assembly, new autoconfery, and countless other\n\t\t\tthings (please see the GMP manual for complete list)\nGerardo Ballabio\tgmpxx.h and C++ istream input\nPedro Gimeno\t\tMersenne Twister random generator, other random number\n\t\t\trevisions\nJason Moxham\t\tNew mpz/fac_ui.c and gen-fac_ui.c\n\nAuthors of code in MPIR (including code from recent GMP)\n\nBrian Gladman        Windows MSVC port and intel format x86_64 code\n                     Port of tune, speed, try and benchmark code to MSVC. \n                     Complete rewrite of benchmark code in C.\nWilliam Hart         Build system modifications and intel format x86_64 code\n                     Added mpn version of Toom 4 and Toom 7 multiplication \n                     routines. Numerous FFT bugfixes, improvements to Toom3, \n                     4, 7 routines, toom squaring routines. Extended GCD code, \n                     improvements to GCD code. 
Fast mpn_tdiv_q code.\nGonzalo Tornaria     Patches to config.guess and mpirbench and some tuning work.\nMariah Lennox        Patches to mpirbench and build failure reports\nJason Worth Martin   Core 2 assembly support,  merge of Moller's GCD patches,\n                     addmul code for Itanium\nMichael Abshoff      Build system patches and failure reports, valgrinding,\n                     build testing\nNiels Moller         GCD patches, nhgcd2.c, contributions to the schoolbook\n                     and divide and conquer division code, jacobi symbols\nJason Moxham\t     Dramatic speed improvements for K8, K10 and Core 2 \n                     assembler code and improvements to other x86_64 assembly \n                     code. Refactoring CPU detection code. Numerous bug and \n                     build fixes. Improvements to speed program. Many new \n                     assembly functions including division functions. Improved\n                     root code, mulhi and mullo. Implementation of Peter \n                     Montgomery's single limb remainder code.\nPierrick Gaudry      AMD 64 assembly support, revised FFT code\nPaul Zimmermann      mpz implementation of Toom 4, FFT code, invert.c, rootrem\nAlexander Kruppa     revised FFT code\nTorbjorn Granlund    revised FFT code, schoolbook and divide and conquer\n                     division code (reused in asymptotically fast division\n                     code), logops, rootrem, tdiv_q, tdiv_qr, linear division\n                     code for x86_64, itanium assembly code, divexact, \n                     nextprime, n-choose-k, binvert, mullow_n_basecase, powlo,\n                     redc_n, powm, powm_ui improvements \nMarco Bodrato        mpz implementation of Toom 7, mpn implementation of \n                     toom 8.5, contributed to Schoolbook division code\n                     many improved number theoretical functions factorial,\n                     multi-factorial, primorial, 
n-choose-k\nRobert Gerbicz       fast factorial code\nMarc Glisse          gmpxx.h improvements\nMartin Boij          Miscellaneous improvements to nextprime code\nDavid Harvey         middle product and dc_divappr_q code, divide and conquer\n                     Hensel division code and some work on basecase\nT. R. Nicely         primality tests used in benchmark\nJeff Gilchrist       porting of T. R. Nicely's code to MPIR, testing/tuning\nPeter Shrimpton\t     BPSW primality test code for integers up to GMP_LIMB_BITS\nMinh Van Nguyen      Release manager for MPIR 2.1.0\nCase Vanhorsen\t     Release testing\nDavid Cleaver        Bug report\nJP Flori             Many build system patches, tuning values, port of GMP\n                     mpz_powm and mpz_powm_ui to MPIR\nSisyphus (Rob)       Tuning values\nLeif Lionhardy       Build system patches, tuning values\nJulien Puydt         Tuning values\nVincent Delecroix    Port of mpq_cmp_z from gmp\nsav-ix (Alexander)   Patch for t-locale on Windows\nIsuru Fernando       Tuning values, numerous build patches and continuous\n                     integration\nAlex Dyachenko       mpir.net for interface to .net languages\nTommy Hoffman        Supplied a sed patch.\nAverkhaturau         Fixed a C++ compilation problem.\nMarcell Keller       Fixed a sign conversion bug.\nSergey Taymanov      Fixed some Windows build file issues.\njengelh              Reported a bug and did build test\nDima Pasechnik       Build system patches, bulk changes to x86_64 assembler code\n                     to accommodate OSX linker quirks.\n"
  },
  {
    "path": "COPYING",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU General Public License is a free, copyleft license for\nsoftware and other kinds of works.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nthe GNU General Public License is intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.  We, the Free Software Foundation, use the\nGNU General Public License for most of our software; it applies also to\nany other work released this way by its authors.  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  To protect your rights, we need to prevent others from denying you\nthese rights or asking you to surrender the rights.  Therefore, you have\ncertain responsibilities if you distribute copies of the software, or if\nyou modify it: responsibilities to respect the freedom of others.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must pass on to the recipients the same\nfreedoms that you received.  You must make sure that they, too, receive\nor can get the source code.  
And you must show them these terms so they\nknow their rights.\n\n  Developers that use the GNU GPL protect your rights with two steps:\n(1) assert copyright on the software, and (2) offer you this License\ngiving you legal permission to copy, distribute and/or modify it.\n\n  For the developers' and authors' protection, the GPL clearly explains\nthat there is no warranty for this free software.  For both users' and\nauthors' sake, the GPL requires that modified versions be marked as\nchanged, so that their problems will not be attributed erroneously to\nauthors of previous versions.\n\n  Some devices are designed to deny users access to install or run\nmodified versions of the software inside them, although the manufacturer\ncan do so.  This is fundamentally incompatible with the aim of\nprotecting users' freedom to change the software.  The systematic\npattern of such abuse occurs in the area of products for individuals to\nuse, which is precisely where it is most unacceptable.  Therefore, we\nhave designed this version of the GPL to prohibit the practice for those\nproducts.  If such problems arise substantially in other domains, we\nstand ready to extend this provision to those domains in future versions\nof the GPL, as needed to protect the freedom of users.\n\n  Finally, every program is threatened constantly by software patents.\nStates should not allow patents to restrict development and use of\nsoftware on general-purpose computers, but in those that do, we wish to\navoid the special danger that patents applied to a free program could\nmake it effectively proprietary.  To prevent this, the GPL assures that\npatents cannot be used to render the program non-free.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. 
Definitions.\n\n  \"This License\" refers to version 3 of the GNU General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. 
Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  \"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  
For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. 
Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  
This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  
If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  
Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  
If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  
If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  
For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  
To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. Use with the GNU Affero General Public License.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU Affero General Public License into a single\ncombined work, and to convey the resulting work.  
The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the special requirements of the GNU Affero General Public License,\nsection 13, concerning interaction through a network will apply to the\ncombination as such.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  If the Program does not specify a version number of the\nGNU General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  
It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program.  If not, see <http://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If the program does terminal interaction, make it output a short\nnotice like this when it starts in an interactive mode:\n\n    <program>  Copyright (C) <year>  <name of author>\n    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  
Of course, your program's commands\nmight be different; for a GUI interface, you would use an \"about box\".\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU GPL, see\n<http://www.gnu.org/licenses/>.\n\n  The GNU General Public License does not permit incorporating your program\ninto proprietary programs.  If your program is a subroutine library, you\nmay consider it more useful to permit linking proprietary applications with\nthe library.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.  But first, please read\n<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n"
  },
  {
    "path": "COPYING.LIB",
    "content": "\t\t   GNU LESSER GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n\n  This version of the GNU Lesser General Public License incorporates\nthe terms and conditions of version 3 of the GNU General Public\nLicense, supplemented by the additional permissions listed below.\n\n  0. Additional Definitions.\n\n  As used herein, \"this License\" refers to version 3 of the GNU Lesser\nGeneral Public License, and the \"GNU GPL\" refers to version 3 of the GNU\nGeneral Public License.\n\n  \"The Library\" refers to a covered work governed by this License,\nother than an Application or a Combined Work as defined below.\n\n  An \"Application\" is any work that makes use of an interface provided\nby the Library, but which is not otherwise based on the Library.\nDefining a subclass of a class defined by the Library is deemed a mode\nof using an interface provided by the Library.\n\n  A \"Combined Work\" is a work produced by combining or linking an\nApplication with the Library.  The particular version of the Library\nwith which the Combined Work was made is also called the \"Linked\nVersion\".\n\n  The \"Minimal Corresponding Source\" for a Combined Work means the\nCorresponding Source for the Combined Work, excluding any source code\nfor portions of the Combined Work that, considered in isolation, are\nbased on the Application, and not on the Linked Version.\n\n  The \"Corresponding Application Code\" for a Combined Work means the\nobject code and/or source code for the Application, including any data\nand utility programs needed for reproducing the Combined Work from the\nApplication, but excluding the System Libraries of the Combined Work.\n\n  1. 
Exception to Section 3 of the GNU GPL.\n\n  You may convey a covered work under sections 3 and 4 of this License\nwithout being bound by section 3 of the GNU GPL.\n\n  2. Conveying Modified Versions.\n\n  If you modify a copy of the Library, and, in your modifications, a\nfacility refers to a function or data to be supplied by an Application\nthat uses the facility (other than as an argument passed when the\nfacility is invoked), then you may convey a copy of the modified\nversion:\n\n   a) under this License, provided that you make a good faith effort to\n   ensure that, in the event an Application does not supply the\n   function or data, the facility still operates, and performs\n   whatever part of its purpose remains meaningful, or\n\n   b) under the GNU GPL, with none of the additional permissions of\n   this License applicable to that copy.\n\n  3. Object Code Incorporating Material from Library Header Files.\n\n  The object code form of an Application may incorporate material from\na header file that is part of the Library.  You may convey such object\ncode under terms of your choice, provided that, if the incorporated\nmaterial is not limited to numerical parameters, data structure\nlayouts and accessors, or small macros, inline functions and templates\n(ten or fewer lines in length), you do both of the following:\n\n   a) Give prominent notice with each copy of the object code that the\n   Library is used in it and that the Library and its use are\n   covered by this License.\n\n   b) Accompany the object code with a copy of the GNU GPL and this license\n   document.\n\n  4. 
Combined Works.\n\n  You may convey a Combined Work under terms of your choice that,\ntaken together, effectively do not restrict modification of the\nportions of the Library contained in the Combined Work and reverse\nengineering for debugging such modifications, if you also do each of\nthe following:\n\n   a) Give prominent notice with each copy of the Combined Work that\n   the Library is used in it and that the Library and its use are\n   covered by this License.\n\n   b) Accompany the Combined Work with a copy of the GNU GPL and this license\n   document.\n\n   c) For a Combined Work that displays copyright notices during\n   execution, include the copyright notice for the Library among\n   these notices, as well as a reference directing the user to the\n   copies of the GNU GPL and this license document.\n\n   d) Do one of the following:\n\n       0) Convey the Minimal Corresponding Source under the terms of this\n       License, and the Corresponding Application Code in a form\n       suitable for, and under terms that permit, the user to\n       recombine or relink the Application with a modified version of\n       the Linked Version to produce a modified Combined Work, in the\n       manner specified by section 6 of the GNU GPL for conveying\n       Corresponding Source.\n\n       1) Use a suitable shared library mechanism for linking with the\n       Library.  
A suitable mechanism is one that (a) uses at run time\n       a copy of the Library already present on the user's computer\n       system, and (b) will operate properly with a modified version\n       of the Library that is interface-compatible with the Linked\n       Version.\n\n   e) Provide Installation Information, but only if you would otherwise\n   be required to provide such information under section 6 of the\n   GNU GPL, and only to the extent that such information is\n   necessary to install and execute a modified version of the\n   Combined Work produced by recombining or relinking the\n   Application with a modified version of the Linked Version. (If\n   you use option 4d0, the Installation Information must accompany\n   the Minimal Corresponding Source and Corresponding Application\n   Code. If you use option 4d1, you must provide the Installation\n   Information in the manner specified by section 6 of the GNU GPL\n   for conveying Corresponding Source.)\n\n  5. Combined Libraries.\n\n  You may place library facilities that are a work based on the\nLibrary side by side in a single library together with other library\nfacilities that are not Applications and are not covered by this\nLicense, and convey such a combined library under terms of your\nchoice, if you do both of the following:\n\n   a) Accompany the combined library with a copy of the same work based\n   on the Library, uncombined with any other library facilities,\n   conveyed under the terms of this License.\n\n   b) Give prominent notice with the combined library that part of it\n   is a work based on the Library, and explaining where to find the\n   accompanying uncombined form of the same work.\n\n  6. Revised Versions of the GNU Lesser General Public License.\n\n  The Free Software Foundation may publish revised and/or new versions\nof the GNU Lesser General Public License from time to time. 
Such new\nversions will be similar in spirit to the present version, but may\ndiffer in detail to address new problems or concerns.\n\n  Each version is given a distinguishing version number. If the\nLibrary as you received it specifies that a certain numbered version\nof the GNU Lesser General Public License \"or any later version\"\napplies to it, you have the option of following the terms and\nconditions either of that published version or of any later version\npublished by the Free Software Foundation. If the Library as you\nreceived it does not specify a version number of the GNU Lesser\nGeneral Public License, you may choose any version of the GNU Lesser\nGeneral Public License ever published by the Free Software Foundation.\n\n  If the Library as you received it specifies that a proxy can decide\nwhether future versions of the GNU Lesser General Public License shall\napply, that proxy's public statement of acceptance of any version is\npermanent authorization for you to choose that version for the\nLibrary.\n"
  },
  {
    "path": "ChangeLog",
    "content": "Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,\n\t  2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.\n\nCopyright 2009-2015 William Hart\n.\nThis file is part of the MPIR Library.\n.\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n.\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n.\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n\n2017-03-01 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 3.0.0 released\n\n2015-11-20 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.7.2 released\n\n2015-11-13 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.7.1 released\n\n2015-06-26 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.7.0 released\n\n2012-10-05 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.6.0 released\n\n2012-10-02 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.5.2 released\n\n2012-03-11 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.5.1 released\n\n2012-01-05 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.5.0 released\n\n2011-06-14 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.4.0 released\n\n2011-04-05 mpir-devel 
<http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.3.1 released\n\n2011-03-04 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.3.0 released\n\n2010-12-18 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.2.1 released\n\n2010-12-02 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.2.0 released\n\n2010-12-01 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.1.4 released\n\n2010-06-25 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.1.1 released\n\n2010-06-07 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.1.0 released\n\n2010-04-01 mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR version 2.0.0 released\n\n2009-01-30  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR Version 1.3.1 released.\n\n2009-01-18  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR Version 1.3.0 released.\n\n2009-06-06  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n    * MPIR Version 1.2.0 released.\n\n2009-04-15  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n\t* MPIR Version 1.1.0 released.\n\n2009-03-16  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n\t* MPIR Version 1.0.0 released.\n\n2009-01-10  mpir-devel <http://groups.google.com/group/mpir-devel>\n\n\t* MPIR Version 0.9.0 released.\n\n2006-05-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 4.2.1 released.\n\n2006-05-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/vmx/popcount.asm: Conditionally zero extend n.\n\n2006-04-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/divexact.c: Call mpz_tdiv_q for large operands.\n\n\t* configure.in (powerpc-*-darwin): Remove -fast, it affects PIC.\n\n2006-04-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Try to recognize Ultrasparc T1 (as ultrasparct1).\n\t* config.sub: Handle 
ultrasparct1.\n\n2006-04-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/gmp-mparam.h: Retune, without separation of GNUC and\n\tnon-GNUC data.\n\n2006-04-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Support powerpc eABI.\n\t* mpn/powerpc32/eabi.m4: New file.\n\n\t* configure.in: Support powerpc *bsd.\n\t* mpn/powerpc64/elf.m4: New name for mpn/powerpc64/linux64.m4.\n\t* mpn/powerpc32/elf.m4: New name for mpn/powerpc32/linux.m4.\n\n\t* mpn/powerpc64/linux64.m4 (ASM_END): Quote TOC_ENTRY.\n\n2006-04-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (gmp_mpn_functions_optional): Add lshiftc.\n\t(HAVE_NATIVE): Add lshiftc.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_ADDSUB_N_CALL): New macro.\n\t* tune/common.c (speed_mpn_addsub_n): New function.\n\t* tune/speed.c (routine): Add measuring of mpn_addsub_n.\n\n\t* mpn/powerpc64/linux64.m4 (TOC_ENTRY): Define to empty.\n\t* mpn/powerpc64/aix.m4 (TOC_ENTRY): Likewise.\n\t* mpn/powerpc32/aix.m4 (TOC_ENTRY): Likewise.\n\n\t* mpn/powerpc32/aix.m4 (EXTERN): New, copied form powerpc64/aix.m4.\n\t* mpn/powerpc32/mode1o.asm: Use EXTERN.\n\t* mpn/powerpc32/linux.m4 (EXTERN): Provide dummy definition.\n\t* mpn/powerpc32/darwin.m4 (EXTERN): Likewise.\n\n2006-04-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c (choice_array): Add mpn_addsub_n[c].\n\n2006-04-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* extract-dbl.c: Don't rely on undefined casts to mp_limb_signed_t.\n\n\t* configure.in: Disable AC_C_RESTRICT for now.\n\t* mpn/generic/diveby3.c: Don't use restict keyword for now.\n\n2006-04-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.\n\n\t* aclocal.m4: Regenerate with patched libtool.\n\n\t* mpn/asm-defs.m4 (ASM_END): Provide (empty) default.\n\n2006-04-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (gmp_mpn_functions_optional): Add addsub.\n\n\t* Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):\n\tBump version info.\n\n\t* 
gmpxx.h: Remove missed MPFR references.\n\n\t* gmp-impl.h (TMP_*, WANT_TMP_DEBUG): Don't expect marker argument;\n\tdefine TMP_SALLOC and TMP_BALLOC.\n\n2006-04-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* tal-notreent.c (__gmp_tmp_mark): Add \"struct\" tag for tmp_marker.\n\t(__gmp_tmp_free): Likewise.\n\n2006-03-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 4.2 released.\n\n\t* mpn/powerpc64/aix.m4 (LEA): Renamed from LDSYM.\n\t* mpn/powerpc64/darwin.m4: Likewise.\n\t* mpn/powerpc64/linux64.m4: Likewise.\n\t* mpn/powerpc64/vmx/popcount.asm: Use LEA, not LDSYM.\n\n2006-03-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: (class gmp_allocated_string): Prefix strlen with std::.\n\n\t* gmpxx.h (__GMP_DEFINE_TERNARY_EXPR2): Remove for now.\n\t(struct __gmp_ternary_addmul2): Likewise.\n\t(struct __gmp_ternary_submul2): Likewise.\n\n\t* gmpxx.h: #include <cstring>.\n\t(struct __gmp_alloc_cstring): Prefix strlen with std::.\n\n\t* mpn/x86/pentium/com_n.asm: Add TEXT and ALIGN.\n\t* mpn/x86/pentium/copyi.asm: Likewise.\n\t* mpn/x86/pentium/copyd.asm: Likewise.\n\n2006-03-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in: Add a \"using std::FILE\" for C++.\n\t(_GMP_H_HAVE_FILE): Check also _ISO_STDIO_ISO_H.\n\n\t* gmpxx.h: Remove mpfr code.\n\t* tests/cxx: Likewise.\n\n\t* gmp-impl.h (FORCE_DOUBLE): Rename a tempvar to avoid a clash with\n\tGNU/Linux public include file.\n\n\t* configure.in (powerpc64, darwin): New optional, gcc_cflags_subtype.\n\tGrab powerpc32/darwin.m4 for ABI=mode32.\n\n\t* configure.in: Use host_cpu whenever just the cpu type is needed.\n\n2006-03-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/get_si.c: Fix a typo.\n\n\t* tests/mpq/t-get_d.c (check_random): Improve random generation for\n\tnails.\n\n2006-02-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpq/t-get_d.c (check_random): New function.\n\t(main): Call check_random.\n\n\t* mpq/set_d.c: Make choices based on LIMBS_PER_DOUBLE, not\n\tBITS_PER_MP_LIMB.  
Make it work for LIMBS_PER_DOUBLE == 4.\n\tUse MPZ_REALLOC.\n\n\t* mpz/set_d.c: Make it work for LIMBS_PER_DOUBLE == 4.\n\n\t* extract-dbl.c: Make it work for LIMBS_PER_DOUBLE > 3.\n\n2006-02-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/cmp_d.c: Declare `i'.\n\t* mpz/cmpabs_d.c: Likewise.\n\n2006-02-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/vmx/copyd.asm: Set right VRSAVE bits.\n\t* mpn/powerpc32/vmx/copyi.asm: Likewise.\n\n2006-02-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/vmx/logops_n.asm: New file.\n\n\t* mpn/powerpc32/diveby3.asm: Rewrite.\n\n2006-02-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/vmx/copyi.asm: New file.\n\t* mpn/powerpc32/vmx/copyd.asm: New file.\n\n2006-02-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/nails/aors_n.asm (CYSH): Import proper setting from\n\tdeleted mpn_sub_n.\n\n2006-02-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/addmul_1.asm: Correct slotting comments.\n\n2006-02-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/anymul_1.c: Copy error reporting code from addmul_N.c.\n\n\t* tests/devel/addmul_N.c: New file.\n\t* tests/devel/mul_N.c: New file.\n\n\t* mpn/alpha/default.m4 (PROLOGUE_cpu): Align functions at 16-byte\n\tboundary.\n\n\t* mpn/alpha/ev6/nails/aors_n.asm: New file.\n\t* mpn/alpha/ev6/nails/add_n.asm: Remove.\n\t* mpn/alpha/ev6/nails/sub_n.asm: Remove.\n\n\t* mpn/alpha/ev6/nails/addmul_1.asm: Rewrite.\n\t* mpn/alpha/ev6/nails/submul_1.asm: Likewise.\n\t* mpn/alpha/ev6/nails/mul_1.asm: Likewise.\n\n\t* mpn/alpha/ev6/nails/addmul_2.asm: Use L() for labels.\n\t* mpn/alpha/ev6/nails/addmul_3.asm: Use L() for labels.\n\t* mpn/alpha/ev6/nails/addmul_4.asm: Use L() for labels.\n\n2006-02-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/diveby3.asm: Trivially reorder loop insns to save\n\t1 c/l.\n\n2006-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/sqr_diagonal.asm: Software pipeline.\n\n\t* 
mpn/powerpc64/vmx/popcount.asm: Add prefetching.\n\n2006-02-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/vmx/popcount.asm: Remove mpn_hamdist partial code.\n\tMove compare for huge n so that it is always executed.\n\n2006-02-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/linux.m4 (LEA): Add support for PIC.\n\n\t* configure.in (powerpc): New optional, gcc_cflags_subtype.\n\n\t* mpn/powerpc64/linux64.m4 (PROLOGUE_cpu): Align function start to\n\t16-multiple.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\t* mpn/powerpc64/darwin.m4: Likewise.\n\n\t* mpn/powerpc64/copyi.asm: Align loop to 16-multiple.\n\t* mpn/powerpc64/copyd.asm: Likewise\n\n\t* configure.in (powerpc): Add vmx to relevant paths.\n\n\t* mpn/powerpc64/linux64.m4 (DEF_OBJECT): Accept 2nd argument, for\n\talignment.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\t* mpn/powerpc64/darwin.m4: Likewise.\n\n\t* mpn/powerpc32/linux.m4 (DEF_OBJECT, END_OBJECT): New macros,\n\tinherited from powerpc64 versions.\n\t* mpn/powerpc32/aix.m4: Likewise.\n\t* mpn/powerpc32/darwin.m4: Likewise.\n\n\t* mpn/powerpc64/vmx/popcount.asm: New file, for ppc32 and ppc64.\n\t* mpn/powerpc32/vmx/popcount.asm: New file, grabbing above file.\n\n2006-01-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Generalize OS-dependent patterns for powerpcs.\n\n2006-01-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Recognize power4 and up under linux-gnu.\n\t* config.sub: Generalize power recognition code.\n\t* acinclude.m4 (POWERPC64_PATTERN): Add 64-bit powerpc processors.\n\t* configure.in: Recognize powerpc procssors masquerading as power\n\tprocssors.\n\n2006-01-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/diveby3.c: Use GMP standard parameter names.  Nailify\n\talternative code.  
Use restrict for params.\n\n\t* configure.in: Recognize andn_n as not needing nailification.\n\n\t* tests/mpq/t-equal.c (check_various): Disable a test that gives common\n\tfactors for GMP_NUMB_BITS == 62.\n\n2006-01-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Fix digit count computation,\n\twas inaccurate for nails.\n\n2006-01-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86_64/mode1o.asm: Remove unneeded carry register zeroing.\n\n2006-01-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/sqr_diagonal.asm: New file.\n\n2006-01-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mullow_n.c (MUL_BASECASE_ALLOC): New #define.\n\t(mpn_mullow_n): Use it.\n\n\t* mpn/powerpc64/aix.m4 (EXTERN): Define to import symbol.\n\t(LDSYM): Remove [RW] attribute.\n\t* mpn/powerpc64/linux64.m4 (EXTERN): Dummy definition.\n\t* mpn/powerpc64/darwin.m4 (EXTERN): Likewise.\n\n2006-01-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/linux64.m4: Move toc entry generation from direct at\n\tDEF_OBJECT to delayed via LDSYM, define ASM_END to output it.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\t* mpn/powerpc64/darwin.m4: Define a dummy ASM_END.\n\n2006-01-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Update bugs reporting address.\n\n2006-01-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/linux64.m4 (CALL): New macro.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\t* mpn/powerpc64/darwin.m4: Likewise, also define macro \"DARWIN\".\n\n2005-12-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/ui_div.c: Implement workaround for GCC bug triggered on alpha.\n\t* mpf/set_q.c: Likewise.\n\n2005-12-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Remove statement with no effect.\n\tRename dead variable to `dummy'.\n\n2005-12-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (setup_error_handler): Add a missing \";\".\n\n2005-11-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* 
mpn/generic/mul.c: Crudely call mpn_mul_fft_full before checking\n\tfor unbalanced operands.\n\n\t* mpn/generic/mul_fft.c: Remove many scalar divisions.\n\t(mpn_mul_fft_lcm): Simplify.\n\t(mpn_mul_fft_decompose): Rewrite to handle arbitrarily unbalanced\n\toperands.\n\n2005-11-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Properly recognize all 32-bit Solaris releases.\n\n2005-11-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_fft.c: Inline mpn_fft_mul_2exp_modF,\n\tmpn_fft_add_modF and mpn_fft_normalize.\n\n2005-11-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/reuse.c: Increase operand size, decrease # of reps.\n\n2005-10-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/pprime_p.c (mpz_probab_prime_p): Considerably limit trial\n\tdividing.\n\n\t* mpz/perfpow.c (mpz_perfect_power_p): Use mpz_divisible_ui_p instead\n\tof mpz_tdiv_ui.\n\n\t* mpz/divegcd.c: Correct probability number for GCD == 1.\n\n\t* mpn/x86: Add cycle counts for array of x86 processors.\n\n\t* mpn/x86/k7/mod_34lsub1.asm: Remove spurious mentions of ebp.\n\n\t* mpn/powerpc32: Add POWER5 timings.\n\n\t* mpn/powerpc32/README: Describe global reference variations.\n\n\t* mpn/generic/dive_1.c: Whitespace cleanup.\n\n\t* mpn/alpha/ev6/nails/addmul_1.asm: Correct comments on slotting.\n\t* mpn/alpha/ev6/nails/addmul_2.asm: Likewise.\n\t* mpn/alpha/ev6/nails/addmul_4.asm: Likewise.\n\n\t* mpf/out_str.c: List some allocation improvement ideas.\n\n\t* doc/gmp.texi: Update many URLs and email addresses.\n\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Check also _STDIO_H_INCLUDED.\n\n2005-10-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/tuneup.c (tune_mullow): Update param.max_size for each threshold\n\tmeasurement.\n\n\t* configure.in (POWERPC64_PATTERN/*-*-darwin*): Set\n\tSPEED_CYCLECOUNTER_OBJ_mode64 and cyclecounter_size_mode64.\n\t(POWERPC64_PATTERN/*-*-linux*): Likewise.\n\n2005-10-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/factorize.c 
(factor_using_division_2kp): Honor verbose flag.\n\t(factor_using_pollard_rho): Divide out new factor before it's\n\tclobbered.  Don't stop factoring after a composite factor was found.\n\n2005-09-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (fns): Add factorial keywords.\n\n2005-08-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/Makefile.am (EXTRA_DIST): Change \"amd64\" => \"x86_64\".\n\t* mpn/Makefile.am (TARG_DIST): Change \"amd64\" => \"x86_64\".\n\n2005-08-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Change \"amd64\" => \"x86_64\".\n\n2005-06-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/pre_mod_1.c: Canonicalize variable names.\n\n\t* mpn/generic/divrem.c: Rate qxn test as UNLIKELY.\n\n\t* tune/tuneup.c (tune_mullow): Fix all max_size fields.\n\n\t* gmp-impl.h (SQR_TOOM3_THRESHOLD_LIMIT): New #define.\n\t* tune/tuneup.c (tune_sqr): Use SQR_TOOM3_THRESHOLD_LIMIT.\n\t(sqr_toom3_threshold): Initialize from SQR_TOOM3_THRESHOLD_LIMIT.\n\n\t* mpn/generic/mul_n.c (mpn_sqr_n): Use SQR_TOOM3_THRESHOLD_LIMIT.\n\n\t* gmp-impl.h (mpn_nand_n, mpn_iorn_n, mpn_nior_n, mpn_xnor_n):\n\tHandle nails.\n\n2005-06-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/sub_n.asm: Analogous changes as to add_n.asm last.\n\n2005-06-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/add_n.asm: Rewrite inner loop to load later.\n\tAdd mpn_add_nc entry.\n\n\t* mpn/alpha/ev6/addmul_1.asm: Remove redundant initial loads.\n\n2005-06-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/dive_1.c: Use variable h for upper umul_ppmm result.\n\n2005-06-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c (choice_array): Exclude mpn_preinv_mod_1 unless\n\tUSE_PREINV_MOD_1.\n\t(choice_array): Exclude mpn_sqr_basecase if SQR_KARATSUBA_THRESHOLD\n\tis zero.\n\n2005-06-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/addmul_1.asm: Prefix all labels with \"$\".\n\t* mpn/alpha/ev6/mul_1.asm: Likewise.\n\n2005-06-02 
 Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/refmpn.c (refmpn_divmod_1c_workaround): Implement workaround\n\tto gcc 3.4.x bug triggered on powerpc64 with 32-bit ABI.\n\n2005-06-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c (main): Fix a typo.\n\n2005-05-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/addmul_1.asm: Rewrite for L1 cache, add prefetch.\n\n2005-05-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/misc.c (tests_rand_start): Mask random seed to 32 bits.\n\n2005-05-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/mode32/mul_1.asm: Handle BROKEN_LONGLONG_PARAM.\n\t* mpn/powerpc64/mode32/addmul_1.asm: Likewise.\n\t* mpn/powerpc64/mode32/submul_1.asm: Likewise.\n\n\t* mpn/powerpc32/mode1o.asm: Rewrite to actually work.\n\n\t* mpn/powerpc32/aix.m4 (LEA): New macro.\n\t(ASM_END): New macro.\n\n\t* mpn/powerpc32/linux.m4: New file.\n\t* mpn/powerpc32/darwin.m4: New file.\n\t* configure.in: Use linux.m4 and darwin.m4.\n\t(powerpc64-linux-gnu): Add support for mode32.\n\n2005-05-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mullow_n.c: Remove FIXME mentioning fixed flaw.\n\n\t* tests/mpz/t-cmp_d.c (check_one): Fix printf fmt string typo.\n\n\t* demos/isprime.c: #include stdlib.h.\n\t* tests/rand/t-urbui.c: Likewise.\n\t* tests/rand/t-urmui.c: Likewise.\n\n\t* tests/mpz/t-popcount.c (check_random): Remove spurious printf arg.\n\n2005-05-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c (param_init) [TYPE_GET_STR]: Set retval field.\n\t(compare): Handle SIZE_GET_STR as SIZE_RETVAL.\n\n\t* tests/refmpn.c (refmpn_get_str): Rewrite to make it work.\n\n2005-05-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (many places): Remove lvalue casts.\n\n\t* gmp-impl.h (MPF_SIGNIFICANT_DIGITS): Cast prec to avoid overflow\n\tfor > 4G digits.\n\n\t* mpn/alpha/ev6/add_n.asm: Prefetch using ldl.\n\t* mpn/alpha/ev6/sub_n.asm: Likewise.\n\n\t* mpn/alpha/ev6/slot.pl (optable): Recognize negq and 
ldl.\n\n2005-05-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev67/popcount.asm: Prefetch.\n\t* mpn/alpha/ev67/hamdist.asm: Prefetch.\n\n\t* longlong.h (add_ssaaaa) [x86]: Remove lvalue casts.\n\t(sub_ddmmss) [x86]: Likewise.\n\n\t* tests/devel/try.c (param_init) [TYPE_MPZ_JACOBI]: Add DATA_SRC1_ODD.\n\t(param_init) [TYPE_MPZ_KRONECKER]: Clear inherited DATA_SRC1_ODD.\n\t(param_init) [TYPE_DIVEXACT_1]: Use symbolic name DIVISOR_LIMB.\n\n2005-05-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c (param_init) [TYPE_MPZ_JACOBI]: Initialize divisor\n\tfield according to UDIV_NEEDS_NORMALIZATION.\n\n\t* mpz/mul_i.h: Remove left-over TMP_XXXX marker arguments.\n\n2005-05-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/addmul_1.asm (mpn_addmul_1c): Put carry in\n\tproper register.\n\n\t* mpn/generic/sqr_basecase.c (mpn_sqr_basecase, addmul_2 version):\n\tAvoid accesses out-of-bound in MPN_SQR_DIAGONAL applicate code.\n\n2005-05-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/diveby3.asm: Make it actually work.\n\n\t* gmp-impl.h (MULLOW_BASECASE_THRESHOLD_LIMIT): New #define.\n\t* mpn/generic/mullow_n.c: Use fixed stack allocation for the smallest\n\toperands; use TMP_S* allocation for medium operands.\n\n\t* gmp-impl.h: Remove nested TUNE_PROGRAM_BUILD test.\n\n2005-05-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c: Make squaring and multiplication code more\n\tsimilar.  
Use TMP_S* functions.\n\n\t* gmp-impl.h (TMP_DECL, TMP_MARK, TMP_FREE): Get rid of argument.\n\t(TMP_SALLOC): New macro for \"small\" allocations.\n\t(TMP_BALLOC): New macro for \"big\" allocations.\n\t(TMP_SDECL, TMP_SMARK, TMP_SFREE): New macros for functions that use\n\tjust TMP_SALLOC.\n\t(WANT_TMP_ALLOCA): Make default functions choose alloca or reentrant\n\tfunctions, depending on size.\n\n\t* *.c: Remove TMP_XXXX marker arguments.\n\n\t* acinclude.m4 (WANT_TMP): Want tal-reent.lo also for alloca case.\n\n2005-05-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (udiv_qrnnd_preinv2): Pull an add into add_ssaaaa.\n\t(udiv_qrnnd_preinv2gen): Likewise.\n\n2005-05-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (add_ssaaaa) [x86_64]: Restrict allowed immediate\n\toperands.\n\t* (sub_ddmmss) [x86_64]: Likewise.\n\n2005-05-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* acinclude.m4 (GMP_HPC_HPPA_2_0): Make gmp_tmp_v1 sed pattern handle\n\tversion numbers like B.11.X.32509-32512.GP.\n\n\t* mpn/m68k/aors_n.asm: Correct MULFUNC_PROLOGUE.\n\n\t* mpf/inp_str.c: Use plain int for mpf_set_str return value (works\n\taround gcc 4 bug).\n\n\t* acinclude.m4 (GMP_ASM_POWERPC_PIC_ALWAYS): Handle darwin's assembly\n\tsyntax.\n\t(long long reliability test 1): New GMP_PROG_CC_WORKS_PART test.\n\t(long long reliability test 2): New GMP_PROG_CC_WORKS_PART test.\n\n\t* configure.in: Add mode64 support for darwin.  
Use darwin.m4.\n\tAdd cflags_opt flags for mode32 darwin.\n\n\t* mpn/asm-defs.m4 (PIC_ALWAYS): Define PIC just iff PIC_ALWAYS = \"yes\".\n\n\t* mpn/powerpc64/darwin.m4: New file.\n\n\t* mpn/powerpc64/linux64.m4: Remove TOCREF, add LDSYM.\n\tRework DEF_OBJECT to need just one argument.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\n2005-05-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/popham.c: Compute final summation differently for 64-bit.\n\n\t* tests/mpz/t-popcount.c (check_random): New function.\n\t(main): Call it.\n\n2005-04-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/copyi.asm: If HAVE_ABI_mode32, ignore upper 32 bits of\n\tmp_size_t argument.\n\t* mpn/powerpc64/copyd.asm: Likewise.\n\t* mpn/powerpc64/sqr_diagonal.asm: Likewise.\n\t* mpn/powerpc64/lshift.asm: Likewise.\n\t* mpn/powerpc64/rshift.asm: Likewise.\n\n2005-04-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/rootrem.c: Allocate PP_ALLOC limbs also for qp.\n\n2005-04-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/add_n.asm: Add nc entry point.\n\t* mpn/powerpc32/sub_n.asm: Likewise.\n\n\t* mpn/alpha/add_n.asm: Add correct cycle/limb numbers.\n\t* mpn/alpha/sub_n.asm: Likewise.\n\t* mpn/alpha/ev5/add_n.asm: Likewise.\n\t* mpn/alpha/ev5/sub_n.asm: Likewise.\n\n2005-03-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/k7/gmp-mparam.h: Fix typo in last change.\n\n2005-03-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/gmp-mparam.h: Update.\n\t* mpn/alpha/ev5/gmp-mparam.h: Update.\n\t* mpn/alpha/ev6/gmp-mparam.h: Update.\n\n\t* mpn/x86/p6/mmx/gmp-mparam.h: Update.\n\t* mpn/x86/pentium4/sse2/gmp-mparam.h: Update.\n\t* mpn/x86/k7/gmp-mparam.h: Update.\n\n\t* tests/mpz/t-gcd.c (main): Honor command line reps argument.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Simplify and correct code\n\tfor generating test operands.\n\n2005-01-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_SIZEINBASE): Count bits in type 
size_t.\n\t(MPN_SIZEINBASE_16): Likewise.\n\n2004-12-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.c (run_gnuplot): Use lines, not linespoints.\n\tOutput a reset gnuplot command initially.\n\n2004-12-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/random2.c (gmp_rrandomb): Rework again.\n\t* mpz/rrandomb.c (gmp_rrandomb): Likewise.\n\n2004-11-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/diveby3.asm: Use correct prefetch instruction.\n\n2004-11-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/diveby3.asm: Add \",gp\" glue in PROLOGUE.\n\tAdd r31 dummy operand to `br' instruction.\n\n2004-11-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Invoke AC_C_RESTRICT.\n\n2004-11-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/diveby3.asm: New file.\n\n2004-11-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/popham.c: Add comment.\n\n2004-11-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Strip any PPC string in /proc/cpuinfo.\n\tRecognize 970 in that code.\n\n2004-10-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/README: Update POWER5/PPC970 pipeline information.\n\n\t* mpn/generic/mul_basecase.c (MAX_LEFT): Add comment.\n\n\t* doc/gmp.texi: Consistently use \"x86\" denotation.\n\t(Assembler SIMD Instructions): Mention SSE2 usage.\n\n\t* demos/pexpr.c (main): Handle \"negative\" base in mpz_sizeinbase call.\n\n2004-10-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Support icc under x86.\n\t(ia64-*-linux*): Pass -no-gcc to icc.\n\n2004-10-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (ia64 umul_ppmm): Add version for icc.\n\n\t* configure.in: Support icc under ia64-*-linux*.\n\n\t* acinclude.m4: New \"compiler works\" test for icc 8.1 bug.\n\t(GMP_PROG_CC_IS_GNU): Don't let Intel's icc fool us it is GCC.\n\n2004-10-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* acinclude.m4 (GMP_ASM_W32): Try also \"data4\".\n\n\t* longlong.h: Exclude masquerading 
__INTEL_COMPILER from ia64 asm.\n\t* gmp-impl.h: Likewise.\n\n2004-10-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/try.c: Handle mpn_mul_2 like mpn_addmul_2.\n\n\t* tune/speed.c (routine): Make R parameter optional for mpn_mul_2.\n\n2004-10-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/addmul_1.asm: Update a comment.\n\n\t* tests/devel/aors_n.c: #include tests.h.\n\t* tests/devel/anymul_1.c: Likewise.\n\t* tests/devel/shift.c: Likewise.\n\t* tests/devel/copy.c: Likewise.\n\n\t* tests/devel/aors_n.c: Handle also mpn_addlsh1_n, mpn_sublsh1_n,\n\tmpn_rsh1add_n, and mpn_rsh1sub_n.\n\n\t* tests/devel/Makefile.am (EXTRA_PROGRAMS): Updates for yesterday's\n\tfile removals and additions.\n\n2004-10-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/copy.c: Handle both MPN_COPY_INCR and MPN_COPY_DECR.\n\n\t* tests/devel/logops_n.c: New file, handle all logical operations.\n\n\t* tests/devel/anymul_1.c: New file, handle mpn_mul_1, mpn_addmul_1, and\n\tmpn_submul_1\n\t* tests/devel/mul_1.c: Remove.\n\t* tests/devel/addmul_1.c: Remove.\n\t* tests/devel/submul_1.c: Remove.\n\n\t* tests/devel/shift.c: New file, handle mpn_lshift and mpn_rshift.\n\t* tests/devel/lshift.c: Remove.\n\t* tests/devel/rshift.c: Remove.\n\n\t* tests/devel/aors_n.c: New file, handle mpn_add_n and mpn_sub_n.\n\t* tests/devel/add_n.c: Remove.\n\t* tests/devel/sub_n.c: Remove.\n\n2004-10-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/linux64.m4: Define DEF_OBJECT, END_OBJECT, and TOCREF.\n\t* mpn/powerpc64/aix.m4: Likewise.\n\n2004-10-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/copyi.asm: New file.\n\t* mpn/powerpc64/copyd.asm: New file.\n\t* gmp-h.in: Remove PPC MPN_COPY variants.\n\t* gmp-impl.h: Likewise.\n\n2004-10-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* configfsf.guess: Patch HP-UX code to accomodate HP compiler's new\n\tinability to read from stdin.\n\n\t* mpn/powerpc64/mode64/addsub_n.asm: Remove accidentally added 
file.\n\n2004-09-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (x86 ULONG_PARITY): Work around GCC change of \"q\" register\n\tflag.\n\n2004-09-28  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_fft.c (mpn_mul_fft): Fix a bug in the choice of the\n\trecursive fft parameters.\n\n2004-09-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/misc.c (tests_rand_start): Default to strtoul for re-seeding.\n\n\t* tests/mpz/t-mul.c (ref_mpn_mul): Fudge tmp allocation for toom3.\n\n2004-09-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/misc.c (tests_rand_start): Shift tv_usec for better seeding.\n\n2004-09-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/misc.c (tests_rand_start): Invoke fflush after printing seed.\n\n\t* tests/mpz/t-mul.c (main): Check environment for GMP_CHECK_FFT, run\n\textra FFT tests if set.\n\t(ref_mpn_mul): Use library code for kara and toom, but skewded so that\n\twe never use the same algorithm that we're testing.\n\t(mul_kara): Delete.\n\t(debug_mp): Print just one line of large numbers.\n\t(ref_mpn_mul): Rework usage of tp temporary space.\n\n2004-09-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/sqr_basecase.c: In variant for HAVE_NATIVE_mpn_addmul_2,\n\taccumulate carry also for when HAVE_NATIVE_mpn_addlsh1_n.\n\n2004-08-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-sizeinbase.c: Disable mpz_fake_bits and check_sample.\n\n2004-06-17  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi: Use @. when sentence ends with a capital, for good\n\tspacing in tex.\n\t(Language Bindings): Add gmp-d, reported by Ben Hinkle.  Update SWI\n\tProlog URL, reported by Jan Wielemaker.\n\n2004-06-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Handle --enable-fat.  Use that to enable x86 fat\n\tbuilds, remove magic meaning of i386-*-*.\n\n2004-06-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (memset): Use a local char* pointer, in case parameter is\n\tsomething else (eg. tune/common.c).  
Reported by Emmanuel Thomé.\n\n2004-06-01  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (i?86-*-*): Avoid \"Illegal instruction\" message which\n\tgoes to stdout on 80386 freebsd4.9.\n\n2004-05-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c (gcdext_1_odd): Use masking to avoid jumps.\n\n2004-05-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/addmul_1.asm: Add Prescott cycle numbers.\n\n\t* mpn/powerpc32/lshift.asm: Add more cycle numbers.\n\t* mpn/powerpc32/rshift.asm: Likewise.\n\n2004-05-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (mpn_mullow_n, mpn_mullow_basecase): Declare.\n\n\t* tune/Makefile.am: Compile gcdext.c.\n\n\t* gmp-impl.h (GET_STR_THRESHOLD_LIMIT): Lower outrageous value to 150.\n\n2004-05-21  Niels Möller  <nisse@lysator.liu.se>\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Ensure first operand is\n\tnot smaller than 2nd operand.\n\n2004-05-17  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (mpz_get_ui): Use #if instead of plain if, and for nails\n\tuse ?: same as normal case, to avoid warnings from Borland C++ 6.0.\n\tReported by delta trinity.\n\n2004-05-15  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c (getrusage_backwards_p): New function\n\t(speed_time_init): Use it to exclude broken netbsd1.4.1 getrusage.\n\t* configure.in (m68*-*-netbsd1.4*): Remove code pretending getrusage\n\tdoesn't exist.\n\t* tune/README (NetBSD 1.4.1 m68k): Update notes.\n\n\t* configure.in (mips*-*-* ABI=n32): Remove gcc_n32_ldflags and\n\tcc_n32_ldflags, libtool knows to put the linker in n32 mode.\n\n2004-05-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess (powerpc*-*-*): Add more processor types to mfpvr code.\n\t* configure.in: Generalize powerpc subtype matching code.\n\n\t* mpz/fac_ui.c: Misc cleanups, spelling corrections.\n\n2004-05-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/sub.c: When one operand cancels high limbs of the other, strip\n\thigh zeros on the balance before truncating to 
destination precision.\n\tTruncating first loses accuracy and can lead to a result 0 despite\n\toperands being not equal.  Reported by John Abbott.\n\tAlso, ensure exponent is zero when result is zero, for instance if\n\toperands are exactly equal.\n\t* tests/mpf/t-sub.c (check_data): New function, exercising these.\n\n2004-05-12  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_PROG_RANLIB): New macro, supposedly required by\n\tautomake, though it doesn't complain.\n\n\t* demos/expr/Makefile.am (ARFLAGS): Add a default setting, to\n\tworkaround an automake bug.\n\n2004-05-10  Kevin Ryde  <kevin@swox.se>\n\n\t* */Makefile.in, install-sh, aclocal.m4: Update to automake 1.8.4.\n\n\t* doc/gmp.texi (Demonstration Programs): Add a remark about expression\n\tevaluation in the main gmp library.\n\n\t* demos/expr/exprfa.c (mpf_expr_a): Correction to mpX_init, use\n\tmpf_init2 to follow requested precision.\n\t* demos/expr/exprza.c, demos/expr/exprqa.c: Use wrappers for mpX_init,\n\tto make parameters match.\n\n\t* demos/expr/run-expr.c: Don't use getopt, to avoid needing configury\n\tfor optarg declaration.  
Remove TRY macro, rename foo and bar to var_a\n\tand var_b, for clarity.\n\t* demos/expr/expr-impl.h: Don't use expr-config.h.\n\t* configure.in (demos/expr/expr-config.h): Remove.\n\t* demos/expr/expr-config.in: Remove file.\n\n2004-05-08  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/configuration (Configure): Update for current automake not\n\tcopying acinclude.m4 into aclocal.m4.\n\n\t* configure.in, Makefile.am, doc/gmp.texi, doc/configuration,\n\ttests/cxx/Makefile.am, demos/expr/Makefile.am, demos/expr/README,\n\tdemos/expr/expr.c, demos/expr/expr.h, demos/expr/expr-config-h.in,\n\tdemos/expr/expr-impl.h, demos/expr/run-expr.c, demos/expr/t-expr.c:\n\tMPFR now published separately, remove various bits.\n\t* mpfr/*, tests/cxx/t-headfr.cc, demos/expr/exprfr.c,\n\tdemos/expr/exprfra.c: Remove.\n\n2004-05-07  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/cxx/Makefile.am (TESTS_ENVIRONMENT): Amend c++ shared library\n\tpath hack, on k62-unknown-dragonfly1.0 /usr/bin/make runs its commands\n\t\"set -e\", so we need an \"|| true\" in case there's nothing to copy (for\n\tinstance in a static build).\n\n2004-05-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/mode1o.c: Remove, in favour of ...\n\t* mpn/alpha/mode1o.asm: New file.\n\t* mpn/alpha/alpha-defs.m4 (bwx_available_p): New macro.\n\n\t* tune/amd64.asm: Save rbx in r10 rather than on the stack.\n\n\t* configure.in (x86_64-*-*): Try also \"-march=k8 -mno-sse2\", in case\n\twe're in ABI=32 on an old OS not supporting xmm regs.\n\t(GMP_GCC_PENTIUM4_SSE2, GMP_OS_X86_XMM): Run these tests under\n\t-march=k8 too, and not under ABI=64.\n\n\t* doc/gmp.texi (Converting Integers): For mpz_get_d, note truncation\n\tand overflows.  
For mpz_get_d_2exp note truncation, note result if\n\tOP==0, and cross reference libc frexp.\n\t(Rational Conversions): For mpq_get_d, note truncation and overflows.\n\t(Converting Floats): For mpf_get_d, note truncation and overflows.\n\tFor mpf_get_d_2exp, note truncation, note result if OP==0.\n\t(Assembler Code Organisation): Note nails subdirectories.\n\tClarification of get_d_2exp OP==0 reported by Sylvain Pion.\n\n2004-05-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mullow_n.c, mpn/generic/mullow_basecase.c: New files\n\t(mainly by Niels Möller).\n\t* configure.in, mpn/Makefile.am: Add them.\n\n\t* gmp-impl.h (MULLOW_BASECASE_THRESHOLD, MULLOW_DC_THRESHOLD,\n\tMULLOW_MUL_N_THRESHOLD): Override for TUNE_PROGRAM_BUILD.\n\n\t* tune/Makefile.am: Compile mullow_n.c.\n\t* tune/common.c (speed_mpn_mullow_n, speed_mpn_mullow_basecase):\n\tNew functions.\n\t* tune/speed.c (routine): Add entries for mpn_mullow_n and\n\tmpn_mullow_basecase.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_MULLOW_N_CALL,\n\tSPEED_ROUTINE_MPN_MULLOW_BASECASE): New #defines.\n\t* tune/tuneup.c (tune_mullow): New function.\n\n\t* gmp-impl.h (invert_limb): Compute branch-freely.\n\n2004-05-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/amd64/mode1o.asm: Use movabsq to support large model non-PIC.\n\tUse 32-bit insns to save code bytes, and to save a couple of cycles on\n\tthe initial setup multiplies.\n\n2004-05-01  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi (References): Update gcc online docs url to\n\tgcc.gnu.org.\n\n\t* configure.in (mips*-*-irix[6789]*): Correction to m4 quoting of this\n\tpattern.  (Believe the mips64*-*-* part also used picks up all current\n\tirix6 tuples anyway.)  Reported by Rainer Orth.\n\n2004-04-30  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_X86_GOT_EAX_EMITTED,\n\tGMP_ASM_X86_GOT_EAX_OK): New macros.\n\t(GMP_PROG_CC_WORKS): Use them to detect an old gas bug tickled by\n\trecent gcc.  
Reported by David Newman.\n\n\t* doc/gmp.texi (Reentrancy): Note also gmp_randinit_default as an\n\talternative to gmp_randinit.\n\n2004-04-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* configfsf.guess: Update to 2004-03-12.\n\t* configfsf.sub: Likewise.\n\n2004-04-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/rrandomb.c (gmp_rrandomb): Rework to avoid extra limb allocation\n\tand to generate even numbers.\n\t* mpn/generic/random2.c (gmp_rrandomb): Likewise.\n\n2004-04-25  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (FORCE_DOUBLE): Don't use an asm with a match constraint\n\ton a memory output, apparently not supported and provokes a warning\n\tfrom gcc 3.4.\n\n2004-04-24  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (count_leading_zeros_gcc_clz,\n\tcount_trailing_zeros_gcc_ctz): New macros.\n\t(count_leading_zeros, count_trailing_zeros) [x86]: Use them on gcc\n\t3.4.\n\n\t* configure.in (x86-*-* gcc_cflags_cpu): Give a -mtune at the start of\n\teach option list, for use by gcc 3.4 to avoid deprecation warnings\n\tabout -mcpu.\n\n\t* mpz/aorsmul.c, mpz/aorsmul_i.c, mpz/cfdiv_q_2exp.c,\n\tmpz/cfdiv_r_2exp.c, mpq/aors.c, mpf/ceilfloor.c: Give REGPARM_ATTR()\n\ton function definition too, as demanded by gcc 3.4.\n\n2004-04-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/rand/t-lc2exp.c (check_bigc1): New test.\n\n\t* doc/fdl.texi: Tweak @appendixsubsec -> @appendixsec to match our\n\tpreference for this in an @appendix, and because texi2pdf doesn't\n\tsupport @appendixsubsec directly within an @appendix.\n\n2004-04-20  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/texinfo.tex: Update to 2004-04-07.08 from texinfo 4.7.\n\t* doc/gmp.texi, mpfr/mpfr.texi (@copying): Don't put a line break in\n\t@ref within @copying, recent texinfo.tex doesn't like that.\n\n\t* demos/perl/GMP.xs (static_functable): Treat cygwin the same as mingw\n\tDLLs.\n\n\t* */Makefile.in, install-sh: Update to automake 1.8.3.\n\t* ltmain.sh, aclocal.m4, configure: Update to libtool 1.5.6.\n\n\t* 
gmp-impl.h (LIMB_HIGHBIT_TO_MASK): Use a compile-time constant\n\texpression, rather than a configure test.\n\t* acinclude.m4, configure.in (GMP_C_RIGHT_SHIFT): Remove, no longer\n\tneeded.\n\t* tests/t-hightomask.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* macos/configure (parse_top_configure): Look for PACKAGE_NAME and\n\tPACKAGE_VERSION now used by autoconf.\n\t(what_objects): Only demand 9 object files, as for instance occurs in\n\tthe scanf directory.\n\t(asm files): Transform labels L(foo) -> Lfoo.  Take func name from\n\tPROLOGUE to support empty \"EPILOGUE()\".  Recognise and substitute\n\tregister name \"define()\"s.\n\t* macos/Makefile.in (CmnObjs): Add tal-notreent.o.\n\n2004-04-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_ROOTREM): New #define.\n\t(speed_mpn_rootrem): Declare.\n\t* tune/common.c (speed_mpn_rootrem): New function.\n\t* tune/speed.c (routine): Add entry for mpn_rootrem.\n\n2004-04-16  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/fdl.texi: Update from FSF, just fixing a couple of typos.\n\n\t* macos/configure, macos/Makefile.in: Add printf and scanf directories.\n\n\t* tests/mpz/t-gcd.c (check_data): New function, exercising K6\n\tgcd_finda bug.\n\n2004-04-14  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi (Reentrancy, Random State Initialization): Note\n\tgmp_randinit use of gmp_errno is not thread safe.  
Reported by Vincent\n\tLefèvre.\n\n\t* doc/gmp.texi (Random State Initialization): Add index entries for\n\tgmp_errno and constants.\n\n\t* mpn/m68k/README: Update _SHORT_LIMB -> __GMP_SHORT_LIMB.\n\n\t* configure.in (--enable-mpbsd): Typo Berkley -> Berkeley in help msg.\n\n2004-04-12  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/GMP.xs (static_functable): New macro, use it for all\n\tfunction tables, to support mingw DLL builds.\n\t* demos/perl/INSTALL (NOTES FOR PARTICULAR SYSTEMS): Remove note on\n\tDLLs, should be ok now.\n\n\t* demos/perl/sample.pl: Print the module and library versions in use.\n\n\t* demos/perl/GMP.pm, Makefile.PL (VERSION): Set to '2.00'.\n\t* demos/perl/GMP.pm (COPYRIGHT): New in the doc section.\n\n\t* Makefile.am: Note 4.1.3 libtool versioning info, and REVISION policy.\n\n\t* tal-debug.c: Add <stdlib.h> for abort.\n\n2004-04-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/refmpf.c (refmpf_add_ulp): Adjust exponent when needed.\n\n\t* mpn/generic/random2.c: Rewrite (clone mpz/rrandomb.c).\n\n2004-04-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/gcd_finda.asm: Correction jbe -> jb in initial setups.\n\tZero flag is wrong here, it reflects only the high limb of the compare,\n\tleading to n1>=n2 not satisfied and wrong results.  cp[1]==0x7FFFFFFF\n\twith cp[0]>=0x80000001 provokes this.\n\n\t* doc/gmp.texi (BSD Compatible Functions): Note \"pow\" name clash under\n\tthe pow function description too.\n\t(Language Bindings): Add XEmacs (betas at this stage).  
Reported by\n\tJerry James.\n\n\t* tests/refmpn.c (refmpn_mod2): Correction to ASSERTs, r==a is allowed.\n\n\t* gen-psqr.c (generate_mod): Cast mpz_invert_ui_2exp args, for K&R.\n\t* gen-bases.c, gen-fib.c, gen-psqr.c: For mpz_out_str, use stdout\n\tinstead of 0, in case a K&R treats int and FILE* params differently.\n\n2004-04-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (BSWAP_LIMB) [amd64]: New macro.\n\t(FORCE_DOUBLE): Use this for amd64 too.\n\n\t* tests/amd64check.c, tests/amd64call.asm: New files, derived in part\n\tfrom x86check.c and x86call.asm.\n\t* tests/Makefile.am (EXTRA_libtests_la_SOURCES): Add them.\n\t* configure.in (x86_64-*-* ABI=64): Use them.\n\n2004-04-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/amd64/mode1o.asm: New file.\n\t* mpn/amd64/amd64-defs.m4 (ASSERT): New macro.\n\n\t* mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/pentium4/sse2/divrem_1.asm: Add\n\tnote on how \"dr\" part of algorithm is handled.\n\n\t* mpn/x86/k7/dive_1.asm, mpn/x86/k7/mod_34lsub1.asm,\n\tmpn/x86/k7/mode1o.asm: Note Hammer (32-bit mode) speeds.\n\n2004-03-31  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi (Language Bindings): Add GOO, MLGMP and Numerix.\n\n\t* mpf/mul_2exp.c, mpf/div_2exp.c: Rate u==0 as UNLIKELY.\n\n2004-03-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c: Fix typo.\n\n\t* mpn/generic/sqr_basecase.c: Fix typo.\n\n2004-03-20  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (power, powerpc): Add comments on how we select this code.\n\n\t* gmp-h.in (mpz_get_ui): Use ?: instead of mask style, gcc treats the\n\ttwo identically but ?: is a bit clearer.\n\n\t* insert-dbl.c: Remove file, no longer used, scaling is now integrated\n\tin mpn_get_d.\n\t* Makefile.am (libgmp_la_SOURCES): Remove insert-dbl.c.\n\t* gmp-impl.h (__gmp_scale2): Remove prototype.\n\n2004-03-17  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/fat/fat.c (__gmpn_cpuvec_init, fake_cpuid_table): Add x86_64.\n\n\t* mpq/get_d.c: Use mpn_tdiv_qr, demand den>0 per canonical 
form.\n\n2004-03-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/sqr_basecase.c: Add versions using mpn_addmul_2 and\n\tmpn_addmul_2s.\n\n2004-03-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/mul_ui.c: Incorporate carry from low limbs, for exactness.\n\t* tests/mpf/t-mul_ui.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpf/div.c: Use mpn_tdiv_qr.  Use just one TMP_ALLOC.  Use full\n\tdivisor, since truncating can lose accuracy.\n\t* tests/mpf/t-div.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* tests/mpf/t-set_q.c, tests/mpf/t-ui_div.c (check_various): Amend\n\tbogus 99/4 test.\n\t* tests/mpf/t-ui_div.c (check_rand): Exercise r==v overlap.\n\n\t* tests/refmpf.c, tests/tests.h (refmpf_set_overlap): New function.\n\n\t* mpf/cmp_si.c [nails]: Correction, cast vval in exp comparisons, for\n\twhen vval=-0x800..00 and limb==longlong.\n\n\t* mpf/cmp_si.c [nails]: Correction, return usign instead of 1 when\n\tuexp==2 but value bigger than an mp_limb_t.\n\t* tests/mpf/t-cmp_si.c (check_data): Add test cases.\n\n\t* tests/trace.c (mpf_trace): Use ABS(mp_trace_base) to allow for\n\tnegative bases used for upper case hex in integer traces.\n\n2004-03-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/sb_divrem_mn.c: Correct header comment.\n\n2004-03-11  Kevin Ryde  <kevin@swox.se>\n\n\t* aclocal.m4, configure, ltmain.sh: Downgrade to libtool 1.5, version\n\t1.5.2 doesn't remove .libs/*.a files when rebuilding, which is bad for\n\tdevelopment when changing contents or with duplicate named files like\n\twe have.\n\n\tRevert this, ie restore AR_FLAGS=cq:\n\t* acinclude.m4 (GMP_PROG_AR): Remove AR_FLAGS=cq, libtool 1.5.2 now\n\tdoes this itself on detecting duplicate object filenames in piecewise\n\tlinking mode.\n\n\t* randbui.c, randmui.c [longlong+nails]: Correction to conditionals\n\tfor second limb.\n\n\t* mpz/aors_ui.h, mpz/cdiv_q_ui.c, mpz/cdiv_qr_ui.c, mpz/cdiv_r_ui.c,\n\tmpz/cdiv_ui.c, mpz/fdiv_q_ui.c, 
mpz/fdiv_qr_ui.c, mpz/fdiv_r_ui.c,\n\tmpz/fdiv_ui.c, mpz/gcd_ui.c, mpz/iset_ui.c, mpz/lcm_ui.c,\n\tmpz/set_ui.c, mpz/tdiv_q_ui.c, mpz/tdiv_qr_ui.c, mpz/tdiv_r_ui.c,\n\tmpz/tdiv_ui.c, mpz/ui_sub.c, mpf/div_ui.c, mpf/mul_ui.c\n\t[longlong+nails]: Amend #if to avoid warnings about shift amount.\n\n2004-03-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/reldiff.c: Use rprec+ysize limbs for d, to ensure accurate\n\tresult.  Inline mpf_abs(d,d) and mpf_cmp_ui(x,0), and rate the latter\n\tUNLIKELY.\n\n\t* mpf/ui_div.c: Use mpn_tdiv_qr.  Use just one TMP_ALLOC.  Use full\n\tdivisor, since truncating can lose accuracy.\n\t* tests/mpf/t-ui_div.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpf/set_q.c: Expand TMP_ALLOC_LIMBS_2, to make conditional clearer\n\tand avoid 1 limb alloc when not wanted.\n\n\t* gmp-impl.h (WANT_TMP_DEBUG): Define to 0 if not defined.\n\t(TMP_ALLOC_LIMBS_2): Use \"if\" within macro rather than \"#if\", for less\n\tpreprocessor conditionals.\n\n\t* mpf/mul_2exp.c, mpf/div_2exp.c: Add some comments.\n\n\t* tests/refmpn.c (refmpn_sb_divrem_mn, refmpn_tdiv_qr): Nailify.\n\n2004-03-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gen-psqr.c (print): Add CNST_LIMB in PERFSQR_MOD_TEST, for benefit\n\tof K&R.\n\t* tests/mpn/t-perfsqr.c (PERFSQR_MOD_1): Use CNST_LIMB for K&R.\n\n\t* doc/configuration (Configure): Remove mkinstalldirs, no longer used.\n\n\t* acinclude.m4 (GMP_PROG_AR): Remove AR_FLAGS=cq, libtool 1.5.2 now\n\tdoes this itself on detecting duplicate object filenames in piecewise\n\tlinking mode.\n\n\t* configure.in (hppa2.0*-*-*): Test sizeof(long) == 4 or 8 to verify\n\tABI=2.0n versus ABI=2.0w.  
In particular this lets CC=cc_bundled\n\tcorrectly fall back to ABI=2.0n (we don't automatically add CC=+DD64\n\tto that compiler, currently).\n\n\t* doc/gmp.texi (Reentrancy): Note C++ mpf_class constructors using\n\tglobal default precision.\n\t(Random State Miscellaneous): Describe gmp_urandomb_ui as giving N\n\tbits.\n\t(C++ Interface Floats): Describe operator= copying the value, not the\n\tprecision, and what this can mean about copy constructor versus\n\tdefault constructor plus assignment.\n\n\t* mpf/set_q.c: Use mpn_tdiv_qr rather than mpn_divrem, so no shifting.\n\tDon't truncate the divisor, it can make the result inaccurate.\n\t* tests/mpf/t-set_q.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpf/set.c: Use MPN_COPY_INCR, in case r==u and ABSIZ(u) > PREC(r)+1.\n\tNo actual bug here, because MPN_COPY has thusfar been an alias for\n\tMPN_COPY_INCR, only an ASSERT failure.\n\t* tests/mpf/t-set.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpf/set.c, mpf/iset.c: Do MPN_COPY last, for possible tail call.\n\n\t* mpf/set_d.c: Rate d==0 as UNLIKELY.  
Store size before extract call,\n\tto shorten lifespan of \"negative\".\n\n\t* mpf/init.c, mpf/init2.c, mpf/iset_d.c, mpf/iset_si.c,\n\tmpf/iset_str.c, mpf/iset_ui.c: Store prec before alloc call, for one\n\tless live quantity across that call.\n\t* mpf/init.c, mpf/init2.c, mpf/iset_str.c: Store size and exp before\n\talloc call, to overlap with other operations.\n\n\t* tests/refmpf.c, tests/tests.h (refmpf_fill, refmpf_normalize,\n\trefmpf_validate, refmpf_validate_division): New functions.\n\n\t* tests/refmpn.c, tests/tests.h (refmpn_copy_extend,\n\trefmpn_lshift_or_copy_any, refmpn_rshift_or_copy_any): New functions.\n\n\t* tal-debug.c: Add <string.h> for strcmp.\n\n\t* tests/cxx/t-istream.cc (check_mpz, check_mpq, check_mpf): Use size_t\n\tfor loop index, to quieten g++ warning.\n\n2004-03-01  Torbjorn Granlund  <tege@swox.com>\n\n\tWith Karl Hasselström:\n\t* mpn/generic/dc_divrem_n.c (mpn_dc_div_2_by_1): New function, with\n\tmeat from old mpn_dc_divrem_n.  Accept scratch parameter.  Rewrite to\n\tavoid a recursive call.\n\t(mpn_dc_div_3_by_2): New function, with meat from old\n\tmpn_dc_div_3_halves_by_2.  
Accept scratch parameter.\n\t(mpn_dc_divrem_n): Now just allocate scratch space and call new\n\tmpn_dc_div_2_by_1.\n\n2004-02-29  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (count_leading_zeros) [alpha gcc]: New version, inlining\n\tmpn/alpha/cntlz.asm cmpbge technique.\n\n\t* aclocal.m4, configure, install-sh, missing, ltmain.sh,\n\t*/Makefile.in: Update to automake 1.8.2 and libtool 1.5.2.\n\n\t* doc/gmp.texi (C++ Interface Integers): Note / and % rounding follows\n\tC99 / and %.\n\t(Exact Remainder): Index entries for divisibility testing algorithm.\n\n\t* tune/time.c (speed_endtime): Return 0.0 for negative time measured.\n\tRevise usage comments for clarity.\n\t* tune/common.c (speed_measure): Recognise speed_endtime 0.0 for\n\tfailed measurement.\n\n\t* tests/mpn/t-get_d.c (check_rand): Correction to nhigh_mask setup.\n\n2004-02-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/tuneup.c (tune_dc, tune_set_str): Up param.step_factor.\n\n\t* tests/mpz/t-gcd.c: Decrease # of tests to 50.\n\n2004-02-27  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c: Add a comment that this is not for Cray systems.\n\n\t* mpf/set_q.c: Don't support den(q)<0, demand canonical form in the\n\tusual way.\n\n2004-02-24  Torbjorn Granlund  <tege@swox.com>\n\n\tFrom Kevin:\n\t* mpn/generic/mul_fft.c (mpn_fft_add_modF): Loop until normalization\n\tcriterion met.\n\n2004-02-22  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS, GMP_OS_X86_XMM, GMP_PROG_CXX_WORKS):\n\tRemove files that might look like compiler output, so our \"||\"\n\talternatives are not fooled.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add test for lshift_com code\n\tmis-compiled by certain IA-64 HP cc at +O3.\n\n\t* gmp-impl.h (USE_LEADING_REGPARM): Disable under prof or gprof, for\n\tthe benefit of freebsd where .mcount clobbers registers.  
Spotted by\n\tTorbjorn.\n\t* configure.in (WANT_PROFILING_PROF, WANT_PROFILING_GPROF): New\n\tAC_DEFINEs.\n\n2004-02-21  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (sparc64-*-*bsd*): Amend -m32 setup for ABI=32, so it's\n\tnot used in ABI=64 on the BSD systems.\n\n2004-02-18  Niels Möller  <nisse@lysator.liu.se>\n\n\t* tests/mpz/t-gcd.c (gcdext_valid_p): New function.\n\t(ref_mpz_gcd): Deleted function.\n\t(one_test): Rearranged to call mpz_gcdext first, so that the\n\treturned value can be validated.\n\t(main): Don't use ref_mpz_gcd.\n\n2004-02-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_TOOM3_MAX_N): Move to !WANT_FFT section.\n\n\t* tests/mpz/t-mul.c: Exclude special huge operands unless WANT_FFT.\n\n\t* mpz/rrandomb.c (gmp_rrandomb): Rewrite.\n\n\t* mpn/generic/mul_n.c (mpn_toom3_sqr_n): Remove write-only variable c5.\n\n2004-02-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/iset_si.c, mpf/iset_ui.c, mpf/set_si.c, mpf/set_ui.c [nails]:\n\tAlways store second limb, to avoid a conditional.\n\n\t* tests/mpf/t-get_ui.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/mpf/t-get_si.c (check_limbdata): Further tests.\n\t* gmp-impl.h (MP_EXP_T_MAX, MP_EXP_T_MIN): New defines.\n\n\t* mpf/get_ui.c, mpf/get_si.c: Remove size==0 test, it's covered by\n\tother conditions.  Attempt greater clarity by expressing conditions as\n\tbased on available data range.\n\t* mpf/get_si.c [nails]: Correction, don't bail on exp > abs_size,\n\tsince may still have second limb above radix point available.\n\t* mpf/get_ui.c: Nailify.\n\n2004-02-16  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/scan0.c, mpz/scan1.c: Use count_trailing_zeros, instead of\n\tcount_leading_zeros on limb&-limb.\n\n\t* mpf/sqrt.c: Use \"/ 2\" for exp, avoiding C undefined behaviour on\n\t\">>\" of negatives.  Correction to comment, exp is rounded upwards.\n\tSIZ(r) always prec now, no need for tsize expression.  
Store EXP(r)\n\tand SIZ(r) where calculated to reduce variable lifespans.  Make tsize\n\tmp_size_t not mp_exp_t, though of course those are currently the same.\n\n\t* gmp-h.in (GMP_ERROR_ALLOCATE, GMP_ERROR_BAD_STRING,\n\tGMP_ERROR_UNUSED_ERROR): Remove, never used or documented, and we\n\tdon't want to use globals for communicating error information.\n\n\t* mpz/gcd_ui.c [nails]: Correction, actually return a value.\n\n\t* mpn/generic/addmul_1.c, mpn/generic/submul_1.c [nails==1]: Add code.\n\n2004-02-15  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-jac.c (check_data): Remove unnecessary variable\n\t\"answer\".\n\n2004-02-14  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_C_RIGHT_SHIFT): Note that it's \"long\"s which we're\n\tconcerned about.\n\n\t* mpn/generic/mul_n.c: Add some remarks about toom3 high zero\n\tstripping.\n\n\t* mpn/generic/scan0.c, mpn/generic/scan1.c: Remove design issue\n\tremarks.  What to do about going outside `up' space is a problem, but\n\tanything to address it would be an incompatible change.\n\n2004-02-11  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (power, powerpc): Use HAVE_HOST_CPU_FAMILY_power and\n\tHAVE_HOST_CPU_FAMILY_powerpc rather than various cpp defines.\n\n\t* gmp-impl.h: Add remarks about limits.h and Cray etc.\n\n\t* dumbmp.c (mpz_mul): Set ALLOC(r) for new data block used.  
Reported\n\tby Jason Moxham.\n\n\t* mpn/pa32/README, mpn/pa64/README (REFERENCES): New sections.\n\n2004-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-gcd.c: Decrease # of tests run.\n\n\t* mpn/*/gmp-mparam.h: Add HGCD values, update TOOM values.\n\n2004-02-01  Torbjorn Granlund  <tege@swox.com>\n\n\tFrom Kevin:\n\t* config.guess: Recognize AMD's hammer processors, return x86_64.\n\n2004-01-24  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpf/t-sqrt.c (check_rand1): Further diagnostic printouts.\n\n\t* mpn/generic/sqrtrem.c (mpn_sqrtrem): Add ASSERT_MPN.\n\t(mpn_dc_sqrtrem): Add casts for K&R.\n\n\t* mpf/sqrt_ui.c: Nailify.\n\n\t* mpf/set_z.c: Do MPN_COPY last, for possible tail call.\n\n\t* doc/gmp.texi (Miscellaneous Float Functions): For mpf_random2, note\n\texponent is in limbs.\n\n2004-01-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/sqrt.c: Change tsize calculation to get prec limbs result\n\talways, previously got prec+1 when exp was odd.\n\t* tests/mpf/t-sqrt.c (check_rand1): New function, code from main.\n\t(check_rand2): New function.\n\n\t* mpf/sqrt_ui.c: Change rsize calculation to get prec limbs result,\n\tpreviously got prec+1.\n\t* tests/mpf/t-sqrt_ui.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* tests/refmpf.c, tests/tests.h (refmpf_add_ulp,\n\trefmpf_set_prec_limbs): New functions.\n\n\t* mpz/get_d_2exp.c, mpf/get_d_2exp.c: Remove x86+m68k force to double,\n\tmpn_get_d now does this.  Remove res==1.0 check for round upwards,\n\tmpn_get_d now rounds towards zero.  Move exp store to make mpn_get_d a\n\ttail call.\n\n\t* configure.in (x86-*-*): Use ABI=32 rather than ABI=standard.\n\tUse gcc -m32 when available, to force mode on bi-arch amd64 gcc.\n\t* configure.in, acinclude.m4 (x86_64-*-*): Merge into plain x86 setups\n\tas ABI=64.  Support ABI=32, using athlon code.  
Use gcc -mcpu=k8,\n\t-march=k8.\n\t(amd64-*-*): Remove pattern, config.sub only gives x86_64.\n\t* doc/gmp.texi (ABI and ISA): Add x86_64 dual ABIs.\n\n\t* mpn/amd64/README: Add reference to ABI spec.\n\n2004-01-17  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/README: Remove open/mpn versions of toom3, no longer exist.\n\t* tune/powerpc64.asm: Remove unused L(again).\n\t* tune/time.c (mftb): Note single mftb possible for powerpc64.\n\n\t* mpn/generic/mode1o.c: Use \"c<s\" to do underflow detection in last\n\tstep, for better parallelism.\n\n\t* mpn/generic/get_d.c: Preserve comments about hppa fcnv,udw,dbl from\n\tprevious mpz_get_d code.\n\n\t* tune/freq.c: Add some comments about systems not covered.\n\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Add _MSL_STDIO_H for Metrowerks.\n\tReported by Tomas Zahradnicky.\n\n2004-01-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (powerpc64-*-linux*): Try gcc64.  Try -m64 with\n\t\"cflags_maybe\" to get it used in all probing.  Add sizeof-long-8 test\n\tto check the mode is right if -m64 is not applicable.\n\n2004-01-15  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (--with-readline=detect): Check for readline/readline.h\n\tand readline/history.h.  
Report result of detection.\n\n2004-01-12  Niels Möller  <nisse@lysator.liu.se>\n\n\t* tests/tests.h: Added refmpn_free_limbs prototype.\n\t* tests/refmpn.c (refmpn_free_limbs): New function.\n\n2004-01-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/ev6/slot.pl: New file, derived in part from\n\tmpn/x86/k6/cross.pl.\n\n\t* mpn/alpha/alpha-defs.m4 (ASSERT): New macro.\n\n\t* mpn/asm-defs.m4 (m4_ifdef): New macro, avoiding OSF 4.0 m4 bug.\n\t(m4_assert_defined): Use it.\n\n\t* mpn/alpha/default.m4, mpn/alpha/unicos.m4 (LDGP): New macro.\n\t* mpn/alpha/ev67/gcd_1.asm: Use it to re-establish gp after jsr.\n\n\t* configure.in, demos/calc/Makefile.am: Use -lcurses or -lncurses with\n\treadline, when available.\n\n\t* longlong.h (sub_ddmmss) [generic]: Use al<bl for the borrow rather\n\tthan __x>al, since the former can be done without waiting for __x,\n\thelping superscalar chips, in particular alpha ev5 and ev6.\n\n\t* longlong.h (sub_ddmmss) [ia64]: New macro.\n\n\t* tests/t-sub.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/refmpn.c, tests/tests.h (refmpn_sub_ddmmss): New function.\n\n2004-01-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/p6/mod_34lsub1.asm: New file, derived in part from\n\tmpn/x86/mod_34lsub1.asm.\n\n\t* configure.in (IA64_PATTERN): Use -mtune on gcc 3.4.\n\n2004-01-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, mp-h.in (__GMP_SHORT_LIMB): Renamed from _SHORT_LIMB, to\n\tkeep in our namespace.  (Not actually used anywhere currently.)\n\tReported by Patrick Pelissier.\n\n\t* mp-h.in: Use \"! defined (__GMP_WITHIN_CONFIGURE)\" in the same style\n\tas gmp-h.in (though mp-h.in is not actually used during configure).\n\n\t* mp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT) [__GNUC__]:\n\tUse __dllexport__ and __dllimport__ to keep out of application\n\tnamespace.  
Same previously done in gmp-h.in.\n\n2004-01-06  Kevin Ryde  <kevin@swox.se>\n\n\t* configfsf.sub, configfsf.guess: Update to 2004-01-05.\n\t* configure.in (amd64-*-* | x86_64-*-*): Update comments on what\n\tconfigfsf.sub does.\n\n2004-01-04  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/README (REFERENCES): Add tru64 assembly manuals.\n\t(ASSEMBLY RULES): Note what gcc says about !literal! etc.\n\n2004-01-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/ev67/gcd_1.asm: New file.\n\n\t* mpn/x86/pentium4/sse2/rsh1add_n.asm: New file, derived in part from\n\tmpn/x86/pentium4/sse2/addlsh1_n.asm.\n\n\t* mpn/x86/p6/p3mmx/popham.asm: Note measured speeds.\n\n\t* mpn/x86/pentium4/sse2/addlsh1_n.asm (PARAM_CARRY): Remove macro, not\n\tused, no such parameter.\n\n\t* mpn/generic/gcd.c: Use <stdio.h> for NULL.\n\n\t* doc/gmp.texi (Single Limb Division): Correction to tex expression\n\tfor (1/2)x1.  And minor wording tweaks elsewhere.\n\n\t* gmp-impl.h (mpn_rsh1add_n, mpn_rsh1sub_n): Correction to comments\n\tabout how carries returned.\n\n\t* longlong.h (umul_ppmm) [generic]: Add comments about squaring\n\t(dropped from tasks list)\n\n2003-12-31  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/GMP.xs (scan0, scan1): Return ~0 for not-found.\n\t* demos/perl/GMP.pm: Describe this, remove the note about ULONG_MAX\n\tbeing the same as ~0 (which is not true in old perl).\n\t* demos/perl/test.pl: Update tests.\n\t* demos/perl/typemap (gmp_UV): New type.\n\n\t* demos/perl/test.pl (fits_slong_p): Comment out uv_max test, it won't\n\tnecessarily exceed a long.\n\n\t* demos/perl/GMP.pm: Add a remark about get_str to the bugs section.\n\n\t* mpn/generic/sqrtrem.c, mpz/fac_ui.c, tests/mpf/reuse.c: Add casts\n\tfor K&R.\n\t* tests/mpf/t-muldiv.c: Make ulimb, vlimb into ulongs, which is how\n\tthey're used, for the benefit of K&R calling.\n\n\t* doc/gmp.texi (Square Root Algorithm): Add a summary of the algorithm.\n\tAnd add further index entries in various places.\n\n\t* mpz/lucnum_ui.c, 
mpz/lucnum2_ui.c: Use mpn_addlsh1_n when available.\n\n\t* gmp-impl.h, mpn/generic/mul_n.c (mpn_addlsh1_n, mpn_sublsh1_n,\n\tmpn_rsh1add_n, mpn_rsh1sub_n): Move descriptions to gmp-impl.h with\n\tthe prototypes, for ease of locating.\n\n2003-12-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-gcd.c: Rewrite, based on suggestions by Kevin.\n\n\t* mpn/sparc64/README: Remove mpn_Xmul_2, done.\n\tAdd blurb about L1 cache conflicts.\n\n2003-12-29  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_fft.c, mpz/root.c, mpq/cmp_ui.c: Add casts for K&R.\n\n2003-12-27  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-mul.c (mul_kara, mul_basecase): Use __GMP_PROTO.\n\n\t* Makefile.am (AUTOMAKE_OPTIONS): Restore this, giving no directory on\n\tansi2knr to avoid a circular build rule.\n\t* configure.in (AM_INIT_AUTOMAKE): Note options also in Makefile.am.\n\n\t* configure.in (cflags_maybe): Don't loop adding cflags_maybe if the\n\tuser has set CFLAGS.\n\n2003-12-23  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (*sparc*-*-*): Test sizeof(long)==4 or 8 for ABIs, to\n\tget the right mode when the user sets the CFLAGS.\n\t(testlist): Introduce \"any_<abi>_testlist\" to apply to all compilers.\n\n\t* demos/perl/typemap (MPZ_ASSUME, MPQ_ASSUME, MPF_ASSUME): Remove\n\toutput rules, these are only meant for inputs.\n\t(MPZ_MUTATE): Remove, not used since changes for magic.\n\n\t* demos/perl/GMP.xs (mpz_class_hv, mpq_class_hv, mpf_class_hv): New\n\tvariables, initialized in BOOT.\n\t* demos/perl/GMP.xs, demos/perl/typemap: Use them and explicit\n\tsv_bless, to save a gv_stashpv for every new object.\n\n2003-12-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/mode1o.c, mpn/alpha/dive_1.c: Moved from ev5/mode1o.c and\n\tev5/dive_1.c, these are good for ev4, and would like them in a generic\n\talpha build.\n\n2003-12-21  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi (Integer Logic and Bit Fiddling): Say \"bitwise\" in\n\tmpz_and, mpz_ior and mpz_xor, to avoid any confusion with 
what C means\n\tby \"logical\".  Reported by Rüdiger Schütz.\n\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Note why defined(EOF) is not good.\n\n2003-12-20  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h (SPEED_TMP_ALLOC_LIMBS): Correction to last change,\n\tdon't want \"- 1\" on the TMP_ALLOC_LIMBS.\n\n\t* demos/expr/expr.h: Test #ifdef MPFR_VERSION_MAJOR for when mpfr.h is\n\tincluded, not GMP_RNDZ which is now an enum.\n\n\t* demos/expr/exprfra.c (e_mpfr_ulong_p): Use mpfr_integer_p and\n\tmpfr_fits_ulong_p.\n\t(e_mpfr_get_ui_fits): Use mpfr_get_ui.\n\n\t* mpfr/*: Update to mpfr cvs head 2003-12-20.\n\n\t* configure, config.in: Update to autoconf 2.59.\n\t* */Makefile.in, configure, aclocal.m4, ansi2knr.c, install-sh,\n\tdoc/mdate-sh: Update to automake 1.8.\n\n\t* mkinstalldirs: Remove, not required by automake 1.8.\n\t* doc/gmp.texi (Build Options): HTML is a usual target in automake 1.8.\n\n\t* configure.in (AC_PREREQ): Require autoconf 2.59.\n\t(AM_INIT_AUTOMAKE): Require automake 1.8.\n\t(AC_C_INLINE): Use rather than GMP_C_INLINE, now has #ifndef\n\t__cplusplus we want.\n\t(gettimeofday): Use AC_CHECK_FUNCS rather than our workaround code,\n\tautoconf now ok.\n\n\t* acinclude.m4 (GMP_C_INLINE): Remove.\n\t(GMP_H_EXTERN_INLINE): Use AC_C_INLINE.\n\t(GMP_PROG_AR): Comment on automake $ARFLAGS.\n\n2003-12-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpf/t-get_d.c: Print message before aborting.\n\n\t* mpn/generic/get_d.c: Make ONE_LIMB case actually work for nails.\n\n2003-12-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/get_d.c: Don't include longlong.h.\n\n\t* tests/mpz/t-mul.c (ref_mpn_mul): Handle un == vn specially, to avoid\n\ta dummy r/w outside of allocated area.\n\n2003-12-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/unicos.m4 (ALIGN): Add comments on what GCC does.\n\n\t* configure.in (fat setups), acinclude.m4 (GMP_INIT): Obscure\n\tinclude() from automake 1.8 aclocal.\n\t* acinclude.m4: Quote names in AC_DEFUN, for automake 1.8 
aclocal.\n\n2003-12-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/get_d.c: Amend comments per mpn_get_d change.\n\t(limb2dbl): Remove, no longer used.\n\n\t* gmp-impl.h (DIVREM_1_NORM_THRESHOLD etc) [nails]: Correction to\n\tcomments, MP_SIZE_T_MAX means preinv never.\n\n\t* gmp-impl.h (DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD) [nails]:\n\tRemove overrides, divexact_1 and modexact_1 have been nailified.\n\n\t* mpz/inp_str.c (mpz_inp_str_nowhite): Use ASSERT_ALWAYS for EOF value\n\trequirement.\n\n\t* tests/refmpn.c (refmpn_rsh1add_n, refmpn_rsh1sub_n): Parens around\n\tGMP_NUMB_BITS - 1 with \">>\", to quieten gcc -Wall.\n\t* tests/t-constants.c (main), tests/t-count_zeros.c (check_clz),\n\ttests/t-modlinv.c (one), tests/mpz/t-jac.c (try_si_zi),\n\ttests/mpq/t-get_d.c (check_onebit): : Correction to printfs.\n\t* tests/mpn/t-fat.c: Add <string.h> for memcpy.\n\t* tests/mpz/t-scan.c (check_ref): Remove unused variable \"isigned\".\n\t* tests/mpq/t-get_d.c (check_onebit): Remove unused variable \"limit\".\n\t* tests/mpf/t-set_si.c, tests/mpf/t-set_ui.c (check_data): Braces for\n\tinitializers.\n\t* tests/devel/try.c (mpn_divexact_by3_fun, mpn_modexact_1_odd_fun):\n\tCorrection to return values.\n\n\t* doc/gmp.texi (Miscellaneous Integer Functions): Note mpz_sizeinbase\n\tcan be used to locate the most significant bit.  Reword a bit for\n\tclarity.\n\n2003-12-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: Change asm => __asm__, tabify.\n\t* mpz/get_d_2exp.c: Likewise.\n\t* mpf/get_d_2exp.c: Likewise.\n\n\t* tests/cxx/t-ops.cc: #if .. 
#endif out tests that cause ambiguities.\n\n2003-12-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-gcd.c: Generate operands with sizes as a geometric\n\tprogression, to allow for larger operands and less varying timing.\n\n2003-12-10  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/test.pl: Should be $] for perl version in old perl.\n\n\t* configure.in (sparc64-*-*): Single block of gcc configs for all\n\tsystems, on unknown systems try both ABI 32 and 64.\n\n\t* configure.in (LIBGMP_LDFLAGS, LIBGMPXX_LDFLAGS): New AC_SUBSTs with\n\toptions to generate .def files with windows DLLs.\n\t* Makefile.am (libgmp_la_LDFLAGS, libgmpxx_la_LDFLAGS): Use them.\n\n\t* mpn/generic/gcd.c: Use ABOVE_THRESHOLD / BELOW_THRESHOLD, to follow\n\tconvention and cooperate with tune/tuneup.c.\n\n\t* tune/common.c, tune/speed.c, tune/speed.h, tune/speed-ext.c,\n\ttune/tuneup.c (SPEED_TMP_ALLOC_LIMBS): Take variable as parameter\n\trather than returning a value, avoids alloca in a function call.\n\t* tune/common.c, tune/speed.h (speed_tmp_alloc_adjust): Remove, now\n\tinline in SPEED_TMP_ALLOC_LIMBS, and using ptr-NULL for alignment\n\textraction.\n\n\t* gmpxx.h (__gmp_binary_equal, __gmp_binary_not_equal,\n\t__gmp_binary_less, __gmp_binary_less_equal, __gmp_binary_greater,\n\t__gmp_binary_greater_equal, __gmp_cmp_function): Use mpfr_cmp_si and\n\tmpfr_cmp_d.\n\t* tests/cxx/t-ops.cc: Exercise this.\n\n\t* demos/perl/Makefile.PL: Don't install sample.pl and test2.pl.\n\n\t* demos/perl/GMP.xs (use_sv): Prefer PV over IV or NV to avoid any\n\trounding.\n\t* demos/perl/test.pl: Exercise this.\n\n\t* demos/perl/GMP/Mpf.pm (overload_string): Corrections to $# usage.\n\t* demos/perl/test.pl: Exercise this.\n\n2003-12-08  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/GMP.pm: Correction to canonicalize example.\n\n\t* demos/perl/GMP.xs: New type check scheme, support magic scalars,\n\tsupport UV when available.  
Remove some unused local variables.\n\t(coerce_long): Check range of double.\n\t(get_d_2exp): Remove stray printf.\n\n\t* demos/perl/test.pl: Exercise magic, rearrange to make it clearer\n\twhat's being tested.\n\n2003-12-07  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c (mul_toom3_threshold): Use MUL_TOOM3_THRESHOLD_LIMIT,\n\tfor the benefit of ASSERT in mpn_mul_n.\n\n\t* tune/tuneup.c (tune_mul): Correction to toom3 param.min_size, should\n\tuse MPN_TOOM3_MUL_N_MINSIZE.\n\n\t* tune/speed.c (check_align_option): Correction to printf format.\n\t* tune/freq.c (freq_sysctl_hw_model): Remove unused \"i\" variable.\n\n\t* scanf/doscan.c: Correction to a couple of trace printfs.\n\tAdd <stdlib.h> for strtol.\n\n\t* tests/misc/t-scanf.c (test_sscanf_eof_ok): New function.\n\t(check_misc): Use it to suppress tests broken by libc.\n\tAnd should be EOF rather than -1 in various places.\n\n2003-12-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_get_str, POW2_P case):\n\tDon't append extra '\\0' byte.\n\n2003-12-05  Niels Möller  <niels@lysator.liu.se>\n\n\t* tune/speed.h (mpn_gcd_accel): Added prototype.\n\n2003-12-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* randmt.c (__gmp_mt_recalc_buffer): Put parens around \"&\" expressions\n\tinside \"!=\".\n\n\t* mpf/get_str.c: Remove unused variable \"fracn\".\n\n2003-12-03  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, Makefile.am (LIBGMP_LDFLAGS, LIBGMPXX_LDFLAGS): New\n\tAC_SUBSTs, use them to create .def files with Windows DLLs.\n\t* doc/gmp.texi (Notes for Particular Systems): Update notes on mingw\n\tDLL with MS C.\n\n\t* mpz/export.c: Allow NULL for countp.\n\t* doc/gmp.texi (Integer Import and Export): Describe this.\n\tSuggested by Jack Lloyd.\n\n\t* mpn/x86/p6/aors_n.asm: New file, grabbing the K7 code.\n\tSuperiority of this reported by Patrick Pelissier.\n\n2003-11-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/ev67/popcount.asm, mpn/alpha/ev67/hamdist.asm: New files.\n\n\t* 
mpn/alpha/ev67: New directory.\n\t* configure.in (alphaev67, alphaev68, alphaev7*): Use it.\n\n\t* doc/gmp.texi (GMPrefu, GMPpxrefu): Change back to plain ref and\n\tpxref, remove macros.\n\t(GMPreftopu, GMPpxreftopu): Remove URL parameter, rename to GMPreftop\n\tand GMPpxreftop.\n\t(Debugging): Remove debauch, seems to have disappeared.\n\t(Language Bindings): Corrections to URLs for CLN, Omni F77, Pike.\n\n2003-11-29  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/GMP/Mpf.pm (overload_string): Use $OFMT to avoid warnings\n\tabout $#.\n\n\t* demos/perl/GMP.xs (fits_slong_p): Use LONG_MAX+1 to avoid possible\n\trounding of 0x7F..FF in a double on 64-bit systems.\n\n\t* configure.in (ppc601-*-*): Remove this case, it never matched\n\tanything, the name adopted is powerpc601.\n\t(powerpc601-*-*): Use gcc -mcpu=601, xlc -qarch=601.\n\n\t* configure.in: Introduce ${cc}_cflags_maybe, used if they work.\n\t(*sparc*-*-*) [ABI=32]: Add gcc_cflags_maybe=-m32 to force that mode.\n\n\t* doc/gmp.texi (Introduction to GMP): Add AMD64 to optimizations list.\n\t(Build Options): Add cpu types alphaev7 and amd64.  
Update texinfo\n\thtml cross reference.\n\n2003-11-28  Niels Möller  <nisse@lysator.liu.se>\n\n\t* mpn/generic/gcd.c (MPN_LEQ_P): Copied macro definition (needed\n\tfor compilation with --enable-assert).\n\n2003-11-27  Niels Möller  <nisse@lysator.liu.se>\n\n\t* tests/mpz/t-gcd.c (gcd_values): Moved definition, so that we\n\tdon't need to forward declare the array.\n\n2003-11-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-gcd.c: Generate random operands up to 32767 bits;\n\tdecrease # of test to 1000.\n\t(gcd_values): Remove oversize test case.\n\n2003-11-26  Niels Möller  <niels@lysator.liu.se>\n\n\t* tests/mpz/t-gcd.c (main): Added some tests with non-random\n\tinput.\n\n2003-11-25  Niels Möller  <nisse@lysator.liu.se>\n\n\t* gmp-impl.h (MPN_LEQ_P, MPN_EXTRACT_LIMB): New macros.\n\n2003-11-25  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/gmp.texi (Language Bindings): Add Axiom.\n\n2003-11-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/README: More notes on assembler syntax variations.\n\n\t* mpn/alpha/alpha-defs.m4, mpn/alpha/unicos.m4 (unop): Should be ldq_u\n\tnot bis, and move to alpha-defs.m4 since it can be happily used\n\teverywhere.\n\n\t* mpn/alpha/alpha-defs.m4, mpn/alpha/default.m4, mpn/alpha/unicos.m4\n\t(bigend): Move to alpha-defs.m4 and base it on HAVE_LIMB_BIG_ENDIAN or\n\tHAVE_LIMB_LITTLE_ENDIAN, so as not to hard code system endianness.\n\n\t* mpn/alpha/alpha-defs.m4: New file.\n\t* configure.in (alpha*-*-*): Use it.\n\n2003-11-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-11-21.\n\n\t* mpn/alpha/ev5/com_n.asm: Change \"not\" to \"ornot r31\", since \"not\"\n\tisn't recognised by on Cray Unicos.  
Add missing \"gp\" to PROLOGUE.\n\t* mpn/alpha/README: Add a note on \"not\".\n\n2003-11-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/aorslsh1_n.asm: Slightly rework feed-in code, avoiding\n\tspurious reads beyond operand limits.\n\n\t* mpn/alpha/ev5/com_n.asm: Add ASM_START/ASM_END.\n\n\t* mpn/generic/mul_fft.c (mpn_fft_zero_p): Remove unused function.\n\t(mpn_lshift_com): Make static, nailify properly.\n\n2003-11-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/diveby3.c: Use a \"q\" variable to make it clearer what\n\tthe code is doing.\n\n\t* mpn/powerpc32/750/lshift.asm, mpn/powerpc32/750/rshift.asm: New\n\tfiles.\n\n\t* mpn/alpha/ev5/com_n.asm: New file.\n\n\t* doc/gmp.texi (Assembler Functional Units, Assembler Writing Guide):\n\tNew sections by Torbjorn, tweaked by me.\n\n2003-11-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32: Add power4/powerpc970 cycle counts.\n\tUse cmpwi instead of cmpi to placate darwin.\n\n2003-11-15  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess: Add comments on MacOS \"machine\" command.\n\n\t* tests/devel/try.c (main): Use gmp_randinit_default explicitly on\n\t__gmp_rands, since RANDS doesn't allow seeding.\n\n\t* doc/gmp.texi (Assigning Integers): Remove notes on possible change\n\tto disallow whitespace, this would be an incompatible change and\n\treally can't be made.\n\t(Toom 3-Way Multiplication): Updates for Paul's new code.\n\n\t* mpn/generic/mul_n.c (toom3_interpolate, mpn_toom3_mul_n): Put\n\tif/else braces around whole of #if code, for readability.\n\n\t* tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n,\n\trefmpn_rsh1add_n, refmpn_rsh1sub_n): Add ASSERTs for operand overlaps\n\tetc.\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-11-15.\n\n2003-11-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/aorslsh1_n.asm: Use Cray-friendly syntax for \"br\".\n\n2003-11-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/aorslsh1_n.asm: New file.\n\n2003-11-12  Kevin Ryde  
<kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add case provoking AIX power2\n\tassembler, test code by Torbjorn.\n\t* configure.in (power*-*-*): Add a comment about -mcpu=rios2 fallback.\n\n\t* tune/speed.c (main): Use gmp_randinit_default explicitly on\n\t__gmp_rands, since RANDS doesn't allow seeding.\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-11-12.\n\n\t* gmp-impl.h, randmt.h (__gmp_randinit_mt_noseed): Move prototype to\n\tgmp-impl.h, for use by RANDS.\n\n\t* mpn/Makeasm.am (.s, .S, .asm): Quote $< in test -f, per automake.\n\t(.obj): Use test -f and $(CYGPATH_W) as per automake.\n\n2003-11-11  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Make umul and udiv standard-optional objects, rather\n\tthan under various extra_functions.\n\n\t* mpn/pa32/hppa1_1/pa7100/add_n.asm,\n\tmpn/pa32/hppa1_1/pa7100/addmul_1.asm,\n\tmpn/pa32/hppa1_1/pa7100/lshift.asm,\n\tmpn/pa32/hppa1_1/pa7100/rshift.asm,\n\tmpn/pa32/hppa1_1/pa7100/sub_n.asm,\n\tmpn/pa32/hppa1_1/pa7100/submul_1.asm: Use LDEF for labels.\n\n\t* mpf/set_str.c: Don't use memcmp for decimal point testing, just a\n\tloop is enough and avoids any chance of memcmp reading past the end of\n\tthe given string.\n\n\t* randmts.c, randmt.h: New files.\n\t* Makefile.am (libgmp_la_SOURCES): Add them.\n\t* randmt.c: Move seeding to randmts.c, common defines in randmt.h.\n\t* gmp-impl.h (RANDS): Use __gmp_randinit_mt_noseed.\n\t* tests/misc.c (tests_rand_start): Use gmp_randinit_default\n\texplicitly, not RANDS.\n\n\t* mpn/ia64/ia64-defs.m4 (PROLOGUE_cpu): Use 32-byte alignment, for the\n\tbenefit of itanium 2.\n\t* mpn/ia64/gcd_1.asm: Remove own .align 32.\n\n\t* mpn/ia64/ia64-defs.m4 (ALIGN): New define, using IA64_ALIGN_OK.\n\n\t* acinclude.m4 (GMP_ASM_IA64_ALIGN_OK): New macro.\n\t* configure.in (IA64_PATTERN): Use it.\n\t* mpn/ia64/README: Add notes on gas big endian align problem.\n\n2003-11-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/aors_n.asm: Align loop to a multiple of 16.  
Also align\n\tM4_function_n to a multiple of 16, to minimize alignment padding.\n\tUpdate P6 cycle counts reflecting improvements with new alignment.\n\n2003-11-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (HAVE_HOST_CPU_alpha_CIX): New define.\n\t(ULONG_PARITY, popc_limb): Use it, to pick up ev7 as well as 67 and 68.\n\t* longlong.h (count_leading_zeros, count_trailing_zeros): Ditto.\n\n\t* doc/gmp.texi (Notes for Package Builds): Add notes on multi-ABI\n\tsystem packaging.\n\t(ABI and ISA): Add GNU/Linux ABI=64.\n\t(Binary GCD): Add notes on 1x1 GCD algorithms.\n\n\t* mpn/alpha/README: Add some literature references.\n\n\t* mpn/ia64/mode1o.asm: Various corrections to initial checkin.\n\t* mpn/ia64/ia64-defs.m4 (ASSERT): Correction to arg quoting.\n\n2003-11-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/linux64.m4: New file.\n\t* configure.in (POWERPC64_PATTERN): Handle *-*-linux*.\n\tUse linux64.m4.\n\n2003-11-05  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_sysctl_hw_model): Relax to just look for \"%u MHz\",\n\tfor the benefit of sparc cypress under netbsd 1.6.1.\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-11-05.\n\n\t* mpn/alpha/ev5/dive_1.c: New file.\n\n\t* configure.in (x86_64-*-*): Accept together with amd64-*-*.\n\n\t* tune/speed.c: Check range of -x,-y,-w,-W alignment specifiers.\n\t* tune/speed.h (CACHE_LINE_SIZE): Amend comments.\n\n2003-11-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.c: Fix typo in testing HAVE_NATIVE_mpn_modexact_1_odd.\n\n2003-11-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/ia64/mode1o.asm: New file.\n\t* mpn/ia64/ia64-defs.m4 (ASSERT): New macro.\n\n\t* tests/mpz/t-set_d.c (check_2n_plus_1): New test.\n\n2003-11-01  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/fac_ui.c (BSWAP_ULONG) [limb==2*long]: Remove this case, it\n\tprovokes code gen problems on HP cc.\n\t(BSWAP_ULONG) [generic]: Rename __dst variable to avoid conflicts with\n\tBITREV_ULONG.\n\tFix by Jason Moxham.\n\n\t* 
mpn/powerpc32/mode1o.asm: Use 16-bit i*i for early out, no need to\n\ttruncate divisor.  Amend stated 750/7400 speeds, and note operands\n\tthat give the extremes.\n\n\t* mpz/set_d.c: Don't use a special case for d < MP_BASE_AS_DOUBLE, gcc\n\t3.3 -mpowerpc64 on darwin gets ulonglong->double casts wrong.\n\n\t* mpn/generic/diveby3.c: Show a better style carry handling in the\n\talternative pipelined sample code.\n\n\tRevert this, the longlong.h macros need -mpowerpc64:\n\t* acinclude.m4 (GMP_GCC_POWERPC64): New macro.\n\t* configure.in (powerpc64-*-darwin*): Use it to exclude -mpowerpc64\n\twhen bad.\n\n2003-10-31  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-10-31.\n\n\t* mpn/powerpc64/README: Add subdirectory organisation notes.\n\n\t* tests/mpn/t-get_d.c: Don't use limits.h, LONG_MIN is wrong on gcc\n\t2.95 with -mcpu=ultrasparc.\n\n\t* acinclude.m4 (GMP_GCC_POWERPC64): New macro.\n\t* configure.in (powerpc64-*-darwin*): Use it to exclude -mpowerpc64\n\twhen bad.\n\n\t* configure.in (powerpc64-*-darwin*) [ABI=mode32]: Use gcc -mcpu flags.\n\n\t* mpn/ia64/gcd_1.asm: Use \"C\" for comments.\n\t* mpn/ia64/README, mpn/ia64/ia64-defs.m4: Note this.\n\n\t* mpn/ia64/ia64-defs.m4: Renamed from default.m4, per other defs files.\n\t* configure.in (IA64_PATTERN): Update GMP_INCLUDE_MPN.\n\n\t* doc/gmp.texi (Notes for Particular Systems): Remove m68k ABI notes\n\tfor -mshort and PalmOS, now works.\n\t(References): Correction, GMP Square Root proof already there, just\n\twanting URL from RRRR 4475.\n\n2003-10-29  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (sparc*-*-*): Use gcc -m32 when that option works, to\n\tforce 32-bit mode on dual 32/64 configurations like GNU/Linux.\n\t(sparc64-*-linux*): Add support for ABI=64.\n\n\t* mpn/generic/pre_divrem_1.c: In fraction part, use CNST_LIMB(0) with\n\tudiv_qrnnd_preinv to avoid warning about shift > type.\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-10-29.\n\n\t* tests/cxx/t-istream.cc: Avoid 
tellg() checks if putback() doesn't\n\tupdate that, avoids certain g++ 2.96 problems.\n\n\t* tests/mpn/t-fat.c: New file.\n\t* tests/mpn/Makefile.am (check_PROGRAMS): Add it.\n\n\t* configure.in (CPUVEC_INSTALL, ITERATE_FAT_THRESHOLDS): New macros\n\tfor fat.h.\n\t* mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Use CPUVEC_INSTALL instead\n\tof memcpy.  Correction to location of \"initialized\" set.  Improve\n\tvarious comments.\n\n2003-10-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1.asm: Change addcc => add in a few places.\n\t* mpn/sparc64/addmul_1.asm: Likewise.\n\n\t* mpn/sparc32/v9/mul_1.asm: Apply cross-jumping.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\t* mpn/sparc32/v9/sqr_diagonal.asm: Likewise.\n\n2003-10-27  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/cxx/t-misc.cc: Don't use <climits>, on g++ 2.95.4 (debian 3.0)\n\t-mcpu=ultrasparc LONG_MIN is wrong and kills the compile.\n\n\t* tests/cxx/t-istream.cc: Correction to tellg tests, don't assume\n\tstreampos is zero based.\n\n\t* configure.in (HAVE_HOST_CPU_FAMILY_alpha): New define for config.h.\n\t* mpn/generic/get_d.c: Use it instead of __alpha for alpha workaround,\n\tsince Cray cc doesn't define __alpha.\n\n\t* mpn/x86/README: Revise PIC coding notes a bit, add gcc visibility\n\tattribute.\n\n2003-10-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/ia64/gcd_1.asm: New file.\n\n\t* tune/many.pl: Allow for PROLOGUE(fun,...), as used on alpha.\n\n\t* doc/gmp.texi (C++ Formatted Input): Describe base indicator handling.\n\n\t* tests/cxx/t-istream.cc: New file.\n\t* tests/cxx/Makefile.am: Add it.\n\n\t* cxx/ismpznw.cc: New file, integer input without whitespace ...\n\t* cxx/ismpz.cc: ... from here.\n\t* gmp-impl.h (__gmpz_operator_in_nowhite): Add prototype.\n\t* cxx/ismpq.cc: Rewrite using mpz input routines.  Change to accept a\n\tseparate base indicator on numerator and denominator.  
Fix base\n\tindicator case where \"123/0456\" would stop at \"123/0\".\n\t* Makefile.am, cxx/Makefile.am: Add cxx/ismpznw.cc.\n\n\t* tests/mpz/t-set_d.c: New file, derived from tests/mpz/t-set_si.c\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpn/m68k/lshift.asm, mpn/m68k/rshift.asm: Support 16-bit int and\n\tstack alignment.\n\t* mpn/m68k/README: Add notes on this.\n\t* configure.in (SIZEOF_UNSIGNED): New define in config.m4.\n\t* mpn/m68k/m68k-defs.m4 (m68k_definsn): Add cmpw, movew.\n\tReported by Patrick Pelissier.\n\n\t* mpn/m68k/t-m68k-defs.pl: Don't use -> with hashes, to avoid\n\tdeprecation warnings from perl 5.8.\n\n\t* configure.in (viac3-*-*): Use just x86/pentium in $path not x86/p6.\n\tIf gcc is to be believed the old C3s don't have cmov.\n\n\t* Makefile.am: Amend comments about not building from libtool\n\tconvenience libraries.\n\n\t* mpn/asm-defs.m4 (PROLOGUE): Use m4_file_seen, for correct filename\n\tin missing EPILOGUE error messages.\n\t(m4_file_seen): Amend comments about where used.\n\n\t* Makefile.am (CXX_OBJECTS): Remove $U, C++ files are not subject to\n\tansi2knr rules.\n\n\t* gmp-h.in (mpn_divmod_1): Use __GMP_CAST, to avoid warnings in\n\tapplications using g++ -Wold-style-cast.\n\n\t* mpn/z8000/README: New file.\n\n2003-10-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/get_d.c (CONST_1024, CONST_NEG_1023,\n\tCONST_NEG_1022_SUB_53): Replace ALPHA_WORKAROUND with a non-gcc-ism,\n\tand use on Cray Unicos alpha too, which has the same problem.\n\n\t* configure.in (powerpc64-*-darwin*): Make ABI=32 available as the\n\tfinal fallback, remove mode64 until we know how it will work.\n\n\t* doc/gmp.texi (Build Options): Add powerpc970 to available CPUs.\n\t(ABI and ISA): Add mode32 for Darwin.\n\n\t* configure.in (gettimeofday): Use an explicit AC_TRY_LINK, to avoid\n\tknown autoconf 2.57 problems with gettimeofday in AC_CHECK_FUNCS on\n\tHP-UX.\n\n\t* configure.in (powerpc*-*-*): Use ABI=32 instead of ABI=standard for\n\tthe 
default 32-bit ABI.  Fixes powerpc64-*-aix* which is documented as\n\tchoices \"aix64 32\" but had \"aix64 standard\".\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-10-22.\n\n\t* doc/gmp.texi (Notes for Particular Systems): Note m68k gcc -mshort\n\tand PalmOS calling conventions not supported.  Reported by Patrick\n\tPelissier.\n\t(References): Add Paul Zimmermann's Inria 4475 paper.\n\n2003-10-21  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpn/t-instrument.c: Add mpn_addlsh1_n, mpn_rsh1add_n,\n\tmpn_rsh1sub_n, mpn_sub_nc, mpn_sublsh1_n.  Typo in mpn_preinv_divrem_1\n\tconditional.\n\n2003-10-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/mode32/add_n.asm: New file.\n\t* mpn/powerpc64/mode32/sub_n.asm: New file.\n\t* mpn/powerpc64/mode32/mul_1.asm: New file.\n\t* mpn/powerpc64/mode32/addmul_1.asm: New file.\n\t* mpn/powerpc64/mode32/submul_1.asm: New file.\n\n2003-10-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (AMD64): __x86_64__ => __amd64__.\n\t(64-bit powerpc): Only define carry-dependent macros if\n\t!_LONG_LONG_LIMB.\n\n\t* acinclude.m4 (POWERPC64_PATTERN): Add powerpc970-*-*.\n\n\t* configure.in (POWERPC64_PATTERN): Handle *-*-darwin*.\n\t(POWERPC64_PATTERN, *-*-aix*): Prepend powerpc64/mode64 to path_aix64.\n\n\t* mpn/powerpc64/mode64/mul_1.asm: Change cal => addi.\n\t* mpn/powerpc64/mode64/addmul_1.asm: Likewise.\n\t* mpn/powerpc64/mode64/submul_1.asm: Likewise.\n\t* mpn/powerpc64/sqr_diagonal.asm: Likewise.\n\n\t* mpn/powerpc64/mode64/mul_1.asm: Move from \"..\".\n\t* mpn/powerpc64/mode64/addmul_1.asm: Likewise.\n\t* mpn/powerpc64/mode64/submul_1.asm: Likewise.\n\t* mpn/powerpc64/mode64/divrem_1.asm: Likewise.\n\t* mpn/powerpc64/mode64/rsh1sub_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/add_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/addsub_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/sub_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/addlsh1_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/diveby3.asm: Likewise.\n\t* 
mpn/powerpc64/mode64/rsh1add_n.asm: Likewise.\n\t* mpn/powerpc64/mode64/sublsh1_n.asm: Likewise.\n\n\t* mpn/powerpc64/lshift.asm: Handle mode32 ABI.\n\t* mpn/powerpc64/rshift.asm: Likewise.\n\t* mpn/powerpc64/umul.asm: Likewise.\n\n\t* tune/powerpc64.asm: Make it actually work.\n\n2003-10-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/get_d.c: Add a workaround for alpha gcc signed constant\n\tcomparison bug.\n\n\t* gmpxx.h (gmp_randclass gmp_randinit_lc_2exp_size constructor): Throw\n\tstd::length_error if size is too big.\n\t* tests/cxx/t-rand.cc (check_randinit): Exercise this.\n\n\t* mpn/x86/pentium4/sse2/addlsh1_n.asm: New file, derived in part from\n\tmpn/x86/pentium4/sse2/add_n.asm.\n\n\t* doc/gmp.texi (C++ Interface Integers, C++ Interface Rationals, C++\n\tInterface Floats): Note std::invalid_argument exception for invalid\n\tstrings to constructors and operator=.\n\t(C++ Interface Random Numbers): Note std::length_error exception for\n\tsize too big in gmp_randinit_lc_2exp_size.\n\n2003-10-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr-2-0-2-branch 2003-10-18.\n\n\t* gmpxx.h (mpz_class, mpq_class, mpf_class, mpfr_class constructors\n\tand operator= taking string or char*): Throw std::invalid_argument if\n\tstring cannot be converted.\n\t* tests/cxx/t-constr.cc, tests/cxx/t-assign.cc: Exercise this.\n\n\t* cxx/ismpz.cc, cxx/ismpq.cc, cxx/ismpf.cc: Use istream std::locale\n\tctype facet for isspace when available.  Only accept space at the\n\tstart of the input, same as g++ libstdc++.  
Use ASSERT_NOCARRY to\n\tcheck result of mpz_set_str etc.\n\t* cxx/ismpf.cc: Don't accept \"@\" for exponent indicator.\n\n\t* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Remove\n\t_open and _mpn variants of mpn_toom3_mul_n, only one style now.\n\t* tune/mul_n_open.c, tune/mul_n_mpn.c: Remove files.\n\n\t* gmp-impl.h (LIMB_HIGHBIT_TO_MASK): New macro.\n\t(udiv_qrnnd_preinv2, udiv_qrnnd_preinv2gen): Use it.\n\n\t* tests/mpz/t-import.c, tests/mpz/t-export.c: Use octal for character\n\tconstants, hex is an ANSI-ism.\n\n\t* mpn/alpha/ev5/mode1o.c: Corrections to ASSERTs, as per\n\tmpn/generic/mode1o.c.\n\n\t* mpn/generic/diveby3.c: Add commented out alternative code and notes\n\tfor taking the multiply off the dependent chain.  Amend/clarify some\n\tof the other comments.\n\n\t* configure.in (powerpc970-*-*): Use gcc -mcpu=970 when available.\n\t(powerpc7400-*-*): Fallback on gcc -mcpu=750 if -mcpu=7400 not\n\tavailable.\n\n\t* doc/gmp.texi (C++ Formatted Input): Note locale digit grouping not\n\tsupported.\n\t(C++ Formatted Input, C++ Formatted Output): Cross reference class\n\tinterface on overloading.\n\n\t* mpn/m68k/README: Add various ideas from doc/tasks.html.\n\n\t* mpn/m88k/README: New file.\n\n2003-10-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Recognize powerpc970.\n\n2003-10-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Recognize powerpc970 under MacOS.\n\n2003-10-15  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, acinclude.m4 (GMP_C_RIGHT_SHIFT): New test.\n\t* gmp-impl.h (LIMB_HIGHBIT_TO_MASK): New macro.\n\t(udiv_qrnnd_preinv2, udiv_qrnnd_preinv2gen): Use it.\n\n\t* mpn/amd64/amd64-defs.m4: New file, with a non-aligning PROLOGUE.\n\t* configure.in (amd64-*-*): Use it.\n\n\t* mpfr/*: Update to mpfr cvs 2003-10-15.\n\n\t* mpn/generic/get_d.c: Rewrite, simplifying and truncating towards\n\tzero unconditionally.\n\t* tests/mpn/t-get_d.c: Add various further tests.\n\t* gmp-impl.h (FORCE_DOUBLE): New macro.\n\n\t* 
gmp-h.in (__mpz_struct): Add comment on __mpz_struct getting into\n\tC++ mangled function names.\n\n\t* doc/gmp.texi (Build Options): Update notes for new doc subdir.\n\t(Low-level Functions): Note mpn functions don't check for zero limbs\n\tetc, it's up to an application to strip.\n\n\t* doc/configuration (Configure): mdate-sh now in doc subdir, add\n\tgenerated fat.h.\n\n2003-10-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc32/mod_34lsub1.asm: New file.\n\n\t* mpn/powerpc32/diveby3.asm, mpn/powerpc64/diveby3.asm: src[] in\n\tsecond operand of mullw, to allow possible early-out, which the\n\t0xAA..AB inverse cannot give.  This improvement noticed by Torbjorn.\n\n\t* acinclude.m4 (GMP_ASM_LSYM_PREFIX): Print to config.log whether\n\tlocal label is purely temporary or appears in object files, for\n\tdevelopment purposes.\n\n\t* doc/gmp.texi, doc/fdl.texi, doc/texinfo.tex, doc/mdate-sh: Moved\n\tfrom top-level.\n\t* doc/Makefile.am: New file.\n\t* configure.in (AC_OUTPUT): Add doc/Makefile.\n\t* Makefile.am (SUBDIRS): Move doc subdirectory from EXTRA_DIST.\n\t(info_TEXINFOS, gmp_TEXINFOS): Moved to doc/Makefile.am.\n\t* mpfr/Makefile.am (mpfr_TEXINFOS): fdl.texi now in doc subdir.\n\t(TEXINFO_TEX): texinfo.tex now in doc subdir.\n\t(AM_MAKEINFOFLAGS): Set -I to doc subdir.\n\n\t* mpz/and.c: For positive/positive, use mpn_and_n, rate a realloc as\n\tUNLIKELY.\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Don't test\n\tfor high zero limbs.\n\n2003-10-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/diveby3.asm: New file (trivial edits of\n\tpowerpc32/diveby3.asm).\n\n\t* mpn/powerpc32/diveby3.asm: Update cycle counts with more processors.\n\t* mpn/powerpc32/sqr_diagonal.asm: Likewise.\n\n\t* mpn/pa64/add_n.asm: Correct PA8500 cycle counts.\n\t* mpn/pa64/sub_n.asm: Likewise.\n\n\t* mpn/m68k/aors_n.asm (INPUT PARAMETERS): Fix typo.\n\t* mpn/m68k/lshift.asm: Likewise.\n\t* mpn/m68k/rshift.asm: Likewise.\n\n\t* mpn/m68k/README: Correct an 
URL; add some STATUS comments.\n\n\t* mpn/powerpc32/addlsh1_n.asm: New file.\n\t* mpn/powerpc32/sublsh1_n.asm: New file.\n\n2003-10-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/sparc64/divrem_1.c, mpn/sparc64/mod_1.c: New files.\n\t* mpn/sparc64/sparc64.h (HALF_ENDIAN_ADJ, count_leading_zeros_32,\n\tinvert_half_limb, udiv_qrnnd_half_preinv): New macros.\n\n\t* gmp-impl.h (udiv_qrnnd_preinv2): Use a ? : for getting the n1 bit,\n\tso as not to depend on signed right shifts being arithmetic.\n\n\t* mpn/powerpc32/diveby3.asm: New file.\n\n\t* mpn/generic/divrem_1.c: Use CNST_LIMB(0) to avoid warnings from\n\tudiv_qrnnd_preinv about shift count when int<long.  Do the same with\n\tudiv_qrnnd, for consistency.\n\n\t* Makefile.am (install-data-hook): Print a warning recommending \"make\n\tcheck\" to watch out for compiler bugs.  Proposed by Torbjorn.\n\n2003-10-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/asm-defs.m4: Handle rsh1aors_n.\n\n\t* configure.in (tmp_mulfunc): Handle rsh1aors_n.\n\n2003-10-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/diveby3.asm: Remove non-PIC RODATA memory\n\taccess for 0xAAAAAAAB constant.\n\n\t* gmp-impl.h (popc_limb, ULONG_PARITY) [ev67, ev68]: Add gcc asm\n\tversions using ctpop.\n\n\t* mpn/x86/k6/aorsmul_1.asm: Tweak some comments, remove M4_description\n\tand M4_desc_retval used only in comments.\n\n\t* mpn/x86/k6/mul_basecase.asm: Add comment on using mpn_mul_1.\n\n2003-10-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/addlsh1_n.asm: Tweak for 0.25 c/l better loop speed.\n\t* mpn/powerpc64/sublsh1_n.asm: Likewise.\n\n2003-10-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-10-09.\n\n\t* tests/devel/try.c (_SC_PAGESIZE): Define from _SC_PAGE_SIZE on\n\tsystems which use that, eg. 
hpux 9.\n\n2003-10-07  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_sysctl_hw_model): Correction to last sscanf\tchange.\n\n\t* configure.in: Check for psp_iticksperclktick in struct pst_processor.\n\t* tune/freq.c (freq_pstat_getprocessor): Use this.\n\n\t* tests/devel/try.c (divisor_array): Add a couple of half-limb values.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Correction to last change, need to\n\tset result \"yes\" when cross compiling.\n\n2003-10-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c: Use __GMPN_ADD_1/_GMPN_SUB_1 instead of\n\tmpn_add_1 and mpn_sub_1.\n\n\t* mpn/pa64/aorslsh1_n.asm: Schedule register save and restore code.\n\n2003-10-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/mul_1.asm: Misc comment cleanups.\n\t* mpn/pa64/addmul_1.asm: Likewise.\n\t* mpn/pa64/submul_1.asm: Likewise.\n\n\t* mpn/pa64/README: Correct cycle counts.\n\n\t* mpn/pa64/aorslsh1_n.asm: New file.\n\n2003-10-04  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_sysctl_hw_model, freq_sunos_sysinfo,\n\tfreq_sco_etchw, freq_bsd_dmesg, freq_irix_hinv): Demand matching of\n\tMHz etc at end of sscanf format string.  
In particular need this for\n\tfreq_bsd_dmesg on i486-pc-freebsd4.7 to avoid the 486 cpu being used\n\tfor the frequency.\n\n\t* tests/misc.c, tests/tests.h (tests_setjmp_sigfpe,\n\ttests_sigfpe_handler, tests_sigfpe_done, tests_sigfpe_target,\n\ttests_dbl_mant_bits): New.\n\n\t* configure.in (viac3*-*-*): Add gcc VIA c3 options.\n\n\t* mpfr/*: Update to mpfr cvs 2003-10-04.\n\n\t* tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n,\n\trefmpn_rsh1add_n, refmpn_rsh1sub_n): Add ASSERTs for operand overlaps.\n\t* tests/tests.h (refmpn_addlsh1_n, refmpn_sublsh1_n, refmpn_rsh1add_n,\n\trefmpn_rsh1sub_n): Add prototypes.\n\n\t* tests/devel/try.c, tune/many.pl: Add mpn_addlsh1_n, mpn_sublsh1_n,\n\tmpn_rsh1add_n, mpn_rsh1sub_n.\n\n2003-10-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n, refmpn_rsh1add_n,\n\trefmpn_rsh1sub_n): New functions.\n\n2003-10-03  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_n.c (toom3_interpolate): Use mpn_add_1/mpn_sub_1\n\tinstead of MPN_INCR_/MPN_DECR_U.\n\n2003-10-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (ia64*-*-hpux*): Fall back to +O1, not +O.\n\n2003-10-02  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (ia64*-*-hpux*): For cc, let +O optimization level\n\tfallback if +O3 doesn't work.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add a test of __builtin_alloca\n\twhen available, to pick up Itanium HP-UX cc internal errors in +O2.\n\tProvoking code by Torbjorn.\n\n2003-10-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/asm-defs.m4: Handle aorslsh1_n.\n\n\t* configure.in (tmp_mulfunc): Handle aorslsh1_n.\n\n2003-10-01  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Make bad ARM last byte into a\n\tseparate case and consider it non-IEEE, since it looks like this is\n\tdue to some sort of restricted or incorrect software floats.\n\n\t* demos/calc/Makefile.am: Use automake yacc/lex support, seems fine in\n\tseparate objdir now.\n\n\t* 
cxx/dummy.cc: Moved from top-level dummy.cc.\n\t* Makefile.am (libgmpxx_la_SOURCES): Update to cxx/dummy.cc,\n\tcorrection to comment about this.\n\n2003-09-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c: Correct documentation of -split.\n\t(TIME): Remove cast of result to double.\n\t(main): Change timing variables to int.\n\t(main): #ifdef LIMIT_RESOURCE_USAGE, don't convert numbers of more than\n\t100000 digits.\n\n2003-09-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/*/*.asm: Clean up spacing, tabify.\n\n\t* mpn/alpha/rshift.asm: Table cycle counts.\n\t* mpn/alpha/lshift.asm: Likewise.\n\t* mpn/alpha/ev5/rshift.asm: Likewise.\n\t* mpn/alpha/ev5/lshift.asm: Likewise.\n\t* mpn/alpha/ev6/add_n.asm: Likewise.\n\t* mpn/alpha/ev6/sub_n.asm: Likewise.\n\n\t* mpn/pa64/mul_1.asm: Fix comment typo.\n\t* mpn/pa64/addmul_1.asm: Likewise.\n\t* mpn/pa64/submul_1.asm: Likewise.\n\n2003-09-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/README: Update with POWER4/PPC970 pipeline info.\n\n\t* mpn/powerpc64/rshift.asm: Rewrite.\n\t* mpn/powerpc64/lshift.asm: Rewrite.\n\n2003-09-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/common.c (speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,\n\tspeed_mpn_rsh1add_n, speed_mpn_rsh1sub_n): Conditionalize on\n\tcorresponding HAVE_NATIVE_*.\n\n2003-09-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/combit.c: Use GMP_NUMB_BITS not BITS_PER_MP_LIMB.\n\n\t* demos/expr/exprfr.c: Allow for mpfr_inf_p, mpfr_nan_p and\n\tmpfr_number_p merely returning non-zero, rather than 1 or 0.\n\n\t* demos/expr/exprfr.c, demos/expr/t-expr.c: Add erf, integer_p, zeta.\n\n\t* demos/expr/Makefile.am (LDADD): Update comments on $(LIBM).\n\n2003-09-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.c (routine): Add entires for mpn_addlsh1_n, mpn_sublsh1_n,\n\tmpn_rsh1add_n, and mpn_rsh1sub_n.\n\n\t* tune/speed.h: Declare speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,\n\tspeed_mpn_rsh1add_n, and speed_mpn_rsh1sub_n.\n\n\t* tune/common.c 
(speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,\n\tspeed_mpn_rsh1add_n, speed_mpn_rsh1sub_n): New functions.\n\n\t* gmp-impl.h: Declare mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsh1add_n, and\n\tmpn_rsh1sub_n.\n\n\t* mpn/asm-defs.m4: Add define_mpn's for addlsh1_n, sublsh1_n,\n\trsh1add_n, and rsh1sub_n.\n\n\t* mpn/powerpc64/*.asm: Add cycle counts in consistent style.  Misc\n\tstyling edits.\n\n\t* configure.in: Add #undefs for HAVE_NATIVE_mpn_addlsh1_n,\n\tHAVE_NATIVE_mpn_sublsh1_n, HAVE_NATIVE_mpn_rsh1add_n,\n\tHAVE_NATIVE_mpn_rsh1sub_n.\n\t(gmp_mpn_functions_optional): List addlsh1_n, sublsh1_n, rsh1add_n,\n\tand rsh1sub_n.\n\n2003-09-24  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-09-24.\n\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Remove conftest* temporary files.\n\n2003-09-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MUL_TOOM3_THRESHOLD, SQR_TOOM3_THRESHOLD): Now 128.\n\n2003-09-23  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (gmp_randinit_set): Use __gmp_const rather than const.\n\n2003-09-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/mul_n_mpn.c: (__gmpn_sqr_n): New #define.\n\t* tune/mul_n_open.c (__gmpn_sqr_n): New #define.\n\n\t* mpn/generic/mul.c (mpn_sqr_n): Move from here...\n\t* mpn/generic/mul_n.c (mpn_sqr_n): ...to here.\n\t(mpn_sqr_n): Allocate workspace for toom3 using\tTMP_* mechanism except\n\tfor very large operands when !WANT_FFT.\n\n\t* mpn/generic/mul_n.c: Add a missing \";\". 
Misc comment fixes.\n\n\t* mpn/generic/mul.c: Remove spurious #include <stdio.h>.\n\n\t* mpn/x86/k7/gmp-mparam.h: Retune.\n\n\t* mpn/generic/mul_n.c (mpn_mul_n): Allocate workspace for toom3 using\n\tTMP_* mechanism except for very large operands when !WANT_FFT.\n\n\t* gmp-impl.h (MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE):\n\tDefine conditionally on WANT_FFT and HAVE_NATIVE_mpn_sublsh1_n.\n\t(MPN_TOOM3_MAX_N): New #define.\n\n\t* mpn/generic/sqr_basecase.c: Use mpn_addlsh1_n when available.\n\n\t* mpn/generic/mul_n.c: Use proper form for HAVE_NATIVE macros.\n\n2003-09-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-09-22.\n\n2003-09-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/gmp-mparam.h (USE_PREINV_DIVREM_1,\n\tUSE_PREINV_MOD_1): Set to 1 for new asm versions.\n\n\t* mpfr/*: Update to mpfr cvs 2003-09-21.\n\n2003-09-21  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n): Conditionally use\n\tmpn_sublsh1_n, mpn_rsh1add_n and mpn_rsh1sub_n, in addition to\n\tmpn_addlsh1_n.  
Avoid all copying, at the expense of some additional\n\tworkspace.\n\n\t* gmp-impl.h (MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE): Accommodate\n\tlatest toom3 code.\n\n2003-09-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/divrem_1.asm, mpn/x86/pentium4/sse2/mod_1.asm:\n\tNew files.\n\n2003-09-16  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.c (run_one): Don't scale the -1.0 not-available return.\n\tPrint \"n/a\" for times not-available.\n\n2003-09-13  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_n.c (toom3_interpolate): New function.\n\t(mpn_toom3_mul_n, mpn_toom3_sqr_n): Call toom3_interpolate.\n\n2003-09-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove unused\n\tvariables.\n\t(mpn_toom3_mul_n, mpn_toom3_sqr_n): Use offset `+ 1', not `+ 2' in last\n\tMPN_DECR_U calls.\n\n2003-09-12  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Rewrite.\n\n2003-09-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_KARA_MUL_N_TSIZE, MPN_KARA_SQR_N_TSIZE): Reformulate\n\tto use the same form as MPN_TOOM3_MUL_N_TSIZE.\n\t(MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE): Update for new Toom3\n\tcode requirements.\n\t* mpn/generic/mul_n.c (evaluate3, interpolate3, add2Times): Remove.\n\t(USE_MORE_MPN): Remove.\n\n2003-08-31  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-08-31.\n\n2003-08-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-08-30.\n\n2003-08-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/amd64/README: New file.\n\n2003-08-23  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_getsysinfo): Correction to speed_cycletime value\n\testablished.\n\n\t* mpz/rootrem.c, gmp-h.in, gmp.texi (mpz_rootrem): Don't return\n\texactness indication, can get that from testing the remainder.\n\n\t* mpn/x86/k7/aors_n.asm, mpn/x86/k7/mmx/copyi.asm: Amend to comments\n\tabout loads 
and stores and what speed should be possible.\n\n2003-08-19  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (add_ssaaaa, sub_ddmmss) [hppa 64]: Move down into main\n\t__GNUC__ block.  Exclude for _LONG_LONG_LIMB (ie. ABI=2.0n) since\n\tthese forms are only for ABI=2.0w.\n\n\t* longlong.h (count_leading_zeros) [__mcpu32__]: Check __mcpu32__ to\n\tavoid bfffo on GCC 3.4 in CPU32 mode.  Reported by Bernardo Innocenti.\n\n\t* longlong.h (count_trailing_zeros) [x86_64]: Use \"%q0\" to force\n\t64-bit register destination.  Pointed out by Torbjorn.\n\n\t* mpz/combit.c: Correction to carry handling when extending a\n\tnegative, and use __GMPN_ADD_1.  Correction to complement limb for a\n\tnegative when there's a non-zero low limb.\n\t* tests/mpz/bit.c (check_clr_extend, check_com_negs): Exercise these.\n\n\t* demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/test.pl: Add\n\tget_d_2exp.\n\t* demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/GMP/Rand.pm,\n\tdemos/perl/test.pl: Add gmp_urandomb_ui, gmp_urandomm_ui.\n\t(GMP::Rand::randstate): Accept a randstate object to copy.\n\t* demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/GMP/Mpz.pm,\n\tdemos/perl/test.pl: Add combit, rootrem.\n\n2003-08-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/Makefile.am (EXTRA_DIST): Add amd64.asm.\n\n2003-08-17  Kevin Ryde  <kevin@swox.se>\n\n\t* gmpxx.h [__MPFR_H]: Include full <iostream> for inlines.\n\t* tests/cxx/t-headfr.cc: New file, exercising this.\n\t* tests/cxx/Makefile.am: Add it.\n\n\t* tests/cxx/t-constr.cc: Include config.h for WANT_MPFR.\n\n\t* gmpxx.h: Correction to temp variable type in mpf -> mpfr assignment.\n\tReported by Derrick Bass.\n\t* tests/cxx/t-assign.cc (check_mpfr): Exercise this.\n\n\t* configure.in (WANT_MPFR): AC_DEFINE this, for the benefit of\n\ttests/cxx/t-*.cc.  
(Was always meant to have been defined.)\n\t* tests/cxx/Makefile.am (INCLUDES): Add -I$(top_srcdir)/mpfr.\n\n\t* gmpxx.h: __gmp_default_rounding_mode -> __gmpfr_default_rounding_mode\n\t(struct __gmp_hypot_function): Correction to mpfr_hypot addition.\n\t* tests/cxx/t-misc.cc (check_mpfr_hypot): Corrections to mpfr/long\n\ttests.\n\n2003-08-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (amd64): New.\n\n\t* mpn/amd64/gmp-mparam.h: New file.\n\n\t* tune/amd64.asm: New file, derived in part from tune/pentium.asm.\n\n2003-08-15  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_irix_hinv): Reinstate, for the benefit of IRIX 6.2.\n\t(freq_attr_get_invent): Conditionalize on INFO_LBL_DETAIL_INVENT too.\n\n2003-08-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/get_d.c: Use mpn_get_d.\n\t* tests/mpq/t-get_d.c (check_onebit): New test.\n\n\t* gmp.texi (Notes for Particular Systems): Under x86 cpu types, note\n\ti386 is a fat binary, remove pentium4 recommendation since i386 is now\n\tquite reasonable for p4.\n\t(Notes for Particular Systems): Under Windows DLLs, remove caveat\n\tabout --enable-cxx now ok, update .lib creation for new libtool,\n\tremove .exp not needed for MS C.\n\t(Notes for Package Builds): i386 is a fat binary.\n\t(Reentrancy): Remove SCO ctype.h note, don't want to list every system\n\tmisfeature, and was quite possibly for non-threading mode anyway.\n\t(Autoconf): Remove notes on gmp 2 detection, too old to want to\n\tencourage anyone to use.\n\t(Karatsuba Multiplication): Correction to threshold increase/decrease\n\tfor a and b terms.  
Reported by Richard Brent and Paul Zimmermann.\n\tAlso add various further index entries.\n\n\t* tune/freq.c (freq_attr_get_invent): New function.\n\t(freq_irix_hinv): Remove, in favour of freq_attr_get_invent.\n\t* configure.in (AC_CHECK_FUNCS): Add attr_get.\n\t(AC_CHECK_HEADERS): Add invent.h, sys/attributes.h, sys/iograph.h.\n\n2003-08-03  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c (tune_mul): Use MUL_KARATSUBA_THRESHOLD_LIMIT.\n\n2003-08-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/asm-defs.m4: Tweak some comments, add hpux11 to m4wrap 0xFF\n\tproblem systems.\n\n\t* configure.in (*-*-sco3.2v5*): Remove lt_cv_archive_cmds_need_lc=no,\n\tsince libtool no longer uses it.  This was a workaround fixing ctype.h\n\tin SCO 5 shared libraries; not sure if libtool now gets it right on\n\tits own, let's hope so.\n\n\t* configure.in, acinclude.m4 (GMP_PROG_HOST_CC): Remove, libtool no\n\tlonger demands HOST_CC.\n\n\t* configure.in: When C or C++ compiler not found, refer user to\n\tconfig.log.\n\n\t* configure.in (i386-*-*): Turn i386 into a fat binary build.\n\t* mpn/x86/fat/fat.c, mpn/x86/fat/fat_entry.asm,\n\tmpn/x86/fat/gmp-mparam.h, mpn/x86/fat/gcd_1.c, mpn/x86/fat/mode1o.c:\n\tNew files.\n\t* gmp-impl.h (struct cpuvec_t) [x86 fat]: New structure.\n\t* longlong.h (COUNT_LEADING_ZEROS_NEED_CLZ_TAB) [x86 fat]: Define.\n\t* mpn/asm-defs.m4 (foreach): New macro.\n\t* mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): New define.\n\t* mpn/x86/sqr_basecase.asm: New file, primarily as a fallback for fat\n\tbinaries.\n\t* mpn/x86/p6/gmp-mparam.h, mpn/x86/p6/mmx/gmp-mparam.h: Add comments\n\tabout fat binary SQR_KARATSUBA_THRESHOLD for p6 and p6/mmx.\n\n\t* configure.in: Add various supports for fat binaries, via fat_path,\n\tfat_functions and fat_thresholds variables.\n\t* acinclude.m4 (GMP_STRIP_PATH): Mung $fat_path too.\n\t(GMP_FAT_SUFFIX, GMP_REMOVE_FROM_LIST): New macros.\n\t* gmp-impl.h: Add various supports for fat binaries.\n\t(DECL_add_n etc): New 
macros.\n\t(mpn_mul_basecase etc): Define only if not already defined.\n\t* mpn/asm-defs.m4 (m4_config_gmp_mparam): Mention fat binary.\n\t(MPN): Use m4_unquote, for the benefit of fat binary name expansion.\n\t* doc/configuration: Notes on fat binaries.\n\t* gmp-impl.h (MUL_TOOM3_THRESHOLD_LIMIT): Define always.\n\t(MUL_KARATSUBA_THRESHOLD_LIMIT): New define.\n\t* mpn/generic/mul.c, mpn/generic/mul_n.c: Use these.\n\t* tune/divrem1div.c, tune/divrem1inv.c, tune/mod_1_div.c,\n\ttune/mod_1_inv.c: Define OPERATION_divrem_1 and OPERATION_mod_1, to\n\ttell fat.h what's being done.\n\n\t* config.guess (alpha-*-*): Update comments on what configfsf.guess\n\tdoes and doesn't do for us.\n\n2003-07-31  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess: Remove $dummy.o files everywhere, in case vendor\n\tcompilers produce that even when not asked.\n\n\t* demos/perl/GMP.xs (class_or_croak): Rename \"class\" parameter to\n\tavoid C++ keyword.\n\t(coerce_ulong, coerce_long): Move croaks to stop g++ 3.3 complaining\n\tabout uninitialized variables.\n\n\t* demos/perl/INSTALL: Add notes on building with a DLL.\n\n\t* longlong.h (count_trailing_zeros) [x86_64]: Ensure bsfq destination\n\tis a 64-bit register.  Diagnosed by Francois G. Dorais.\n\n2003-07-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h [ppc]: Remove nested test for vxworks.\n\n2003-07-24  Kevin Ryde  <kevin@swox.se>\n\n\t* gmpxx.h (struct __gmp_binary_multiplies): Use mpz_mul_si for\n\tmpz*long and long*mpz.\n\t* tests/cxx/t-ops.cc (check_mpz): Exercise mpz*long and mpz*ulong.\n\n\t* cxx/ismpf.cc: Use std::locale decimal point when available.  
Expect\n\tlocaleconv available always.\n\t* tests/cxx/t-locale.cc: Enable check_input tests.\n\n\t* gmpxx.h (struct __gmp_hypot_function): Use mpfr_hypot.\n\t* tests/cxx/t-misc.cc (check_mpfr_hypot): New tests.\n\n\t* tests/cxx/t-assign.cc, tests/cxx/t-binary.cc, tests/cxx/t-ops.cc,\n\ttests/cxx/t-prec.cc, tests/cxx/t-ternary.cc, tests/cxx/t-unary.cc:\n\tInclude config.h for WANT_MPFR.\n\n\t* tests/mpz/bit.c (check_single): Correction to a diagnostic print.\n\n2003-07-24  Niels Möller  <nisse@lysator.liu.se>\n\n\t* mpz/combit.c: New file.\n\t* Makefile.am, mpz/Makefile.am: Add it.\n\t* gmp-h.in (mpz_combit): Add prototype.\n\t* tests/mpz/bit.c (check_single): Exercise mpz_combit.\n\n2003-07-16  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/get_d.c: Correction to infinity handling for large exp.\n\n2003-07-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/get_d.c, mpz/get_d_2exp.c, mpf/get_d.c, mpf/get_d_2exp.c: Use\n\tmpn_get_d.\n\n\t* mpn/generic/get_d.c: New file, based on mpz/get_d.c and insert-dbl.c.\n\t* configure.in, mpn/Makefile.am: Add it.\n\t* gmp-impl.h (mpn_get_d): Add prototype.\n\n\t* tests/mpn/t-get_d.c: New file.\n\t* tests/mpn/Makefile.am: Add it.\n\n\t* tests/mpz/t-get_d_2exp.c (check_onebit, check_round): Test negatives.\n\t(check_onebit): Add a few more bit sizes.\n\n\t* tests/misc.c, tests/tests.h (tests_isinf): New function.\n\n2003-07-12  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (GMP_PROG_CXX_WORKS): Include $CPPFLAGS, same as\n\tautomake does in the actual build.\n\n\t* acinclude.m4 (GMP_PROG_CXX_WORKS): In the namespace test, declare\n\tnamespace before trying to use.  
In std iostream test, provoke a\n\tfailure from Compaq C++ in pre-standard mode.\n\n2003-07-08  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Use separate compiles for various\n\tknown problems, and indicate to the user the reason for rejecting.\n\t(GMP_PROG_CXX_WORKS): Ditto, and insist on being able to execute each\n\tcompiled program.\n\n2003-07-05  Kevin Ryde  <kevin@swox.se>\n\n\t* config.sub: Add comments to our alias transformations.\n\n\t* configfsf.sub, configfsf.guess: Update to 2003-07-04.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS, GMP_PROG_CC_WORKS_LONGLONG): Show\n\tfailing program in config.log, per other autoconf tests.\n\n\t* configure.in (i786-*-*): Recognise as pentium4, per configfsf.sub.\n\n2003-06-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/get_d_2exp.c, mpf/get_d_2exp.c: Avoid res==1.0 when floats round\n\tupwards.\n\n\t* tests/mpz/t-get_d_2exp.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/mpf/t-get_d_2exp.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/x86call.asm, test/tests.h (x86_fldcw, x86_fstcw): New\n\tfunctions.\n\t* tests/misc.c, tests/tests.h (tests_hardware_getround,\n\ttests_hardware_setround): New functions.\n\n2003-06-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/sparc64/dive_1.c: New file.\n\n\t* mpn/sparc64/sparc64.h: New file.\n\t* mpn/sparc64/mode1o.c: Remove things now in sparc64.h.\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-25.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): In last change provoking gnupro\n\tgcc, don't use ANSI style function definition.\n\n2003-06-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/pa32/hppa1_1/udiv.asm: Remove .proc, .entry, .exit and .procend,\n\thandled by PROLOGUE and EPILOGUE.  
Comment out .callinfo, per other\n\tasm files.\n\n\t* gmpxx.h (mpz_class __gmp_binary_divides, __gmp_binary_modulus): Fix\n\tlong/mpz and long%mpz for dividend==LONG_MIN divisor==-LONG_MIN.\n\t(mpz_class __gmp_binary_modulus): Fix mpz%long for negative dividend.\n\t* tests/cxx/t-ops.cc (check_mpz): Add test cases for these, merging\n\toperator/ and operator% sections for clarity.\n\n2003-06-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-21.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add code by Torbjorn provoking an\n\tICE from gcc 2.9-gnupro-99r1 under -O2 -mcpu=ev6.\n\t* configure.in (alpha*-*-* gcc_cflags_cpu): Fallback on -mcpu=ev56 for\n\tthis compiler.\n\n\t* gmpxx.h (get_d): Remove comments about long double, double is\n\tcorrect for get_d, a future long double form would be get_ld.\n\n2003-06-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-19.\n\n\t* mpn/generic/dive_1.c: Share src[0] fetch among all cases.  No need\n\tfor separate final umul_ppmm in even case, make it part of the loop.\n\n\t* mpz/get_d_2exp.c, mpq/set_si.c, mpq/set_ui.c: Nailify.\n\n\t* mpf/iset_si.c: Rewrite using mpf/set_si.c code, in particular this\n\tnailifies it.\n\t* tests/mpf/t-set_si.c: Nailify tests.\n\n\t* mpf/iset_ui.c: Nailify, as per mpf/set_ui.c\n\t* tests/mpf/t-set_ui.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n2003-06-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-15.\n\n\t* mpn/x86/k6/mode1o.asm: Remove a bogus ASSERT.\n\n2003-06-12  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (--enable-assert): Emit WANT_ASSERT to config.m4.\n\t* mpn/powerpc32/powerpc-defs.m4, mpn/x86/x86-defs.m4 (ASSERT): Check\n\tWANT_ASSERT is defined.\n\n\t* mpn/sparc32/v9/udiv.asm: Amend heading, this file is for sparc v9.\n\n\t* tests/cxx/Makefile.am (TESTS_ENVIRONMENT): In libtool openbsd hack,\n\tdiscard error messages from cp, for the benefit of --disable-shared or\n\tsystems not using names 
libgmp.so.*.\n\n\t* tests/devel/try.c (try_one): When overlapping, copy source data\n\tafter filling dst.  Previously probably used only DEADVAL in\n\toverlapping cases.\n\n2003-06-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/random2.c: Rewrite.  Ignore sign of exp parameter.\n\n2003-06-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/sparc64/mode1o.c: New file.\n\n2003-06-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/lshift.asm: Add more cycle counts.\n\t* mpn/powerpc32/rshift.asm: Add more cycle counts.\n\n\t* gmp-impl.h (udiv_qrnnd_preinv1): New name for udiv_qrnnd_preinv.\n\t(udiv_qrnnd_preinv2): New name for udiv_qrnnd_preinv2norm.\n\t(udiv_qrnnd_preinv): New #define, making udiv_qrnnd_preinv2\n\tthe default.\n\t* tune/speed.c: Corresponding changes.\n\t* tune/speed.h: Likewise.\n\t* tune/common.c: Likewise.\n\n\t* mpf/get_str.c: Simplify `off' computation.\n\n\t* longlong.h: Tabify.\n\n2003-06-09  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ABI and ISA): FreeBSD has sparc64 too, just say \"BSD\" to\n\tcover all flavours.\n\t* configure.in: Ditto in some comments.\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-09.\n\n\t* tests/cxx/Makefile.am (LDADD): Add -L$(top_builddir)/$(LIBS), for\n\tthe benefit of gcc 3.2 on itanium2-hp-hpux11.22.\n\n\t* tune/many.pl (mul_2): Add speed routine settings.\n\t(MAKEFILE): Close when done, for the benefit of development hackery.\n\n2003-06-08  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-06-08.\n\n\t* mpn/x86/x86-defs.m4 (femms): Remove fallback to emms.\n\t(cmovCC, psadbw): Remove simulated versions.\n\t(cmov_available_p, psadbw_available_p): Remove.\n\tThis trickery was only ever for development purposes on machines\n\twithout those instructions.  Removing it simplifies gmp and in\n\tparticular avoids complications for fat binary builds.  
Development\n\tcan be done with a wrapper around \"as\" if really needed.\n\n\t* mpn/x86/divrem_1.asm: Don't use loop_or_decljnz, now K6 has its own\n\tmpn/x86/k6/divrem_1.asm.  Amend K6 comments now moved to there.\n\t* mpn/x86/x86-defs.m4 (loop_or_decljnz): Remove, no longer used.\n\n\t* mpn/x86/k6/divrem_1.asm: New file, derived from mpn/x86/divrem_1.asm.\n\n\t* mpn/x86/k6/pre_mod_1.asm: Remove comments now in mpn/x86/mod_1.asm.\n\n\t* mpn/x86/mod_1.asm: Put mpn_mod_1c after mpn_mod_1 for better branch\n\tprediction.  Put done_zero at end for less wastage in alignment.  Use\n\tdecl+jnz unconditionally since in fact it's ok on k6.  Amend comments.\n\n2003-06-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mode1o.c: Fix ASSERTs on return value.\n\n\t* gmp.texi (Build Options): Add viac3 and viac32 cpu types.\n\t(ABI and ISA): Note on sparcv9 ABI=32 vs ABI=64 speed.  More indexing.\n\n\t* configfsf.guess, configfsf.sub: Update to 2003-06-06.\n\t* config.guess: Remove $RANDOM hack supporting netbsd 1.4, not needed\n\tby new configfsf.guess.\n\n\n2003-06-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (add_ssaaaa) [pa64]: Output zero operand as register 0.\n\tAllow more immediate operands.\n\t(sub_ddmmss) [pa64]: Likewise.\n\t(add_ssaaaa) [pa32]: Likewise.\n\t(sub_ddmmss) [pa32]: Likewise.\n\n\t* mpn/pa64: Change \".level 2.0W\" to \".level 2.0w\" to please\n\tpicky GNU assembler.\n\n2003-06-05  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Special Functions): In mpz_array_init, fix type\n\tshown for integer_array and give an example use.\n\n2003-06-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/set_str.c (mpf_set_str): Work around gcc 2 bug triggered on\n\talpha.\n\n2003-06-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/README: Add 7 c/l mmx mul_1, tweak wordings.\n\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Use octal char constants in test\n\tprogram, hex is not supported by K&R.\n\n2003-06-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* 
mpn/mips64/divrem_1.asm: New file.\n\n2003-06-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/lshift.asm: Reformat code.\n\t* mpn/powerpc32/rshift.asm: Reformat code.\n\n2003-05-30  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/misc.c (tests_start): Set stdout and stderr to unbuffered, to\n\tavoid any chance of losing output on segv etc.\n\n2003-05-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/get_str.c: Move label `done' to match TMP_MARK and TMP_FREE.\n\tRemove redundant variable prec.\n\n2003-05-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/convert.c: Test bases up to 62.\n\n\t* tests/mpf/t-conv.c: Test bases up to 62.\n\n\t* demos/pexpr.c: Don't iterate to get accurate timing.\n\n\t* mpf/set_str.c (mpn_pow_1_highpart): Cleanup.\n\n\t* mp_dv_tab.c: Fix typo.\n\n\t* mpf/get_str.c: Rewrite (now sub-quadratic).\n\n2003-05-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpn/t-divrem_1.c: New file.\n\t* tests/mpn/Makefile.am: Add it.\n\n2003-05-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Recognize viac3* processors.\n\n2003-05-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/addmul_2.asm: New file.\n\n2003-05-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Recognize alphaev7* as alphaev67.\n\n\t* config.guess: Recognize viac3* processors.\n\t* configure.in: Set up path for viac3* processors.\n\t* acinclude.m4 (X86_PATTERN): Include viac3* processors.\n\n2003-05-19  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_pstat_getprocessor): New function.\n\t(freq_all): Use it.\n\t* configure.in (AC_CHECK_HEADERS): Add sys/pstat.h.\n\t(AC_CHECK_FUNCS): Add pstat_getprocessor.\n\n2003-05-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_fft.c (mpn_mul_fft_decompose): Remove \"inline\",\n\tsince the code is a bit too big.  
gcc doesn't actually inline when\n\talloca (TMP_ALLOC) is used anyway.\n\n2003-05-13  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Notes for Particular Systems): Libtool directory is .libs\n\tnot _libs for mingw dll.  Reported by Andreas Fabri.\n\n2003-05-07  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add code to generate sse2/xmm code\n\tfrom gcc -march=pentium4, to check the assembler supports that.\n\t(GMP_GCC_PENTIUM4_SSE2, GMP_OS_X86_XMM): New macros.\n\t* configure.in (pentium4-*-*): Use them to see if gcc -march=pentium4\n\t(with sse2) is ok.\n\n2003-05-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/com.c: Rate size==0 as UNLIKELY, fix comment to mpn_add_1.\n\n\t* tune/freq.c (<sys/sysinfo.h>): Include only when needed for\n\tgetsysinfo(), to avoid a problem with this file on AIX 5.1.\n\n2003-05-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/set_str.c: Do not ignore supposedly superfluous digits (in part\n\treverting last change).\n\n2003-05-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi: Use @code for files in @cindex entries, it looks nicer\n\tthan @file.\n\n\t* Makefile.am: Note gmp 4.1.1 and 4.1.2 version info.\n\n\t* configure.in, acinclude.m4 (GMP_CRAY_OPTIONS): New macro for Cray\n\tsystem setups, letting AC_REQUIRE do its job instead of a hard coded\n\tAC_PROG_EGREP.\n\n\t* config.guess: Amend fake RANDOM to avoid \". 
configfsf.guess\" which\n\tsegfaults on Debian \"ash\" 0.4.16.\n\n2003-05-01  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_FUNCS): Add getsysinfo.\n\t(AC_CHECK_HEADERS): Add sys/sysinfo.h and machine/hal_sysinfo.h.\n\t* tune/freq.c (freq_getsysinfo): New function.\n\t(freq_all): Use it.\n\t(freq_sysctlbyname_i586_freq, freq_sysctlbyname_tsc_freq,\n\tfreq_sysctl_hw_cpufrequency, freq_sysctl_hw_model): Set\n\tspeed_cycletime before trying to print it, when verbose.\n\n2003-04-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/set_str.c: Major overhaul.\n\t(mpn_pow_1_highpart): New helper function, meat extracted from\n\tmpf_set_str.\n\n2003-04-24  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_GCC_ARM_UMODSI): Quote result string against m4.\n\n\t* configure, ltmain.sh, aclocal.m4: Update to libtool 1.5.\n\n\t* longlong.h (add_ssaaaa) [all]: Remove first \"%\" commutative in each,\n\tsince gcc only supports one per asm.\n\n\t* printf/doprnt.c: Add M for mp_limb_t.\n\t* tests/misc/t-printf.c: Exercise this.\n\n\t* tests/mpz/t-cmp_d.c: Test infinities.\n\t* tests/mpf/t-cmp_d.c: New file.\n\t* tests/mpf/Makefile.am: Add it.\n\n\t* mpz/cmp_d.c, mpz/cmpabs_d.c, mpf/cmp_d.c: NaN invalid, Inf bigger\n\tthan any value.\n\t* mpz/set_d.c, mpq/set_d.c, mpf/set_d.c: Nan or Inf invalid.\n\n\t* configure.in (AC_CHECK_FUNCS): Add raise.\n\t* invalid.c: New file.\n\t* Makefile.am: Add it.\n\t* gmp-impl.h (__gmp_invalid_operation): Add prototype.\n\t(DOUBLE_NAN_INF_ACTION): New macro.\n\n\t* tests/trace.c, tests/tests.h (d_trace): New function.\n\t* tests/misc.c, tests/tests.h (tests_infinity_d): New function.\n\t* tests/misc.c (mpz_erandomb, mpz_errandomb): Use gmp_urandomm_ui.\n\n\t* tune/tuneup.c, tune/common.c, tests/devel/try.c: Cast various\n\tmp_size_t values for printf %ld in case mp_size_t==int.  Use\n\tgmp_printf for mp_limb_t values.\n\n\t* gmp.texi (Nomenclature and Types): Add mp_exp_t, mp_size_t,\n\tgmp_randstate_t.  
Note ulong for bit counts and size_t for byte\n\tcounts.  Don't bother with @noindent.\n\t(Debugging): New valgrind is getting MMX/SSE.\n\t(Integer Comparisons): mpz_cmp_d and mpz_cmpabs_d on NaNs and Infs.\n\t(Float Comparison): mpf_cmp_d behaviour on NaNs and Infs.\n\t(Low-level Functions): Note with mpn_hamdist what hamming distance is.\n\t(Formatted Output Strings): Add type M.\n\t(Internals): Remove remarks on ulong bits and size_t bytes.  Move int\n\tfield remarks to ...\n\t(Integer Internals, Float Internals): ... here.\n\n2003-04-19  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (*sparc*-*-* ABI=32): Add umul to extra_functions.\n\n\t* mpn/x86/p6/mul_basecase.asm: New file.\n\n2003-04-18  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (m68060-*-*): Fallback to gcc -m68000 when -m68060 not\n\tavailable, and don't use mpn/m68k/mc68020 asm routines.  (Avoids 32x32\n\tmul and 64/32 div which trap to the kernel on 68060.  Advice by\n\tRichard Zidlicky.)\n\t* mpn/m68k/README: Update notes on directory usage.\n\n\t* tests/cxx/Makefile.am (TESTS_ENVIRONMENT): Add a hack to let the\n\ttest programs run with a shared libgmpxx on openbsd 3.2.\n\n\t* gmp.texi (Language Bindings): Add Guile.\n\n2003-04-12  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (cygwin*, mingw*, pw32*, os2*): Add\n\t-Wl,--export-all-symbols to GMP_LDFLAGS, no longer the default in\n\tlatest mingw and libtool.\n\n\t* acinclude.m4 (GMP_ASM_COFF_TYPE): New macro.\n\t* configure.in (x86s): Use it.\n\t* mpn/x86/x86-defs.m4 (COFF_TYPE): New macro.\n\t(PROLOGUE_cpu): Use it, for the benefit of mingw DLLs.\n\n\t* gmp-impl.h (mpn_copyi, mpn_copyd): Add __GMP_DECLSPEC.\n\n\t* gmp.texi (Known Build Problems): Remove windows test program .exe\n\trepeated built, fixed by new libtool.  
Remove MacOS C++ shared library\n\tcreation, fixed by new libtool.\n\t(Notes for Package Builds, Known Build Problems): Remove DESTDIR notes\n\ton libgmpxx, fixed in new libtool.\n\n2003-04-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Match turbosparc.\n\t* config.guess: Recognize turbosparc (just for *bsd for now).\n\n2003-04-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/mul_ui.c [nails]: Call mpf_mul to handle v > GMP_NUMB_MAX.\n\n\t* tests/mpz/t-mul.c (main): Don't try FFT sizes when FFT disabled via\n\tMP_SIZE_T_MAX, eg. for nails.\n\n\t* tests/cxx/t-ternary.cc: Split up tests to help compile speed and\n\tmemory usage.\n\n\t* tests/devel/try.c: Print seed under -R, add -E to reseed, use ulong\n\tfor seed not uint.\n\n\t* gmp.texi: Add @: after various abbreviations, more index entries.\n\t(leftarrow): New macro, for non-tex.\n\t(Random State Initialization): Remove commented gmp_randinit_lc, not\n\tgoing to be implemented.\n\t(Random Number Algorithms): New section.\n\t(References): Add Matsumoto and Nishimura on Mersenne Twister, add\n\tBertot, Magaud and Zimmermann on GMP Square Root.\n\n2003-04-06  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-gcd_ui.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n\t* mpz/gcd_ui.c: Correction to return value on longlong limb systems,\n\tlimb might not fit a ulong.\n\n2003-04-04  Kevin Ryde  <kevin@swox.se>\n\n\t* configure, aclocal.m4, ltmain.sh: Update to libtool cvs snapshot\n\t2003-04-02.\n\n2003-04-02  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (*-*-cygwin*): No longer force lt_cv_sys_max_cmd_len,\n\tlibtool has addressed this now.\n\t(AC_PROVIDE_AC_LIBTOOL_WIN32_DLL): Remove this, libtool _LT_AC_LOCK\n\tno longer needs it.\n\n\t* acinclude.m4 (GMP_PROG_AR): Also set ac_cv_prog_AR and\n\tac_cv_prog_ac_ct_AR when adding flags to AR, so they're not lost by\n\tlibtool's call to AC_CHECK_TOOL.\n\n2003-04-01  Kevin Ryde  <kevin@swox.se>\n\n\t* configure, aclocal.m4, ltmain.sh: Update to libtool cvs 
snapshot\n\t2003-03-31.\n\n\t* configure.in (AC_PROG_F77): Add a dummy AC_PROVIDE to stop libtool\n\trunning F77 probes.\n\n\t* randlc2x.c (gmp_rand_lc_struct): Add comments about what exactly is\n\tin each field.\n\t(randseed_lc): Rename seedp to seedz to avoid confusion with seedp in\n\tthe lc function.  Suggested by Pedro Gimeno.\n\t(gmp_randinit_lc_2exp): Use __GMP_ALLOCATE_FUNC_TYPE.  No need for\n\t\"+1\" in mpz_init2 of _mp_seed.  Don't bother with mpz_init2 for _mp_a.\n\n2003-03-29  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (m68k-*-*): Use -O2, no longer need to fallback to -O.\n\t* acinclude.m4 (GMP_GCC_M68K_OPTIMIZE): Remove macro.\n\n\t* configure.in (AC_CHECK_TYPES): Add notes on why tested.\n\n\t* gmp.texi (GMPrefu, GMPpxrefu, GMPreftopu, GMPpxreftopu): New macros,\n\tuse them for all external references to get URLs into HTML output.\n\t(Random State Initialization): Add gmp_randinit_set.\n\t(Random State Miscellaneous): New section.\n\n2003-03-29  Kevin Ryde  <kevin@swox.se>\n\n\t* randbui.c, randmui.c: New files.\n\t* Makefile.am: Add them.\n\t* gmp-h.in (gmp_urandomb_ui, gmp_urandomm_ui): Add prototypes.\n\t* tests/rand/t-urbui.c, tests/rand/t-urmui.c: New files.\n\t* tests/rand/Makefile.am: Add them.\n\n\t* gmp-impl.h (gmp_randstate_srcptr): New typedef.\n\t(gmp_randfnptr_t): Add randiset_fn.\n\t* randiset.c: New file.\n\t* Makefile.am: Add it.\n\t* gmp-h.in (gmp_randinit_set): Add prototype.\n\t* randlc2x.c, randmt.c: Add gmp_randinit_set support.\n\t* tests/rand/t-iset.c: New file.\n\t* tests/rand/Makefile.am: Add it.\n\n\t* tests/misc.c, tests/tests.h (call_rand_algs): New function.\n\n2003-03-27  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/bin_uiui.c: Use plain \"*\" for kacc products rather than\n\tumul_ppmm since high not needed, except for an ASSERT now amended.\n\n2003-03-26  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/expr/exprfr.c (cbrt, cmpabs, exp2, gamma, nextabove,\n\tnextbelow, nexttoward): New functions.\n\t* demos/expr/t-expr.c: 
Exercise these.\n\n\t* mpfr/*: Update to mpfr cvs 2003-03-26.\n\n\t* gmp-impl.h (MPZ_REALLOC): Use UNLIKELY, to expect no realloc.\n\n\t* tune/time.c (cycles_works_p): Scope variables down to relevant part\n\tto avoid warnings about unused.\n\n\t* configfsf.guess, configfsf.sub: Update to 2003-02-22.\n\t* config.guess: Fake a $RANDOM variable when running configfsf.guess,\n\tto workaround a problem on m68k NetBSD 1.4.1.\n\n\t* mpz/fac_ui.c: Remove unused variable \"z1\".\n\n\t* tune/freq.c (freq_irix_hinv): Allow \"Processor 0\" line from IRIX 6.5.\n\n2003-03-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* randlc2x.c (randget_lc): Remove write-only variable rn.\n\t* mpf/eq.c: Remove write-only variable usign.\n\t* gen-psqr.c (main): Remove write-only variable numb_bits.\n\n2003-03-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* Makefile.am (libgmp_la_SOURCES): Add mp_dv_tab.c.\n\t(libmp_la_SOURCES): Add mp_dv_tab.c.\n\n\t* mpn/alpha/invert_limb.asm: Add a few comments.\n\n\t* mp_dv_tab.c: New file, defining __gmp_digit_value_tab.\n\n\t* mpz/set_str.c: Get rid of function digit_value_in_base and use table\n\t__gmp_digit_value_tab instead.\n\t* mpz/inp_str.c: Likewise.\n\t* mpf/set_str.c: Likewise.\n\t* mpbsd/min.c: Likewise.\n\t* mpbsd/xtom.c: Likewise.\n\n\t* mpz/set_str.c: Allow bases <= 62.  
Return error for invalid bases.\n\t* mpz/inp_str.c: Likewise.\n\t* mpf/set_str.c: Likewise.\n\t* mpz/out_str.c: Likewise.\n\t* mpz/get_str.c: Likewise.\n\t* mpf/get_str.c: Likewise.\n\n\t* mpz/inp_str.c: Restucture to allocate more string space just\n\tbefore needed.\n\t* mpbsd/min.c: Likewise.\n\n\t* longlong.h (__udiv_qrnnd_c): Remove redundant casts.\n\t(32-bit sparc): Test HAVE_HOST_CPU_supersparc in addition to various\n\tsparc_v8 spellings.\n\n2003-03-17  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-03-17.\n\n2003-03-15  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am (EXTRA_libgmp_la_SOURCES): Use this for TMP_ALLOC\n\tsources, instead of a libdummy.la.\n\n2003-03-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Recognize supersparc and microsparc for *BSD systems.\n\tGeneralize some superscalar recognition patterns.\n\n2003-03-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/udiv.asm: New file.\n\n2003-03-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64: Table cycle counts.  
Update some comments.\n\n2003-03-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul.c (mpn_mul): Don't blindly expect\n\tMUL_KARATSUBA_THRESHOLD to be a constant.\n\n2003-03-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul.c (mpn_mul): New operand splitting code for\n\tavoiding cache misses when un >> MUL_KARATSUBA_THRESHOLD > vn.\n\t(MUL_BASECASE_MAX_UN): New #define, default to 500 for now.\n\n2003-03-07  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am: Put gmp.h and mp.h under $(exec_prefix)/include.\n\t* gmp.texi (Build Options): Add notes on this.\n\tReported by Vincent Lefèvre.\n\n2003-03-06  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (alpha*-*-* gcc): Add asm option before testing -mcpu,\n\tfor the benefit of gcc 2.9-gnupro-99r1 on alphaev68-dec-osf5.1 which\n\tdoesn't otherwise put the assembler in the right mode for -mcpu=ev6.\n\n2003-03-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/powerpc-defs.m4: Set up renaming for v registers.\n\n\t* mpz/powm.c (redc): Instead of repeated mpn_incr_u invocations,\n\taccumulate carries and add at the end.\n\t(mpz_powm): Trim tp allocation, now as redc doesn't need carry guard.\n\n2003-02-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/copyd.asm: Correct header comment.\n\n\t* mpn/arm/addmul_1.asm: Correct cycle counts.\n\t* mpn/arm/submul_1.asm: Likewise.\n\n2003-02-20  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/factorize.c (factor_using_pollard_rho): Test k>0 to avoid\n\tinfinite loop if k=0 and gcd!=1 reveals a factor.  
Reported by John\n\tPongsajapan.\n\n\t* gmp.texi, fdl.texi: Update to FDL version 1.2.\n\n2003-02-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/arm/mul_1.asm: Fix typo introduced in last change.\n\n2003-02-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/copyi.asm: Add some header comments.\n\t* mpn/sparc64/copyd.asm: Likewise.\n\n\t* mpn/arm/mul_1.asm: Put vl operand last for umull/umlal.\n\tAdd some header comments.\n\t* mpn/arm/addmul_1.asm: Rewrite.\n\t* mpn/arm/submul_1.asm: Rewrite.\n\t* mpn/arm/gmp-mparam.h: Retune.\n\n2003-02-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/arm/copyi.asm: New file.\n\t* mpn/arm/copyd.asm: New file.\n\n2003-02-16  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Tolerate incorrect last data\n\tbyte seen on an arm system.\n\n2003-02-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/arm/gmp-mparam.h: Retune.\n\n2003-02-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/750/com_n.asm: Add more cycle counts.\n\n2003-02-13  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_PREREQ): Bump to 2.57.\n\n\t* configure.in, acinclude.m4 (GMP_GCC_WA_OLDAS): New macro, applying\n\t-Wa,-oldas only when necessary.\n\n\t* configure.in (powerpc*-*-*): Don't use -Wa,-mppc with gcc, it\n\toverrides options recent gcc adds for -mcpu, making generated code\n\tfail to assemble.\n\n\t* tune/tuneup.c (mpn_fft_table): Remove definition, it's in mul_fft.c.\n\n2003-02-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/gmp-mparam.h: Retune.\n\t* mpn/x86/k7/gmp-mparam.h: Retune.\n\t* mpn/x86/k6/gmp-mparam.h: Retune.\n\t* mpn/x86/p6/gmp-mparam.h: Retune.\n\t* mpn/x86/p6/mmx/gmp-mparam.h: Retune.\n\n\t* tests/mpz/t-mul.c (main): Rewrite FFT testing code.\n\n2003-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Recognize \"power2\" systems.\n\n\t* mpn/powerpc64/gmp-mparam.h: Fix indentation.\n\t* mpn/power/gmp-mparam.h: Retune.\n\t* mpn/alpha/ev6/nails/gmp-mparam.h: Retune.\n\t* 
mpn/sparc64/gmp-mparam.h: Retune.\n\t* mpn/pa64/gmp-mparam.h: Retune.\n\t* mpn/sparc32/v8/supersparc/gmp-mparam.h: Retune.\n\t* mpn/sparc32/v8/gmp-mparam.h: Retune.\n\t* mpn/mips64/gmp-mparam.h: Retune.\n\t* mpn/alpha/ev6/gmp-mparam.h: Retune.\n\t* mpn/powerpc32/gmp-mparam.h: Retune.\n\t* mpn/powerpc32/750/gmp-mparam.h: Retune.\n\t* mpn/alpha/ev5/gmp-mparam.h: Retune.\n\t* mpn/m68k/gmp-mparam.h: Retune.\n\t* mpn/cray/gmp-mparam.h: Set GET_STR_PRECOMPUTE_THRESHOLD.\n\n\t* configure.in: Undo this, problem doesn't happen any more:\n\t(mips64*-*-*): Pass just -O1 to cc, to work around compiler bug.\n\n2003-02-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (MPN_NORMALIZE, MPN_NORMALIZE_NOT_ZERO): Add parens\n\taround macro parameters.  Reported by Jason Moxham.\n\n2003-02-01  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Low-level Functions): No overlap permitted by mpn_mul_n.\n\tReported by Jason Moxham.\n\t(Formatted Input Strings): Correction to strtoul cross reference\n\tformatting.\n\t(BSD Compatible Functions): Add index entry for MINT.\n\n2003-01-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (mpn_mul_fft): Now returns int.\n\n2003-01-29  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/mul_fft.c: Major rewrite.\n\n2003-01-25  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (powerpc*-*-*): Remove $dummy.core file when mfpvr\n\tfails on NetBSD.\n\t(trap): Remove $dummy.core on abnormal termination too.\n\n\t* mpfr/*: Update to mpfr cvs 2003-01-25.\n\n2003-01-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-01-18.\n\n2003-01-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.texi: Canonicalize URLs.\n\n2003-01-15  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Notes for Particular Systems): Add hardware floating point\n\tprecision mode.\n\n\t* mpfr/*, configure, aclocal.m4, config.in: Update to mpfr cvs\n\t2003-01-15.\n\n2003-01-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to mpfr cvs 2003-01-11.\n\n2003-01-09  
Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/get_str.c: Update to mpfr cvs 2003-01-09.\n\n\t* doc/configuration: Various updates.\n\n2003-01-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/copyi.asm: Avoid `nop' mnemonic, unsupported on Cray.\n\t* mpn/alpha/copyd.asm: Likewise.\n\n2003-01-05  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/expr/t-expr.c (check_r): Tolerate mpfr_set_str new return\n\tvalue.\n\n\t* configure, aclocal.m4 (*-*-osf4*, *-*-osf5*): Regenerate with\n\tlibtool patch to avoid bash printf option problem when building shared\n\tlibraries with cxx.\n\n\t* configure.in (pentium4-*-*): Use \"-march=pentium4 -mno-sse2\" since\n\tsse2 causes buggy code from gcc 3.2.1 and is only supported on new\n\tenough kernels.\n\n\t* acinclude.m4 (GMP_PROG_NM): Add some notes about failures, per\n\treport by Krzysztof Kozminski.\n\n\t* gmp-h.in (mpz_mdivmod_ui, mpz_mmod_ui): Add parens around \"r\".\n\n\t* gmp-h.in (__GMP_CAST): New macro, clean to g++ -Wold-style-cast.\n\t(GMP_NUMB_MASK, mpz_cmp_si, mpq_cmp_si, mpz_odd_p, mpn_divexact_by3,\n\tmpn_divmod): Use it.  
Reported by Krzysztof Kozminski.\n\t(mpz_odd_p): No need for the outermost cast to \"int\".\n\t* tests/cxx/t-cast.cc: New file.\n\t* tests/cxx/Makefile.am: Add it.\n\n2003-01-04  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/set_str.c: Update to mpfr cvs 2003-01-04.\n\n\t* demos/expr/exprfra.c (e_mpfr_number): Tolerate recent mpfr_set_str\n\treturning count of characters accepted.\n\n2003-01-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/copyi.asm: New file.\n\t* mpn/alpha/copyd.asm: New file.\n\n2003-01-03  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/expr/t-expr.c: Use __gmpfr on some mpfr internals that have\n\tchanged.\n\n\t* mpfr/*, aclocal.m4, config.in, configure: Update to mpfr cvs\n\t2003-01-03.\n\n\t* gmp.texi (Introduction to GMP): Mention release announcements\n\tmailing list, and put home page and ftp before mailing lists.\n\n2002-12-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_fft.c (mpn_fft_next_size): Simplify.\n\n2002-12-28  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (M68K_PATTERN): New macro.\n\t(GMP_GCC_M68K_OPTIMIZE): Use it to avoid m6811 and friends.\n\t* configure.in: Ditto.\n\n\t* tests/mpz/t-import.c, tests/mpz/t-export.c: Use '\\xHH' to avoid\n\twarnings about char overflows.\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Ditto.\n\n2002-12-28  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* randmt.c (randseed_mt, default_state): Fix off-by-one bug on padding.\n\t(randseed_mt): Add ASSERT checking result of mpz_export.\n\n2002-12-24  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Import and Export): Clarify treatment of signs,\n\treported by Kent Boortz.\n\n\t* randmt.c: Use gmp_uint_least32_t.\n\t(randseed_mt): Add nails to mpz_export in case mt[i] more than 32 bits.\n\n\t* gmp-impl.h (gmp_uint_least32_t): New typedef, replacing GMP_UINT32.\n\t* configure.in (AC_CHECK_TYPES): Add uint_least32_t.\n\t(AC_CHECK_SIZEOF): Add unsigned short.\n\n2002-12-22  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (ULONG_PARITY) [generic C]: 
Mask result to a single bit.\n\t(ULONG_PARITY) [_CRAY, __ia64]: New macros.\n\t* tests/t-parity.c: New test.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* longlong.h (count_trailing_zeros) [ia64]: New macro.\n\n\t* tests/t-count_zeros.c (check_various): Remove unused variable \"n\".\n\n\t* mpn/x86/README: Revise notes on PIC, PLT and GOT.\n\n\t* demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/test.pl: Add \"mt\"\n\tto GMP::Rand::randstate.\n\n2002-12-22  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* randmt.c (randseed_mt): Fix bug that might cause the generator to\n\treturn all zeros with certain seeds.  Fix WARM_UP==0 case.\n\t(gmp_randinit_mt): Initialize to a known state by default.\n\t(randget_mt): Remove check for uninitialized buffer: no longer needed.\n\t(recalc_buffer): Use ?: instead of two-element array.\n\n\t* tests/rand/t-mt.c: New test.\n\t* tests/rand/Makefile.am (check_PROGRAMS): Add it.\n\n2002-12-21  Kevin Ryde  <kevin@swox.se>\n\n\t* cxx/osdoprnti.cc: Use <cstdarg> and <cstring> rather than <stdarg.h>\n\tand <string.h>.  
No need for <stdio.h>.\n\n\t* demos/expr/expr.c, demos/expr/exprfa.c, demos/expr/exprfra.c,\n\tdemos/expr/exprza.c: Use mp_get_memory_functions, not\n\t__gmp_allocate_func etc.\n\t* demos/expr/t-expr.c: Don't use gmp-impl.h.\n\t(numberof): New macro.\n\n\t* gmp-h.in, gmp-impl.h (__gmp_allocate_func, __gmp_reallocate_func,\n\t__gmp_free_func): Move declarations to gmp-impl.h\n\n\t* mp_get_fns.c: New file.\n\t* Makefile.am (libgmp_la_SOURCES, libmp_la_SOURCES): Add it.\n\t* gmp-h.in (mp_get_memory_functions): Add prototype.\n\t* gmp.texi (Custom Allocation): Add mp_get_memory_functions, refer to\n\t\"free\" not \"deallocate\" function.\n\t* gmpxx.h (struct __gmp_alloc_cstring): Use mp_get_memory_functions,\n\tnot __gmp_free_func.\n\n\t* gmp-impl.h [__cplusplus]: Add <cstring> for strlen.\n\t(gmp_allocated_string): Hold length in a field.\n\t* cxx/osdoprnti.cc, cxx/osmpf.cc: Use this.\n\n2002-12-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-perfsqr.c (check_sqrt): Print more variables upon\n\tfailure.\n\n\t* mpn/generic/rootrem.c: In Newton loop, pad qp with leading zero.\n\n2002-12-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/rootrem.c: Allocate 1.585 (log2(3)) times more space\n\tfor pp temporary to allow for worst case overestimate of root.\n\tAdd some asserts.\n\n\t* tests/mpz/t-root.c: Generalize and speed up.\n\n2002-12-19  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/cxx/t-rand.cc (check_randinit): Add gmp_randinit_mt test.\n\n\t* gmp-h.in: Don't bother trying to support Compaq C++ in pre-standard\n\tI/O mode.\n\t* gmp.texi (Notes for Particular Systems): Compaq C++ must be used in\n\t\"standard\" iostream mode.\n\n2002-12-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/mod_34lsub1.asm: Add code for big-endian, using existing\n\tlittle-endian code only if HAVE_LIMB_LITTLE_ENDIAN is defined.\n\n2002-12-18  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (HAVE_LIMB_BIG_ENDIAN, HAVE_LIMB_LITTLE_ENDIAN): New\n\tdefines in 
config.m4.\n\n2002-12-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* printf/printffuns.c (gmp_fprintf_reps): Make it actually work\n\tfor padding > 256.\n\n2002-12-17  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c: Add <string.h> for memcmp.\n\n\t* mpz/pprime_p.c: Use MPN_MOD_OR_MODEXACT_1_ODD.\n\n\t* gmp.texi (Formatted Output Strings): %a and %A are C99 not glibc.\n\t(Formatted Input Strings): Type \"l\" is for double too.  Hex floats are\n\taccepted for mpf_t.\n\t(Formatted Input Functions): Describe tightened parse rule, clarify\n\treturn value a bit.\n\n\t* scanf/doscan.c: Add hex floats, tighten matching to follow C99, for\n\tinstance \"0x\" is no longer acceptable to \"%Zi\".\n\tRename \"invalid\" label to avoid \"invalid\" variable, SunOS cc doesn't\n\tlike them the same.\n\t* tests/misc/t-scanf.c: Update tests.\n\t* tests/misc/t-locale.c (check_input): Don't let \"0x\" appear from fake\n\tdecimal point.\n\n\t* config.guess (sparc*-*-*): Look at BSD sysctl hw.model to recognise\n\tultrasparcs.\n\n\t* mpfr/tests/dummy.c: New file.\n\t* mpfr/tests/Makefile.am (libfrtests_a_SOURCES): Add it.\n\n2002-12-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpbsd/Makefile.am (nodist_libmpbsd_la_SOURCES): Move these mpz\n\tsources to libmpbsd_la_SOURCES directly, automake 1.7.2 now gets the\n\tansi2knr setups right for sources in other directories.\n\n\t* mpfr/tests/Makefile.am: Add libfrtests.a in preparation for new mpfr.\n\n2002-12-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/Makefile.am (mpfr_TEXINFOS, AM_MAKEINFOFLAGS): Allow for\n\tfdl.texi in recent mpfr.\n\n\t* configure.in (AC_PROG_EGREP): Ensure this is run outside the Cray\n\tconditional AC_EGREP_CPP.\n\n\t* configure.in (alpha*-*-*): Use gcc -Wa,-oldas if it works, to avoid\n\tproblems with new compaq \"as\" on OSF 5.1.\n\n\t* mpn/Makefile.am (EXTRA_DIST): Remove Makeasm.am, automake 1.7.2 does\n\tit automatically.\n\n\t* acinclude.m4 (AC_LANG_FUNC_LINK_TRY(C)): Remove this hack, fixed by\n\tautoconf 2.57.\n\n\t* 
configure.in (AC_CONFIG_LIBOBJ_DIR): Set to mpfr, for the benefit of\n\tnew mpfr using LIBOBJ.\n\n\t* configure.in: (AM_INIT_AUTOMAKE): Use \"gnu no-dependencies\n\t$(top_builddir)/ansi2knr\".\n\t* */Makefile.am (AUTOMAKE_OPTIONS): Remove, now in configure.in.\n\n\t* configure, config.in, INSTALL.autoconf: Update to autoconf 2.57.\n\t* */Makefile.in, configure, aclocal.m4, install-sh, mkinstalldirs:\n\tUpdate to automake 1.7.2.\n\n\t* gmp.texi (Build Options): Add hppa64 to cpu types.\n\t(ABI and ISA): Add gcc to hppa 2.0.\n\t(Debugging): Add maximum debuggability config options.\n\t(Language Bindings): Add Arithmos, reported by Johan Vervloet.\n\t(Formatted Output Strings): 128 bits is about 40 digits, ll is only\n\tfor long long not long double.\n\t(Formatted Input Strings): ll is only for long long not long double.\n\n\t* mpz/divis.c, mpz/divis_ui.c, mpz/cong.c, mpz/cong_ui.c: Allow d=0,\n\tunder the rule n==c mod d iff exists q satisfying n=c+q*d.\n\t* gmp.texi (Integer Division): Describe this.\n\tSuggested by Jason Moxham.\n\n2002-12-13  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* randlc2x.c (lc): Remove check for seedn < an, which is now\n\tsuperfluous.  Add ASSERT to ensure it's correct.  Add ASSERT to check\n\tprecondition of __GMPN_ADD.\n\t(gmp_randinit_lc_2exp): Avoid reallocation by allocating one extra bit\n\tfor both seed and a.  
Simplify seedn < p->_cn case.\n\n\t* tests/rand/t-lc2exp.c (check_bigs): Test negative seeds.\n\n2002-12-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa32/pa-defs.m4 (PROLOGUE_cpu): Zap spurious argument to `.proc'.\n\tAdd empty `.callinfo'.\n\n2002-12-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/addmul_1.asm: Don't reuse `ret' symbol for a\n\tlabel.\n\n2002-12-11  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (hppa*-*-*): Don't use gcc -mpa-risc-2-0 in ABI=1.0.\n\n\t* mpn/pa32/pa-defs.m4: New file, arranging for .proc/.procend.\n\t* configure.in (hppa*-*-*): Use it.\n\n\t* printf/doprnt.c: Comments on \"ll\" versus \"L\".\n\n\t* tests/mpz/t-div_2exp.c: Reduce tests, especially the random ones.\n\n2002-12-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/get_d.c (limb2dbl): New macro for conversion to `double'.\n\tDefine it to something non-trivial for 64-bit hppa.\n\t* mpq/get_d.c: Likewise.\n\t* mpf/get_d.c: Likewise.\n\n\t* mpn/x86/pentium4/sse2/addmul_1.asm: Unroll to save one c/l.\n\n2002-12-09  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/Makefile.am: Don't use -static under --disable-static, it tends\n\tnot to work.\n\t* configure.in (ENABLE_STATIC): New AM_CONDITIONAL.\n\n\t* gmp-h.in: Use <iostream> instead of <iosfwd> with Compaq C++ in\n\tpre-standard I/O mode.\n\n\t* tests/mpz/t-jac.c, tests/mpz/t-scan.c: Reduce tests.\n\n2002-12-08  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (*-*-ultrix*): Remove forcible --disable-shared,\n\tbelieve this was a generic problem with libtool, now gone.\n\n2002-12-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (USE_LEADING_REGPARM): Disable for PIC code generation.\n\n2002-12-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/cxx/t-misc.cc (check_mpq): Use 0/1 for canonical 0 in\n\tmpq_cmp_ui calls.\n\n\t* configure.in (hppa2.0*-*-*): Pass +O2 instead of +O3 to work around\n\tcompiler bug with mpfr/tests/tdiv.\n\n2002-12-07  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in 
(hppa2.0*-*-* ABI=2.0n): Make -mpa-risc-2-0 optional.\n\tNew hppa-level-2.0 test using GMP_HPPA_LEVEL_20 to detect assembler\n\tsupport for 2.0n.\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Add code that provokes an error\n\tfrom gcc -mpa-risc-2-0 if the assembler doesn't know 2.0 instructions.\n\t(GMP_HPPA_LEVEL_20): New macro.\n\n2002-12-07  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* gmp-impl.h (gmp_randfnptr_t.randseed_fn) Return void.\n\t(LIMBS_PER_ULONG, MPN_SET_UI): New macros.\n\t(MPZ_FAKE_UI): Rename couple of parameters.\n\n\t* randlc2x.c (gmp_rand_lc_struct): _mp_c and _mp_c_limbs replaced\n\twith mpn style _cp and _cn.  All callers changed.\n\t(randseed_lc): Fix limbs(seed) > bits_to_limbs(m2exp) case.\n\tRemove return value.\n\t(gmp_randinit_lc_2exp): Attempt to avoid redundant reallocation.\n\n\t* randmt.c (mangle_seed): New function by Kevin.\n\t(randseed_mt): Use it instead of mpz_powm, for performance.  Remove\n\treturn value.  Remove commented out code (an inferior alternative to\n\tmpz_export).\n\n\t* randsdui.c (gmp_randseed_ui): Use MPZ_FAKE_UI.\n\n\t* tests/rand/t-lc2exp.c (check_bigm, check_bigs): New tests.\n\t* tests/rand/t-urndmm.c: Add L to constants in calls, for K&R.\n\n2002-12-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Remove -g.\n\t(hppa*-*-*): Pass -Wl,+vnocompatwarnings with +DA2.0.\n\n2002-12-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/sqr_diagonal.asm: Remove .entry, .proc, .procend.\n\t* mpn/pa64/udiv.asm: Likewise.\n\n2002-12-05  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/pa64/sub_n.asm: Remove space in \"sub, db\" which gas objects to.\n\t* mpn/pa64/*.asm, tune/hppa2.asm: Use \".level 2.0\" for 2.0n, since gas\n\tdoesn't like \".level 2.0N\".\n\n\t* configure.in (hppa*-*-*): Group path and flags choices, for clarity.\n\t(hppa1.0*-*-*): Use gcc -mpa-risc-1-0 when available.\n\t(hppa2.0*-*-*): Ditto -mpa-risc-2-0.\n\t(*-*-hpux*): Exclude ABI=2.0w for hpux[1-9] and hpux10, rather than\n\tthe converse of allowing 
it for hpux1[1-9]; ie. list the bad systems\n\trather than try to guess the good systems.\n\t(hppa2.0*-*-*) [ABI=2.0n ABI=2.0w]: Add gcc to likely compilers.\n\t(hppa*-*-*) [gcc]: Test sizeof(long) to differentiate a 32-bit or\n\t64-bit build of the compiler.\n\t(hppa64-*-*): Add this as equivalent to hppa2.0-*-*.\n\t* acinclude.m4 (GMP_C_TEST_SIZEOF): New macro.\n\n\t* tests/tests.h (ostringstream::str): Must null-terminate\n\tostrstream::str() for the string constructor.\n\n2002-12-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa32/hppa1_1/udiv.asm: Don't wrap symbol to INT64 in L() stuff.\n\n\t* longlong.h (mpn_udiv_qrnnd_r based udiv_qrnnd): Fix typo.\n\n\t* mpn/powerpc32/powerpc-defs.m4: Define float registers with `f'\n\tprefix.\n\n2002-12-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Floating-point Functions): Note the mantissa is binary and\n\tdecimal fractions cannot be represented exactly.  Suggested by Serge\n\tWinitzki.\n\t(Known Build Problems): Note libtool stripping options when linking.\n\tReported by Vincent Lefevre.\n\n\t* acinclude.m4 (GMP_ASM_LABEL_SUFFIX): Don't make an empty result a\n\tfailure, that's a valid result.\n\t(GMP_ASM_GLOBL): Establish this from the host cpu type.\n\t(IA64_PATTERN): New macro.\n\t(GMP_PROG_EXEEXT_FOR_BUILD, GMP_C_FOR_BUILD_ANSI,\n\tGMP_CHECK_LIBM_FOR_BUILD): Remove temporary files created.\n\t* configure.in: Use IA64_PATTERN.\n\n2002-12-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/hppa.asm: Use config.m4.\n\t* tune/hppa2.asm: Likewise.\n\t* tune/hppa2w.asm: Likewise.\n\n\t* mpn/pa64: Use LDEF.\n\n2002-12-03  Kevin Ryde  <kevin@swox.se>\n\n\t* INSTALL: Use return rather than exit in the example programs.\n\tSuggested by Richard Dawe.\n\n\t* gmp.texi (Build Options): Move non-unix notes to ...\n\t(Notes for Particular Systems): ... here.  
Mention MS Interix,\n\treported by Paul Leyland.\n\t(C++ Interface Random Numbers): Add gmp_randinit_mt to examples.\n\n\t* acinclude.m4 (GMP_ASM_LABEL_SUFFIX): Must test empty suffix first,\n\tfor the benefit of hppa hp-ux.\n\t(GMP_ASM_UNDERSCORE): Grep the output of \"nm\" instead of trying to\n\tconstruct an asm file, and in case of failure fallback on no\n\tunderscore and a warning.\n\n\t* longlong.h (count_leading_zeros, count_trailing_zeros) [ev67, ev68]:\n\tRestrict __asm__ ctlz and cttz to __GNUC__.\n\n\t* gen-psqr.c (HAVE_CONST, const): New macros.\n\n\t* tests/cxx/t-rand.cc (check_randinit): Add gmp_randinit_mt.\n\n2002-12-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: Split popc_limb again, combined version gives too many\n\tcompiler warnings.\n\n2002-12-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c (div1): Disable unused function.\n\n\t* mpz/root.c: Don't include stdlib.h or longlong.h.\n\t* mpz/rootrem.c: Likewise.\n\n\t* extract-dbl.c: abort => ASSERT_ALWAYS.\n\t* mpz/set_d.c: Likewise.\n\t* mpn/generic/tdiv_qr.c: Likewise.\n\n\t* gen-psqr.c (f_cmp_fraction, f_cmp_divisor): Change parameter to\n\t`const void *', to match qsort spec.\n\n2002-12-01  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Division): Fix a couple of @math's for tex.\n\tUse @dots in more places.\n\n\t* tests/cxx/t-locale.cc: Test non std::locale systems too.\n\t* tests/cxx/clocale.c: New file, reinstating what was localeconv.c,\n\tand subverting nl_langinfo too.\n\t* tests/cxx/Makefile.am (t_locale_SOURCES): Add it.\n\n\t* tests/tests.h (ostringstream, istringstream): Provide fakes of these\n\tif <sstream> not available.\n\t* tests/cxx/t-locale.cc, tests/cxx/t-ostream.cc: Remove <sstream>.\n\t* configure.in (AC_CHECK_HEADERS) [C++]: Add <sstream>.\n\n2002-11-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* printf/doprnt.c (__gmp_doprnt): Comment out a `break' to shut up\n\tcompiler warnings.\n\n\t* longlong.h (umul_ppmm) [ia64]: Form both product 
parts in asm.\n\n\t* mpz/bin_uiui.c: Cast umul_ppmm operands.\n\n\t* scanf/doscan.c (gmpscan): Remove unused label store_get_digits.\n\n\t* gmp-impl.h: #undef MIN and MAX before #defining.\n\n2002-11-30  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_HEADERS): Add nl_types.h.\n\t* tests/misc/t-locale.c: Use this, for nl_item on netbsd 1.4.1.\n\n2002-11-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/addmul_1.c: Provide prototype for mpn_print.\n\t(OPS): Account for function overhead.\n\t* tests/devel/{submul_1.c,mul_1.c,add_n.c,sub_n.c}: Likewise.\n\n2002-11-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Use this rather\n\tthan libdummy.\n\t* tests/Makefile.am (EXTRA_libtests_la_SOURCES): Use this for\n\tx86call.asm and x86check.c rather than libdummy.\n\n2002-11-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-mul.c: Implement reference Karatsuba multiplication.\n\tRewrite testing scheme to run fewer really huge tests.\n\n2002-11-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests: Decrease repetition count for some of the slowest tests.\n\n2002-11-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpfr/tests/tdiv.c: Decrease number of performed tests.\n\n2002-11-23  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/ia64/README: Add some references.\n\n\t* gmp.texi (Build Options): Add itanium and itanium2, mention DocBook\n\tand XML from makeinfo, add texinfo top level cross reference.\n\t(Integer Division): Try to clarify 2exp functions a bit.\n\t(C++ Interface Floats): Giving bad string to constructor is undefined.\n\t(C++ Interface Integers, C++ Interface Rationals): Ditto, and show\n\tdefault base in prototype, not the description.\n\n\t* config.sub, config.guess, configure.in (itanium, itanium2): New cpu\n\ttypes.\n\n\t* tests/misc/t-printf.c, tests/misc/t-scanf.c (check_misc): Suppress\n\t%zd test on glibc prior to 2.1, it's not supported.\n\n2002-11-20  Kevin Ryde  <kevin@swox.se>\n\n\t* 
tests/cxx/t-locale.cc: Test with locales imbued into stream, use\n\t<sstream>, eliminated some C-isms.  istream tests disabled, not yet\n\tlocale-ized.\n\t* tests/cxx/Makefile.am (t_locale_SOURCES): Remove localeconv.c.\n\t* tests/cxx/localeconv.c: Remove file.\n\n\t* configure.in (AC_CHECK_TYPES) [C++]: Add std::locale.\n\t* printf/doprntf.c: Add decimal point parameter, remove localeconv use.\n\t* gmp-impl.h (__gmp_doprnt_mpf): Update prototype, bump symbol to\n\t__gmp_doprnt_mpf2 to protect old libgmpxx.\n\t* cxx/osmpf.cc: Use this with ostream locale decimal_point facet.\n\t* printf/doprnt.c: Ditto, with GMP_DECIMAL_POINT.\n\n\t* gmp-h.in: More comments on __declspec for windows DLLs.\n\n\t* mpf/set_str.c, scanf/doscan.c: Cast through \"unsigned char\" for\n\tdecimal point string, same as input chars.\n\n\t* configure.in (AC_CHECK_HEADERS): Add langinfo.h.\n\t(AC_CHECK_FUNCS): Add nl_langinfo.\n\t* gmp-impl.h (GMP_DECIMAL_POINT): New macro.\n\t* mpf/out_str.c, mpf/set_str.c, scanf/doscan.c: Use it, and don't\n\tbother with special code for non-locale systems.\n\t* tests/misc/t-locale.c: Subvert nl_langinfo too.\n\n\t* configure.in, acinclude.m4 (GMP_ASM_X86_GOT_UNDERSCORE): New macro.\n\t* mpn/x86/x86-defs.m4 (_GLOBAL_OFFSET_TABLE_): New macro, inserting\n\textra underscore for OpenBSD.\n\t* mpn/x86/README (_GLOBAL_OFFSET_TABLE_): Update notes.\n\tReported by Christian Weisgerber.\n\n\t* tests/cxx/t-rand.cc (check_randinit): New function, collecting up\n\tconstructor tests.\n\n\t* tests/cxx/t-ostream.cc: Use <sstream> instead of <strstream>, use\n\tcompare instead of strcmp.\n\n\t* gmpxx.h (__gmp_randinit_lc_2exp_size_t): Return type is int.\n\n2002-11-18  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.c (r_string): Use CNST_LIMB with <N>bits, spotted by\n\tTorbjorn.\n\n2002-11-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/popham.c: New file, using new faster algorithm.\n\t* mpn/generic/popcount.c: Remove.\n\t* mpn/generic/hamdist.c: Remove.\n\n\t* 
mpn/ia64/addmul_1.asm: Don't clobber callee-saves register f16.\n\t* mpn/ia64/mul_1.asm: Likewise.\n\n\t* mpn/ia64/addmul_1.asm: Add pred.rel declarations.  Resolve RAW\n\thazards for condition code registers, duplicating code as needed.  Add\n\tprediction to all branches.\n\t* mpn/ia64/mul_1.asm: Likewise.\n\t* mpn/ia64/add_n.asm: Likewise.\n\t* mpn/ia64/sub_n.asm: Likewise.\n\t* mpn/ia64/copyi.asm: Likewise.\n\t* mpn/ia64/copyd.asm: Likewise.\n\n\t* mpn/generic/random2.c: Add a cast to silence some compilers.\n\n2002-11-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c: Cap allocation by limiting k to 10 (512 precomputed\n\tvalues).\n\n2002-11-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, gmp.texi: Remove powerpc64 ABI=32L, doesn't work and\n\tis unlikely to ever do so.\n\t* configure.in: Allow ABI=32 for powerpc64.\n\tReported by David Edelsohn.\n\n2002-11-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add addmul_2.c\n\taddmul_3.c addmul_4.c addmul_5.c addmul_6.c addmul_7.c addmul_8.c.\n\n\t* gmp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT) [__GNUC__]:\n\tUse __dllexport__ and __dllimport__ to keep out of application\n\tnamespace.\n\n2002-11-14  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h (__gmp_randinit_default_t, __gmp_randinit_lc_2exp_t,\n\t__gmp_randinit_lc_2exp_size_t): Use extern \"C\" { typedef ... 
}, for\n\tthe benefit of g++ prior to 3.2.\n\n2002-11-12  Kevin Ryde  <kevin@swox.se>\n\n\t* gmpxx.h (gmp_randclass constructors): Patch from Roberto Bagnara to\n\tuse extern \"C\" on C function pointer arguments.\n\n2002-11-09  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, Makefile.am, printf/Makefile.am,\n\tprintf/repl-vsnprintf.c: Handle vsnprintf replacement with C\n\tconditionals.\n\n\t* acinclude.m4 (AC_LANG_FUNC_LINK_TRY(C)): Workaround troubles recent\n\tHP cc +O3 causes for AC_CHECK_FUNCS.\n\n\t* gmp.texi (Notes for Particular Systems): Add Sparc app regs.\n\t(Debugging): Note gcc -fstack options to detect overflow.\n\t(Formatted Output Strings, Formatted Input Strings): Format strings\n\tare not multibyte.\n\n2002-11-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Remove a bogus assert.\n\n2002-11-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Remove two dead mpn_divrem_2 calls.\n\n2002-11-04  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_C_INLINE): Don't define \"inline\" for C++.\n\n\t* demos/expr/expr-impl.h (stdarg.h): Test __DECC same as gmp.h.\n\n\t* mpbsd/mtox.c, printf/obprintf.c, printf/obvprintf.c,\n\tscanf/vsscanf.c, demos/expr/expr.c, demos/expr/exprf.c,\n\tdemos/expr/exprfa.c, demos/expr/exprfr.c, demos/expr/exprq.c,\n\tdemos/expr/exprz.c, demos/expr/exprza.c: Add <string.h> for strlen and\n\tmemcpy.\n\n2002-11-02  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h: Test __x86_64__ not __x86_64.  
Reported by Andreas\n\tJaeger.\n\n\t* mpz/import.c, mpz/export.c: Use char* subtract from NULL to get\n\tpointer alignment, for the benefit of Cray vector systems.\n\n\t* cxx/ismpf.cc: Use <clocale>.\n\t* tests/cxx/t-locale.cc: No need to conditionalize <clocale>.\n\n\t* scanf/doscan.c: Don't use isascii, rely on C99 ctype.h.\n\n\t* gmp.texi (Build Options): Describe CC_FOR_BUILD, cross reference\n\ttexinfo manual.\n\t(ABI and ISA): Add powerpc620 and powerpc630 to powerpc64, add NetBSD\n\tand OpenBSD sparc64.\n\t(Notes for Package Builds): Cross reference libtool manual.\n\t(Notes for Particular Systems): Add OpenBSD to non-MMX versions of gas.\n\t(Known Build Problems): Add MacOS X C++ shared libraries.\n\n2002-10-31  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h, tune/speed.c, tune/speed.h, tune/common.c, tune/many.pl,\n\ttests/devel/try.c, tests/tests.h, tests/refmpn.c (mpn_addmul_5,\n\tmpn_addmul_6, mpn_addmul_7, mpn_addmul_8): Add testing and measuring.\n\t* configure.in (config.in): Add #undefs of HAVE_NATIVE_mpn_addmul_5,\n\tHAVE_NATIVE_mpn_addmul_6, HAVE_NATIVE_mpn_addmul_7,\n\tHAVE_NATIVE_mpn_addmul_8.\n\t(gmp_mpn_functions_optional): Add addmul_5 addmul_6 addmul_7 addmul_8.\n\n\t* tests/devel/try.c (ASSERT_CARRY): Remove, now in gmp-impl.h\n\t(try_one): Do dest setups after sources, for benefit of\n\tdst0_from_src1.\n\n2002-11-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Avoid quadratic behaviour for\n\tsub-division when numerator is more than twice the size of the\n\tdenominator.  Simplify loop logic for the same case.  
Clean up a\n\tfew comments.\n\n2002-10-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*-cray-unicos*): Pass -hnofastmd again.\n\n2002-10-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/tadd.c: Disable test of denorms.\n\n2002-10-23  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Introduction to GMP): Update section about mailing\n\tlists.\n\n2002-10-23  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GMP_ATTRIBUTE_PURE): Suppress this when\n\t__GMP_NO_ATTRIBUTE_CONST_PURE is defined.\n\t* gmp-impl.h (ATTRIBUTE_CONST): Ditto.\n\t* tune/common.c: Use __GMP_NO_ATTRIBUTE_CONST_PURE.\n\n\t* tune/speed.h, tune/many.pl: Remove ATTRIBUTEs from prototypes.\n\t* tune/speed.h: Remove various \"dummy\" variables attempting to keep\n\t\"pure\" calls live, no longer necessary.  They weren't sufficient for\n\trecent MacOS cc anyway.\n\n2002-10-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/addmul_1.c: Handle overlap as in mul_1.c.\n\t* mpn/cray/ieee/submul_1.c: Likewise.\n\n2002-10-19  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (sparcv9 etc -*-*bsd*): Add support for NetBSD and\n\tOpenBSD sparc64.  Reported by Christian Weisgerber.\n\t(AC_CHECK_HEADERS): Add sys/param.h for sys/sysctl.h on *BSD.\n\n\t* demos/calc/calc.y: Change ={ to {, needed for bison 1.50.\n\n\t* longlong.h (count_leading_zeros, count_trailing_zeros) [x86_64]:\n\tShould be UDItype.\n\n\t* mpz/set_str.c, mpf/set_str.c, mpbsd/xtom.c, scanf/sscanffuns.c: Cast\n\tchars through \"unsigned char\" to zero extend, required by C99 ctype.h.\n\n2002-10-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-root.c: Test also mpz_rootrem.\n\n\t* mpn/generic/rootrem.c: Avoid overflow problem when n is huge.\n\n\t* mpz/root.c: Avoid overflow problems in allocation computation; also\n\tsimplify it.  
Misc cleanups.\n\n\t* mpz/rootrem.c: New file.\n\t* Makefile.am, mpz/Makefile.am, gmp-h.in: Add them.\n\n2002-10-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (popc_limb): Combine variants.\n\n2002-10-14  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_HEADERS): Add sys/time.h for sys/resource.h\n\ttest, needed by SunOS, and next autoconf will insist headers actually\n\tcompile.\n\n2002-10-08  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c (speed_time_init): Allow for Cray times() apparently\n\tbeing a cycle counter.\n\n\t* dumbmp.c (mpz_get_str): Fix buf size allocation.\n\n\t* tests/trace.c, tests/tests.h (mp_limb_trace): New function.\n\n\t* tune/speed-ext.c (SPEED_EXTRA_PROTOS): Use __GMP_PROTO.\n\t* tests/devel/try.c (malloc_region): Add a cast for SunOS cc.\n\n\t* configure.in (AC_CHECK_FUNCS): Add strerror.\n\t(AC_CHECK_DECLS): Add sys_errlist, sys_nerr.\n\t* tune/time.c, tests/devel/try.c: Use them.\n\n2002-10-05  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_HEADERS): Test float.h, not in SunOS cc.\n\t* printf/repl-vsnprintf.c: Use this.\n\n\t* configure.in (*sparc*-*-*): Collect up various options for clarity,\n\tuse gcc -mcpu=supersparc and ultrasparc3, use cc -xchip, don't use\n\t-xtarget=native, use cc configs with acc, merge SunOS bundled cc and\n\tSunPRO cc configs.\n\n\t* gmp-impl.h (gmp_randfnptr_t): Use __GMP_PROTO.\n\t(MPZ_REALLOC): Cast _mpz_realloc return value to mp_ptr, for the\n\tbenefit of SunOS cc which requires pointers of the same type on the\n\ttwo legs of a ?:.\n\n\t* dumbmp.c (mpz_realloc): Add a cast to avoid a warning from SunOS cc.\n\n\t* acinclude.m4: Allow for i960 b.out default cc output.\n\n\t* gmp.texi (Random State Initialization): Add gmp_randinit_mt.\n\t(Perfect Square Algorithm): Describe new mpn_mod_34lsub1 use.\n\t(Factorial Algorithm): Describe Jason's new code.\n\t(Binomial Coefficients Algorithm): Ideas about improvements\n\tmoved to doc/projects.html.\n\t(Contributors): Add Jason 
Moxham and Pedro Gimeno.\n\n2002-10-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gen-psqr.c: New file.\n\t* Makefile.am, mpn/Makefile.am: Use it to generate mpn/perfsqr.h.\n\t* mpn/generic/perfsqr.c: Use generated data, put mod 256 data into\n\tlimbs to save space, use mpn_mod_34lsub1 when good.\n\t* tests/mpn/t-perfsqr.c: New file.\n\t* tests/mpn/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/mpz/t-perfsqr.c (check_modulo): New test.\n\t(check_sqrt): New function holding current tests.\n\n\t* configure.in (AC_INIT): Modernize to package name and version here\n\trather than AM_INIT_AUTOMAKE, add bug report email.\n\t(AC_CONFIG_SRCDIR): New macro.\n\n\t* gmp-impl.h (ROUND_UP_MULTIPLE): Fix for non-power-of-2 moduli (not\n\tnormal in current uses), clarify the comments a bit.\n\n2002-09-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makeasm.am (.s.lo): Add --tag=CC for the benefit of CCAS!=CC,\n\tsame as .S.lo and .asm.lo.\n\n\t* Makefile.am (gen-fac_ui, gen-fib, gen-bases): Quote source files in\n\ttest -f stuff to avoid Sun make rewriting them.\n\n2002-09-28  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c, tune/speed.c: Avoid strings longer than C99\n\tguarantees.\n\n\t* tests/refmpn.c, tests/tests.h (refmpn_zero_extend, refmpn_normalize,\n\trefmpn_sqrtrem): New functions.\n\t* tests/devel/try.c (TYPE_SQRTREM): Use refmpn_sqrtrem.\n\t(compare): Correction to tr->dst_size subscripting.\n\n\t* dumbmp.c: Add several new functions, allow for initial n<d in\n\tmpz_tdiv_q (now in mpz_tdiv_qr actually).\n\n\t* gen-bases.c (chars_per_limb): Get GMP_NUMB_BITS for base==2,\n\tsimilarly other powers of 2, which this was in the past.\n\t* tests/refmpn.c (refmpn_chars_per_limb): Ditto.\n\t* tests/mpn/t-mp_bases.c: Test chars_per_limb for power-of-2 bases too.\n\n\t* Makefile.am, mpz/Makefile.am: Setups for gen-fac_ui.c generating\n\tmpz/fac_ui.h.\n\n2002-09-28  Jason Moxham <J.L.Moxham@maths.soton.ac.uk>\n\n\t* dumbmp.c (mpz_pow_ui, mpz_addmul_ui, mpz_root): New functions.\n\t* 
gen-fac_ui.c: New file.\n\t* mpz/fac_ui.c: Rewrite.\n\n2002-09-26  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/cxx/localeconv.c: New file, split from t-locale.cc.\n\t* tests/cxx/t-locale.cc: Use it.\n\t* tests/cxx/Makefile.am (t_locale_SOURCES): Add it.\n\n\t* tests/cxx/Makefile.am: Updates for Gerardo's new test programs.\n\n2002-09-26  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h (__gmp_cmp_function): Bug fixes in double/mpq and\n\tdouble/mpfr comparisons.\n\n\t* tests/cxx/t-assign.cc, tests/cxx/t-binary.cc, tests/cxx/t-constr.cc,\n\ttests/cxx/t-ternary.cc, tests/cxx/t-unary.cc: Revise and add various\n\ttests, including some for mpfr, some split from t-expr.cc.\n\t* tests/cxx/t-locale.cc: Modernize include files.\n\t* tests/cxx/t-ostream.cc: Modernize include files, use cout rather\n\tthan printf for diagnostics.\n\t* tests/cxx/t-misc.cc, tests/cxx/t-rand.cc: New file, split from\n\tt-allfuns.cc.\n\t* tests/cxx/t-ops.cc: New file, some split from t-allfuns.cc.\n\t* tests/cxx/t-prec.cc: New file.\n\t* tests/cxx/t-allfuns.cc, tests/cxx/t-expr.cc: Remove files.\n\n2002-09-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*-cray-unicos*): Remove -hscalar0, it causes too much\n\tperformance loss.  
Let's trust Cray to fix their compilers.\n\n2002-09-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/add_n.asm: Rewrite.\n\t* mpn/powerpc32/sub_n.asm: Rewrite.\n\n2002-09-24  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* randlc2x.c: Prepare for nails by changing type of _mp_c to mpz_t,\n\tmake _mp_seed fixed-size, disallow SIZ(a)==0 to optimize comparisons\n\tfor mpn_mul.\n\t* gmp-impl.h (MPZ_FAKE_UI): New macro.\n\n\t* randmt.c: Some constants made long for K&R compliance; remove UL at\n\tthe end of other constants; use mp_size_t where appropriate; use\n\tmpz_export to split the seed.\n\n\t* gmp-impl.h: Remove type cast in RNG_FNPTR and RNG_STATE, to allow\n\tthem to be used as lvalues.\n\t* randclr.c, randlc2x.c, randmt.c, randsd.c: All callers changed.\n\n\t* mpz/urandomm.c: Replace mpn_cmp with MPN_CMP.\n\n\t* tests/rand/gen.c: Get rid of gmp_errno.\n\n2002-09-24  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Custom Allocation): Keep allocate_function etc out of the\n\tfunction index by using @deftypevr.\n\tMore index entries.\n\n2002-09-24  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h (mpfr_class constructors from strings): Precision was set\n\tincorrectly, fixed.\n\n2002-09-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/urandomb.c: Don't crash for overlarge nbits argument.\n\tLet nbits==0 mean to fill number with random bits.\n\n2002-09-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/mod_34lsub1.asm: Add r31 dummy operand to `br' instruction.\n\n2002-09-20  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h (__gmp_binary_equal, __gmp_binary_not_equal): Fix broken\n\tmpq/double functions.\n\n2002-09-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* randmt.c (randget_mt): Fix typo.\n\n2002-09-18  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (_gmp_rand): Avoid evaluating \"state\" more than once, for\n\tthe benefit places calling it with RANDS.\n\n\t* randmt.c (randseed_mt): Use mpz_init for mod and seed1, for 
safety.\n\n\t* tune/tuneup.c (sqr_karatsuba_threshold): Initialize to\n\tTUNE_SQR_KARATSUBA_MAX so mpn_sqr_n works for randmt initialization.\n\n\t* gmp.texi (Integer Comparisons): Remove mention of non-existant\n\tmpz_cmpabs_si, reported by Conrad Curry.\n\n\t* tune/speed.c, tune/speed.h, tune/common.c: Add gmp_randseed,\n\tgmp_randseed_ui and mpz_urandomb.\n\n2002-09-18  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* tests/rand/gen.c: Add mt, remove lc and bbs.\n\n\t* Makefile.am (libgmp_la_SOURCES): Add randmt.c, remove randlc.c and\n\trandraw.c.\n\n\t* randmt.c: New file.\n\t* gmp-h.in (gmp_randinit_mt): Add prototype.\n\t* randdef.c: Use gmp_randinit_mt.\n\n\t* gmp-impl.h (RNG_FNPTR, RNG_STATE): New macros.\n\t(gmp_randfnptr_t): New structure.\n\t(_gmp_rand): Now a macro not a function.\n\t* gmp-h.in (__gmp_randata_lc): Remove, now internal to randlc2x.c.\n\t(__gmp_randstate_struct): Revise comments on field usage.\n\t* randsd.c, randclr.c: Use function pointer scheme.\n\t* randsdui.c: Use gmp_randseed.\n\t* randraw.c: Remove file.\n\t* randlc2x.c: Collect up lc_2exp related code from randsd.c, randclr.c\n\tand randraw.c, use function pointer scheme, integrate seed==0/a==0\n\tinto main case and fix case where bits(a) < m2exp.\n\n\t* randlc.c: Remove file, never documented and never worked.\n\t* gmp-h.in (gmp_randinit_lc): Remove prototype.\n\n2002-09-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/mod_34lsub1.asm: New file.\n\n2002-09-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, acinclude.m4 (GMP_C_RESTRICT): Remove this, not\n\tcurrently used, and #define restrict upsets Microsoft C headers on\n\twin64.  Reported by David Librik.\n\n\t* configure.in (x86): Add gcc 3.2 -march and -mcpu flags, remove some\n\tunnecessary -march=i486 fallbacks.\n\n\t* gmp.texi (Notes for Particular Systems): Note cl /MD is required for\n\tMicrosoft C and MINGW to cooperate on I/O.  
Explained by David Librik.\n\t(Language Bindings): Add linbox.\n\t* gmp.texi (Language Bindings):\n\n2002-09-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/aorsmul_i.c: Allow for w==x overlap with nails.  Test\n\tBITS_PER_ULONG > GMP_NUMB_BITS rather than GMP_NAIL_BITS != 0.\n\t* tests/mpz/t-aorsmul.c: Test this.\n\n\t* tune/common.c: mpn_mod_34lsub1 only exists for GMP_NUMB_BITS%4==0\n\t* tune/speed.c: Add mpn_mod_34lsub1.\n\n2002-09-10  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* rand.c: Remove old disabled BBS code.\n\t* mpf/urandomb.c: Use BITS_TO_LIMBS.\n\n2002-09-10  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Multiplication Algorithms): FFT is now enabled by default.\n\n2002-09-10  Pedro Gimeno  <pggimeno@wanadoo.es>\n\n\t* mpz/urandomm.c: Use mpn level functions, avoid an infinite loop if\n\t_gmp_rand forever returns all \"1\" bits.\n\t* tests/rand/t-urndmm.c: New file\n\t* tests/rand/Makefile.am (check_PROGRAMS): Add it.\n\n\t* gmp-impl.h (BITS_TO_LIMBS): New macro.\n\t* mpz/urandomb.c: Use it, and use MPZ_REALLOC.\n\n2002-09-08  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_GCC_WA_MCPU): New macro.\n\t* configure.in (alpha*-*-*): Use it to avoid -Wa,-mev67 if gas isn't\n\tnew enough to know ev67.  Reported by David Bremner.\n\n2002-07-30  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h (__gmpz_value etc): Remove, use mpz_t etc instead.\n\t(__gmp_expr): Reorganise specializations, use __gmp_expr<T,T> not\n\tmpz_class etc.\n\t(mpfr evals): Remove mode parameter, was always\n\t__gmp_default_rounding_mode anyway.\n\n2002-09-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, mp-h.in: Use #ifdef for tests, for the benefit of\n\tapplications using gcc -Wundef.\n\n\t* longlong.h: Define COUNT_LEADING_ZEROS_NEED_CLZ_TAB for all alphas,\n\tsince mpn/alpha/cntlz.asm always goes into libgmp.so, even for ev67\n\tand ev68 which don't need it.  
Reported by David Bremner.\n\n\t* gmp.texi (Demonstration Programs): New section, expanding on what\n\twas under \"Build Options\".\n\t(Converting Floats): Don't need \\ for _ in @var within @math.\n\tAdd and amend various index entries.\n\n\t* demos/qcn.c: Add -p prime limit option.\n\n2002-08-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/pprime_p.c: Handle small negatives with isprime, in particular\n\tmust do so for n==-2.\n\t* tests/mpz/t-pprime_p.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n2002-08-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.texi (Converting Floats): Fix typo in mpf_get_d_2exp docs,\n\treported by Paul Zimmermann.\n\n2002-08-26  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Echo the ABI being tried for the compilers.\n\t(powerpc*-*-*): Use powerpc64/aix.m4 for ABI=aix64 too.\n\t(AC_CHECK_FUNCS): Add strtol, for tests/rand/gen.c.\n\n2002-08-24  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (HAVE_HOST_CPU_, HAVE_HOST_CPU_FAMILY_, HAVE_NATIVE_):\n\tSetup templates for these using AH_VERBATIM rather than acconfig.h,\n\tpreferred by latest autoconf.  
Prune lists to just things used.\n\t* acconfig.h: Remove file.\n\n\t* mpn/powerpc32/mode1o.asm: Forgot ASM_START.\n\n\t* tune/time.c (have_cgt_id): Renamed from HAVE_CGT_ID so avoid\n\tconfusion with autoconf outputs, and turn it into a \"const\" variable.\n\n2002-08-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Choose powerpc32/aix.m4 or powerpc64/aix.m4 based on\n\tABI, not configuration triple.\n\n\t* mpz/pprime_p.c: Partially undo last change--handle small and\n\tnegative numbers in the same test.\n\n2002-08-22  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD): Note\n\tmpn/generic/mul_fft.c is not nails-capable, and don't bother setting\n\tother FFT data for nails.\n\n\t* configfsf.guess: Update to 2002-08-19.\n\t* configfsf.sub: Update to 2002-08-20.\n\n\t* config.guess (powerpc*-*-*): Use a { } construct to suppress SIGILL\n\tmessage on AIX.\n\n2002-08-20  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Add ia64 under cpu types.\n\t(ABI and ISA): Describe IRIX 6 ABI=o32.\n\t(Notes for Particular Systems): Remove -march=pentiumpro, now ok.\n\t(Known Build Problems): Binutils 2.12 is ok for libgmp.a.\n\t(Emacs): New section.\n\t(Language Bindings): Update MLton URL, reported by Stephen Weeks.\n\t(Prime Testing Algorithm): New section.\n\tDon't put a blank line after @item in @table since it can make a page\n\tbreak between the heading and the entry.\n\tMisc tweaks elsewhere, in particular more index entries.\n\n\t* mpz/millerrabin.c: Need x to be size+1 for change to urandomm.\n\n\t* gmp-impl.h: Comments on the use of __GMP_DECLSPEC.\n\n\t* tune/time.c (freq_measure_mftb_one): Use struct_timeval, for the\n\tbenefit of mingw.\n\n\t* tests/refmpn.c, tests/tests.h (ref_addc_limb, ref_subc_limb):\n\tRenamed from add and sub, following gmp-impl.h ADDC_LIMB and SUBC_LIMB.\n\n2002-08-17  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc32/mode1o.asm: New file.\n\t* configure.in, acinclude.m4 
(GMP_ASM_POWERPC_PIC_ALWAYS): New macro.\n\t* mpn/asm-defs.m4: Use it to help setting up PIC.\n\n\t* configure.in (AC_PREREQ): Bump to 2.53.\n\n\t* mpn/powerpc32/powerpc-defs.m4\t(ASSERT): New macro.\n\t(PROLOGUE_cpu): New macro, giving ALIGN(4) not 8.\n\n2002-08-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/m68k/lshift.asm: Fix typo in !scale_available_p code.\n\t* mpn/m68k/rshift.asm: Likewise.\n\n2002-08-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (--enable-profiling=instrument): New option.\n\t* gmp.texi (Profiling): Describe it.\n\t* mpn/x86/x86-defs.m4 (PROLOGUE_cpu, call_instrument, ret_internal):\n\tAdd support.\n\t(call_mcount): Share PIC setups with call_instrument.\n\t* mpn/x86/*.asm: Use ret_internal.\n\t* mpn/asm-defs.m4 (m4_unquote): New macro.\n\t* tests/mpn/t-instrument.c: New file.\n\t* tests/mpn/Makefile.am: Add it.\n\n\t* mpn/alpha/umul.asm: Add ASM_END.\n\n2002-08-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/pprime_p.c: Fake up a local mpz_t to take abs(n), rather than\n\tusing mpz_init etc.\n\n\t* mpz/millerrabin.c: Use mpz_urandomm for uniform selection of x,\n\treported by Jason Moxham.  Exclude x==n-1, ie. -1 mod n.  
Use\n\tgmp_randinit_default.\n\n\t* mpn/alpha/umul.asm: Use \"r\" registers, for the benefit of Unicos.\n\n\t* tests/devel/try.c: Add mpn_copyi and mpn_copyd.\n\n2002-08-09  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am: Remove configure.lineno from DISTCLEANFILES and gmp.tmp\n\tfrom MOSTLYCLEANFILES, automake does these itself now.\n\n\t* */Makefile.in, aclocal.m4, configure, install-sh, missing,\n\tmkinstalldirs: Update to automake 1.6.3.\n\n\t* mpn/ia64/README: Some notes on assembler syntax.\n\n\t* mpn/ia64/add_n.asm, mpn/ia64/sub_n.asm: Add .body.\n\t* mpn/ia64/add_n.asm, mpn/ia64/addmul_1.asm, mpn/ia64/mul_1.asm,\n\tmpn/ia64/sub_n.asm: Position .save ar.lc just before relevant\n\tinstruction.\n\t* mpn/ia64/addmul_1.asm, mpn/ia64/mul_1.asm: Add .save ar.pfs and pr.\n\t* mpn/ia64/copyd.asm, mpn/ia64/copyi.asm: Correction to .body position.\n\t* mpn/ia64/lorrshift.asm: Add .prologue stuff.\n\n\t* configure.in (*-*-unicos*): Remove forcible --disable-shared,\n\tlibtool gets this right itself now.\n\n2002-08-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/mmx/hamdist.asm: New file, adapted from\n\tmpn/x86/pentium/mmx/popham.asm.\n\t* mpn/x86/pentium/mmx/popham.asm: Remove file, not faster than plain\n\tmpn/x86/pentium/popcount.asm for the popcount.\n\n\t* mpn/alpha/umul.asm: Use PROLOGUE/EPILOGUE, rename it mpn_umul_ppmm.\n\t* configure.in (alpha*-*-*): Add umul to extra_functions.\n\n\t* mpz/remove.c: Make src==0 return 0, not do DIVIDE_BY_ZERO.\n\n2002-08-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* acconfig.h: Remove spurious undefs for mpn_divrem_newton and\n\tmpn_divrem_classic.\n\n2002-08-05  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/refmpn.c, tests/tests.h, tests/misc/t-printf.c,\n\ttests/mpf/t-trunc.c, tests/mpn/t-mp_bases.c, tests/mpn/t-scan.c,\n\ttests/mpq/t-cmp_ui.c, tests/mpz/bit.c, tests/mpz/t-aorsmul.c,\n\ttests/mpz/t-powm_ui.c tests/mpz/t-root.c, tests/mpz/t-scan.c: More\n\tcare with long and mp_size_t parameters, for the benefit of 
K&R.\n\n\t* demos/perl/GMP.pm, demos/perl/GMP.xs, demos/perl/GMP/Mpz.pm,\n\tdemos/perl/test.pl: Add mpz_import and mpz_export.\n\t* demos/perl/GMP.pm: Remove \"preliminary\" warning.\n\n\t* mpn/lisp/gmpasm-mode.el: Set add-log-current-defun-header-regexp to\n\tpick up m4 defines etc.\n\n\t* Makefile.am (libgmpxx_la_DEPENDENCIES): libgmp.la should be here,\n\tnot libgmpxx_la_LIBADD, for the benefit of \"make -j2\".\n\n\t* mpn/ia64/*.asm [hpux ABI=32]: Extend 32-bit operands to 64-bits, not\n\toptimal and might not be sufficient, but seems to work.\n\n2002-08-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Profiling): Use a table and expand for clarity.\n\t(Integer Special Functions): New section for mpz_array_init,\n\t_mpz_realloc, mpz_getlimbn and mpz_size, to discourage their use.\n\n\t* configure.in (*-*-msdosdjgpp*): Remove forcible --disable-shared,\n\tlibtool gets this right itself now.\n\n2002-07-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc32/lshift.asm, mpn/powerpc32/rshift.asm: Lose final mr,\n\tand make final stwu into an stw.\n\n\t* gmp.texi (Known Build Problems): An easier workaround for DESTDIR,\n\tusing LD_LIBRARY_PATH.\n\t(C++ Interface MPFR): Remove mpfrxx.h.\n\n\t* mpfrxx.h: Remove file.\n\t* Makefile.am: Remove mpfrxx.h.\n\t* tests/cxx/Makefile.am: Add Gerardo's new test programs.\n\n2002-07-30  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h: Use mpz_addmul etc for ternary a+b*c etc.  Reorganise some\n\tmacros for maintainability.  
Merge mpfrxx.h.\n\t* tests/cxx/t-constr.cc, tests/cxx/t-expr.cc: Various updates.\n\t* tests/cxx/t-assign.cc, tests/cxx/t-binary.cc,\n\ttests/cxx/t-ternary.cc, tests/cxx/t-unary.cc: New files.\n\n2002-07-27  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (count_trailing_zeros) [ia64 __GNUC__]: Don't use\n\t__builtin_ffs for now, doesn't seem to work.\n\n\t* configure.in: Establish CONFIG_SHELL to avoid a problem with\n\tAC_LIBTOOL_SYS_MAX_CMD_LEN on ia64-*-hpux*.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_FINDA): Don't let calls to\n\tmpn_gcd_finda go dead.\n\n\t* mpn/generic/tdiv_qr.c: Inline mpn_rshift and MPN_COPY of 2 limbs.\n\n2002-07-24  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/primes.c: Use __GMP_PROTO and don't use signed, for the\n\tbenefit of K&R.\n\n\t* demos/calc/calclex.l: Add <string.h> for strcmp.\n\n\t* mpn/ia64/invert_limb.asm: Use .rodata which works on ia64-*-hpux*\n\tand should be standard, rather than worrying about RODATA.\n\n\t* gmp.texi (Function Classes): Add cross references.\n\t(Integer Import and Export): Fix return value grouping.\n\n\t* mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add // for\n\tia64.  
Add notes on what the various styles are for.\n\n\t* mpn/ia64/default.m4 (ASM_START): Define to empty, not dnl, so as not\n\tto kill text on the same line.\n\t(EPILOGUE_cpu): Force a newline after \"#\", so as not to suppress macro\n\texpansion in the rest of the EPILOGUE line.\n\n2002-07-21  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h: Fix some missing _PROTOs.\n\n\t* Makefile.am (DISTCLEANFILES): Add configure.lineno.\n\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Define\n\tHAVE_DOUBLE_IEEE_BIG_ENDIAN and HAVE_DOUBLE_IEEE_LITTLE_ENDIAN in\n\tconfig.m4 too.\n\t* mpn/ia64/invert_limb.asm: Add big-endian data.\n\n\t* tests/mpz/t-jac.c (try_si_zi): Correction to \"a\" parameter type.\n\n2002-07-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/bin_ui.c, mpz/jacobi.c, mpz/pprime_p.c, mpn/generic/divis.c:\n\tMore care with long and mp_size_t parameters, for the benefit of K&R.\n\n\t* gmp-impl.h (invert_limb): Use parens around macro arguments.\n\t(mpn_invert_limb): Give prototype and define unconditionally.\n\n\t* gmp-impl.h (CACHED_ABOVE_THRESHOLD, CACHED_BELOW_THRESHOLD): New\n\tmacros.\n\t* mpn/generic/sb_divrem_mn.c: Use them to help gcc let preinv code go\n\tdead when not wanted.\n\n2002-07-17  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/refmpz.c (refmpz_hamdist): Ensure mp_size_t parameters are\n\tthat type, for the benefit of hpux ia64 bundled cc ABI=64.\n\n\t* configure.in (ia64*-*-hpux*): Need +DD64 in cc_64_cppflags to get\n\tthe right headers for ansi2knr.\n\n\t* acinclude.m4 (GMP_TRY_ASSEMBLE, GMP_ASM_UNDERSCORE): Use $CPPFLAGS\n\twith $CCAS and when linking, as done by the makefiles.\n\t(GMP_ASM_X86_MMX, GMP_ASM_X86_SSE2): Show $CPPFLAGS in diagnostics.\n\n\t* gmp-impl.h (ieee_double_extract): Setup using HAVE_DOUBLE_IEEE_*.\n\t(GMP_UINT32): New define, 32 bit type for ieee_double_extract.\n\t* configure.in: Add AC_CHECK_SIZEOF unsigned.\n\t* configure.in, acinclude.m4 (GMP_IMPL_H_IEEE_FLOATS): Remove.\n\t(GMP_C_DOUBLE_FORMAT): Instead warn about unknown float 
here.\n\n\t* configure.in, acinclude.m4 (GMP_C_SIZES): Remove.\n\t* acinclude.m4 (GMP_INCLUDE_GMP_H_BITS_PER_MP_LIMB): Remove this\n\tscheme, not required.\n\t* configure.in (unsigned long, mp_limb_t): Run AC_CHECK_SIZEOF for\n\tthese unconditionally, check mp_limb_t against gmp-mparam.h values.\n\t* gmp-impl.h (BYTES_PER_MP_LIMB, BITS_PER_MP_LIMB): Define based on\n\tSIZEOF_MP_LIMB_T if not provided by gmp-mparam.h.\n\t(BITS_PER_ULONG): Define here now.\n\n\t* gmp.texi (ABI and ISA): Add HP-UX IA-64 choices.\n\t(Random State Initialization): Typo in m2exp described for\n\tgmp_randinit_lc_2exp_size.\n\t(Formatted Output Functions): Clarify gmp_obstack_printf a bit.\n\t(Formatted Input Strings): Typo in %n summary.\n\n\t* mpz/inp_raw.c (NTOH_LIMB_FETCH): Use simple generic default, since\n\tendianness detection is now cross-compile friendly.\n\t* mpz/out_raw.c (HTON_LIMB_STORE): Ditto.\n\n\t* mpz/fib_ui.c: Nailify.\n\t* mpz/random.c: Nailify.\n\n\t* mpfr/acinclude.m4 (MPFR_CONFIGS): Patch by Vincent for an apparent\n\tfloat rounding gremlin on powerpc.\n\n2002-07-15  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am (PRINTF_OBJECTS): Avoid ending in a backslash, hpux ia64\n\tmake doesn't like that.\n\n\t* configure.in (ia64*-*-*): Use ABI=64 on non-HPUX systems, for\n\tconsistency.\n\n\t* gmp-impl.h (ieee_double_extract): Test __sparc__, used by gcc 3.1.\n\tReported by nix@esperi.demon.co.uk.\n\t* mpfr/mpfr-math.h (_MPFR_NAN_BYTES etc): Ditto.\n\n2002-07-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc32/rshift.asm: Rewrite, transformed from lshift.asm.\n\n\t* tune/tuneup.c (DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD):\n\tAlways zero for native mpn_divexact_1, mpn_modexact_1_odd.\n\n\t* gmp-h.in (__GMP_EXTERN_INLINE): Don't use this during configure,\n\tie. 
__GMP_WITHIN_CONFIGURE, to avoid needing dependent routines.\n\t* acinclude.m4 (GMP_H_EXTERN_INLINE): Consequent changes.\n\n\t* gmp-impl.h, mpn/asm-defs.m4 (mpn_addmul_2, mpn_addmul_3,\n\tmpn_addmul_4): Add prototypes and defines.\n\n\t* gmp.texi (Number Theoretic Functions): Clarify return value.\n\tReported by Peter Keller.\n\n2002-07-10  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, acinclude.m4 (GMP_PROG_LEX): Remove this in favour of\n\tAM_PROG_LEX, now ok when lex is missing.\n\n\t* longlong.h (count_leading_zeros) [pentiummmx]: Don't use __clz_tab\n\tvariant under LONGLONG_STANDALONE.\n\t(count_trailing_zeros) [ia64 __GNUC__]: Use __builtin_ffs.\n\n\t* gmp-impl.h (popc_limb): Add an ia64 asm version.\n\t(DItype): Use HAVE_LONG_LONG to choose long long, avoiding _LONGLONG\n\twhich is in gcc but means something unrelated in MS Visual C 7.0.\n\tReported by David Librik.\n\n\t* mpz/divexact.c: Add an ASSERT that den divides num.\n\n\t* mpn/asm-defs.m4 (LDEF): New macro.\n\t(INT32, INT64): Use it.\n\t* mpn/pa32/*.asm: Use it.\n\t* mpn/pa32/README: Update notes on labels.\n\n\t* tests/refmpn.c, tests/tests.h, tests/t-bswap.c (ref_bswap_limb):\n\tRenamed from refmpn_bswap_limb.\n\t* tests/t-bswap.c: Add tests_start/tests_end for randomization.\n\n\t* tests/refmpn.c, tests/tests.h (ref_popc_limb): New function.\n\t* tests/t-popc.c: New file.\n\t* tests/Makefile.am: Add it.\n\n\t* mpn/ia64/invert_limb.asm: Use RODATA since \".section .rodata\" is not\n\taccepted by ia64-*-hpux*.\n\n\t* acinclude.m4 (GMP_ASM_BYTE): New macro.\n\t(GMP_ASM_ALIGN_LOG, GMP_ASM_W32): Use it.\n\t(GMP_ASM_LABEL_SUFFIX): Use test compiles, not $host.\n\t(GMP_ASM_GLOBL): Ditto, and add .global for ia64-*-hpux*.\n\t(GMP_ASM_GLOBL_ATTR): Use GMP_ASM_GLOBL result, not $host.\n\t(GMP_ASM_LSYM_PREFIX): Allow any \"a-z\" nm symbol code, add \".text\" to\n\ttest program, required by ia64-*-hpux*.\n\t(GMP_ASM_LABEL_SUFFIX): Make LABEL_SUFFIX just the value, not a \"$1:\",\n\tthe former being how 
it's currently being used in fact.\n\n\t* configure.in, acinclude.m4 (GMP_PROG_CC_WORKS_LONGLONG): New macro.\n\t* configure.in (ia64-*-hpux*): Add 32 and 64 bit ABI modes.\n\n2002-07-06  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/cxx/t-allfuns.cc: New file.\n\t* tests/cxx/Makefile.am: Add it.\n\n\t* mpz/clrbit.c, mpz/setbit.c: Only MPN_NORMALIZE if high limb changes\n\tto zero.  Use _mpz_realloc return value.\n\n\t* gmp.texi (Build Options, C++ Formatted Output, C++ Formatted Input):\n\tCross reference to Headers and Libraries for libgmpxx stuff.\n\t(Low-level Functions): mpn_divexact_by3 result based on GMP_NUMB_BITS.\n\tmpn_set_str takes \"unsigned char *\", reported by Mark Sofroniou.\n\t(C++ Interface General): Describe linking with libgmpxx and libgmp.\n\n2002-07-01  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c, gmp-impl.h: Eliminate the array of thresholds in\n\tone(), tune just one at a time and let the callers hand dependencies.\n\tEliminate the second_start_min hack, handle SQR_KARATSUBA_THRESHOLD\n\toddities in tune_sqr() instead.\n\n\t* mpn/pa64/umul.asm, mpn/pa64/udiv.asm, mpn/asm-defs.m4, acconfig.h,\n\tlonglong.h, tune/speed.c, tune/speed.h, tune/common.c, tune/many.pl,\n\ttests/devel/try.c: Introduce mpn_umul_ppmm_r and mpn_udiv_qrnnd_r\n\trather than having variant parameter order for mpn_umul_ppmm and\n\tmpn_udiv_qrnnd on pa64.\n\n\t* gmp-h.in (mpz_export): Remove a spurious parameter name.\n\t* gmp-impl.h (mpn_rootrem): Use __MPN.\n\n2002-06-29  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (udiv_qrnnd) [hppa32]: Remove mpn_udiv_qrnnd version, the\n\tgeneral mechanism for that suffices.\n\n\t* mpf/inp_str.c: Fix returned count of chars read, reported by Paul\n\tZimmermann.  
Also fix a memory leak for invalid input.\n\t* tests/mpf/t-inp_str.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\n\t* tests/devel/try.c (mpn_mod_34lsub1): Only exists for\n\tGMP_NUMB_BITS%4==0.\n\t(SIZE2_FIRST): Respect option_firstsize2 for \"fraction\" case.\n\n\t* mpn/generic/diveby3.c: Further nailifications.\n\t* gmp-impl.h (MODLIMB_INVERSE_3): Allow for GMP_NUMB_BITS odd.\n\t(GMP_NUMB_CEIL_MAX_DIV3, GMP_NUMB_CEIL_2MAX_DIV3): New constants.\n\t* tests/t-constants.c: Check them.\n\n\t* gmp-h.in (__GMP_CRAY_Pragma): New macro.\n\t(__GMPN_COPY_REST): Use it.\n\t* gmp-impl.h (CRAY_Pragma): Use it.\n\n2002-06-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/import.c, mpz/export.c: Cast data pointer through \"char *\" in\n\talignment tests, for the benefit of Cray vector systems.\n\n\t* configure.in (x86-*-*): Remove -march=pentiumpro check, seems ok\n\twith current code.\n\t* acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO, GMP_GCC_VERSION_GE): Remove\n\tmacros, no longer needed\n\n\t* acinclude.m4 (GMP_ASM_RODATA): Remove temporary files.\n\n\t* configure.in (GMP_ASM_GLOBL_ATTR): Reposition to avoid duplication\n\tthrough AC_REQUIRE.\n\n2002-06-23  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-fib_ui.c (check_fib_table): Check table values, not just\n\tthat they're non-zero.\n\n\t* acinclude.m4 (GMP_GCC_ARM_UMODSI): Match bad \"gcc --version\" output\n\texactly, rather than parsing it with GMP_GCC_VERSION_GE.\n\t(GMP_ASM_UNDERSCORE): Use GLOBL_ATTR.\n\n\t* mpn/pa32/udiv.asm, mpn/pa32/hppa1_1/udiv.asm, mpn/pa64/udiv.asm:\n\tRenamed from udiv_qrnnd.asm, for consistency with other udiv's.\n\t* mpn/pa64/umul.asm: Renamed from umul_ppmm.asm likewise.\n\t* configure.in (hppa*-*-*): Update extra_functions.\n\t(NAILS_SUPPORT): Remove umul_ppmm, udiv_qrnnd, udiv_fp, udiv_nfp from\n\tnails-neutral list, no longer needed.\n\n\t* gmp-h.in (__DECC): Add notes on testing this for ANSI-ness.\n\t(__GMP_EXTERN_INLINE): Add static __inline for DEC C.\n\t(mpz_mod_ui): Move 
up to main section, it's still documented.\n\n2002-06-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/jacobi.c, mpz/kronsz.c, mpz/kronuz.c, mpz/kronzs.c,\n\tmpz/kronzu.c: Allow for odd GMP_NUMB_BITS, tweak a few variable setups.\n\t* gmp-impl.h (JACOBI_STRIP_LOW_ZEROS): New macro.\n\n\t* mpn/generic/mod_34lsub1.c: Nailify.\n\t* tests/devel/try.c (CNST_34LSUB1): Nailify.\n\t* gmp-impl.h (ADDC_LIMB): New macro.\n\n\t* gmpxx.h (mpf_class::get_str): Make exponent mp_exp_t&, default\n\tbase=10 and ndigits=0.\n\t(mpz_class::set_str, mpq_class::set_str, mpf_class::set_str): Add\n\tversions accepting \"const char *\".\n\t* mpfrxx.h (mpfr_class::get_str, mpfr_class::set_str): Ditto, and\n\tuncommenting set_str and operator=.\n\t* gmp.texi (C++ Interface Integers, C++ Interface Rationals)\n\t(C++ Interface Floats): Update.\n\n\t* gmp-impl.h (modlimb_invert): Merge the <=64bits and general versions.\n\t(const, signed): Move to near top of file, fixes --enable-alloca=debug\n\ton K&R.\n\n\t* gen-fib.c: New file, derived from mainline in mpn/generic/fib2_ui.c.\n\t* dumbmp.c (mpz_init_set_ui): New function.\n\t* Makefile.am, mpn/Makefile.am: Generate fib_table.h, mpn/fib_table.c.\n\t* gmp-impl.h: Use fib_table.h, add __GMP_DECLSPEC to __gmp_fib_table\n\t(for the benefit of tests/mpz/t-fib_ui.c).\n\t* mpn/generic/fib2_ui.c: Remove __gmp_fib_table and generating code.\n\n\t* Makefile.am: Add mp.h to BUILT_SOURCES, distclean all BUILT_SOURCES,\n\tuse += more.\n\n\t* acinclude.m4 (GMP_ASM_M68K_INSTRUCTION, GMP_ASM_M68K_BRANCHES):\n\tDon't let \"unknown\" get into the cache variables.\n\t(GMP_ASM_TEXT): See what assembles, don't hard-code hpux and aix.\n\t(GMP_PROG_EXEEXT_FOR_BUILD): Add ,ff8 for RISC OS, per autoconf cvs.\n\t(GMP_PROG_CPP_FOR_BUILD): Restructure per AC_PROG_CPP, print correct\n\tresult if CPP_FOR_BUILD overrides the cache variable.\n\t(GMP_PROG_CC_FOR_BUILD_WORKS): New macro split from\n\tGMP_PROG_CC_FOR_BUILD.  
Allow for \"conftest\" default compiler output.\n\t* configure.in, acinclude.m4 (GMP_PROG_HOST_CC): Reinstate this,\n\tseparating HOST_CC establishment from GMP_PROG_CC_FOR_BUILD.\n\n\t* configure.in (mpn_objs_in_libgmp): Move mpn/mp_bases.lo ...\n\t* Makefile.am (MPN_OBJECTS): ... to here, add $U, and arrange\n\tMPN_OBJECTS to be common between libgmp and libmp.\n\n2002-06-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c (TOOM3_MUL_REC, TOOM3_SQR_REC): Don't check if\n\tbasecase is to be invoked when *_TOOM3_THRESHOLD is more than 3 times\n\tthe corresponding *_THRESHOLD.\n\n2002-06-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/ia64/submul_1.c: Add missing TMP_DECL, TMP_MARK, TMP_FREE.\n\tReported by Paul Zimmermann.\n\n\t* configure.in, acinclude.m4 (AC_DEFINE): Make templates read \"Define\n\tto 1\", for clarity as per autoconf.\n\t* acinclude.m4 (GMP_OPTION_ALLOCA): Group WANT_TMP templates.\n\n2002-06-20  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h, mpfrxx.h: Remove mpz_classref, let mpq_class::get_num and\n\tmpq_class::get_den return mpz_class& as per the documentation.\n\tReported by Roberto Bagnara.\n\n2002-06-18  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/rand/t-lc2exp.c: New file.\n\t* tests/rand/Makefile.am: Add it, and use tests/libtests.la.\n\n\t* randraw.c (lc): Pad seed==0 case with zero limbs, return same\n\t(m2exp+1)/2 bits as normal, right shift \"c\" result as normal.\n\n\t* configure.in: Don't bother with line numbers in some diagnostics.\n\t(*-*-mingw*): Use -mno-cygwin if it works, suggested by delta trinity.\n\n\t* tests/mpz/Makefile.am, tests/mpq/Makefile.am,\n\ttests/misc/Makefile.am, (CLEANFILES): Set to *.tmp for test program\n\ttemporaries, to get t-scanf.tmp and reduce future maintenance.\n\n2002-06-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_dc_get_str): Pass scratch memory area in\n\tnew `tmp' parameter.  
Trim allocation needs by reusing input parameter.\n\n2002-06-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/udiv.asm: New file.\n\n2002-06-15  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_GCC_VERSION_GE): Correction to recognising mingw\n\tgcc 3.1 version number.  Reported by Jim Fougeron.\n\n\t* configure.in (AC_PROVIDE_AC_LIBTOOL_WIN32_DLL): New define, to make\n\tAC_LIBTOOL_WIN32_DLL work with autoconf 2.53.\n\n\t* acinclude.m4 (GMP_C_SIZES): Establish BITS_PER_MP_LIMB as a value,\n\tnot an expression, for the benefit of the gen-bases invocation.\n\n\t* config.guess (CC_FOR_BUILD): Try c99, same as configfsf.guess.\n\n2002-06-15  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpfr/set_q.c: Allow for 1 bit numerator or denominator.\n\n2002-06-14  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_C_BIGENDIAN): Use new style action parameters.\n\n\t* randlc2x.c: Allow for a<0, allow for c>=2^m2exp.\n\t* randraw.c (lc): Allow for a==0.\n\n\t* mpn/sparc32/udiv.asm: Renamed from udiv_fp.asm.  Don't know if float\n\tis the best way for v7, but it's what configure has chosen since gmp 3.\n\t* configure.in (*sparc*-*-* ABI=32): extra_functions=\"udiv\" for all,\n\tin particular sparc32/v8/udiv.asm is faster (on ultrasparc2) than\n\tudiv_fp previously used for v9 chips.\n\n\t* gen-bases.c: New file, derived from mpn/mp_bases.c.\n\t* dumbmp.c: New file, mostly by Torbjorn, some by me.\n\t* configure.in, acinclude.m4 (GMP_PROG_CC_FOR_BUILD,\n\tGMP_PROG_CPP_FOR_BUILD, GMP_PROG_EXEEXT_FOR_BUILD,\n\tGMP_C_FOR_BUILD_ANSI, GMP_CHECK_LIBM_FOR_BUILD): New macros.\n\t(GMP_PROG_HOST_CC): Remove, superceded by GMP_PROG_CC_FOR_BUILD.\n\t* Makefile.am: Run gen-bases to create mp_bases.h and mpn/mp_bases.c.\n\t* gmp-impl.h: Use mp_bases.h.\n\t* mpn/mp_bases.c: Remove file.\n\t* mpn/Makefile.am: mp_bases.c now in nodist_libmpn_la_SOURCES.\n\n\t* tests/mpz/t-cmp_d.c (check_one_2exp): Use volatile to force to\n\tdouble, fixes gcc 3.1 with -O4.  
Reported by Michael Lee.\n\t* configure.in (AC_C_VOLATILE): New macro.\n\n\t* tests/misc/t-scanf.c:\t(fromstring_gmp_fscanf): Add missing va_end.\n\tDon't mix varargs and fixed args functions, not good on x86_64.\n\tReported by Marcus Meissner.\n\n\t* Makefile.am (EXTRA_DIST): Remove mpfr/README, now in mpfr/Makefile.in\n\n\t* configure, config.in, INSTALL.autoconf: Update to autoconf 2.53.\n\t* */Makefile.in, install-sh, mdate-sh, missing, aclocal.m4, configure:\n\tUpdate to automake 1.6.1.\n\t* configfsf.guess, configfsf.sub: Update to 2002-05-29.\n\n2002-06-12  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_GCC_VERSION_GE): Recognise mingw gcc 3.1 version.\n\t(GMP_PROG_CC_WORKS): Allow for a_out.exe, as per autoconf.\n\t(GMP_GCC_NO_CPP_PRECOMP, GMP_ASM_UNDERSCORE): Ditto, plus a.exe.\n\n2002-06-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* randraw.c (lc): Remove broken ASSERT_ALWAYS.\n\n\t* mpn/x86: Update gmp-mparam.h files with current measures *_THRESHOLD\n\tvalues.\n\t* mpn/x86/p6/mmx/gmp-mparam.h: New file.\n\n2002-06-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/*/gmp-mparam.h (USE_PREINV_DIVREM_1): Add tuned settings.\n\n\t* acconfig.h (HAVE_NATIVE_mpn_preinv_divrem_1): New template.\n\n\t* tests/refmpn.c, tests/tests.h (refmpn_chars_per_limb,\n\trefmpn_big_base): New functions.\n\t* tests/mpn/t-mp_bases.c: Use them, and don't test big_base_inverted\n\tunless it's being used.\n\n\t* gmp.texi (Notes for Particular Systems): Using Microsoft C with DLLs.\n\t(Known Build Problems): Notes on MacOS and GCC.\n\t(Integer Logic and Bit Fiddling): Use ULONG_MAX for maximum ulong.\n\t(Low-level Functions): mpn_get_str accepts base==256.\n\t(Formatted Output Functions): Note output is not atomic.\n\t(Internals): Note mp_size_t for limb counts.\n\n\t* mp-h.in, gmp-h.in (mp_ptr, mp_srcptr, mp_size_t, mp_exp_t): Remove\n\tthese types from mp.h, not needed.\n\n\t* mpfr/tests/tadd.c, mpfr/tests/tmul.c (check): Apply a hack to the\n\tparameter order to make sparc 
gcc 2.95.2 happy.\n\n\t* doc/configuration: Notes on bootstrapping.\n\n2002-06-08  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/tests/tsqrt.c, mpfr/tests/tsqrt_ui.c: Suppress tests if sqrt is\n\tnot affected by mpfr_set_machine_rnd_mode.\n\n\t* mpfr/mul_2si.c: Workaround a mips gcc 2.95.3 bug under -O2 -mabi=n32.\n\n\t* configure.in (alphev56): Fix to use ev5 path.\n\n2002-06-06  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in: Use __gmp_const not const, in a number of places.\n\n\t* configure.in (sparc): Use ABI=32 instead of ABI=standard on v7 and\n\tv8, for consistency with v9 choices.\n\t(sparc64): Restrict GMP_ASM_SPARC_REGISTER to ABI=64.\n\t(x86): Move MMX $path munging to before printout.\n\t(CCAS): Move upward to support this.\n\n\t* gmp-impl.h (modlimb_invert): Merge macros for specific limb sizes,\n\tadd a version for arbitrary limb size, use GMP_NUMB_BITS.\n\t(modlimb_invert, MODLIMB_INVERSE_3): Fix comments to say GMP_NUMB_BITS.\n\n\t* gmp-h.in (__GMP_LIKELY, __GMP_UNLIKELY): New macros.\n\t(mpz_getlimbn, mpz_perfect_square_p, mpz_popcount): Use them, make the\n\tfetch or mpn call likely, unconditionally calculate the alternative so\n\tas to avoid an \"else\" clause.\n\t* gmp-impl.h (LIKELY, UNLIKELY): Aliases.\n\n\t* configure.in, mpfr/tests/Makefile.am: Add $LIBM to $LIBS for\n\tMPFR_CONFIGS so it detects fesetround, and let it go through to\n\t$MPFR_LIBS.\n\t* mpfr/rnd_mode.c: Use gmp-impl.h to get MPFR_HAVE_FESETROUND.\n\n\t* tests/mpz/t-sizeinbase.c: Disable fake bits test, such pointer\n\tsetups are bogus and have been seen failing on hppa.\n\n\t* tests/misc.c, tests/refmpz.c, tests.tests.h, tests/mpz/t-cong.c:\n\tRename mpz_flipbit to refmpz_combit and move from misc.c to refmpz.c.\n\n2002-06-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-powm_ui.c Print proper routine name in error message.\n\n2002-06-03  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c, tune/freq.c, tune/speed.h: Add powerpc mftb support.\n\t(FREQ_MEASURE_ONE): Move to 
speed.h, fix tv_sec factor.\n\t(freq_measure): Use for mftb measuring too.\n\t* tune/powerpc.asm, tune/powerpc64.asm: New files.\n\t* configure.in, tune/Makefile.am: Add them.\n\n\t* gmp-impl.h (popc_limb): Add versions for Cray and fallback for\n\tarbitrary limb size.\n\n\t* mpn/sparc32/sparc-defs.m4: New file.\n\t* configure.in (sparc*-*-*): Use it.\n\t* acinclude.m4 (GMP_ASM_SPARC_REGISTER): New macro.\n\t* configure.in (sparc64): Use it.  Also, use -Wc,-m64 for linking.\n\t* mpn/sparc64/add_n.asm, mpn/sparc64/addmul_1.asm,\n\tmpn/sparc64/copyd.asm, mpn/sparc64/copyi.asm, mpn/sparc64/lshift.asm,\n\tmpn/sparc64/mul_1.asm, mpn/sparc64/rshift.asm,\n\tmpn/sparc64/sqr_diagonal.asm, mpn/sparc64/sub_n.asm,\n\tmpn/sparc64/submul_1.asm: Use REGISTER for .register.\n\n2002-06-01  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/powm_ui.c: Fix for result range in certain circumstances.\n\n\t* mpn/x86/k6/diveby3.asm: Speedup to 10 c/l, same as divexact_1.\n\tAnomaly pointed out by Alexander Kruppa.\n\n2002-05-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/export.c: Cast pointer via `unsigned long' when checking\n\talignment to avoid compiler warnings.\n\n2002-05-29  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (BSWAP_LIMB): Versions for m68k, powerpc, and arbitrary\n\tlimb size.\n\t* configure.in, acconfig.h (HAVE_HOST_CPU_FAMILY_m68k): New define.\n\n2002-05-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_basecase.c: Improve MAX_LEFT handling, returning\n\twhen possible.  
Add code for mpn_addmul_5 and mpn_addmul_6.\n\n2002-05-25  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c: Misc nailifications, and disable preinv thresholds\n\twith nails.\n\t* tune/speed.h: Use GMP_NUMB_HIGHBIT with mpn_sb_divrem_mn and\n\tmpn_divrem_2.\n\t* mpz/powm.c (redc): Nailify q.\n\n\t* tests/mpn/t-scan.c: Reduce the amount of testing, to go faster.\n\n2002-05-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 4.1 released.\n\n\t* mpn/alpha/ev6/nails/gmp-mparam.h: New file.\n\n\t* tests/devel/add_n.c (refmpn_add_n): Nailify.\n\t* tests/devel/sub_n.c (refmpn_sub_n): Nailify.\n\t* tests/devel/addmul_1.c (refmpn_addmul_1): Nailify.\n\t* tests/devel/submul_1.c (refmpn_submul_1): Nailify.\n\n\t* mpn/alpha/ev6/nails/add_n.asm: New file.\n\t* mpn/alpha/ev6/nails/sub_n.asm: New file.\n\t* mpn/alpha/ev6/nails/mul_1.asm: New file.\n\t* mpn/alpha/ev6/nails/submul_1.asm: New file.\n\n2002-05-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/nails/addmul_1.asm: New file.\n\n\t* mpz/inp_str.c (mpz_inp_str_nowhite): Nailify.\n\n\t* mpn/generic/mul_basecase.c: Update pointers before conditional\n\tMAX_LEFT break statements.\n\n2002-05-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-gcd.c: Test mpz_gcd_ui.\n\n\t* mpz/lcm_ui.c: Nailify.\n\n\t* mpz/gcd_ui.c: Nailify.  Make it work as documented, allowing\n\tNULL to be passed for result parameter.  
Fix gcd(0,0) case.\n\n\t* mpz/set_str.c: Nailify.\n\n\t* randlc2x.c (gmp_randinit_lc_2exp): Nailify.\n\n\tFrom Jakub Jelinek:\n\t* longlong.h (add_ssaaaa,sub_ddmmss) [64-bit sparc]:\n\tMake it actually work.\n\n2002-05-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/ui_div.c: Shut up compiler warning.\n\n\t* mpn/generic/mul_basecase.c: Use mpn_addmul_2, mpn_addmul_3, and\n\tmpn_addmul_4, as available.\n\n\t* mpn/alpha/ev6/nails/addmul_2.asm: Adjust NAILS_SUPPORT decls.\n\t* mpn/alpha/ev6/nails/addmul_3.asm: Likewise\n\t* mpn/alpha/ev6/nails/addmul_4.asm: Likewise.\n\n\t* configure.in (*-cray-unicos*): Back again to -hscalar0.\n\t(gmp_mpn_functions_optional): Add mul_3, mul_4, addmul_2, addmul_3,\n\tand addmul_4.\n\t* acconfig.h: Add #undefs for new optional mpn functions.\n\n2002-05-18  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Import and Export): Mention Cray unfilled words.\n\n\t* mpz/set_d.c, mpq/set_d.c: Use LIMBS_PER_DOUBLE for the output of\n\t__gmp_extract_double.  Reported by Henrik Johansson.\n\n2002-05-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/nails/addmul_2.asm: New file.\n\t* mpn/alpha/ev6/nails/addmul_3.asm: New file.\n\t* mpn/alpha/ev6/nails/addmul_4.asm: New file.\n\n\t* mpn/generic/dump.c: Rewrite and nailify.\n\n2002-05-16  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/Makefile.am (EXTRA_DIST): Add BUGS file.\n\n2002-05-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*-cray-unicos*): Remove -hscalar0, add -hnofastmd\n\tas workaround for compiler bug.\n\t(mips64*-*-*): Pass just -O1 to cc, to work around compiler bug.\n\n2002-05-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*-cray-unicos*): Pass -hscalar0 to work around\n\tcompiler bug for mpz/import.c.\n\n2002-05-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/import.c: Cast pointer via `unsigned long' when checking\n\talignment to avoid compiler warnings.\n\n\t* mpn/generic/rootrem.c: Adjust allocation of qp temporary area.\n\n2002-05-09  Kevin 
Ryde  <kevin@swox.se>\n\n\t* mpz/import.c: Corrections to size store, special case tests, and\n\tgeneral case ACCUMULATE.\n\t* tests/mpz/t-import.c, tests/mpz/t-export.c: More test data.\n\n2002-05-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/rootrem.c: Use temp space for root, copy value in place\n\tbefore returning.\n\t* mpz/root.c: Don't allocate extra limb for root value.\n\t* mpz/perfpow.c: Undo last change.\n\n2002-05-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (powerpc BSWAP_LIMB_FETCH): Rename local variable to make\n\tit not clash with caller.\n\n\t* mpn/generic/rootrem.c: New file.\n\t* configure.in (gmp_mpn_functions): Add rootrem and pow_1.\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add rootrem.c and\n\tpow_1.c\n\t* gmp-impl.h (mpn_rootrem): Add declaration.\n\t* mpz/perfpow.c: Amend allocations for mpn_rootrem requirements.\n\t* mpz/root.c: Rewrite to use mpn_rootrem.\n\n2002-05-08  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (MUL_KARATSUBA_THRESHOLD etc): Remove forced nail values.\n\n\t* mpf/fits_u.h, mpf/fits_s.h, tests/mpf/t-fits.c: Ignore fraction\n\tpart, making the code match the documentation.\n\n\t* gmpxx.h (struct __gmp_binary_minus): Use mpz_ui_sub.\n\n2002-05-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc32/README: New file.\n\n\t* mpz/root.c: Use unsigned long with mpz_sub_ui not mp_limb_t.\n\n\t* tune/README: Misc updates including sparc32/v9 smoothness, low res\n\ttimebase, and mpn_add_n operand overlaps.\n\t* tune/many.pl: Add udiv.asm support.\n\n\t* gmp.texi (Build Options): A couple of --build better as --host.\n\t(Known Build Problems, Notes for Package Builds): Add DESTDIR problem.\n\t(Compatibility with older versions): Compatible with 4.x versions.\n\t(Converting Integers): Remove mpz_get_ui + mpz_tdiv_q_2exp decompose.\n\t(Integer Import and Export): New section.\n\t(Miscellaneous Integer Functions): Clarify mpz_sizeinbase returns 1\n\tfor operand of 0.\n\t(Language Bindings): Add GNU 
Pascal.\n\t(Low-level Functions): Add GMP_NUMB_MAX.\n\n\t* tests/mpz/t-import.c, tests/mpz/t-export.c, tests/mpz/t-get_d.c:\n\tNew tests.\n\t* tests/mpz/Makefile.am: Add them.\n\n\t* mpz/import.c, mpz/export.c: New files.\n\t* Makefile.am, mpz/Makefile.am, gmp-h.in: Add them.\n\n\t* gmp-h.in, gmp-impl.h (GMP_NUMB_MAX): Move to gmp.h.\n\t* gmp-impl.h (CNST_LIMB): Add cast to mp_limb_t to ensure unsigned.\n\t(CRAY_Pragma, MPN_REVERSE, MPN_BSWAP, MPN_BSWAP_REVERSE,\n\tASSERT_ALWAYS_LIMB, ASSERT_ALWAYS_MPN): New macros.\n\t(MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS_MPN.\n\n2002-05-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/aors_ui.h: Nailify.\n\n\t* tests/mpz/t-addsub.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add t-addsub.\n\n\t* mpz/ui_sub.c: New file.\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Add ui_sub.c.\n\t* Makefile.am (MPZ_OBJECTS): Ditto.\n\t* gmp-h.in (mpz_ui_sub): Add declaration.\n\n\t* gmp-impl.h (MPZ_REALLOC): Rewrite to allow the use of _mpz_realloc\n\treturn value.\n\n\t* gmp-h.in (mpn_pow_1): Add declaration.\n\n\t* mpn/generic/pow_1.c: Handle exp <= 1.  
Reverse rp/tp parity scheme\n\tfor bn == 1 arm.\n\n\t* Rename MP_LIMB_T_HIGHBIT => GMP_LIMB_HIGHBIT.\n\n2002-05-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (main): Don't call mpz_sizeinbase with negative base.\n\n\t* randraw.c (lc): Remove an unused variable.\n\n\t* mpn/generic/get_str.c: Clarify an algorithm description.\n\n\t* tests/mpf/t-trunc.c: Nailify.\n\t* tests/mpf/t-set_si.c: Disable for nails.\n\n\t* mpf/cmp_si.c: Nailify.\n\t* mpf/cmp_ui.c: Nailify.\n\t* mpf/div.c: Nailify.\n\t* mpf/div_2exp.c: Nailify.\n\t* mpf/div_ui.c: Nailify.\n\t* mpf/eq.c: Nailify.\n\t* mpf/get_d.c: Nailify.\n\t* mpf/get_d_2exp.c: Nailify.\n\t* mpf/get_si.c: Nailify.\n\t* mpf/get_str.c: Nailify.\n\t* mpf/get_ui.c: Nailify.\n\t* mpf/mul_2exp.c: Nailify.\n\t* mpf/random2.c: Nailify.\n\t* mpf/set_q.c: Nailify.\n\t* mpf/set_si.c: Nailify.\n\t* mpf/set_str.c: Nailify.\n\t* mpf/set_ui.c: Nailify.\n\t* mpf/sub.c: Nailify.\n\t* mpf/ui_div.c: Nailify.\n\t* mpf/ui_sub.c: Nailify.\n\t* mpf/urandomb.c: Nailify.\n\n\t* gmp-impl.h (__GMPF_BITS_TO_PREC, __GMPF_PREC_TO_BITS): Nailify.\n\n\t* mpz/get_si.c: Misc variable name changes.\n\n\t* mpf/fits_u.h: Rewrite - nailify.\n\t* mpf/fits_s.h: Likewise.\n\n\t* mpz/mod.c: Disambiguate if-statament with extra {}.\n\n\t* mpf/int_p.c: Fix type of size variables.\n\t* mpf/get_ui: Likewise.\n\t* mpf/get_si: Likewise.\n\t* mpq/equal.c: Likewise.\n\t* mpq/get_d.c: Likewise.\n\t* mpz/cmp_d.c: Likewise.\n\t* mpz/cmpabs_d.c: Likewise.\n\t* mpz/divis_2exp.c: Likewise.\n\t* mpz/kronuz.c: Likewise.\n\t* mpz/kronzu.c: Likewise.\n\t* mpz/kronzs.c: Likewise.\n\t* mpz/kronsz.c: Likewise.\n\t* mpz/scan0.c: Likewise.\n\t* mpz/scan1.c: Likewise.\n\t* mpz/tstbit.c: Likewise.\n\t* mpz/cong_2exp.c: Likewise.\n\t* mpz/divis.c: Likewise.\n\n2002-05-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcd.c: Additional nailify changes.\n\n2002-05-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GNU_MP_VERSION): Set to 4.1.\n\t* Makefile.am 
(-version-info): Bump for new release.\n\n2002-04-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c: Additional nailify changes.\n\t* mpn/generic/mod_1.c: Likewise.\n\n\t* tests/mpq/t-get_d.c: Print floats with all 16 digits.\n\n\t* mpq/get_d.c: Nailify.\n\n\t* tests/mpq/t-set_f.c: Disable for nails.\n\n\t* mpz/get_d.c: Nailify.\n\n\t* gmp-impl.h (LIMBS_PER_DOUBLE, MP_BASE_AS_DOUBLE): Nailify.\n\n\t* gmp-h.in (__GMPZ_FITS_UTYPE_P): Cast maxval to before shifting it.\n\n\t* extract-dbl.c: Nailify.\n\n2002-04-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpq/md_2exp.c (mord_2exp): Nailify.\n\n\t* mpq/cmp_ui.c: Nailify.\n\n\t* mpq/cmp.c (mpq_cmp): Nailify.\n\n\t* mpn/generic/gcd.c: Nailify.  GNUify code layout.\n\n\t* mpn/generic/gcdext.c: Nailify.  Misc changes.\n\n\t* tests/mpz/t-sqrtrem.c: Let argv[1] mean # of repetitions.\n\t* tests/mpz/t-gcd.c: Likewise.\n\n\t* mpz/gcd.c: Nailify.\n\n\t* mpn/generic/random.c: Nailify.\n\n\t* gmp-impl.h (modlimb_invert): Nailify.\n\n2002-04-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c (div2): Remove qh parameter.\n\t(mpn_gcdext): Streamline double-limb code.\n\tMove GCDEXT_THRESHOLD check to after initial division.\n\n2002-04-27  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (JACOBI_MOD_OR_MODEXACT_1_ODD): Allow for odd\n\tGMP_NUMB_BITS.\n\n\t* tune/time.c (sgi_works_p): Allow for 64-bit counter, and fix\n\tSGI_CYCLECNTR_SIZE handling.\n\n\t* demos/expr/exprfr.c: Add nan and inf constants.\n\t* demos/expr/t-expr.c: Exercise them.\n\n2002-04-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/cmp_ui.c: Fix overflow conditions for nails.\n\n\t* gmp-h.in (mpz_get_ui): Fix typo from last change.\n\n\t* mpz/n_pow_ui.c: Adjust allocation for nails.\n\t(GMP_NUMB_HALFMAX): Renamed from MP_LIMB_T_HALFMAX.\n\tFix umul_ppmm invocation for for nails.\n\n2002-04-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c: Simplify by using mpn_tdiv_qr instead of\n\tmpn_divmod.\n\n2002-04-24  Kevin 
Ryde  <kevin@swox.se>\n\n\t* configure.in (*-*-cygwin*): Give a sensible default command line\n\tlimit, to avoid blowups reported by Jim Fougeron on windows 9x.\n\t(--enable-nails): Make the default 2, since mp_bases has data for that.\n\n\t* mpfr/mpfr-math.h (__mpfr_nan): Use a \"double\" for the bytes, to\n\tavoid a mis-conversion on alpha gcc 3.0.2.\n\t(_MPFR_INFP_BYTES, _MPFR_INFM_BYTES): Should be a zero mantissa.\n\n2002-04-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/dive_ui.c: Fix typo.\n\n\t* mpz/fits_s.h: Rewrite.\n\n\t* mpz/jacobi.c: Nailify.\n\t* mpz/kronuz.c: Additional nailify changes.\n\t* mpz/kronsz.c: Likewise.\n\n2002-04-23  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/expr/Makefile.am (LDADD): Add $(LIBM) for the benefit of mpfr.\n\n\t* mpz/divis_ui.c, mpz/cong_ui.c: Nailify.\n\t* mpn/generic/bdivmod.c, mpz/divexact.c, mpz/dive_ui.c: Nailify.\n\t* mpn/generic/sb_divrem_mn.c, mpn/generic/divrem.c,\n\tmpn/generic/divrem_2.c: Nailify ASSERTs.\n\t* mpn/x86/k6/mmx/logops_n.asm, mpn/x86/k6/mmx/com_n.asm: Nailify.\n\t* mpz/inp_raw.c, mpz/out_raw.c: Nailify.\n\t* mpz/kronzu.c, mpz/kronuz.c, mpz/kronzs.c, mpz/kronsz.c: Nailify.\n\t* mpn/generic/divis.c, mpz/cong.c, mpz/cong_2exp.c: Nailify.\n\t* gmp-impl.h (NEG_MOD): Nailify.\n\n\t* gmp-impl.h, mpn/mp_bases.c: Add back GMP_NUMB_BITS==30 bases data.\n\n\t* mpfr/get_d.c: Patch from Paul to avoid problem with constant folding\n\tin gcc on OSF.\n\n\t* mpn/lisp/gmpasm-mode.el: Remove mention of defunct LF macro.\n\n2002-04-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c: Handle \"binomial\" operator.\n\n\t* mpz/cmp_ui.c: Move assignments of `up' out of conditionals.\n\n\t* mpn/generic/gcdext.c: Fix fencepost error in STAT code.\n\n\t* gmp-impl.h (mpn_com_n): Nailify.\n\n\t* tests/mpz/t-cdiv_ui.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add t-cdiv_ui.\n\t* mpz/cdiv_qr_ui.c: Nailify.\n\t* mpz/cdiv_q_ui.c: Nailify.\n\t* mpz/cdiv_r_ui.c: Nailify.\n\t* mpz/cdiv_ui.c: Nailify.\n\n\t* 
tests/misc/t-printf.c (CHECK_N): Add cast to allow `char' to be an\n\tunsigned type.\n\t* tests/misc/t-scanf.c: Likewise.\n\n\t* mpz/mul_i.h: Rework nails code to handle parameter overlap.\n\n\t* tests/mpz/t-set_f.c: Disable for nails.\n\n2002-04-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/set_si.c: Add cast to support LONG_LONG_LIMB.\n\t* mpz/iset_si.c: Likewise.\n\n\t* mpz/bin_ui.c: Nailify.\n\t* mpz/bin_uiui.c: Nailify.\n\n\t* mpz/cmpabs_ui.c: Nailify.\n\n\t* tests/mpz/t-aorsmul.c: Nailify.\n\t* mpz/aorsmul_i.c (mpz_addmul_ui, mpz_submul_ui): Nailify better.\n\n2002-04-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-fdiv_ui.c: Check mpz_fdiv_ui.\n\t* tests/mpz/t-tdiv_ui.c: Check mpz_tdiv_ui.\n\n\t* mpz/tdiv_ui.c: Rewrite nails code.\n\t* mpz/fdiv_ui.c: Nailify.\n\n\t* tests/mpz/t-tdiv_ui.c: Check returned remainders.\n\t* tests/mpz/t-fdiv_ui.c: Merge in recent t-tdiv_ui changes.\n\n\t* mpz/tdiv_q_ui.c: Remove spurious TMP_* calls.\n\n\t* mpz/fdiv_qr_ui.c: Nailify.\n\t* mpz/fdiv_q_ui.c: Nailify.\n\t* mpz/fdiv_r_ui.c: Nailify.\n\n\t* mpz/get_si.c: Misc nailify changes to shut up compiler warnings.\n\n\t* mpz/ui_pow_ui.c: Fix typo in last change.\n\n2002-04-20  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/misc/t-printf.c, tests/misc/t-scanf.c: Check all %n types.\n\n\t* mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/p6/mmx/divrem_1.asm\n\t(mpn_preinv_divrem_1): New entrypoint.\n\t(mpn_divrem_1): Avoid a branch when testing high<divisor.\n\t* mpn/asm-defs.m4: Add define_mpn(preinv_divrem_1).\n\t* configure.in: Allow divrem_1.asm to provide mpn_preinv_divrem_1.\n\n\t* gmp-impl.h [nails]: Add #undefs of MUL_KARATSUBA_THRESHOLD etc, to\n\toverride CPU gmp-mparam.h.  
Remove JACOBI_BASE_METHOD override since\n\tit's nails-neutral.\n\n\t* tests/mpn/t-mp_bases.c: New file.\n\t* tests/mpn/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/t-constants.c: Move MP_BASES constants checks to it.\n\n\t* mpn/mp_bases.c: Fix big_base_inverted values for nails.\n\t* gmp-impl.h (MP_BASES_BIG_BASE_INVERTED_10,\n\tMP_BASES_NORMALIZATION_STEPS_10): Fix nails values.\n\t(MP_BASES_*): Remove GMP_NUMB_BITS == 30 data.\n\n\t* mpn/x86/pentium/com_n.asm, mpn/x86/pentium/logops_n.asm: Add\n\tNAILS_SUPPORT indicators.\n\n\t* configure.in: Grep for NAILS_SUPPORT in cpu-specific code, and look\n\tin \"nails\" subdirectories, print path used.\n\t* mpn/asm-defs.m4 (NAILS_SUPPORT): New macro.\n\n\t* mpfr/mpfr-test.h: Include config.h, for the benefit of test programs\n\tnot using gmp-impl.h.\n\n2002-04-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-scan.c: Nailify.\n\n\t* mpz/tdiv_qr_ui.c: Nailify.\n\t* mpz/tdiv_q_ui.c: Nailify.\n\t* mpz/tdiv_r_ui.c: Nailify.\n\t* mpz/tdiv_ui.c: Nailify.\n\n\t* mpz/cmp_ui.c: Nailify.\n\n\t* mpz/ui_pow_ui.c: Misc nailify changes to shut up compiler warnings.\n\n\t* mpz/scan0.c: Nailify.\n\t* mpz/scan1.c: Nailify.\n\n\t* tests/mpz/t-sizeinbase.c (mpz_fake_bits): Nailify.\n\n2002-04-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/aorsmul_i.c: Nailify.\n\n\t* mpz/cmp_si.c: Nailify (botched).\n\n\t* mpz/ui_pow_ui.c: Nailify.\n\n\t* gmp-h.in (__GMPZ_FITS_UTYPE_P): Nailify.\n\n\t* mpz/fits_s.h: Nailify.\n\n\t* tests/mpz/bit.c (check_tstbit): Nailify.\n\n\tFrom Paul Zimmermann:\n\t* mpn/generic/sqrtrem.c: Nailify.\n\n\t* mpz/n_pow_ui.c: Nailify.\n\n\t* mpz/cfdiv_r_2exp.c: Nailify.\n\n\t* randraw.c (lc): Undo: Let mpn_rshift put result in place to avoid\n\textra MPN_COPY.\n\n2002-04-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/clrbit.c: Add two GMP_NUMB_MASK masks after addition.\n\n\t* mpn/generic/random2.c (LOGBITS_PER_BLOCK): Decrease to 4.\n\n\t* gmp-impl.h (nail DIV_DC_THRESHOLD): Decrease to 50 to allow 
fast\n\tdivision.\n\n\t* mpn/generic/random2.c: Nailify.\n\n\t* mpz/fac_ui.c: Nailify.\n\n\t* mpz/mul_i.h: #if ... #endif code block to shut up gcc warnings.\n\n\t* mpn/generic/sqrtrem.c: Adopt to GNU coding standards.\n\t(mpn_dc_sqrtrem): New name for mpn_dq_sqrtrem.\n\tPartial nailification.\n\n\t* configure.in: As a temporary hack, clear extra_functions for nails\n\tbuilds.\n\n\t* gmp-h.in (mpz_get_ui): #if ... #endif else code block to shut up gcc\n\twarnings.\n\n2002-04-17  Kevin Ryde  <kevin@swox.se>\n\n\t* texinfo.tex: Update to 2002-03-26.08 per texinfo 4.2.\n\t* gmp.texi: Must have @top in @ifnottex (or @contents doesn't come out\n\tin one run).\n\n\t* mpn/generic/scan0.c, mpn/generic/scan1.c: Nailify.\n\n\t* tests/mpn/t-scan.c: New file.\n\t* tests/mpn/Makefile.am (check_PROGRAMS): Add it.\n\n\t* tests/refmpn.c, tests/tests.h (refmpn_tstbit): Use unsigned long for\n\tbit index.\n\t(refmpn_setbit, refmpn_clrbit, refmpn_scan0, refmpn_scan1): New\n\tfunctions.\n\n\t* mpfr/cmp_ui.c (mpfr_cmp_si_2exp): Fix b==0 i!=0 case.\n\n2002-04-17  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h, mpfrxx.h: Remove mpfr_class bool combinations, remove\n\tmpfr_class::get_str2, use mp_rnd_t for rounding modes, use\n\t8*sizeof(double) for mpfr_t's holding doubles.\n\n2002-04-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c: Nailify.\n\t* mpz/powm_ui.c: Nailify.\n\n2002-04-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/hamdist.c: Nailify.\n\t* tests/misc.c (urandom): Nailify.\n\n\t* mpz/get_si.c: Nailify.\n\t* gmp-h.in (mpz_get_ui): Nailify.  
Streamline (and probably upset\n\tmemory checkers).\n\n\t* gmp-impl.h (mp_bases[10] values): Add versions for GMP_NUMB_BITS\n\tbeing 28, 60, and 63.\n\t* mpn/mp_bases.c: Add tables for GMP_NUMB_BITS being 28, 60, and 63.\n\n\t* mpz/iset_si.c: Nailify.\n\t* mpz/iset_ui.c: Nailify\n\n\t* tests/mpz/convert.c (main): Print test number in error message.\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Shift up `frac' into nails\n\tfield after bignum division.\n\n2002-04-16  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, gmp-impl.h (GMP_NAIL_MASK): Move to gmp.h.\n\n\t* gmp.texi: Use @documentdescription and @copying, per texinfo 4.2.\n\t(Low-level Functions): Clarify mpn_gcd overlap requirements, rewrite\n\tmpn_set_str description, add nails section.\n\t(C++ Interface General): Remove bool from types that mix with classes.\n\t(Language Bindings): Add STklos, GNU Smalltalk, Regina.\n\t(Binary to Radix, Radix to Binary): Describe new code.\n\t(Assembler Cache Handling): More notes, mostly by Torbjorn.\n\n\t* macos/configure (%vars): Remove __GMP from substitutions, per change\n\tto main configure.\n\n\t* mpn/generic/dive_1.c: Nailify.\n\t* mpn/generic/mode1o.c: Nailify, remove bogus ASSERT in commented-out\n\talternate implementation.\n\t* gmp-impl.h (SUBC_LIMB): New macro.\n\n\t* tests/devel/try.c (validate_divexact_1): Correction to compare.\n\t(udiv_qrnnd): New testing.\n\t(SHIFT_LIMIT): Nailify.\n\t(-b): New option, remove spurious \"H\" from getopt string.\n\n\t* mpz/clrbit.c: Nailify.\n\t* tests/mpz/t-hamdist.c: Nailify.\n\t* gmp-impl.h (MPN_FIB2_SIZE): Nailify.\n\t(PP): Nailify conditionals.\n\t* tests/mpz/t-fib_ui.c (MPZ_FIB_SIZE_FLOAT): Nailify.\n\n\t* configure.in, acinclude.m4: Establish GMP_NAIL_BITS and\n\tGMP_LIMB_BITS for gmp-h.in configure tests.\n\n\t* mpfr/*, configure.in: Update to final mpfr 2.0.1.\n\t* mpfr/acinclude.m4 (MPFR_CONFIGS): Use $host, not uname stuff.\n\t* mpfr/tests/tout_str.c: Patch from Paul for denorm fprintf tests.\n\n2002-04-15  Torbjorn 
Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c (EXTRACT): Remove.\n\n\t* tests/mpz/t-tdiv_ui.c (dump_abort): Accept argument for error string.\n\n\t* mpz/rrandomb.c: Nailify.  Needs further work.\n\n\t* mpn/generic/mod_1.c: Nailify.\n\n\t* gmp-impl.h: Set various *_THRESHOLD values to be used for nails to\n\tavoid not yet qualified algorithms.\n\t(MPZ_CHECK_FORMAT): Check that nail part is zero.\n\n\t* tests/mpz/t-mul.c (main): Test squaring even for huge operands.\n\t(base_mul): Nailify.\n\t(dump_abort): Accept argument for error string.  Print product\n\tdifference.\n\n\t* mpn/generic/set_str.c: Nailify.\n\n\t* gmp-h.in (__GMPN_ADD, __GMPN_SUB): Nailify.\n\n2002-04-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* randraw.c (lc): Return non-nonsense return value for seed=0 case.\n\tCheck for m2exp being non-zero early; remove all other tests of m2exp.\n\tRemove redundant MPN_ZERO call.\n\tLet mpn_rshift put result in place to avoid extra MPN_COPY.\n\tRemove confusing comment before function `lc' describing BBS algorithm.\n\tMisc simplification and cleanups.\n\tNailify.  Needs further work.\n\n\t* mpz/set_si.c: Nailify.\n\t* mpz/set_ui.c: Nailify.\n\t* mpz/mul_i.h: Nailify.\n\n\t* tests/mpz/t-mul_i.c: Actually test _ui routines.  
Add some more test\n\tvalues.\n\n\t* mpn/generic/mul_n.c: Finish nailifying toom3 code.\n\n2002-04-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*: Update to another new mpfr 2.0.1.\n\t* configure.in, Makefile.am, mpfr/Makefile.am, mpfr/tests/Makefile.am:\n\tUse MPFR_CONFIGS macro, establish separate MPFR_CFLAGS for mpfr build.\n\n\t* mpfr/tests/Makefile.am: Correction to convenience rule for libmpfr.a.\n\n2002-04-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/set_q.c: gmp-impl.h before mpfr.h to avoid _PROTO redefine.\n\n\t* mpfr/*, configure.in: Update to new mpfr 2.0.1.\n\n\t* tests/refmpn.c (refmpn_udiv_qrnnd, refmpn_divmod_1c_workaround):\n\tFixes for nails.\n\n\t* tests/t-constants.c (MODLIMB_INVERSE_3): Nailify tests.\n\t(MP_BASES_BIG_BASE_INVERTED_10, MP_BASES_NORMALIZATION_STEPS_10): Only\n\tcheck these under USE_PREINV_DIVREM_1.\n\t* tests/t-modlinv.c: Nailify tests.\n\n2002-04-11  Gerardo Ballabio <gerardo.ballabio@unimib.it>\n\n\t* gmpxx.h: Remove bool combinations, remove mpf_class::get_str2, only\n\tneed <iosfwd> now.\n\n2002-04-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/diveby3.c: Nailify.\n\t* gmp-impl.h (MODLIMB_INVERSE_3): Nailify.\n\n\t* mpn/generic/mul_n.c: Nailify Toom3 code.\n\n2002-04-10  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (MPN_KARA_MUL_N_MINSIZE, MPN_KARA_SQR_N_MINSIZE): Set to\n\t3, as needed by nails case.\n\n\t* mpn/generic/addmul_1.c, mpn/generic/submul_1.c [nails]: Fix vl\n\tassert, add rp,n and up,n asserts.\n\n\t* mpfr/Makefile.am: Add new mpfr-math.h, install mpf2mpfr.h.\n\n2002-04-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c: Nailify.  Update mp_size_t variables to use\n\t`n' suffix instead of `size' suffix.\n\t* mpn/generic/divrem_2.c: Likewise.\n\t* mpn/generic/sb_divrem_mn.c: Nailify.\n\t* mpn/generic/tdiv_qr.c: Nailify.\n\t(SHL): Remove silly macro.\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n): Replace open-coded increment by\n\tmpn_incr_u call.  
Handle nails in ws[n] increment.\n\t* mpn/generic/mul_n.c (mpn_kara_sqr_n): Likewise.\n\n\t* gmp-h.in (GMP_NUMB_MASK): New #define.\n\t(__GMPN_AORS_1): Add version for nails.\n\n\t* gmp-impl.h (GMP_NUMB_MASK): Comment out, now in gmp.h.\n\t(mpn_incr_u): Don't assume `incr' is non-zero.\n\t(mpn_decr_u): Similarly.\n\n2002-04-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/*, configure.in: Update to mpfr 2.0.1.\n\n\t* tests/refmpn.c (refmpn_mul_1c, lshift_make): Corrections for nails.\n\t* tests/refmpn.c, tests/tests.h (refmpn_cmp_allowzero): New function.\n\n\t* mpn/generic/mul_1.c [nails]: Fix vl assert, add {up,n} assert.\n\n\t* mpn/pa32/hppa1_1/pa7100/addmul_1.asm,\n\tmpn/pa32/hppa1_1/pa7100/submul_1.asm: Rename \"size\" define, to avoid\n\tELF .size directive.  Reported by LaMont Jones.\n\n\t* tests/mpz/t-set_si.c: Add nails support.\n\n2002-04-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: Replace nail mpn_incr_u, mpn_decr_u with faster versions.\n\t(mp_bases[10] values): Check GMP_NUMB_BITS instead of BITS_PER_MP_LIMB.\n\tAdd GMP_NUMB_BITS == 30 version.\n\t(__gmp_doprnt, etc): Remove parameter names.\n\n\t* mpn/generic/mul_n.c: Nailify Karatsuba code.\n\t* mpn/generic/get_str.c: Nailify.\n\t* mpn/generic/sqr_basecase.c: Nailify.\n\t* mpn/generic/lshift.c: Nailify.\n\t* mpn/generic/rshift.c: Likewise.\n\t* mpn/generic/add_n.c: Nailify.  
Revamp non-nail code.\n\t* mpn/generic/sub_n.c: Likewise.\n\t* mpn/generic/mul_1.c: Likewise.\n\t* mpn/generic/addmul_1.c: Likewise.\n\t* mpn/generic/submul_1.c: Likewise.\n\n2002-04-02  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (BSWAP_LIMB_FETCH, BSWAP_LIMB_STORE) [powerpc]:\n\tCorrections to constraints, and restrict to bigendian.\n\n2002-03-31  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/dive.c: Better diagnostics.\n\n\t* tests/devel/try.c (mpn_get_str, mpn_umul_ppmm_r): New tests.\n\n\t* tests/misc.c, tests/tests.h (byte_diff_lowest, byte_diff_highest):\n\tNew functions.\n\n\t* tests/t-bswap.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* tests/mpn/t-aors_1.c, tests/mpn/t-iord_u.c: Add nails support.\n\n\t* gmp-impl.h (MPN_IORD_U) [x86]: Eliminate unnecessary jiord and iord,\n\trename \"n\" to incr per generic versions, restrict to nails==0.\n\t(mpn_incr_u, mpn_decr_u): Add nails support.\n\t(GMP_NAIL_LOWBIT, GMP_NUMB_MAX): New macros.\n\n\t* tests/trace.c, tests/tests.h (byte_trace, byte_tracen): New\n\tfunctions.\n\t* tests/trace.c: Handle NULL operands.\n\n\t* tests/refmpn.c, tests/devel/try.c, tune/speed.c: Add preliminary\n\tnail support.\n\n\t* tests/refmpn.c, tests/tests.h (byte_overlap_p, refmpn_equal_anynail,\n\trefmpn_umul_ppmm_r, refmpn_udiv_qrnnd_r, refmpn_get_str,\n\trefmpn_bswap_limb, refmpn_random, refmpn_random2, refmpn_bswap_limb):\n\tNew functions.\n\n\t* gmp-impl.h, tests/refmpn.c (ASSERT_LIMB): Renamed from\n\tASSERT_MP_LIMB_T.\n\n\t* mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put\n\tspeeds after the copyright notice, so as to keep that clear.\n\n2002-03-29  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (powerpc*-*-aix*): Correction to xlc -qarch selection,\n\tfor 32-bit mode.\n\n2002-03-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn: Fix spacing in many files.\n\n\t* mpn/generic/aorsmul_1.c: Split into addmul_1.c and submul_1.c.\n\t* mpn/generic/aors_n.c: Split into add_n.c and 
sub_n.c.\n\n\t* mpn/pa64/add_n.asm: Trim another 0.125 cycle/limb.  Fix a comment.\n\t* mpn/pa64/sub_n.asm: Likewise.\n\n\t* mpn/pa64/mul_1.asm: Change comclr, comb to proper forms cmpclr, cmpb.\n\t* mpn/pa64/addmul_1.asm: Likewise.\n\t* mpn/pa64/submul_1.asm: Likewise.\n\n2002-03-28  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Converting Integers): Fix type of exp in mpz_get_d_2exp,\n\treported by epl@unimelb.edu.au.\n\t(References): Update Burnikel and Ziegler URL, reported by Keith\n\tBriggs.\n\n\t* gmp-h.in, mp-h.in, configure.in, acinclude.m4: Remove __GMP from\n\tAC_SUBSTs, since autoconf says leading \"_\" in makefile variables is\n\tnot portable.\n\n\t* demos/expr/run-expr.c: Declare optarg, optind, opterr if necessary.\n\t* configure.in, demos/expr/expr-config-h.in: Configs for this.\n\n2002-03-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/Makefile.am (TARG_DIST): Remove pa64w and hppa, add pa32.\n\n\t* configure.in (path_20w): Remove pa64w.\n\n\t* mpn/pa64/udiv_qrnnd.asm: Tweak for PA8000 performance comparative to\n\tthat on PA8500.\n\n2002-03-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa32: New name for mpn/hppa.\n\t* configure.in: Corresponding changes.\n\n\t* mpn/pa64/umul_ppmm.asm: New file, generalized for both 2.0N and 2.0W.\n\t* mpn/pa64/umul_ppmm.S: Remove.\n\n\t* mpn/pa64/udiv_qrnnd.asm: Generalize for both 2.0N and 2.0W.\n\t* mpn/pa64w/udiv_qrnnd.asm: Remove.\n\n2002-03-26  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/tests/tdiv.c, mpfr/tests/tui_div.c: Don't depend on nan and inf\n\thandling in \"double\", for the benefit of alpha.\n\n\t* configure (hppa2.0w): Set path to \"pa64w pa64\".\n\n\t* acinclude.m4, configure.in (GMP_C_INLINE): New macro.\n\t* acinclude.m4 (GMP_H_EXTERN_INLINE): Use it, and fix \"yes\" handling.\n\n2002-03-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64w/add_n.s: Remove.\n\t* mpn/pa64w/sub_n.s: Remove.\n\t* mpn/pa64w/lshift.s: Remove.\n\t* mpn/pa64w/rshift.s: Remove.\n\t* mpn/pa64w/mul_1.S: Remove.\n\t* 
mpn/pa64w/addmul_1.S: Remove.\n\t* mpn/pa64w/submul_1.S: Remove.\n\t* mpn/pa64w/sqr_diagonal.asm: Remove.\n\n\t* mpn/pa64/mul_1.asm: New file with twice faster code; generalized\n\tfor both 2.0N and 2.0W.\n\t* mpn/pa64/submul_1.asm: Likewise.\n\t* mpn/pa64/mul_1.S: Remove.\n\t* mpn/pa64/submul_1.S: Remove.\n\n\t* mpn/pa64/sqr_diagonal.asm: Generalize for both 2.0N and 2.0W.\n\n\t* mpn/pa64/add_n.asm: New file, generalized for both 2.0N and 2.0W.\n\t* mpn/pa64/sub_n.asm: Likewise.\n\t* mpn/pa64/lshift.asm: Likewise.\n\t* mpn/pa64/rshift.asm: Likewise.\n\t* mpn/pa64/add_n.s: Remove.\n\t* mpn/pa64/sub_n.s: Remove.\n\t* mpn/pa64/lshift.s: Remove.\n\t* mpn/pa64/rshift.s: Remove.\n\n2002-03-24  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (BSWAP_LIMB_FETCH, BSWAP_LIMB_STORE): New macros.\n\t* mpz/inp_raw.c, mpz/out_raw.c: Use them.\n\t* acconfig.h (HAVE_HOST_CPU): Add some powerpc types.\n\n\t* mpn/powerpc32/750/com_n.asm: New file.\n\n\t* mpfr/tests/tout_str.c: Disable random tests, since they fail on\n\talphaev56-unknown-freebsd4.1 and do nothing by default.\n\n\t* mpfr/tests/tsqrt.c: Don't depend on nan, inf or -0 in \"double\", for\n\tthe benefit of alpha.\n\t* mpfr/sqrt.c: Clear nan flag on -0.\n\n\t* demos/factorize.c: Use mpn_random() instead of random(), to avoid\n\tportability problems.\n\n\t* demos/isprime.c (print_usage_and_exit): Declare as \"void\" to avoid\n\twarnings.\n\n\t* demos/pexpr.c (setup_error_handler): Corrections to sigstack code.\n\n\t* demos/calc/calc.y: Add some `;'s to make bison 1.34 happy.\n\n2002-03-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/addmul_1.asm: New file with twice faster code; generalized\n\tfor both 2.0N and 2.0W.\n\n2002-03-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c: Add SGI hardware counter measuring method, change some\n\tabort()s into ASSERT_FAIL()s.\n\n\t* configure.in (AC_CHECK_HEADERS): Add fcntl.h and sys/syssgi.h.\n\t(AC_CHECK_FUNCS): Add syssgi.\n\n\t* configure.in, mpfr/Makefile.am, 
mpfr/tests/Makefile.am: Use\n\t-mieee-with-inexact or -ieee_with_inexact for mpfr on alpha, so\n\tdenorms work.\n\n\t* mpfr/isinteger.c: Fix a memory leak.\n\n2002-03-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.c (struct choice_t): Make `r' an mp_limb_t.\n\n2002-03-21  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (HAVE_LIMB_BIG_ENDIAN, HAVE_LIMB_LITTLE_ENDIAN): Use an\n\tAH_VERBATIM and better explanation.\n\t* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Similarly for the HAVE_DOUBLE\n\tconstants.\n\n\t* gmp.texi (Number Theoretic Functions): Clarify sign of GCD returned\n\tby mpz_gcdext.\n\n\t* demos/pexpr.c, demos/pexpr-config-h.in, configure.in: Use an\n\tautoconf test for stack_t.\n\n\t* configure.in, gmp-h.in, mp-h.in, macos/configure, tests/mpz/reuse.c,\n\ttests/mpf/reuse.c: Use __GMP_LIBGMP_DLL to enable windows declspec,\n\tdon't require _WIN32 (etc), remove __GMP_LIBGMP_SHARED and\n\t__GMP_LIBGMP_STATIC.\n\n\t* gmp-impl.h (mp_bases): Add __GMP_DECLSPEC, for the benefit of\n\ttests/t-constants.c.\n\n\t* tune/many.pl, tune/speed.h: Remove suffix hack for back.asm.\n\n2002-03-21  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpfr/sin_cos.c (mpfr_sin_cos): New file.\n\t* mpfr/mpfr.h, mpfr/mpfr.texi, mpfr/Makefile.am: Add it.\n\t* mpfr/tan.c: Fix sign in 2nd and 4th quadrants.\n\n\t* mpfr/log10.c: Fix hangs on certain inputs.\n\n2002-03-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (setup_error_handler): Declare `s', the first\n\tsigaltstack parameter, using `stack_t' just on AIX.\n\n2002-03-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/mul_1.asm: Use free caller-saves registers instead\n\tof the callee-saves r30 and r31.\n\n2002-03-19  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (freq_proc_cpuinfo): Recognise powerpc \"clock\", where\n\tpreviously got the wrong result from \"bogomips\".\n\n\t* mpn/powerpc32/add_n.asm, mpn/powerpc32/sub_n.asm: Rewrite, faster on\n\t750, and smaller too.\n\t* mpn/powerpc32/*.asm: Use 
L(), add some measured speeds.\n\n\t* longlong.h (count_trailing_zeros) [vax]: Add a version using ffs,\n\tbut commented out.\n\n2002-03-17  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.c, tune/speed.h, tune/common.c, many.pl: Use optional\n\t\".r\" to specify operand overlaps for mpn_add_n, mpn_sub_n and logops.\n\tRemove mpn_add_n_inplace and mpn_add_n_self.\n\t* tune/many.pl: Fix MULFUNC_PROLOGUE parsing.\n\n\t* gmp.texi (Known Build Problems): Note `make' problem with long\n\tlibgmp.la dependencies list.\n\n\t* printf/doprnt.c, scanf/doscan.c (%zn): Remove test of non-existent\n\tHAVE_SIZE_T, just use size_t unconditionally.\n\t* printf/doprnt.c (%zd etc): Fix 'z' type parsing.\n\t* tests/misc/t-printf.c, tests/misc/t-scanf.c: More tests.\n\n\t* configure.in: Use AC_COPYRIGHT.\n\tAdd m4_pattern_allow(GMP_MPARAM_H_SUGGEST).\n\n\t* tune/Makefile.am (libdummy.la): Remove this, sqr_basecase.c already\n\tgets an ansi2knr rule from nodist_tuneup_SOURCES.\n\n\t* longlong.h (count_leading_zeros) [pentiumpro gcc<3]: Test\n\tHAVE_HOST_CPU_i686 too.\n\n\t* mpz/out_raw.c (HTON_LIMB_STORE): Fix a typo in big endian #if.\n\n2002-03-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/com_n.asm, mpn/x86/pentium/logops_n.asm,\n\tmpn/x86/k6/mmx/com_n.asm: Add nails support.\n\n\t* texinfo.tex: Update to 2002-03-01.06 (per texinfo 4.1).\n\t* gmp.texi (@ma): Remove, @math does this now.\n\n\t* mpfr/tests/reuse.c: Clear op1 and op2 flags only in their respective\n\touter loops.\n\n\t* configure.in (--enable-cxx): Correction to the default stated in the\n\thelp string.\n\t(power*-*-aix*, not powerpc): Use aix.m4, don't run\n\tGMP_ASM_POWERPC_R_REGISTERS or use powerpc-defs.m4.\n\n2002-03-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/gmp-mparam.h: New file.\n\n2002-03-13  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/expr/exprfr.c: More mpfr functions, corrections to agm, cos,\n\tsin, rename log2 constant to loge2 to make room for log2 function.\n\t* demos/expr/t-expr.c: 
More tests.\n\n\t* mpz/inp_raw.c (NTOH_LIMB_FETCH) [generic 16bit]: Remove spurious \"+\".\n\n\t* mpfr/acos.c: Avoid a memory leak for certain operands.\n\n\t* acinclude.m4, configure.in (GMP_C_DOUBLE_FORMAT): New macro.\n\n\t* acinclude.m4 (GMP_HPC_HPPA_2_0, GMP_ASM_UNDERSCORE,\n\tGMP_ASM_ALIGN_LOG, GMP_ASM_LSYM_PREFIX, GMP_ASM_W32, GMP_ASM_X86_MMX):\n\tChange ac_objext to OBJEXT, which is the documented variable.\n\n\t* config.guess (powerpc*-*-*): Use #ifdef on constants POWER_630 etc\n\tin the AIX test, since old versions don't have them all.\n\n2002-03-11  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (LIBC211): New AC_DEFINE, for mpfr.\n\n\t* configure.in (mips*-*-*): Support ABI=o32 on irix 6, allow gcc 2.7.2\n\tto fall back on it, but detect it doesn't work with gcc 2.95.  Use\n\tsingle mips-defs.m4 for both mips32 and mips64.\n\t* acinclude.m4 (GMP_GCC_MIPS_O32): New macro.\n\t* mpn/mips32/mips-defs.m4: Renamed from mips.m4.\n\t* mpn/mips64/mips.m4: Remove (was a copy of mips32/mips.m4).\n\n\t* mpn/powerpc32/750: New directory.\n\t* configure.in (powerpc740, powerpc750, powerpc7400): Use it.\n\t* mpn/powerpc32/750/gmp-mparam.h: New file.\n\n\t* config.sub, gmp.texi (ultrasparc1): Remove this, just use plain\n\t\"ultrasparc\".\n\n2002-03-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr: Update to 20020301, except internal_ceil_exp2.c,\n\tinternal_ceil_log2.c, internal_floor_log2.c renamed to i_ceil_exp2.c,\n\ti_ceil_log2.c, i_floor_log2.c to be unique in DOS 8.3.  
And sqrtrem.c\n\tremoved since no longer required.\n\t* mpfr/mpfr.texi: Fix some formatting.\n\t* mpfr/tests/reuse.c: Patch by Paul to fix test4 variable handling.\n\t* mpfr/sinh.c: Patch by Paul to fix err calculation when t==0.\n\t* mpfr/tests/tget_d.c: Disable until portability of rnd_mode.c can be\n\tsorted out.\n\n\t* configure.in (powerpc*-*-*): Separate gcc and xlc cpu flags setups\n\tfor clarity.\n\n\t* longlong.h (count_leading_zeros, count_trailing_zeros) [x86_64]: New\n\tmacros.\n\n2002-03-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Note all the ultrasparcs accepted.\n\t(Language Bindings): Add Math::BigInt::GMP.\n\n\t* config.sub (ultrasparc2i): New cpu type.\n\t* config.guess (sparc-*-*, sparc64-*-*): Add some exact CPU detection.\n\n2002-03-05  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h (count_leading_zeros, count_trailing_zeros) [alphaev67,\n\talphaev68]: Use ctlz and cttz insns (as per gcc longlong.h).\n\t(count_leading_zeros) [sparclite]: Fix parameter order (as per gcc\n\tlonglong.h).\n\t* acconfig.h (HAVE_HOST_CPU_alphaev68): New define.\n\n\t* config.guess [i?86-*-*]: Suppress error messages if compiler not\n\tfound or test program won't run.\n\t[rs6000-*-*, powerpc-*-*]: Force code alignment for mfpvr test.\n\n2002-03-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/pow_1.c: New file.\n\n2002-03-03  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Note compiler must be able to fully link,\n\tadd alphapca57 and alphaev68, give a clearer example of MPN_PATH\n\t(Debugging): Add notes on valgrind.\n\t(C++ Formatted Output): Clarify mpf showbase handling, in particular\n\tnote \"00.4\" in octal.\n\n\t* printf/doprntf.c: Do a showbase on octal float fractions, for\n\tinstance \"00.4\" where previously it gave \"0.4\".\n\t* tests/cxx/t-ostream.cc: Update.\n\n\t* gmp-h.in, mp-h.in (__GMP_DECLSPEC, __GMP_DECLSPEC_XX): Test\n\t__WIN32__ for Borland C, reported by \"delta trinity\".\n\n\t* gmp-h.in, mp-h.in: 
Use <cstddef> for size_t under C++, suggested by\n\tHans Aberg some time ago.\n\t* gmp-h.in (<iosfwd>): Move to top of file for clarity.\n\n\t* Makefile.am (libgmpxx_la_SOURCES): Use dummy.cc to force C++.\n\t(CXX_OBJECTS): Add osfuns$U.lo.\n\t* dummy.cc: New file.\n\t* cxx/Makefile.am (INCLUDES): Use __GMP_WITHIN_GMPXX.\n\t(libcxx_la_SOURCES): Add osfuns.cc.\n\t* gmp-h.in (__GMP_DECLSPEC_XX): New define, use it on libgmpxx funs.\n\t* gmp-impl.h: Add __GMP_DECLSPEC to libgmp functions used by libgmpxx.\n\n\t* longlong.h (COUNT_TRAILING_ZEROS_TIME): Remove, no longer used.\n\n\t* gmp-impl.h (MPN_SIZEINBASE, MPN_SIZEINBASE_16): Correction to\n\t__totbits for nails.\n\n\t* gmp-impl.h (JACOBI_LS0): Test size before limb, to pacify valgrind.\n\t(JACOBI_0LS): Ditto, and fix parens around arguments.\n\n\t* mpn/x86/x86-defs.m4 (call_mcount): Add a counter to make data labels\n\tunique, since simplified L() scheme no longer gives that effect.\n\t(notl_or_xorl_GMP_NUMB_MASK): New macro.\n\tAdd m4_assert_numargs in a few places.\n\n\t* configure.in (*sparc*): Fix cycle counter setups for ABI=64.\n\n2002-02-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/vax/gmp-mparam.h: New file.\n\n2002-02-28  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (gmp_errno, gmp_version): Move into extern \"C\" block,\n\treported by librik@panix.com.\n\n\t* gmp-h.in, mp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT):\n\tUse __declspec(dllexport) and __declspec(dllimport) on Borland.\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Test __STDIO_H for Borland.\n\tReported by \"delta trinity\".\n\n\t* gmp-impl.h (va_copy): Fall back on memcpy, not \"=\".\n\n\t* mpn/generic/pre_mod_1.c: Add a comment about obsolescence.\n\n\t* tune/time.c (MICROSECONDS_P): Don't trust time differences of 1\n\tmicrosecond.\n\n\t* tests/cxx/t-ostream.cc: Use \"const char *\" not just \"char *\" for\n\ttest data strings, avoids warnings on Sun CC.\n\n2002-02-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: For sparc 
under solaris2.[7-9], pass -fsimple=1 to\n\tdisable some crazy -fast optimizations.\n\n2002-02-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: For sparc under solaris2.[7-9], pass -fns=no to enable\n\tdenorm handling under -fast.\n\n2002-02-25  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (alpha*-*-*): Rearrange -mcpu selection for gcc,\n\tprovide an ev67 -> ev6 fallback.  Fix -arch,-tune selection for DEC C.\n\tAllow ~ for space in optional options lists.\n\n\t* tune/tuneup.c (tune_preinv_divrem_1): Compare against an assembler\n\tmpn_divrem_1 if it exists, not the generic C mpn_divrem_1_div.\n\t(tune_preinv_mod_1): Ditto with mpn_mod_1.\n\n\t* tune/time.c (DIFF_SECS_ROUTINE): Eliminate the unused \"type\"\n\tparameter, try to make the code a bit clearer.\n\n\t* tune/freq.c: Reduce the period measured for cycles versus\n\tgettimeofday, add cycles versus microsecond getrusage.\n\n\t* mpz/array_init.c: \"i\" should be mp_size_t, noticed by E. Khong.\n\n2002-02-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: For sparc under solaris2.[7-9], pass -fast instead of\n\tother optimization options.\n\n2002-02-23  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/asm-defs.m4 (GMP_NUMB_MASK): New macro.\n\t(PROLOGUE, EPILOGUE): Relax quoting for the benefit of tune/many.pl\n\twhen GSYM_PREFIX non-empty.\n\n\t* tune/time.c, tune/speed.h (speed_time_init): Include clock tick\n\tperiod in speed_time_string.\n\t* tune/time.c, configure.in (clock_gettime): New measuring method.\n\n\t* tune/many.pl: Add -DHAVE_NATIVE_mpn_foo to C objects, to avoid\n\tconflicts with a macro version in gmp-impl.h, eg. 
mpn_com_n.\n\n2002-02-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c: Increase RLIMIT_STACK to 4Mibyte.\n\n2002-02-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c: Don't confuse gcc with mipspro cc in diagnostic.\n\n2002-02-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (mips*-*-irix[6789]*]): Set `extra_functions_n32', not\n\t`extra_functions'.\n\n\t* printf/doprnt.c: Conditionally include inttypes.h.\n\t* printf/repl-vsnprintf.c: Likewise.\n\t* scanf/doscan.c: Likewise.\n\n2002-02-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k7/mmx/com_n.asm: New file.\n\n\t* mpz/n_pow_ui.c (SWAP_RP_TP): Use ASSERT_CODE on ralloc and talloc,\n\tto ensure they needn't live past the initial allocs in a normal build.\n\n\t* mpn/generic/mod_34lsub1.c: Note this is for internal use.\n\n2002-02-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* Clean up *_THRESHOLD names.  Many files affected.\n\n\t* mpn/mips32: Asm-ify 32-bit mips code.\n\tMove files from `mips2' to `mips32' directory.\n\t* mpn/mips64: Move files from `mips3' to `mips64' directory.\n\t* configure.in: Change `mips2' => `mips32' and `mips3' => `mips64'.\n\n2002-02-19  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4, configure.in (GMP_PROG_LEX): New macro.\n\n\t* tune/tuneup.c (one): Start next threshold at a max of previous ones,\n\tin order to get a good starting point for TOOM3_SQR_THRESHOLD if\n\tKARATSUBA_SQR_THRESHOLD is 0 (ie. using mpn_mul_basecase only).\n\n\t* configure.in, tune/tuneup.c (GMP_MPARAM_H_SUGGEST): New AC_DEFINE\n\treplacing GMP_MPARAM_H_FILENAME.  
Suggest a new file in a cpu specific\n\tsubdirectory rather than mpn/generic.\n\n\t* acinclude.m4 (POWERPC64_PATTERN): New macro.\n\t* configure.in (powerpc*-*-*): Use it.\n\t(powerpc*-*-*): Use umul in 32L and aix64.\n\t(mips*-*-*): Use umul, 32 and 64 bit versions.\n\n2002-02-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h: Add basic x86-64 support.\n\n2002-02-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c: Support `-X' for upper case hex, make `-x' output\n\tlower case hex.\n\n\t* mpn/mips2/umul.s: Make it actually work.\n\t* mpn/mips3/umul.asm: New file.\n\n\t* mpn/mips2/gmp-mparam.h: New file.\n\n2002-02-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Round frac upwards after\n\tumul_ppmm calls.\n\n2002-02-16  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (alpha-*-*): Do alpha exact cpu probes on any system,\n\tand only if configfsf.guess gives a plain \"alpha\".\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Detect a gcc 3.0.3 powerpc64\n\tlinker invocation problem.\n\n2002-02-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): For base 10, develop initial\n\tdigits using umul_ppmm, then switch to plain multiplication.\n\n\t* config.guess: Rewrite Alpha subtype detection code for *bsd systems.\n\n2002-02-15  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Note powerpc exact cpu types.\n\t(Debugging): Advertise DEBUG in memory.c.\n\n\t* config.sub, config.guess: Add some powerpc exact cpus.\n\t* configure.in: Add configs for them.\n\n\t* memory.c [__NeXT__]: Remove unused #define of \"static\".\n\t(__gmp_default_allocate, __gmp_default_reallocate): Print size if\n\tallocation fails, don't use perror.\n\n\t* gmp-h.in: g++ 3 demands __GMP_NOTHROW is before other attributes.\n\n2002-02-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/mul_1.asm: Fix typo preventing build on T3E systems.\n\n2002-02-14  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c 
(tune_set_str): Increase max_size, for the benefit of\n\talpha.\n\n\t* macos/README: Bug reports to bug-gmp@gnu.org, clarify MacOS X a bit.\n\n\t* mpn/generic/gcdext.c [WANT_GCDEXT_ONE_STEP]: Add missing TMP_FREE.\n\n\t* tune/speed.c, tune/tuneup.c: Allow for speed_cycletime of 0.0 in\n\tsome diagnostic printouts.\n\t* tune/time.c (speed_cycletime): Note can be 0.0.\n\n2002-02-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/mul_1.asm: Add mpn_mul_1c entry.\n\n\t* mpn/pa64w/sqr_diagonal.asm: Use L() for labels.\n\n2002-02-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Change declaration of rp to\n\taccommodate tuneup compiles.\n\n2002-02-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/default.m4, mpn/alpha/unicos.m4 (PROLOGUE_cpu): Add\n\tnoalign option.\n\t* mpn/alpha/default.m4 (PROLOGUE_cpu): use ALIGN instead of \".align\".\n\n\t* gmp.texi (Debugging): Notes on Checker.\n\t(Other Multiplication): Move note on float FFTs to here.\n\t(Assembler Floating Point): New text and revisions by Torbjorn,\n\tpicture formatting by me.\n\tSimplify tex pictures elsewhere a bit, share heights, eliminate some\n\tgaps at line joins.\n\n2002-02-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Rewrite to generate fraction\n\tlimbs and use multiplication for digit development.  Trim allocation of\n\tbuf.  Get rid of code for !USE_MULTILIMB.\n\n2002-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/set_str.c (mpn_set_str): Undo this:\n\tChange invocations of mpn_add_1 to instead use mpn_incr_u.\n\n\t* tests/mpz/convert.c: Free str only after it is used in error message.\n\n\t* mpn/generic/get_str.c (mpn_sb_get_str): Combine tail code for base 10\n\tand generic bases.\n\n\t* mpn/mp_bases.c: Add entries for base 256.  
Remove __ prefix from\n\ttable name.\n\t* gmp-impl.h (__mp_bases): Remove superfluous `mp_' part of name, making\n\tit __gmpn_bases instead of __gmpn_mp_bases.\n\t(mp_bases): New #define.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Allow bases up to 256.\n\t(SPEED_ROUTINE_MPN_GET_STR): Likewise.\n\n2002-02-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/set_str.c (mpn_set_str): Use mpn_mul_1c if available.\n\tChange invocations of mpn_add_1 to instead use mpn_incr_u.\n\n2002-02-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/array_init.c, mpz/cfdiv_q_2exp.c, mpz/cfdiv_r_2exp.c,\n\tmpz/cong_2exp.c, mpz/divis_2exp.c, mpz/hamdist.c, mpz/init2.c,\n\tmpz/mul_2exp.c, mpz/realloc2.c, mpz/scan0.c, mpz/scan1.c,\n\tmpz/setbit.c, mpz/tdiv_q_2exp.c, mpz/tdiv_r_2exp.c, mpz/tstbit.c,\n\tmpz/urandomb.c: Use GMP_NUMB_BITS.\n\n\t* mpz/iset_str.c [__CHECKER__]: Store a dummy value to the low limb to\n\tstop it appearing uninitialized.\n\n\t* gmp-h.in (__GMP_NOTHROW): New macro.\n\t(mp_set_memory_functions, mpz_cmp, mpz_cmp_si, mpz_cmp_ui, mpz_cmpabs,\n\tmpz_cmpabs_ui, mpz_congruent_2exp_p, mpz_divisible_2exp_p,\n\tmpz_fits_sint_p, mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p,\n\tmpz_fits_ulong_p, mpz_fits_ushort_p, mpz_get_si, mpz_get_ui,\n\tmpz_getlimbn, mpz_hamdist, mpz_popcount, mpz_scan0, mpz_scan1,\n\tmpz_size, mpz_sizeinbase, mpz_swap, mpz_tstbit, mpq_equal, mpq_swap,\n\tmpf_cmp, mpf_cmp_si, mpf_cmp_ui, mpf_fits_sint_p, mpf_fits_slong_p,\n\tmpf_fits_sshort_p, mpf_fits_uint_p, mpf_fits_ulong_p,\n\tmpf_fits_ushort_p, mpf_get_default_prec, mpf_get_prec, mpf_get_si,\n\tmpf_get_ui, mpf_integer_p, mpf_set_default_prec, mpf_set_prec_raw,\n\tmpf_size, mpf_swap, mpn_add_1, mpn_cmp, mpn_hamdist, mpn_popcount,\n\tmpn_sub_1): Use it.\n\n\t* gmp-impl.h (MPN_SIZEINBASE, MPN_SIZEINBASE_16): New macros from\n\tmpn_sizeinbase, and use GMP_NUMB_BITS.\n\t* mpz/get_str.c, mpz/sizeinbase.c, mpbsd/mout.c, tune/speed.h: Use\n\tMPN_SIZEINBASE.\n\t* mpbsd/mtox.c: Use 
MPN_SIZEINBASE_16.\n\n\t* configure.in, mpn/Makefile.am, gmp-impl.h (mpn_sizeinbase): Remove.\n\t* mpn/generic/sizeinbase.c: Remove file.\n\n\t* gmp-impl.h (MPN_GET_STR_SIZE): Remove.\n\t* tests/mpn/t-g_str_size.c: Remove file.\n\t* tests/mpn/Makefile.am: Update.\n\n\t* Makefile.am (dist-hook): Don't distribute cvs merge \".#\" files.\n\n2002-02-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Override extra_functions for all sparcv8 systems, not\n\tjust supersparc.\n\n2002-02-06  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c (tune_mul, tune_sqr): Disable FFTs until tuned.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Fix memory clobber in\n\tdestination cache priming.\n\n\t* printf/doprnt.c: Fix parsing of %s and %p conversions.\n\t* tests/misc/t-printf.c (check_misc): Add some tests.\n\n2002-02-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v8/udiv.asm: New file, from v8/supersparc.\n\n\t* mpn/generic/set_str.c: Rename indigits_per_limb => chars_per_limb.\n\tRemove redundant chars_per_limb.  Reverse 4 loops in basecase code for\n\tspeed.  
Use MP_BASES_CHARS_PER_LIMB_10.\n\n2002-02-03  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_NM): Ensure -B or -p get used when doing a\n\tcross compile with the native nm, helps OSF for instance.\n\t(GMP_ASM_LSYM_PREFIX): Remove \".byte 0\" for the benefit of irix 6,\n\tallow \"N\" from nm for OSF, allow for \"t\" for other systems, but prefer\n\tno mention of the symbol at all.\n\n\t* tune/tuneup.c (print_define_remark): New function.\n\tTurn some \"#if\"s into plain \"if\"s.\n\n\t* tune/tuneup.c, gmp-impl.h, tune/Makefile.am\n\t(GET_STR_BASECASE_THRESHOLD, GET_STR_PRECOMPUTE_THRESHOLD): Tune these.\n\t* mpn/generic/get_str.c [TUNE_PROGRAM_BUILD]: Cope with non-constant\n\tGET_STR_PRECOMPUTE_THRESHOLD.\n\n2002-02-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c (mpn_get_str): Fix typo in a declaration.\n\n2002-02-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/set_str.c: Use MP_PTR_SWAP and POW2_P, add __GMP_PROTO\n\tto convert_blocks prototype, disable SET_STR_BLOCK_SIZE sanity check.\n\n\t* tune/set_strb.c, tune/set_strs.c: New files.\n\t* tune/speed.h, tune/speed.c, tune/common.c,tune/Makefile.am: Add them.\n\t* tune/tuneup.c: Tune SET_STR_THRESHOLD.\n\t(DEFAULT_MAX_SIZE): Renamed from MAX_SIZE, allow any param.max_size[].\n\n2002-02-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/convert.c: Increase operand size.  
Add (yet disabled) code\n\tfor testing with random strings.\n\n\t* mpn/generic/get_str.c (mpn_get_str): Rewrite to become sub-quadratic.\n\t(mpn_dc_get_str, mpn_sb_get_str): New functions.\n\n2002-01-31  Kevin Ryde  <kevin@swox.se>\n\n\t* gmpxx.h (cmp): Renamed from \"compare\".\n\n\t* configure.in (AC_C_BIGENDIAN): Don't abort when cross compiling.\n\t(PROLOGUE): Allow new style optional second parameter when grepping.\n\n\t* acinclude.m4 (GMP_HPC_HPPA_2_0, GMP_ASM_UNDERSCORE,\n\tGMP_ASM_ALIGN_LOG, GMP_ASM_LSYM_PREFIX, GMP_ASM_W32, GMP_ASM_X86_MMX):\n\tUse $ac_objext for object filenames.\n\t(GMP_ASM_UNDERSCORE): Use CCAS to assemble.\n\n\t* demos/pexpr-config-h.in: New file.\n\t* configure.in: Generate demos/pexpr-config.h.\n\t(AC_CHECK_FUNCS): Add clock, cputime, setrlimit, sigaction,\n\tsigaltstack, sigstack.\n\t* acinclude.m4 (GMP_SUBST_CHECK_FUNCS, GMP_SUBST_CHECK_HEADERS): New\n\tmacros.\n\t* demos/pexpr.c: Use pexpr-config.h, not various #ifdefs.\n\t(setup_error_handler): Use signal if sigaction not available, allow\n\tfor SIGBUS missing on mingw.\n\t(main): Use time() for random seed if gettimeofday not available.\n\t(cleanup_and_exit): Move SIGFPE out of LIMIT_RESOURCE_USAGE.\n\n2002-01-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/set_str.c: Rewrite to become sub-quadratic.\n\t(convert_blocks): New function.\n\n2002-01-30  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (GMP_NUMB_MASK, GMP_NAIL_MASK, GMP_NUMB_HIGHBIT,\n\tASSERT_MPN, ASSERT_MP_LIMB_T): New macros.\n\n\t* mpn/generic/fib2_ui.c: Use GMP_NUMB_BITS, simplify the data\n\tgenerator program, share __gmp_fib_table initializers between bit\n\tsizes, cope with bit sizes other than those specifically setup.\n\t* gmp-impl.h (FIB_TABLE_LIMIT, FIB_TABLE_LUCNUM_LIMIT): Corresponding\n\trearrangement of conditionals.\n\t* tests/mpz/t-fib_ui.c (check_fib_table): New test.\n\n2002-01-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/set_si.c, mpz/iset_si.c: Store to _mp_d[0] unconditionally, use\n\tan 
expression for _mp_size.\n\n\t* mpz/init.c, mpz/init2.c, mpz/iset.c, mpq/init.c [__CHECKER__]: Store\n\tdummy values to low limbs to stop them appearing uninitialized.\n\n2002-01-26  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/mpfr-test.h (MAX, MIN, ABS): Use instead a patch from Paul and\n\tVincent.\n\n2002-01-24  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Extra quoting to get argument help messages right.\n\n\t* gmp.texi (Efficiency): Suggest hex or octal for input and output.\n\t(Formatted Output Strings): Mention \"*\" for width and precision.\n\n\t* mpn/generic/sizeinbase.c: New file, adapted from mpz/sizeinbase.c.\n\tUse POW2_P, use __mp_bases[base].big_base for log2(base).\n\t* configure.in, mpn/Makefile.am: Add it.\n\t* gmp-impl.h: Add prototype.\n\t* mpz/sizeinbase.c, tune/speed.h, mpn/generic/get_str.c,\n\tmpz/get_str.c, mpbsd/mout.c, mpbsd/mtox.c: Use it.\n\t* mpz/get_str.c: Write directly to user buffer, skip at most one\n\tleading zero, eliminate special case for x==0.\n\t* mpbsd/mtox.c: Allocate exact result space at the start, eliminate\n\tspecial case for x==0.\n\t* mpbsd/mout.c: Only need to skip one high zero with mpn_sizeinbase.\n\n\t* configure.in (--enable-nails): New option.\n\t(GMP_NAIL_BITS, GMP_LIMB_BITS, GMP_NUMB_BITS): New defines for gmp.h\n\tand config.m4.\n\t* gmp-h.in: Add templates.\n\n\t* mpfr/mpfr-test.h (MAX, MIN, ABS): Use #ifndef to avoid a redefine\n\terror on AIX xlc.\n\n2002-01-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c: Correct type of `out_len'.\n\n2002-01-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/pre_divrem_1.c: Corrections to some ASSERTs.\n\n\t* mpfr/mul_ui.c: Don't call mpn_lshift with 0 shift.\n\n\t* mpfr/mpz_set_fr.c: Produce correct mpz_t for f==0.\n\n2002-01-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (32-bit powerpc add_ssaaaa): Remove spurious commutative\n\tdeclaration.\n\t(64-bit powerpc add_ssaaaa): Likewise.\n\n2002-01-20  Kevin Ryde  <kevin@swox.se>\n\n\t* 
acinclude.m4 (GMP_FUNC_VSNPRINTF): Use %n to better detect sparc\n\tsolaris 2.7 problems.\n\n2002-01-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (mpz_eval_expr): Optimize s^rhs for -1 <= s <= 1.\n\t(cleanup_and_exit): Improve error message wording.\n\n2002-01-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr/mpfr.h (_PROTO): Use __GMP_PROTO, for compatibility with\n\tgmp-impl.h.\n\n2002-01-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpfr/mpfr-test.h: Test \"__hpux\", not \"hpux\".  Mask off mrand48\n\treturn value to 31 bits to work around sloppy mpfr #include practices.\n\n\t* mpfr/tests/*.c: Use #include \"\", not <>, for gmp.h and mpfr.h.\n\tMake sure to #include mpfr-test.h from all files that use random().\n\n2002-01-17  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (__GMP_REALLOCATE_FUNC_MAYBE_TYPE): New macro.\n\t* gmp-impl.h, mpz/get_str.c, mpz/out_raw.c, mpq/get_str.c,\n\tmpq/set_str.c, mpf/get_str.c, printf/asprntffuns.c, printf/doprnt.c,\n\tprintf/repl-vsnprintf.c, printf/snprntffuns.c, scanf/doscan.c,\n\tmpbsd/mtox.c: Some fixes to compile as C++.\n\n\t* mpn/generic/jacbase.c (JACOBI_BASE_METHOD): New tuned parameter,\n\treplacing COUNT_TRAILING_ZEROS_TIME test.  
Add a third method too.\n\t* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add\n\tmeasuring of mpn_jacobi_base methods.\n\t* tune/jacbase1.c, tune/jacbase2.c, tune/jacbase3.c: New files.\n\t* tune/tuneup.c (JACOBI_BASE_METHOD): Tune this.\n\t* mpn/x86/*/gmp-mparam.h (COUNT_TRAILING_ZEROS_TIME): Remove macro.\n\n\t* gmp-h.in: Use __gmp prefix on variables in inlines.\n\n\t* gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Remove __i, unused.\n\n\t* mpn/generic/mul_fft.c: Use HAVE_NATIVE_mpn_addsub_n, not ADDSUB.\n\tUse CNST_LIMB for some constants.\n\n2002-01-15  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpbsd/Makefile.am: Add a convenience rule for ../libtests.la.\n\n\t* printf/Makefile.am: libdummy.la should be in EXTRA_LTLIBRARIES.\n\n\t* mpf/out_str.c: Use MPF_SIGNIFICANT_DIGITS, so mpf_out_str and\n\tmpf_get_str give the same for ndigits==0.\n\n\t* mpfr/exceptions.c (mpfr_set_emin, mpfr_set_emax): Work around a\n\tpowerpc64 gcc 3.0 -O2 bug.\n\n\t* tests/memory.c, tests/tests.h (tests_memory_validate): New function.\n\n2002-01-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/sb_divrem_mn.c, mpn/generic/divrem_1.c,\n\tmpn/generic/divrem_2.c, mpn/generic/mod_1.c: Don't use UMUL_TIME and\n\tUDIV_TIME, just default to preinv.\n\t* gmp-impl.h (USE_PREINV_DIVREM_1, USE_PREINV_MOD_1): Ditto.\n\t(DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD): Don't use UMUL_TIME\n\tand UDIV_TIME, make default thresholds 0.\n\t(UDIV_NORM_PREINV_TIME, UDIV_UNNORM_PREINV_TIME): Remove macros.\n\t* mpn/x86/*/gmp-mparam.h (UMUL_TIME, UDIV_TIME,\n\tUDIV_NORM_PREINV_TIME): Remove macros.\n\n\t* gmp.texi (Headers and Libraries): New section, being the header\n\tnotes from \"GMP Basics\" and some new stuff.\n\t(Parameter Conventions): Notes on \"const\" parameters.\n\t(Formatted Output Strings): Add type N, tweak some wording.\n\n\t* tests/refmpn.c (refmpn_divmod_1c): Avoid a bug in i386 gcc 3.0.\n\n2002-01-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/root.c: Add <stdlib.h>, for 
abort().\n\n\t* mpfr/tests/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.\n\t* mpfr/mpfr.h, mpfr/mpfr-tests.h, reuse.c, tadd.c, tadd_ui.c, tagm.c,\n\ttatan.c, tcmp2.c, tcos.c, tdiv.c, tdiv_ui.c, teq.c, texp.c,\n\ttget_str.c, thyperbolic.c, tlog.c, tmul.c, tout_str.c, tpow.c,\n\ttrandom.c, tset_z.c, tsin.c, tsqrt.c, tsqrt_ui.c, tsub_ui.c, ttan.c,\n\ttui_div.c: Fixes for K&R.\n\n\t* tests/misc/t-scanf.c (check_misc):\n\n\t* tests/mpz/t-inp_str.c, tests/mpq/t-inp_str.c, tests/misc/t-scanf.c:\n\tAvoid strings in ASSERT, not enjoyed by K&R.\n\t* gmp-impl.h (ASSERT): Note this.\n\n\t* tests/tests.h (refmpn_mod_34lsub1): Add __GMP_PROTO.\n\n\t* mpbsd/Makefile.am: Avoid an automake problem with ansi2knr and\n\tsources in a different directory.\n\n\t* printf/repl-vsnprintf.c: Test HAVE_LONG_DOUBLE for long double.\n\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add mod_34lsub1.c,\n\tmul_2.c, pre_divrem_1.c.\n\n\t* gmp-h.in, gmp-impl.h (mpn_add_nc, mpn_addmul_1c, mpn_addsub_n,\n\tmpn_addsub_nc, mpn_divrem_1c, mpn_dump, mpn_mod_1c, mpn_mul_1c,\n\tmpn_mul_basecase, mpn_sqr_n, mpn_sqr_basecase, mpn_sub_nc,\n\tmpn_submul_1c): Move to gmp-impl.h, since they're undocumented.\n\n\t* gmp-impl.h (mpn_reciprocal): Remove, unused.\n\n\t* tune/many.pl (cntlz, cnttz): Use new SPEED_ROUTINE_COUNT_ZEROS.\n\n2002-01-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/hppa/*.asm, mpn/pa64/*.asm, mpn/pa64w/*.asm: Use L().\n\n2002-01-08  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/asm-defs.m4 (PROLOGUE, EPILOGUE): New scheme, optional function\n\tname to EPILOGUE, check for missing or wrong function name EPILOGUE.\n\t* mpn/alpha/unicos.m4, mpn/alpha/default.m4, mpn/m68k/m68k-defs.m4,\n\tmpn/mips3/mips.m4, mpn/ia64/default.m4, mpn/powerpc32/aix.m4,\n\tmpn/powerpc64/aix.m4, mpn/x86/x86-defs.m4: Consequent updates, add a\n\tfew more asserts.\n\t* mpn/alpha/unicos.m4, mpn/alpha/default.m4, mpn/alpha/cntlz.asm,\n\tmpn/alpha/invert_limb.asm (PROLOGUE_GP): Change to an optional \"gp\"\n\tparameter 
on plain PROLOGUE.\n\n\t* gmp.texi (Low-level Functions): mpn_get_str doesn't clobber an extra\n\tlimb, and doesn't clobber at all for power of 2 bases.\n\t(Language Bindings): Add python gmpy.\n\n\t* mpz/get_str.c: Determine realloc size arithmetically.\n\n\t* mpbsd/mtox.c: Size memory block returned to actual space needed.\n\t* gmp.texi (BSD Compatible Functions): Describe this.\n\n\t* mpz/get_str.c: Don't copy mpn_get_str input for power of 2 bases.\n\t* mpbsd/mtox.c: Ditto, and as a side effect avoid a memory leak from a\n\tmissing TMP_FREE.\n\n\t* mpz/get_str.c, mpbsd/mout.c: No longer need for +1 limb for\n\tmpn_get_str clobber.\n\n\t* gmp-impl.h (MPN_GET_STR_SIZE): New macro.\n\t* mpn/generic/get_str.c, mpz/get_str.c, mpbsd/mout.c, mpbsd/mtox.c,\n\ttune/speed.h: Use it.\n\t* tests/mpn/t-g_str_size.c: New test.\n\t* tests/mpn/Makefile.am: Add it.\n\n\t* gmp-impl.h (POW2_P): New macro.\n\t* mpn/generic/get_str.c, tests/misc.c: Use it.\n\n\t* printf/doprnt.c: Add \"N\" for mpn, share some code between N, Q and Z.\n\t* tests/misc/t-printf.c: Add tests.\n\t* gmp-impl.h (ASSERT_CODE): New macro.\n\n\t* tests/mpbsd/t-mtox.c: New test.\n\t* tests/mpbsd/Makefile.am: Add it.\n\t(allfuns_LDADD): Don't link against libgmp when testing everything in\n\tlibmp can link.\n\n2002-01-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Rewrite generic versions.\n\n2002-01-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/pre_divrem_1.c: Don't support size==0.\n\t* tests/devel/try.c: Update.\n\n\t* mpn/generic/get_str.c: Add special case for base==10.\n\t* gmp-impl.h (MP_BASES_CHARS_PER_LIMB_10, MP_BASES_BIG_BASE_10,\n\tMP_BASES_BIG_BASE_INVERTED_10, MP_BASES_NORMALIZATION_STEPS_10): New\n\tconstants.\n\t* tests/t-constants.c: Add checks.\n\t* mpn/mp_bases.c [GENERATE_TABLE]: Print defines for gmp-impl.h, print\n\tall standard bits-per-limb by default.\n\n\t* demos/pexpr.c, demos/expr/expr.h, demos/expr/expr-impl.h: 
Use\n\t__GMP_PROTO.\n\n\t* gmp-h.in (mpn_divexact_by3c): Remove variables from prototype, to\n\tkeep out of application namespace.\n\n2002-01-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: Move _PROTO declaration to before its first usages.\n\n2002-01-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, mp-h.in, tests/tests.h: Rename _PROTO to __GMP_PROTO, and\n\tdon't use #ifndef just define it ourselves.\n\t* gmp-impl.h: Provide _PROTO as an alias for __GMP_PROTO, to avoid big\n\tedits internally, for the moment.\n\n2002-01-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/speed.c (usage): Insert \"\\n\\\" into a string.\n\n2001-12-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/udiv_qrnnd.c: Remove file.\n\t* mpn/pa64w/udiv_qrnnd.c: Remove file.\n\n\t* gmp-impl.h (MPN_IORD_U): Change formatting (labels in pos 0, insns\n\tindented by tab).\n\t(MPN_INCR_U): Use \"addl $1,foo; jc\", not \"incl foo; jz\".\n\n\t* gmp-impl.h (udiv_qrnnd_preinv): Use plain subtract, not sub_ddmmss,\n\tin one more case.\n\n2001-12-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/get_str.c (udiv_qrnd_unnorm): New macro.\n\tUse \"do while\" for dig_per_u loop since it's non-zero.\n\t* acconfig.h (HAVE_HOST_CPU_m68k etc): Add templates.\n\n\t* mpn/generic/mul_basecase.c, mpz/mul.c, mpz/n_pow_ui.c,\n\tmpn/x86/pentium/mul_2.asm, tests/devel/try.c, tests/tests.h,\n\ttests/refmpn.c, tune/speed.c, tune/speed.h, tune/common.c,\n\ttune/many.pl (mpn_mul_2): New parameter style.\n\t* gmp-impl.h (mpn_mul_2): Add prototype.\n\t* configure.in (gmp_mpn_functions_optional): Add mul_2.\n\n\t* longlong.h (__vxworks__): Remove from powerpc tests, not correct,\n\tnot on its own at least.\n\n\t* tune/speed.c: Add \"aas\" to specify 0xAA..AA data.\n\n\t* tune/tuneup.c (print_define_end): Indicate \"never\" and \"always\".\n\n2001-12-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpq/set_d.c: ANSI-fy.\n\t* mpz/invert.c: Use PTR and SIZ (cosmetic change).\n\n\t* mpz/cong.c: Rename `xor' to 
`sign' to avoid C++ reserved word.\n\n2001-12-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/sqr_diagonal.asm: New file.\n\n2001-12-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/get_str.c: Avoid one mpn_divrem_1 by running main loop\n\tonly until msize==1.\n\n\t* tune/tuneup.c: Break up all() for clarity.\n\t(USE_PREINV_DIVREM_1, USE_PREINV_MOD_1): Compare against plain\n\tdivision udiv_qrnnd, not the tuned and possibly preinv version.\n\n\t* tune/freq.c: Split sysctl and sysctlbyname probes into separate\n\tfunctions, shorten some identifiers, put descriptions inside\n\tfunctions, define functions unconditionally and do nothing if\n\trequisites not available.\n\n\t* mpz/inp_raw.c: Avoid a gcc 3.0 powerpc64 bug on AIX.\n\n\t* acinclude.m4, configure.in (GMP_C_RESTRICT): New macro.\n\n\t* mpfr/sin.c: Patch from Paul to fix sign of sin(3pi/2).\n\n\t* demos/calc/calc.y: Improve some error messages.\n\n2001-12-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1.asm: Rename r72 -> r80.\n\t* mpn/sparc64/addmul_1.asm: Likewise.\n\n2001-12-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Misc formatting cleanups.\n\tFor switch case 2, replace `dn' with its value (2).\n\n2001-12-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/devel/mul_1.c: Add FIXED_XLIMB.\n\t* tests/devel/addmul_1.c: Likewise.\n\t* tests/devel/submul_1.c: Likewise.\n\n\t* tests/devel/add_n.c: Improve error message.\n\tAccept command line argument for # of tests.\n\t* tests/devel/sub_n.c: Likewise.\n\n\t* tests/devel/: Remove CLOCK settings.\n\n\t* mpn/sparc32/v9/mul_1.asm: Rewrite.\n\t* mpn/sparc32/v9/addmul_1.asm: Rewrite.\n\t* mpn/sparc32/v9/submul_1.asm: Rewrite.\n\n2001-12-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1.asm: Get rid of global constant 0.0 (L(noll)).\n\t* mpn/sparc64/addmul_1.asm: Likewise.\n\n2001-12-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c: Move final ASSERT to just before zero 
fill\n\tloop.\n\n2001-12-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/get_str.c: Move ASSERTs out of loops.  Split digit\n\tgeneration code into two loops, saving a test of msize in the loop.\n\n2001-12-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/x86-defs.m4, mpn/x86/*/*.asm: Remove L / LF scheme putting\n\tfunction name in local labels.\n\n\t* mpn/generic/get_str.c: Use mpn_preinv_divrem_1, add a couple of\n\tASSERTs.\n\n\t* mpn/generic/pre_divrem_1.c: New file.\n\t* configure.in (gmp_mpn_functions): Add it.\n\t* gmp-impl.h (mpn_preinv_divrem_1): Add prototype.\n\t(USE_PREINV_DIVREM_1, MPN_DIVREM_OR_PREINV_DIVREM_1): New macros.\n\t* tests/devel/try.c, tune/speed.c, tune/speed.h, tune/common.c,\n\ttune/many.pl, tune/Makefile.am (mpn_preinv_divrem_1): Add testing and\n\tmeasuring.\n\t* tune/tuneup.c: Determine USE_PREINV_DIVREM_1.\n\t* tune/pre_divrem_1.c: New file.\n\t* tests/refmpn.c, tests/tests.h (refmpn_preinv_divrem_1): New function.\n\n\t* tests/mpz/t-io_raw.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/inp_raw.c, mpz/out_raw.c: Rewrite.\n\t* acinclude.m4, configure.in (AC_C_BIGENDIAN): New test.\n\t* gmp-impl.h (BSWAP_LIMB): New macro.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): For a native compile, demand\n\texecutables will run, per AC_PROG_CC.  
This detects ABI=64 is unusable\n\tin a native sparc solaris 7 build with the kernel in 32-bit mode.\n\t* gmp.texi (ABI and ISA): Add notes on this, add an example configure\n\tsetting an ABI.\n\n\t* tune/tuneup.c, configure.in: Print the gmp-mparam.h filename.\n\t* tune/tuneup.c: Print the CPU frequency.\n\n\t* tune/time.c, tune/speed.h: Add s390 \"stck\" method, flatten\n\tconditionals in speed_time_init a bit, use have_* variables to let\n\tsome code go dead in speed_starttime and speed_endtime.\n\n\t* tune/freq.c (speed_cpu_frequency_irix_hinv): New function.\n\n\t* Makefile.am, configure.in: Restore mpfr.\n\n\t* configure.in: Add --with-readline, AC_PROG_YACC and AM_PROG_LEX.\n\t* demos/calc/calc.y, demos/calc/calclex.l: Add readline support, add\n\tlucnum function.\n\t* demos/calc/Makefile.am: Add calcread.c, calc-common.h, use $(YACC),\n\t$(LEX) and $(LEXLIB).\n\t* demos/calc/calcread.c, demos/calc/calc-common.h,\n\tdemos/calc/calc-config-h.in, demos/calc/README: New files.\n\n\t* configure.in: Put demos/expr configs in expr-config.h.\n\t* demos/expr/expr-config-h.in: New file.\n\t* demos/expr/expr-impl.h: Renamed from expr-impl-h.in, get configs\n\tfrom expr-config.h.\n\t* demos/expr/Makefile.am: Update.\n\n\t* demos/expr/exprfr.c: Use mpfr_sin and mpfr_cos, remove some spurious\n\treturns.\n\n2001-12-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1.asm: Trim an instruction.\n\t* mpn/sparc64/addmul_1.asm: Likewise.\n\n\t* mpn/ia64/add_n.asm: Rewrite.\n\t* mpn/ia64/sub_n.asm: Rewrite.\n\n2001-12-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/mul_1.asm: Rewrite.\n\t* mpn/ia64/addmul_1.asm: Rewrite.\n\t* mpn/ia64/submul_1.c: Use TMP_ALLOC_LIMBS.\n\n\t* tests/devel/mul_1.c: Improve error message.\n\tAccept command line argument for # of tests.\n\t* tests/devel/addmul_1.c: Likewise.\n\t* tests/devel/submul_1.c: Likewise.\n\n2001-12-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/mul_1.asm: Add NOPs to save a cycle on 
R1x000.\n\n2001-12-18  Kevin Ryde  <kevin@swox.se>\n\n\t* gmpxx.h (gmp_randclass): Don't allow copy constructors or \"=\",\n\timplementation by Gerardo.\n\n\t* gmp-h.in (operator<<, operator>>): Remove parameter names from\n\tprototypes, to keep out of user namespace.\n\n\t* acinclude.m4 (GMP_FUNC_VSNPRINTF): Let the test program work as C++.\n\n2001-12-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1.asm: Rewrite.\n\t* mpn/sparc64/addmul_1.asm: Rewrite.\n\t* mpn/sparc64/submul_1.asm: Rewrite.\n\n\t* mpn/sparc64/addmul1h.asm: Remove.\n\t* mpn/sparc64/submul1h.asm: Remove.\n\t* mpn/sparc64/mul1h.asm: Remove.\n\n2001-12-15  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (mpn_add, mpn_add_1, mpn_cmp, mpn_sub, mpn_sub_1): Follow\n\t__GMP_INLINE_PROTOTYPES for whether to give prototype with inline.\n\n\t* configure.in (i686*-*-*, pentiumpro-*-*, pentium[23]-*-*,\n\tathlon-*-*, pentium4-*-*): Fall back on -march=pentium if\n\t-march=pentiumpro or higher is not good (eg. solaris cmov).\n\n2001-12-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_ZERO): Rewrite generic version to be similar to\n\tpowerpc version.\n\n2001-12-12  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Detect cmov problems with gcc\n\t-march=pentiumpro on solaris 2.8.\n\n\t* tune/common.c, tune/speed.h: Allow for commas in count_leading_zeros\n\tand count_trailing_zeros macros.\n\n\t* demos/expr/Makefile.am: Distribute exprfr.c and exprfra.c.\n\n\t* tune/Makefile.am (speed_ext_SOURCES): Should be speed-ext.c.\n\n2001-12-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/s390/addmul_1.asm: New file.\n\t* mpn/s390/submul_1.asm: New file.\n\t* mpn/s390/mul_1.asm: New file.\n\t* mpn/s390/gmp-mparam.h: Update.\n\n2001-12-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, mp-h.in, gmp-impl.h: __GMP_DECLSPEC at start of\n\tprototypes, for the benefit of Microsoft C.\n\n\t* gmp.texi (Introduction to GMP): Mention ABI and ISA section.\n\t(Known Build Problems): 
Recommend GNU sed on solaris 2.6.\n\t(Assigning Integers): Direct feedback to bug-gmp.\n\t(References): Typo Knuth vol 2 is from 1998.\n\n\t* gmpxx.h (gmp_randclass): Add initializers for gmp_randinit_default\n\tand gmp_randinit_lc_2exp_size.\n\tgmp.texi (C++ Interface Random Numbers): Describe them.\n\n\t* tests/misc/t-locale.c, tests/cxx/t-locale.cc: Ensure mpf_clear is\n\tdone when the localconv override doesn't work.  Reported by Mike\n\tJetzer.\n\n\t* printf/doprnti.c: Don't showbase on a zero mpq denominator.\n\t* tests/misc/t-printf.c, tests/cxx/t-ostream.c: Add test cases.\n\n2001-12-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Known Build Problems): Update to gmp_randinit_lc_2exp_size\n\tfor the sparc solaris 2.7 problem.\n\t(Reentrancy): SCO ctype.h affects all text-based input functions.\n\t(Formatted Output Strings): Correction to the mpf example.\n\t(Single Limb Division): Correction, should be q-1 not q+1.\n\t(Extended GCD): Clarify why single-limb is inferior.\n\t(Raw Output Internals): Clarify size is twos complement, note limb\n\torder means _mp_d doesn't get directly read or written.\n\t(Contributors): Clarify mpz_jacobi.\n\tAnd a couple of formatting tweaks elsewhere.\n\n\t* tests/cxx/t-headers.cc: New file.\n\t* tests/cxx/Makefile.am: Add it.\n\n\t* gmpxx.h: Add <strstream>, needed by mpf_class::get_str2.\n\n\t* gmp-h.in (mpq_inp_str, mpn_hamdist): Add __GMP_DECLSPEC.\n\n2001-12-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 4.0 released.\n\n\t* mpfr/README: Replace contents with explanation of why mpfr is gone.\n\n2001-12-01  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am, configure.in: Temporarily remove mpfr, just leave a\n\tREADME.\n\n\t* mpn/Makefile.am (EXTRA_DIST): Add Makeasm.am.\n\n2001-11-30  Gerardo Ballabio  <ballabio@sissa.it>\n\n\t* tests/cxx/t-constr.cc, tests/cxx/t-expr.cc: New files.\n\t* tests/cxx/Makefile.am (check_PROGRAMS): Add them.\n\n2001-11-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpfr: Update to 2001-11-16.  
Patch TMP handling of agm.c and sqrt.c,\n\tuse plain mpn_sqrtrem in sqrt.c, separate .c files for floor and ceil,\n\tdisable an expression style assert in add1.c.\n\n\t* mpn/s370: Rename to s390.\n\t* configure.in (s3[6-9]0*-*-*): Update.\n\t* mpn/Makefile.am (TARG_DIST): Add s390.\n\n\t* mpz/fits_s.c, mpf/fits_s.c, mpf/fits_u.c: Remove files, unused since\n\tchange to .h style.\n\n2001-11-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in: Declare mpz_get_d_2exp and mpf_get_d_2exp.\n\t* Makefile.am: Add mpz/get_d_2exp$U.lo and mpf/get_d_2exp$U.lo.\n\t* mpf/Makefile.am: Add get_d_2exp.c.\n\t* mpz/Makefile.am: Add get_d_2exp.c.\n\n2001-11-29  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/*/gmp-mparam.h: Update measured thresholds.\n\t* mpn/s370/gmp-mparam.h: New file.\n\n\t* mpz/millerrabin.c: Mark for internal use only, for now.\n\t* gmp.texi (Number Theoretic Functions): Remove documentation.\n\n2001-11-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/get_d_2exp.c: New file.\n\t* mpz/get_d_2exp.c: New file.\n\n\t* mpz/realloc2.c: Fix typo.  Make more similar to mpz_realloc.\n\t* mpz/realloc.c: Use __GMP_REALLOCATE_FUNC_LIMBS.\n\n2001-11-27  Gerardo Ballabio  <ballabio@sissa.it>\n\n\t* gmpxx.h, mpfrxx.h: Various updates and improvements.\n\n2001-11-27  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Useful Macros and Constants): Add gmp_version, add @findex\n\tfor mp_bits_per_limb.\n\n\t* demos/perl/GMP.pm, demos/perl/GMP.xs: Use new style gmp_randinit's.\n\t* demos/perl/test.pl: Update for this, and for mpz_perfect_power_p\n\thandling of 0 and 1.\n\n2001-11-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/realloc.c: Clear variable when decreasing allocation to less than\n\tneeded.  
Misc updates.\n\n2001-11-25  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/misc/t-locale.c: Avoid printf in the normal case, since the\n\treplacement localeconv breaks it on SunOS 4.\n\n\t* gmp.texi (Build Options, Notes for Package Builds): Note libgmpxx\n\tdepends on libgmp from same GMP version.\n\n\t* acinclude.m4, configure.in (GMP_FUNC_SSCANF_WRITABLE_INPUT): New\n\ttest.\n\t* scanf/sscanf.c, scanf/vsscanf.c: Use it to ensure sscanf input is\n\twritable, if necessary.\n\n\t* tests/misc/t-scanf.c: Ensure sscanf arguments are writable, always.\n\t* configure.in (AC_CHECK_DECLS): Remove sscanf, no longer required.\n\n\t* configure.in (none-*-*): Fix default CFLAGS setups.\n\n\t* doc/configuration: Misc updates.\n\n2001-11-23  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/init2.c, mpz/realloc2.c: New files.\n\t* Makefile.am, mpz/Makefile.am: Add them.\n\t* gmp-h.in: Add prototypes.\n\t* gmp.texi (Efficiency): Mention these instead of _mpz_realloc.\n\t(Initializing Integers): Add documentation, reword other parts.\n\n2001-11-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/addmul_1.c: Fix logic for more_carries scalar loop.\n\t* mpn/cray/ieee/submul_1.c: Likewise.\n\n2001-11-20  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Known Build Problems): Note an out of memory on DJGPP.\n\t(Function Classes): Update function counts.\n\tMisc tweaks elsewhere.\n\n\t* configure.in (AC_CHECK_DECLS): Add sscanf.\n\t* tests/misc/t-scanf.c: Use it, for the benefit of SunOS 4.\n\n\t* tal-debug.c, gmp-impl.h: More checks of TMP_DECL/TMP_MARK/TMP_FREE\n\tconsistency.\n\n\t* mpfr/Makefile.am (AR): Explicit AR=@AR@ to override automake\n\tdefault, necessary for powerpc64 ABI=aix64.\n\n2001-11-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c: Move TMP_MARK to before any TMP_ALLOCs.\n\n2001-11-18  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (--enable-fft): Make this the default.\n\t* gmp.texi (Build Options): Update.\n\n\t* Makefile.am (libmp_la_DEPENDENCIES): Revise mpz 
objects needed by\n\tnew mpz/powm.c.\n\n\t* gmp.texi (Random State Initialization): Add gmp_randinit_default and\n\tgmp_randinit_lc_2exp_size, mark gmp_randinit as obsolete.\n\t(Random State Seeding): New section, taken from \"Random State\n\tInitialization\" and \"Random Number Functions\".\n\n\t* configure.in (AC_CHECK_DECLS): Add fgetc, fscanf, ungetc.\n\t* scanf/fscanffuns.c: Use these, for the benefit of SunOS 4.\n\n\t* gmp-impl.h, gmp-h.in (__gmp_default_fp_limb_precision): Move back to\n\tgmp-impl.h now not required for inlined mpf.\n\n\t* randlc2s.c (gmp_randinit_lc_2exp_size): New file, the size-based LC\n\tselection from rand.c.\n\t* rand.c (gmp_randinit): Use it.\n\t* randdef.c (gmp_randinit_default): New file.\n\t* gmp-impl.h (RANDS): Use it.\n\t(ASSERT_CARRY): New macro.\n\t* gmp-h.in (gmp_randinit_default, gmp_randinit_lc_2exp_size): Add\n\tprototypes.\n\t* Makefile.am (libgmp_la_SOURCES): Add randdef.c and randlc2s.c.\n\n\t* printf/asprntffuns.c: Include config.h before using its defines.\n\n\t* gmp-impl.h: Move C++ <string> to top of file to avoid the memset\n\tredefine upsetting configure tests.  
Remove <iostream> since <iosfwd>\n\tin gmp.h suffices.\n\n2001-11-16  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Exponentiation): mpz_powm supports negative\n\texponents.\n\t(Assigning Floats, I/O of Floats, C++ Formatted Output, C++ Formatted\n\tInput): Decimal point follows locale.\n\t(Formatted Output Strings): %n accepts any type.\n\t(Formatted Input Strings): New section.\n\t(Formatted Input Functions): New section.\n\t(C++ Class Interface): Corrections and clarifications suggested by\n\tGerardo.\n\n\t* scanf/doscan.c, scanf/fscanf.c, scanf/fscanffuns.c, scanf/scanf.c,\n\tscanf/sscanf.c, scanf/sscanffuns.c, scanf/vfscanf.c, scanf/vscanf.c,\n\tscanf/vsscanf.c, scanf/Makefile.am, tests/misc/t-scanf.c: New files.\n\t* gmp-h.in, gmp-impl.h, Makefile.am, configure.in: Consequent\n\tadditions.\n\n\t* tests/misc: New directory.\n\t* tests/misc/Makefile.am: New file.\n\t* tests/misc/t-locale.c: New file.\n\t* tests/misc/t-printf.c: Moved from tests/printf.\n\t* tests/printf: Remove directory.\n\t* configure.in, tests/Makefile.am: Update.\n\n\t* tests/cxx/t-locale.cc: New file.\n\t* tests/cxx/Makefile.am: Add it.\n\n\t* mpf/set_str.c, cxx/ismpf.cc: Use localeconv for the decimal point.\n\n\t* acinclude.m4 (GMP_ASM_X86_MCOUNT): Update to $lt_prog_compiler_pic\n\tfor current libtool, recognise non-PIC style mcount in windows DLLs.\n\n\t* gmp-impl.h (__gmp_replacement_vsnprintf): Add prototype.\n\n\t* gmp-impl.h (__gmp_rands, __gmp_rands_initialized,\n\tmodlimb_invert_table): Add __GMP_DECLSPEC for the benefit of test\n\tprograms using them from a windows DLL.\n\t* longlong.h (__clz_tab): Ditto.\n\n\t* mpn/x86/t-zdisp2.pl: New file.\n\n\t* mpn/x86/pentium4/README: New file.\n\n2001-11-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c (HANDLE_NEGATIVE_EXPONENT): #define to 1.\n\t* tests/mpz/reuse.c (main): Use\tmpz_invert to avoid undefined mpz_powm\n\tcases.\n\n2001-11-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm_ui.c: Rewrite along the lines of 
mpz/powm.c (except still no\n\tredc).\n\t* mpz/powm.c: Adjust for negative b, after exponentiation done.  Add\n\t(still disabled) code for handling negative exponents.  Misc cleanups.\n\n2001-11-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/out_str.c: Use localeconv for the decimal point.\n\n\t* tests/misc.c (tests_rand_end): Use time() if gettimeofday() not\n\tavailable (eg. on mingw).\n\n2001-11-11  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in: Remove parameter names from prototypes, to keep out of\n\tapplication namespace.\n\n2001-11-08  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_GCC_VERSION_GE): Fix sed regexps to work on\n\tSolaris 8.\n\n\t* printf/doprnt.c: Support %n of all types, per glibc.\n\n\t* gmp-h.in, gmp-impl.h, mpf/abs.c, mpf/neg.c, mpf/get_prc.c,\n\tmpf/get_dfl_prec.c, mpf/set_dfl_prec.c, mpf/set_prc_raw.c,\n\tmpf/set_si.c, mpf/set_ui.c, mpf/size.c: Revert mpf inlining, in order\n\tto leave open the possibility of keeping binary compatibility if mpf\n\tbecomes mpfr.\n\n\t* mpn/x86/k7/mmx/lshift.asm, mpn/x86/k7/mmx/rshift.asm: Use Zdisp to\n\tforce code size for computed jumps.\n\t* mpn/x86/k6/mod_34lsub1.asm, mpn/x86/k6/k62mmx/copyd.asm: Use Zdisp\n\tto force good code alignment.\n\t* mpn/x86/x86-defs.m4 (Zdisp): More instructions.\n\n\t* mpn/x86/pentium/sqr_basecase.asm, mpn/x86/k7/mmx/mod_1.asm,\n\tmpn/x86/k7/mmx/popham.asm: Remove some unnecessary \"0\" address offsets.\n\n\t* mpq/set_si.c, mpq/set_ui.c: Set _mp_den._mp_size correctly if\tden==0.\n\n2001-11-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.asm: Work around gas bug.\n\n\t* mpn/asm-defs.m4 (PROLOGUE): Change alignment to 8 (probably a good\n\tidea in general; required for hppa/hppa1_1/udiv_qrnnd.asm).\n\n2001-11-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (MPN_COPY_INCR): Prepend local variable by `__'.\n\t(MPN_COPY_DECR): Likewise.\n\n2001-11-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c: Call mpn functions, not mpz functions, 
for computation\n\tmod m.  Streamline allocations to use a mixture of stack allocation and\n\theap allocation.  Add currently disabled phi(m) exponent reduction\n\tcode.  Misc optimizations and cleanups.\n\n2001-11-05  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/inp_str.c: Remove unused variable \"ret\".\n\n\t* mpn/x86/k7/sqr_basecase.asm: Fix a 0(%edi) to use Zdisp, so the\n\tcomputed jumps hit the right spot on old gas.\n\n\t* mpq/canonicalize.c: DIVIDE_BY_ZERO if denominator is zero.\n\n\t* mpn/lisp/gmpasm-mode.el (comment-start-skip): Correction to the way\n\tthe first \\( \\) pair is setup.\n\t(gmpasm-font-lock-keywords): Don't fontify the space before a \"#\" etc.\n\tMisc tweaks to some comments.\n\n2001-11-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/refmpn.c (refmpn_overlap_p): Reverse return values.\n\n2001-11-02  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/many.pl: Setup CFLAGS_PIC and ASMFLAGS_PIC, since that's no\n\tlonger done by configure.\n\n\t* mpn/x86/pentium4/mmx/popham.asm: New file.\n\n\t* mpn/x86/x86-defs.m4 (psadbw): New macro.\n\t* mpn/x86/k7/mmx/popham.asm: Use it.\n\n\t* tests/refmpn.c (refmpn_overlap_p): New function, independent of\n\tMPN_OVERLAP_P.\n\n2001-10-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-powm.c: Print proper error message when finding\n\tdiscrepancy.\n\n2001-10-31  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/mod_34lsub1.asm: New file.\n\t* mpn/x86/k7/mod_34lsub1.asm: New file.\n\t* mpn/x86/mod_34lsub1.asm: New file.\n\n2001-10-30  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/printf/t-printf.c (check_misc): Add checks from the glibc docs.\n\t(check_vasprintf, check_vsnprintf): Run these unconditionally.\n\n\t* gmp-impl.h (ASSERT_MPQ_CANONICAL): New macro.\n\t* mpq/cmp.c, mpq/cmp_si.c, mpq/cmp_ui.c, mpq/equal.c: Add ASSERTs for\n\tcanonical inputs, where correctness depends on it.\n\n\t* mpn/lisp/gmpasm-mode.el (comment-start-skip): Add \"dnl\".\n\n2001-10-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* 
demos/pexpr.c: Remove some unused variables.\n\t(main): Allocate more buffer space to accommodate minus sign.\n\n2001-10-27  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h, mpn/asm-defs.m4, configure.in, tune/speed.h,\n\ttune/speed.c, tune/common.c, tune/many.pl, tests/devel/try.c: Add\n\tmpn_mod_34lsub1.\n\t* tests/refmpn.c, tests/tests.h (refmpn_mod_34lsub1): New function.\n\n\t* mpn/generic/mod_34lsub1.c: New file.\n\t* mpn/x86/k6/mod_34lsub1.asm: New file.\n\t* mpn/x86/pentium4/sse2/mod_34lsub1.asm: New file.\n\t* mpn/x86/x86-defs.m4 (Zdisp): Add another instruction.\n\n\t* gmp-h.in, gmpxx.h: Use <iosfwd> not whole <iostream>.\n\n\t* gmp.texi (Known Build Problems): Add note on test programs with\n\tWindows DLLs.\n\n2001-10-26  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpq/t-get_d.c: Limit the size of \"eps\" for vax.\n\n\t* gmp.texi (maybepagebreak): New macro, use it in a few places.\n\t(Notes for Particular Systems): C++ Windows DLLs are not supported.\n\t(Known Build Problems): Note sparc solaris 2.7 gcc 2.95.2 shared\n\tlibrary problems.\n\t(Autoconf): Tweak version numbers shown.\n\t(Integer Roots): mpz_perfect_square_p and mpz_perfect_power_p consider\n\t0 and 1 perfect powers, mpz_perfect_power_p accepts negatives.\n\t(Number Theoretic Functions): Add mpz_millerrabin, combined with a\n\treworded mpz_probab_prime_p.\n\t(Formatted Output Strings): Misc clarifications.\n\t(Formatted Output Functions): gmp_asprintf, gmp_vasprintf,\n\tgmp_snprintf, gmp_vsnprintf always available.\n\t(C++ Formatted Output): Misc rewordings.\n\t(Formatted Input): New chapter.\n\t(C++ Class Interface): New chapter, by Gerardo and me.\n\t(Language Bindings): Update GMP++ now in GMP.\n\t(C++ Interface Internals): New section, by Gerardo and me.\n\n\t* printf/repl-vsnprintf.c: New file.\n\t* configure.in, acinclude.m4, Makefile.am, printf/Makefile.am: Use it\n\tif libc vsnprintf missing or bad.\n\t* configure.in (AC_CHECK_FUNCS): Add strnlen.\n\n\t* printf/snprntffuns.c, 
printf/vasprintf.c: Use\n\t__gmp_replacement_vsnprintf if libc vsnprintf not available.\n\t* printf/asprintf.c, printf/snprintf.c, printf/vasprintf.c,\n\tprintf/vsnprintf.c: Provide these functions unconditionally.\n\t* acinclude.m4 (GMP_FUNC_VSNPRINTF): Remove warning about omissions\n\twhen vsnprintf not available.\n\n2001-10-24  Kevin Ryde  <kevin@swox.se>\n\n\t* configure, aclocal.m4: Regenerate with a libtool patch for a stray\n\tquote in AC_LIBTOOL_PROG_LD_SHLIBS under mingw and cygwin.\n\n\t* gmp-impl.h (modlimb_invert): More comments.\n\n\t* printf/doprnt.c, printf/doprnti.c: Use the precision field to print\n\tleading zeros.\n\t* tests/printf/t-printf.c: Test this.\n\t* cxx/osdoprnti.cc, gmp-impl.h: Ignore precision in operator<<.\n\n\t* tune/speed.c, tune/speed.h, tune/common.c: Add mpn_mul_1_inplace.\n\n2001-10-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/pprime_p.c (mpz_millerrabin): Remove function and its descendant.\n\n\t* mpz/millerrabin.c: New file with code from pprime.c.\n\t* mpz/Makefile.am: Compile millerrabin.c.\n\t* Makefile.am (MPZ_OBJECTS): Ditto.\n\t* gmp-h.in: Declare mpz_millerrabin.\n\n2001-10-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/mpz/t-perfsqr.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* demos/factorize.c (factor): Check for number to factor == 0.\n\t(main): When invoked without arguments, read from stdin.\n\n\t* mpz/perfpow.c: Add code to handle negative perfect powers ((-b)^odd).\n\tTreat 0 and 1 as perfect powers.\n\n\t* mpn/sparc32/v9/sqr_diagonal.asm: Jump past .align.\n\n2001-10-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/perfsqr.c\t(sq_res_0x100): Remove bogus final `,'.\n\t(mpn_perfect_square_p): Suppress superfluous `&1' in sq_res_0x100 test.\n\t(mpn_perfect_square_p, O(n) test): Improve comments.  Combine remainder\n\ttests for some small primes.  Don't share code for different limb\n\tsizes.  
Use single `if' with many `||' for better code density.\n\n2001-10-22  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/perl/GMP.xs (mutate_mpz, tmp_mpf_grow): Make these \"static\".\n\n\t* mpn/x86/pentium/popcount.asm, mpn/x86/pentium/hamdist.asm\n\t(mpn_popcount_table): Use GSYM_PREFIX.\n\n2001-10-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/*.asm: Add some measured speeds on various x86s.\n\n\t* tests/mpz/reuse.c, tests/mpf/reuse.c: Disable tests when using a\n\twindows DLL, because certain global variable usages won't compile.\n\n\t* configure.in (AC_CHECK_FUNCS): Add alarm.\n\t* tests/spinner.c: Conditionalize alarm and SIGALRM availability, for\n\tthe benefit of mingw32.\n\n\t* acinclude.m4 (GMP_ASM_TYPE, GMP_ASM_SIZE): Suppress .type and .size\n\ton COFF.\n\n\t* acinclude.m4 (GMP_PROG_HOST_CC): New macro.\n\t* configure.in: Use it for windows DLL cross-compiles.\n\t* aclocal.m4, configure: Regenerate with libtool patch to hold HOST_CC\n\tin the generated libtool script.\n\n\t* aclocal.m4, configure: Regenerate with libtool patch to suppress\n\twarnings when probing command line limit on FreeBSD.\n\n\t* demos/qcn.c (M_PI): Define if not already provided, helps mingw32.\n\n2001-10-17  Kevin Ryde  <kevin@swox.se>\n\n\t* printf/doprnt.c: Use <stdint.h> for intmax_t.\n\n\t* longlong.h: Recognise __sparcv8 for gcc on Solaris.  
Reported by\n\tMark Mentovai <mark@mentovai.com>.\n\n\t* gmp-impl.h (gmp_allocated_string): No need for inline on member funs.\n\n2001-10-16  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Debugging): Add mpatrol.\n\t(Integer Comparisons, Comparing Rationals, Float Comparison): Index\n\tentries for sign tests.\n\t(I/O of Floats): Clarify mpf_out_str exponent is in decimal.\n\t(C++ Formatted Output): mpf_t operator<< exponent now in decimal.\n\t(FFT Multiplication): Use an ascii art sigma.\n\t(Contributors): Add Gerardo Ballabio.\n\n\t* cxx/osfuns.cc (__gmp_doprnt_params_from_ios): Always give mpf_t\n\texponent in decimal, irrespective of ios::hex or ios::oct.\n\t* tests/cxx/t-ostream.cc (check_mpf): Update.\n\n\t* printf/doprnt.c: Support %lln and %hhn.\n\n\t* mpn/x86/pentium4/sse2/submul_1.asm: Use a psubq to negate the\n\tinitial carry (helps the submul_1c case), and improve the comments.\n\n2001-10-11  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4, configure.in (GMP_IMPL_H_IEEE_FLOATS): New macro.\n\n\t* ltmain.sh: Send some rm errors to /dev/null, helps during compiles\n\ton Solaris 2.7 and HP-UX 10.\n\n\t* tal-notreent.c: Renamed from stack-alloc.c.\n\t* Makefile.am, acinclude.m4, gmp-impl.h: Update.\n\n\t* gmp-h.in: Don't give both prototypes and inlines, except on gcc.\n\n\t* gmp-h.in, gmp-impl.h: Use #includes to get necessary standard\n\tclasses, add std:: to prototypes.\n\t* cxx/*.cc, tests/cxx/t-ostream.cc: Add \"use namespace std\".\n\t* acinclude.m4 (GMP_PROG_CXX_WORKS): Ditto.\n\n\t* tests/*/Makefile.in, mpfr/tests/Makefile.in: Regenerate with\n\tautomake patch to avoid Ultrix problem with empty $(TESTS).\n\n\t* */Makefile.in: Regenerate with automake patch to only rm *_.c in\n\t\"make clean\" when ansi2knr actually in use, helps DOS 8.3.\n\n\t* Makefile.in: Regenerate with automake patch to fix stamp-h\n\tnumbering, avoiding an unnecessary config.status run.\n\n2001-10-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.asm: Use 
L macros for labels.\n\tQuote L reloc operator.\n\n\t* gmp-impl.h: Declare class string.\n\n\t* mpn/asm-defs.m4 (INT32, INT64): Quote $1 to prevent further\n\texpansion.\n\n\t* mpn/alpha/ev6/mul_1.asm: New file.\n\n2001-10-09  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Introduction to GMP): Add pentium 4 to optimized CPUs.\n\t(Build Options): Note macos directory.\n\t(Notes for Package Builds): GMP 4 series binary compatible with 3.\n\t(Known Build Problems): Remove $* and ansi2knr note, now fixed, except\n\tpossibly under --host=none.\n\t(Formatted Output Strings): Remove -1 prec for all digits.\n\n\t* mpz/add.c, mpz/sub.c: Don't use mpz path on #include (helps macos).\n\t* mpbsd/Makefile.am (INCLUDES): Add -I$(top_srcdir)/mpz.\n\n\t* printf/doprnt.c, tests/printf/t-printf.c: Remove support for %.*Fe\n\tprec -1 meaning all digits.\n\n\t* acinclude.m4 (GMP_PROG_AR): Override libtool, use AR_FLAGS=\"cq\".\n\t(GMP_HPC_HPPA_2_0): Print version string to config.log.\n\n\t* Makefile.am (AUTOMAKE_OPTIONS): Remove check-news (permission notice\n\tin NEWS file is too big).\n\t(dist-hook): Don't distribute numbered or unnumbered emacs backups.\n\n\t* Makefile.am, cxx/Makefile.am: Updates for Gerardo's stuff.\n\n2001-10-09  Gerardo Ballabio  <ballabio@sissa.it>\n\n\t* cxx/isfuns.cc: New file.\n\t* gmp-impl.h: Add prototypes.\n\t* cxx/ismpf.cc, cxx/ismpq.cc, cxx/ismpz.cc: New files.\n\t* gmp-h.in: Add prototypes.\n\t* gmpxx.h, mpfrxx.h: New files.\n\n2001-10-08  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (with_tags): Establish a default based on --enable-cxx.\n\n\t* aclocal.m4: Regenerate with libtool patches for sed char range to\n\thelp Cray, LTCC quotes and +Z warnings grep to help HP-UX.\n\n\t* gmp-impl.h (doprnt_format_t, doprnt_memory_t, doprnt_reps_t,\n\tdoprnt_final_t): Use _PROTO.\n\n2001-10-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/asm-defs.m4 (INT32, INT64): Use LABEL_SUFFIX.\n\n\t* mpn/hppa: Convert files to `.asm'.\n\n2001-10-05  Kevin Ryde  
<kevin@swox.se>\n\n\t* mpn/Makeasm.am (.S files): Revert to separate CPP and CCAS, use\n\tcpp-ccas, and only pass CPPFLAGS to CPP, not whole CFLAGS.\n\t* mpn/cpp-ccas: New file.\n\t* mpn/Makefile.am (EXTRA_DIST): Add it.\n\n\t* tune/common.c, tune/speed.h: Change SPEED_ROUTINE_MPN_COPY_CALL uses\n\tto SPEED_ROUTINE_MPN_COPY or new SPEED_ROUTINE_MPN_COPY_BYTES.  Avoids\n\tmacro expansion problems on Cray.\n\n\t* configure.in (AC_PROG_CXXCPP): Add this, to make libtool happier.\n\n2001-10-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/rrandomb.c (gmp_rrandomb): Change bit_pos to be 0-based (was\n\t1-based); shift 2 (was 1) when making bit mask.  These two changes\n\tavoid undefined shift counts.\n\t(gmp_rrandomb): Avoid most calls to _gmp_rand by caching random values.\n\n\t* mpn/generic/random2.c: Changes for mirroring mpz/rrandomb.c.\n\n2001-10-04  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Add --enable-cxx.\n\t(Notes for Particular Systems): Mention pentium4 performance and SSE2.\n\t(Known Build Problems): Remove vax jsobgtr note, no longer needed.\n\t(Converting Floats): Tweak mpf_get_str description.\n\t(Low-level Functions): Correction to mpn_gcdext destination space\n\trequirements.\n\t(C++ Formatted Output): New section.\n\t(Language Bindings): Add ALP\n\t(Contributors): Add Paul Zimmermann's square root, update my things.\n\n\t* acinclude.m4 (GMP_PROG_CC_IS_GNU, GMP_PROG_CXX_WORKS): Send compiler\n\terrors to config.log.\n\n\t* mpq/Makefile.am (INCLUDES): Remove -DOPERATION_$*, not needed.\n\n\t* mpn/x86/*.asm: Change references to old README.family to just README.\n\n\t* mpz/README: Remove file, now adequately covered in the manual.\n\n2001-10-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/copyi.asm: New file.\n\t* mpn/x86/pentium4/copyd.asm: New file.\n\n\t* gmp-impl.h: Implement separate MPN_COPY_INCR and MPN_COPY_DECR\n\tmacros for CRAY systems.\n\t(CRAY _MPN_COPY): Delete.\n\n2001-10-02  Kevin Ryde  
<kevin@swox.se>\n\n\t* tests/mpz/t-popcount.c (check_data): Use \"~ (unsigned long) 0\" to\n\tavoid compiler warnings on sco.\n\n\t* mpbsd/Makefile.am: Compile mpz files directly, no copying.\n\tUse mpz/add.c and mpz/sub.c rather than mpz/aors.c.\n\t(INCLUDES): Remove -DOPERATION_$*, no longer needed (by mpz).\n\n\t* mpz/aors.h: Renamed from mpz/aors.c.\n\t* mpz/add.c, mpz/sub.c: New files, using mpz/aors.h.\n\t* mpz/aors_ui.h: Renamed from mpz/aors_ui.c.\n\t* mpz/add_ui.c, mpz/sub_ui.c: New files, using mpz/aors_ui.h.\n\t* mpz/fits_s.h: Renamed and adapted from mpz/fits_s.c.\n\t* mpz/fits_sshort.c, mpz/fits_sint.c, mpz/fits_slong.c: New files.\n\t* mpz/mul_i.h: Renamed from mpz/mul_siui.c.\n\t* mpz/mul_si.c, mpz/mul_ui.c: New files, using mpz/mul_i.h.\n\t* mpz/Makefile.am: Consequent updates.\n\t(INCLUDES): Remove -DOPERATION_$*.\n\n\t* mpf/fits_s.h: Renamed and adapted from mpf/fits_s.c.\n\t* mpf/fits_sshort.c, mpf/fits_sint.c, mpf/fits_slong.c: New files.\n\t* mpf/fits_u.h: Renamed and adapted from mpf/fits_u.c.\n\t* mpf/fits_ushort.c, mpf/fits_uint.c, mpf/fits_ulong.c: New files.\n\t* mpf/Makefile.am: Consequent updates.\n\t(INCLUDES): Remove -DOPERATION_$*.\n\n\t* cxx/osfuns.cc (__gmp_doprnt_params_from_ios): Don't use ios::hex etc\n\tas cases in a switch, they're not constant in g++ 3.0.\n\n\t* mpn/Makeasm.am (.s.o, .s.obj, .S.o, .S.obj, .asm.o, .asm.obj):\n\tLocate source file with test -f the same as automake.\n\t(.S): Let CCAS do the preprocessing, and run libtool for .S.lo.\n\t(.asm.lo): Run libtool via m4-ccas to get new style foo.lo right.\n\t(COMPILE_FLAGS): Add $(DEFAULT_INCLUDES), per new automake.\n\t* mpn/m4-ccas: New file.\n\t* mpn/Makefile.am (EXTRA_DIST): Add it.\n\t* mpn/asm-defs.m4: Add m4_not_for_expansion(`DLL_EXPORT').\n\t* mpn/x86/x86-defs.m4: Undefine PIC if DLL_EXPORT is set.\n\t* configure.in (CFLAGS_PIC, ASMFLAGS_PIC): Remove, no longer needed.\n\n\t* acinclude.m4 (GMP_FUNC_VSNPRINTF): Warn what's omitted when\n\tvsnprintf not 
available.\n\n\t* mpn/underscore.h: Remove file, not used since m68k converted to asm.\n\t* mpn/Makefile.am (EXTRA_DIST): Remove it.\n\n\t* tests/refmpz.c: Add <stdlib.h>, for free().\n\n2001-10-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/submul_1.asm: Apply some algebraic\n\tsimplifications.\n\t* mpn/x86/pentium4/sse2/addmul_1.asm: Comment.\n\n2001-10-01  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (--enable-cxx): New option for C++ support.\n\tAdd cxx and tests/cxx subdirectories.\n\t* ltmain.sh, aclocal.m4: Update to libtool 2001-09-30.\n\n\t* cxx/Makefile.am, cxx/Makefile.in, cxx/osdoprnti.cc, cxx/osfuns.cc,\n\tcxx/osmpf.cc, cxx/osmpq.cc, cxx/osmpz.cc: New files.\n\t* Makefile.am: Add them, in new libgmpxx.\n\t* gmp-h.in, gmp-impl.h: Prototypes and support.\n\t* tests/cxx/Makefile.am, tests/cxx/Makefile.in,\n\ttests/cxx/t-ostream.cc: New files.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL,\n\tSPEED_ROUTINE_MPN_GCDEXT_ONE): mpn_gcdext needs size+1 for\n\tdestinations.  
Found by Torbjorn.\n\n\t* gmp-h.in (__GNU_MP__, __GNU_MP_VERSION): Bump to 4.0.\n\t* mp-h.in (__GNU_MP__): Ditto.\n\t* gmp.texi, Makefile.am, compat.c: Amend version 3.2 to 4.0.\n\n\t* acinclude.m4 (GMP_PROG_CXX_WORKS): New macro.\n\t(GMP_PROG_CC_WORKS): Write \"conftest\" test program, not a.out.\n\n\t* gmp-impl.h (struct gmp_asprintf_t): Moved from printf/vasprintf.c.\n\t(GMP_ASPRINTF_T_INIT): New macro.\n\t(GMP_ASPRINTF_T_NEED): New macro, adapted from vasprintf.c NEED().\n\t* printf/vasprintf.c: Use these.\n\n\t* printf/asprntffuns.c: New file.\n\t* printf/Makefile.am, Makefile.am: Add it.\n\t* printf/asprntffuns.c, printf/vasprintf.c, gmp-impl.h\n\t(__gmp_asprintf_memory, __gmp_asprintf_reps, __gmp_asprintf_final):\n\tMove to asprntffuns.c, rename to __gmp and make global, remove\n\tspurious formal parameters from __gmp_asprintf_final.\n\n\t* configure.in (j90-*-*, sv1-*-*): Don't duplicate $path in $add_path.\n\t(*-*-mingw*): Don't assemble with -DPIC (as per cygwin).\n\n\t* printf/snprntffuns.c (gmp_snprintf_final): Remove spurious formal\n\tparameters.\n\n\t* tune/tuneup.c (POWM_THRESHOLD): Reduce stop_factor to 1.1 to help\n\tCray vector systems.\n\n\t* tests/misc.c (tests_rand_start): Print GMP_CHECK_RANDOMIZE=NN to\n\tfacilitate cut and paste when re-running.\n\t* tests/mpz/t-inp_str.c (check_data): Add more diagnostic prints.\n\n2001-09-30  Kent Boortz  <kent@swox.com>\n\n\t* macos/configure, macos/Makefile.in, macos/README: Updates for gmp 4.\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Recognise Apple MPW.\n\n2001-09-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/submul_1.c: Rewrite.  
Streamline multiplications;\n\tuse `majority' logic.\n\n2001-09-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in (__GMPN_AORS_1): Rewrite to work around Cray compiler bug.\n\n2001-09-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/gmp-mparam.h: New file.\n\n2001-09-26  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/dive_1.asm: New file.\n\t* mpn/x86/pentium4/sse2/submul_1.asm: New file.\n\t* mpn/x86/pentium4/sse2/sqr_basecase.asm: New file.\n\n\t* mpn/x86/pentium/copyi.asm: New file, based on past work by Torbjorn.\n\t* mpn/x86/pentium/copyd.asm: New file, ditto.\n\t* mpn/x86/pentium/com_n.asm: Rewrite, ditto.\n\n\t* printf/snprntffuns.c (gmp_snprintf_format): Copy va_list in case\n\tvsnprintf trashes it.\n\t* printf/vasprintf.c (gmp_asprintf_format): Ditto.\n\t* gmp-impl.h, doprnt.c (va_copy): Move to gmp-impl.h.\n\n\t* tests/mpz/t-cmp_d.c (check_low_z_one): Patch by Torbjorn for vax\n\tlimited float range.\n\n2001-09-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/vax/lshift.s: Change `jsob*' to `sob*'.\n\t* mpn/vax/rshift.s: Likewise.\n\n2001-09-23  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/mul_basecase.asm: Some simple but real code.\n\n\t* printf/doprnt.c: Use va_copy for va_list variables, copy function\n\tparameter in case it's call-by-reference.\n\n\t* tune/freq.c (speed_cpu_frequency_bsd_dmesg): New function.\n\t(speed_cpu_frequency_table): Use it.\n\n\t* tune/many.pl (popcount, hamdist): Fix declared return value.\n\t(sb_divrem_mn): Remove a spurious duplicate entry.\n\t(CLEAN): Add tmp-$objbase.c when using that for .h files.\n\t(macro_speed): Give a default for .h files.\n\tAdd ATTRIBUTE_CONST or __GMP_ATTRIBUTE_PURE as appropriate.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_MOD_CALL,\n\tSPEED_ROUTINE_MPN_PREINV_MOD_1, SPEED_ROUTINE_MPN_POPCOUNT,\n\tSPEED_ROUTINE_MPN_HAMDIST, SPEED_ROUTINE_MPN_GCD_1N,\n\tSPEED_ROUTINE_MPN_GCD_1_CALL, SPEED_ROUTINE_MPZ_JACOBI): Use return\n\tvalues so gcc 3 won't discard 
calls to pure or const functions.\n\t(mpn_mod_1_div, mpn_mod_1_inv): Add __GMP_ATTRIBUTE_PURE.\n\n2001-09-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/mul_basecase.asm: New file, placeholder\n\tfor real code, hiding the default x86 mul_basecase.asm.\n\n2001-09-22  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_PREREQ): Bump to 2.52.\n\t(m4_pattern_forbid, m4_pattern_allow): New calls, forbid GMP_.\n\t(AC_CHECK_HEADERS): Remove sys/types.h, already done by autoconf.\n\t* acinclude.m4, configure.in (GMP_GCC_NO_CPP_PRECOMP): New macro.\n\n\t* tests/devel/try.c (TYPE_PREINV_MOD_1): Don't run size==0.\n\t(malloc_region): Need fd=-1 for mmap MAP_ANON on BSD.\n\n2001-09-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/cong.c (mpz_congruent_p): Fix one-limb c<d test.\n\n\t* longlong.h: Rewrite __i370__ smul_ppmm; enable also for __s390__.\n\n\t* configure.in: Add support for IBM 360, 370, 390 families.\n\n2001-09-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium4/sse2/diveby3.asm: New file.\n\t* mpn/x86/pentium4/sse2/mode1o.asm: New file.\n\n2001-09-16  Kevin Ryde  <kevin@swox.se>\n\n\t* printf/doprnt.c: '#' means showpoint and showtrailing for %e, %f, %g.\n\t* tests/printf/t-printf.c (check_f): More test cases.\n\n2001-09-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in (__GMPN_AORS_1): Remove param TEST, add OP and CB.\n\tPostpone zeroing of (cout).\n\t(__GMPN_ADD_1, __GMPN_SUB_1): Corresponding changes.\n\n2001-09-14  Kevin Ryde  <kevin@swox.se>\n\n\t* ChangeLog: Merge in tests/rand/ChangeLog.\n\t* tests/rand/ChangeLog: Remove file.\n\n\t* printf/doprnt.c: Fix handling of a plain format after a GMP one; no\n\tneed to protect against negative precision internally.\n\t* tests/printf/t-printf.c (check_misc): More checks.\n\n2001-09-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/invert_limb.c: Add a PROLOGUE in a comment to have\n\tHAVE_NATIVE_... 
defined.\n\n2001-09-11  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, gmp-h.in (__GMP_HAVE_HOST_CPU_FAMILY_power,\n\t__GMP_HAVE_HOST_CPU_FAMILY_powerpc): New AC_SUBSTs.\n\t* gmp-h.in (__GMPN_COPY_INCR): Use them to select the power/powerpc\n\tcode, rather than preprocessor defines.\n\n\t* acinclude.m4, configure.in (GMP_H_ANSI): New macro.\n\n\t* gmp-h.in (__GMP_EXTERN_INLINE): Add a definition for SCO 8 cc.\n\n\t* gmp-h.in, version.c (gmp_version): Make the pointer \"const\" as well\n\tas the string.\n\n\t* acinclude.m4, configure.in (GMP_PROG_CC_IS_XLC): Recognise xlc when\n\tinvoked under another name (cc, xlc128, etc).\n\t* acinclude.m4 (GMP_PROG_CC_IS_GCC): Print a message when recognised.\n\n2001-09-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-h.in: Let __DECC mean __GMP_HAVE_CONST, etc.\n\t* mp-h.in: Likewise.\n\n2001-09-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/mmx/lshift.asm: New file.\n\t* mpn/x86/pentium4/mmx/rshift.asm: New file.\n\n\t* tests/mpn/t-iord_u.c (check_incr_data): Work around HP compiler bug.\n\t(check_decr_data): Likewise.\n\n2001-09-08  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Integer Logic and Bit Fiddling): Update mpz_hamdist\n\tbehaviour, clarify mpz_popcount a touch.\n\t(Language Bindings): Add mlton, fix alphabetical order.\n\t(Single Limb Division): Describe 2 or 1/2 limbs at a time style.\n\n\t* configure.in (AC_CHECK_FUNCS): Add mmap.\n\t* tests/devel/try.c (malloc_region): Use mmap if available.\n\n\t* tests/refmpz.c, tests/tests.h (refmpz_hamdist): New function.\n\t* tests/mpz/t-hamdist.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n\t* mpz/hamdist.c: Support neg/neg operands.\n\n\t* macos/Makefile.in: Remove dual compile of mpq/aors.c and\n\tmpn/generic/popham.c.\n\n\t* gmp-impl.h (popc_limb): New macro, adapted from mpn/generic/popham.c.\n\tFor 64-bits reuse 0x33...33 constant.\n\t* mpn/generic/popcount.c, mpn/generic/hamdist.c: Split from popham.c,\n\tuse popc_limb macro, remove unused 
\"i\", don't bother with \"register\"\n\tqualifiers.\n\t* mpn/generic/popham.c: Remove file.\n\n\t* ltmain.sh, configure, aclocal.m4: Update to libtool 1.4.1, with one\n\tltdll.c generation patch.\n\t* doc/configuration: Misc updates, note libtool patch used.\n\n\t* mpn/x86/pentium4/sse2/mul_1.asm: Use pointer increments not indexed\n\taddressing, to get 4.0 c/l flat.\n\n\t* tests/mpq/t-cmp_si.c (check_data): Use ULONG_MAX for denominators.\n\n\t* tests/misc.c (mpz_negrandom): Use given rstate, not RANDS.\n\n2001-09-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/pentium4/sse2/addmul_1.asm: New file.\n\n2001-09-04  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c: Define a HAVE for each speed_cpu_frequency routine to\n\tavoid duplicating conditionals.\n\t(speed_cpu_frequency_sco_etchw): New function.\n\t(speed_cpu_frequency_table): Use it.\n\t* tune/README: Mention SCO openunix 8 /etc/hw.\n\n\t* mpz/fib_ui.c: Use ?: to avoid a gcc 3 bug on powerpc64.\n\tStore back a carry for limb<long.\n\n\t* mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/k7/mmx/mod_1.asm,\n\tmpn/x86/p6/mmx/divrem_1.asm: Fix a couple of comments.\n\n\t* config.guess: Give m68020 for 68020 or better, not m68k.\n\t* configfsf.guess: Update to 2001-09-04.\n\n2001-09-02  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (m68k-*-*): Let m68k mean 68000, not 68020.\n\t* gmp.texi (Notes for Particular Systems): Update.\n\n\t* gmp-impl.h (union ieee_double_extract) [m68k]: Use longs, since int\n\tmight be only 16 bits.\n\n\t* tests/mpq/t-aors.c: New file.\n\t* tests/mpq/Makefile.am: Add it.\n\n\t* tests/refmpq.c: New file.\n\t* tests/Makefile.am: Add it.\n\t* tests/tests.h: Add prototypes.\n\n\t* mpq/aors.c: Share object code for mpq_add and mpq_sub.\n\t* Makefile.am, mpq/Makefile.am: Single mpq/aors.lo now.\n\n\t* tests/devel/try.c (TYPE_SUBMUL_1): Use correct reference routine.\n\n2001-08-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/x86-defs.m4 (cmov_available_p): Add pentium4.\n\n\t* gmp-h.in: Put #define 
renamings with prototypes.\n\tRemove commented out #defines of gmp-impl.h things.\n\t(mpn_invert_limb): Remove #define, already in gmp-impl.h.\n\t(mpn_lshiftc, mpn_rshiftc): Remove #defines, unused.\n\t(mpn_addsub_nc): Add prototype to #define.\n\n2001-08-28  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi: Switch to GFDL.\n\t(Top): Arrange copyright and conditions to appear here too.  For\n\tclarity have all this before the miscellaneous macro definitions.\n\t(Copying): Refer to COPYING.LIB file, mention plain GPL2 in demo\n\tprograms.\n\t(Contributors, References): Use @appendix rather than @unnumbered.\n\t(GNU Free Documentation License): New appendix.\n\t(@contents): Move to start of document, use only for tex (not html).\n\t(Debugging): Add leakbug.\n\t(Build Options): Add pentium4.\n\t(I/O of Rationals): Add mpq_inp_str.\n\n\t* fdl.texi: New file, with two @appendix directive tweaks.\n\t* Makefile.am (gmp_TEXINFOS): Add it.\n\n\t* tests/mpz/io.c: Check mpz_inp_str return against ftell, send error\n\tmessages just to stdout.\n\n\t* mpz/inp_str.c, gmp-impl.h (__gmpz_inp_str_nowhite): New function,\n\tand share a __gmp_free_func call.\n\t* mpq/inp_str.c: New file.\n\t* Makefile.am, mpq/Makefile.am: Add it.\n\t* tests/mpq/t-inp_str.c: New file.\n\t* tests/mpq/Makefile.am (check_PROGRAMS): Add it.\n\n\t* configure.in, acconfig.h (HAVE_HOST_CPU_FAMILY_power,\n\tHAVE_HOST_CPU_FAMILY_powerpc, HAVE_HOST_CPU_FAMILY_x86): AC_DEFINEs\n\tfor processor families.\n\t* gmp-impl.h: Use them, rather than cpp defines.\n\n\t* demos/Makefile.am (primes_LDADD): Use $(LIBM), for log().\n\n\t* tune/many.pl, tune/Makefile.am: Fix some from clean and distclean.\n\n2001-08-26  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c (ARRAY_ITERATION): Make types match on \"?:\" legs.\n\t(TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER): Remove some superseded code.\n\n\t* tests/printf/t-printf.c (check_plain): Don't compare \"all digits\"\n\tprecision against plain printf.\n\n\t* tune/Makefile.am: 
Eliminate empty TUNE_MPZ_SRCS.\n\n\t* configure, config.in, INSTALL.autoconf: Update to autoconf 2.52.\n\t* */Makefile.in, mdate-sh, missing, aclocal.m4, configure: Update to\n\tautomake 1.5.\n\t* configfsf.guess, configfsf.sub: Update to 2001-08-23.\n\n2001-08-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/primes.c: Complete rewrite.\n\n2001-08-24  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h: Test __ppc__ for apple darwin cc, reported by Jon\n\tBecker.  Also test __POWERPC__, PPC and __vxworks__.\n\n\t* tune/speed.h (speed_cyclecounter) [x86]: Don't clobber ebx in PIC.\n\n2001-08-22  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (x86 mmx): Correction to mmx path stripping.\n\n2001-08-17  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, acinclude.m4, Makefile.am, printf/Makefile.am,\n\ttests/printf/Makefile.am, gmp-h.in, gmp-impl.h, gmp.texi: Remove C++\n\tsupport, for the time being.\n\t* printf/doprntfx.cc, doprntix.cc, osfuns.cc, osmpf.cc, osmpq.cc,\n\tosmpz.cc, tests/printf/t-ostream.cc: Remove files.\n\n\t* printf/doprnt.c, printf/doprntf.c, gmp-impl.h: Use a single\n\t__gmp_doprnt_mpf, rather than a separate ndigits calculation.\n\t* printf/doprnt.c, printf/doprntf.c, gmp-impl.h, gmp.texi,\n\ttests/printf/t-printf.c: Let empty or -1 prec mean all digits for mpf.\n\t* printf/doprnt.c, tests/printf/t-printf.c: Accept h or l in %n; let\n\tnegative \"*\" style width mean left justify.\n\n\t* gmp-impl.h, mpf/get_str.c (MPF_SIGNIFICANT_DIGITS): New macro,\n\textracted from mpf/get_str.c.\n\n\t* libmp.sym: New file.\n\t* Makefile.am (libmp_la_LDFLAGS): Use it.\n\t(DISTCLEANFILES): Remove asm-syntax.h, no longer generated.\n\tRemove some comments about \"make check\".\n\n\t* demos/perl/GMP.pm, GMP.xs, GMP/Mpf.pm: Add printf and sprintf,\n\tchange get_str to string/exponent for floats, remove separate\n\tmpf_get_str.\n\t* demos/perl/GMP/Mpf.pm (overload_string): Use $# (default \"%.g\").\n\t* demos/perl/typemap: Fix some duplicate string entries.\n\t* 
demos/perl/test.pl: Update tests, split overloaded constants into ...\n\t* demos/perl/test2.pl: ... this new file.\n\t* demos/perl/Makefile.PL (clean): Add test.tmp.\n\n2001-08-16  Kevin Ryde  <kevin@swox.se>\n\n\t* printf/snprntffuns.c (gmp_snprintf_format): Correction to bufsize-1\n\treturn value handling.\n\n\t* demos/calc/calc.y: Reposition \"%{\" so copyright notice gets into\n\tgenerated files.\n\n\t* INSTALL: Use gmp_printf.\n\n2001-08-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/inp_str.c: Fix return value (was 1 too big).\n\t* tests/mpz/t-inp_str.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n\t* mpn/x86/pentium4/sse2/add_n.asm: New file.\n\t* mpn/x86/pentium4/sse2/sub_n.asm: New file.\n\t* mpn/x86/pentium4/sse2/mul_1.asm: New file.\n\n2001-08-12  Kevin Ryde  <kevin@swox.se>\n\n\t* printf/sprintffuns.c, printf/doprntf.c: Don't use sprintf return\n\tvalue (it's a pointer on SunOS 4).\n\n\t* acinclude.m4 (GMP_ASM_X86_SSE2, GMP_STRIP_PATH): New macros.\n\t* configure.in: Add pentium4 support.\n\t* mpn/x86/pentium4, mpn/x86/pentium4/mmx, mpn/x86/pentium4/sse2: New\n\tdirectories.\n\t* mpn/x86/README: Update.\n\n2001-08-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (setup_error_handler): Catch also SIGABRT.\n\n2001-07-31  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/refmpn.c (refmpn_mul_1c): Allow low to high overlaps.\n\n\t* gmp-h.in, gmp-impl.h (_gmp_rand): Move prototype to gmp-impl.h.\n\n\t* tune/Makefile.am (EXTRA_DIST): Add many.pl.\n\n2001-07-28  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Random Number Functions): Old rand functions no longer use\n\tthe C library.\n\n\t* configure.in, acinclude.m4 (GMP_FUNC_VSNPRINTF): New macro.\n\n\t* mpn/generic/get_str.c: Add an ASSERT for high limb non-zero.\n\n2001-07-24  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Add --enable-cxx.\n\t(Converting Floats): Note mpf_get_str only generates accurately\n\trepresentable digits.\n\t(Low-level Functions): Note mpn_get_str requires non-zero 
high limb.\n\t(Formatted Output): New chapter.\n\t(Multiplication Algorithms): Use @quotation with @multitable.\n\t(Toom-Cook 3-Way Multiplication): Ditto.\n\n\t* tests/memory.c (tests_free_nosize): New function.\n\t* tests/tests.h (tests_allocate etc): Add prototypes.\n\n\t* tests/printf: New directory.\n\t* tests/printf/Makefile.am, t-printf.c, t-ostream.cc: New files.\n\t* configure.in, tests/Makefile.am: Add them.\n\n\t* configure.in, acinclude.m4 (GMP_PROG_CXX): New macro.\n\t* configure.in (--enable-cxx): New option.\n\t(AC_CHECK_HEADERS): Add locale.h and sys/types.h, remove unistd.h.\n\t(AC_CHECK_TYPES): Add intmax_t, long double, long long, ptrdiff_t,\n\tquad_t.\n\t(AC_CHECK_FUNCS): Add localeconv, memset, obstack_vprintf, snprintf,\n\tstrchr, vsnprintf.\n\t(AC_CHECK_DECLS): Add vfprintf.\n\n\t* gmp-h.in, gmp-impl.h: Additions for gmp_printf etc.\n\n\t* printf: New directory.\n\t* printf/Makefile.am, asprintf.c, doprnt.c, doprntf.c, doprntfx.cc,\n\tdoprnti.c, doprntix.cc, fprintf.c, obprintf.c, obprntffuns.c,\n\tobvprintf.c, osfuns.cc, osmpf.cc, osmpq.cc, osmpz.cc, printf.c,\n\tprintffuns.c, snprintf.c, snprntffuns.c, sprintf.c, sprintffuns.c,\n\tvasprintf.c, vfprintf.c, vprintf.c, vsnprintf.c, vsprintf.c: New\n\tfiles.\n\t* configure.in, Makefile.am: Add them.\n\n\t* configure.in (HAVE_INLINE): Remove AC_DEFINE, unused.\n\t(AC_CHECK_TYPES): Don't test for void, assume it always exists.\n\n\t* gmp-impl.h (__GMP_REALLOCATE_FUNC_MAYBE): New macro.\n\t* mpz/get_str.c, mpq/get_str.c, mpf/get_str.c: Use it.\n\n\t* gmp-impl.h (mpn_fib2_ui): Use __MPN.\n\t(MPN_COPY_DECR): Fix an ASSERT.\n\t(CAST_TO_VOID): Remove macro.\n\n\t* gmp-h.in (mpq_out_str): Give #define even without prototype.\n\t(mpz_cmp_d, mpz_cmpabs_d): Corrections to #defines.\n\n\t* tests/devel/try.c: Add mpn_add and mpn_sub, don't use CAST_TO_VOID.\n\n2001-07-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Recognize pentium4.\n\t* config.sub: Recognize pentium4.\n\n2001-07-17  Kevin 
Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GMPN_AORS_1): Remove x86 and gcc versions, leave just\n\tone version.\n\t(__GMPN_ADD, __GMPN_SUB): New macros, rewrite of mpn_add and mpn_sub.\n\t(mpn_add, mpn_sub): Use them.\n\t(__GMPN_COPY_REST): New macro.\n\n\t* gmp-h.in, gmp-impl.h, acinclude.m4: Remove __GMP_ASM_L and\n\t__GMP_LSYM_PREFIX, revert to ASM_L in gmp-impl.h and AC_DEFINE of\n\tLSYM_PREFIX.\n\n2001-07-11  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GMPN_ADD_1 etc) [x86]: Don't use this on egcs 2.91.\n\n\t* mpz/fits_uint.c, fits_ulong.c, mpz/fits_ushort.c: Split up fits_u.c.\n\t* mpz/fits_u.c: Remove file.\n\t* mpz/Makefile.am, macos/Makefile.in: Update.\n\n\t* tests/refmpn.c,tests.h (refmpn_copy): New function.\n\t* tests/devel/try.c (TYPE_ZERO): No return value from call.\n\t(TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD): Share call with\n\tTYPE_MOD_1 and TYPE_MOD_1C.\n\t(MPN_COPY, __GMPN_COPY, __GMPN_COPY_INCR): Add testing.\n\n2001-07-10  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GMPN_COPY): Add form to help gcc on power and powerpc.\n\t* gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR, MPN_ZERO): Ditto.\n\t* mpn/powerpc64/copyi.asm, mpn/powerpc64/copyd.asm: Remove files.\n\n\t* mpz/tdiv_ui.c: Eliminate some local variables (seems to save code on\n\ti386 gcc 2.95.x), remove a bogus comment about quotient.\n\n\t* errno.c, gmp-impl.h (__gmp_exception, __gmp_divide_by_zero,\n\t__gmp_sqrt_of_negative): New functions.\n\t* gmp-impl.h (GMP_ERROR, DIVIDE_BY_ZERO, SQRT_OF_NEGATIVE): Use them.\n\n\t* randclr.c, randraw.c: Use ASSERT(0) for unrecognised algorithms.\n\n2001-07-07  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (powerpc*-*-*): Use -no-cpp-precomp for Darwin.\n\n\t* tests/mpbsd/t-itom.c: Renamed from t-misc.c.\n\t* tests/mpbsd/t-misc.c: Remove file.\n\t* tests/mpbsd/Makefile.am: Update.\n\n\t* tests/mpf/t-set_si.c,t-cmp_si.c,t-gsprec.c: Split from t-misc.c.\n\t* tests/mpf/t-misc.c: Remove file.\n\t* tests/mpf/Makefile.am: Update.\n\n\t* 
tests/mpz/t-oddeven.c,t-set_si.c,t-cmp_si.c: Split from t-misc.c.\n\t* tests/mpz/t-misc.c: Remove file.\n\t* tests/mpz/Makefile.am: Update.\n\n\t* stack-alloc.c: Add some alignment ASSERTs.\n\n\t* gmp-impl.h (MPN_NORMALIZE): Add notes on x86 repe/scasl slow.\n\n\t* tests/devel/try.c (MPN_ZERO): Add testing.\n\t* tune/speed.c,speed.h,common.c,many.pl (MPN_ZERO): Add measuring.\n\n\t* mpn/x86/divrem_1.asm: Update a remark about gcc and \"loop\".\n\n\t* tests/mpq/t-cmp_si.c: New file.\n\t* tests/mpq/Makefile.am: Add it.\n\n\t* tests/misc.c,tests.h (mpq_set_str_or_abort): New function.\n\n\t* mpq/cmp_si.c: New file.\n\t* Makefile.am, mpq/Makefile.am: Add it.\n\t* gmp-h.in (mpq_cmp_si): Add prototype.\n\t* gmp.texi (Comparing Rationals): Add doco.\n\n\t* gmp-h.in (_GMP_H_HAVE_FILE): Add _FILE_DEFINED for microsoft, add\n\tnotes on what symbols are for what systems.\n\n2001-07-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (ibm032 umul_ppmm): Fix typo.\n\t* longlong.h (sparclite sdiv_qrnnd): Fix typo.\n\n2001-07-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/bin_ui.c (DIVIDE): Use MPN_DIVREM_OR_DIVEXACT_1.\n\t* mpz/bin_uiui.c (MULDIV): Ditto, and use local variables for size and\n\tpointer.\n\n\t* acinclude.m4 (GMP_INCLUDE_GMP_H): New macro, use it everywhere gmp.h\n\tis wanted at configure time.\n\t* acinclude.m4, configure.in (GMP_H_EXTERN_INLINE, GMP_H_HAVE_FILE):\n\tNew macros.\n\n\t* gmp-h.in (__GMP_EXTERN_INLINE): Set to \"inline\" for C++.\n\t(mpn_add, mpn_sub): Use new style __GMP_EXTERN_INLINE.\n\t* gmp-h.in, mp-h.in, gmp-impl.h (_EXTERN_INLINE): Remove, unused.\n\t* mpn/generic/add.c, mpn/generic/sub.c: New files.\n\t* mpn/generic/inlines.c: Remove file.\n\t* configure.in, mpn/Makefile.am: Update.\n\n\t* gmp.texi (GMP Basics): Note the need for stdio.h to get FILE\n\tprototypes.\n\n2001-07-01  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options, Reentrancy): Updates for new\n\t--enable-alloca behaviour.\n\t(Debugging): Describe 
--enable-alloca=debug.\n\t(Miscellaneous Integer Functions): Note mpz_sizeinbase ignores signs.\n\t(Low-level Functions): Give a formula for mpn_gcdext cofactor.\n\t(Factorial Algorithm): New section.\n\t(Binomial Coefficients Algorithm): New section.\n\tMisc tweaks elsewhere.\n\n\t* mpf/set_prc.c: Merge the two truncation conditionals, misc cleanups,\n\tno functional changes.\n\n\t* mpn/*/gmp-mparam.h (DIVEXACT_1_THRESHOLD): Add tuned values.\n\t* gmp-impl.h (DIVEXACT_1_THRESHOLD): Make the default 0 when\n\t2*UMUL_TIME < UDIV_TIME.\n\n\t* mpn/x86/p6/dive_1.asm: New file.\n\n\t* mpn/x86/dive_1.asm: New file.\n\t* mpn/x86/gmp-mparam.h (DIVEXACT_1_THRESHOLD): Use it always.\n\n\t* tests/refmpn.c, tests.h (refmpn_zero): New function.\n\t* tests/devel/try.c: Use it.\n\n\t* tests/refmpn.c (refmpn_sb_divrem_mn): Use refmpn_cmp, not mpn_cmp.\n\n\t* tests/mpf/t-get_d.c (main): Use || not |.\n\n\t* tests/misc.c, tests/t-modlinv.c, tests/mpq/t-get_str.c,\n\ttests/mpf/reuse.c: Add string.h.\n\n2001-06-29  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_FIB2_UI,\n\tSPEED_ROUTINE_COUNT_ZEROS_C): Corrections to TMP block handling.\n\n\t* gmp-impl.h (MPN_TOOM3_MUL_N_MINSIZE, MPN_TOOM3_SQR_N_MINSIZE):\n\tCorrections to these to account for adding tD into E.\n\t(MPN_INCR_U, MPN_DECR_U) [WANT_ASSERT]: Add size\n\tassertions, since mpn_add_1 and mpn_sub_1 from gmp.h don't get them.\n\t(MPN_DIVREM_OR_DIVEXACT_1): Add an assert of no remainder.\n\n\t* assert.c: Add stdlib.h for abort prototype.\n\t* tests/spinner.c, trace.c, t-constants.c, t-count_zeros.c,\n\tt-gmpmax.c, t-modlinv.c: Ditto.\n\t* tests/mpz/t-bin.c, t-cmp.c, t-get_si.c, t-misc.c, t-popcount.c,\n\tt-set_str.c, t-sizeinbase.c: Ditto.\n\t* tests/mpq/t-equal.c, t-get_str.c, t-set_f.c, t-set_str.c: Ditto.\n\t* tests/mpf/t-fits.c, t-get_d.c, t-get_si.c, t-int_p.c, t-misc.c,\n\tt-trunc.c: Ditto.\n\t* tests/mpbsd/allfuns.c, t-misc.c: Ditto.\n\n\t* mpn/generic/mul_n.c, mpz/cfdiv_r_2exp.c: Use MPN_INCR_U rather 
than\n\tmpn_incr_u.\n\n\t* tests/devel/try.c (TYPE_SB_DIVREM_MN): More fixes for calling method.\n\n\t* mpn/x86/k6/cross.pl: More insn exceptions.\n\n2001-06-23  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (__GMPN_ADD_1, __GMPN_SUB_1) [i386]: Fix some asm output\n\tconstraints.\n\n\t* gmp-impl.h (modlimb_invert): Mask after shifting, so mask constant\n\tfits a signed byte.\n\n\t* tests/devel/try.c (TYPE_SB_DIVREM_MN): Fix initial fill of quotient\n\twith garbage.\n\n2001-06-20  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (rs6000-*-aix4* | powerpc-*-aix4*): Suppress error\n\tmessages if $CC_FOR_BUILD or program don't work.\n\n\t* mpz/sqrt.c,sqrtrem.c: Special case for op==0, to avoid TMP_ALLOC(0).\n\t* tests/refmpf.c (refmpf_add, refmpf_sub): Avoid TMP_ALLOC(0).\n\n\t* tests/mpn/t-aors_1.c: New file.\n\t* tests/mpn/Makefile.am: Add it.\n\n\t* gmp-h.in (__GMPN_ADD_1, __GMPN_SUB_1): New macros, rewrite of\n\tmpn_add_1 and mpn_sub_1, better code for src==dst and/or n==1,\n\tseparate versions for gcc x86, gcc generic, and non-gcc.\n\t(mpn_add_1, mpn_sub_1): Use them.\n\t(mpn_add, mpn_sub): Ditto, to get inlines on all compilers.\n\t(extern \"C\") [__cplusplus]: Let this encompass the extern inlines too.\n\t* mpn/generic/add_1.c,sub_1.c: New files, force code from gmp.h.\n\t* configure.in, mpn/Makefile.am: Add them.\n\n\t* acinclude.m4 (GMP_ASM_LSYM_PREFIX): AC_SUBST __GMP_LSYM_PREFIX\n\trather than AC_DEFINE LSYM_PREFIX.\n\t* gmp-h.in (__GMP_LSYM_PREFIX): New substitution.\n\t(__GMP_ASM_L): New macro.\n\t* gmp-impl.h (ASM_L): Use it.\n\n\t* acinclude.m4, configure.in (GMP_C_ATTRIBUTE_MALLOC): New macro.\n\t* gmp-impl.h: Use it for all the malloc based TMP_ALLOCs.\n\n\t* stack-alloc.h: Remove file.\n\t* tal-reent.c: New file.\n\t* Makefile.am: Update.\n\n\t* acinclude.m4, configure.in (GMP_OPTION_ALLOCA): New macro, add\n\tmalloc-reentrant method, use stack-alloc.c as malloc-notreentrant,\n\tmake \"reentrant\" the default.\n\t* gmp-impl.h (__TMP_ALIGN): Moved from 
stack-alloc.c, use a union to\n\tdetermine the value, and demand only 4 bytes align on 32-bit systems.\n\t* gmp-impl.h (WANT_TMP_NOTREENTRANT): Move global parts of\n\tstack-alloc.h to here, allow non power-of-2 __TMP_ALIGN in TMP_ALLOC.\n\t* gmp-impl.h: Extend extern \"C\" to TMP_ALLOC declarations.\n\t* stack-alloc.c (tmp_stack): Move private parts of stack-alloc.h to\n\there, use gmp-impl.h.\n\n\t* gmp-impl.h (TMP_ALLOC_LIMBS_2): New macro.\n\t* mpz/fib_ui.c, mpz/jacobi.c, mpq/cmp.c, mpn/generic/fib2_ui.c: Use it.\n\n\t* mpfr/exp2.c: Patch by Paul to match TMP_MARK and TMP_FREE in loop.\n\t* mpfr/sqrt.c: Scope nested TMP_DECL into nested { } block, patch by\n\tPaul, tweaked by me.\n\t* mpfr/agm.c: Ditto, and add a final TMP_FREE(marker2).\n\n\t* gmp-h.in (mpn_cmp): Add __GMP_ATTRIBUTE_PURE.\n\n\t* INSTALL: Clarify \"make install\", tweak formatting a bit.\n\n2001-06-17  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, Makefile.am, gmp-impl.h: Add a debugging TMP_ALLOC,\n\tselected with --enable-alloca=debug.\n\t* tal-debug.c: New file.\n\t* configure.in, Makefile.am: Compile stack-alloc.c only for\n\t--disable-alloca.\n\t* assert.c (__gmp_assert_header): New function, split from\n\t__gmp_assert_fail.\n\n\t* mpz/lcm.c: Don't TMP_MARK and then just return. 
Remove unnecessary\n\t_mpz_realloc prototype.\n\n\t* mpn/generic/mul.c (mpn_sqr_n): Use __gmp_allocate_func for toom3\n\ttemporary workspace.\n\n2001-06-15  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-set_f.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/set_f.c: Share MPN_COPY between pad and trunc cases, do exp<=0\n\ttest earlier, store SIZ(w) earlier.\n\n\t* tests/t-count_zeros.c: New file.\n\t* tests/t-gmpmax.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add them.\n\n\t* mp_clz_tab.c: Compile the table only if longlong.h says it's needed;\n\tadd an internal-use-only comment.\n\t* tune/common.c: Force a __clz_tab for convenience when testing.\n\n\t* mpn/x86/pentium/gmp-mparam.h, mpn/x86/pentium/mmx/gmp-mparam.h: Add\n\tCOUNT_LEADING_ZEROS_NEED_CLZ_TAB, for mod_1.asm.\n\n\t* longlong.h (count_leading_zeros) [pentium]: Decide to go with float\n\tmethod for p54.\n\t(count_leading_zeros) [alpha]: Add COUNT_LEADING_ZEROS_NEED_CLZ_TAB.\n\t(__clz_tab): Provide a prototype only if it's needed.\n\n\t* tests/trace.c (mpz_trace): Don't use = on structures.\n\t(mpn_trace): Set _mp_alloc when creating mpz.\n\n2001-06-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/divrem_1.asm: Amend some comments about P5 speed.\n\n\t* tune/README: Clarify reconfigure on gmp-mparam.h update.\n\n\t* mpn/x86/p6/copyd.asm: New file.\n\t* mpn/x86/p6/README: Update copyd and mod_1.\n\t* mpn/x86/copyd.asm: Amend some comments.\n\n\t* gmp-impl.h (__builtin_constant_p): Add dummy for non-gcc.\n\t(mpn_incr_u, mpn_decr_u): Recognise incr==1 at compile time in the\n\tgeneric code on gcc.\n\n\t* gmp-impl.h (ASSERT_ZERO_P, ASSERT_MPN_NONZERO_P): New macros.\n\t* mpn/generic/gcd_1.c, mpn/generic/mul_fft.c: Use them.\n\t* mpz/get_d.c: Add a private mpn_zero_p.\n\t* mpfr/trunc.c: Use own mpn_zero_p.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_1N): Use refmpn_zero_p.\n\t* gmp-impl.h (mpn_zero_p): Remove, no longer needed.\n\n\t* gmp-h.in, gmp-impl.h: Move MPN_CMP to gmp.h 
as __GMPN_CMP, leave an\n\tMPN_CMP alias in gmp-impl.h.\n\t* gmp-h.in (mpn_cmp): Add an inline version.\n\t* mpn/generic/cmp.c: Use __GMP_FORCE_mpn_cmp to get code from gmp.h.\n\n\t* acinclude.m4 (GMP_C_ATTRIBUTE_MODE): New macro.\n\t* configure.in: Call it.\n\t* gmp-impl.h (SItype etc): Use it.\n\n\t* randraw.c (lc): Change mpn_mul_basecase->mpn_mul,\n\tmpn_incr_u->MPN_INCR_U, abort->ASSERT_ALWAYS(0).\n\n\t* longlong.h (count_leading_zeros) [pentiumpro]: Work around a partial\n\tregister stall on gcc < 3.\n\n\t* gmp.texi (Introduction to GMP): Add IA-64.\n\t(Notes for Particular Systems): i386 means generic x86.\n\n\t* tests/t-modlinv.c: Use tests_start and tests_end.\n\n2001-06-10  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Number Theoretic Functions): mpz_jacobi only defined for b\n\todd.  Separate the jacobi/legendre/kronecker descriptions.\n\t(Low-level Functions): Document mpn_mul_1 \"incr\" overlaps.\n\t(Language Bindings): New chapter.\n\n\t* mpz/jacobi.c: Don't retaining old behaviour of mpz_jacobi on even b\n\t(it wasn't documented in 3.1.1).\n\t* mpz/jacobi.c, gmp-h.in (mpz_kronecker, mpz_legendre): Remove\n\tseparate entrypoints, just #define to mpz_jacobi.\n\t* compat.c (__gmpz_legendre): Add compatibility entrypoint.\n\n\t* mpn/generic/mul_1.c: Allow \"incr\" style overlaps.\n\t* tests/devel/try.c (param_init): Test this.\n\n\t* mpf/mul_ui.c: Do size==0 test earlier.\n\n2001-06-08  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (ULONG_HIGHBIT, UINT_HIGHBIT, USHRT_HIGHBIT): Cast\n\tULONG_MAX etc to unsigned long etc before attempting to right shift.\n\n\t* acinclude.m4 (GMP_ASM_LSYM_PREFIX): Add an AC_DEFINE of LSYM_PREFIX.\n\t* gmp-impl.h (ASM_L): New macro.\n\t(mpn_incr_u, mpn_decr_u, MPN_INCR_U, MPN_DECR_U): Add i386 optimized\n\tversions.\n\n\t* mpn/hppa/*.s,S,asm: Use .label so the code works with gas on hppa\n\tGNU/Linux too, reported by LaMont Jones <lamont@smallone.fc.hp.com>.\n\t* mpn/hppa/README: Add some notes on this.\n\t* acinclude.m4 
(GMP_ASM_LABEL_SUFFIX): Ditto.\n\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add dive_1.c,\n\tfib2_ui.c.\n\n\t* tests/mpn/t-iord_u.c: New file.\n\t* tests/mpn/Makefile.am (check_PROGRAMS): Add it.\n\n\t* configure.in (mips*-*-irix[6789]*): Make ABI=n32 the default, same\n\tas in gmp 3.1.\n\t* gmp.texi (ABI and ISA): Update.\n\n\t* gmp.texi (Build Options): Misc tweaks.\n\t(Notes for Particular Systems): Describe windows DLL handling.\n\t(Known Build Problems): DJGPP needs bash 2.04.\n\t(Number Theoretic Functions): mpz_invert returns 0<=r<modulus; add\n\tmpz_fib2_ui, mpz_lucnum_ui, mpz_lucnum2_ui.\n\t(Fibonacci Numbers Algorithm): Update for new formulas used.\n\t(Lucas Numbers Algorithm): New section.\n\n\t* tune/speed.c,speed.h,common.c,many.pl: Add mpn_fib2_ui, mpz_fib2_ui,\n\tmpz_lucnum_ui, mpz_lucnum2_ui.\n\t* demos/expr/exprz.c,README: Add lucnum.\n\t* demos/perl/GMP.pm,GMP.xs,GMP/Mpz.pm,test.pl: Add fib2, lucnum,\n\tlucnum2.\n\n\t* tests/mpz/t-lucnum_ui.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\t* tests/mpz/t-fib_ui.c: Check mpz_fib2_ui too, updates for new style\n\tMPN_FIB2_SIZE.\n\n\t* tune/tuneup.c, tune/Makefile.am, gmp-impl.h, mpn/*/gmp-mparam.h:\n\tRemove FIB_THRESHOLD, no longer required.\n\n\t* mpz/fib2_ui.c, mpz/lucnum_ui.c mpz/lucnum2_ui.c: New files.\n\t* Makefile.am, mpz/Makefile.am: Add them.\n\t* gmp-h.in (mpz_fib2_ui, mpz_lucnum_ui, mpz_lucnum2_ui): Add\n\tprototypes.\n\n\t* mpn/generic/fib2_ui.c: New file.\n\t* configure.in (gmp_mpn_functions): Add it.\n\t* gmp-impl.h (mpn_fib2_ui, FIB_TABLE, etc): Add these.\n\t* mpz/fib_ui.c: Rewrite.\n\n\t* acinclude.m4 (GMP_C_SIZES): Fix _LONG_LONG_LIMB define for mp_limb_t\n\tsize test.\n\t(GMP_FUNC_ALLOCA): Add dummy __GMP_BITS_PER_MP_LIMB for gmp-h.in work.\n\n\t* configure.in (CPPFLAGS): Remove -D__GMP_WITHIN_GMP, don't want it\n\teverywhere.\n\t* Makefile.am, mpn/Makefile.am, mpz/Makefile.am, mpq/Makefile.am,\n\tmpf/Makefile.am, mpbsd/Makefile.am (INCLUDES): Set 
-D__GMP_WITHIN_GMP.\n\n\t* configure.in (*-*-msdosdjgpp*): Forcibly disable shared libraries,\n\tto make libtests.la work.\n\n\t* acconfig.h (_LONG_LONG_LIMB, HAVE_MPFR): Remove dummy defines, no\n\tlonger needed.\n\n\t* mpz/set_ui.c: Store to _mp_d[0] unconditionally.\n\n2001-05-27  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, gmp-h.in, mp-h.in: Add support for windows DLLs.\n\n2001-05-26  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ABI and ISA, Reentrancy): Minor tweaks\n\t(Notes for Package Builds): Note gmp.h is a generated file.\n\t(Notes for Particular Systems): -march=pentiumpro is used for gcc\n\t2.95.4 and up.\n\t(Assembler Loop Unrolling): Mention non power-of-2 unrolling.\n\t(Internals): New chapter.\n\t* mpf/README: Remove file.\n\n\t* demos/expr/README: Miscellaneous rewordings.\n\n\t* demos/perl: New directory.\n\t* demos/Makefile.am: Add it.\n\t* demos/perl/INSTALL, Makefile.PL, GMP.pm, GMP.xs, typemap,\n\tGMP/Mpz.pm, GMP/Mpq.pm, GMP/mpf.pm, GMP/Rand.pm, sample.pl, test.pl:\n\tNew files.\n\n\t* configure, aclocal.m4: Update to autoconf 2.50.\n\n\t* configure, aclocal.m4, ltmain.sh: Update to libtool 1.4.\n\n\t* configure, aclocal.m4, missing, ansi2knr.c, */Makefile.in: Update to\n\tautomake 1.4f.\n\t* Makefile.am: Conditionalize mpfr in $(SUBDIRS) to handle mpfr.info.\n\t* mpfr/Makefile.am (INFO_DEPS): Remove previous mpfr.info handling.\n\t* mpn/Makefile.am (GENERIC_SOURCES): Remove this, just put mp_bases.c\n\tin libmpn_la_SOURCES.\n\t* tests/Makefile.am (tests.h): Move from EXTRA_HEADERS to\n\tlibtests_la_SOURCES.\n\t* ltconfig: Remove file, no longer needed.\n\n\t* Makefile.am (gmp-impl.h, longlong.h, stack-alloc.h): Move from\n\tEXTRA_DIST to libgmp_la_SOURCES, so they get included in TAGS.\n\t* tests/rand/Makefile.am (gmpstat.h): Move to libstat_la_SOURCES\n\tsimilarly.\n\n\t* config.guess (68k-*-*): Use $SHELL not \"sh\", tweak some comments.\n\n\t* mpfr/mpfr.texi (Introduction to MPFR): Tweak table formatting, note\n\tnon-free programs 
must be able to be re-linked.\n\n2001-05-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc64/addmul_1.asm, mpn/powerpc64/mul_1.asm,\n\tmpn/powerpc64/submul_1.asm: Add carry-in entrypoints.\n\n2001-05-17  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ge): Fix definition for info.\n\t(Notes for Particular Systems): Mention 68k dragonball and cpu32.\n\t(Efficiency): Add static linking, more about in-place operations,\n\tdescribe mpq+/-integer using addmul.\n\t(Reporting Bugs): A couple of words about self-contained reports.\n\t(Floating-point Functions): Note exponent limitations of mpf_get_str\n\tand mpf_set_str.\n\t(Initializing Floats): Clarify mpf_get_prec, mpf_set_prec and\n\tmpf_set_prec_raw a bit.\n\t(Float Comparison): Note current mpf_eq deficiencies.\n\n\t* gmp-h.in (__GMP_HAVE_CONST, __GMP_HAVE_PROTOTYPES,\n\t__GMP_HAVE_TOKEN_PASTE): Merge GNU ansidecl.h tests for ANSI compilers.\n\t* demos/expr/expr-impl-h.in: Ditto.\n\n\t* gmp-impl.h (BITS_PER_MP_LIMB): Define from __GMP_BITS_PER_MP_LIMB if\n\tnot already in gmp-mparam.h.\n\t* tests/t-constants.c (BITS_PER_MP_LIMB, __GMP_BITS_PER_MP_LIMB):\n\tCheck these are the same.\n\n\t* gmp-h.in (mpf_get_default_prec, mpf_get_prec, mpf_set_default_prec,\n\tmpf_set_prec_raw): Provide \"extern inline\" versions, use __GMPF on the\n\tmacros.\n\t* mpf/get_dfl_prc.c, mpf/get_prc.c, mpf/set_dfl_prc.c,\n\tmpf/set_prc_raw.c: Get code from gmp.h using __GMP_FORCE.\n\n\t* gmp-h.in, gmp-impl.h (__gmp_default_fp_limb_precision): Move from\n\tgmp-impl.h to gmp-h.in.\n\t(__GMPF_BITS_TO_PREC, __GMPF_PREC_TO_BITS): Ditto, and use __GMPF\n\tprefix and add a couple of casts.\n\t* gmp-h.in (__GMP_MAX): New macro.\n\t* mpf/init2.c mpf/set_prc.c: Update for __GMPF prefix.\n\n\t* gmp-h.in (__GMP_BITS_PER_MP_LIMB): New templated define.\n\t* acinclude.m4 (GMP_C_SIZES): Add AC_SUBST __GMP_BITS_PER_MP_LIMB,\n\tremove AC_DEFINE BITS_PER_MP_LIMB.\n\n2001-05-13  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, gmp.texi, Makefile.am, 
mpz/Makefile.am, tests/mpz/t-pow.c:\n\tRemove mpz_si_pow_ui, pending full si support.\n\t* mpz/si_pow_ui.c: Remove file.\n\n2001-05-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/dive_1.asm: New file.\n\n\t* mpn/powerpc32/umul.asm: Use r on registers.\n\t* mpn/powerpc64/umul.asm: New file.\n\t* configure.in (powerpc*-*-*): Enable umul in extra_functions.\n\n\t* tests/refmpn.c, tests/tests.h (refmpn_umul_ppmm): Use same arguments\n\tas normal mpn_umul_ppmm.\n\t(refmpn_mul_1c): Update.\n\t* tests/devel/try.c, tune/many.pl: Add some umul_ppmm testing support.\n\n\t* mpn/x86/k6/mmx/popham.asm, mpn/x86/k7/mmx/popham.asm: Don't support\n\tsize==0.\n\t* mpn/x86/pentium/popcount.asm, mpn/x86/pentium/hamdist.asm: Ditto,\n\tand shave a couple of cycles from the PIC entry code.\n\n\t* mpz/mul.c: Use mpn_mul_1 for size==1 and mpn_mul_2 (if available)\n\tfor size==2, to avoid copying; do vsize==0 test earlier.\n\n\t* mpf/sub.c: Test r!=u before calling mpf_set.\n\t* mpf/add.c: Ditto, and share mpf_set between usize==0 and vsize==0.\n\n\t* mpn/generic/tdiv_qr.c, mpq/get_d.c, mpf/div.c, mpf/set_q.c,\n\tmpf/set_str.c, mpf/ui_div.c: Test for high bit set, not for\n\tcount_leading_zeros zero.\n\n\t* acinclude.m4 (GMP_PROG_AR, GMP_PROG_NM): Print a message if extra\n\tflags are added.\n\n\t* tests/mpz/t-mul_i.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n\t* mpz/mul_siui.c (mpz_mul_si): Fix for -0x80..00 on long long limb.\n\n\t* gmp-h.in (mpf_set_si, mpf_set_ui): Revert last change, set exp to 0\n\twhen n==0.\n\t* mpf/ceilfloor.c, mpf/trunc.c: Fix exp to 0 when setting r to 0.\n\t* gmp-impl.h (MPF_CHECK_FORMAT): Check exp==0 when size==0.\n\n2001-05-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in (mpf_set_si, mpf_set_ui): Don't bother setting _mp_exp to 0\n\twhen n==0 (use 1 unconditionally).\n\t* tests/mpf/t-misc.c (check_mpf_set_si): Don't demand anything of\n\t_mp_exp when _mp_size is zero.\n\n\t* mpn/x86/README: Note gas _GLOBAL_OFFSET_TABLE_ with leal 
problem.\n\n\t* gmp-h.in (mpz_fits_uint_p, mpz_fits_ulong_p, mpz_fits_ushort_p):\n\tProvide these as \"extern inline\"s.\n\t(__GMP_UINT_MAX, __GMP_ULONG_MAX, __GMP_USHRT_MAX): New macros.\n\t(mpz_popcount): Use __GMP_ULONG_MAX.\n\t* gmp-impl.h (UINT_MAX, ULONG_MAX, USHRT_MAX): Use __GMP_U*_MAX, if\n\tnot already defined.\n\t* mpz/fits_u.c: Use the code from gmp.h.\n\n2001-05-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k7/dive_1.asm: New file.\n\t* mpn/x86/k7/gcd_1.asm: New file.\n\t* mpn/asm-defs.m4 (m4_count_trailing_zeros): New macro.\n\n\t* gmp-h.in (mpz_get_ui, mpz_getlimbn, mpz_set_q, mpz_perfect_square_p,\n\tmpz_popcount, mpz_size, mpf_set_ui, mpf_set_si, mpf_size): Provide\n\tthese as \"extern inlines\".\n\tUse just one big extern \"C\" block.\n\t* mpz/getlimbn.c, mpz/get_ui.c, mpz/perfsqr.c, mpz/popcount.c\n\tmpz/set_q.c, mpz/size.c, mpf/set_si.c, mpf/set_ui.c, mpf/size.c: Use\n\t__GMP_FORCE to get code from gmp.h.\n\n2001-05-03  Kevin Ryde  <kevin@swox.se>\n\n\t* extract-dbl.c: Add ASSERT d>=0.\n\n\t* gmp.texi (Efficiency): Add mpz_addmul etc for mpz+=integer, add\n\tmpz_neg etc in-place.\n\t(Integer Arithmetic): Add mpz_addmul, mpz_submul, mpz_submul_ui.\n\t(Initializing Rationals): Add mpq_set_str.\n\t(Low-level Functions): mpn_set_str requires strsize >= 1.\n\n\t* gmp-h.in (__GMP_EXTERN_INLINE, __GMP_ABS): New macros.\n\t(mpz_abs, mpq_abs, mpf_abs, mpz_neg, mpq_neg, mpf_neg): Provide inline\n\tversions.\n\t* mpz/abs.c, mpq/abs.c, mpf/abs.c, mpz/neg.c, mpq/neg.c, mpf/neg.c:\n\tAdd suitable __GMP_FORCE to turn off inline versions.\n\n\t* tests/mpz/t-aorsmul.c,t-cmp_d.c,t-popcount,t-set_str.c: New files.\n\t* tests/mpz/Makefile.am: Add them.\n\n\t* mpz/aorsmul_i.c: New file, rewrite of addmul_ui.c.  
Add\n\tmpz_submul_ui entrypoint, share more code between some of the\n\tconditionals, use mpn_mul_1c if available.\n\t* mpz/addmul_ui.c: Remove file.\n\t* mpz/aorsmul.c: New file.\n\t* Makefile.am, mpz/Makefile.am: Update.\n\t* gmp-h.in (mpz_addmul, mpz_submul, mpz_submul_ui): Add prototypes.\n\t* gmp-impl.h (mpz_aorsmul_1): Add prototype.\n\n\t* tests/mpq/t-set_str.c: New file.\n\t* tests/mpq/Makefile.am: Add it.\n\n\t* mpq/set_str.c: New file.\n\t* Makefile.am, mpq/Makefile.am: Add it.\n\t* gmp-h.in (mpq_set_str): Add prototype.\n\n\t* mpz/set_str.c: Fix for trailing white space on zero, eg. \"0 \".\n\t* mpn/generic/set_str.c: Add ASSERT str_len >= 1.\n\n\t* gmp-h.in, gmp-impl.h (mpn_incr_u, mpn_decr_u): Move to gmp-impl.h.\n\t* gmp-impl.h (MPN_INCR_U, MPN_DECR_U): New macros.\n\n2001-04-30  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/t-lcm.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/lcm.c: Add one limb special case.\n\n\t* mpz/lcm_ui.c: New file.\n\t* Makefile.am, mpz/Makefile.am: Add it.\n\t* gmp-h.in (mpz_lcm_ui): Add prototype.\n\t* gmp.texi (Number Theoretic Functions): Add mpz_lcm_ui, document lcm\n\tnow always positive.\n\n\t* mp-h.in (mp_size_t, mp_exp_t): Fix typedefs to match gmp-h.in.\n\n\t* gmp-h.in (mpn_add_1, mpn_add, mpn_sub_1, mpn_sub): Remove K&R\n\tfunction defines (ansi2knr will handle mpn/inline.c, and just ansi is\n\tenough for gcc extern inline).\n\n\t* gmp-h.in (__GMP_HAVE_TOKEN_PASTE): New macro.\n\t(__MPN): Use it.\n\t* gmp-impl.h (CNST_LIMB): Ditto.\n\n\t* gmp-h.in, mp-h.in (__gmp_const, __gmp_signed, _PROTO, __MPN): Use\n\tANSI forms on Microsoft C.\n\t(__GMP_HAVE_CONST): New define.\n\t* gmp-impl.h (const, signed): Use it.\n\n\t* demos/expr/expr-impl-h.in (<stdarg.h>): Use this with Microsoft C.\n\t(HAVE_STDARG): New define.\n\t* demos/expr/expr.c,exprz.c,exprq.c,exprf.c,exprfr.c: Use it.\n\n\t* acinclude.m4 (GMP_C_STDARG): New macro.\n\t* configure.in: Call it.\n\t* rand.c: Use it.\n\n\t* configure.in 
(AC_PROG_CC_STDC): New test.\n\n2001-04-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/mmx/dive_1.asm: New file.\n\t* mpn/x86/x86-defs.m4 (Zdisp): Two more insns.\n\n\t* mpn/x86/pentium/mul_2.asm: New file.\n\t* mpn/asm-defs.m4: Add define_mpn(mul_2).\n\t* acconfig.h (HAVE_NATIVE_mpn_divexact_1, mul_2): Add templates.\n\n\t* configure.in (ABI): Use AC_ARG_VAR.\n\n\t* tests/devel/try.c: Run reference function when validate fails.\n\n\t* mpq/get_str.c: Fixes for negative bases.\n\t* tests/mpq/t-get_str.c: Check negative bases.\n\t* tests/misc.c,tests.h (__gmp_allocate_strdup, strtoupper): New\n\tfunctions.\n\n2001-04-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/lcm.c (mpz_lcm): Make result always positive.\n\n\t* gmp-h.in (mpz_inp_binary, mpz_out_binary): Remove declarations.\n\n2001-04-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc64/addsub_n.asm: Use config.m4 not asm-syntax.m4.\n\n\t* mpz/cmp_d.c, mpz/cmpabs_d.c: New files.\n\t* Makefile.am, mpz/Makefile.am: Add them.\n\t* mpf/cmp_d.c, mpf/get_dfl_prec.c: New files.\n\t* Makefile.am, mpf/Makefile.am: Add them.\n\t* gmp-h.in (mpz_cmp_d, mpz_cmpabs_d, mpf_cmp_d, mpf_get_default_prec):\n\tAdd prototypes.\n\t* gmp.texi: Add documentation.\n\n\t* mpf/set_prc.c: Avoid a realloc call if already the right precision.\n\n\t* gmp-impl.h (MPF_BITS_TO_PREC, MPF_PREC_TO_BITS): New macros.\n\t* mpf/get_prc.c, init2.c, set_dfl_prec.c, set_prc.c, set_prc_raw.c:\n\tUse them.\n\n2001-04-20  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c: Don't test size==0 on mpn_popcount and\n\tmpn_hamdist; add testing for mpn_divexact_1; print some limb values\n\twith mpn_trace not printf.\n\n\t* mpz/popcount.c, mpz/hamdist.c: Don't pass size==0 to mpn_popcount\n\tand mpn_hamdist.\n\t* mpn/generic/popham.c: Don't support size==0.\n\n\t* config.guess (m68k-*-*): Detect m68010, return m68360 for cpu32,\n\tcleanup the nesting a bit.\n\n\t* gmp.texi (Integer Division): Fix mpz_congruent_2exp_p \"c\" type.\n\t(Integer Division): Add 
mpz_divexact_ui.\n\t(Number Theoretic Functions): Fix mpz_nextprime return type.\n\t(Exact Remainder): Divisibility tests now implemented.\n\tAnd more index entries in a few places.\n\n\t* tests/mpz/dive_ui.c: New file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/dive_ui.c: New file.\n\t* Makefile.am, mpz/Makefile.am: Add it.\n\t* gmp-h.in (mpz_divexact_ui): Add prototype.\n\n\t* tune/many.pl, tune/speed.h: Add special mpn_back_to_back for\n\tdevelopment.\n\n\t* gmp-impl.h (MPN_DIVREM_OR_DIVEXACT_1): New macro.\n\t* mpz/divexact.c: Use it.\n\n\t* gmp-impl.h (DIVEXACT_1_THRESHOLD): New threshold.\n\t* tune/tuneup.c: Tune it.\n\n\t* tune/speed.c,speed.h,common.c,many.pl: Add measuring of\n\tmpn_divexact_1, mpn_copyi, mpn_copyd.\n\n\t* mpn/generic/dive_1.c: New file.\n\t* configure.in (gmp_mpn_functions): Add it.\n\t* gmp-impl.h (mpn_divexact_1): Add prototype.\n\t* mpn/asm-defs.m4: Add define_mpn(divexact_1).\n\n\t* tests/mpn: New directory.\n\t* tests/Makefile.am: Add it.\n\t* tests/mpn/Makefile.am: New file.\n\t* configure.in (AC_OUTPUT): Add it.\n\t* tests/mpn/t-asmtype.c: New file.\n\n\t* configure, config.in: Update to autoconf 2.49d.\n\n\t* configure.in, gmp-h.in, mp-h.in, demos/expr/expr-impl-h.in: Revert\n\tto generating gmp.h, mp.h and expr-impl.h with AC_OUTPUT and AC_SUBST.\n\n\t* configure.in (m68*-*-*): Oops, m683?2 is 68000, m68360 is cpu32.\n\t* mpn/m68k/m68k-defs.m4 (scale_available_p): Ditto.\n\n\t* configure.in (underscore, asm_align): Remove these variables,\tunused.\n\t(GMP_ASM_*): Sort by AC_REQUIREs, to avoid duplication.\n\t* acinclude.m4 (GMP_ASM_UNDERSCORE, GMP_ASM_ALIGN_LOG): Remove support\n\tfor actions, no longer needed.\n\n2001-04-17  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (m68k-*-*): Look for cpu in linux kernel /proc/cpuinfo.\n\n\t* acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): The -mpentiumpro problem is\n\tfixed in 2.95.4, so test for that.\n\t(GMP_ASM_TYPE): Amend some comments.\n\n\t* tune/freq.c 
(speed_cpu_frequency_sysctl): Avoid having unused\n\tvariables on GNU/Linux.\n\n\t* mpn/asm-defs.m4 (m4_instruction_wrapper): Fix a quoting problem if\n\tthe name of the file is a macro.\n\n2001-04-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/powerpc64/*.asm: Add speeds on ppc630.\n\n\t* acconfig.h: Add dummy templates for _LONG_LONG_LIMB and HAVE_MPFR.\n\t* configure.in: Ensure config.in is the last AM_CONFIG_HEADER,\n\twhich autoheader requires.\n\n\t* mpn/x86/pentium/popcount.asm: New file.\n\t* mpn/x86/pentium/hamdist.asm: New file.\n\n\t* mpn/asm-defs.m4: (m4_popcount): New macro.\n\tAmend a few comments elsewhere.\n\n\t* acinclude.m4 (GMP_ASM_RODATA): If possible, grep compiler output for\n\tthe right directive.\n\n\t* tune/speed.c: Print clock speed in MHz, not cycle time.\n\n\t* configure.in (AC_CHECK_HEADERS): Check for sys/processor.h.\n\t* tune/freq.c (speed_cpu_frequency_processor_info): Require\n\t<sys/processor.h> to exist, to differentiate the different\n\tprocessor_info on Darwin.\n\t(speed_cpu_frequency_sysctlbyname): Remove hw.model test which is in\n\tspeed_cpu_frequency_sysctl.\n\t(speed_cpu_frequency_sysctl): Add hw.cpufrequency for Darwin.\n\n\t* gmp-impl.h (MPN_LOGOPS_N_INLINE, mpn_and_n ... 
mpn_xnor_n): Use a\n\tsingle expression argument for the different operations, necessary for\n\tthe Darwin \"smart\" preprocessor.\n\n\t* mpn/m68k/t-m68k-defs.pl: Allow white space in m4_definsn and\n\tm4_defbranch.\n\n\t* tune/many.pl: Change RM_TMP_S to RM_TMP to match mpn/Makeasm.am,\n\tavoid a possibly undefined array in a diagnostic, add more renaming to\n\thamdist.\n\n2001-04-13  Kevin Ryde  <kevin@swox.se>\n\n\t* ltmain.sh, aclocal.m4, configure, config.in: Update to libtool 1.3d.\n\t* configure.in: Change ac_ to lt_ in lt_cv_archive_cmds_need_lc and\n\tlt_cv_proc_cc_pic.\n\n\t* config.guess (m68*-*-*): Detect exact cpu with BSD sysctl hw.model,\n\tdetect 68000/68010 with trapf, detect 68302 with bfffo.\n\n2001-04-11  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_ASM_M68K_INSTRUCTION, GMP_ASM_M68K_ADDRESSING,\n\tGMP_ASM_M68K_BRANCHES): New macros.\n\t* configure.in: Use them, remove old 68k configs, use mc68020 udiv and\n\tumul.\n\n\t* mpn/m68k/m68k-defs.m4: New file.\n\t* mpn/m68k/t-m68k-defs.pl: New file.\n\t* mpn/m68k/*.asm: New files, converted from .S.  Merge add_n and sub_n\n\tto aors_n, ditto mc68020 addmul_1 and submul_1 to aorsmul_1.  
No\n\tobject code changes (except .type and .size now used on NetBSD 1.4).\n\t* mpn/m68k/README: New file.\n\t* mpn/m68k/*.S, */*.S, syntax.h: Remove files.\n\n\t* configure.in (m68*-*-netbsd1.4*): Pretend getrusage doesn't exist.\n\t* tune/README: Update.\n\n\t* configure.in (powerpc*-*-*): For the benefit of Darwin 1.3, add cc\n\tto cclist, make gcc_cflags -Wa,-mppc optional.\n\n2001-04-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add | for 68k.\n\t(gmpasm-mode-syntax-table): Add to comments.\n\n\t* tests/mpz/reuse.c (dsi_div_func_names): Add names for cdiv_[qr]_2exp.\n\n2001-04-04  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_M4_M4WRAP_SPURIOUS): Fix test so as to actually\n\tdetect the problem, add notes on m68k netbsd 1.4.1.\n\n\t* gmp.texi (Compatibility with older versions): Note libmp\n\tcompatibility.\n\n2001-04-03  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpz/reuse.c: Add mpz_cdiv_q_2exp and mpz_cdiv_r_2exp.\n\n\t* tests/mpz/t-pow.c: Drag in refmpn.o when testing mpz_pow_ui etc with\n\trefmpn_mul_2.\n\n\t* tune/speed.c,speed.h,common.c,many.pl: Add measuring of mpn_com_n\n\tand mpn_mul_2.\n\t* tests/devel/try.c: Add testing of mpn_mul_2, and a\n\tDATA_MULTIPLE_DIVISOR attribute.\n\n\t* gmp.texi (Build Options): List more m68k's.\n\t(Build Options): Add cross reference to tex2html.\n\t(Notes for Particular Systems): Add m68k means 68020 or up.\n\t(Rational Conversions): New section, with mpq_get_d, mpq_set_d and\n\tmpq_set_f from Miscellaneous, and new mpq_set_str.\n\t(Applying Integer Functions): Move mpq_get_num, mpq_get_den,\n\tmpq_set_num and mpq_set_den from Misc.\n\t(Miscellaneous Rational Functions): Remove section.\n\t(Custom Allocation): Partial rewrite for various clarifications.\n\t(References): Improve line breaks near URLs.\n\n\t* acinclude.m4 (GMP_GCC_M68K_OPTIMIZE): New macro.\n\t* configure.in (m68*-*-*): Use it to run gcc 2.95.x at -O not -O2.\n\t(m680?0-*-*, m683?2-*-*, m68360-*-*): 
Add optional gcc -m options.\n\n\t* tests/mpz/t-cmp.c: New file.\n\t* tests/mpz/t-sizeinbase.c: New file.\n\t* tests/mpz/Makefile.am: Add them.\n\n\t* gmp-impl.h (MPN_CMP): New macro.\n\t* mpz/cmp.c,cmpabs.c: Use it, and minor cleanups too.\n\n\t* tests/mpq/t-equal.c: New file.\n\t* tests/mpq/t-get_str.c: New file.\n\t* tests/mpq/Makefile.am: Add them.\n\n\t* mpq/get_str.c: New file.\n\t* Makefile.am, mpq/Makefile.am: Add it.\n\t* gmp-h.in (mpq_get_str): Add prototype.\n\n\t* mpq/equal.c: Rewrite using inline compare loops.\n\n\t* tests/refmpn.c,tests.h (refmpn_mul_2): Fix parameter order.\n\t* mpz/n_pow_ui.c: Fix mpn_mul_2 calls parameter order.\n\n2001-03-29  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/mpf/t-trunc.c: New file.\n\t* tests/mpf/Makefile.am (check_PROGRAMS): Add it.\n\t* gmp-impl.h (MPF_CHECK_FORMAT): New macro.\n\n\t* mpf/trunc.c: New file, rewrite of integer.c, preserve prec+1 in\n\tcopy, don't copy if unnecessary.\n\t* mpf/ceilfloor.c: New file likewise, and use common subroutine for\n\tceil and floor.\n\t* mpf/integer.c: Remove file.\n\t* Makefile.am, mpf/Makefile.am, macos/Makefile.in: Update.\n\n\t* acinclude.m4 (GMP_GCC_VERSION_GE): New macro.\n\t(GMP_GCC_MARCH_PENTIUMPRO): Use it, remove CCBASE parameter (don't\n\tbother checking it's gcc).\n\t(GMP_GCC_ARM_UMODSI): New macro.\n\t* configure.in (GMP_GCC_MARCH_PENTIUMPRO): Update parameters.\n\t(arm*-*-*): Use GMP_GCC_ARM_UMODSI.\n\t* gmp.texi (Notes for Particular Systems): Add arm gcc requirements.\n\n2001-03-28  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Converting Integers): Document mpz_getlimbn using absolute\n\tvalue and giving zero for N out of range, move to end of section.\n\n\t* tests/refmpn.c (refmpn_tdiv_qr): Use refmpn_divmod_1 rather than\n\trefmpn_divrem_1.\n\t* tests/tests.h: Add some prototypes that were missing.\n\n\t* mpz/tdiv_q_ui.c: Remove a comment that belonged to mpz_tdiv_r_ui.\n\n2001-03-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c: Handle carry 
overflow after m*n multiply code\n\tin both arms.  Partially combine multiply arms.\n\n2001-03-24  Kevin Ryde  <kevin@swox.se>\n\n\t* longlong.h: Add comments to P5 count_leading_zeros.\n\n\t* demos/expr/exprz.c,t-expr.c,README: Add congruent_p and divisible_p.\n\n2001-03-23  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (GMPceil, GMPfloor, ge, le): New macros.\n\t(Integer Division, mpn_cmp, mpn_sqrtrem, Algorithms): Use them.\n\t(mpn_bdivmod): Refer to mp_bits_per_limb, not BITS_PER_MP_LIMB, and\n\timprove formatting a bit.\n\t(mpn_lshift, mpn_rshift): Clarify the return values, and use {rp,n}\n\tfor the destination.\n\tMiscellaneous minor rewordings in a few places.\n\n\t* mpn/arm/arm-defs.m4: New file.\n\t* configure.in (arm*-*-*): Use it.\n\t* mpn/arm/*.asm: Use changecom and registers from arm-defs.m4, use L()\n\tfor local labels.\n\n\t* mpn/x86/k6/mmx/com_n.asm: Relax code alignment (same speed).\n\n\t* gmp-h.in (__GMP_ATTRIBUTE_PURE): Use __pure__ to avoid application\n\tnamespace.\n\n\t* gmp-impl.h (ABS): Add parens around argument.\n\n2001-03-20  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_M4): Use AC_ARG_VAR on $M4.\n\n\t* acinclude.m4 (GMP_M4_M4WRAP_SPURIOUS): New macro.\n\t* configure.in: Use it.\n\t* mpn/asm-defs.m4: Ditto.\n\n2001-03-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/logops_n.asm: New file.\n\n\t* mpn/x86/k6/k62mmx/copyd.asm: Rewrite, smaller and simpler, faster on\n\tsmall sizes, slower on big sizes (about half the time).\n\t* mpn/x86/k6/k62mmx/copyi.asm: Remove file, in favour of generic x86.\n\t* mpn/x86/copyi.asm: Add some comments.\n\t* mpn/x86/k6/README: Update.\n\n\t* mpn/x86/k6/gcd_1.asm: New file.\n\n\t* gmp-impl.h (NEG_MOD): Fix type of __dnorm.\n\n\t* acinclude.m4 (GMP_C_SIZES): Fix use of __GMP_WITHIN_CONFIGURE.\n\n2001-03-15  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (GMPabs): New macro.\n\t(Float Comparison - mpf_reldiff): Use it.\n\t(Integer Comparisons - mpz_cmpabs): Ditto, puts \"abs\" in 
info.\n\t(Reentrancy): Update notes on old random functions.\n\t(Karatsuba Multiplication): Better characterize the effect of basecase\n\tspeedups on the thresholds, pointed out by Torbjorn.\n\n\t* tune/README: Notes on the 1x1 div threshold for mpn_gcd_1.\n\n\t* tests/misc.c (mpz_pow2abs_p, mpz_flipbit, mpz_errandomb,\n\tmpz_errandomb_nonzero, mpz_negrandom): New functions.\n\t(mpz_erandomb, mpz_erandomb_nonzero): Use urandom().\n\t* tests/spinner.c (spinner_wanted, spinner_tick): Make global.\n\t* tests/tests.h: Update prototypes.\n\n\t* tests/mpz/t-cong.c, tests/mpz/t-cong_2exp.c: New files.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add them.\n\n\t* mpz/cong.c, mpz/cong_2exp.c, mpz/cong_ui.c: New files.\n\t* Makefile.am, mpz/Makefile.am: Add them.\n\t* gmp-impl.h (NEG_MOD): New macro.\n\t* gmp-h.in (mpz_congruent_p, mpz_congruent_2exp_p,\n\tmpz_congruent_ui_p): Add prototypes.\n\t* gmp.texi (Integer Division, Efficiency): Add documentation.\n\n\t* mpq/aors.c: No need for ABS on denominator sizes.\n\n\t* gmp-impl.h (mpn_divisible_p): Use __MPN.\n\n\t* gmp-impl.h (LOW_ZEROS_MASK): New macro.\n\t* mpz/divis_ui.c, mpn/generic/divis.c: Use it.\n\n\t* mpz/setbit.c: Fix normalization for case of a negative ending up\n\twith a zero high limb.\n\t* tests/mpz/bit.c (check_single): New test for this problem.\n\n\t* configure.in (none-*-*): Fix cclist for default ABI=long.\n\n2001-03-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/cfdiv_q_2exp.c: Don't scan for non-zero limbs if they don't\n\tmatter to the rounding.\n\n\t* mpz/get_ui.c: Fetch _mp_d[0] unconditionally, so the code can come\n\tout branch-free.\n\n2001-03-08  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c (param_init): Fix reference functions for and_n\n\tand nand_n.\n\n\t* tune/speed.c, tests/devel/try.c: Seed RANDS, not srandom etc.\n\t* configure.in (AC_CHECK_FUNCS): Remove srand48 and srandom.\n\t* macos/configure (coptions): Remove random/srandom, now unnecessary.\n\n\t* configure.in (gmp.h, mp.h, 
demos/expr/expr-impl.h): Generate using\n\tAM_CONFIG_HEADER.\n\t(_LONG_LONG_LIMB, HAVE_MPFR): Change to AC_DEFINEs.\n\t* gmp-h.in, mp-h.in, demos/expr/expr-impl-h.in: Change to #undef's.\n\t* acinclude.m4 (GMP_FUNC_ALLOCA, GMP_C_SIZES): Use gmp-h.in, not gmp.h.\n\t* Makefile.am (EXTRA_DIST): Remove gmp-h.in and mp-h.in, now done\n\tautomatically.\n\t* acinclude.m4 (GMP_FUNC_ALLOCA), gmp-impl.h: Set and use\n\t__GMP_WITHIN_CONFIGURE rather than GMP_FUNC_ALLOCA_TEST.\n\n\t* mpf/random2.c: Use _gmp_rand and RANDS instead of random() for the\n\texponent, ensures full range of values too.\n\n\t* tests/mpz/t-div_2exp.c (check_various): Start with d based on i, but\n\tdon't let it go negative.\n\n\t* tune/tuneup.c (KARATSUBA_MUL_THRESHOLD): Limit probing to\n\tTOOM3_MUL_THRESHOLD_LIMIT, the size of the workspace in mul_n.c.\n\tUse a -1 with this too, so size<LIMIT not <=.\n\n2001-03-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/cfp/mul_1.c: Don't call mpn_add_n with size 0.\n\t* mpn/cray/cfp/addmul_1.c: Likewise.\n\t* mpn/cray/cfp/submul_1.c: Don't call mpn_sub_n with size 0.\n\n\t* tests/mpz/t-div_2exp.c (check_various): Start 2nd d loop from 0\n\t(avoid problems with Cray compilers).\n\n2001-03-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/submul_1.c: Don't call mpn_sub_n with size 0.\n\n\t* mpn/cray/ieee/mul_basecase.c: New file.\n\t* mpn/cray/ieee/sqr_basecase.c: New file, derived from mul_basecase.c.\n\n2001-03-06  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/devel/try.c (pointer_setup): Allow dst_size == SIZE_SIZE2 for\n\tthe benefit of mpn_tdiv_qr.\n\n\t* tune/tuneup.c (all): Start karatsuba probing at size==4, for the\n\tbenefit of cray t90 ieee which has speed oddities at size==2.\n\n\t* gmp-impl.h (USE_LEADING_REGPARM): Use __GMP_GNUC_PREREQ.\n\tUse __GMP_ATTRIBUTE_PURE and ATTRIBUTE_CONST in a few places.\n\n\t* gmp-h.in (__GMP_GNUC_PREREQ) New macro.\n\t(__GMP_ATTRIBUTE_PURE): New macro, use it in many places.\n\n\t* gmp-impl.h, gmp-h.in 
(mpn_jacobi_base): Move prototype to\n\tgmp-impl.h, use ATTRIBUTE_CONST.\n\n\t* tune/speed.h (speed_cyclecounter): Inline asm version for i386.\n\n\t* mpz/cfdiv_r_2exp.c (cfdiv_r_2exp): Only reread \"up\" after second\n\trealloc, first is under w!=u.\n\n2001-03-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/sub_n.c: Rewrite using `majority' logic.\n\n\t* mpz/cfdiv_r_2exp.c (cfdiv_r_2exp): Reread `up' after realloc of w.\n\n\t* mpn/cray/ieee/mul_1.c: Rewrite.  Streamline multiplications;\n\tuse `majority' logic.\n\t* mpn/cray/ieee/addmul_1.c: Likewise.\n\n\t* mpn/cray/add_n.c: Rewrite using `majority' logic.\n\n2001-03-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (CRAY udiv_qrnnd): No longer conditional on CRAYMPP.\n\t(64-bit hppa add_ssaaaa): New.\n\t(64-bit hppa sub_ddmmss): New.\n\n\t* mpn/cray/ieee/invert_limb.c: New file.\n\n\t* gmp-impl.h (RANDS): Add a `,0' to make it compile on more compilers.\n\n2001-03-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/n_pow_ui.c (ULONG_PARITY): Move to gmp-impl.h.\n\t* gmp-impl.h (ULONG_PARITY): i386 part from n_pow_ui.c, new generic\n\tform by Torbjorn.\n\n\t* tests/mpz/t-div_2exp.c: New file, rewrite of t-2exp.c.\n\t* tests/mpz/t-2exp.c: Remove file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Update.\n\n\t* gmp-h.in (mpz_cdiv_q_2exp, mpz_cdiv_q_2exp): Add prototypes.\n\t* gmp.texi (Integer Division): Add mpz_cdiv_q_2exp and mpz_cdiv_q_2exp.\n\n\t* mpz/cfdiv_q_2exp.c: New file, partial rewrite of fdiv_q_2exp.c, add\n\tmpz_cdiv_q_2exp entrypoint.\n\t* mpz/cfdiv_r_2exp.c: New file, rewrite of fdiv_r_2exp.c, use all mpn,\n\tadd mpz_cdiv_r_2exp entrypoint.\n\t* mpz/fdiv_q_2exp.c, mpz/fdiv_r_2exp.c: Remove files.\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Update.\n\t* Makefile.am (MPZ_OBJECTS): Ditto.\n\n\t* gmp-impl.h (USE_LEADING_REGPARM): Use __i386__ same as longlong.h\n\t(REGPARM_2_1, REGPARM_3_1, REGPARM_ATTR): New macros.\n\t* mpz/jacobi.c (jac_or_kron): Use them.\n\n\t* configure.in (HAVE_ABI_$ABI): 
Re-enable this for config.m4, with\n\tdots changed to underscores (necessary for hppa).\n\n\t* tests/mpz/t-divis.c, tests/mpz/t-divis_2exp.c: New files.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Add them.\n\n\t* gmp-h.in (mpz_divisible_p, mpz_divisible_ui_p,\n\tmpz_divisible_2exp_p): Add prototypes.\n\t* gmp.texi (Integer Division): Add mpz_divisible_p.\n\t(Efficiency): Add remarks about divisibility testing.\n\n\t* mpz/divis.c, mpz/divis_ui.c, mpz/divis_2exp.c: New files.\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Add them.\n\t* Makefile.am (MPZ_OBJECTS): Ditto.\n\n\t* mpn/generic/divis.c: New file.\n\t* configure.in (gmp_mpn_functions): Add it.\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Ditto.\n\t* gmp-impl.h (mpn_divisible_p): Add prototype.\n\n\t* urandom.h: Remove file.\n\t* Makefile.am (EXTRA_DIST): Remove it.\n\n\t* tests/mpz/convert.c, dive.c, io.c, logic.c, reuse.c, t-2exp.c,\n\tt-fdiv.c, t-fdiv_ui.c, t-gcd.c, t-jac.c, t-mul.c, t-pow.c,\n\tt-powm.c, t-powm_ui.c, t-root.c, t-sqrtrem.c, t-tdiv.c,\n\tt-tdiv_ui.c: Use RANDS, initialized by tests_rand_start.\n\n\t* tests/mpz/t-pow.c: New file, being t-pow_ui renamed and with some\n\tfurther tests added.\n\t* tests/mpz/t-pow_ui.c: Remove file.\n\t* tests/mpz/Makefile.am (check_PROGRAMS): Update.\n\n\t* tests/t-modlinv.c: Don't use urandom.h.\n\t* tests/mpz/bit.c, tests/mpz/t-scan.c: Ditto.\n\t* tests/mpq/t-cmp.c, tests/mpq/t-cmp_ui.c, tests/mpq/t-get_d.c: Ditto.\n\t* tests/mpf/reuse.c, t-add.c, t-conv.c, t-dm2exp.c, t-muldiv.c,\n\tt-sqrt.c, t-sub.c: Ditto.\n\n\t* tests/misc.c (tests_rand_start, tests_rand_end): New functions.\n\t(tests_start, tests_end): Use them.\n\t(urandom): New function.\n\t* tests/tests.h: Add prototypes.\n\n\t* mpz/random.c: Rewrite using mpz_urandomb and RANDS.\n\t* mpn/generic/random.c: Rewrite using _gmp_rand and RANDS.\n\t* mpn/generic/random2.c: Use RANDS not random() etc.\n\n\t* gmp-impl.h (__gmp_rands, __gmp_rands_initialized): Add externs.\n\t(gmp_randstate_ptr): New 
typedef.\n\t(RANDS, RANDS_CLEAR): New macros.\n\n\t* rands.c: New file.\n\t* Makefile.am (libgmp_la_SOURCES): Add it.\n\n\t* configure.in (mpn_objs_in_libmp): New AC_SUBST.\n\t* Makefile.am (libmp_la_DEPENDENCIES): Use it.\n\n2001-03-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/udiv_qrnnd.asm: New file.\n\n2001-03-01  Kevin Ryde  <kevin@swox.se>\n\n\t* mpbsd/rpow.c: New file.\n\t* mpbsd/Makefile.am (libmpbsd_la_SOURCES): Add it\n\t(nodist_libmpbsd_la_SOURCES): Remove pow_ui.c.\n\t* Makefile.am (MPBSD_OBJECTS): Add rpow.lo, remove pow_ui.lo.\n\t(libmp_la_DEPENDENCIES): Add mpz/n_pow_ui.lo.\n\n\t* mpz/ui_pow_ui.c: Rewrite using mpz_n_pow_ui.\n\t* mpz/pow_ui.c: Ditto, and no longer provide rpow for mpbsd.\n\n\t* mpz/n_pow_ui.c: New file, rewrite of pow_ui.c and ui_pow_ui.c.  Use\n\tless temporary memory, strip factors of 2 from the base, use mpn_mul_2\n\tif available.\n\t* mpz/si_pow_ui.c: New file.\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Add them.\n\t* Makefile.am (MPZ_OBJECTS): Ditto.\n\t* gmp-impl.h (mpz_n_pow_ui): Add prototype.\n\t* gmp-h.in (mpz_si_pow_ui): Add prototype.\n\t* gmp.texi (Integer Exponentiation): Add mpz_si_pow_ui.\n\n\t* acinclude.m4 (GMP_C_SIZES): Add BITS_PER_ULONG.\n\tCorrection to mp_limb_t working check.\n\t* configure.in (limb_chosen): New variable.\n\t* tests/t-constants.c (BITS_PER_ULONG): Check this value.\n\tAdd some reminders about tests that fail on Cray.\n\n\t* tests/refmpn.c (refmpn_mul_2): New function.\n\t* tests/refmpz.c (refmpz_pow_ui): Copied from tests/mpz/t-pow_ui.c\n\t* tests/tests.h: Add prototypes.\n\n\t* configure.in (none-*-*): Add ABI=longlong.\n\t* doc/configuration (Long long limb testing): Describe it.\n\n\t* gmp.texi (Low-level Functions): Move some commented out remarks ...\n\t* mpn/generic/mul_basecase.c: ... 
to here.\n\n\t* mpn/x86/README: Note \"%=\" as an alternative to \"1:\" in __asm__.\n\n\t* tests/trace.c (mp_trace_start): Print \"bin\" for binary.\n\n\t* mpn/generic/dump.c: Add a couple of casts to keep gcc quiet.\n\n\t* gmp-h.in (mpn_incr_u, mpn_decr_u): Add parens around arguments.\n\n\t* mpbsd/mout.c, mpbsd/mtox.c (num_to_text): Remove unused variable.\n\n\t* mpfr/set_d.c (mpfr_get_d2): Declare \"q\" for 64-bit limbs.\n\n2001-02-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64w/udiv_qrnnd.asm: Tune.\n\n2001-02-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64w/udiv_qrnnd.asm: New file.\n\n2001-02-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (arm): Optimize sub_ddmmss by testing for constant\n\toperands.\n\t* mpn/arm/invert_limb.asm: New file.\n\n2001-02-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/lshift.c: Rewrite.\n\t* mpn/generic/rshift.c: Rewrite.\n\n\t* longlong.h: Use UWtype for external interfaces that expect mp_limb_t.\n\n\t* longlong.h (arm): #define invert_limb.\n\n\t* mpn/arm: Make labels have local scope.\n\n\t* configure.in (arm*-*-*): Set extra_functions.\n\t* longlong.h (arm): #define udiv_qrnnd.\n\t* mpn/arm/udiv.asm: New file.\n\n2001-02-24  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/many.pl: Add mpn_count_leading_zeros, mpn_count_trailing_zeros\n\tand mpn_invert_limb.  Add count_leading_zeros, count_trailing_zeros\n\tfrom a .h file.  Correction to modexact_1_odd prototype.  Support\n\tansi2knr.\n\t* tune/speed.h, tune/common.c: Consequent changes.\n\n\t* demos/expr/*: Make a few more functions available in expressions,\n\tcreate only libexpr.a, misc minor updates.\n\n\t* mpn/Makeasm.am: Add some comments about suffix ordering.\n\n\t* tests/refmpn.c (rshift_make, lshift_make): No need to compare\n\tunsigned to zero.\n\n\t* mpq/mul.c: Detect and optimize squaring.\n\n2001-02-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3: Convert files to `.asm'.\n\n\t* mpn/arm: Convert files to `.asm'.  
Misc cleanups.\n\t* mpn/arm/submul_1.asm: New file.\n\n2001-02-21  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c (all): Only one compiler print should match, no need\n\tfor #undef PRINTED_COMPILER.\n\n\t* mpfr/mpfr.h (mpfr_sgn): Use mpfr_cmp_ui (patch from Paul).\n\n\t* mpz/fib_ui.c: Update some remarks about alternative algorithms.\n\t* gmp.texi (Fibonacci Numbers Algorithm): Ditto.\n\t(Assigning Floats): Clarify mpf_swap swaps the precisions too.\n\t(Low-level Functions): Try to be clearer about negative cofactors.\n\n2001-02-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/copyi.asm: Streamline for small operands.\n\t* mpn/sparc64/add_n.asm: Likewise.\n\t* mpn/sparc64/sub_n.asm: Likewise.\n\n\t* mpn/sparc64/copyd.asm: New file.\n\n2001-02-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/lshift.asm: Rewrite.\n\t* mpn/sparc64/rshift.asm: Rewrite.\n\n2001-02-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/add_n.asm: Rewrite using `majority' logic.\n\t* mpn/sparc64/sub_n.asm: Likewise.\n\n\t* tune/tuneup.c (all): Recognise DECC and MIPSpro compilers.\n\n\t* mpn/pa64/sqr_diagonal.asm: Use PROLOGUE/EPILOGUE.\n\t* mpn/pa642/sqr_diagonal.asm: Likewise.\n\n\t* configure.in (HAVE_ABI_$abi): Disable for now.\n\n\t* mpn/asm-defs.m4 (PROLOGUE): Use LABEL_SUFFIX.\n\n\t* acinclude.m4 (GMP_ASM_ATTR): New check, for hppa oddities.\n\n2001-02-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/hppa/hppa1_1/gmp-mparam.h: New file.\n\t* mpn/hppa/hppa2_0/gmp-mparam.h: New file.\n\n\t* mpn/pa64/sqr_diagonal.asm: New file.\n\t* mpn/pa64w/sqr_diagonal.asm: New file.\n\t* mpn/hppa/hppa1_1/sqr_diagonal.asm: New file.\n\t* mpn/hppa/hppa2_0/sqr_diagonal.asm: New file.\n\n\t* mpn/sparc32/v9/add_n.asm: Use `fitod' instead of `fxtod' for dummy\n\tFA-pipeline insns.\n\t* mpn/sparc32/v9/sub_n.asm: Likewise.\n\n2001-02-18  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Known Build Problems): Notes on make, $* and K&R, misc\n\ttweaks elsewhere.\n\t(Low-level Functions): 
Use {} notation in mpn_sqrtrem.\n\t(Basecase Multiplication): Mention BASECASE_SQR_THRESHOLD.\n\n\t* mpfr/isnan.c (mpfr_number_p): Infinity is not a number.\n\t* mpfr/out_str.c: Pass strlen+1 for the block size to free.\n\t* mpfr/get_str.c: Correction for realloc to strlen+1.\n\n\t* acinclude.m4 (GMP_C_SIZES): Generate an error if mp_limb_t doesn't\n\tseem to work for some reason.\n\n2001-02-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/gmp-mparam.h: Retune.\n\n\t* mpn/sparc32/v9/add_n.asm: New file.\n\t* mpn/sparc32/v9/sub_n.asm: New file.\n\n\t* mpn/sparc32/v9/mul_1.asm: Tune function entry.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\n\t* mpn/sparc32/v9/sqr_diagonal.asm: New file.\n\n2001-02-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Fix flags selection when $CC is a compiler known to us.\n\n\t* demos/expr/exprfr.c (e_mpfr_cos, e_mpfr_sin): mpfr_sin_cos now\n\tallows NULL for one parameter.\n\n\t* mpfr/*: Update to 20010215.\n\t* mpfr/trunc.c: Use -DOPERATION scheme, and gmp mpn_zero_p.\n\t* mpfr/sqrt.c: Use plain mpn_sqrtrem, not mpn_sqrtrem_new.\n\t* mpfr/sqrtrem.c: Remove file.\n\t* mpfr/Makefile.am (libmpfr_a_SOURCES): Add isnan.c and set_ui.c,\n\tremove sqrtrem.c and srandom.h.\n\n\t* configfsf.guess: Update to 2001-02-13.\n\t* configfsf.sub: Update to 2001-02-16.\n\t* config.sub (j90, t90): Remove special handing, configfsf.sub now ok.\n\n\t* Makefile.am (MPF_OBJECTS): Add a couple of missing $U's.\n\n\t* tune/tuneup.c: Identify compiler used (GCC and Sun C so far).\n\n2001-02-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/mul_1.asm: Change `ld' to `lduw' and `st' to `stw'.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\n2001-02-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/mips.m4: New file.\n\t* configure.in (mips*-*-irix[6789]*): Use mips3/mips.m4.\n\n\t* mpn/powerpc64/sqr_diagonal.asm: New file.\n\n\t* 
mpn/mips3/sqr_diagonal.asm: New file.\n\n2001-02-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/sqr_diagonal.asm: New file.\n\n\t* mpn/generic/sqr_basecase.c: Remove declaration of mpn_sqr_diagonal.\n\tFix typo in header comment.\n\n2001-02-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul.c, mpn/generic/mul_n.c, gmp-impl.h: Use\n\tmpn_mul_basecase for squaring below new BASECASE_SQR_THRESHOLD.\n\t* tune/tuneup.c gmp-impl.h: Tune BASECASE_SQR_THRESHOLD.\n\n\t* Makefile.am (libgmp.la, libmp.la): Revert change to build from\n\tmpn/libmpn.la etc, go back to explicitly listed objects.\n\n\t* configure.in: Recognise sparc64-*-*, not just sparc64-*-linux*.\n\n2001-02-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/asm-defs.m4 (sqr_diagonal): New define_mpn.\n\n\t* mpn/alpha/sqr_diagonal.asm: New file.\n\n2001-02-11  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Low-level Functions): Note mpn_get_str clobbers its input\n\tplus 1 extra limb.\n\n\t* mpfr/add.c,agm.c,exp2.c,exp3.c,generic.c,log2.c,pi.c,print_raw.c,\n\tset_d.c,sin_cos.c,sqrtrem.c,sub.c: Apply some tweaks for K&R.\n\t* tests/mpz/reuse.c, tests/mpq/t-md_2exp.c, demos/pexpr.c,\n\tdemos/expr/t-expr.c: Ditto.\n\n\t* configure.in (HAVE_ABI_$abi): New define in config.m4.\n\n\t* gmp-impl.h (mpn_sqr_diagonal): Add prototype and define.\n\t* tune/speed.c,speed.h,common.c,many.pl: Add measuring of\n\tmpn_sqr_diagonal.\n\n\t* gmp.texi, acinclude.m4: Mention x86 solaris 2.7 has the reg->reg\n\tmovq bug the same as 2.6.\n\n\t* mpfr/Makefile.am (EXTRA_DIST): Add mpfr-test.h and mpf2mpfr.h.\n\n\t* mpn/x86/README: Merge contents of README.family.\n\t* mpn/x86/README.family: Remove file.\n\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add mode1o, gcd_finda,\n\tinvert_limb, sqr_diagonal; remove mod_1_rs; sort alphabetically.\n\n2001-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (gmp_mpn_functions_optional): List sqr_diagonal.\n\n\t* mpn/powerpc32/aix.m4: Use unnamed csects.\n\t* 
mpn/powerpc64/aix.m4: Likewise.\n\n\t* acconfig.h: Add #undef of mpn_sqr_diagonal.\n\tRemove lots of spacing.\n\n\t* configure.in (syntax testing section): Match power* instead of\n\tpowerpc*.\n\t* mpn/power: Convert files to `.asm'.\n\tPrefix umul_ppmm and sdiv_qrnnd.\n\tUpdate some comments.\n\n2001-02-09  Kevin Ryde  <kevin@swox.se>\n\n\t* acconfig.h: Add HAVE_NATIVE_mpn_modexact_1_odd and\n\tHAVE_NATIVE_mpn_modexact_1c_odd.\n\n\t* configure.in (CCAS): Don't override a user selection.\n\n\t* mpq/cmp_ui.c: DIVIDE_BY_ZERO if den2==0.\n\n2001-02-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/sqr_basecase.c: Use mpn_sqr_diagonal when appropriate.\n\n2001-02-07  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Low-level Functions): mpn_preinv_mod_1 now undocumented.\n\n\t* mpn/generic/random2.c (myrandom): Use rand() on mingw.\n\n\t* mpn/alpha/gmp-mparam.h: Update tuned parameters.\n\n2001-02-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/gmp-mparam.h: Retune.\n\n2001-02-05  Kevin Ryde  <kevin@swox.se>\n\n\t* Makefile.am (libgmp, libmp): Construct from mpn/libmpn.la etc rather\n\tthan explicitly listed objects.\n\n\t* urandom.h: Use rand() on mingw.\n\n\t* mpn/powerpc64/lshift.asm,addsub_n.asm: Use r1 not 1.\n\n2001-02-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/copyi.asm: New file.\n\t* mpn/ia64/copyd.asm: New file.\n\n2001-02-04  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/ev5/gmp-mparam.h, mpn/mips3/gmp-mparam.h,\n\tmpn/powerpc32/gmp-mparam.h, mpn/powerpc64/gmp-mparam.h,\n\tmpn/sparc64/gmp-mparam.h, mpn/x86/*/gmp-mparam.h:\n\tUpdate tuned parameters.\n\n\t* mpn/x86/i486: New directory.\n\t* configure.in (i486-*-*): Use it.\n\t* mpn/x86/i486/gmp-mparam.h: New file.\n\n\t* mpn/x86/pentium/mode1o.asm: New file.\n\t* mpn/x86/p6/mode1o.asm: New file.\n\n\t* tune/many.pl: Use $(ASMFLAGS_PIC) and $(CFLAGS_PIC).\n\n\t* gmp.texi (Integer Division): Another rewording of 2exp divisions.\n\n2001-02-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* 
mpn/arm/gmp-mparam.h: Tune.\n\n\t* mpn/ia64/popcount.asm: Put a `;;' break at end of main loop.\n\n\t* configure.in (arm*-*-*): Set gcc_cflags in order to pass\n\t$fomit_frame_pointer.\n\n\t* tests/mpz/t-mul.c (base_mul): Remove an unused variable.\n\n2001-02-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (TIME): New macro.\n\t(main): Use TIME--print timing more accurately.\n\t(setup_error_handler): Increase RLIMIT_DATA to 16 Mibyte.\n\n\t* longlong.h (arm): Add __CLOBBER_CC to add_ssaaaa and sub_ddmmss.\n\n2001-02-02  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Don't remove gmp-mparam.h and mpn source links under\n\t--no-create since in that case they're not re-created.\n\n\t* demos/expr: New directory.\n\t* Makefile.am (SUBDIRS, allprogs): Add it.\n\t* demos/expr/README, Makefile.am, expr.c, exprv.c, exprz.c, exprza.c,\n\texprq.c, exprqa.c, exprfa.c, exprf.c, exprfr.c, exprfra.c, expr.h,\n\texpr-impl-h.in, run-expr.c, t-expr.c: New files.\n\t* configure.in: Generate demos/expr/Makefile & demos/expr/expr-impl.h.\n\n\t* Makefile.am: Remove mpfr from main libgmp.\n\t* mpfr/Makefile.am: Build and install separate libmpfr.a.\n\t* mpfr/*: Update to mpfr 2001.\n\n\t* gmp-h.in (__GNU_MP_VERSION_MINOR): Bump to 2.\n\t* Makefile.am (libtool -version-info): Bump appropriately.\n\t* NEWS: Updates.\n\n\t* tune/divrem1div.c, tune/divrem1inv.c, tune/divrem2div.c,\n\ttune/divrem2inv.c: Renamed from divrem_1_div.c, divrem_1_inv.c,\n\tdivrem_2_div.c, divrem_2_inv.c, to be unique in DOS 8.3 filenames.\n\t* tune/Makefile.am (libspeed_la_SOURCES): Update.\n\n\t* mpn/x86/*/README, mpn/x86/README.family: Misc updates.\n\t* tune/README: Misc updates.\n\t* doc/configuration: Misc updates.\n\n\t* mpn/x86/pentium/mmx/gmp-mparam.h: Change UDIV_PREINV_TIME to\n\tUDIV_NORM_PREINV_TIME.\n\n\t* mpz/pprime_p.c: Use ASSERT_ALWAYS instead of abort.\n\n\t* rand.c (__gmp_rand_lc_scheme): Add \"const\".\n\t(struct __gmp_rand_lc_scheme_struct): Make astr \"const char *\".\n\n\t* 
demos/calc/calc.y, demos/calc/calclex.l: Add kron function.\n\n\t* tests/devel/try.c: Partial rewrite, new scheme of function types,\n\tallow result validation functions, add sqrtrem and jacobi testing.\n\t* tune/many.pl: Corresponding updates.\n\t* tests/devel/Makefile.am: Add a convenience rule for libtests.la.\n\n\t* tests/refmpz.c: New file.\n\t* tests/Makefile.am: Add it.\n\t* tests/misc.c (mpz_erandomb, mpz_erandomb_nonzero): New functions.\n\t* tests/tests.h: Add prototypes.\n\n\t* mpn/x86/k6/cross.pl: Add a couple more exceptions.\n\n\t* gmp.texi: Don't use @nicode{'\\0'}, it doesn't come out right in tex.\n\t(Introduction to GMP): Mention Cray vector systems.\n\t(Build Options): Describe --enable-mpfr, refer to its manual.  Add\n\tCrays under supported CPUs.\n\t(Debugging): Add notes on source file paths.\n\t(Autoconf): New section.\n\t(Assigning Integers): Note truncation by mpz_set_d, mpz_set_q and\n\tmpz_set_f.\n\t(Converting Integers): Note the size mpz_get_str allocates.\n\t(Floating-point Functions): Rewrite introduction, clarifying some\n\tpoints about precision handling.\n\t(Converting Floats): Note the size mpf_get_str allocates, and that it\n\tgives an empty string for zero.  
Add mpf_get_si and mpf_get_ui.\n\t(Float Comparison): Give the formula mpf_reldiff calculates.\n\t(Miscellaneous Float Functions): Add mpf_integer_p and mpf_fits_*_p.\n\t(Random Number Functions): Misc rewordings for clarity.\n\t(Random State Initialization): Ditto.\n\t(Custom Allocation): Remove note on deallocate_function called with 0,\n\tmisc rewording and clarifications.\n\t(Exact Remainder): New section.\n\t(Binary GCD): A few words on initial reduction using division.\n\t(Accelerated GCD): Refer to exact remainder section.\n\t(Extended GCD): Extra remarks on single versus double selection.\n\t(Jacobi Symbol): Update for mpz/jacobi.c rewrite and modexact_1_odd.\n\t(Modular Powering Algorithm): Refer to exact remainder section.\n\t(Assembler SIMD Instructions): Update remarks on MMX.\n\t(Contributors): Amend to \"Divide and Conquer\" division.\n\t(References): Tweak some formatting.  Add \"Proof of GMP Fast Division\n\tand Square Root Implementations\" by Paul Zimmermann.\n\n2001-01-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in: Don't ever pass -mips3; let ABI flags imply ISA.\n\n2001-01-31  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c: Remove unnecessary longlong.h.\n\t(speed_endtime): Add some extra diagnostics.\n\n\t* tests/mpz/t-fdiv_ui.c, tests/mpz/t-tdiv_ui.c: Use unsigned long for\n\tthe divisor, not mp_limb_t.\n\t* tests/mpz/t-jac.c (try_base): Use %llu for long long limb.\n\t* tests/trace.c: Add <string.h> for strlen.\n\n\t* tune/freq.c (speed_cpu_frequency_proc_cpuinfo): Ignore \"cycle\n\tfrequency\" of 0, allow \"BogoMIPS\" as well as \"bogomips\".\n\n\t* macos/Makefile.in: Add mpf/fits_s.c and mpf/fits_u.c objects.\n\n2001-01-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h: Add add_ssaaaa and sub_ddmmss for 64-bit sparc.\n\n2001-01-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/addmul_1.asm: Prefix registers with an `r'.\n\t* mpn/powerpc64/submul_1.asm: Likewise.\n\t* mpn/powerpc64/mul_1.asm: Likewise.\n\n\t* 
configure.in (alpha*-*-*): Amend last change to handle pca*.\n\n2001-01-29  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h (SPEED_ROUTINE_INVERT_LIMB_CALL): Don't let the\n\tcompiler optimize everything away.\n\n\t* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Measure\n\toperator_div, operator_mod, mpn_divrem_2_div, mpn_divrem_2_inv,\n\tmpn_sb_divrem_m3, mpn_sb_divrem_m3_div, mpn_sb_divrem_m3_inv,\n\tmpn_dc_divrem_sb_div, mpn_dc_divrem_sb_inv.\n\t* tune/divrem_2_div.c, tune/divrem_2_inv.c, tune/sb_div.c,\n\ttune/sb_inv.c: New files.\n\n\t* tune/tuneup.c, gmp-impl.h, tune/speed.h, tune/common.c,\n\ttune/Makefile.am: Tune SB_PREINV_THRESHOLD and DIVREM_2_THRESHOLD.\n\n\t* mpn/generic/divrem_2.c: Use new DIVREM_2_THRESHOLD.\n\t* mpn/generic/sb_divrem_mn.c: Use new SB_PREINV_THRESHOLD.\n\n\t* mpn/x86/p6/mmx/lshift.asm, mpn/x86/p6/mmx/rshift.asm: New files,\n\tjust m4 include()ing the P55 code.\n\t* configure.in (pentium[23]-*-*): Remove x86/pentium/mmx from path.\n\n2001-01-27  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_FUNCS): Add srand48.\n\t* tune/speed.c: Use this test.\n\n\t* acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): Allow \"egcs-\" prefix on gcc\n\t--version, warn if the format is unrecognised.\n\t(GMP_COMPARE_GE): Guard against empty $1 not only on last arg.\n\t(GMP_INIT, GMP_FINISH, GMP_PROG_M4): Obscure or eliminate literal\n\t\"dnl\"s since autoconf thinks they indicate faulty macros.\n\n\t* mpz/get_str.c, mpf/get_str.c: Make allocated string block exactly\n\tstrlen(str)+1 bytes.\n\t* mpz/dump.c, mpf/dump.c, tests/mpz/convert.c: Use this size when\n\tfreeing.\n\t* tests/mpf/t-conv.c: Ditto, and ensure x==0 is exercised.\n\n\t* tests/mpz/t-fits.c: New file.\n\t* tests/mpz/Makefile.am: Add it.\n\n\t* tests/mpf/t-fits.c: New file.\n\t* tests/mpf/t-get_si.c: New file.\n\t* tests/mpf/t-int.c: New file.\n\t* tests/mpf/Makefile.am: Add them.\n\n\t* mpf/fits_s.c: New file.\n\t* mpf/fits_u.c: New file.\n\t* mpf/get_si.c: New file.\n\t* 
mpf/get_ui.c: New file.\n\t* mpf/int_p.c: New file.\n\t* Makefile.am, mpf/Makefile.am: Add them.\n\t* gmp-h.in (mpf_fits_*_p, mpf_get_si, mpf_get_ui, mpf_integer_p): Add\n\tprototypes.\n\n\t* tests/memory.c (tests_allocate, tests_reallocate): Guard against\n\tsize==0.\n\n\t* tests/mpz/*.c, tests/mpq/*.c, tests/mpf/*.c: Uses tests_start and\n\ttests_end.\n\n\t* gmp-impl.h (USE_LEADING_REGPARM): Fix conditionals.\n\n2001-01-23  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, mpn/Makeasm.am (ASMFLAGS_PIC): New substitution,\n\tallowing -DPIC to be suppressed on cygwin.\n\t(CFLAGS_PIC): New substitution, use it and $(CCAS) directly, rather\n\tthan $(LIBTOOL), avoiding a problem with FreeBSD 2.2.8.\n\n\t* mpn/x86/k6/mode1o.asm, mpn/x86/k7/mode1o.asm: Remove an unnecessary\n\t+[.-L(here)] from _GLOBAL_OFFSET_TABLE_, avoids a segv from gas 1.92.3.\n\t* mpn/x86/README.family: Add notes on the problem.\n\n2001-01-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (alpha*-*-*): Default `flavour' to ev4.\n\n2001-01-19  Kevin Ryde  <kevin@swox.se>\n\n\t* assert.c, gmp-impl.h (__gmp_assert_fail): Change return type to\n\tvoid, since it's no longer used in expressions.\n\n\t* mpn/x86/addsub_n.S: Remove file, since it doesn't work and it upsets\n\ttune/many.pl.\n\n\t* mpz/jacobi.c: Rewrite, but still binary algorithm; accept zero and\n\tnegative denominators; merge mpz_jacobi and mpz_legendre, add\n\tmpz_kronecker; use mpn directly, add special cases for size==1.\n\t* gmp.texi (Number Theoretic Functions): Update.\n\t* gmp-h.in (mpz_kronecker): Add prototype.\n\t* gmp-impl.h (USE_LEADING_REGPARM): New macro.\n\t* tests/mpz/t-jac.c: Test mpz_kronecker.\n\t* mpz/legendre.c: Remove file.\n\t* Makefile.am, mpz/Makefile.am: Update.\n\n\t* longlong.h (alpha count_leading_zeros): Use __attribute__ ((const))\n\twhen possible, add parameter to prototype.\n\t(ia64 udiv_qrnnd): Use for all compilers, not just gcc.\n\t(pentium count_trailing_zeros): Use count_leading_zeros.\n\n\t* 
acinclude.m4 (GMP_C_ATTRIBUTE_CONST, GMP_C_ATTRIBUTE_NORETURN): New\n\tmacros.\n\t* configure.in: Use them.\n\t* gmp-impl.h (ATTRIBUTE_CONST, ATTRIBUTE_NORETURN): New macros.\n\t(mpn_invert_limb): Add ATTRIBUTE_CONST.\n\t(__gmp_assert_fail): Add ATTRIBUTE_NORETURN.\n\n2001-01-18  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-h.in, gmp-impl.h (__gmp_allocate_func, __gmp_reallocate_func,\n\t__gmp_free_func): Move prototypes from gmp-impl.h to gmp-h.in, for the\n\tbenefit of gmp++.h.\n\n\t* gmp-impl.h, tests/misc.c, tests/tests.h: Move MPZ_SET_STR_OR_ABORT\n\tand MPF_SET_STR_OR_ABORT to mpz_set_str_or_abort and\n\tmpf_set_str_or_abort in libtests.\n\t* tests/mpz/convert.c, tests/mpz/t-bin.c, tests/mpz/t-get_si.c,\n\ttests/mpz/t-jac.c, tests/mpz/t-misc.c, tests/mpq/t-md_2exp.c,\n\ttests/mpq/t-set_f.c, tests/mpf/t-conv.c, tests/mpf/t-misc.c: Update.\n\n\t* mpn/generic/sqrtrem.c: Use MPN_COPY_INCR (for when rp==NULL).\n\n\t* tests/mpz/reuse.c: Only run mpz_divexact_gcd on positive divisors.\n\n2001-01-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (main): Accept -vml option.\n\t(fns): List `hamdist', `pow', `nextprime'.\n\t(mpz_eval_expr): Return -1 for `popc' of negative.\n\t(mpz_eval_expr): Handle `hamdist', `pow', `nextprime'.\n\n2001-01-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/ev5/mode1o.c: New file.\n\n\t* tune/freq.c (speed_cpu_frequency_measure): Check cycles_works_p\n\tbefore running speed_cyclecounter.\n\t* tune/speed.h (cycles_works_p): Add prototype.\n\n2001-01-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/rand/t-rand.c (farr): Fix typo.\n\t(zarr): Fix typo.\n\n2001-01-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/kronsz.c: Don't depend on right shifting a negative.\n\n\t* mpn/x86/gmp-mparam.h: New file.\n\n\t* mpn/x86/pentium/mmx/mul_1.asm: New file.\n\n2001-01-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/kronsz.c: Temporary workaround for Cray right shift oddities.\n\tExplicitly compare against zero in tests.\n\n2001-01-10  Kevin Ryde  
<kevin@swox.se>\n\n\t* mpz/kronzs.c: Don't depend on right shifting a negative.\n\n2001-01-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/t-constants.c: Disable some undefined tests.\n\t(CHECK_MAX_S): Remove workaround for gcc 2.95.2 bug recently added.\n\n2001-01-09  Kevin Ryde  <kevin@swox.se>\n\n\t* tests/t-constants.c: Add more diagnostics.\n\t(CHECK_MAX_S): Fix for gcc 2.95.2 -mpowerpc64 -maix64.\n\n\t* mpn/x86/k6/mode1o.asm: New file.\n\t* mpn/x86/k7/mode1o.asm: New file.\n\n\t* mpn/asm-defs.m4 (modexact_1_odd, modexact_1c_odd): New define_mpn's.\n\t(__clz_tab, modlimb_invert_table, PROLOGUE, EPILOGUE): Add asserts for\n\tGSYM_PREFIX.\n\t* mpn/x86/x86-defs.m4 (Zdisp): Add a movzbl.\n\n\t* tests/mpz/t-jac.c (check_a_zero): New test.\n\t(check_squares_zi): Fix to use (a^2/b), not (a*b/b); revert last\n\tchange avoiding a,b=0, both are fine.\n\t(try_2den): Don't use mpz_kronecker_ui for the expected answer.\n\t(try_*): Call abort rather than exit.\n\n\t* mpz/kronzu.c, mpz/kronzs.c: Fix for a=0.\n\n\t* tune/tuneup.c (USE_PREINV_MOD_1): Fix to use new DATA_HIGH_LT_R.\n\n2001-01-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* urandom.h: Amend 2000-11-21 change to also handle cygwin.\n\n2001-01-08  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/many.pl: Updates for move to tests/devel, add modexact_1_odd,\n\tdon't assume C files can't have carry-in entrypoints, remove\n\t$(TRY_TESTS_OBJS) now in libtests.\n\n\t* tests/devel/try.c, tests/refmpn.c, tests/tests.h: Remove\n\tmpn_mod_1_rshift testing.\n\n\t* tune/tuneup.c (fft_step_size): Test for overflow using the actual\n\tmp_size_t, don't use BITS_PER_INT.\n\n\t* tune/speed.c (r_string): \"r\" is a limb, use BITS_PER_MP_LIMB and\n\tchange LONG_ONES to LIMB_ONES.\n\t* tune/time.c (M_2POWU): Use INT_MAX rather than BITS_PER_INT.\n\n\t* extract-dbl.c (BITS_PER_PART): Use BITS_PER_MP_LIMB not\n\tBITS_PER_LONGINT.\n\n\t* mpz/inp_raw.c, mpz/out_raw.c: Add private defines of BITS_PER_CHAR.\n\t* mpz/fac_ui.c, 
tests/mpz/t-fac_ui.c: Don't use BITS_PER_LONGINT.\n\t* tests/mpz/t-get_si.c: Don't use BITS_PER_LONGINT, do the LONG_MAX\n\ttests with some explicit code.\n\n\t* mpn/*/gmp-mparam.h, acinclude.m4, tests/t-constants.c\n\t(BITS_PER_LONGINT, BITS_PER_INT, BITS_PER_SHORTINT, BITS_PER_CHAR):\n\tRemove defines, remove probings, remove tests.\n\n\t* tune/tuneup.c (MODEXACT_1_ODD_THRESHOLD): Add tuning.\n\n\t* tune/speed.c,speed.h,common.c: Add measuring of mpn_modexact_1_odd,\n\tmpn_gcd_finda, and an \"N\" form for mpn_gcd_1.\n\n\t* tests/mpz/t-jac.c (check_squares_zi): Ensure random a,b != 0.\n\n2001-01-07  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (gmp_mpn_functions): Add mode1o, remove mod_1_rs.\n\n\t* mpn/generic/mod_1_rs.c: Remove file, no longer needed.\n\t* gmp-h.in (mpn_mod_1_rshift): Remove prototype and define.\n\n\t* mpq/set_f.c: Use MPN_STRIP_LOW_ZEROS_NOT_ZERO.\n\n\t* mpz/kronzu.c, mpz/kronzs.c, mpz/kronuz.c, mpz/kronsz.c: Use\n\tmpn_modexact_1_odd, new style MPN_STRIP_LOW_ZEROS_NOT_ZERO, and new\n\tJACOBI macros.  
Various rearrangements supporting all this.\n\n\t* mpn/generic/gcd_1.c: Use mpn_modexact_1_odd, reduce u%v if u much\n\tbigger than v when size==1, some rearrangements supporting this.\n\n\t* gmp-impl.h (JACOBI_*): More macros, add some casts to \"int\".\n\t(MPN_STRIP_LOW_ZEROS_NOT_ZERO): Add a \"low\" parameter.\n\t(mpn_modexact_1_odd, mpn_modexact_1c_odd): Add prototype and defines.\n\t(MODEXACT_1_ODD_THRESHOLD): New threshold.\n\t(MPN_MOD_OR_MODEXACT_1_ODD, JACOBI_MOD_OR_MODEXACT_1_ODD): New macros.\n\n\t* mpn/generic/mode1o.c: New file.\n\n\t* tests/mpz/reuse.c: Add testing of mpz_divexact_gcd.\n\t* tests/mpz/t-fac_ui.c: Use libtests for memory leak checking.\n\t* tests/mpz/t-fib_ui.c: Add a usage comment.\n\n\t* tests/mpz/bit.c: Use libtests.\n\t* tests/mpz/t-scan.c: Remove unused subroutines.\n\t* tests/devel/try.c: Use libtests, define PROT_NONE if the system\n\tdoesn't.\n\n\t* tests/spinner.c, tests/x86check.c: Use tests.h.\n\t* tests/trace.c: Use tests.h, add mpf_trace.\n\t* tests/refmpn.c: Use tests.h, add refmpn_malloc_limbs_aligned,\n\trefmpn_tstbit, refmpn_neg.\n\n\t* tune/common.c, tune/speed.h: Update for functions moved to\n\ttests/misc.c.\n\n\t* tune/Makefile.am, tests/mpz/Makefile.am, tests/mpq/Makefile.am,\n\ttests/mpf/Makefile.am: Use tests/libtests.la.\n\n\t* configure.in (AC_OUTPUT): Update for new directories.\n\t(x86 CALLING_CONVENTIONS_OBJS): Use .lo for libtests.la, allow\n\tansi2knr on x86check.c.\n\n\t* tests/Makefile.am: Establish new libtests.la convenience library,\n\tadd mpz, mpq, mpf, mpbsd subdirectories.\n\t* tests/tests.h: New file.\n\t* mpn/tests/ref.h,try.h: Remove files, now in tests.h.\n\n\t* tests/mpf/ref.c: Move to tests/refmpf.c, rename functions to refmpf.\n\t* tests/mpf/t-add.c, tests/mpf/t-sub.c: Use libtests.\n\t* tests/mpf/Makefile.am: Update.\n\n\t* tests/memory.c: New file.\n\t* tests/misc.c: New file, a few subroutines from the test programs.\n\n\t* mpz/tests, mpq/tests, mpf/tests, mpbsd/tests: Move directories 
to\n\ttests/mpz etc.\n\t* mpz/Makefile.am, mpq/Makefile.am, mpf/Makefile.am, mpbsd/Makefile.am\n\t(SUBDIRS): Remove.\n\n\t* tests/devel: New directory.\n\t* mpn/tests/*.c: Move programs to tests/devel.\n\t* mpn/tests/Makefile.am, mpn/tests/README: Move to tests/devel, update.\n\n\t* mpn/tests/ref.c: Move to tests/refmpn.c.\n\t* mpn/tests/spinner.c,trace.c,x86call.asm,x86check.c: Move to tests\n\tdirectory.\n\n\t* tests/t-constants.c: Add checks of HIGHBIT, MAX and MIN constants,\n\tsimplify ANSI vs K&R stringizing, use correct printf format types, do\n\tall tests before aborting.\n\n2001-01-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/gmp-mparam.h: Retune.\n\n2001-01-05  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (mp.h): Only create this under --enable-mpbsd.\n\n\t* demos/calc: New subdirectory, move demos/calc* to it.\n\t* demos/calc/Makefile.am: New file, split from demos/Makefile.am.\n\t* demos/Makefile.am: Update.\n\t* configure.in (AC_OUTPUT): Add demos/calc/Makefile.\n\n\t* tests/t-constants.c (CALC_BITS_PER_TYPE etc): Use a run-time test\n\tfor how many bits work in a give type, don't assume bits==8*sizeof.\n\n2001-01-04  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/fits_s.c, mpz/fits_u.c: New files, split from fits.c, use plain\n\tUINT_MAX etc, not MPZ_FITS_UTYPE_SDT etc.\n\t* mpz/fits.c: Remove file.\n\t* mpz/Makefile.am, macos/Makefile.in: Update.\n\n\t* gmp-impl.h (UNSIGNED_TYPE_MAX etc): Remove these generic forms.\n\t(MPZ_FITS_[SU]TYPE_SDT): Remove these.\n\t(UINT_MAX etc): Provide a full set of defaults.\n\t* gmp-h.in (__GMP_MP_SIZE_T_INT): New define.\n\n\t* mpz/tests/t-scan.c: New file.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/scan0.c, mpz/scan1.c: Rewrite, don't read beyond allocated\n\tmemory, support negatives, return ULONG_MAX for no bit found.\n\t* gmp.texi (Integer Logic and Bit Fiddling): Update.\n\n2001-01-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/dive.c: Generate test operands using new 
random functions.\n\t* mpz/tests/io.c: Likewise.\n\t* mpz/tests/logic.c: Likewise.\n\t* mpz/tests/t-2exp.c: Likewise.\n\n\t* stack-alloc.c (__gmp_tmp_alloc): Round `now' to required alignment.\n\n\t* stack-alloc.h (__TMP_ALIGN): Append `L'.\n\n\t* gmp-impl.h: For Cray, #include limits.h.\n\t(LONG_MIN): New #define.\n\t(ULONG_HIGHBIT): #define in terms of ULONG_MAX.\n\t(LONG_HIGHBIT): #define as LONG_MIN.\n\t(USHRT_MAX): New name for USHORT_MAX.\n\t(SHRT_MAX): New name for SHORT_MAX.\n\t(SHRT_MIN): New #define.\n\t(USHORT_HIGHBIT,SHORT_HIGHBIT): Removed.\n\n\t* mpbsd/tests/t-misc.c (check_itom [data]): *SHORT* => *SHRT*;\n\tremove code disabling a test for Cray.\n\n\t* tests/t-constants.c (CHECK_CONSTANT): Cast parameters to long.\n\n\t* mpn/generic/mul_n.c (mpn_kara_sqr_n): Remove unused variable `t'.\n\t(mpn_kara_mul_n): Likewise.\n\n\t* mpz/fac_ui.c (MPZ_SET_1_NZ): Actually use `__z'.\n\n\t* mpz/tests/t-jac.c\n\t(main, check_squares_zi): Generate test operands using new random\n\tfunctions.\n\n\tAll changes below on this date for enabling `make; make check'\n\twith C++ compilers:\n\n\t* mpz/tests/t-pow_ui.c (debug_mp, ref_mpz_pow_ui): Provide prototypes.\n\n\t* mpz/tests/t-mul.c (debug_mp, base_mul, ref_mpz_mul):\n\tProvide prototypes.\n\t(dump_abort): Provide prototype and declare properly for C++.\n\n\t* mpz/tests/t-jac.c: #include stdlib.h and sys/time.h.\n\n\t* mpz/tests/t-fdiv.c\n\t(dump_abort): Provide prototype and declare properly for C++.\n\t(debug_mp): Provide prototype.\n\t* mpz/tests/t-fdiv_ui.c: Likewise.\n\t* mpz/tests/t-gcd.c: Likewise.\n\t* mpz/tests/t-powm.c: Likewise.\n\t* mpz/tests/t-powm_ui.c: Likewise.\n\t* mpz/tests/t-sqrtrem.c: Likewise.\n\t* mpz/tests/t-tdiv_ui.c: Likewise.\n\t* mpz/tests/t-tdiv.c: Likewise.\n\n\t* mpz/tests/t-2exp.c: #include stdlib.h and sys/time.h.\n\tRemove #include of longlong.h.\n\n\t* mpz/tests/io.c: #include config.h, stdlib.h, sys/time.h, and\n\tconditionally unistd.h.\n\n\t* mpz/tests/dive.c: #include stdlib.h 
and sys/time.h.\n\t(dump_abort): Provide prototype and declare properly for C++.\n\t(debug_mp): Provide prototype.\n\t* mpz/tests/logic.c: Likewise.\n\n\t* mpz/tests/convert.c (debug_mp): Provide prototype.\n\t* mpz/tests/t-root.c (debug_mp): Likewise.\n\n\t* mpz/tests/bit.c: #include stdlib.h and sys/time.h.\n\n\t* mpq/tests/t-get_d.c: #include stdlib.h and sys/time.h.\n\t(dump): Provide prototype and declare properly for C++.\n\n\t* mpq/tests/t-cmp_ui.c: #include stdio.h, stdlib.h and sys/time.h.\n\t(ref_mpq_cmp_ui): Declare properly for C++.\n\n\t* mpq/tests/t-cmp.c: #include stdlib.h and sys/time.h.\n\t(ref_mpq_cmp): Declare properly for C++.\n\t(dump): Delete unused function.\n\n\t* mpf/random2.c (myrandom): New function.\n\t(mpf_random2): Use it.\n\n\t* mpn/generic/random2.c: #include stdlib.h (for random/mrand48).\n\t(myrandom): New function.\n\t(mpn_random2): Use it.\n\n\t* mpf/tests/t-add.c: #include stdlib.h and sys/time.h.\n\t(oo): Remove unused function.\n\t* mpf/tests/t-conv.c: Likewise.\n\t* mpf/tests/t-sub.c: Likewise.\n\t* mpf/tests/t-dm2exp.c: Likewise.\n\t* mpf/tests/t-muldiv.c: Likewise.\n\t* mpf/tests/t-sqrt.c: Likewise.\n\n\t* mpf/tests/reuse.c: #include stdlib.h and sys/time.h.\n\tUse PROTO on some typedefs.\n\t(oo): Remove function.\n\t(dump_abort): Call mpf_dump instead of oo.\n\n\t* mpf/set_str.c: #include stdlib.h (for strtol).\n\n\t* mpf/random2.c: #include stdlib.h (for random/mrand48).\n\t* mpn/alpha/udiv_arnnd: File deleted.\n\n\t* Remove K&R function headers.\n\n2001-01-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul.c: Clean up spacing and indentation.\n\n\t* mpn/generic/mul_fft.c (mpn_fft_add_modF): Use mpn_decr_u.\n\tClean up spacing and indentation.\n\n\t* extract-dbl.c: Generalize to handle smaller limb sizes.\n\n2001-01-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpbsd/mout.c: Output newline after \"0\".\n\n2000-12-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* ltmain.sh: Remove space between `#!' 
and `$SHELL' when generating\n\t`libtool'.\n\n\t* mpbsd/tests/t-misc.c (check_itom): Exclude test for all Cray\n\tvector systems.  Correct comment.\n\n2000-12-31  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ABI and ISA): New enough gcc needed for mips n32 etc, gcc\n\t2.95 needed for sparc 64-bit ABI, gcc 2.8 needed for -mv8plus.\n\n\t* configure.in ([cjt]90,sv1-cray-unicos*): Preserve user specified\n\tMPN_PATH, amend test program indenting.\n\t(none-*-*): Add -DNO_ASM to gcc to disable longlong.h asm macros in\n\tgeneric C.\n\n\t* config.sub (j90, t90): Preserve these, don't let configfsf.sub turn\n\tthem into c90.\n\n\t* config.guess (m68k-*-nextstep*,m68k-*-openstep*): Don't transform\n\tm68k to m68020, since m68k is already interpreted as 68020.\n\n2000-12-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/neg.c: Rewrite, use mpn, avoid denominator copy if unnecessary.\n\n\t* mpz/tstbit.c: Rewrite, slightly simplified.\n\t* mpz/tests/bit.c (check_tstbit): New test, and add a couple more\n\tdiagnostics elsewhere.\n\n\t* configure.in (x86 gcc_cflags_cpu): Add -m486 for gcc 2.7.2.\n\t(ccbase): Only use a known compiler in eval statements (avoids\n\tproblems with non-symbol characters).\n\t(ccbase): Use GMP_PROG_CC_IS_GNU to identify gcc installed under a\n\tdifferent name.\n\t(cclist): Use same style $abi as other variables.\n\n\t* acinclude.m4 (GMP_PROG_CC_IS_GNU): New macro.\n\t(GMP_GCC_MARCH_PENTIUMPRO): Use $ccbase to identify gcc.\n\t(GMP_ASM_TYPE): Define TYPE to empty, not \"dnl\", when no .type needed.\n\t(GMP_ASM_SIZE): Ditto for SIZE, which ensures EPILOGUE on the last\n\tline of a file doesn't leave a tab and no newline.\n\t(GMP_ASM_UNDERSCORE): Add a prototype for C++.\n\n\t* configure.in (sys/mman.h, mprotect): New tests.\n\t* mpn/tests/try.c: Use them, and HAVE_UNISTD_H too.\n\n\t* configure.in (getopt.h): Remove test.\n\t* tune/speed.c, mpn/tests/try.c (getopt.h): Remove include, since\n\tplain getopt() is in <unistd.h>.\n\n\t* configure.in, gmp-h.in 
(mips*-*-irix6*): Set limb_n32=longlong\n\trather than using _ABIN32.\n\n2000-12-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/reuse.c: Rename dump_abort => dump.\n\t* mpz/tests/reuse.c: Generate operands using gmp_rand*.\n\t* mpz/tests/convert.c: Likewise.\n\n\t* configure.in: Detect T90-ieee systems; move Cray path\n\tselection to after AC_PROG_CC.  Invoke AC_PROG_CPP.\n\t* mpn/cray/cfp: New directory.  Move cfp specific files here.\n\t* mpn/cray/cfp/mulwwc90.s: New file.\n\t* mpn/cray/cfp/mulwwj90.s: New file.\n\t* mpn/cray/mulww.s: Delete.\n\n2000-12-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/cray/ieee/mul_1.c: New file.\n\t* mpn/cray/ieee/addmul_1.c: New file.\n\t* mpn/cray/ieee/submul_1.c: New file.\n\t* mpn/cray/ieee/gmp-mparam.h: New file.\n\n\t* mpn/cray/gmp-mparam.h: Disable UMUL_TIME and UDIV_TIME.\n\n\t* mpn/cray/hamdist.c: New file.\n\t* mpn/cray/popcount.c: New file.\n\t* mpn/cray/rshift.c: New file.\n\t* mpn/cray/lshift.c: New file.\n\n\t* longlong.h: Add count_leading_zeros for _CRAY.\n\tReorganize _CRAY stuff.\n\n2000-12-24  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (alpha*-cray-unicos*): Disable SPEED_CYCLECOUNTER_OBJ,\n\tas tune/alpha.asm doesn't suit.\n\n\t* mpn/generic/sqrtrem.c, mpz/pow_ui.c, mpz/powm_ui.c, mpf/get_str.c,\n\tmpf/set_str.c: Use mpn_sqr_n when applicable, not mpn_mul_n.\n\n2000-12-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_fft.c: Reformat.\n\t(mpn_fft_neg_modF): Remove.\n\t(mpn_fft_mul_2exp_modF): Inline mpn_fft_neg_modF.\n\n\t* mpn/cray/gmp-mparam.h: Retune.\n\n\t* configure.in (*-cray-unicos*): Pass `-O3 -htask0'.\n\t(vax*-*-*): Fix typo.\n\n\t* mpn/cray/mul_1.c: Use dynamic arrays, get rid of TMP_*.\n\t* mpn/cray/addmul_1.c: Likewise.\n\t* mpn/cray/submul_1.c: Likewise.\n\t* mpn/cray/add_n.c: Likewise.\n\t* mpn/cray/sub_n.c: Likewise.\n\n\t* configure.in (default cc_cflags,cc_64_cflags): Remove -g/add -O.\n\t(mips*-*-irix[6789]*]): Remove -g from cc_*_cflags.\n\n2000-12-22  Torbjorn 
Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c: Delete K&R function headers.\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n): Clean up type confusion\n\tbetween mp_limb_t and mp_size_t.\n\t(mpn_kara_sqr_n): Likewise.\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n): Use mpn_incr_u.\n\t(mpn_kara_sqr_n): Likewise.\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n): Change handling of `sign'\n\tto work around GCC 2.8.1 MIPS bug.\n\n\t* configure.in (implied alpha*-cray-unicos*): Remove -g from cc_cflags.\n\n2000-12-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/invert_limb.asm: Simplify a bit.\n\tAdd handling of bigend systems.\n\t* mpn/alpha/unicos.m4: Define `bigend'.\n\t* mpn/alpha/default.m4: Define `bigend' (to expand to nothing).\n\n\t* tests/t-constants.c (CHECK_CONSTANT): Print using %lx.\n\n\t* mpn/alpha/gmp-mparam.h: Remove sizes for plain C types.\n\t* mpn/alpha/ev5/gmp-mparam.h: Likewise.\n\t* mpn/alpha/ev6/gmp-mparam.h: Likewise.\n\n\t* mpn/alpha/unicos.m4: Define LEA.\n\t* mpn/alpha/default.m4: Likewise.\n\t* mpn/alpha/invert_limb.asm: Use LEA for loading symbolic addresses.\n\t* mpn/alpha/cntlz.asm: Likewise.\n\n\t* mpn/alpha/cntlz.asm: Don't use `ldbu', use slightly slower\n\t`ldq_u' + `extbl' instead.\n\n\t* mpn/alpha/unicos.m4: Define EXTERN.\n\t* mpn/alpha/default.m4: Define EXTERN (to expand to nothing).\n\t* mpn/alpha/cntlz.asm: Declare __clz_tab usign `EXTERN' (for the\n\tbenefit of Unicos).\n\n2000-12-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/alpha/unicos.m4 (GSYM_PREFIX): Define for the benefit of\n\t__clz_tab.\n\n2000-12-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h: Add udiv_qrnnd and count_leading_zeros for _CRAYMPP\n\tsystems.\n\n2000-12-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*sparc*-*-*): Remove -g from cc_cflags and acc_cflags.\n\n\t* mpn/generic/sqrtrem.c (mpn_sqrtrem): Separate `limb' values from\n\t`size' values.\n\n\t* configure.in (*-cray-unicos*): Add `-Wa,-B' to cc_cflags.\n\n\t* demos/pexpr.c 
(rstate): New variable.\n\t(main): Initialize rstate.\n\t(enum op_t): Add RANDOM.\n\t(fns): Add field for RANDOM.\n\t(mpz_eval_expr): Handle RANDOM.\n\n2000-12-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/sqrtrem.c: Rewrite by Paul Zimmermann, based on his\n\tKaratsuba Square Root algorithm.\n\t* gmp.texi (Square Root Algorithm): Update.\n\n\t* tune/many.pl: New file.\n\n\t* mpn/tests/try.c,ref.[ch] (mpn_preinv_mod_1, mpn_sb_divrem_mn,\n\tmpn_tdiv_qr, mpn_gcd_finda, mpn_kara_mul_n, mpn_kara_sqr_n,\n\tmpn_toom3_mul_n, mpn_toom3_sqr_n): Add testing.\n\t* mpn/tests/ref.c: Cast some \"0\"s in function calls.\n\n\t* mpn/x86/k7/mmx/mod_1.asm: Add preinv_mod_1 entrypoint, remove extra\n\tvariable for loop termination.\n\n\t* mpn/x86/p6/mmx/mod_1.asm: Remove file, in favour of the following.\n\t* mpn/x86/p6/mod_1.asm: New file.\n\n\t* mpn/x86/pentium/mod_1.asm: New file.\n\n2000-12-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (mips*-*-irix[6789]*): Pass options to compiler using\n\t`-Wc'.\n\n2000-12-18  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/pre_mod_1.asm: New file.\n\n\t* tune/tuneup.c (USE_PREINV_MOD_1): Tune this, rearrange mpn_divrem_1\n\tand mpn_mod_1 handling in support of it.\n\t* tune/Makefile.am: Consequent changes to divrem_1.c and mod_1.c.\n\n\t* gmp-impl.h (USE_PREINV_MOD_1, MPN_MOD_OR_PREINV_MOD_1): New macros.\n\t* mpn/generic/perfsqr.c, mpz/pprime_p.c: Use MPN_MOD_OR_PREINV_MOD_1.\n\n\t* configure.in: Let an asm mod_1 provide a preinv_mod_1 entrypoint.\n\n\t* mpn/alpha/default.m4: Remove some newlines, add some asserts.\n\t(r0 etc, f0 etc): Use defreg and deflit.\n\t(PROLOGUE, PROLOGUE_GP, EPILOGUE): Use GSYM_PREFIX.\n\t* mpn/alpha/unicos.m4: Remove some newlines, add some asserts.\n\t* mpn/alpha/invert_limb.asm: Remove unused second DATASTART parameter.\n\t* mpn/alpha/cntlz.asm: Use mpn_count_leading_zeros and __clz_tab.\n\n\t* mpn/asm-defs.m4 (changecom): Comments on portability.\n\t(__clz_tab, modlimb_invert_table): New macros, 
matching gmp-impl.h.\n\t(count_leading_zeros, count_trailing_zeros): New define_mpn's.\n\t(PROLOGUE etc): Comments on usage, add some asserts.\n\t(OPERATION_[lr]shift): Use m4_not_for_expansion, for the benefit of\n\tlorrshift multifunc.\n\n\t* mpn/Makeasm.am (RM_TMP): New variable controlling tmp-*.s\n\tremoval, for development purposes.\n\n\t* mpz/fac_ui.c: Fix for long long limb by using mpn_mul_1 not\n\tmpz_mul_ui, and note some possible enhancements.\n\n\t* mpz/tests/t-fac_ui.c: New test.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it.\n\t* macos/Makefile.in: Ditto, and add t-fib_ui too.\n\n\t* mpn/generic/[lr]shift.c: Remove some DEBUG code adequately covered\n\tby new parameter ASSERTs.\n\n\t* longlong.h (count_trailing_zeros): Assert x!=0.\n\n\t* doc/configuration: Updates for new configure things, add some notes\n\ton test setups.\n\n2000-12-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (*-*-aix): Pass -qmaxmem=20000 to xlc also for 64-bit\n\tcompiles.\n\t* configure.in: Disable shared libs for *-*-ultrix*.\n\n2000-12-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (powerpc*-*-*): Pass -Wa,-mppc when using gcc.\n\n\t* gmp-impl.h (_EXTERN_INLINE): #define different for GCC and other\n\tcompilers.\n\n\t* gmp-h.in (__gmp_inline): Remove.\n\t* mp-h.in: Likewise.\n\t* mpn/generic/gcd.c: Use `inline' instead of `__gmp_inline'.\n\n\t* configure.in (mips*-*-irix[6789]*): Define *_ldflags.\n\n2000-12-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/pre_mod_1.c: Use proper type for udiv_qrnnd\n\tparameter `dummy'.\n\n\t* mpn/generic/divrem_1.c: Use explicit `!= 0' in if statement.\n\t* mpn/generic/mod_1.c: Likewise.\n\n2000-12-14  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (mips-*-irix[6789]*): Transform to mips64.\n\t(m68k-*-nextstep* | m68k-*-openstep*): Transform to m68020.\n\n2000-12-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* tests/t-constants.c (main): Conditionalize use of PP_INVERTED.\n\n\t* mpn/mp_bases.c: Handle 
4-bit limbs.\n\t(main): Add code for generating tables.\n\n\t* mpn/generic/popham.c: Handle limb bitsizes of 4, 8, 16.\n\tSuffix all 32-bit constant with `L'.\n\tUse CNST_LIMB for 64-bit constants.\n\n2000-12-13  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (FIB_THRESHOLD): Defaults for 4,8,16 bits per limb, and\n\tan arbitrary fallback default.\n\t(modlimb_invert): Add efficient code for 8,16 (or 4) bits per limb.\n\n\t* configure.in (mips3, mips64): Don't bother with o32 (mips2 32-bit\n\tlimb) on IRIX 6.\n\n\t* Makefile.am (SUBDIRS): Put \"tests\" first so tests/t-constants.c is\n\trun first, to pick up any limb size mismatch.\n\n\t* tune/tuneup.c (DIVREM_1, MOD_1): Fix result values, were off by 1.\n\n\t* mpz/fib_ui.c (table1, table2): Add data for 4,8,16 bits per limb.\n\n2000-12-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (LIMBS_PER_DOUBLE): Define for any limb bitsize.\n\n2000-12-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mp_bases.c: Add tables for 8-bit and 16-bit limbs.\n\tRound existing `double' values properly.\n\n\t* gmp-h.in (__gmp_randstate_struct): Prefix field names with _mp_\n\tto keep out of user name space.\n\t(__gmp_randata_lc): Likewise.\n\t* randclr.c, randlc.c, randlc2x.c, randraw.c, randsd.c, randsdui.c:\n\tCorresponding changes.\n\n\t* gmp-impl.h (PP): #define for machines with BITS_PER_MP_LIMB\n\tof 2, 4, 8, and 16.\n\t(PP_FIRST_OMITTED): New, define for various BITS_PER_MP_LIMB.\n\t(PP_MASK): Remove.\n\t(PP_MAXPRIME): Remove.\n\n\t* mpn/generic/perfsqr.c: Generalize PP handling for machines with\n\tlimbs of < 32 bits.  
Allow PP_INVERTED to be undefined.\n\t* mpz/pprime_p.c: Likewise.\n\n2000-12-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_1.c: Declare parameters in C89 style.\n\n2000-12-10  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/Makefile.am (speed_LDFLAGS, speed_ext_LDFLAGS, tune_LDFLAGS):\n\tDon't use -all-static, as gcc 2.95.2 on i386 solaris 8 doesn't like\n\tit.\n\n\t* configure.in (mips3,mips64): Add ABI=64, name the others ABI=n32 and\n\tABI=o32.\n\t* mpn/mips3/gmp-mparam.h (BITS_PER_LONGINT): Remove #define and let\n\tconfigure determine it, since it varies with ABI=64 or ABI=n32.\n\t* gmp.texi (ABI and ISA): Update.\n\t(mpz_mod_ui): Remark that it's identical to mpz_fdiv_r_ui.\n\t(mpn_divexact_by3): Qualify a statement needing mp_bits_per_limb even.\n\n\t* mul_fft.c (mpn_fft_mul_modF_K etc): Patch by Paul Zimmermann to fix\n\tresults in certain cases of recursing into a further FFT.\n\n2000-12-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/cmpabs.c: Remove unused variable.\n\t* mpz/rrandomb.c: Likewise.\n\t* mpz/xor.c: Likewise.\n\n2000-12-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/gcdext.c: Handle double carry when computing s1.\n\tMerge two code blocks for computing s0 and s1.\n\n2000-12-07  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (hppa*-*-*): Remove -Aa -D_HPUX_SOURCE from\n\tcc_cflags/cppflags, and instead let AM_C_PROTOTYPES add it, or -Ae,\n\twhichever works.\n\n\t* configure.in (*-*-aix[34]*): Disable shared by default, but let\n\tthe user override that, if desired.\n\t* gmp.texi (Notes for Particular Systems): Update.\n\n2000-12-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpq/cmp_ui.c: Streamline.\n\n2000-12-06  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/divrem_1_div.c,divrem_1_inv.c,mod_1_div.c,mod_1_inv.c,\n\tgcdext_double.c: New files for measuring.\n\t* tune/Makefile.am (libspeed_la_SOURCES): Add them.\n\t* tune/speed.c,speed.h,common.c: Add measuring of them.\n\t(mpn_preinv_mod_1, mpz_jacobi, mpz_powm_ui): Add 
measuring.\n\n\t* speed.c (getopt_long): Don't use this, just plain getopt.\n\t* configure.in (getopt_long): Remove test.\n\n\t* gmp-impl.h (MPN_KARA_MUL_N_TSIZE, MPN_KARA_MUL_N_MINSIZE,\n\tMPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_MUL_N_MINSIZE): New macros, and\n\tassume toom3 square tsize was meant to be the same as the mul (both\n\tare overestimates).\n\t* tune/tuneup.c, mpn/generic/mul.c, mpn/generic/mul_n.c: Use them.\n\t* mpn/generic/mul_n.c (mpn_toom3_sqr_n): Fix an ASSERT to use\n\tTOOM3_SQR_THRESHOLD not TOOM3_MUL_THRESHOLD, add a few that might\n\tbe more realistic size checks.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_MUL_N_TSPACE etc): Use minsize.\n\n\t* mpn/generic/divrem_1.c: Partial rewrite, merge fractional part\n\tcalculation, skip a divide step in more cases, introduce\n\tDIVREM_1_NORM_THRESHOLD and DIVREM_1_UNNORM_THRESHOLD.\n\t* mpn/generic/mod_1.c: Partial rewrite, skip a divide step in more\n\tcases, introduce MOD_1_NORM_THRESHOLD, MOD_1_UNNORM_THRESHOLD.\n\t* longlong.h (UDIV_PREINV_ALWAYS): New define, set for alpha and ia64.\n\t* tune/tuneup.c (DIVREM_1_NORM_THRESHOLD, DIVREM_1_UNNORM_THRESHOLD,\n\tMOD_1_NORM_THRESHOLD, MOD_1_UNNORM_THRESHOLD): Tune these.\n\t* gmp-impl.h [TUNE_PROGRAM_BUILD]: Support for this.\n\t* tune/Makefile.am (TUNE_MPN_SRCS): Add divrem_1.c and mod_1.c.\n\n\t* gmp-impl.h (UDIV_NORM_PREINV_TIME): Renamed from UDIV_PREINV_TIME.\n\t* mpn/generic/perfsqr.c, mpn/generic/sb_divrem_mn.c,\n\tmpn/x86/*/gmp-mparam.h: Ditto.\n\t* gmp-impl.h (UDIV_UNNORM_PREINV_TIME): New define.\n\n\t* configure.in (AC_C_INLINE, HAVE_INLINE): New test and define.\n\t* gmp-impl.h (inline): Remove, use config.h.\n\t(_EXTERN_INLINE): Redefine based on HAVE_INLINE.\n\t(mpn_zero_p): Use HAVE_INLINE.\n\n\t* acinclude.m4 (GMP_PROG_AR, GMP_PROG_NM): Don't add flags to a user\n\tselected $AR or $NM.\n\n\t* tune/tuneup.c (all): Print how long the tuning took.\n\n\t* configure.in (AM_C_PROTOTYPES): Use this, not GMP_ANSI2KNR.\n\t* acinclude.m4 (GMP_ANSI2KNR): 
Remove.\n\n\t* Makefile.am (gmp.h, mp.h): In DISTCLEANFILES not CLEANFILES.\n\n\t* gmp-h.in (mpn_divmod, mpn_divmod_1, mpn_divexact_by3): Cast some\n\tzeros, for the benefit of K&R if long!=int.\n\n\t* mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add \"*\" for\n\tthe benefit of cray.\n\n\t* compat.c (mpn_divexact_by3, mpn_divmod_1): Return types should be\n\tmp_limb_t, not int, and need an actual \"return\".\n\n2000-12-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v8/supersparc/gmp-mparam.h: Retune.\n\t* mpn/alpha/gmp-mparam.h: Tune for 21064.\n\n\t* longlong.h: Reformat to avoid newlines within strings.\n\n\t* gmp-impl.h (inline): Disable if GCC has defined __STRICT_ANSI__.\n\n\t* configure.in: Do a `mkdir tune' before creating tune/sqr_basecase.c.\n\n\t* Makefile.am: Treat mp.h analogously to gmp.h.\n\n\tconfigure.in (*-*-aix): Pass -qmaxmem=20000 to xlc.\n\n\t* mp-h.in: Renamed from mp.h.\n\tAdd #define for _LONG_LONG_LIMB.\n\tMove some other fixes from gmp-h.in.\n\t* mp.h: Removed.\n\t* configure.in: Generate mp.h from mp-h.in like we handle\n\tgmp-h.in/gmp.h.\n\n2000-12-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* acinclude.m4: Fix typo testing for bad HP compiler.\n\n2000-12-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpbsd/tests/t-misc.c (check_itom): Exclude some tests for Cray\n\tCFP systems.\n\n\t* longlong.h (CRAYIEEE umul_ppmm): New.\n\n\t* mpn/cray/gmp-mparam.h (BITS_PER_SHORTINT): 32 => 64.\n\t(*_THRESHOLD): Tune.\n\n\t* configure.in: Disable shared libs for *-*-unicos*.\n\n2000-12-03  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, tune/Makefile.am: Create tune/sqr_basecase.c during\n\tconfigure, and use it unconditionally in $(nodist_tuneup_SOURCES).\n\tFixes a problem with sqr_basecase.lo under --disable-static.\n\n2000-12-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/tests/t-get_d.c (LOW_BOUND,HIGH_BOUND): #define for non-IEEE\n\tCray systems.\n\n\t* gmp-impl.h (union ieee_double_extract): Test for 
_CRAYIEEE.\n\n2000-11-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-mul.c (base_mul): Fix re-evaluation problems in macro\n\tinvocations.\n\t(ref_mpz_mul): New name from mpz_refmul.  Make static.\n\t(base_mul): New name for _mpn_mul_classic.\n\n2000-11-30  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Rewrite of CC/CFLAGS selection scheme, introduce a\n\tnotion of ABI, merge compiler and mpn path selection, add flags\n\tselection for AR and NM, let CC without CFLAGS work.\n\t(AC_PROG_CC): Use this, not GMP_SELECT_CC.\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Don't use AC_TRY_COMPILE, combine\n\tcc/cflags parameter.\n\t(GMP_PROG_CC_FIND, GMP_CHECK_CC_64BIT, GMP_PROG_CC_SELECT): Remove.\n\t* gmp.texi (Installing GMP): Updates for new scheme.\n\n\t* configure.in (AC_CANONICAL_HOST): Use this and $host, not $target.\n\t* acinclude.m4, acconfig.h, longlong.h, mpn/x86/x86-defs.m4,\n\tmpn/x86/k7/mmx/popham.asm: Ditto, renaming HAVE_TARGET_CPU to\n\tHAVE_HOST_CPU.\n\t* gmp.texi (Build Options, and elsewhere): Update.\n\n\t* acinclude.m4 (GMP_COMPARE_GE): New macro.\n\t(GMP_GCC_MARCH_PENTIUMPRO): Use it, add CC parameter, check for GCC.\n\t(GMP_HPC_HPPA_2_0): New macro, adapted from GMP_CHECK_CC_64BIT.\n\n\t* acinclude.m4 (GMP_PROG_AR): New macro, using AC_CHECK_TOOL, adding\n\tGMP flags.\n\t* configure.in: Use it\n\n\t* gmp-h.in: Renamed from gmp.h.\n\t(@define_LONG_LONG_LIMB@): Placeholder for instantiation.\n\t(__GNU_MP__): Bump to 3.\n\t* acinclude.m4 (GMP_VERSION): Get version from gmp-h.in.\n\t* configure.in: Create gmp.h from gmp-h.in to set _LONG_LONG_LIMB.\n\t* gmp.texi.h (ABI and ISA): Mention this.\n\t* acconfig.h (_LONG_LONG_LIMB): Remove undef.\n\t* Makefile.am: Distribute gmp-h.in, not gmp.h.\n\n\t* configure.in (AC_PROG_CPP, AC_PROG_INSTALL, AC_PROG_LN_S): Remove,\n\tdragged in by other macros.\n\t(gmp_asm_syntax_testing): Renamed from gmp_no_asm_syntax_testing.\n\t(AC_EXEEXT, AC_OBJEXT): Remove, done automatically by libtool.\n\t* configure.in, 
acinclude.m4: Remove \"\" from \"`foo`\", being\n\tunnecessary and not portable.\n\n\t* configure.in (GMP_LDFLAGS): New AC_SUBST flags for libtool link.\n\t(powerpc64*-*-aix*): Use for -Wc,-maix to fix shared library creation,\n\tbut can't build shared and static at the same time.\n\t* Makefile.am (libgmp_la_LDFLAGS, libmp_la_LDFLAGS): Use\n\t$(GMP_LDFLAGS).\n\t* gmp.texi (Notes for Particular Systems): Update AIX problem\n\n\t* configure.in (AC_CONFIG_LINKS): Use where needed, not via gmp_links.\n\t(gmp_srclinks): Build up as needed, not via gmp_links.\n\n\t* acinclude.m4 (GMP_INIT): Do CONFIG_TOP_SRCDIR and asm-defs.m4 here.\n\t* configure.in (asm-defs.m4): Consequent changes.\n\n\t* acinclude.m4 (GMP_INCLUDE_MPN): Using include_mpn(), replacing\n\tGMP_INCLUDE and GMP_SINCLUDE.\n\t* configure.in (gmp_m4postinc): Remove this scheme, use\n\tGMP_INCLUDE_MPN instead.\n\n\t* configure.in (*-*-sco3.2v5*): Force ac_cv_archive_cmds_need_lc=no,\n\tuntil libtool does this itself.\n\t* gmp.texi (Known Build Problems): Remove SCO -lc problem.\n\n\t* configure, INSTALL.autoconf, etc: Update to autoconf 2000-11-29.\n\t* acinclude.m4 (GMP_C_SIZES): Use AC_CHECK_SIZEOF.\n\t* gmp.texi (Known Build Problems): Remove version.c sed/config.h\n\tproblem, fixed.\n\n\t* ltmain.sh, aclocal.m4: Update to libtool 2000-11-25.\n\t* ltconfig: No longer required, but leave an empty dummy for automake.\n\t* gmp.texi (Known Build Problems): Remove SunOS native ar ranlib\n\tproblem, fixed.\n\n\t* */Makefile.in, aclocal.m4: Update to automake 2000-11-25.\n\t* mpbsd/tests/Makefile.am, mpfr/tests/Makefile.am (check_PROGRAMS):\n\tRemove dummy, no longer required.\n\t* mpbsd/tests/dummy.c, mpfr/tests/dummy.c: Remove files.\n\t* depcomp: Remove file, no longer required (with no-dependencies).\n\n\t* texinfo.tex: Update to 2000-11-09.\n\t* gmp.texi (Build Options): Mention PDF from gmp.texi.\n\t* Makefile.am (MOSTLYCLEANFILES): Add gmp.tmp, from new texinfo.tex.\n\n\t* gmp.texi (Build Options): List 
alphaev56, alphapca56, alphaev67,\n\thppa2.0n and power among supported CPUs.\n\n2000-11-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-mul.c: Increase max operand size from 2^17 bits\n\tto 2^19 bits.  Misc cleanups.\n\n2000-11-26  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/tuneup.c (FIB_THRESHOLD): Cope better with different speeds of\n\todd and even sizes.\n\n\t* longlong.h (alpha): Use udiv_qrnnd and count_leading_zeros on all\n\tcompilers, not just gcc.\n\n\t* pre_mod_1.c: Use conditional subtract to always skip a division.\n\t(UMUL_TIME, UDIV_TIME): Remove defaults, now in longlong.h.\n\n2000-11-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64w/gmp-mparam.h: Retune.\n\t* mpn/pa64/gmp-mparam.h: Retune.\n\t* mpn/sparc64/gmp-mparam.h: Retune.\n\n2000-11-22  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (ABOVE_THRESHOLD, BELOW_THRESHOLD): New macros.\n\t* mpn/generic/gcdext.c: Use them.\n\n\t* mpn/generic/gcdext.c [WANT_GCDEXT_ONE_STEP]: Force only one step.\n\t* tune/gcdextos.c, tune/gcdextod.c: New files, one step gcdext, single\n\tand double.\n\t* tune/Makefile.am (libspeed_la_SOURCES): Add them.\n\t(TUNE_MPN_SRCS): Remove gcdext.c.\n\t* tune/speed.h, tune/common.c, tune/speed.c: Add measuring.\n\t* tune/tuneup.c: Use for GCDEXT_THRESHOLD, plus check if double limb\n\tis ever better.  Should be more accurate, and hopefully faster.\n\n\t* tune/gcdext_single.c: New file, gcdext forced to single limbs.\n\t* tune/Makefile.am: Add it.\n\t* tune/speed.h, tune/common.c, tune/speed.c: Add measuring, and of\n\tinvert_limb.\n\n\t* tune/speed.h (speed_params r): Use mp_limb_t, not long.\n\t* tune/speed.h, tune/common.c: Don't \"switch\" on \"r\".\n\t* tune/speed.c (r_string): Accept limb sized constants.\n\t(choice scale): Add a scale factor (eg. 
\"2.33*mpn_add_n\").\n\t* tune/common.c (SPEED_ROUTINE_UDIV_QRNND_A): Default r to\n\t__mp_bases[10].big_base, being a full limb value.\n\n\t* configure.in (alphapca56*-*-*): Use ev5 mpn path.\n\t(am29000*-*-*): Remove this, leave the canonical a29k.\n\t(z8k*-*-*, z8kx*-*-*): Changed from z8000, since z8k is canonical.\n\t(gmp_mpn_functions_optional): Add invert_limb, use for alpha and ia64.\n\n\t* configure.in (alloca): Accept yes/no/detect, generate an error if\n\t\"yes\" but not available.\n\t* gmp.texi (Build Options): Update.\n\n\t* acinclude.m4 (GMP_TRY_ASSEMBLE): Make conftest.out available.\n\t(GMP_ASM_ALIGN_FILL_0x90): Use it.\n\n\t* acinclude.m4 (GMP_ASM_X86_MMX) [*-*-solaris*]: Check for solaris\n\t2.6 \"as\" movq bug.\n\t* gmp.texi (Notes for Particular Systems): Update x86 MMX note.\n\n2000-11-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/Makefile.am (EXTRA_DIST): List hppa2w.asm.\n\n\t* tune/hppa2.asm: Change level directive to \"2.0n\".\n\t* tune/hppa2w.asm: New file.\n\t* configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Separate out hppa2.0w.\n\n\t* mpn/pa64/gmp-mparam.h (BITS_PER_LONGINT): 64 => 32.\n\n2000-11-21  Kevin Ryde  <kevin@swox.se>\n\n\t* urandom.h (random): No prototype if glibc stdlib.h has already\n\tprovided it (avoids an int32_t/long conflict).\n\n\t* tune/Makefile.am (LDFLAGS): Use -all-static.\n\t(speed-dynamic): Dynamic linked version of speed.c.\n\t* tune/README: Update.\n\n\t* mpn/generic/gcd.c (find_a): Use native version if available.\n\t* acconfig.h (HAVE_NATIVE_mpn_gcd_finda): Add #undef.\n\t* gmp-impl.h (mpn_gcd_finda): Add prototype and define.\n\t* mpn/asm-defs.m4 (mpn_gcd_finda): New define_mpn.\n\t* tune/gcd_finda_gen.c: #undef any HAVE_NATIVE_mpn_gcd_finda.\n\t* configure.in (gmp_mpn_functions_optional): Add gcd_finda.\n\t* mpn/x86/k6/gcd_finda.asm: New file.\n\n\t* tune/tuneup.c (POWM_THRESHOLD): Slightly bigger size steps.\n\n\t* gmp-impl.h (__GMP_IMPL_H__): Protect against multiple inclusion.\n\t* tune/gcd_bin.c, 
tune/powm_mod.c, tune/powm_redc.c: Use #undef after\n\tgmp-impl.h to force thresholds.\n\t* tune/tuneup.c (print_define, fft): No need for #ifndefs on\n\tthresholds any more.\n\n2000-11-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-powm.c: Analogous changes as made 2000-11-12 to t-mul.c.\n\t* mpz/tests/t-powm_ui.c: Likewise.\n\t* mpz/tests/t-pow_ui.c: Likewise.\n\t* mpz/tests/t-root.c: Likewise.\n\n\t* configure.in [compiler switch]: Pass \"-Aa -D_HPUX_SOURCE\" to cc for\n\tall hppa versions.\n\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.S: Reference data using PC relative\n\taddressing (was r19 relative addressing).\n\n2000-11-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* rand.c: (__gmp_rand_lc_scheme): Convert strings to hexadecimal.\n\t(gmp_randinit): Expect strings in hexadecimal.\n\n2000-11-18  Kevin Ryde  <kevin@swox.se>\n\n\t* configfsf.guess, configfsf.sub: Update to 2000-11-16.\n\t* config.guess (alpha*-*-openbsd*): Do exact cpu detection.\n\n2000-11-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-fdiv.c: Analogous changes as made 2000-11-12 to t-mul.c.\n\t* mpz/tests/t-tdiv_ui.c: Likewise.\n\t* mpz/tests/t-fdiv_ui.c: Likewise.\n\t* mpz/tests/t-sqrtrem.c: Likewise.\n\t* mpz/tests/t-gcd.c: Likewise.\n\n2000-11-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makeasm.am: New file, splitting out assembler rules.\n\t* mpn/Makefile.am, tune/Makefile.am: Use it.\n\n\t* mpn/Makefile.am (@CPP@): Remove this, automake already gives it.\n\n\t* configure.in (AC_CHECK_LIBM): New test, and AC_SUBST it.\n\t* Makefile.am (MPFR_LIBADD_OPTION): Use it.\n\t* demos/Makefile.am (qcn_LDADD): Ditto.\n\t* tune/Makefile.am (libspeed_la_LIBADD): Ditto.\n\t* tests/rand/Makefile.am (libstat_la_LIBADD): Ditto.\n\n\t* tune/time.c (timeval_diff_secs): Better calculation.\n\t(read_real_time): New measuring method for AIX power/powerpc.\n\t(speed_endtime): Protect against negative times.\n\t* tune/common.c (speed_measure): Protect against big reps.\n\t* tune/freq.c 
(speed_cpu_frequency_measure_one): Better timeval diff.\n\t* tune/speed.h (TIMEVAL_DIFF_SEC,USEC): Remove macros.\n\t* configure.in: (sys/systemcfg.h, read_real_time): New tests.\n\n2000-11-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-mul.c: Remove #include urandom.h.\n\t* mpz/tests/t-tdiv.c: Likewise.\n\n\t* configure.in [SPEED_CYCLECOUNTER_OBJS switch]:\n\tDeclare hppa.asm as just 32 bits (cyclecounter_size=1).\n\n2000-11-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-mul.c\n\t(main): Generate random numbers using gmp_rand* functions.\n\t(main): Distribute random numbers non-uniformly.\n\t(main): Seed by current time if GMP_CHECK_RANDOMIZE is set.\n\t(_mpn_mul_classic): Streamline.\n\t* mpz/tests/t-tdiv.c: Analogous changes.\n\n\t* demos/pexpr.c (HAVE_sigaltstack): Fix typo in testing for _UNICOS.\n\tAlso test for __hpux.\n\n2000-11-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev5/gmp-mparam.h: Retune.\n\n\t* mpn/alpha/ev6/gmp-mparam.h: Retune.\n\n\t* mpn/alpha/ev6/add_n.asm: Misc cleanups.\n\n\t* mpn/alpha/ev6/sub_n.asm: New file.\n\n2000-11-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [path switch] (alphaev6*-*-*): Add alpha/ev5 to path.\n\n\t* mpn/alpha/ev6/add_n.asm: New file.\n\n2000-11-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/powm.c (redc): Make global under WANT_REDC_GLOBAL.\n\t* tune/powm_mod.c, tune/powm_redc.c: New files.\n\t* tune/Makefile.am (libspeed_la_SOURCES): Add them.\n\t* tune/*: Add measuring of redc, mpz_mod, mpz_powm_mod,\tmpz_powm_redc.\n\n\t* tune/tuneup.c (POWM_THRESHOLD): Determine from redc and mpz_mod.\n\t* tune/Makefile.am (TUNE_MPZ_SRCS): Remove powm.\n\n2000-11-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/gmp-mparam.h: Retune.\n\n\t* configure.in (os_64bit): Rename to check_64bit_compiler.\n\n2000-11-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Choose hppa/hppa2 code\n\tdepending on $CC64.\n\n2000-11-09  Kevin Ryde  
<kevin@swox.se>\n\n\t* mpn/x86/pentium/mul_1.asm: Unroll 2x, saving 1 c/l when in L1.\n\tAdd 1c entrypoint.\n\t* mpn/x86/pentium/aorsmul_1.asm: Add 1c entrypoints, shave a couple\n\tof cycles at entry and exit.\n\n\t* configure.in (power1,2,2sc): Support these as synonyms for plain\n\tpower.\n\n\t* acinclude.m4 (GMP_ASM_X86_SHLDL_CL): GMP_DEFINE WANT_SHLDL_CL here.\n\t(GMP_ASM_X86_MMX, GMP_ASM_X86_SHLDL_CL): Add X86 into the names.\n\t* configure.in: Consequent changes.\n\n\t* gmp.texi (Notes for Particular Systems): Remarks about power/powerpc.\n\t(Reentrancy): Remarks about simultaneous writing.\n\t(Reporting Bugs): Ask for configfsf.guess.\n\n2000-11-08  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_FUNC_ALLOCA): New macro.\n\t* configure.in: Use it.\n\t* gmp-impl.h (alloca): Conditionals and setups as per autoconf\n\t(should make alloca available on more non-gcc compilers).\n\n\t* acinclude.m4: Misc reformatting, simplify some quoting.\n\t(GMP_ASM_UNDERSCORE, GMP_ASM_X86_MCOUNT): Use $CC $CFLAGS $CPPFLAGS.\n\t(GMP_ASM_UNDERSCORE, GMP_ASM_ALIGN_FILL_0x90, GMP_ASM_RODATA): Put\n\tAC_REQUIREs outside AC_CACHE_CHECK.\n\t(GMP_C_SIZES): Use $srcdir/gmp.h, not -I; use $CPPFLAGS.\n\t(GMP_ASM_UNDERSCORE): Use \"gmp_compile\" variable, and only rm\n\tconftes1* conftes2*.\n\t(GMP_PROG_NM): New macro, require it in appropriate GMP_ASM_*.\n\t(GMP_TRY_ASSEMBLE): New macro, use it in various GMP_ASM_*.\n\t* configure.in: Use GMP_PROG_NM.\n\n\t* mpn/tests/spinner.c (spinner_signal): Use RETSIGTYPE.\n\t(spinner_init): Force output to unbuffered.\n\n\t* mpn/x86/README.family: Notes about GOT table and imul, misc updates.\n\t* mpn/x86/k7/diveby3.asm: Change to 3 operands for immediate imul.\n\t* mpn/x86/k6/diveby3.asm: Ditto.\n\n2000-11-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* urandom.h: Simplify and make it work properly for 64-bit\n\tmachines also in environments without `random'.\n\n2000-11-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [path switch]: 
Don't match rs6000-*-*, in\n\tparticular don't assume POWER.\n\n\t* tune/tuneup.c (fft): Remove usleep calls.\n\n\t* config.guess: Don't pass \"$@\" when it is known to be empty.\n\n\t* Makefile.am (EXTRA_DIST): List configfsf.guess and configfsf.sub.\n\n2000-11-04  Kevin Ryde  <kevin@swox.se>\n\n\t* configfsf.guess, configfsf.sub: Moved from config.guess and\n\tconfig.sub.\n\t* config.guess, config.sub: New files, wrappers around\n\tconfigfsf versions.\n\t* configfsf.guess: Update to FSF 2000-10-23.\n\t* configfsf.sub: Update to FSF 2000-10-25.\n\n\t* acinclude.m4 (GMP_ASM_POWERPC_R_REGISTERS): New macro.\n\t* mpn/powerpc32/powerpc-defs.m4: New file, regmap.m4 r0 etc macros\n\tconditionalized by GMP_ASM_POWERPC_R_REGISTERS.\n\t* mpn/powerpc32/regmap.m4: Remove file.\n\t* configure.in (powerpc*-*-*): Use all this.\n\n\t* mpz/divegcd.c: New file, providing mpz_divexact_gcd.\n\t* Makefile.am, mpz/Makefile.am: Add it.\n\t* gmp-impl.h (mpz_divexact_gcd): Add prototype.\n\t* mpq/aors.c,canonicalize.c,div.c,mul.c: Use it.\n\n\t* longlong.h [pentium] (count_leading_zeros): New macro.\n\t(__clz_tab): Always provide prototype.\n\t* acconfig.h (HAVE_TARGET_CPU_): Add x86s.\n\n\t* tune/speed.[ch],common.c (count_leading_zeros,\n\tcount_trailing_zeros, __udiv_qrnnd_c): Add measuring.\n\n\t* configure.in (X86_PATTERN): Move from here ...\n\t* acinclude.m4 (X86_PATTERN): ... 
to here.\n\t(GMP_ASM_RODATA): Use it.\n\n\t* configure.in (srandom): New test.\n\t* mpn/tests/try.c: Use it.\n\t* tune/speed.c: Ditto, and conditionalize getrusage and headers.\n\n2000-11-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add udiv_qrnnd.c\n\tand udiv_w_sdiv.c.\n\n\t* mpn/generic/mul_n.c (mpn_kara_sqr_n): Remove a duplicate\n\tsubtract at the evaluate stage.\n\n2000-11-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [compiler switch] (sparc64-*-linux*): Spell\n\tgmp_xoptcflags_gcc properly, and pass same options as for other\n\tsparcv9 configs.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GET_STR): Fix type of wsize.\n\n2000-10-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [compiler switch] (sparc64-*-linux*): Remove -mvis\n\tfrom gmp_xoptflags_gcc, this might not be an ultrasparc.\n\tRemove -m32 from gmp_cflags_gcc; add -Wa,-xarch=v8plus.\n\n2000-10-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/lorrshift.asm: New file.\n\n\t* configure.in: New mulfunc `lorrshift' for lshift and rshift.\n\n2000-10-29  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_n.c (mpn_kara_sqr_n): Delete code performing\n\tsuperfluous mpn_sub_n calls.\n\n\t* configure.in (found_asm, M4): Account for SPEED_CYCLECOUNTER_OBJ,\n\tfor the benefit of targets whose only .asm is a cycle counter.\n\n\t* tune/tuneup.c (fft): Remove bogus usleep calls.\n\n2000-10-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/invert_limb.asm: Get return value for 0x800...00 right.\n\n\t* tune/Makefile.am (EXTRA_DIST): Add ia64.asm.\n\n\t* tune/ia64.asm: Fix typo.\n\n\t* add_n.asm addmul_1.asm mul_1.asm popcount.asm sub_n.asm:\n\tPreserve ar.lc as required by ABI.\n\t* longlong.h (ia64 udiv_qrnnd): New.\n\n\t* configure.in [path switch] (ia64*-*-*): Set extra_functions.\n\t* mpn/ia64/invert_limb.asm: New file.\n\n2000-10-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [compiler switch]:\n\tGet rid of c89 for all hppa 
flavours--it is an evil compiler!\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Fix type of xp.\n\t(SPEED_ROUTINE_MPN_GET_STR): Fix type of wp.\n\n2000-10-27  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Fibonacci Number Algorithm): New section.\n\n\t* mpz/tests/t-fib_ui.c: New file.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/fib_ui.c: Rewrite, same formulas but using mpn functions and\n\tsome lookup tables, much faster at small to moderate sizes.\n\t* gmp-impl.h (MPZ_FIB_SIZE): New macro.\n\t(FIB_THRESHOLD): Establish default here.\n\t* tune/tuneup.c (FIB_THRESHOLD): Start search after the new table\n\tdata.\n\n\t* mpn/x86/x86-defs.m4 (mcount_movl_GOT_ebx): Rename from movl_GOT_ebx,\n\tand don't use GSYM_PREFIX with _GLOBAL_OFFSET_TABLE_.\n\n\t* tune/freq.c (speed_cpu_frequency_measure): New test comparing\n\tgettimeofday and speed_cyclecounter, should cover many systems.\n\n2000-10-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/gmp-mparam.h: Retune.\n\n2000-10-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (ia64): Set UMUL_TIME and UDIV_TIME.\n\n\t* mpn/ia64/submul_1.c: Fix typo.\n\n2000-10-25  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/freq.c (speed_cpu_frequency_sysctl): New test, supporting\n\thw.model for BSD flavours.\n\t* configure.in (sysctl, sys/param.h): New tests.\n\n2000-10-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/freq.c: Explicitly #include config.h before other include files.\n\n\t* mpz/tests/reuse.c (FAIL2): New #define.\n\t(main): Use FAIL2.  
Now this test properly returns non-zero exit\n\tstatus when it fails.\n\n\t* mpn/powerpc32/gmp-mparam.h: Retune.\n\t* mpn/powerpc64/gmp-mparam.h: Retune.\n\n2000-10-24  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/cross.pl: Support 8 and 16 byte code alignment.\n\n\t* mpq/aors.c, mpq/canonicalize.c: Skip two mpz_divexact calls if\n\tgcd gives 1, which should be 60% of the time.\n\t* gmp-impl.h (MPZ_EQUAL_1_P): New macro.\n\t* mpq/mul.c, mpq/div.c: Use it, and a new DIV_OR_SET.\n\n\t* tune/tuneup.c (xp_block, yp_block): Initialize these with random\n\tdata.  Fixes GCD_ACCEL and GCDEXT thresholds, and latest POWM.\n\n2000-10-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Add ia64 case.\n\n\t* mpn/ia64/gmp-mparam.h: Fill in some parameters.\n\n\t* mpn/ia64/submul_1.c: New file.\n\n\t* tune/ia64.asm: New file.\n\n\t* gmp-impl.h (union ieee_double_extract): Handle ia64.\n\n\t* mpn/mp_bases.c: Decrease chars_per_bit_exactly for entry 1 to\n\twork around buggy ia64-linux.\n\n\t* longlong.h (ia64 umul_ppmm): Update register flags to match new GCC.\n\n2000-10-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/gmp-mparam.h (DC_THRESHOLD): Update.\n\t* mpn/alpha/ev6/submul_1.asm: New file.\n\n2000-10-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/gcd_bin.c: New file.\n\t* tune/gcd_finda_gen.c: New file.\n\t* tune/Makefile.am (libspeed_la_SOURCES): Add them.\n\t* tune/speed.[ch],common.c (mpn_gcd_binary, find_a): Add measuring.\n\n\t* * (__gmp_allocate_func etc): Rename from _mp_allocate_func etc.\n\t(__gmp_default_allocate etc): Rename from _mp_default_allocate etc.\n\t* gmp-impl.h (__GMP_REALLOCATE_FUNC_TYPE,\n\t__GMP_REALLOCATE_FUNC_LIMBS): New macros.\n\n\t* gmp-impl.h (DC_THRESHOLD): Establish default here, set to 3*KARA\n\tsince that's the measured average.\n\t* mpn/generic/dc_divrem_n.c, mpn/generic/tdiv_qr.c (DC_THRESHOLD):\n\tRemove default.\n\n2000-10-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/Makefile.am 
(TARG_DIST): Add ia64.\n\n2000-10-21  Kevin Ryde  <kevin@swox.se>\n\n\t* *: Change BZ -> DC.\n\t* mpn/generic/dc_divrem_n.c: Renamed from bz_divrem_n.c.\n\n\t* doc/multiplication: Remove file, now in the manual.\n\t* doc/assembly_code: Ditto.\n\t* tune/README: Remove some parts now in the manual.\n\n\t* gmp.texi (@m etc): Add and use some new macros.\n\t(Integer Division - mpz_[cft]div_*): Merge descriptions, for brevity\n\tand to emphasise similarities.\n\t(Low-Level Functions - mpn_[lr]shift): Specify count as 1 to\n\tmp_bits_per_limb-1.\n\t(Algorithms): New chapter.\n\t(References): Add some papers.\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove some\n\tunused variables.\n\t* mpn/generic/mul_fft.c (mpn_fft_best_k): Ditto.\n\n\t* tune/freq.c: New file, split from time.c.\n\t* tune/time.c: Rewrite, now more automated.\n\t* configure.in, tune/*: Consequent changes.\n\n2000-10-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/default.m4: New file.\n\t* configure.in [config.m4 switch] (ia64*-*-*): Use ia64/default.m4.\n\n\t* mpn/ia64/mul_1.asm: New file.\n\t* mpn/ia64/addmul_1.asm: New file.\n\t* mpn/ia64/add_n.asm: New file.\n\t* mpn/ia64/sub_n.asm: New file.\n\t* mpn/ia64/popcount.asm: New file.\n\t* mpn/ia64/README: New file.\n\n\t* mpn/alpha/cntlz.asm: Override `.set noat' from ASM_START.\n\n\t* configure.in (HAVE_TARGET_CPU_*): Support hppa1.0, hppa1.1, hppa2.0\n\tby sed'ing the period into `_'.\n\n\t* acconfig.h: Add #undefs for hppa targets.\n\n\t* longlong.h (udiv_qrnnd): Fix typo in last change.\n\n\t* mpz/tstbit.c: Rewrite (partly to work around GCC 2.95.2 HPPA bug).\n\n\t* configure.in [path switch]:\n\t(hppa2.0*-*-*): For non-CC64 case, update path.\n\n\t* configure.in [compiler switch]:\n\t(hppa2.0w-*-*): Match with same regexp in both places.\n\t(hppa*-*-*): New case.\n\t(all hppa alternatives): Don't inherit default gmp_cflags_cc,\n\tgmp_cflags_c89.\n\n2000-10-18  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in 
(alpha*-*-*): Define gmp_xoptcflags_gcc like for\n\talpha*-*-osf*.\n\n\t* longlong.h (x86 udiv_qrnnd): Change `d' => `dx' to avoid K&R C\n\tstringification.\n\n2000-10-15  Kevin Ryde  <kevin@swox.se>\n\n\t* doc/configuration: Updates.\n\n\t* demos/calc.y: Remove some comments.\n\n2000-10-14  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Parameter Conventions, Memory Management): New sections\n\tsplit from \"Variable Conventions\".\n\t(Efficiency, Debugging, Profiling): New sections in \"GMP Basics\".\n\t(Reentrancy): Some rewording, add note on standard I/O.\n\t(Build options): Add --enable-assert and --enable-profiling.\n\n\t* configure.in (--enable-profiling): New option.\n\t* acinclude.m4 (GMP_ASM_X86_MCOUNT): New macro, finding how to profile.\n\t* mpn/x86/x86-defs.m4 (PROLOGUE_cpu, call_mcount): Profiling support.\n\n\t* acinclude.m4, configure.in (GMP_ASM_*): Rename from GMP_CHECK_ASM_*,\n\tto follow autoconf conventions.\n\n\t* configure.in: Run GMP_CHECK_ASM tests only if needed.\n\t* acinclude.m4 (GMP_CHECK_ASM_MMX): Don't use GMP_CHECK_ASM_TEXT.\n\n\t* mpn/x86/x86-defs.m4 (ASSERT): Allow no condition, to just emit code.\n\n2000-10-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/md_2exp.c: New file.\n\t* mpq/Makefile.am (libmpq_la_SOURCES): Add it.\n\t* Makefile.am (MPQ_OBJECTS): Ditto.\n\t* gmp.h (mpq_mul_2exp, mpq_div_2exp): Add prototypes.\n\t* gmp.texi (Rational Arithmetic): Add documentation.\n\n\t* mpq/tests/t-md_2exp.c: New file.\n\t* mpq/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpn/generic/perfsqr.c: Add/amend some comments.\n\n\t* gmp.texi (Known Build Problems): Note VERSION problem with old\n\tsed, do some minor rewording.\n\t(Build Options): Add cygwin and djgpp URLs, mention INSTALL.autoconf,\n\tmention HTML.\n\t(Getting the Latest Version of GMP): Move this ...\n\t(Introduction to GMP): ... 
to here.\n\t(Compatibility with older versions): Just refer to 2.x and 3.x, not\n\tevery minor version.\n\t(Initializing Integers): Note restrictions on mpz_array_init'ed\n\tvariables.\n\t(Integer Logic and Bit Fiddling): Note bits are numbered from 0.\n\n\t* INSTALL.autoconf: New file.\n\t* Makefile.am (EXTRA_DIST): Add it.\n\n\t* tune/Makefile.am, tune/tuneup.c, configure.in, gmp-impl.h: New\n\tscheme for recompiled objects used by tune program.  Don't use\n\tlibgmptune.a, make better use of libtool, work with ansi2knr.\n\n\t* tune/speed.h,common.c (SPEED_ROUTINE_MPZ_POWM): Use s->yp and\n\ts->xp_block, make exponent a fixed size.\n\n2000-10-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/gmp-mparam.h: Retune.\n\n\t* mpn/generic/mul_n.c (USE_MORE_MPN): Revert last change.\n\n2000-10-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/add_n.s: Decrease carry recurrence from 4 to 3 cycles.\n\t* mpn/mips3/sub_n.s: Likewise.\n\n2000-10-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (sparc64-*-linux*): Set path according to CC64.\n\n2000-10-04  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Use LABEL_SUFFIX, not a\n\thard-coded \":\".\n\n\t* config.sub: Don't demand \"86\" in CPU name for SCO.\n\n\t* configure.in (supersparc-*-*): Remove -DSUPERSPARC.\n\t* longlong.h: Use HAVE_TARGET_CPU_supersparc.\n\n\t* configure.in (HAVE_TARGET_CPU_*): AC_DEFINE from $target_cpu.\n\t* acconfig.h: Add #undefs, but only for targets of interest.\n\n2000-10-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/cntlz.asm: Rewrite.\n\n\t* mp_clz_tab.c (__clz_tab): Halve table size to 128 entries.\n\t* longlong.h (count_leading_zeros): Demand just 128 entries from\n\t__clz_tab.\n\n\t* configure.in (mips-sgi-irix6.*): Pass -mips3 in addition to options\n\tfor n32 ABI.\n\n\t* longlong.h: Move NO_ASM test around all assembly code.\n\tFrom gcc:\n\t* longlong.h (count_leading_zeros): Sparclite scan instruction was\n\tbeing invoked 
incorrectly.\n\tReplace __mc68332__ with __mcpu32__.\n\tAdd ARC support.\n\n2000-10-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/mips3/gmp-mparam.h: Retune for both gcc and cc.\n\n\t* mpn/generic/mul_n.c (USE_MORE_MPN): Remove exception for __mips.\n\t(interpolate3): Cast mp_limb_t variables to mp_limb_signed_t\n\twhen testing sign bit.\n\n\t* mpn/alpha/ev6/gmp-mparam.h: Retune.\n\t* mpn/powerpc32/gmp-mparam.h: Retune.\n\t* mpn/powerpc64/gmp-mparam.h: Retune.\n\t* mpn/x86/pentium/gmp-mparam.h: Retune.\n\t* mpn/x86/pentium/mmx/gmp-mparam.h: Retune.\n\t* mpn/sparc32/v9/gmp-mparam.h: Retune.\n\t* mpn/x86/k6/gmp-mparam.h: Retune.\n\t* mpn/x86/p6/gmp-mparam.h: Retune.\n\t* mpn/x86/k7/gmp-mparam.h: Retune.\n\t* mpn/sparc64/gmp-mparam.h: Retune.\n\n\t* mpn/m68k/gmp-mparam.h: New file.\n\t* mpn/alpha/ev5/gmp-mparam.h: New file.\n\n\t* gmp-impl.h (default MPN_COPY): Remove final `;'.\n\n\t* tune/time.c (speed_endtime): Rewrite.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPZ_POWM): Set base to a large value,\n\tnot 2.\n\n\t* demos/pexpr.c (setup_error_handler): Fix typo.\n\n\t* mpz/powm.c (redc): New function, based on old mpz_redc.  
Don't\n\tmultiply here.\n\t(mpz_redc): Remove.\n\t(mpz_powm): Major changes, partially reverting to mpn calls.\n\tMultiply before calling redc.\n\t(mpz_powm): Use TMP_ allocation.\n\t(mpz_powm): Refine calculation of k (width of exponent window).\n\t(mpz_powm): Cast constants to mp_limb_t before left shifting.\n\n\t* longlong.h: Use ia64 count_leading_zeros just when __GNUC__.\n\n2000-09-29  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_C_SIZES): New macro.\n\t* configure.in: Use it.\n\t* acconfig.in (BYTES_PER_MP_LIMB etc): Add #undefs.\n\t* mpn/generic/gmp-mparam.h (BYTES_PER_MP_LIMB etc): Remove #defines.\n\t* gmp.texi (Known Build Problems): Remove 64-bit generic C\n\tgmp-mparam.h problem, now fixed.\n\n\t* configure.in: Only run GMP_PROG_M4 if it's actually needed.\n\n2000-09-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c: Clean up code for systems not supporting\n\tsigaltstack.  Handle old Linux without sigaltstack.  Properly\n\tdisable all stuff related to sigaltstack under Unicos.\n\n\t* mpn/alpha/ev6/addmul_1.asm: Use explicit offset for all load and\n\tstore insns.  
Helps old gas.\n\n\t* longlong.h (count_leading_zeros): Define for ia64.\n\n2000-09-27  Paul Zimmermann  <Paul.Zimmermann@loria.fr>\n\n\t* mpn/generic/bz_divrem_n.c: Fix qhl handling, simplify.\n\n2000-09-27  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/Makefile.in (.SUFFIXES): Regenerate with patched automake to\n\tget .s before .c, which is needed to override ansi2knr .c rules.\n\n\t* gmp.texi (mpn_sqrtrem): Fix r2p==NULL return value description\n\tto match the code (change by Torbjorn).\n\t(mpn_gcd, mpn_gcdext, mpn_sqrtrem, mpn_tdiv_qr): Note most\n\tsignificant limbs must be non-zero.\n\t(mpn_gcd, mpn_gcdext, mpn_sqrtrem): Clarify destination size\n\trequirements.\n\t(mpn_gcd_1): Clarify value must be non-zero, not just size.\n\n\t* gmp-impl.h (mpn_zero_p): New inline function.\n\t* mpn/generic/inlines.c: Add gmp-impl.h.\n\t* mpf/integer.c, mpz/get_d.c, mpn/generic/mul_fft.c: Use it.\n\n\t* mpn/generic/gcd.c: Use MPN_COPY_INCR not MPN_COPY.\n\t* mpf/add_ui.c: Ditto.\n\t* mpf/add.c: Ditto, and fix test to skip copy.\n\n2000-09-26  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h, longlong.h, mpn/generic/*.c: Add ASSERTs for various\n\tparameter restrictions.\n\n\t* gmp-impl.h (UDIV_PREINV_TIME): New macro.\n\t* mpn/generic/sb_divrem_mn.c: Use it.\n\t* mpn/generic/perfsqr.c: Ditto.\n\t* mpn/x86/*/gmp-mparam.h (UDIV_PREINV_TIME): Add values.\n\n\t* macos/Makefile.in: Add mpz/tests/t-get_si.c, mpf/tests/t-set_f.c,\n\tand new multi-function mpz and mpq files.\n\n2000-09-25  Kevin Ryde  <kevin@swox.se>\n\n\t* randlc.c, randlc2x.c, randsd.c, mpz/urandomb.c, mpz/urandomm.c:\n\tUse mpz_ptr and mpz_srcptr for parameters.\n\t* gmp.h (gmp_randinit_lc, gmp_randinit_lc_2exp, gmp_randseed,\n\tmpz_urandomb, mpz_urandomm): Corresponding change to prototypes.\n\t* randsdui.c: Remove wrong K&R parameters part.\n\n2000-09-12  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (mpn_tdiv_qr): Move prototype from here ...\n\t* gmp.h (mpn_tdiv_qr): ... 
to here.\n\n\t* gmp.texi (Miscellaneous Rational Functions): Comment-out and\n\tmove version 1 compatibility note to \"Compatibility\" section.\n\t(Rational Number Functions): Ditto for canonicalization note.\n\n2000-09-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/com_n.asm: New file.\n\n\t* gmp.texi (Rational Arithmetic): Add mpq_abs.\n\t(Miscellaneous Rational Functions): Merge and simplify descriptions of\n\tmpq_get_num, mpq_get_den, mpq_set_num, mpq_set_den.\n\n\t* mpq/abs.c: New file.\n\t* mpq/Makefile.am (libmpq_la_SOURCES): Add it.\n\t* Makefile.am (MPQ_OBJECTS): Add it.\n\t* gmp.h (mpq_abs): Add prototype.\n\n\t* mpq/set_den.c: Don't discard sign when copying, this makes the\n\tcode match the manual.\n\n2000-09-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/alpha.asm: Rewrite to actually work right.\n\n2000-09-07  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/common.c,speed.[ch]: Add measuring of mpn_sqrtrem,\n\tmpn_get_str, mpn_set_str.\n\t* tune/README: Various updates.\n\n2000-09-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/fits.c: Correct type of `data'.\n\n2000-09-06  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Clarify where to find CFLAGS.\n\t(Known Build Problems): Note SCO -lc problem.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Fix for sizes > 512 limbs.\n\n\t* doc/multiplication: Corrections and additions suggested by Paul.\n\n\t* tune/modlinv.c: New file with alternate modlimb_inverts.\n\t* tune/Makefile.am, tune/speed.[ch]: Add measuring of them.\n\t* tune/speed.c (FLAG_NODATA): New attribute, use for mpz_bin_uiui,\n\tmpz_fib_ui, mpz_fac_ui.\n\n\t* mpn/x86/t-zdisp.sh: New file.\n\n\t* tests/t-modlinv.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpq/tests/t-set_f.c: New file.\n\t* mpq/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* gmp-impl.h (MPQ_CHECK_FORMAT): New macro.\n\t* mpq/tests/t-get_d.c: Use it.\n\n\t* mpq/set_f.c: New file.\n\t* mpq/Makefile.am (libmpq_la_SOURCES): Add 
it.\n\t* Makefile.am (MPQ_OBJECTS): Ditto.\n\t* gmp.h: Add prototype.\n\t* gmp.texi (Miscellaneous Rational Functions): Document mpq_set_f,\n\tcorrect return type of mpq_set_d.\n\n2000-09-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/aors_ui.c: New file merging add_ui.c and sub_ui.c, no object\n\tcode changes.\n\t* mpz/add_ui.c, mpz/sub_ui.c: Remove files.\n\t* mpz/Makefile.am: Update.\n\n\t* gmp-impl.h (MPZ_FITS_STYPE_SDT, MPZ_FITS_UTYPE_SDT): New macros.\n\t* mpz/fits.c: New file merging six separate fits*.c.\n\t* mpz/fits_sshort_p.c, fits_sint_p.c, fits_slong_p.c, fits_ushort_p.c,\n\tfits_uint_p.c, fits_ulong_p.c: Remove files\n\t* mpz/Makefile.am: Use new fits.c, change object names from\n\tfits_*_p.lo to fits_*.lo to avoid SunOS 4 native \"ar\" warnings.\n\t* Makefile.am (MPZ_OBJECTS): Change from fits_*_p.lo to fits_*.lo.\n\n\t* acinclude.m4 (GMP_CHECK_ASM_RODATA): New macro, defining RODATA.\n\t* configure.in: Use it.\n\t* mpn/x86/k[67]/mmx/popham.asm: Use it.\n\n\t* mpn/x86/*/*.asm: Use \"TEXT\" not \".text\".\n\n2000-09-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpq/aors.c: New file merging add.c and sub.c, no object code changes.\n\t* mpq/add.c, mpq/sub.c: Remove files.\n\t* mpq/Makefile.am: Update.\n\n\t* mpz/aors.c: New file merging add.c and sub.c, no object code changes.\n\t* mpz/add.c, mpz/sub.c: Remove files.\n\t* mpz/Makefile.am, mpbsd/Makefile.am: Update.\n\n\t* configure.in: Re-apply \"PROLOGUE.*\" regexp change for the\n\tbenefit of alpha PROLOGUE_GP, lost in path search reorganisation.\n\n\t* mpn/x86/x86-defs.m4 (jadcl0, cmov_simulate, ASSERT,\n\tmovl_text_address): Don't use \"1:\" style labels.\n\t(Zdisp): Rearrange a bit, switch to all hex.\n\t* mpn/x86/README.family: Note SCO \"as\" doesn't support \"1:\" style\n\tlocal labels, misc rewordings.\n\n2000-08-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/primes.c: Include string.h.\n\n\t* config.guess (x86 variant recog code): Remove dummy*.o files\n\tgenerated by some compilers.\n\n2000-08-28  
Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_ALIGN_FILL_0x90): Fix Solaris 2.8\n\twarning message suppression, add notes about SCO.\n\n\t* Makefile.am (MPZ_OBJECTS etc): Move some comments.\n\n2000-08-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/pprime_p.c (mpz_millerrabin): Fix a TMP_FREE.\n\n\t* gmp.texi (Copying): Refer to Lesser not Library GPL.\n\t(GMP and Reentrancy): Note stack-alloc.c is not reentrant, and\n\tthat SCO <ctype.h> is potentially not reentrant.\n\n\t* acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Test by attempting to\n\tlink with or without an underscore.\n\t* gmp.texi (Known Build Problems): Remove SunOS 4 native grep\n\tGSYM_PREFIX problem, now fixed.\n\n\t* gmp-impl.h (MODLIMB_INVERSE_3): New constant.\n\t* mpn/generic/diveby3.c: Use it instead of own INVERSE_3.\n\t* mpn/generic/mul_n.c: Ditto.\n\t* tests/t-constants.c: Check it, and PP_INVERTED too.\n\n\t* acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): New macro.\n\t* configure.in [p6 and athlon] (gmp_optcflags_gcc): Use it to\n\tpossibly add -march=pentiumpro.\n\n\t* gmp-impl.h (MPZ_SET_STR_OR_ABORT, MPF_SET_STR_OR_ABORT): New macros.\n\t* mpz/tests/t-bin.c, mpz/tests/t-get_si.c, mpz/tests/t-jac.c,\n\tmpz/tests/t-misc.c: Use them.\n\t* mpf/tests/t-conv.c, mpf/tests/t-misc.c: Ditto.\n\t* mpz/tests/convert.c: Ditto and amend diagnostics slightly.\n\t* mpz/tests/t-misc.c (check_mpz_set_si): Remove a superfluous init.\n\t* mpz/tests/io.c: Differentiate between I/O and data conversion errors.\n\n\t* mpn/generic/aors_n.c: New file merging add_n and sub_n, no\n\tobject code changes.\n\t* mpn/generic/add_n.c: Remove file.\n\t* mpn/generic/sub_n.c: Remove file.\n\n\t* mpn/generic/aorsmul_1.c: New file merging addmul_1 and submul_1,\n\tno object code changes.\n\t* mpn/generic/addmul_1.c: Remove file.\n\t* mpn/generic/submul_1.c: Remove file.\n\n\t* mpn/generic/popham.c: New file merging popcount and hamdist, no\n\tobject code changes.\n\t* mpn/generic/popcount.c: Remove file.\n\t* 
mpn/generic/hamdist.c: Remove file.\n\n2000-08-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (mpn_com_n): Fix typo.\n\n2000-08-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/primes.c (main): Don't call mpz_probab_prime_p for numbers\n\tthat are known to be prime after sieving.\n\t(main): Declare and initialize max_s_prime_squared.\n\t(MAX_S_PRIME): Increase.\n\t(ST_SIZE): Increase.\n\n2000-08-23  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (ASSERT_ALWAYS): Change to statement style.\n\t(JACOBI_TWO_U_BIT1): Remove ASSERT.\n\t(MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS as a statement.\n\n2000-08-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (ASSERT): Use do..while for dummy version.\n\n\t* mpf/get_str.c: Don't set n_digits from digits_computed_so_far\n\twhen the converted operand becomes zero.  Misc cleanups.\n\n2000-08-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/fdiv_r_2exp.c, mpz/lcm.c, mpz/urandomm.c: Add missing\n\tTMP_MARK/FREE, avoiding memory leak when using stack-alloc.c.\n\n2000-08-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/set.c [BERKELEY_MP] (move): Add conditionals to build as\n\t\"move\" for libmp.\n\t* mpbsd/Makefile.am: Use mpz/set.c, not move.c.\n\t* Makefile.am (MPBSD_OBJECTS): Corresponding change.\n\t* mpbsd/move.c: Remove file.\n\n\t* mpn/Makefile.am, mpz/Makefile.am, mpq/Makefile.am, mpf/Makefile.am,\n\tmpbsd/Makefile.am (-DOPERATION_foo): Use \"foo\" even for ansi2knr\n\t\"foo_\" objects.  
Do this with the makefiles to keep the sources\n\tcleaner.\n\t* mpz/mul_siui.c, mpf/integer.c: Revert to plain OPERATION_* forms.\n\n\t* mpn/lisp/gmpasm-mode.el (gmpasm-remove-from-list): Renamed from\n\tgmpasm-delete-from-list, because it's non-destructive.\n\t(gmpasm-font-lock-keywords): Add some more keywords.\n\n2000-08-16  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/mul_n_mpn.c, tune/mul_n_open.c: New files, being forced\n\topen-coded and mpn #includes of mpn/generic/mul_n.c.\n\t* tune/*: Add measuring of them.\n\t* tune/speed.c: Print command line into *.gnuplot file.\n\n\t* mpn/generic/mul_n.c (USE_MORE_MPN): Change to #if not #ifdef for\n\tusing the value, add #ifndef for providing the default.\n\t* mpn/sparc64/gmp-mparam.h (USE_MORE_MPN): Add #ifndef.\n\n\t* tests/t-constants.c: New file.\n\t* tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/get_si.c: Use LONG_MAX, not BITS_PER_MP_LIMB, so the result\n\tdoesn't depend on limb size when outside the range of a long\n\t(though such results are not actually documented).\n\t* mpz/tests/t-get_si.c: New file.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpn/tests/try.c (call): Cast popcount and hamdist calls,\n\tfor the benefit of long long limb.\n\n2000-08-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mp.h (mp_set_memory_functions): Add missing #define.\n\t* mpbsd/tests/allfuns.c (mp_set_memory_functions): Verify its\n\texistence.\n\n\t* mpf/tests/t-misc.c (check_mpf_getset_prec): New test, verifying\n\treverted behaviour of mpf_get_prec.\n\n\t* mpn/tests/ref.c (refmpn_strip_twos): Use refmpn_copyi, not\n\tMPN_COPY_INCR.\n\n\t* mpz/mul_siui.c, mpf/integer.c: Recognise OPERATION_*_ forms\n\tproduced under ansi2knr.\n\n\t* configure.in (mpn_objects, mpn_objs_in_libgmp): Add $U to .c\n\tobjects when ansi2knr in use.\n\n\t* mpn/Makefile.am (AUTOMAKE_OPTIONS): Enable ansi2knr.\n\t(libdummy.la): Add this, not built, to create ansi2knr style rules\n\tfor all potential .c files.\n\t* mpz/Makefile.am, 
mpq/Makefile.am, mpf/Makefile.am, mpfr/Makefile.am,\n\tmpbsd/Makefile.am, mpq/tests/Makefile.am, tests/Makefile.am\n\t(AUTOMAKE_OPTIONS): Enable ansi2knr (now everywhere).\n\t* Makefile.am (MPZ_OBJECTS, MPQ_OBJECTS, MPF_OBJECTS, MPFR_OBJECTS,\n\tMPBSD_OBJECTS, libmp_la_DEPENDENCIES): Add $U to all .lo filenames.\n\n2000-08-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev6/addmul_1.asm: Correct number of cycles to 3.5/28.\n\n2000-08-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 3.1 released.\n\n\t* gmp.texi: Rephrase mpf_urandomb documentation.\n\n\t* mpn/alpha/ev6: New directory with ev6/21264 optimized code.\n\t* mpn/alpha/ev6/addmul_1.asm: New file.\n\t* mpn/alpha/ev6/gmp-mparam.h: New file.\n\n2000-08-02  Kevin Ryde  <kevin@swox.se>\n\n\t* demos/factorize.c (random): Don't use \"inline\".\n\n\t* mpfr/log.c, mpfr/mul_ui.c, mpfr/round.c, mpfr/set.c, mpfr/set_d.c:\n\tCorrections to K&R parts.\n\n\t* Makefile.am (EXTRA_HEADERS): Omit $(MPFR_HEADERS_OPTION).\n\t* mpfr/Makefile.am (EXTRA_DIST): Add mpfr.h.\n\n\t* gmp.texi (Known Build Problems): Note problem stripping libgmp.a.\n\n2000-08-02  Kent Boortz  <kent@swox.com>\n\n\t* mpfr: Integrated experimental version of mpfr-0.4.\n\t* configure.in: Changes for option --enable-mpfr.\n\t* Makefile.am: Changes for option --enable-mpfr.\n\n2000-08-01  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/popcount.c: Disable SPARC v9 popc_limb pattern.\n\t* mpn/generic/hamdist.c: Likewise.\n\n2000-08-01  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/tests/try.c (try_init): Account for ALIGNMENTS when sizing\n\tsource and dest regions.\n\n2000-07-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/get_str.c: Develop three extra digits, not just one.\n\n2000-07-31  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (References): Add URL for invariant division.\n\n2000-07-30  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/time.c (speed_cpu_frequency_proc_cpuinfo): Add support for\n\talpha linux \"cycle frequency\".\n\n\t* 
mpn/sparc64/gmp-mparam.h: Re-run tune program for FFT thresholds.\n\n2000-07-29  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ABI and ISA): Add sparc64-*-linux*.\n\t* configure.in [sparc64-*-linux*] (gmp_cflags64_gcc): Same flags\n\tas under solaris.\n\n\t* configure.in (--enable-fft): New option, default \"no\".\n\t* gmp.texi (Build Options): Describe it.\n\t* mpn/generic/mul.c, mpn/generic/mul_n.c [WANT_FFT]: Use it.\n\t* tune/tuneup.c [WANT_FFT]: By default don't probe FFTs if not enabled.\n\t* NEWS: Multiplication optionally using FFT.\n\n\t* tune/README: Notes on FFT and GCD thresholds, other minor updates.\n\n\t* Makefile.am: Expunge the macos generated files update stuff.\n\n2000-07-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/*/gmp-mparam.h: Add some FFT thresholds.\n\n2000-07-28  Kent Boortz  <kent@swox.se>\n\n\t* macos/Asm*, macos/CmnObj, macos/Mp*: Delete directories.\n\t* macos/Makefile: Delete file.\n\t* macos/Makefile.cw: Delete file.\n\t* macos/config.h: Delete file.\n\t* macos/Asm/*.s: Delete files.\n\t* macos/configure: Create target directories. 
Don't transform\n\t'(C)' to '(;)' in a 'dnl' line comment in .asm file.\n\t* Makefile.am: Delete macos targets.\n\t* macos/README: Reflect that we reverted back to a build\n\tprocess that require \"\"macos/configure\" to run on MacOS.\n\tThis imply that MacPerl is needed for a build in MacOS.\n\n2000-07-27  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_fft.c: New file, by Paul Zimmermann, minor mods\n\tapplied.\n\t* configure.in (gmp_mpn_functions): Add it.\n\t* mpn/generic/mul.c, mpn/generic/mul_n.c: Use it.\n\t* doc/multiplication: Describe it (briefly).\n\n\t* gmp-impl.h (FFT_MUL_THRESHOLD etc): New thresholds.\n\t(mpn_fft_best_k, mpn_fft_next_size, mpn_mul_fft, mpn_mul_fft_full):\n\tNew functions.\n\t(numberof, TMP_ALLOC_TYPE etc, _MP_ALLOCATE_FUNC_TYPE etc,\n\tUNSIGNED_TYPE_MAX etc): New macros.\n\n\t* tune/*: Add FFT threshold tuning and speed measuring.\n\t* tune/common.c: Avoid huge macro expansions for umul and udiv.\n\n\t* mpz/tests/t-bin.c, mpz/tests/t-jac.c, mpz/tests/t-misc.c,\n\tmpbsd/tests/t-misc.c, mpf/tests/t-misc.c, mpn/tests/try.c,\n\tmpn/tests/spinner.c: Use new gmp-impl.h macros.\n\n\t* demos/Makefile.am (BUILT_SOURCES): Don't need calc.c etc under this.\n\n2000-07-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/ia64/gmp-mparam.h: New file.\n\n2000-07-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/isprime.c: Handle any number of arguments and print\n\tclassification for each.  
Add `-q' option for old behaviour.\n\n2000-07-26  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Build Options): Mention djgpp stack size.\n\t(Notes for Package Builds): New section.\n\t(Compatibility with older versions): Update for 3.1, add mpf_get_prec.\n\n\t* demos/factorize.c [__GLIBC__]: Don't declare random() under glibc.\n\n\t* gmp.h (gmp_version): Add prototype and define.\n\n\t* Makefile.am: Keep macos directory generated files up-to-date\n\tduring development and on a \"make dist\".\n\n2000-07-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/hppa/gmp-mparam.h: Update threshold values from new `tune' run.\n\n\t* mpn/pa64/gmp-mparam.h: Fill in values from `make tune' run.\n\t* mpn/pa64w/gmp-mparam.h: Likewise.\n\t* mpn/mips3/gmp-mparam.h: Likewise.\n\n\t* tune/hppa2.asm: Fix typo in .level directive.\n\n\t* configure.in: Add sparc64-*-linux* support (from Jakub Jelinek).\n\t* configure: Regenerate.\n\n\t* mpn/sparc64/rshift.asm: Use %g5 instead of volatile stack frame area\n\tfor return value (from Jakub Jelinek).\n\t* mpn/sparc64/lshift.asm: Likewise.\n\n\t* mpf/get_prc.c: Revert Aug 8, 1996 change.\n\n\t* version.c: No longer static.\n\n\t* mpn/pa64/gmp-mparam.h: Only #define *_THRESHOLD if not already\n\tdefined.\n\t* mpn/pa64w/gmp-mparam.h: Likewise.\n\t* mpn/arm/gmp-mparam.h: Likewise.\n\t* mpn/mips3/gmp-mparam.h: Likewise.\n\n2000-07-25  Kevin Ryde  <kevin@swox.se>\n\n\t* INSTALL: It's \"info -f ./gmp.info\" to be sure of hitting the\n\tgmp.info in the current directory.\n\n\t* Makefile.am (libmp_la_DEPENDENCIES): Add mpz/cmp.lo, for last\n\tmpz/powm.c fix.\n\n\t* mpn/sparc64/addmul1h.asm, mpn/sparc64/submul1h.asm: Renamed from\n\taddmul_1h.asm, submul_1h.asm to avoid name conflicts on an 8.3\n\tfilesystem.\n\t* mpn/sparc64/addmul_1.asm, mpn/sparc64/submul_1.asm,\n\tmpn/sparc64/mul_1.asm: Update include_mpn()s.\n\n2000-07-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* Update header of all files previously under the Library GPL\n\tto instead be under 
the Lesser GPL.\n\n\t* COPYING.LIB: Now Lesser GPL.\n\t* demos/primes.c: Change license to GPL (was Library GPL).\n\t* demos/isprime.c: Change license to GPL (was Library GPL).\n\n\t* gmp.h (error code enum): Add GMP_ERROR_BAD_STRING (currently unused).\n\n\t* mpz/tests/t-mul.c: Default SIZE to a function of TOOM3_MUL_THRESHOLD.\n\tImprove error messages.  Decrease reps.\n\n2000-07-22  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h: Decrease the amount of data used for gcd and powm\n\tmeasuring, to make the tune go a bit faster.\n\n2000-07-21  Kent Boortz  <kent@swox.se>\n\n\t* macos/Asm*, macos/CmnObj, macos/Mp*: Directories no longer created\n\tfrom configure script, now part of dist.\n\t* macos/Makefile\n\t* macos/Makefile.cw\n\t* macos/config.h\n\t* macos/Asm/*.s\n\tNew files and directories that is the output from configure. This way\n\tno Perl installation is required to build on MacOS, just MPW.\n\t* macos/configure: Added prefix '__g' to exported assembler labels.\n\tChanged to handle new m4 syntax instead of the old cpp syntax in asm.\n\t* macos/Makefile.in: Corrected 'clean' target, added 'distclean'\n\tand 'maintainer_clean'. Added \"mpn/mp_bases.c\" to build.\n\t* macos/README: Reflect the new build process without configure.\n\tCorrected the file structure for Apple MPW installation.\n\n2000-07-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/tests/t-muldiv.c: Relax error limit.  Make precision depend\n\ton SIZE.  Misc changes.\n\n\t* configure: Regenerate.\n\n2000-07-20  Kent Boortz  <kent@swox.com>\n\n\t* macos/Makefile.in: Removed hard coded targets, added special\n\ttargets found in Makefile.am files.\n\t* macos/configure: Generate targets from top configure script and\n\tMakefile.am files. 
Made script runnable from Unix for testing.\n\t* macos/README: Notes about search paths for includes, contributed\n\tby Marco Bambini.\n\t* configure.in: Added comment about lines that the \"macos/configure\"\n\tscript depend on.\n\n2000-07-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/powm.c (mpz_powm): After final mpz_redc call, subtract `mod'\n\tfrom result if it is greater than `mod'.\n\n2000-07-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/hppa/gmp-mparam.h: Fill in values from `make tune' run.\n\t* mpn/alpha/gmp-mparam.h: Likewise.\n\t* mpn/powerpc32/gmp-mparam.h: Likewise.\n\n\t* tune/hppa.asm: New file.\n\t* tune/hppa2.asm: New file.\n\t* configure.in (SPEED_CYCLECOUNTER_OBJS): Set for hppa2*-*-* and\n\thppa*-*-*.\n\t* tune/Makefile.am (EXTRA_DIST): Add hppa.asm and hppa2.asm.\n\n\t* tune/speed.h (SPEED_ROUTINE_MPN_BZ_DIVREM_CALL): Declare `marker';\n\tinvoke TMP_FREE.\n\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.S: Use \"%\" instead of \"'\" for\n\treloc/symbol delimiter.\n\n2000-07-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc64/gmp-mparam.h: Update with output from tune utility.\n\t* mpn/powerpc64/copyi.asm: New file.\n\t* mpn/powerpc64/copyd.asm: New file.\n\n2000-07-16  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/*: Add measuring for umul_ppmm and udiv_qrnnd.\n\n2000-07-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/k62mmx: New directory.\n\t* configure.in (k6[23]*-*-*): Use it.\n\t* mpn/x86/k6/k62mmx/copyi.asm, mpn/x86/k6/k62mmx/copyd.asm: Move from\n\tmmx directory, improve code alignment a bit.\n\t* mpn/x86/k6/k62mmx/lshift.asm, mpn/x86/k6/k62mmx/rshift.asm: Ditto,\n\tand improve addressing modes for pre-CXT cores.\n\t* mpn/x86/x86-defs.m4 (Zdisp): Add an instruction.\n\t* mpn/x86/k6/mmx/lshift.asm, mpn/x86/k6/mmx/rshift.asm: New files,\n\tsuiting plain K6.\n\t* mpn/x86/README, mpn/x86/k6/README: Updates.\n\t* mpn/x86/k6/mmx/*.asm: Update some comments.\n\n\t* mpn/tests/Makefile.am: Use $(MAKE) in .asm rules, not \"m\".\n\t* 
tune/Makefile.am: Use $(EXEEXT) and libtool --config objdir, for\n\tthe benefit of djgpp.\n\n\t* */Makefile.in: Regenerate with patched automake that adds\n\t$(EXEEXT) to EXTRA_PROGRAMS.\n\n\t* mpn/tests/try.c: Add #ifdef to SIGBUS, for the benefit of djgpp.\n\t* config.guess: Recognise pc:*:*:* as an x86, for djgpp.\n\n\t* configure: Regenerate with patched autoconf to fix temp file\n\t\".hdr\" which is invalid on a DOS 8.3 filesystem, and to fix two\n\tsed substitutes that clobbered a \":\" in $srcdir (eg. a DOS drive\n\tspec).\n\n\t* mpz/tests/io.c: Use one fp opened \"w+\", since separately opened\n\tinput and output doesn't work on MS-DOS 6.21.\n\n\t* tests/rand/Makefile.am (allprogs): Pseudo-target to build everything.\n\t(CLEANFILES): Add EXTRA_PROGRAMS and EXTRA_LTLIBRARIES.\n\t(manual-test, manual-bigtest): Add $(EXEEXT) to\tdependencies.\n\n\t* tests/rand/*/Makefile.in: Regenerate with patched automake that adds\n\t$(EXEEXT) to EXTRA_PROGRAMS.\n\n2000-07-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/t-root.c: Also test mpz_perfect_power_p.\n\tGenerate `nth' so that there will be fewer trivial values.\n\n\t* mpz/root.c: Reverse return value in tests for detecting root of +1\n\tand -1.\n\n\t* mpz/perfpow.c: Use TMP_ALLOC interface.\n\n2000-07-12  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/perfpow.c (primes): Make it const.\n\n2000-07-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/cross.pl: New file.\n\n\t* mpn/x86/*/gmp-mparam.h: Updates to thresholds, conditionalize\n\tall _TIME defines.\n\t* mpn/x86/pentium/mmx/gmp-mparam.h: New file.\n\t* mpn/sparc64/gmp-mparam.h: Update thresholds.\n\t* mpn/sparc32/v9/gmp-mparam.h: Ditto.\n\n2000-07-04  Kevin Ryde  <kevin@swox.se>\n\n\t* NEWS: Updates.\n\t* mpn/x86/*/README: Miscellaneous updates.\n\n\t* tune/speed-ext.c: New file.\n\t* tune/Makefile.am: Add it.\n\t* tune/README: Updates.\n\t* tune/speed.h (SPEED_ROUTINE_MPN_DIVREM_2): Bug fixes.\n\n\t* demos/calc.y,calclex.l: New files.\n\t* 
demos/calc.c,calc.h,calclex.c: New files, generated from .y and .l.\n\t* demos/Makefile.am: Add them.\n\n\t* gmp.h (mpq_swap, mpf_swap): Add prototypes and defines.\n\n2000-07-01  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (ABI and ISA): New section, bringing together ABI notes.\n\t(Build Options): Add MPN_PATH, various updates.\n\t(Build Options): Add note on setting CFLAGS when setting CC.\n\t(Notes for Particular Systems): Add -march=pentiumpro problem.\n\t(Known Build Problems): Note on gmp-mparam.h for 64-bit generic C.\n\t(GMP Variable Conventions): Add some info on user defined functions.\n\t(Reporting Bugs): Minor rewording.\n\n\t* configure.in (MPN_PATH): Renamed from mpn_path.\n\n\t* gmp-impl.h (ULONG_MAX,ULONG_HIGHBIT,...,SHORT_MAX): New defines.\n\t* mp[zf]/tests/t-misc.c: Use them.\n\n\t* mpbsd/tests/t-misc.c: New file.\n\t* mpbsd/tests/Makefile.am: Add it.\n\n\t* Makefile.am (LIBGMP_LT_*, LIBMP_LT_*): Bump version info.\n\t* gmp.h (__GNU_MP_VERSION_*): Bump to 3.1.\n\n\t* mpf/tests/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.\n\n\t* Makefile.am (libmp_la_SOURCES): Add mp_set_fns.c, accidentally\n\tomitted in gmp 3.0.x.\n\t* gmp.texi (Custom Allocation): Note this is available in mpbsd,\n\tand some minor rewording.\n\n2000-06-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/factorize.c (random): New function, defined conditionally.\n\t(factor_using_pollard_rho): Use it, not mrand48.\n\n\t* mpn/cray/README: New file.\n\n2000-06-30  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/aorsmul_1.asm: Add MULFUNC_PROLOGUE.\n\n\t* mpz/tests/t-jac.c: Test limbs on mpn_jacobi_base, not just ulongs.\n\n\t* gmp-impl.h, mpn/tests/try.c, mpn/tests/spinner.c, tune/speed.c:\n\tUse config.h unconditionally, not under HAVE_CONFIG_H.\n\n\t* demos/pexpr.c [__DJGPP__]: Patch by Richard Dawe to not use\n\tsetup_error_handler on djgpp.\n\n\t* tune/*: Locate data to help direct-mapped caches, add measuring\n\tof mpz_init/clear, mpz_add and mpz_bin_uiui, various 
cleanups.\n\t* configure.in (AC_CHECK_FUNCS): Add popen.\n\n2000-06-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/mul_2exp.c: Streamline criterion for whether to use mpn_lshift or\n\tmpn_rshift.  Increase precision when exp is a multiple of\n\tBITS_PER_MP_LIMB primarily to make exp==0 be a noop.\n\t* mpf/div_2exp.c: Analogous changes.\n\n\t* mpf/tests/t-dm2exp.c: Set u randomly in loop.  Perform more\n\tmpf_mul_2exp testing.\n\n\t* configure.in: Recognize cray vector processors with a broad `*';\n\tmove after alpha* not to match that.\n\n2000-06-28  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/tests/io.c: Use a disk file, not a pipe, switch to ansi2knr\n\tstyle, switch from MP_INT to mpz_t, add a couple of error checks.\n\t* mpz/tests/Makefile.am (CLEANFILES): Add io.tmp, in case io.c fails.\n\n2000-06-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/tests/t-get_d.c: Be more lax about relative error, to handle Cray\n\tfloating point format.\n\n\t* mpq/tests/t-get_d.c: Decrease default reps to 1000.\n\n\t* mpf/tests/t-conv.c: Correct type of `bexp'.\n\n\t* configure.in (cray vector machines): Don't inherit gmp_cflags_cc.\n\n\t* tune/Makefile.am (EXTRA_DIST): Delete sparc64.asm.\n\n\t* configure.in (cray vector machines): Set extra_functions.\n\n\t* mpn/cray/mulww.f: New file with vectorizing cray code.\n\t* mpn/cray/mulww.s: Generated from mulww.f.\n\t* mpn/cray/mul_1.c: New file.\n\t* mpn/cray/addmul_1.c: New file.\n\t* mpn/cray/submul_1.c: New file.\n\t* mpn/cray/add_n.c: New file.\n\t* mpn/cray/sub_n.c: New file.\n\n2000-06-26  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_ALIGN_FILL_0x90): Fix so it actually\n\tdetects solaris 2.6, and also suppress warning on solaris 2.8.\n\t* configure.in (SPEED_CYCLECOUNTER): Remove spurious \"athlon\" from\n\tsparc case.\n\n\t* mpn/lisp/gmpasm-mode.el: Move keymap to the top of the docstring.\n\n2000-06-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n, mpn_kara_sqr_n): 
Use\n\tmp_size_t for n2.\n\t(mpn_toom3_mul_n, mpn_toom3_sqr_n): Use mp_size_t for size\n\tparameters and \"l\" variables.\n\t* gmp-impl.h (mpn_toom3_mul_n, mpn_toom3_sqr_n): Update prototypes.\n\n\t* mpbsd/itom.c, mpbsd/sdiv.c: Add casts for correct handling of\n\t-0x80...00 on systems with sizeof(short)==sizeof(int).\n\n\t* mpz/tests/t-misc.c: Move \"bin\" test from here ...\n\t* mpz/tests/t-bin.c: ... to here, and add a new (2k,k) test too.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add t-bin.\n\n\t* mpz/bin_ui.c [_LONG_LONG_LIMB]: Use mpn_divrem_1, since kacc is\n\ta limb not a ulong.\n\t* mpz/bin_uiui.c [_LONG_LONG_LIMB]: Ditto, and use mpn_mul_1 too,\n\tsince nacc is a limb.\n\n\t* mpf/tests/t-misc.c (check_mpf_set_si, check_mpf_cmp_si):\n\tNew file, testing mpf_set_si, mpf_init_set_si, and mpf_cmp_si.\n\t* mpf/tests/Makefile.am (check_PROGRAMS): Add it.\n\n\t* mpz/tests/t-misc.c (check_mpz_set_si, check_mpz_cmp_si):\n\tNew tests, for mpz_set_si, mpz_init_set_si, and mpz_cmp_si.\n\n\t* mpz/set_si.c, mpz/iset_si.c, mpz/cmp_si.c [_LONG_LONG_LIMB]: Fix\n\thandling of -0x80..00.\n\t* mpf/set_si.c, mpf/iset_si.c, mpf/cmp_si.c [_LONG_LONG_LIMB]: Ditto.\n\n2000-06-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/primes.c: Properly handle arguments `m +n'.\n\n2000-06-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Recognize k5 and k6 with common pattern.\n\n\t* mpq/tests/t-get_d.c: Also test mpq_set_d.  Misc improvements.\n\n\t* mpq/set_d.c: Special case 0.0.  Don't call mpn_rshift with 0 count.\n\tAllocate correct amount of memory for numerator.  Delete spurious\n\tASSERT_ALWAYS(1).\n\n2000-06-17  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/perfsqr.c: Fix so that zero is considered a perfect square.\n\t(Was wrongly calling mpn_perfect_square_p with size==0.)\n\n2000-06-16  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in: Set k5*-*-* to use basic i386 code until there's\n\tsomething specific.  
Add path=x86 as a default for x86s.\n\n\t* acinclude.m4 (GMP_CHECK_ASM_ALIGN_LOG): Generate\n\tALIGN_LOGARITHMIC setting, not a full ALIGN definition.\n\t(GMP_CHECK_ASM_ALIGN_FILL_0x90): New test.\n\t* configure.in [x86-*-*]: Use GMP_CHECK_ASM_ALIGN_FILL_0x90.\n\t* mpn/asm-defs.m4 (ALIGN): New macro.\n\t* mpn/x86/x86-defs.m4 (ALIGN): Remove supplementary definition.\n\n\t* tune/*: Plain \"unsigned\" for speed_cyclecounter.\n\t* configure.in: Use tune/sparcv9.asm for 32 and 64 bit modes.\n\t* tune/sparc64.asm: Remove file.\n\n2000-06-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/k7/mmx/copyi.asm: Use `testb' instead of `test'.\n\t* mpn/x86/k7/mmx/copyd.asm: Likewise.\n\n\t* mpn/x86/k7/mmx/lshift.asm: Avoid using `~' (Solaris as problems).\n\t* mpn/x86/k7/mmx/rshift.asm: Likewise.\n\t* mpn/x86/k6/aors_n.asm: Likewise.\n\t* mpn/x86/k7/aors_n.asm: Likewise.\n\t* mpn/x86/k7/mul_basecase.asm: Likewise.\n\n2000-06-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/sparcv9.asm: Tune, deleting two instructions.\n\n\t* tune/alpha.asm: Update to unified speed_cyclecounter.\n\n2000-06-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/tests/reuse.c (FAIL): Add a K&R version.\n\tUse _PROTO on some typedefs.\n\t* mpz/tests/t-misc.c: Add gmp-impl.h for \"const\".\n\n\t* configure.in: Rework mpn multi-function and optional files.\n\tNames standardized, no need for explicit declarations, all picked\n\tup in one $path traversal.\n\t* doc/configuration: Updates.\n\n\t* tests/rand/t-rand.c (main): Change \"usage\" to work with K&R.\n\n2000-06-10  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/mmx/popham.asm, mpn/x86/p6/mmx/popham.asm,\n\tmpn/x86/p6/p3mmx/popham.asm, mpn/x86/p6/diveby3.asm: Add\n\tMULFUNC_PROLOGUE for correct HAVE_NATIVE_* matching.\n\n\t* mpn/x86/x86-defs.m4 (cmov_bytes_tttn): Use eval() on expressions.\n\t(cmov_available_p): Switch to list CPUs which do have cmov.\n\t* mpn/x86/p6/sqr_basecase.asm, mpn/x86/k6/sqr_basecase.asm,\n\tmpn/x86/k7/sqr_basecase.asm: Use 
eval() for multiplication.\n\t* mpn/x86/README.family: Various updates.\n\n2000-06-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpbsd/tests/allfuns.c (main): Call exit() instead of doing return.\n\n\t* doc/tasks.html, doc/projects.html: Moved from projects directory.\n\t* doc/multiplication: New file.\n\t* Makefile.am (EXTRA_DIST): Remove projects, add doc.\n\n\t* Makefile.am (libgmp_la_LIBADD, libmp_la_LIBADD): Remove\n\tunnecessary -lm.\n\t* INSTALL: Remove -lm from instructions.\n\t* demos/Makefile.am (qcn_LDADD): Add -lm.\n\n\t* tune/*: Add measuring for mpn_divrem_2 and modlimb_invert,\n\timprove addsub_n.  Switch to unified speed_cyclecounter.\n\t* configure.in: Update configs for speed_cyclecounter.\n\n\t* gmp-impl.h (MP_LIMB_T_MAX, MP_LIMB_T_HIGHBIT): New macros.\n\t* mpn/generic/diveby3.c, mpn/generic/mul_n.c, mpn/generic/gcd.c,\n\ttune/speed.c, mpn/tests/ref.c: Use them.\n\n\t* mpn/tests/spinner.c: Remove setitimer, just alarm is enough.\n\t* configure.in (AC_CHECK_FUNCS): Remove setitimer.\n\t* mpn/tests/x86call.asm: Start with junk in %eax, %ecx, %edx.\n\t* mpn/tests/ref.[ch] (refmpn_addsub_nc): New function.\n\t* mpn/tests/try.c: Add some support for mpn_addsub_nc.\n\t* mpn/tests/Makefile.am (EXTRA_PROGRAMS): Remove addsub_n and\n\taddsub_n_2 which don't currently build.\n\t* mpn/tests/copy.c: Test MPN_COPY_INCR, not __gmpn_copy.\n\n\t* tests/rand/Makefile.am (libstat_la_LIBADD): Add -lm, no longer on\n\tlibgmp.la.\n\t(findlc_LDADD): Use libstat.la.\n\t(AUTOMAKE_OPTIONS): Use ansi2knr.\n\n2000-06-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* configure.in (alpha*-*-osf*): Default `flavour' to ev6 for ev6 and\n\thigher.\n\t(alpha*-*-*): Likewise.\n\t(alpha*-*-osf*: gmp_optcflags_cc): Move -arch/-tune flags from\n\tgmp_xoptcflags_gcc.\n\n\t* mpn/Makefile.am (TARG_DIST): Add pa64w.\n\n\t* longlong.h: Wrap 64-bit hppa code in #ifndef LONGLONG_STANDALONE.\n\n2000-06-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/remove.c: Fail for `src' being zero.\n\n\t* 
mpz/tests/reuse.c: Test more functions.\n\t(FAIL): New define.\n\n\t* mpz/tests/t-powm.c: Loop during operand generation while they\n\tare mathematically ill-defined (used to just skip such tests).\n\n\t* mpz/powm.c (mpz_redc): Clean up argument declarations.\n\n\t* configure.in (gmp_cflags64_gcc): Don't add bogus -mWHAT option.\n\t(sparcv9-*-solaris2.[7-9]], gmp_cflags64_gcc):\n\tInherit from previous gmp_cflags64_gcc; pass `-m64 -mptr64'.\n\t(ia64*-*-*): New.\n\n\t* mpn/generic/dump.c: Make it work when an mp_limb_t is not `long'.\n\n\t* mpf/set_prc.c: MPN_COPY => MPN_COPY_INCR.\n\n2000-06-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n):\n\tUse mpn_incr_u for final carry propagation.\n\n\t* mpz/tests/t-gcd.c: Add calls to mpz_gcdext with argument t == NULL.\n\n\t* mpz/tests/reuse.c: Major rewrite; test many more functions.\n\n\t* mpz/powm_ui.c: When exp is 0, change res assign order in order\n\tto handle argument overlap.\n\t* mpz/powm.c: When exp is 0, change res assign order in order\n\tto handle argument overlap.  Handle negative exp and mod arguments.\n\n\t* mpz/gcdext.c: Rework code after mpn_gcdext call to handle\n\targument overlap.\n\n\t* mpz/fdiv_qr.c: Read dividend->_mp_size before calling mpz_tdiv_qr\n\tin order to handle argument overlap.\n\t* mpz/cdiv_qr.c: Likewise.\n\n\t* mpf/tests/reuse.c: Fix typo that effectively disabled `dis_funcs'\n\ttests.  
Clean up test for mpf_ui_div.\n\n2000-06-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/p6/sqr_basecase.asm: New file.\n\t* mpn/x86/mod_1.asm: Avoid one conditional jump.\n\t* mpn/x86/p6/gmp-mparam.h: Update thresholds, #ifndef UMUL_TIME\n\tand UDIV_TIME, add COUNT_TRAILING_ZEROS_TIME.\n\n\t* mp_minv_tab.c: New file.\n\t* Makefile.am (libgmp_la_SOURCES, libmp_la_SOURCES): Add it.\n\t* gmp-impl.h (modlimb_invert): New macro.\n\t* mpz/powm.c: Remove mpz_dmprepare, use modlimb_invert instead.\n\t* mpn/generic/bdivmod.c: Use modlimb_invert instead of a loop.\n\t* mpn/generic/gcd.c: Inline two small mpn_bdivmod calls, use\n\tMPN_COPY_INCR not MPN_COPY in one place.\n\n2000-06-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpf/tests/reuse.c (dsi_funcs): Add mpf_mul_2exp and mpf_div_2exp.\n\t(main): Clean up test for mpf_div_ui.\n\n\t* mpf/mul_2exp.c: Correct criterion for whether to use mpn_lshift or\n\tmpn_rshift.  MPN_COPY => MPN_COPY_INCR.  Coerce the two assignments to\n\tr->_mp_size.\n\n\t* mpf/div_2exp.c: Use mpn_rshift instead of mpn_lshift when overlap\n\tso requires.  
MPN_COPY => MPN_COPY_INCR.\n\n\t* mpf/tests/t-dm2exp.c: Correct type of res_prec.\n\n2000-06-04  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/bin_uiui.c: Fix result for n==0 and n==k.\n\t* mpz/bin_ui.c: Fix result for k>n, add support for n<0.\n\t* gmp.texi (Number Theoretic Functions): Update mpz_bin_ui to\n\tnote n<0 is supported.\n\n\t* mpz/tests/t-misc.c: New file.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it.\n\n2000-05-31  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.* (FLAG_R_OPTIONAL): New option for routines, use on\n\tmpn_gcd_1 and mpn_mul_basecase.\n\t* tune/README: Update.\n\n\t* tune/alpha.asm: New file, by Torbjorn.\n\t* tune/Makefile.am (EXTRA_DIST): Add it.\n\t* configure.in (alpha*-*-*): Use it.\n\n2000-05-31  Linus Nordberg  <linus@swox.se>\n\n\t* doc/configuration: New file.\n\n2000-05-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_basecase.c: Call mpn_mul_2 and mpn_addmul_2\n\tif available.  Don't include longlong.h.\n\n\t* doc/isa_abi_headache: New file.\n\n2000-05-30  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (NM): Use AC_PROG_NM rather than AC_CHECK_TOOL to\n\tfind `nm'.  
(AC_PROG_NM comes with Libtool and is needed to get\n\tthe `-B' option (BSD compatible output) included in $NM.)\n\t(AR): Use AC_CHECK_PROG rather than AC_CHECK_TOOL to find `ar'.\n\t(Now that NM isn't a cross compilation tool, don't give the\n\timpression that we know how to cross compile.)\n\t(CCAS): Remove spurious comment.\n\n\t* gmp.texi (Notes for Particular Systems): Remove comment about\n\tusing GNU `nm' on AIX since system nm now works.\n\n2000-05-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/power/mul_1.s: Remove [PR] from first word in function\n\tdescriptor.\n\t* mpn/power/addmul_1.s: Likewise.\n\t* mpn/power/submul_1.s: Likewise.\n\n2000-05-28  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in, tune/*: Change pentium rdtsc cycle scheme to\n\tHAVE_SPEED_CYCLECOUNTER and SPEED_CYCLECOUNTER_OBJS.\n\t* tune/pentium.asm: Renamed and converted from rdtsc.asm.\n\t* tune/sparcv9.asm: New file, by Torbjorn.\n\t* tune/sparc64.asm: New file.\n\t* tune/tuneup.c: Put a limit on gcdext search.\n\n\t* gmp.h (mp_set_memory_functions): Add extern \"C\".\n\t* mp.h (__GNU_MP__): Bump to \"3\".\n\t* mpz/add.c,mul.c,powm.c,sub.c,sqrtrem.c,tdiv_qr.c [BERKELEY_MP]:\n\tInclude mp.h for mpbsd compile.\n\t* mpz/gcd.c: Ditto, and remove _mpz_realloc declaration.\n\n\t* gmp.texi (Integer Functions): Flatten @subsections into @sections.\n\t(Floating-point Functions): Ditto.\n\t(Integer Random Numbers): Split from miscellaneous as a sep section.\n\t(Installing GMP): Make nodes for the sections.\n\tAdd more \"@cindex\"s.\n\t(Known Build Problems): Remove SunOS get_d problem, believed fixed.\n\t(Notes for Particular Systems): Remove HPPA note since now PIC.\n\t(References): URL for Jebelean.\n\n2000-05-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64w: New directory, contents based on corresponding mpn/pa64\n\tfiles.\n\t* configure.in (hppa2.0w-*-*): New.\n\t* mpz/tests/io.c (_INCLUDE_POSIX_SOURCE): Define when __hpux before\n\tincluding stdio.h.\n\t* gmp-impl.h: Always 
define DItype and UDItype.\n\n2000-05-27  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/common.c (speed_measure): Correction to array sorting,\n\tbetter diagnostic when measuring fails.\n\t* tune/time.c: Add microsecond accurate getrusage method.\n\n\t* tune/time.c (speed_cpu_frequency_processor_info): New function.\n\t* configure.in (AC_CHECK_FUNCS): Add processor_info.\n\n2000-05-26  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Installing GMP): Shared libraries work for AIX < 4.3\n\tif using GNU nm.\n\n2000-05-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* tune/tuneup.c (SIGNED_TYPE_MAX): Shift `-1' instead of `1' to\n\tavoid signed overflow.\n\n\t* demos/pexpr.c (setup_error_handler): Don't call sigaltstack on\n\tUnicos.\n\n2000-05-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* insert-dbl.c: Work around GCC 2.8 bug.\n\t* extract-dbl.c: Likewise.\n\n\t* config.sub: Allow i586, i686, i786 again.\n\n\t* config.guess: Use X86CPU for lots more systems.\n\n2000-05-25  Linus Nordberg  <linus@swox.se>\n\n\t* mpbsd/tests/dummy.c (main): Call exit() instead of doing return\n\t(some old SysV machines don't get this correct, I've heard.)\n\n2000-05-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/iset_str.c: Initialize _mp_size and _mp_exp to 0, in case no\n\tdigits in string, so it's the same as a separate init and set_str.\n\n2000-05-24  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/reuse.c: Use mpz_random2 instead of mpz_random.\n\n\t* mpz/divexact.c: Read pointers after reallocation.\n\tCompare `quot' and `den' instead of `qp' and `dp' in overlap check.\n\tUse MPN_COPY_INCR for copying from `np'.\n\n\t(*-*-aix4.[3-9]*): Disable shared libs just for problematic AIX\n\tversions.\n\t* configure.in (*-cray-unicos*): Disable asm syntax checking; set\n\tcompiler explicitly.\n\t* configure.in (hppa*-*-*): Remove code disabling shared libs.\n\n2000-05-24  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): Don't report progress to user\n\twhen doing the AIX 
specific test to avoid \"nested output\".\n\n2000-05-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mp.h (_PROTO): Copy from gmp.h, use on prototypes.\n\tAdd extern \"C\" too.\n\t* mpbsd/tests/Makefile.am (AUTOMAKE_OPTIONS): Enable ansi2knr.\n\t* mpbsd/tests/allfuns.c: Don't execute mout, just link to it.\n\t(main): ANSI style definition.\n\n\t* gmp-impl.h (MP_BASE_AS_DOUBLE): Change the expression to\n\tsomething that works on SunOS native cc.  Seems to fix the\n\tmp*_get_d problems.\n\n\t* mpn/tests/ref.c (refmpn_strip_twos): Use MPN_COPY_INCR.\n\t* mpn/tests/Makefile.am: Let .asm.o rules work with absolute $srcdir.\n\n2000-05-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k7/sqr_basecase.asm: Replace file with K7 specific code.\n\t* mpn/x86/k7/README: Update.\n\t* mpn/x86/k7/gmp-mparam.h: Tune thresholds.\n\t(COUNT_TRAILING_ZEROS_TIME): New define.\n\t* mpn/x86/k6/gmp-mparam.h: Ditto.\n\n\t* mpn/x86/pentium/mmx/popham.asm: New file (include_mpn of K6 version).\n\t* mpn/x86/p6/diveby3.asm: New file (include_mpn of P5 version).\n\t* mpn/x86/p6/mmx/popham.asm: New file (include_mpn of K6 version).\n\t* mpn/x86/p6/p3mmx/popham.asm: New file (include_mpn of K7 version).\n\t* configure.in (pentium3-*-*): Add p3mmx to $path.\n\n\t* gmp.texi (Integer Arithmetic): Clarify mpz_jacobi op2; add\n\tmpz_*_kronecker_*.\n\t(Miscellaneous Integer Functions): Add mpz_odd_p and mpz_even_p.\n\t(Low-level Functions): Put mpn_divmod_1 with mpn_divrem_1 and note\n\tit's now a macro.\n\t(References): Add Henri Cohen.\n\n\t* gmp.h (mpn_addmul_1c, mpn_divrem_1c, mpn_mod_1c, mpn_mul_1c,\n\tmpn_submul_1c): Add prototypes.\n\t(mpz_odd_p, mpz_even_p): New macros.\n\n\t* mpn/asm-defs.m4 (m4wrap_prepend): New macro.\n\t(m4_error): Use it.\n\t(m4_not_for_expansion): Corrections to OPERATION symbols.\n\tMore comments about variations between m4 versions.\n\t* mpn/x86/x86-defs.m4 (PROLOGUE): Use m4wrap_prepend (fixes error\n\texit under BSD m4, previously m4_error printed the message but the\n\texit code 
was 0).\n\n\t* gmp.h (mpn_divmod_1): Change to a macro calling mpn_divrem_1.\n\t* mpn/generic/divrem_1.c: Move divmod_1.c code to here, make it\n\tstatic and call it __gmpn_divmod_1_internal.\n\t* mpn/generic/divmod_1.c: Remove file.\n\t* configure.in (gmp_mpn_functions): Remove divmod_1.\n\t* mpn/asm-defs.m4 (define_mpn): Remove divmod_1 and divmod_1c.\n\t* compat.c (mpn_divmod_1): Add compatibility function.\n\t* tune/*: Remove mpn_divmod_1 measuring (leave just divrem_1).\n\n\t* acconfig.h (HAVE_NATIVE_mpn_*): Add some missing carry-in\n\tvariants, remove divmod_1.\n\n\t* mpn/x86/diveby3.asm: Use imul, update comments.\n\n\t* demos/qcn.c: New file.\n\t* demos/Makefile.am (EXTRA_PROGRAMS): Add it.\n\n\t* mpz/tests/t-jac.c: New file.\n\t* mpz/tests/Makefile.am (check_PROGRAMS): Add it. Enable ansi2knr.\n\n\t* mpz/kronsz.c: New file.\n\t* mpz/kronuz.c: New file.\n\t* mpz/kronzs.c: New file.\n\t* mpz/kronzu.c: New file.\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Add them.\n\t* Makefile.am (MPZ_OBJECTS): Add them.\n\t* gmp-impl.h (JACOBI_*, MPN_STRIP_LOW_ZEROS_NOT_ZERO): New macros.\n\t* gmp.h (mpz_*_kronecker_*): New defines and prototypes.\n\n\t* mpn/generic/jacbase.c: New file.\n\t* mpn/generic/mod_1_rs.c: New file.\n\t* configure.in (gmp_mpn_functions): Add them.\n\t* gmp.h (mpn_jacobi_base, mpn_mod_1_rshift): New defines and\n\tprototypes.\n\t* longlong.h (COUNT_TRAILING_ZEROS_TIME): New define.\n\t* mpn/tests/ref.c (refmpn_mod_1_rshift): New function.\n\t* mpn/tests/try.c: Add mpn_mod_1_rshift.\n\t* tune/*: Add measuring for mpn_jacobi_base.\n\n\t* acinclude.m4 (GMP_FINISH): Add ifdefs to allow multiple\n\tinclusion of config.m4.\n\t(GMP_PROG_M4): Put \"good\" message through to config.log.\n\n\t* mpz/powm.c: Use a POWM_THRESHOLD for where redc stops.\n\t* tune/*: Add mpz_powm measuring, and tune POWM_THRESHOLD.\n\t* gmp-impl.h [TUNE_PROGRAM_BUILD] (POWM_THRESHOLD): Conditional\n\tredefinition for use when tuning.\n\n\t* mpz/powm_ui.c: Use DIVIDE_BY_ZERO.\n\n\t* 
mpz/iset_str.c: Initialize _mp_size to 0, in case no digits in\n\tstring; this makes it the same as a separate init and set_str.\n\n2000-05-20  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/asm-defs.m4: Note &,|,^ aren't bitwise in BSD m4 eval().\n\t* mpn/x86/k6/sqr_basecase.asm: Use \"%\" not \"&\" in m4 eval()s.\n\n\t* mpn/x86/x86-defs.m4 (Zdisp): Yet more instruction forms.\n\n2000-05-19  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_CC_64BIT): Don't use shell variable\n\t`ac_compile' for our own compile command string since other\n\tAutoconf macros may depend on it.\n\n2000-05-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Fix\n\tcarry propagation in final coefficient additions.\n\n2000-05-18  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Set NM before looking for compiler since\n\tGMP_CHECK_CC_64BIT needs it.\n\n\t* acinclude.m4 (GMP_CHECK_CC_64BIT): Don't execute on target.\n\t(GMP_PROG_CC_FIND): Before checking if the compiler knows how to\n\tproduce 64-bit code, verify that it works at all.  
The background\n\tis that /usr/ucb/cc on Solaris 7 successfully compiles in 64-bit\n\tmode but fails when doing final link.\n\t(GMP_PROG_CC_WORKS): Report to user what's happening.\n\n2000-05-17  Linus Nordberg  <linus@swox.se>\n\n\t* config.guess: Use X86CPU for x86 Cygwin.\n\n2000-05-16  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/p6/mmx/divrem_1.asm: New file.\n\t* mpn/x86/p6/mmx/mod_1.asm: New file.\n\t* mpn/x86/p6/README: Update.\n\t* mpn/x86/divrem_1.asm: Update comments.\n\t* mpn/x86/mod_1.asm: Ditto.\n\n2000-05-14  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/speed.h: Run gcd functions on a set of data.\n\n\t* mpn/tests/try.c: New file.\n\t* mpn/tests/try.h: New file.\n\t* mpn/tests/spinner.c: New file.\n\t* mpn/tests/trace.c: New file.\n\t* mpn/tests/x86call.asm: New file.\n\t* mpn/tests/x86check.c: New file.\n\t* mpn/tests/ref.c (refmpn_hamdist): Allow size==0.\n\t(refmpn_gcd): New function, and other additions supporting it.\n\t* mpn/tests/ref.h: More prototypes.\n\t* mpn/tests/Makefile.am: Add try program, use ansi2knr.\n\n\t* mpn/x86/k7/mmx/popham.asm: New file.\n\t* mpn/x86/k6/mmx/popham.asm: New file.\n\t* mpn/x86/k6/sqr_basecase.asm: Unroll the addmul, for approx 1.3x\n\tspeedup above 15 limbs.\n\t* mpn/x86/k7/README: Update.\n\t* mpn/x86/k6/README: Update, and add notes on plain K6 and pre-CXT\n\tK6-2 problems.\n\t* configure.in (k6*-*-*, athlon-*-*): Add popham.\n\n\t* mpn/x86/pentium/diveby3.asm: New file.\n\t* mpn/x86/pentium/README: Update.\n\n\t* gmp.texi (Installing GMP): Add note on bad OpenBSD 2.6 m4.\n\t(Reporting Bugs): Ask for config.m4 if asm file related.\n\t(I/O of Rationals): New section, add mpq_out_str.\n\t(References): Add url for on-line gcc manuals.\n\tA few node and menu updates.\n\n\t* INSTALL: Better command line argument checking for test progs.\n\tChange MP -> GMP.\n\n\t* configure.in (WANT_ASSERT, USE_STACK_ALLOC, HAVE_PENTIUM_RDTSC):\n\tPut descriptions here, not in acconfig.h.\n\t(CALLING_CONVENTIONS_OBJS): New AC_SUBST (for 
mpn/tests/try).\n\t(HAVE_CALLING_CONVENTIONS): New AC_DEFINE.\n\t(AC_CHECK_HEADERS): Add sys/time.h.\n\t(AC_CHECK_FUNCS): Add getpagesize, setitimer.\n\t(KARATSUBA_SQR_THRESHOLD): Strip trailing comments from the\n\t#define when passing through to config.m4.\n\t* acconfig.h (PACKAGE, VERSION, WANT_ASSERT, USE_STACK_ALLOC,\n\tHAVE_PENTIUM_RDTSC): No need for #undefs, autoheader gets them\n\tfrom configure.in.\n\n\t* acinclude.m4 (GMP_PROG_M4): Check for broken OpenBSD 2.6 m4\n\teval(), put messages into config.log.\n\t* mpn/asm-defs.m4: Add notes and test for OpenBSD 2.6 m4.\n\n\t* mpq/out_str.c: New file.\n\t* mpq/Makefile.am (libmpq_la_SOURCES): Add it.\n\t* Makefile.am (MPQ_OBJECTS): Ditto.\n\t* gmp.h (mpq_out_str): New define and prototype.\n\n2000-05-12  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (CONFIG_TOP_SRCDIR): Fix to use $srcdir not\n\t$top_srcdir (which doesn't exist).\n\t* acinclude.m4 (GMP_C_ANSI2KNR): Fix setting U=_.\n\t* gmp-impl.h (mpn_com_n, MPN_LOGOPS_N_INLINE): Fix missing \"do\"\n\t(not currently used, probably no ill effect anyway).\n\n2000-05-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* randraw.c (lc): Major overhaul (pending rewrite).\n\t(_gmp_rand): Rewrite.\n\n2000-05-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/convert.c: Call free via _mp_free_func.\n\t* mpf/tests/t-conv.c: Likewise.\n\n\t* memory.c: Add code enabled for DEBUG that adds special patterns\n\taround allocated blocks.\n\n2000-05-05  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Miscellaneous Float Functions): Correct parameter list\n\tfor mpf_urandomb().\n\n\t* configure.in: Invoke AC_REVISION.\n\n2000-05-05  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi: Use @dircategory and @direntry.\n\t(Installing GMP): Clarification for --target, updates on SunOS\n\tproblems.\n\t(Integer Arithmetic): Add mpz_mul_si.\n\t(Initializing Rationals): Add mpq_swap.\n\t(Assigning Floats): Add mpf_swap.\n\t(Low-level Functions): Add mpn_divexact_by3c, and details of 
what\n\tthe calculation actually gives.\n\t(Low-level Functions): Note extra space needed by mpn_gcdext,\n\tclarify the details a bit.\n\n\t* compat.c: New file, entry points for upward binary compatibility.\n\t(mpn_divexact_by3): Compatibility function.\n\t* Makefile.am (libgmp_la_SOURCES): Add compat.c.\n\n\t* mpn/tests/ref.c: Rearrange macros for ansi2knr.\n\t(div1): Renamed from div to avoid library function.\n\t(refmpn_divexact_by3c, refmpn_gcd_1, refmpn_popcount,\n\trefmpn_hamdist): New functions.\n\t* mpn/tests/ref.h: Add extern \"C\", add new prototypes.\n\n\t* gmp.h (gmp_randinit, etc): Add extern \"C\".\n\t(_mpq_cmp_ui): Fix prototype name from mpq_cmp_ui.\n\t(mpn_divexact_by3): Now a macro calling mpn_divexact_by3c.\n\t(mpn_divexact_by3c): New prototype and define.\n\n\t* mpn/x86/diveby3.asm: Change to mpn_divexact_by3c.\n\t* mpn/x86/k6/diveby3.asm: Ditto.\n\t* mpn/generic/diveby3.c: Ditto.\n\t* mpn/asm-defs.m4: Ditto on the define_mpn.\n\t* acconfig.h (HAVE_NATIVE_mpn_divexact_by3c): New define.\n\n\t* mpq/swap.c: New file, derived from mpz/swap.c.\n\t* mpf/swap.c: Ditto.\n\t* mpq/Makefile.am: Add swap.c.\n\t* mpf/Makefile.am: Ditto.\n\t* Makefile.am: Add two new \"swap.lo\"s.\n\n\t* mpn/x86/k6/mmx/com_n.asm: Fix an addressing bug (fortunately\n\tthis code hasn't been used anywhere yet).\n\n\t* mpn/x86/k7/mmx/divrem_1.asm: New file.\n\t* mpn/x86/k7/mmx/mod_1.asm: New file.\n\t* mpn/x86/k7/diveby3.asm: New file.\n\t* mpn/x86/k7/README: Update.\n\n\t* mpn/x86/k7/aorsmul_1.asm: Use new cmovCC, no object code change.\n\t* mpn/x86/k7/mul_basecase.asm: Ditto.\n\t* mpn/x86/p6/aorsmul_1.asm: Ditto.\n\n\t* mpn/x86/x86-defs.m4 (defframe_empty_if_zero): Eval the argument.\n\t(cmovCC): New macros, replacing individual cmovCC_reg_reg forms.\n\t(Zdisp): Recognise more instructions.\n\t(shldl,etc): Use m4_instruction_wrapper().\n\t(ASSERT, movl_text_address): New macros.\n\n\t* mpn/asm-defs.m4: Add remarks on SunOS /usr/bin/m4 and new\n\tOpenBSD 
m4.\n\t(m4_assert_numargs_internal_check): Remove a spurious parameter.\n\t(m4_empty_if_zero): Eval the argument.\n\t(m4_assert, m4_assert_numargs_range, m4_config_gmp_mparam,\n\tm4_instruction_wrapper): New macros.\n\n2000-05-04  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Reporting Bugs): Be explicit about output from running\n\ta command.\n\n2000-05-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/bz_divrem_n.c (mpn_bz_divrem_n): Handle non-zero return\n\tfrom first mpn_bz_div_3_halves_by_2 call.\n\t(mpn_bz_divrem_aux): Likewise.\n\n2000-04-30  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/* (GCD_ACCEL_THRESHOLD, GCDEXT_THRESHOLD): Tune these.\n\n\t* mpn/generic/gcdext.c (GCDEXT_THRESHOLD): Rename from THRESHOLD,\n\tuse with >=, adjust default to 17 accordingly.\n\tUse new *_SWAP macros.\n\n\t* mpn/generic/gcd.c (GCD_ACCEL_THRESHOLD): Rename from\n\tACCEL_THRESHOLD, use with >=, adjust default to 5 accordingly.\n\tUse new *_SWAP macros.\n\n\t* mpf/get_str.c, mpf/set_str.c, mpf/sub.c, mpz/add.c, mpz/ior.c,\n\tmpz/and.c, mpz/sub.c, mpz/xor.c, mpz/ui_pow_ui.c,\n\tmpn/generic/mul.c: Use new *_SWAP macros.\n\n\t* stack-alloc.h: Add extern \"C\" around prototypes.\n\n\t* gmp-impl.h: (MP_PTR_SWAP, etc): New macros.\n\t(_mp_allocate_func, etc): Use _PROTO.\n\t[TUNE_PROGRAM_BUILD]: More changes in tune program build part.\n\n2000-04-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/pa64/add_n.s: Add `,entry' to export directive.\n\t* mpn/pa64/addmul_1.S, mpn/pa64/lshift.s, mpn/pa64/mul_1.S,\n\tmpn/pa64/rshift.s, mpn/pa64/sub_n.s, mpn/pa64/submul_1.S,\n\tmpn/pa64/umul_ppmm.S: Likewise.\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.S: New name for udiv_qrnnd.s.\n\tAdd PIC support.\n\n2000-04-29  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h [TUNE_PROGRAM_BUILD] (TOOM3_MUL_THRESHOLD_LIMIT): New\n\tdefine.\n\t* mpn/generic/mul_n.c [TUNE_PROGRAM_BUILD] (mpn_mul_n): Use\n\tTOOM3_MUL_THRESHOLD_LIMIT, not a hard coded 500.\n\n\t* memory.c: Use <stdlib.h> for malloc etc, and use 
_PROTO.\n\t* stack-alloc.c: Don't use C++ reserved word \"this\".\n\t* urandom.h: Put extern \"C\" around prototypes.\n\t* mpz/powm.c: Switch a couple of parameters to \"const\", which they\n\tare, to satisfy g++.\n\n\t* randraw.c, stack-alloc.c, mpbsd/mout.c, mpbsd/mtox.c: Add casts to\n\thelp g++.\n\n\t* stack-alloc.c: Provide dual ANSI/K&R function definitions.\n\t* mpz/addmul_ui.c,get_d.c,inp_str.c,perfpow.c,powm.c,pprime_p.c,\n\trrandomb.c,set_str.c,ui_pow_ui.c: Ditto.\n\t* mpf/integer.c,set_str.c: Ditto.\n\t* mpbsd/min.c,xtom.c: Ditto.\n\t* mpn/generic/bz_divrem_n.c,dump.c,gcd_1.c,get_str.c,hamdist.c,\n\tpopcount.c,random.c,random2.c,set_str.c: Ditto.\n\n\t* rand.c: Use <stdio.h> for NULL.\n\t* mpz/gcd_ui.c,gcdext.c,mul.c,perfpow.c,powm_ui.c,root.c,sqrt.c,\n\tsqrtrem.c: Ditto\n\t* mpf/sqrt.c,sqrt_ui.c: Ditto.\n\t* mpn/generic/perfsqr.c,sqrtrem.c: Ditto.\n\n\t* gmp-impl.h (NULL, malloc, realloc, free): Don't define/declare.\n\t(extern \"C\"): Add around function prototypes.\n\t(mpn_kara_mul_n, mpn_kara_sqr_n, mpn_toom3_mul_n, mpn_toom3_sqr_n):\n\tAdd prototypes.\n\t[TUNE_PROGRAM_BUILD] (FIB_THRESHOLD): Add necessary redefinitions for\n\tuse by tune program.\n\t* mpn/generic/mul_n.c: Remove mpn_toom3_mul_n prototype.\n\n\t* acinclude.m4 (GMP_C_ANSI2KNR): New macro.\n\t(GMP_CHECK_ASM_MMX, GMP_CHECK_ASM_SHLDL_CL): Fix to use\n\t$gmp_cv_check_asm_text which is what GMP_CHECK_ASM_TEXT sets.\n\t* configure.in (GMP_C_ANSI2KNR): Use this instead of AM_C_PROTOTYPES,\n\tfor reasons described with its definition.\n\n\t* demos/Makefile.am (ansi2knr): Use $(top_builddir) nor $(top_srcdir).\n\n\t* mpz/fib_ui.c (FIB_THRESHOLD): Rename from FIB_THRES, for consistency.\n\t(FIB_THRESHOLD): Conditionalize so gmp-mparam.h can define a value.\n\t(mpz_fib_bigcase): Use >= FIB_THRESHOLD, same as main mpz_fib_ui.\n\t* tune/tuneup.c,Makefile.am (FIB_THRESHOLD): Tune this.\n\n\t* configure.in (*-*-aix* gmp_m4postinc): Fix setting (don't overwrite\n\ta value just stored).\n\n2000-04-26  
Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/sparc32/udiv_fp.asm: Use mpn_udiv_qrnnd macro.\n\t* mpn/sparc32/udiv_nfp.asm: Ditto.\n\t* mpn/sparc32/v8/supersparc/udiv.asm: Ditto.\n\t* mpn/sparc32/umul.asm: Name the function mpn_umul_ppmm.\n\t* mpn/sparc32/v8/umul.asm: Ditto.\n\t* mpn/powerpc32/umul.asm: Ditto.\n\n\t* mpn/x86/syntax.h: Remove file, since now unused.\n\n\t* configure.in (x86): Remove -DBROKEN_ALIGN and -DOLD_GAS\n\tpreviously used by .S files.\n\t(x86 extra_functions): Add udiv and umul.\n\t(GMP_PROG_M4): Use this instead of AC_CHECK_PROG(M4,m4,...)\n\t(HAVE_NATIVE_*): Loosen up the regexp to \"PROLOGUE.*\" so as to\n\taccept PROLOGUE_GP on alpha.\n\n\t* acconfig.h (HAVE_NATIVE_mpn_umul_ppmm, udiv_qrnnd, invert_limb):\n\tNew template defines.\n\t* mpn/asm-defs.m4 (mpn_umul_ppmm, mpn_udiv_qrnnd): New define_mpn()s.\n\t* longlong.h (umul_ppmm, udiv_qrnnd): Use a library version if\n\tit's available and an asm macro isn't.\n\t* gmp-impl.h (invert_limb): Ditto.\n\n\t* gmp-impl.h (ASSERT_NOREALLOC): Not a good idea, remove it.\n\n\t* acinclude.m4 (GMP_PROG_M4): New macro.\n\n2000-04-25  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Random State Initialization): Correct arguments to\n\t`gmp_randinit'.\n\n\t* acinclude.m4 (GMP_VERSION): Change `eval' --> `m4_eval'.  
Fix\n\tfrom Kevin.\n\t* aclocal.m4: Regenerate.\n\n2000-04-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/aors_n.asm: Remove parentheses around an immediate that\n\tSolaris \"as\" doesn't like, change by Torbjorn.\n\n2000-04-24  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (AC_CHECK_FUNCS): Add strtoul.\n\n\t* mpn/generic/mul_n.c [TUNE_PROGRAM_BUILD] (mpn_mul_n): Bigger\n\tarray for karatsuba temporary space for tune program build.\n\t(mpn_toom3_sqr_n) Remove an unused variable.\n\n\t* demos/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.\n\tAdd \"allprogs:\" pseudo-target.\n\t* demos/factorize.c, demos/isprime.c: Switch to ANSI functions,\n\trely on ansi2knr.\n\n\t* gmp.texi (Getting the Latest Version of GMP): Add reference to\n\tftp.gnu.org mirrors list.\n\t* INSTALL: Add arg count check to example programs.\n\n\t* mpn/x86/*/*.asm: Convert to FORTRAN ... or rather to\n\tFORTRAN-style \"C\" commenting to support Solaris \"as\".\n\t* mpn/x86/x86-defs.m4: Ditto, and add another Zdisp insn.\n\t* mpn/asm-defs.m4 (C): Update comments.\n\t* mpn/x86/README.family: Add a note on commenting, remove\n\tdescription of .S files.\n\n\t* mpn/sparc64/addmul_1.asm, mul_1.asm, submul_1.asm: Use\n\tinclude_mpn().\n\n2000-04-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Merge with FSF version of April 23.\n\n\t* mpn/powerpc32: Use dnl/C instead of `#' for comments.\n\n\t* config.guess: Get \"model\" limit between pentium 2 and pentium3 right.\n\tGet rid of code determining `_' prefix;\tuse double labels instead.\n\t* config.guess: Partially merge with FSF version of April 22.\n\t(Don't bring over NetBSD changes for now.)\n\n2000-04-23  Kevin Ryde  <kevin@swox.se>\n\n\t* tune/Makefile.am, tune/README, tune/common.c, tune/rdtsc.asm,\n\ttune/speed.c, tune/speed.h, tune/time.c, tune/tuneup.c: New files.\n\t* tune/Makefile.in: New file, generated from Makefile.am.\n\n\t* gmp-impl.h (ASSERT_NOREALLOC,TMP_ALLOC_LIMBS): New macros.\n\t[TUNE_PROGRAM_BUILD] Further mods for tune 
program builds.\n\n\t* mpz/Makefile.am: Add -DOPERATION_$* for new mul_siui.c.\n\tAdd rules to build mul_si and mul_ui from a common mul_siui.c.\n\t* mpz/mul_siui.c: New file, derived from and replacing mul_ui.c.\n\t* gmp.h (mpz_mul_si): New prototype and define.\n\n\t* mpn/tests/*.c [__i386__] (CLOCK): Don't use floating point in\n\tCLOCK because cpp can't handle floats in #if's (TIMES is derived\n\tfrom CLOCK by default).\n\n\t* mpn/asm-defs.m4 (include_mpn): New macro.\n\t(m4_assert_numargs) Changes to implementation.\n\n\t* mpf/Makefile.am: Add -DOPERATION_$* for new integer.c.\n\tRemove explicit rules for floor.o etc.\n\t* mpf/integer.c: Use OPERATION_$* for floor/ceil/trunc.\n\n\t* mpn/Makefile.am: Put \"tests\" in SUBDIRS.\n\t* mpn/tests/Makefile.am: New file providing rules to build test\n\tprograms, nothing done in a \"make all\" or \"make check\" though.\n\t* mpn/tests/README: New file.\n\n\t* acconfig.h (HAVE_PENTIUM_RDTSC): New define.\n\n\t* configure.in (x86): Rearrange target cases.\n\tAdd mulfunc aors_n and aorsmul_1 for x86 and pentium (now all x86s).\n\tRemove asm-syntax.h generation not needed.\n\tRemove now unused family=x86.\n\t(sparc) Remove unused family=sparc.\n\t(HAVE_PENTIUM_RDTSC) New AC_DEFINE and AM_CONDITIONAL.\n\t(AM_C_PROTOTYPES) New test, supporting ansi2knr.\n\t(AC_CHECK_HEADERS) Add getopt.h, unistd.h and sys/sysctl.h for\n\ttune progs.\n\t(AC_CHECK_FUNCS) Add getopt_long, sysconf and sysctlbyname for\n\ttune progs.\n\t(config.m4 CONFIG_TOP_SRCDIR) Renamed from CONFIG_SRCDIR.\n\t(config.m4 asm-defs.m4) Use CONFIG_TOP_SRCDIR and include().\n\t(gmp_m4postinc) Use include_mpn().\n\t(gmp_links) Omit asm-defs.m4/asm.m4 and gmp_m4postinc's.\n\t(MULFUNC_PROLOGUE) Fix regexps so all functions get AC_DEFINE'd.\n\t(PROLOGUE) Ditto (native copyi and copyd were unused in gmp 3).\n\t(KARATSUBA_SQR_THRESHOLD) Copy from gmp-mparam.h into config.m4.\n\t(AC_OUTPUT) Add tune/Makefile, mpn/tests/Makefile.\n\n\t* Makefile.am (AUTOMAKE_OPTIONS): Add 
ansi2knr.\n\t(SUBDIRS): Add tune, reorder directories.\n\t(MPZ_OBJECTS): Add mpz/mul_si.lo.\n\t(libmp_la_SOURCES): Use this for top-level objects, not .lo's.\n\t* ansi2knr.c, ansi2knr.1: New files, provided by automake.\n\n\t* mpn/x86/aors_n.asm: Convert add_n.S and sub_n.S to a\n\tmulti-function aors_n.asm, no object code change.\n\t* mpn/x86/pentium/aors_n.asm: Ditto.\n\t* mpn/x86/aorsmul_1.asm: Ditto for addmul/submul.\n\t* mpn/x86/pentium/aorsmul_1.asm: Ditto.\n\n\t* mpn/x86/lshift.asm, mpn/x86/mul_1.asm, mpn/x86/mul_basecase.asm,\n\tmpn/x86/rshift.asm: Convert from .S, no object code change.\n\t* mpn/x86/pentium/lshift.asm, mpn/x86/pentium/mul_1.asm,\n\tmpn/x86/pentium/mul_basecase.asm, mpn/x86/pentium/rshift.asm: Ditto.\n\n\t* gmp.texi (Reporting Bugs): Itemize the list of things to include.\n\t(Miscellaneous Float Functions): Correct typo in mpf_ceil etc\n\targument types.\n\tChange @ifinfo -> @ifnottex for benefit of makeinfo --html.\n\tRemove unnecessary @iftex's around @tex.\n\n2000-04-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Generalize x86 cpu determination code.\n\tNow works on Solaris.\n\n\t* mpz/nextprime.c: Rewrite still disabled code.\n\n\t* configure.in: Specifically match freebsd[3-9].\n\n2000-04-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* rand.c: Call mpz_clear for otherwise leaking mpz_t.\n\n\t* mpz/pprime_p.c (mpz_probab_prime_p): Merge handling of negative\n\tn into code for handling small positive n.  Merge variables m and n.\n\tAfter dividing, simply call mpz_millerrabin.\n\t(isprime): Local variables now use attribute `long'.\n\t(mpz_millerrabin): New static function, based on code from\n\tmpz_probab_prime_p.\n\t(millerrabin): Now simple workhorse for mpz_millerrabin.\n\n2000-04-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h: Fix parenthesis error in test for __APPLE_CC__.\n\n2000-04-18  Linus Nordberg  <linus@swox.se>\n\n\t* NEWS: Add info about shared libraries.  
Remove reference to\n\tgmp_randinit_lc.\n\n2000-04-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* Version 3.0 released.\n\n\t* mpn/arm/add_n.S: New version from Robert Harley.\n\t* mpn/arm/addmul_1.S: Likewise.\n\t* mpn/arm/mul_1.S: Likewise.\n\t* mpn/arm/sub_n.S: Likewise.\n\n\t* gmp.h (__GNU_MP_VERSION_PATCHLEVEL): Now 0.\n\n2000-04-17  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (hppa2.0*-*-*): Pass `+O3' to cc/c89 in 64-bit mode\n\tto avoid compiler bug.\n\t(ns32k*-*-*): Fix typo in path.  Change by Kevin.\n\t(alpha*-*-osf*): New case.  Pass assembly flags for architecture\n\tto gcc.\n\t(alpha*-*-*): Don't bother searching for cc.\n\t* configure: Regenerate.\n\n\t* Makefile.am (EXTRA_DIST): Add `macos', `.gdbinit'.\n\t* Makefile.in: Regenerate.\n\t* mpn/Makefile.am (EXTRA_DIST): Add `m88k', `lisp'.\n\t* mpn/Makefile.in: Regenerate.\n\n2000-04-16  Kevin Ryde  <kevin@swox.se>\n\n\t* README: Updates, and don't duplicate the example in INSTALL.\n\t* INSTALL: Minor updates.\n\t* gmp.texi (Installing MP): Minor edits, restore CC/CFLAGS description.\n\n2000-04-16  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (*-*-cygwin*): Select BSD_SYNTAX to avoid\n\t.type/.size in PROLOGUE for ELF_SYNTAX.  
Override ALIGN definition\n\tfrom x86/syntax.h.\n\t(gmp_xoptcflags_${CC}): New set of variables, indicating\n\t``exclusive optional cflags''.\n\t(most sparcs): Use gmp_xoptcflags instead of gmp_optcflags to\n\tensure that we pass CPU type to older gcc.\n\t(CFLAGS): CFLAGS on the command line was spoiled.\n\t* configure: Regenerate.\n\n2000-04-16  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Invoke AC_PROG_LIBTOOL directly.\n\n\t* acinclude.m4 (GMP_PROG_CC_FIND): Quote source variable when\n\tsetting CC64 and CFLAGS64.\n\t(GMP_PROG_CC_SELECT): Cache result.\n\t(GMP_PROG_LIBTOOL): Remove.\n\n\t* aclocal.m4: Regenerate.\n\t* configure: Regenerate.\n\n2000-04-16  Linus Nordberg  <linus@swox.se>\n\n\t* tests/rand/t-rand.c (main): Add non-ANSI function declaration.\n\tDon't use `const'.\n\n2000-04-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/dump.c: Suppress output of leadign zeros.\n\n\t* mpz/inp_str.c: Fix memory leakage.\n\n\t* mpz/tests/reuse.c (dss_func_division): Add a final 1.\n\n\t* longlong.h (alpha count_leading_zeros): Wrap in __MPN.\n\t* mpn/alpha/cntlz.asm: Use __gmpn prefix (by means of __MPN).\n\n\t* longlong.h (__umul_ppmm, __udiv_qrnnd): Wrap in __MPN.\n\t* mpn/alpha/udiv_qrnnd.S: Use __gmpn prefix.\n\t* mpn/hppa/udiv_qrnnd.s: Likewise.\n\t* mpn/hppa/hppa1_1/udiv_qrnnd.s: Likewise.\n\t* mpn/pa64/udiv_qrnnd.c: Likewise (by means of __MPN).\n\t* mpn/pa64/umul_ppmm.S: Likewise.\n\t* mpn/sparc32/udiv_fp.asm: Likewise (by means of MPN).\n\t* mpn/sparc32/udiv_nfp.asm: Likewise (by means of MPN).\n\t* mpn/sparc32/v8/supersparc/udiv.asm: Likewise (by means of MPN).\n\n\t* mpn/generic/tdiv_qr.c: Work around gcc 2.7.2.3 i386 register handling\n\tbug.\n\n\t* mpn/generic/tdiv_qr.c: Use udiv_qrnnd instead of mpn_divrem_1\n\twhen computing appropriate quotient; mpn_divrem_1 writes too\n\tmany quotient limbs.\n\n\t* mpn/asm-defs.m4: invert_normalized_limb => invert_limb.\n\t* mpn/alpha/invert_limb.asm: mpn_invert_normalized_limb 
=>\n\tmpn_invert_limb.\n\t* gmp.h: Likewise.\n\t* gmp-impl.h (alpha specific): invert_normalized_limb => invert_limb;\n\twrap with __MPN.\n\t* longlong.h (alpha udiv_qrnnd): Likewise.\n\n2000-04-16  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.h (mp_set_memory_functions,mp_bits_per_limb,gmp_errno): Add\n\t#defines so the library symbols are __gmp_*.\n\t* errno.c: Include gmp.h.\n\t* gmp-impl.h (_mp_allocate_func,etc): Add #defines to __gmp_*.\n\t(__clz_tab): New #define to __MPN(clz_tab).\n\t* stack-alloc.c (__gmp_allocate_func,etc): Change from _mp_*.\n\n\t* Makefile.am (libmp_la_DEPENDENCIES): Add some mpz files needed\n\tfor new mpz_powm (pow in libmp).\n\t(EXTRA_DIST): Add projects directory.\n\n\t* mpn/*: Change __mpn to __gmpn.\n\t* gmp.h (__MPN): Ditto.\n\t* stack_alloc.c,stack-alloc.h: Change __tmp to __gmp_tmp.\n\n\t* mpn/generic/sb_divrem_mn.c (mpn_sb_divrem_mn): Avoid gcc 2.7.2.3\n\ti386 register handling bug (same as previously in mpn_divrem_classic).\n\n\t* mpn/generic/divrem.c: Now contains mpn_divrem, which is not an\n\tinternal function, so remove warning comment.\n\n\t* gmp.texi (Compatibility with Version 2.0.x): Source level only.\n\n2000-04-16  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (hppa1.0*): Prefer c89 to cc.\n\t* configure: Regenerate.\n\n2000-04-15  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: If `mpn_path' is set by user on configure command\n\tline, use that as path.\n\t* configure: Regenerate.\n\n2000-04-15  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (hppa2.0*): Use path \"hppa/hppa1_1 hppa\" if no\n\t64-bit compiler was found.\n\t* configure: Regenerate.\n\n2000-04-15  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Honor `CC' and `CFLAGS' set by user on configure\n\tcommand line.\n\t* acinclude.m4: (GMP_PROG_CC_SELECT): Set CFLAGS if not set already.\n\t* aclocal.m4: Regenerate.\n\t* configure: Regenerate.\n\n2000-04-15  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_PROG_CC_FIND): Remove debug 
output.  Remove\n\tcommented out code.\n\t* aclocal.m4: Regenerate.\n\t* configure: Regenerate.\n\n\t* configure.in: Make all `-mcpu' options to gcc optional.\n\t* configure: Regenerate.\n\n\t* tests/rand/Makefile.am: Don't do anything for target 'all'.\n\t* tests/rand/Makefile.in: Regenerate.\n\n2000-04-15  Kevin Ryde  <kevin@swox.se>\n\n\t* README: Small updates.\n\t* NEWS: Add some things about 3.0.\n\n\t* mpz/Makefile.am (EXTRA_DIST): Remove dmincl.c.\n\n\t* Makefile.am: Use -version-info on libraries, not -release.\n\n\t* mpz/tdiv_qr.c: Add mdiv function header #ifdef BERKELEY_MP.\n\t* mpbsd/Makefile.am: Use mpz/tdiv_qr.c, not mdiv.c.\n\t* Makefile.am (MPBSD_OBJECTS): Change mdiv.lo to tdiv_qr.lo.\n\t(libmp_la_DEPENDENCIES): Add mp_clz_tab.lo.\n\t* mpbsd/mdiv.c: Remove file.\n\n\t* config/mt-linux,mt-m68k,mt-m88110,mt-ppc,mt-ppc64-aix,mt-pwr,\n\tmt-sprc8-gcc,mt-sprc9-gcc,mt-supspc-gcc,mt-vax,mt-x86,\n\tmpn/config/mt-pa2hpux,mt-sprc9,t-oldgas,t-ppc-aix,t-pwr-aix:\n\tRemove configure fragments not used since change to autoconf.\n\n\t* mpn/generic/bz_divrem_n.c,sb_divrem_mn.c: Add comment that\n\tinternal functions are changeable and shouldn't be used directly.\n\n2000-04-15  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Remove debug output.\n\t* configure: Regenerate.\n\n2000-04-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/tdiv_qr.c: Don't use alloca directly.\n\n\t* mpz/tdiv_qr.c: Fix typo.\n\t* mpz/tdiv_r.c: Fix typo.\n\t* mpz/tdiv_q.c: Fix typo.\n\n\t* configure.in: Disable -march=pentiumpro due to apparent compiler\n\tproblems.\n\n\t* mpz/powm.c: Replace with new code from Paul Zimmermann.\n\n\t* mpz/tdiv_q.c: Remove debug code.\n\n\t* mpn/generic/divrem.c: Remove C++ style `//' commented-out code.\n\t* mpn/generic/sb_divrem_mn.c: Likewise.\n\n2000-04-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/cdiv_q.c: Change temp allocation for new requirements of\n\tmpz_tdiv_qr.\n\t* mpz/fdiv_q.c: Likewise.\n\n\t* mpn/sparc64/gmp-mparam.h: Set 
up parameters for TOOM3.\n\n\t* mpz/dmincl.c: Delete file.\n\t* mpz/tdiv_qr.c: Rewrite using mpn_tdiv_qr.\n\t* mpz/tdiv_r.c: Likewise.\n\t* mpz/tdiv_q.c: Likewise.\n\n\t* mpn/generic/tdiv_qr.c: New file.\n\t* mpn/generic/bz_divrem_n.c: New file.\n\t* mpn/generic/sb_divrem_mn.c: New file.\n\n\t* gmp-impl.h (MPZ_REALLOC): New macro.\n\t(mpn_sb_divrem_mn): Declare.\n\t(mpn_bz_divrem_n): Declare.\n\t(mpn_tdiv_qr): Declare.\n\n\t* configure.in (gmp_mpn_functions): Delete divrem_newt and divrem_1n;\n\tadd tdiv_qr, bz_divrem_n, and sb_divrem_mn.\n\t* mpn/generic/divrem_newt.c: Delete file.\n\t* mpn/generic/divrem_1n.c: Delete file.\n\n\t* gmp.h (mpn_divrem_newton): Remove declaration.\n\t(mpn_divrem_classic): Remove declaration.\n\n\t* gmp.h (mpn_divrem): Remove function definition.\n\t* mpn/generic/divrem.c: Replace mpn_divrem_classic with a\n\tmpn_divrem wrapper.\n\n2000-04-14  Kevin Ryde  <kevin@swox.se>\n\n\t* mpf/dump.c, mpz/dump.c, mpn/generic/dump.c,\n\tmpn/generic/divrem.c, mpn/generic/divrem_1n.c,\n\tmpn/generic/divrem_2.c, mpn/generic/divrem_newt.c,\n\tmpn/generic/mul.c, mpn/generic/mul_basecase.c,\n\tmpn/generic/mul_n.c, mpn/generic/sqr_basecase.c,\n\tmpn/generic/udiv_w_sdiv.c: Add comment that internal functions are\n\tchangeable and shouldn't be used directly.\n\n\t* mpq/div.c: Use DIVIDE_BY_ZERO (previously didn't get an\n\texception on zero divisor).\n\n\t* mpf/tests/t-get_d.c, mpz/tests/reuse.c: Add K&R function\n\tdefinitions.\n\t* mpz/tests/t-2exp.c: Don't use ANSI-ism 2ul.\n\n\t* gmp.texi (Installing MP): Build problem notes for GSYM_PREFIX\n\tand ranlib on native SunOS.\n\tParticular systems notes about AIX and HPPA shared libraries\n\tdisabled.\n\t(MP Basics): Add that undocumented things shouldn't be used.\n\t(Introduction to MP): Add to CPUs listed.\n\n\t* acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Don't depend on C\n\thaving \"void\".\n\n2000-04-13  Linus Nordberg  <linus@swox.se>\n\n\t* mpn/pa64/udiv_qrnnd.c (__udiv_qrnnd64): Add K&R 
function\n\tdefinition.\n\n\t* configure.in: Disable shared libraries for hppa*.\n\t(mips-sgi-irix6.*): Fix flags for 64-bit gcc.\n\t(hppa2.0*-*-*): Prefer c89 to cc.\n\t* configure: Regenerate.\n\n\t* gmp.h (gmp_randalg_t): Remove comma after last element.\n\n\t* tests/rand/t-rand.c: Add copyright notice.\n\n2000-04-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/mul_n.c, mpn/generic/gcdext.c, mpz/nextprime.c,\n\tmpz/remove.c, mpz/root.c: Add K&R function definitions.\n\t* mpz/rrandomb.c: Fix typo in K&R part.\n\t* stack-alloc.c: Add K&R style function pointer declarations.\n\n\t* mpz/root.c: Use SQRT_OF_NEGATIVE on even roots of negatives.\n\tUse DIVIDE_BY_ZERO on a \"zero'th\" root.\n\n\t* configure: Regenerate with autoconf backpatched to fix --srcdir\n\tabsolute path wildcards that bash doesn't like, change by Linus.\n\n\t* gmp.texi (Integer Arithmetic): Document mpz_nextprime.\n\t(Miscellaneous Integer Functions): Fix mpz_fits_* formatting.\n\t(Installing MP): Comment-out CC and CFLAGS description.\n\n2000-04-13  Linus Nordberg  <linus@swox.se>\n\n\t* rand.c (gmp_randinit): Don't combine va_alist with ordinary\n\targuments for non STDC.\n\n2000-04-13  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/nextprime.c: Use proper names of new random types and functions.\n\n\t* mpz/rrandomb.c: New file.\n\t* mpz/Makefile.am: List it.\n\t* mpz/Makefile.in: Regenerate.\n\t* Makefile.am: Here too.\n\t* Makefile.in: Regenerate.\n\t* gmp.h: Declare mpz_rrandomb.\n\n2000-04-12  Linus Nordberg  <linus@swox.se>\n\n\t* Makefile.am, demos/Makefile.am, mpbsd/Makefile.am,\n\tmpbsd/tests/Makefile.am, mpf/Makefile.am, mpf/tests/Makefile.am,\n\tmpn/Makefile.am, mpq/Makefile.am, mpq/tests/Makefile.am,\n\tmpz/Makefile.am, mpz/tests/Makefile.am, tests/Makefile.am,\n\ttests/rand/Makefile.am (AUTOMAKE_OPTIONS): Add 'no-dependencies'.\n\n\t* Makefile.in, demos/Makefile.in, mpbsd/Makefile.in,\n\tmpbsd/tests/Makefile.in, mpf/Makefile.in, mpf/tests/Makefile.in,\n\tmpn/Makefile.in, 
mpq/Makefile.in, mpq/tests/Makefile.in,\n\tmpz/Makefile.in, mpz/tests/Makefile.in, tests/Makefile.in,\n\ttests/rand/Makefile.in: Regenerate.\n\n2000-04-12  Linus Nordberg  <linus@swox.se>\n\n\t* randlc.c (gmp_randinit_lc): Disable function.\n\t* gmp.texi (Random State Initialization): Remove gmp_randinit_lc.\n\n\t* acinclude.m4 (GMP_CHECK_CC_64BIT): Compiling an empty main\n\tsuccessfully with `-n32' will have to suffice on irix6.\n\t* aclocal.m4: Regenerate.\n\n\t* configure.in (sparc): Don't pass -D_LONG_LONG_LIMB to compiler.\n\t(mips-sgi-irix6.*): Use compiler option `-n32' rather than `-64'\n\tfor 64-bit `cc'.  Add options for gcc.\n\t* configure: Regenerate.\n\n\t* mpf/urandomb.c (mpf_urandomb): Add third parameter 'nbits'.  If\n\t'nbits' doesn't make even limbs, shift up result before\n\tnormalizing.\n\n\t* gmp.h (mpf_urandomb): Add parameter to prototype.\n\n\t* mpf/urandom.c: Rename file to ...\n\t* mpf/urandomb.c: ... this.\n\t* Makefile.am (MPF_OBJECTS): Change urandom.lo --> urandomb.lo.\n\t* Makefile.in: Regenerate.\n\t* mpf/Makefile.am (libmpf_la_SOURCES): Change urandom.c --> urandomb.c.\n\t* mpf/Makefile.in: Regenerate.\n\n\t* config.in: Regenerate for HAVE_DECL_OPTARG.\n\n\t* randraw.c (_gmp_rand): Fix bug with _LONG_LONG_LIMB.\n\t(lc): Change return type.\n\tUse one temporary storage instead of two.\n\tHandle seed of size 0.\n\tAvoid modulus operation in some cases.\n\tAbort if M is not a power of 2.\n\tFix bug with 64-bit limbs.\n\tFix bug with small seed, small A and large M.\n\n\t* tests/rand/gen.c (main): Include gmp.h.  Remove macros MIN, MAX.  Add\n\toption '-q'.  Don't demand argument N.  
Change parameters in call\n\tto mpf_urandomb.\n\n\t* tests/rand/t-rand.c: New file for testing random number generation.\n\n\t* tests/rand/Makefile.am: Run t-rand for 'make check'.\n\t(test, bigtest): Rename to manual-test, manual-bigtest.\n\t* tests/rand/Makefile.in: Regenerate.\n\n2000-04-12  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h: Include config.h before TMP_ALLOC, so\n\t--disable-alloca works.\n\n\t* mpbsd/Makefile.am: Don't recompile top-level sources here.\n\t* Makefile.am (libmp_la_DEPENDENCIES): Put objects here instead,\n\tadd errno.lo and stack-alloc.lo.\n\n\t* mpn/asm-defs.m4: Add a test and message for the unsuitable SunOS m4.\n\t* gmp.texi (Installing MP): Update note on SunOS m4 failure.\n\n\t* acconfig.h: Add copyright notice using @TOP@.\n\n\t* stack-alloc.c: Use _mp_allocate_func, not malloc.\n\t* gmp.texi (Installing MP): Note this under --disable-alloca.\n\n\t* gmp.texi (Comparison Functions): mpz_cmp_abs => mpz_cmpabs.\n\t(Integer Arithmetic): mpz_prime_p not yet implemented, comment out.\n\t(Float Arithmetic): mpf_pow_ui now implemented, uncomment-out.\n\t(Miscellaneous Float Functions): Add mpf_ceil, mpf_floor, mpf_trunc.\n\t(Low-level Functions): Add mpn_random2, with mpn_random.\n\n\t* mpn/m68k/mc68020/udiv.S: Rename from udiv.s.\n\t* mpn/m68k/mc68020/umul.S: Ditto.\n\n\t* mpn/alpha/umul.asm: Rename from umul.s, remove .file and\n\tcompiler identifiers.\n\n\t* mpn/powerpc32/syntax.h: Removed, no longer used.\n\n\t* mpn/a29k/udiv.s: Remove .file and compiler identifiers.\n\t* mpn/a29k/umul.s: Ditto.\n\n\t* mpn/tests/ref.c: Use WANT_ASSERT.\n\t* mpn/tests/ref.h: Use _PROTO.\n\n\t* mpbsd/configure.in: Removed, no longer required.\n\n\t* mpf/div.c: Use DIVIDE_BY_ZERO.\n\t* mpf/div_ui.c: Ditto.\n\t* mpf/ui_div.c: Ditto.\n\t* mpq/inv.c: Ditto.\n\t* mpf/sqrt.c: Use SQRT_OF_NEGATIVE.\n\t* mpz/sqrt.c: Ditto.\n\t* mpz/sqrtrem.c: Ditto.\n\n\t* gmp-impl.h (GMP_ERROR,SQRT_OF_NEGATIVE): New macros.\n\t(DIVIDE_BY_ZERO): Use 
GMP_ERROR.\n\t(__mp_bases): #define to __MPN(mp_bases).\n\n2000-04-11  Linus Nordberg  <linus@swox.se>\n\n\t* tests/rand/stat.c (main): Initialize `l1runs' at declaration.\n\n2000-04-11  Kevin Ryde  <kevin@swox.se>\n\n\t* mpz/fib_ui.c: Add K&R function definitions.\n\n\t* mpbsd/tests/Makefile.am (TESTS): Add a dummy test to avoid a\n\tshell problem with an empty \"for tst in $(TESTS) ; ...\".\n\t* mpbsd/tests/dummy.c: New file.\n\n2000-04-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/bin_uiui.c: Delete several unused variables.\n\tAdd copyright notice.\n\t* mpz/bin_ui.c: Add copyright notice.\n\n\t* longlong.h: Declare __count_leading_zeros for alpha.\n\n2000-04-10  Linus Nordberg  <linus@swox.se>\n\n\t* rand.c (gmp_randinit): Change parameter list to (rstate, alg, ...).\n\t* gmp.h: Change prototype accordingly.\n\t* mpz/pprime_p.c (millerrabin): Change call accordingly.\n\n\t* configure.in: Check for `optarg'.\n\t* configure: Regenerate.\n\n\t* mpn/Makefile.am: Remove incorrect comment.\n\t* mpn/Makefile.in: Regenerate.\n\n\t* gmp.h: Rename most of the random number functions, structs and\n\t  some of the struct members.\n\t* rand.c (gmp_randinit): Likewise.\n\t* randclr.c (gmp_randclear): Likewise.\n\t* randlc.c (gmp_randinit_lc): Likewise.\n\t* randlc2x.c (gmp_randinit_lc_2exp): Likewise.\n\t* randraw.c (lc): Likewise.\n\t(_gmp_rand_getraw): Likewise.\n\t* randsd.c (gmp_randseed): Likewise.\n\t* randsdui.c (gmp_randseed_ui): Likewise.\n\t* gmp.texi: Likewise.\n\n\t* gmp.texi: Use three hyphens for a dash.\n\t(Low-level Functions): Remove documentation for gmp_rand_getraw.\n\t(Random Number Functions): Add info on where to find documentation\n\ton the random number functions.\n\n\t* tests/rand/Makefile.am (test, bigtest): Quote argument to grep.\n\t* tests/rand/Makefile.in: Regenerate.\n\n\t* tests/rand/gen.c: Declare optarg, optind, opterr if not already\n\tdeclared.\n\t(main): Use new names for the random stuff.\n\t(main): Don't use strtoul() if we don't 
have it.  Use strtol()\n\tinstead, if we have it.  Otherwise, use atoi().\n\t(main): Use srandom/srandomdev for __FreeBSD__ only.\n\t(main): Use new parameter order to gmp_randinit().\n\n\t* tests/rand/stat.c: Declare optarg, optind, opterr if not already\n\tdeclared.\n\n2000-04-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/pprime_p.c: Pass 0L for mpz_scan1.  mpz_mmod => mpz_mod.\n\t(millerrabin): Use new random interface.\n\t(millerrabin): ... and don't forget to call gmp_randclear.\n\n\t* mpz/nextprime.c: New file.\n\t* gmp.h: Declare mpz_nextprime.\n\t* mpz/Makefile.am: List nextprime.c.\n\t* mpz/Makefile.in: Regenerate.\n\t* Makefile.am: List mpz/nextprime.lo.\n\t* Makefile.in: Regenerate.\n\n2000-04-10  Kevin Ryde  <kevin@swox.se>\n\n\t* move-if-change, mpz/tests/move-if-change, mpq/tests/move-if-change,\n\tmpf/tests/move-if-change: Remove, no longer used.\n\n\t* Makefile.am (SUBDIRS): Add tests, demos, mpbsd.\n\t(libmp.la): New target, conditional on WANT_MPBSD.\n\t(libgmp_la_LIBADD): Add -lm.\n\t(AUTOMAKE_OPTIONS): Add check-news.\n\t(include_HEADERS): Setup to install gmp.h and possibly mp.h.\n\t(DISTCLEANFILES): Add generated files.\n\t(check): Remove explicit target (now uses check-recursive).\n\n\t* configure.in: Use AM_CONFIG_HEADER.\n\tAdd --enable-mpbsd setting automake conditional WANT_MPBSD.\n\tOutput demos/Makefile, mpbsd/Makefile and mpbsd/tests/Makefile.\n\n\t* mpz/Makefile.am: Add SUBDIRS=tests, shorten INCLUDES since now\n\tusing AM_CONFIG_HEADER.\n\t* mpq/Makefile.am: Ditto.\n\t* mpf/Makefile.am: Ditto, and add DISTCLEANFILES.\n\t* mpn/Makefile.am: Shorten INCLUDES, amend some comments.\n\t* mpz/tests/Makefile.am: Use TESTS and $(top_builddir).\n\t* mpf/tests/Makefile.am: Ditto.\n\t* mpq/tests/Makefile.am: Ditto.\n\t* demos/Makefile.am: New file.\n\n\t* mpbsd/Makefile.am: New file, derived from old mpbsd/Makefile.in.\n\t* mpbsd/Makefile.in: Now generated from Makefile.am.\n\t* mpbsd/realloc.c: Removed, use mpz/realloc.c instead.\n\t* 
mpbsd/tests/Makefile.am: New file.\n\t* mpbsd/tests/Makefile.in: New file, generated from Makefile.am.\n\t* mpbsd/tests/allfuns.c: New file.\n\n\t* gmp.texi (Top): Use @ifnottex, to help makeinfo --html.\n\t(Installing MP): Describe --enable-mpbsd and demo programs.\n\n\t* tests/rand/statlib.c: mpz_cmp_abs => mpz_cmpabs.\n\n\t* tests/rand/Makefile.am (LDADD): Don't need -lm (now in libgmp.la).\n\t(EXTRA_PROGRAMS): Not noinst_PROGRAMS.\n\t(INCLUDES): Shorten to -I$(top_srcdir) now using AM_CONFIG_HEADER.\n\n2000-04-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/urandomm.c: Get type of count right.\n\tSimplify computation of nbits.\n\n2000-04-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/urandomb.c: Fix reallocation condition.\n\tSimplify size computation.\n\n2000-04-08  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_CC_64BIT): Add special handling for\n\tHPUX.\n\t(GMP_CHECK_ASM_W32): Ditto.\n\t* aclocal.m4: Regenerate.\n\n\t* mpn/Makefile.am: Use $(CCAS) for assembling.\n\t(.asm.obj): Add rule.\n\t* mpn/Makefile.in: Regenerate.\n\n\t* gmp.texi (Miscellaneous Integer Functions): Fix typos.\n\n\t* configure.in: Never pass `-h' to grep.\n\t(mips-sgi-irix6.[2-9]*): Try to find 64-bit compiler.\n\t(hppa1.0*-*-*): New flag for cc.\n\t(hppa2.0*-*-*): Try to find 64-bit compiler.  
Choose path, set\n\tCCAS.\n\t* configure: Regenerate.\n\n2000-04-08  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/bin_ui.c: Don't depend on ANSI C features.\n\t* mpz/bin_uiui.c: Likewise.\n\n\t* Makefile.am (MPZ_OBJECTS): mpz/cmp_abs* => mpz/cmpabs*.\n\t(MPQ_OBJECTS): Add mpq/set_d.lo.\n\t(MPZ_OBJECTS): Add mpz/fits*.lo.\n\t* Makefile.in: Regenerate.\n\n\t* mpz/cmpabs.c: New name for mpz/cmp_abs.c.\n\t* mpz/cmpabs_ui.c: New name for mpz/cmp_abs_ui.c.\n\t* mpz/Makefile.am: Corresponding changes.\n\t* mpz/Makefile.in: Regenerate.\n\t* gmp.h: mpz_cmp_abs* => mpz_cmpabs*.\n\n\t* mpz/addmul_ui.c (mpn_neg1): Don't depend on ANSI C features.\n\n\t* mpz/invert.c: Use TMP_MARK since we invoke MPZ_TMP_INIT.\n\n\t* gmp.h (mpq_set_d): Declare correctly.\n\t(mpz_root): Use _PROTO.\n\t(mpz_remove): Use _PROTO.\n\t(mpf_pow_iu): Use _PROTO.\n\n\t* mpn/asm-defs.m4 (MPN_PREFIX): Revert previous change.\n\t* gmp.h (__MPN): Revert previous change.\n\n\t* mpz/perfpow.c: De-ANSI-fy.  Add copyright notice.\n\n\t* mpz/set_d.c: Misc cleanups.\n\n\t* mpq/set_d: New file.\n\t* gmp.h: Declare mpq_set_d.\n\t* mpq/Makefile.am: List set_d.c.\n\t* mpq/Makefile.in: Regenerate.\n\n2000-04-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/fits_sint_p.c: New file.\n\t* mpz/fits_slong_p.c: New file.\n\t* mpz/fits_sshort_p.c: New file.\n\t* mpz/fits_uint_p.c: New file.\n\t* mpz/fits_ulong_p.c: New file.\n\t* mpz/fits_ushort_p.c: New file.\n\t* gmp.h: Declare mpz_fits_*.\n\t* mpz/Makefile.am: List fits_* files.\n\t* mpz/Makefile.in: Regenerate.\n\n2000-04-06  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.texi (Installing MP): Add known build problem SunOS 4.1.4 m4\n\tfailure.\n\n\t* mpn/x86/pentium/gmp-mparam.h: Tune thresholds.\n\t* mpn/x86/p6/gmp-mparam.h: Ditto.\n\t* mpn/x86/k6/gmp-mparam.h: Tune thresholds, add UMUL_TIME, UDIV_TIME.\n\t* mpn/x86/k7/gmp-mparam.h: Tune thresholds, amend UMUL_TIME.\n\n\t* mpn/generic/mul_n.c (mpn_kara_mul_n): Add an ASSERT.\n\t(mpn_kara_sqr_n): Add an ASSERT, use 
KARATSUBA_SQR_THRESHOLD.\n\t(mpn_toom3_sqr_n): Eliminate second evaluate3.\n\n\t* gmp-impl.h (mpn_com_n,MPN_LOGOPS_N_INLINE): Don't allow size==0.\n\t(tune_mul_threshold,tune_sqr_threshold): Conditionalize\n\tdeclarations on TUNE_PROGRAM_BUILD.\n\n\t* mpn/generic/sqr_basecase.c: Add an assert.\n\n2000-04-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.h, mpn/asm-defs.m4: List the same functions for __MPN, but\n\tleave some commented out.\n\n\t* gmp-impl.h (MPN_LOGOPS_N_INLINE): Optimize.\n\t(mpn_com_n): Optimize.\n\n\t* gmp.h (__MPN): Make it use __gmpn instead of __mpn for consistency.\n\t* mpn/asm-defs.m4 (MPN_PREFIX): Likewise.\n\n\t* gmp.h (GMP_ERROR_ALLOCATE): New errcode.\n\n\t* gmp-impl.h (MPN_MUL_N_RECURSE): Delete.\n\t(MPN_SQR_RECURSE): Delete.\n\n\t* gmp-impl.h (TARGET_REGISTER_STARVED): New define.\n\n\t* gmp-impl.h (mpn_kara_sqr_n): Remap with __MPN.\n\t(mpn_toom3_sqr_n): Likewise.\n\t(mpn_kara_mul_n): Likewise.\n\t(mpn_toom3_mul_n): Likewise.\n\t(mpn_reciprocal): Likewise.\n\n\t* gmp-impl.h (__gmpn_mul_n): Remove declaration.\n\t(__gmpn_sqr): Likewise.\n\t* gmp.h (mpn_sqr_n): Declare/remap.\n\t* mpn/generic/mul.c (mpn_sqr_n): New name for mpn_sqr.\n\n\t* gmp.h (mpn_udiv_w_sdiv): Move __MPN remap from here...\n\t* gmp-impl.h: ...to here.\n\n2000-04-05  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.texi (Top): Add `Random Number Functions' to menu.\n\t(Introduction to MP): Fix typo.\n\t(MP Basics): Create menu for all sections.  Move `Random Number\n\tFunctions' to its own chapter.  Add nodes for all sections.\n\t(Function Classes): Mention random generation functions under\n\tmiscellaneous.\n\t(Miscellaneous Integer Functions): Update mpz_urandomb,\n\tmpz_urandomm.\n\t(Low-level Functions): Remove mpn_rawrandom.\n\t(Random State Initialization): Update.\n\n\t* mpf/urandom.c (mpf_urandomb): Remove SIZE parameter.  
Normalize\n\tresult correctly.\n\n\t* gmp.h (mpf_urandomb): Remove SIZE parameter.\n\n\t* randraw.c (gmp_rand_getraw): Handle the case where (1) the LC\n\tscheme doesn't generate even limbs and (2) more than one LC\n\tinvocation is necessary to produce the requested number of bits.\n\n2000-04-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/mul_n.c (INVERSE_3): New name for THIRD, define for\n\tany BITS_PER_MP_LIMB.\n\t(MP_LIMB_T_MAX): New.\n\t(mpn_divexact3_n): Remove.\n\t(interpolate3): Use mpn_divexact_by3 instead of mpn_divexact3_n.\n\n2000-04-05  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h (KARATSUBA_MUL_THRESHOLD<2): Remove cpp test.\n\t(tune_mul_threshold,tune_sqr_threshold): Add declarations, used in\n\tdevelopment only.\n\n\t* mpn/x86/k7/sqr_basecase.asm: New file, only a copy of k6 for now.\n\n2000-04-04  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (TOOM3_MUL_THRESHOLD): Provide default.\n\t(TOOM3_SQR_THRESHOLD): Provide default.\n\n\t* mpn/generic/mul_n.c: Rewrite (mostly by Robert Harley).\n\t* mpn/generic/mul.c: Rewrite (mostly by Robert Harley).\n\n\t* configure.in (sparcv9 64-bit OS): Set extra_functions.\n\n2000-04-04  Linus Nordberg  <linus@swox.se>\n\n\t* mpn/generic/rawrandom.c: Remove file and replace with randraw.c\n\ton top level.\n\t(mpn_rawrandom): Rename to gmp_rand_getraw.\n\n\t* randraw.c: New file; essentially a copy of\n\tmpn/generic/rawrandom.c.\n\t(gmp_rand_getraw): New function (formerly known as mpn_rawrandom).\n\n\t* mpz/urandomb.c (mpz_urandomb): Change mpn_rawrandom -->\n\tgmp_rand_getraw.\n\t* mpz/urandomm.c (mpz_urandomb): Ditto.\n\t* mpf/urandom.c (mpf_urandomb): Ditto.\n\n\t* gmp.h (gmp_rand_getraw): Add function prototype.\n\t(mpn_rawrandom): Remove function prototype.\n\n\t* Makefile.am (libgmp_la_SOURCES): Add randraw.c.\n\t* Makefile.in: Regenerate.\n\n\t* configure.in (gmp_mpn_functions): Remove rawrandom.\n\t* configure: Regenerate.\n\n2000-04-04  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.h 
(GMP_ERROR enum): Remove comma after last enumeration\n\tsince the AIX compiler (xlc) doesn't like that.\n\n\t* randlc.c (gmp_rand_init_lc): Allocate enough space for seed to\n\thold any upcoming seed.\n\t* randlc2x.c (gmp_rand_init_lc_2exp): Likewise.\n\n\t* mpn/generic/rawrandom.c: Remove debugging code.\n\t(mpn_lc): Don't reallocate seed.\n\n\t* mpz/urandomm.c (mpz_urandomm): Implement function.\n\n\t* mpz/urandomb.c (mpz_urandomb): Fix typo in function definition.\n\n2000-04-04  Kevin Ryde  <kevin@swox.se>\n\n\t* make.bat: Removed (no longer works, no longer supported).\n\t* mpn/msdos/asm-syntax.h: Removed (was used only by make.bat).\n\n2000-04-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/brandom.c: New file, replacing random2.\n\n2000-04-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/submul_1.asm: Change some carry-form instructions\n\tinto their plain counterparts.\n\n\t* mpn/sparc64/copyi.asm: Avoid executing ALIGN.\n\n\t* mpn/sparc64/mul_1.asm: Handle overlap of rp/sp.\n\t* mpn/sparc64/addmul_1.asm: Likewise.\n\t* mpn/sparc64/submul_1.asm: Likewise.\n\n2000-04-01  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.h: Fix function prototypes for randomization functions.\n\t(__gmp_rand_lc_scheme_struct): Replace `m' with `m2exp'. Remove\n\tunused `bits'.\n\t(__gmp_rand_data_lc): Add `m2exp' as another way of representing\n\tthe modulus.\n\t(__gmp_rand_state_struct): Remove unused `size'.\n\n\t* rand.c (__gmp_rand_scheme): Use better multipliers.  Remove test\n\tschemes.  Replace `m' with `m2exp'.\n\t(gmp_rand_init): Change parameters and return type.  Use `m2exp'\n\tinstead of `m'.  Set `gmp_errno' on error.  Disable BBS algorithm.\n\n\t* randlc.c (gmp_rand_init_lc): Don't use malloc().  Change\n\tparameters.\n\n\t* randclr.c (gmp_rand_clear): Don't use free().  Disable BBS\n\talgorithm.  
Set `gmp_errno' on error.\n\n\t* randlc2x.c (gmp_rand_init_lc_2exp): New function.\n\t* randsd.c (gmp_rand_seed): New function.\n\t* randsdui.c (gmp_rand_seed_ui): New function.\n\t* randlcui.c: Remove unused file.\n\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Rewrite.\n\t(mpn_lc): New static function.\n\n\t* mpz/urandomb.c (mpz_urandomb): Use ABSIZ() instead of SIZ() for\n\tdetermining size of ROP.\n\n\t* mpf/urandom.c (mpf_urandomb): Add third parameter, nbits.  (Not\n\tused yet!)\n\tChange parameter order to mpn_rawrandom().\n\n\t* Makefile.am (libgmp_la_SOURCES): Add errno.c, randlc2x.c,\n\trandsd.c, randsdui.c.  Remove randui.c.\n\t(MPZ_OBJECTS): Rename urandom.lo --> urandomb.lo.  Add urandomm.lo.\n\t* Makefile.in: Regenerate.\n\n\t* mpz/Makefile.am (libmpz_la_SOURCES): Change urandom.c -->\n\turandomb.c.  Add urandomm.c.\n\t* mpz/Makefile.in: Regenerate.\n\n\t* tests/rand/Makefile.am (noinst_PROGRAMS): Change findcl --> findlc.\n\tAdd gen.static.\n\t* tests/rand/Makefile.in: Regenerate.\n\n\t* tests/rand/gen.c (main): Add mpz_urandomm.  Add command line options\n\t`-C', `-m', extend `-a'.  Use *mp*_*rand*() with new parameters.  Call\n\tgmp_rand_seed().\n\n2000-04-01  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_DATA): Plain .data for hpux.\n\t* configure.in (CCAS): No CFLAGS, they're added when it's used.\n\t(CONFIG_SRCDIR): New define for config.m4.\n\t* mpn/sparc64/addmul_1.asm: Use it for an include().\n\t* mpn/sparc64/submul_1.asm: Ditto.\n\t* mpn/sparc64/mul_1.asm: Ditto.\n\n2000-03-31  Linus Nordberg  <linus@swox.se>\n\n\t* mpz/urandom.c: Rename to...\n\t* mpz/urandomb.c: ...this.\n\n\t* mpz/urandomb.c (mpz_urandomb): Change operand order in call to\n\tmpn_rawrandom().  
Use ABSIZ() instead of SIZ() when checking size\n\tof ROP.\n\n\t* mpz/urandomm.c: New file.\n\n2000-03-31  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_MMX): Give a warning when mmx code\n\twill be omitted.\n\n2000-03-30  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/mul_1h.asm: New file.\n\t* mpn/sparc64/addmul_1h.asm: New file.\n\t* mpn/sparc64/submul_1h.asm: New file.\n\t* mpn/sparc64/mul_1.asm: Rewrite.\n\t* mpn/sparc64/addmul_1.asm: Rewrite.\n\t* mpn/sparc64/submul_1.asm: Rewrite.\n\n2000-03-28  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/mul_1.asm: Fix typo in branch prediction.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\n2000-03-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/lisp/gmpasm-mode.el: Fix some comment detection, use custom,\n\tfontify more keywords, turn into a standalone mode.\n\n\t* stamp-vti: New file, generated together with version.texi.\n\n\t* acinclude.m4 (GMP_VERSION,GMP_HEADER_GETVAL): New macros.\n\t* configure.in (AM_INIT_AUTOMAKE): Use GMP_VERSION.\n\n2000-03-24  Kevin Ryde  <kevin@swox.se>\n\n\t* INSTALL: Updates for new configure system.\n\n\t* configure.in: Add gmp_optcflags_gcc for the x86s, setting -mcpu\n\tand -march.\n\n2000-03-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (mpz_eval_expr): Properly initialize rhs/lhs\n\tfor ROOT.\n\n2000-03-23  Kevin Ryde  <kevin@swox.se>\n\n\t* config.guess (i?86:*:*:*): Use uname -m if detection program fails.\n\n\t* mpn/x86/README: Remove remarks on the now implemented MMX shifts.\n\t* mpn/x86/k6/README: Add speed of mpn_divexact_by3, update mpn_mul_1.\n\n\t* gmp.texi (Installing MP): Corrections to target CPUs.\n\n\t* version.c: Use VERSION from config.h, add copyright comment,\n\trestore \"const\" somehow lost.\n\n\t* configure.in (a29k*-*-*): Fix directory name.\n\n2000-03-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (op_t): Add ROOT.\n\t(fns): Add ROOT.\n\t(mpz_eval_expr): Add 
ROOT.\n\n\t* mpz/root.c: Handle roots of negative numbers.\n\tFix other border cases.\n\tFix rare memory leakage.\n\n\t* errno.c: New file.\n\n2000-03-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.h (error number enum): New anonymous enum.\n\t(gmp_errno): New.\n\n\t* gmp.h (__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR): Bump for GMP 3.0.\n\n2000-03-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/unicos.m4 (FLOAT64): New define.\n\t* mpn/alpha/default.m4 (FLOAT64): New define.\n\t* mpn/alpha/invert_limb.asm (C36): Use FLOAT64.\n\n2000-03-21  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/diveby3.asm: Tiny speedup.\n\n\t* acinclude.m4 (GMP_CHECK_ASM_SHLDL_CL): New macro.\n\t* configure.in: Use it, set WANT_SHLDL_CL in config.m4.\n\t* mpn/x86/x86-defs.m4 (shldl,shrdl,shldw,shrdw): New macros, using\n\tWANT_SHLDL_CL.\n\t* mpn/x86/k6/mmx/lshift.asm: Use shldl macro.\n\t* mpn/x86/k7/mmx/lshift.asm: Ditto.\n\t* mpn/x86/pentium/mmx/lshift.asm: Ditto.\n\t* mpn/x86/k6/mmx/rshift.asm: Use shrdl macro.\n\t* mpn/x86/k7/mmx/rshift.asm: Ditto.\n\t* mpn/x86/pentium/mmx/rshift.asm: Ditto.\n\t* mpn/x86/README.family: Add a note about this.\n\n2000-03-20  Linus Nordberg  <linus@swox.se>\n\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Handle seed value of 0\n\tcorrectly.\n\n\t* configure.in: Fix detection of alpha flavour.\n\tSet compiler options for `sparcv8'.\n\t* configure: Regenerate.\n\n\t* rand.c (__gmp_rand_scheme): Clean up some.  
Use slightly better\n\tmultipliers.\n\n\t* configure.in (AC_OUTPUT): Add tests/Makefile and\n\ttests/rand/Makefile.\n\n\t* acinclude.m4 (AC_CANONICAL_BUILD): Define to\n\t`_AC_CANONICAL_BUILD' to deal with incompatibilities between\n\tAutoconf and Libtool.\n\t(AC_CHECK_TOOL_PREFIX): Likewise.\n\n\t* Makefile.am (EXTRA_DIST): Add directory `tests'.\n\n\t* mkinstalldirs: Update (Automake 2000-03-17).\n\t* ltconfig: Update (Libtool 2000-03-17).\n\t* ltmain.sh: Ditto.\n\n\t* configure: Regenerate with new autoconf/-make/libtool suite.\n\t* aclocal.m4: Ditto.\n\t* config.in: Ditto.\n\t* all Makefile.in's: Ditto.\n\n2000-03-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (main): Don't allow `-N' for base, require `-bN'.\n\n\t* mpn/alpha/unicos.m4 (cvttqc): New define.\n\t* mpn/alpha/invert_limb.asm: Use new define for cvttqc.\n\n2000-03-19  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/sqr_basecase.asm: Tiny amendments for 3x3 case.\n\n\t* gmp.texi: Use @include version.texi.\n\tUse @email and @uref.\n\t(Installing MP): Rewrite for new configure.\n\t(Low-level Functions): Add mpn_divexact_by3.\n\n\t* configure.in (--enable-alloca): New option.\n\t* acconfig.h (USE_STACK_ALLOC): For --disable-alloca.\n\n2000-03-18  Kent Boortz  <kent@swox.com>\n\n\t* macos: New directory with macos port files.\n\n2000-03-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp-impl.h (union ieee_double_extract): Check _CRAYMPP.\n\n\t* mpn/asm-defs.m4 (invert_normalized_limb): Define.\n\n\t* mpn/alpha: Translate `.s' files to `.asm'.\n\n\t* configure: Regenerate.\n\n\t* mpn/alpha/invert_limb.asm: Replace dash in file name with underscore.\n\t* configure.in: Corresponding change.\n\n\t* configure.in: Assign special \"path\" for alphaev6.\n\n\t* mpn/alpha/unicos.m4: New file.\n\t* configure.in (alpha*-cray-unicos*): [This part of the change\n\tcommitted 2000-03-13 by linus]\n\t* mpn/alpha/default.m4: New file.\n\t* configure.in (alpha*-*-*): Use it.\n\n2000-03-17  Kevin Ryde  
<kevin@swox.se>\n\n\t* mpn/x86/pentium/rshift.S: Use plain rcrl (not rcrl $1) for\n\tshift-by-1 case, significant speedup.\n\t* mpn/x86/pentium/README: Add shift-by-1 speed.\n\n2000-03-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Handle Cray T3D/E.\n\n2000-03-15  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/diveby3.c: New file.\n\t* mpn/x86/diveby3.asm: New file.\n\t* mpn/x86/k6/diveby3.asm: New file.\n\t* gmp.h (mpn_divexact_by3): Prototype and define.\n\t* mpn/asm-defs.m4: define_mpn(divexact_by3).\n\t* configure.in (gmp_mpn_functions): Add diveby3.\n\n\t* mpn/x86/pentium/sqr_basecase.asm: A few better addressing modes.\n\n\t* configure.in: Add AC_C_STRINGIZE and AC_CHECK_TYPES((void)).\n\t* gmp-impl.h (ASSERT): Use them.\n\n\t* mpn/x86/k7/mmx/lshift.asm: New file.\n\t* mpn/x86/k7/mmx/rshift.asm: Rewrite simple loop and return value\n\thandling, add some pictures.\n\n2000-03-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v8/mul_1.asm: Make PIC actually work.\n\t* mpn/sparc32/v8/addmul_1.asm: Likewise.\n\n\t* mpn/sparc32/v8/mul_1.asm: Use m4 ifdef, not cpp #if.\n\t* mpn/sparc32/v8/addmul_1.asm: Likewise.\n\n\t* mpn/asm-defs.m4 (C): New define for comments.\n\t* mpn/sparc32: Start comments with `C'.\n\n\t* config.guess: Remove `SunOS 6' handling.\n\tRecognize sun4m and sun4d architectures under old SunOS.\n\n2000-03-14  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in (gmp_srclinks): Set to list of links created by\n\tconfigure.\n\t* configure: Regenerate.\n\n\t* Makefile.am (libgmp_la_LDFLAGS): Set version info.\n\t(DISTCLEANFILES): Include @gmp_srclinks@.\n\t* Makefile.in: Regenerate.\n\n2000-03-13  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Remove some changequote's by quoting the strings\n\tcontaining `[]'.\n\tAdd support for `alpha*-cray-unicos*'.\n\tAC_DEFINE `_LONG_LONG_LIMB' instead of passing it in CFLAGS.\n\tConditionalize the assembler syntax checks.\n\t* configure: Regenerate.\n\t* config.in: Regenerate.\n\n\t* 
acinclude.m4 (GMP_PROG_CCAS): Remove macro.\n\t* aclocal.m4: Regenerate.\n\n2000-03-13  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/p6/README: New file.\n\n\t* mpn/x86/k6/mul_1.asm: Rewrite, smaller and slightly faster.\n\n\t* mpn/lisp/gmpasm-mode.el: Rewrite assembler comment detection and\n\thandling.\n\n\t* configure.in: Separate mmx directories for each x86 flavour.\n\t* configure: Regenerate.\n\n2000-03-12  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/x86-defs.m4 (ALIGN): Supplement definition from\n\tconfig.m4 so as to pad with nops not zeros on old gas.\n\n\t* mpn/x86/k7/mmx/copyd.asm: Use plain emms (femms is just an alias\n\tfor emms now).\n\t* mpn/x86/k7/mmx/copyi.asm: Ditto.\n\t* mpn/x86/k7/mmx/rshift.asm: Ditto.\n\t* mpn/x86/x86-defs.m4: Amend comments.\n\n\t* mpn/x86/mod_1.asm: Add comments on speeds.\n\n\t* mpn/x86/pentium/mmx/lshift.asm: New file.\n\t* mpn/x86/pentium/mmx/rshift.asm: New file.\n\t* mpn/x86/pentium/README: Add speeds of various routines.\n\n2000-03-10  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Reorganize.\n\tUse AC_CHECK_TOOL to find `ar'.\n\tAdd post-includes `regmap.m4' and `aix.m4' for AIX targets.\n\tasm-syntax.h is not needed for PPC or sparc anymore.\n\t(powerpc64-*-aix*): Compiler is always 64-bit. Use `-q64\n\t-qtune=pwr3' to xlc and `-maix64 -mpowerpc64' to gcc.  Pass `-X\n\t64' to `ar' and `nm'.\n\t(pentiummmx): Use GMP_CHECK_ASM_MMX and avoid MMX assembly path if\n\tassembler is not MMX capable.\n\t(pentium[23]): Likewise.\n\t(athlon): Likewise.\n\t(k6*): Likewise.\n\t* configure: Regenerate.\n\n\t* acinclude.m4 (GMP_PROG_CC_WORKS): New macro.\n\t(GMP_PROG_CC_FIND): Use GMP_PROG_CC_WORKS instead of\n\tAC_TRY_COMPILER.  Make sure that the *first* working 32-bit\n\tcompiler is used if no 64-bit compiler is found.\n\t(GMP_CHECK_ASM_MMX): New macro.\n\t* aclocal.m4: Regenerate.\n\n\t* Makefile.in: Regenerate.  
(CC_TEST removed.)\n\t* mpf/Makefile.in: Likewise.\n\t* mpn/Makefile.in: Likewise.\n\t* mpq/Makefile.in: Likewise.\n\t* mpz/Makefile.in: Likewise.\n\t* mpf/tests/Makefile.in: Likewise.\n\t* mpq/tests/Makefile.in: Likewise.\n\t* mpz/tests/Makefile.in: Likewise.\n\n\t* acconfig.h (_LONG_LONG_LIMB): Add.\n\n\t* gmp-impl.h: Include config.h only if HAVE_CONFIG_H is defined.\n\n2000-03-09  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/pentium/mul_basecase.S: Small speedup by avoiding an AGI.\n\n\t* mpn/x86/k7/mmx/copyd.asm: Tiny speedup by avoiding popl.\n\t* mpn/x86/k7/mmx/copyi.asm: Ditto.\n\t* mpn/x86/k7/mul_basecase.asm: Ditto.\n\n2000-03-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Better recognize POWER/PowerPC processor type.\n\n2000-03-07  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/addsub_n.c: Use HAVE_NATIVE_* now in config.h.\n\n\t* mpn/asm-defs.m4: Add comments about SysV m4.\n\t(m4_log2): Don't use <<.\n\t(m4_lshift,m4_rshift): New macros.\n\n2000-03-06  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/regmap.m4: Map cr0 => `0', etc.\n\n2000-03-06  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/tests/ref.c (refmpn_divexact_by3): New function.\n\t* mpn/tests/ref.h: Prototype.\n\n\t* acconfig.h (WANT_ASSERT): New define.\n\t* configure.in (--enable-assert): Turn on WANT_ASSERT.\n\t* assert.c: New file.\n\t* Makefile.am: Add to build.\n\t* gmp-impl.h (ASSERT): New macro.\n\t(ASSERT_NOCARRY) Renamed from assert_nocarry.\n\t(MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS.\n\t* mpn/tests/ref.c: Use ASSERT.\n\t* mpf/get_str.c: Use ASSERT_ALWAYS.\n\t* mpf/set_str.c: Remove old assert macro.\n\n\t* mpn/x86/x86-defs.m4 (cmovnz_ebx_ecx): New macro.\n\t* mpn/x86/p6/aorsmul_1.asm: Use cmov.\n\n\t* mpn/x86/lshift.S: Use %dl with testb, not %edx. 
No object code\n\tchange, testb was still getting generated.\n\t* mpn/x86/rshift.S: Ditto.\n\n2000-03-03  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h: Add IA-64 support.\n\n\t* mpn/powerpc32: Misc cleanups.\n\t* mpn/powerpc32/aix.m4: New file (mainly by Linus).\n\t* mpn/powerpc64/aix.m4: New file (mainly by Linus).\n\t* mpn/powerpc64: Translate `.S' files to `.asm'.\n\n\t* configure.in: Fix typos.\n\t* configure: Regenerate.\n\n2000-03-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/powerpc32/regmap.m4: New file.\n\t* mpn/powerpc32: Translate `.S' files to `.asm'.\n\t* configure.in: Use mpn/powerpc32/regmap.m4 for powerpc targets\n\texcept some weird ones.\n\n2000-03-03  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/lisp/gmpasm-mode.el: Suppress postscript comment prefixes in\n\tfilladapt.\n\n\t* mpn/x86/pentium/sqr_basecase.asm: New file.\n\t* mpn/x86/pentium/gmp-mparam.h (KARATSUBA_SQR_THRESHOLD): Update.\n\n\t* configure.in: Add --enable-assert, enable k6 logops functions.\n\n\t* mpn/x86/k6/mmx/copyi.asm: Use m4 for divide, not as.\n\t* mpn/x86/k6/mmx/copyd.asm: Ditto.\n\t* mpn/x86/README.family: Add a note on this.\n\n2000-03-02  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k6/aors_n.asm: Don't use stosl.\n\t* mpn/x86/copyi.asm: Use cld to clear direction flag.\n\t* mpn/x86/divrem_1.asm: Ditto.\n\t* mpn/x86/README.family: Add a note on this.\n\n\t* mpn/x86/k6/mmx/copyi.asm: Rewrite.\n\t* mpn/x86/k6/mmx/copyd.asm: New file.\n\t* mpn/x86/k6/README: Update, and small amendments.\n\n\t* mpn/x86/x86-defs.m4 (Zdisp): New macro.\n\t* mpn/asm-defs.m4 (m4_stringequal_p): New macro.\n\n\t* mpn/x86/p6/aorsmul_1.asm: Use Zdisp to force zero displacements.\n\t* mpn/x86/k6/aorsmul_1.asm: Ditto.\n\t* mpn/x86/k6/mul_1.asm: Ditto.\n\t* mpn/x86/k6/mul_basecase.asm: Ditto.\n\t* mpn/x86/k7/aors_n.asm: Ditto.\n\t* mpn/x86/k7/aorsmul_1.asm: Ditto.\n\t* mpn/x86/k7/mul_1.asm: Ditto.\n\t* mpn/x86/k7/mul_basecase.asm: Ditto.\n\t* mpn/x86/README.family: Add a note on 
this.\n\n2000-02-27  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/generic/divrem.c (mpn_divrem_classic): Patch to avoid gcc\n\t2.7.2.3 i386 register handling bug.\n\n\t* mpn/x86/k6/aors_n.asm: Rewrite.\n\t* mpn/x86/k6/mmx/lshift.asm: Rewrite.\n\t* mpn/x86/k6/mmx/rshift.asm: Rewrite.\n\t* mpn/x86/k6/README: Update.\n\n\t* mpn/x86/k7/mmx/copyd.asm: Support size==0.\n\t* mpn/x86/k7/mmx/copyi.asm: Ditto.\n\t* mpn/x86/k6/mmx/copyi.asm: Ditto.\n\t* gmp-impl.h: Comment size==0 allowed in MPN_COPY_INCR and\n\tMPN_COPY_DECR.\n\t* configure.in: Enable x86 copyi, copyd; add k6 com_n.\n\n2000-02-25  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (power): Move factorial handling code from `factor'\n\tto `power'.\n\n\t* demos/factorize.c (factor_using_pollard_rho): Move resetting of `c'\n\tto before checking for a non-zero gcd.\n\n2000-02-25  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/asm-defs.m4 (MULFUNC_PROLOGUE): New macro by Linus.\n\t* mpn/x86/k6/aors_n.asm: Use MULFUNC_PROLOGUE.\n\t* mpn/x86/k6/aorsmul_1.asm: Ditto.\n\t* mpn/x86/k7/aors_n.asm: Ditto.\n\t* mpn/x86/k7/aorsmul_1.asm: Ditto.\n\t* mpn/x86/p6/aorsmul_1.asm: Ditto.\n\n\t* mpn/tests/ref.c (refmpn_copyi,refmpn_copyd): Allow size==0.\n\n\t* gmp-impl.h: Move mpn_and_n, mpn_andn_n, mpn_com_n, mpn_ior_n,\n\tmpn_iorn_n, mpn_nand_n, mpn_nior_n, mpn_xor_n and mpn_xorn_n here\n\tfrom gmp.h.  
Use HAVE_NATIVE_mpn_* to make these functions or\n\tinlines.\n\n\t* gmp-impl.h: Move mpn_copyd, mpn_copyi here from gmp.h.\n\t* gmp-impl.h (MPN_COPY_INCR): Use mpn_copyi if available.\n\t* gmp-impl.h (MPN_COPY_DECR): Use mpn_copyd if available.\n\n\t* mpn/x86/k6/mmx/com_n.asm: Moved into mmx subdirectory.\n\t* mpn/x86/k6/mmx/copyi.asm: Ditto.\n\t* mpn/x86/k6/mmx/lshift.asm: Ditto.\n\t* mpn/x86/k6/mmx/rshift.asm: Ditto.\n\t* mpn/x86/k7/mmx/rshift.asm: Ditto.\n\t* mpn/x86/k6/mmx/logops_n.asm: New file.\n\t* configure.in (k6*-*-*): Add logops_n.asm.\n\t* mpn/x86/k6/README: Update.\n\n\t* mpn/x86/k7/mmx/copyi.asm: New file.\n\t* mpn/x86/k7/mmx/copyd.asm: New file.\n\t* mpn/x86/k7/README: Update.\n\n2000-02-24  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/x86-defs.m4 (femms): Generate emms if 3dnow not available.\n\t* mpn/x86/x86-defs.m4 (FRAME_popl): New macro.\n\n\t* Makefile.am: Add info_TEXINFOS = gmp.texi\n\n\t* mpn/x86/divrem_1.asm: Moved from mpn/x86/k6, allow size==0,\n\tconditionalize loop versus decl/jnz.\n\t* mpn/x86/mod_1.asm: Ditto.\n\t* mpn/x86/divmod_1.asm: Removed.\n\t* gmp.texi (mpn_divrem_1,mpn_mod_1): Add that size==0 is allowed.\n\t* mpn/tests/ref.c (refmpn_divrem_1c,etc): Allow size==0.\n\n\t* mpn/x86/k6/aors_n.asm: Avoid gas 1.92.3 leal displacement\n\texpression problem.\n\t* mpn/x86/k6/aorsmul_1.asm: Ditto.\n\t* mpn/x86/k6/mul_1.asm: Ditto.\n\t* mpn/x86/k6/mul_basecase.asm: Ditto\n\t* mpn/x86/k7/aors_n.asm: Ditto.\n\t* mpn/x86/k7/aorsmul_1.asm: Ditto.\n\t* mpn/x86/k7/mul_1.asm: Ditto.\n\t* mpn/x86/k7/mul_basecase.asm: Ditto.\n\t* mpn/x86/k7/rshift.asm: Ditto.\n\t* mpn/x86/p6/aorsmul_1.asm: Ditto.\n\t* mpn/x86/README.family: Describe problem.\n\n2000-02-24  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_LSYM_PREFIX): Add dummy symbol to\n\ttestcase to avoid nm failure.  
Try nm before piping to grep.\n\n\t* acconfig.h: Undef HAVE_NATIVE_func for every mpn function found\n\tin gmp.h.\n\n\t* configure.in: Invoke AC_CONFIG_HEADERS.\n\tDon't invoke AM_CONFIG_HEADER; it makes autoconf confused.\n\tDig out entry points declared in assembly code and AC_DEFINE proper\n\tHAVE_NATIVE_func.\n\n\t* mpn/asm-defs.m4 (MULFUNC_PROLOGUE): New macro.\n\n\t* mpn/x86/p6/aorsmul_1.asm: Use MULFUNC_PROLOGUE.\n\t* mpn/x86/k6/aors_n.asm: Likewise.\n\n\t* Makefile.am (EXTRA_DIST): Add config.in; needed when we don't\n\tuse AM_CONFIG_HEADER in configure.in.\n\n\t* mpn/Makefile.am (INCLUDES): Add `-I..' for config.h and\n\tgmp-mparam.h.\n\t* mpf/Makefile.am: Likewise.\n\t* mpq/Makefile.am: Likewise.\n\t* mpz/Makefile.am: Likewise.\n\n\t* mpf/tests/Makefile.am (INCLUDES): Add `-I../..' for config.h and\n\tgmp-mparam.h.\n\t* mpq/tests/Makefile.am: Likewise.\n\t* mpz/tests/Makefile.am: Likewise.\n\n\t* configure: Regenerate.\n\t* aclocal.m4: Regenerate.\n\t* config.in: Regenerate.\n\t* Makefile.in: Regenerate.\n\t* mpf/Makefile.in: Regenerate.\n\t* mpn/Makefile.in: Regenerate.\n\t* mpq/Makefile.in: Regenerate.\n\t* mpz/Makefile.in: Regenerate.\n\t* mpf/tests/Makefile.in: Regenerate.\n\t* mpq/tests/Makefile.in: Regenerate.\n\t* mpz/tests/Makefile.in: Regenerate.\n\n2000-02-23  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/addmul_1.S: Amend comments, this code no longer used by\n\tPentiumPro.\n\t* mpn/x86/submul_1.S: Ditto.\n\n\t* mpn/x86/k6/com_n.asm: Rewrite, smaller but same speed.\n\n\t* mpn/x86/addmul_1.S: Add PROLOGUE and EPILOGUE to get .type and\n\t.size for ELF.  
Rename #define size to n to avoid .size.\n\t* mpn/x86/lshift.S: Ditto.\n\t* mpn/x86/mul_1.S: Ditto.\n\t* mpn/x86/mul_basecase.S: Ditto.\n\t* mpn/x86/rshift.S: Ditto.\n\t* mpn/x86/submul_1.S: Ditto.\n\t* mpn/x86/udiv.S: Ditto.\n\t* mpn/x86/umul.S: Ditto.\n\t* mpn/x86/pentium/add_n.S: Ditto.\n\t* mpn/x86/pentium/addmul_1.S: Ditto.\n\t* mpn/x86/pentium/lshift.S: Ditto.\n\t* mpn/x86/pentium/mul_1.S: Ditto.\n\t* mpn/x86/pentium/mul_basecase.S: Ditto.\n\t* mpn/x86/pentium/rshift.S: Ditto.\n\t* mpn/x86/pentium/sub_n.S: Ditto.\n\t* mpn/x86/pentium/submul_1.S: Ditto.\n\n2000-02-22  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_INIT): Use temporary file cnfm4p.tmp for\n\tpost-defines.\n\t(GMP_FINISH): Ditto.\n\t(GMP_DEFINE): Add third optional argument specifying location in\n\toutfile.\n\t(GMP_DEFINE_RAW): New macro.\n\t* aclocal.m4: Regenerate.\n\n\t* configure.in: Add `HAVE_TARGET_CPU_$target_cpu' using\n\tGMP_DEFINE_RAW.\n\t* configure: Regenerate.\n\n\t* mpz/tests/Makefile.am: New test t-root.\n\t* mpz/tests/Makefile.in: Regenerate.\n\n2000-02-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/root.c: Complete rewrite; still primitive, but at least correct.\n\t* mpz/tests/t-root.c: New test.\n\n2000-02-22  Kevin Ryde  <kevin@swox.se>\n\n\t* mpn/x86/k7/mul_basecase.asm: New file.\n\t* mpn/x86/k7/README: Add mpn_mul_basecase speed.\n\t* mpn/x86/k7/gmp-mparam.h: New file.\n\n\t* mpn/x86/x86-defs.m4 (loop_or_decljnz,cmov_bytes): New macros.\n\t* mpn/asm-defs.m4 (m4_ifdef_anyof_p): New macro.\n\n\t* mpn/x86/k6/aorsmul_1.asm: New file.\n\t* mpn/x86/k6/addmul_1.S: Removed (was a copy of pentium version).\n\t* mpn/x86/k6/submul_1.S: Removed (was a copy of pentium version).\n\n\t* mpn/x86/p6/aorsmul_1.asm: Use OPERATION_addmul_1 and\n\tOPERATION_submul_1.\n\t* mpn/x86/k6/aors_n.asm: Use OPERATION_add_n and OPERATION_sub_n.\n\t* configure.in: Declare multi-function files for k6 and p6.\n\n\t* configure.in: Add HAVE_TARGET_CPU_$target_cpu for config.m4.\n\t* 
mpn/asm-defs.m4 (define_not_for_expansion): New macro.\n\n\t* mpn/generic/divrem_1n.c (__gmpn_divrem_1n): New file, split from\n\tmpn/generic/divrem_1.c.\n\t* mpn/generic/divrem_1.c: Ditto.\n\t* configure.in (gmp_mpn_functions): Ditto.\n\n2000-02-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.h: Undo 1996-10-06 NeXT change, it was clearly improperly\n\twritten.\n\n2000-02-21  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Link <src>/mpn/asm-defs.m4 to <build>mpn/asm.m4.\n\t* configure: Regenerate.\n\n2000-02-21  Linus Nordberg  <linus@swox.se>\n\n\t* mpn/x86/k7/aorsmul_1.asm: Change OPERATION_ADDMUL -->\n\tOPERATION_addmul_1.  Change OPERATION_SUBMUL -->\n\tOPERATION_submul_1.\n\n\t* mpn/x86/k7/aors_n.asm: Change OPERATION_ADD --> OPERATION_add_n.\n\tChange OPERATION_SUB --> OPERATION_sub_n.\n\n\t* mpn/Makefile.am: Pass -DOPERATION_$* to preprocessors.\n\t* mpn/Makefile.in: Regenerate.\n\n\t* configure.in: Symlink mpn/asm-defs.m4 to build-dir/mpn.  Link\n\tmulti-function files to mpn/<function>.asm and remove function\n\tname from `gmp_mpn_functions'.\n\t* configure: Regenerate.\n\n\t* acinclude.m4 (GMP_FINISH): Tell user what we're doing.\n\t* aclocal.m4: Regenerate.\n\n2000-02-21  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp-impl.h: Rename __gmpn_mul_basecase to mpn_mul_basecase and\n\t__gmpn_sqr_basecase to mpn_sqr_basecase, remove __gmpn prototypes.\n\t* mpn/x86/mul_basecase.S: Ditto.\n\t* mpn/x86/pentium/mul_basecase.S: Ditto.\n\n\t* configure.in (gmp_m4postinc): Use x86-defs.m4 on athlon-*-* too.\n\n2000-02-20  Kevin Ryde  <kevin@swox.se>\n\n\t* acinclude.m4 (GSYM_PREFIX): Drop $1, change by Linus.\n\t* mpn/asm-defs.m4 (PROLOGUE,EPILOGUE): Use GSYM_PREFIX as a\n\tstring, change by Linus.\n\t* mpn/x86/x86-defs.m4: Use GSYM_PREFIX as a string.\n\n\t* mpn/x86/k6/gmp-mparam.h: New file.\n\t* mpn/asm-defs.m4 (m4_warning): New macro.\n\n\t* mpn/x86/README: Amendments per new code and directories.\n\t* mpn/x86/README.family: New file.\n\t* mpn/x86/k6/README: New 
file.\n\t* mpn/x86/k7/README: New file.\n\n\t* mpn/generic/mul_n.c: Rename __gmpn_mul_basecase to\n\tmpn_mul_basecase and __gmpn_sqr_basecase to mpn_sqr_basecase.\n\t* mpn/generic/mul_basecase.c: Ditto.\n\t* mpn/generic/sqr_basecase.c: Ditto.\n\t* mpn/generic/mul.c: Ditto.\n\n2000-02-19  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Don't try to symlink more than one multi-func\n\tfile.\n\t* configure: Regenerate.\n\n2000-02-18  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): GMP_DEFINE\n\t`GSYM_PREFIX'.  Run ACTIONs even when value is found in cache.\n\t(GMP_CHECK_ASM_ALIGN_LOG): GMP_DEFINE `ALIGN'.  Run ACTIONs even\n\twhen value is found in cache.\n\t* aclocal.m4: Regenerate.\n\n\t* configure.in: Don't define GSYM_PREFIX or ALIGN.\n\tAdd mechanism for multi-function files.\n\t* configure: Regenerate.\n\n2000-02-18  Kevin Ryde  <kevin@swox.se>\n\n\t* configure.in (gmp_m4postinc): Enable x86-defs.m4.\n\t* mpn/x86/k7/mul_1.asm: Fix include.\n\t* mpn/x86/k6/mul_basecase.S: Removed (copy of the pentium version).\n\t* mpn/x86/k6/mul_basecase.asm: New file.\n\t* mpn/x86/k6/sqr_basecase.asm: New file.\n\t* mpn/x86/k6/com_n.asm: New file.\n\t* mpn/x86/k6/copyi.asm: New file.\n\t* gmp.texi (Low-level Functions): Clarify mpn overlaps permitted.\n\t* gmp-impl.h (MPN_OVERLAP_P): New macro.\n\t* gmp-impl.h (assert_nocarry): New macro.\n\t* mpn/tests/ref.c: New file, based in part on other mpn/tests/*.c.\n\t* mpn/tests/ref.h: New file.\n\n2000-02-17  Linus Nordberg  <linus@swox.se>\n\n\t* Makefile.am (dist-hook): Don't include any emacs backup files\n\t(*.~*) in dist.\n\t* Makefile.in: Regenerate.\n\n2000-02-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/mul_1.asm: Use `rd' to get current PC; get rid of\n\tgetpc function.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\n2000-02-17  Kevin Ryde  <kevin@swox.se>\n\n\t* gmp.h: Add prototypes and defines for mpn_and_n, 
mpn_andn_n,\n\tmpn_com_n, mpn_copyd, mpn_copyi, mpn_ior_n, mpn_iorn_n,\n\tmpn_mul_basecase, mpn_nand_n, mpn_nior_n, mpn_sqr_basecase,\n\tmpn_xor_n, mpn_xorn_n.\n\n\t* mpn/asm-defs.m4: Many additions making up initial version.\n\t* mpn/asm-defs.m4 (L): Use defn(`LSYM_PREFIX').\n\t* mpn/x86/x86-defs.m4: New file.\n\t* mpn/x86/k6/aors_n.asm: New file.\n\t* mpn/x86/k6/divmod_1.asm: New file.\n\t* mpn/x86/k6/divrem_1.asm: New file.\n\t* mpn/x86/k6/lshift.S: Removed (was a copy of the pentium version).\n\t* mpn/x86/k6/lshift.asm: New file.\n\t* mpn/x86/k6/mod_1.asm: New file.\n\t* mpn/x86/k6/mul_1.S: Removed (was a copy of the pentium version).\n\t* mpn/x86/k6/mul_1.asm: New file.\n\t* mpn/x86/k6/rshift.S: Removed (was a copy of the pentium version).\n\t* mpn/x86/k6/rshift.asm: New file.\n\t* mpn/x86/k7/aors_n.asm: New file.\n\t* mpn/x86/k7/aorsmul_1.asm: New file.\n\t* mpn/x86/k7/mul_1.asm: New file.\n\t* mpn/x86/k7/rshift.asm: New file.\n\t* mpn/x86/p6/aorsmul_1.asm: New file.\n\t* mpn/x86/copyi.asm: New file.\n\t* mpn/x86/copyd.asm: New file.\n\t* mpn/lisp/gmpasm-mode.el: New file.\n\n2000-02-16  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc32/v9/mul_1.asm: Conditionalize for PIC.\n\t* mpn/sparc32/v9/addmul_1.asm: Likewise.\n\t* mpn/sparc32/v9/submul_1.asm: Likewise.\n\t* mpn/sparc32/v8/supersparc/udiv.asm: Likewise.\n\t* mpn/sparc32/udiv_fp.asm: Likewise.\n\n2000-02-16  Linus Nordberg  <linus@swox.se>\n\n\t* configure.in: Add mechanism for including target specific\n\tm4-files in config.m4.\n\t* configure: Regenerate.\n\n\t* acinclude.m4 (GMP_PROG_CCAS): Begin assembly lines (except\n\tlabels) with a tab character.  
HP-UX demands it.\n\t(GMP_CHECK_ASM_SIZE): Ditto.\n\t(GMP_CHECK_ASM_LSYM_PREFIX): Ditto.\n\t(GMP_CHECK_ASM_LABEL_SUFFIX): Set to empty string for HP-UX.\n\t(GMP_CHECK_ASM_GLOBL): Change `.xport' --> `.export'.\n\t* aclocal.m4: Regenerate.\n\n2000-02-16  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4 (GMP_CHECK_ASM_LSYM_PREFIX): Define LSYM_PREFIX as\n\tthe prefix only, no argument.\n\t* aclocal.m4: Regenerate.\n\t* configure: Regenerate.\n\n\t* mpn/asm-defs.m4 (L): No argument to LSYM_PREFIX.\n\n2000-02-15  Linus Nordberg  <linus@swox.se>\n\n\t* acinclude.m4: Prefix all temporary shell variables with\n\t`gmp_tmp_'.\n\t(GMP_PROG_CC_FIND): Use defaults if no arguments are passed.\n\tQuote use of arguments.\n\t(GMP_PROG_CCAS): New macro.\n\t(GMP_INIT): New macro.\n\t(GMP_FINISH): New macro.\n\t(GMP_INCLUDE): New macro.\n\t(GMP_SINCLUDE): New macro.\n\t(GMP_DEFINE): New macro.\n\t(GMP_CHECK_ASM_LABEL_SUFFIX): New macro.\n\t(GMP_CHECK_ASM_TEXT): New macro.\n\t(GMP_CHECK_ASM_DATA): New macro.\n\t(GMP_CHECK_ASM_GLOBL): New macro.\n\t(GMP_CHECK_ASM_TYPE): New macro.\n\t(GMP_CHECK_ASM_SIZE): New macro.\n\t(GMP_CHECK_ASM_LSYM_PREFIX): New macro.\n\t(GMP_CHECK_ASM_W32): New macro.\n\t* aclocal.m4: Regenerate.\n\n\t* configure.in: Find m4 and nm for target.\n\tUse new macros to create config.m4.\n\tPrefix all temporary shell variables with `tmp_'.\n\tPass `-X 64' to nm for 64-bit PPC target with 64-bit compiler.\n\t* configure: Regenerate.\n\n\t* Makefile.am (dist-hook): *Really* remove all CVS dirs in\n\tdist.\n\t* Makefile.in: Regenerate.\n\n\t* mpn/Makefile.am: Add target for building .lo and .o from\n\t.asm.\n\tPass -DPIC to preprocessor (CPP/m4) when building .lo.\n\tBuild .o a second time for target .lo, without -DPIC to\n\tpreprocessor.\n\t(SUFFIX): Add `.asm'.\n\t(EXTRA_DIST): Add asm-defs.m4.\n\t* mpn/Makefile.in: Regenerate.\n\n\t* mpf/Makefile.in: Regenerate.\n\t* mpf/tests/Makefile.in: Regenerate.\n\t* mpq/Makefile.in: Regenerate.\n\t* mpq/tests/Makefile.in: 
* config.sub: Adapt to new config.guess sparc naming conventions.
mpn/Makefile.am: New file.\n\t* mpn/Makefile.in: Generate.\n\t* mpn/configure.in: Remove.\n\n\t* mpq/Makefile.am: New file.\n\t* mpq/Makefile.in: Generate.\n\t* mpq/configure.in: Remove.\n\t* mpq/tests/Makefile.am: New file.\n\t* mpq/tests/Makefile.in: Generate.\n\t* mpq/tests/configure.in: Remove.\n\n\t* mpz/Makefile.am: New file.\n\t* mpz/Makefile.in: Generate.\n\t* mpz/configure.in: Remove.\n\t* mpz/tests/Makefile.am: New file.\n\t* mpz/tests/Makefile.in: Generate.\n\t* mpz/tests/configure.in: Remove.\n\n2000-02-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/add_n.S: Don't use label L0 twice.\n\t* mpn/x86/sub_n.S: Likewise.\n\n2000-01-20  Linus Nordberg  <linus@swox.se>\n\n\t* demos/pexpr.c: Don't use setup_error_handler() in windoze.\n\n2000-01-19  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (sigaltstack): #define to sigstack for AIX.\n\t(setup_error_handler): Don't write to ss_size and ss_flags\n\ton AIX.\n\n2000-01-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/configure.in (hppa2.0*-*-*): Move assignment of\n\ttarget_makefile_frag to where it belongs.\n\n1999-12-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (v9 umul_ppmm): New #define.\n\t(v9 udiv_qrnnd): New #define.\n\n1999-12-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divmod_1.c: Use invert_limb.\n\t* mpn/generic/mod_1.c: Use invert_limb.\n\n\t* gmp-impl.h (invert_limb): Put definition here.\n\t* mpn/generic/divrem.c (invert_limb): Delete definition.\n\t* mpn/generic/divrem_2.c (invert_limb): Delete definition.\n\n\t* gmp.h (mpn_divrem): Inhibit for non-gcc.\n\tBut declare (undo 1999-11-22 change).\n\n\t* gmp-impl.h (DItype,UDItype): Do these also if _LONG_LONG_LIMB.\n\n\t* longlong.h: Move 64-bit hppa code out of __GNUC__ conditional.\n\n\t* stack-alloc.c (HSIZ): New #define.\n\t(__tmp_alloc): Use HSIZ instead of sizeof(tmp_stack).\n\n1999-12-10  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Clean up handling of x86 CPUs: Properly recognize\n\tAmd 
Update x86 recog code to initially match\n\tmore than just i386.
run reference code if NOCHECK.\n\t* mpn/tests/sub_n.c: Likewise.\n\t* mpn/tests/mul_1.c: Likewise.\n\t* mpn/tests/addmul_1.c: Likewise.\n\t* mpn/tests/submul_1.c: Likewise.\n\n\t* mpn/tests/lshift.c: (main): Only print test number if TIMES==1\n\tand not printing.\n\t* mpn/tests/rshift.c: Likewise.\n\n1999-11-22  Torbjorn Granlund  <tege@swox.com>\n\n\t* gmp.h (mpz_init_set_str): Declare using __gmp_const.\n\t(mpz_set_str): Likewise.\n\t(mpf_init_set_str): Likewise.\n\t(mpf_set_str): Likewise.\n\t(mpn_set_str): Likewise.\n\t(__gmp_0): Likewise.\n\t(mpn_divrem): Remove separate declaration; it's defined later in\n\tthis file.\n\n\t* gmp.h: Replace \"defined (__STD__)' by (__STDC__-0) in\n\texpressions involving more than one term, to handle Sun's compiler\n\tthat most helpfully sets __STDC__ to 0.\n\t* gmp-impl.h: Likewise.\n\t* longlong.h: Likewise.\n\n1999-11-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.\n\t(KARATSUBA_SQR_THRESHOLD): #define.\n\n\t* mpn/sparc64/lshift.s: Compensate stack references for odd stack ptr.\n\t* mpn/sparc64/rshift.s: Likewise.\n\n\t* mpn/sparc64/addmul_1.s: Propagate carry properly.\n\t* mpn/sparc64/submul_1.s: Likewise.\n\n\t* mpn/sparc64/sub_n.s: Rewrite.\n\n\t* mpn/sparc64/sub_n.s: Get operand order for main subcc right\n\t(before scrapping this code for new code).\n\n1999-11-20  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/sparc64/add_n.s: Rewrite.\n\n1999-11-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/syntax.h (PROLOG): New #define.\n\t(EPILOG): New #define.\n\n\t* gmp.h (mpn_addsub_n): Declare.\n\t* gmp.h (mpn_add_nc): Declare.\n\t* gmp.h (mpn_sub_nc): Declare.\n\t* mpn/powerpc64/addsub_n.S: New file.\n\n1999-11-17  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/gmp-mparam.h\n\t(KARATSUBA_MUL_THRESHOLD): Only #define #ifndef.\n\t(KARATSUBA_SQR_THRESHOLD): Likewise.\n\n1999-11-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/mul_1.S: Unroll and optimize 
for P6 and K7.\n\n1999-11-09  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/x86/p6/gmp-mparam.h\n\t(KARATSUBA_MUL_THRESHOLD): Only #define #ifndef.\n\t(KARATSUBA_SQR_THRESHOLD): Likewise.\n\n1999-11-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/addsub_n.c: New file.\n\n1999-11-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Handle alpha:FreeBSD with alpha:NetBSD.\n\n\t* configure.in (vax*-*-*): New case.\n\t* config/mt-vax: New file.\n\t* mpn/vax/add_n.s: Rewrite.\n\t* mpn/vax/sub_n.s: Rewrite.\n\n1999-10-31  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/vax/rshift.s: New file.\n\t* mpn/vax/lshift.s: New file.\n\n1999-10-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Handle k5 and k6.\n\t* mpn/configure.in: Recognize k6.\n\n\t* mpf/tests/t-get_d.c (LOW_BOUND, HIGH_BOUND): New #defines.\n\t(main): Tighten error bounds to 14 digits.\n\n\t* longlong.h (default umul_ppmm, when smul_ppmm exists):\n\tRename __m0 => __xm0, __m1 => __xm1.\n\t(default smul_ppmm): Likewise.\n\n1999-10-11  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.guess: Reverse the test for POWER vs PowerPC.\n\t* config.guess (sun4m:SunOS:5.*:*): New case.\n\t* config.guess (sun4u:SunOS:5.*:*): New case.\n\n1999-09-29  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_2.c: Clean up comments.\n\n1999-09-23  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/Makefile.in: Use move-if-change when generating binaries.\n\t* mpf/tests/Makefile.in: Likewise.\n\t* mpq/tests/Makefile.in: Likewise.\n\t* mpz/tests/move-if-change: New file.\n\t* mpf/tests/move-if-change: New file.\n\t* mpq/tests/move-if-change: New file.\n\n\t* gmp.h (mpn_incr_u): New macro (from mpn/generic/mul_n.c).\n\t(mpn_decr_u): New macro.\n\n\t* mpn/generic/mul_n.c (mpn_incr): Delete.\n\t* mpn/generic/mul_n.c: Update usages mpn_incr => mpn_incr_u.\n\t* mpn/generic/divrem_newt.c: Use mpn_incr_u and mpn_decr_u instead of\n\tmpn_add_1 and mpn_sub_1.\n\t* mpn/generic/sqrtrem.c: Likewise.\n\t* 
mpz/cdiv_q_ui.c: Likewise.\n\t* mpz/cdiv_qr_ui.c: Likewise.\n\t* mpz/fdiv_q_ui.c: Likewise.\n\t* mpz/fdiv_qr_ui.c: Likewise.\n\n\t* mpn/generic/sqrtrem.c: Start single-limb Newton iteration from 18\n\tbits.\n\n1999-07-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c (__gmpn_divrem_1n): New function.\n\n\t* mpn/generic/divrem_2.c: New file, code from divrem.c, `case 2:'.\n\t* mpn/Makefile.in: Compile divrem_2.c.\n\t* make.bat: Compile divrem_2.c.\n\t* mpn/configure.in (functions): Add divrem_2.\n\t* gmp.h: Declare mpn_divrem_2.\n\n\t* mpn/generic/divrem.c: Delete special cases, handle just divisors\n\tof more than 2 limbs.\n\t* gmp.h (mpn_divrem): Call mpn_divrem_1, mpn_divrem_2, as appropriate.\n\n\t* mpn/generic/divrem.c: Rework variable usage for better register\n\tallocation.\n\n1999-07-26  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/alpha/ev5/add_n.s: Rewrite for better ev6 speed.\n\t* mpn/alpha/ev5/sub_n.s: Likewise.\n\n1999-07-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (alpha): Define umul_ppmm for cc.\n\n\t* gmp-impl.h (DItype, UDItype): Define for non-gcc if _LONGLONG is\n\tdefined.\n\n1999-07-15  Torbjorn Granlund  <tege@swox.com>\n\n\t* longlong.h (powerpc64 count_leading_zeros): Fix typo.\n\t(powerpc64 add_ssaaaa): Fix typos.\n\t(powerpc64 sub_ddmmss): Fix typos.\n\n1999-07-14  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpz/tests/Makefile.in: Pass XCFLAGS when linking.\n\t* mpf/tests/Makefile.in: Likewise.\n\t* mpq/tests/Makefile.in: Likewise.\n\t* mpn/Makefile.in (.S.o): Pass XCFLAGS.\n\n\t* longlong.h: Add support for 64-bit PowerPC.\n\t* config.sub: Handle \"powerpc64\".\n\t* configure.in: Likewise.\n\t* mpn/configure.in: Suppress use of config/t-ppc-aix for now,\n\tit seems compiler passes proper options.\n\t* mpn/powerpc64/*.S: New files.\n\n\t* Makefile.in (FLAGS_TO_PASS): Pass \"AR=$(AR)\".\n\n1999-07-07  Torbjorn Granlund  <tege@swox.com>\n\n\t* demos/pexpr.c (factor): Change alloca call to a malloc/free 
pair.\n\n\t* mpn/powerpc32/syntax.h: Add #define's for crN.\n\n\t* gmp.h (gmp_rand_algorithm): Remove spurious `,'.\n\n1999-07-05  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/generic/divrem_1.c: Normalize divisor when needed.\n\n1999-07-02  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/configure.in (powerpc*-apple-mach): New configuration.\n\t* mpn/powerpc32/*: Add support for apple-macho syntax.\n\t* mpn/powerpc32/syntax.h: New file.\n\t* gmp-impl.h: Don't use `__attribute__' syntax for Apple's perversion\n\tof GCC.\n\n1999-05-26  Linus Nordberg  <linus@swox.se>\n\n\t* rand.c (gmp_rand_init): Fix typo.\n\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Count bits, not limbs,\n\tto keep track of how many rounds to do in loop.  Clean up\n\ttemporary allocation.  Update `seedsize' inside loop.  Mask off\n\tthe correct number of bits from final result.  Init `mcopyp' even\n\twhen not normalizing `m'.\n\n\t* randlc.c (gmp_rand_init_lc): Fix typo (don't call\n\tmpz_init_set_ui()).\n\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Set SIZ(s->seed) when\n\treallocating.\n\n\t* tests/rand/Makefile (test, bigtest): Add 33-bit tests.\n\n\t* tests/rand/gen.c (main): Set precision of variable passed to\n\tmpf_urandomb().  
Add option `-p'.\n\n1999-05-25  Linus Nordberg  <linus@swox.se>\n\n\t* randcm.c: Remove.\n\t* randcmui.c: Remove.\n\t* Makefile.in: Remove randcm and randcmui.\n\t* make.bat: Ditto.\n\t* gmp-impl.h: Remove prototypes for __gmp_rand_init_common() and\n\t__gmp_rand_init_common_ui().\n\t* randlc.c (gmp_rand_init_lc): Don't call\n\t__gmp_rand_init_common().\n\n\t* randlcui.c (gmp_rand_init_lc_ui): Don't call\n\t__gmp_rand_init_common_ui().\n\n\t* gmp.h (__gmp_rand_state_struct): Remove unused member `maxval'.\n\t* randclr.c (gmp_rand_clear): Remove reference to s->maxval.\n\t* randcm.c (__gmp_rand_init_common): Ditto\n\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Don't calculate nlimbs\n\ttwice.\n\n\t* gmp.h (__gmp_rand_dist): Remove.\n\n1999-05-24  Linus Nordberg  <linus@swox.se>\n\n\t* mpn/generic/rawrandom.c: Clean up comments.\n\n\t* gmp.texi: Add documentation for random number generation.\n\n1999-05-21  Linus Nordberg  <linus@swox.se>\n\n\t* gmp.h: Typedef `gmp_rand_state' as an array with one element.\n\tChange prototypes accordingly.\n\t* gmp-impl.h: Change prototypes using `gmp_rand_state'.\n\t* rand.c (gmp_rand_init): Take `gmp_rand_state' as argument\n\tinstead of a pointer to a `gmp_rand_state'.\n\t* mpf/urandom.c (mpf_urandomb): Ditto.\n\t* mpz/urandom.c (mpz_urandomb): Ditto.\n\t* mpn/generic/rawrandom.c (mpn_rawrandom): Ditto.\n\t* randcmui.c (__gmp_rand_init_common_ui): Ditto.\n\t* randlc.c (gmp_rand_init_lc): Ditto.\n\t* randlcui.c (gmp_rand_init_lc_ui): Ditto.\n\t* randui.c (gmp_rand_init_ui): Ditto.\n\t* randcm.c (__gmp_rand_init_common): Ditto.\n\t* randclr.c (gmp_rand_clear): Ditto.\n\n\t* tests/rand/gen.c (main): Pass `s' to rand-funcs instead of address\n\tof `s'.\n\n1999-05-20  Linus Nordberg  <linus@swox.se>\n\n\t* Makefile.in: Rename randi.c --> rand.c, randi_lc.c --> randlc.c,\n\trandicom.c --> randcm.c.  
Cast longjmp argument via long to silence warnings on\n\t64-bit hosts.
Add RCSID.\n\n1999-04-27  Linus Nordberg  <linus@swox.se>\n\n\t* tests/rand/Makefile (clean): Add target.\n\n1999-04-27  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* tests/rand/stat.c: Include gmpstat.h.\n\tAdd global int g_debug.\n\n\t* tests/rand/spect.c: Include <unistd.h>.\n\n\t* tests/rand/findcl.c (main): Input is `m', not all factors of `m'.\n\tPrint only the very first matching multiplier.  Include <unistd.h>.\n\tFlush stdout.  Print \"done.\" when done.\n\n\t* tests/rand/spect.c: Move everything but main() to statlib.c.\n\n\t* tests/rand/findcl.c: New file.\n\n\t* tests/rand/gmpstat.h: New file.\n\n\t* tests/rand/statlib.c (merit, merit_u, f_floor, vz_dot,\n\tspectral_test): New functions.\n\n1999-04-27  Torbjorn Granlund  <tege@swox.com>\n\n\t* mpn/configure.in: Fix typo, \"sparc-*)\" was \"sparc)\".\n\n1999-04-21  Torbjorn Granlund  <tege@swox.com>\n\n\t* config.sub: Recognize ev6.\n\n1999-04-12  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* urandom.c: Split up into randclr.c, randi.c, randi_lc.c,\n\trandicom.c.\n\t* randclr.c, randi.c, randi_lc.c, randicom.c: New files.\n\t* Makefile.in: Remove urandom.  
Add randclr, randi, randi_lc,\n\trandicom.\n\t* make.bat: Ditto\n\n1999-03-31  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* configure.in (sparc9-*-solaris2.[789]*, etc): New alternative.\n\t* mpn/configure.in: Use mt-sprc9 also for ultrasparc*-*-solaris2*.\n\n1999-03-30  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* urandom.c (__gmp_rand_scheme): Change NULL->0.\n\tInclude \"gmp.h\" instead of <gmp.h>.\n\n1999-03-29  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* gmp.h (__gmp_rand_data_lc): Now holds a, c, m instead of scheme\n\tstruct.\n\t(__gmp_rand_lc_scheme_struct): Remove mpz_t's `a' and `m'.\n\n\t* tests/rand/stat.c (f_freq): Don't print 2nd level results if doing\n\t1st level.\n\n\t* tests/rand/gen.c (main): Set default algorithm to mpz_urandomb.\n\t(main): Add option -c.\n\n1999-03-24  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* tests/rand/Makefile (GMPINC): Rename to GMPH.\n\t(GMPH): Add gmp-mparam.h.\n\t(CFLAGS): Add -I$(GMPLIBDIR)/mpn\n\n1999-03-23  Linus Nordberg  <linus.nordberg@canit.se>\n\n\t* Makefile.in: Compile top-dir/urandom.c.\n\t* make.bat: Ditto.\n\n\t* mpn/Makefile.in: Compile rawrandom.c.\n\t* make.bat: Ditto.\n\n\t* mpn/configure.in (functions): Add rawrandom.\n\n\t* gmp.h (__gmp_rand_scheme_struct): Rename to\n\t__gmp_rand_lc_scheme_struct.\n\t(__gmp_rand_data_lc): Remove member 'n'.  Allocate a\n\t__gmp_rand_lc_scheme_struct instead of a pointer to one.\n\tAdd prototype for gmp_rand_init_lc(), mpn_rawrandom().\n\tNew prototype for mpz_urandomb().\n\n\t* urandom.c: New file.\n\t(__gmp_rand_init_common): New function.\n\t(gmp_rand_init_lc): New function.\n\t(gmp_rand_init): Don't init data_lc->n.  Call gmp_rand_init_lc()\n\tand __gmp_rand_init_common().\n\t(gmp_rand_clear): Remove reference to data_lc->n.\n\n\t* mpz/urandom.c (gmp_rand_init, gmp_rand_clear): Move to new file\n\turandom.c in top-dir.\n\t(mpz_urandomb): Add function parameter nbits.  
(gmp_rand_dist, gmp_rand_algorithm): New enums for randomization\n\tfunctions.
Handle IEEE denormalized numbers.
Clean up.\n\n1998-12-02  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/Makefile.in (CCAS): New macro.\n\t(.s.o): Use CCAS.\n\t(.S.o): Likewise.\n\n\t* mpn/Makefile.in (mul_basecase.o): Add dependency.\n\t(sqr_basecase.o): Likewise.\n\t(mod_1.o): Likewise.\n\n\t* demos/pexpr.c (cputime): Test also __hpux.\n\t(cleanup_and_exit): Check SIGXCPU only #ifdef LIMIT_RESOURCE_USAGE.\n\n\t* mpz/tests/t-2exp.c: Use urandom, not random.\n\n\t* mpn/configure.in (arm*-*-*): New alternative.\n\n1998-11-30  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp-impl.h (union ieee_double_extract): Special case for\n\tlittle-endian arm.\n\t(LIMBS): Alias for PTR.\n\n1998-11-26  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* longlong.h (m68000 umul_ppmm): Use `muluw', not `mulu'.\n\t(m68k stuff): Clean up; add coldfire support.\n\n1998-11-23  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/mips3/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.\n\t(KARATSUBA_SQR_THRESHOLD): #define.\n\n\t* mpn/sparc32/v9/README: New file.\n\n1998-11-20  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/x86/README: New file.\n\n\t* mpn/arm/gmp-mparam.h: New file.\n\t* mpn/pa64/gmp-mparam.h: New file.\n\t* mpn/hppa/gmp-mparam.h: New file.\n\t* mpn/x86/pentium/gmp-mparam.h: New file.\n\t* mpn/sparc32/v9/gmp-mparam.h: New file.\n\t* mpn/powerpc32/gmp-mparam.h: New file.\n\t* mpn/x86/p6/gmp-mparam.h: New file.\n\n\t* mpn/alpha/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.\n\t(KARATSUBA_SQR_THRESHOLD): #define.\n\n\t* mpn/configure.in: Point to x86/p6 when appropriate.\n\n\t* mpn/power/umul.s: New file.\n\t* mpn/power/sdiv.s: New file.\n\t* mpn/pa64/addmul_1.S: New file.\n\t* mpn/pa64/submul_1.S: New file.\n\t* mpn/pa64/mul_1.S: New file.\n\t* mpn/pa64/udiv_qrnnd.c: New file.\n\t* mpn/pa64/umul_ppmm.S: New file.\n\t* mpn/mips2/umul.s: New file.\n\t* mpn/m68k/mc68020/umul.s: New file.\n\t* mpn/m68k/mc68020/udiv.s: New file.\n\t* mpn/hppa/hppa1_1/umul.s: New file.\n\t* mpn/alpha/umul.s: New 
file.\n\t* mpn/a29k/udiv.s: New file.\n\t* mpn/a29k/umul.s: New file.\n\n1998-11-17  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/x86/mul_basecase.S: New file for non-pentiums.\n\t* mpn/x86/mul_basecase.S: Move to mpn/x86/pentium.\n\n1998-11-16  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* make.bat: Compile mul_basecase.c and sqr_basecase.c.\n\n1998-11-10  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/invert.c: Defer writing to parameter `invert' until\n\tend.\n\n1998-11-03  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/pa64/udiv_qrnnd.c: Handle more border cases.\n\n1998-10-29  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* insert-dbl.c: Special case biased exponents < 1; Get boundary for\n\tInf right.\n\n\t* longlong.h (COUNT_LEADING_ZEROS_NEED_CLZ_TAB): New #define.\n\n1998-10-28  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/powerpc32/submul_1.s: Rewrite, optimizing for PPC604.\n\t* mpn/powerpc32/addmul_1.s: Likewise.\n\t* mpn/powerpc32/lshift.s: Likewise.\n\n1998-10-23  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* config/mt-sprc9-gcc (XCFLAGS): Add -Wa,-xarch=v8plus.\n\n\t* mpn/sparc32/v9/submul_1.s: New file.\n\n1998-10-21  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/config/mt-pa2hpux: New file.\n\t* mpn/configure.in (hppa2.0*-*-*): Use new 64-bit code.\n\n\t* config.sub: Recognize hppa2.0 as CPU type.\n\n\t* longlong.h (64-bit hppa): Add umul_ppmm and udiv_qrnnd.\n\t* mpn/pa64/mul_1.S: New file.\n\t* mpn/pa64/addmul_1.S: New file.\n\t* mpn/pa64/submul_1.S: New file.\n\t* mpn/pa64/umul_ppmm.S: New file.\n\t* mpn/pa64/udiv_qrnnd.c: New file.\n\n1998-10-20  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/pprime_p.c: Pass 1L, not 1, to mpz_cmp_ui.\n\n\t* mpz/fdiv_q_2exp.c: Cast `long' argument to `mp_limb_t' for mpn calls.\n\t* mpz/gcd_ui.c: Likewise.\n\t* mpz/add_ui.c: Likewise.\n\t* mpz/sub_ui.c: Likewise.\n\n1998-10-19  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/bdivmod.c: Avoid using 
switch statement with mp_limb_t\n\tindex.\n\n1998-10-17  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/sparc32/v9/mul_1.s: Misc cleanups.\n\t* mpn/sparc32/v9/addmul_1.s: Misc cleanups.\n\n1998-10-16  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/tests/{add,sub,}mul_1.c: Print xlimb using mpn_print.\n\n\t* mpz/tests/t-powm.c (SIZE): Increase to 50.\n\t(EXP_SIZE): New parameter; use it for computing exp_size.\n\n1998-10-15  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/divrem_newt.c: Use TMP_ALLOC interface.\n\n\t* mpn/generic/sqrtrem.c: Check BITS_PER_MP_LIMB before defining\n\tassembly variants of SQRT.\n\n1998-10-14  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/tests: Clean up timing routines.  Don't include longlong.h\n\twhere it is not needed.\n\t(mpn_print): Handle printing when _LONG_LONG_LIMB.\n\t* mpn/tests/{add,sub,}mul_1.c: Generate xlimb with mpn_random2\n\tand do it whether TIMES != 1 or not.\n\n\t* mpn/generic/mul_n.c: Delay assignment of `sign' for lower\n\tregister pressure.\n\n\t* mpn/sparc32/v9/mul_1.s: New file.\n\n\t* config/mt-sprc9-gcc: New file.\n\t* configure.in: Use it.\n\n\t* mpn/configure.in: Use sparc64 for Solaris 2.7 and later with a\n\tsparc v9 CPU.\n\t* mpn/configure.in: Use sparc32/v9 for Solaris 2.6 or earlier with\n\ta sparc v9 CPU.\n\n\t* mpf/sub.c: In initial code for ediff == 0, limit precision\n\tbefore jumping to `normalize'.\n\n1998-10-13  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/hppa/hppa2_0/add_n.s: New file.\n\t* mpn/hppa/hppa2_0/sub_n.s: New file.\n\t* mpn/configure.in: Handle hppa2.0 (32-bit code for now).\n\n\t* config.guess: Update from egcs 1.1.\n\t(9000/[3478]??:HP-UX:*:*): Properly return 2.0 for all known 2.0\n\tmachines.\n\n1998-10-07  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/root.c (mpz_root): New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_root): Declare.\n\n\t* mpz/perfpow.c: New file.\n\t* mpz/Makefile.in: Compile 
it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_perfect_power_p): Declare.\n\n\t* mpz/remove.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_remove): Declare.\n\n\t* mpz/bin_ui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_bin_ui): Declare.\n\n\t* mpz/bin_uiui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_bin_uiui): Declare.\n\n1998-09-16  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* longlong.h: Test for __powerpc__ in addition to _ARCH_PPC.\n\nSat Sep  5 17:22:28 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/cmp_si.c: Compare most significant mantissa limb before\n\ttrying to deduce anything from the limb count.\n\t* mpf/cmp_ui.c: Likewise.\n\nTue Aug 18 10:24:39 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/pprime_p.c (mpz_probab_prime_p): Add new code block\n\tfor doing more dividing.\n\nSat Aug 15 18:43:17 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/divrem_newt.c: New name for divrem_newton.c.\n\t* mpn/Makefile.in: Corresponding changes.\n\t* mpn/configure.in: Likewise.\n\nWed Aug 12 23:07:09 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* config.guess: Handle powerpc for NetBSD.\n\nTue Jul 28 23:10:55 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/fib_ui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_fib_ui): Declare.\n\nWed Jun 17 22:52:58 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* make.bat: Fix typo, `asm-synt.h' => `asm-syntax.h'.\n\nWed Jun  3 11:27:32 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* config/mt-pwr: New file.\n\t* config/mt-ppc: New file.\n\t* configure.in: Use the new files.\n\nTue Jun  2 13:04:17 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/sparc32/v9/addmul_1.s: New file.\n\t* mpn/config/mt-sprc9: New file.\n\t* mpn/configure.in: Use mt-sprc9.\n\nTue May 26 11:24:18 1998  
Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* demos/factorize.c (factor_using_pollard_rho): Pass correct\n\tparameters in recursive calls; join the two recursion arms.\n\n\t* mpf/set_q.c: Set result sign.\n\tWhen normalizing the numerator, don't allow it to increase in size\n\tbeyond prec.\n\nTue May 19 17:28:14 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* demos/factorize.c (factor_using_division): Call fflush\n\talso for the factor 2.\n\nMon May 18 15:51:01 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* make.bat: Pass -fomit-frame-pointer.  Do not pass -g.\n\nTue May  5 01:42:50 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/Makefile.in (LOCAL_CC): Remove definition.\n\n\t* gmp.h: Get rid of GMP_SMALL stuff.\n\t* mpz/Makefile.in: Likewise.\n\t* mpq/Makefile.in: Likewise.\n\t* mpf/Makefile.in: Likewise.\n\n\t* mpz/invert.c: Fix typo in comment.\n\nMon May  4 23:05:32 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/sqrtrem.c: Check that __arch64__ is not defined\n\tbefore defining sparc SQRT.\n\nMon Apr 20 19:16:17 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/gcdext.c: Allow gp to be NULL.\n\n1998-04-03  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/configure.in: Recognize `alphaev5*', not `alphaev5'.\n\n\t* config.guess: Handle CPU variants for NetBSD.\n\nMon Mar 16 13:07:54 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/pprime_p.c: Use mpn_mod_1/mpn_preinv_mod_1 for computing mod PP,\n\tnot mpz_tdiv_r_ui (which expects an `unsigned long').\n\t(mpz_probab_prime_p): Change type of `r' to mp_limb_t.\n\nThu Mar 12 17:19:04 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp.h (mpf_ceil, mpf_floor, mpf_trunc): Add declarations.\n\n\t* config.guess: Update from FSF version.\n\t* config.sub: Likewise.\n\n\t* config.guess: Add special handling of alpha-*-NetBSD.\n\nWed Mar 11 00:55:34 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/inp_str.c: Update from 
set_str.c.\n\tProperly increment `nread' when skipping minus sign.\n\n\t* mpz/set_str.c: Check for empty string after having skipped\n\tleading zeros.\n\nMon Mar  9 19:28:00 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/set_str.c: Skip leading zeros.\n\nWed Mar  4 19:29:16 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp.h (mpz_cmp_si): Cast argument before calling mpz_cmp_ui.\n\n\t* demos/factorize.c: Rewrite.\n\n1998-02-04  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* configure.in (i[3456]86* etc): Check if using gcc before\n\tchoosing mt-x86.\n\n\t* configure.in (m68*-*-*): New alterantive.\n\t* config/mt-m68k: New file.\n\n\t* mpn/alpha/invert-limb.s: Put tables in text segment,\n\tsince not all systems support \"rdata\".\n\nWed Feb  4 02:20:57 1998  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp.h (__GNU_MP_VERSION_SNAP): New #define.\n\t(__GNU_MP_VERSION_MINOR): Now 1.\n\nWed Jan 28 22:29:36 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* longlong.h (alpha udiv_qrnnd): #define UDIV_NEEDS_NORMALIZATION.\n\nWed Jan 28 20:28:19 1998  Torbjorn Granlund  <tege@sophie.matematik.su.se>\n\n\t* mpz/pprime_p.c (mpz_probab_prime_p): Delete 59 from tried divisors.\n\nMon Jan 26 01:39:02 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpz/pprime_p.c (mpz_probab_prime_p): Major overhaul: Check small\n\tnumers specifically; check small factors, then perform a fermat test.\n\nTue Jan 13 14:58:28 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* longlong.h (alpha udiv_qrnnd): Call __mpn_invert_normalized_limb\n\tand udiv_qrnnd_preinv.\n\nWed Jan  7 01:52:54 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpn/configure.in (alpha*, extra_functions): Add invert-limb and\n\tremove udiv_qrnnd.\n\n\t* mpn/tests/divrem.c: Get allocations right.\n\n\t* mpn/generic/divrem.c: Conditionally pre-invert most significant\n\tdivisor limb.\n\nTue Jan  6 23:08:54 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpn/generic/divrem_1.c: 
Rename variables to comply to conventions.\n\tMake `i' have type `mp_size_t'.\n\nTue Dec 30 22:21:42 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/tdiv_qr_ui.c: Return the remainder.\n\t* mpz/tdiv_r_ui.c: Likewise.\n\t* mpz/tdiv_q_ui.c: Likewise.\n\t* gmp.h: Change return type of mpz_tdiv_qr_ui, mpz_tdiv_r_ui,\n\tmpz_tdiv_q_ui.\n\n\t* mpz/tdiv_ui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_tdiv_ui): Declare.\n\nFri Nov  7 04:21:15 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/integer.c (FUNC_NAME): Fix bogus test for mpf_trunc.\n\n\t* demos/isprime.c: New file.\n\n\tSat Nov  1 19:32:25 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/cmp_abs.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_cmp_abs): Declare.\n\n\t* mpz/cmp_abs_ui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_cmp_abs_ui): Declare.\n\nSat Sep 27 04:49:52 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpz/fdiv_r_2exp.c: Get allocation for `tmp' right.\n\n\t* mpz/fdiv_q_2exp.c: In final result adjustment code, handle\n\tthat intermediate result is zero.\n\n\t* mpz/tests/t-2exp.c: New file.\n\t* mpz/tests/Makefile.in: Handle t-2exp.c.\n\nFri Sep 26 16:29:21 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpz/divexact.c: Fix typo in test for whether to copy numerator to\n\tquotient and move that statement to after handling quotient and\n\tdenominator overlap.  
Misc cleanups.\n\n\t* mpn/generic/gcd.c: Change count argument of mpn_lshift/mpn_rshift\n\tcalls to `unsigned int'.\n\t* mpz/divexact.c: Likewise.\n\nMon Sep 22 02:19:52 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpz/tests/t-powm.c: Decrease `reps' to 2500.\n\n\t* mpz/tests/t-pow_ui.c: New file.\n\t* mpz/tests/Makefile.in: Handle t-pow_ui.c.\n\n\t* mpz/ui_pow_ui.c: Get special cases for exponent and base right.\n\n\t* mpz/pow_ui.c: Increase temp space allocation by 1 limb.\n\tSplit `rsize' into two variables; compute space allocation into\n\t`ralloc'.\n\nSun Sep  7 04:15:12 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpn/pa64/lshift.s: New file.\n\t* mpn/pa64/rshift.s: New file.\n\t* mpn/pa64/sub_n.s: New file.\n\nSat Sep  6 19:14:13 1997  Torbjorn Granlund  <tege@gmp.tmg.se>\n\n\t* mpn/pa64/add_n.s: New file.\n\t* mpn/pa64: New directory.\n\nTue Aug 19 16:17:09 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpz/swap.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_swap): Declare.\n\n\t* mpn/generic/mul_n.c: Push assignment of x and y pointers into the\n\tif/else clauses in several places.  
(Decreases register pressure.)\n\nMon Aug 18 03:29:50 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpn/thumb/add_n.s: New file.\n\t* mpn/thumb/sub_n.s: New file.\n\t* mpn/arm/add_n.s: New file.\n\t* mpn/arm/sub_n.s: New file.\n\n\t* mpz/powm.c: After mpn_mul_n and mpn_mul calls, adjust product size\n\tif most significant limb is zero.\n\t* mpz/powm_ui.c: Likewise.\n\nFri Aug 15 02:13:57 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpn/arm/m/mul_1.s: New file.\n\t* mpn/arm/m/addmul_1.s: New file.\n\n\t* mpn/powerpc32/mul_1.s: Rewrite.\n\n\t* mpn/alpha/mul_1.s: Prefix labels with `.'.\n\nMon Aug 11 02:37:16 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpn/powerpc32/add_n.s: Rewrite.\n\t* mpn/powerpc32/sub_n.s: Rewrite.\n\nSun Aug 10 17:07:15 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpn/powerpc32/addmul_1.s: Delete obsolete comments.\n\t* mpn/powerpc32/submul_1.s: Likewise.\n\nFri Jul 25 20:07:54 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpz/addmul_ui.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_addmul_ui): Declare.\n\n\t* mpz/setbit.c: Add missing code after final `else'.\n\nTue Jul 22 17:45:01 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>\n\n\t* mpn/sh/add_n.s: Fix typo.\n\t* mpn/sh/sub_n.s: Likewise.\n\n\t* longlong.h (ns32k count_trailing_zeros): Fix typo.\n\n\t* insert-dbl.c: Check for exponent overflow and return Inf.\n\n\t* mpz/get_d.c: Rewrite to avoid rounding errors.\n\nThu May 29 11:51:07 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpq/add.c: Swap some usages of tmp1 and tmp2 to make sure\n\ttheir allocation suffices.\n\t* mpq/sub.c: Likewise.\n\nWed Apr 16 02:24:25 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* demos/pexpr.c: New file.\n\n\t* mpn/generic/mul_n.c: Misc optimizations from Robert Harley.\n\n\t* gmp-impl.h (MPZ_PROVOKE_REALLOC): New #define.\n\nSat Apr 12 17:54:04 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* mpz/tstbit.c: New file.\n\t* mpz/Makefile.in: 
Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_tstbit): Declare.\n\n\t* mpz/tests/logic.c: Use MPZ_CHECK_FORMAT.\n\t* mpz/tests/bit.c: New test.\n\t* mpz/tests/Makefile.in: Handle bit.c.\n\n\t* mpz/ior.c: In -OP2,+OP1 case, normalize OP2 after call to mpn_sub_1.\n\n\t* gmp-impl.h (MPZ_CHECK_FORMAT): New #define.\n\nThu Apr 10 00:30:14 1997  Torbjorn Granlund  <tege@tmg.se>\n\n\t* longlong.h (POWER/PowerPC): Test _ARCH_PWR instead of _IBMR2.\n\nWed Apr  9 18:23:31 1997  Torbjorn Granlund  <tege@pro.tmg.se>\n\n\t* gmp-impl.h: Move defaulting of UMUL_TIME and UDIV_TIME from here...\n\t* longlong.h: ...to here.\n\nSun Mar 30 12:16:23 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/next_prime.c: New file.\n\n\t* mpn/generic/perfsqr.c: Remove definitions of PP and PP_INVERTED.\n\t* gmp-impl.h: Put them here.\n\nFri Mar 28 08:18:05 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Define as inline asm for\n\tfor x86, but leave disabled for now.\n\nFri Feb 28 02:39:47 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/Makefile.in (.S.o): Pass SFLAGS and CFLAGS also to compiler\n\tfor assembly phase.\n\t(.s.o): Pass SFLAGS.\n\nWed Feb 26 06:46:08 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/configure.in: For Pentium Pro, use default code, not Pentium\n\toptimized code.\n\n\t* mpn/x86/addmul_1.S: Unroll and optimize for Pentium Pro.\n\t* mpn/x86/submul_1.S: Likewise.\n\nThu Feb 13 08:26:09 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/Makefile.in: Compile floor.o, ceil.o and trunc.o (from\n\tinteger.c).\n\t* make.bat: Likewise.\n\nWed Feb  5 05:58:44 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/configure.in (alpha*): Add cntlz to extra_functions.\n\nWed Feb  4 03:30:45 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/integer.c: New file (supporting mpf_floor, mpf_ceil, mpf_trunc).\n\nMon Feb  3 14:21:36 1997  Torbjorn Granlund  
<tege@matematik.su.se>\n\n\t* make.bat: Fix typo, set_dfl_prc => set_dfl_prec.\n\nSun Feb  2 02:34:33 1997  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/out_str.c: After outputting `-', decrement n_digits.\n\nWed Jan  8 02:50:20 1997  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/divrem.c: qextra_limbs => qxn.\n\nWed Dec 18 07:50:46 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/tests/t-tdiv.c (SIZE): Increase to 200.\n\nTue Dec 17 19:32:48 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/divrem.c (mpn_divrem_classic): New name for mpn_divrem.\n\t* gmp.h (mpn_divrem): New function.\n\t* mpn/generic/divrem_newton.c: New file.\n\t* mpn/configure.in (functions): Add divrem_newton.\n\t* make.bat: Likewise.\n\nThu Dec 12 17:55:13 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* gmp.h (_GMP_H_HAVE_FILE): Test also __dj_include_stdio_h_.\n\nSat Dec  7 09:40:06 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/alpha/invert-limb.s: New file.\n\nThu Dec  5 01:25:31 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/ui_pow_ui.c (mpz_pow2): New (static) function.\n\t(mpz_ui_pow_ui): Rewrite.\n\n\t* make.bat: `pre_mod_1.c' => `pre_mod_.c'.  
Fix typo in path to\n\tgmp-mpar.h.\n\nFri Nov 15 00:49:55 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/ui_pow_ui.c: Rewrite for better speed.\n\nFri Nov  1 16:36:56 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* Makefile.in (recursive make rules): Use `&&' instead of `;' as\n\tdelimiter.\n\nFri Oct 25 17:12:36 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* gmp-impl.h (Cray/uxp MPN_COPY): Really declare as inline.\n\nThu Oct 24 15:08:19 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/fujitsu/rshift.c: Fix typo in loop boundaries.\n\nFri Oct 18 03:13:54 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/configure.in: Recognize `nextstep' for m68k variants; likewise\n\tfor x86 variants.\n\n\t* mpn/x86/syntax.h (INSND): New macro.\n\t* mpn/x86/[lr]shift.S: Use INSND.\n\t* mpn/x86/pentium/[lr]shift.S: Likewise.\n\t* mpn/config/t-oldgas (SFLAGS): Pass -DOLD_GAS.\n\n\t* gmp-impl.h: In code for determining endianess, test also\n\t__BIG_ENDIAN__ and __hppa__.  
Remove test of __NeXT__.\n\nWed Oct 16 03:50:34 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpf/set_str.c: Let `prec' determine precision used in\n\texponentiation code; decrease allocation accordingly.\n\n\t* mpn/vax: Change `jsob*' to `sob*' in all files.\n\nTue Oct 15 03:54:06 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* longlong.h (m88110 udiv_qrnnd): Change type of intermediate quotient\n\tto DImode (divu.d generates a 64-bit quotient).\n\n\t* configure.in (m88110*): Fix typo.\n\n\t* mpf/get_str.c: Compute exp_in_base using `double' to avoid overflow.\n\n\t* gmp-impl.h (struct bases): Change type of chars_per_bit_exactly from\n\tfloat to double.\n\t* mpn/mp_bases.c (__mp_bases): Give 17 digits for chars_per_bit_exactly\n\tfield.\n\n\t* mpf/get_str.c: Let `prec' determine precision used in\n\texponentiation code; decrease allocation accordingly.\n\nSun Oct 13 03:31:53 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* longlong.h: Major cleanup.\n\t(__udiv_qrnnd_c): Compute remainders using multiply and subtract,\n\tnot explicit `%' operator.\n\t(C umul_ppmm): Get rid of a redundant __ll_lowpart.\n\n\t* mpz/invert.c: Properly detect all operands that would yield an\n\tundefined inverse; make sure the inverse is always positive.\n\n\t* mpz/xor.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_xor): Declare.\n\n\t* mpz/tests/logic.c: Also test mpz_xor.\n\n\t* mpz/lcm.c: Special case for when either operand equals 0.\n\nSat Oct 12 01:57:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/gcd.c (find_a): Don't inline on x86.\n\n\t* Makefile.in (CFLAGS): Default to just `-g'.\n\n\t* configure.in: Recognize 386 and 486 wherever other x86 cpus are\n\trecognized.\n\t* configure.in: Use mt-x86 for all x86 cpus.\n\t* config/mt-x86: New file.\n\n\t* mpn/alpha/cntlz.s: New file.\n\nTue Oct  8 00:16:18 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* 
longlong.h: Define smul_ppmm for Fujitsu vpp/uxp.\n\tRewrite umul_ppmm to actually work on the hardware.\n\n\t* mpn/x86/sub_n.S: Avoid parens around displacement of `leal'.\n\t* mpn/x86/add_n.S: Likewise.\n\n\t* mpn/x86/syntax.h (R): Define differently depending on __STDC__.\n\nMon Oct  7 16:48:08 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* longlong.h: Don't test for __NeXT__ in outer 68k conditional;\n\tadd test for __m68k__.\n\nSun Oct  6 00:59:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* gmp.h: Declare mpn_random.\n\t* make.bat: Compile mpn/generic/random.c.\n\n\t* longlong.h: Define umul_ppmm for Fujitsu vpp/uxp.\n\n\t* gmp-impl.h: Protect definitions using `__attribute__ ((mode (...)))'\n\twith test also for __GNUC_MINOR__.\n\n\t* gmp.h: Don't define macros using __builtin_constant_p when using\n\tNeXT's compiler.\n\nFri Oct  4 16:53:50 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/lcm.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h (mpz_lcm): Declare.\n\nWed Sep 25 00:06:21 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpq/tests/t-cmp_ui.c: Make sure numerator and denominator of `b' is\n\twithin limits of an `unsigned long int'.\n\n\t* mpz/tests/t-powm_ui.c: Change type of exp2 to `unsigned long int'.\n\nTue Sep 24 18:58:20 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/powm_ui.c: Make result always positive.\n\n\t* urandom.h (urandom): Make it return mp_limb_t.\n\n\t* gmp-impl.h (CNST_LIMB): New macro.\n\t* mpn/mp_bases.c: Use CNST_LIMB.\n\t* mpn/generic/hamdist.c (popc_limb): Likewise.\n\t* mpn/generic/popcount.c (popc_limb): Likewise.\n\t* mpn/generic/perfsqr.c: Likewise.\n\nFri Sep 20 03:08:10 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/pprime_p.c: When n <= 3, don't clear out n before using it.\n\nWed Sep 18 11:22:45 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/fujitsu/mul_1.c: New file.\n\t* 
mpn/fujitsu/addmul_1.c: New file.\n\t* mpn/fujitsu/sub_n.c: New file.\n\t* mpn/fujitsu/add_n.c: Mew file.\n\nSun Sep 15 03:13:02 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/random.c: New file.\n\t* mpn/configure.in (functions): Add `random'.\n\n\t* gmp-impl.h (MPN_COPY): Define as annotated inline function for\n\tCrays and Fujitsu VPPs.\n\n\t* gmp.h (mp_size_t): Define as `int' for non-MPP Cray.\n\t(mp_exp_t): Likewise.\n\n\t* configure.in: Add support for Fujitsu VPP machines.\n\t* mpn/configure.in: Likewise.\n\t* config.guess: Likewise.\n\t* config.sub: Likewise.\n\n\t* mpn/fujitsu/rshift.c: New file.\n\t* mpn/fujitsu/lshift.c: New file.\n\t* mpn/fujitsu: New directory, for Fujitsu VPP machines.\n\nWed Sep 11 11:34:38 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/mul_n.c (__gmpn_mul_n): New name for impn_mul_n.\n\tCall __gmpn_mul_basecase, not impn_mul_n_basecase; update parameter\n\tlist to work with __gmpn_mul_basecase.\n\t(__gmpn_sqr): New name for impn_sqr_n.\n\tCall __gmpn_sqr_basecase, not impn_sqr_n_basecase; update parameter\n\tlist to work with __gmpn_sqr_basecase.\n\t(mpn_mul_n): Update calls to match new names and parameter conventions.\n\t* gmp-impl.h (MPN_MUL_N_RECURSE): Likewise.\n\t(MPN_SQR_RECURSE): New name for MPN_SQR_N_RECURSE.\n\tUpdate calls to match new names and parameter conventions.\n\t* mpn/generic/mul.c: Never perform multiply explicitly here, call\n\t__gmpn_mul_basecase instead.\n\tUpdate calls to match new names and parameter conventions.\n\n\t* mpn/x86/mul_basecase.S: New file.\n\t* mpn/generic/mul_basecase.c: New file.\n\t* mpn/generic/sqr_basecase.c: New file.\n\nWed Sep  4 02:59:21 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/set_str.c: Let `0b' and `0B' mean base 2.\n\nFri Aug 30 00:44:00 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* longlong.h (x86 umul_ppmm): Work around GCC bug that was\n\ttriggered by Aug 28 change.\n\n\t* mpbsd/min.c 
(digit_value_in_base): New function.\n\n\t* mpz/set_str.c: Refine allocation size computation, use\n\tchars_per_bit_exactly instead of chars_per_limb.\n\n\t* mpbsd/Makefile.in (.c.o): Add -D_mpz_realloc=_mp_realloc.\n\nWed Aug 28 02:52:14 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* longlong.h (x86 umul_ppmm): Don't cast result operands.\n\t(x86 udiv_qrnnd): Likewise.\n\t(default smul_ppmm): Fix typo, umul_ppmm => smul_ppmm.\n\t(default umul_ppmm): New #define using smul_ppmm.\n\t(vax smul_ppmm): New #define.\n\t(vax umul_ppmm): Delete.\n\t(POWER umul_ppmm): Delete.\n\t(IBM 370 smul_ppmm): New #define.\n\t(IBM 370 umul_ppmm): Delete.\n\t(IBM RT/ROMP smul_ppmm): New #define.\n\t(IBM RT/ROMP umul_ppmm): Delete.\n\nTue Aug 27 01:03:25 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* gmp-impl.h (__gmp_0): Make it `const'.\n\n\t* mpn/Makefile.in (clean mostlyclean): Comment out recursive clean\n\tof `tests'.\n\n\t* mpn/generic/mul.c: Identify when we do squaring, and call\n\timpn_sqr_n_basecase/impn_sqr_n as appropriate.  
Use\n\tKARATSUBA_MUL_THRESHOLD and KARATSUBA_SQR_THRESHOLD.\n\tDon't #define KARATSUBA_THRESHOLD.\n\n\t* mpn/generic/mul_n.c: Don't #define KARATSUBA_THRESHOLD.\n\t(impn_mul_n, impn_sqr_n): Rewrite, based on code contributed by\n\tRobert Harley.\n\t(impn_sqr_n_basecase): Rewrite.\n\n\t* gmp-impl.h (KARATSUBA_MUL_THRESHOLD): New #define.\n\t(KARATSUBA_SQR_THRESHOLD): Likewise.\n\t(MPN_SQR_N_RECURSE): Use KARATSUBA_SQR_THRESHOLD.\n\t(MPN_MUL_N_RECURSE): Use KARATSUBA_MUL_THRESHOLD.\n\n\t* configure.in: Fix typo in last change.\n\nMon Aug 26 22:25:18 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpn/generic/random2.c: Fix typo, `alpha__' => `__alpha'.\n\t* mpf/random2.c: Likewise.\n\nSun Aug 25 00:07:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>\n\n\t* mpz/tests/t-mul.c: Also test squaring.\n\nFri Aug 16 05:12:08 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mp_clz_tab.c (__clz_tab): Declare as `const'.\n\t* version.c (gmp_version): Likewise.\n\t* mpn/generic/sqrtrem.c (even_approx_tab, odd_approx_tab): Likewise.\n\nThu Aug 15 02:34:47 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h: Fix typo, `mips__' => `__mips'.\n\n\t* mpf/set_str.c: Allow a number to start with a period, if next\n\tposition contains a digit.\n\nTue Aug 13 18:41:25 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/gcdext.c: Get cofactor sign right for negative input operands.\n\tClean up code for computing tt.\n\n\t* mpz/invert.c: Get rid of variable `rv'.\n\n\t* mpz/divexact.c: Test for zero divisor in special case for zero\n\tdividend.\n\nMon Aug 12 18:04:07 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/?div_*_ui.c: Special case for division by 0.\n\t* mpz/tdiv_q.c: Likewise.\n\nSat Aug 10 14:45:26 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/dmincl.c: Special case for division by 0.\n\n\t* mpz/tdiv_*_ui.c: Delete special case for dividend being 0; handle\n\tit when computing size 
after mpn_divmod_1 call.\n\n\t* mp_bpl.c: (__gmp_junk): New variable.\n\t(__gmp_0): New constant.\n\n\t* gmp-impl.h (DIVIDE_BY_ZERO): New #define.\n\nFri Aug  9 20:03:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/divexact.c: Test for dividend being zero before testing\n\tfor small divisors.\n\nThu Aug  8 13:20:23 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* configure.in: Require operating system specification for cpus\n\twhere assembly syntax differs between system.\n\n\t* Makefile.in (many targets): Change `-' action prefix to `@'.\n\n\t* mpn/Makefile.in: (distclean): Fix typo.\n\n\t* mpq/cmp_ui.c: Rename function to _mpq_cmp_ui.\n\t(mpq_cmp_ui): #undef deleted.\n\t* mpz/cmp_si.c: Rename function to _mpz_cmp_si.\n\t(mpz_cmp_si): #undef deleted.\n\t* mpz/cmp_ui.c: Rename function to _mpz_cmp_ui.\n\t(mpz_cmp_ui): #undef deleted.\n\t* Makefile.in: Corresponding changes.\n\n\t* mpf/get_prc.c: Return the *highest* precision achievable.\n\n\t* mpf/get_str.c: Complete rewrite.\n\n\t* mpf/set_str.c (swapptr): New #define.\n\t(assert): New #define.\n\t* mpf/set_str.c: Set prec to one more than the saved _mp_prec.\n\tMisc cleanups.\n\n\t* mpz/set_str.c: #include string.h.\n\t* mpf/out_str.c: #include string.h.\n\t* mpbsd/xtom.c: #include string.h and ctype.h.\n\t* mpbsd/mout.c: #include string.h.\n\nWed Aug  7 11:46:04 EDT 1996  Ken Weber <kweber@mcs.kent.edu>\n\n\t* mpn/generic/gcd.c: Reorder mpn_gcd argument list.\n\t* mpz/gcd.c: Change call to mpn_gcd.\n\t* gmp.texi: Update manual entry on mpn_gcd.\n\t* mpn/generic/bdivmod.c: Delete limb cache to make mpn_bdivmod\n\treentrant.\n\nWed Aug  7 02:15:38 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/get_str.c: Rewrite code for converting integral part of a\n\tnumber with both an integral and fractional part.\n\n\t* mpf/set_str.c: Get rid of variable xxx.  
New variables madj and radj.\n\tIn exp_in_base==0 case, add madj to msize for EXP field.\n\n\t* mpz/tests/t-gcd.c: Test deleted.  Rename t-gcd2.c to t-gcd.c.\n\tIncrease reps to 2000.\n\t* mpz/tests/t-gcd2.c: Get rid of mpz_refgcd.\n\n\t* mpf/set_str.c: Ignore excess limbs in MP,MSIZE.\n\nThu Jul 25 04:39:10 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/configure.in: Fix typo in setting path, \"sparc\" => \"sparc32\".\n\nWed Jul 24 02:27:02 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/gcdext.c: Reorganize and clean up.  Get rid of all\n\tsigned limb arithmetic.\n\nMon Jul 22 02:39:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/gcdext.c (mpn_gcdext): For large enough operands,\n\twork with most significant *two* limbs.\n\t(div2): New function (two variants).\n\t(THRESHOLD): New #define.\n\n\t* mpz/gcdext.c: Fix typo in MPZ_TMP_INIT call.\n\n\t* longlong.h (alpha UMUL_TIME): Now 30.\n\t(alpha UDIV_TIME): Now 350.\n\t(x86 UMUL_TIME): Now 10 (let Pentium decide).\n\t(SuperSPARC UDIV_TIME): Override default.\n\n\t* extract-dbl.c (MP_BASE_AS_DOUBLE): Don't redefine here.\n\n\t* extract-dbl.c: New name for extract-double.c.\n\t* insert-dbl.c: New name for insert-double.c.\n\t* Makefile.in: Corresponding changes.\n\t* make.bat: Likewise.\n\n\t* mpz/Makefile.in (.c.o): Don't pass non-portable `-f' to cp.\n\t* mpq/Makefile.in: Likewise.\n\t* mpf/Makefile.in: Likewise.\n\nSat Jul 20 01:35:18 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/getlimbn.c: Take ABS of integer->_mp_size.\n\n\t* mpz/divexact.c: Use mpn_divmod_1 if divisor is a single limb.\n\nThu Jul 18 00:31:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/popcount.c (popc_limb): Use different masking trick\n\tfor first step (due to David Seal).\n\t* mpn/generic/hamdist.c (popc_limb): Likewise.\n\nWed Jul 17 23:21:48 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/divrem.c: 
In MPN_COPY_DECR call, copy dsize - 1 limbs.\n\nSun Jul 14 17:47:46 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* configure.in: Handle sparc9, sparc64, and ultrasparc like sparc8.\n\nThu Jul 11 14:05:54 1996  J.T. Conklin  <jtc@rtl.cygnus.com>\n\n\t* longlong.h (mc680x0): Define umul_ppmm, udiv_qrnnd, sdiv_qrnnd\n\tfor the '020, '030, '040, and '332.  Define count_leading_zeros\n\tfor the '020, '030, '040, and '060.\n\nSun Jul 14 15:24:53 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\tFrom Joe Keane:\n\t* mpq/equal.c: Take ABS of num1_size before passing it to mpn_cmp.\n\nFri Jul 12 17:11:17 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/sqrtrem.c (SQRT): New asm for x86, but leave it\n\tdisabled for now.\n\n\t* mpn/generic/sqrtrem.c: Use MP_BASE_AS_DOUBLE.\n\nWed Jul 10 03:17:45 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* cre-mparam.c: Delete obsolete file.\n\n\t* gmp.h: #define _LONG_LONG_LIMB if __mips && _ABIN32.\n\t* longlong.h: Test __mips instead of __mips__.\n\nSun Jul  7 23:19:13 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* longlong.h (_PROTO): Define, unless already defined.\n\t(alpha __udiv_qrnnd): Declare using _PROTO.\n\t(hppa __udiv_qrnnd): Likewise.\n\t(sparc __udiv_qrnnd): Likewise.\n\nMon Jul  1 01:44:30 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* config.guess: Update from master version; add Cray x90 handling.\n\nWed Jun 26 05:35:02 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/power/add_n.s (__mpn_add_n): Work around GAS bug.\n\t* mpn/power/sub_n.s (__mpn_sub_n): Likewise.\n\n\t* insert-double.c: Rework loop to avoid potential overflow.\n\n\t* mpq/get_d.c: For vax, if qsize > N_QLIMBS, ignore excess limbs.\n\n\t* mpq/tests/t-get_d.c (SIZE): Special case for vax.\n\n\t* gmp.h (mpX_cmp_ui): #define also when ! 
__GNUC__.\n\nMon Jun 24 17:13:21 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* longlong.h (vax sdiv_qrnnd): Fix typo.\n\nSat Jun 15 01:33:33 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h: Support `small' and `large' type and function variants,\n\tcontrolled by GMP_SMALL.\n\n\t* mpz/Makefile.in (.c.o): Compile each function twice, for small and\n\tlarge variant.\n\t(MPZS_OBJS): New variable.\n\t(libmpz.a): Include MPZS_OBJS in archive.\n\t* mpf/Makefile.in: Analogous changes.\n\t* mpq/Makefile.in: Analogous changes.\n\n\t* gmp.h: Prefix all functions with __gmp, to allow namespace-clean\n\tinternal calls.\n\n\t* mp.h: Rip out __MP_SMALL__ stuff.\n\t(__mpz_struct): mp_size_t => int.\n\n\t* mpz/invert.c: #include \"gmp-impl.h\".\n\tUse MPZ_TMP_INIT, not mpz_init.\n\n\t* mpz/gcdext.c: Rewrite to call mpn_gcdext.\n\nFri Jun 14 18:05:29 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/gcdext.c (s0size): New parameter.\n\t* gmp.h (mpn_gcdext): Update prototype.\n\n\t* mpn/generic/gcdext.c: Major rewrite.\n\nMon Jun 10 00:14:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/dump.c: Add missing `else'.\n\nFri Jun  7 03:35:12 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Makefile.in (gmp_toc.html): Pass -expandinfo to texi2html.\n\nThu Jun  6 19:00:53 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Version 2.0.2 released.\n\n\t* install.sh: New file.\n\t* Makefile.in (INSTALL): Use install.sh.\n\t(install-normal): New name for target `install'.\n\t(install): New dummy target.\n\n\t* mpz/pow_ui.c: Swap tests for (e == 0) and (bsize == 0).\n\t* mpz/ui_pow_ui.c: Swap tests for (e == 0) and (blimb == 0).\n\n\t* config/mt-linux (AR_FLAGS): New file.\n\t* configure.in: Use config/mt-linux for all linux systems.\n\nTue Jun  4 03:42:18 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Version 2.0.1 released.\n\n\t* mpf/tests/ref.c: Cast result of TMP_ALLOC to 
the right pointer type.\n\n\t* extract-double.c: Test _GMP_IEEE_FLOATS with #if, not plain if.\n\n\t* insert-double.c: Don't #include stdlib.h.\n\n\t* gmp-impl.h (union ieee_double_extract): Test sparc and __sparc.\n\tDo not test __sparc__.\n\n\t* mpf/reldiff.c: Change declaration to work around irix5 compiler bug.\n\t* mpq/equal.c: Likewise.\n\n\t* mpn/generic/gcd.c: Delete spurious comma at end of enumeration.\n\n\t* mpn/generic/gcdext.c: Add K&R declaration syntax.\n\t* stack-alloc.h: Likewise.\n\t* insert-double.c: Likewise.\n\t* extract-double.c: Likewise.\n\t* mpf/tests/reuse.c: Likewise.\n\t* mpz/tests/reuse.c: Likewise.\n\t* mpf/tests/t-sub.c: Likewise.\n\t* mpf/tests/t-add.c: Likewise.\n\t* mpf/tests/t-muldiv.c: Likewise.\n\t* mpf/tests/t-conv.c: Likewise.\n\t* mpf/tests/ref.c: Likewise.\n\n\t* mpn/config/t-oldgas: Renamed from t-freebsd.\n\t* mpn/configure.in: Use t-oldgas for freebsd, netbsd, and some linux\n\tconfigurations.\n\n\t* mpn/powerpc32/mul_1.s: Really clear cy before entering loop.\n\t* mpn/powerpc32/*.s: Fix power/powerpc syntax issues.\n\n\t* mpn/config/t-ppc-aix: New file.\n\t* mpn/configure.in: Use t-ppc-aix for powerpc like t-pwr-aix for power.\n\nWed May 29 02:07:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h (mp_bits_per_limb): Change qualifier from `const' to\n\t__gmp_const.\n\n\t* gmp.h (mpf_init_set_str): Add `const' qualifier for 2nd parameter.\n\t* mpf/iset_str.c: Likewise.\n\nMon May 27 00:15:58 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp-impl.h: Declare __gmp_extract_double.\n\n\t* mpz/set_q.c: Delete unused variables.\n\n\t* gmp.h (mpq_equal): Declare.\n\n\t* mpf/eq.c: mpf_cmp2 -> mpf_eq.\n\nFri May 24 03:20:44 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/iset_d.c: Don't include <math.h>.\n\n\t* insert-double.c (__gmp_scale2): New name for scal2.\n\t* mpz/get_d.c: Corresponding change.\n\t* mpf/get_d.c: Likewise.\n\t* mpq/get_d.c: Likewise.\n\t* gmp-impl.h: Declare 
__gmp_scale2.\n\n\t* mpn/generic/scan0.c: Clarify comment.\n\n\t* mpz/set_q.c: New file.\n\t* Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h: Declare mpz_set_q.\n\n\t* insert-double.c: New file.\n\t* Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\n\t* mpz/get_d.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h: Declare mpz_get_d.\n\n\t* mpf/get_d.c: New file.\n\t* mpf/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h: Declare mpf_get_d.\n\n\t* make.bat: Compile things in alphabetical order.\n\n\t* gmp-impl.h (MP_BASE_AS_DOUBLE): New #define.\n\t(LIMBS_PER_DOUBLE): New #define.\n\n\t* extract-double.c: New file.\n\t* Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* mpz/set_d.c: Rewrite to use __gmp_extract_double.\n\t* mpf/set_d.c: Likewise.\n\n\t* mpn/configure.in: Use t-pwr-aix also for aix 3.2.4 and up.\n\nWed May 22 02:48:35 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp-impl.h: Rework code for defining ieee_double_extract.\n\t(IEEE_DOUBLE_BIG_ENDIAN): Macro removed.\n\t(_GMP_IEEE_FLOATS): New macro.\n\t* mpn/vax/gmp-mparam.h: Delete.\n\n\t* mpn/config/t-pwr-aix: New file.\n\t* mpn/configure.in: Use t-pwr-aix for aix 4 and later.\n\nMon May 20 16:30:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h: In code for setting _GMP_H_HAVE_FILE, test more symbols.\n\n\t* mpf/tests/t-add.c (oo): Add some `l' printf modifiers.\n\t* mpf/tests/t-sub.c (oo): Likewise.\n\t* mpf/tests/t-conv.c (oo): Likewise.\n\t* mpf/tests/t-sqrt.c (oo): Likewise.\n\n\t* mpz/tests/t-mul.c (_mpn_mul_classic): Remove unused variables.\n\n\t* mpn/{pyr,i960,clipper}/*.s: Add missing copyright headers.\n\nFri May 17 02:24:43 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/set_d.c: Call _mpz_realloc.\n\n\t* mpq/set_z.c: New file.\n\t* mpq/Makefile.in: Compile it.\n\t* make.bat: Likewise.\n\t* gmp.h: Declare mpq_set_z.\n\n\t* mp?/Makefile.in (libmp?.a): Depend on Makefile, not 
Makefile.in.\n\t* mpf/Makefile.in (test): Delete spurious target.\n\t* mpq/Makefile.in (test): Likewise.\n\n\t* mpf/out_str.c: Use `e' to separate exponent when base <= 10.\n\n\t* mpn/configure.in: Treat ultrasparc just like sparc v8,\n\tuntil 64-bit compilers are ready.\n\n\t* mpf/set_d.c: Make it work for 64-bit machines.\n\nThu May 16 20:53:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp-impl.h: Set IEEE_DOUBLE_BIG_ENDIAN to 0 for little-endian\n\tmachines.\n\t* mpn/x86/gmp-mparam.h: Delete file.\n\n\t* configure.in: Treat microsparc like sparc8.\n\n\t* urandom.h: Test __alpha instead of __alpha__, since the former\n\tis the standard symbol.\n\t* mpn/generic/random2.c: Likewise.\n\t* mpf/random2.c: Likewise.\n\nTue May 14 13:42:39 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)\n\n\t* mpz/set_f.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* gmp.h: Declare mpz_set_f.\n\n\t* mpf/set_q.c: Simplify expression in rsize == nsize if-then-else arms.\n\nTue May 14 13:03:07 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)\n\n\t* make.bat: Add all new files.\n\nSun May 12 22:24:36 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/set_z.c: New file.\n\t* mpf/Makefile.in: Compile it.\n\t* gmp.h: Declare mpf_set_z.\n\nSat May 11 19:26:25 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h: Declare mpf_set_q.\n\n\t* mpf/set_q.c: Compute prec-1 limbs in mpn_divrem call.\n\nFri May 10 17:37:38 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/set_q.c: New file.\n\t* mpf/Makefile.in: Compile it.\n\n\t* config.sub: Recognize sparc8.\n\nWed May  8 09:19:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/tests/t-dm2exp.c: New file.\n\n\t* mpf/tests/t-add.c: Correct header comment.\n\t* mpf/tests/t-sub.c: Likewise.\n\t* mpf/tests/t-sqrt.c: Likewise.\n\n\t* mpf/div.c: Misc variable name cleanups.\n\t* mpf/div_ui.c: Base more closely on mpf/div.c.\n\t* mpf/ui_div.c: Likewise.\n\n\t* 
mpz/tests/Makefile.in (check): Depend on Makefile.\n\t* mpq/tests/Makefile.in (check): Likewise.\n\t* mpf/tests/Makefile.in (check): Likewise.\n\n\t* mpf/tests/t-muldiv.c: New file.\n\t* mpf/tests/Makefile.in: Compile and run `t-muldiv'.\n\t(t-ref.o): Delete spurious rule.\n\n\t* mpf/sqrt.c: Properly detect negative input operand.\n\n\t* mpf/sqrt_ui.c: Delete spurious header comment.\n\t* mpf/sqrt.c: Likewise.\n\t* mpz/sqrt.c: Likewise.\n\n\t* mpz/tests/reuse.c (main): Read `reps' from command line.\n\n\t* mpf/tests/reuse.c: New file.\n\t* mpf/tests/Makefile.in: Compile and run `reuse'.\n\n\t* mpf/mul_ui.c: Disable code for removing low zero limbs.\n\n\t* mpf/div.c: Fix condition for when vp and qp overlaps.\n\n\t* mpf/add_ui.c: When sum equals u, copy up to prec+1 limbs.\n\n\t* mpf/out_str.c: Don't output '\\n' after exponent.\n\n\t* mpf/add_ui.c: New special case for when U is completely cancelled.\n\nWed Apr 24 05:33:28 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Version 2.0 released.\n\n\t* All files: Update FSF's address.\n\n\t* Makefile.in (gmp_toc.html): New name for gmp.html.\n\t(TAGS): Depend on force.\n\n\t* mpf/tests/t-conv.c: Pass -base to mpf_set_str.\n\nSat Apr 20 03:54:06 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Makefile.in (ps): New target, depend on gmp.ps.\n\nFri Apr 19 14:03:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/out_str.c: Print `@' before exponent, not `e'.\n\n\t* make.bat: Update from Makefiles.\n\nThu Apr 18 01:22:05 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/set_str.c: If parameter `base' is negative, expect exponent\n\tto be decimal, otherwise in the same base as the mantissa.\n\nWed Apr 17 17:28:36 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/set_dfl_prec.c: Don't return anything.\n\t* gmp.h: Corresponding changes.\n\n\t* mpf/set_dfl_prec.c: Use `unsigned long int' for bit counts.\n\t* mpf/init2.c: Likewise.\n\t* mpf/get_prc.c: 
Likewise.\n\t* mpf/set_prc.c: Likewise.\n\t* mpf/set_prc_raw.c: Likewise.\n\t* mpz/popcount.c: Likewise.\n\t* mpz/hamdist.c: Likewise.\n\t* mpz/scan1.c: Likewise.\n\t* mpz/scan0.c: Likewise.\n\t* mpn/generic/popcount.c: Likewise.\n\t* mpn/generic/hamdist.c: Likewise.\n\t* mpn/generic/scan1.c: Likewise.\n\t* mpn/generic/scan0.c: Likewise.\n\t* gmp.h: Likewise.\n\n\t* mpf/eq.c: New file, based on mpf/diff.c.\n\t* mpf/diff.c: Delete.\n\t* mpf/Makefile.in: Corresponding changes.\n\t* gmp.h: Likewise.\n\n\t* mpf/reldiff.c: New file.\n\t* mpf/Makefile.in: Compile it.\n\t* gmp.h: Declare mpf_reldiff.\n\n\t* mpz/iset_d.c: New file.\n\t* mpz/Makefile.in: Compile it.\n\t* gmp.h: Declare mpz_init_set_d.\n\nTue Apr 16 16:28:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Makefile.in (gmp.html): Pass -acc to texi2html.\n\nMon Apr 15 16:20:24 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/set_str.c: Switch off code for defaulting the base from the\n\tleading characters.\n\n\t* gmp.h (mp?_sign): Delete.\n\t(mp?_sgn): New macros.\n\nFri Apr 12 17:23:33 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Makefile.in (gmp.dvi): Delete tmp.* at end of rule.\n\nWed Apr 10 22:52:02 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)\n\n\t* mpf/random2.c: Change of `exp' param, mp_size_t => mp_exp_t.\n\t* gmp.h: Corresponding change.\n\n\t* gmp.h (mp_bits_per_limb): Make it const.\n\nSat Mar 30 01:20:23 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* configure.in: Re-enable recognition of with_gcc.\n\n\t* mpf/Makefile.in (.c.o): Pass XCFLAGS.\n\t* mpn/Makefile.in (.c.o): Likewise.\n\t* mpz/Makefile.in (.c.o): Likewise.\n\t* mpq/Makefile.in (.c.o): Likewise.\n\t* mpbsd/Makefile.in (.c.o): Likewise.\n\t* mpf/tests/Makefile.in (.c.o): Likewise.\n\t* mpz/tests/Makefile.in (.c.o): Likewise.\n\t* mpq/tests/Makefile.in (.c.o): Likewise.\n\n\t* Makefile.in (XCFLAGS): Default to empty.\n\t(FLAGS_TO_PASS): Pass on XCFLAGS.\n\t(.c.o): Pass 
XCFLAGS.\n\n\t* config/mt-m88110 (XCFLAGS): Define instead of CC.\n\t* config/mt-sprc8-gcc (XCFLAGS): Likewise.\n\t* config/mt-supspc-gcc (XCFLAGS): Likewise.\n\n\t* configure: Don't default CC to \"gcc -O2\" if -with-gcc=no was\n\tspecified.\n\nMon Mar 25 01:07:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* urandom.h: Test for __SVR4 in addition to __svr4__.\n\n\t* mp_bpl.c (mp_bits_per_limb): Declare as `const'.\n\n\t* Makefile.in (CFLAGS): `-O2' => `-O'.\n\t* mpn/Makefile.in (CFLAGS): Likewise.\n\n\t* gmp-impl.h: Get rid of obsolete field access macros.\n\n\t* mpn/mp_bases.c (__mp_bases): 1e39 => 1e38 to work around Solaris\n\tcc compiler bug.\n\n\t* gmp.h (__MPN): Make it work also for non-ANSI compilers.\n\nThu Mar 21 01:07:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/sub.c: New special case for ediff <= 1 before generic code.\n\tSimplify generic code for ediff == 0.\n\tRename uexp => exp.\n\nMon Mar 11 18:24:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/tests/*.c: Use ref_mpf_sub for error calculation.\n\t* mpf/tests/Makefile.in: Link ref.o to all executables.\n\n\t* mpf/tests/t-sub.c: Make u = v + 1 with 50% probability.\n\nSun Mar 10 21:03:17 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)\n\n\t* mpf/get_str.c: In digit development loop for fractions, change\n\tloop condition from `<' to `<='.\n\nThu Mar  7 04:58:11 1996  Torbjorn Granlund  <tege@tiny.matematik.su.se>\n\n\t* mpn/mp_bases.c (__mp_bases): 1e100 => 1e39 to avoid overflow warning.\n\nWed Mar  6 01:10:42 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpf/tests/t-sqrt.c: New file.\n\t* mpf/tests/Makefile.in: Corresponding changes.\n\n\t* mpf/sqrt.c: Special case for square root of zero.\n\n\t* mpq/add.c: Clean up variable names.\n\t* mpq/sub.c: Update from mpq/add.c.\n\n\t* mpz/divexact.c: abs => ABS.\n\t* mpz/gcd.c: Likewise.  Rewrite final fixup code, to decrease\n\tallocation.  
Misc cleanups.\n\nTue Mar  5 22:24:56 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/configure.in: Recognize linuxoldld as a synonym for linuxaout.\n\n\t* gmp.h (mpn_add, mpn_add_1, mpn_sub, mpn_sub_1): Add prototypes.\n\n\t* mpn/configure.in: Use t-freebsd also for netbsd.\n\nMon Mar  4 15:13:28 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpq/Makefile.in (cmp.o): Depend on longlong.h.\n\n\t* mpq/equal.c: New file.\n\t* mpq/Makefile.in: Corresponding changes.\n\n\t* mpf/tests/t-add.c: New file.\n\t* mpf/tests/t-sub.c: Renamed from t-addsub.c.\n\t* mpf/tests/ref.c: New file.\n\t* mpf/tests/Makefile.in: Corresponding changes.\n\n\t* gmp-impl.h (SIZ, ABSIZ, PTR, EXP, PREC, ALLOC): New #defines.\n\nSun Mar  3 07:45:46 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/set_str.c: In exponentiation code, allocate 3 extra\n\tlimbs, not just 2.\n\n\t* mpf/get_str.c: Allocate sufficient space for tstr.\n\tWhen calculating exp_in_base, round result down.\n\n\t* mpf/tests/t-conv.c: New file.\n\t* mpf/tests/Makefile.in: Corresponding changes.\n\n\t* mp_bpl.c: New file.\n\t* gmp.h: Declare it.\n\t* Makefile.in: Corresponding changes.\n\nSat Mar  2 06:27:56 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/set_prc_raw.c: New file.\n\t* mpf/set_prc.c: Renamed from set_prec.c.\n\t* mpf/get_prc.c: New file.\n\t* mpf/Makefile.in: Corresponding changes.\n\t* gmp.h: Declare new functions.\n\n\t* mpn/generic/gcdext.c: Add copyright header.\n\nFri Mar  1 01:22:24 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/configure.in: For ppc601, search \"power\" before \"powerpc32\".\n\n\t* mp?/Makefile.in (AR_FLAGS): New variable.\n\t(libmp?.a): Use it.\n\n\t* make.bat: New file.\n\t* mpn/msdos: New directory.\n\t* mpn/msdos/asm-syntax.h: New file.\n\n\t* mpn/Makefile.in (distclean maintainer-clean): Delete asm-syntax.h.\n\n\t* config.sub: Recognize [ctj]90-cray.\n\n\t* mpn/configure.in: Recognize [ctj]90-cray-unicos*.\n\n\t* 
mpn/generic/gcdext.c: Don't use alloca directly, use TMP_* macros.\n\n\t* mpn/generic/gcd.c: Split increment from use of USIZE to avoid\n\tundefined behaviour.\n\nThu Feb 29 04:11:24 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* Makefile.in (install-info-files): Update for new install-info\n\tbehaviour.\n\n\t* mpn/power/add_n.s: Rewrite.\n\t* mpn/power/sub_n.s: Rewrite.\n\nWed Feb 28 01:34:30 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/pow_ui.c: Compute allocation more aggressively for small bases.\n\t* mpz/ui_pow_ui.c: Likewise.\n\n\t* mpn/mp_bases.c (__mp_bases): Put huge value in 2nd field for index 1.\n\n\t* mpn/generic/sqrtrem.c: sizeof (mp_limb_t) => BYTES_PER_MP_LIMB.\n\t* mpn/generic/gcd.c: Likewise.\n\t(SIGN_BIT): Compute differently.\n\nMon Feb 26 00:07:36 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* All files: mp_limb => mp_limb_t, mp_limb_signed => mp_limb_signed_t.\n\n\t* Makefile.in (install, install-bsdmp, install-info-files): Depend\n\ton installdirs.  
chmod all installed files.\n\nSun Feb 25 01:47:41 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpbsd/configure.in: Delete debugging code.\n\n\t* All Makefile.in: Update clean targets.\n\n\t* Makefile.in (AR_FLAGS): New variable.\n\t(libgmp.a): Use it.\n\t(libmp.a): Likewise.\n\n\t* VERSION: Delete file.\n\n\t* Makefile.in (installdirs): New target.\n\t* mkinstalldirs: New file (from the texinfo package).\n\n\t* Makefile.in (INSTALL, INSTALL_DATA, INSTALL_PROGRAM): New variables.\n\t(MAKEINFO, MAKEINFOFLAGS, TEXI2DVI): New variables.\n\t(install-info): New target.\n\t(install, install-bsdmp): Depend on install-info.\n\t($(srcdir)/gmp.info): Changed from plain gmp.info; put info files\n\tinto source directory.\n\t(distclean, mostlyclean): New targets.\n\t(maintainer-clean): New name for realclean.\n\t(uninstall): New target.\n\t(TAGS): New target.\n\t(info, dvi): New targets.\n\t(.PHONY): Assign.\n\n\t* Makefile.in (install, install-bsdmp): Use INSTALL_DATA.\n\n\t* mp{n,z,f,bsd}/move-if-change: Delete.\n\n\t* mpbsd/Makefile.in (stamp-stddefh): Delete target.\n\n\t* Makefile.in (.c.o): Pass CFLAGS last.\n\t* mpbsd/Makefile.in (.c.o): Likewise.\n\t* mpf/Makefile.in (.c.o): Likewise.\n\t* mpq/Makefile.in (.c.o): Likewise.\n\t* mpz/Makefile.in (.c.o): Likewise.\n\t* mpn/Makefile.in (.c.o): Likewise.\n\t(.S.o): Likewise.\n\n\t* memory.c: Change allocation error message.\n\n\t* Makefile.in (install): Prefix gmp.h with $(srcdir).\n\t(install-bsdmp): Prefix mp.h with $(srcdir).\n\n\t* mp{n,z,f,bsd}/{configure,config.sub}: Delete.\n\n\t* Makefile.in (gmp.dvi): Set TEXINPUTS also for 2nd tex invocation\n\t(install targets): Install gmp.info-N.\n\nSat Feb 24 03:36:52 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/get_str.c: Fix typo.\n\n\t* mpz/legendre.c: Clarify expression with extra parens.\n\n\t* version.c (gmp_version): Not static.\n\n\t* mpf/iset_str.c: Properly return error code.\n\n\t* mpf/add.c: Delete unused variables.\n\t* mpf/inp_str.c: 
Likewise.\n\t* mpq/get_d.c: Likewise.\n\n\t* mpn/generic/dump.c: #include <stdio.h>.\n\t* mpf/dump.c: Likewise.\n\t* mpf/set_str.c: #include <ctype.h>.\n\t(strtol): Declare.\n\n\t* gmp.h: mpn_sqrt => mpn_sqrtrem.\n\n\t* Makefile.in (clean, realclean): Clean in mpbsd.\n\t(check): Test in mpf.\n\n\t* mpf/Makefile.in (clean): Clean in tests.\n\t* mpq/Makefile.in (clean): Clean in tests.\n\n\t* mpf/tests/Makefile.in: New file.\n\t* mpf/tests/configure.in: New file.\n\t* mpf/tests/t-addsub.c: New file.\n\n\t* mpf/sub_ui.c: Simply call mpf_sub for now.\n\n\t* mpf/sub.c: Increase prec by 1.\n\t* mpf/ui_sub.c: Likewise.\n\nFri Feb 23 00:59:54 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpf/ui_sub.c: Fix typos.\n\n\t* mpf/get_str.c: When allocating space for tmp, allow for an extra\n\tlimb.  In code for fraction conversion, add special case for bases\n\tthat are a power of 2.\n\n\t* mpf/out_str.c: Output leading \"0.\".\n\tDefault base to 10, before computing string allocation.\n\n\t* mpf/get_str.c: Make variables for string size have type size_t.\n\t* gmp.h: Corresponding change.\n\n\t* mpf/random2.c: Allow creation of prec+1 large mantissas.\n\n\t* mpf/add_ui.c: Don't abort if u < 0; special case for u <= 0.\n\tFix typo in MPN_COPY offset.\n\t* mpf/sub_ui.c: Analogous changes.\n\n\t* mpf/set_prec.c: Rewrite.\n\n\t* mpf/init2.c: Compute precision as in set_prec.c.\n\n\t* mpf/div_2exp.c: Special case for u == 0.\n\t* mpf/mul_2exp.c: Likewise.  Write r->_mp_size always.\n\n\t* mpf/sqrt_ui.c: mpn_sqrt => mpn_sqrtrem.\n\t* mpf/sqrt.c: Likewise.  
When computing new exponent, round quotient\n\ttowards -infinity.\n\n\t* mpf/add.c: Fix typos.\n\t* mpf/sub.c: Fix typos.\n\nThu Feb 22 00:24:48 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/Makefile.in (stamp-stddefh): Delete target.\n\t(test): Delete target.\n\t* Makefile.in (stamp-stddefh): Delete target.\n\t(cre-stddefh.o): Delete target.\n\t(gmp.dvi): Set TEXINPUTS before invoking tex.\n\n\t* cre-stddefh.c: Delete.\n\n\t* mpz/sqrt.c: Fix typo.\n\n\t* mpz/powm.c: Special case for mod == 0.\n\t* mpz/powm_ui.c: Likewise.\n\n\t* mpz/get_si.c: Handle -0x80000000 correctly.\n\n\t* mpz/inp_str.c: Now returns size_t.\n\tMake it return number of bytes read or error indication.\n\t* mpf/inp_str.c: Likewise.\n\n\t* mpz/out_raw.c: Replace by mpz/out_binary.c, with modifications.\n\t* mpz/inp_raw.c: Rewrite, using mpz/inp_binary as a base.\n\t* mpz/inp_binary.c: Delete.\n\n\t* mpn/Makefile.in (XCFLAGS): Remove variable.\n\t(.c.o): Don't pass XCFLAGS.\n\t(SFLAGS): Set to nothing.\n\t(.S.o): Pass SFLAGS, not XCFLAGS.\n\n\t* mpn/config/t-freebsd (SFLAGS): New name for XCFLAGS.\n\n\t* mpf/out_str.c: Make return number of bytes written or error\n\tindication.\n\t* mpz/out_str.c: Likewise.\n\t* gmp.h: Corresponding changes.\n\n\t* gmp.h (__mpz_struct): mp_size_t => int.\n\t(__mpq_struct): Likewise.\n\t(__mpf_struct): Likewise.\n\t(mp_size_t): int => long int.\n\n\t* mpn/cray: New directory.\n\t* mpn/cray/gmp-mparam.h: New file.\n\t* mpn/configure.in: Recognize cray variants.\n\n\t* Makefile.in: Set defaults for prefix, libdir, etc.\n\t(install): New target.\n\t(install-bsdmp): New target.\n\t(gmp.html): New target.\n\n\t* stack-alloc.c (__tmp_alloc): Cast void ptrs to char * in comparison.\n\nWed Feb 21 04:35:02 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp.h: Sort mpn declarations.\n\t(mpn_gcdext): Add declaration.\n\n\t* mpn/generic/divrem_1.c: New file.\n\t* mpn/Makefile.in (divrem_1.o): New rule.\n\t* configure.in (functions): Add divrem_1.\n\n\t* 
mpn/generic/divmod.c: Delete file.\n\t* mpn/configure.in (functions): Delete divmod.\n\t* Makefile.in (divmod.o): Delete rule.\n\t* gmp.h (mpn_divmod): New #define.\n\n\t* gmp.h (mpn_next_bit_set): Delete spurious declaration.\n\n\t* mpn/generic/divrem.c (default case): In code assigning\n\tmost_significant_q_limb, move reassignment of n0 into if statement.\n\n\t* gmp.h (mpf_inp_str): Fix typo.\n\t(mpf_out_str): Make prototype match reality.\n\t* mpf/inp_str.c: New file.\n\t* mpf/out_str.c: New file.\n\t* mpf/Makefile.in: Compile new files.\n\n\t* mpn/Makefile.in (dump.o): Fix dependency path.\n\t(inlines.o): Likewise.\n\n\t* mpn/configure.in: Make m68060 be the same as m68000.  Clean up\n\tm68k configs.\n\nTue Feb 20 01:35:11 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/generic/sqrtrem.c: Renamed from sqrt.\n\t* mpn/configure.in (functions): Corresponding change.\n\t* mpn/Makefile.in: Likewise.\n\t* mpz/sqrtrem.c: Likewise.\n\t* mpz/sqrt.c: Likewise.\n\t* mpn/generic/perfsqr.c: Likewise.\n\n\t* Makefile.in (clean): Also remove libmp.a.\n\tDon't compile cre-conv-tab.c or mp_bases.c.\n\tcre-conv-tab.c: Delete file.\n\t(gmp.ps): New rule.\n\n\t* mpn/mp_bases.c: New file.\n\t* mpn/Makefile.in: Compile mp_bases.c.\n\n\t* mpz/set_str.c: Skip initial whitespace.\n\t* mpf/set_str.c: Likewise.\n\t* mpbsd/xtom.c: Likewise.\n\n\t* gmp.h: Add missing mpz declarations.\n\tDelete all formal parameter names from declarations.\n\n\t* mpn/Makefile.in: Add dependencies for .c files.\n\n\t* Makefile.in (check): Write recursive make calls separately, not as\n\ta loop.\n\t(FLAGS_TO_PASS): New variable.  
Use it for most recursive makes.\n\nMon Feb 19 01:02:20 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpn/Makefile.in (.S.o): Pipe cpp output to grep in order to delete\n\tlines starting with #.\n\t(CPP): Set to $(CC) -E to avoid gcc dependency.\n\n\t* mpn/m68k/syntax.h (moveql): Define to moveq for MIT_SYNTAX.\n\n\t* mpn/hppa/hppa1_1/pa7100/addmul_1.S: Fix typo in s1_ptr alignment\n\tcode.\n\t* mpn/hppa/hppa1_1/pa7100/submul_1.S: Likewise.\n\n\t* gmp.h: Fix typos in #defines of recently added mpn functions.\n\n\t* mpz/inp_str.c: Skip all whitespace, not just plain space.\n\t* mpbsd/min.c: Likewise.\n\n\t* mpn/configure.in (functions): Add gcdext.\n\t* mpn/generic/gcdext.c: New file.\n\n\t* mpz/legendre.c: mpz_div_2exp => mpz_tdiv_q_2exp.\n\n\t* gmp.h: Surround mpn declarations with extern \"C\" { ... }.\n\n\t* Makefile.in (check): New target.\n\n\t* mpq/get_d.c: Update comments.  Use rsize instead of dsize + N_QLIMBS\n\twhen possible.  Add special case for nsize == 0.\n\n\t* gmp.h (mpq_get_d): Add declaration.\n\t(mpq_canonicalize): Likewise.\n\t(mpq_cmp_ui): Likewise.\n\t(mpf_diff): Likewise.\n\t(mpf_ui_sub): Likewise.\n\t(mpf_set_prec): Likewise.\n\t(mpf_random2): Likewise.\n\n\t* gmp.h (mpz_cmp_ui): New #define.\n\t(mpz_cmp_si): New #define.\n\t(mpq_cmp_ui): New #define.\n\t(mpz_sign): New #define.\n\t(mpq_sign): New #define.\n\t(mpf_sign): New #define.\n\t(mpq_numref): New #define.\n\t(mpq_denref): New #define.\n\n\t* mpq/set_z.c: File deleted.\n\t* mpq/Makefile.in: Corresponding changes.\n\nSun Feb 18 01:34:47 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpbsd/sdiv.c: Use _mp_realloc, not _mpz_realloc.\n\n\t* mpz/inp_binary.c: Default stream to stdin.\n\t* mpz/inp_str.c: Likewise.\n\t* mpz/inp_raw.c: Likewise.\n\t* mpz/out_binary.c: Default stream to stdout.\n\t* mpz/out_raw.c: Likewise.\n\t* mpz/out_str.c: Likewise.\n\n\t* mpbsd/realloc.c: New file.\n\t* mpbsd/Makefile.in: Corresponding changes.\n\n\t* mpbsd/min.c: Rewrite (base on 
mpz/inp_str.c).\n\t* mpbsd/mtox.c: Rewrite (base on mpz/get_str.c).\n\n\t* mpbsd/mout.c: Rewrite (base on mpz/out_str) but make it output\n\tspaces in each 10th position.\n\t* mpbsd/xtom.c: Rewrite (base on mpz/set_str).\n\n\t* mpq/tests/Makefile.in (st-cmp): New file.\n\t* mpq/tests/configure.in (srcname): New file.\n\n\t* mpz/tests/configure.in (srcname): Fix typo.\n\n\t* mpq/cmp.c: Add check using number of significant bits, to avoid\n\tgeneral multiplication.\n\nSat Feb 17 11:58:30 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpq/cmp_ui.c: Store cy_limb after the mpn_mul_1 calls.\n\n\t* mpq/tests: New directory.\n\t* mpq/tests/t-cmp.c: New file.\n\t* mpq/tests/t-cmp_ui.c: New file.\n\n\t* mpz/tests/dive.c (main): Generate zero numerator.\n\t(get_random_size) : Delete.\n\n\t* mpz/divexact.c: Add special case for 0/x.\n\n\t* gmp.h (mpz_mod): Add declaration.\n\nFri Feb 16 18:18:39 1996  Andreas Schwab  <schwab@informatik.uni-dortmund.de>\n\n\t* mpn/m68k/*: Rewrite code not to use the INSN macros.\n\t(L): New macro to properly prefix local labels for ELF.\n\nFri Feb 16 00:20:56 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp-impl.h (ieee_double_extract): Use plain `unsigned int' for\n\tfields.\n\t* mpn/generic/inlines.c (_FORCE_INLINES): New #define.  
Delete\n\tconditional __GNUC__.\n\t* gmp.h (mpn_add, mpn_sub, mpn_add_1, mpn_sub_1):\n\tOnly define these if __GNUC__ || _FORCE_INLINES.\n\t* mpf/random2.c: Add missing parameter in non-ANSI header.\n\t* mpn/generic/gcd.c (SIGN_BIT): Do as #define to work around bug\n\tin AIX compilers.\n\t* mpq/get_d.c: #define N_QLIMBS.\n\t* mpz/divexact.c: Obscure division by 0 to silence compiler warnings.\n\t* stack-alloc.c: Cast void* pointer to char* before doing arithmetic\n\ton it.\n\n\t* Makefile.in (mpbsd/libmpbsd.a): New rule.\n\t* configure.in (configdirs): Add mpbsd.\n\n\t* gmp.h: Add declarations for a few missing mpn functions.\n\n\t* Makefile.in (libmp.a): New rule.\n\n\t* mpbsd/mdiv.c: #include \"dmincl.c\", not \"mpz_dmincl.c\"\n\t* gmp.h: Move #define of __GNU_MP__ into the `#if __GNU_MP__' block.\n\t* mp.h: Likewise.  Update typedefs from gmp.h.\n\t* mpbsd/configure.in: New file.\n\t* mpbsd/Makefile.in: New file.\n\t* mpbsd/configure: Link to master configure.\n\t* mpbsd/config.sub: Link to master config.sub.\n\n\t* Makefile.in: Set RANLIB_TEST.\n\t* (libgmp.a): Use it.\n\t* (libgmp.a): Do ranlib before moving the libgmp.a to the build\n\tdirectory.\n\t* mp?/Makefile.in: Don't use or set RANLIB.\n\nThu Feb 15 16:38:41 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/add_ui.c: MP_INT => mpz_t.\n\t* mpz/cmp_ui.c: Likewise.\n\t* mpz/fac_ui.c: Likewise.\n\t* mpz/inp_binary.c: Likewise.\n\t* mpz/inp_raw.c: Likewise.\n\t* mpz/legendre.c: Likewise.\n\t* mpz/jacobi.c: Likewise.\n\t* mpz/out_binary.c: Likewise.\n\t* mpz/out_raw.c: Likewise.\n\t* mpz/random2.c: Likewise.\n\t* mpz/random.c: Likewise.\n\t* mpz/realloc.c: Likewise.\n\n\t* mpz/legendre.c: __mpz_2factor(X) => mpz_scan1(X,0),\n\t__mpz_odd_less1_2factor => mpz_scan1(X,1).\n\t* mpz/ntsup.c: File deleted.\n\t* mpz/Makefile.in: Corresponding changes.\n\n\t* mpz/pprime_p: Use mpz_scan1 to avoid looping.\n\n\t* mpz/fac_ui.c: Type of `k' and `p' is `unsigned long'.\n\t* mpz/pprime_p.c: Pass long to *_ui 
functions.\n\t* mpz/gcdext.c: Likewise.\n\t* mpz/fdiv_r_2exp.c: Likewise.\n\t* mpz/fac_ui.c: Likewise.\n\n\t* mpz/powm.c: Don't use mpn_rshift when mod_shift_cnt is 0.\n\n\t* mpz/tests/Makefile.in (st-sqrtrem): Fix typo.\n\n\t* mpz/cmp_ui.c: #undef mpz_cmp_ui.\n\t* mpz/cmp_si.c: #undef mpz_cmp_si.\n\t* gmp.h (mpz_cmp_ui): New #define.\n\t(mpz_cmp_si): New #define.\n\nWed Feb 14 22:11:24 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* gmp.h: Test __cplusplus in addition to __STDC__.\n\t* gmp-impl.h: Likewise.\n\n\t* gmp.h: Surround declarations with extern \"C\" { ... }.\n\nTue Feb 13 15:20:45 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/fdiv_r_2exp.c: Use MPN_NORMALIZE.\n\t* mpz/tdiv_r_2exp.c: Likewise.\n\n\t* mpz/fdiv_r_2exp.c: New file.\n\t* mpz/fdiv_q_2exp.c: New file.\n\t* mpz/tdiv_r_2exp.c: Renamed from mpz/mod_2exp.c.\n\t* mpz/tdiv_q_2exp.c: Renamed from mpz/div_2exp.c\n\t* mpz/Makefile.in: Corresponding changes.\n\n\t* mpz/scan0.c,scan1.c: New files.\n\t* mpz/Makefile.in: Compile them.\n\n\t* gmp.h (mpn_normal_size): Delete.\n\n\t* config.guess: Update from Cygnus version.\n\n\t* mpn/m68k/rshift.S: Use INSN2 macro for lea instructions.\n\t* mpn/m68k/lshift.S: Likewise.\n\n\t* mpn/configure.in: Fix configuration for plain 68000.\n\nMon Feb 12 01:06:06 1996  Torbjorn Granlund  <tege@matematik.su.se>\n\n\t* mpz/tests/t-powm.c: Generate negative BASE operand.\n\n\t* mpz/powm.c: Make result always positive.\n\nSun Feb 11 01:44:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/tests/*.c: Add t- prefix.\n\t* mpz/tests/Makefile.in: Corresponding changes.\n\t* mpz/tests/configure.in: Update srctrigger.\n\n\t* mpz/tests/gcd.c: Generate negative operands.\n\t* mpz/tests/gcd2.c: Likewise.\n\n\t* mpz/gcdext.c: At end, if G is negative, negate all G, S, and T.\n\nThu Feb  8 17:16:12 UTC 1996 Ken Weber <kweber@mat.ufrgs.br>\n\n\t* mp{z,n}/gcd.c: Change mpn_gcd interface.\n\t* gmp.h: Ditto.\n\t* gmp.texi: update documentation.\n\nMon 
Feb  7 23:58:43 1996  Andreas Schwab  <schwab@informatik.uni-dortmund.de>\n\n\t* mpn/m68k/{lshift,rshift}.S: New files.\n\t* mpn/m68k/syntax.h: New ELF_SYNTAX macros.\n\t(MEM_INDX, R, PROLOG, EPILOG): New macros.\n\t* mpn/m68k/*.S: Use R macro with register name.  Use PROLOG and EPILOG\n\tmacros.  Rename `size' to `s_size' or s1_size to avoid clash with ELF\n\t.size directive.\n\t* mpn/configure.in: New target m68k-*-linux*.\n\nWed Feb  7 07:41:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* Makefile.in (cre-conv-tab): Workaround for SunOS make.\n\n\t* mpz/tests/reuse.c: New file.\n\t* mpz/tests/Makefile.in: Handle reuse.c.\n\nTue Feb  6 11:56:24 UTC 1996 Ken Weber <kweber@mat.ufrgs.br>\n\n\t* mpz/gcd.c: Fix g->size when one op is 0 and g == other op.\n\nTue Feb  6 01:36:39 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h (mpz_divexact): Delete parameter names.\n\t(mpz_lcm): Delete spurious declaration.\n\n\t* mpz/dmincl.c: Fix typo.\n\nMon Feb  5 01:11:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/gcd.c (gcd_2): Declare consistently.\n\n\t* mpz/tdiv_q.c: Optimize division by a single-limb divisor.\n\t* mpz/dmincl.c: Likewise.\n\n\t* mpz/add.c: Use MPN_NORMALIZE instead of mpn_normal_size.\n\t* mpz/sub.c: Likewise.\n\t* mpn/generic/sqrt.c: Likewise.\n\n\t* mpn/tests/{add_n,sub_n,lshift,rshift}.c: Put garbage in the\n\tdestination arrays.\n\nFri Feb  2 02:21:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/{jacobi.c,legendre.c,ntsup.c,invert.c}: New files.\n\t* mpz/Makefile.in: Compile them.\n\n\t* mpn/Makefile.in (INCLUDES): Don't search in `generic'.\n\nThu Feb  1 02:15:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\tChange from Ken Weber:\n\t* mpz/divexact.c: Make it work when quot is identical to either input.\n\n\t* mpf/ui_sub.c: New file.\n\t* mpf/Makefile.in: Compile it.\n\n\t* gmp-impl.h (MPZ_TMP_INIT): alloca -> TMP_ALLOC.\n\t* mpz/{c,f}div_{q,qr,r}.c: Use 
TMP_DECL/TMP_MARK/TMP_FREE since\n\tthese use MPZ_TMP_INIT.\n\t* mpz/mod.c: Likewise.\n\t* mpq/{add,sub}.c: Likewise.\n\t* mpq/canonicalize: Likewise.\n\n\t* mpq/{add,sub,mul,div}.c: Use mpz_divexact. MP_INT -> mpz_t.\n\t* mpq/canonicalize.c: Likewise.\n\nWed Jan 31 01:45:00 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/gcd.c: Misc changes from Ken.\n\n\t* mpz/tests/gcd2.c: New file.\n\t* mpz/tests/Makefile.in: Handle gcd2.c.\n\n\t* mpn/generic/gcd.c (mpn_gcd): When GCD == ORIG_V, return vsize,\n\tnot orig_vsize.  Fix parameter declaration.\n\n\t* mpz/mod_ui.c: Delete file.\n\t* mpz/Makefile.in: Don't try to compile mod_ui.\n\n\t* mpz/cdiv_*_ui.c): Make them work right.\n\t* gmp.h: Declare cdiv*.\n\nTue Jan 30 02:22:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/{cdiv_q.c,cdiv_q_ui.c,cdiv_qr.c,cdiv_qr_ui.c,cdiv_r.c,\n\tcdiv_r_ui.c,cdiv_ui.c}: New files.\n\t* mpz/Makefile.in: Compile them.\n\n\t* All files: Make file permissions right.\n\n\tChanges from Ken Weber:\n\t* mpn/generic/accelgcd.c: Delete.\n\t* mpn/generic/bingcd.c: Delete.\n\t* mpn/generic/numbits.c: Delete.\n\t* mpn/generic/gcd.c: New file.\n\t* mpn/configure.in (functions): Update accordingly.\n\t* mpz/divexact.c: New file.\n\t* mpz/Makefile.in: Compile divexact.c.\n\t* mpz/gcd.c: Rewrite to accommodate for gcd changes in mpn.\n\t* gmp.h: declare new functions, delete obsolete declarations.\n\t* mpz/tests/dive.c: New file.\n\t* mpz/tests/Makefile.in: Handle dive.c.\n\nMon Jan 29 03:53:24 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/random.c: Handle negative SIZE parameter.\n\n\t* mpz/tests/tdiv(_ui).c: New name for tst-dm(_ui).c.\n\t* mpz/tests/tst-mdm(_ui).c: Delete.\n\t* mpz/tests/fdiv(_ui).c: New test based in tst-mdm(_ui).\n\t* mpz/tests/*.c: Get rid of tst- prefix for DOS 8+3 naming.\n\t* mpz/tests/Makefile.in: Corresponding changes.\n\t* mpz/tests/configure.in: Update srctrigger.\n\n\t* mpn/generic/divmod.c: Update from 
divrem.\n\t* mpn/generic/divrem.c: Misc cleanups.\n\nSun Jan 28 03:25:08 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* All files: Use new TMP_ALLOC interface.\n\n\t* mpz/powm_ui.c: Make Jan 25 changes to powm.c also here.\n\n\t* mpz/tests/powm_ui.c: New file.\n\t* mpz/tests/Makefile.in: Add rules for tst-powm and tst-powm_ui.\n\n\t* Makefile.in: Update dependency list.\n\t* mpf/Makefile.in: Likewise.\n\t* mpz/Makefile.in: Likewise.\n\t* mpq/Makefile.in: Likewise.\n\t* Makefile.in: Set RANLIB simply to ranlib, and allow configure\n\tto override it.\n\n\t* mpz/Makefile.in (conf): Delete spurious target.\n\t(mp_bases.c): Delete.\n\t(cre-conv-tab rules): Delete.\n\n\t* Makefile.in (cre-conv-tab): Greatly simplify.\n\nSat Jan 27 13:38:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* stack-alloc.c: New file.\n\t* stack-alloc.h: New file.\n\n\t* gmp.h (__gmp_inline): Define using __inline__.\n\nThu Jan 25 00:28:37 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/generic/scan0.c: New file.\n\t* mpn/generic/scan1.c: Renamed from next_bit.c.\n\t* mpn/configure.in (functions): Include scan0 and scan1.\n\n\t* mpn/m68k/*: #include sysdep.h.  
Use C_GLOBAL_NAME.\n\n\t* configure: Update from Cygnus version.\n\t* config.guess: Likewise.\n\t* config.sub: Likewise.\n\t* configure: Pass --nfp to recursive configures.\n\n\t* mpz/tests/tst-*.c: Adjust SIZE and reps.\n\n\t* mpz/powm.c: Move esize==0 test earlier.\n\tIn final reduction of rp,rsize, don't call mpn_divmod unless\n\treduction is really needed.\n\n\t* mpz/tests/tst-powm.c: Fix thinko in checking code.\n\n\t* All files: Get rid of `__' prefix from mpn_* calls and declarations.\n\t* gmp.h: #define __MPN.\n\t* gmp.h: Use __MPN in #defines for mpn calls.\n\n\t* mpn/generic/mul_n.c: Prepend `i' to internal routines.\n\t* gmp-impl.h: Add #defines using __MPN for those internal routines.\n\n\t* mpn/generic/sqrt.c: Change call to mpn_mul to mpn_mul_n.\n\nWed Jan 24 13:28:19 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/sparc32/udiv_fp.S: New name for udiv_qrnnd.S.\n\t* mpn/sparc32/udiv_nfp.S: New name for v8/udiv_qrnnd.S.\n\t* mpn/sparc32/v8/supersparc: New directory.\n\t* mpn/sparc32/v8/supersparc/udiv.S: New file.\n\nTue Jan 23 01:10:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\tThis major contribution is from Ken Weber:\n\t* mpn/generic/accelgcd.c: New file.\n\t* mpn/generic/bdivmod.c: New file.\n\t* mpn/generic/bingcd.c: New file.\n\t* mpn/generic/gcd_1.c: Rewrite.\n\t* mpn/generic/numbits.c: New file (to go away soon).\n\t* mpz/gcd.c: Rewrite.\n\t* mpz/tests/tst-gcd.c (SIZE): Now 128.\n\t* gmp.h: Declare new functions.\n\t* mpn/configure.in (functions): List new files.\n\t* gmp-impl.h (MPN_SWAP): Delete.\n\t(MPN_LESS_BITS_LIMB, MPN_LESS_BITS, MPN_MORE_BITS): Delete.\n\t(MPN_COMPL_INCR, MPN_COMPL): Delete.\n\nMon Jan 22 02:04:59 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h (mpn_name): New #define.\n\n\t* mpn/m88k/mc88110/addmul_1.s: New file.\n\t* mpn/m88k/mc88110/add_n.S: New file.\n\t* mpn/m88k/mc88110/sub_n.S: New file.\n\n\t* mpn/m88k/sub_n.s: Correctly initialize carry.\n\n\t* 
mpn/sparc32/{add_n.S,sub_n.S,lshift.S,rshift.S): `beq' => `be'.\n\nSun Jan 21 00:04:35 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/sparc64/addmul_1.s: New file.\n\t* mpn/sparc64/submul_1.s: New file.\n\t* mpn/sparc64/rshift.s: New file.\n\nSat Jan 20 00:32:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpz/iset.c: Fix typo introduced Dec 25.\n\nWed Jan 17 13:16:44 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* config/mt-sprc8-gcc: New name for mt-sparc8-gcc.\n\t* config/mt-sparcv8-gcc: Delete.\n\t* configure.in: Corresponding changes.\n\nTue Jan 16 16:31:01 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp-impl.h: #include alloca.h when necessary.\n\n\t* longlong.h: Test __alpha instead of __alpha__, since the former\n\tis the standard symbol.\n\nMon Jan 15 18:06:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/sparc64/mul_1.s: Swap operands of mulx instructions.\n\t* mpn/sparc64/lshift.s: New file.\n\nFri Dec 29 17:34:03 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/x86/pentium/add_n.S: Get rid of #defines for register names.\n\t* mpn/x86/pentium/sub_n.S: Likewise.\n\nThu Dec 28 03:16:57 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/x86/pentium/mul_1.S: Rework loop to avoid AGI between update\n\tof loop induction variable and load insn at beginning of loop.\n\t* mpn/x86/pentium/addmul_1.S: Likewise.\n\t* mpn/x86/pentium/submul_1.S: Likewise.\n\nMon Dec 25 23:22:55 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* All files: Prefix user-visible structure fields with _mp_.\n\nFri Dec 22 20:42:17 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/configure.in (m68k configs): Terminate path variable with\n\tplain \"m68k\".\n\nFri Dec 22 03:29:33 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/sparc32/add_n.S: Update from sub_n.S to fix bugs, and to\n\tclean things up.\n\n\t* mpn/configure.in (m68k 
configs): Update #include path for new\n\tmpn directory organization.\n\nTue Dec 12 02:53:02 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* gmp.h: Prefix all structure field with _mp_.\n\t* gmp-impl.h: Define access macros for these fields.\n\nSun Dec 10 00:47:17 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/alpha/addmul_1.s: Prefix labels with `.'.\n\t* mpn/alpha/submul_1.s: Likewise.\n\t* mpn/alpha/[lr]shift.s: Likewise.\n\t* mpn/alpha/udiv_qrnnd.S: Likewise.\n\t* mpn/alpha/ev5/[lr]shift.s: Likewise.\n\n\t* mpn/alpha/ev5/lshift.s: Fix typos.\n\nFri Dec  1 14:28:20 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/Makefile.in (.SUFFIXES): Define.\n\nWed Nov 29 23:11:57 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/sparc64/{add_n.s, sub_n.s}: New files.\n\nTue Nov 28 06:03:13 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/x86/syntax.h: Handle ELF_SYNTAX.\n\tRename GAS_SYNTAX => BSD_SYNTAX.\n\n\t* mpn/configure.in: Handle linuxelf and SysV for x86 variants.\n\nMon Nov 27 01:32:12 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/hppa/hppa1_1/pa7100/submul_1.S: New file.\n\nSun Nov 26 04:30:47 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>\n\n\t* mpn/hppa/hppa1_1/pa7100/addmul_1.S: New file.\n\n\t* mpn/sparc32/add_n.S: Rewrite to use 64 bit loads/stores.\n\t* mpn/sparc32/sub_n.S: Likewise.\n\nFri Nov 17 00:18:46 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/configure.in: Handle m68k on NextStep.\n\nThu Nov 16 02:30:26 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn: Reorganize machine-specific directories.\n\t* mpn/configure.in: Corresponding changes.\n\t(sh, sh2): Handle these.\n\t(m68k targets): Create asm-syntax.h.\n\nThu Nov  9 02:20:50 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/mul_n.c (____mpn_sqr_n): Delete code that calls abort.\n\t(____mpn_mul_n): Likewise.\n\nTue Nov  7 03:25:12 1995  
Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/get_str.c: In exponentiation code (two places), don't swap\n\tinput and output areas when calling mpn_mul_1.\n\t* mpf/set_str.c: Likewise.\n\nFri Nov  3 02:35:58 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/Makefile.in: Make sure all objects are listed in dependency list;\n\tdelete spurious entries.\n\n\t* mpf/mul.c: Handle U or V being 0.  Allow prec+1 for result precision.\n\n\t* mpf/set_prec.c: New computation of limb precision.\n\t* mpf/set_dfl_prec.c: Likewise.\n\n\t* mpf/random2.c: Fix typo computing exp.\n\t* mpf/get_str.c: In (uexp > usize) case, set n_limbs as a function of\n\tthe user-requested number of digits, n_digits.\n\nThu Nov  2 16:25:07 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/divrem.c (case 2): Don't move np vector back, it is\n\tnever read.\n\t(default case): Put most significant limb from np in new variable n2;\n\tdecrease size argument for MPN_COPY_DECR; use n2 instead of np[dsize].\n\nWed Nov  1 02:59:53 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/sparc/[lr]shift.S: New files.\n\nTue Oct 31 00:08:12 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpz/gcd_ui.c: Set w->size unconditionally when v is zero.\n\n\t* gmp-impl.h (assert): Delete definition.\n\n\t* mpf/sub.c: Delete all assert calls.  Delete variable `cy'.\n\n\t* mpf/neg.c: Use prec+1 as precision.  
Optimize for when arguments\n\tare the same.\n\t* mpf/abs.c: Likewise.\n\t* mpf/{set,neg,abs}.c: Make structure and variable names similar.\n\nMon Oct 30 12:45:26 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/random2.c (random): Test __SVR4 in addition to __svr4__.\n\t* mpn/generic/random2.c (random): Likewise.\n\nSun Oct 29 01:54:28 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/div.c: Special handle U or V being 0.\n\n\t* mpf/random2.c: New file.\n\n\t* longlong.h (i860 rshift_rhlc): Define.\n\t(i960 udiv_qrnnd): Define.\n\t(i960 count_leading_zeros): Define.\n\t(i960 add_ssaaaa): Define.\n\t(i960 sub_ddmmss): Define.\n\t(i960 rshift_rhlc): Define.\n\nSat Oct 28 19:09:15 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/pentium/rshift.S: Fix and generalize condition for when to use\n\tspecial code for shift by 1.\n\t* mpn/pentium/lshift.S: Likewise.\n\nThu Oct 26 00:02:56 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* gmp.h: #undef __need_size_t.\n\t* mp.h: Update from gmp.h.\n\nWed Oct 25 00:17:27 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/Makefile.in: Compile set_prec.c.\n\t* mpf/realloc.c: Delete this file.\n\t* mpf/Makefile.in: Delete mentions of realloc.c.\n\n\t* gmp.h (__mpf_struct): Get rid of `alloc' field.\n\t* mpf/clear.c: Likewise.\n\t* mpf/init*.c: Likewise.\n\t* mpf/set_prec.c: Likewise.\n\t* mpf/iset*.c: Likewise.\n\n\t* mpf/iset_str.c: New file.\n\n\t* mpn/configure.in: Handle pyramid.\n\n\t* mpf/set.c: Use prec+1 as precision.\n\n\t* mpf/set_prec.c: New file.\n\nTue Oct 24 00:56:41 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/divrem.c: New file.  
Will replace mpn/generic/divmod.c\n\twhen rest of source is converted.\n\t* mpn/configure.in (functions): Add `divrem'\n\t* mpn/generic/set_str.c: Never call __mpn_mul_1 with zero size.\n\n\t* mpf/get_str.c: Completely rewritten.\n\t* mpf/add.c: Fix several problems.\n\t* mpf/sub.c: Compare operands from most significant end until\n\tfirst difference, exclude skipped limbs from computation.\n\tAccordingly simplify normalization code.\n\t* mpf/set_str.c: Fix several problems.\n\t* mpf/dump.c: New file.\n\t* mpf/Makefile.in: Compile dump.c.\n\t* mpf/init2.c: Set prec field correctly.\n\nSun Oct 22 03:02:09 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* cre-conv-tab.c: #include math.h; don't declare log and floor.\n\nSat Oct 21 23:04:10 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/mul_ui.c: Handle U being 0.\n\nWed Oct 18 19:39:27 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/set_str.c: Correctly handle input like \"000000000000\".\n\tMisc cleanups.\n\nTue Oct 17 15:14:13 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* longlong.h: Define COUNT_LEADING_ZEROS_0 for machines where\n\tappropriate.\n\nMon Oct 16 19:14:43 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/add.c: Rewrite.\n\t* mpf/set_str.c: New file.  
Needs more work.\n\nSat Oct 14 00:14:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpf/div_2exp.c: Vastly simplify.\n\t* mpf/mul_2exp.c: Likewise.\n\n\t* mpf/sub.c: Rewrite.\n\n\t* gmp-impl.h (udiv_qrnnd_preinv2gen): Terminate comment.\n\n\t* mpf/dump.c: Free allocated memory.\n\n\t* gmp-impl.h (assert): Define.\n\nWed Oct 11 13:31:00 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/pentium/rshift.S: Install new code to optimize shift-by-1.\n\nTue Oct 10 00:37:21 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/pentium/lshift.S: Install new code to optimize shift-by-1.\n\n\t* mpn/powerpc32/{lshift.s,rshift.s}: New files.\n\n\t* configure.in: Fix typo.\n\nSat Oct  7 08:17:09 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* longlong.h (smul_ppmm): Correct type of __m0 and __m1.\n\nWed Oct  4 16:31:28 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/configure.in: Handle alphaev5.\n\t* mpn/ev4: New name for alpha subdir.\n\t* mpn/ev5: New subdir.\n\t* mpn/ev5/lshift.s: New file.\n\nTue Oct  3 15:06:45 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/alpha/mul_1.s: Avoid static increments of pointers; use\n\tcorresponding offsets in ldq and stq instructions instead.\n\t(Loop): Swap cmpult and stq to save one cycle on EV5.\n\n\t* mpn/tests/{add_n.s,sub_n.s,lshift.s,rshift.s,mul_1.s,addmul_1.s,\n\tsubmul_1.s}: Don't check results if NOCHECK is defined.\n\nMon Oct  2 11:40:18 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* longlong.h (mips umul_ppmm [32 and 64 bit versions]):\n\tMake new variants, based on GCC version number, that use `l' and `h'\n\tconstraints instead of explicit mflo and mfhi instructions\n\nSun Oct  1 00:17:47 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/mc88100/add_n.s: Decrease unrolling factor from 16 to 8.\n\t* mpn/mc88100/sub_n.s: Likewise.\n\n\t* config/mt-m88110: New file.\n\t* configure.in: Use it.\n\n\t* mpn/mc88110/mul_1.s: 
Fix thinko.\n\nSat Sep 30 21:28:19 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpz/set_d.c: Declare `size' at function start.\n\n\t* experimental: New directory for mpx and mpz2.\n\n\t* mpz/tdiv_q.c: Clarify comments.\n\t* mpz/{mod.c,mod_ui.c}: New file, for math mod function.\n\n\t* mpn/sh2/{mul_1.s,addmul_1.s,submul_1.s}: New files.\n\n\t* mpn/sh/{add_n.s,sub_n.s}: New files.\n\n\t* mpn/pyr/{add_n.s,sub_n.s,mul_1.s,addmul_1.s}: New files.\n\n\t* mpn/i960/{add_n.s,sub_n.s}: New files.\n\n\t* mpn/alpha/addmul_1.s (Loop): Move decrement of r18 to before umulh,\n\tto save cycles on EV5.\n\t* mpn/alpha/submul_1.s: Ditto.\n\t* mpn/alpha/mul_1.s: Ditto.\n\nThu Sep 28 02:48:59 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* gmp.h (mp_limb, mp_limb_signed): Define as `long long' if\n\t_LONG_LONG_LIMB is defined.\n\n\t* longlong.h (m88110): Test __m88110__, not __mc88110__\n\n\t* mpn/mc88110/mul_1.s: Rewrite.\n\nTue Sep 26 23:29:05 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* config.sub: Update from current Cygnus version.\n\n\t* mpn/configure.in: Recognize canonical m88*, not mc88*.\n\nFri Sep 22 14:58:05 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpz/set_d.c: New file.\n\t* mpz/Makefile.in: Build new files.\n\n\t* mpq/get_d.c: Replace usage of scalbn with ldexp.\n\n\t* mpn/{vax,i386}/gmp-mparam.h: New files.\n\t* gmp-impl.h (ieee_double_extract): Define here.\n\t* mpf/set_d.c (ieee_double_extract): Not here.\n\nThu Sep 21 00:56:36 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* longlong.h (C umul_ppmm): Use UWtype, not USItype for temps.\n\t(udiv_qrnnd): For cases implemented with call to __udiv_qrnnd,\n\tprotect with new symbol LONGLONG_STANDALONE.\n\t(68000 umul_ppmm): Use %# prefix for immediate constants.\n\nWed Sep 20 15:36:23 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/divmod_1.c: Handle\n\tdivisor_limb == 1 << (BITS_PER_MP_LIMB - 1)\n\tspecifically also when 
normalization_steps != 0.\n\nMon Sep 18 15:42:30 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpq/get_d.c: New file.\n\nSun Sep 17 02:04:36 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* longlong.h (pyr): Botch up for now.\n\nSat Sep 16 00:11:50 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/clipper/mul_1.s: New file.\n\t* mpn/clipper/add_n.s: New file.\n\t* mpn/clipper/sub_n.s: New file.\n\t* mpn/configure.in: Handle clipper*-*-*.\n\n\t* mpn/configure.in: Recognize rs6000-*-*.\n\nFri Sep 15 00:41:34 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/alpha/add_n.s: New file.\n\t* mpn/alpha/sub_n.s: New file.\n\n\t* mpn/mips3: New name for mpn/r4000.\n\t* mpn/mips2: New name for mpn/r3000.\n\t* mpn/configure.in: Corresponding changes.\n\n\t* mpn/generic/perfsqr.c (primes): Delete.\n\t(residue_map): Delete.\n\nThu Sep 14 00:07:58 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/r3000/sub_n.s: Fix typo.\n\n\t* dm_trunc.c: Delete spurious file.\n\n\t* mpz/out_binary.c: Fix typo.\n\n\t* mpn/configure.in (per-target): Make mips*-*-irix6* imply r4000.\n\n\t* gmp-impl.h: For sparc and sgi, include alloca.h.\n\n\t* mpn/z8000/mul_1.s: Replace `test r' with `and r,r'.  
Replace\n\t`ldk r,#0' with `xor r,r'.\n\nWed Sep  6 00:58:38 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpz/inp_binary.c: New file.\n\t* mpz/out_binary.c: New file.\n\t* mpz/Makefile.in: Build new files.\n\nTue Sep  5 22:53:51 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* gmp.h (__mpz_struct): Change `long int' => `mp_size_t' for alloc\n\tand size fields.\n\nSat Sep  2 17:47:59 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/r4000/{add_n.s,sub_n.s}: Optimize away some pointer arithmetic.\n\t* mpn/r3000/{add_n.s,sub_n.s,lshift.s,rshift.s}: New files,\n\tderived from r4000 code.\n\nFri Sep  1 05:35:52 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/r3000/mul_1.s: Fix typo.\n\n\t* mpn/powerpc32: Fix some old vs new mnemonic issues.\n\n\t* mpn/powerpc32/{add_n.s,sub_n.s}: New files.\n\t* mpn/r4000/{add_n.s,sub_n.s,lshift.s,rshift.s}: New files.\n\nWed Aug 30 10:43:47 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/r3000/mul_1.s ($LC1): Use addiu for immediate add.\n\t* mpn/r4000/{mul_1.s,addmul_1.s,submul_1.s}: New files.\n\n\t* config.guess: Update to latest FSF revision.\n\nMon Aug 28 02:18:13 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpz/out_str.c: Cast str to char * in fputs call.\n\n\t* gmp-impl.h: Define UQItype, SItype, and USItype also\n\twhen not __GNUC__.\n\nFri Aug 25 01:45:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/i386/syntax.h: Renamed from asm-syntax.h.\n\t* mpn/mc68020/syntax.h: Renamed from asm-syntax.h.\n\t* mpn/configure.in: Corresponding changes.\n\nSun Aug 13 19:20:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>\n\n\t* mpn/generic/random2.c: Test __hpux, not hpux.\n\nSat Apr 15 20:50:33 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/sparc/add_n.S: Make it work for PIC.\n\t* mpn/sparc/sub_n.s: Likewise.\n\t* mpn/sparc8/addmul_1.S: Likewise.\n\t* mpn/sparc8/mul_1.S: Likewise.\n\t* mpn/i386/add_n.S: 
Likewise.\n\t* mpn/i386/sub_n.S: Likewise.\n\nThu Apr 13 23:15:03 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/configure.in: Don't search power subdir for generic ppc configs.\n\tAdd some ppc cpu-specific configs.  Misc clean up.\n\nMon Apr 10 00:16:35 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/ui_pow_ui.c: Delete spurious code to handle negative results.\n\nSun Apr  9 12:38:11 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* longlong.h (SPARC v8 udiv_qrnnd): Generate remainder in C,\n\tnot in asm.\n\n\t* mpn/generic/sqrt.c (SQRT): Test for __SOFT_FLOAT.\n\nTue Mar 28 00:19:52 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/generic/hamdist.c (popc_limb): Make Mar 16 change here too.\n\nFri Mar 17 23:29:22 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* longlong.h (SH umul_ppmm): Define.\n\nThu Mar 16 16:40:44 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/generic/popcount.c (popc_limb): Rearrange 32 bit case\n\tto help CSE.\n\nFri Mar 10 20:03:49 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/powerpc32/mul_1.s: Clear cy before entering loop.\n\tRearrange loop to save a cycle.\n\t* mpn/powerpc32/addmul_1.s: New file.\n\t* mpn/powerpc32/submul_1.s: New file.\n\nFri Feb 17 22:44:45 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/configure.in: Set target_makefile_frag for freebsd\n\tin new case stmt.\n\t* mpn/config/t-freebsd: New file.\n\t* mpn/Makefile.in: Add #### for frag insertion.\n\t(XCFLAGS): Clear by default.\n\t(.c.o, .S.o rules): Pass XCFLAGS.\n\nTue Feb  7 16:27:50 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* longlong.h (68000 umul_ppmm): Merge improvements from henderson.\n\nTue Jan 24 04:23:20 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* longlong.h (default umul_ppmm): Store input parameters in temporaries\n\tto avoid reading them twice.\n\t(default smul_ppmm): New definition.\n\nThu Dec 29 04:20:07 1994  Jim Meyering  (meyering@comco.com)\n\n\t* 
generic/perfsqr.c (__mpn_perfect_square_p): Remove declaration\n\tof unused variable.\n\t* generic/pre_mod_1.c (__mpn_preinv_mod_1): Likewise.\n\t* mpz/powm.c (pow): Likewise.\n\n\t* mpz/and.c (mpz_and): Use {} instead of `;' for empty else clause\n\tto placate `gcc -Wall'.\n\t* mpz/ior.c (mpz_ior): Likewise.\n\nWed Dec 28 13:31:40 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/m*68*/*.S: #include asm-syntax.h, not asm.h.\n\nMon Dec 26 17:15:36 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* longlong.h: Test for more symbols, in __mc68000__ case.\n\n\t* mpn/mpn/config.sub: Recognize m68060.\n\t* mpn/configure.in: Change mc* to m* for 68k targets.\n\t* mpn/Makefile.in (.S.o): Delete spurious creation of temp .c file.\n\nMon Dec 19 01:56:30 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* config.sub: Recognize pentium as a valid CPU.\n\t* mpn/configure.in: Handle pentium specifically, to use new assembly\n\tcode.\n\nMon Dec 19 00:13:01 1994  Jim Meyering  (meyering@comco.com)\n\n\t* gmp.h: Define _GMP_H_HAVE_FILE if FILE, __STDIO_H__, or H_STDIO\n\tis defined.\n\t* gmp.h: test _GMP_H_HAVE_FILE instead of FILE everywhere else.\n\nMon Dec 19 00:04:54 1994  Kent Boortz  (boortz@sics.se)\n\n\t* Makefile.in (recursive makes): Pass CFLAGS.\n\nSun Dec 18 22:34:49 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/pentium: New directory.\n\n\t* mpz/pprime.c: Make sure to mpz_clear all temporaries.\n\n\t* longlong.h: Don't use udiv instruction when SUPERSPARC is defined.\n\t* configure.in: Handle supersparc*-.\n\t* config/mt-supspc-gcc: New file.\n\t* config/mt-sparc8-gcc: New name for mt-sparcv8-gcc.\n\nMon Dec 12 22:22:10 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/i386/*.S: #include \"asm-syntax.h\", not \"asm.h\".\n\t#include sysdep.h before asm-syntax.h.\n\n\t* mpn/mc68020/asm-syntax.h: #undef ALIGN before defining it.\n\t* mpn/i386/asm-syntax.h: Likewise.\n\n\t* mpn/mc68020/asm-syntax.h: New name for asm.h.\n\t* 
mpn/i386/asm-syntax.h: New name for asm.h.\n\nTue Dec  6 21:55:25 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/array_init.c: Fix typo in declaration.\n\nFri Nov 18 19:50:52 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/Makefile.in (.S.o): Pass CFLAGS and INCLUDES.\n\nMon Nov 14 00:34:12 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/generic/random2.c (random): Test for __svr4__.\n\nWed Oct 12 23:28:16 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* cre-conv-tab.c (main): Avoid upper-case X in printf format string.\n\nTue Aug 23 17:16:35 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/perfsqr.c: Use mpn_perfect_square_p.\n\t* mpn/generic/perfsqr.c: New file.\n\nWed Jul  6 13:46:51 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/array_init.c: New file.\n\t* mpz/Makefile.in: Compile array_init.\n\t* gmp.h: Declare mpz_array_init.\n\nMon Jul  4 01:10:03 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/add.c: Fix bogus comment.\n\t* mpz/sub.c: Likewise.\n\nSat Jul  2 02:14:56 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpn/generic/pre_mod_1.c: New file.\n\t* mpz/perfsqr.c: Use __mpn_preinv_mod_1 when faster.\n\nFri Jul 01 22:10:19 1994  Richard Earnshaw (rwe11@cl.cam.ac.uk)\n\n\t* longlong.h (arm umul_ppmm): Fix typos in last change.  
Mark\n\thard-coded registers with \"%|\"\n\nThu Jun 30 03:59:33 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpz/perfsqr.c: Define PP, etc, for machines with 64 bit limbs.\n\tUse __mpn_mod_1.\n\t* mpz/perfsqr.c: Don't clobber REM in quadratic residue check loop.\n\nWed Jun 29 18:45:41 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpn/generic/sqrt.c (SQRT): New asm for IBM POWER2.\n\n\t* mpz/gcd_ui.c: Return 0 if result does not fit an unsigned long.\n\n\t* gmp.h: Use \"defined (__STDC__)\" consistently.\n\nTue Jun 28 18:44:58 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* gmp.h (mpz_get_si): Don't use \"signed\" keyword for return type.\n\n\t* mpz/tests/Makefile.in: Use CFLAGS for linking.\n\n\t* Makefile.in (CFLAGS): Use -O2 here.\n\t* mpn/Makefile (CFLAGS): Not here.\n\n\t* mpq/cmp_ui.c: Fix typo.\n\t* mpq/canonicalize.c: Fix typo.\n\t* mpz/gcd_ui.c: Handle gcd(0,v) and gcd(u,0) correctly.\n\t* mpn/generic/gcd_1.c: Fix braino in last change.\n\nMon Jun 27 16:10:27 1994  Torbjorn Granlund  (tege@rtl.cygnus.com)\n\n\t* mpz/gcd_ui.c: Change return type and return result.\n\tAllow destination param to be NULL.\n\t* gmp.h: Corresponding change.\n\t* mpn/generic/gcd_1.c: Handle zero return from mpn_mod_1.\n\nTue Jun 14 02:17:43 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)\n\n\t* mpn/i386/asm.h (ALIGN): Make it take a parameter.\n\t* mpn/i386/*.S: Use ALIGN to align all loops.\n\n\t* mpn/i386/*.S: Move colon inside C_GLOBAL_NAME expression.\n\t(Makes old versions of GAS happy.)\n\nSat May 28 01:43:54 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* Many files: Delete unused variables and labels.\n\t* mpn/generic/dump.c: cast printf width argument to int.\n\nWed May 25 00:42:37 1994  Torbjorn Granlund  (tege@thepub.cygnus.com)\n\n\t* mpz/gcd.c (mpz_gcd): Normalize after __mpn_sub calls.\n\t(xmod): Ignore return value of __mpn_divmod.\n\t(xmod): Improve normalization code.\n\nSat May 21 01:30:09 1994  Torbjorn Granlund  
(tege@adder.cygnus.com)\n\n\t* mpz/gcdext.c: Cosmetic changes.\n\n\t* mpz/fdiv_ui.c: New file.\n\nFri May 20 00:24:53 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpz/tests/Makefile.in: Use explicit rules for running tests,\n\tnot a shell loop.\n\t(clean): Delete stmp-*.\n\n\t* mpz/Makefile.in: Update.\n\n\t* mpz/div_ui.c: Don't include longlong.h.\n\t* mpz/dm_ui.c: Likewise.\n\n\t* mpz/fdiv_q.c, mpz/fdiv_q_ui.c, mpz/fdiv_qr.c, mpz/fdiv_qr_ui.c,\n\tmpz/fdiv_r.c, mpz/fdiv_r_ui.c: New files.  Code partly from deleted\n\tmdm.c, mdm_ui.c, etc, partly rewritten.\n\t* mpz/dm_floor_ui.c, mpz/dm_floor.c: Delete.\n\t* mpz/mdm.c, mpz/mdm_ui.c, mpz/mdiv.c, mpz/mdiv_ui.c, mpz/mmod.c,\n\tmpz/mmod_ui.c: Delete.\n\n\t* mpz/tdiv_q.c, mpz/tdiv_q_ui.c, mpz/tdiv_qr.c, mpz/tdiv_qr_ui.c,\n\tmpz/tdiv_r.c, mpz/tdiv_r_ui.c:\n\tNew names for files implementing truncating division.\n\t* mpz/div_ui.c, mpz/dm_ui.c, mpz/mod_ui.c: Simplify.\n\n\t* mpn/Makefile.in (.S.o): Don't rely on CPP being defined, use CC\n\tinstead.\n\t(clean): Delete tmp-*.\n\nThu May 19 01:37:44 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpz/cmp.c: Call __mpn_cmp.\n\n\t* mpz/popcount.c: Fix typo.\n\n\t* mpz/powm_ui.c: Simplify main loop.  Keep principal operand size\n\tsmaller than MSIZE when possible.\n\t* mpz/powm.c: Likewise.\n\n\t* mpn/generic/sqrt.c: Move alloca calls into where the memory is\n\tneeded.  
Simplify.\n\n\t* gmp.h: (_PROTO): New macro.\n\tAdd many function declarations; use _PROTO macro in all declarations.\n\n\t* mpf/*.c: Prepend mpn calls with __.\n\nWed May 18 20:57:06 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpf/*ui*.c: Make ui argument `long' for consistency with mpz\n\tfunctions.\n\n\t* mpf/div_ui.c: Simplify.\n\nTue May 17 01:05:14 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpz/*.c: Prepend mpn calls with __.\n\n\t* mpz/mul_ui.c: Use mpn_mul_1.\n\nMon May 16 17:19:41 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpn/i386/mul_1.S: Use C_GLOBAL_NAME.\n\t* mpn/i386/mul_1.S, mpn/i386/addmul_1.S, mpn/i386/submul_1.S:\n\tNuke use of LAB.\n\nSat May 14 14:21:02 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* gmp-impl.h: Don't define abort here.\n\n\t* mpz/pow_ui.c: Increase temporary allocation.\n\t* mpz/ui_pow_ui.c: Likewise.\n\n\t* gmp.h (mpz_add_1, mpz_sub_1): Don't call memcpy.\n\n\t* All Makefile.in: Delete spurious -I arguments.\n\tUpdate dependencies.\n\n\t* mpz/popcount.c: New file.\n\t* mpz/hamdist.c: New file.\n\n\t* All configure: Latest version from Cygnus.\n\n\t* mpq/Makefile.in: New file.\n\t* mpq/configure.in: New file.\n\t* Makefile.in, configure.in: Enable compilation of mpq.\n\n\t* mpq/set_z.c: Fix typos.\n\t* mpq/canonicalize.c: Fix typos.\n\t* mpq/cmp_ui.c: Fix typos.\n\n\t* mpf/add_ui.c: Read U->D into UP always.  Delete spurious MPN_COPY.\n\t* mpf/sub_ui.c: Likewise.\n\n\t* gmp-impl.h: Don't redefine alloca.\n\n\t* COPYING.LIB: Renamed from COPYING.\n\nWed May 11 01:45:44 1994  Torbjorn Granlund  (tege@adder.cygnus.com)\n\n\t* mpz/powm_ui.c: When shifting E left by C+1, handle out-of-range\n\tshift counts.  
Fix typo when testing negative_result.\n\t* mpz/powm.c: Likewise.\n\n\t* mpz/ui_pow_ui.c: New file.\n\t* mpz/Makefile.in: Update.\n\n\t* mpz/pow_ui.c: Call __mpn_mul_n instead of __mpn_mul when possible.\n\n\t* mpz/div.c, mpz/div_ui.c, mpz/gcd.c: Prefix external mpn calls.\n\t* mpz/gcd.c: Declare mpn_xmod.\n\n\t* mpz/powm.c: Major changes to accommodate changed mpn semantics.\n\t* mpz/powm_ui.c: Update from mpz/powm.c.\n\n\t* mpz/tests/tst-io.c: New file.\n\t* mpz/tests/tst-logic: New file.\n\t* mpz/tests/Makefile.in: Update.\n\n\t* mpz/inp_str.c: Get base right when checking for first digit.\n\t* mpz/inp_str.c: Allocate more space for DEST when needed.\n\n\t* mpz/com.c: Use mpn_add_1 and mpn_sub_1.\n\t* mpz/and.c, mpz/ior.c: Likewise.  Simplify somewhat.\n\n\t* mpz/add_ui.c: Use mpn_add_1 and mpn_sub_1.\n\tRename parameters to be consistent with mpz/sub_ui.\n\tGeneral simplifications.\n\t* mpz/sub_ui.x: Likewise.\n\nTue Aug 10 19:41:16 1993  Torbjorn Granlund  (tege@prudens.matematik.su.se)\n\n\t* mpf: New directory.\n\t* mpf/*.c: Merge basic set of mpf functions.\n\n\t* Many logs missing...\n\nSun Apr 25 18:40:26 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)\n\n\t* memory.c: Use #if instead of #ifdef for __STDC__ for consistency.\n\t* bsd/xtom.c: Likewise.\n\n\t* mpz/div.c: Remove free_me and free_me_size and their usage.\n\tUse mpn_divmod for division; corresponding changes in return value\n\tconvention.\n\t* mpz/powm.c: `carry_digit' => `carry_limb'.\n\t* bsd/sdiv.c: Clearify comment.\n\nSun Apr 25 00:31:28 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)\n\n\t* longlong.h (__udiv_qrnnd_c): Make all variables `unsigned long int'.\n\nSat Apr 24 16:23:33 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)\n\n\t* longlong.h (__udiv_qrnnd_c): Make all variables `unsigned long int'.\n\n\t* gmp-impl.h: #define ABS.\n\t* (Many files): Use ABS instead of abs.\n\n\t* mpn/generic/sqrt.c, mpz/clrbit.c, mpz/get_si.c, mpz/mod_2exp.c,\n\tmpz/pow_ui.c: Cast 1 to mp_limb before 
shifting.\n\n\t* mpz/perfsqr.c: Use #if, not plain if for exclusion of code for\n\tnon-32-bit machines.\n\nTue Apr 20 13:13:58 1993  Torbjorn Granlund  (tege@du.nada.kth.se)\n\n\t* mpn/generic/sqrt.c: Handle overflow for intermediate quotients by\n\trounding them down to fit.\n\n\t* mpz/perfsqr.c (PP): Define in hexadecimal to avoid GCC warnings.\n\n\t* mpz/inp_str.c (char_ok_for_base): New function.\n\t(mpz_inp_str): Use it.\n\nSun Mar 28 21:54:06 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)\n\n\t* mpz/inp_raw.c: Allocate x_index, not xsize limbs.\n\nMon Mar 15 11:44:06 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)\n\n\t* mpz/pprime.c: Declare param `const'.\n\t* gmp.h: Add declarations for mpz_com.\n\nThu Feb 18 14:10:34 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)\n\n\t* mpq/add.c, mpq/sub.c: Call mpz_clear for t.\n\nFri Feb 12 20:27:34 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)\n\n\t* mpz/inp_str.c: Recog minus sign as first character.\n\nWed Feb  3 01:36:02 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)\n\n\t* mpz/iset.c: Handle 0 size.\n\nTue Feb  2 13:03:33 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)\n\n\t* mpz/mod_ui.c: Initialize dividend_size before it's used.\n\nMon Jan  4 09:11:15 1993  Torbjorn Granlund  (tege@sics.se)\n\n\t* bsd/itom.c: Declare param explicitly 'signed'.\n\t* bsd/sdiv.c: Likewise.\n\n\t* mpq/cmp.c: Remove unused variable tmp_size.\n\t* mpz/powm_ui.c: Fix typo in esize==0 if stmt.\n\t* mpz/powm.c: Likewise.\n\nSun Nov 29 01:16:11 1992  Torbjorn Granlund  (tege@sics.se)\n\n\t* mpn/generic/divmod_1.c (mpn_divmod_1): Handle\n\tdivisor_limb == 1 << (BITS_PER_MP_LIMB - 1)\n\tspecifically.\n\n\t* Reorganize sources.  New directories mpn, mpn/MACH, mpn/generic,\n\tmpz, mpq, bsd.  
Use full file name for change logs hereafter.\n\nWed Oct 28 17:40:04 1992  Torbjorn Granlund  (tege@jupiter.sics.se)\n\n\t* longlong.h (__hppa umul_ppmm): Fix typos.\n\t(__hppa sub_ddmmss): Swap input arguments.\n\n\t* mpz_perfsqr.c (mpz_perfect_square_p): Avoid , before } in\n\tinitializator.\n\nSun Oct 25 20:30:06 1992  Torbjorn Granlund  (tege@jupiter.sics.se)\n\n\t* mpz_pprime.c (mpz_probab_prime_p): Handle numbers <= 3\n\tspecifically (used to consider all negative numbers prime).\n\n\t* mpz_powm_ui: `carry_digit' => `carry_limb'.\n\n\t* sdiv: Handle zero dividend specifically.  Replace most code in\n\tthis function with a call to mpn_divmod_1.\n\nFri Sep 11 22:15:55 1992  Torbjorn Granlund  (tege@tarrega.sics.se)\n\n\t* mpq_clear: Don't free the MP_RAT!\n\n\t* mpn_lshift, mpn_rshift, mpn_rshiftci: Remove `long' from 4:th arg.\n\nThu Sep  3 01:47:07 1992  Torbjorn Granlund  (tege@jupiter.sics.se)\n\n\t* All files: Remove leading _ from mpn function names.\n\nWed Sep  2 22:21:16 1992  Torbjorn Granlund  (tege@jupiter.sics.se)\n\n\tFix from Jan-Hein Buhrman:\n\t* mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c: Make them work as documented.\n\n\t* mpz_mmod.c, mpz_mdm.c: Move decl of TEMP_DIVISOR to reflect its\n\tlife.\n\nSun Aug 30 18:37:15 1992  Torbjorn Granlund  (tege@jupiter.sics.se)\n\n\t* _mpz_get_str: Use mpz_sizeinbase for computing out_len.\n\t* _mpz_get_str: Don't remove leading zeros.  Abort if there are some.\n\nWed Mar  4 17:56:56 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp.h: Change definition of MP_INT to make the & before params\n\toptional.  Use typedef to define it.\n\t* mp.h: Use typedef to define MINT.\n\nTue Feb 18 14:38:39 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tlonglong.h (hppa umul_ppmm): Add missing semicolon.  Declare type\n\tof __w1 and __w0.\n\nFri Feb 14 21:33:21 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Make default count_leading_zeros work for machines >\n\t32 bits.  
Prepend `__' before local variables to avoid conflicts\n\twith users' variables.\n\n\t* mpn_dm_1.c: Remove udiv_qrnnd_preinv ...\n\t* gmp-impl.h: ... and put it here.\n\t* mpn_mod_1: Use udiv_qrnnd_preinv if it is faster than udiv_qrnnd.\n\nTue Feb 11 17:20:12 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_mul: Enhance base case by handling small multiplicands.\n\t* mpn_dm_1.c: Revert last change.\n\nMon Feb 10 11:55:15 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_dm_1.c: Don't define udiv_qrnnd_preinv unless needed.\n\nFri Feb  7 16:26:16 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_mul: Replace code for base case.\n\nThu Feb  6 15:10:42 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_dm_1.c (_mpn_divmod_1): Add code for avoiding division by\n\tpre-inverting divisor.\n\nSun Feb  2 11:10:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Make __LLDEBUG__ work differently.\n\t(_IBMR2): Reinsert old code.\n\nSat Feb  1 16:43:00 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h (#ifdef _IBMR2): Replace udiv_qrnnd with new code\n\tusing floating point operations.  Don't define\n\tUDIV_NEEDS_NORMALIZATION any longer.\n\nFri Jan 31 15:09:13 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Define UMUL_TIME and UDIV_TIME for most machines.\n\t* longlong.h (#ifdef __hppa): Define umul_ppmm.\n\nWed Jan 29 16:41:36 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_cmp: Only one length parameter, assume operand lengths are\n\tthe same.  
Don't require normalization.\n\t* mpq_cmp, mpz_add, mpz_sub, mpz_gcd, mpn_mul, mpn_sqrt: Change for\n\tnew mpn_cmp definition.\n\nTue Jan 28 11:18:55 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* _mpz_get_str: Fix typo in comment.\n\nMon Jan 27 09:44:16 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Makefile.in: Add new files.\n\n\t* mpn_dm_1.c: New file with function _mpn_divmod_1.\n\t* mpz_dm_ui.c (mpz_divmod_ui): Use _mpn_divmod_1.\n\t* mpz_div_ui: Likewise.\n\n\t* mpn_mod_1.c: New file with function _mpn_mod_1.\n\t* mpz_mod_ui: Use _mpn_mod_1.\n\nThu Jan 23 18:54:09 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tBug found by Paul Zimmermann (zimmermann@inria.inria.fr):\n\t* mpz_div_ui.c (mpz_div_ui), mpz_dm_ui.c (mpz_divmod_ui):\n\tHandle dividend == 0.\n\nWed Jan 22 12:02:26 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_pprime.c: Use \"\" for #include.\n\nSun Jan 19 13:36:55 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_rshiftci.c (header): Correct comment.\n\nWed Jan 15 18:56:04 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_powm, mpz_powm_ui (if (bsize > msize)): Do alloca (bsize + 1)\n\tto make space for ignored quotient at the end.  
(The quotient might\n\talways be an extra limb.)\n\nTue Jan 14 21:28:48 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_powm_ui: Fix comment.\n\t* mpz_powm: Likewise.\n\nMon Jan 13 18:16:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* tests/Makefile.in: Prepend $(TEST_PREFIX) to Makefile target.\n\nSun Jan 12 13:54:28 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tFixes from Kazumaro Aoki:\n\t* mpz_out_raw: Take abs of size to handle negative values.\n\t* mpz_inp_raw: Reallocate before reading ptr from X.\n\t* mpz_inp_raw: Store, don't read, size to x->size.\n\nTue Jan  7 17:50:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp.h, mp.h: Remove parameter names from prototypes.\n\nSun Dec 15 00:09:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* tests/Makefile.in: Prepend \"./\" to file names when executing\n\ttests.\n\n\t* Makefile.in: Fix many problems.\n\nSat Dec 14 01:00:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_sqrt.c: New file with _mpn_sqrt.\n\t* mpz_sqrt, mpz_sqrtrem, mpz_perfect_square_p: Use _mpn_sqrt.\n\t* msqrt.c: Delete.  
Create from mpz_sqrtrem.c in Makefile.in.\n\t* mpz_do_sqrt.c: Delete.\n\t* Makefile.in: Update to reflect these changes.\n\n\t* Makefile.in, configure, configure.subr: New files\n\t(from bothner@cygnus.com).\n\t* dist-Makefile: Delete.\n\n\t* mpz_fac_ui: Fix comment.\n\n\t* mpz_random2: Rewrite a bit to make it possible for the most\n\tsignificant limb to be == 1.\n\n\t* mpz_pprime.c (mpz_probab_prime_p): Remove \\t\\n.\n\nFri Dec 13 23:10:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_do_sqrt: Simplify special case for U == 0.\n\t* m*sqrt*.c, mpz_perfsqr.c (mpz_perfect_square_p):\n\t  Rename _mpz_impl_sqrt to _mpz_do_sqrt.\n\nFri Dec 13 12:52:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp-impl.h (MPZ_TMP_INIT): Cast to the right type.\n\nThu Dec 12 22:17:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_add, mpn_sub, mpn_mul, mpn_div: Change type of several\n\tvariables to mp_size.\n\nWed Dec 11 22:00:34 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_rshift.c: Fix header comments.\n\nMon Dec  9 17:46:10 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.2.\n\n\t* gmp-impl.h (MPZ_TMP_INIT): Cast alloca return value.\n\n\t* dist-Makefile: Add missing dependency for cre-mparam.\n\n\t* mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c, mpz_mdiv_ui.c,\n\t  mpz_mmod_ui.c, mpz_mdm_ui.c: Remove obsolete comment.\n\n\t* dist-Makefile (clean): clean in tests subdir too.\n\t* tests/Makefile: Define default values for ROOT and SUB.\n\n\t* longlong.h (__a29k__ udiv_qrnnd): Change \"q\" to \"1\" for operand\n\t2 constraint.\n\nMon Nov 11 00:06:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_sizeinb.c (mpz_sizeinbase): Special code for size == 0.\n\nSat Nov  9 23:47:38 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.1.94.\n\n\t* dist-Makefile, Makefile, tests/Makefile: Merge tests into\n\tdistribution.\n\nFri Nov  8 22:57:19 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp.h: Don't use keyword `signed' for 
non-ANSI compilers.\n\nThu Nov  7 22:06:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Cosmetic changes to keep it identical to gcc2 version\n\tof longlong.h.\n\t* longlong.h (__ibm032__): Fix operand order for add_ssaaaa and\n\tsub_ddmmss.\n\nMon Nov  4 00:36:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_mul: Fix indentation.\n\n\t* mpz_do_sqrt: Don't assume 32 bit limbs (had constant\n\t4294967296.0).\n\t* mpz_do_sqrt: Handle overflow in conversion from double returned\n\tby SQRT to mp_limb.\n\n\t* gmp.h: Add missing function definitions.\n\nSun Nov  3 18:25:25 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_pow_ui: Change type of `i' to int.\n\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\n\t* ChangeLog: Add change log entry.\nStack overflow.\n\n\t* mpz_pow_ui.c: Fix typo in comment.\n\n\t* dist-Makefile: Create rpow.c from mpz_powm_ui.c.\n\t* mpz_powm_ui.c: Add code for rpow.\n\t* rpow.c: Delete this file.  
The rpow function is now implemented\n\tin mpz_powm_ui.c.\n\n\t* mpz_fac_ui.c: New file.\n\t* gmp.h, dist-Makefile: Add stuff for mpz_fac_ui.\n\n\tBug found by John Amanatides (amana@sasquatch.cs.yorku.ca):\n\t* mpz_powm_ui, mpz_powm: Call _mpn_mul in the right way, with\n\tthe first argument not smaller than the second.\n\nTue Oct 29 13:56:55 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* cre-conv-tab.c (main), cre-mparam.c (main): Fix typo in output\n\theader text.\n\nMon Oct 28 00:35:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_random2: Handle size == 0.\n\n\t* gmp-impl.h (struct __mp_bases): Rename chars_per_limb_exactly to\n\tchars_per_bit_exactly, and change its definition.\n\t* cre-conv-tab.c (main): Output field according to its new\n\tdefinition.\n\t* mpz_out_str, _mpz_get_str, mpz_sizeinb, mout:\n\tUse chars_per_bit_exactly.\n\n\t* mpz_random2: Change the loop termination condition in order to\n\tget a large most significant limb with higher probability.\n\n\t* gmp.h: Add declaration of new mpz_random2 and mpz_get_si.\n\t* mpz_get_si.c: New file.\n\t* dist-Makefile: Add mpz_random2 and mpz_get_si.\n\n\t* mpz_sizeinb.c (mpz_sizeinbase): Special code for base being a\n\tpower of 2, giving exact result.\n\n\t* mpn_mul: Fix MPN_MUL_VERIFY in various ways.\n\t* mpn_mul: New macro KARATSUBA_THRESHOLD.\n\t* mpn_mul (karatsuba's algorithm): Don't write intermediate results\n\tto prodp, use temporary pp instead.  (Intermediate results can be\n\tlarger than the final result, possibly writing into hyperspace.)\n\t* mpn_mul: Make smarter choice between Karatsuba's algorithm and the\n\tshortcut algorithm.\n\t* mpn_mul: Fix typo, cy instead of xcy.  Unify carry handling code.\n\nSun Oct 27 19:57:32 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_mul: In non-classical case, choose Karatsuba's algorithm only\n\twhen usize > 1.5 vsize.\n\n\t* mpn_mul: Break between classical and Karatsuba's algorithm at\n\tKARATSUBA_THRESHOLD, if defined.  
Default to 8.\n\n\t* mpn_div: Kludge to fix stray memory read.\n\nSat Oct 26 20:06:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_gcdext: Handle a = b = 0.  Remove memory leakage by calling\n\tmpz_clear for all temporary variables.\n\n\t* mpz_gcd: Reduce w_bcnt in _mpn_lshift call to hold that\n\tfunction's argument constraints.  Compute wsize correctly.\n\n\t* mpz_gcd: Fix typo in comment.\n\n\t* memory.c (_mp_default_allocate, _mp_default_reallocate): Call\n\tabort if allocation fails, don't just exit.\n\nFri Oct 25 22:17:20 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_random2.c: New file.\n\nThu Oct 17 18:06:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tBugs found by Pierre-Joseph Gailly (pjg@sunbim.be):\n\t* mpq_cmp: Take sign into account, don't just compare the\n\tmagnitudes.\n\t* mpq_cmp: Call _mpn_mul in the right way, with the first argument\n\tnot smaller than the second.\n\nWed Oct 16 19:27:32 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_random: Ensure the result is normalized.\n\nTue Oct 15 14:55:13 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_clrbit: Support non-ANSI compilers.\n\nWed Oct  9 18:03:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h (68k add_ssaaaa, sub_ddmmss): Generalize constraints.\n\nTue Oct  8 17:42:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_mdm_ui: Add comments.\n\n\t* mpz_mdiv: Use MPZ_TMP_INIT instead of mpz_init.\n\t* mpz_init_ui: Change spacing and header comment.\n\nThu Oct  3 18:36:13 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* dist-Makefile: Prepend `./' before some filenames.\n\nSun Sep 29 14:02:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.1 (public).\n\n\t* mpz_com: New name of mpz_not.\n\t* dist-Makefile: Change mpz_not to mpz_com.\n\nTue Sep 24 12:44:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Fix header comment.\n\nMon Sep  9 15:16:24 1991  Torbjorn Granlund  
(tege@zevs.sics.se)\n\n\tReleased 1.0.92.\n\n\t* mpn_mul.c (_mpn_mul): Handle leading zero limbs in non-Karatsuba\n\tcase.\n\n\t* longlong.h (m68000 umul_ppmm): Clobber one register less by\n\tslightly rearranging the code.\n\nSun Sep  1 18:53:25 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* dist-Makefile (stamp-stddefh): Fix typo.\n\nSat Aug 31 20:41:31 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.0.91.\n\n\t* mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c, mpz_mdiv_ui.c,\n\t  mpz_mmod_ui.c, mpz_mdm_ui.c: New files and functions.\n\t* gmp.h, gmp.texi: Define the new functions.\n\nFri Aug 30 08:32:56 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_gcdext: Compute t argument from the other quantities at the\n\t  end, of the function, not in the loop.  New feature: Allow t to be\n\t  NULL.\n\n\t* mpz_add.c, mpz_sub.c, mpz_mul.c, mpz_powm.c, mpz_gcd.c: Don't\n\t  include \"mp.h\".  Use type name `MP_INT' always.\n\n\t* dist-Makefile, mpz_cmp.c: Merge mcmp.c from mpz_cmp.c.\n\nWed Aug 28 00:45:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* dist-Makefile (documentation): Go via tmp.texi to avoid the\n\t  creation of gmp.dvi if any errors occur.  Make tex read input\n\t  from /dev/null.\n\nFri Aug 23 15:58:52 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h (68020, i386): Don't define machine-dependent\n\t  __umulsidi3 (so the default definition is used).\n\t* longlong.h (all machines): Cast all operands, sources and\n\t  destinations, to `unsigned long int'.\n\t* longlong.h: Add gmicro support.\n\nThu Aug 22 00:28:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Rename BITS_PER_LONG to LONG_TYPE_SIZE.\n\t* longlong.h (__ibm032__): Define count_leading_zeros and umul_ppmm.\n\t* longlong.h: Define UMUL_TIME and UDIV_TIME for some CPUs.\n\t* _mpz_get_str.c: Add code to do division by big_base using only\n\t  umul_qrnnd, if that is faster.  
Use UMUL_TIME and UDIV_TIME to\n\t  decide which variant to use.\n\nWed Aug 21 15:45:23 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h (__sparc__ umul_ppmm): Move two insn from end to the\n\t  nops.  (Saves two insn.)\n\n\t* longlong.h (__sparc__ umul_ppmm): Rewrite in order to avoid\n\t  branch, and to permit input/output register overlap.\n\n\t* longlong.h (__29k__): Remove duplicated udiv_qrnnd definition.\n\t* longlong.h (__29k__ umul_ppmm): Split asm instructions into two\n\t  asm statements (gives better code if either the upper or lower\n\t  part of the product is unused.\n\nTue Aug 20 17:57:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* _mpz_get_str.c (outside of functions): Remove\n\t  num_to_ascii_lower_case and num_to_ascii_upper_case.  Use string\n\t  constants in the function instead.\n\nMon Aug 19 00:37:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* cre-conv-tab.c (main): Output table in hex.  Output 4 fields, not\n\t  3, for components 0 and 1.\n\n\t* gmp.h: Add declaration of mpq_neg.\n\n\tReleased 1.0beta.13.\n\n\t* _mpz_set_str.c (mpz_set_str): Cast EOF and SPC to char before\n\t  comparing to enum literals SPC and EOF.  This makes the code work\n\t  for compilers where `char' is unsigned.  (Bug found by Brian\n\t  Beuning).\n\n\tReleased 1.0beta.12.\n\n\t* mpz_mod_ui: Remove references to quot.  Remove quot_ptr, quot_size\n\t  declarations and assignment code.\n\nSun Aug 18 14:44:26 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_mod_ui: Handle dividend < 0.\n\n\tReleased 1.0beta.11.\n\n\t* mpz_dm_ui, mpz_div_ui, mpz_mod_ui, sdiv: Make them share the same\n\t  general structure, variable names, etc.\n\n\t* sdiv: Un-normalize the remainder in n1 before it is negated.\n\n\t* longlong.h: Mention UDIV_NEEDS_NORMALIZATION in description of\n\t  udiv_qrnnd.\n\n\t* mpz_dm_ui.c (mpz_divmod_ui), mpz_div_ui.c (mpz_div_ui): Increment\n\t  the quotient size if the dividend size is incremented.  
(Bug found\n\t  by Brian Beuning.)\n\n\t* mpz_mod_ui: Shift back the remainder, if UDIV_NEEDS_NORMALIZATION.\n\t  (Bug found by Brian Beuning.)\n\n\t* mpz_mod_ui: Replace \"digit\" by \"limb\".\n\n\t* mpz_perfsqr.c (mpz_perfect_square_p): Disable second test case\n\t  for non-32-bit machines (PP is hardwired for such machines).\n\t* mpz_perfsqr.c (outside of functions): Define PP value with an L.\n\n\t* mpn_mul.c (_mpn_mul): Add verification code that is activated if\n\t  DEBUG is defined.  Replace \"digit\" by \"limb\".\n\t* mpn_mul.c (_mpn_mul: Karatsuba's algorithm: 4.): Normalize temp\n\t  after the addition.\n\t* mpn_mul.c (_mpn_mul: Karatsuba's algorithm: 1.): Compare u0_size\n\t  and v0_size, and according to the result, swap arguments in\n\t  recursive call.  (Don't violate mpn_mul's own argument\n\t  constraints.)\n\nFri Aug 16 13:47:12 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.0beta.10.\n\n\t* longlong.h (IBMR2): Add udiv_qrnnd.\n\n\t* mpz_perfsqr: Remove unused variables.\n\n\t* mpz_and (case for different signs): Initialize loop variable i!\n\n\t* dist-Makefile: Update automatically generated dependencies.\n\t* dist-Makefile (madd.c, msub.c, pow.c, mult.c, gcd.c): Add mp.h,\n\t  etc to dependency file lists.\n\n\t* longlong.h (add_ssaaaa, sub_ddmmss [C default versions]): Make __x\n\t  `unsigned long int'.\n\t* longlong.h: Add `int' after `unsigned' and `long' everywhere.\n\nWed Aug 14 18:06:48 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Add ARM, i860 support.\n\n\t* mpn_lshift, mpn_rshift, mpn_rshiftci: Rename *_word with *_limb.\n\nTue Aug 13 21:57:43 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* _mpz_get_str.c, _mpz_set_str.c, mpz_sizeinb.c (mpz_sizeinbase),\n\t  mpz_out_str.c, mout.c: Remove declaration of __mp_bases.\n\t* gmp-impl.h: Put it here, and make it `const'.\n\t* cre-conv-tab.c (main): Make struct __mp_bases `const'.\n\nMon Aug 12 17:11:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* 
cre-conv-tab.c (main): Use %lu in printf for long ints.\n\n\t* dist-Makefile: Fix cre-* dependencies.\n\n\t* cre-conv-tab.c (main): Output field big_base_inverted.\n\n\t* gmp-impl.h (struct bases): New field big_base_inverted.\n\t* gmp-impl.h (struct bases): Change type of chars_per_limb_exactly\n\t  to float (in order to keep the structure smaller).\n\n\t* mp.h, gmp.h: Change names of macros for avoiding multiple\n\t  includes.\n\nFri Aug  9 18:01:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* _mpz_get_str: Only shift limb array if normalization_steps != 0\n\t  (optimization).\n\n\t* longlong.h (sparc umul_ppmm): Use __asm__, not asm.\n\t* longlong.h (IBMR2 umul_ppmm): Refer to __m0 and __m1, not to m0\n\t  and m1 (overlap between output and input operands did not work).\n\t* longlong.h: Add VAX, ROMP and HP-PA support.\n\t* longlong.h: Sort the machine dependent code in alphabetical order\n\t  on the CPU name.\n\t* longlong.h: Hack comments.\n\nThu Aug  8 14:13:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\tReleased 1.0beta.9.\n\n\t* longlong.h: Define BITS_PER_LONG to 32 if it's not already\n\t  defined.\n\t* Define __BITS4 to BITS_PER_LONG / 4.\n\t* Don't assume 32 bit word size in \"count_leading_zeros\" C macro.\n\t  Use __BITS4 and BITS_PER_LONG instead.\n\n\t* longlong.h: Don't #undef internal macros (reverse change of Aug 3).\n\n\t* longlong.h (68k): Define add_ssaaaa sub_ddmmss, and umul_ppmm\n\t  even for plain mc68000.\n\n\t* mpq_div: Flip the sign of the numerator *and* denominator of the\n\t  result if the intermediate denominator is negative.\n\n\t* mpz_and.c, mpz_ior.c: Use MPN_COPY for all copying operations.\n\n\t* mpz_and.c: Compute the result size more conservatively.\n\t* mpz_ior.c: Likewise.\n\n\t* mpz_realloc: Never allocate zero space even if NEW_SIZE == 0.\n\n\t* dist-Makefile: Remove madd.c, msub.c, pow.c, mult.c, gcd.c from\n\t  BSDMP_SRCS.\n\n\t* dist-Makefile: Create mult.c from mpz_mul.c.\n\t* mult.c: Delete this 
file.\n\n\t* _mpz_set_str: Normalize the result (for bases 2, 4, 8... it was\n\t  not done properly if the input string had many leading zeros).\n\nSun Aug  4 16:54:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* dist-Makefile (gcd.c, pow.c, madd.c, msub.c): Make these targets\n\t  work with VPATH and GNU MP.\n\n\t* mpz_gcd: Don't call mpz_set; inline its functionality.\n\n\t* mpq_mul, mpq_div: Fix several serious typos.\n\n\t* mpz_dmincl, mpz_div: Don't normalize the quotient if it's already\n\t  zero.\n\n\t* mpq_neg.c: New file.\n\n\t* dist-Makefile: Remove obsolete dependencies.\n\n\t* mpz_sub: Fix typo.\n\n\tBugs found by Pierre-Joseph Gailly (pjg@sunbim.be):\n\t* mpq_mul, mpq_div: Initialize tmp[12] variables even when the gcd\n\t  is just 1.\n\t* mpz_gcd: Handle gcd(0,v) and gcd(u,0) in special cases.\n\nSat Aug  3 23:45:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h: Clean up comments.\n\t* longlong.h: #undef internal macros.\n\nFri Aug  2 18:29:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpq_set_si, mpq_set_ui: Canonicalize 0/x to 0/1.\n\t* mpq_set_si, mpq_set_ui: Cosmetic formatting changes.\n\n\t* mpz_dmincl.c: Normalize the remainder before shifting it back.\n\n\t* mpz_dm_ui.c (mpz_divmod_ui): Handle rem == dividend.\n\n\t* mpn_div.c: Fix comment.\n\n\t* mpz_add.c, mpz_sub.c: Use __MP_INT (not MP_INT) for intermediate\n\t  type, in order to work for both GNU and Berkeley functions.\n\n\t* dist-Makefile: Create gcd.c from mpz_gcd.c, pow.c from mpz_powm,\n\t  madd.c from mpz_add.c, msub.c from mpz_sub.c.\n\t  respectively.\n\t* pow.c, gcd.c, mpz_powmincl.c, madd.c, msub.c: Remove these.\n\t* mpz_powm.c, mpz_gcd.c, mpz_add.c, mpz_sub.c: #ifdef for GNU and\n\t  Berkeley function name variants.\n\t* dist-Makefile: Add created files to \"clean\" target.\n\nTue Jul 16 15:19:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpq_get_den: No need for absolute value of the size, the\n\t  denominator is always 
positive.\n\n\t* mpz_get_ui: If the operand is zero, return zero.  Don't read the\n\t  limb array!\n\n\t* mpz_dmincl.c: Don't ignore the return value from _mpn_rshift, it\n\t  is the size of the remainder.\n\nMon Jul 15 11:08:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Several files: Remove unused variables and functions.\n\n\t* gmp-impl.h: Declare _mpz_impl_sqrt.\n\n\t* mpz_dm_ui (mpz_divmod_ui), sdiv: Shift back the remainder if\n\t  UDIV_NEEDS_NORMALIZATION.  (Fix from Brian Beuning.)\n\n\t* mpz_dm_ui.c, sdiv: Replace *digit with *limb.\n\n\t* mpz_ior: Add missing else statement in -OP1 | -OP2 case.\n\t* mpz_ior: Add missing else statement in OP1 | -OP2 case.\n\t* mpz_ior: Swap also OP1 and OP2 pointers in -OP1 & OP2 case.\n\t* mpz_ior: Duplicate _mpz_realloc code.\n\n\t* mpz_and: Add missing else statement in -OP1 & -OP2 case.\n\t* mpz_and: Rewrite OP1 & -OP2 case.\n\t* mpz_and: Swap also OP1 and OP2 pointers in -OP1 & OP2 case.\n\n\t* mpz_gcdext: Loop in d1.size (not b->size).  (Fix from Brian\n\t  Beuning.)\n\n\t* mpz_perfsqr: Fix argument order in _mpz_impl_sqrt call.  (Fix from\n\t  Brian Beuning.)\n\nFri Jul 12 17:10:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpq_set.c, mpq_set_ui.c, mpq_set_si.c, mpq_inv.c,\n\t  mpq_get_num.c, mpq_get_den.c, mpq_set_num.c, mpq_set_den.c:\n\t  New files.\n\n\t* mpz_dmincl.c: Remove second re-allocation of rem->d.  It\n\t  was never executed.\n\n\t* dist-Makefile: Use `-r' instead of `-x' for test for ranlib (as\n\t  some unixes' test doesn't have the -r option).\n\n\t* *.*: Cast allocated pointers to the appropriate type (makes old C\n\t  compilers happier).\n\n\t* cre-conv-tab.c (main): Divide max_uli by 2 and multiply again\n\t  after conversion to double.  (Kludge for broken C compilers.)\n\n\t* dist-Makefile (stamp-stddefh): New target.  
Test if \"stddef.h\"\n\t  exists in the system and creates a minimal one if it does not\n\t  exist.\n\t* cre-stddefh.c: New file.\n\t* dist-Makefile: Make libgmp.a and libmp.a depend on stamp-stddefh.\n\t* dist-Makefile (clean): Add some more.\n\t* gmp.h, mp.h: Unconditionally include \"stddef.h\".\n\nThu Jul 11 10:08:21 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* min: Do ungetc of last read character.\n\t* min.c: include stdio.h.\n\n\t* dist-Makefile: Go via tmp- files for cre* redirection.\n\t* dist-Makefile: Add tmp* to \"clean\" target.\n\n\t* dist-Makefile: Use LOCAL_CC for cre*, to simplyfy cross\n\t  compilation.\n\n\t* gmp.h, mp.h: Don't define NULL here.\n\t* gmp-impl.h: Define it here.\n\nWed Jul 10 14:13:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_mod_2exp: Don't copy too much, overwriting most significant\n\t  limb.\n\n\t* mpz_and, mpz_ior: Don't read op[12]_ptr from op[12] when\n\t  reallocating res, if op[12]_ptr got their value from alloca.\n\n\t* mpz_and, mpz_ior: Clear up comments.\n\n\t* cre-mparam.c: Output parameters for `short int' and `int'.\n\n\t* mpz_and, mpz_ior: Negate negative op[12]_size in several places.\n\nTue Jul  9 18:40:30 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp.h, mp.h: Test for _SIZE_T defined before typedef'ing size_t.\n\t  (Fix for Sun lossage.)\n\n\t* gmp.h: Add declaration of mpq_clear.\n\n\t* dist-Makefile: Chack if \"ranlib\" exists, before using it.\n\t* dist-Makefile: Add mpz_sqrtrem.c and mpz_size.c.\n\t* mpz_powm: Fix typo, \"pow\" instead of \"mpz_powm\".\n\nFri Jul  5 19:08:09 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* move: Remove incorrect comment.\n\n\t* mpz_free, mpq_free: Rename to *_clear.\n\t* dist-Makefile: Likewise.\n\t* mpq_add, mpq_sub, mpq_mul, mpq_div: Likewise.\n\n\t* mpz_dmincl.c: Don't call \"move\", inline its functionality.\n\nThu Jul  4 00:06:39 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Makefile: Include dist-Makefile.  
Fix dist target to include\n\t  dist-Makefile (with the name \"Makefile\" in the archive).\n\n\t* dist-Makefile: New file made from Makefile.  Add new mpz_...\n\t  functions.\n\n\t* mpz_powincl.c New file for mpz_powm (Berkeley MP pow)\n\t  functionality.  Avoids code duplication.\n\t* pow.c, mpz_powm.c: Include mpz_powincl.c\n\n\t* mpz_dmincl.c: New file containing general division code.  Avoids\n\t  code duplication.\n\t* mpz_dm.c (mpz_divmod), mpz_mod.c (mpz_mod), mdiv.c (mdiv): Include\n\t  mpz_dmincl.c.\n\n\t* _mpz_get_str: Don't call memmove, unless HAS_MEMMOVE is defined.\n\t  Instead, write the overlapping memory copying inline.\n\n\t* mpz_dm_ui.c: New name for mpz_divmod_ui.c (SysV file name limit).\n\n\t* longlong.h: Don't use #elif.\n\t* mpz_do_sqrt.c: Likewise.\n\n\t* longlong.h: Use __asm__ instead of asm.\n\t* longlong.h (sparc udiv_qrnnd): Make it to one string over several\n\t  lines.\n\n\t* longlong.h: Preend __ll_ to B, highpart, and lowpart.\n\n\t* longlong.h: Move array t in count_leading_zeros to the new file\n\t  mp_clz_tab.c.  Rename the array __clz_tab.\n\t* All files: #ifdef for traditional C compatibility.\n\nWed Jul  3 11:42:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_and: Initialize res_ptr always (used to be initialized only\n\t  when reallocating).\n\n\t* longlong.h (umul_ppmm [C variant]): Make __ul...__vh\n\t  `unsigned int', and cast the multiplications.  This way\n\t  compilers more easily can choose cheaper multiplication\n\t  instructions.\n\n\t* mpz_mod_2exp: Handle input argument < modulo argument.\n\t* mpz_many: Make sure mp_size is the type for sizes, not int.\n\n\t* mpz_init, mpz_init_set*, mpq_init, mpq_add, mpq_sub, mpq_mul,\n\t  mpq_div: Change mpz_init* interface.  
Structure pointer as first\n\t  arg to initialization function, no longer *return* struct.\n\nSun Jun 30 19:21:44 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Rename mpz_impl_sqrt.c to mpz_do_sqrt.c to satisfy SysV 14\n\t  character file name length limit.\n\n\t* Most files: Rename MINT to MP_INT.  Rename MRAT to MP_RAT.\n\t* mpz_sizeinb.c: New file with function mpz_sizeinbase.\n\t* mp_bases.c: New file, with array __mp_bases.\n\t* _mpz_get_str, _mpz_set_str: Remove struct bases, use extern\n\t  __mp_bases instead.\n\t* mout, mpz_out_str: Use array __mp_bases instead of function\n\t  _mpz_get_cvtlen.\n\t* mpz_get_cvtlen.c: Remove.\n\t* Makefile: Update.\n\nSat Jun 29 21:57:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* longlong.h (__sparc8__ umul_ppmm): Insert 3 nop:s for wr delay.\n\t* longlong.h (___IBMR2__): Define umul_ppmm, add_ssaaaa, sub_ddmmss.\n\t* longlong.h (__sparc__): Don't call .umul; expand asm instead.\n\t  Don't define __umulsidi3 (i.e. use default definition).\n\nMon Jun 24 17:37:23 1991  Torbjorn Granlund  (tege@amon.sics.se)\n\n\t* _mpz_get_str.c (num_to_ascii_lower_case, num_to_ascii_upper_case):\n\t  Swap 't' and 's'.\n\nSat Jun 22 13:54:01 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_gcdext.c: New file.\n\n\t* mpn_mul: Handle carry and unexpected operand sizes in last\n\t  additions/subtractions.  (Bug trigged when v1_size == 1.)\n\n\t* mp*_alloc*: Rename functions to mp*_init* (files to mp*_iset*.c).\n\t* mpq_*: Call mpz_init*.\n\n\t* mpz_pow_ui, rpow: Use _mpn_mul instead of mult.  Restructure.\n\nWed May 29 20:32:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_get_cvtlen: multiply by size.\n\nSun May 26 15:01:15 1991  Torbjorn Granlund  (tege@bella.nada.kth.se)\n\n\tAlpha-release 0.95.\n\n\tFixes from Doug Lea (dl@g.oswego.edu):\n\t* mpz_mul_ui: Loop to MULT_SIZE (not PROD_SIZE).  
Adjust PROD_SIZE\n\t  correctly.\n\t* mpz_div: Prepend _ to mpz_realloc.\n\t* mpz_set_xs, mpz_set_ds: Fix typos in function name.\n\nSat May 25 22:51:16 1991  Torbjorn Granlund  (tege@bella.nada.kth.se)\n\n\t* mpz_divmod_ui: New function.\n\n\t* sdiv: Make the sign of the remainder correct.\n\nThu May 23 15:28:24 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Alpha-release 0.94.\n\n\t* mpz_mul_ui: Include longlong.h.\n\n\t* mpz_perfsqr.c (mpz_perfect_square_p): Call _mpz_impl_sqrt instead\n\t  of msqrt.\n\n\t* mpz_impl_sqrt: Don't call \"move\", inline its functionality.\n\n\t* mdiv: Use MPN_COPY instead of memcpy.\n\t* rpow, mpz_mul, mpz_mod_2exp: Likewise.\n\t* pow.c: Likewise, and fix bug in the size arg.\n\n\t* xtom: Don't use mpz_alloc, inline needed code instead.  Call\n\t  _mpz_set_str instead of mpz_set_str.\n\n\t* Makefile: Make two libraries, libmp.a and libgmp.a.\n\nThu May 22 20:25:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Add manual to distribution.\n\t* Fold in many missing routines descibed in the manual.\n\t* Update Makefile.\n\nWed May 22 13:48:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_set_str: Make it handle 0x prefix OK.\n\nSat May 18 18:31:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* memory.c (_mp_default_reallocate): Swap OLD_SIZE and NEW_SIZE\n\t  arguments.\n\t* mpz_realloc (_mpz_realloc): Swap in call to _mp_reallocate_func.\n\t* min: Likewise.\n\nThu May 16 20:43:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* memory.c: Make the default allocations functions global.\n\t* mp_set_fns (mp_set_memory_functions): Make a NULL pointer mean the\n\t  default memory function.\n\nWed May  8 20:02:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_div: Handle DEN the same as QUOT correctly by copying DEN->D\n\t  even if no normalization is needed.\n\t* mpz_div: Rework reallocation scheme, to avoid excess copying.\n\n\t* mpz_sub_ui.c, mpz_add_ui.c: New files.\n\n\t* mpz_cmp.c, mpz_cmp_ui.c: New 
files.\n\n\t* mpz_mul_2exp: Handle zero input MINT correctly.\n\n\t* mpn_rshiftci: Don't handle shift counts > BITS_PER_MP_DIGIT.\n\n\t* mpz_out_raw.c, mpz_inp_raw.c: New files for raw I/O.\n\nTue May  7 15:44:58 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_rshift: Don't handle shift counts > BITS_PER_MP_DIGIT.\n\t* mpz_div_2exp: Don't call _mpn_rshift with cnt > BITS_PER_MP_DIGIT.\n\t* gcd, mpz_gcd: Likewise.\n\n\t* gcd, mpz_gcd: Handle common 2 factors correctly.\n\nMon May  6 20:22:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* gmp-impl.h (MPN_COPY): Inline a loop instead of calling memcpy.\n\n\t* gmp-impl.h, mpz_get_str, rpow: Swap DST and SRC in TMPCOPY* macros.\n\nSun May  5 15:16:23 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpz_div: Remove test for QUOT == 0.\n\nSun Apr 28 20:21:04 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* pow: Don't make MOD normalization in place, as it's a bad idea to\n\t  write on an input parameter.\n\t* pow: Reduce BASE if it's > MOD.\n\t* pow, mult, mpz_mul: Simplify realloc code.\n\nSat Apr 27 21:03:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* Install multplication using Karatsuba's algorithm as default.\n\nFri Apr 26 01:03:57 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* msqrt: Store in ROOT even for U==0, to make msqrt(0) defined.\n\n\t* mpz_div_2exp.c, mpz_mul_2exp.c: New files for shifting right and\n\t  left, respectively.\n\t* gmp.h: Add definitions for mpz_div_2exp and mpz_mul_2exp.\n\n\t* mlshift.c, mrshift.c: Remove.\n\nWed Apr 24 21:39:22 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* mpn_mul: Check only for m2_size == 0 in function header.\n\nMon Apr 22 01:31:57 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* karatsuba.c: New file for Karatsuba's multplication algorithm.\n\n\t* mpz_random, mpz_init, mpz_mod_2exp: New files and functions.\n\n\t* mpn_cmp: Fix header comment.\n\nSun Apr 21 00:10:44 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* pow: Switch 
off initial base reduction.\n\nSat Apr 20 22:06:05 1991  Torbjorn Granlund  (tege@echnaton.sics.se)\n\n\t* mpz_get_str: Don't generate initial zeros for initial word.\n\t  Used to write outside of allocated storage.\n\nMon Apr 15 15:48:08 1991  Torbjorn Granlund  (tege@zevs.sics.se)\n\n\t* _mpz_realloc: Make it accept size in number of mp_digits.\n\t* Most functions: Use new _mpz_realloc definition.\n\n\t* mpz_set_str: Remove calls _mp_free_func.\n\n\t* Most functions: Rename mpn_* to _mpn_*.  Rename mpz_realloc to\n\t  _mpz_realloc.\n\t* mpn_lshift: Redefine _mpn_lshift to only handle small shifts.\n\t* mdiv, mpz_div, ...: Changes for new definition of _mpn_lshift.\n\t* msqrt, mp*_*shift*: Define cnt as unsigned (for speed).\n\nSat Apr  6 14:05:16 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)\n\n\t* mpn_mul: Multiply by the first digit in M2 in a special\n\t  loop instead of zeroing the product area.\n\n\t* mpz_abs.c: New file.\n\n\t* sdiv: Implement as mpz_div_si for speed.\n\n\t* mpn_add: Make it work for second source operand == 0.\n\n\t* msub: Negate the correct operand, i.e. V before swapping, not\n\t  the smaller of U and V!\n\t* madd, msub: Update abs_* when swapping operands, and not after\n\t  (optimization).\n\nFri Apr  5 00:19:36 1991  Torbjorn Granlund  (tege@black.nada.kth.se)\n\n\t* mpn_sub: Make it work for subtrahend == 0.\n\n\t* madd, msub: Rewrite to minimize mpn_cmp calls.  
Ensure\n\t  mpn_cmp is called with positive sizes (used to be called\n\t  incorrectly with negative sizes sometimes).\n\n\t* msqrt: Make it divide by zero if fed with a negative number.\n\t* Remove if statement at end of precision calculation that was\n\t  never true.\n\n\t* itom, mp.h: The argument is of type short, not int.\n\n\t* mpz_realloc, gmp.h: Make mpz_realloc return the new digit pointer.\n\n\t* mpz_get_str.c, mpz_set_str.c, mpz_new_str.c: Don't include mp.h.\n\n\t* Add COPYING to distribution.\n\n\t* mpz_div_ui.c, mpz_div_si.c, mpz_new_ui.c, mpz_new_si.c: New files.\n\nFri Mar 15 00:26:29 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)\n\n\t* Add Copyleft headers to all files.\n\n\t* mpn_mul.c, mpn_div.c: Add header comments.\n\t* mult.c, mdiv.c: Update header comments.\n\n\t* mpq_add.c, mpq_sub.c, mpq_div.c, mpq_new.c, mpq_new_ui.c,\n\t  mpq_free.c: New files for rational arithmetics.\n\n\t* mpn_lshift.c: Avoid writing the most significant word if it is 0.\n\n\t* mdiv.c: Call mpn_lshift for the normalization.\n\t* mdiv.c: Remove #ifdefs.\n\n\t* Makefile: Add ChangeLog to DISTFILES.\n\n\t* mpn_div.c: Make the add_back code work (by removing abort()).\n\t* mpn_div.c: Make it return if the quotient is size as compared\n\t  with the difference NSIZE - DSIZE.  If the stored quotient is\n\t  larger than that, return 1, otherwise 0.\n\t* gmp.h: Fix mpn_div declaration.\n\t* mdiv.c: Adopt call to mpn_div.\n\t* mpz_div.c: New file (developed from mdiv.c).\n\n\t* README: Update routine names.\n\nThu Mar 14 18:45:28 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)\n\n\t* mpq_mul.c: New file for rational multplication.\n\n\t* gmp.h: Add definitions for rational arithmetics.\n\n\t* mpn_div: Kludge the case where the high numerator digit > the\n\t  high denominator digit.  
(This code is going to be optimized later.)\n\n\t* New files: gmp.h for GNU specific functions, gmp-common.h for\n\t  definitions common for mp.h and gmp.h.\n\n\t* Ensure mp.h just defines what BSD mp.h defines.\n\n\t* pow.c: Fix typo for bp allocation.\n\n\t* Rename natural number functions to mpn_*, integer functions to\n\t  mpz_*.\n\nTue Mar  5 18:47:04 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)\n\n\t* mdiv.c (_mp_divide, case 2): Change test for estimate of Q from\n\t  \"n0 >= r\" to \"n0 > r\".\n\n\t* msqrt: Tune the increasing precision scheme, to do fewer steps.\n\nTue Mar  3 18:50:10 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)\n\n\t* msqrt: Use the low level routines.  Use low precision in the\n\tbeginning, and increase the precision as the result converges.\n\t(This optimization gave a 6-fold speedup.)\n\n\n\f\nLocal Variables:\nmode: indented-text\nleft-margin: 8\nfill-column: 79\nversion-control: never\neval: (unless (string-match \"GNU Emacs \\\\(21\\.[4-9]\\\\|22\\.\\\\)\" (emacs-version))\n\t(save-excursion\n\t  (goto-char (point-min))\n\t  (when (looking-at \"Copyright\")\n\t    (search-forward \"\\n\\n\")\n\t    (skip-chars-forward \"\\n\")\n\t    (narrow-to-region (point) (point-max))\n\t    (message \"Note! narrowed to hide copyright notice\"))))\nEnd:\n"
  },
  {
    "path": "INSTALL",
    "content": "Installation Instructions\n*************************\n\nCopyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,\nInc.\n\n   Copying and distribution of this file, with or without modification,\nare permitted in any medium without royalty provided the copyright\nnotice and this notice are preserved.  This file is offered as-is,\nwithout warranty of any kind.\n\nBasic Installation\n==================\n\n   Briefly, the shell command `./configure && make && make install'\nshould configure, build, and install this package.  The following\nmore-detailed instructions are generic; see the `README' file for\ninstructions specific to this package.  Some packages provide this\n`INSTALL' file but do not implement all of the features documented\nbelow.  The lack of an optional feature in a given package is not\nnecessarily a bug.  More recommendations for GNU packages can be found\nin *note Makefile Conventions: (standards)Makefile Conventions.\n\n   The `configure' shell script attempts to guess correct values for\nvarious system-dependent variables used during compilation.  It uses\nthose values to create a `Makefile' in each directory of the package.\nIt may also create one or more `.h' files containing system-dependent\ndefinitions.  Finally, it creates a shell script `config.status' that\nyou can run in the future to recreate the current configuration, and a\nfile `config.log' containing compiler output (useful mainly for\ndebugging `configure').\n\n   It can also use an optional file (typically called `config.cache'\nand enabled with `--cache-file=config.cache' or simply `-C') that saves\nthe results of its tests to speed up reconfiguring.  
Caching is\ndisabled by default to prevent problems with accidental use of stale\ncache files.\n\n   If you need to do unusual things to compile the package, please try\nto figure out how `configure' could check whether to do them, and mail\ndiffs or instructions to the address given in the `README' so they can\nbe considered for the next release.  If you are using the cache, and at\nsome point `config.cache' contains results you don't want to keep, you\nmay remove or edit it.\n\n   The file `configure.ac' (or `configure.in') is used to create\n`configure' by a program called `autoconf'.  You need `configure.ac' if\nyou want to change it or regenerate `configure' using a newer version\nof `autoconf'.\n\n   The simplest way to compile this package is:\n\n  1. `cd' to the directory containing the package's source code and type\n     `./configure' to configure the package for your system.\n\n     Running `configure' might take a while.  While running, it prints\n     some messages telling which features it is checking for.\n\n  2. Type `make' to compile the package.\n\n  3. Optionally, type `make check' to run any self-tests that come with\n     the package, generally using the just-built uninstalled binaries.\n\n  4. Type `make install' to install the programs and any data files and\n     documentation.  When installing into a prefix owned by root, it is\n     recommended that the package be configured and built as a regular\n     user, and only the `make install' phase executed with root\n     privileges.\n\n  5. Optionally, type `make installcheck' to repeat any self-tests, but\n     this time using the binaries in their final installed location.\n     This target does not install anything.  Running this target as a\n     regular user, particularly if the prior `make install' required\n     root privileges, verifies that the installation completed\n     correctly.\n\n  6. 
You can remove the program binaries and object files from the\n     source code directory by typing `make clean'.  To also remove the\n     files that `configure' created (so you can compile the package for\n     a different kind of computer), type `make distclean'.  There is\n     also a `make maintainer-clean' target, but that is intended mainly\n     for the package's developers.  If you use it, you may have to get\n     all sorts of other programs in order to regenerate files that came\n     with the distribution.\n\n  7. Often, you can also type `make uninstall' to remove the installed\n     files again.  In practice, not all packages have tested that\n     uninstallation works correctly, even though it is required by the\n     GNU Coding Standards.\n\n  8. Some packages, particularly those that use Automake, provide `make\n     distcheck', which can by used by developers to test that all other\n     targets like `make install' and `make uninstall' work correctly.\n     This target is generally not run by end users.\n\nCompilers and Options\n=====================\n\n   Some systems require unusual options for compilation or linking that\nthe `configure' script does not know about.  Run `./configure --help'\nfor details on some of the pertinent environment variables.\n\n   You can give `configure' initial values for configuration parameters\nby setting variables in the command line or in the environment.  Here\nis an example:\n\n     ./configure CC=c99 CFLAGS=-g LIBS=-lposix\n\n   *Note Defining Variables::, for more details.\n\nCompiling For Multiple Architectures\n====================================\n\n   You can compile the package for more than one kind of computer at the\nsame time, by placing the object files for each architecture in their\nown directory.  To do this, you can use GNU `make'.  `cd' to the\ndirectory where you want the object files and executables to go and run\nthe `configure' script.  
`configure' automatically checks for the\nsource code in the directory that `configure' is in and in `..'.  This\nis known as a \"VPATH\" build.\n\n   With a non-GNU `make', it is safer to compile the package for one\narchitecture at a time in the source code directory.  After you have\ninstalled the package for one architecture, use `make distclean' before\nreconfiguring for another architecture.\n\n   On MacOS X 10.5 and later systems, you can create libraries and\nexecutables that work on multiple system types--known as \"fat\" or\n\"universal\" binaries--by specifying multiple `-arch' options to the\ncompiler but only a single `-arch' option to the preprocessor.  Like\nthis:\n\n     ./configure CC=\"gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64\" \\\n                 CXX=\"g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64\" \\\n                 CPP=\"gcc -E\" CXXCPP=\"g++ -E\"\n\n   This is not guaranteed to produce working output in all cases, you\nmay have to build one architecture at a time and combine the results\nusing the `lipo' tool if you have problems.\n\nInstallation Names\n==================\n\n   By default, `make install' installs the package's commands under\n`/usr/local/bin', include files under `/usr/local/include', etc.  You\ncan specify an installation prefix other than `/usr/local' by giving\n`configure' the option `--prefix=PREFIX', where PREFIX must be an\nabsolute file name.\n\n   You can specify separate installation prefixes for\narchitecture-specific files and architecture-independent files.  If you\npass the option `--exec-prefix=PREFIX' to `configure', the package uses\nPREFIX as the prefix for installing programs and libraries.\nDocumentation and other data files still use the regular prefix.\n\n   In addition, if you use an unusual directory layout you can give\noptions like `--bindir=DIR' to specify different values for particular\nkinds of files.  
Run `configure --help' for a list of the directories\nyou can set and what kinds of files go in them.  In general, the\ndefault for these options is expressed in terms of `${prefix}', so that\nspecifying just `--prefix' will affect all of the other directory\nspecifications that were not explicitly provided.\n\n   The most portable way to affect installation locations is to pass the\ncorrect locations to `configure'; however, many packages provide one or\nboth of the following shortcuts of passing variable assignments to the\n`make install' command line to change installation locations without\nhaving to reconfigure or recompile.\n\n   The first method involves providing an override variable for each\naffected directory.  For example, `make install\nprefix=/alternate/directory' will choose an alternate location for all\ndirectory configuration variables that were expressed in terms of\n`${prefix}'.  Any directories that were specified during `configure',\nbut not in terms of `${prefix}', must each be overridden at install\ntime for the entire installation to be relocated.  The approach of\nmakefile variable overrides for each directory variable is required by\nthe GNU Coding Standards, and ideally causes no recompilation.\nHowever, some platforms have known limitations with the semantics of\nshared libraries that end up requiring recompilation when using this\nmethod, particularly noticeable in packages that use GNU Libtool.\n\n   The second method involves providing the `DESTDIR' variable.  For\nexample, `make install DESTDIR=/alternate/directory' will prepend\n`/alternate/directory' before all installation names.  The approach of\n`DESTDIR' overrides is not required by the GNU Coding Standards, and\ndoes not work on platforms that have drive letters.  
On the other hand,\nit does better at avoiding recompilation issues, and works well even\nwhen some directory options were not specified in terms of `${prefix}'\nat `configure' time.\n\nOptional Features\n=================\n\n   If the package supports it, you can cause programs to be installed\nwith an extra prefix or suffix on their names by giving `configure' the\noption `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.\n\n   Some packages pay attention to `--enable-FEATURE' options to\n`configure', where FEATURE indicates an optional part of the package.\nThey may also pay attention to `--with-PACKAGE' options, where PACKAGE\nis something like `gnu-as' or `x' (for the X Window System).  The\n`README' should mention any `--enable-' and `--with-' options that the\npackage recognizes.\n\n   For packages that use the X Window System, `configure' can usually\nfind the X include and library files automatically, but if it doesn't,\nyou can use the `configure' options `--x-includes=DIR' and\n`--x-libraries=DIR' to specify their locations.\n\n   Some packages offer the ability to configure how verbose the\nexecution of `make' will be.  For these packages, running `./configure\n--enable-silent-rules' sets the default to minimal output, which can be\noverridden with `make V=1'; while running `./configure\n--disable-silent-rules' sets the default to verbose, which can be\noverridden with `make V=0'.\n\nParticular systems\n==================\n\n   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU\nCC is not installed, it is recommended to use the following options in\norder to use an ANSI C compiler:\n\n     ./configure CC=\"cc -Ae -D_XOPEN_SOURCE=500\"\n\nand if that doesn't work, install pre-built binaries of GCC for HP-UX.\n\n   HP-UX `make' updates targets which have the same time stamps as\ntheir prerequisites, which makes it generally unusable when shipped\ngenerated files such as `configure' are involved.  
Use GNU `make'\ninstead.\n\n   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot\nparse its `<wchar.h>' header file.  The option `-nodtk' can be used as\na workaround.  If GNU CC is not installed, it is therefore recommended\nto try\n\n     ./configure CC=\"cc\"\n\nand if that doesn't work, try\n\n     ./configure CC=\"cc -nodtk\"\n\n   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This\ndirectory contains several dysfunctional programs; working variants of\nthese programs are available in `/usr/bin'.  So, if you need `/usr/ucb'\nin your `PATH', put it _after_ `/usr/bin'.\n\n   On Haiku, software installed for all users goes in `/boot/common',\nnot `/usr/local'.  It is recommended to use the following options:\n\n     ./configure --prefix=/boot/common\n\nSpecifying the System Type\n==========================\n\n   There may be some features `configure' cannot figure out\nautomatically, but needs to determine by the type of machine the package\nwill run on.  Usually, assuming the package is built to be run on the\n_same_ architectures, `configure' can figure that out, but if it prints\na message saying it cannot guess the machine type, give it the\n`--build=TYPE' option.  TYPE can either be a short name for the system\ntype, such as `sun4', or a canonical name which has the form:\n\n     CPU-COMPANY-SYSTEM\n\nwhere SYSTEM can have one of these forms:\n\n     OS\n     KERNEL-OS\n\n   See the file `config.sub' for the possible values of each field.  
If\n`config.sub' isn't included in this package, then this package doesn't\nneed to know the machine type.\n\n   If you are _building_ compiler tools for cross-compiling, you should\nuse the option `--target=TYPE' to select the type of system they will\nproduce code for.\n\n   If you want to _use_ a cross compiler, that generates code for a\nplatform different from the build platform, you should specify the\n\"host\" platform (i.e., that on which the generated programs will\neventually be run) with `--host=TYPE'.\n\nSharing Defaults\n================\n\n   If you want to set default values for `configure' scripts to share,\nyou can create a site shell script called `config.site' that gives\ndefault values for variables like `CC', `cache_file', and `prefix'.\n`configure' looks for `PREFIX/share/config.site' if it exists, then\n`PREFIX/etc/config.site' if it exists.  Or, you can set the\n`CONFIG_SITE' environment variable to the location of the site script.\nA warning: not all `configure' scripts look for a site script.\n\nDefining Variables\n==================\n\n   Variables not defined in a site shell script can be set in the\nenvironment passed to `configure'.  However, some packages may run\nconfigure again during the build, and the customized values of these\nvariables may be lost.  In order to avoid this problem, you should set\nthem in the `configure' command line, using `VAR=value'.  For example:\n\n     ./configure CC=/usr/local2/bin/gcc\n\ncauses the specified `gcc' to be used as the C compiler (unless it is\noverridden in the site shell script).\n\nUnfortunately, this technique does not work for `CONFIG_SHELL' due to\nan Autoconf limitation.  
Until the limitation is lifted, you can use\nthis workaround:\n\n     CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash\n\n`configure' Invocation\n======================\n\n   `configure' recognizes the following options to control how it\noperates.\n\n`--help'\n`-h'\n     Print a summary of all of the options to `configure', and exit.\n\n`--help=short'\n`--help=recursive'\n     Print a summary of the options unique to this package's\n     `configure', and exit.  The `short' variant lists options used\n     only in the top level, while the `recursive' variant lists options\n     also present in any nested packages.\n\n`--version'\n`-V'\n     Print the version of Autoconf used to generate the `configure'\n     script, and exit.\n\n`--cache-file=FILE'\n     Enable the cache: use and save the results of the tests in FILE,\n     traditionally `config.cache'.  FILE defaults to `/dev/null' to\n     disable caching.\n\n`--config-cache'\n`-C'\n     Alias for `--cache-file=config.cache'.\n\n`--quiet'\n`--silent'\n`-q'\n     Do not print messages saying which checks are being made.  To\n     suppress all normal output, redirect it to `/dev/null' (any error\n     messages will still be shown).\n\n`--srcdir=DIR'\n     Look for the package's source code in directory DIR.  Usually\n     `configure' can determine that directory automatically.\n\n`--prefix=DIR'\n     Use DIR as the installation prefix.  *note Installation Names::\n     for more details, including other options available for fine-tuning\n     the installation locations.\n\n`--no-create'\n`-n'\n     Run the configure checks, but stop before creating any output\n     files.\n\n`configure' also accepts some other, not widely useful, options.  Run\n`configure --help' for more details.\n"
  },
  {
    "path": "INSTALL.autoconf",
    "content": "Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\n   This file is free documentation; the Free Software Foundation gives\nunlimited permission to copy, distribute and modify it.\n\nBasic Installation\n==================\n\n   These are generic installation instructions.\n\n   The `configure' shell script attempts to guess correct values for\nvarious system-dependent variables used during compilation.  It uses\nthose values to create a `Makefile' in each directory of the package.\nIt may also create one or more `.h' files containing system-dependent\ndefinitions.  Finally, it creates a shell script `config.status' that\nyou can run in the future to recreate the current configuration, and a\nfile `config.log' containing compiler output (useful mainly for\ndebugging `configure').\n\n   It can also use an optional file (typically called `config.cache'\nand enabled with `--cache-file=config.cache' or simply `-C') that saves\nthe results of its tests to speed up reconfiguring.  (Caching is\ndisabled by default to prevent problems with accidental use of stale\ncache files.)\n\n   If you need to do unusual things to compile the package, please try\nto figure out how `configure' could check whether to do them, and mail\ndiffs or instructions to the address given in the `README' so they can\nbe considered for the next release.  If you are using the cache, and at\nsome point `config.cache' contains results you don't want to keep, you\nmay remove or edit it.\n\n   The file `configure.ac' (or `configure.in') is used to create\n`configure' by a program called `autoconf'.  You only need\n`configure.ac' if you want to change it or regenerate `configure' using\na newer version of `autoconf'.\n\nThe simplest way to compile this package is:\n\n  1. `cd' to the directory containing the package's source code and type\n     `./configure' to configure the package for your system.  
If you're\n     using `csh' on an old version of System V, you might need to type\n     `sh ./configure' instead to prevent `csh' from trying to execute\n     `configure' itself.\n\n     Running `configure' takes awhile.  While running, it prints some\n     messages telling which features it is checking for.\n\n  2. Type `make' to compile the package.\n\n  3. Optionally, type `make check' to run any self-tests that come with\n     the package.\n\n  4. Type `make install' to install the programs and any data files and\n     documentation.\n\n  5. You can remove the program binaries and object files from the\n     source code directory by typing `make clean'.  To also remove the\n     files that `configure' created (so you can compile the package for\n     a different kind of computer), type `make distclean'.  There is\n     also a `make maintainer-clean' target, but that is intended mainly\n     for the package's developers.  If you use it, you may have to get\n     all sorts of other programs in order to regenerate files that came\n     with the distribution.\n\nCompilers and Options\n=====================\n\n   Some systems require unusual options for compilation or linking that\nthe `configure' script does not know about.  Run `./configure --help'\nfor details on some of the pertinent environment variables.\n\n   You can give `configure' initial values for configuration parameters\nby setting variables in the command line or in the environment.  Here\nis an example:\n\n     ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix\n\n   *Note Defining Variables::, for more details.\n\nCompiling For Multiple Architectures\n====================================\n\n   You can compile the package for more than one kind of computer at the\nsame time, by placing the object files for each architecture in their\nown directory.  To do this, you must use a version of `make' that\nsupports the `VPATH' variable, such as GNU `make'.  
`cd' to the\ndirectory where you want the object files and executables to go and run\nthe `configure' script.  `configure' automatically checks for the\nsource code in the directory that `configure' is in and in `..'.\n\n   If you have to use a `make' that does not support the `VPATH'\nvariable, you have to compile the package for one architecture at a\ntime in the source code directory.  After you have installed the\npackage for one architecture, use `make distclean' before reconfiguring\nfor another architecture.\n\nInstallation Names\n==================\n\n   By default, `make install' will install the package's files in\n`/usr/local/bin', `/usr/local/man', etc.  You can specify an\ninstallation prefix other than `/usr/local' by giving `configure' the\noption `--prefix=PATH'.\n\n   You can specify separate installation prefixes for\narchitecture-specific files and architecture-independent files.  If you\ngive `configure' the option `--exec-prefix=PATH', the package will use\nPATH as the prefix for installing programs and libraries.\nDocumentation and other data files will still use the regular prefix.\n\n   In addition, if you use an unusual directory layout you can give\noptions like `--bindir=PATH' to specify different values for particular\nkinds of files.  Run `configure --help' for a list of the directories\nyou can set and what kinds of files go in them.\n\n   If the package supports it, you can cause programs to be installed\nwith an extra prefix or suffix on their names by giving `configure' the\noption `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.\n\nOptional Features\n=================\n\n   Some packages pay attention to `--enable-FEATURE' options to\n`configure', where FEATURE indicates an optional part of the package.\nThey may also pay attention to `--with-PACKAGE' options, where PACKAGE\nis something like `gnu-as' or `x' (for the X Window System).  
The\n`README' should mention any `--enable-' and `--with-' options that the\npackage recognizes.\n\n   For packages that use the X Window System, `configure' can usually\nfind the X include and library files automatically, but if it doesn't,\nyou can use the `configure' options `--x-includes=DIR' and\n`--x-libraries=DIR' to specify their locations.\n\nSpecifying the System Type\n==========================\n\n   There may be some features `configure' cannot figure out\nautomatically, but needs to determine by the type of machine the package\nwill run on.  Usually, assuming the package is built to be run on the\n_same_ architectures, `configure' can figure that out, but if it prints\na message saying it cannot guess the machine type, give it the\n`--build=TYPE' option.  TYPE can either be a short name for the system\ntype, such as `sun4', or a canonical name which has the form:\n\n     CPU-COMPANY-SYSTEM\n\nwhere SYSTEM can have one of these forms:\n\n     OS KERNEL-OS\n\n   See the file `config.sub' for the possible values of each field.  If\n`config.sub' isn't included in this package, then this package doesn't\nneed to know the machine type.\n\n   If you are _building_ compiler tools for cross-compiling, you should\nuse the `--target=TYPE' option to select the type of system they will\nproduce code for.\n\n   If you want to _use_ a cross compiler, that generates code for a\nplatform different from the build platform, you should specify the\n\"host\" platform (i.e., that on which the generated programs will\neventually be run) with `--host=TYPE'.\n\nSharing Defaults\n================\n\n   If you want to set default values for `configure' scripts to share,\nyou can create a site shell script called `config.site' that gives\ndefault values for variables like `CC', `cache_file', and `prefix'.\n`configure' looks for `PREFIX/share/config.site' if it exists, then\n`PREFIX/etc/config.site' if it exists.  
Or, you can set the\n`CONFIG_SITE' environment variable to the location of the site script.\nA warning: not all `configure' scripts look for a site script.\n\nDefining Variables\n==================\n\n   Variables not defined in a site shell script can be set in the\nenvironment passed to `configure'.  However, some packages may run\nconfigure again during the build, and the customized values of these\nvariables may be lost.  In order to avoid this problem, you should set\nthem in the `configure' command line, using `VAR=value'.  For example:\n\n     ./configure CC=/usr/local2/bin/gcc\n\nwill cause the specified gcc to be used as the C compiler (unless it is\noverridden in the site shell script).\n\n`configure' Invocation\n======================\n\n   `configure' recognizes the following options to control how it\noperates.\n\n`--help'\n`-h'\n     Print a summary of the options to `configure', and exit.\n\n`--version'\n`-V'\n     Print the version of Autoconf used to generate the `configure'\n     script, and exit.\n\n`--cache-file=FILE'\n     Enable the cache: use and save the results of the tests in FILE,\n     traditionally `config.cache'.  FILE defaults to `/dev/null' to\n     disable caching.\n\n`--config-cache'\n`-C'\n     Alias for `--cache-file=config.cache'.\n\n`--quiet'\n`--silent'\n`-q'\n     Do not print messages saying which checks are being made.  To\n     suppress all normal output, redirect it to `/dev/null' (any error\n     messages will still be shown).\n\n`--srcdir=DIR'\n     Look for the package's source code in directory DIR.  Usually\n     `configure' can determine that directory automatically.\n\n`configure' also accepts some other, not widely useful, options.  Run\n`configure --help' for more details.\n\n"
  },
  {
    "path": "Makefile.am",
    "content": "# Process this file with automake to generate Makefile.in\n\n\n# Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,\n# 2006 Free Software Foundation, Inc.\n#\n# Copyright 2008 William Hart, Michael Abshoff\n#\n# This file is part of the MPIR Library.\n#\n# The MPIR Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The MPIR Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the MPIR Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n# Libtool -version-info for libmpir.la and libmp.la.  See \"Versioning\" in the\n# libtool manual.\n#\n# 1. No interfaces changed, only implementations (good): Increment REVISION.\n#\n# 2. Interfaces added, none removed (good): Increment CURRENT, increment\n#    AGE, set REVISION to 0.\n#\n# 3. 
Interfaces removed (BAD, breaks upward compatibility): Increment\n#    CURRENT, set AGE and REVISION to 0.\n#\n# Do this separately for libgmp, libgmpxx and libmp, and only for releases.\n#\n#\t  GMP\t   -version-info\n#       release   libgmp  libgmpxx libmp\n#        2.0.x      -        -       -\n#        3.0      3:0:0      -     3:0:0\n#        3.0.1    3:1:0      -     3:0:0\n#        3.1      4:0:1      -     4:0:1\n#        3.1.1    4:1:1      -     4:1:1\n#        4.0      5:0:2    3:0:0   4:2:1\n#        4.0.1    5:1:2    3:1:0   4:3:1\n#        4.1      6:0:3    3:2:0   4:4:1\n#        4.1.1    6:1:3    3:3:0   4:5:1\n#        4.1.2    6:2:3    3:4:0   4:6:1\n#        4.1.3    6:3:3    3:5:0   4:7:1\n#        4.1.4    6:3:3    3:5:0   4:7:1\tWRONG, same as 4.1.3!\n#        4.2      6:0:3    3:2:0   4:4:1\tREALLY WRONG, same as 4.1!\n#        4.2.1    7:1:4    4:1:1   4:10:1\n#\n#\t  MPIR\t   -version-info\n#       release   libgmp  libgmpxx libmp\n#\t0.9.0\t7:1:4\t4:1:1\t4:10:1   WRONG, same as GMP 4.2.1\n#\t1.0.0\t7:2:4\t4:2:1\t4:11:1\n#\t1.1.0\t7:3:4\t4:3:1\t4:12:1\n#\t1.2.0\t7:4:4\t4:4:1\t4:13:1\n#\t1.3.0\t8:0:0\t4:5:1\t4:14:1 \n#\t1.3.1\t8:1:0\t4:6:1\t0:0:0\n#\t2.0.0\t9:2:0\t4:7:1\n#\t2.1.0\t10:0:1\t4:8:1\n#\t2.1.1\t10:1:2\t4:9:2\n#\t2.1.2\t10:2:2\t4:10:2\n#\t2.1.3\t10:3:2\t4:11:2\n#\t2.1.4\t10:4:2\t4:12:2\n#\t2.2.0\t10:5:2\t4:13:2\n#\t2.2.1\t10:6:3\t4:14:3\n#\t2.3.0\t10:7:3\t4:15:3\n#\t2.3.1\t10:8:3\t4:16:3\n#\t2.4.0   11:0:4  4:17:3\n#\t2.5.0\t11:1:4\t4:18:3\n#\t2.5.1\t11:2:4\t4:19:3\n#\t2.5.2\t11:3:4\t4:20:3\n#\t2.6.0   11:0:5  4:21:3               Clearly wrong!!\n#       2.7.0   16:0:6  8:0:4                Attempt to correct wrt all prev \n#       2.7.1   22:1:6  12:1:4               Due to trailing space for 2.7.1\n#       2.7.2   22:2:6  12:2:4\n#       3.0.0   23:3:0  12:3:4\n#\n# Starting at 3:0:0 is a slight abuse of the versioning system, but it\n# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux\n# packages of 
gmp 2.  Pretend gmp 2 was 2:0:0, so the interface changes for\n# gmp 3 mean 3:0:0 is right.\n#\n# We interpret \"implementation changed\" in item \"1.\" above as meaning any\n# release, ie. the REVISION is incremented every time (if nothing else).\n# Even if we thought the code generated will be identical on all systems,\n# it's still good to get the shared library filename (like\n# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.\n\nLIBGMP_LT_CURRENT = 23\nLIBGMP_LT_REVISION = 3\nLIBGMP_LT_AGE = 0\n\nLIBGMPXX_LT_CURRENT = 12\nLIBGMPXX_LT_REVISION = 3\nLIBGMPXX_LT_AGE = 4\n\nSUBDIRS = tests\n\nSUBDIRS += mpn fft mpz mpq mpf printf scanf cxx tune doc\n\nEXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf\n\nif WANT_CXX\nGMPXX_HEADERS_OPTION = mpirxx.h\nendif\nEXTRA_DIST += mpirxx.h yasm_macwin.inc.fat yasm_macwin.inc.nofat yasm_mac.inc.fat yasm_mac.inc.nofat strip_fPIC.sh mpn/x86w mpn/x86_64w mpir.net doc/devel doc/fdl.texi cpuid.c gpl-2.0.txt lgpl-2.1.txt\n\n# mpir.h is architecture dependent, mainly since they encode the\n# limb size used in libmpir.  
For that reason they belong under $exec_prefix\n# not $prefix, strictly speaking.\n#\n# $exec_prefix/include is not in the default include path for gcc built to\n# the same $prefix and $exec_prefix, which might mean mpir.h is not found,\n# but anyone knowledgable enough to be playing with exec_prefix will be able\n# to address that.\n#\nincludeexecdir = $(exec_prefix)/include\ninclude_HEADERS = $(GMPXX_HEADERS_OPTION)\nnodist_includeexec_HEADERS = mpir.h\nlib_LTLIBRARIES = libmpir.la\nif WANT_GMPCOMPAT\nlib_LTLIBRARIES += libgmp.la\nendif\nlib_LTLIBRARIES += $(GMPXX_LTLIBRARIES_OPTION)\n\nBUILT_SOURCES = mpir.h\nif WANT_GMPCOMPAT\nBUILT_SOURCES += gmp.h\nnodist_includeexec_HEADERS += gmp.h\nif WANT_CXX\nBUILT_SOURCES += gmpxx.h\nnodist_includeexec_HEADERS += gmpxx.h\nendif\nendif\n\nDISTCLEANFILES = $(BUILT_SOURCES) config.m4 fat.h @gmp_srclinks@\n\n# Tell mpir.h it's building gmp, not an application, used by windows DLL stuff.\nAM_CPPFLAGS=-D__GMP_WITHIN_GMP\n\nMPF_OBJECTS = mpf/abs$U.lo mpf/add$U.lo mpf/add_ui$U.lo mpf/ceilfloor$U.lo mpf/clear$U.lo mpf/clears$U.lo mpf/cmp$U.lo mpf/cmp_d$U.lo mpf/cmp_si$U.lo mpf/cmp_ui$U.lo mpf/cmp_z$U.lo mpf/div$U.lo mpf/div_2exp$U.lo mpf/div_ui$U.lo mpf/dump$U.lo mpf/eq$U.lo mpf/fits_si$U.lo mpf/fits_sint$U.lo mpf/fits_slong$U.lo mpf/fits_sshort$U.lo mpf/fits_ui$U.lo mpf/fits_uint$U.lo mpf/fits_ulong$U.lo mpf/fits_ushort$U.lo mpf/get_d$U.lo mpf/get_d_2exp$U.lo mpf/get_dfl_prec$U.lo mpf/get_prc$U.lo mpf/get_si$U.lo mpf/get_str$U.lo mpf/get_ui$U.lo mpf/init$U.lo mpf/init2$U.lo mpf/inits$U.lo mpf/inp_str$U.lo mpf/int_p$U.lo mpf/iset$U.lo mpf/iset_d$U.lo mpf/iset_si$U.lo mpf/iset_str$U.lo mpf/iset_ui$U.lo mpf/mul$U.lo mpf/mul_2exp$U.lo mpf/mul_ui$U.lo mpf/neg$U.lo mpf/out_str$U.lo mpf/pow_ui$U.lo mpf/random2$U.lo mpf/reldiff$U.lo mpf/rrandomb$U.lo mpf/set$U.lo mpf/set_d$U.lo mpf/set_dfl_prec$U.lo mpf/set_prc$U.lo mpf/set_prc_raw$U.lo mpf/set_q$U.lo mpf/set_si$U.lo mpf/set_str$U.lo mpf/set_ui$U.lo mpf/set_z$U.lo mpf/size$U.lo 
mpf/sqrt$U.lo mpf/sqrt_ui$U.lo mpf/sub$U.lo mpf/sub_ui$U.lo mpf/swap$U.lo mpf/trunc$U.lo mpf/ui_div$U.lo mpf/ui_sub$U.lo mpf/urandomb$U.lo \n\nFFT_OBJECTS = fft/adjust$U.lo fft/adjust_sqrt2$U.lo fft/butterfly_lshB$U.lo fft/butterfly_rshB$U.lo fft/combine_bits$U.lo fft/div_2expmod_2expp1$U.lo fft/fermat_to_mpz$U.lo fft/fft_mfa_trunc_sqrt2$U.lo fft/fft_mfa_trunc_sqrt2_inner$U.lo fft/fft_negacyclic$U.lo fft/fft_radix2$U.lo fft/fft_trunc$U.lo fft/fft_trunc_sqrt2$U.lo fft/ifft_mfa_trunc_sqrt2$U.lo fft/ifft_negacyclic$U.lo fft/ifft_radix2$U.lo fft/ifft_trunc$U.lo fft/ifft_trunc_sqrt2$U.lo fft/mul_2expmod_2expp1$U.lo fft/mul_fft_main$U.lo fft/mul_mfa_trunc_sqrt2$U.lo fft/mul_trunc_sqrt2$U.lo fft/mulmod_2expp1$U.lo fft/normmod_2expp1$U.lo fft/revbin$U.lo fft/split_bits$U.lo \n\nMPZ_OBJECTS = mpz/2fac_ui$U.lo mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo mpz/and$U.lo mpz/aorsmul$U.lo mpz/aorsmul_i$U.lo mpz/array_init$U.lo mpz/bin_ui$U.lo mpz/bin_uiui$U.lo mpz/cdiv_q$U.lo mpz/cdiv_q_ui$U.lo mpz/cdiv_qr$U.lo mpz/cdiv_qr_ui$U.lo mpz/cdiv_r$U.lo mpz/cdiv_r_ui$U.lo mpz/cdiv_ui$U.lo mpz/cfdiv_q_2exp$U.lo mpz/cfdiv_r_2exp$U.lo mpz/clear$U.lo mpz/clears$U.lo mpz/clrbit$U.lo mpz/cmp$U.lo mpz/cmp_d$U.lo mpz/cmp_si$U.lo mpz/cmp_ui$U.lo mpz/cmpabs$U.lo mpz/cmpabs_d$U.lo mpz/cmpabs_ui$U.lo mpz/com$U.lo mpz/combit$U.lo mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo mpz/dive_ui$U.lo mpz/divegcd$U.lo mpz/divexact$U.lo mpz/divis$U.lo mpz/divis_2exp$U.lo mpz/divis_ui$U.lo mpz/dump$U.lo mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo mpz/fdiv_ui$U.lo mpz/fib2_ui$U.lo mpz/fib_ui$U.lo mpz/fits_si$U.lo mpz/fits_sint$U.lo mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_ui$U.lo mpz/fits_uint$U.lo mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_sx$U.lo mpz/get_ui$U.lo mpz/get_ux$U.lo 
mpz/getlimbn$U.lo mpz/hamdist$U.lo mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo mpz/iset_str$U.lo mpz/iset_sx$U.lo mpz/iset_ui$U.lo mpz/iset_ux$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo mpz/kronuz$U.lo mpz/kronzs$U.lo mpz/kronzu$U.lo mpz/lcm$U.lo mpz/lcm_ui$U.lo mpz/likely_prime_p$U.lo mpz/limbs_finish$U.lo mpz/limbs_modify$U.lo mpz/limbs_read$U.lo mpz/limbs_write$U.lo mpz/lucnum2_ui$U.lo mpz/lucnum_ui$U.lo mpz/mfac_uiui$U.lo mpz/miller_rabin$U.lo mpz/millerrabin$U.lo mpz/mod$U.lo mpz/mul$U.lo mpz/mul_2exp$U.lo mpz/mul_si$U.lo mpz/mul_ui$U.lo mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/next_prime_candidate$U.lo mpz/nextprime$U.lo mpz/nthroot$U.lo mpz/oddfac_1$U.lo mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/primorial_ui$U.lo mpz/probable_prime_p$U.lo mpz/prodlimbs$U.lo mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo mpz/roinit_n$U.lo mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo mpz/set_q$U.lo mpz/set_si$U.lo mpz/set_str$U.lo mpz/set_sx$U.lo mpz/set_ui$U.lo mpz/set_ux$U.lo mpz/setbit$U.lo mpz/size$U.lo mpz/sizeinbase$U.lo mpz/sqrt$U.lo mpz/sqrtrem$U.lo mpz/sub$U.lo mpz/sub_ui$U.lo mpz/swap$U.lo mpz/tdiv_q$U.lo mpz/tdiv_q_2exp$U.lo mpz/tdiv_q_ui$U.lo mpz/tdiv_qr$U.lo mpz/tdiv_qr_ui$U.lo mpz/tdiv_r$U.lo mpz/tdiv_r_2exp$U.lo mpz/tdiv_r_ui$U.lo mpz/tdiv_ui$U.lo mpz/trial_division$U.lo mpz/tstbit$U.lo mpz/ui_pow_ui$U.lo mpz/ui_sub$U.lo mpz/urandomb$U.lo mpz/urandomm$U.lo mpz/xor$U.lo \n\nMPQ_OBJECTS = mpq/abs$U.lo mpq/aors$U.lo mpq/canonicalize$U.lo mpq/clear$U.lo mpq/clears$U.lo mpq/cmp$U.lo mpq/cmp_si$U.lo mpq/cmp_ui$U.lo mpq/div$U.lo mpq/equal$U.lo mpq/get_d$U.lo mpq/get_den$U.lo mpq/get_num$U.lo mpq/get_str$U.lo mpq/init$U.lo mpq/inits$U.lo mpq/inp_str$U.lo mpq/inv$U.lo 
mpq/md_2exp$U.lo mpq/mul$U.lo mpq/neg$U.lo mpq/out_str$U.lo mpq/set$U.lo mpq/set_d$U.lo mpq/set_den$U.lo mpq/set_f$U.lo mpq/set_num$U.lo mpq/set_si$U.lo mpq/set_str$U.lo mpq/set_ui$U.lo mpq/set_z$U.lo mpq/swap$U.lo \n\nMPN_OBJECTS = mpn/dummy1$U.lo\n\nPRINTF_OBJECTS = printf/asprintf$U.lo printf/asprntffuns$U.lo printf/doprnt$U.lo printf/doprntf$U.lo printf/doprnti$U.lo printf/fprintf$U.lo printf/obprintf$U.lo printf/obprntffuns$U.lo printf/obvprintf$U.lo printf/printf$U.lo printf/printffuns$U.lo printf/repl-vsnprintf$U.lo printf/snprintf$U.lo printf/snprntffuns$U.lo printf/sprintf$U.lo printf/sprintffuns$U.lo printf/vasprintf$U.lo printf/vfprintf$U.lo printf/vprintf$U.lo printf/vsnprintf$U.lo printf/vsprintf$U.lo \n\nSCANF_OBJECTS = scanf/doscan$U.lo scanf/fscanf$U.lo scanf/fscanffuns$U.lo scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo \n\n# no $U for C++ files\nCXX_OBJECTS =\t\t\t\t\t\t\t\t\\\n  cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo\t\\\n  cxx/osdoprnti.lo cxx/osfuns.lo\t\t\t\t\t\\\n  cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo\n\n# In libtool 1.5 it doesn't work to build libmpir.la from the convenience\n# libraries like mpz/libmpz.la.  Or rather it works, but it ends up putting\n# PIC objects into libmpir.a if shared and static are both built.  (The PIC\n# objects go into mpz/.libs/libmpz.a, and thence into .libs/libmpir.a.)\n#\n# For now the big lists of objects above are used.  Something like mpz/*.lo\n# would probably work, but might risk missing something out or getting\n# something extra.  
The source files for each .lo are listed in the\n# Makefile.am's in the subdirectories.\n#\n# Currently, for libmpir, unlike libmp below, we're not using\n# -export-symbols, since the tune and speed programs, and perhaps some of\n# the test programs, want to access undocumented symbols.\n\nlibmpir_la_SOURCES = gmp-impl.h longlong_pre.h longlong_post.h randmt.h\t\t\t\\\n  assert.c compat.c errno.c extract-dbl.c invalid.c memory.c\t\t\\\n  mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \\\n  randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c\t\\\n  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c primesieve.c \\\n  nextprime.c version.c\nEXTRA_libmpir_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c\nlibmpir_la_DEPENDENCIES = @TAL_OBJECT@\t\t\\\n  $(MPF_OBJECTS) $(FFT_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS)\t\\\n  $(MPN_OBJECTS) @mpn_objs_in_libgmp@\t\t\\\n  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)\nlibmpir_la_LIBADD = $(libmpir_la_DEPENDENCIES)\nlibmpir_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \\\n  -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)\n\nlibgmp_la_SOURCES = $(libmpir_la_SOURCES)\nEXTRA_libgmp_la_SOURCES = $(EXTRA_libmpir_la_SOURCES)\nlibgmp_la_DEPENDENCIES = @TAL_OBJECT@\t\t\\\n  $(MPF_OBJECTS) $(FFT_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS)\t\\\n  $(MPN_OBJECTS) @mpn_objs_in_libgmp@\t\t\\\n  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)\nlibgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)\nlibgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \\\n  -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)\n\n\n# We need at least one .cc file in $(libmpirxx_la_SOURCES) so automake will\n# use $(CXXLINK) rather than the plain C $(LINK).  
cxx/dummy.cc is that\nfile.\n\nif WANT_CXX\nGMPXX_LTLIBRARIES_OPTION = libmpirxx.la\nif WANT_GMPCOMPAT\nGMPXX_LTLIBRARIES_OPTION += libgmpxx.la\nendif\nendif\nlibmpirxx_la_SOURCES = cxx/dummy.cc\nlibmpirxx_la_DEPENDENCIES = $(CXX_OBJECTS) libmpir.la\nlibmpirxx_la_LIBADD = $(libmpirxx_la_DEPENDENCIES)\nlibmpirxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \\\n  -version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)\n\nlibgmpxx_la_SOURCES = $(libmpirxx_la_SOURCES)\nlibgmpxx_la_DEPENDENCIES = $(CXX_OBJECTS) libgmp.la\nlibgmpxx_la_LIBADD = $(libgmpxx_la_DEPENDENCIES)\nlibgmpxx_la_LDFLAGS = $(libmpirxx_la_LDFLAGS)\n\ninstall-data-hook:\n\t@echo ''\n\t@echo '+-------------------------------------------------------------+'\n\t@echo '| CAUTION:                                                    |'\n\t@echo '|                                                             |'\n\t@echo '| If you have not already run \"make check\", then we strongly  |'\n\t@echo '| recommend you do so.                                        |'\n\t@echo '|                                                             |'\n\t@echo '| MPIR has been carefully tested by its authors, but compilers|'\n\t@echo '| are all too often released with serious bugs.  MPIR tends to|'\n\t@echo '| explore interesting corners in compilers and has hit bugs   |'\n\t@echo '| on quite a few occasions.                                   |'\n\t@echo '|                                                             |'\n\t@echo '+-------------------------------------------------------------+'\n\t@echo ''\n\n# The \"test -f\" support for srcdir!=builddir is similar to the automake .c.o\n# etc rules, but with each foo.c explicitly, since $< is not portable\n# outside an inference rule.\n#\n# A quoted 'foo.c' is used with the \"test -f\"'s to avoid Sun make rewriting\n# it as part of its VPATH support.  
See the autoconf manual \"Limitations of\nMake\".\n#\n# Generated .h files which are used by gmp-impl.h are BUILT_SOURCES since\n# they must exist before anything can be compiled.\n#\n# Other generated .h files are also BUILT_SOURCES so as to get all the\n# build-system stuff over and done with at the start.  Also, dependencies on\n# the .h files are not properly expressed for the various objects that use\n# them.\n\nmpn/dummy1.c:\n\techo \"void dummy1(void){return;}\" > mpn/dummy1.c\nBUILT_SOURCES += mpn/dummy1.c\n\n\n# Avoid: CVS - cvs directories\n#        *~  - emacs backups\n#        .#* - cvs merge originals\n#\n# *~ and .#* only occur when a whole directory without its own Makefile.am\n# is distributed, like \"doc\" or the mpn cpu subdirectories.\n#\ndist-hook:\n\t-find $(distdir) \\( -name CVS -type d \\) -o -name \"*~\" -o -name \".#*\" -o -name \".svn\" | xargs rm -rf\n"
  },
  {
    "path": "NEWS",
    "content": "Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software\nFoundation, Inc.\n\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n\nChanges between MPIR 3.0.0 and MPIR 2.7.2\n\nBug Fixes:\n\n* Port bug fix for gmp_get_str from GMP\n* Fix t-locale failure on Windows\n* Attempt to fix issue with gmp_randinit_set for _mt generator\n* Fix long/long long pointer issue on Windows 64\n* Clz_tab array length to 129 in line with GMP\n\nFeatures:\n\n* New Intel Skylake assembly support due to Jens Nurmann, Alex Kruppa and GMP\n* New Intel Haswell assembly support due to Alex Kruppa and GMP\n* Rudimentary Broadwell support (not optimisation)\n* Improved AMD Bulldozer support due to Alex Kruppa\n* Faster mpz_powm, mpz_powm_ui from GMP\n* New mpz_limbs functionality from GMP 6\n* New mpn_sizeinbase, mullow_basecase_n, binvert, redc_1, redc_2, redc_n functions from GMP 6\n* New mpn_nsumdiff_n function (speeds up FFT on haswell)\n* Visual Studio 2017 support\n* mpir.net for interface to .net languages\n* Appveyor-CI support\n\nOther:\n\n* Separate yasm from MPIR build (use --with-yasm=/path/to/yasm with configure)\n* Remove autogenerated Makefiles, makeinfo and configure 
from version control\n\nChanges between MPIR 2.7.2 and MPIR 2.7.1\n\nBug Fixes:\n\n* Increase .so version numbering to prevent conflict with MPIR 2.7.1\n\nFeatures:\n\n* Wave motion gun\n\nChanges between MPIR 2.7.1 and MPIR 2.7.0\n\nBug Fixes:\n\n* Fix bug in Karatsuba assembly code on Windows\n* Fix bug in basecase division code (by removing implementation)\n\nFeatures:\n\n* Add building of tests to command line build for Windows\n* Improve speed and tune targets on Windows\n* Various MSVC build improvements\n\nOther:\n\n* Add some missing information in AUTHORS\n* Travis continuous integration\n\nChanges between MPIR 2.7.0 and MPIR 2.6.0\n\nBug Fixes:\n\n* Many build fixes\n* Fix fat binary build on 32 bit x86\n* Fix minor bugs in invert.c and t-invertc.\n* Remove broken assert in generic build\n* Fix bug in FFT that causes tuning code to crash\n* fix bug in sqr_basecase.asm on Windows\n* Fix bug in mpz_nextprime\n* Fix aliasing bug in mpz_nthroot\n* Fix bug in mpn_mulmod_basecase\n* Add missing GMP_DECLSPEC's causing segfaults on Windows\n\nSpeedups:\n\n* faster divide-and-conquer division using new algorithm of W. 
Hart\n* merge many speedups for number theoretic functions from GMP project\n  (factorial, multi-factorial, primorial, n-choose-k, jacobi symbols)\n* fft versions of mpn_mulmod_bnm1/p1 to speed up GMP-ECM\n* switch to using the much cleaner gcdext implementation from GMP\n\nFeatures:\n\n* Cygwin64 support\n* MSYS2 support\n* configure support for latest Intel/AMD chips\n* MSVC 2015 support\n* Clean up Jason Moxham's old C code\n* change handling of stdint.h (user optionally includes it before mpir.h)\n* support OpenBSD 5.6 and later\n* GCC 4.9 and 5.x support\n* Clang support\n* C++11 C++ compiler support\n\nChanges between MPIR 2.6.0 and MPIR 2.5.2\n\nBug Fixes:\n\n* fixed bug in speed/common for dc_bdiv_qr_n\n* documentation error wrt -ve exponents in mpz_powm_ui\n* correct mpq_cmp_ui declaration error found by David Cleaver\n* correct bug in Windows assembler code for karasub\n\nSpeedups:\n\n* Completely new FFT implementation (William Hart)\n\nFeatures:\n\n* Capability to deal with full 32/64 bit words on 32/64 bit Windows (Brian Gladman)\n* Python Windows build generator (Brian Gladman)\n\nCompatibility:\n\n* Add macros for __GNU_MP_RELEASE and __MPIR_RELEASE\n\nChanges between MPIR 2.5.2 and MPIR 2.5.1\n\nBug Fixes:\n\n* fixed build bug due to incorrect header in redc_2\n* proper test for unpatched gcc-4.3.2\n\nSpeedups:\n\n* Some new cpuids supported\n* assembly redc_1 for sandybridge and bobcat for x86_64 and x86_64w\n\nCompatibility:\n\n* Added undocumented macros in gmp-impl.h for mpfr-3.1.0\n\nChanges between MPIR 2.5.1 and MPIR 2.5.0\n\nBug Fixes:\n\n * Correct very rare errors in multiplication code\n * Correct very rare error in GCD code\n * minor MSVC build fixes\n\nFeatures:\n\n * addadd addsub subadd sumdiff unconditionally availible on all systems \n * redc_2 availible\n\nChanges between MPIR 2.5.0 and MPIR 2.4.0\n\nBug Fixes:\n\n * Correct rounding mode make check tests\n\nSpeedups:\n\n * x86_64 fat build now uses RIP relative addressing\n * 
Toom22 speedup via assembler\n * New x86_64 assembler code\n * New Toom eval function using the new assembler code\n * Detect another Sandybridge CPU\n * New GCD code (by the GMP project)\n\nFeatures:\n\n * General cleanup of old features \n * Upgrade intenal component YASM\n * MSVC build tweeks\n * New experimental MSVC python build system\n\nChanges:\n\n * Remove explicit support for thumb m68k and sh cpu's\n\nChanges between MPIR 2.4.0 and MPIR 2.3.1\n\nBug Fixes:\n\n * The define __GMP_CC in mpir.h and gmp.h can miss out the the option\n   -std=gnu99 \n * Old versions of GAS can fail to assemble the mpn/x86_64/core2/popcount.asm\n * Make install will fail to install the file gmpxx.h (only needed with the\n   configure options --enable-cxx --enable-gmpcompat) when built out of the\n   source tree.\n * Add an alias used by MPFR (snprintf -> _snprintf)\n * Correct incorrect parameter type for mpz_cdiv_q_2exp\n * Corrected CRLF endings in some C files\n * Corrected bitcnt_t functions error return on Win64 \n\nSpeedups:\n\n * Sandybridge and Bobcat have been tuned\n * Updated to the latest compiler flags\n * Bulldozer cpu detection\n\nFeatures:\n\n * Upgrade internal components yasm and autotools\n * General cleanups of sourcebase\n * Initial support for intmax functions\n * Initial support for MSVC command line build for VS2005 and up\n * Support for MSBUILD of VS2010\n * Much more extensive and automated testing system\n\nChanges:\n\n * Remove explicit support for arc s390 pa32 pa64\n\nChanges between MPIR 2.3.1 and MPIR 2.3.0\n\nBug Fixes:\n\n * noexecstack enabled for linux only\n * FAT builds failed on unknown processors\n * Compilers without GAS inline assembler on Unix like systems had a \n   bug on AMD chips\n \nChanges between MPIR 2.3.0 and MPIR 2.2.1\n\nBug Fixes:\n\n * noexecstack enabled for x86 properly\n \nSpeedups:\n\n * Detect a few more cpu's correctly\n * New assembler code for x64\n\nFeatures:\n\n * MinGW64 port with full assembler 
support\n\nChanges:\n\n * MSVC VS2008 port removed\n\nChanges between MPIR 2.2.1 and MPIR 2.2.0\n\nBug Fixes:\n\n * Added a missing windows export declaration\n \nSpeedups:\n\n * Detect a few more cpu's correctly\n\nFeatures:\n\n * Exported the function mpn_redc_1\n\nChanges:\n\n * Renamed the function mpn_redc_basecase to mpn_redc_1 and re-ordered the \n   parameters\n\n\nChanges between MPIR 2.1.1 and MPIR 2.2.0\n\nBug Fixes:\n\n * A bug in mpn_divexact which caused a seg-fault has been corrected\n * An original GMP bug in mpf_get_d_2exp for negative numbers has been \n   corrected\n * A parallel make install race condition has been fixed\n \nSpeedups:\n\n * Detect a few more cpu's correctly\n\nFeatures:\n\n * Upgrade internal components yasm,autotools,gnulib\n * Removed the prebuild steps\n * General cleanup of the source base\n * Windows DLL builds can now be tested\n * make check can now run the tests in parallel\n * Initial support for running under MinGW64\n * Enable noexecstack for x86/x86_64 with GCC\n\nChanges:\n\n * Removed explicit support for ancient cpus's namely a29k clipper i960*\n   m88* ns32k pyr* z8000* gmicro i860 romp uxp POWER1 POWER2 cray vax\n * Removed explicit support for ancient OS'es namely pw32 djgpp os2 unicos\n   osf tru64\n * Removed the demo's from the library\n * Renamed the internal function mpn_divexact_bybM1of to mpn_divexact_fobm1\n * Windows build directory changes\n \nChanges between MPIR 2.1.0 and MPIR 2.1.1\n\nBug Fixes:\n\n * Fixed Windows K8/K10 mpn_sublsh_n function entry point\n\nSpeedups:\n\n * None\n\nFeatures:\n\n * Initial build with Visual Studio Express 2010\n\nChanges:\n\n * None\n\nChanges between MPIR 2.0.0 and MPIR 2.1.0\n\nBug Fixes:\n\n * Fixed the xgcd normalisation issue and redid the tuning code for gcd\n   and xgcd\n * Fixes for compiling with GCC 4.5.0 on Itanium\n\nSpeedups:\n\n * None\n\nFeatures:\n\n * Initial build with Visual Studio 2010\n\nChanges:\n\n * Export new function mpn_sqr\n\nChanges 
between MPIR 1.3.0 and MPIR 2.0.0\n\nLicense:\n \n * Switched to overall LGPL v3+\n\nBug Fixes:\n \n * Fixed a bug in the probable prime code (reported by Xiangyu Liu)\n * Fixed a buld issue on 32 bit p6 Apples\n * Fixed demos/pollard_rho\n * Numerous tuning bug fixes\n\nSpeedups:\n\n * Sped up squaring code\n * Minor speedup to toom4 code\n * Sped up x86_64 divrem_1 when divisor is 64 bits\n * Sped up x86_64 divrem_2\n * Sped up GCD and GCDEXT by an improved nhgcd2.c\n * Sped up addmul code for Itanium (by Jason Martin)\n * Large number of new and sped up Itanium assembly functions\n   (by Torbjorn Granlund)\n\nFeatures:\n \n * Toom8.5 code (by Marco Bodrato) see the paper\n   M. Bodrato, \"High degree Toom'n'half for balanced and unbalanced \n   multiplication\", E. Antelo, D. Hough and P. Ienne, editors, Proceedings \n   of the 20th IEEE Symposium on Computer Arithmetic, IEEE, Tubingen, Germany, \n   July 25-27, 2011, pp. 15--22.\n * Schoolbook Euclidean division code (by Torbjorn Granlund)\n * Divide and conquer Euclidean division code (by Torbjorn Granlund)\n   and Marco Bodrato (adapted to use David Harvey's middle product based\n   approximate quotient code)\n * Asymptotically fast division code (by William Hart), based on Paul\n   Zimmermann's mpn_invert and some reuse of the divide and conquer code.\n * New mpn_tdiv_q and mpn_tdiv_qr code (by Torbjorn Granlund)\n * Schoolbook Hensel division code, (largely by Niels Moller)\n * Divide and conquer Hensel division code (by Niels Moller, Torbjorn\n   Granlund and David Harvey)\n * New mpn_divexact code and mpz_divexact to match (by Torbjorn Granlund)\n * New mpn_rootrem, mpz_rootrem and mpz_root code (by Paul Zimmermann and\n   Torbjorn Granlund)\n * New mpn_neg, mpn_sqr, mpn_zero, mpn_and_n, mpn_ior_n, mpn_xor_n, \n   mpn_xnor_n, mpn_nand_n, etc (by Torbjorn Granlund)\n * New string input/output code (by Torbjorn Granlund)\n * New mp_bitcnt_t type for multiple precision bit counts\n \nChanges:\n\n * Removed 
benchmark 0.1 code from tarball\n * Updated GMP_VERSION to \"5.0.1\"\n\nChanges between MPIR 1.2.0 and MPIR 1.3.0\n\nBug Fixes:\n\n * Fixes to the build system to better support MinGW\n * Fixed a memory leak in lehmer GCD code\n * Fixed a CPU misidentification on BSD\n * Fixed a BSD install issue\n * Fixed a make try warning on Solaris\n * Fixed make distclean to clean up properly after a fat binary build\n * Fixed a bug in make distcheck\n * Fixed mpf_eq bug (reported on GMP list)\n * Fixed non-uniformness of mpz_urandomm\n * Fixed mpf exponent printing issue (reported on GMP list)\n * Fixed bug in sparc32/v9 add/sub code\n * Fixed bug in rootrem code\n \nSpeedups:\n\n * Unbalanced Toom 4 multiplication\n * Toom 53 multiplication \n * New fast single limb gcd and gcdext routines\n * Switched on ngcd based Lehmer GCD routine\n * Strassen multiplication for 2x2 matrices to speed up ngcd and ngcdext\n * Switched on new MPN_ZERO and mpn_store assembly routines in FFT code\n * Left and right shift assembly code for x86_64\n * Rewrote generic mullow and mulhi functions\n * New mpz factorial code and tuning (contributed by Robert Gerbicz)\n * Updating of 32 bit Windows support for AMD64, p3 and p4\n * Core2/penryn and nehalem mpn_store assembly code\n * Core2/penryn copyi assembly code\n * Better 32 bit k8/k10 and Nehalem assembly code\n * Initial support for via Nano\n * New mpn_rootrem code\n * Select better assembly code for Atom 64 bit\n * New faster mpz_tdiv_q code\n * Faster division and exact division by a single limb on x86_64 \n * Core2/penryn and nehalem addlsh_n assembly code\n * K8/k10 addlsh_n, sublsh_n assembly functions, including carry in variants\n * K8/k10 inclsh_n, declsh_n assembly code \n \nFeatures:\n\n * Middle product multiplication (by David Harvey)\n * Optimised k8/k10 and Nehalem assembly code for add_err1_n, sub_err1_n used by mulmid\n * Speed program accepts lines of data from a text file\n * A batch script to build MPIR using MSVC using a 
configure/make like syntax\n * Complete rewrite of the benchmark program in C by Brian Gladman\n * New primality test code written by T. R. Nicely used as a benchmark case, adapted with \n   the help of Jeff Gilchrist\n * mpn_lshift2 and mpn_rshift2 assembly functions\n * Latest Yasm assembler\n * sb_divappr_q, schoolbook approximate quotient\n * dc_divappr_q, divide and conquer approximate quotient (by David Harvey)\n * Script for setting all version numbers automatically when doing a release\n * mpn_neg_n function\n * New mpn_mulmod_2expp1 and mpn_mulmod_2expm1 functions\n * Benchmark for mpn functions\n * New k8 mpn_lshiftc assembler function\n * Macro functions inclsh1, declsh1\n * The try program now tests macro functions\n * Macros for memory managers to determine when reallocations are likely to occur\n * New function mpz_nthroot\n * New mpz_next_likely_prime, mpz_probable_prime_p and mpz_likely_prime functions\n * BPSW primality test code for integers up to GMP_LIMB_BITS, contributed by Peter Shrimpton\n * Factor out trial division function from primality test code\n * New mpf_rrandomb without global state\n * New mpn_urandomb, mpn_urandomm, mpn_rrandom and mpn_randomb functions without global state\n * New mpn_invert code (contributed by Paul Zimmermann), used in division code\n * New generic divrem_hensel functions\n * Implement Peter Montgomery's mpn_mod_1_k algorithms\n * Optimised AMD, core2/penryn, atom, nehalem assembly functions for mpn_mod_1_?\n * New assembly code for AMD divrem_hensel_qr_1, divrem_hensel_r_1\n * New AMD, core2/penryn, atom, nehalem assembly functions  mpn_rsh_divrem_hensel_qr_1_2\n * New optimised AMD, core2/penryn, atom, nehalem assembly functions mpn_divrem_hensel_qr_1_2\n * New generic functions mpn_rsh_divrem_hensel_qr_1_?\n * New generic mpn_tdiv_q function (based on mulmid/dc_divappr_q code)\n * Improved Windows timing code\n * Support for new Intel family 6, model 30\n\nChanges:\n\n * Removed requirement to type make 
install-gmpcompat\n * Make check tests both static and dynamic libraries where code differs\n * Changed library version numbers from x.y to x.y.z when doing a new minor release\n * Removed numerous extremely old deprecated functions\n * Removed mpbsd support from MPIR\n * Removed ancient ansi2knr conversion\n * Added architecture directory k102 for Phenom II assembly code\n\n\nChanges between MPIR 1.1.0 and MPIR 1.2.0\n\n  Bugs:\n  * None\n\n  Speedups:\n  * Add new FFT code written by Paul Zimmermann as revised by Paul Zimmermann, \n    Pierrick Gaudry, Alexander Kruppa and Torbjorn Granlund, with numerous bug \n    fixes due to William Hart\n  * Add tuning parameters for new FFT for most modern processors\n  * Write tuning code for new FFT\n  * Implement Toom32, unbalanced Toom3, Toom42\n  * Optimise Toom3 and Toom3 squaring code using better sequences\n  * Factor out Toom4/7 interpolate sequences and switch to twos complement\n  * Optimise memory usage in Toom 3, 4 and 7 routines\n  * Many new highly optimised assembly routines for x86_64 architectures\n  * Fast XGCD based on Moller's ngcd algorithm\n\n  Features:\n  * Modified speed program to be able to add values from columns together\n\n  Changes:\n  * None\n\nChanges between MPIR 1.0.0 and MPIR 1.1.0\n\n  Bugs:\n  * Work around a linker bug in Apple Darwin Tiger\n  * Resolve an issue causing a build failure on recent Cygwin32's\n  * Fixed development test code to do proper overlap tests for functions with \n    three source operands\n\n  Speedups:\n  * Added numerous assembly optimised linear division functions (Jason Moxham)\n  * Optimised mul_2 and addmul_2 (Jason Moxham)\n  * Added Toom 4 and Toom 7 multiplication for balanced operands (William Hart)\n  * Small speedup for mpz_mul for small operands when not aliased\n  \n  Features:\n  * Complete rearrangement of cpu detection code to explicitly support k8, k10, \n    pentium4, prescott, netburst, netburstlahf, core2, core, penryn, atom, nehalem\n  * 
factored out x86/x86_64 detection for both ordinary and fat builds into cpuid.c\n  * Distribute mpirbench with mpir (new make bench option)\n  * Added __GMP_CC and __GMP_CFLAGS, __MPIR_CC and __MPIR_CFLAGS to gmp/mpir.h \n  * Report when CPU is not identified (try sensible defaults)\n  * Support Pentium 4's that do not support LAHF/SAHF instructions\n  * Support Pathscale gcc on MIPS64\n  * Addition of assembly optimised subadd_n function\n\n  Changes:\n  * Re-enabled mpbsd functionality\n\nChanges between MPIR 0.9.0 and MPIR 1.0.0\n\n  Bugs:\n  * Building outside the source tree is now possible\n  * Bug removed from Windows Assembler file dive_1.asm\n  * Fat binary support for Core 2 64 bit fixed\n  * x86_64 fat binary support on Sun machines with gcc fixed\n  * Build failure on Sun machines using later versions of gcc fixed  \n  * Aliasing bug in mpz_urandomm fixed\n  * Fixed numerous build bugs on OSX (reported by Michael Abshoff)\n\n  Speedups:\n  * Dramatic speedups for K8 assembly code (due primarily to Jason Moxham)\n  * Assembly support for K10\n  * Significant speedups for Core 2 assembly (due primarily to Jason Moxham)\n  * Some mpn assembler functions were not being used in mpz layer due to \n    missing HAVE_NATIVE flags\n  * Nocona processors now use Core 2 assembly functions instead of generic C\n\n  Features:\n  * Emit mpir binaries and mpir.h and offer support for gmp compatibility\n  * Build support for Intel Atom\n  * Unrecognised Intel 64 machines now default to Core 2 assembly support\n  * Some new, undocumented mpn functions\n  * Try, speed and tune now available for Windows MSVC build\n\nChanges between GMP 4.2.1 and MPIR 0.9.0\n\n  Bugs:\n  * Sun CC support\n  * C99 support in gmp.h\n  * Build fixes for Apple GCC compiler\n  * Numerous bug fixes posted to gmp-devel for GMP 4.2.1\n  * Corrections in documentation including function prototypes\n  * Build fix (-fast) for cc on sparc-solaris\n  * Support for Core 2 Solaris\n  * Support for 
SiCortex MIPS\n  * Distinguish and detect P4, Nocona, Prescott \n  * Support numerous recent Intel family 6 and AMD Dunnington prcessors\n  * Fixed bugs in perfect power detection\n\n  Speedups:\n  * Jason Martin's Core 2 assembly patches\n  * Niels Möhler's GCD patches\n  * Pierrick Gaudry's AMD64 assembly patches\n  * Tuning flags for P4, Prescott, Nocona and Core 2\n\n  Features:\n  * x86_64 code to Yasm format (Yasm supplied with MPIR)\n  * Support for building on MSVC\n  * x86_64 fat binary support\n\n  Changes:\n  * Disabled nails support\n  * Removed macos port\n\nChanges between GMP version 4.2 and 4.2.1\n\n  Bugs:\n  * Shared library numbers corrected.\n  * Broken support for 32-bit AIX fixed.\n  * Misc minor fixes.\n\n  Speedups:\n  * Exact division (mpz_divexact) now falls back to plain division for large\n    operands.\n\n  Features:\n  * Support for some new systems.\n\n\nChanges between GMP version 4.1.4 and 4.2\n\n  Bugs:\n  * Minor bug fixes and code generalizations.\n  * Expanded and improved test suite.\n\n  Speedups:\n  * Many minor optimizations, too many to mention here.\n  * Division now always subquadratic.\n  * Computation of n-factorial much faster.\n  * Added basic x86-64 assembly code.\n  * Floating-point output is now subquadratic for all bases.\n  * FFT multiply code now about 25% faster.\n  * Toom3 multiply code faster.\n\n  Features:\n  * Much improved configure.\n  * Workarounds for many more compiler bugs.\n  * Temporary allocations are now made on the stack only if small.\n  * New systems supported: HPPA-2.0 gcc, IA-64 HP-UX, PowerPC-64 Darwin,\n    Sparc64 GNU/Linux.\n  * New i386 fat binaries, selecting optimised code at runtime (--enable-fat).\n  * New build option: --enable-profiling=instrument.\n  * New memory function: mp_get_memory_functions.\n  * New Mersenne Twister random numbers: gmp_randinit_mt, also now used for\n    gmp_randinit_default.\n  * New random functions: gmp_randinit_set, gmp_urandomb_ui, gmp_urandomm_ui.\n  
* New integer functions: mpz_combit, mpz_rootrem.\n  * gmp_printf etc new type \"M\" for mp_limb_t.\n  * gmp_scanf and friends now accept C99 hex floats.\n  * Numeric input and output can now be in bases up to 62.\n  * Comparisons mpz_cmp_d, mpz_cmpabs_d, mpf_cmp_d recognise infinities.\n  * Conversions mpz_get_d, mpq_get_d, mpf_get_d truncate towards zero,\n    previously their behaviour was unspecified.\n  * Fixes for overflow issues with operands >= 2^31 bits.\n\n  Caveats:\n  * mpfr is gone, and will from now on be released only separately.  Please see\n    www.mpfr.org.\n\n\nChanges between GMP version 4.1.3 and 4.1.4\n\n* Bug fix to FFT multiplication code (crash for huge operands).\n* Bug fix to mpf_sub (miscomputation).\n* Support for powerpc64-gnu-linux.\n* Better support for AMD64 in 32-bit mode.\n* Upwardly binary compatible with 4.1.3, 4.1.2, 4.1.1, 4.1, 4.0.1, 4.0,\n  and 3.x versions.\n\n\nChanges between GMP version 4.1.2 and 4.1.3\n\n* Bug fix for FFT multiplication code (miscomputation).\n* Bug fix to K6 assembly code for gcd.\n* Bug fix to IA-64 assembly code for population count.\n* Portability improvements, most notably functional AMD64 support.\n* mpz_export allows NULL for countp parameter.\n* Many minor bug fixes.\n* mpz_export allows NULL for countp parameter.\n* Upwardly binary compatible with 4.1.2, 4.1.1, 4.1, 4.0.1, 4.0, and 3.x\n  versions.\n\n\nChanges between GMP version 4.1.1 and 4.1.2\n\n* Bug fixes.\n\n\nChanges between GMP version 4.1 and 4.1.1\n\n* Bug fixes.\n* New systems supported: NetBSD and OpenBSD sparc64.\n\n\nChanges between GMP version 4.0.1 and 4.1\n\n* Bug fixes.\n* Speed improvements.\n* Upwardly binary compatible with 4.0, 4.0.1, and 3.x versions.\n* Asymptotically fast conversion to/from strings (mpz, mpq, mpn levels), but\n  also major speed improvements for tiny operands.\n* mpn_get_str parameter restrictions relaxed.\n* Major speed improvments for HPPA 2.0 systems.\n* Major speed improvments for UltraSPARC 
systems.\n* Major speed improvments for IA-64 systems (but still sub-optimal code).\n* Extended test suite.\n* mpfr is back, with many bug fixes and portability improvements.\n* New function: mpz_ui_sub.\n* New functions: mpz_export, mpz_import.\n* Optimization for nth root functions (mpz_root, mpz_perfect_power_p).\n* Optimization for extended gcd (mpz_gcdext, mpz_invert, mpn_gcdext).\n* Generalized low-level number format, reserving a `nails' part of each\n  limb.  (Please note that this is really experimental; some functions\n  are likely to compute garbage when nails are enabled.)\n* Nails-enabled Alpha 21264 assembly code, allowing up to 75% better\n  performance.  (Use --enable-nails=4 to enable it.)\n\n\nChanges between GMP version 4.0 and 4.0.1\n\n* Bug fixes.\n\n\nChanges between GMP version 3.1.1 and 4.0\n\n* Bug fixes.\n* Speed improvements.\n* Upwardly binary compatible with 3.x versions.\n* New CPU support: IA-64, Pentium 4.\n* Improved CPU support: 21264, Cray vector systems.\n* Support for all MIPS ABIs: o32, n32, 64.\n* New systems supported: Darwin, SCO, Windows DLLs.\n* New divide-and-conquer square root algorithm.\n* New algorithms chapter in the manual.\n* New malloc reentrant temporary memory method.\n* New C++ class interface by Gerardo Ballabio (beta).\n* Revamped configure, featuring ABI selection.\n* Speed improvements for mpz_powm and mpz_powm_ui (mainly affecting small\n  operands).\n* mpz_perfect_power_p now properly recognizes 0, 1, and negative perfect\n  powers.\n* mpz_hamdist now supports negative operands.\n* mpz_jacobi now accepts non-positive denominators.\n* mpz_powm now supports negative exponents.\n* mpn_mul_1 operand overlap requirements relaxed.\n* Float input and output uses locale specific decimal point where available.\n* New gmp_printf, gmp_scanf and related functions.\n* New division functions: mpz_cdiv_q_2exp, mpz_cdiv_r_2exp, mpz_divexact_ui.\n* New divisibility tests: mpz_divisible_p, mpz_divisible_ui_p,\n  
mpz_divisible_2exp_p, mpz_congruent_p, mpz_congruent_ui_p,\n  mpz_congruent_2exp_p.\n* New Fibonacci function: mpz_fib2_ui.\n* New Lucas number functions: mpz_lucnum_ui, mpz_lucnum2_ui.\n* Other new integer functions: mpz_cmp_d, mpz_cmpabs_d, mpz_get_d_2exp,\n  mpz_init2, mpz_kronecker, mpz_lcm_ui, mpz_realloc2.\n* New rational I/O: mpq_get_str, mpq_inp_str, mpq_out_str, mpq_set_str.\n* Other new rational functions: mpq_abs, mpq_cmp_si, mpq_div_2exp,\n  mpq_mul_2exp, mpq_set_f.\n* New float tests: mpf_integer_p, mpf_fits_sint_p, mpf_fits_slong_p,\n  mpf_fits_sshort_p, mpf_fits_uint_p, mpf_fits_ulong_p, mpf_fits_ushort_p.\n* Other new float functions: mpf_cmp_d, mpf_get_default_prec, mpf_get_si,\n  mpf_get_ui, mpf_get_d_2exp.\n* New random functions: gmp_randinit_default, gmp_randinit_lc_2exp_size.\n* New demo expression string parser (see demos/expr).\n* New preliminary perl interface (see demos/perl).\n* Tuned algorithm thresholds for many more CPUs.\n\n\nChanges between GMP version 3.1 and 3.1.1\n\n* Bug fixes for division (rare), mpf_get_str, FFT, and miscellaneous minor\n  things.\n\n\nChanges between GMP version 3.0 and 3.1\n\n* Bug fixes.\n* Improved `make check' running more tests.\n* Tuned algorithm cutoff points for many machines.  This will improve speed for\n  a lot of operations, in some cases by a large amount.\n* Major speed improvments: Alpha 21264.\n* Some speed improvments: Cray vector computers, AMD K6 and Athlon, Intel P5\n  and Pentium Pro/II/III.\n* The mpf_get_prec function now works as it did in GMP 2.\n* New utilities for auto-tuning and speed measuring.\n* Multiplication now optionally uses FFT for very large operands.  
(To enable\n  it, pass --enable-fft to configure.)\n* Support for new systems: Solaris running on x86, FreeBSD 5, HP-UX 11, Cray\n  vector computers, Rhapsody, Nextstep/Openstep, MacOS.\n* Support for shared libraries on 32-bit HPPA.\n* New integer functions: mpz_mul_si, mpz_odd_p, mpz_even_p.\n* New Kronecker symbol functions: mpz_kronecker_si, mpz_kronecker_ui,\n  mpz_si_kronecker, mpz_ui_kronecker.\n* New rational functions: mpq_out_str, mpq_swap.\n* New float functions: mpf_swap.\n* New mpn functions: mpn_divexact_by3c, mpn_tdiv_qr.\n* New EXPERIMENTAL function layer for accurate floating-point arithmetic, mpfr.\n  To try it, pass --enable-mpfr to configure.  See the mpfr subdirectory for\n  more information; it is not documented in the main GMP manual.\n\n\nChanges between GMP version 3.0 and 3.0.1\n\n* Memory leaks in gmp_randinit and mpz_probab_prime_p fixed.\n* Documentation for gmp_randinit fixed.  Misc documentation errors fixed.\n\n\nChanges between GMP version 2.0 and 3.0\n\n* Source level compatibility with past releases (except mpn_gcd).\n* Bug fixes.\n* Much improved speed thanks to both host independent and host dependent\n  optimizations.\n* Switch to autoconf/automake/libtool.\n* Support for building libgmp as a shared library.\n* Multiplication and squaring using 3-way Toom-Cook.\n* Division using the Burnikel-Ziegler method.\n* New functions computing binomial coefficients: mpz_bin_ui, mpz_bin_uiui.\n* New function computing Fibonacci numbers: mpz_fib_ui.\n* New random number generators: mpf_urandomb, mpz_rrandomb, mpz_urandomb,\n  mpz_urandomm, gmp_randclear, gmp_randinit, gmp_randinit_lc_2exp, gmp_randseed,\n  gmp_randseed_ui.\n* New function for quickly extracting limbs: mpz_getlimbn.\n* New functions performing integer size tests: mpz_fits_sint_p,\n  mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p, mpz_fits_ulong_p,\n  mpz_fits_ushort_p.\n* New mpf functions: mpf_ceil, mpf_floor, mpf_pow_ui, mpf_trunc.\n* New mpq function: mpq_set_d.\n* 
New mpz functions: mpz_addmul_ui, mpz_cmpabs, mpz_cmpabs_ui, mpz_lcm,\n  mpz_nextprime, mpz_perfect_power_p, mpz_remove, mpz_root, mpz_swap,\n  mpz_tdiv_ui, mpz_tstbit, mpz_xor.\n* New mpn function: mpn_divexact_by3.\n* New CPU support: DEC Alpha 21264, AMD K6 and Athlon, HPPA 2.0 and 64,\n  Intel Pentium Pro and Pentium-II/III, Sparc 64, PowerPC 64.\n* Almost 10 times faster mpz_invert and mpn_gcdext.\n* The interface of mpn_gcd has changed.\n* Better support for MIPS R4x000 and R5000 under Irix 6.\n* Improved support for SPARCv8 and SPARCv9 processors.\n\n\nChanges between GMP version 2.0 and 2.0.2\n\n* Many bug fixes.\n\n\nChanges between GMP version 1.3.2 and 2.0\n\n* Division routines in the mpz class have changed.  There are three classes of\n  functions, that rounds the quotient to -infinity, 0, and +infinity,\n  respectively.  The first class of functions have names that begin with\n  mpz_fdiv (f is short for floor), the second class' names begin with mpz_tdiv\n  (t is short for trunc), and the third class' names begin with mpz_cdiv (c is\n  short for ceil).\n\n  The old division routines beginning with mpz_m are similar to the new\n  mpz_fdiv, with the exception that some of the new functions return useful\n  values.\n\n  The old function names can still be used.  All the old functions names will\n  now do floor division, not trunc division as some of them used to.  This was\n  changed to make the functions more compatible with common mathematical\n  practice.\n\n  The mpz_mod and mpz_mod_ui functions now compute the mathematical mod\n  function.  I.e., the sign of the 2nd argument is ignored.\n\n* The mpq assignment functions do not canonicalize their results.  A new\n  function, mpq_canonicalize must be called by the user if the result is not\n  known to be canonical.\n* The mpn functions are now documented.  These functions are intended for\n  very time critical applications, or applications that need full control over\n  memory allocation.  
Note that the mpn interface is irregular and hard to\n  use.\n* New functions for arbitrary precision floating point arithmetic.  Names\n  begin with `mpf_'.  Associated type mpf_t.\n* New and improved mpz functions, including much faster GCD, fast exact\n  division (mpz_divexact), bit scan (mpz_scan0 and mpz_scan1), and number\n  theoretical functions like Jacobi (mpz_jacobi) and multiplicative inverse\n  (mpz_invert).\n* New variable types (mpz_t and mpq_t) are available that makes syntax of\n  mpz and mpq calls nicer (no need for & before variables).  The MP_INT and\n  MP_RAT types are still available for compatibility.\n* Uses GNU configure.  This makes it possible to choose target architecture\n  and CPU variant, and to compile into a separate object directory.\n* Carefully optimized assembly for important inner loops.  Support for DEC\n  Alpha, Amd 29000, HPPA 1.0 and 1.1, Intel Pentium and generic x86, Intel\n  i960, Motorola MC68000, MC68020, MC88100, and MC88110, Motorola/IBM\n  PowerPC, National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7,\n  SuperSPARC, generic SPARCv8, and DEC VAX.  Some support also for ARM,\n  Clipper, IBM ROMP (RT), and Pyramid AP/XP.\n* Faster.  Thanks to the assembler code, new algorithms, and general tuning.\n  In particular, the speed on machines without GCC is improved.\n* Support for machines without alloca.\n* Now under the LGPL.\n\nINCOMPATIBILITIES BETWEEN GMP 1 AND GMP 2\n\n* mpq assignment functions do not canonicalize their results.\n* mpz division functions round differently.\n* mpz mod functions now really compute mod.\n* mpz_powm and mpz_powm_ui now really use mod for reduction.\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 76\nEnd:\n"
  },
  {
    "path": "README",
    "content": "Copyright 1991, 1996, 1999, 2000 Free Software Foundation, Inc.\n\nCopyright 2008, 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\n\n\t\t\tTHE MPIR LIBRARY\n\nMPIR is a fork of the GNU Multi Precision library (GMP -- see http://gmplib.org)\n\nMPIR is a library for arbitrary precision arithmetic, operating on signed\nintegers, rational numbers, and floating point numbers.  It has a rich set of\nfunctions, and the functions have a regular interface.\n\nMPIR is designed to be as fast as possible, both for small operands and huge\noperands.  The speed is achieved by using fullwords as the basic arithmetic\ntype, by using fast algorithms, with carefully optimized assembly code for the\nmost common inner loops for lots of CPUs, and by a general emphasis on speed\n(instead of simplicity or elegance).\n\nGMP/MPIR is believed to be faster than any other similar library.  
Its advantage\nincreases with operand sizes for certain operations, since MPIR in many\ncases has asymptotically faster algorithms.\n\nMPIR is free software and may be freely copied on the terms contained in the\nfiles COPYING.LIB and COPYING (most of MPIR is under the former, some under\nthe latter).\n\n\n\n\t\t\tOVERVIEW OF MPIR\n\nThere are four classes of functions in MPIR.\n\n 1. Signed integer arithmetic functions (mpz).  These functions are intended\n    to be easy to use, with their regular interface.  The associated type is\n    `mpz_t'.\n\n 2. Rational arithmetic functions (mpq).  For now, just a small set of\n    functions necessary for basic rational arithmetics.  The associated type\n    is `mpq_t'.\n\n 3. Floating-point arithmetic functions (mpf).  If the C type `double'\n    doesn't give enough precision for your application, declare your\n    variables as `mpf_t' instead, set the precision to any number desired,\n    and call the functions in the mpf class for the arithmetic operations.\n\n 4. Positive-integer, hard-to-use, very low overhead functions are in the\n    mpn class.  No memory management is performed.  The caller must ensure\n    enough space is available for the results.  The set of functions is not\n    regular, nor is the calling interface.  These functions accept input\n    arguments in the form of pairs consisting of a pointer to the least\n    significant word, and an integral size telling how many limbs (= words)\n    the pointer points to.\n\n    Almost all calculations, in the entire package, are made by calling these\n    low-level functions.\n\nFor more information on how to use MPIR, please refer to the documentation.\nIt is composed from the file mpir.texi, and can be displayed on the screen or\nprinted.  
How to do that, as well how to build the library, is described in\nthe INSTALL file in this directory.\n\n\n\n\t\t\tREPORTING BUGS\n\nIf you find a bug in the library, please make sure to tell us about it via\nthe GitHub issue tracker!\n\nReport bugs to our development list: http://groups.google.com/group/mpir-devel.  \nWhat information is needed in a good bug report is described in the manual.  \nThe same address can be used for suggesting modifications and enhancements.\n\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 78\nEnd:\n"
  },
  {
    "path": "acinclude.m4",
    "content": "dnl  GMP specific autoconf macros\n\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software\ndnl  Foundation, Inc.\ndnl\ndnl  Copyright 2008 William Hart\ndnl\ndnl  This file is part of the MPIR Library.\ndnl\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write to\ndnl  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\ndnl  MA 02110-1301, USA.\n\n\ndnl  Some tests use, or must delete, the default compiler output.  
The\ndnl  possible filenames are based on what autoconf looks for, namely\ndnl\ndnl    a.out - normal unix style\ndnl    b.out - i960 systems, including gcc there\ndnl    a.exe - djgpp\ndnl    a_out.exe - OpenVMS DEC C called via GNV wrapper (gnv.sourceforge.net)\ndnl    conftest.exe - various DOS compilers\n\n\ndefine(IA64_PATTERN,\n[[ia64*-*-* | itanium-*-* | itanium2-*-*]])\n\ndefine(POWERPC64_PATTERN,\n[[powerpc64-*-* | powerpc64le-*-* | powerpc620-*-* | powerpc630-*-* | powerpc970-*-* | power[3-9]-*-*]])\n\ndefine(X86_PATTERN,\n[[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | prescott-*-* | core-*-* | athlon-*-* | viac3*-*-*]])\n\ndefine(X86_64_PATTERN,\n[[x86_64-*-* | netburst-*-* | netburstlahf-*-* | k8-*-* | k10-*-* | k102-*-* | k103-*-* | core2-*-* | penryn-*-* | nehalem-*-* | westmere-*-* | sandybridge-*-* | atom-*-* | nano-*-* | bobcat-*-* | bulldozer-*-* | piledriver-*-* | ivybridge-*-* | haswell-*-*  | haswellavx-*-* | broadwell-*-* | skylake-*-* | skylakeavx-*-*]])\n\ndnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)\ndnl  ---------------------------------\ndnl  Emit code to set shell variable DSTVAR to the suffix for a fat binary\ndnl  routine from DIRECTORY.  DIRECTORY can be a shell expression like $foo\ndnl  etc.\ndnl\ndnl  The suffix is directory separators / or \\ changed to underscores, and\ndnl  if there's more than one directory part, then the first is dropped.\ndnl\ndnl  For instance,\ndnl\ndnl      x86         ->  x86\ndnl      x86/k6      ->  k6\ndnl      x86/k6/mmx  ->  k6_mmx\ndnl\talso want to turn x86_64w into x86_64\ndefine(GMP_FAT_SUFFIX,\n[\n\tif test \"$2\" = \"x86_64w\"; then \n\t\t[$1=\"x86_64\"]\n\telse\n\t\t[$1=`echo $2 | sed -e '/\\//s:^[^/]*/::' -e 's:[\\\\/]:_:g'`]\n\tfi\n])\n\n\ndnl  GMP_REMOVE_FROM_LIST(listvar,item)\ndnl  ----------------------------------\ndnl  Emit code to remove any occurance of ITEM from $LISTVAR.  
ITEM can be a\ndnl  shell expression like $foo if desired.\n\ndefine(GMP_REMOVE_FROM_LIST,\n[remove_from_list_tmp=\nfor remove_from_list_i in $[][$1]; do\n  if test $remove_from_list_i = [$2]; then :;\n  else\n     remove_from_list_tmp=\"$remove_from_list_tmp $remove_from_list_i\"\n  fi\ndone\n[$1]=$remove_from_list_tmp\n])\n\n\ndnl  GMP_STRIP_PATH(subdir)\ndnl  ----------------------\ndnl  Strip entries */subdir from $path and $fat_path.\n\ndefine(GMP_STRIP_PATH,\n[GMP_STRIP_PATH_VAR(path, [$1])\nGMP_STRIP_PATH_VAR(fat_path, [$1])\n])\n\ndefine(GMP_STRIP_PATH_VAR,\n[tmp_path=\nfor i in $[][$1]; do\n  case $i in\n    */[$2]) ;;\n    *) tmp_path=\"$tmp_path $i\" ;;\n  esac\ndone\n[$1]=\"$tmp_path\"\n])\n\n\ndnl  GMP_INCLUDE_GMP_H\ndnl  -----------------\ndnl  Expand to the right way to #include gmp-h.in.  This must be used\ndnl  instead of mpir.h, since that file isn't generated until the end of the\ndnl  configure.\ndnl\ndnl  Dummy values for __GMP_BITS_PER_MP_LIMB and GMP_LIMB_BITS are enough\ndnl  for all current configure-time uses of mpir.h.\n\ndefine(GMP_INCLUDE_GMP_H,\n[[#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */\n#define GMP_NAIL_BITS $GMP_NAIL_BITS\n#define __GMP_BITS_PER_MP_LIMB 123 /* dummy for GMP_NUMB_BITS etc */\n#define GMP_LIMB_BITS 123\n$DEFN_LONG_LONG_LIMB\n#include \"$srcdir/gmp-h.in\"]\n])\n\n\ndnl  GMP_HEADER_GETVAL(NAME,FILE)\ndnl  ----------------------------\ndnl  Expand at autoconf time to the value of a \"#define NAME\" from the given\ndnl  FILE.  The regexps here aren't very rugged, but are enough for gmp.\ndnl  /dev/null as a parameter prevents a hang if $2 is accidentally omitted.\n\ndefine(GMP_HEADER_GETVAL,\n[patsubst(patsubst(\nesyscmd([grep \"^#define $1 \" $2 /dev/null 2>/dev/null]),\n[^.*$1[ \t]+],[]),\n[[\n \t]*$],[])])\n\n\ndnl  GMP_VERSION\ndnl  -----------\ndnl  The gmp version number, extracted from the #defines in gmp-h.in at\ndnl  autoconf time.  
Two digits like 3.0 if patchlevel <= 0, or three digits\ndnl  like 3.0.1 if patchlevel > 0.\n\ndefine(GMP_VERSION,\n[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp-h.in)[]dnl\n.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp-h.in)[]dnl\nifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp-h.in) > 0),1,\n[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp-h.in)])])\n\n\ndnl  MPIR_VERSION\ndnl  -----------\ndnl  The mpir version number, extracted from the #defines in gmp-h.in at\ndnl  autoconf time.\n\ndefine(MPIR_VERSION,\n[GMP_HEADER_GETVAL(__MPIR_VERSION,gmp-h.in)[]dnl\n.GMP_HEADER_GETVAL(__MPIR_VERSION_MINOR,gmp-h.in)[]dnl\n.GMP_HEADER_GETVAL(__MPIR_VERSION_PATCHLEVEL,gmp-h.in)])\n\n\ndnl  GMP_SUBST_CHECK_FUNCS(func,...)\ndnl  ------------------------------\ndnl  Setup an AC_SUBST of HAVE_FUNC_01 for each argument.\n\nAC_DEFUN([GMP_SUBST_CHECK_FUNCS],\n[m4_if([$1],,,\n[_GMP_SUBST_CHECK_FUNCS(ac_cv_func_[$1],HAVE_[]m4_translit([$1],[a-z],[A-Z])_01)\nGMP_SUBST_CHECK_FUNCS(m4_shift($@))])])\n\ndnl  Called: _GMP_SUBST_CHECK_FUNCS(cachevar,substvar)\nAC_DEFUN([_GMP_SUBST_CHECK_FUNCS],\n[case $[$1] in\nyes) AC_SUBST([$2],1) ;;\nno)  [$2]=0 ;;\nesac\n])\n\n\ndnl  GMP_SUBST_CHECK_HEADERS(foo.h,...)\ndnl  ----------------------------------\ndnl  Setup an AC_SUBST of HAVE_FOO_H_01 for each argument.\n\nAC_DEFUN([GMP_SUBST_CHECK_HEADERS],\n[m4_if([$1],,,\n[_GMP_SUBST_CHECK_HEADERS(ac_cv_header_[]m4_translit([$1],[./],[__]),\nHAVE_[]m4_translit([$1],[a-z./],[A-Z__])_01)\nGMP_SUBST_CHECK_HEADERS(m4_shift($@))])])\n\ndnl  Called: _GMP_SUBST_CHECK_HEADERS(cachevar,substvar)\nAC_DEFUN([_GMP_SUBST_CHECK_HEADERS],\n[case $[$1] in\nyes) AC_SUBST([$2],1) ;;\nno)  [$2]=0 ;;\nesac\n])\n\n\ndnl  GMP_COMPARE_GE(A1,B1, A2,B2, ...)\ndnl  ---------------------------------\ndnl  Compare two version numbers A1.A2.etc and B1.B2.etc.  Set\ndnl  $gmp_compare_ge to yes or no according to the result.  The A parts\ndnl  should be variables, the B parts fixed numbers.  
As many parts as\ndnl  desired can be included.  An empty string in an A part is taken to be\ndnl  zero, the B parts should be non-empty and non-zero.\ndnl\ndnl  For example,\ndnl\ndnl      GMP_COMPARE($major,10, $minor,3, $subminor,1)\ndnl\ndnl  would test whether $major.$minor.$subminor is greater than or equal to\ndnl  10.3.1.\n\nAC_DEFUN([GMP_COMPARE_GE],\n[gmp_compare_ge=no\nGMP_COMPARE_GE_INTERNAL($@)\n])\n\nAC_DEFUN([GMP_COMPARE_GE_INTERNAL],\n[ifelse(len([$3]),0,\n[if test -n \"$1\" && test \"$1\" -ge $2; then\n  gmp_compare_ge=yes\nfi],\n[if test -n \"$1\"; then\n  if test \"$1\" -gt $2; then\n    gmp_compare_ge=yes\n  else\n    if test \"$1\" -eq $2; then\n      GMP_COMPARE_GE_INTERNAL(m4_shift(m4_shift($@)))\n    fi\n  fi\nfi])\n])\n  \n\ndnl  GMP_PROG_AR\ndnl  -----------\ndnl  GMP additions to $AR.\ndnl\ndnl  A cross-\"ar\" may be necessary when cross-compiling since the build\ndnl  system \"ar\" might try to interpret the object files to build a symbol\ndnl  table index, hence the use of AC_CHECK_TOOL.\ndnl\ndnl  A user-selected $AR is always left unchanged.  AC_CHECK_TOOL is still\ndnl  run to get the \"checking\" message printed though.\ndnl\ndnl  If extra flags are added to AR, then ac_cv_prog_AR and\ndnl  ac_cv_prog_ac_ct_AR are set too, since libtool (cvs 2003-03-31 at\ndnl  least) will do an AC_CHECK_TOOL and that will AR from one of those two\ndnl  cached variables.  (ac_cv_prog_AR is used if there's an ac_tool_prefix,\ndnl  or ac_cv_prog_ac_ct_AR is used otherwise.)  FIXME: This is highly\ndnl  dependent on autoconf internals, perhaps it'd work to put our extra\ndnl  flags into AR_FLAGS instead.\ndnl\ndnl  $AR_FLAGS is set to \"cq\" rather than leaving it to libtool \"cru\".  The\ndnl  latter fails when libtool goes into piecewise mode and is unlucky\ndnl  enough to have two same-named objects in separate pieces, as happens\ndnl  for instance to random.o (and others) on vax-dec-ultrix4.5.  
Naturally\ndnl  a user-selected $AR_FLAGS is left unchanged.\ndnl\ndnl  For reference, $ARFLAGS is used by automake (1.8) for its \".a\" archive\ndnl  file rules.  This doesn't get used by the piecewise linking, so we\ndnl  leave it at the default \"cru\".\ndnl\ndnl  FIXME: Libtool 1.5.2 has its own arrangments for \"cq\", but that version\ndnl  is broken in other ways.  When we can upgrade, remove the forcible\ndnl  AR_FLAGS=cq.\n\nAC_DEFUN([GMP_PROG_AR],\n[dnl  Want to establish $AR before libtool initialization.\nAC_BEFORE([$0],[AC_PROG_LIBTOOL])\ngmp_user_AR=$AR\nAC_CHECK_TOOL(AR, ar, ar)\nif test -z \"$gmp_user_AR\"; then\n                        eval arflags=\\\"\\$ar${abi1}_flags\\\"\n  test -n \"$arflags\" || eval arflags=\\\"\\$ar${abi2}_flags\\\"\n  if test -n \"$arflags\"; then\n    AC_MSG_CHECKING([for extra ar flags])\n    AR=\"$AR $arflags\"\n    ac_cv_prog_AR=\"$AR $arflags\"\n    ac_cv_prog_ac_ct_AR=\"$AR $arflags\"\n    AC_MSG_RESULT([$arflags])\n  fi\nfi\nif test -z \"$AR_FLAGS\"; then\n  AR_FLAGS=cq\nfi\n])\n\n\ndnl  GMP_PROG_M4\ndnl  -----------\ndnl  Find a working m4, either in $PATH or likely locations, and setup $M4\ndnl  and an AC_SUBST accordingly.  If $M4 is already set then it's a user\ndnl  choice and is accepted with no checks.  
GMP_PROG_M4 is like\ndnl  AC_PATH_PROG or AC_CHECK_PROG, but tests each m4 found to see if it's\ndnl  good enough.\ndnl \ndnl  See mpn/asm-defs.m4 for details on the known bad m4s.\n\nAC_DEFUN([GMP_PROG_M4],\n[AC_ARG_VAR(M4,[m4 macro processor])\nAC_CACHE_CHECK([for suitable m4],\n                gmp_cv_prog_m4,\n[if test -n \"$M4\"; then\n  gmp_cv_prog_m4=\"$M4\"\nelse\n  cat >conftest.m4 <<\\EOF\ndnl  Must protect this against being expanded during autoconf m4!\ndnl  Dont put \"dnl\"s in this as autoconf will flag an error for unexpanded\ndnl  macros.\n[define(dollarhash,``$][#'')ifelse(dollarhash(x),1,`define(t1,Y)',\n``bad: $][# not supported (SunOS /usr/bin/m4)\n'')ifelse(eval(89),89,`define(t2,Y)',\n`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)\n')ifelse(t1`'t2,YY,`good\n')]\nEOF\ndnl ' <- balance the quotes for emacs sh-mode\n  echo \"trying m4\" >&AC_FD_CC\n  gmp_tmp_val=`(m4 conftest.m4) 2>&AC_FD_CC`\n  echo \"$gmp_tmp_val\" >&AC_FD_CC\n  if test \"$gmp_tmp_val\" = good; then\n    gmp_cv_prog_m4=\"m4\"\n  else\n    IFS=\"${IFS= \t}\"; ac_save_ifs=\"$IFS\"; IFS=\":\"\ndnl $ac_dummy forces splitting on constant user-supplied paths.\ndnl POSIX.2 word splitting is done only on the output of word expansions,\ndnl not every word.  
This closes a longstanding sh security hole.\n    ac_dummy=\"$PATH:/usr/5bin\"\n    for ac_dir in $ac_dummy; do\n      test -z \"$ac_dir\" && ac_dir=.\n      echo \"trying $ac_dir/m4\" >&AC_FD_CC\n      gmp_tmp_val=`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`\n      echo \"$gmp_tmp_val\" >&AC_FD_CC\n      if test \"$gmp_tmp_val\" = good; then\n        gmp_cv_prog_m4=\"$ac_dir/m4\"\n        break\n      fi\n    done\n    IFS=\"$ac_save_ifs\"\n    if test -z \"$gmp_cv_prog_m4\"; then\n      AC_MSG_ERROR([No usable m4 in \\$PATH or /usr/5bin (see config.log for reasons).])\n    fi\n  fi\n  rm -f conftest.m4\nfi])\nM4=\"$gmp_cv_prog_m4\"\nAC_SUBST(M4)\n])\n\n\ndnl  GMP_M4_M4WRAP_SPURIOUS\ndnl  ----------------------\ndnl  Check for spurious output from m4wrap(), as described in mpn/asm-defs.m4.\ndnl\ndnl  The following systems have been seen with the problem.\ndnl\ndnl  - MacOS X Darwin, its assembler fails.\ndnl  - NetBSD 1.4.1 m68k, and gas 1.92.3 there gives a warning and ignores\ndnl    the bad last line since it doesn't have a newline.\ndnl  - NetBSD 1.4.2 alpha, but its assembler doesn't seem to mind.\ndnl  - HP-UX ia64.\ndnl\ndnl  Enhancement: Maybe this could be in GMP_PROG_M4, and attempt to prefer\ndnl  an m4 with a working m4wrap, if it can be found.\n\nAC_DEFUN([GMP_M4_M4WRAP_SPURIOUS],\n[AC_REQUIRE([GMP_PROG_M4])\nAC_CACHE_CHECK([if m4wrap produces spurious output],\n               gmp_cv_m4_m4wrap_spurious,\n[# hide the d-n-l from autoconf's error checking\ntmp_d_n_l=d\"\"nl\ncat >conftest.m4 <<EOF\n[changequote({,})define(x,)m4wrap({x})$tmp_d_n_l]\nEOF\necho test input is >&AC_FD_CC\ncat conftest.m4 >&AC_FD_CC\ntmp_chars=`$M4 conftest.m4 | wc -c`\necho produces $tmp_chars chars output >&AC_FD_CC\nrm -f conftest.m4\nif test $tmp_chars = 0; then\n  gmp_cv_m4_m4wrap_spurious=no\nelse\n  gmp_cv_m4_m4wrap_spurious=yes\nfi\n])\nGMP_DEFINE_RAW([\"define(<M4WRAP_SPURIOUS>,<$gmp_cv_m4_m4wrap_spurious>)\"])\n])\n\n\ndnl  GMP_PROG_NM\ndnl  -----------\ndnl  GMP additions 
to libtool AC_PROG_NM.\ndnl\ndnl  Note that if AC_PROG_NM can't find a working nm it still leaves\ndnl  $NM set to \"nm\", so $NM can't be assumed to actually work.\ndnl\ndnl  A user-selected $NM is always left unchanged.  AC_PROG_NM is still run\ndnl  to get the \"checking\" message printed though.\ndnl\ndnl  Perhaps it'd be worthwhile checking that nm works, by running it on an\ndnl  actual object file.  For instance on sparcv9 solaris old versions of\ndnl  GNU nm don't recognise 64-bit objects.  Checking would give a better\ndnl  error message than just a failure in later tests like GMP_ASM_W32 etc.\ndnl\ndnl  On the other hand it's not really normal autoconf practice to take too\ndnl  much trouble over detecting a broken set of tools.  And libtool doesn't\ndnl  do anything at all for say ranlib or strip.  So for now we're inclined\ndnl  to just demand that the user provides a coherent environment.\n\nAC_DEFUN([GMP_PROG_NM],\n[dnl  Make sure we're the first to call AC_PROG_NM, so our extra flags are\ndnl   used by everyone.\nAC_BEFORE([$0],[AC_PROG_NM])\ngmp_user_NM=$NM\nAC_PROG_NM\n\n# FIXME: When cross compiling (ie. $ac_tool_prefix not empty), libtool\n# defaults to plain \"nm\" if a \"${ac_tool_prefix}nm\" is not found.  In this\n# case run it again to try the native \"nm\", firstly so that likely locations\n# are searched, secondly so that -B or -p are added if necessary for BSD\n# format.  
This is necessary for instance on OSF with \"./configure\n# --build=alphaev5-dec-osf --host=alphaev6-dec-osf\".\n#\nif test -z \"$gmp_user_NM\" && test -n \"$ac_tool_prefix\" && test \"$NM\" = nm; then\n  $as_unset lt_cv_path_NM\n  gmp_save_ac_tool_prefix=$ac_tool_prefix\n  ac_tool_prefix=\n  NM=\n  AC_PROG_NM\n  ac_tool_prefix=$gmp_save_ac_tool_prefix\nfi\n\nif test -z \"$gmp_user_NM\"; then\n                        eval nmflags=\\\"\\$nm${abi1}_flags\\\"\n  test -n \"$nmflags\" || eval nmflags=\\\"\\$nm${abi2}_flags\\\"\n  if test -n \"$nmflags\"; then\n    AC_MSG_CHECKING([for extra nm flags])\n    NM=\"$NM $nmflags\"\n    AC_MSG_RESULT([$nmflags])\n  fi\nfi\n])\n\n\ndnl  GMP_PROG_CC_WORKS(cc+cflags,[ACTION-IF-WORKS][,ACTION-IF-NOT-WORKS])\ndnl  --------------------------------------------------------------------\ndnl  Check if cc+cflags can compile and link.\ndnl\ndnl  This test is designed to be run repeatedly with different cc+cflags\ndnl  selections, so the result is not cached.\ndnl\ndnl  For a native build, meaning $cross_compiling == no, we require that the\ndnl  generated program will run.  This is the same as AC_PROG_CC does in\ndnl  _AC_COMPILER_EXEEXT_WORKS, and checking here will ensure we don't pass\ndnl  a CC/CFLAGS combination that it rejects.\ndnl\ndnl  sparc-*-solaris2.7 can compile ABI=64 but won't run it if the kernel\ndnl  was booted in 32-bit mode.  The effect of requiring the compiler output\ndnl  will run is that a plain native \"./configure\" falls back on ABI=32, but\ndnl  ABI=64 is still available as a cross-compile.\ndnl\ndnl  The various specific problems we try to detect are done in separate\ndnl  compiles.  
Although this is probably a bit slower than one test\ndnl  program, it makes it easy to indicate the problem in AC_MSG_RESULT,\ndnl  hence giving the user a clue about why we rejected the compiler.\n\nAC_DEFUN([GMP_PROG_CC_WORKS],\n[AC_MSG_CHECKING([compiler $1])\ngmp_prog_cc_works=yes\n\n# first see a simple \"main()\" works, then go on to other checks\nGMP_PROG_CC_WORKS_PART([$1], [])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [gcc-4.3.2 on 64-bit is bad , try -O1 or -fno-strict-aliasing for the flags],\n[/* The following aborts with gcc-4.3.2 on a 64-bit system which is an unusable compiler */\n#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)\nint __attribute__((noinline))\nfoo(int i)\n{\n  int *p = __builtin_malloc (4 * sizeof(int));\n  *p = 0;\n  p[i] = 1;\n  return *p;\n}\nextern void abort (void);\nint main()\n{\n   if (foo(0) != 1)\n        abort ();\n        return 0;\n}\n#else\nint main(){return 0;}\n#endif\n])\n\nGMP_PROG_CC_WORKS_PART([$1], [function pointer return],\n[/* The following provokes an internal error from gcc 2.95.2 -mpowerpc64\n   (without -maix64), hence detecting an unusable compiler */\nvoid *g() { return (void *) 0; }\nvoid *f() { return g(); }\n])\n\nGMP_PROG_CC_WORKS_PART([$1], [cmov instruction],\n[/* The following provokes an invalid instruction syntax from i386 gcc\n   -march=pentiumpro on Solaris 2.8.  The native sun assembler\n   requires a non-standard syntax for cmov which gcc (as of 2.95.2 at\n   least) doesn't know.  */\nint n;\nint cmov () { return (n >= 0 ? n : 0); }\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [double -> ulong conversion],\n[/* The following provokes a linker invocation problem with gcc 3.0.3\n   on AIX 4.3 under \"-maix64 -mpowerpc64 -mcpu=630\".  
The -mcpu=630\n   option causes gcc to incorrectly select the 32-bit libgcc.a, not\n   the 64-bit one, and consequently it misses out on the __fixunsdfdi\n   helper (double -> uint64 conversion).\n   This also provokes errors on x86 when AVX instructions are\n   generated but not understood by the assembler or processor.*/\nvolatile double d;\nvolatile unsigned long u;\nint main() { d = 0.1; u = (unsigned long)d; return (int)u; }\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [double negation],\n[/* The following provokes an error from hppa gcc 2.95 under -mpa-risc-2-0 if\n   the assembler doesn't know hppa 2.0 instructions.  fneg is a 2.0\n   instruction, and a negation like this comes out using it.  */\nvolatile double d;\nvolatile double d2;\nint main() { d = -0.1; d2 = -d; return (int)d2; }\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [double -> float conversion],\n[/* The following makes gcc 3.3 -march=pentium4 generate an SSE2 xmm insn\n   (cvtsd2ss) which will provoke an error if the assembler doesn't recognise\n   those instructions.  Not sure how much of the gmp code will come out\n   wanting sse2, but it's easiest to reject an option we know is bad.  */\nvolatile double d;\nvolatile float f;\nint main() { d = 0.1; f = (float)d; return (int)f; }\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [unsigned long/double division],\n[/* The following generates a vmovd instruction on Sandy Bridge.\n   Check that the assembler knows this instruction. */\nvolatile unsigned long a;\nvolatile double b;\nint main()\n{ a = 1; b = 3; return (int)(a/b); }\n])\n\n# __builtin_alloca is not available everywhere, check it exists before\n# seeing that it works\nGMP_PROG_CC_WORKS_PART_TEST([$1],[__builtin_alloca availability],\n[int k; int foo () { __builtin_alloca (k); }],\n  [GMP_PROG_CC_WORKS_PART([$1], [alloca array],\n[/* The following provokes an internal compiler error from Itanium HP-UX cc\n    under +O2 or higher.  We use this sort of code in mpn/generic/mul_fft.c. 
*/\nint k;\nint foo ()\n{\n  int i, **a;\n  a = __builtin_alloca (k);\n  for (i = 0; i <= k; i++)\n    a[i] = __builtin_alloca (1 << i);\n}\n])])\n\nGMP_PROG_CC_WORKS_PART([$1], [long long reliability test 1],\n[/* The following provokes a segfault in the compiler on powerpc-apple-darwin.\n   Extracted from tests/mpn/t-iord_u.c.  Causes Apple's gcc 3.3 build 1640 and\n   1666 to segfault with, e.g., -O2 -mpowerpc64.  */\n\n#if defined(__GNUC__) && !defined(__clang__)\ntypedef unsigned long long t1;typedef t1*t2;\n#if defined(__GNUC_STDC_INLINE__)  /* e.g. GCC 5.x defaults to this, not __GNUC_GNU_INLINE__ */\nextern\n#endif\n__inline__ t1 e(t2 rp,t2 up,int n,t1 v0)\n{t1 c,x,r;int i;if(v0){c=1;for(i=1;i<n;i++){x=up[i];r=x+1;rp[i]=r;}}return c;}\nvoid f(){static const struct{t1 n;t1 src[9];t1 want[9];}d[]={{1,{0},{1}},};t1 got[9];int i;\nvoid h(){} void g(){}\nfor(i=0;i<1;i++){if(e(got,got,9,d[i].n)==0)h();g(i,d[i].src,d[i].n,got,d[i].want,9);if(d[i].n)h();}}\n#else\nint dummy;\n#endif\n])\n\nGMP_PROG_CC_WORKS_PART([$1], [long long reliability test 2],\n[/* The following provokes an internal compiler error on powerpc-apple-darwin.\n   Extracted from mpz/cfdiv_q_2exp.c.  Causes Apple's gcc 3.3 build 1640 and\n   1666 to get an ICE with -O1 -mpowerpc64.  */\n\n#ifdef __GNUC__\nextern int g();\nvoid f(int u){int i;long long x;x=u?~0:0;if(x)for(i=0;i<9;i++);x&=g();if(x)g();}\nint g(){return 0;}\n#else\nint dummy;\n#endif\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [mpn_lshift_com optimization],\n[/* The following is mis-compiled by HP ia-64 cc version\n        cc: HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]\n   under \"cc +O3\", both in +DD32 and +DD64 modes.  The mpn_lshift_com gets\n   inlined and its return value somehow botched to be 0 instead of 1.  This\n   arises in the real mpn_lshift_com in mul_fft.c.  A lower optimization\n   level, like +O2 seems ok.  
This code needs to be run to show the problem,\n   but that's fine, the offending cc is a native-only compiler so we don't\n   have to worry about cross compiling.  */\n\nunsigned long\nlshift_com (rp, up, n, cnt)\n  unsigned long *rp;\n  unsigned long *up;\n  long n;\n  unsigned cnt;\n{\n  unsigned long retval, high_limb, low_limb;\n  unsigned tnc;\n  long i;\n  tnc = 8 * sizeof (unsigned long) - cnt;\n  low_limb = *up++;\n  retval = low_limb >> tnc;\n  high_limb = low_limb << cnt;\n  for (i = n - 1; i != 0; i--)\n    {\n      low_limb = *up++;\n      *rp++ = ~(high_limb | (low_limb >> tnc));\n      high_limb = low_limb << cnt;\n    }\n  return retval;\n}\nint\nmain ()\n{\n  unsigned long cy, rp[2], up[2];\n  up[0] = ~ 0L;\n  up[1] = 0;\n  cy = lshift_com (rp, up, 2L, 1);\n  if (cy != 1L)\n    return 1;\n  return 0;\n}\n])\n\nGMP_PROG_CC_WORKS_PART_MAIN([$1], [mpn_lshift_com optimization 2],\n[/* The following is mis-compiled by Intel ia-64 icc version 1.8 under\n    \"icc -O3\",  After several calls, the function writes partial garbage to\n    the result vector.  Perhaps relates to the chk.a.nc insn.  This code needs\n    to be run to show the problem, but that's fine, the offending cc is a\n    native-only compiler so we don't have to worry about cross compiling.  
*/\n\n#include <stdlib.h>\nvoid\nlshift_com (rp, up, n, cnt)\n  unsigned long *rp;\n  unsigned long *up;\n  long n;\n  unsigned cnt;\n{\n  unsigned long high_limb, low_limb;\n  unsigned tnc;\n  long i;\n  up += n;\n  rp += n;\n  tnc = 8 * sizeof (unsigned long) - cnt;\n  low_limb = *--up;\n  high_limb = low_limb << cnt;\n  for (i = n - 1; i != 0; i--)\n    {\n      low_limb = *--up;\n      *--rp = ~(high_limb | (low_limb >> tnc));\n      high_limb = low_limb << cnt;\n    }\n  *--rp = ~high_limb;\n}\nint\nmain ()\n{\n  unsigned long *r, *r2;\n  unsigned long a[88 + 1];\n  long i;\n  for (i = 0; i < 88 + 1; i++)\n    a[i] = ~0L;\n  r = malloc (10000 * sizeof (unsigned long));\n  r2 = r;\n  for (i = 0; i < 528; i += 22)\n    {\n      lshift_com (r2, a,\n\t\t  i / (8 * sizeof (unsigned long)) + 1,\n\t\t  i % (8 * sizeof (unsigned long)));\n      r2 += 88 + 1;\n    }\n  if (r[2048] != 0 || r[2049] != 0 || r[2050] != 0 || r[2051] != 0 ||\n      r[2052] != 0 || r[2053] != 0 || r[2054] != 0)\n    abort ();\n  return 0;\n}\n])\n\n\n# A certain _GLOBAL_OFFSET_TABLE_ problem in past versions of gas, tickled\n# by recent versions of gcc.\n#\nif test \"$gmp_prog_cc_works\" = yes; then\n  case $host in\n    X86_PATTERN)\n      # this problem only arises in PIC code, so don't need to test when\n      # --disable-shared.  
We don't necessarily have $enable_shared set to\n      # yes at this point, it will still be unset for the default (which is\n      # yes); hence the use of \"!= no\".\n      if test \"$enable_shared\" != no; then\n        GMP_PROG_CC_X86_GOT_EAX_EMITTED([$1],\n          [GMP_ASM_X86_GOT_EAX_OK([$1],,\n            [gmp_prog_cc_works=\"no, bad gas GOT with eax\"])])\n      fi\n      ;;\n  esac\nfi\n\nAC_MSG_RESULT($gmp_prog_cc_works)\ncase $gmp_prog_cc_works in\n  yes)\n    [$2]\n    ;;\n  *)\n    [$3]\n    ;;\nesac\n])\n\ndnl  Called: GMP_PROG_CC_WORKS_PART(CC+CFLAGS,FAIL-MESSAGE [,CODE])\ndnl  A dummy main() is appended to the CODE given.\ndnl\nAC_DEFUN([GMP_PROG_CC_WORKS_PART],\n[GMP_PROG_CC_WORKS_PART_MAIN([$1],[$2],\n[$3]\n[int main () { return 0; }])\n])\n\ndnl  Called: GMP_PROG_CC_WORKS_PART_MAIN(CC+CFLAGS,FAIL-MESSAGE,CODE)\ndnl  CODE must include a main().\ndnl\nAC_DEFUN([GMP_PROG_CC_WORKS_PART_MAIN],\n[GMP_PROG_CC_WORKS_PART_TEST([$1],[$2],[$3],\n  [],\n  gmp_prog_cc_works=\"no[]m4_if([$2],,,[[, ]])[$2]\",\n  gmp_prog_cc_works=\"no[]m4_if([$2],,,[[, ]])[$2][[, program does not run]]\")\n])\n\ndnl  Called: GMP_PROG_CC_WORKS_PART_TEST(CC+CFLAGS,TITLE,[CODE],\ndnl            [ACTION-GOOD],[ACTION-BAD][ACTION-NORUN])\ndnl\nAC_DEFUN([GMP_PROG_CC_WORKS_PART_TEST],\n[if test \"$gmp_prog_cc_works\" = yes; then\n  # remove anything that might look like compiler output to our \"||\" expression\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  cat >conftest.c <<EOF\n[$3]\nEOF\n  echo \"Test compile: [$2]\" >&AC_FD_CC\n  gmp_compile=\"$1 conftest.c >&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_compile); then\n    cc_works_part=yes\n    if test \"$cross_compiling\" = no; then\n      if AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]); then :;\n      else\n        cc_works_part=norun\n      fi\n    fi\n  else\n    cc_works_part=no\n  fi\n  if test \"$cc_works_part\" != yes; then\n    echo \"failed program was:\" >&AC_FD_CC\n    cat conftest.c 
>&AC_FD_CC\n  fi\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  case $cc_works_part in\n    yes)\n      $4\n      ;;\n    no)\n      $5\n      ;;\n    norun)\n      $6\n      ;;\n  esac\nfi\n])\n\n\ndnl  GMP_PROG_CC_WORKS_LONGLONG(cc+cflags,[ACTION-YES][,ACTION-NO])\ndnl  --------------------------------------------------------------\ndnl  Check that cc+cflags accepts \"long long\".\ndnl\ndnl  This test is designed to be run repeatedly with different cc+cflags\ndnl  selections, so the result is not cached.\n\nAC_DEFUN([GMP_PROG_CC_WORKS_LONGLONG],\n[AC_MSG_CHECKING([compiler $1 has long long])\ncat >conftest.c <<EOF\nlong long  foo;\nlong long  bar () { return foo; }\nint main () { return 0; }\nEOF\ngmp_prog_cc_works=no\ngmp_compile=\"$1 -c conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  gmp_prog_cc_works=yes\nelse\n  echo \"failed program was:\" >&AC_FD_CC\n  cat conftest.c >&AC_FD_CC\nfi\nrm -f conftest* a.out b.out a.exe a_out.exe\nAC_MSG_RESULT($gmp_prog_cc_works)\nif test $gmp_prog_cc_works = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_C_TEST_SIZEOF(cc/cflags,test,[ACTION-GOOD][,ACTION-BAD])\ndnl  ------------------------------------------------------------\ndnl  The given cc/cflags compiler is run to check the size of a type\ndnl  specified by the \"test\" argument.  \"test\" can either be a string, or a\ndnl  variable like $foo.  The value should be for instance \"sizeof-long-4\",\ndnl  to test that sizeof(long)==4.\ndnl\ndnl  This test is designed to be run for different compiler and/or flags\ndnl  combinations, so the result is not cached.\ndnl\ndnl  The idea for making an array that has a negative size if the desired\ndnl  condition test is false comes from autoconf AC_CHECK_SIZEOF.  
The cast\ndnl  to \"long\" in the array dimension also follows autoconf, apparently it's\ndnl  a workaround for a HP compiler bug.\n\nAC_DEFUN([GMP_C_TEST_SIZEOF],\n[echo \"configure: testlist $2\" >&AC_FD_CC\n[gmp_sizeof_type=`echo \"$2\" | sed 's/sizeof-\\([a-z]*\\).*/\\1/'`]\n[gmp_sizeof_want=`echo \"$2\" | sed 's/sizeof-[a-z]*-\\([0-9]*\\).*/\\1/'`]\nAC_MSG_CHECKING([compiler $1 has sizeof($gmp_sizeof_type)==$gmp_sizeof_want])\ncat >conftest.c <<EOF\n[int\nmain ()\n{\n  static int test_array [1 - 2 * (long) (sizeof ($gmp_sizeof_type) != $gmp_sizeof_want)];\n  test_array[0] = 0;\n  return 0;\n}]\nEOF\ngmp_c_testlist_sizeof=no\ngmp_compile=\"$1 -c conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  gmp_c_testlist_sizeof=yes\nfi\nrm -f conftest*\nAC_MSG_RESULT($gmp_c_testlist_sizeof)\nif test $gmp_c_testlist_sizeof = yes; then\n  ifelse([$3],,:,[$3])\nelse\n  ifelse([$4],,:,[$4])\nfi\n])\n\n\ndnl  GMP_PROG_CC_IS_GNU(CC,[ACTIONS-IF-YES][,ACTIONS-IF-NO])\ndnl  -------------------------------------------------------\ndnl  Determine whether the given compiler is GNU C.\ndnl\ndnl  This test is the same as autoconf _AC_LANG_COMPILER_GNU, but doesn't\ndnl  cache the result.  The same \"ifndef\" style test is used, to avoid\ndnl  problems with syntax checking cpp's used on NeXT and Apple systems.\n\nAC_DEFUN([GMP_PROG_CC_IS_GNU],\n[cat >conftest.c <<EOF\n#if ! 
defined (__GNUC__) || defined (__INTEL_COMPILER) || defined (__PATHCC__)\n  choke me\n#endif\nEOF\ngmp_compile=\"$1 -c conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  rm -f conftest*\n  AC_MSG_CHECKING([whether $1 is gcc])\n  AC_MSG_RESULT(yes)\n  ifelse([$2],,:,[$2])\nelse\n  rm -f conftest*\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_PROG_CC_IS_XLC(CC,[ACTIONS-IF-YES][,ACTIONS-IF-NO])\ndnl  -------------------------------------------------------\ndnl  Determine whether the given compiler is IBM xlc (on AIX).\ndnl\ndnl  There doesn't seem to be a preprocessor symbol to test for this, or if\ndnl  there is one then it's well hidden in xlc 3.1 on AIX 4.3, so just grep\ndnl  the man page printed when xlc is invoked with no arguments.\n\nAC_DEFUN([GMP_PROG_CC_IS_XLC],\n[gmp_command=\"$1 2>&1 | grep xlc >/dev/null\"\nif AC_TRY_EVAL(gmp_command); then\n  AC_MSG_CHECKING([whether $1 is xlc])\n  AC_MSG_RESULT(yes)\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_PROG_CC_X86_GOT_EAX_EMITTED(CC+CFLAGS, [ACTION-YES] [, ACTION-NO])\ndnl  ----------------------------------------------------------------------\ndnl  Determine whether CC+CFLAGS emits instructions using %eax with\ndnl  _GLOBAL_OFFSET_TABLE_.  This test is for use on x86 systems.\ndnl\ndnl  Recent versions of gcc will use %eax for the GOT in leaf functions, for\ndnl  instance gcc 3.3.3 with -O3.  This avoids having to save and restore\ndnl  %ebx which otherwise usually holds the GOT, and is what gcc used in the\ndnl  past.\ndnl\ndnl  %ecx and %edx are also candidates for this sort of optimization, and\ndnl  are used under lesser optimization levels, like -O2 in 3.3.3.  
FIXME:\ndnl  It's not quite clear what the conditions for using %eax are, we might\ndnl  need more test code to provoke it.\ndnl\ndnl  The motivation for this test is that past versions of gas have bugs\ndnl  affecting this usage, see GMP_ASM_X86_GOT_EAX_OK.\ndnl\ndnl  This test is not specific to gcc, other compilers might emit %eax GOT\ndnl  insns like this, though we've not investigated that.\ndnl\ndnl  This is for use by compiler probing in GMP_PROG_CC_WORKS, so we doesn't\ndnl  cache the result.\ndnl\ndnl  -fPIC is hard coded here, because this test is for use before libtool\ndnl  has established the pic options.  It's right for gcc, but perhaps not\ndnl  other compilers.\n\nAC_DEFUN([GMP_PROG_CC_X86_GOT_EAX_EMITTED],\n[echo \"Testing gcc GOT with eax emitted\" >&AC_FD_CC\ncat >conftest.c <<\\EOF\n[int foo;\nint bar () { return foo; }\n]EOF\ntmp_got_emitted=no\ngmp_compile=\"$1 -fPIC -S conftest.c >&AC_FD_CC 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\n  if grep \"addl.*_GLOBAL_OFFSET_TABLE_.*eax\" conftest.s >/dev/null; then\n    tmp_got_emitted=yes\n  fi\nfi\nrm -f conftest.*\necho \"Result: $tmp_got_emitted\" >&AC_FD_CC\nif test \"$tmp_got_emitted\" = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_HPC_HPPA_2_0(cc,[ACTION-IF-GOOD][,ACTION-IF-BAD])\ndnl  ---------------------------------------------------------\ndnl  Find out whether a HP compiler is good enough to generate hppa 2.0.\ndnl\ndnl  This test might be repeated for different compilers, so the result is\ndnl  not cached.\n\nAC_DEFUN([GMP_HPC_HPPA_2_0],\n[AC_MSG_CHECKING([whether HP compiler $1 is good for 64-bits])\n# Bad compiler output:\n#   ccom: HP92453-01 G.10.32.05 HP C Compiler\n# Good compiler output:\n#   ccom: HP92453-01 A.10.32.30 HP C Compiler\n# Let A.10.32.30 or higher be ok.\necho >conftest.c\ngmp_tmp_vs=`$1 $2 -V -c -o conftest.$OBJEXT conftest.c 2>&1 | grep \"^ccom:\"`\necho \"Version string: $gmp_tmp_vs\" >&AC_FD_CC\nrm 
conftest*\ngmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\\.\\([[0-9]]*\\).*/\\1/'`\ngmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\\..*\\.\\(.*\\)\\..* HP C.*/\\1/'`\ngmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\\..*\\..*\\.\\(.*\\) HP C.*/\\1/'`\necho \"Version number: $gmp_tmp_v1.$gmp_tmp_v2.$gmp_tmp_v3\" >&AC_FD_CC\nif test -z \"$gmp_tmp_v1\"; then\n  gmp_hpc_64bit=not-applicable\nelse\n  GMP_COMPARE_GE($gmp_tmp_v1, 10, $gmp_tmp_v2, 32, $gmp_tmp_v3, 30)\n  gmp_hpc_64bit=$gmp_compare_ge\nfi\nAC_MSG_RESULT($gmp_hpc_64bit)\nif test $gmp_hpc_64bit = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_GCC_ARM_UMODSI(CC,[ACTIONS-IF-GOOD][,ACTIONS-IF-BAD])\ndnl  ---------------------------------------------------------\ndnl  gcc 2.95.3 and earlier on arm has a bug in the libgcc __umodsi routine\ndnl  making \"%\" give wrong results for some operands, eg. \"0x90000000 % 3\".\ndnl  We're hoping it'll be fixed in 2.95.4, and we know it'll be fixed in\ndnl  gcc 3.\ndnl\ndnl  There's only a couple of places gmp cares about this, one is the\ndnl  size==1 case in mpn/generic/mode1o.c, and this shows up in\ndnl  tests/mpz/t-jac.c as a wrong result from mpz_kronecker_ui.\n\nAC_DEFUN([GMP_GCC_ARM_UMODSI],\n[AC_MSG_CHECKING([whether ARM gcc unsigned division works])\ntmp_version=`$1 --version`\necho \"$tmp_version\" >&AC_FD_CC\ncase $tmp_version in\n  [2.95 | 2.95.[123]])\n    ifelse([$3],,:,[$3])\n    gmp_gcc_arm_umodsi_result=[\"no, gcc 2.95.[0123]\"] ;;\n  *)\n    ifelse([$2],,:,[$2])\n    gmp_gcc_arm_umodsi_result=yes ;;\nesac\nAC_MSG_RESULT([$gmp_gcc_arm_umodsi_result])\n])\n\n\ndnl  GMP_GCC_MIPS_O32(gcc,[actions-yes][,[actions-no]])\ndnl  -------------------------------------------------\ndnl  Test whether gcc supports o32.\ndnl\ndnl  gcc 2.7.2.2 only does o32, and doesn't accept -mabi=32.\ndnl\ndnl  gcc 2.95 accepts -mabi=32 but it only works on irix5, on irix6 it gives\ndnl  \"cc1: The -mabi=32 support does not work 
yet\".\n\nAC_DEFUN([GMP_GCC_MIPS_O32],\n[AC_MSG_CHECKING([whether gcc supports o32])\necho 'int x;' >conftest.c\necho \"$1 -mabi=32 -c conftest.c\" >&AC_FD_CC\nif $1 -mabi=32 -c conftest.c >conftest.out 2>&1; then\n  result=yes\nelse\n  cat conftest.out >&AC_FD_CC\n  if grep \"cc1: Invalid option \\`abi=32'\" conftest.out >/dev/null; then\n    result=yes\n  else\n    result=no\n  fi\nfi\nrm -f conftest.*\nAC_MSG_RESULT($result)\nif test $result = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_GCC_NO_CPP_PRECOMP(CCBASE,CC,CFLAGS,[ACTIONS-YES][,ACTIONS-NO])\ndnl  -------------------------------------------------------------------\ndnl  Check whether -no-cpp-precomp should be used on this compiler, and\ndnl  execute the corresponding ACTIONS-YES or ACTIONS-NO.\ndnl\ndnl  -no-cpp-precomp is only meant for Apple's hacked version of gcc found\ndnl  on powerpc*-*-darwin*, but we can give it a try on any gcc.  Normal gcc\ndnl  (as of 3.0 at least) only gives a warning, not an actual error, and we\ndnl  watch for that and decide against the option in that case, to avoid\ndnl  confusing the user.\n\nAC_DEFUN([GMP_GCC_NO_CPP_PRECOMP],\n[if test \"$ccbase\" = gcc; then\n  AC_MSG_CHECKING([compiler $2 $3 -no-cpp-precomp])\n  result=no\n  cat >conftest.c <<EOF\nint main () { return 0; }\nEOF\n  gmp_compile=\"$2 $3 -no-cpp-precomp conftest.c >conftest.out 2>&1\"\n  if AC_TRY_EVAL(gmp_compile); then\n    if grep \"unrecognized option.*-no-cpp-precomp\" conftest.out >/dev/null; then : ;\n    else\n      result=yes\n    fi\n  fi\n  cat conftest.out >&AC_FD_CC\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  AC_MSG_RESULT($result)\n  if test \"$result\" = yes; then\n      ifelse([$4],,:,[$4])\n  else\n      ifelse([$5],,:,[$5])\n  fi\nfi\n])\n\n\ndnl  GMP_GCC_PENTIUM4_SSE2(CC+CFLAGS,[ACTION-IF-YES][,ACTION-IF-NO])\ndnl  ---------------------------------------------------------------\ndnl  Determine whether gcc CC+CFLAGS is a good enough version 
for\ndnl  -march=pentium4 with sse2.\ndnl\ndnl  Gcc 3.2.1 was seen generating incorrect code for raw double -> int\ndnl  conversions through a union.  We believe the problem is in all 3.1 and\ndnl  3.2 versions, but that it's fixed in 3.3.\n\nAC_DEFUN([GMP_GCC_PENTIUM4_SSE2],\n[AC_MSG_CHECKING([whether gcc is good for sse2])\ncase `$1 -dumpversion` in\n  [3.[012] | 3.[012].*]) result=no ;;\n  *)                     result=yes ;;\nesac\nAC_MSG_RESULT($result)\nif test \"$result\" = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_GCC_WA_MCPU(CC+CFLAGS, NEWFLAG [,ACTION-YES [,ACTION-NO]])\ndnl  --------------------------------------------------------------\ndnl  Check whether gcc (or gas rather) accepts a flag like \"-Wa,-mev67\".\ndnl\ndnl  Gas doesn't give an error for an unknown cpu, it only prints a warning\ndnl  like \"Warning: Unknown CPU identifier `ev78'\".\ndnl\ndnl  This is intended for use on alpha, since only recent versions of gas\ndnl  accept -mev67, but there's nothing here that's alpha specific.\n\nAC_DEFUN([GMP_GCC_WA_MCPU],\n[AC_MSG_CHECKING([assembler $1 $2])\nresult=no\ncat >conftest.c <<EOF\nint main () {}\nEOF\ngmp_compile=\"$1 $2 -c conftest.c >conftest.out 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\n  if grep \"Unknown CPU identifier\" conftest.out >/dev/null; then : ;\n  else\n    result=yes\n  fi    \nfi\ncat conftest.out >&AC_FD_CC\nrm -f conftest*\nAC_MSG_RESULT($result)\nif test \"$result\" = yes; then\n  ifelse([$3],,:,[$3])\nelse\n  ifelse([$4],,:,[$4])\nfi\n])\n\ndnl  GMP_OS_X86_XMM(CC+CFLAGS,[ACTION-IF-YES][,ACTION-IF-NO])\ndnl  --------------------------------------------------------\ndnl  Determine whether the operating system supports XMM registers.\ndnl\ndnl  If build==host then a test program is run, executing an SSE2\ndnl  instruction using an XMM register.  
This will give a SIGILL if the\ndnl  system hasn't set the OSFXSR bit in CR4 to say it knows it must use\ndnl  fxsave/fxrestor in a context switch (to save xmm registers).\ndnl\ndnl  If build!=host, we can fallback on:\ndnl\ndnl      - FreeBSD version 4 is the first supporting xmm.\ndnl\ndnl      - Linux kernel 2.4 might be the first stable series supporting xmm\ndnl        (not sure).  But there's no version number in the GNU/Linux\ndnl        config tuple to test anyway.\ndnl\ndnl  The default is to allow xmm.  This might seem rash, but it's likely\ndnl  most systems know xmm by now, so this will normally be what's wanted.\ndnl  And cross compiling is a bit hairy anyway, so hopefully anyone doing it\ndnl  will be smart enough to know what to do.\ndnl\ndnl  In the test program, .text and .globl are hard coded because this macro\ndnl  is wanted before GMP_ASM_TEXT and GMP_ASM_GLOBL are run.  A .byte\ndnl  sequence is used (for xorps %xmm0, %xmm0) to make us independent of\ndnl  tests for whether the assembler supports sse2/xmm.  Obviously we need\ndnl  both assembler and OS support, but this means we don't force the order\ndnl  in which we test.\ndnl\ndnl  FIXME: Maybe we should use $CCAS to assemble, if it's set.  (Would\ndnl  still want $CC/$CFLAGS for the link.)  
But this test is used before\ndnl  AC_PROG_CC sets $OBJEXT, so we'd need to check for various object file\ndnl  suffixes ourselves.\n\nAC_DEFUN([GMP_OS_X86_XMM],\n[AC_CACHE_CHECK([whether the operating system supports XMM registers],\n\t\tgmp_cv_os_x86_xmm,\n[if test \"$build\" = \"$host\"; then\n  # remove anything that might look like compiler output to our \"||\" expression\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  cat >conftest.s <<EOF\n\t.text\nmain:\n_main:\n\t.globl\tmain\n\t.globl\t_main\n\t.byte\t0x0f, 0x57, 0xc0\n\txorl\t%eax, %eax\n\tret\nEOF\n  gmp_compile=\"$1 conftest.s -o conftest >&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_compile); then\n    if AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]); then\n      gmp_cv_os_x86_xmm=yes\n    else\n      gmp_cv_os_x86_xmm=no\n    fi\n  else\n    AC_MSG_WARN([Oops, cannot compile test program])\n  fi\n  rm -f conftest*\nfi\n\nif test -z \"$gmp_cv_os_x86_xmm\"; then\n  case $host_os in\n    [freebsd[123] | freebsd[123].*])\n      gmp_cv_os_x86_xmm=no ;;\n    freebsd*)\n      gmp_cv_os_x86_xmm=yes ;;\n    *)\n      gmp_cv_os_x86_xmm=probably ;;\n  esac\nfi\n])\n\nif test \"$gmp_cv_os_x86_xmm\" = probably; then\n  AC_MSG_WARN([Not certain of OS support for xmm when cross compiling.])\n  AC_MSG_WARN([Will assume it's ok, expect a SIGILL if this is wrong.])\nfi\n\ncase $gmp_cv_os_x86_xmm in\nno)\n  $3\n  ;;\n*)\n  $2\n  ;;\nesac\n])\n\ndnl  GMP_HPPA_LEVEL_20(cc/cflags [, ACTION-GOOD [,ACTION-BAD]])\ndnl  ----------------------------------------------------------\ndnl  Check that the given cc/cflags accepts HPPA 2.0n assembler code.\ndnl\ndnl  Old versions of gas don't know 2.0 instructions.  
It rejects \".level\ndnl  2.0\" for a start, so just test that.\ndnl\ndnl  This test is designed to be run for various different compiler and\ndnl  flags combinations, and hence doesn't cache its result.\n\nAC_DEFUN([GMP_HPPA_LEVEL_20],\n[AC_MSG_CHECKING([$1 assembler knows hppa 2.0])\nresult=no\ncat >conftest.s <<EOF\n\t.level 2.0\nEOF\ngmp_compile=\"$1 -c conftest.s >&AC_FD_CC 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\n  result=yes\nelse\n  echo \"failed program was\" >&AC_FD_CC\n  cat conftest.s >&AC_FD_CC\nfi\nrm -f conftest*\nAC_MSG_RESULT($result)\nif test \"$result\" = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_PROG_CXX_WORKS(cxx/cxxflags [, ACTION-YES [,ACTION-NO]])\ndnl  ------------------------------------------------------------\ndnl  Check whether cxx/cxxflags can compile and link.\ndnl\ndnl  This test is designed to be run repeatedly with different cxx/cxxflags\ndnl  selections, so the result is not cached.\ndnl\ndnl  For a native build, we insist on being able to run the program, so as\ndnl  to detect any problems with the standard C++ library.  During\ndnl  development various systems with broken or incomplete C++ installations\ndnl  were seen.\ndnl\ndnl  The various features and problems we try to detect are done in separate\ndnl  compiles.  
Although this is probably a bit slower than one test\ndnl  program, it makes it easy to indicate the problem in AC_MSG_RESULT,\ndnl  hence giving the user a clue about why we rejected the compiler.\n\nAC_DEFUN([GMP_PROG_CXX_WORKS],\n[AC_MSG_CHECKING([C++ compiler $1])\ngmp_prog_cxx_works=yes\n\n# start with a plain \"main()\", then go on to further checks\nGMP_PROG_CXX_WORKS_PART([$1], [])\n\nGMP_PROG_CXX_WORKS_PART([$1], [namespace],\n[namespace foo { }\nusing namespace foo;\n])\n\n# GMP requires the standard C++ iostream classes\nGMP_PROG_CXX_WORKS_PART([$1], [std iostream],\n[/* This test rejects g++ 2.7.2 which doesn't have <iostream>, only a\n    pre-standard iostream.h. */\n#include <iostream>\n\n/* This test rejects OSF 5.1 Compaq C++ in its default pre-standard iostream\n   mode, since that mode puts cout in the global namespace, not \"std\".  */\nvoid someoutput (void) { std::cout << 123; }\n])\n\nAC_MSG_RESULT($gmp_prog_cxx_works)\ncase $gmp_prog_cxx_works in\n  yes)\n    [$2]\n    ;;\n  *)\n    [$3]\n    ;;\nesac\n])\n\ndnl  Called: GMP_PROG_CXX_WORKS_PART(CXX+CXXFLAGS, FAIL-MESSAGE [,CODE])\ndnl\nAC_DEFUN([GMP_PROG_CXX_WORKS_PART],\n[if test \"$gmp_prog_cxx_works\" = yes; then\n  # remove anything that might look like compiler output to our \"||\" expression\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  cat >conftest.cc <<EOF\n[$3]\nint main (void) { return 0; }\nEOF\n  echo \"Test compile: [$2]\" >&AC_FD_CC\n  gmp_cxxcompile=\"$1 conftest.cc >&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_cxxcompile); then\n    if test \"$cross_compiling\" = no; then\n      if AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]); then :;\n      else\n        gmp_prog_cxx_works=\"no[]m4_if([$2],,,[, ])[$2], program does not run\"\n      fi\n    fi\n  else\n    gmp_prog_cxx_works=\"no[]m4_if([$2],,,[, ])[$2]\"\n  fi\n  case $gmp_prog_cxx_works in\n    no*)\n      echo \"failed program was:\" >&AC_FD_CC\n      cat conftest.cc >&AC_FD_CC\n      ;;\n  esac\n  
rm -f conftest* a.out b.out a.exe a_out.exe\nfi\n])\n\n\ndnl  GMP_INIT([M4-DEF-FILE])\ndnl  -----------------------\ndnl  Initializations for GMP config.m4 generation.\ndnl\ndnl  FIXME: The generated config.m4 doesn't get recreated by config.status.\ndnl  Maybe the relevant \"echo\"s should go through AC_CONFIG_COMMANDS.\n\nAC_DEFUN([GMP_INIT],\n[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4=\"[$1]\")\ngmp_tmpconfigm4=cnfm4.tmp\ngmp_tmpconfigm4i=cnfm4i.tmp\ngmp_tmpconfigm4p=cnfm4p.tmp\nrm -f $gmp_tmpconfigm4 $gmp_tmpconfigm4i $gmp_tmpconfigm4p\n\n# CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir.\n# The pattern here tests for an absolute path the same way as\n# _AC_OUTPUT_FILES in autoconf acgeneral.m4.\ncase $srcdir in\n[[\\\\/]]* | ?:[[\\\\/]]* )  tmp=\"$srcdir\"    ;;\n*)                       tmp=\"../$srcdir\" ;;\nesac\necho [\"define(<CONFIG_TOP_SRCDIR>,<\\`$tmp'>)\"] >>$gmp_tmpconfigm4\n\n# All CPUs use asm-defs.m4 \necho [\"include][(CONFIG_TOP_SRCDIR\\`/mpn/asm-defs.m4')\"] >>$gmp_tmpconfigm4i\n])\n\n\ndnl  GMP_FINISH\ndnl  ----------\ndnl  Create config.m4 from its accumulated parts.\ndnl\ndnl  __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include\ndnl  of config.m4 is harmless.\ndnl\ndnl  A separate ifdef on the angle bracket quoted part ensures the quoting\ndnl  style there is respected.  The basic defines from gmp_tmpconfigm4 are\ndnl  fully quoted but are still put under an ifdef in case any have been\ndnl  redefined by one of the m4 include files.\ndnl\ndnl  Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't\ndnl  work, since it'd interpret parentheses and quotes in dnl comments, and\ndnl  having a whole file as a macro argument would overflow the string space\ndnl  on BSD m4.\n\nAC_DEFUN([GMP_FINISH],\n[AC_REQUIRE([GMP_INIT])\necho \"creating $gmp_configm4\"\necho [\"d\"\"nl $gmp_configm4.  
Generated automatically by configure.\"] > $gmp_configm4\nif test -f $gmp_tmpconfigm4; then\n  echo [\"changequote(<,>)\"] >> $gmp_configm4\n  echo [\"ifdef(<__CONFIG_M4_INCLUDED__>,,<\"] >> $gmp_configm4\n  cat $gmp_tmpconfigm4 >> $gmp_configm4\n  echo [\">)\"] >> $gmp_configm4\n  echo [\"changequote(\\`,')\"] >> $gmp_configm4\n  rm $gmp_tmpconfigm4\nfi\necho [\"ifdef(\\`__CONFIG_M4_INCLUDED__',,\\`\"] >> $gmp_configm4\nif test -f $gmp_tmpconfigm4i; then\n  cat $gmp_tmpconfigm4i >> $gmp_configm4\n  rm $gmp_tmpconfigm4i\nfi\nif test -f $gmp_tmpconfigm4p; then\n  cat $gmp_tmpconfigm4p >> $gmp_configm4\n  rm $gmp_tmpconfigm4p\nfi\necho [\"')\"] >> $gmp_configm4\necho [\"define(\\`__CONFIG_M4_INCLUDED__')\"] >> $gmp_configm4\n])\n\n\ndnl  GMP_INCLUDE_MPN(FILE)\ndnl  ---------------------\ndnl  Add an include_mpn(`FILE') to config.m4.  FILE should be a path\ndnl  relative to the mpn source directory, for example\ndnl\ndnl      GMP_INCLUDE_MPN(`x86/x86-defs.m4')\ndnl\n\nAC_DEFUN([GMP_INCLUDE_MPN],\n[AC_REQUIRE([GMP_INIT])\necho [\"include_mpn(\\`$1')\"] >> $gmp_tmpconfigm4i\n])\n\n\ndnl  GMP_DEFINE(MACRO, DEFINITION [, LOCATION])\ndnl  ------------------------------------------\ndnl  Define M4 macro MACRO as DEFINITION in temporary file.\ndnl\ndnl  If LOCATION is `POST', the definition will appear after any include()\ndnl  directives inserted by GMP_INCLUDE.  Mind the quoting!  No shell\ndnl  variables will get expanded.  Don't forget to invoke GMP_FINISH to\ndnl  create file config.m4.  
config.m4 uses `<' and '>' as quote characters\ndnl  for all defines.\n\nAC_DEFUN([GMP_DEFINE], \n[AC_REQUIRE([GMP_INIT])\necho ['define(<$1>, <$2>)'] >>ifelse([$3], [POST],\n                              $gmp_tmpconfigm4p, $gmp_tmpconfigm4)\n])\n\n\ndnl  GMP_DEFINE_RAW(STRING [, LOCATION])\ndnl  ------------------------------------\ndnl  Put STRING into config.m4 file.\ndnl\ndnl  If LOCATION is `POST', the definition will appear after any include()\ndnl  directives inserted by GMP_INCLUDE.  Don't forget to invoke GMP_FINISH\ndnl  to create file config.m4.\n\nAC_DEFUN([GMP_DEFINE_RAW],\n[AC_REQUIRE([GMP_INIT])\necho [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)\n])\n\n\ndnl  GMP_TRY_ASSEMBLE(asm-code,[action-success][,action-fail])\ndnl  ----------------------------------------------------------\ndnl  Attempt to assemble the given code.\ndnl  Do \"action-success\" if this succeeds, \"action-fail\" if not.\ndnl\ndnl  conftest.o and conftest.out are available for inspection in\ndnl  \"action-success\".  
If either action does a \"break\" out of a loop then\ndnl  an explicit \"rm -f conftest*\" will be necessary.\ndnl\ndnl  This is not unlike AC_TRY_COMPILE, but there's no default includes or\ndnl  anything in \"asm-code\", everything wanted must be given explicitly.\n\nAC_DEFUN([GMP_TRY_ASSEMBLE],\n[cat >conftest.s <<EOF\n[$1]\nEOF\ngmp_assemble=\"$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1\"\nif AC_TRY_EVAL(gmp_assemble); then\n  cat conftest.out >&AC_FD_CC\n  ifelse([$2],,:,[$2])\nelse\n  cat conftest.out >&AC_FD_CC\n  echo \"configure: failed program was:\" >&AC_FD_CC\n  cat conftest.s >&AC_FD_CC\n  ifelse([$3],,:,[$3])\nfi\nrm -f conftest*\n])\n\n\ndnl  GMP_ASM_LABEL_SUFFIX\ndnl  --------------------\ndnl  : - is usual.\ndnl  empty - hppa on HP-UX doesn't use a :, just the label name\ndnl\ndnl  Note that it's necessary to test the empty case first, since HP \"as\"\ndnl  will accept \"somelabel:\", and take it to mean a label with a name that\ndnl  happens to end in a colon.\n\nAC_DEFUN([GMP_ASM_LABEL_SUFFIX],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([for assembler label suffix],\n                gmp_cv_asm_label_suffix,\n[gmp_cv_asm_label_suffix=unknown\nfor i in \"\" \":\"; do\n  echo \"trying $i\" >&AC_FD_CC\n  GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\nsomelabel$i],\n    [gmp_cv_asm_label_suffix=$i\n     rm -f conftest*\n     break],\n    [cat conftest.out >&AC_FD_CC])\ndone\nif test \"$gmp_cv_asm_label_suffix\" = \"unknown\"; then\n  AC_MSG_ERROR([Cannot determine label suffix])\nfi\n])\necho [\"define(<LABEL_SUFFIX>, <$gmp_cv_asm_label_suffix>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_UNDERSCORE\ndnl  ------------------\ndnl  Determine whether global symbols need to be prefixed with an underscore.\ndnl  The output from \"nm\" is grepped to see what a typical symbol looks like.\ndnl\ndnl  This test used to grep the .o file directly, but that failed with greps\ndnl  that don't like binary files (eg. 
SunOS 4).\ndnl\ndnl  This test also used to construct an assembler file with and without an\ndnl  underscore and try to link that to a C file, to see which worked.\ndnl  Although that's what will happen in the real build we don't really want\ndnl  to depend on creating asm files within configure for every possible CPU\ndnl  (or at least we don't want to do that more than we have to).\ndnl\ndnl  The fallback on no underscore is based on the assumption that the world\ndnl  is moving towards non-underscore systems.  There should actually be no\ndnl  good reason for nm to fail though.\n\nAC_DEFUN([GMP_ASM_UNDERSCORE],\n[AC_REQUIRE([GMP_PROG_NM])\nAC_CACHE_CHECK([if globals are prefixed by underscore], \n               gmp_cv_asm_underscore,\n[gmp_cv_asm_underscore=\"unknown\"\ncat >conftest.c <<EOF\nint gurkmacka;\nEOF\ngmp_compile=\"$CC $CFLAGS $CPPFLAGS -c conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  $NM conftest.$OBJEXT >conftest.out\n  if grep _gurkmacka conftest.out >/dev/null; then\n    gmp_cv_asm_underscore=yes\n  elif grep gurkmacka conftest.out >/dev/null; then\n    gmp_cv_asm_underscore=no\n  else\n    echo \"configure: $NM doesn't have gurkmacka:\" >&AC_FD_CC\n    cat conftest.out >&AC_FD_CC\n  fi\nelse\n  echo \"configure: failed program was:\" >&AC_FD_CC\n  cat conftest.c >&AC_FD_CC\nfi\nrm -f conftest*\n])\ncase $gmp_cv_asm_underscore in\n  yes)\n    GMP_DEFINE(GSYM_PREFIX, [_]) \n    GSYM_FLAG=\"-D GSYM_PREFIX\" ;;\n  no)\n    GMP_DEFINE(GSYM_PREFIX, [])\n    GSYM_FLAG=\"\" ;;\n  *)\n    AC_MSG_WARN([+----------------------------------------------------------])\n    AC_MSG_WARN([| Cannot determine global symbol prefix.])\n    AC_MSG_WARN([| $NM output doesn't contain a global data symbol.])\n    AC_MSG_WARN([| Will proceed with no underscore.])\n    AC_MSG_WARN([| If this is wrong then you'll get link errors referring])\n    AC_MSG_WARN([| to ___gmpn_add_n (note three underscores).])\n    AC_MSG_WARN([| In this case do a fresh build with 
an override,])\n    AC_MSG_WARN([|     ./configure gmp_cv_asm_underscore=yes])\n    AC_MSG_WARN([+----------------------------------------------------------])\n    GMP_DEFINE(GSYM_PREFIX, [])\n    GSYM_FLAG=\"\" ;;\nesac\n   AC_SUBST(GSYM_FLAG)\n])\n\n\ndnl  GMP_ASM_ALIGN_LOG\ndnl  -----------------\ndnl  Is parameter to `.align' logarithmic?\n\nAC_DEFUN([GMP_ASM_ALIGN_LOG],\n[AC_REQUIRE([GMP_ASM_GLOBL])\nAC_REQUIRE([GMP_ASM_BYTE])\nAC_REQUIRE([GMP_ASM_DATA])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_PROG_NM])\nAC_CACHE_CHECK([if .align assembly directive is logarithmic],\n               gmp_cv_asm_align_log,\n[GMP_TRY_ASSEMBLE(\n[      \t$gmp_cv_asm_data\n      \t.align  4\n\t$gmp_cv_asm_globl\tfoo\n\t$gmp_cv_asm_byte\t1\n\t.align\t4\nfoo$gmp_cv_asm_label_suffix\n\t$gmp_cv_asm_byte\t2],\n  [gmp_tmp_val=[`$NM conftest.$OBJEXT | grep foo | \\\n     sed -e 's;[[][0-9][]]\\(.*\\);\\1;' -e 's;[^1-9]*\\([0-9]*\\).*;\\1;'`]\n  if test \"$gmp_tmp_val\" = \"10\" || test \"$gmp_tmp_val\" = \"16\"; then\n    gmp_cv_asm_align_log=yes\n  else\n    gmp_cv_asm_align_log=no\n  fi],\n  [AC_MSG_ERROR([cannot assemble alignment test])])])\n\nGMP_DEFINE_RAW([\"define(<ALIGN_LOGARITHMIC>,<$gmp_cv_asm_align_log>)\"])\n])\n\n\ndnl  GMP_ASM_ALIGN_FILL_0x90\ndnl  -----------------------\ndnl  Determine whether a \",0x90\" suffix works on a .align directive.\ndnl  This is only meant for use on x86, 0x90 being a \"nop\".\ndnl\ndnl  Old gas, eg. 1.92.3\ndnl       Needs \",0x90\" or else the fill is 0x00, which can't be executed\ndnl       across.\ndnl\ndnl  New gas, eg. 
2.91\ndnl       Generates multi-byte nop fills even when \",0x90\" is given.\ndnl\ndnl  Solaris 2.6 as\ndnl       \",0x90\" is not allowed, causes a fatal error.\ndnl\ndnl  Solaris 2.8 as\ndnl       \",0x90\" does nothing, generates a warning that it's being ignored.\ndnl\ndnl  SCO OpenServer 5 as\ndnl       Second parameter is max bytes to fill, not a fill pattern.\ndnl       \",0x90\" is an error due to being bigger than the first parameter.\ndnl       Multi-byte nop fills are generated in text segments.\ndnl\ndnl  Note that both solaris \"as\"s only care about \",0x90\" if they actually\ndnl  have to use it to fill something, hence the .byte in the test.  It's\ndnl  the second .align which provokes the error or warning.\ndnl\ndnl  The warning from solaris 2.8 is supressed to stop anyone worrying that\ndnl  something might be wrong.\n\nAC_DEFUN([GMP_ASM_ALIGN_FILL_0x90],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],\n               gmp_cv_asm_align_fill_0x90,\n[GMP_TRY_ASSEMBLE(\n[      \t$gmp_cv_asm_text\n      \t.align  4, 0x90\n\t.byte   0\n      \t.align  4, 0x90],\n[if grep \"Warning: Fill parameter ignored for executable section\" conftest.out >/dev/null; then\n  echo \"Supressing this warning by omitting 0x90\" 1>&AC_FD_CC\n  gmp_cv_asm_align_fill_0x90=no\nelse\n  gmp_cv_asm_align_fill_0x90=yes\nfi],\n[gmp_cv_asm_align_fill_0x90=no])])\n\nGMP_DEFINE_RAW([\"define(<ALIGN_FILL_0x90>,<$gmp_cv_asm_align_fill_0x90>)\"])\n])\n\n\ndnl  GMP_ASM_BYTE\ndnl  ------------\ndnl  .byte - is usual.\ndnl  data1 - required by ia64 (on hpux at least).\ndnl\ndnl  This macro is just to support other configure tests, not any actual asm\ndnl  code.\n\nAC_DEFUN([GMP_ASM_BYTE],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_CACHE_CHECK([for assembler byte directive],\n                gmp_cv_asm_byte,\n[for i in .byte data1; do\n  echo \"trying $i\" >&AC_FD_CC\n  
GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_data\n\t$i\t0\n],\n    [gmp_cv_asm_byte=$i\n     rm -f conftest*\n     break],\n    [cat conftest.out >&AC_FD_CC])\ndone\nif test -z \"$gmp_cv_asm_byte\"; then\n  AC_MSG_ERROR([Cannot determine how to emit a data byte])\nfi\n])\n])\n\n\ndnl  GMP_ASM_TEXT\ndnl  ------------\ndnl  .text - is usual.\ndnl  .code - is needed by the hppa on HP-UX (but ia64 HP-UX uses .text)\ndnl  .csect .text[PR] - is for AIX.\n\nAC_DEFUN([GMP_ASM_TEXT],\n[AC_CACHE_CHECK([how to switch to text section],\n                gmp_cv_asm_text,\n[for i in \".text\" \".code\" [\".csect .text[PR]\"]; do\n  echo \"trying $i\" >&AC_FD_CC\n  GMP_TRY_ASSEMBLE([\t$i],\n    [gmp_cv_asm_text=$i\n     rm -f conftest*\n     break])\ndone\nif test -z \"$gmp_cv_asm_text\"; then\n  AC_MSG_ERROR([Cannot determine text section directive])\nfi\n])\necho [\"define(<TEXT>, <$gmp_cv_asm_text>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_DATA\ndnl  ------------\ndnl  Can we say `.data'?\n\nAC_DEFUN([GMP_ASM_DATA],\n[AC_CACHE_CHECK([how to switch to data section],\n                gmp_cv_asm_data,\n[case $host in\n  *-*-aix*) gmp_cv_asm_data=[\".csect .data[RW]\"] ;;\n  *)        gmp_cv_asm_data=\".data\" ;;\nesac\n])\necho [\"define(<DATA>, <$gmp_cv_asm_data>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_RODATA\ndnl  --------------\ndnl  Find out how to switch to the read-only data section.\ndnl\ndnl  The compiler output is grepped for the right directive.  It's not\ndnl  considered wise to just probe for \".section .rodata\" or whatever works,\ndnl  since arbitrary section names might be accepted, but not necessarily do\ndnl  the right thing when they get to the linker.\ndnl\ndnl  Only a few asm files use RODATA, so this code is perhaps a bit\ndnl  excessive right now, but should find more uses in the future.\ndnl\ndnl  FIXME: gcc on aix generates something like \".csect _foo.ro_c[RO],3\"\ndnl  where foo is the object file.  
Might need to check for that if we use\ndnl  RODATA there.\n\nAC_DEFUN([GMP_ASM_RODATA],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_REQUIRE([GMP_ASM_DATA])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_ASM_UNDERSCORE])\nAC_CACHE_CHECK([how to switch to read-only data section],\n               gmp_cv_asm_rodata,\n[\ndnl Default to DATA on CPUs with split code/data caching, and TEXT\ndnl elsewhere.  i386 means generic x86, so use DATA on it.\ncase $host in\nX86_PATTERN | x86_64-*-*)\n  gmp_cv_asm_rodata=\"$gmp_cv_asm_data\" ;;\n*)\n  gmp_cv_asm_rodata=\"$gmp_cv_asm_text\" ;;\nesac\n\ncat >conftest.c <<EOF\nconst int foo = 123;\nEOF\necho \"Test program:\" >&AC_FD_CC\ncat conftest.c >&AC_FD_CC\ngmp_compile=\"$CC $CFLAGS $CPPFLAGS -S conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  echo \"Compiler output:\" >&AC_FD_CC\n  cat conftest.s >&AC_FD_CC\n  if test $gmp_cv_asm_underscore = yes; then\n    tmp_gsym_prefix=_\n  else\n    tmp_gsym_prefix=\n  fi\n  # must see our label\n  if grep \"^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix\" conftest.s >/dev/null 2>&AC_FD_CC; then\n    # take the last directive before our label (hence skipping segments\n    # getting debugging info etc)\n    tmp_match=`sed -n [\"/^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix/q\n                        /^[. \t]*data/p\n                        /^[. \t]*rdata/p\n                        /^[. \t]*text/p\n                        /^[. \t]*section/p\n                        /^[. \t]*csect/p\n                        /^[. 
\t]*CSECT/p\"] conftest.s | sed -n '$p'`\n    echo \"Match: $tmp_match\" >&AC_FD_CC\n    if test -n \"$tmp_match\"; then\n      gmp_cv_asm_rodata=$tmp_match\n    fi\n  else\n    echo \"Couldn't find label: ^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix\" >&AC_FD_CC\n  fi\nfi\nrm -f conftest*\n])\necho [\"define(<RODATA>, <$gmp_cv_asm_rodata>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_GLOBL\ndnl  -------------\ndnl  The assembler directive to mark a label as a global symbol.\ndnl\ndnl  ia64 - .global is standard, according to the Intel documentation.\ndnl\ndnl  hppa - \".export foo,entry\" is demanded by HP hppa \"as\".  \".global\" is a\ndnl      kind of import.\ndnl\ndnl  other - .globl is usual.\ndnl\ndnl  \"gas\" tends to accept .globl everywhere, in addition to .export or\ndnl  .global or whatever the system assembler demands.  \n\nAC_DEFUN([GMP_ASM_GLOBL],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([for assembler global directive],\n                gmp_cv_asm_globl,\n[case $host in\n  hppa*-*-*)     gmp_cv_asm_globl=.export ;;\n  IA64_PATTERN)  gmp_cv_asm_globl=.global ;;\n  *)             gmp_cv_asm_globl=.globl  ;;\nesac\n])\necho [\"define(<GLOBL>, <$gmp_cv_asm_globl>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_GLOBL_ATTR\ndnl  ------------------\ndnl  Do we need something after `GLOBL symbol'?\n\nAC_DEFUN([GMP_ASM_GLOBL_ATTR],\n[AC_REQUIRE([GMP_ASM_GLOBL])\nAC_CACHE_CHECK([for assembler global directive attribute],\n                gmp_cv_asm_globl_attr,\n[case $gmp_cv_asm_globl in\n  .export) gmp_cv_asm_globl_attr=\",entry\" ;;\n  *)       gmp_cv_asm_globl_attr=\"\" ;;\nesac\n])\necho [\"define(<GLOBL_ATTR>, <$gmp_cv_asm_globl_attr>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_TYPE\ndnl  ------------\ndnl  Can we say \".type\", and how?\ndnl\ndnl  For i386 GNU/Linux ELF systems, and very likely other ELF systems,\ndnl  .type and .size are important on functions in shared libraries.  
If\ndnl  .type is omitted and the mainline program references that function then\ndnl  the code will be copied down to the mainline at load time like a piece\ndnl  of data.  If .size is wrong or missing (it defaults to 4 bytes or some\ndnl  such) then incorrect bytes will be copied and a segv is the most likely\ndnl  result.  In any case such copying is not what's wanted, a .type\ndnl  directive will ensure a PLT entry is used.\ndnl\ndnl  In GMP the assembler functions are normally only used from within the\ndnl  library (since most programs are not interested in the low level\ndnl  routines), and in those circumstances a missing .type isn't fatal,\ndnl  letting the problem go unnoticed.  tests/mpn/t-asmtype.c aims to check\ndnl  for it.\n\nAC_DEFUN([GMP_ASM_TYPE],\n[AC_CACHE_CHECK([for assembler .type directive],\n                gmp_cv_asm_type,\n[gmp_cv_asm_type=\nfor gmp_tmp_prefix in @ \\# %; do\n  GMP_TRY_ASSEMBLE([\t.type\tsym,${gmp_tmp_prefix}function],\n    [if grep \"\\.type pseudo-op used outside of \\.def/\\.endef ignored\" conftest.out >/dev/null; then : ;\n    else\n      gmp_cv_asm_type=\".type\t\\$][1,${gmp_tmp_prefix}\\$][2\"\n      break\n    fi])\ndone\nrm -f conftest*\n])\necho [\"define(<TYPE>, <$gmp_cv_asm_type>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_SIZE\ndnl  ------------\ndnl  Can we say `.size'?\n\nAC_DEFUN([GMP_ASM_SIZE],\n[AC_CACHE_CHECK([for assembler .size directive],\n                gmp_cv_asm_size,\n[gmp_cv_asm_size=\nGMP_TRY_ASSEMBLE([\t.size\tsym,1],\n  [if grep \"\\.size pseudo-op used outside of \\.def/\\.endef ignored\" conftest.out >/dev/null; then : ;\n  else\n    gmp_cv_asm_size=\".size\t\\$][1,\\$][2\"\n  fi])\n])\necho [\"define(<SIZE>, <$gmp_cv_asm_size>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_COFF_TYPE\ndnl  -----------------\ndnl  Determine whether the assembler supports COFF type information.\ndnl\ndnl  Currently this is only needed for mingw (and cygwin perhaps) and so is\ndnl  run only on the x86s, but 
it ought to work anywhere.\ndnl\ndnl  On MINGW, recent versions of the linker have an automatic import scheme\ndnl  for data in a DLL which is referenced by a mainline but without\ndnl  __declspec (__dllimport__) on the prototype.  It seems functions\ndnl  without type information are treated as data, or something, and calls\ndnl  to them from the mainline will crash.  gcc puts type information on the\ndnl  C functions it generates, we need to do the same for assembler\ndnl  functions.\ndnl\ndnl  This applies only to functions without __declspec(__dllimport__),\ndnl  ie. without __GMP_DECLSPEC in the case of libmpir, so it also works just\ndnl  to ensure all assembler functions used from outside libmpir have\ndnl  __GMP_DECLSPEC on their prototypes.  But this isn't an ideal situation,\ndnl  since we don't want perfectly valid calls going wrong just because\ndnl  there wasn't a prototype in scope.\ndnl\ndnl  When an auto-import takes place, the following warning is given by the\ndnl  linker.  This shouldn't be seen for any functions.\ndnl\ndnl      Info: resolving _foo by linking to __imp__foo (auto-import)\ndnl\ndnl\ndnl  COFF type directives look like the following\ndnl\ndnl      .def    _foo\ndnl      .scl    2\ndnl      .type   32\ndnl      .endef\ndnl\ndnl  _foo is the symbol with GSYM_PREFIX (_).  .scl is the storage class, 2\ndnl  for external, 3 for static.  
.type is the object type, 32 for a\ndnl  function.\ndnl\ndnl  On an ELF system, this is (correctly) rejected due to .def, .endef and\ndnl  .scl being invalid, and .type not having enough arguments.\n\nAC_DEFUN([GMP_ASM_COFF_TYPE],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_REQUIRE([GMP_ASM_GLOBL])\nAC_REQUIRE([GMP_ASM_GLOBL_ATTR])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_ASM_UNDERSCORE])\nAC_CACHE_CHECK([for assembler COFF type directives],\n\t\tgmp_cv_asm_x86_coff_type,\n[GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\n\t$gmp_cv_asm_globl ${tmp_gsym_prefix}foo$gmp_cv_asm_globl_attr\n\t.def\t${tmp_gsym_prefix}foo\n\t.scl\t2\n\t.type\t32\n\t.endef\n${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix\n],\n  [gmp_cv_asm_x86_coff_type=yes],\n  [gmp_cv_asm_x86_coff_type=no])\n])\necho [\"define(<HAVE_COFF_TYPE>, <$gmp_cv_asm_x86_coff_type>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_ASM_LSYM_PREFIX\ndnl  -------------------\ndnl  What is the prefix for a local label?\ndnl\ndnl  The prefixes tested are,\ndnl\ndnl      L  - usual for underscore systems\ndnl      .L - usual for non-underscore systems\ndnl      $  - alpha (gas and OSF system assembler)\ndnl      L$ - hppa (gas and HP-UX system assembler)\ndnl\ndnl  The default is \"L\" if the tests fail for any reason.  There's a good\ndnl  chance this will be adequate, since on most systems labels are local\ndnl  anyway unless given a \".globl\", and an \"L\" will avoid clashes with\ndnl  other identifers.\ndnl\ndnl  For gas, \".L\" is normally purely local to the assembler, it doesn't get\ndnl  put into the object file at all.  
This style is preferred, to keep the\ndnl  object files nice and clean.\ndnl\ndnl  BSD format nm produces a line like\ndnl\ndnl      00000000 t Lgurkmacka\ndnl\ndnl  The symbol code is normally \"t\" for text, but any lower case letter\ndnl  indicates a local definition.\ndnl\ndnl  Code \"n\" is for a debugging symbol, OSF \"nm -B\" gives that as an upper\ndnl  case \"N\" for a local.\ndnl\ndnl  HP-UX nm prints an error message (though seems to give a 0 exit) if\ndnl  there's no symbols at all in an object file, hence the use of \"dummy\".\n\nAC_DEFUN([GMP_ASM_LSYM_PREFIX],\n[AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_ASM_TEXT])\nAC_REQUIRE([GMP_PROG_NM])\nAC_CACHE_CHECK([for assembler local label prefix], \n               gmp_cv_asm_lsym_prefix,\n[gmp_tmp_pre_appears=yes\nfor gmp_tmp_pre in L .L $ L$; do\n  echo \"Trying $gmp_tmp_pre\" >&AC_FD_CC\n  GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\ndummy${gmp_cv_asm_label_suffix}\n${gmp_tmp_pre}gurkmacka${gmp_cv_asm_label_suffix}],\n  [if $NM conftest.$OBJEXT >conftest.nm 2>&AC_FD_CC; then : ; else\n    cat conftest.nm >&AC_FD_CC\n    AC_MSG_WARN([\"$NM\" failure])\n    break\n  fi\n  cat conftest.nm >&AC_FD_CC\n  if grep gurkmacka conftest.nm >/dev/null; then : ; else\n    # no mention of the symbol, this is good\n    echo \"$gmp_tmp_pre label doesn't appear in object file at all (good)\" >&AC_FD_CC\n    gmp_cv_asm_lsym_prefix=\"$gmp_tmp_pre\"\n    gmp_tmp_pre_appears=no\n    break\n  fi\n  if grep [' [a-zN] .*gurkmacka'] conftest.nm >/dev/null; then\n    # symbol mentioned as a local, use this if nothing better\n    echo \"$gmp_tmp_pre label is local but still in object file\" >&AC_FD_CC\n    if test -z \"$gmp_cv_asm_lsym_prefix\"; then\n      gmp_cv_asm_lsym_prefix=\"$gmp_tmp_pre\"\n    fi\n  else\n    echo \"$gmp_tmp_pre label is something unknown\" >&AC_FD_CC\n  fi\n  ])\ndone\nrm -f conftest*\nif test -z \"$gmp_cv_asm_lsym_prefix\"; then\n  gmp_cv_asm_lsym_prefix=L\n  AC_MSG_WARN([cannot determine local 
label, using default $gmp_cv_asm_lsym_prefix])\nfi\n# for development purposes, note whether we got a purely temporary local label\necho \"Local label appears in object files: $gmp_tmp_pre_appears\" >&AC_FD_CC\n])\necho [\"define(<LSYM_PREFIX>, <${gmp_cv_asm_lsym_prefix}>)\"] >> $gmp_tmpconfigm4\nAC_DEFINE_UNQUOTED(LSYM_PREFIX, \"$gmp_cv_asm_lsym_prefix\",\n                   [Assembler local label prefix])\n])\n\n\ndnl  GMP_ASM_W32\ndnl  -----------\ndnl  How to define a 32-bit word.\ndnl\ndnl  FIXME: This test is not right for ia64-*-hpux*.  The directive should\ndnl  be \"data4\", but the W32 macro is not currently used by the mpn/ia64 asm\ndnl  files.\n\nAC_DEFUN([GMP_ASM_W32],\n[AC_REQUIRE([GMP_ASM_DATA])\nAC_REQUIRE([GMP_ASM_BYTE])\nAC_REQUIRE([GMP_ASM_GLOBL])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_PROG_NM])\nAC_CACHE_CHECK([how to define a 32-bit word],\n\t       gmp_cv_asm_w32,\n[case $host in \n  *-*-hpux*)\n    # FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption\n    # that it's at 0x0.  We'll have to declare another symbol before the\n    # .long/.word and look at the distance between the two symbols.  The\n    # only problem is that the sed expression(s) barfs (on Solaris, for\n    # example) for the symbol with value 0.  
For now, HPUX uses .word.\n    gmp_cv_asm_w32=\".word\"\n    ;;\n  *-*-*)\n    gmp_tmp_val=\n    for gmp_tmp_op in .long .word data4; do\n      GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_data\n\t$gmp_cv_asm_globl\tfoo\n\t$gmp_tmp_op\t0\nfoo$gmp_cv_asm_label_suffix\n\t$gmp_cv_asm_byte\t0],\n        [gmp_tmp_val=[`$NM conftest.$OBJEXT | grep foo | \\\n          sed -e 's;[[][0-9][]]\\(.*\\);\\1;' -e 's;[^1-9]*\\([0-9]*\\).*;\\1;'`]\n        if test \"$gmp_tmp_val\" = 4; then\n          gmp_cv_asm_w32=\"$gmp_tmp_op\"\n          break\n        fi])\n    done\n    rm -f conftest*\n    ;;\nesac\nif test -z \"$gmp_cv_asm_w32\"; then\n  AC_MSG_ERROR([cannot determine how to define a 32-bit word])\nfi\n])\necho [\"define(<W32>, <$gmp_cv_asm_w32>)\"] >> $gmp_tmpconfigm4\n])\n\n\ndnl  GMP_X86_ASM_GOT_UNDERSCORE\ndnl  --------------------------\ndnl  Determine whether i386 _GLOBAL_OFFSET_TABLE_ needs an additional\ndnl  underscore prefix.\ndnl\ndnl    SVR4      - the standard is _GLOBAL_OFFSET_TABLE_\ndnl    GNU/Linux - follows SVR4\ndnl    OpenBSD   - an a.out underscore system, uses __GLOBAL_OFFSET_TABLE_\ndnl    NetBSD    - also an a.out underscore system, but _GLOBAL_OFFSET_TABLE_\ndnl\ndnl  The test attempts to link a program using _GLOBAL_OFFSET_TABLE_ or\ndnl  __GLOBAL_OFFSET_TABLE_ to see which works.\ndnl\ndnl  $lt_prog_compiler_pic is included in the compile because old versions\ndnl  of gas wouldn't accept PIC idioms without the right option (-K).  This\ndnl  is the same as what libtool and mpn/Makeasm.am will do.\ndnl\ndnl  $lt_prog_compiler_pic is also included in the link because OpenBSD ld\ndnl  won't accept an R_386_GOTPC relocation without the right options.  
This\ndnl  is not what's done by the Makefiles when building executables, but\ndnl  let's hope it's ok (it works fine with gcc).\ndnl\ndnl  The fallback is no additional underscore, on the basis that this will\ndnl  suit SVR4/ELF style systems, which should be much more common than\ndnl  a.out systems with shared libraries.\ndnl\ndnl  Note that it's not an error for the tests to fail, since for instance\ndnl  cygwin, mingw and djgpp don't have a _GLOBAL_OFFSET_TABLE_ scheme at\ndnl  all.\ndnl\ndnl  Perhaps $CCAS could be asked to do the linking as well as the\ndnl  assembling, but in the Makefiles it's only used for assembling, so lets\ndnl  keep it that way.\ndnl\ndnl  The test here is run even under --disable-shared, so that PIC objects\ndnl  can be built and tested by the tune/many.pl development scheme.  The\ndnl  tests will be reasonably quick and won't give a fatal error, so this\ndnl  arrangement is ok.  AC_LIBTOOL_PROG_COMPILER_PIC does its\ndnl  $lt_prog_compiler_pic setups even for --disable-shared too.\n\nAC_DEFUN([GMP_ASM_X86_GOT_UNDERSCORE],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_REQUIRE([GMP_ASM_GLOBL])\nAC_REQUIRE([GMP_ASM_GLOBL_ATTR])\nAC_REQUIRE([GMP_ASM_LABEL_SUFFIX])\nAC_REQUIRE([GMP_ASM_UNDERSCORE])\nAC_REQUIRE([AC_LIBTOOL_PROG_COMPILER_PIC])\nAC_CACHE_CHECK([if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore], \n               gmp_cv_asm_x86_got_underscore,\n[gmp_cv_asm_x86_got_underscore=\"not applicable\"\nif test $gmp_cv_asm_underscore = yes; then\n  tmp_gsym_prefix=_\nelse\n  tmp_gsym_prefix=\nfi\nfor tmp_underscore in \"\" \"_\"; do\n  cat >conftest.s <<EOF\n\t$gmp_cv_asm_text\n\t$gmp_cv_asm_globl ${tmp_gsym_prefix}main$gmp_cv_asm_globl_attr\n${tmp_gsym_prefix}main$gmp_cv_asm_label_suffix\n\taddl\t$ ${tmp_underscore}_GLOBAL_OFFSET_TABLE_, %ebx\nEOF\n  gmp_compile=\"$CCAS $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.s >&AC_FD_CC && $CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.$OBJEXT >&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_compile); then\n 
   if test \"$tmp_underscore\" = \"_\"; then\n      gmp_cv_asm_x86_got_underscore=yes\n    else\n      gmp_cv_asm_x86_got_underscore=no\n    fi\n    break\n  fi\ndone\nrm -f conftest* a.out b.out a.exe a_out.exe\n])\nif test \"$gmp_cv_asm_x86_got_underscore\" = \"yes\"; then\n  GMP_DEFINE(GOT_GSYM_PREFIX, [_])\nelse\n  GMP_DEFINE(GOT_GSYM_PREFIX, [])\nfi    \n])\n\n\ndnl  GMP_ASM_X86_GOT_EAX_OK(CC+CFLAGS, [ACTION-YES] [, ACTION-NO])\ndnl  -------------------------------------------------------------\ndnl  Determine whether _GLOBAL_OFFSET_TABLE_ used with %eax is ok.\ndnl\ndnl  An instruction\ndnl\ndnl          addl  $_GLOBAL_OFFSET_TABLE_, %eax\ndnl\ndnl  is incorrectly assembled by gas 2.12 (or thereabouts) and earlier.  It\ndnl  puts an addend 2 into the R_386_GOTPC relocation, but it should be 1\ndnl  for this %eax form being a 1 byte opcode (with other registers it's 2\ndnl  opcode bytes).  See note about this in mpn/x86/README too.\ndnl\ndnl  We assemble this, surrounded by some unlikely byte sequences as\ndnl  delimiters, and check for the bad output.\ndnl\ndnl  This is for use by compiler probing in GMP_PROG_CC_WORKS, so the result\ndnl  is not cached.\ndnl\ndnl  This test is not specific to gas, but old gas is the only assembler we\ndnl  know of with this problem.  The Solaris has been seen coming out ok.\ndnl\ndnl  \".text\" is hard coded because this macro is wanted before GMP_ASM_TEXT.\ndnl  This should be fine, \".text\" is normal on x86 systems, and certainly\ndnl  will be fine with the offending gas.\ndnl\ndnl  If an error occurs when assembling, we consider the assembler ok, since\ndnl  the bad output does not occur.  This happens for instance on mingw,\ndnl  where _GLOBAL_OFFSET_TABLE_ results in a bfd error, since there's no\ndnl  GOT etc in PE object files.\ndnl\ndnl  This test is used before the object file extension has been determined,\ndnl  so we force output to conftest.o.  
Using -o with -c is not portable,\ndnl  but we think all x86 compilers will accept -o with -c, certainly gcc\ndnl  does.\ndnl\ndnl  -fPIC is hard coded here, because this test is for use before libtool\ndnl  has established the pic options.  It's right for gcc, but perhaps not\ndnl  other compilers.\n\nAC_DEFUN([GMP_ASM_X86_GOT_EAX_OK],\n[echo \"Testing gas GOT with eax good\" >&AC_FD_CC\ncat >conftest.awk <<\\EOF\n[BEGIN {\n  want[0]  = \"001\"\n  want[1]  = \"043\"\n  want[2]  = \"105\"\n  want[3]  = \"147\"\n  want[4]  = \"211\"\n  want[5]  = \"253\"\n  want[6]  = \"315\"\n  want[7]  = \"357\"\n\n  want[8]  = \"005\"\n  want[9]  = \"002\"\n  want[10] = \"000\"\n  want[11] = \"000\"\n  want[12] = \"000\"\n\n  want[13] = \"376\"\n  want[14] = \"334\"\n  want[15] = \"272\"\n  want[16] = \"230\"\n  want[17] = \"166\"\n  want[18] = \"124\"\n  want[19] = \"062\"\n  want[20] = \"020\"\n\n  result = \"yes\"\n}\n{\n  for (f = 2; f <= NF; f++)\n    {\n      for (i = 0; i < 20; i++)\n        got[i] = got[i+1];\n      got[20] = $f;\n\n      found = 1\n      for (i = 0; i < 21; i++)\n        if (got[i] != want[i])\n          {\n            found = 0\n            break\n          }\n      if (found)\n        {\n          result = \"no\"\n          exit\n        }\n    }\n}\nEND {\n  print result\n}\n]EOF\ncat >conftest.s <<\\EOF\n[\t.text\n\t.byte\t1, 35, 69, 103, 137, 171, 205, 239\n\taddl\t$_GLOBAL_OFFSET_TABLE_, %eax\n\t.byte\t254, 220, 186, 152, 118, 84, 50, 16\n]EOF\ntmp_got_good=yes\ngmp_compile=\"$1 -fPIC -o conftest.o -c conftest.s >&AC_FD_CC 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\n  tmp_got_good=`od -b conftest.o | $AWK -f conftest.awk`\nfi\nrm -f conftest.*\necho \"Result: $tmp_got_good\" >&AC_FD_CC\nif test \"$tmp_got_good\" = no; then\n  ifelse([$3],,:,[$3])\nelse\n  ifelse([$2],,:,[$2])\nfi\n])\n\n\ndnl  GMP_ASM_X86_MMX([ACTION-IF-YES][,ACTION-IF-NO])\ndnl  -----------------------------------------------\ndnl  Determine whether the assembler supports MMX 
instructions.\ndnl\ndnl  This macro is wanted before GMP_ASM_TEXT, so \".text\" is hard coded\ndnl  here.  \".text\" is believed to be correct on all x86 systems.  Actually\ndnl  \".text\" probably isn't needed at all, at least for just checking\ndnl  instruction syntax.\ndnl\ndnl  \"movq %mm0, %mm1\" should assemble to \"0f 6f c8\", but Solaris 2.6 and\ndnl  2.7 wrongly assemble it to \"0f 6f c1\" (that being the reverse \"movq\ndnl  %mm1, %mm0\").  It seems more trouble than it's worth to work around\ndnl  this in the code, so just detect and reject.\n\nAC_DEFUN([GMP_ASM_X86_MMX],\n[AC_CACHE_CHECK([if the assembler knows about MMX instructions],\n\t\tgmp_cv_asm_x86_mmx,\n[GMP_TRY_ASSEMBLE(\n[\t.text\n\tmovq\t%mm0, %mm1],\n[gmp_cv_asm_x86_mmx=yes\ncase $host in\n*-*-solaris*)\n  if (dis conftest.$OBJEXT >conftest.out) 2>/dev/null; then\n    if grep \"0f 6f c1\" conftest.out >/dev/null; then\n      gmp_cv_asm_x86_mmx=movq-bug\n    fi\n  else\n    AC_MSG_WARN([\"dis\" not available to check for \"as\" movq bug])\n  fi\nesac],\n[gmp_cv_asm_x86_mmx=no])])\n\ncase $gmp_cv_asm_x86_mmx in\nmovq-bug)\n  AC_MSG_WARN([+----------------------------------------------------------])\n  AC_MSG_WARN([| WARNING WARNING WARNING])\n  AC_MSG_WARN([| Host CPU has MMX code, but the assembler])\n  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])\n  AC_MSG_WARN([| has the Solaris 2.6 and 2.7 bug where register to register])\n  AC_MSG_WARN([| movq operands are reversed.])\n  AC_MSG_WARN([| Non-MMX replacements will be used.])\n  AC_MSG_WARN([| This will be an inferior build.])\n  AC_MSG_WARN([+----------------------------------------------------------])\n  ;;\nno)\n  AC_MSG_WARN([+----------------------------------------------------------])\n  AC_MSG_WARN([| WARNING WARNING WARNING])\n  AC_MSG_WARN([| Host CPU has MMX code, but it can't be assembled by])\n  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])\n  AC_MSG_WARN([| Non-MMX replacements will be used.])\n  AC_MSG_WARN([| This will be an 
inferior build.])\n  AC_MSG_WARN([+----------------------------------------------------------])\n  ;;\nesac\nif test \"$gmp_cv_asm_x86_mmx\" = yes; then\n  ifelse([$1],,:,[$1])\nelse\n  ifelse([$2],,:,[$2])\nfi\n])\n\n\ndnl  GMP_ASM_X86_SHLDL_CL\ndnl  --------------------\n\nAC_DEFUN([GMP_ASM_X86_SHLDL_CL],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([if the assembler takes cl with shldl],\n\t\tgmp_cv_asm_x86_shldl_cl,\n[GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\n\tshldl\t%cl, %eax, %ebx],\n  gmp_cv_asm_x86_shldl_cl=yes,\n  gmp_cv_asm_x86_shldl_cl=no)\n])\nif test \"$gmp_cv_asm_x86_shldl_cl\" = \"yes\"; then\n  GMP_DEFINE(WANT_SHLDL_CL,1)\nelse\n  GMP_DEFINE(WANT_SHLDL_CL,0)\nfi\n])\n\n\ndnl  GMP_ASM_X86_SSE2([ACTION-IF-YES][,ACTION-IF-NO])\ndnl  ------------------------------------------------\ndnl  Determine whether the assembler supports SSE2 instructions.\ndnl\ndnl  This macro is wanted before GMP_ASM_TEXT, so \".text\" is hard coded\ndnl  here.  \".text\" is believed to be correct on all x86 systems, certainly\ndnl  it's all GMP_ASM_TEXT gives currently.  
Actually \".text\" probably isn't\ndnl  needed at all, at least for just checking instruction syntax.\n\nAC_DEFUN([GMP_ASM_X86_SSE2],\n[AC_CACHE_CHECK([if the assembler knows about SSE2 instructions],\n\t\tgmp_cv_asm_x86_sse2,\n[GMP_TRY_ASSEMBLE(\n[\t.text\n\tpaddq\t%mm0, %mm1],\n  [gmp_cv_asm_x86_sse2=yes],\n  [gmp_cv_asm_x86_sse2=no])\n])\ncase $gmp_cv_asm_x86_sse2 in\nyes)\n  ifelse([$1],,:,[$1])\n  ;;\n*)\n  AC_MSG_WARN([+----------------------------------------------------------])\n  AC_MSG_WARN([| WARNING WARNING WARNING])\n  AC_MSG_WARN([| Host CPU has SSE2 code, but it can't be assembled by])\n  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])\n  AC_MSG_WARN([| Non-SSE2 replacements will be used.])\n  AC_MSG_WARN([| This will be an inferior build.])\n  AC_MSG_WARN([+----------------------------------------------------------])\n  ifelse([$2],,:,[$2])\n  ;;\nesac\n])\n\n\ndnl  GMP_ASM_X86_MCOUNT\ndnl  ------------------\ndnl  Find out how to call mcount for profiling on an x86 system.\ndnl\ndnl  A dummy function is compiled and the \".s\" output examined.  The pattern\ndnl  matching might be a bit fragile, but should work at least with gcc on\ndnl  sensible systems.  Certainly it's better than hard coding a table of\ndnl  conventions.\ndnl\ndnl  For non-PIC, any \".data\" is taken to mean a counter might be passed.\ndnl  It's assumed a movl will set it up, and the right register is taken\ndnl  from that movl.  Any movl involving %esp is ignored (a frame pointer\ndnl  setup normally).\ndnl\ndnl  For PIC, any \".data\" is similarly interpreted, but a GOTOFF identifies\ndnl  the line setting up the right register.\ndnl\ndnl  In both cases a line with \"mcount\" identifies the call and that line is\ndnl  used literally.\ndnl\ndnl  On some systems (eg. 
FreeBSD 3.5) gcc emits \".data\" but doesn't use it,\ndnl  so it's not an error to have .data but then not find a register.\ndnl\ndnl  Variations in mcount conventions on different x86 systems can be found\ndnl  in gcc config/i386.  mcount can have a \"_\" prefix or be .mcount or\ndnl  _mcount_ptr, and for PIC it can be called through a GOT entry, or via\ndnl  the PLT.  If a pointer to a counter is required it's passed in %eax or\ndnl  %edx.\ndnl\ndnl  Flags to specify PIC are taken from $lt_prog_compiler_pic set by\ndnl  AC_PROG_LIBTOOL.\ndnl\ndnl  Enhancement: Cache the values determined here. But what's the right way\ndnl  to get two variables (mcount_nonpic_reg and mcount_nonpic_call say) set\ndnl  from one block of commands?\n\nAC_DEFUN([GMP_ASM_X86_MCOUNT],\n[AC_REQUIRE([AC_ENABLE_SHARED])\nAC_REQUIRE([AC_PROG_LIBTOOL])\nAC_MSG_CHECKING([how to call x86 mcount])\ncat >conftest.c <<EOF\nfoo(){bar();}\nEOF\n\nif test \"$enable_static\" = yes; then\n  gmp_asmout_compile=\"$CC $CFLAGS $CPPFLAGS -S conftest.c 1>&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_asmout_compile); then\n    if grep '\\.data' conftest.s >/dev/null; then\n      mcount_nonpic_reg=`sed -n ['/esp/!s/.*movl.*,\\(%[a-z]*\\).*$/\\1/p'] conftest.s`\n    else\n      mcount_nonpic_reg=\n    fi\n    mcount_nonpic_call=`grep 'call.*mcount' conftest.s`\n    if test -z \"$mcount_nonpic_call\"; then\n      AC_MSG_ERROR([Cannot find mcount call for non-PIC])\n    fi\n  else\n    AC_MSG_ERROR([Cannot compile test program for non-PIC])\n  fi\nfi\n\nif test \"$enable_shared\" = yes; then\n  gmp_asmout_compile=\"$CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic -S conftest.c 1>&AC_FD_CC\"\n  if AC_TRY_EVAL(gmp_asmout_compile); then\n    if grep '\\.data' conftest.s >/dev/null; then\n      case $lt_prog_compiler_pic in\n        *-DDLL_EXPORT*)\n          # Windows DLLs have non-PIC style mcount\n          mcount_pic_reg=`sed -n ['/esp/!s/.*movl.*,\\(%[a-z]*\\).*$/\\1/p'] conftest.s`\n          ;;\n        *)\n          
mcount_pic_reg=`sed -n ['s/.*GOTOFF.*,\\(%[a-z]*\\).*$/\\1/p'] conftest.s`\n          ;;\n      esac\n    else\n      mcount_pic_reg=\n    fi\n    mcount_pic_call=`grep 'call.*mcount' conftest.s`\n    if test -z \"$mcount_pic_call\"; then\n      AC_MSG_ERROR([Cannot find mcount call for PIC])\n    fi\n  else\n    AC_MSG_ERROR([Cannot compile test program for PIC])\n  fi\nfi\n\nGMP_DEFINE_RAW([\"define(<MCOUNT_NONPIC_REG>, <\\`$mcount_nonpic_reg'>)\"])\nGMP_DEFINE_RAW([\"define(<MCOUNT_NONPIC_CALL>,<\\`$mcount_nonpic_call'>)\"])\nGMP_DEFINE_RAW([\"define(<MCOUNT_PIC_REG>,    <\\`$mcount_pic_reg'>)\"])\nGMP_DEFINE_RAW([\"define(<MCOUNT_PIC_CALL>,   <\\`$mcount_pic_call'>)\"])\n\nrm -f conftest.*\nAC_MSG_RESULT([determined])\n])\n\n\ndnl  GMP_ASM_IA64_ALIGN_OK\ndnl  ---------------------\ndnl  Determine whether .align correctly pads with nop instructions in a text\ndnl  segment.\ndnl\ndnl  gas 2.14 and earlier byte swaps its padding bundle on big endian\ndnl  systems, which is incorrect (endianness only changes data).  
What\ndnl  should be \"nop.m / nop.f / nop.i\" comes out as \"break\" instructions.\ndnl\ndnl  The test here detects the bad case, and assumes anything else is ok\ndnl  (there are many sensible nop bundles, so it'd be impractical to try to\ndnl  match everything good).\n\nAC_DEFUN([GMP_ASM_IA64_ALIGN_OK],\n[AC_CACHE_CHECK([whether assembler .align padding is good],\n\t\tgmp_cv_asm_ia64_align_ok,\n[cat >conftest.awk <<\\EOF\n[BEGIN {\n  want[0]  = \"011\"\n  want[1]  = \"160\"\n  want[2]  = \"074\"\n  want[3]  = \"040\"\n  want[4]  = \"000\"\n  want[5]  = \"040\"\n  want[6]  = \"020\"\n  want[7]  = \"221\"\n  want[8]  = \"114\"\n  want[9]  = \"000\"\n  want[10] = \"100\"\n  want[11] = \"200\"\n  want[12] = \"122\"\n  want[13] = \"261\"\n  want[14] = \"000\"\n  want[15] = \"200\"\n\n  want[16] = \"000\"\n  want[17] = \"004\"\n  want[18] = \"000\"\n  want[19] = \"000\"\n  want[20] = \"000\"\n  want[21] = \"000\"\n  want[22] = \"002\"\n  want[23] = \"000\"\n  want[24] = \"000\"\n  want[25] = \"000\"\n  want[26] = \"000\"\n  want[27] = \"001\"\n  want[28] = \"000\"\n  want[29] = \"000\"\n  want[30] = \"000\"\n  want[31] = \"014\"\n\n  want[32] = \"011\"\n  want[33] = \"270\"\n  want[34] = \"140\"\n  want[35] = \"062\"\n  want[36] = \"000\"\n  want[37] = \"040\"\n  want[38] = \"240\"\n  want[39] = \"331\"\n  want[40] = \"160\"\n  want[41] = \"000\"\n  want[42] = \"100\"\n  want[43] = \"240\"\n  want[44] = \"343\"\n  want[45] = \"371\"\n  want[46] = \"000\"\n  want[47] = \"200\"\n\n  result = \"yes\"\n}\n{\n  for (f = 2; f <= NF; f++)\n    {\n      for (i = 0; i < 47; i++)\n        got[i] = got[i+1];\n      got[47] = $f;\n\n      found = 1\n      for (i = 0; i < 48; i++)\n        if (got[i] != want[i])\n          {\n            found = 0\n            break\n          }\n      if (found)\n        {\n          result = \"no\"\n          exit\n        }\n    }\n}\nEND {\n  print result\n}\n]EOF\nGMP_TRY_ASSEMBLE(\n[\t.text\n\t.align\t32\n{ .mmi;\tadd\tr14 = r15, 
r16\n\tadd\tr17 = r18, r19\n\tadd\tr20 = r21, r22 ;; }\n\t.align\t32\n{ .mmi;\tadd\tr23 = r24, r25\n\tadd\tr26 = r27, r28\n\tadd\tr29 = r30, r31 ;; }\n],\n  [gmp_cv_asm_ia64_align_ok=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`],\n  [AC_MSG_WARN([oops, cannot compile test program])\n   gmp_cv_asm_ia64_align_ok=yes])\n])\nGMP_DEFINE_RAW([\"define(<IA64_ALIGN_OK>, <\\`$gmp_cv_asm_ia64_align_ok'>)\"])\n])\n\n\ndnl  GMP_ASM_POWERPC_PIC_ALWAYS\ndnl  --------------------------\ndnl  Determine whether PIC is the default compiler output.\ndnl\ndnl  SVR4 style \"foo@ha\" addressing is interpreted as non-PIC, and anything\ndnl  else is assumed to require PIC always (Darwin or AIX).  SVR4 is the\ndnl  only non-PIC addressing syntax the asm files have at the moment anyway.\ndnl\ndnl  Libtool does this by taking \"*-*-aix* | *-*-darwin* | *-*-rhapsody*\" to\ndnl  mean PIC always, but it seems more reliable to grep the compiler\ndnl  output.\ndnl\ndnl The next paragraph is untrue for Tiger.  Was it ever true?  
For tiger,\ndnl \"cc -fast\" makes non-PIC the default (and the binaries do run).\ndnl  On Darwin \"cc -static\" is non-PIC with syntax \"ha16(_foo)\", but that's\ndnl  apparently only for use in the kernel, which we're not attempting to\ndnl  target at the moment, so don't look for that.\n\nAC_DEFUN([GMP_ASM_POWERPC_PIC_ALWAYS],\n[AC_REQUIRE([AC_PROG_CC])\nAC_CACHE_CHECK([whether compiler output is PIC by default],\n               gmp_cv_asm_powerpc_pic,\n[gmp_cv_asm_powerpc_pic=yes\ncat >conftest.c <<EOF\nint foo;\nint *bar() { return &foo; }\nEOF\necho \"Test program:\" >&AC_FD_CC\ncat conftest.c >&AC_FD_CC\ngmp_compile=\"$CC $CFLAGS $CPPFLAGS -S conftest.c >&AC_FD_CC\"\nif AC_TRY_EVAL(gmp_compile); then\n  echo \"Compiler output:\" >&AC_FD_CC\n  cat conftest.s >&AC_FD_CC\n  if grep 'foo@ha' conftest.s >/dev/null 2>&AC_FD_CC; then\n    gmp_cv_asm_powerpc_pic=no\n  fi\n  if grep 'ha16(_foo)' conftest.s >/dev/null 2>&AC_FD_CC; then\n    gmp_cv_asm_powerpc_pic=no\n  fi\nfi\nrm -f conftest*\n])\nGMP_DEFINE_RAW([\"define(<PIC_ALWAYS>,<$gmp_cv_asm_powerpc_pic>)\"])\n])\n\n\ndnl  GMP_ASM_POWERPC_R_REGISTERS\ndnl  ---------------------------\ndnl  Determine whether the assembler takes powerpc registers with an \"r\" as\ndnl  in \"r6\", or as plain \"6\".  
The latter is standard, but NeXT, Rhapsody,\ndnl  and MacOS-X require the \"r\" forms.\ndnl\ndnl  See also mpn/powerpc32/powerpc-defs.m4 which uses the result of this\ndnl  test.\n\nAC_DEFUN([GMP_ASM_POWERPC_R_REGISTERS],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([if the assembler needs r on registers],\n               gmp_cv_asm_powerpc_r_registers,\n[GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\n\tmtctr\t6],\n[gmp_cv_asm_powerpc_r_registers=no],\n[GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\n\tmtctr\tr6],\n[gmp_cv_asm_powerpc_r_registers=yes],\n[AC_MSG_ERROR([neither \"mtctr 6\" nor \"mtctr r6\" works])])])])\n\nGMP_DEFINE_RAW([\"define(<WANT_R_REGISTERS>,<$gmp_cv_asm_powerpc_r_registers>)\"])\n])\n\n\ndnl  GMP_ASM_SPARC_REGISTER\ndnl  ----------------------\ndnl  Determine whether the assembler accepts the \".register\" directive.\ndnl  Old versions of solaris \"as\" don't.\ndnl\ndnl  See also mpn/sparc32/sparc-defs.m4 which uses the result of this test.\n\nAC_DEFUN([GMP_ASM_SPARC_REGISTER],\n[AC_REQUIRE([GMP_ASM_TEXT])\nAC_CACHE_CHECK([if the assembler accepts \".register\"],\n               gmp_cv_asm_sparc_register,\n[GMP_TRY_ASSEMBLE(\n[\t$gmp_cv_asm_text\n\t.register\t%g2,#scratch\n],\n[gmp_cv_asm_sparc_register=yes],\n[gmp_cv_asm_sparc_register=no])])\n\nGMP_DEFINE_RAW([\"define(<HAVE_REGISTER>,<$gmp_cv_asm_sparc_register>)\"])\n])\n\n\ndnl  GMP_C_ATTRIBUTE_CONST\ndnl  ---------------------\n\nAC_DEFUN([GMP_C_ATTRIBUTE_CONST],\n[AC_CACHE_CHECK([whether gcc __attribute__ ((const)) works],\n                gmp_cv_c_attribute_const,\n[AC_TRY_COMPILE([int foo (int x) __attribute__ ((const));], ,\n  gmp_cv_c_attribute_const=yes, gmp_cv_c_attribute_const=no)\n])\nif test $gmp_cv_c_attribute_const = yes; then\n  AC_DEFINE(HAVE_ATTRIBUTE_CONST, 1,\n  [Define to 1 if the compiler accepts gcc style __attribute__ ((const))])\nfi\n])\n\n\ndnl  GMP_C_ATTRIBUTE_MALLOC\ndnl  ----------------------\ndnl  gcc 2.95.x accepts __attribute__ ((malloc)) but with a warning that\ndnl  
it's ignored.  Pretend it doesn't exist in this case, to avoid that\ndnl  warning.\n\nAC_DEFUN([GMP_C_ATTRIBUTE_MALLOC],\n[AC_CACHE_CHECK([whether gcc __attribute__ ((malloc)) works],\n                gmp_cv_c_attribute_malloc,\n[cat >conftest.c <<EOF\nvoid *foo (int x) __attribute__ ((malloc));\nEOF\ngmp_compile=\"$CC $CFLAGS $CPPFLAGS -c conftest.c >conftest.out 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\n  if grep \"attribute directive ignored\" conftest.out >/dev/null; then\n    gmp_cv_c_attribute_malloc=no\n  else\n    gmp_cv_c_attribute_malloc=yes\n  fi\nelse\n  gmp_cv_c_attribute_malloc=no\nfi\ncat conftest.out >&AC_FD_CC\nrm -f conftest*\n])\nif test $gmp_cv_c_attribute_malloc = yes; then\n  AC_DEFINE(HAVE_ATTRIBUTE_MALLOC, 1,\n  [Define to 1 if the compiler accepts gcc style __attribute__ ((malloc))])\nfi\n])\n\n\ndnl  GMP_C_ATTRIBUTE_MODE\ndnl  --------------------\ndnl  Introduced in gcc 2.2, but perhaps not in all Apple derived versions.\n\nAC_DEFUN([GMP_C_ATTRIBUTE_MODE],\n[AC_CACHE_CHECK([whether gcc __attribute__ ((mode (XX))) works],\n                gmp_cv_c_attribute_mode,\n[AC_TRY_COMPILE([typedef int SItype __attribute__ ((mode (SI)));], ,\n  gmp_cv_c_attribute_mode=yes, gmp_cv_c_attribute_mode=no)\n])\nif test $gmp_cv_c_attribute_mode = yes; then\n  AC_DEFINE(HAVE_ATTRIBUTE_MODE, 1,\n  [Define to 1 if the compiler accepts gcc style __attribute__ ((mode (XX)))])\nfi\n])\n\n\ndnl  GMP_C_ATTRIBUTE_NORETURN\ndnl  ------------------------\n\nAC_DEFUN([GMP_C_ATTRIBUTE_NORETURN],\n[AC_CACHE_CHECK([whether gcc __attribute__ ((noreturn)) works],\n                gmp_cv_c_attribute_noreturn,\n[AC_TRY_COMPILE([void foo (int x) __attribute__ ((noreturn));], ,\n  gmp_cv_c_attribute_noreturn=yes, gmp_cv_c_attribute_noreturn=no)\n])\nif test $gmp_cv_c_attribute_noreturn = yes; then\n  AC_DEFINE(HAVE_ATTRIBUTE_NORETURN, 1,\n  [Define to 1 if the compiler accepts gcc style __attribute__ ((noreturn))])\nfi\n])\n\n\ndnl  GMP_C_DOUBLE_FORMAT\ndnl  
-------------------\ndnl  Determine the floating point format.\ndnl\ndnl  The object file is grepped, in order to work when cross compiling.  A\ndnl  start and end sequence is included to avoid false matches, and\ndnl  allowance is made for the desired data crossing an \"od -b\" line\ndnl  boundary.  The test number is a small integer so it should appear\ndnl  exactly, no rounding or truncation etc.\ndnl\ndnl  \"od -b\", incidentally, is supported even by Unix V7, and the awk script\ndnl  used doesn't have functions or anything, so even an \"old\" awk should\ndnl  suffice.\n\nAC_DEFUN([GMP_C_DOUBLE_FORMAT],\n[AC_REQUIRE([AC_PROG_CC])\nAC_REQUIRE([AC_PROG_AWK])\nAC_CACHE_CHECK([format of `double' floating point],\n                gmp_cv_c_double_format,\n[gmp_cv_c_double_format=unknown\ncat >conftest.c <<\\EOF\n[struct {\n  char    before[8];\n  double  x;\n  char    after[8];\n} foo = {\n  { '\\001', '\\043', '\\105', '\\147', '\\211', '\\253', '\\315', '\\357' },\n  -123456789.0,\n  { '\\376', '\\334', '\\272', '\\230', '\\166', '\\124', '\\062', '\\020' },\n};]\nEOF\ngmp_compile=\"$CC $CFLAGS $CPPFLAGS -c conftest.c >&AC_FD_CC 2>&1\"\nif AC_TRY_EVAL(gmp_compile); then\ncat >conftest.awk <<\\EOF\n[\nBEGIN {\n  found = 0\n}\n\n{\n  for (f = 2; f <= NF; f++)\n    {\n      for (i = 0; i < 23; i++)\n        got[i] = got[i+1];\n      got[23] = $f;\n\n      # match the special begin and end sequences\n      if (got[0] != \"001\") continue\n      if (got[1] != \"043\") continue\n      if (got[2] != \"105\") continue\n      if (got[3] != \"147\") continue\n      if (got[4] != \"211\") continue\n      if (got[5] != \"253\") continue\n      if (got[6] != \"315\") continue\n      if (got[7] != \"357\") continue\n      if (got[16] != \"376\") continue\n      if (got[17] != \"334\") continue\n      if (got[18] != \"272\") continue\n      if (got[19] != \"230\") continue\n      if (got[20] != \"166\") continue\n      if (got[21] != \"124\") continue\n      if (got[22] != 
\"062\") continue\n      if (got[23] != \"020\") continue\n\n      saw = \" (\" got[8] \" \" got[9] \" \" got[10] \" \" got[11] \" \" got[12] \" \" got[13] \" \" got[14] \" \" got[15] \")\"\n\n      if (got[8]  == \"000\" &&  \\\n          got[9]  == \"000\" &&  \\\n          got[10] == \"000\" &&  \\\n          got[11] == \"124\" &&  \\\n          got[12] == \"064\" &&  \\\n          got[13] == \"157\" &&  \\\n          got[14] == \"235\" &&  \\\n          got[15] == \"301\")\n        {\n          print \"IEEE little endian\"\n          found = 1\n          exit\n        }\n\n      # Little endian with the two 4-byte halves swapped, as used by ARM\n      # when the chip is in little endian mode.\n      #\n      if (got[8]  == \"064\" &&  \\\n          got[9]  == \"157\" &&  \\\n          got[10] == \"235\" &&  \\\n          got[11] == \"301\" &&  \\\n          got[12] == \"000\" &&  \\\n          got[13] == \"000\" &&  \\\n          got[14] == \"000\" &&  \\\n          got[15] == \"124\")\n        {\n          print \"IEEE little endian, swapped halves\"\n          found = 1\n          exit\n        }\n\n      # gcc 2.95.4 on one GNU/Linux ARM system was seen generating 000 in\n      # the last byte, whereas 124 is correct.  
Not sure where the bug\n      # actually lies, but a running program didn't seem to get a full\n      # mantissa worth of working bits.\n      #\n      # We match this case explicitly so we can give a nice result message,\n      # but we deliberately exclude it from the normal IEEE double setups\n      # since it's too broken.\n      #\n      if (got[8]  == \"064\" &&  \\\n          got[9]  == \"157\" &&  \\\n          got[10] == \"235\" &&  \\\n          got[11] == \"301\" &&  \\\n          got[12] == \"000\" &&  \\\n          got[13] == \"000\" &&  \\\n          got[14] == \"000\" &&  \\\n          got[15] == \"000\")\n        {\n          print \"bad ARM software floats\"\n          found = 1\n          exit\n        }\n\n      if (got[8]  == \"301\" &&  \\\n          got[9]  == \"235\" &&  \\\n          got[10] == \"157\" &&  \\\n          got[11] == \"064\" &&  \\\n          got[12] == \"124\" &&  \\\n          got[13] == \"000\" &&  \\\n          got[14] == \"000\" &&  \\\n\t  got[15] == \"000\")\n        {\n          print \"IEEE big endian\"\n          found = 1\n          exit\n        }\n\n      if (got[8]  == \"353\" &&  \\\n          got[9]  == \"315\" &&  \\\n          got[10] == \"242\" &&  \\\n          got[11] == \"171\" &&  \\\n          got[12] == \"000\" &&  \\\n          got[13] == \"240\" &&  \\\n          got[14] == \"000\" &&  \\\n          got[15] == \"000\")\n        {\n          print \"VAX D\"\n          found = 1\n          exit\n        }\n\n      if (got[8]  == \"275\" &&  \\\n          got[9]  == \"301\" &&  \\\n          got[10] == \"064\" &&  \\\n          got[11] == \"157\" &&  \\\n          got[12] == \"000\" &&  \\\n          got[13] == \"124\" &&  \\\n          got[14] == \"000\" &&  \\\n          got[15] == \"000\")\n        {\n          print \"VAX G\"\n          found = 1\n          exit\n        }\n\n      if (got[8]  == \"300\" &&  \\\n          got[9]  == \"033\" &&  \\\n          got[10] == \"353\" &&  \\\n          
got[11] == \"171\" &&  \\\n          got[12] == \"242\" &&  \\\n          got[13] == \"240\" &&  \\\n          got[14] == \"000\" &&  \\\n          got[15] == \"000\")\n        {\n          print \"Cray CFP\"\n          found = 1\n          exit\n        }\n    }\n}\n\nEND {\n  if (! found)\n    print \"unknown\", saw\n}\n]\nEOF\n  gmp_cv_c_double_format=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`\n  case $gmp_cv_c_double_format in\n  unknown*)\n    echo \"cannot match anything, conftest.$OBJEXT contains\" >&AC_FD_CC\n    od -b conftest.$OBJEXT >&AC_FD_CC\n    ;;\n  esac\nelse\n  AC_MSG_WARN([oops, cannot compile test program])\nfi\nrm -f conftest*\n])\n\nAH_VERBATIM([HAVE_DOUBLE],\n[/* Define one of the following to 1 for the format of a `double'.\n   If your format is not among these choices, or you don't know what it is,\n   then leave all undefined.\n   IEEE_LITTLE_SWAPPED means little endian, but with the two 4-byte halves\n   swapped, as used by ARM CPUs in little endian mode.  
*/\n#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN\n#undef HAVE_DOUBLE_IEEE_LITTLE_ENDIAN\n#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED\n#undef HAVE_DOUBLE_VAX_D\n#undef HAVE_DOUBLE_VAX_G\n#undef HAVE_DOUBLE_CRAY_CFP])\n\ncase $gmp_cv_c_double_format in\n  \"IEEE big endian\")\n    AC_DEFINE(HAVE_DOUBLE_IEEE_BIG_ENDIAN, 1)\n    GMP_DEFINE_RAW(\"define_not_for_expansion(\\`HAVE_DOUBLE_IEEE_BIG_ENDIAN')\", POST)\n    ;;\n  \"IEEE little endian\")\n    AC_DEFINE(HAVE_DOUBLE_IEEE_LITTLE_ENDIAN, 1)\n    GMP_DEFINE_RAW(\"define_not_for_expansion(\\`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN')\", POST)\n    ;;\n  \"IEEE little endian, swapped halves\")\n    AC_DEFINE(HAVE_DOUBLE_IEEE_LITTLE_SWAPPED, 1) ;;\n  \"VAX D\")\n    AC_DEFINE(HAVE_DOUBLE_VAX_D, 1) ;;\n  \"VAX G\")\n    AC_DEFINE(HAVE_DOUBLE_VAX_G, 1) ;;\n  \"Cray CFP\")\n    AC_DEFINE(HAVE_DOUBLE_CRAY_CFP, 1) ;;\n  \"bad ARM software floats\")\n    ;;\n  unknown*)\n    AC_MSG_WARN([Could not determine float format.])\n    AC_MSG_WARN([Conversions to and from \"double\" may be slow.])\n    ;;\n  *)\n    AC_MSG_WARN([oops, unrecognised float format: $gmp_cv_c_double_format])\n    ;;\nesac\n])\n\n\ndnl  GMP_C_STDARG\ndnl  ------------\ndnl  Test whether to use <stdarg.h> or <varargs.h>.\ndnl\ndnl  Notice the AC_DEFINE here is HAVE_STDARG to avoid clashing with\ndnl  HAVE_STDARG_H which could arise from AC_CHECK_HEADERS.\ndnl\ndnl  This test might be slight overkill, after all there's really only going\ndnl  to be ANSI or K&R and the two can be differentiated by AC_PROG_CC_STDC\ndnl  or very likely by the setups for _PROTO in mpir.h.  
On the other hand\ndnl  this test is nice and direct, being what we're going to actually use.\n\nAC_DEFUN([GMP_C_STDARG],\n[AC_CACHE_CHECK([whether <stdarg.h> exists and works],\n                gmp_cv_c_stdarg,\n[AC_TRY_COMPILE(\n[#include <stdarg.h>\nint foo (int x, ...)\n{\n  va_list  ap;\n  int      y;\n  va_start (ap, x);\n  y = va_arg (ap, int);\n  va_end (ap);\n  return y;\n}],,\ngmp_cv_c_stdarg=yes, gmp_cv_c_stdarg=no)\n])\nif test $gmp_cv_c_stdarg = yes; then\n  AC_DEFINE(HAVE_STDARG, 1, [Define to 1 if <stdarg.h> exists and works])\nfi\n])\n\n\ndnl  GMP_FUNC_ALLOCA\ndnl  ---------------\ndnl  Determine whether \"alloca\" is available.  This is AC_FUNC_ALLOCA from\ndnl  autoconf, but changed so it doesn't use alloca.c if alloca() isn't\ndnl  available, and also to use gmp-impl.h for the conditionals detecting\ndnl  compiler builtin alloca's.\n\nAC_DEFUN([GMP_FUNC_ALLOCA],\n[AC_REQUIRE([GMP_HEADER_ALLOCA])\nAC_CACHE_CHECK([for alloca (via gmp-impl.h)],\n               gmp_cv_func_alloca,\n[AC_TRY_LINK(\nGMP_INCLUDE_GMP_H\n[#include \"$srcdir/gmp-impl.h\"\n],\n  [char *p = (char *) alloca (1);],\n  gmp_cv_func_alloca=yes,\n  gmp_cv_func_alloca=no)])\nif test $gmp_cv_func_alloca = yes; then\n  AC_DEFINE(HAVE_ALLOCA, 1, [Define to 1 if alloca() works (via gmp-impl.h).])\nfi\n])\n\nAC_DEFUN([GMP_HEADER_ALLOCA],\n[# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works\n# for constant arguments.  
Useless!\nAC_CACHE_CHECK([for working alloca.h],\n               gmp_cv_header_alloca,\n[AC_TRY_LINK([#include <alloca.h>],\n  [char *p = (char *) alloca (2 * sizeof (int));],\n  gmp_cv_header_alloca=yes,\n  gmp_cv_header_alloca=no)])\nif test $gmp_cv_header_alloca = yes; then\n  AC_DEFINE(HAVE_ALLOCA_H, 1,\n  [Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).])\nfi\n])\n\n\ndnl  GMP_OPTION_ALLOCA\ndnl  -----------------\ndnl  Decide what to do about --enable-alloca from the user.\ndnl  This is a macro so it can require GMP_FUNC_ALLOCA.\n\nAC_DEFUN([GMP_OPTION_ALLOCA],\n[AC_REQUIRE([GMP_FUNC_ALLOCA])\nAC_CACHE_CHECK([how to allocate temporary memory],\n               gmp_cv_option_alloca,\n[case $enable_alloca in\n  yes)\n    gmp_cv_option_alloca=alloca\n    ;;\n  no)\n    gmp_cv_option_alloca=malloc-reentrant\n    ;;\n  reentrant | notreentrant)\n    case $gmp_cv_func_alloca in\n    yes)  gmp_cv_option_alloca=alloca ;;\n    *)    gmp_cv_option_alloca=malloc-$enable_alloca ;;\n    esac\n    ;;\n  *)\n    gmp_cv_option_alloca=$enable_alloca\n    ;;\nesac\n])\n\nAH_VERBATIM([WANT_TMP],\n[/* Define one of these to 1 for the desired temporary memory allocation\n   method, per --enable-alloca. 
*/\n#undef WANT_TMP_ALLOCA\n#undef WANT_TMP_REENTRANT\n#undef WANT_TMP_NOTREENTRANT\n#undef WANT_TMP_DEBUG])\n\ncase $gmp_cv_option_alloca in\n  alloca)\n    if test $gmp_cv_func_alloca = no; then\n      AC_MSG_ERROR([--enable-alloca=alloca specified, but alloca not available])\n    fi\n    AC_DEFINE(WANT_TMP_ALLOCA)\n    TAL_OBJECT=tal-reent$U.lo\n    ;;\n  malloc-reentrant)\n    AC_DEFINE(WANT_TMP_REENTRANT)\n    TAL_OBJECT=tal-reent$U.lo\n    ;;\n  malloc-notreentrant)\n    AC_DEFINE(WANT_TMP_NOTREENTRANT)\n    TAL_OBJECT=tal-notreent$U.lo\n    ;;\n  debug)\n    AC_DEFINE(WANT_TMP_DEBUG)\n    TAL_OBJECT=tal-debug$U.lo\n    ;;\n  *)\n    # checks at the start of configure.in should protect us\n    AC_MSG_ERROR([unrecognised --enable-alloca=$gmp_cv_option_alloca])\n    ;;\nesac\nAC_SUBST(TAL_OBJECT)\n])\n\n\ndnl  GMP_FUNC_SSCANF_WRITABLE_INPUT\ndnl  ------------------------------\ndnl  Determine whether sscanf requires a writable input string.\ndnl\ndnl  It might be nicer to run a program to determine this when doing a\ndnl  native build, but the systems afflicted are few and far between these\ndnl  days, so it seems good enough just to list them.\n\nAC_DEFUN([GMP_FUNC_SSCANF_WRITABLE_INPUT],\n[AC_CACHE_CHECK([whether sscanf needs writable input],\n                 gmp_cv_func_sscanf_writable_input,\n[case $host in\n  *-*-hpux9 | *-*-hpux9.*)\n     gmp_cv_func_sscanf_writable_input=yes ;;\n  *) gmp_cv_func_sscanf_writable_input=no  ;;\nesac\n])\ncase $gmp_cv_func_sscanf_writable_input in\n  yes) AC_DEFINE(SSCANF_WRITABLE_INPUT, 1,\n                 [Define to 1 if sscanf requires writable inputs]) ;;\n  no)  ;;\n  *)   AC_MSG_ERROR([unrecognised \\$gmp_cv_func_sscanf_writable_input]) ;;\nesac\n])\n\n\ndnl  GMP_FUNC_VSNPRINTF\ndnl  ------------------\ndnl  Check whether vsnprintf exists, and works properly.\ndnl\ndnl  Systems without vsnprintf include mingw32\ndnl\ndnl  Sparc Solaris 2.7 in 64-bit mode doesn't always truncate, making\ndnl  vsnprintf like vsprintf, 
and hence completely useless.  On one system a\ndnl  literal string is enough to provoke the problem, on another a \"%n\" was\ndnl  needed.  There seems to be something weird going on with the optimizer\ndnl  or something, since on the first system adding a second check with\ndnl  \"%n\", or even just an initialized local variable, makes it work.  In\ndnl  any case, without bothering to get to the bottom of this, the two\ndnl  program runs in the code below end up successfully detecting the\ndnl  problem.\ndnl\ndnl  glibc 2.0.x returns either -1 or bufsize-1 for an overflow (both seen,\ndnl  not sure which 2.0.x does which), but still puts the correct null\ndnl  terminated result into the buffer.\n\nAC_DEFUN([GMP_FUNC_VSNPRINTF],\n[AC_REQUIRE([GMP_C_STDARG])\nAC_CHECK_FUNC(vsnprintf,\n              [gmp_vsnprintf_exists=yes],\n              [gmp_vsnprintf_exists=no])\nif test \"$gmp_vsnprintf_exists\" = no; then\n  gmp_cv_func_vsnprintf=no\nelse\n  AC_CACHE_CHECK([whether vsnprintf works],\n                 gmp_cv_func_vsnprintf,\n  [gmp_cv_func_vsnprintf=yes\n   for i in 'check (\"hello world\");' 'int n; check (\"%nhello world\", &n);'; do\n     AC_TRY_RUN([\n#include <string.h>  /* for strcmp */\n#include <stdio.h>   /* for vsnprintf */\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\nint\n#if HAVE_STDARG\ncheck (const char *fmt, ...)\n#else\ncheck (va_alist)\n     va_dcl\n#endif\n{\n  static char  buf[128];\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  char *fmt;\n  va_start (ap);\n  fmt = va_arg (ap, char *);\n#endif\n\n  ret = vsnprintf (buf, 4, fmt, ap);\n\n  if (strcmp (buf, \"hel\") != 0)\n    return 1;\n\n  /* allowed return values */\n  if (ret != -1 && ret != 3 && ret != 11)\n    return 2;\n\n  return 0;\n}\n\nint\nmain ()\n{\n$i\n  return 0;\n}\n],\n      [:],\n      [gmp_cv_func_vsnprintf=no; break],\n      [gmp_cv_func_vsnprintf=probably; break])\n  done\n  ])\n  if test 
\"$gmp_cv_func_vsnprintf\" = probably; then\n    AC_MSG_WARN([cannot check for properly working vsnprintf when cross compiling, will assume it's ok])\n  fi\n  if test \"$gmp_cv_func_vsnprintf\" != no; then\n    AC_DEFINE(HAVE_VSNPRINTF,1,\n    [Define to 1 if you have the `vsnprintf' function and it works properly.])\n  fi\nfi\n])\n\n\ndnl  GMP_H_ANSI\ndnl  ----------\ndnl  Check whether mpir.h recognises the compiler as ANSI capable.\n\nAC_DEFUN([GMP_H_ANSI],\n[AC_REQUIRE([AC_PROG_CC_STDC])\ncase $ac_cv_prog_cc_stdc in\n  no)\n    ;;\n  *)\n    AC_TRY_COMPILE(\nGMP_INCLUDE_GMP_H\n[#if ! __GMP_HAVE_PROTOTYPES\ndie die die\n#endif\n],,,\n    [AC_MSG_WARN([mpir.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable])])\n    ;;\nesac\n])\n\n\ndnl  GMP_H_EXTERN_INLINE\ndnl  -------------------\ndnl  If the compiler has an \"inline\" of some sort, check whether the\ndnl  #ifdef's in mpir.h recognise it.\n\nAC_DEFUN([GMP_H_EXTERN_INLINE],\n[AC_REQUIRE([AC_C_INLINE])\ncase $ac_cv_c_inline in\nno) ;;\n*)\n  AC_TRY_COMPILE(\n[#define __GMP_WITHIN_CONFIGURE_INLINE 1\n]GMP_INCLUDE_GMP_H[\n#ifndef __GMP_EXTERN_INLINE\ndie die die\n#endif\n],,,\n  [case $ac_cv_c_inline in\n  yes) tmp_inline=inline ;;\n  *)   tmp_inline=$ac_cv_c_inline ;;\n  esac    \n  AC_MSG_WARN([mpir.h doesnt recognise compiler \"$tmp_inline\", inlines will be unavailable])])\n  ;;\nesac\n])\n\n\ndnl  GMP_H_HAVE_FILE\ndnl  ---------------\ndnl  Check whether the #ifdef's in mpir.h recognise when stdio.h has been\ndnl  included to get FILE.\n\nAC_DEFUN([GMP_H_HAVE_FILE],\n[AC_TRY_COMPILE(\n[#include <stdio.h>]\nGMP_INCLUDE_GMP_H\n[#if ! 
_GMP_H_HAVE_FILE\ndie die die\n#endif\n],,,\n  [AC_MSG_WARN([mpir.h doesnt recognise <stdio.h>, FILE prototypes will be unavailable])])\n])\n\n\ndnl  GMP_PROG_CC_FOR_BUILD\ndnl  ---------------------\ndnl  Establish CC_FOR_BUILD, a C compiler for the build system.\ndnl\ndnl  If CC_FOR_BUILD is set then it's expected to work, likewise the old\ndnl  style HOST_CC, otherwise some likely candidates are tried, the same as\ndnl  configfsf.guess.\n\nAC_DEFUN([GMP_PROG_CC_FOR_BUILD],\n[AC_REQUIRE([AC_PROG_CC])\nif test -n \"$CC_FOR_BUILD\"; then\n  GMP_PROG_CC_FOR_BUILD_WORKS($CC_FOR_BUILD,,\n    [AC_MSG_ERROR([Specified CC_FOR_BUILD doesn't seem to work])])\nelif test -n \"$HOST_CC\"; then\n  GMP_PROG_CC_FOR_BUILD_WORKS($HOST_CC,\n    [CC_FOR_BUILD=$HOST_CC],\n    [AC_MSG_ERROR([Specified HOST_CC doesn't seem to work])])\nelse\n  for i in \"$CC\" \"$CC $CFLAGS $CPPFLAGS\" cc gcc c89 c99; do\n    GMP_PROG_CC_FOR_BUILD_WORKS($i,\n      [CC_FOR_BUILD=$i\n       break])\n  done\n  if test -z \"$CC_FOR_BUILD\"; then\n    AC_MSG_ERROR([Cannot find a build system compiler])\n  fi\nfi\n    \nAC_ARG_VAR(CC_FOR_BUILD,[build system C compiler])\nAC_SUBST(CC_FOR_BUILD)\n])\n\n\ndnl  GMP_PROG_CC_FOR_BUILD_WORKS(cc/cflags[,[action-if-good][,action-if-bad]])\ndnl  -------------------------------------------------------------------------\ndnl  See if the given cc/cflags works on the build system.\ndnl\ndnl  It seems easiest to just use the default compiler output, rather than\ndnl  figuring out the .exe or whatever at this stage.\n\nAC_DEFUN([GMP_PROG_CC_FOR_BUILD_WORKS],\n[AC_MSG_CHECKING([build system compiler $1])\n# remove anything that might look like compiler output to our \"||\" expression\nrm -f conftest* a.out b.out a.exe a_out.exe\ncat >conftest.c <<EOF\nint\nmain ()\n{\n  return 0;\n}\nEOF\ngmp_compile=\"$1 conftest.c\"\ncc_for_build_works=no\nif AC_TRY_EVAL(gmp_compile); then\n  if (./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest) >&AC_FD_CC 2>&1; then\n    
cc_for_build_works=yes\n  fi\nfi\nrm -f conftest* a.out b.out a.exe a_out.exe\nAC_MSG_RESULT($cc_for_build_works)\nif test \"$cc_for_build_works\" = yes; then\n  ifelse([$2],,:,[$2])\nelse\n  ifelse([$3],,:,[$3])\nfi\n])\n\n\ndnl  GMP_PROG_CPP_FOR_BUILD\ndnl  ---------------------\ndnl  Establish CPP_FOR_BUILD, the build system C preprocessor.\ndnl  The choices tried here are the same as AC_PROG_CPP, but with\ndnl  CC_FOR_BUILD.\n\nAC_DEFUN([GMP_PROG_CPP_FOR_BUILD],\n[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])\nAC_MSG_CHECKING([for build system preprocessor])\nif test -z \"$CPP_FOR_BUILD\"; then\n  AC_CACHE_VAL(gmp_cv_prog_cpp_for_build,\n  [cat >conftest.c <<EOF\n#define FOO BAR\nEOF\n  for i in \"$CC_FOR_BUILD -E\" \"$CC_FOR_BUILD -E -traditional-cpp\" \"/lib/cpp\"; do\n    gmp_compile=\"$i conftest.c\"\n    if AC_TRY_EVAL(gmp_compile) >&AC_FD_CC 2>&1; then\n      gmp_cv_prog_cpp_for_build=$i\n      break\n    fi\n  done\n  rm -f conftest* a.out b.out a.exe a_out.exe\n  if test -z \"$gmp_cv_prog_cpp_for_build\"; then\n    AC_MSG_ERROR([Cannot find build system C preprocessor.])\n  fi\n  ])\n  CPP_FOR_BUILD=$gmp_cv_prog_cpp_for_build\nfi\nAC_MSG_RESULT([$CPP_FOR_BUILD])\n\nAC_ARG_VAR(CPP_FOR_BUILD,[build system C preprocessor])\nAC_SUBST(CPP_FOR_BUILD)\n])\n\n\ndnl  GMP_PROG_EXEEXT_FOR_BUILD\ndnl  -------------------------\ndnl  Determine EXEEXT_FOR_BUILD, the build system executable suffix.\ndnl\ndnl  The idea is to find what \"-o conftest$foo\" will make it possible to run\ndnl  the program with ./conftest.  On Unix-like systems this is of course\ndnl  nothing, for DOS it's \".exe\", or for a strange RISC OS foreign file\ndnl  system cross compile it can be \",ff8\" apparently.  
Not sure if the\ndnl  latter actually applies to a build-system executable, maybe it doesn't,\ndnl  but it won't hurt to try.\n\nAC_DEFUN([GMP_PROG_EXEEXT_FOR_BUILD],\n[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])\nAC_CACHE_CHECK([for build system executable suffix],\n               gmp_cv_prog_exeext_for_build,\n[cat >conftest.c <<EOF\nint\nmain ()\n{\n  return 0;\n}\nEOF\nfor i in .exe ,ff8 \"\"; do\n  gmp_compile=\"$CC_FOR_BUILD conftest.c -o conftest$i\"\n  if AC_TRY_EVAL(gmp_compile); then\n    if (./conftest) 2>&AC_FD_CC; then\n      gmp_cv_prog_exeext_for_build=$i\n      break\n    fi\n  fi\ndone\nrm -f conftest*\nif test \"${gmp_cv_prog_exeext_for_build+set}\" != set; then\n  AC_MSG_ERROR([Cannot determine executable suffix])\nfi\n])\nAC_SUBST(EXEEXT_FOR_BUILD,$gmp_cv_prog_exeext_for_build)\n])\n\n\ndnl  GMP_C_FOR_BUILD_ANSI\ndnl  --------------------\ndnl  Determine whether CC_FOR_BUILD is ANSI, and establish U_FOR_BUILD\ndnl  accordingly.\n\nAC_DEFUN([GMP_C_FOR_BUILD_ANSI],\n[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])\nAC_CACHE_CHECK([whether build system compiler is ANSI],\n               gmp_cv_c_for_build_ansi,\n[cat >conftest.c <<EOF\nint\nmain (int argc, char *argv[])\n{\n  return 0;\n}\nEOF\ngmp_compile=\"$CC_FOR_BUILD conftest.c\"\nif AC_TRY_EVAL(gmp_compile); then\n  gmp_cv_c_for_build_ansi=yes\nelse\n  gmp_cv_c_for_build_ansi=no\nfi\nrm -f conftest* a.out b.out a.exe a_out.exe\n])\nif test \"$gmp_cv_c_for_build_ansi\" = yes; then\n  U_FOR_BUILD=\nelse\n  AC_SUBST(U_FOR_BUILD,_)\nfi\n])\n\n\ndnl  GMP_CHECK_LIBM_FOR_BUILD\ndnl  ------------------------\ndnl  Establish LIBM_FOR_BUILD as -lm, if that seems to work.\ndnl\ndnl  Libtool AC_CHECK_LIBM also uses -lmw on *-ncr-sysv4.3*, if it works.\ndnl  Don't know what that does, lets assume it's not needed just for log().\n\nAC_DEFUN([GMP_CHECK_LIBM_FOR_BUILD],\n[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])\nAC_CACHE_CHECK([for build system compiler math library],\n               gmp_cv_check_libm_for_build,\n[cat >conftest.c 
<<EOF\n#include <math.h>\nint\nmain ()\n{\n  return 0;\n}\ndouble d;\ndouble\nfoo ()\n{\n  return log (d);\n}\nEOF\ngmp_compile=\"$CC_FOR_BUILD conftest.c -lm\"\nif AC_TRY_EVAL(gmp_compile); then\n  gmp_cv_check_libm_for_build=-lm\nelse\n  gmp_cv_check_libm_for_build=no\nfi\nrm -f conftest* a.out b.out a.exe a_out.exe\n])\ncase $gmp_cv_check_libm_for_build in\n  yes) AC_SUBST(LIBM_FOR_BUILD,-lm) ;;\n  no)  LIBM_FOR_BUILD= ;;\n  *)   LIBM_FOR_BUILD=$gmp_cv_check_libm_for_build ;;\nesac\n])\n"
  },
  {
    "path": "appveyor.yml",
    "content": "version: 1.0.{build}\nbuild:\n  verbosity: minimal\n  \nenvironment:\n  YASM_BINARY: yasm-1.3.0-win64.exe\n  YASM_DOWNLOAD: http://www.tortall.net/projects/yasm/releases/%YASM_BINARY%\n  MINGW_PREREQ: zip lzip\n  matrix:\n    - COMPILER: MinGW-w64\n      ABI: 32\n      LIB: --enable-static --disable-shared\n    - COMPILER: MinGW-w64\n      ABI: 32\n      LIB: --disable-static --enable-shared\n    - COMPILER: MinGW-w64\n      ABI: 64\n      LIB: --enable-static --disable-shared\n    - COMPILER: MinGW-w64\n      ABI: 64\n      LIB: --disable-static --enable-shared\n\ninstall:\n  - if [%COMPILER%]==[MinGW-w64] C:\\msys64\\usr\\bin\\bash -lc \"pacman --noconfirm -S %MINGW_PREREQ%\"\n  - if not exist \"%YASM_BINARY%\" appveyor DownloadFile \"%YASM_DOWNLOAD%\"\n  - if [%COMPILER%]==[MinGW-w64] mkdir bin && copy \"%YASM_BINARY%\" bin\\yasm.exe 1>NUL\n \nbuild_script:\n  - if [%COMPILER%]==[MinGW-w64] C:\\msys64\\usr\\bin\\sh.exe --login /c/projects/mpir/.appveyor_msys_build.sh\n\ncache:\n  - '%YASM_BINARY%'\n\nartifacts:\n  - path: mpir-3.*\n    name: source tarball\n  - path: tests/testsuite-all.log\n    name: testsuite results\n\non_failure:\n  - appveyor PushArtifact tests/testsuite-all.log\n"
  },
  {
    "path": "assert.c",
    "content": "/* GMP assertion failure handler.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\n__gmp_assert_header (const char *filename, int linenum)\n{\n  if (filename != NULL && filename[0] != '\\0')\n    {\n      fprintf (stderr, \"%s:\", filename);\n      if (linenum != -1)\n        fprintf (stderr, \"%d: \", linenum);\n    }\n}\n\nvoid\n__gmp_assert_fail (const char *filename, int linenum,\n                   const char *expr)\n{\n  __gmp_assert_header (filename, linenum);\n  fprintf (stderr, \"GNU MP assertion failed: %s\\n\", expr);\n  abort();\n}\n"
  },
  {
    "path": "autogen.sh",
    "content": "#! /bin/sh\n\n# Save FSF scripts wrappers.\nmv -f config.guess configmpir.guess\nmv -f config.sub configmpir.sub\n# FSF scripts should be updated manually if needed.\n#mv configfsf.guess config.guess\n#mv configfsf.sub config.sub\n\n# Delete cached stuff\nrm -rf autom4te.cache\n\n# Regenerate autotools stuff\nautoreconf -isv\n\n# Restore the FSF scripts wrappers.\n#mv config.guess configfsf.guess\n#mv config.sub configfsf.sub\nmv -f configmpir.guess config.guess\nmv -f configmpir.sub config.sub\n\n"
  },
  {
    "path": "cl_int.bat",
    "content": "@echo off\n::  can use x86 or amd64 as param for the below batch file\ncall \"%VS90COMNTOOLS%\\..\\..\\VC\\vcvarsall.bat\" >nul\necho cl %*\ncl %*\n\n\n"
  },
  {
    "path": "clwrap",
    "content": "#!/bin/sh\n\nclopt=\"/nologo\"\nprev=\"none\"\n\nwhile test $# != 0 ; do\n\tcase $1 in\n\t-D*)\n\t\tloc=`echo $1 | cut -b 3-`\n\t\tclopt=\"$clopt /D$loc\" ;;\n\t-I*)\n\t\tloc=`echo $1 | cut -b 3-`\n\t\tclopt=\"$clopt /I$loc\" ;;\n\t-O2)\tclopt=\"$clopt /O2\" ;;\n\t-c)\tprev=src ;;\n\t-o)\tprev=out ;;\n\t*)\tcase $prev in\n\t\t\tsrc)\n\t\t\t\tclopt=\"$clopt /Tc$1\" ;;\n\t\t\tout)\n\t\t\t\tclopt=\"$clopt /Fo$1\" ;;\n\t\t\tnone)\n\t\t\t\tclopt=\"$clopt $1\" ;;\t# wouldn't normally pass thru\n\t\tesac\n\t\tprev=\"none\" ;;\n\tesac\n\tshift\ndone\ncmd /c cl_int.bat $clopt\n# probably want to convert / to \\ as well\n"
  },
  {
    "path": "compat.c",
    "content": "/* Old function entrypoints retained for binary compatibility.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* mpn_divexact_by3 was a function in gmp 3.0.1, but as of gmp 3.1 it's a\n   macro calling mpn_divexact_by3c.  */\n#if defined( _MSC_VER) && !defined( HAVE_NATIVE_mpn_divexact_by3 )\n\nmp_limb_t\n__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size)\n{\n  return mpn_divexact_by3 (dst, src, size);\n}\n\n#endif\n\n/* mpn_divmod_1 was a function in gmp 3.0.1 and earlier, but marked obsolete\n   in both gmp 2 and 3.  As of gmp 3.1 it's a macro calling mpn_divrem_1. */\nmp_limb_t\n__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)\n{\n  return mpn_divmod_1 (dst, src, size, divisor);\n}\n\n\n/* mpz_legendre was a separate function in gmp 3.1.1 and earlier, but as of\n   4.0 it's a #define alias for mpz_jacobi.  */\nint\n__gmpz_legendre (mpz_srcptr a, mpz_srcptr b)\n{\n  return mpz_jacobi (a, b);\n}\n"
  },
  {
    "path": "config.guess",
    "content": "#! /bin/sh\n#\n# GMP config.guess wrapper.\n\n\n# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation,\n# Inc.\n#\n# Copyright 2008 William Hart, Gonzalo Tornaria\n#\n# This file is part of the MPIR Library.\n#\n# The MPIR Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published\n# by the Free Software Foundation; either version 2.1 of the License, or (at\n# your option) any later version.\n#\n# The MPIR Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the MPIR Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage: config.guess\n#\n# Print the host system CPU-VENDOR-OS.\n#\n# configfsf.guess is run and its guess then sharpened up to take advantage\n# of the finer grained CPU types that GMP knows.\n\n\n# Expect to find configfsf.guess in the same directory as this config.guess\nconfigfsf_guess=\"`echo \\\"$0\\\" | sed 's/config.guess$/configfsf.guess/'`\"\ncpuid_c_path=\"`echo \\\"$0\\\" | sed 's/config.guess$/cpuid.c/'`\"\nif test \"$configfsf_guess\" = \"$0\"; then\n  echo \"Cannot derive configfsf.guess from $0\" 1>&2\n  exit 1\nfi\nif test -f \"$configfsf_guess\"; then\n  :\nelse\n  echo \"$configfsf_guess not found\" 1>&2\n  exit 1\nfi\n\n# Setup a $SHELL with which to run configfsf.guess, using the same\n# $CONFIG_SHELL or /bin/sh as autoconf does when running config.guess\nSHELL=${CONFIG_SHELL-/bin/sh}\n\n# Identify ourselves on --version, --help or errors\nif test $# != 0; then\n  echo \"(GNU MP wrapped config.guess)\"\n  $SHELL 
$configfsf_guess \"$@\"\n  exit 1\nfi\n\nguess_full=`$SHELL $configfsf_guess`\nif test $? != 0; then\n  exit 1\nfi\n\nguess_cpu=`echo \"$guess_full\" | sed 's/-.*$//'`\nguess_rest=`echo \"$guess_full\" | sed 's/^[^-]*//'`\nexact_cpu=\n\n\n# -------------------------------------------------------------------------\n# The following should look at the current guess and probe the system to\n# establish a better guess in exact_cpu.  Leave exact_cpu empty if probes\n# can't be done, or don't work.\n#\n# When a number of probes are done, test -z \"$exact_cpu\" can be used instead\n# of putting each probe under an \"else\" of the preceeding.  That can stop\n# the code getting horribly nested and marching off the right side of the\n# screen.\n\n# Note that when a compile-and-link is done in one step we need to remove .o\n# files, since lame C compilers generate these even when not asked.\n#\n\ndummy=dummy-$$\ntrap 'rm -f $dummy.c $dummy.o $dummy.core $dummy ${dummy}1.s ${dummy}2.c ; exit 1' 1 2 15\n\n# Use $HOST_CC if defined. $CC may point to a cross-compiler\nif test x\"$CC_FOR_BUILD\" = x; then\n  if test x\"$HOST_CC\" != x; then\n    CC_FOR_BUILD=\"$HOST_CC\"\n  else\n    if test x\"$CC\" != x; then\n      CC_FOR_BUILD=\"$CC\"\n    else\n      echo 'dummy(){}' >$dummy.c\n      for c in cc gcc c89 c99; do\n\t  ($c $dummy.c -c) >/dev/null 2>&1\n\t  if test $? 
= 0; then\n\t      CC_FOR_BUILD=\"$c\"; break\n\t  fi\n      done\n      rm -f $dummy.c $dummy.o\n      if test x\"$CC_FOR_BUILD\" = x; then\n\tCC_FOR_BUILD=no_compiler_found\n      fi\n    fi\n  fi\nfi\n\n#here we have to distinguish between mingw32 32bit and 64bit , as the msys people refuse to\n#as we changing the middle part of the triple we do this first\n# find the size of a limb\n\ncase \"$guess_full\" in\n\n*-pc-mingw32)\n\n  cat <<EOF >${dummy}.c\nmain(){\n#ifdef _WIN64\nprintf(\"-w64-mingw32\\n\");\n#endif\nreturn 0;}\nEOF\n  if ($CC_FOR_BUILD ${dummy}.c -o $dummy) >/dev/null 2>&1; then\n    x=`$SHELL -c ./$dummy 2>/dev/null`\n    if test $? = 0 && test -n \"$x\"; then\n      guess_rest=$x\n    fi\n  fi\n  rm -f ${dummy}.c $dummy\n  ;;\n*-pc-mingw64)\n  guess_rest=\"-w64-mingw64\";;\nesac\n\n\n\n\ncase \"$guess_full\" in\n\nia64*-*-*)\n  # CPUID[3] bits 24 to 31 is the processor family.  itanium2 is documented\n  # as 0x1f, plain itanium has been seen returning 0x07 on two systems, but\n  # haven't found any documentation on it as such.\n  #\n  # Defining both getcpuid and _getcpuid lets us ignore whether the system\n  # expects underscores or not.\n  #\n  # \"unsigned long long\" is always 64 bits, in fact on hpux in ilp32 mode\n  # (which is the default there), it's the only 64-bit type.\n  #\n  cat >${dummy}a.s <<EOF\n\t.text\n\t.global\t_getcpuid\n\t.proc\t_getcpuid\n_getcpuid:\n\tmov\tr8 = CPUID[r32] ;;\n\tbr.ret.sptk.many rp ;;\n\t.endp\t_getcpuid\n\t.global\tgetcpuid\n\t.proc\tgetcpuid\ngetcpuid:\n\tmov\tr8 = CPUID[r32] ;;\n\tbr.ret.sptk.many rp ;;\n\t.endp\tgetcpuid\nEOF\n  cat >${dummy}b.c <<EOF\n#include <stdio.h>\nunsigned long long getcpuid ();\nint\nmain ()\n{\n  if (getcpuid(0LL) == 0x49656E69756E6547LL && getcpuid(1LL) == 0x6C65746ELL)\n    {\n      /* \"GenuineIntel\" */\n      switch ((getcpuid(3LL) >> 24) & 0xFF) {\n      case 0x07: puts (\"itanium\");  break;\n      case 0x1F: puts (\"itanium2\"); break;\n      }\n    }\n  return 
0;\n}\nEOF\n  if $CC_FOR_BUILD ${dummy}a.s ${dummy}b.c -o $dummy >/dev/null 2>&1; then\n    exact_cpu=`./$dummy`\n  fi\n  rm -f ${dummy}a.s ${dummy}a.o ${dummy}b.c ${dummy}b.o $dummy $dummy.core core\n  ;;\n\nrs6000-*-* | powerpc*-*-*)\n  # Enhancement: On MacOS the \"machine\" command prints for instance\n  # \"ppc750\".  Interestingly on powerpc970-apple-darwin6.8.5 it prints\n  # \"ppc970\" where there's no actual #define for 970 from NXGetLocalArchInfo\n  # (as noted below).  But the man page says the command is still \"under\n  # development\", so it doesn't seem wise to use it just yet, not while\n  # there's an alternative.\n  #\n  # Try to read the PVR.  mfpvr is a protected instruction, NetBSD, MacOS\n  # and AIX don't allow it in user mode, but the Linux kernel does.\n  #\n  # Using explicit bytes for mfpvr avoids worrying about assembler syntax\n  # and underscores.  \"char\"s are used instead of \"int\"s to avoid worrying\n  # whether sizeof(int)==4 or if it's the right endianness.\n  #\n  # Note this is no good on AIX, since a C function there is the address of\n  # a function descriptor, not actual code.  
But this doesn't matter since\n  # AIX doesn't allow mfpvr anyway.\n  #\n  cat >$dummy.c <<\\EOF\n#include <stdio.h>\nstruct {\n  int   n;  /* force 4-byte alignment */\n  char  a[8];\n} getpvr = {\n  0,\n  {\n    0x7c, 0x7f, 0x42, 0xa6,  /* mfpvr r3 */\n    0x4e, 0x80, 0x00, 0x20,  /* blr      */\n  }\n};\nint\nmain ()\n{\n  unsigned  (*fun)();\n  unsigned  pvr;\n\n  /* a separate \"fun\" variable is necessary for gcc 2.95.2 on MacOS,\n     it gets a compiler error on a combined cast and call */\n  fun = (unsigned (*)()) getpvr.a;\n  pvr = (*fun) ();\n\n  switch (pvr >> 16) {\n  case 0x0001: puts (\"powerpc601\");  break;\n  case 0x0003: puts (\"powerpc603\");  break;\n  case 0x0004: puts (\"powerpc604\");  break;\n  case 0x0006: puts (\"powerpc603e\"); break;\n  case 0x0007: puts (\"powerpc603e\"); break;  /* 603ev */\n  case 0x0008: puts (\"powerpc750\");  break;\n  case 0x0009: puts (\"powerpc604e\"); break;\n  case 0x000a: puts (\"powerpc604e\"); break;  /* 604ev5 */\n  case 0x000c: puts (\"powerpc7400\"); break;\n  case 0x0041: puts (\"powerpc630\");  break;\n  case 0x0050: puts (\"powerpc860\");  break;\n  case 0x8000: puts (\"powerpc7450\"); break;\n  case 0x8001: puts (\"powerpc7455\"); break;\n  case 0x8002: puts (\"powerpc7457\"); break;\n  case 0x800c: puts (\"powerpc7410\"); break;\n  }\n  return 0;\n}\nEOF\n  if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then\n    # This style construct is needed on AIX 4.3 to suppress the SIGILL error\n    # from (*fun)().  
Using $SHELL -c ./$dummy 2>/dev/null doesn't work.\n    { x=`./$dummy`; } 2>/dev/null\n    if test -n \"$x\"; then\n      exact_cpu=$x\n    fi\n  fi\n  rm -f $dummy.c $dummy.o $dummy $dummy.core\n\n  # Grep the linux kernel /proc/cpuinfo pseudo-file.\n  # Anything unrecognised is ignored, since of course we mustn't spit out\n  # a cpu type config.sub doesn't know.\n  if test -z \"$exact_cpu\" && test -f /proc/cpuinfo; then\n    x=`grep \"^cpu[ \t]\" /proc/cpuinfo | head -n 1`\n    x=`echo $x | sed -n 's/^cpu[ \t]*:[ \t]*\\([A-Za-z0-9]*\\).*/\\1/p'`\n    x=`echo $x | sed 's/PPC//'`\n    case $x in\n      601)     exact_cpu=\"power\" ;;\n      603ev)   exact_cpu=\"powerpc603e\" ;;\n      604ev5)  exact_cpu=\"powerpc604e\" ;;\n      603 | 603e | 604 | 604e | 750 | 821 | 860 | 970)\n        exact_cpu=\"powerpc$x\" ;;\n      POWER[4-9])\n        exact_cpu=`echo $x | sed \"s;POWER;power;\"` ;;\n    esac\n  fi\n\n  if test -z \"$exact_cpu\"; then\n    # On AIX, try looking at _system_configuration.  This is present in\n    # version 4 at least.\n    cat >$dummy.c <<EOF\n#include <stdio.h>\n#include <sys/systemcfg.h>\nint\nmain ()\n{\n  switch (_system_configuration.implementation) {\n  /* Old versions of AIX don't have all these constants,\n     use ifdef for safety. 
*/\n#ifdef POWER_RS2\n  case POWER_RS2:    puts (\"power2\");     break;\n#endif\n#ifdef POWER_601\n  case POWER_601:    puts (\"power\");      break;\n#endif\n#ifdef POWER_603\n  case POWER_603:    puts (\"powerpc603\"); break;\n#endif\n#ifdef POWER_604\n  case POWER_604:    puts (\"powerpc604\"); break;\n#endif\n#ifdef POWER_620\n  case POWER_620:    puts (\"powerpc620\"); break;\n#endif\n#ifdef POWER_630\n  case POWER_630:    puts (\"powerpc630\"); break;\n#endif\n  /* Dunno what this is, leave it out for now.\n  case POWER_A35:    puts (\"powerpca35\"); break;\n  */\n  /* This is waiting for a bit more info.\n  case POWER_RS64II: puts (\"powerpcrs64ii\"); break;\n  */\n  default:\n    if (_system_configuration.architecture == POWER_RS)\n      puts (\"power\");\n    else if (_system_configuration.width == 64)\n      puts (\"powerpc64\");\n  }\n  return 0;\n}\nEOF\n    if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then\n      x=`./$dummy`\n      if test -n \"$x\"; then\n        exact_cpu=$x\n      fi\n    fi\n    rm -f $dummy.c $dummy.o $dummy\n  fi\n\n  if test -z \"$exact_cpu\"; then\n    # On MacOS X (or any Mach-O presumably), NXGetLocalArchInfo cpusubtype\n    # can tell us the exact cpu.\n    cat >$dummy.c <<EOF\n#include <stdio.h>\n#include <mach-o/arch.h>\nint\nmain (void)\n{\n  const NXArchInfo *a = NXGetLocalArchInfo();\n  if (a->cputype == CPU_TYPE_POWERPC)\n    {\n      switch (a->cpusubtype) {\n      /* The following known to Darwin 1.3. 
*/\n      case CPU_SUBTYPE_POWERPC_601:   puts (\"powerpc601\");  break;\n      case CPU_SUBTYPE_POWERPC_602:   puts (\"powerpc602\");  break;\n      case CPU_SUBTYPE_POWERPC_603:   puts (\"powerpc603\");  break;\n      case CPU_SUBTYPE_POWERPC_603e:  puts (\"powerpc603e\"); break;\n      case CPU_SUBTYPE_POWERPC_603ev: puts (\"powerpc603e\"); break;\n      case CPU_SUBTYPE_POWERPC_604:   puts (\"powerpc604\");  break;\n      case CPU_SUBTYPE_POWERPC_604e:  puts (\"powerpc604e\"); break;\n      case CPU_SUBTYPE_POWERPC_620:   puts (\"powerpc620\");  break;\n      case CPU_SUBTYPE_POWERPC_750:   puts (\"powerpc750\");  break;\n      case CPU_SUBTYPE_POWERPC_7400:  puts (\"powerpc7400\"); break;\n      case CPU_SUBTYPE_POWERPC_7450:  puts (\"powerpc7450\"); break;\n      /* Darwin 6.8.5 doesn't define a constant for 970, but gives 100 */\n      case 100:                       puts (\"powerpc970\");  break;\n      }\n    }\n  return 0;\n}\nEOF\n    if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then\n      x=`./$dummy`\n      if test -n \"$x\"; then\n        exact_cpu=$x\n      fi\n    fi\n    rm -f $dummy.c $dummy.o $dummy\n  fi\n  ;;\n\nsparc-*-* | sparc64-*-*)\n  # If we can recognise an actual v7 then $exact_cpu is set to \"sparc\" so as\n  # to short-circuit subsequent tests.\n\n  # Grep the linux kernel /proc/cpuinfo pseudo-file.\n  # A typical line is \"cpu\\t\\t: TI UltraSparc II  (BlackBird)\"\n  # See arch/sparc/kernel/cpu.c and arch/sparc64/kernel/cpu.c.\n  #\n  if test -f /proc/cpuinfo; then\n    if grep 'cpu.*Cypress' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"sparc\"   # ie. v7\n    elif grep 'cpu.*Power-UP' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"sparc\"   # ie. 
v7\n    elif grep 'cpu.*HyperSparc' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"sparcv8\"\n    elif grep 'cpu.*SuperSparc' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"supersparc\"\n    elif grep 'cpu.*MicroSparc' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"microsparc\"\n    elif grep 'cpu.*MB86904' /proc/cpuinfo >/dev/null; then\n      # actually MicroSPARC-II\n      exact_cpu=microsparc\n    elif grep 'cpu.*UltraSparc T1' /proc/cpuinfo >/dev/null; then\n      # this grep pattern has not been tested against any Linux\n      exact_cpu=\"ultrasparct1\"\n    elif grep 'cpu.*UltraSparc III' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"ultrasparc3\"\n    elif grep 'cpu.*UltraSparc IIi' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"ultrasparc2i\"\n    elif grep 'cpu.*UltraSparc II' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"ultrasparc2\"\n    elif grep 'cpu.*UltraSparc' /proc/cpuinfo >/dev/null; then\n      exact_cpu=\"ultrasparc\"\n    fi\n  fi\n\n  # Grep the output from sysinfo on SunOS.\n  # sysinfo has been seen living in /bin or in /usr/kvm\n  #\tcpu0 is a \"SuperSPARC Model 41 SPARCmodule\" CPU\n  #\tcpu0 is a \"75 MHz TI,TMS390Z55\" CPU\n  #\n  if test -z \"$exact_cpu\"; then\n    for i in sysinfo /usr/kvm/sysinfo; do\n      if $SHELL -c $i 2>/dev/null >conftest.dat; then\n        if grep 'cpu0 is a \"SuperSPARC' conftest.dat >/dev/null; then\n          exact_cpu=supersparc\n          break\n        elif grep 'cpu0 is a .*TMS390Z5.' 
conftest.dat >/dev/null; then\n          # TMS390Z50 and TMS390Z55\n          exact_cpu=supersparc\n          break\n        fi\n      fi\n    done\n    rm -f conftest.dat\n  fi\n\n  # Grep the output from prtconf on Solaris.\n  # Use an explicit /usr/sbin, since that directory might not be in a normal\n  # user's path.\n  #\n  #     SUNW,UltraSPARC (driver not attached)\n  #     SUNW,UltraSPARC-II (driver not attached)\n  #     SUNW,UltraSPARC-IIi (driver not attached)\n  #     SUNW,UltraSPARC-III+ (driver not attached)\n  #     Ross,RT625 (driver not attached)\n  #     TI,TMS390Z50 (driver not attached)\n  #\n  # /usr/sbin/sysdef prints similar information, but includes all loadable\n  # cpu modules, not just the real cpu.\n  #\n  # We first try a plain prtconf, since that is known to work on older systems.\n  # But for newer T1 systems, that doesn't produce any useful output, we need\n  # \"prtconf -vp\" there.\n  #\n  for prtconfopt in \"\" \"-vp\"; do\n    if test -z \"$exact_cpu\"; then\n      if $SHELL -c \"/usr/sbin/prtconf $prtconfopt\" 2>/dev/null >conftest.dat; then\n\tif grep 'SUNW,UltraSPARC-T1' conftest.dat >/dev/null; then\n\t  exact_cpu=ultrasparct1\n\telif grep 'SUNW,UltraSPARC-III' conftest.dat >/dev/null; then\n\t  exact_cpu=ultrasparc3\n\telif grep 'SUNW,UltraSPARC-IIi' conftest.dat >/dev/null; then\n\t  exact_cpu=ultrasparc2i\n\telif grep 'SUNW,UltraSPARC-II' conftest.dat >/dev/null; then\n\t  exact_cpu=ultrasparc2\n\telif grep 'SUNW,UltraSPARC' conftest.dat >/dev/null; then\n\t  exact_cpu=ultrasparc\n\telif grep 'Ross,RT62.' conftest.dat >/dev/null; then\n\t  # RT620, RT625, RT626 hypersparcs (v8).\n\t  exact_cpu=sparcv8\n\telif grep 'TI,TMS390Z5.' 
conftest.dat >/dev/null; then\n\t  # TMS390Z50 and TMS390Z55\n\t  exact_cpu=supersparc\n\telif grep 'TI,TMS390S10' conftest.dat >/dev/null; then\n\t  exact_cpu=microsparc\n\telif grep 'FMI,MB86904' conftest.dat >/dev/null; then\n\t  # actually MicroSPARC-II\n\t  exact_cpu=microsparc\n\tfi\n      fi\n      rm -f conftest.dat\n    fi\n  done\n\n  # Grep the output from sysctl hw.model on sparc or sparc64 *BSD.\n  # Use an explicit /sbin, since that directory might not be in a normal\n  # user's path.  Example outputs,\n  #\n  #     hw.model: Sun Microsystems UltraSparc-IIi\n  #\n  if test -z \"$exact_cpu\"; then\n    if $SHELL -c \"/sbin/sysctl hw.model\" 2>/dev/null >conftest.dat; then\n      if grep 'UltraSparc-T1' conftest.dat >/dev/null; then\n\t# this grep pattern has not been tested against any BSD\n        exact_cpu=ultrasparct1\n      elif grep 'UltraSparc-III' conftest.dat >/dev/null; then\n        exact_cpu=ultrasparc3\n      elif grep 'UltraSparc-IIi' conftest.dat >/dev/null; then\n        exact_cpu=ultrasparc2i\n      elif grep 'UltraSparc-II' conftest.dat >/dev/null; then\n        exact_cpu=ultrasparc2\n      elif grep 'UltraSparc' conftest.dat >/dev/null; then\n        exact_cpu=ultrasparc\n      elif grep 'TMS390Z5.' 
conftest.dat >/dev/null; then\n        # TMS390Z50 and TMS390Z55\n        exact_cpu=supersparc\n      elif grep 'TMS390S10' conftest.dat >/dev/null; then\n        exact_cpu=microsparc\n      elif grep 'MB86904' conftest.dat >/dev/null; then\n        # actually MicroSPARC-II\n        exact_cpu=microsparc\n      elif grep 'MB86907' conftest.dat >/dev/null; then\n        exact_cpu=turbosparc\n      fi\n    fi\n    rm -f conftest.dat\n  fi\n\n  # sun4m and sun4d are v8s of some sort, sun4u is a v9 of some sort\n  #\n  if test -z \"$exact_cpu\"; then\n    case `uname -m` in\n      sun4[md]) exact_cpu=sparcv8 ;;\n      sun4u)    exact_cpu=sparcv9 ;;\n    esac\n  fi\n  ;;\n\ni?86-*-*|x86_64-*-*|amd64-*-*)\n    cat <<EOF >${dummy}032.s\n\t.globl cpuid\n\t.globl _cpuid\ncpuid:\n_cpuid:\n\tpushl %esi\n\tpushl %ebx\n\tmovl 16(%esp),%eax\n\t.byte 0x0f\n\t.byte 0xa2\n\tmovl 12(%esp),%esi\n\tmovl %ebx,(%esi)\n\tmovl %edx,4(%esi)\n\tmovl %ecx,8(%esi)\n\tpopl %ebx\n\tpopl %esi\n\tret\nEOF\n  cat <<EOF >${dummy}064.s\n\t.globl cpuid\n\t.globl _cpuid\ncpuid:\n_cpuid:\n\tmovq %rbx,%r11\n\tmovq %rdi,%r9\n\tmovq %rsi,%rax\n\torq  %r8,%r8\n\tcmovne %rcx,%r9\n\tcmovne %rdx,%rax\t\n\t.byte 0x0f\n\t.byte 0xa2\n\tmovl %ebx,(%r9)\n\tmovl %edx,4(%r9)\n\tmovl %ecx,8(%r9)\n\tmovq %r11,%rbx\n\tret\nEOF\n  cat <<EOF >${dummy}32.c\n#define CONFIG_GUESS\t\t1\n#define CONFIG_GUESS_32BIT\t1\n#define CONFIG_GUESS_64BIT\t0\n#define FAT32\t\t\t0\n#define FAT64\t\t\t0\n#define INFAT\t\t\t0\nEOF\ncat $cpuid_c_path  >> ${dummy}32.c\n  cat <<EOF >>${dummy}32.c\nmain (){char *modelstr;\n  modelstr=__gmpn_cpu(0);\n  printf (\"%s\\n\", modelstr);\n  return 0;}\nEOF\n  cat <<EOF >${dummy}64.c\n#define CONFIG_GUESS\t\t1\n#define CONFIG_GUESS_32BIT\t0\n#define CONFIG_GUESS_64BIT\t1\n#define FAT32\t\t\t0\n#define FAT64\t\t\t0\n#define INFAT\t\t\t0\nEOF\ncat $cpuid_c_path  >> ${dummy}64.c\n  cat <<EOF >>${dummy}64.c\nmain (){char *modelstr;\n  modelstr=__gmpn_cpu(0);\n  printf (\"%s\\n\", modelstr);\n  return 
0;}\nEOF\n\n  if ($CC_FOR_BUILD ${dummy}064.s ${dummy}64.c -o $dummy) >/dev/null 2>&1; then\n    # On 80386 and early 80486 cpuid is not available and will result in a\n    # SIGILL message, hence 2>/dev/null.\n    #\n    # On i386-unknown-freebsd4.9, \"/bin/sh -c ./dummy\" seems to send an\n    # \"Illegal instruction (core dumped)\" message to stdout, so we test $?\n    # to check if the program run was successful.\n    #\n    x=`$SHELL -c ./$dummy 2>/dev/null` \n    if test $? = 0 && test -n \"$x\"; then\n      exact_cpu=$x\n    fi\n  fi\n  if test -z \"$exact_cpu\"; then  \n  if ($CC_FOR_BUILD ${dummy}032.s ${dummy}32.c -o $dummy) >/dev/null 2>&1; then\n    # On 80386 and early 80486 cpuid is not available and will result in a\n    # SIGILL message, hence 2>/dev/null.\n    #\n    # On i386-unknown-freebsd4.9, \"/bin/sh -c ./dummy\" seems to send an\n    # \"Illegal instruction (core dumped)\" message to stdout, so we test $?\n    # to check if the program run was successful.\n    #\n    #  NOTE : Pathscale CC doesn't like name1.s name2.c with name1==name2\n    x=`$SHELL -c ./$dummy 2>/dev/null`\n    if test $? = 0 && test -n \"$x\"; then\n      exact_cpu=$x\n    fi\n  fi\n  fi\n  \n  # We need to remove some .o files here since lame C compilers\n  # generate these even when not asked.\n  rm -f ${dummy}032.s ${dummy}32.o ${dummy}32.c ${dummy}032.o ${dummy}064.s ${dummy}64.o ${dummy}64.c ${dummy}064.o $dummy ${dummy}.exe\n  ;;\n\nesac\n\n\n\n# -------------------------------------------------------------------------\n# Use an exact cpu, if possible\n\nif test -n \"$exact_cpu\"; then\n  echo \"$exact_cpu$guess_rest\"\nelse\n  echo \"$guess_full\"\nfi\n\nexit 0\n\n\n\n# Local variables:\n# fill-column: 76\n# End:\n"
  },
  {
    "path": "config.sub",
    "content": "#! /bin/sh\n#\n# GMP config.sub wrapper.\n\n\n# Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published\n# by the Free Software Foundation; either version 2.1 of the License, or (at\n# your option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage: config.sub CPU-VENDOR-OS\n#        config.sub ALIAS\n#\n# Validate and canonicalize the given configuration name, with special\n# handling for GMP extra CPU names.\n#\n# When the CPU isn't special the whole name is simply passed straight\n# through to configfsf.sub.\n#\n# When the CPU is a GMP extra, configfsf.sub is run on a similar CPU that it\n# will recognise.  For example \"athlon-pc-freebsd3.5\" is validated using\n# \"i386-pc-freebsd3.5\".\n#\n# Any canonicalizations made by configfsf.sub are preserved.  For example\n# given \"athlon-linux\", configfsf.sub is called with \"i386-linux\" and will\n# give back \"i386-pc-linux-gnu\".  
\"athlon\" is then reinstated, so we print\n# \"athlon-pc-linux-gnu\".\n\n\n# Expect to find configfsf.sub in the same directory as this config.sub\nconfigfsf_sub=\"`echo \\\"$0\\\" | sed 's/config.sub$/configfsf.sub/'`\"\nif test \"$configfsf_sub\" = \"$0\"; then\n  echo \"Cannot derive configfsf.sub from $0\" 1>&2\n  exit 1\nfi\nif test -f \"$configfsf_sub\"; then\n  :\nelse\n  echo \"$configfsf_sub not found\" 1>&2\n  exit 1\nfi\n\n# Always run configfsf.sub with $SHELL, like autoconf does for config.sub\nSHELL=${CONFIG_SHELL-/bin/sh}\n\n# Identify ourselves on --version, --help, etc\ncase \"$1\" in\n\"\" | -*)\n  echo \"(GNU MP wrapped config.sub)\" 1>&2\n  $SHELL $configfsf_sub \"$@\"\n  exit\n  ;;\nesac\n\ngiven_full=\"$1\"\ngiven_cpu=`echo \"$given_full\" | sed 's/-.*$//'`\ngiven_rest=`echo \"$given_full\" | sed 's/^[^-]*//'`\n\n\n# Aliases for GMP extras\ncase \"$given_cpu\" in\n  # configfsf.sub turns p5 into i586, instead use our exact cpu type\n  p5 | p54)   given_cpu=pentium ;;\n  p55)        given_cpu=pentiummmx ;;\n\n  # configfsf.sub turns p6, pentiumii and pentiumiii into i686, instead use\n  # our exact cpu types\n  p6)         given_cpu=pentiumpro ;;\n  pentiumii)  given_cpu=pentium2 ;;\n  pentiumiii) given_cpu=pentium3 ;;\nesac\ngiven_full=\"$given_cpu$given_rest\"\n\n\n# GMP extras and what to use for the config.sub test\ncase \"$given_cpu\" in\nitanium | itanium2)\n  test_cpu=ia64 ;;\npentium | pentiummmx | pentiumpro | pentium[234] | k[56] | k6[23] | athlon | k7 | viac3*)\n  test_cpu=i386 ;;\npower[2-9] | power2sc)\n  test_cpu=power ;;\npowerpc401 | powerpc403 | powerpc405 | \\\npowerpc505 | \\\npowerpc601 | powerpc602  | \\\npowerpc603 | powerpc603e | \\\npowerpc604 | powerpc604e | \\\npowerpc620 | powerpc630  | powerpc970  | \\\npowerpc740 | powerpc7400 | powerpc7450 | powerpc750  | \\\npowerpc801 | powerpc821 | powerpc823  | powerpc860 | \\\npowerpc64)\n  test_cpu=powerpc ;;\nsparcv8 | supersparc | microsparc | \\\nultrasparc | ultrasparc2 
| ultrasparc2i | ultrasparc3 | ultrasparct1)\n  test_cpu=sparc ;;\nsh2)\n  test_cpu=sh ;;\n\n*)\n  # Don't need or want to change the given name, just run configfsf.sub\n  $SHELL $configfsf_sub \"$given_full\"\n  if test $? = 0; then\n    exit 0\n  else\n    echo \"(GNU MP wrapped config.sub, testing \\\"$given_full\\\")\"\n    exit 1\n  fi\nesac\n\n\ntest_full=\"$test_cpu$given_rest\"\ncanonical_full=`$SHELL $configfsf_sub \"$test_full\"`\nif test $? = 0; then\n  :\nelse\n  echo \"(GNU MP wrapped config.sub, testing \\\"$given_full\\\" as \\\"$test_full\\\")\"\n  exit 1\nfi\n\ncanonical_rest=`echo \"$canonical_full\" | sed 's/^[^-]*//'`\necho \"$given_cpu$canonical_rest\"\nexit 0\n\n\n\n# Local variables:\n# fill-column: 76\n# End:\n"
  },
  {
    "path": "configfsf.guess",
    "content": "#! /bin/sh\n# Attempt to guess a canonical system name.\n#   Copyright 1992-2015 Free Software Foundation, Inc.\n\ntimestamp='2015-03-04'\n\n# This file is free software; you can redistribute it and/or modify it\n# under the terms of the GNU General Public License as published by\n# the Free Software Foundation; either version 3 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n# General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program; if not, see <http://www.gnu.org/licenses/>.\n#\n# As a special exception to the GNU General Public License, if you\n# distribute this file as part of a program that contains a\n# configuration script generated by Autoconf, you may include it under\n# the same distribution terms that you use for the rest of that\n# program.  
This Exception is an additional permission under section 7\n# of the GNU General Public License, version 3 (\"GPLv3\").\n#\n# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.\n#\n# You can get the latest version of this script from:\n# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD\n#\n# Please send patches to <config-patches@gnu.org>.\n\n\nme=`echo \"$0\" | sed -e 's,.*/,,'`\n\nusage=\"\\\nUsage: $0 [OPTION]\n\nOutput the configuration name of the system \\`$me' is run on.\n\nOperation modes:\n  -h, --help         print this help, then exit\n  -t, --time-stamp   print date of last modification, then exit\n  -v, --version      print version number, then exit\n\nReport bugs and patches to <config-patches@gnu.org>.\"\n\nversion=\"\\\nGNU config.guess ($timestamp)\n\nOriginally written by Per Bothner.\nCopyright 1992-2015 Free Software Foundation, Inc.\n\nThis is free software; see the source for copying conditions.  There is NO\nwarranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\"\n\nhelp=\"\nTry \\`$me --help' for more information.\"\n\n# Parse command line\nwhile test $# -gt 0 ; do\n  case $1 in\n    --time-stamp | --time* | -t )\n       echo \"$timestamp\" ; exit ;;\n    --version | -v )\n       echo \"$version\" ; exit ;;\n    --help | --h* | -h )\n       echo \"$usage\"; exit ;;\n    -- )     # Stop option processing\n       shift; break ;;\n    - )\t# Use stdin as input.\n       break ;;\n    -* )\n       echo \"$me: invalid option $1$help\" >&2\n       exit 1 ;;\n    * )\n       break ;;\n  esac\ndone\n\nif test $# != 0; then\n  echo \"$me: too many arguments$help\" >&2\n  exit 1\nfi\n\ntrap 'exit 1' 1 2 15\n\n# CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a\n# compiler to aid in system detection is discouraged as it requires\n# temporary files to be created and, as you can see below, it is a\n# headache to deal with in a portable fashion.\n\n# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still\n# use `HOST_CC' if defined, but it is deprecated.\n\n# Portable tmp directory creation inspired by the Autoconf team.\n\nset_cc_for_build='\ntrap \"exitcode=\\$?; (rm -f \\$tmpfiles 2>/dev/null; rmdir \\$tmp 2>/dev/null) && exit \\$exitcode\" 0 ;\ntrap \"rm -f \\$tmpfiles 2>/dev/null; rmdir \\$tmp 2>/dev/null; exit 1\" 1 2 13 15 ;\n: ${TMPDIR=/tmp} ;\n { tmp=`(umask 077 && mktemp -d \"$TMPDIR/cgXXXXXX\") 2>/dev/null` && test -n \"$tmp\" && test -d \"$tmp\" ; } ||\n { test -n \"$RANDOM\" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||\n { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo \"Warning: creating insecure temp directory\" >&2 ; } ||\n { echo \"$me: cannot create a temporary directory in $TMPDIR\" >&2 ; exit 1 ; } ;\ndummy=$tmp/dummy ;\ntmpfiles=\"$dummy.c $dummy.o $dummy.rel $dummy\" ;\ncase $CC_FOR_BUILD,$HOST_CC,$CC in\n ,,)    echo \"int x;\" > $dummy.c ;\n\tfor c in cc gcc c89 c99 ; do\n\t  if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then\n\t     CC_FOR_BUILD=\"$c\"; break ;\n\t  fi ;\n\tdone ;\n\tif test x\"$CC_FOR_BUILD\" = x ; then\n\t  CC_FOR_BUILD=no_compiler_found ;\n\tfi\n\t;;\n ,,*)   CC_FOR_BUILD=$CC ;;\n ,*,*)  CC_FOR_BUILD=$HOST_CC ;;\nesac ; set_cc_for_build= ;'\n\n# This is needed to find uname on a Pyramid OSx when run in the BSD universe.\n# (ghazi@noc.rutgers.edu 1994-08-24)\nif (test -f /.attbin/uname) >/dev/null 2>&1 ; then\n\tPATH=$PATH:/.attbin ; export PATH\nfi\n\nUNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown\nUNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown\nUNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown\nUNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown\n\ncase 
\"${UNAME_SYSTEM}\" in\nLinux|GNU|GNU/*)\n\t# If the system lacks a compiler, then just pick glibc.\n\t# We could probably try harder.\n\tLIBC=gnu\n\n\teval $set_cc_for_build\n\tcat <<-EOF > $dummy.c\n\t#include <features.h>\n\t#if defined(__UCLIBC__)\n\tLIBC=uclibc\n\t#elif defined(__dietlibc__)\n\tLIBC=dietlibc\n\t#else\n\tLIBC=gnu\n\t#endif\n\tEOF\n\teval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`\n\t;;\nesac\n\n# Note: order is significant - the case branches are not exclusive.\n\ncase \"${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}\" in\n    *:NetBSD:*:*)\n\t# NetBSD (nbsd) targets should (where applicable) match one or\n\t# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,\n\t# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently\n\t# switched to ELF, *-*-netbsd* would select the old\n\t# object file format.  This provides both forward\n\t# compatibility and a consistent mechanism for selecting the\n\t# object file format.\n\t#\n\t# Note: NetBSD doesn't particularly care about the vendor\n\t# portion of the name.  
We always set it to \"unknown\".\n\tsysctl=\"sysctl -n hw.machine_arch\"\n\tUNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \\\n\t    /sbin/$sysctl 2>/dev/null || \\\n\t    /usr/sbin/$sysctl 2>/dev/null || \\\n\t    echo unknown)`\n\tcase \"${UNAME_MACHINE_ARCH}\" in\n\t    armeb) machine=armeb-unknown ;;\n\t    arm*) machine=arm-unknown ;;\n\t    sh3el) machine=shl-unknown ;;\n\t    sh3eb) machine=sh-unknown ;;\n\t    sh5el) machine=sh5le-unknown ;;\n\t    earmv*)\n\t\tarch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\\(armv[0-9]\\).*$,\\1,'`\n\t\tendian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\\(eb\\)$,\\1,p'`\n\t\tmachine=${arch}${endian}-unknown\n\t\t;;\n\t    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;\n\tesac\n\t# The Operating System including object format, if it has switched\n\t# to ELF recently, or will in the future.\n\tcase \"${UNAME_MACHINE_ARCH}\" in\n\t    arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax)\n\t\teval $set_cc_for_build\n\t\tif echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \\\n\t\t\t| grep -q __ELF__\n\t\tthen\n\t\t    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).\n\t\t    # Return netbsd for either.  FIX?\n\t\t    os=netbsd\n\t\telse\n\t\t    os=netbsdelf\n\t\tfi\n\t\t;;\n\t    *)\n\t\tos=netbsd\n\t\t;;\n\tesac\n\t# Determine ABI tags.\n\tcase \"${UNAME_MACHINE_ARCH}\" in\n\t    earm*)\n\t\texpr='s/^earmv[0-9]/-eabi/;s/eb$//'\n\t\tabi=`echo ${UNAME_MACHINE_ARCH} | sed -e \"$expr\"`\n\t\t;;\n\tesac\n\t# The OS release\n\t# Debian GNU/NetBSD machines have a different userland, and\n\t# thus, need a distinct triplet. 
However, they do not need\n\t# kernel version information, so it can be replaced with a\n\t# suitable tag, in the style of linux-gnu.\n\tcase \"${UNAME_VERSION}\" in\n\t    Debian*)\n\t\trelease='-gnu'\n\t\t;;\n\t    *)\n\t\trelease=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\\./'`\n\t\t;;\n\tesac\n\t# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:\n\t# contains redundant information, the shorter form:\n\t# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.\n\techo \"${machine}-${os}${release}${abi}\"\n\texit ;;\n    *:Bitrig:*:*)\n\tUNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`\n\techo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}\n\texit ;;\n    *:OpenBSD:*:*)\n\tUNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`\n\techo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}\n\texit ;;\n    *:ekkoBSD:*:*)\n\techo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}\n\texit ;;\n    *:SolidBSD:*:*)\n\techo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}\n\texit ;;\n    macppc:MirBSD:*:*)\n\techo powerpc-unknown-mirbsd${UNAME_RELEASE}\n\texit ;;\n    *:MirBSD:*:*)\n\techo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}\n\texit ;;\n    alpha:OSF1:*:*)\n\tcase $UNAME_RELEASE in\n\t*4.0)\n\t\tUNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`\n\t\t;;\n\t*5.*)\n\t\tUNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`\n\t\t;;\n\tesac\n\t# According to Compaq, /usr/sbin/psrinfo has been available on\n\t# OSF/1 and Tru64 systems produced since 1995.  I hope that\n\t# covers most systems running today.  
This code pipes the CPU\n\t# types through head -n 1, so we only detect the type of CPU 0.\n\tALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \\(.*\\) processor.*$/\\1/p' | head -n 1`\n\tcase \"$ALPHA_CPU_TYPE\" in\n\t    \"EV4 (21064)\")\n\t\tUNAME_MACHINE=\"alpha\" ;;\n\t    \"EV4.5 (21064)\")\n\t\tUNAME_MACHINE=\"alpha\" ;;\n\t    \"LCA4 (21066/21068)\")\n\t\tUNAME_MACHINE=\"alpha\" ;;\n\t    \"EV5 (21164)\")\n\t\tUNAME_MACHINE=\"alphaev5\" ;;\n\t    \"EV5.6 (21164A)\")\n\t\tUNAME_MACHINE=\"alphaev56\" ;;\n\t    \"EV5.6 (21164PC)\")\n\t\tUNAME_MACHINE=\"alphapca56\" ;;\n\t    \"EV5.7 (21164PC)\")\n\t\tUNAME_MACHINE=\"alphapca57\" ;;\n\t    \"EV6 (21264)\")\n\t\tUNAME_MACHINE=\"alphaev6\" ;;\n\t    \"EV6.7 (21264A)\")\n\t\tUNAME_MACHINE=\"alphaev67\" ;;\n\t    \"EV6.8CB (21264C)\")\n\t\tUNAME_MACHINE=\"alphaev68\" ;;\n\t    \"EV6.8AL (21264B)\")\n\t\tUNAME_MACHINE=\"alphaev68\" ;;\n\t    \"EV6.8CX (21264D)\")\n\t\tUNAME_MACHINE=\"alphaev68\" ;;\n\t    \"EV6.9A (21264/EV69A)\")\n\t\tUNAME_MACHINE=\"alphaev69\" ;;\n\t    \"EV7 (21364)\")\n\t\tUNAME_MACHINE=\"alphaev7\" ;;\n\t    \"EV7.9 (21364A)\")\n\t\tUNAME_MACHINE=\"alphaev79\" ;;\n\tesac\n\t# A Pn.n version is a patched version.\n\t# A Vn.n version is a released version.\n\t# A Tn.n version is a released field test version.\n\t# A Xn.n version is an unreleased experimental baselevel.\n\t# 1.2 uses \"1.2\" for uname -r.\n\techo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`\n\t# Reset EXIT trap before exiting to avoid spurious non-zero exit code.\n\texitcode=$?\n\ttrap '' 0\n\texit $exitcode ;;\n    Alpha\\ *:Windows_NT*:*)\n\t# How do we know it's Interix rather than the generic POSIX subsystem?\n\t# Should we change UNAME_MACHINE based on the output of uname instead\n\t# of the specific Alpha model?\n\techo alpha-pc-interix\n\texit ;;\n    21064:Windows_NT:50:3)\n\techo alpha-dec-winnt3.5\n\texit ;;\n    
Amiga*:UNIX_System_V:4.0:*)\n\techo m68k-unknown-sysv4\n\texit ;;\n    *:[Aa]miga[Oo][Ss]:*:*)\n\techo ${UNAME_MACHINE}-unknown-amigaos\n\texit ;;\n    *:[Mm]orph[Oo][Ss]:*:*)\n\techo ${UNAME_MACHINE}-unknown-morphos\n\texit ;;\n    *:OS/390:*:*)\n\techo i370-ibm-openedition\n\texit ;;\n    *:z/VM:*:*)\n\techo s390-ibm-zvmoe\n\texit ;;\n    *:OS400:*:*)\n\techo powerpc-ibm-os400\n\texit ;;\n    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)\n\techo arm-acorn-riscix${UNAME_RELEASE}\n\texit ;;\n    arm*:riscos:*:*|arm*:RISCOS:*:*)\n\techo arm-unknown-riscos\n\texit ;;\n    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)\n\techo hppa1.1-hitachi-hiuxmpp\n\texit ;;\n    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)\n\t# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.\n\tif test \"`(/bin/universe) 2>/dev/null`\" = att ; then\n\t\techo pyramid-pyramid-sysv3\n\telse\n\t\techo pyramid-pyramid-bsd\n\tfi\n\texit ;;\n    NILE*:*:*:dcosx)\n\techo pyramid-pyramid-svr4\n\texit ;;\n    DRS?6000:unix:4.0:6*)\n\techo sparc-icl-nx6\n\texit ;;\n    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)\n\tcase `/usr/bin/uname -p` in\n\t    sparc) echo sparc-icl-nx7; exit ;;\n\tesac ;;\n    s390x:SunOS:*:*)\n\techo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    sun4H:SunOS:5.*:*)\n\techo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)\n\techo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)\n\techo i386-pc-auroraux${UNAME_RELEASE}\n\texit ;;\n    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)\n\teval $set_cc_for_build\n\tSUN_ARCH=\"i386\"\n\t# If there is a compiler, see if it is configured for 64-bit objects.\n\t# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.\n\t# This test works for both compilers.\n\tif [ \"$CC_FOR_BUILD\" != 'no_compiler_found' ]; then\n\t    if 
(echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \\\n\t\t(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \\\n\t\tgrep IS_64BIT_ARCH >/dev/null\n\t    then\n\t\tSUN_ARCH=\"x86_64\"\n\t    fi\n\tfi\n\techo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    sun4*:SunOS:6*:*)\n\t# According to config.sub, this is the proper way to canonicalize\n\t# SunOS6.  Hard to guess exactly what SunOS6 will be like, but\n\t# it's likely to be more like Solaris than SunOS4.\n\techo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    sun4*:SunOS:*:*)\n\tcase \"`/usr/bin/arch -k`\" in\n\t    Series*|S4*)\n\t\tUNAME_RELEASE=`uname -v`\n\t\t;;\n\tesac\n\t# Japanese Language versions have a version number like `4.1.3-JL'.\n\techo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`\n\texit ;;\n    sun3*:SunOS:*:*)\n\techo m68k-sun-sunos${UNAME_RELEASE}\n\texit ;;\n    sun*:*:4.2BSD:*)\n\tUNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`\n\ttest \"x${UNAME_RELEASE}\" = \"x\" && UNAME_RELEASE=3\n\tcase \"`/bin/arch`\" in\n\t    sun3)\n\t\techo m68k-sun-sunos${UNAME_RELEASE}\n\t\t;;\n\t    sun4)\n\t\techo sparc-sun-sunos${UNAME_RELEASE}\n\t\t;;\n\tesac\n\texit ;;\n    aushp:SunOS:*:*)\n\techo sparc-auspex-sunos${UNAME_RELEASE}\n\texit ;;\n    # The situation for MiNT is a little confusing.  The machine name\n    # can be virtually everything (everything which is not\n    # \"atarist\" or \"atariste\" at least should have a processor\n    # > m68000).  The system name ranges from \"MiNT\" over \"FreeMiNT\"\n    # to the lowercase version \"mint\" (or \"freemint\").  Finally\n    # the system name \"TOS\" denotes a system which is actually not\n    # MiNT.  
But MiNT is downward compatible to TOS, so this should\n    # be no problem.\n    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)\n\techo m68k-atari-mint${UNAME_RELEASE}\n\texit ;;\n    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)\n\techo m68k-atari-mint${UNAME_RELEASE}\n\texit ;;\n    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)\n\techo m68k-atari-mint${UNAME_RELEASE}\n\texit ;;\n    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)\n\techo m68k-milan-mint${UNAME_RELEASE}\n\texit ;;\n    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)\n\techo m68k-hades-mint${UNAME_RELEASE}\n\texit ;;\n    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)\n\techo m68k-unknown-mint${UNAME_RELEASE}\n\texit ;;\n    m68k:machten:*:*)\n\techo m68k-apple-machten${UNAME_RELEASE}\n\texit ;;\n    powerpc:machten:*:*)\n\techo powerpc-apple-machten${UNAME_RELEASE}\n\texit ;;\n    RISC*:Mach:*:*)\n\techo mips-dec-mach_bsd4.3\n\texit ;;\n    RISC*:ULTRIX:*:*)\n\techo mips-dec-ultrix${UNAME_RELEASE}\n\texit ;;\n    VAX*:ULTRIX*:*:*)\n\techo vax-dec-ultrix${UNAME_RELEASE}\n\texit ;;\n    2020:CLIX:*:* | 2430:CLIX:*:*)\n\techo clipper-intergraph-clix${UNAME_RELEASE}\n\texit ;;\n    mips:*:*:UMIPS | mips:*:*:RISCos)\n\teval $set_cc_for_build\n\tsed 's/^\t//' << EOF >$dummy.c\n#ifdef __cplusplus\n#include <stdio.h>  /* for printf() prototype */\n\tint main (int argc, char *argv[]) {\n#else\n\tint main (argc, argv) int argc; char *argv[]; {\n#endif\n\t#if defined (host_mips) && defined (MIPSEB)\n\t#if defined (SYSTYPE_SYSV)\n\t  printf (\"mips-mips-riscos%ssysv\\n\", argv[1]); exit (0);\n\t#endif\n\t#if defined (SYSTYPE_SVR4)\n\t  printf (\"mips-mips-riscos%ssvr4\\n\", argv[1]); exit (0);\n\t#endif\n\t#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)\n\t  printf (\"mips-mips-riscos%sbsd\\n\", argv[1]); exit (0);\n\t#endif\n\t#endif\n\t  exit (-1);\n\t}\nEOF\n\t$CC_FOR_BUILD -o $dummy $dummy.c &&\n\t  dummyarg=`echo \"${UNAME_RELEASE}\" | sed -n 
's/\\([0-9]*\\).*/\\1/p'` &&\n\t  SYSTEM_NAME=`$dummy $dummyarg` &&\n\t    { echo \"$SYSTEM_NAME\"; exit; }\n\techo mips-mips-riscos${UNAME_RELEASE}\n\texit ;;\n    Motorola:PowerMAX_OS:*:*)\n\techo powerpc-motorola-powermax\n\texit ;;\n    Motorola:*:4.3:PL8-*)\n\techo powerpc-harris-powermax\n\texit ;;\n    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)\n\techo powerpc-harris-powermax\n\texit ;;\n    Night_Hawk:Power_UNIX:*:*)\n\techo powerpc-harris-powerunix\n\texit ;;\n    m88k:CX/UX:7*:*)\n\techo m88k-harris-cxux7\n\texit ;;\n    m88k:*:4*:R4*)\n\techo m88k-motorola-sysv4\n\texit ;;\n    m88k:*:3*:R3*)\n\techo m88k-motorola-sysv3\n\texit ;;\n    AViiON:dgux:*:*)\n\t# DG/UX returns AViiON for all architectures\n\tUNAME_PROCESSOR=`/usr/bin/uname -p`\n\tif [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]\n\tthen\n\t    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \\\n\t       [ ${TARGET_BINARY_INTERFACE}x = x ]\n\t    then\n\t\techo m88k-dg-dgux${UNAME_RELEASE}\n\t    else\n\t\techo m88k-dg-dguxbcs${UNAME_RELEASE}\n\t    fi\n\telse\n\t    echo i586-dg-dgux${UNAME_RELEASE}\n\tfi\n\texit ;;\n    M88*:DolphinOS:*:*)\t# DolphinOS (SVR3)\n\techo m88k-dolphin-sysv3\n\texit ;;\n    M88*:*:R3*:*)\n\t# Delta 88k system running SVR3\n\techo m88k-motorola-sysv3\n\texit ;;\n    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)\n\techo m88k-tektronix-sysv3\n\texit ;;\n    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)\n\techo m68k-tektronix-bsd\n\texit ;;\n    *:IRIX*:*:*)\n\techo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`\n\texit ;;\n    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.\n\techo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id\n\texit ;;               # Note that: echo \"'`uname -s`'\" gives 'AIX '\n    i*86:AIX:*:*)\n\techo i386-ibm-aix\n\texit ;;\n    ia64:AIX:*:*)\n\tif [ -x /usr/bin/oslevel ] ; 
then\n\t\tIBM_REV=`/usr/bin/oslevel`\n\telse\n\t\tIBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}\n\tfi\n\techo ${UNAME_MACHINE}-ibm-aix${IBM_REV}\n\texit ;;\n    *:AIX:2:3)\n\tif grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then\n\t\teval $set_cc_for_build\n\t\tsed 's/^\t\t//' << EOF >$dummy.c\n\t\t#include <sys/systemcfg.h>\n\n\t\tmain()\n\t\t\t{\n\t\t\tif (!__power_pc())\n\t\t\t\texit(1);\n\t\t\tputs(\"powerpc-ibm-aix3.2.5\");\n\t\t\texit(0);\n\t\t\t}\nEOF\n\t\tif $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`\n\t\tthen\n\t\t\techo \"$SYSTEM_NAME\"\n\t\telse\n\t\t\techo rs6000-ibm-aix3.2.5\n\t\tfi\n\telif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then\n\t\techo rs6000-ibm-aix3.2.4\n\telse\n\t\techo rs6000-ibm-aix3.2\n\tfi\n\texit ;;\n    *:AIX:*:[4567])\n\tIBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`\n\tif /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then\n\t\tIBM_ARCH=rs6000\n\telse\n\t\tIBM_ARCH=powerpc\n\tfi\n\tif [ -x /usr/bin/lslpp ] ; then\n\t\tIBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |\n\t\t\t   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`\n\telse\n\t\tIBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}\n\tfi\n\techo ${IBM_ARCH}-ibm-aix${IBM_REV}\n\texit ;;\n    *:AIX:*:*)\n\techo rs6000-ibm-aix\n\texit ;;\n    ibmrt:4.4BSD:*|romp-ibm:BSD:*)\n\techo romp-ibm-bsd4.4\n\texit ;;\n    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and\n\techo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to\n\texit ;;                             # report: romp-ibm BSD 4.3\n    *:BOSX:*:*)\n\techo rs6000-bull-bosx\n\texit ;;\n    DPX/2?00:B.O.S.:*:*)\n\techo m68k-bull-sysv3\n\texit ;;\n    9000/[34]??:4.3bsd:1.*:*)\n\techo m68k-hp-bsd\n\texit ;;\n    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)\n\techo m68k-hp-bsd4.4\n\texit ;;\n    9000/[34678]??:HP-UX:*:*)\n\tHPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`\n\tcase \"${UNAME_MACHINE}\" in\n\t    9000/31? 
)            HP_ARCH=m68000 ;;\n\t    9000/[34]?? )         HP_ARCH=m68k ;;\n\t    9000/[678][0-9][0-9])\n\t\tif [ -x /usr/bin/getconf ]; then\n\t\t    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`\n\t\t    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`\n\t\t    case \"${sc_cpu_version}\" in\n\t\t      523) HP_ARCH=\"hppa1.0\" ;; # CPU_PA_RISC1_0\n\t\t      528) HP_ARCH=\"hppa1.1\" ;; # CPU_PA_RISC1_1\n\t\t      532)                      # CPU_PA_RISC2_0\n\t\t\tcase \"${sc_kernel_bits}\" in\n\t\t\t  32) HP_ARCH=\"hppa2.0n\" ;;\n\t\t\t  64) HP_ARCH=\"hppa2.0w\" ;;\n\t\t\t  '') HP_ARCH=\"hppa2.0\" ;;   # HP-UX 10.20\n\t\t\tesac ;;\n\t\t    esac\n\t\tfi\n\t\tif [ \"${HP_ARCH}\" = \"\" ]; then\n\t\t    eval $set_cc_for_build\n\t\t    sed 's/^\t\t//' << EOF >$dummy.c\n\n\t\t#define _HPUX_SOURCE\n\t\t#include <stdlib.h>\n\t\t#include <unistd.h>\n\n\t\tint main ()\n\t\t{\n\t\t#if defined(_SC_KERNEL_BITS)\n\t\t    long bits = sysconf(_SC_KERNEL_BITS);\n\t\t#endif\n\t\t    long cpu  = sysconf (_SC_CPU_VERSION);\n\n\t\t    switch (cpu)\n\t\t\t{\n\t\t\tcase CPU_PA_RISC1_0: puts (\"hppa1.0\"); break;\n\t\t\tcase CPU_PA_RISC1_1: puts (\"hppa1.1\"); break;\n\t\t\tcase CPU_PA_RISC2_0:\n\t\t#if defined(_SC_KERNEL_BITS)\n\t\t\t    switch (bits)\n\t\t\t\t{\n\t\t\t\tcase 64: puts (\"hppa2.0w\"); break;\n\t\t\t\tcase 32: puts (\"hppa2.0n\"); break;\n\t\t\t\tdefault: puts (\"hppa2.0\"); break;\n\t\t\t\t} break;\n\t\t#else  /* !defined(_SC_KERNEL_BITS) */\n\t\t\t    puts (\"hppa2.0\"); break;\n\t\t#endif\n\t\t\tdefault: puts (\"hppa1.0\"); break;\n\t\t\t}\n\t\t    exit (0);\n\t\t}\nEOF\n\t\t    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`\n\t\t    test -z \"$HP_ARCH\" && HP_ARCH=hppa\n\t\tfi ;;\n\tesac\n\tif [ ${HP_ARCH} = \"hppa2.0w\" ]\n\tthen\n\t    eval $set_cc_for_build\n\n\t    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating\n\t    # 32-bit code.  
hppa64-hp-hpux* has the same kernel and a compiler\n\t    # generating 64-bit code.  GNU and HP use different nomenclature:\n\t    #\n\t    # $ CC_FOR_BUILD=cc ./config.guess\n\t    # => hppa2.0w-hp-hpux11.23\n\t    # $ CC_FOR_BUILD=\"cc +DA2.0w\" ./config.guess\n\t    # => hppa64-hp-hpux11.23\n\n\t    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |\n\t\tgrep -q __LP64__\n\t    then\n\t\tHP_ARCH=\"hppa2.0w\"\n\t    else\n\t\tHP_ARCH=\"hppa64\"\n\t    fi\n\tfi\n\techo ${HP_ARCH}-hp-hpux${HPUX_REV}\n\texit ;;\n    ia64:HP-UX:*:*)\n\tHPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`\n\techo ia64-hp-hpux${HPUX_REV}\n\texit ;;\n    3050*:HI-UX:*:*)\n\teval $set_cc_for_build\n\tsed 's/^\t//' << EOF >$dummy.c\n\t#include <unistd.h>\n\tint\n\tmain ()\n\t{\n\t  long cpu = sysconf (_SC_CPU_VERSION);\n\t  /* The order matters, because CPU_IS_HP_MC68K erroneously returns\n\t     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct\n\t     results, however.  */\n\t  if (CPU_IS_PA_RISC (cpu))\n\t    {\n\t      switch (cpu)\n\t\t{\n\t\t  case CPU_PA_RISC1_0: puts (\"hppa1.0-hitachi-hiuxwe2\"); break;\n\t\t  case CPU_PA_RISC1_1: puts (\"hppa1.1-hitachi-hiuxwe2\"); break;\n\t\t  case CPU_PA_RISC2_0: puts (\"hppa2.0-hitachi-hiuxwe2\"); break;\n\t\t  default: puts (\"hppa-hitachi-hiuxwe2\"); break;\n\t\t}\n\t    }\n\t  else if (CPU_IS_HP_MC68K (cpu))\n\t    puts (\"m68k-hitachi-hiuxwe2\");\n\t  else puts (\"unknown-hitachi-hiuxwe2\");\n\t  exit (0);\n\t}\nEOF\n\t$CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&\n\t\t{ echo \"$SYSTEM_NAME\"; exit; }\n\techo unknown-hitachi-hiuxwe2\n\texit ;;\n    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )\n\techo hppa1.1-hp-bsd\n\texit ;;\n    9000/8??:4.3bsd:*:*)\n\techo hppa1.0-hp-bsd\n\texit ;;\n    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)\n\techo hppa1.0-hp-mpeix\n\texit ;;\n    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )\n\techo hppa1.1-hp-osf\n\texit ;;\n    hp8??:OSF1:*:*)\n\techo hppa1.0-hp-osf\n\texit ;;\n    
i*86:OSF1:*:*)\n\tif [ -x /usr/sbin/sysversion ] ; then\n\t    echo ${UNAME_MACHINE}-unknown-osf1mk\n\telse\n\t    echo ${UNAME_MACHINE}-unknown-osf1\n\tfi\n\texit ;;\n    parisc*:Lites*:*:*)\n\techo hppa1.1-hp-lites\n\texit ;;\n    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)\n\techo c1-convex-bsd\n\texit ;;\n    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)\n\tif getsysinfo -f scalar_acc\n\tthen echo c32-convex-bsd\n\telse echo c2-convex-bsd\n\tfi\n\texit ;;\n    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)\n\techo c34-convex-bsd\n\texit ;;\n    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)\n\techo c38-convex-bsd\n\texit ;;\n    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)\n\techo c4-convex-bsd\n\texit ;;\n    CRAY*Y-MP:*:*:*)\n\techo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    CRAY*[A-Z]90:*:*:*)\n\techo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \\\n\t| sed -e 's/CRAY.*\\([A-Z]90\\)/\\1/' \\\n\t      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \\\n\t      -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    CRAY*TS:*:*:*)\n\techo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    CRAY*T3E:*:*:*)\n\techo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    CRAY*SV1:*:*:*)\n\techo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    *:UNICOS/mp:*:*)\n\techo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\\.[^.]*$/.X/'\n\texit ;;\n    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)\n\tFUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`\n\tFUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\\///'`\n\tFUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`\n\techo \"${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}\"\n\texit ;;\n    5000:UNIX_System_V:4.*:*)\n\tFUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\\///'`\n\tFUJITSU_REL=`echo 
${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`\n\techo \"sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}\"\n\texit ;;\n    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\\ Embedded/OS:*:*)\n\techo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}\n\texit ;;\n    sparc*:BSD/OS:*:*)\n\techo sparc-unknown-bsdi${UNAME_RELEASE}\n\texit ;;\n    *:BSD/OS:*:*)\n\techo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}\n\texit ;;\n    *:FreeBSD:*:*)\n\tUNAME_PROCESSOR=`/usr/bin/uname -p`\n\tcase ${UNAME_PROCESSOR} in\n\t    amd64)\n\t\techo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;\n\t    *)\n\t\techo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;\n\tesac\n\texit ;;\n    i*:CYGWIN*:*)\n\techo ${UNAME_MACHINE}-pc-cygwin\n\texit ;;\n    *:MINGW64*:*)\n\techo ${UNAME_MACHINE}-pc-mingw64\n\texit ;;\n    *:MINGW*:*)\n\techo ${UNAME_MACHINE}-pc-mingw32\n\texit ;;\n    *:MSYS*:*)\n\techo ${UNAME_MACHINE}-pc-msys\n\texit ;;\n    i*:windows32*:*)\n\t# uname -m includes \"-pc\" on this system.\n\techo ${UNAME_MACHINE}-mingw32\n\texit ;;\n    i*:PW*:*)\n\techo ${UNAME_MACHINE}-pc-pw32\n\texit ;;\n    *:Interix*:*)\n\tcase ${UNAME_MACHINE} in\n\t    x86)\n\t\techo i586-pc-interix${UNAME_RELEASE}\n\t\texit ;;\n\t    authenticamd | genuineintel | EM64T)\n\t\techo x86_64-unknown-interix${UNAME_RELEASE}\n\t\texit ;;\n\t    IA64)\n\t\techo ia64-unknown-interix${UNAME_RELEASE}\n\t\texit ;;\n\tesac ;;\n    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)\n\techo i${UNAME_MACHINE}-pc-mks\n\texit ;;\n    8664:Windows_NT:*)\n\techo x86_64-pc-mks\n\texit ;;\n    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)\n\t# How do we know it's Interix rather than the generic POSIX subsystem?\n\t# It also conflicts with pre-2.0 versions of AT&T UWIN. 
Should we\n\t# UNAME_MACHINE based on the output of uname instead of i386?\n\techo i586-pc-interix\n\texit ;;\n    i*:UWIN*:*)\n\techo ${UNAME_MACHINE}-pc-uwin\n\texit ;;\n    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)\n\techo x86_64-unknown-cygwin\n\texit ;;\n    p*:CYGWIN*:*)\n\techo powerpcle-unknown-cygwin\n\texit ;;\n    prep*:SunOS:5.*:*)\n\techo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`\n\texit ;;\n    *:GNU:*:*)\n\t# the GNU system\n\techo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`\n\texit ;;\n    *:GNU/*:*:*)\n\t# other systems with GNU libc and userland\n\techo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}\n\texit ;;\n    i*86:Minix:*:*)\n\techo ${UNAME_MACHINE}-pc-minix\n\texit ;;\n    aarch64:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    aarch64_be:Linux:*:*)\n\tUNAME_MACHINE=aarch64_be\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    alpha:Linux:*:*)\n\tcase `sed -n '/^cpu model/s/^.*: \\(.*\\)/\\1/p' < /proc/cpuinfo` in\n\t  EV5)   UNAME_MACHINE=alphaev5 ;;\n\t  EV56)  UNAME_MACHINE=alphaev56 ;;\n\t  PCA56) UNAME_MACHINE=alphapca56 ;;\n\t  PCA57) UNAME_MACHINE=alphapca56 ;;\n\t  EV6)   UNAME_MACHINE=alphaev6 ;;\n\t  EV67)  UNAME_MACHINE=alphaev67 ;;\n\t  EV68*) UNAME_MACHINE=alphaev68 ;;\n\tesac\n\tobjdump --private-headers /bin/sh | grep -q ld.so.1\n\tif test \"$?\" = 0 ; then LIBC=\"gnulibc1\" ; fi\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    arc:Linux:*:* | arceb:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    arm*:Linux:*:*)\n\teval $set_cc_for_build\n\tif echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \\\n\t    | grep -q __ARM_EABI__\n\tthen\n\t    echo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\telse\n\t    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \\\n\t\t| grep -q 
__ARM_PCS_VFP\n\t    then\n\t\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi\n\t    else\n\t\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf\n\t    fi\n\tfi\n\texit ;;\n    avr32*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    cris:Linux:*:*)\n\techo ${UNAME_MACHINE}-axis-linux-${LIBC}\n\texit ;;\n    crisv32:Linux:*:*)\n\techo ${UNAME_MACHINE}-axis-linux-${LIBC}\n\texit ;;\n    e2k:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    frv:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    hexagon:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    i*86:Linux:*:*)\n\techo ${UNAME_MACHINE}-pc-linux-${LIBC}\n\texit ;;\n    ia64:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    m32r*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    m68*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    mips:Linux:*:* | mips64:Linux:*:*)\n\teval $set_cc_for_build\n\tsed 's/^\t//' << EOF >$dummy.c\n\t#undef CPU\n\t#undef ${UNAME_MACHINE}\n\t#undef ${UNAME_MACHINE}el\n\t#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)\n\tCPU=${UNAME_MACHINE}el\n\t#else\n\t#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)\n\tCPU=${UNAME_MACHINE}\n\t#else\n\tCPU=\n\t#endif\n\t#endif\nEOF\n\teval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`\n\ttest x\"${CPU}\" != x && { echo \"${CPU}-unknown-linux-${LIBC}\"; exit; }\n\t;;\n    openrisc*:Linux:*:*)\n\techo or1k-unknown-linux-${LIBC}\n\texit ;;\n    or32:Linux:*:* | or1k*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    padre:Linux:*:*)\n\techo sparc-unknown-linux-${LIBC}\n\texit ;;\n    parisc64:Linux:*:* | hppa64:Linux:*:*)\n\techo hppa64-unknown-linux-${LIBC}\n\texit ;;\n    parisc:Linux:*:* | hppa:Linux:*:*)\n\t# Look for CPU level\n\tcase `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | 
cut -d' ' -f2` in\n\t  PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;\n\t  PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;\n\t  *)    echo hppa-unknown-linux-${LIBC} ;;\n\tesac\n\texit ;;\n    ppc64:Linux:*:*)\n\techo powerpc64-unknown-linux-${LIBC}\n\texit ;;\n    ppc:Linux:*:*)\n\techo powerpc-unknown-linux-${LIBC}\n\texit ;;\n    ppc64le:Linux:*:*)\n\techo powerpc64le-unknown-linux-${LIBC}\n\texit ;;\n    ppcle:Linux:*:*)\n\techo powerpcle-unknown-linux-${LIBC}\n\texit ;;\n    s390:Linux:*:* | s390x:Linux:*:*)\n\techo ${UNAME_MACHINE}-ibm-linux-${LIBC}\n\texit ;;\n    sh64*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    sh*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    sparc:Linux:*:* | sparc64:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    tile*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    vax:Linux:*:*)\n\techo ${UNAME_MACHINE}-dec-linux-${LIBC}\n\texit ;;\n    x86_64:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    xtensa*:Linux:*:*)\n\techo ${UNAME_MACHINE}-unknown-linux-${LIBC}\n\texit ;;\n    i*86:DYNIX/ptx:4*:*)\n\t# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.\n\t# earlier versions are messed up and put the nodename in both\n\t# sysname and nodename.\n\techo i386-sequent-sysv4\n\texit ;;\n    i*86:UNIX_SV:4.2MP:2.*)\n\t# Unixware is an offshoot of SVR4, but it has its own version\n\t# number series starting with 2...\n\t# I am not positive that other SVR4 systems won't match this,\n\t# I just have to hope.  -- rms.\n\t# Use sysv4.2uw... 
so that sysv4* matches it.\n\techo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}\n\texit ;;\n    i*86:OS/2:*:*)\n\t# If we were able to find `uname', then EMX Unix compatibility\n\t# is probably installed.\n\techo ${UNAME_MACHINE}-pc-os2-emx\n\texit ;;\n    i*86:XTS-300:*:STOP)\n\techo ${UNAME_MACHINE}-unknown-stop\n\texit ;;\n    i*86:atheos:*:*)\n\techo ${UNAME_MACHINE}-unknown-atheos\n\texit ;;\n    i*86:syllable:*:*)\n\techo ${UNAME_MACHINE}-pc-syllable\n\texit ;;\n    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)\n\techo i386-unknown-lynxos${UNAME_RELEASE}\n\texit ;;\n    i*86:*DOS:*:*)\n\techo ${UNAME_MACHINE}-pc-msdosdjgpp\n\texit ;;\n    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)\n\tUNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\\/MP$//'`\n\tif grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then\n\t\techo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}\n\telse\n\t\techo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}\n\tfi\n\texit ;;\n    i*86:*:5:[678]*)\n\t# UnixWare 7.x, OpenUNIX and OpenServer 6.\n\tcase `/bin/uname -X | grep \"^Machine\"` in\n\t    *486*)\t     UNAME_MACHINE=i486 ;;\n\t    *Pentium)\t     UNAME_MACHINE=i586 ;;\n\t    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;\n\tesac\n\techo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}\n\texit ;;\n    i*86:*:3.2:*)\n\tif test -f /usr/options/cb.name; then\n\t\tUNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`\n\t\techo ${UNAME_MACHINE}-pc-isc$UNAME_REL\n\telif /bin/uname -X 2>/dev/null >/dev/null ; then\n\t\tUNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`\n\t\t(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486\n\t\t(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \\\n\t\t\t&& UNAME_MACHINE=i586\n\t\t(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \\\n\t\t\t&& UNAME_MACHINE=i686\n\t\t(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \\\n\t\t\t&& UNAME_MACHINE=i686\n\t\techo ${UNAME_MACHINE}-pc-sco$UNAME_REL\n\telse\n\t\techo 
${UNAME_MACHINE}-pc-sysv32\n\tfi\n\texit ;;\n    pc:*:*:*)\n\t# Left here for compatibility:\n\t# uname -m prints for DJGPP always 'pc', but it prints nothing about\n\t# the processor, so we play safe by assuming i586.\n\t# Note: whatever this is, it MUST be the same as what config.sub\n\t# prints for the \"djgpp\" host, or else GDB configury will decide that\n\t# this is a cross-build.\n\techo i586-pc-msdosdjgpp\n\texit ;;\n    Intel:Mach:3*:*)\n\techo i386-pc-mach3\n\texit ;;\n    paragon:*:*:*)\n\techo i860-intel-osf1\n\texit ;;\n    i860:*:4.*:*) # i860-SVR4\n\tif grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then\n\t  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4\n\telse # Add other i860-SVR4 vendors below as they are discovered.\n\t  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4\n\tfi\n\texit ;;\n    mini*:CTIX:SYS*5:*)\n\t# \"miniframe\"\n\techo m68010-convergent-sysv\n\texit ;;\n    mc68k:UNIX:SYSTEM5:3.51m)\n\techo m68k-convergent-sysv\n\texit ;;\n    M680?0:D-NIX:5.3:*)\n\techo m68k-diab-dnix\n\texit ;;\n    M68*:*:R3V[5678]*:*)\n\ttest -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;\n    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)\n\tOS_REL=''\n\ttest -r /etc/.relid \\\n\t&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \\([0-9][0-9]\\).*/\\1/p' < /etc/.relid`\n\t/bin/uname -p 2>/dev/null | grep 86 >/dev/null \\\n\t  && { echo i486-ncr-sysv4.3${OS_REL}; exit; }\n\t/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \\\n\t  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;\n    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)\n\t/bin/uname -p 2>/dev/null | grep 86 >/dev/null \\\n\t  && { echo i486-ncr-sysv4; exit; } ;;\n    NCR*:*:4.2:* | MPRAS*:*:4.2:*)\n\tOS_REL='.3'\n\ttest -r /etc/.relid \\\n\t    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \\([0-9][0-9]\\).*/\\1/p' < 
/etc/.relid`\n\t/bin/uname -p 2>/dev/null | grep 86 >/dev/null \\\n\t    && { echo i486-ncr-sysv4.3${OS_REL}; exit; }\n\t/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \\\n\t    && { echo i586-ncr-sysv4.3${OS_REL}; exit; }\n\t/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \\\n\t    && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;\n    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)\n\techo m68k-unknown-lynxos${UNAME_RELEASE}\n\texit ;;\n    mc68030:UNIX_System_V:4.*:*)\n\techo m68k-atari-sysv4\n\texit ;;\n    TSUNAMI:LynxOS:2.*:*)\n\techo sparc-unknown-lynxos${UNAME_RELEASE}\n\texit ;;\n    rs6000:LynxOS:2.*:*)\n\techo rs6000-unknown-lynxos${UNAME_RELEASE}\n\texit ;;\n    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)\n\techo powerpc-unknown-lynxos${UNAME_RELEASE}\n\texit ;;\n    SM[BE]S:UNIX_SV:*:*)\n\techo mips-dde-sysv${UNAME_RELEASE}\n\texit ;;\n    RM*:ReliantUNIX-*:*:*)\n\techo mips-sni-sysv4\n\texit ;;\n    RM*:SINIX-*:*:*)\n\techo mips-sni-sysv4\n\texit ;;\n    *:SINIX-*:*:*)\n\tif uname -p 2>/dev/null >/dev/null ; then\n\t\tUNAME_MACHINE=`(uname -p) 2>/dev/null`\n\t\techo ${UNAME_MACHINE}-sni-sysv4\n\telse\n\t\techo ns32k-sni-sysv\n\tfi\n\texit ;;\n    PENTIUM:*:4.0*:*)\t# Unisys `ClearPath HMP IX 4000' SVR4/MP effort\n\t\t\t# says <Richard.M.Bartel@ccMail.Census.GOV>\n\techo i586-unisys-sysv4\n\texit ;;\n    *:UNIX_System_V:4*:FTX*)\n\t# From Gerald Hewes <hewes@openmarket.com>.\n\t# How about differentiating between stratus architectures? 
-djm\n\techo hppa1.1-stratus-sysv4\n\texit ;;\n    *:*:*:FTX*)\n\t# From seanf@swdc.stratus.com.\n\techo i860-stratus-sysv4\n\texit ;;\n    i*86:VOS:*:*)\n\t# From Paul.Green@stratus.com.\n\techo ${UNAME_MACHINE}-stratus-vos\n\texit ;;\n    *:VOS:*:*)\n\t# From Paul.Green@stratus.com.\n\techo hppa1.1-stratus-vos\n\texit ;;\n    mc68*:A/UX:*:*)\n\techo m68k-apple-aux${UNAME_RELEASE}\n\texit ;;\n    news*:NEWS-OS:6*:*)\n\techo mips-sony-newsos6\n\texit ;;\n    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)\n\tif [ -d /usr/nec ]; then\n\t\techo mips-nec-sysv${UNAME_RELEASE}\n\telse\n\t\techo mips-unknown-sysv${UNAME_RELEASE}\n\tfi\n\texit ;;\n    BeBox:BeOS:*:*)\t# BeOS running on hardware made by Be, PPC only.\n\techo powerpc-be-beos\n\texit ;;\n    BeMac:BeOS:*:*)\t# BeOS running on Mac or Mac clone, PPC only.\n\techo powerpc-apple-beos\n\texit ;;\n    BePC:BeOS:*:*)\t# BeOS running on Intel PC compatible.\n\techo i586-pc-beos\n\texit ;;\n    BePC:Haiku:*:*)\t# Haiku running on Intel PC compatible.\n\techo i586-pc-haiku\n\texit ;;\n    x86_64:Haiku:*:*)\n\techo x86_64-unknown-haiku\n\texit ;;\n    SX-4:SUPER-UX:*:*)\n\techo sx4-nec-superux${UNAME_RELEASE}\n\texit ;;\n    SX-5:SUPER-UX:*:*)\n\techo sx5-nec-superux${UNAME_RELEASE}\n\texit ;;\n    SX-6:SUPER-UX:*:*)\n\techo sx6-nec-superux${UNAME_RELEASE}\n\texit ;;\n    SX-7:SUPER-UX:*:*)\n\techo sx7-nec-superux${UNAME_RELEASE}\n\texit ;;\n    SX-8:SUPER-UX:*:*)\n\techo sx8-nec-superux${UNAME_RELEASE}\n\texit ;;\n    SX-8R:SUPER-UX:*:*)\n\techo sx8r-nec-superux${UNAME_RELEASE}\n\texit ;;\n    Power*:Rhapsody:*:*)\n\techo powerpc-apple-rhapsody${UNAME_RELEASE}\n\texit ;;\n    *:Rhapsody:*:*)\n\techo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}\n\texit ;;\n    *:Darwin:*:*)\n\tUNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown\n\teval $set_cc_for_build\n\tif test \"$UNAME_PROCESSOR\" = unknown ; then\n\t    UNAME_PROCESSOR=powerpc\n\tfi\n\tif test `echo \"$UNAME_RELEASE\" | sed -e 's/\\..*//'` 
-le 10 ; then\n\t    if [ \"$CC_FOR_BUILD\" != 'no_compiler_found' ]; then\n\t\tif (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \\\n\t\t    (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \\\n\t\t    grep IS_64BIT_ARCH >/dev/null\n\t\tthen\n\t\t    case $UNAME_PROCESSOR in\n\t\t\ti386) UNAME_PROCESSOR=x86_64 ;;\n\t\t\tpowerpc) UNAME_PROCESSOR=powerpc64 ;;\n\t\t    esac\n\t\tfi\n\t    fi\n\telif test \"$UNAME_PROCESSOR\" = i386 ; then\n\t    # Avoid executing cc on OS X 10.9, as it ships with a stub\n\t    # that puts up a graphical alert prompting to install\n\t    # developer tools.  Any system running Mac OS X 10.7 or\n\t    # later (Darwin 11 and later) is required to have a 64-bit\n\t    # processor. This is not true of the ARM version of Darwin\n\t    # that Apple uses in portable devices.\n\t    UNAME_PROCESSOR=x86_64\n\tfi\n\techo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}\n\texit ;;\n    *:procnto*:*:* | *:QNX:[0123456789]*:*)\n\tUNAME_PROCESSOR=`uname -p`\n\tif test \"$UNAME_PROCESSOR\" = \"x86\"; then\n\t\tUNAME_PROCESSOR=i386\n\t\tUNAME_MACHINE=pc\n\tfi\n\techo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}\n\texit ;;\n    *:QNX:*:4*)\n\techo i386-pc-qnx\n\texit ;;\n    NEO-?:NONSTOP_KERNEL:*:*)\n\techo neo-tandem-nsk${UNAME_RELEASE}\n\texit ;;\n    NSE-*:NONSTOP_KERNEL:*:*)\n\techo nse-tandem-nsk${UNAME_RELEASE}\n\texit ;;\n    NSR-?:NONSTOP_KERNEL:*:*)\n\techo nsr-tandem-nsk${UNAME_RELEASE}\n\texit ;;\n    *:NonStop-UX:*:*)\n\techo mips-compaq-nonstopux\n\texit ;;\n    BS2000:POSIX*:*:*)\n\techo bs2000-siemens-sysv\n\texit ;;\n    DS/*:UNIX_System_V:*:*)\n\techo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}\n\texit ;;\n    *:Plan9:*:*)\n\t# \"uname -m\" is not consistent, so use $cputype instead. 
386\n\t# is converted to i386 for consistency with other x86\n\t# operating systems.\n\tif test \"$cputype\" = \"386\"; then\n\t    UNAME_MACHINE=i386\n\telse\n\t    UNAME_MACHINE=\"$cputype\"\n\tfi\n\techo ${UNAME_MACHINE}-unknown-plan9\n\texit ;;\n    *:TOPS-10:*:*)\n\techo pdp10-unknown-tops10\n\texit ;;\n    *:TENEX:*:*)\n\techo pdp10-unknown-tenex\n\texit ;;\n    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)\n\techo pdp10-dec-tops20\n\texit ;;\n    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)\n\techo pdp10-xkl-tops20\n\texit ;;\n    *:TOPS-20:*:*)\n\techo pdp10-unknown-tops20\n\texit ;;\n    *:ITS:*:*)\n\techo pdp10-unknown-its\n\texit ;;\n    SEI:*:*:SEIUX)\n\techo mips-sei-seiux${UNAME_RELEASE}\n\texit ;;\n    *:DragonFly:*:*)\n\techo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`\n\texit ;;\n    *:*VMS:*:*)\n\tUNAME_MACHINE=`(uname -p) 2>/dev/null`\n\tcase \"${UNAME_MACHINE}\" in\n\t    A*) echo alpha-dec-vms ; exit ;;\n\t    I*) echo ia64-dec-vms ; exit ;;\n\t    V*) echo vax-dec-vms ; exit ;;\n\tesac ;;\n    *:XENIX:*:SysV)\n\techo i386-pc-xenix\n\texit ;;\n    i*86:skyos:*:*)\n\techo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'\n\texit ;;\n    i*86:rdos:*:*)\n\techo ${UNAME_MACHINE}-pc-rdos\n\texit ;;\n    i*86:AROS:*:*)\n\techo ${UNAME_MACHINE}-pc-aros\n\texit ;;\n    x86_64:VMkernel:*:*)\n\techo ${UNAME_MACHINE}-unknown-esx\n\texit ;;\nesac\n\ncat >&2 <<EOF\n$0: unable to guess system type\n\nThis script, last modified $timestamp, has failed to recognize\nthe operating system you are using. 
It is advised that you\ndownload the most up to date version of the config scripts from\n\n  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD\nand\n  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD\n\nIf the version you run ($0) is already up to date, please\nsend the following data and any information you think might be\npertinent to <config-patches@gnu.org> in order to provide the needed\ninformation to handle your system.\n\nconfig.guess timestamp = $timestamp\n\nuname -m = `(uname -m) 2>/dev/null || echo unknown`\nuname -r = `(uname -r) 2>/dev/null || echo unknown`\nuname -s = `(uname -s) 2>/dev/null || echo unknown`\nuname -v = `(uname -v) 2>/dev/null || echo unknown`\n\n/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`\n/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`\n\nhostinfo               = `(hostinfo) 2>/dev/null`\n/bin/universe          = `(/bin/universe) 2>/dev/null`\n/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`\n/bin/arch              = `(/bin/arch) 2>/dev/null`\n/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`\n/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`\n\nUNAME_MACHINE = ${UNAME_MACHINE}\nUNAME_RELEASE = ${UNAME_RELEASE}\nUNAME_SYSTEM  = ${UNAME_SYSTEM}\nUNAME_VERSION = ${UNAME_VERSION}\nEOF\n\nexit 1\n\n# Local variables:\n# eval: (add-hook 'write-file-hooks 'time-stamp)\n# time-stamp-start: \"timestamp='\"\n# time-stamp-format: \"%:y-%02m-%02d\"\n# time-stamp-end: \"'\"\n# End:\n\n"
  },
  {
    "path": "configfsf.sub",
    "content": "#! /bin/sh\n# Configuration validation subroutine script.\n#   Copyright 1992-2015 Free Software Foundation, Inc.\n\ntimestamp='2015-03-08'\n\n# This file is free software; you can redistribute it and/or modify it\n# under the terms of the GNU General Public License as published by\n# the Free Software Foundation; either version 3 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n# General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program; if not, see <http://www.gnu.org/licenses/>.\n#\n# As a special exception to the GNU General Public License, if you\n# distribute this file as part of a program that contains a\n# configuration script generated by Autoconf, you may include it under\n# the same distribution terms that you use for the rest of that\n# program.  This Exception is an additional permission under section 7\n# of the GNU General Public License, version 3 (\"GPLv3\").\n\n\n# Please send patches to <config-patches@gnu.org>.\n#\n# Configuration subroutine to validate and canonicalize a configuration type.\n# Supply the specified configuration type as an argument.\n# If it is invalid, we print an error message on stderr and exit with code 1.\n# Otherwise, we print the canonical config type on stdout and succeed.\n\n# You can get the latest version of this script from:\n# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD\n\n# This file is supposed to be the same for all GNU packages\n# and recognize all the CPU types, system types and aliases\n# that are meaningful with *any* GNU software.\n# Each package is responsible for reporting which valid configurations\n# it does not support.  
The user should be able to distinguish\n# a failure to support a valid configuration from a meaningless\n# configuration.\n\n# The goal of this file is to map all the various variations of a given\n# machine specification into a single specification in the form:\n#\tCPU_TYPE-MANUFACTURER-OPERATING_SYSTEM\n# or in some cases, the newer four-part form:\n#\tCPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM\n# It is wrong to echo any other type of specification.\n\nme=`echo \"$0\" | sed -e 's,.*/,,'`\n\nusage=\"\\\nUsage: $0 [OPTION] CPU-MFR-OPSYS\n       $0 [OPTION] ALIAS\n\nCanonicalize a configuration name.\n\nOperation modes:\n  -h, --help         print this help, then exit\n  -t, --time-stamp   print date of last modification, then exit\n  -v, --version      print version number, then exit\n\nReport bugs and patches to <config-patches@gnu.org>.\"\n\nversion=\"\\\nGNU config.sub ($timestamp)\n\nCopyright 1992-2015 Free Software Foundation, Inc.\n\nThis is free software; see the source for copying conditions.  
There is NO\nwarranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\"\n\nhelp=\"\nTry \\`$me --help' for more information.\"\n\n# Parse command line\nwhile test $# -gt 0 ; do\n  case $1 in\n    --time-stamp | --time* | -t )\n       echo \"$timestamp\" ; exit ;;\n    --version | -v )\n       echo \"$version\" ; exit ;;\n    --help | --h* | -h )\n       echo \"$usage\"; exit ;;\n    -- )     # Stop option processing\n       shift; break ;;\n    - )\t# Use stdin as input.\n       break ;;\n    -* )\n       echo \"$me: invalid option $1$help\"\n       exit 1 ;;\n\n    *local*)\n       # First pass through any local machine types.\n       echo $1\n       exit ;;\n\n    * )\n       break ;;\n  esac\ndone\n\ncase $# in\n 0) echo \"$me: missing argument$help\" >&2\n    exit 1;;\n 1) ;;\n *) echo \"$me: too many arguments$help\" >&2\n    exit 1;;\nesac\n\n# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).\n# Here we must recognize all the valid KERNEL-OS combinations.\nmaybe_os=`echo $1 | sed 's/^\\(.*\\)-\\([^-]*-[^-]*\\)$/\\2/'`\ncase $maybe_os in\n  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \\\n  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \\\n  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \\\n  kopensolaris*-gnu* | \\\n  storm-chaos* | os2-emx* | rtmk-nova*)\n    os=-$maybe_os\n    basic_machine=`echo $1 | sed 's/^\\(.*\\)-\\([^-]*-[^-]*\\)$/\\1/'`\n    ;;\n  android-linux)\n    os=-linux-android\n    basic_machine=`echo $1 | sed 's/^\\(.*\\)-\\([^-]*-[^-]*\\)$/\\1/'`-unknown\n    ;;\n  *)\n    basic_machine=`echo $1 | sed 's/-[^-]*$//'`\n    if [ $basic_machine != $1 ]\n    then os=`echo $1 | sed 's/.*-/-/'`\n    else os=; fi\n    ;;\nesac\n\n### Let's recognize common machines as not being operating systems so\n### that things like config.sub decstation-3100 work.  
We also\n### recognize some manufacturers as not being operating systems, so we\n### can provide default operating systems below.\ncase $os in\n\t-sun*os*)\n\t\t# Prevent following clause from handling this invalid input.\n\t\t;;\n\t-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \\\n\t-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \\\n\t-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \\\n\t-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\\\n\t-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \\\n\t-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \\\n\t-apple | -axis | -knuth | -cray | -microblaze*)\n\t\tos=\n\t\tbasic_machine=$1\n\t\t;;\n\t-bluegene*)\n\t\tos=-cnk\n\t\t;;\n\t-sim | -cisco | -oki | -wec | -winbond)\n\t\tos=\n\t\tbasic_machine=$1\n\t\t;;\n\t-scout)\n\t\t;;\n\t-wrs)\n\t\tos=-vxworks\n\t\tbasic_machine=$1\n\t\t;;\n\t-chorusos*)\n\t\tos=-chorusos\n\t\tbasic_machine=$1\n\t\t;;\n\t-chorusrdb)\n\t\tos=-chorusrdb\n\t\tbasic_machine=$1\n\t\t;;\n\t-hiux*)\n\t\tos=-hiuxwe2\n\t\t;;\n\t-sco6)\n\t\tos=-sco5v6\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco5)\n\t\tos=-sco3.2v5\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco4)\n\t\tos=-sco3.2v4\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco3.2.[4-9]*)\n\t\tos=`echo $os | sed -e 's/sco3.2./sco3.2v/'`\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco3.2v[4-9]*)\n\t\t# Don't forget version if it is 3.2v4 or newer.\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco5v6*)\n\t\t# Don't forget version if it is 3.2v4 or newer.\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-sco*)\n\t\tos=-sco3.2v2\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-udk*)\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-isc)\n\t\tos=-isc2.2\n\t\tbasic_machine=`echo $1 | sed -e 
's/86-.*/86-pc/'`\n\t\t;;\n\t-clix*)\n\t\tbasic_machine=clipper-intergraph\n\t\t;;\n\t-isc*)\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`\n\t\t;;\n\t-lynx*178)\n\t\tos=-lynxos178\n\t\t;;\n\t-lynx*5)\n\t\tos=-lynxos5\n\t\t;;\n\t-lynx*)\n\t\tos=-lynxos\n\t\t;;\n\t-ptx*)\n\t\tbasic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`\n\t\t;;\n\t-windowsnt*)\n\t\tos=`echo $os | sed -e 's/windowsnt/winnt/'`\n\t\t;;\n\t-psos*)\n\t\tos=-psos\n\t\t;;\n\t-mint | -mint[0-9]*)\n\t\tbasic_machine=m68k-atari\n\t\tos=-mint\n\t\t;;\nesac\n\n# Decode aliases for certain CPU-COMPANY combinations.\ncase $basic_machine in\n\t# Recognize the basic CPU types without company name.\n\t# Some are omitted here because they have special meanings below.\n\t1750a | 580 \\\n\t| a29k \\\n\t| aarch64 | aarch64_be \\\n\t| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \\\n\t| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \\\n\t| am33_2.0 \\\n\t| arc | arceb \\\n\t| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \\\n\t| avr | avr32 \\\n\t| be32 | be64 \\\n\t| bfin \\\n\t| c4x | c8051 | clipper \\\n\t| d10v | d30v | dlx | dsp16xx \\\n\t| e2k | epiphany \\\n\t| fido | fr30 | frv | ft32 \\\n\t| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \\\n\t| hexagon \\\n\t| i370 | i860 | i960 | ia64 \\\n\t| ip2k | iq2000 \\\n\t| k10m \\\n\t| le32 | le64 \\\n\t| lm32 \\\n\t| m32c | m32r | m32rle | m68000 | m68k | m88k \\\n\t| maxq | mb | microblaze | microblazeel | mcore | mep | metag \\\n\t| mips | mipsbe | mipseb | mipsel | mipsle \\\n\t| mips16 \\\n\t| mips64 | mips64el \\\n\t| mips64octeon | mips64octeonel \\\n\t| mips64orion | mips64orionel \\\n\t| mips64r5900 | mips64r5900el \\\n\t| mips64vr | mips64vrel \\\n\t| mips64vr4100 | mips64vr4100el \\\n\t| mips64vr4300 | mips64vr4300el \\\n\t| mips64vr5000 | mips64vr5000el \\\n\t| mips64vr5900 | mips64vr5900el \\\n\t| mipsisa32 | mipsisa32el \\\n\t| mipsisa32r2 | 
mipsisa32r2el \\\n\t| mipsisa32r6 | mipsisa32r6el \\\n\t| mipsisa64 | mipsisa64el \\\n\t| mipsisa64r2 | mipsisa64r2el \\\n\t| mipsisa64sb1 | mipsisa64sb1el \\\n\t| mipsisa64sr71k | mipsisa64sr71kel \\\n\t| mipsisa64r6 | mipsisa64r6el \\\n\t| mipsr5900 | mipsr5900el \\\n\t| mipstx39 | mipstx39el \\\n\t| mn10200 | mn10300 \\\n\t| moxie \\\n\t| mt \\\n\t| msp430 \\\n\t| nds32 | nds32le | nds32be \\\n\t| nios | nios2 | nios2eb | nios2el \\\n\t| ns16k | ns32k \\\n\t| open8 | or1k | or1knd | or32 \\\n\t| pdp10 | pdp11 | pj | pjl \\\n\t| powerpc | powerpc64 | powerpc64le | powerpcle \\\n\t| pyramid \\\n\t| riscv32 | riscv64 \\\n\t| rl78 | rx \\\n\t| score \\\n\t| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \\\n\t| sh64 | sh64le \\\n\t| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \\\n\t| sparcv8 | sparcv9 | sparcv9b | sparcv9v \\\n\t| spu \\\n\t| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \\\n\t| ubicom32 \\\n\t| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \\\n\t| visium \\\n\t| we32k \\\n\t| x86 | xc16x | xstormy16 | xtensa \\\n\t| z8k | z80)\n\t\tbasic_machine=$basic_machine-unknown\n\t\t;;\n\tc54x)\n\t\tbasic_machine=tic54x-unknown\n\t\t;;\n\tc55x)\n\t\tbasic_machine=tic55x-unknown\n\t\t;;\n\tc6x)\n\t\tbasic_machine=tic6x-unknown\n\t\t;;\n\tleon|leon[3-9])\n\t        basic_machine=sparc-$basic_machine\n\t        ;;\n\tm6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)\n\t\tbasic_machine=$basic_machine-unknown\n\t\tos=-none\n\t\t;;\n\tm88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)\n\t\t;;\n\tms1)\n\t\tbasic_machine=mt-unknown\n\t\t;;\n\n\tstrongarm | thumb | xscale)\n\t\tbasic_machine=arm-unknown\n\t\t;;\n\txgate)\n\t\tbasic_machine=$basic_machine-unknown\n\t\tos=-none\n\t\t;;\n\txscaleeb)\n\t\tbasic_machine=armeb-unknown\n\t\t;;\n\n\txscaleel)\n\t\tbasic_machine=armel-unknown\n\t\t;;\n\n\t# We use `pc' rather than `unknown'\n\t# 
because (1) that's what they normally are, and\n\t# (2) the word \"unknown\" tends to confuse beginning users.\n\ti*86 | x86_64)\n\t  basic_machine=$basic_machine-pc\n\t  ;;\n\t# Object if more than one company name word.\n\t*-*-*)\n\t\techo Invalid configuration \\`$1\\': machine \\`$basic_machine\\' not recognized 1>&2\n\t\texit 1\n\t\t;;\n\t# Recognize the basic CPU types with company name.\n\t580-* \\\n\t| a29k-* \\\n\t| aarch64-* | aarch64_be-* \\\n\t| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \\\n\t| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \\\n\t| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \\\n\t| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \\\n\t| avr-* | avr32-* \\\n\t| be32-* | be64-* \\\n\t| bfin-* | bs2000-* \\\n\t| c[123]* | c30-* | [cjt]90-* | c4x-* \\\n\t| c8051-* | clipper-* | craynv-* | cydra-* \\\n\t| d10v-* | d30v-* | dlx-* \\\n\t| e2k-* | elxsi-* \\\n\t| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \\\n\t| h8300-* | h8500-* \\\n\t| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \\\n\t| hexagon-* \\\n\t| i*86-* | i860-* | i960-* | ia64-* \\\n\t| ip2k-* | iq2000-* \\\n\t| k10m-* \\\n\t| le32-* | le64-* \\\n\t| lm32-* \\\n\t| m32c-* | m32r-* | m32rle-* \\\n\t| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \\\n\t| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \\\n\t| microblaze-* | microblazeel-* \\\n\t| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \\\n\t| mips16-* \\\n\t| mips64-* | mips64el-* \\\n\t| mips64octeon-* | mips64octeonel-* \\\n\t| mips64orion-* | mips64orionel-* \\\n\t| mips64r5900-* | mips64r5900el-* \\\n\t| mips64vr-* | mips64vrel-* \\\n\t| mips64vr4100-* | mips64vr4100el-* \\\n\t| mips64vr4300-* | mips64vr4300el-* \\\n\t| mips64vr5000-* | mips64vr5000el-* \\\n\t| mips64vr5900-* | mips64vr5900el-* \\\n\t| mipsisa32-* | mipsisa32el-* \\\n\t| mipsisa32r2-* | mipsisa32r2el-* \\\n\t| mipsisa32r6-* | mipsisa32r6el-* \\\n\t| mipsisa64-* | 
mipsisa64el-* \\\n\t| mipsisa64r2-* | mipsisa64r2el-* \\\n\t| mipsisa64r6-* | mipsisa64r6el-* \\\n\t| mipsisa64sb1-* | mipsisa64sb1el-* \\\n\t| mipsisa64sr71k-* | mipsisa64sr71kel-* \\\n\t| mipsr5900-* | mipsr5900el-* \\\n\t| mipstx39-* | mipstx39el-* \\\n\t| mmix-* \\\n\t| mt-* \\\n\t| msp430-* \\\n\t| nds32-* | nds32le-* | nds32be-* \\\n\t| nios-* | nios2-* | nios2eb-* | nios2el-* \\\n\t| none-* | np1-* | ns16k-* | ns32k-* \\\n\t| open8-* \\\n\t| or1k*-* \\\n\t| orion-* \\\n\t| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \\\n\t| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \\\n\t| pyramid-* \\\n\t| rl78-* | romp-* | rs6000-* | rx-* \\\n\t| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \\\n\t| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \\\n\t| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \\\n\t| sparclite-* \\\n\t| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \\\n\t| tahoe-* \\\n\t| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \\\n\t| tile*-* \\\n\t| tron-* \\\n\t| ubicom32-* \\\n\t| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \\\n\t| vax-* \\\n\t| visium-* \\\n\t| we32k-* \\\n\t| x86-* | x86_64-* | xc16x-* | xps100-* \\\n\t| xstormy16-* | xtensa*-* \\\n\t| ymp-* \\\n\t| z8k-* | z80-*)\n\t\t;;\n\t# Recognize the basic CPU types without company name, with glob match.\n\txtensa*)\n\t\tbasic_machine=$basic_machine-unknown\n\t\t;;\n\t# Recognize the various machine names and aliases which stand\n\t# for a CPU type and a company and sometimes even an OS.\n\t386bsd)\n\t\tbasic_machine=i386-unknown\n\t\tos=-bsd\n\t\t;;\n\t3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | 
unixpc)\n\t\tbasic_machine=m68000-att\n\t\t;;\n\t3b*)\n\t\tbasic_machine=we32k-att\n\t\t;;\n\ta29khif)\n\t\tbasic_machine=a29k-amd\n\t\tos=-udi\n\t\t;;\n\tabacus)\n\t\tbasic_machine=abacus-unknown\n\t\t;;\n\tadobe68k)\n\t\tbasic_machine=m68010-adobe\n\t\tos=-scout\n\t\t;;\n\talliant | fx80)\n\t\tbasic_machine=fx80-alliant\n\t\t;;\n\taltos | altos3068)\n\t\tbasic_machine=m68k-altos\n\t\t;;\n\tam29k)\n\t\tbasic_machine=a29k-none\n\t\tos=-bsd\n\t\t;;\n\tamd64)\n\t\tbasic_machine=x86_64-pc\n\t\t;;\n\tamd64-*)\n\t\tbasic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tamdahl)\n\t\tbasic_machine=580-amdahl\n\t\tos=-sysv\n\t\t;;\n\tamiga | amiga-*)\n\t\tbasic_machine=m68k-unknown\n\t\t;;\n\tamigaos | amigados)\n\t\tbasic_machine=m68k-unknown\n\t\tos=-amigaos\n\t\t;;\n\tamigaunix | amix)\n\t\tbasic_machine=m68k-unknown\n\t\tos=-sysv4\n\t\t;;\n\tapollo68)\n\t\tbasic_machine=m68k-apollo\n\t\tos=-sysv\n\t\t;;\n\tapollo68bsd)\n\t\tbasic_machine=m68k-apollo\n\t\tos=-bsd\n\t\t;;\n\taros)\n\t\tbasic_machine=i386-pc\n\t\tos=-aros\n\t\t;;\n\tasmjs)\n\t        basic_machine=asmjs-unknown\n\t        ;;\n\taux)\n\t\tbasic_machine=m68k-apple\n\t\tos=-aux\n\t\t;;\n\tbalance)\n\t\tbasic_machine=ns32k-sequent\n\t\tos=-dynix\n\t\t;;\n\tblackfin)\n\t\tbasic_machine=bfin-unknown\n\t\tos=-linux\n\t\t;;\n\tblackfin-*)\n\t\tbasic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\tos=-linux\n\t\t;;\n\tbluegene*)\n\t\tbasic_machine=powerpc-ibm\n\t\tos=-cnk\n\t\t;;\n\tc54x-*)\n\t\tbasic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tc55x-*)\n\t\tbasic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tc6x-*)\n\t\tbasic_machine=tic6x-`echo $basic_machine | sed 
's/^[^-]*-//'`\n\t\t;;\n\tc90)\n\t\tbasic_machine=c90-cray\n\t\tos=-unicos\n\t\t;;\n\tcegcc)\n\t\tbasic_machine=arm-unknown\n\t\tos=-cegcc\n\t\t;;\n\tconvex-c1)\n\t\tbasic_machine=c1-convex\n\t\tos=-bsd\n\t\t;;\n\tconvex-c2)\n\t\tbasic_machine=c2-convex\n\t\tos=-bsd\n\t\t;;\n\tconvex-c32)\n\t\tbasic_machine=c32-convex\n\t\tos=-bsd\n\t\t;;\n\tconvex-c34)\n\t\tbasic_machine=c34-convex\n\t\tos=-bsd\n\t\t;;\n\tconvex-c38)\n\t\tbasic_machine=c38-convex\n\t\tos=-bsd\n\t\t;;\n\tcray | j90)\n\t\tbasic_machine=j90-cray\n\t\tos=-unicos\n\t\t;;\n\tcraynv)\n\t\tbasic_machine=craynv-cray\n\t\tos=-unicosmp\n\t\t;;\n\tcr16 | cr16-*)\n\t\tbasic_machine=cr16-unknown\n\t\tos=-elf\n\t\t;;\n\tcrds | unos)\n\t\tbasic_machine=m68k-crds\n\t\t;;\n\tcrisv32 | crisv32-* | etraxfs*)\n\t\tbasic_machine=crisv32-axis\n\t\t;;\n\tcris | cris-* | etrax*)\n\t\tbasic_machine=cris-axis\n\t\t;;\n\tcrx)\n\t\tbasic_machine=crx-unknown\n\t\tos=-elf\n\t\t;;\n\tda30 | da30-*)\n\t\tbasic_machine=m68k-da30\n\t\t;;\n\tdecstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)\n\t\tbasic_machine=mips-dec\n\t\t;;\n\tdecsystem10* | dec10*)\n\t\tbasic_machine=pdp10-dec\n\t\tos=-tops10\n\t\t;;\n\tdecsystem20* | dec20*)\n\t\tbasic_machine=pdp10-dec\n\t\tos=-tops20\n\t\t;;\n\tdelta | 3300 | motorola-3300 | motorola-delta \\\n\t      | 3300-motorola | delta-motorola)\n\t\tbasic_machine=m68k-motorola\n\t\t;;\n\tdelta88)\n\t\tbasic_machine=m88k-motorola\n\t\tos=-sysv3\n\t\t;;\n\tdicos)\n\t\tbasic_machine=i686-pc\n\t\tos=-dicos\n\t\t;;\n\tdjgpp)\n\t\tbasic_machine=i586-pc\n\t\tos=-msdosdjgpp\n\t\t;;\n\tdpx20 | dpx20-*)\n\t\tbasic_machine=rs6000-bull\n\t\tos=-bosx\n\t\t;;\n\tdpx2* | dpx2*-bull)\n\t\tbasic_machine=m68k-bull\n\t\tos=-sysv3\n\t\t;;\n\tebmon29k)\n\t\tbasic_machine=a29k-amd\n\t\tos=-ebmon\n\t\t;;\n\telxsi)\n\t\tbasic_machine=elxsi-elxsi\n\t\tos=-bsd\n\t\t;;\n\tencore | umax | mmax)\n\t\tbasic_machine=ns32k-encore\n\t\t;;\n\tes1800 | OSE68k | ose68k | ose | 
OSE)\n\t\tbasic_machine=m68k-ericsson\n\t\tos=-ose\n\t\t;;\n\tfx2800)\n\t\tbasic_machine=i860-alliant\n\t\t;;\n\tgenix)\n\t\tbasic_machine=ns32k-ns\n\t\t;;\n\tgmicro)\n\t\tbasic_machine=tron-gmicro\n\t\tos=-sysv\n\t\t;;\n\tgo32)\n\t\tbasic_machine=i386-pc\n\t\tos=-go32\n\t\t;;\n\th3050r* | hiux*)\n\t\tbasic_machine=hppa1.1-hitachi\n\t\tos=-hiuxwe2\n\t\t;;\n\th8300hms)\n\t\tbasic_machine=h8300-hitachi\n\t\tos=-hms\n\t\t;;\n\th8300xray)\n\t\tbasic_machine=h8300-hitachi\n\t\tos=-xray\n\t\t;;\n\th8500hms)\n\t\tbasic_machine=h8500-hitachi\n\t\tos=-hms\n\t\t;;\n\tharris)\n\t\tbasic_machine=m88k-harris\n\t\tos=-sysv3\n\t\t;;\n\thp300-*)\n\t\tbasic_machine=m68k-hp\n\t\t;;\n\thp300bsd)\n\t\tbasic_machine=m68k-hp\n\t\tos=-bsd\n\t\t;;\n\thp300hpux)\n\t\tbasic_machine=m68k-hp\n\t\tos=-hpux\n\t\t;;\n\thp3k9[0-9][0-9] | hp9[0-9][0-9])\n\t\tbasic_machine=hppa1.0-hp\n\t\t;;\n\thp9k2[0-9][0-9] | hp9k31[0-9])\n\t\tbasic_machine=m68000-hp\n\t\t;;\n\thp9k3[2-9][0-9])\n\t\tbasic_machine=m68k-hp\n\t\t;;\n\thp9k6[0-9][0-9] | hp6[0-9][0-9])\n\t\tbasic_machine=hppa1.0-hp\n\t\t;;\n\thp9k7[0-79][0-9] | hp7[0-79][0-9])\n\t\tbasic_machine=hppa1.1-hp\n\t\t;;\n\thp9k78[0-9] | hp78[0-9])\n\t\t# FIXME: really hppa2.0-hp\n\t\tbasic_machine=hppa1.1-hp\n\t\t;;\n\thp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)\n\t\t# FIXME: really hppa2.0-hp\n\t\tbasic_machine=hppa1.1-hp\n\t\t;;\n\thp9k8[0-9][13679] | hp8[0-9][13679])\n\t\tbasic_machine=hppa1.1-hp\n\t\t;;\n\thp9k8[0-9][0-9] | hp8[0-9][0-9])\n\t\tbasic_machine=hppa1.0-hp\n\t\t;;\n\thppa-next)\n\t\tos=-nextstep3\n\t\t;;\n\thppaosf)\n\t\tbasic_machine=hppa1.1-hp\n\t\tos=-osf\n\t\t;;\n\thppro)\n\t\tbasic_machine=hppa1.1-hp\n\t\tos=-proelf\n\t\t;;\n\ti370-ibm* | ibm*)\n\t\tbasic_machine=i370-ibm\n\t\t;;\n\ti*86v32)\n\t\tbasic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`\n\t\tos=-sysv32\n\t\t;;\n\ti*86v4*)\n\t\tbasic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`\n\t\tos=-sysv4\n\t\t;;\n\ti*86v)\n\t\tbasic_machine=`echo 
$1 | sed -e 's/86.*/86-pc/'`\n\t\tos=-sysv\n\t\t;;\n\ti*86sol2)\n\t\tbasic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`\n\t\tos=-solaris2\n\t\t;;\n\ti386mach)\n\t\tbasic_machine=i386-mach\n\t\tos=-mach\n\t\t;;\n\ti386-vsta | vsta)\n\t\tbasic_machine=i386-unknown\n\t\tos=-vsta\n\t\t;;\n\tiris | iris4d)\n\t\tbasic_machine=mips-sgi\n\t\tcase $os in\n\t\t    -irix*)\n\t\t\t;;\n\t\t    *)\n\t\t\tos=-irix4\n\t\t\t;;\n\t\tesac\n\t\t;;\n\tisi68 | isi)\n\t\tbasic_machine=m68k-isi\n\t\tos=-sysv\n\t\t;;\n\tleon-*|leon[3-9]-*)\n\t        basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`\n\t        ;;\n\tm68knommu)\n\t\tbasic_machine=m68k-unknown\n\t\tos=-linux\n\t\t;;\n\tm68knommu-*)\n\t\tbasic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\tos=-linux\n\t\t;;\n\tm88k-omron*)\n\t\tbasic_machine=m88k-omron\n\t\t;;\n\tmagnum | m3230)\n\t\tbasic_machine=mips-mips\n\t\tos=-sysv\n\t\t;;\n\tmerlin)\n\t\tbasic_machine=ns32k-utek\n\t\tos=-sysv\n\t\t;;\n\tmicroblaze*)\n\t\tbasic_machine=microblaze-xilinx\n\t\t;;\n\tmingw64)\n\t\tbasic_machine=x86_64-pc\n\t\tos=-mingw64\n\t\t;;\n\tmingw32)\n\t\tbasic_machine=i686-pc\n\t\tos=-mingw32\n\t\t;;\n\tmingw32ce)\n\t\tbasic_machine=arm-unknown\n\t\tos=-mingw32ce\n\t\t;;\n\tminiframe)\n\t\tbasic_machine=m68000-convergent\n\t\t;;\n\t*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)\n\t\tbasic_machine=m68k-atari\n\t\tos=-mint\n\t\t;;\n\tmips3*-*)\n\t\tbasic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`\n\t\t;;\n\tmips3*)\n\t\tbasic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown\n\t\t;;\n\tmonitor)\n\t\tbasic_machine=m68k-rom68k\n\t\tos=-coff\n\t\t;;\n\tmorphos)\n\t\tbasic_machine=powerpc-unknown\n\t\tos=-morphos\n\t\t;;\n\tmoxiebox)\n\t        basic_machine=moxie-unknown\n\t        os=-moxiebox\n\t        ;;\n\tmsdos)\n\t\tbasic_machine=i686-pc\n\t\tos=-msdos\n\t\t;;\n\tms1-*)\n\t\tbasic_machine=`echo $basic_machine | sed -e 
's/ms1-/mt-/'`\n\t\t;;\n\tmsys)\n\t\tbasic_machine=i686-pc\n\t\tos=-msys\n\t\t;;\n\tmvs)\n\t\tbasic_machine=i370-ibm\n\t\tos=-mvs\n\t\t;;\n\tnacl)\n\t\tbasic_machine=le32-unknown\n\t\tos=-nacl\n\t\t;;\n\tncr3000)\n\t\tbasic_machine=i486-ncr\n\t\tos=-sysv4\n\t\t;;\n\tnetbsd386)\n\t\tbasic_machine=i386-unknown\n\t\tos=-netbsd\n\t\t;;\n\tnetwinder)\n\t\tbasic_machine=armv4l-rebel\n\t\tos=-linux\n\t\t;;\n\tnews | news700 | news800 | news900)\n\t\tbasic_machine=m68k-sony\n\t\tos=-newsos\n\t\t;;\n\tnews1000)\n\t\tbasic_machine=m68030-sony\n\t\tos=-newsos\n\t\t;;\n\tnews-3600 | risc-news)\n\t\tbasic_machine=mips-sony\n\t\tos=-newsos\n\t\t;;\n\tnecv70)\n\t\tbasic_machine=v70-nec\n\t\tos=-sysv\n\t\t;;\n\tnext | m*-next )\n\t\tbasic_machine=m68k-next\n\t\tcase $os in\n\t\t    -nextstep* )\n\t\t\t;;\n\t\t    -ns2*)\n\t\t      os=-nextstep2\n\t\t\t;;\n\t\t    *)\n\t\t      os=-nextstep3\n\t\t\t;;\n\t\tesac\n\t\t;;\n\tnh3000)\n\t\tbasic_machine=m68k-harris\n\t\tos=-cxux\n\t\t;;\n\tnh[45]000)\n\t\tbasic_machine=m88k-harris\n\t\tos=-cxux\n\t\t;;\n\tnindy960)\n\t\tbasic_machine=i960-intel\n\t\tos=-nindy\n\t\t;;\n\tmon960)\n\t\tbasic_machine=i960-intel\n\t\tos=-mon960\n\t\t;;\n\tnonstopux)\n\t\tbasic_machine=mips-compaq\n\t\tos=-nonstopux\n\t\t;;\n\tnp1)\n\t\tbasic_machine=np1-gould\n\t\t;;\n\tneo-tandem)\n\t\tbasic_machine=neo-tandem\n\t\t;;\n\tnse-tandem)\n\t\tbasic_machine=nse-tandem\n\t\t;;\n\tnsr-tandem)\n\t\tbasic_machine=nsr-tandem\n\t\t;;\n\top50n-* | op60c-*)\n\t\tbasic_machine=hppa1.1-oki\n\t\tos=-proelf\n\t\t;;\n\topenrisc | openrisc-*)\n\t\tbasic_machine=or32-unknown\n\t\t;;\n\tos400)\n\t\tbasic_machine=powerpc-ibm\n\t\tos=-os400\n\t\t;;\n\tOSE68000 | 
ose68000)\n\t\tbasic_machine=m68000-ericsson\n\t\tos=-ose\n\t\t;;\n\tos68k)\n\t\tbasic_machine=m68k-none\n\t\tos=-os68k\n\t\t;;\n\tpa-hitachi)\n\t\tbasic_machine=hppa1.1-hitachi\n\t\tos=-hiuxwe2\n\t\t;;\n\tparagon)\n\t\tbasic_machine=i860-intel\n\t\tos=-osf\n\t\t;;\n\tparisc)\n\t\tbasic_machine=hppa-unknown\n\t\tos=-linux\n\t\t;;\n\tparisc-*)\n\t\tbasic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\tos=-linux\n\t\t;;\n\tpbd)\n\t\tbasic_machine=sparc-tti\n\t\t;;\n\tpbb)\n\t\tbasic_machine=m68k-tti\n\t\t;;\n\tpc532 | pc532-*)\n\t\tbasic_machine=ns32k-pc532\n\t\t;;\n\tpc98)\n\t\tbasic_machine=i386-pc\n\t\t;;\n\tpc98-*)\n\t\tbasic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tpentium | p5 | k5 | k6 | nexgen | viac3)\n\t\tbasic_machine=i586-pc\n\t\t;;\n\tpentiumpro | p6 | 6x86 | athlon | athlon_*)\n\t\tbasic_machine=i686-pc\n\t\t;;\n\tpentiumii | pentium2 | pentiumiii | pentium3)\n\t\tbasic_machine=i686-pc\n\t\t;;\n\tpentium4)\n\t\tbasic_machine=i786-pc\n\t\t;;\n\tpentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)\n\t\tbasic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tpentiumpro-* | p6-* | 6x86-* | athlon-*)\n\t\tbasic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tpentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)\n\t\tbasic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tpentium4-*)\n\t\tbasic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tpn)\n\t\tbasic_machine=pn-gould\n\t\t;;\n\tpower)\tbasic_machine=power-ibm\n\t\t;;\n\tppc | ppcbe)\tbasic_machine=powerpc-unknown\n\t\t;;\n\tppc-* | ppcbe-*)\n\t\tbasic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tppcle | powerpclittle | ppc-le | powerpc-little)\n\t\tbasic_machine=powerpcle-unknown\n\t\t;;\n\tppcle-* | powerpclittle-*)\n\t\tbasic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tppc64)\tbasic_machine=powerpc64-unknown\n\t\t;;\n\tppc64-*) 
basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tppc64le | powerpc64little | ppc64-le | powerpc64-little)\n\t\tbasic_machine=powerpc64le-unknown\n\t\t;;\n\tppc64le-* | powerpc64little-*)\n\t\tbasic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`\n\t\t;;\n\tps2)\n\t\tbasic_machine=i386-ibm\n\t\t;;\n\tpw32)\n\t\tbasic_machine=i586-unknown\n\t\tos=-pw32\n\t\t;;\n\trdos | rdos64)\n\t\tbasic_machine=x86_64-pc\n\t\tos=-rdos\n\t\t;;\n\trdos32)\n\t\tbasic_machine=i386-pc\n\t\tos=-rdos\n\t\t;;\n\trom68k)\n\t\tbasic_machine=m68k-rom68k\n\t\tos=-coff\n\t\t;;\n\trm[46]00)\n\t\tbasic_machine=mips-siemens\n\t\t;;\n\trtpc | rtpc-*)\n\t\tbasic_machine=romp-ibm\n\t\t;;\n\ts390 | s390-*)\n\t\tbasic_machine=s390-ibm\n\t\t;;\n\ts390x | s390x-*)\n\t\tbasic_machine=s390x-ibm\n\t\t;;\n\tsa29200)\n\t\tbasic_machine=a29k-amd\n\t\tos=-udi\n\t\t;;\n\tsb1)\n\t\tbasic_machine=mipsisa64sb1-unknown\n\t\t;;\n\tsb1el)\n\t\tbasic_machine=mipsisa64sb1el-unknown\n\t\t;;\n\tsde)\n\t\tbasic_machine=mipsisa32-sde\n\t\tos=-elf\n\t\t;;\n\tsei)\n\t\tbasic_machine=mips-sei\n\t\tos=-seiux\n\t\t;;\n\tsequent)\n\t\tbasic_machine=i386-sequent\n\t\t;;\n\tsh)\n\t\tbasic_machine=sh-hitachi\n\t\tos=-hms\n\t\t;;\n\tsh5el)\n\t\tbasic_machine=sh5le-unknown\n\t\t;;\n\tsh64)\n\t\tbasic_machine=sh64-unknown\n\t\t;;\n\tsparclite-wrs | simso-wrs)\n\t\tbasic_machine=sparclite-wrs\n\t\tos=-vxworks\n\t\t;;\n\tsps7)\n\t\tbasic_machine=m68k-bull\n\t\tos=-sysv2\n\t\t;;\n\tspur)\n\t\tbasic_machine=spur-unknown\n\t\t;;\n\tst2000)\n\t\tbasic_machine=m68k-tandem\n\t\t;;\n\tstratus)\n\t\tbasic_machine=i860-stratus\n\t\tos=-sysv4\n\t\t;;\n\tstrongarm-* | thumb-*)\n\t\tbasic_machine=arm-`echo $basic_machine | sed 
's/^[^-]*-//'`\n\t\t;;\n\tsun2)\n\t\tbasic_machine=m68000-sun\n\t\t;;\n\tsun2os3)\n\t\tbasic_machine=m68000-sun\n\t\tos=-sunos3\n\t\t;;\n\tsun2os4)\n\t\tbasic_machine=m68000-sun\n\t\tos=-sunos4\n\t\t;;\n\tsun3os3)\n\t\tbasic_machine=m68k-sun\n\t\tos=-sunos3\n\t\t;;\n\tsun3os4)\n\t\tbasic_machine=m68k-sun\n\t\tos=-sunos4\n\t\t;;\n\tsun4os3)\n\t\tbasic_machine=sparc-sun\n\t\tos=-sunos3\n\t\t;;\n\tsun4os4)\n\t\tbasic_machine=sparc-sun\n\t\tos=-sunos4\n\t\t;;\n\tsun4sol2)\n\t\tbasic_machine=sparc-sun\n\t\tos=-solaris2\n\t\t;;\n\tsun3 | sun3-*)\n\t\tbasic_machine=m68k-sun\n\t\t;;\n\tsun4)\n\t\tbasic_machine=sparc-sun\n\t\t;;\n\tsun386 | sun386i | roadrunner)\n\t\tbasic_machine=i386-sun\n\t\t;;\n\tsv1)\n\t\tbasic_machine=sv1-cray\n\t\tos=-unicos\n\t\t;;\n\tsymmetry)\n\t\tbasic_machine=i386-sequent\n\t\tos=-dynix\n\t\t;;\n\tt3e)\n\t\tbasic_machine=alphaev5-cray\n\t\tos=-unicos\n\t\t;;\n\tt90)\n\t\tbasic_machine=t90-cray\n\t\tos=-unicos\n\t\t;;\n\ttile*)\n\t\tbasic_machine=$basic_machine-unknown\n\t\tos=-linux-gnu\n\t\t;;\n\ttx39)\n\t\tbasic_machine=mipstx39-unknown\n\t\t;;\n\ttx39el)\n\t\tbasic_machine=mipstx39el-unknown\n\t\t;;\n\ttoad1)\n\t\tbasic_machine=pdp10-xkl\n\t\tos=-tops20\n\t\t;;\n\ttower | tower-32)\n\t\tbasic_machine=m68k-ncr\n\t\t;;\n\ttpf)\n\t\tbasic_machine=s390x-ibm\n\t\tos=-tpf\n\t\t;;\n\tudi29k)\n\t\tbasic_machine=a29k-amd\n\t\tos=-udi\n\t\t;;\n\tultra3)\n\t\tbasic_machine=a29k-nyu\n\t\tos=-sym1\n\t\t;;\n\tv810 | 
necv810)\n\t\tbasic_machine=v810-nec\n\t\tos=-none\n\t\t;;\n\tvaxv)\n\t\tbasic_machine=vax-dec\n\t\tos=-sysv\n\t\t;;\n\tvms)\n\t\tbasic_machine=vax-dec\n\t\tos=-vms\n\t\t;;\n\tvpp*|vx|vx-*)\n\t\tbasic_machine=f301-fujitsu\n\t\t;;\n\tvxworks960)\n\t\tbasic_machine=i960-wrs\n\t\tos=-vxworks\n\t\t;;\n\tvxworks68)\n\t\tbasic_machine=m68k-wrs\n\t\tos=-vxworks\n\t\t;;\n\tvxworks29k)\n\t\tbasic_machine=a29k-wrs\n\t\tos=-vxworks\n\t\t;;\n\tw65*)\n\t\tbasic_machine=w65-wdc\n\t\tos=-none\n\t\t;;\n\tw89k-*)\n\t\tbasic_machine=hppa1.1-winbond\n\t\tos=-proelf\n\t\t;;\n\txbox)\n\t\tbasic_machine=i686-pc\n\t\tos=-mingw32\n\t\t;;\n\txps | xps100)\n\t\tbasic_machine=xps100-honeywell\n\t\t;;\n\txscale-* | xscalee[bl]-*)\n\t\tbasic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`\n\t\t;;\n\tymp)\n\t\tbasic_machine=ymp-cray\n\t\tos=-unicos\n\t\t;;\n\tz8k-*-coff)\n\t\tbasic_machine=z8k-unknown\n\t\tos=-sim\n\t\t;;\n\tz80-*-coff)\n\t\tbasic_machine=z80-unknown\n\t\tos=-sim\n\t\t;;\n\tnone)\n\t\tbasic_machine=none-none\n\t\tos=-none\n\t\t;;\n\n# Here we handle the default manufacturer of certain CPU types.  
It is in\n# some cases the only manufacturer, in others, it is the most popular.\n\tw89k)\n\t\tbasic_machine=hppa1.1-winbond\n\t\t;;\n\top50n)\n\t\tbasic_machine=hppa1.1-oki\n\t\t;;\n\top60c)\n\t\tbasic_machine=hppa1.1-oki\n\t\t;;\n\tromp)\n\t\tbasic_machine=romp-ibm\n\t\t;;\n\tmmix)\n\t\tbasic_machine=mmix-knuth\n\t\t;;\n\trs6000)\n\t\tbasic_machine=rs6000-ibm\n\t\t;;\n\tvax)\n\t\tbasic_machine=vax-dec\n\t\t;;\n\tpdp10)\n\t\t# there are many clones, so DEC is not a safe bet\n\t\tbasic_machine=pdp10-unknown\n\t\t;;\n\tpdp11)\n\t\tbasic_machine=pdp11-dec\n\t\t;;\n\twe32k)\n\t\tbasic_machine=we32k-att\n\t\t;;\n\tsh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)\n\t\tbasic_machine=sh-unknown\n\t\t;;\n\tsparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)\n\t\tbasic_machine=sparc-sun\n\t\t;;\n\tcydra)\n\t\tbasic_machine=cydra-cydrome\n\t\t;;\n\torion)\n\t\tbasic_machine=orion-highlevel\n\t\t;;\n\torion105)\n\t\tbasic_machine=clipper-highlevel\n\t\t;;\n\tmac | mpw | mac-mpw)\n\t\tbasic_machine=m68k-apple\n\t\t;;\n\tpmac | pmac-mpw)\n\t\tbasic_machine=powerpc-apple\n\t\t;;\n\t*-unknown | *-pc | *-apple | *-w64)\n\t\t# Make sure to match an already-canonicalized machine name.\n\t\t;;\n\t*)\n\t\techo Invalid configuration \\`$1\\': machine \\`$basic_machine\\' not recognized 1>&2\n\t\texit 1\n\t\t;;\nesac\n\n# Here we canonicalize certain aliases for manufacturers.\ncase $basic_machine in\n\t*-digital*)\n\t\tbasic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`\n\t\t;;\n\t*-commodore*)\n\t\tbasic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`\n\t\t;;\n\t*)\n\t\t;;\nesac\n\n# Decode manufacturer-specific aliases for certain operating systems.\n\nif [ x\"$os\" != x\"\" ]\nthen\ncase $os in\n\t# First match some system type aliases\n\t# that might get confused with valid system types.\n\t# -solaris* is a basic system type, with this one exception.\n\t-auroraux)\n\t\tos=-auroraux\n\t\t;;\n\t-solaris1 | -solaris1.*)\n\t\tos=`echo $os | sed -e 
's|solaris1|sunos4|'`\n\t\t;;\n\t-solaris)\n\t\tos=-solaris2\n\t\t;;\n\t-svr4*)\n\t\tos=-sysv4\n\t\t;;\n\t-unixware*)\n\t\tos=-sysv4.2uw\n\t\t;;\n\t-gnu/linux*)\n\t\tos=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`\n\t\t;;\n\t# First accept the basic system types.\n\t# The portable systems comes first.\n\t# Each alternative MUST END IN A *, to match a version number.\n\t# -sysv* is not here because it comes later, after sysvr4.\n\t-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \\\n\t      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\\\n\t      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \\\n\t      | -sym* | -kopensolaris* | -plan9* \\\n\t      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \\\n\t      | -aos* | -aros* | -cloudabi* \\\n\t      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \\\n\t      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \\\n\t      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \\\n\t      | -bitrig* | -openbsd* | -solidbsd* \\\n\t      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \\\n\t      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \\\n\t      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \\\n\t      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \\\n\t      | -chorusos* | -chorusrdb* | -cegcc* \\\n\t      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \\\n\t      | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \\\n\t      | -linux-newlib* | -linux-musl* | -linux-uclibc* \\\n\t      | -uxpv* | -beos* | -mpeix* | -udk* | -moviebox* \\\n\t      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \\\n\t      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \\\n\t      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \\\n\t      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \\\n\t      | -morphos* | -superux* | -rtmk* | 
-rtmk-nova* | -windiss* \\\n\t      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \\\n\t      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*)\n\t# Remember, each alternative MUST END IN *, to match a version number.\n\t\t;;\n\t-qnx*)\n\t\tcase $basic_machine in\n\t\t    x86-* | i*86-*)\n\t\t\t;;\n\t\t    *)\n\t\t\tos=-nto$os\n\t\t\t;;\n\t\tesac\n\t\t;;\n\t-nto-qnx*)\n\t\t;;\n\t-nto*)\n\t\tos=`echo $os | sed -e 's|nto|nto-qnx|'`\n\t\t;;\n\t-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \\\n\t      | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \\\n\t      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)\n\t\t;;\n\t-mac*)\n\t\tos=`echo $os | sed -e 's|mac|macos|'`\n\t\t;;\n\t-linux-dietlibc)\n\t\tos=-linux-dietlibc\n\t\t;;\n\t-linux*)\n\t\tos=`echo $os | sed -e 's|linux|linux-gnu|'`\n\t\t;;\n\t-sunos5*)\n\t\tos=`echo $os | sed -e 's|sunos5|solaris2|'`\n\t\t;;\n\t-sunos6*)\n\t\tos=`echo $os | sed -e 's|sunos6|solaris3|'`\n\t\t;;\n\t-opened*)\n\t\tos=-openedition\n\t\t;;\n\t-os400*)\n\t\tos=-os400\n\t\t;;\n\t-wince*)\n\t\tos=-wince\n\t\t;;\n\t-osfrose*)\n\t\tos=-osfrose\n\t\t;;\n\t-osf*)\n\t\tos=-osf\n\t\t;;\n\t-utek*)\n\t\tos=-bsd\n\t\t;;\n\t-dynix*)\n\t\tos=-bsd\n\t\t;;\n\t-acis*)\n\t\tos=-aos\n\t\t;;\n\t-atheos*)\n\t\tos=-atheos\n\t\t;;\n\t-syllable*)\n\t\tos=-syllable\n\t\t;;\n\t-386bsd)\n\t\tos=-bsd\n\t\t;;\n\t-ctix* | -uts*)\n\t\tos=-sysv\n\t\t;;\n\t-nova*)\n\t\tos=-rtmk-nova\n\t\t;;\n\t-ns2 )\n\t\tos=-nextstep2\n\t\t;;\n\t-nsk*)\n\t\tos=-nsk\n\t\t;;\n\t# Preserve the version number of sinix5.\n\t-sinix5.*)\n\t\tos=`echo $os | sed -e 's|sinix|sysv|'`\n\t\t;;\n\t-sinix*)\n\t\tos=-sysv4\n\t\t;;\n\t-tpf*)\n\t\tos=-tpf\n\t\t;;\n\t-triton*)\n\t\tos=-sysv3\n\t\t;;\n\t-oss*)\n\t\tos=-sysv3\n\t\t;;\n\t-svr4)\n\t\tos=-sysv4\n\t\t;;\n\t-svr3)\n\t\tos=-sysv3\n\t\t;;\n\t-sysvr4)\n\t\tos=-sysv4\n\t\t;;\n\t# This must come after 
-sysvr4.\n\t-sysv*)\n\t\t;;\n\t-ose*)\n\t\tos=-ose\n\t\t;;\n\t-es1800*)\n\t\tos=-ose\n\t\t;;\n\t-xenix)\n\t\tos=-xenix\n\t\t;;\n\t-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)\n\t\tos=-mint\n\t\t;;\n\t-aros*)\n\t\tos=-aros\n\t\t;;\n\t-zvmoe)\n\t\tos=-zvmoe\n\t\t;;\n\t-dicos*)\n\t\tos=-dicos\n\t\t;;\n\t-nacl*)\n\t\t;;\n\t-none)\n\t\t;;\n\t*)\n\t\t# Get rid of the `-' at the beginning of $os.\n\t\tos=`echo $os | sed 's/[^-]*-//'`\n\t\techo Invalid configuration \\`$1\\': system \\`$os\\' not recognized 1>&2\n\t\texit 1\n\t\t;;\nesac\nelse\n\n# Here we handle the default operating systems that come with various machines.\n# The value should be what the vendor currently ships out the door with their\n# machine or put another way, the most popular os provided with the machine.\n\n# Note that if you're going to try to match \"-MANUFACTURER\" here (say,\n# \"-sun\"), then you have to tell the case statement up towards the top\n# that MANUFACTURER isn't an operating system.  Otherwise, code above\n# will signal an error saying that MANUFACTURER isn't an operating\n# system, and we'll never get to this point.\n\ncase $basic_machine in\n\tscore-*)\n\t\tos=-elf\n\t\t;;\n\tspu-*)\n\t\tos=-elf\n\t\t;;\n\t*-acorn)\n\t\tos=-riscix1.2\n\t\t;;\n\tarm*-rebel)\n\t\tos=-linux\n\t\t;;\n\tarm*-semi)\n\t\tos=-aout\n\t\t;;\n\tc4x-* | tic4x-*)\n\t\tos=-coff\n\t\t;;\n\tc8051-*)\n\t        os=-elf\n\t        ;;\n\thexagon-*)\n\t\tos=-elf\n\t\t;;\n\ttic54x-*)\n\t\tos=-coff\n\t\t;;\n\ttic55x-*)\n\t\tos=-coff\n\t\t;;\n\ttic6x-*)\n\t\tos=-coff\n\t\t;;\n\t# This must come before the *-dec entry.\n\tpdp10-*)\n\t\tos=-tops20\n\t\t;;\n\tpdp11-*)\n\t\tos=-none\n\t\t;;\n\t*-dec | 
vax-*)\n\t\tos=-ultrix4.2\n\t\t;;\n\tm68*-apollo)\n\t\tos=-domain\n\t\t;;\n\ti386-sun)\n\t\tos=-sunos4.0.2\n\t\t;;\n\tm68000-sun)\n\t\tos=-sunos3\n\t\t;;\n\tm68*-cisco)\n\t\tos=-aout\n\t\t;;\n\tmep-*)\n\t\tos=-elf\n\t\t;;\n\tmips*-cisco)\n\t\tos=-elf\n\t\t;;\n\tmips*-*)\n\t\tos=-elf\n\t\t;;\n\tor32-*)\n\t\tos=-coff\n\t\t;;\n\t*-tti)\t# must be before sparc entry or we get the wrong os.\n\t\tos=-sysv3\n\t\t;;\n\tsparc-* | *-sun)\n\t\tos=-sunos4.1.1\n\t\t;;\n\t*-be)\n\t\tos=-beos\n\t\t;;\n\t*-haiku)\n\t\tos=-haiku\n\t\t;;\n\t*-ibm)\n\t\tos=-aix\n\t\t;;\n\t*-knuth)\n\t\tos=-mmixware\n\t\t;;\n\t*-wec)\n\t\tos=-proelf\n\t\t;;\n\t*-winbond)\n\t\tos=-proelf\n\t\t;;\n\t*-oki)\n\t\tos=-proelf\n\t\t;;\n\t*-hp)\n\t\tos=-hpux\n\t\t;;\n\t*-hitachi)\n\t\tos=-hiux\n\t\t;;\n\ti860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)\n\t\tos=-sysv\n\t\t;;\n\t*-cbm)\n\t\tos=-amigaos\n\t\t;;\n\t*-dg)\n\t\tos=-dgux\n\t\t;;\n\t*-dolphin)\n\t\tos=-sysv3\n\t\t;;\n\tm68k-ccur)\n\t\tos=-rtu\n\t\t;;\n\tm88k-omron*)\n\t\tos=-luna\n\t\t;;\n\t*-next )\n\t\tos=-nextstep\n\t\t;;\n\t*-sequent)\n\t\tos=-ptx\n\t\t;;\n\t*-crds)\n\t\tos=-unos\n\t\t;;\n\t*-ns)\n\t\tos=-genix\n\t\t;;\n\ti370-*)\n\t\tos=-mvs\n\t\t;;\n\t*-next)\n\t\tos=-nextstep3\n\t\t;;\n\t*-gould)\n\t\tos=-sysv\n\t\t;;\n\t*-highlevel)\n\t\tos=-bsd\n\t\t;;\n\t*-encore)\n\t\tos=-bsd\n\t\t;;\n\t*-sgi)\n\t\tos=-irix\n\t\t;;\n\t*-siemens)\n\t\tos=-sysv4\n\t\t;;\n\t*-masscomp)\n\t\tos=-rtu\n\t\t;;\n\tf30[01]-fujitsu | f700-fujitsu)\n\t\tos=-uxpv\n\t\t;;\n\t*-rom68k)\n\t\tos=-coff\n\t\t;;\n\t*-*bug)\n\t\tos=-coff\n\t\t;;\n\t*-apple)\n\t\tos=-macos\n\t\t;;\n\t*-atari*)\n\t\tos=-mint\n\t\t;;\n\t*)\n\t\tos=-none\n\t\t;;\nesac\nfi\n\n# Here we handle the case where we know the os, and the CPU type, but not the\n# manufacturer.  
We pick the logical manufacturer.\nvendor=unknown\ncase $basic_machine in\n\t*-unknown)\n\t\tcase $os in\n\t\t\t-riscix*)\n\t\t\t\tvendor=acorn\n\t\t\t\t;;\n\t\t\t-sunos*)\n\t\t\t\tvendor=sun\n\t\t\t\t;;\n\t\t\t-cnk*|-aix*)\n\t\t\t\tvendor=ibm\n\t\t\t\t;;\n\t\t\t-beos*)\n\t\t\t\tvendor=be\n\t\t\t\t;;\n\t\t\t-hpux*)\n\t\t\t\tvendor=hp\n\t\t\t\t;;\n\t\t\t-mpeix*)\n\t\t\t\tvendor=hp\n\t\t\t\t;;\n\t\t\t-hiux*)\n\t\t\t\tvendor=hitachi\n\t\t\t\t;;\n\t\t\t-unos*)\n\t\t\t\tvendor=crds\n\t\t\t\t;;\n\t\t\t-dgux*)\n\t\t\t\tvendor=dg\n\t\t\t\t;;\n\t\t\t-luna*)\n\t\t\t\tvendor=omron\n\t\t\t\t;;\n\t\t\t-genix*)\n\t\t\t\tvendor=ns\n\t\t\t\t;;\n\t\t\t-mvs* | -opened*)\n\t\t\t\tvendor=ibm\n\t\t\t\t;;\n\t\t\t-os400*)\n\t\t\t\tvendor=ibm\n\t\t\t\t;;\n\t\t\t-ptx*)\n\t\t\t\tvendor=sequent\n\t\t\t\t;;\n\t\t\t-tpf*)\n\t\t\t\tvendor=ibm\n\t\t\t\t;;\n\t\t\t-vxsim* | -vxworks* | -windiss*)\n\t\t\t\tvendor=wrs\n\t\t\t\t;;\n\t\t\t-aux*)\n\t\t\t\tvendor=apple\n\t\t\t\t;;\n\t\t\t-hms*)\n\t\t\t\tvendor=hitachi\n\t\t\t\t;;\n\t\t\t-mpw* | -macos*)\n\t\t\t\tvendor=apple\n\t\t\t\t;;\n\t\t\t-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)\n\t\t\t\tvendor=atari\n\t\t\t\t;;\n\t\t\t-vos*)\n\t\t\t\tvendor=stratus\n\t\t\t\t;;\n\t\tesac\n\t\tbasic_machine=`echo $basic_machine | sed \"s/unknown/$vendor/\"`\n\t\t;;\nesac\n\necho $basic_machine$os\nexit\n\n# Local variables:\n# eval: (add-hook 'write-file-hooks 'time-stamp)\n# time-stamp-start: \"timestamp='\"\n# time-stamp-format: \"%:y-%02m-%02d\"\n# time-stamp-end: \"'\"\n# End:\n"
  },
  {
    "path": "configure.ac",
    "content": "dnl  Process this file with autoconf to produce a configure script.\n\ndefine(GMP_COPYRIGHT,[[\n\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006\nFree Software Foundation, Inc.\n\nCopyright 2008 William Hart.\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n]])\n\nAC_COPYRIGHT(GMP_COPYRIGHT)\nAH_TOP(/*GMP_COPYRIGHT*/)\n\nAC_REVISION($Revision: 1.11 $)\nAC_PREREQ(2.68)\nAC_INIT(MPIR, MPIR_VERSION, http://groups.google.co.uk/group/mpir-devel/, mpir)\nAC_CONFIG_SRCDIR(gmp-impl.h)\nm4_pattern_forbid([^[ \\t]*GMP_])\nm4_pattern_allow(GMP_LDFLAGS)\nm4_pattern_allow(GMP_LIMB_BITS)\nm4_pattern_allow(GMP_MPARAM_H_SUGGEST)\nm4_pattern_allow(GMP_NAIL_BITS)\nm4_pattern_allow(GMP_NUMB_BITS)\nm4_pattern_allow(WANT_GMP_CC)\n\n# If --target is not used then $target_alias is empty, but if say\n# \"./configure athlon-pc-freebsd3.5\" is used, then all three of\n# $build_alias, $host_alias and $target_alias are set to\n# \"athlon-pc-freebsd3.5\".\n#\nif test -n \"$target_alias\" && test \"$target_alias\" != \"$host_alias\"; then\n  AC_MSG_ERROR([--target is not appropriate for GMP\nUse --build=CPU-VENDOR-OS if you need to specify your CPU and/or system\nexplicitly.  
Use --host if cross-compiling (see \"Installing GMP\" in the\nmanual for more on this).])\nfi\n\nGMP_INIT(config.m4)\n\nAC_CANONICAL_HOST\n\ndnl  Automake \"no-dependencies\" is used because include file dependencies\ndnl  are not useful to us.  Pretty much everything depends just on mpir.h,\ndnl  gmp-impl.h and longlong.h, and yet only rarely does everything need to\ndnl  be rebuilt for changes to those files.\ndnl\ndnl  Note that there's a copy of these options in the top-level Makefile.am,\ndnl  so update there too if changing anything.\ndnl\nAM_INIT_AUTOMAKE([1.11 gnu subdir-objects no-dependencies parallel-tests no-dist-gzip dist-bzip2 dist-zip dist-lzip])\nAC_CONFIG_HEADERS(config.h:config.in)\nAM_MAINTAINER_MODE\n\ndnl  Yasm configuration\nAC_ARG_WITH([yasm],\n[AC_HELP_STRING([--with-yasm],[use a custom Yasm])],\n[],\n[]\n)\n\nAC_ARG_ENABLE(assert,\nAC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),\n[case $enableval in\nyes|no) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;\nesac],\n[enable_assert=no])\n\nif test \"$enable_assert\" = \"yes\"; then\n  AC_DEFINE(WANT_ASSERT,1,\n  [Define to 1 to enable ASSERT checking, per --enable-assert])\n  want_assert_01=1\nelse\n  want_assert_01=0\nfi\nGMP_DEFINE_RAW([\"define(<WANT_ASSERT>,$want_assert_01)\"])\n\n\nAC_ARG_ENABLE(alloca,\nAC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),\n[case $enableval in\nalloca|malloc-reentrant|malloc-notreentrant) ;;\nyes|no|reentrant|notreentrant) ;;\ndebug) ;;\n*)\n  AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:\nyes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;\nesac],\n[enable_alloca=reentrant])\n\n\n# IMPROVE ME: The default for C++ is disabled.  The tests currently\n# performed below for a working C++ compiler are not particularly strong,\n# and in general can't be expected to get the right setup on their own.  
The\n# most significant problem is getting the ABI the same.  Defaulting CXXFLAGS\n# to CFLAGS takes only a small step towards this.  It's also probably worth\n# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can\n# work together.  Some rather broken C++ installations were encountered\n# during testing, and though such things clearly aren't GMP's problem, if\n# --enable-cxx=detect were to be the default then some careful checks of\n# which, if any, C++ compiler on the system is up to scratch would be\n# wanted.\n#\nAC_ARG_ENABLE(cxx,\nAC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),\n[case $enableval in\nyes|no|detect) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;\nesac],\n[enable_cxx=no])\n\n\nAC_ARG_ENABLE(fft,\nAC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),\n[case $enableval in\nyes|no) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;\nesac],\n[enable_fft=yes])\n\nif test \"$enable_fft\" = \"yes\"; then\n  AC_DEFINE(WANT_FFT,1,\n  [Define to 1 to enable FFTs for multiplication, per --enable-fft])\nfi\n\nAC_ARG_ENABLE(gmpcompat,\nAC_HELP_STRING([--enable-gmpcompat],\n               [create library and header files named gmp as well as mpir [[default=no]]]),\n[case $enableval in\nyes|no) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-gmpcompat, need yes or no]) ;;\nesac],\n[enable_gmpcompat=no])\nAM_CONDITIONAL(WANT_GMPCOMPAT, test \"$enable_gmpcompat\" = \"yes\")\n\nAC_ARG_ENABLE(nails,\nAC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),\n[case $enableval in\n[no|[0]]) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-nails, Sorry nails not available in this build]) ;;\nesac],\n[enable_nails=no])\n\ncase $enable_nails in\nyes) GMP_NAIL_BITS=2 ;;\nno)  GMP_NAIL_BITS=0 ;;\n*)   GMP_NAIL_BITS=$enable_nails 
;;\nesac\nAC_SUBST(GMP_NAIL_BITS)\n\n\nAC_ARG_ENABLE(profiling,\nAC_HELP_STRING([--enable-profiling],\n               [build with profiler support [[default=no]]]),\n[case $enableval in\nno|prof|gprof|instrument) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;\nesac],\n[enable_profiling=no])\n\ncase $enable_profiling in\n  prof)\n    AC_DEFINE(WANT_PROFILING_PROF, 1,\n              [Define to 1 if --enable-profiling=prof])\n    ;;\n  gprof)\n    AC_DEFINE(WANT_PROFILING_GPROF, 1,\n              [Define to 1 if --enable-profiling=gprof])\n    ;;\n  instrument)\n    AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,\n              [Define to 1 if --enable-profiling=instrument])\n    ;;\nesac\n\nGMP_DEFINE_RAW([\"define(<WANT_PROFILING>,<\\`$enable_profiling'>)\"])\n\n# -fomit-frame-pointer is incompatible with -pg on some chips\nif test \"$enable_profiling\" = gprof; then\n  fomit_frame_pointer=\nelse\n  fomit_frame_pointer=\"-fomit-frame-pointer\"\nfi\n\nAC_ARG_ENABLE(fat,\nAC_HELP_STRING([--enable-fat],\n               [build a fat binary on systems that support it [[default=no]]]),\n[case $enableval in\nyes|no) ;;\n*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;\nesac],\n[enable_fat=no])\n\n# Table of compilers, options, and mpn paths.  This code has various related\n# purposes\n#\n#   - better default CC/CFLAGS selections than autoconf otherwise gives\n#   - default CC/CFLAGS selections for extra CPU types specific to GMP\n#   - a few tests for known bad compilers\n#   - choice of ABIs on suitable systems\n#   - selection of corresponding mpn search path\n#\n# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is\n# called.  
User selections of CC etc are respected.\n#\n# Care is taken not to use macros like AC_TRY_COMPILE during the GMP\n# pre-testing, since they of course depend on AC_PROG_CC, and also some of\n# them cache their results, which is not wanted.\n#\n# The ABI selection mechanism is unique to GMP.  All that reaches autoconf\n# is a different selection of CC/CFLAGS according to the best ABI the system\n# supports, and/or what the user selects.  Naturally the mpn assembler code\n# selected is very dependent on the ABI.\n#\n# The closest the standard tools come to a notion of ABI is something like\n# \"sparc64\" which encodes a CPU and an ABI together.  This doesn't seem to\n# scale well for GMP, where exact CPU types like \"ultrasparc2\" are wanted,\n# separate from the ABI used on them.\n#\n#\n# The variables set here are\n#\n#   cclist              the compiler choices\n#   xx_cflags           flags for compiler xx\n#   xx_cflags_maybe     flags for compiler xx, if they work\n#   xx_cppflags         cpp flags for compiler xx\n#   xx_cflags_optlist   list of sets of optional flags\n#   xx_cflags_yyy       set yyy of optional flags for compiler xx\n#   xx_ldflags          -Wc,-foo flags for libtool linking with compiler xx\n#   ar_flags            extra flags for $AR\n#   nm_flags            extra flags for $NM\n#   limb                limb size, can be \"longlong\"\n#   path                mpn search path\n#   extra_functions     extra mpn functions\n#   fat_path            fat binary mpn search path [if fat binary desired]\n#   fat_functions       fat functions\n#   fat_thresholds      fat thresholds\n#\n# Suppose xx_cflags_optlist=\"arch\", then flags from $xx_cflags_arch are\n# tried, and the first flag that works will be used.  An optlist like \"arch\n# cpu optimize\" can be used to get multiple independent sets of flags tried.\n# The first that works from each will be used.  
If no flag in a set works\n# then nothing from that set is added.\n#\n# For multiple ABIs, the scheme extends as follows.\n#\n#   abilist               set of ABI choices\n#   cclist_aa             compiler choices in ABI aa\n#   xx_aa_cflags          flags for xx in ABI aa\n#   xx_aa_cflags_maybe    flags for xx in ABI aa, if they work\n#   xx_aa_cppflags        cpp flags for xx in ABI aa\n#   xx_aa_cflags_optlist  list of sets of optional flags in ABI aa\n#   xx_aa_cflags_yyy      set yyy of optional flags for compiler xx in ABI aa\n#   xx_aa_ldflags         -Wc,-foo flags for libtool linking\n#   ar_aa_flags           extra flags for $AR in ABI aa\n#   nm_aa_flags           extra flags for $NM in ABI aa\n#   limb_aa               limb size in ABI aa, can be \"longlong\"\n#   path_aa               mpn search path in ABI aa\n#   extra_functions_aa    extra mpn functions in ABI aa\n#\n# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI\n# in ablist, if an xx_aa_cflags for that ABI isn't given.  For example if\n# abilist=\"64 32\" then $cc_64_cflags will be used for the 64-bit ABI, but\n# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is\n# defined.  This makes it easy to add some 64-bit compilers and flags to an\n# unadorned 32-bit set.\n#\n# limb=longlong (or limb_aa=longlong) applies to all compilers within that\n# ABI.  It won't work to have some needing long long and some not, since a\n# single instantiated mpir.h will be used by both.\n#\n# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are\n# also set here, with an ABI suffix.\n#\n#\n#\n# A table-driven approach like this to mapping cpu type to good compiler\n# options is a bit of a maintenance burden, but there's not much uniformity\n# between options specifications on different compilers.  
Some sort of\n# separately updatable tool might be cute.\n#\n# The use of lots of variables like this, direct and indirect, tends to\n# obscure when and how various things are done, but unfortunately it's\n# pretty much the only way.  If shell subroutines were portable then actual\n# code like \"if this .. do that\" could be written, but attempting the same\n# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would\n# hugely bloat the output.\n\n\nAC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])\n\n# abilist needs to be non-empty, \"standard\" is just a generic name here\nabilist=\"standard\"\n\n# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring\n# c89 over cc here.  But note that on HP-UX c89 provides a castrated\n# environment, and would want to be excluded somehow.  Maybe\n# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and\n# we don't need to worry.\n#\ncclist=\"gcc cc\"\n\ngcc_cflags=\"-O2\"\ngcc_64_cflags=\"-O2\"\ncc_cflags=\"-O\"\ncc_64_cflags=\"-O\"\n\nSPEED_CYCLECOUNTER_OBJ=\ncyclecounter_size=2\n\ncase $host in\n\n  alpha*-*-*)\n    case $host_cpu in\n      alphaev5* | alphapca5*) path=\"alpha/ev5 alpha\" ;;\n      alphaev67 | alphaev68 | alphaev7*)\n        path=\"alpha/ev67 alpha/ev6 alpha/ev5 alpha\" ;;\n      alphaev6* | alphaev7*)  path=\"alpha/ev6 alpha/ev5 alpha\" ;;\n      *)                      path=\"alpha\" ;;\n    esac\n    extra_functions=\"cntlz\"\n    gcc_cflags_optlist=\"asm cpu\" # need asm ahead of cpu, see below\n\n    # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.\n    # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.\n    # gcc 3.0 adds nothing.\n    # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).\n    # gcc 3.2 adds nothing.\n    #\n    # gcc version \"2.9-gnupro-99r1\" under \"-O2 -mcpu=ev6\" strikes internal\n    # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS.  
Each\n    # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.\n    #\n    case $host_cpu in\n      alpha)        gcc_cflags_cpu=\"-mcpu=ev4\" ;;\n      alphaev5)     gcc_cflags_cpu=\"-mcpu=ev5\" ;;\n      alphaev56)    gcc_cflags_cpu=\"-mcpu=ev56\" ;;\n      alphapca56 | alphapca57)\n                    gcc_cflags_cpu=\"-mcpu=pca56\" ;;\n      alphaev6)     gcc_cflags_cpu=\"-mcpu=ev6 -mcpu=ev56\" ;;\n      alphaev67 | alphaev68 | alphaev7*)\n                    gcc_cflags_cpu=\"-mcpu=ev67 -mcpu=ev6 -mcpu=ev56\" ;;\n    esac\n\n    # On old versions of gcc, which don't know -mcpu=, we believe an\n    # explicit -Wa,-mev5 etc will be necessary to put the assembler in\n    # the right mode for our .asm files and longlong.h asm blocks.\n    #\n    # On newer versions of gcc, when -mcpu= is known, we must give a -Wa\n    # which is at least as high as the code gcc will generate.  gcc\n    # establishes what it needs with a \".arch\" directive, our command line\n    # option seems to override that.\n    #\n    # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for\n    # ctlz and cttz (in 2.10.0 at least).\n    #\n    case $host_cpu in\n      alpha)        gcc_cflags_asm=\"-Wa,-arch,ev4 -Wa,-mev4\" ;;\n      alphaev5)     gcc_cflags_asm=\"-Wa,-arch,ev5 -Wa,-mev5\" ;;\n      alphaev56)    gcc_cflags_asm=\"-Wa,-arch,ev56 -Wa,-mev56\" ;;\n      alphapca56 | alphapca57)\n                    gcc_cflags_asm=\"-Wa,-arch,pca56 -Wa,-mpca56\" ;;\n      alphaev6)     gcc_cflags_asm=\"-Wa,-arch,ev6 -Wa,-mev6\" ;;\n      alphaev67 | alphaev68 | alphaev7*)\n                    gcc_cflags_asm=\"-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6\" ;;\n    esac\n\n    # It might be better to ask \"cc\" whether it's Cray C or DEC C,\n    # instead of relying on the OS part of $host.  
But it's hard to\n    # imagine either of those compilers anywhere except their native\n    # systems.\n    #\n    GMP_INCLUDE_MPN(alpha/alpha-defs.m4)\n    GMP_INCLUDE_MPN(alpha/default.m4)\n    SPEED_CYCLECOUNTER_OBJ=alpha.lo\n    cyclecounter_size=1\n    ;;\n\n  arm*-*-*)\n    path=\"arm\"\n    gcc_cflags=\"$gcc_cflags $fomit_frame_pointer\"\n    gcc_testlist=\"gcc-arm-umodsi\"\n    GMP_INCLUDE_MPN(arm/arm-defs.m4)\n    ;;\n\n  IA64_PATTERN)\n    abilist=\"64\"\n    GMP_INCLUDE_MPN(ia64/ia64-defs.m4)\n    SPEED_CYCLECOUNTER_OBJ=ia64.lo\n    gcc_64_cflags=\"-O2\"\n\n    case $host_cpu in\n      itanium)   path=\"ia64/itanium  ia64\" ;;\n      itanium2)  path=\"ia64/itanium2 ia64\" ;;\n      *)         path=\"ia64\" ;;\n    esac\n\n    gcc_64_cflags_optlist=\"tune\"\n    gcc_32_cflags_optlist=$gcc_64_cflags_optlist\n\n    # gcc pre-release 3.4 adds -mtune itanium and itanium2\n    case $host_cpu in\n      itanium)   gcc_cflags_tune=\"-mtune=itanium\" ;;\n      itanium2)  gcc_cflags_tune=\"-mtune=itanium2\" ;;\n    esac\n\n    case $host in\n      *-*-linux*)\n\tcclist=\"gcc icc\"\n\ticc_cflags=\"-no-gcc -fpic\"\n\ticc_cflags_optlist=\"opt\"\n\t# FIXME: Check if -O3 is really right.  The manual warns it is for\n\t# large data sets, for which GMP uses assembly loops.\n\ticc_cflags_opt=\"-O3 -O2 -O1\"\n\t;;\n\n      *-*-hpux*)\n        # HP cc sometimes gets internal errors if the optimization level is\n        # too high.  
GMP_PROG_CC_WORKS detects this, the \"_opt\" fallbacks\n        # let us use whatever seems to work.\n        #\n        abilist=\"32 64\"\n\n        cclist_32=\"gcc cc\"\n        path_32=\"ia64\"\n        cc_32_cflags=\"\"\n        cc_32_cflags_optlist=\"opt\"\n        cc_32_cflags_opt=\"+O3 +O2 +O1\"\n        gcc_32_cflags=\"-milp32 -O2\"\n        limb_32=longlong\n        SPEED_CYCLECOUNTER_OBJ_32=ia64.lo\n        cyclecounter_size_32=2\n\n        # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,\n        # but also need it in CFLAGS for linking programs, since automake\n        # only uses CFLAGS when linking, not CPPFLAGS.\n        # FIXME: Maybe should use cc_64_ldflags for this, but that would\n        # need GMP_LDFLAGS used consistently by all the programs.\n        #\n        cc_64_cflags=\"+DD64\"\n        cc_64_cppflags=\"+DD64\"\n        cc_64_cflags_optlist=\"opt\"\n        cc_64_cflags_opt=\"+O3 +O2 +O1\"\n        gcc_64_cflags=\"-mlp64 -O2\"\n        ;;\n    esac\n    ;;\n\n  # Irix  5 and earlier can only run 32-bit o32.\n  #\n  # IRIX 6 and up always has a 64-bit mips CPU can run n32 or 64.  n32 is\n  # preferred over 64, but only because that's been the default in past\n  # versions of GMP.  The two are equally efficient.\n  #\n  # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not\n  # supporting n32 or 64.\n  #\n  # For reference, libtool (eg. 
1.5.6) recognises the n32 ABI and knows the\n  # right options to use when linking (both cc and gcc), so no need for\n  # anything special from us.\n  #\n  mips*-*-*)\n    abilist=\"o32\"\n    gcc_cflags_optlist=\"abi\"\n    gcc_cflags_abi=\"-mabi=32\"\n    gcc_testlist=\"gcc-mips-o32\"\n    path=\"mips32\"\n    cc_cflags=\"-O2 -o32\"   # no -g, it disables all optimizations\n    # this suits both mips32 and mips64\n    GMP_INCLUDE_MPN(mips32/mips-defs.m4)\n\n    case $host in\n      [mips64*-*-*])\n        abilist=\"64 n32 o32\"\n\n        cclist_n32=\"gcc cc\"\n\tgcc_n32_cflags=\"-O2 -mabi=n32\"\n        cc_n32_cflags=\"-O2 -n32\"\t# no -g, it disables all optimizations\n        limb_n32=longlong\n        path_n32=\"mips64\"\n\n        cclist_64=\"pathcc gcc cc\"\n        gcc_64_cflags=\"-O2 -mabi=64\"\n        gcc_64_ldflags=\"-Wc,-mabi=64\"\n        cc_64_cflags=\"-O2 -64\"\t\t# no -g, it disables all optimizations\n        cc_64_ldflags=\"-Wc,-64\"\n        pathcc_64_cflags=\"-O0 -mabi=64\"\n        pathcc_64_ldflags=\"-Wc,-mabi=64\"\n        path_64=\"mips64\"\n        ;;\n    esac\n    ;;\n\n\n  # Darwin (powerpc-apple-darwin1.3) has it's hacked gcc installed as cc.\n  # Our usual \"gcc in disguise\" detection means gcc_cflags etc here gets\n  # used.\n  #\n  # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp\n  # since it doesn't like \"__attribute__ ((mode (SI)))\" etc in gmp-impl.h,\n  # and so always ends up running the plain preprocessor anyway.  This could\n  # be done in CPPFLAGS rather than CFLAGS, but there's not many places\n  # preprocessing is done separately, and this is only a speedup, the normal\n  # preprocessor gets run if there's any problems.\n  #\n  # We used to use -Wa,-mppc with gcc, but can't remember exactly why.\n  # Presumably it was for old versions of gcc where -mpowerpc doesn't put\n  # the assembler in the right mode.  
In any case -Wa,-mppc is not good, for\n  # instance -mcpu=604 makes recent gcc use -m604 to get access to the\n  # \"fsel\" instruction, but a -Wa,-mppc overrides that, making code that\n  # comes out with fsel fail.\n  #\n  # (Note also that the darwin assembler doesn't accept \"-mppc\", so any\n  # -Wa,-mppc was used only if it worked.  The right flag on darwin would be\n  # \"-arch ppc\" or some such, but that's already the default.)\n  #\n  [powerpc*-*-* | power[3-9]-*-*])\n    abilist=\"32\"\n    cclist=\"gcc cc\"\n    cc_cflags=\"-O2\"\n    gcc_cflags=\"-O2 -mpowerpc\"\n    gcc_cflags_optlist=\"precomp subtype asm cpu\"\n    gcc_cflags_precomp=\"-no-cpp-precomp\"\n    gcc_cflags_subtype=\"-force_cpusubtype_ALL\"\t# for vmx on darwin\n    gcc_cflags_asm=\"\"\n    gcc_cflags_cpu=\"\"\n\n    # grab this object, though it's not a true cycle counter routine\n    SPEED_CYCLECOUNTER_OBJ=powerpc.lo\n    cyclecounter_size=0\n\n    case $host_cpu in\n      powerpc740 | powerpc750)\n        path=\"powerpc32/750 powerpc32\" ;;\n      powerpc7400)\n        path=\"powerpc32/vmx powerpc32/750 powerpc32\" ;;\n      [powerpc74[45]?])\n        path=\"powerpc32/vmx powerpc32 powerpc32\" ;;\n      *)\n        path=\"powerpc32\" ;;\n    esac\n\n    # gcc 2.7.2 knows -mcpu=403, 601, 603, 604.\n    # gcc 2.95 adds 401, 505, 602, 603e, ec603e, 604e, 620, 740, 750,\n    #   801, 821, 823, 860.\n    # gcc 3.0 adds 630, rs64a.\n    # gcc 3.1 adds 405, 7400, 7450.\n    # gcc 3.2 adds nothing.\n    # gcc 3.3 adds power3, power4, 8540.  power3 seems to be a synonym for 630.\n    # gcc pre-release 3.4 adds 405fp, 440, 440fp, 970.\n    #\n    # FIXME: The way 603e falls back to 603 for gcc 2.7.2 should be\n    # done for all the others too.  
But what would be the correct\n    # arrangements?\n    #\n    case $host_cpu in\n      powerpc401)   gcc_cflags_cpu=\"-mcpu=401\" ;;\n      powerpc403)   gcc_cflags_cpu=\"-mcpu=403\" ;;\n      powerpc405)   gcc_cflags_cpu=\"-mcpu=405\" ;;\n      powerpc505)   gcc_cflags_cpu=\"-mcpu=505\" ;;\n      powerpc601)   gcc_cflags_cpu=\"-mcpu=601\" ;;\n      powerpc602)   gcc_cflags_cpu=\"-mcpu=602\" ;;\n      powerpc603)   gcc_cflags_cpu=\"-mcpu=603\" ;;\n      powerpc603e)  gcc_cflags_cpu=\"-mcpu=603e -mcpu=603\" ;;\n      powerpc604)   gcc_cflags_cpu=\"-mcpu=604\" ;;\n      powerpc604e)  gcc_cflags_cpu=\"-mcpu=604e -mcpu=604\" ;;\n      powerpc620)   gcc_cflags_cpu=\"-mcpu=620\" ;;\n      powerpc630)   gcc_cflags_cpu=\"-mcpu=630\" ;;\n      powerpc740)   gcc_cflags_cpu=\"-mcpu=740\" ;;\n      powerpc7400 | powerpc7410)\n\t\t    gcc_cflags_asm=\"-Wa,-maltivec\"\n  \t\t    gcc_cflags_cpu=\"-mcpu=7400 -mcpu=750\" ;;\n      powerpc74[45]?)\n            gcc_cflags_asm=\"-Wa,-maltivec\"\n  \t\t    gcc_cflags_cpu=\"-mcpu=7450\" ;;\n      powerpc750)   gcc_cflags_cpu=\"-mcpu=750\" ;;\n      powerpc801)   gcc_cflags_cpu=\"-mcpu=801\" ;;\n      powerpc821)   gcc_cflags_cpu=\"-mcpu=821\" ;;\n      powerpc823)   gcc_cflags_cpu=\"-mcpu=823\" ;;\n      powerpc860)   gcc_cflags_cpu=\"-mcpu=860\" ;;\n      powerpc970)   gcc_cflags_cpu=\"-mcpu=970\" ;;\n    esac\n\n    case $host in\n      *-*-aix*)\n        cclist=\"gcc xlc cc\"\n        xlc_cflags=\"-O2 -qmaxmem=20000\"\n        xlc_cflags_optlist=\"arch\"\n\n        # xlc (what version?) 
knows -qarch=ppc, ppcgr, 601, 602, 603, 604,\n        # 403, rs64a\n        # -qarch=ppc is needed, so ensure everything falls back to that.\n        # FIXME: Perhaps newer versions know more flavours.\n        #\n    \tcase $host_cpu in\n\t  powerpc403)   xlc_cflags_arch=\"-qarch=403 -qarch=ppc\" ;;\n\t  powerpc601)   xlc_cflags_arch=\"-qarch=601 -qarch=ppc\" ;;\n\t  powerpc602)   xlc_cflags_arch=\"-qarch=602 -qarch=ppc\" ;;\n\t  powerpc603)   xlc_cflags_arch=\"-qarch=603 -qarch=ppc\" ;;\n\t  powerpc603e)  xlc_cflags_arch=\"-qarch=603 -qarch=ppc\" ;;\n\t  powerpc604)   xlc_cflags_arch=\"-qarch=604 -qarch=ppc\" ;;\n\t  powerpc604e)  xlc_cflags_arch=\"-qarch=604 -qarch=ppc\" ;;\n\t  *)            xlc_cflags_arch=\"-qarch=ppc\" ;;\n        esac\n        ;;\n    esac\n\n    case $host in\n      POWERPC64_PATTERN)\n        case $host_cpu in\n\t  powerpc970)\t\tvmx_path=\"powerpc64/vmx\" ;;\n\t  *)\t\t\tvmx_path=\"\" ;;\n\tesac\n        case $host in\n          *-*-aix*)\n            # On AIX a true 64-bit ABI is available.\n            # Need -Wc to pass object type flags through to the linker.\n            abilist=\"aix64 $abilist\"\n            cclist_aix64=\"gcc xlc\"\n            gcc_aix64_cflags=\"-O2 -maix64 -mpowerpc64\"\n            gcc_aix64_cflags_optlist=\"cpu\"\n\t    gcc_aix64_ldflags=\"-Wc,-maix64\"\n            xlc_aix64_cflags=\"-O2 -q64 -qtune=pwr3 -qmaxmem=20000\"\n\t    xlc_aix64_ldflags=\"-Wc,-q64\"\n            # Must indicate object type to ar and nm\n\t    ar_aix64_flags=\"-X64\"\n\t    nm_aix64_flags=\"-X64\"\n            path_aix64=\"powerpc64/mode64 $vmx_path powerpc64\"\n            # grab this object, though it's not a true cycle counter routine\n            SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo\n            cyclecounter_size_aix64=0\n            ;;\n          *-*-darwin*)\n            # On Darwin we can use 64-bit instructions with a longlong limb,\n            # but the chip still in 32-bit mode.\n            # In theory this can be 
used on any OS which knows how to save\n            # 64-bit registers in a context switch.\n            #\n            # Note that we must use -mpowerpc64 with gcc, since the\n            # longlong.h macros expect limb operands in a single 64-bit\n            # register, not two 32-bit registers as would be given for a\n            # long long without -mpowerpc64.  In theory we could detect and\n            # accomodate both styles, but the proper 64-bit registers will\n            # be fastest and are what we really want to use.\n            #\n\t    # One would think -mpowerpc64 would set the assembler in the right\n\t    # mode to handle 64-bit instructions.  But for that, also\n\t    # -force_cpusubtype_ALL is needed.\n\t    #\n\t    # Do not use -fast for Darwin, it actually adds options\n\t    # incompatible with a shared library.\n\t    #\n\t    abilist=\"mode64 mode32 $abilist\"\n\t    gcc_cflags_opt=\"-O3 -O2 -O1\"\t# will this become used?\n\t    cclist_mode32=\"gcc\"\n\t    gcc_mode32_cflags=\"-mpowerpc64\"\n\t    gcc_mode32_cflags_optlist=\"subtype cpu opt\"\n\t    gcc_mode32_cflags_subtype=\"-force_cpusubtype_ALL\"\n\t    gcc_mode32_cflags_opt=\"-O3 -O2 -O1\"\n\t    path_mode32=\"powerpc64/mode32 $vmx_path powerpc64\"\n\t    limb_mode32=longlong\n\t    cclist_mode64=\"gcc\"\n\t    gcc_mode64_cflags=\"-m64\"\n\t    gcc_mode64_cflags_optlist=\"cpu opt\"\n\t    gcc_mode64_cflags_opt=\"-O3 -O2 -O1\"\n\t    path_mode64=\"powerpc64/mode64 $vmx_path powerpc64\"\n            SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo\n            cyclecounter_size_mode64=0\n\t    any_mode64_testlist=\"sizeof-long-8\"\n\t    ;;\n\t  *-*-linux* | *-*-*bsd*)\n\t    # On GNU/Linux, assume the processor is in 64-bit mode.  Some\n\t    # environments have a gcc that is always in 64-bit mode, while\n\t    # others require -m64, hence the use of cflags_maybe.  
The\n\t    # sizeof-long-8 test checks the mode is right (for the no option\n\t    # case).\n            #\n            # -mpowerpc64 is not used, since it should be the default in\n            # 64-bit mode.  (We need its effect for the various longlong.h\n            # asm macros to be right of course.)\n            #\n            # gcc64 was an early port of gcc to 64-bit mode, but should be\n            # obsolete before too long.  We prefer plain gcc when it knows\n            # 64-bits.\n\t    #\n\t    abilist=\"mode64 mode32 $abilist\"\n\t    cclist_mode32=\"gcc\"\n\t    gcc_mode32_cflags=\"-mpowerpc64 -Wa,-mppc64\"\n\t    gcc_mode32_cflags_optlist=\"cpu opt\"\n\t    gcc_mode32_cflags_opt=\"-O3 -O2 -O1\"\n\t    path_mode32=\"powerpc64/mode32 $vmx_path powerpc64\"\n\t    limb_mode32=longlong\n\t    cclist_mode64=\"gcc gcc64\"\n\t    gcc_mode64_cflags_maybe=\"-m64\"\n\t    gcc_mode64_cflags_optlist=\"cpu opt\"\n\t    gcc_mode64_cflags_opt=\"-O3 -O2 -O1\"\n\t    path_mode64=\"powerpc64/mode64 $vmx_path powerpc64\"\n            SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo\n            cyclecounter_size_mode64=0\n\t    any_mode64_testlist=\"sizeof-long-8\"\n\t    ;;\n        esac\n        ;;\n    esac\n    ;;\n\n  *sparc*-*-*)\n    # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI.  We've\n    # had various bug reports where users have set CFLAGS for their desired\n    # mode, but not set our ABI.  For some reason it's sparc where this\n    # keeps coming up, presumably users there are accustomed to driving the\n    # compiler mode that way.  
The effect of our testlist setting is to\n    # reject ABI=64 in favour of ABI=32 if the user has forced the flags to\n    # 32-bit mode.\n    #\n    abilist=\"32\"\n    cclist=\"gcc acc cc\"\n    any_testlist=\"sizeof-long-4\"\n    GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)\n\n    case $host_cpu in\n      sparcv8 | microsparc | turbosparc)\n        path=\"sparc32\" ;;\n      supersparc)\n        path=\"sparc32\" ;;\n      sparc64 | sparcv9* | ultrasparc*)\n        path=\"sparc32/v9 sparc32\" ;;\n      *)\n        path=\"sparc32\" ;;\n    esac\n\n    # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the\n    # assembler.  Add it explicitly since the solaris assembler won't accept\n    # our sparc32/v9 asm code without it.  gas accepts -xarch=v8plus too, so\n    # it can be in the cflags unconditionally (though gas doesn't need it).\n    #\n    # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past\n    # gcc doesn't know that flag, hence cflags_maybe.  Note that -m32 cannot\n    # be done through the optlist since the plain cflags would be run first\n    # and we don't want to require the default mode (whatever it is) works.\n    #\n    # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the\n    # latter would be used in the 64-bit ABI on systems like \"*bsd\" where\n    # abilist=\"64\" only.\n    #\n    case $host_cpu in\n      sparc64 | sparcv9* | ultrasparc*)\n        gcc_cflags=\"-O2 -Wa,-xarch=v8plus\" ;;\n      *)\n        gcc_cflags=\"-O2\" ;;\n    esac\n    gcc_32_cflags_maybe=\"-m32\"\n    gcc_cflags_optlist=\"cpu\"\n\n    # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.\n    # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,\n    #   sparclet, tsc701, v9, ultrasparc.  
A warning is given that the\n    #   plain -m forms will disappear.\n    # gcc 3.0 adds nothing.\n    # gcc 3.1 adds nothing.\n    # gcc 3.2 adds nothing.\n    # gcc 3.3 adds ultrasparc3.\n    #\n    case $host_cpu in\n      supersparc)           gcc_cflags_cpu=\"-mcpu=supersparc -msupersparc\" ;;\n      sparcv8 | microsparc | turbosparc)\n\t\t\t    gcc_cflags_cpu=\"-mcpu=v8 -mv8\" ;;\n      sparc64 | sparcv9*)   gcc_cflags_cpu=\"-mcpu=v9 -mv8\" ;;\n      ultrasparc3)          gcc_cflags_cpu=\"-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8\" ;;\n      ultrasparc*)          gcc_cflags_cpu=\"-mcpu=ultrasparc -mv8\" ;;\n      *)                    gcc_cflags_cpu=\"-mcpu=v7 -mcypress\" ;;\n    esac\n\n    # SunPRO cc and acc, and SunOS bundled cc\n    case $host in\n      *-*-solaris* | *-*-sunos*)\n    \t# Note no -g, it disables all optimizations.\n    \tcc_cflags=\n    \tcc_cflags_optlist=\"opt arch cpu\"\n\n        # SunOS cc doesn't know -xO4, fallback to -O2.\n    \tcc_cflags_opt=\"-xO4 -O2\"\n\n        # SunOS cc doesn't know -xarch, apparently always generating v7\n        # code, so make this optional\n    \tcase $host_cpu in\n    \t  sparcv8 | microsparc | supersparc | turbosparc)\n\t\t\t\t\t      cc_cflags_arch=\"-xarch=v8\" ;;\n    \t  sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch=\"-xarch=v8plus\" ;;\n    \t  *)                                  cc_cflags_arch=\"-xarch=v7\" ;;\n    \tesac\n\n        # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.\n    \t# SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,\n    \t#   micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.\n    \t# SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.\n        #\n    \t# FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for\n    \t# ultrasparc3?\n    \t#\n    \tcase $host_cpu in\n    \t  supersparc)   cc_cflags_cpu=\"-xchip=super\" ;;\n    \t  microsparc)   cc_cflags_cpu=\"-xchip=micro\" ;;\n    \t  turbosparc)   
cc_cflags_cpu=\"-xchip=micro2\" ;;\n    \t  ultrasparc)   cc_cflags_cpu=\"-xchip=ultra\" ;;\n    \t  ultrasparc2)  cc_cflags_cpu=\"-xchip=ultra2\" ;;\n    \t  ultrasparc2i) cc_cflags_cpu=\"-xchip=ultra2i\" ;;\n    \t  ultrasparc3)  cc_cflags_cpu=\"-xchip=ultra3 -xchip=ultra\" ;;\n    \t  *)            cc_cflags_cpu=\"-xchip=generic\" ;;\n    \tesac\n    esac\n\n    case $host_cpu in\n      sparc64 | sparcv9* | ultrasparc*)\n        case $host in\n          # Solaris 6 and earlier cannot run ABI=64 since it doesn't save\n          # registers properly, so ABI=32 is left as the only choice.\n          #\n          [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;\n\n          # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only\n          # choice.  In fact they need no special compiler flags, gcc -m64\n          # is the default, but it doesn't hurt to add it.  v9 CPUs always\n          # use the sparc64 port, since the plain 32-bit sparc ports don't\n          # run on a v9.\n          #\n          *-*-*bsd*) abilist=\"64\" ;;\n\n          # For all other systems, we try both 64 and 32.\n          #\n          # GNU/Linux sparc64 has only recently gained a 64-bit user mode.\n          # In the past sparc64 meant a v9 cpu, but there were no 64-bit\n          # operations in user mode.  We assume that if \"gcc -m64\" works\n          # then the system is suitable.  Hopefully even if someone attempts\n          # to put a new gcc and/or glibc on an old system it won't run.\n          #\n          *) abilist=\"64 32\" ;;\n        esac\n\n        path_64=\"sparc64\"\n        cclist_64=\"gcc\"\n        any_64_testlist=\"sizeof-long-8\"\n\n        # gcc -mptr64 is probably implied by -m64, but we're not sure if\n        # this was always so.  
On Solaris in the past we always used both\n        # \"-m64 -mptr64\".\n        #\n        # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on\n        # solaris, but it would seem likely that if gcc is going to generate\n        # 64-bit code it will have to add that option itself where needed.\n        # An extra copy of this option should be harmless though, but leave\n        # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the\n        # higher cpu types instead.)\n        #\n        gcc_64_cflags=\"-O2 -m64 -mptr64\"\n        gcc_64_ldflags=\"-Wc,-m64\"\n        gcc_64_cflags_optlist=\"cpu\"\n\n        case $host in\n          *-*-solaris*)\n            # Sun cc.\n            #\n            # -fast enables different optimizations depending on compiler\n            # version.  Unfortunately it does things according to the native\n            # system, which may not be optimal when cross compiling (to a\n            # different sparc).  -xchip from cc_cflags_cpu will override at\n            # least that part of its selections.\n            #\n             # -fns=no and -fsimple=1 disable some transformations that\n            # conflict with IEEE 754, which some compiler versions perform\n            # under -fast.\n            #\n            # In any case -fast can result in incorrect optimisations and so\n            # has been removed (see\n            # http://swox.com/list-archives/gmp-bugs/2008-April/000987.html)\n            #\n           cclist_64=\"$cclist_64 cc\"\n            cc_64_cflags=\"-xarch=v9\"\n            cc_64_cflags_optlist=\"cpu\"\n            ;;\n        esac\n\n        # using the v9 %tick register\n        SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo\n        SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo\n        cyclecounter_size_32=2\n        cyclecounter_size_64=2\n        ;;\n    esac\n    ;;\n\n  # AMD and Intel x86 configurations, including AMD64\n  #\n  # Rumour has it gcc -O2 used to give worse register allocation 
than just\n  # -O, but lets assume that's no longer true.\n  #\n  # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc.  -m64 is\n  # the default in such a build (we think), so -m32 is essential for ABI=32.\n  # This is, of course, done for any $host_cpu, not just x86_64, so we can\n  # get such a gcc into the right mode to cross-compile to say i486-*-*.\n  #\n  # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use\n  # it when it works.  We check sizeof(long)==4 to ensure we get the right\n  # mode, in case -m32 has failed not because it's an old gcc, but because\n  # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.\n  #\n  X86_PATTERN | X86_64_PATTERN)\n    abilist=\"32\"\n    cclist=\"gcc icc cc\"\n    gcc_cflags=\"-O2 $fomit_frame_pointer\"\n\tcase $host in\n\t*-*-linux-gnu)\n\t    gcc_32_ldflags=\"-Wl,-z,noexecstack\" ;;\n\tesac\n    gcc_32_cflags_maybe=\"-m32\"\n    icc_cflags=\"-no-gcc\"\n    icc_cflags_optlist=\"opt\"\n    icc_cflags_opt=\"-O3 -O2 -O1\"\n    any_32_testlist=\"sizeof-long-4\"\n    CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'\n\n    # Availability of rdtsc is checked at run-time.\n    SPEED_CYCLECOUNTER_OBJ=pentium.lo\n\n    case $host in\n      *-*-solaris* | *-*-sunos*)\n    \t# Note no -g, it disables all optimizations.\n    \tcc_cflags=\n    \tcc_cflags_optlist=\"opt arch cpu\"\n\n        # SunOS cc doesn't know -xO4, fallback to -O2.\n    \tcc_cflags_opt=\"-xO4 -O2\" ;;\n    esac\n\n    # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486.  
These\n    #     represent -mcpu= since -m486 doesn't generate 486 specific insns.\n    # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.\n    # gcc 3.0 adds athlon.\n    # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,\n    #     athlon-tbird, athlon-4, athlon-xp, athlon-mp.\n    # gcc 3.2 adds winchip2.\n    # gcc 3.3 adds winchip-c6.\n    # gcc 3.3.1 from mandrake adds k8 and knows -mtune.\n    # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.\n    #\n    # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an\n    # old version of mpz/powm.c.  Seems to be fine with the current code, so\n    # no need for any restrictions on that option.\n    #\n    # -march=pentiumpro can fail if the assembler doesn't know \"cmov\"\n    # (eg. solaris 2.8 native \"as\"), so always have -march=pentium after\n    # that as a fallback.\n    #\n    # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or\n    # may not be supported by the assembler and/or the OS, and is bad in gcc\n    # prior to 3.3.  The tests will reject these if no good, so fallbacks\n    # like \"-march=pentium4 -mno-sse2\" are given to try also without SSE2.\n    # Note the relevant -march types are listed in the optflags handling\n    # below, be sure to update there if adding new types emitting SSE2.\n    #\n    # -mtune is used at the start of each cpu option list to give something\n    # gcc 3.4 will use, thereby avoiding warnings from -mcpu.  -mcpu forms\n    # are retained for use by prior gcc.  For example pentium has\n    # \"-mtune=pentium -mcpu=pentium ...\", the -mtune is for 3.4 and the\n    # -mcpu for prior.  
If there's a brand new choice in 3.4 for a chip,\n    # like k8 for x86_64, then it can be the -mtune at the start, no need to\n    # duplicate anything.\n    #\n    gcc_cflags_optlist=\"cpu arch\"\n    case $host_cpu in\n      i386*)\n        gcc_cflags_cpu=\"-mtune=i386 -mcpu=i386 -m386\"\n        gcc_cflags_arch=\"-march=i386\"\n        ;;\n      i486*)\n        gcc_cflags_cpu=\"-mtune=i486 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=i486\"\n        ;;\n      i586 | pentium)\n        gcc_cflags_cpu=\"-mtune=pentium -mcpu=pentium -m486\"\n        gcc_cflags_arch=\"-march=pentium\"\n        ;;\n      pentiummmx)\n        gcc_cflags_cpu=\"-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486\"\n        gcc_cflags_arch=\"-march=pentium-mmx -march=pentium\"\n        ;;\n      i686 | pentiumpro)\n        gcc_cflags_cpu=\"-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=pentiumpro -march=pentium\"\n        ;;\n      pentium2)\n        gcc_cflags_cpu=\"-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=pentium2 -march=pentiumpro -march=pentium\"\n        ;;\n      pentium3)\n        gcc_cflags_cpu=\"-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      core)\n        gcc_cflags_cpu=\"-mtune=pentium-m -mtune=pentium3m -mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=pentium-m -march=pentium3m -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      k6)\n        gcc_cflags_cpu=\"-mtune=k6 -mcpu=k6 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=k6\"\n        ;;\n      k62)\n        gcc_cflags_cpu=\"-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=k6-2 -march=k6\"\n        ;;\n      k63)\n        gcc_cflags_cpu=\"-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 
-m486\"\n        gcc_cflags_arch=\"-march=k6-3 -march=k6\"\n        ;;\n      k7 | athlon)\n        # Athlon instruction costs are close to P6 (3 cycle load latency,\n        # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't\n        # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.\n        gcc_cflags_cpu=\"-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      i786 | pentium4)\n        # pentiumpro is the primary fallback when gcc doens't know pentium4.\n        # This gets us cmov to eliminate branches.  Maybe \"athlon\" would be\n        # a possibility on gcc 3.0.\n        #\n        gcc_cflags_cpu=\"-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium\"\n        ;;\n      prescott)\n        # prescott is defined for our purposes as 32 bit pentium4 with SSE3\n        gcc_cflags_cpu=\"-mtune=prescott -mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=prescott -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium\"\n        ;;\n      netburst | netburstlahf)\n        # prescott is defined for our purposes as 32 bit pentium4 with SSE3\n        gcc_cflags_cpu=\"-mtune=nocona -mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=nocona -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium\"\n        ;;\n      nano | viac32)\n        # Not sure of the best fallbacks here for -mcpu.\n        # c3-2 has sse and mmx, so pentium3 is good for -march.\n        gcc_cflags_cpu=\"-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      viac3*)\n        # Not sure of the best fallbacks here.\n        gcc_cflags_cpu=\"-mtune=c3 
-mcpu=c3 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=c3 -march=pentium-mmx -march=pentium\"\n        ;;\n      x86_64 | k8)\n        gcc_cflags_cpu=\"-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      k10 | k102 | k103)\n        gcc_cflags_cpu=\"-mtune=amdfam10 -mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=amdfam10 -march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      bulldozer)\n        gcc_cflags_cpu=\"-mtune=bdver1 -mtune=amdfam10 -mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      piledriver)\n        gcc_cflags_cpu=\"-mtune=bdver2 -mtune=amdfam10 -mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=bdver2 -march=amdfam10 -march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      bobcat)\n        gcc_cflags_cpu=\"-mtune=btver1 -mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=btver1 -march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      atom)\n        gcc_cflags_cpu=\"-mtune=atom -mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=atom -march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium\"\n        ;;\n      core2 | penryn)\n        gcc_cflags_cpu=\"-mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      nehalem | westmere)\n        gcc_cflags_cpu=\"-mtune=corei7 -mtune=core2 -mtune=nocona 
-mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      haswell)\n        gcc_cflags_cpu=\"-mtune=corei7 -mtune=corei7 -mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=corei7 -march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      sandybridge | ivybridge | haswellavx)\n        gcc_cflags_cpu=\"-mtune=corei7-avx -mtune=corei7 -mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=corei7-avx -march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      skylake)\n        gcc_cflags_cpu=\"-mtune=corei7 -mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      skylakeavx)\n        gcc_cflags_cpu=\"-mtune=skylake -mtune=corei7-avx -mtune=corei7 -mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=skylake -march=corei7-avx -march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      broadwell)\n        gcc_cflags_cpu=\"-mtune=broadwell -mtune=corei7-avx -mtune=corei7 -mtune=core2 -mtune=nocona -mtune=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=broadwell -march=corei7-avx -march=corei7 -march=core2 -march=nocona -march=pentium3 -march=pentiumpro -march=pentium\"\n        ;;\n      *)\n        gcc_cflags_cpu=\"-mtune=i486 -mcpu=i486 -m486\"\n        gcc_cflags_arch=\"-march=i486\"\n        ;;\n    esac\n\n    case $host_cpu in\n      i386*)                   path=\"x86/i386 x86\" ;;\n      i486*)                   
path=\"x86/i486 x86\" ;;\n      i586 | pentium)          path=\"x86/pentium x86\" ;;\n      pentiummmx)              path=\"x86/pentium/mmx x86/pentium x86\" ;;\n      i686 | pentiumpro)       path=\"x86/p6 x86\" ;;\n      pentium2)                path=\"x86/p6/mmx x86/p6 x86\" ;;\n      pentium3)                path=\"x86/p6/p3mmx x86/p6/mmx x86/p6 x86\";;\n      core)                    path=\"x86/p6/p3mmx x86/p6/mmx x86/p6 x86\";;\n      [k6[23]])                path=\"x86/k6/k62mmx x86/k6/mmx x86/k6 x86\" ;;\n      k6)                      path=\"x86/k6/mmx x86/k6 x86\" ;;\n      athlon | k7 | x86_64 | atom)   path=\"x86/k7/mmx x86/k7 x86\" ;;\n      k102 | k103 | bulldozer | piledriver)       path=\"x86/k7/mmx/k8/k10/k102  x86/k7/mmx/k8/k10 x86/k7/mmx/k8 x86/k7/mmx x86/k7 x86\" ;;\n      k10)\t\t\tpath=\"x86/k7/mmx/k8/k10 x86/k7/mmx/k8 x86/k7/mmx x86/k7 x86\" ;;\n      k8 | bobcat)\t\tpath=\"x86/k7/mmx/k8 x86/k7/mmx x86/k7 x86\" ;;\n      core2 | penryn)\t\tpath=\"x86/core2 x86\" ;;\n      i786 | pentium4)\t\tpath=\"x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86\" ;;\n      nehalem | westmere | sandybridge | ivybridge | haswell | haswellavx | skylake | skylakeavx | broadwell)\tpath=\"x86/nehalem x86\" ;;\n      prescott | netburst | netburstlahf)       path=\"x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86\" ;;\n      # VIA/Centaur processors, sold as CyrixIII and C3.\n      nano | viac32)           path=\"x86/p6/p3mmx x86/p6/mmx x86/p6 x86\";;\n      viac3*)                  path=\"x86/pentium/mmx x86/pentium x86\";;\n      *)                       path=\"x86\" ;;\n    esac\n\n    # 32bit apple darwin doesn't like our PIC format asm code\n    case $host in\n\ti[34567]86-apple-darwin* | pentium*-apple-darwin* | prescott-apple-darwin* | core-apple-darwin* )\tpath=\"x86/applenopic\" ;;\n\t# assume Core2 or later\n\t*-apple-darwin* )\tpath=\"x86/applenopic/core2 x86/applenopic\" ;;\n\t*)\t\t\t;;\n    esac\n\n    # If the user asked for a fat build, 
override the path set above\n    if test $enable_fat = yes; then\n      gcc_cflags_cpu=\"\"\n      gcc_cflags_arch=\"\"\n      extra_functions=\"$extra_functions fat fat_entry\"\n      path=\"x86 x86/fat\"\n      fat_path=\"x86 x86/fat x86/i486\n\t\tx86/k6 x86/k6/mmx x86/k6/k62mmx\n\t\tx86/k7 x86/k7/mmx x86/k7/mmx/k8 x86/k7/mmx/k8/k10 x86/k7/mmx/k8/k10/k102\n\t\tx86/pentium x86/pentium/mmx\n\t\tx86/p6 x86/p6/mmx x86/p6/p3mmx\n\t\tx86/pentium4 x86/pentium4/mmx x86/pentium4/sse2\n\t\tx86/core2 x86/nehalem\"\n      fat_functions=\"add_n addmul_1 add_err1_n add_err2_n copyd copyi\n\t\t     divexact_1 divexact_by3c divexact_byfobm1 divrem_1 divrem_2\n\t\t\t divrem_euclidean_qr_1\n\t\t\tdivrem_euclidean_qr_2 gcd_1 lshift\n\t\t     mod_1 mod_34lsub1 modexact_1c_odd mul_1 mul_basecase\n\t\t     mulmid_basecase preinv_divrem_1 preinv_mod_1 redc_1 rshift\n\t\t     sqr_basecase sub_n submul_1 sumdiff_n sub_err1_n sub_err2_n\"\n      fat_exclude=\"\"\n      fat_thresholds=\"MUL_KARATSUBA_THRESHOLD MUL_TOOM3_THRESHOLD\n\t\t      SQR_KARATSUBA_THRESHOLD SQR_TOOM3_THRESHOLD\"\n    fi\n\n    case $host in\n      X86_64_PATTERN)\n        abilist=\"64 32\"\n        cclist_64=\"gcc cc\"\n        gcc_64_cflags=\"-m64\"\n\tgcc_cflags_opt=\"-O2 -O1\"\n\tcase $host in\n\t*-*-linux-gnu)\n\t    gcc_64_ldflags=\"-Wl,-z,noexecstack\" ;;\n\tesac\n        gcc_64_cflags_optlist=\"opt arch cpu\"\n        SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo\n        CALLING_CONVENTIONS_OBJS_64='x86_64call.lo x86_64check$U.lo'\n        cyclecounter_size_64=2\n        case $host in\n           *-*-solaris* | *-*-sunos*)\n           # Note no -g, it disables all optimizations.\n           cc_64_cflags=\"-m64\"\n           cc_64_cflags_optlist=\"opt arch cpu\"\n\n           # SunOS cc doesn't know -xO4, fallback to -O2.\n           cc_64_cflags_opt=\"-xO4 -O2\" ;;\n        esac\n\n        case $host in\n          # NOTE x86_64w and x86_64 paths MUST be the same , for the fat structure\n          
x86_64-pc-msys|x86_64-w64-mingw*|x86_64-*-cygwin*)\n            path_64=\"x86_64w\" ;;\n          netburst-pc-msys|netburst-w64-mingw*|netburst-*-cygwin*)\n            path_64=\"x86_64w/netburst x86_64w\" ;;\n          netburstlahf-pc-msys|netburstlahf-w64-mingw*|netburstlahf-*-cygwin*)\n            path_64=\"x86_64w/netburst x86_64w\" ;;\n          k8-pc-msys|k8-w64-mingw*|k8-*-cygwin*)\n            path_64=\"x86_64w/k8/k8only x86_64w/k8 x86_64w\" ;;\n          k10-pc-msys|k10-w64-mingw*|k10-*-cygwin*)\n            path_64=\"x86_64w/k8/k10 x86_64w/k8 x86_64w\" ;;\n          k102-pc-msys|k102-w64-mingw*|k102-*-cygwin*)\n            path_64=\"x86_64w/k8/k10/k102 x86_64w/k8/k10 x86_64w/k8 x86_64w\" ;;\n          k103-pc-msys|k103-w64-mingw*|k103-*-cygwin*)\n            path_64=\"x86_64w/k8/k10/k102 x86_64w/k8/k10 x86_64w/k8 x86_64w\" ;;\n          bulldozer-pc-msys|bulldozer-w64-mingw*|bulldozer-*-cygwin*)\n            path_64=\"x86_64w/bulldozer x86_64w/k8/k10/k102 x86_64w/k8/k10 x86_64w/k8 x86_64w\" ;;\n          piledriver-pc-msys|piledriver-w64-mingw*|piledriver-*-cygwin*)\n            path_64=\"x86_64w/bulldozer/piledriver x86_64w/bulldozer x86_64w/k8/k10/k102 x86_64w/k8/k10 x86_64w/k8 x86_64w\" ;;\n          bobcat-pc-msys|bobcat-w64-mingw*|bobcat-*-cygwin*)\n            path_64=\"x86_64w/bobcat x86_64w\" ;;\n          core2-pc-msys|core2-w64-mingw*|core2-*-cygwin*)\n            path_64=\"x86_64w/core2 x86_64w\" ;;\n          penryn-pc-msys|penryn-w64-mingw*|penryn-*-cygwin*)\n            path_64=\"x86_64w/core2/penryn x86_64w/core2 x86_64w\" ;;\n          nehalem-pc-msys|nehalem-w64-mingw*|nehalem-*-cygwin*)\n            path_64=\"x86_64w/nehalem x86_64w\" ;;\n          westmere-pc-msys|westmere-w64-mingw*|westmere-*-cygwin*)\n            path_64=\"x86_64w/nehalem/westmere x86_64w/nehalem x86_64w\" ;;\n          sandybridge-pc-msys|sandybridge-w64-mingw*|sandybridge-*-cygwin*)\n            path_64=\"x86_64w/sandybridge x86_64w\" ;;\n          
ivybridge-pc-msys|ivybridge-w64-mingw*|ivybridge-*-cygwin*)\n            path_64=\"x86_64w/sandybridge/ivybridge x86_64w/sandybridge x86_64w\" ;;\n          haswell-pc-msys|haswell-w64-mingw*|haswell-*-cygwin*)\n            path_64=\"x86_64w/haswell x86_64w/sandybridge x86_64w\" ;;\n          haswellavx-pc-msys|haswellavx-w64-mingw*|haswellavx-*-cygwin*)\n            path_64=\"x86_64w/haswell/avx x86_64w/haswell x86_64w/sandybridge x86_64w\" ;;\n          skylake-pc-msys|skylake-w64-mingw*|skylake-*-cygwin*)\n            path_64=\"x86_64w/skylake x86_64w/sandybridge x86_64w\" ;;\n          skylakeavx-pc-msys|skylakeavx-w64-mingw*|skylakeavx-*-cygwin*)\n            path_64=\"x86_64w/skylake/avx x86_64w/skylake x86_64w/haswell x86_64w/sandybridge x86_64w\" ;;\n          broadwell-pc-msys|broadwell-w64-mingw*|broadwell-*-cygwin*)\n            path_64=\"x86_64w/haswell/broadwell x86_64w/haswell/avx x86_64w/haswell x86_64w/sandybridge x86_64w\" ;;\n          atom-pc-msys|atom-w64-mingw*|atom-*-cygwin*)\n            path_64=\"x86_64w/atom x86_64w\" ;;\n          nano-pc-msys|nano-w64-mingw*|nano-*-cygwin*)\n            path_64=\"x86_64w/k8/k8only x86_64w/k8 x86_64w\" ;;\n\n          x86_64-*-*)\n            path_64=\"x86_64\" ;;\n          netburst-*-*)\n            path_64=\"x86_64/netburst x86_64\" ;;\n          netburstlahf-*-*)\n            path_64=\"x86_64/netburst x86_64\" ;;\n          k8-*-*)\n            path_64=\"x86_64/k8/k8only x86_64/k8 x86_64\" ;;\n          k10-*-*)\n            path_64=\"x86_64/k8/k10 x86_64/k8 x86_64\" ;;\n          k102-*-*)\n            path_64=\"x86_64/k8/k10/k102 x86_64/k8/k10 x86_64/k8 x86_64\" ;;\n          k103-*-*)\n            path_64=\"x86_64/k8/k10/k102 x86_64/k8/k10 x86_64/k8 x86_64\" ;;\n          bulldozer-*-*)\n            path_64=\"x86_64/bulldozer x86_64/k8/k10/k102 x86_64/k8/k10 x86_64/k8 x86_64\" ;;\n          piledriver-*-*)\n            path_64=\"x86_64/bulldozer/piledriver x86_64/bulldozer x86_64/k8/k10/k102 
x86_64/k8/k10 x86_64/k8 x86_64\" ;;\n          bobcat-*-*)\n            path_64=\"x86_64/bobcat x86_64\" ;;\n          core2-*-*)\n            path_64=\"x86_64/core2 x86_64\" ;;\n          penryn-*-*)\n            path_64=\"x86_64/core2/penryn x86_64/core2 x86_64\" ;;\n          nehalem-*-*)\n            path_64=\"x86_64/nehalem x86_64/core2 x86_64\" ;;\n          westmere-*-*)\n            path_64=\"x86_64/nehalem/westmere x86_64/nehalem x86_64/core2 x86_64\" ;;\n          sandybridge-*-*)\n            path_64=\"x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64\" ;;\n          ivybridge-*-*)\n            path_64=\"x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64\" ;;\n          haswell-*-*)\n            path_64=\"x86_64/haswell x86_64/sandybridge x86_64\" ;;\n          haswellavx-*-*)\n            path_64=\"x86_64/haswell/avx x86_64/haswell x86_64/sandybridge x86_64\" ;;\n          broadwell-*-*)\n            path_64=\"x86_64/haswell/broadwell x86_64/haswell/avx x86_64/haswell x86_64/sandybridge x86_64\" ;;\n          skylake-*-*)\n            path_64=\"x86_64/skylake x86_64/sandybridge x86_64\" ;;\n          skylakeavx-*-*)\n            path_64=\"x86_64/skylake/avx x86_64/haswell/avx x86_64/skylake x86_64/haswell x86_64/sandybridge x86_64\" ;;\n          atom-*-*)\n            path_64=\"x86_64/atom x86_64\" ;;\n          nano-*-*)\n            path_64=\"x86_64/k8/k8only x86_64/k8 x86_64\" ;;\n        esac\n\n        # If the user asked for a fat build, override the path set above\n        if test $enable_fat = yes; then\n          gcc_64_cflags_cpu=\"\"\n          gcc_64_cflags_arch=\"\"\n          extra_functions_64=\"$extra_functions_64 fat fat_entry\"\n          case $host in\n            *-pc-msys|*-w64-mingw*|*-*-cygwin*)\n              path_64=\"x86_64w x86_64w/fat\"\n              fat_path_64=\"x86_64w x86_64w/fat x86_64w/netburst x86_64w/k8 x86_64w/k8/k8only x86_64w/k8/k10 x86_64w/k8/k10/k102  x86_64w/core2 
x86_64w/core2/penryn x86_64w/nehalem x86_64w/nehalem/westmere x86_64w/atom x86_64w/bobcat x86_64w/sandybridge x86_64w/haswell x86_64w/haswell/avx x86_64w/haswell/broadwell x86_64w/skylake x86_64w/skylake/avx\" ;;\n            *-*-*)\n              path_64=\"x86_64 x86_64/fat\"\n              fat_path_64=\"x86_64 x86_64/fat x86_64/netburst x86_64/k8 x86_64/k8/k8only x86_64/k8/k10 x86_64/k8/k10/k102  x86_64/core2 x86_64/core2/penryn x86_64/nehalem x86_64/nehalem/westmere x86_64/atom x86_64/bobcat x86_64/bulldozer x86_64/bulldozer/piledriver x86_64/sandybridge x86_64/sandybridge/ivybridge x86_64/haswell x86_64/haswell/avx x86_64/haswell/broadwell x86_64/skylake x86_64/skylake/avx\" ;;\n          esac\n        fi\n        ;;\n\n      esac\n      ;;\n\n\n  # Special CPU \"none\" selects generic C.\n  #\n  none-*-*)\n    abilist=\"long longlong\"\n    cclist_long=$cclist\n    gcc_long_cflags=$gcc_cflags\n    cc_long_cflags=$cc_cflags\n    cclist_longlong=$cclist\n    gcc_longlong_cflags=$gcc_cflags\n    cc_longlong_cflags=$cc_cflags\n    limb_longlong=longlong\n    ;;\n\nesac\n\n# mingw can be built by the cygwin gcc if -mno-cygwin is added.  For\n# convenience add this automatically if it works.  Actual mingw gcc accepts\n# -mno-cygwin too, but of course is the default.  
mingw only runs on the\n# x86s, but allow any CPU here so as to catch \"none\" too.\n#\ncase $host in\n  *-pc-msys|*-*-mingw*)\n    gcc_cflags_optlist=\"$gcc_cflags_optlist nocygwin\"\n    gcc_cflags_nocygwin=\"-mno-cygwin\"\n    ;;\nesac\n\nCFLAGS_or_unset=${CFLAGS-'(unset)'}\nCPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}\n\ncat >&AC_FD_CC <<EOF\nUser:\nABI=$ABI\nCC=$CC\nCFLAGS=$CFLAGS_or_unset\nCPPFLAGS=$CPPFLAGS_or_unset\nMPN_PATH=$MPN_PATH\nGMP:\nabilist=$abilist\ncclist=$cclist\nEOF\n\n\ntest_CFLAGS=${CFLAGS+set}\ntest_CPPFLAGS=${CPPFLAGS+set}\n\nfor abi in $abilist; do\n  abi_last=\"$abi\"\ndone\n\n# If the user specifies an ABI then it must be in $abilist, after that\n# $abilist is restricted to just that choice.\n#\nif test -n \"$ABI\"; then\n  found=no\n  for abi in $abilist; do\n    if test $abi = \"$ABI\"; then found=yes; break; fi\n  done\n  if test $found = no; then\n    AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])\n  fi\n  abilist=\"$ABI\"\nfi\n\nfound_compiler=no\n\nfor abi in $abilist; do\n\n  echo \"checking ABI=$abi\"\n\n  # Suppose abilist=\"64 32\", then for abi=64, will have abi1=\"_64\" and\n  # abi2=\"_64\".  For abi=32, will have abi1=\"_32\" and abi2=\"\".  This is how\n  # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the\n  # abilist), but there's no fallback for $gcc_64_cflags.\n  #\n  abi1=[`echo _$abi | sed 's/[.]//g'`]\n  if test $abi = $abi_last; then abi2=; else abi2=\"$abi1\"; fi\n\n  # Compiler choices under this ABI\n                              eval cclist_chosen=\\\"\\$cclist$abi1\\\"\n  test -n \"$cclist_chosen\" || eval cclist_chosen=\\\"\\$cclist$abi2\\\"\n\n  # If there's a user specified $CC then don't use a list for\n  # $cclist_chosen, just a single value for $ccbase.\n  #\n  if test -n \"$CC\"; then\n\n    # The first word of $CC, stripped of any directory.  
For instance\n    # CC=\"/usr/local/bin/gcc -pipe\" will give \"gcc\".\n    #\n    for ccbase in $CC; do break; done\n    ccbase=`echo $ccbase | sed 's:.*/::'`\n\n    # If this $ccbase is in $cclist_chosen then it's a compiler we know and\n    # we can do flags defaulting with it.  If not, then $cclist_chosen is\n    # set to \"unrecognised\" so no default flags are used.\n    #\n    # \"unrecognised\" is used to avoid bad effects with eval if $ccbase has\n    # non-symbol characters.  For instance ccbase=my+cc would end up with\n    # something like cflags=\"$my+cc_cflags\" which would give\n    # cflags=\"+cc_cflags\" rather than the intended empty string for an\n    # unknown compiler.\n    #\n    found=unrecognised\n    for i in $cclist_chosen; do\n      if test \"$ccbase\" = $i; then\n        found=$ccbase\n        break\n      fi\n    done\n    cclist_chosen=$found\n  fi\n\n  for ccbase in $cclist_chosen; do\n\n    # When cross compiling, look for a compiler with the $host_alias as a\n    # prefix, the same way that AC_CHECK_TOOL does.  But don't do this to a\n    # user-selected $CC.\n    #\n    # $cross_compiling will be yes/no/maybe at this point.  Do the host\n    # prefixing for \"maybe\" as well as \"yes\".\n    #\n    if test \"$cross_compiling\" != no && test -z \"$CC\"; then\n      cross_compiling_prefix=\"${host_alias}-\"\n    fi\n\n    for ccprefix in $cross_compiling_prefix \"\"; do\n\n      cc=\"$CC\"\n      test -n \"$cc\" || cc=\"$ccprefix$ccbase\"\n\n      # If the compiler is gcc but installed under another name, then change\n      # $ccbase so as to use the flags we know for gcc.  This helps for\n      # instance when specifying CC=gcc272 on Debian GNU/Linux, or the\n      # native cc which is really gcc on NeXT or MacOS-X.\n      #\n      # FIXME: There's a slight misfeature here.  
If cc is actually gcc but\n      # gcc is not a known compiler under this $abi then we'll end up\n      # testing it with no flags and it'll work, but chances are it won't be\n      # in the right mode for the ABI we desire.  Let's quietly hope this\n      # doesn't happen.\n      #\n      if test $ccbase != gcc; then\n        GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)\n      fi\n\n      # Similarly if the compiler is IBM xlc but invoked as cc or whatever\n      # then change $ccbase and make the default xlc flags available.\n      if test $ccbase != xlc; then\n        GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)\n      fi\n\n      # acc was Sun's first unbundled compiler back in the SunOS days, or\n      # something like that, but today its man page says it's not meant to\n      # be used directly (instead via /usr/ucb/cc).  The options are pretty\n      # much the same as the main SunPRO cc, so share those configs.\n      #\n      case $host in\n        *sparc*-*-solaris* | *sparc*-*-sunos*)\n          if test \"$ccbase\" = acc; then ccbase=cc; fi ;;\n      esac\n\n      for tmp_cflags_maybe in yes no; do\n                             eval cflags=\\\"\\$${ccbase}${abi1}_cflags\\\"\n        test -n \"$cflags\" || eval cflags=\\\"\\$${ccbase}${abi2}_cflags\\\"\n\n\tif test \"$tmp_cflags_maybe\" = yes; then\n          # don't try cflags_maybe when the user set CFLAGS\n          if test \"$test_CFLAGS\" = set; then continue; fi\n                                     eval cflags_maybe=\\\"\\$${ccbase}${abi1}_cflags_maybe\\\"\n          test -n \"$cflags_maybe\" || eval cflags_maybe=\\\"\\$${ccbase}${abi2}_cflags_maybe\\\"\n          # don't try cflags_maybe if there's nothing set\n          if test -z \"$cflags_maybe\"; then continue; fi\n          cflags=\"$cflags_maybe $cflags\"\n        fi\n\n        # Any user CFLAGS, even an empty string, takes precendence\n        if test \"$test_CFLAGS\" = set; then cflags=$CFLAGS; fi\n\n        # Any user CPPFLAGS, even an empty string, takes 
precendence\n                               eval cppflags=\\\"\\$${ccbase}${abi1}_cppflags\\\"\n        test -n \"$cppflags\" || eval cppflags=\\\"\\$${ccbase}${abi2}_cppflags\\\"\n        if test \"$test_CPPFLAGS\" = set; then cppflags=$CPPFLAGS; fi\n\n        # --enable-profiling adds -p/-pg even to user-specified CFLAGS.\n        # This is convenient, but it's perhaps a bit naughty to modify user\n        # CFLAGS.\n        case \"$enable_profiling\" in\n          prof)       cflags=\"$cflags -p\" ;;\n          gprof)      cflags=\"$cflags -pg\" ;;\n          instrument) cflags=\"$cflags -finstrument-functions\" ;;\n        esac\n\n        GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)\n\n        # If we're supposed to be using a \"long long\" for a limb, check that\n        # it works.\n                                  eval limb_chosen=\\\"\\$limb$abi1\\\"\n        test -n \"$limb_chosen\" || eval limb_chosen=\\\"\\$limb$abi2\\\"\n        if test \"$limb_chosen\" = longlong; then\n          GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)\n        fi\n\n        # The tests to perform on this $cc, if any\n                               eval testlist=\\\"\\$${ccbase}${abi1}_testlist\\\"\n        test -n \"$testlist\" || eval testlist=\\\"\\$${ccbase}${abi2}_testlist\\\"\n        test -n \"$testlist\" || eval testlist=\\\"\\$any${abi1}_testlist\\\"\n        test -n \"$testlist\" || eval testlist=\\\"\\$any${abi2}_testlist\\\"\n\n        testlist_pass=yes\n        for tst in $testlist; do\n          case $tst in\n          gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;\n          gcc-mips-o32)   GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;\n          sizeof*)       GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;\n          esac\n          if test $testlist_pass = no; then break; fi\n        done\n\n        if test $testlist_pass = yes; then\n          found_compiler=yes\n          break\n        fi\n      done\n\n      if test 
$found_compiler = yes; then break; fi\n    done\n\n    if test $found_compiler = yes; then break; fi\n  done\n\n  if test $found_compiler = yes; then break; fi\ndone\n\n\n# If we recognised the CPU, as indicated by $path being set, then insist\n# that we have a working compiler, either from our $cclist choices or from\n# $CC.  We can't let AC_PROG_CC look around for a compiler because it might\n# find one that we've rejected (for not supporting the modes our asm code\n# demands, etc).\n#\n# If we didn't recognise the CPU (and this includes host_cpu=none), then\n# fall through and let AC_PROG_CC look around for a compiler too.  This is\n# mostly in the interests of following a standard autoconf setup, after all\n# we've already tested cc and gcc adequately (hopefully).  As of autoconf\n# 2.50 the only thing AC_PROG_CC really adds is a check for \"cl\" (Microsoft\n# C on MS-DOS systems).\n#\nif test $found_compiler = no && test -n \"$path\"; then\n  AC_MSG_ERROR([could not find a working compiler, see config.log for details])\nfi\n\n\nif test $found_compiler = yes; then\n\n  # If we're creating CFLAGS, then look for optional additions.  
If the user\n  # set CFLAGS then leave it alone.\n  #\n  if test \"$test_CFLAGS\" != set; then\n                          eval optlist=\\\"\\$${ccbase}${abi1}_cflags_optlist\\\"\n    test -n \"$optlist\" || eval optlist=\\\"\\$${ccbase}${abi2}_cflags_optlist\\\"\n\n    for opt in $optlist; do\n                             eval optflags=\\\"\\$${ccbase}${abi1}_cflags_${opt}\\\"\n      test -n \"$optflags\" || eval optflags=\\\"\\$${ccbase}${abi2}_cflags_${opt}\\\"\n      test -n \"$optflags\" || eval optflags=\\\"\\$${ccbase}_cflags_${opt}\\\"\n\n      for flag in $optflags; do\n\n\t# ~ represents a space in an option spec\n        flag=`echo \"$flag\" | tr '~' ' '`\n\n        case $flag in\n          -march=pentium4 | -march=k8)\n            # For -march settings which enable SSE2 we exclude certain bad\n            # gcc versions and we need an OS knowing how to save xmm regs.\n            #\n            # This is only for ABI=32, any 64-bit gcc is good and any OS\n            # knowing x86_64 will know xmm.\n            #\n            # -march=k8 was only introduced in gcc 3.3, so we shouldn't need\n            # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior).  
But\n            # it doesn't hurt to run it anyway, sharing code with the\n            # pentium4 case.\n            #\n            if test \"$abi\" = 32; then\n              GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)\n              GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)\n            fi\n            ;;\n          -no-cpp-precomp)\n            # special check, avoiding a warning\n            GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,\n                                   [cflags=\"$cflags $flag\"\n                                   break],\n                                   [continue])\n            ;;\n          -Wa,-m*)\n            ;;\n        esac\n\n        GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,\n          [cflags=\"$cflags $flag\"\n          break])\n      done\n    done\n  fi\n\n  ABI=\"$abi\"\n  CC=\"$cc\"\n  CFLAGS=\"$cflags\"\n  CPPFLAGS=\"$cppflags\"\n  if test $enable_gmpcompat = yes; then\n    GMP_CC=\"#define __GMP_CC __MPIR_CC\"\n    GMP_CFLAGS=\"#define __GMP_CFLAGS __MPIR_CFLAGS\"\n  else\n    GMP_CC=\"/* No __GMP_CC here as --enable-gmpcompat option not selected */\"\n    GMP_CFLAGS=\"\"\n  fi\n\n   AC_SUBST(GMP_CC)\n   AC_SUBST(GMP_CFLAGS)\n\n  # Could easily have this in config.h too, if desired.\n  ABI_nodots=`echo $ABI | sed 's/\\./_/'`\n  GMP_DEFINE_RAW(\"define_not_for_expansion(\\`HAVE_ABI_$ABI_nodots')\", POST)\n\n\n  # GMP_LDFLAGS substitution, selected according to ABI.\n  # These are needed on libmpir.la and libmp.la, but currently not on\n  # convenience libraries like tune/libspeed.la or mpz/libmpz.la.\n  #\n                            eval GMP_LDFLAGS=\\\"\\$${ccbase}${abi1}_ldflags\\\"\n  test -n \"$GMP_LDFLAGS\" || eval GMP_LDFLAGS=\\\"\\$${ccbase}${abi1}_ldflags\\\"\n  AC_SUBST(GMP_LDFLAGS)\n  AC_SUBST(LIBGMP_LDFLAGS)\n  AC_SUBST(LIBGMPXX_LDFLAGS)\n\n  # extra_functions, selected according to ABI\n                    eval tmp=\\\"\\$extra_functions$abi1\\\"\n  test -n \"$tmp\" || eval 
tmp=\\\"\\$extra_functions$abi2\\\"\n  extra_functions=\"$tmp\"\n\n\n  # Cycle counter, selected according to ABI.\n  #\n                    eval tmp=\\\"\\$SPEED_CYCLECOUNTER_OBJ$abi1\\\"\n  test -n \"$tmp\" || eval tmp=\\\"\\$SPEED_CYCLECOUNTER_OBJ$abi2\\\"\n  SPEED_CYCLECOUNTER_OBJ=\"$tmp\"\n                    eval tmp=\\\"\\$cyclecounter_size$abi1\\\"\n  test -n \"$tmp\" || eval tmp=\\\"\\$cyclecounter_size$abi2\\\"\n  cyclecounter_size=\"$tmp\"\n\n  if test -n \"$SPEED_CYCLECOUNTER_OBJ\"; then\n    AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,\n    [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])\n  fi\n  AC_SUBST(SPEED_CYCLECOUNTER_OBJ)\n\n\n  # Calling conventions checking, selected according to ABI.\n  #\n                    eval tmp=\\\"\\$CALLING_CONVENTIONS_OBJS$abi1\\\"\n  test -n \"$tmp\" || eval tmp=\\\"\\$CALLING_CONVENTIONS_OBJS$abi2\\\"\n  CALLING_CONVENTIONS_OBJS=\"$tmp\"\n\n  if test -n \"$CALLING_CONVENTIONS_OBJS\"; then\n    AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,\n    [Define to 1 if tests/libtests has calling conventions checking for the CPU])\n  fi\n  AC_SUBST(CALLING_CONVENTIONS_OBJS)\n\nfi\n\n\n# If the user gave an MPN_PATH, use that verbatim, otherwise choose\n# according to the ABI and add \"generic\".\n#\nif test -n \"$MPN_PATH\"; then\n  path=\"$MPN_PATH\"\nelse\n                    eval tmp=\\\"\\$path$abi1\\\"\n  test -n \"$tmp\" || eval tmp=\\\"\\$path$abi2\\\"\n  path=\"$tmp generic\"\nfi\n\n\n# Long long limb setup for mpir.h.\ncase $limb_chosen in\nlonglong) DEFN_LONG_LONG_LIMB=\"#define _LONG_LONG_LIMB 1\"    ;;\n*)        DEFN_LONG_LONG_LIMB=\"/* #undef _LONG_LONG_LIMB */\" ;;\nesac\nAC_SUBST(DEFN_LONG_LONG_LIMB)\n\nAC_GNU_SOURCE\n\n# The C compiler and preprocessor, put into ANSI mode if possible.\nAC_PROG_CC\nAC_PROG_CC_STDC\nAC_PROG_CPP\nGMP_H_ANSI\n\n\n# The C compiler on the build system, and associated 
tests.\nGMP_PROG_CC_FOR_BUILD\nGMP_PROG_CPP_FOR_BUILD\nGMP_PROG_EXEEXT_FOR_BUILD\nGMP_C_FOR_BUILD_ANSI\nGMP_CHECK_LIBM_FOR_BUILD\n\n# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.\n# Using the compiler is a lot easier than figuring out how to invoke the\n# assembler directly.\n#\ntest -n \"$CCAS\" || CCAS=\"$CC -c\"\nAC_SUBST(CCAS)\n\n# If the user provided its Yasm, check it seems functional\nMPIR_AS=\"\"\nif test -z \"$with_yasm\"; then\n  echo \"Looking for a system-wide yasm...\"\n  MPIR_AS=`command -v yasm`\n  if test $? -ne 0; then\n    AC_MSG_ERROR([no system-wide yasm found])\n  fi\nelse\n  MPIR_AS=\"$with_yasm\"\nfi\necho \"Checking yasm...\"\nif ! test -f \"$MPIR_AS\" || ! test -x \"$MPIR_AS\"; then\n  AC_MSG_ERROR([$MPIR_AS does not seem functional])\nfi\nAC_SUBST(MPIR_AS)\n\n# The C++ compiler, if desired.\n\ntest_CXXFLAGS=${CXXFLAGS+set}\n\nAC_PROG_CXX\n\n# Now actually check that the C++ compiler works if it is needed.\nwant_cxx=no\nif test $enable_cxx != no; then\n\n  echo \"CXXFLAGS chosen by autoconf: $CXXFLAGS\" >&AC_FD_CC\n  cxxflags_ac_prog_cxx=$CXXFLAGS\n  cxxflags_list=ac_prog_cxx\n\n  # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed\n  # if AC_PROG_CXX thinks that doesn't work.  $CFLAGS stands a good chance\n  # of working, eg. on a GNU system where CC=gcc and CXX=g++.\n  #\n  if test \"$test_CXXFLAGS\" != set; then\n    cxxflags_cflags=$CFLAGS\n    cxxflags_list=\"cflags $cxxflags_list\"\n    if test \"$ac_prog_cxx_g\" = no; then\n      cxxflags_cflags=`echo \"$cxxflags_cflags\" | sed -e 's/ -g //' -e 's/^-g //' -e 's/ -g$//'`\n    fi\n  fi\n\n  # See if the C++ compiler works.  If the user specified CXXFLAGS then all\n  # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't\n  # give a fatal error, just leaves CXX set to a default g++.  
If on the\n  # other hand the user didn't specify CXXFLAGS then we get to try here our\n  # $cxxflags_list alternatives.\n  #\n  # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.\n  #\n  for cxxflags_choice in $cxxflags_list; do\n    eval CXXFLAGS=\\\"\\$cxxflags_$cxxflags_choice\\\"\n    GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,\n      [want_cxx=yes\n      break])\n  done\n\n  # If --enable-cxx=yes but a C++ compiler can't be found, then abort.\n  if test $want_cxx = no && test $enable_cxx = yes; then\n    AC_MSG_ERROR([C++ compiler not available, see config.log for details])\n  fi\nfi\n\nAM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)\n\nif test -z \"$MPN_PATH\"; then\n  path=\"$add_path $path\"\nfi\n\n# For a nail build, also look in \"nails\" subdirectories.\n#\nif test $GMP_NAIL_BITS != 0 && test -z \"$MPN_PATH\"; then\n  new_path=\n  for i in $path; do\n    case $i in\n    generic) new_path=\"$new_path $i\" ;;\n    *)       new_path=\"$new_path $i/nails $i\" ;;\n    esac\n  done\n  path=$new_path\nfi\n\n\n# Put all directories into CPUVEC_list so as to get a full set of\n# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are\n# empty because mmx and/or sse2 had to be dropped.\n# For MINGW64 we have have to hack this so as x86_64w gets turned into x86_64\nfor i in $fat_path; do\n  GMP_FAT_SUFFIX(tmp_suffix, $i)\n  CPUVEC_list=\"$CPUVEC_list CPUVEC_SETUP_$tmp_suffix\"\ndone\n\n\n# If there's any sse2 or mmx in the path, check whether the assembler\n# supports it, and remove if not.\n#\n# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new\n# enough assembler.\n#\ncase $host in\n  X86_PATTERN | X86_64_PATTERN)\n    if test \"$ABI\" = 32; then\n      case \"$path $fat_path\" in\n        *mmx*)   GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;\n      esac\n      case \"$path $fat_path\" in\n        *sse2*)  GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;\n      esac\n    fi\n    ;;\nesac\n\n\ncat 
>&AC_FD_CC <<EOF\nDecided:\nABI=$ABI\nCC=$CC\nCFLAGS=$CFLAGS\nCPPFLAGS=$CPPFLAGS\nGMP_LDFLAGS=$GMP_LDFLAGS\nCXX=$CXX\nCXXFLAGS=$CXXFLAGS\npath=$path\nEOF\necho \"using ABI=\\\"$ABI\\\"\"\necho \"      CC=\\\"$CC\\\"\"\necho \"      CFLAGS=\\\"$CFLAGS\\\"\"\necho \"      CPPFLAGS=\\\"$CPPFLAGS\\\"\"\nif test $want_cxx = yes; then\n  echo \"      CXX=\\\"$CXX\\\"\"\n  echo \"      CXXFLAGS=\\\"$CXXFLAGS\\\"\"\nfi\necho \"      MPN_PATH=\\\"$path\\\"\"\n\n\nGMP_PROG_AR\nGMP_PROG_NM\n\ncase $host in\n  # FIXME: On AIX 3 and 4, $libname.a is included in libtool\n  # $library_names_spec, so libmpir.a becomes a symlink to libmpir.so, making\n  # it impossible to build shared and static libraries simultaneously.\n  # Disable shared libraries by default, but let the user override with\n  # --enable-shared --disable-static.\n  #\n  # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*\n  # and *-*-os2* too, but wait for someone to test this before worrying\n  # about it.  If there is a problem then of course libtool is the right\n  # place to fix it.\n  #\n  [*-*-aix[34]*])\n    if test -z \"$enable_shared\"; then enable_shared=no; fi ;;\nesac\n\n\n# Enable various configuration flags and DLL on Windows\nLT_INIT([win32-dll])\n\n# Configs for Windows DLLs.\n\nAC_SUBST(LIBGMP_DLL,0)\ncase $host in\n  *-pc-msys|*-*-cygwin* | *-*-mingw*)\n    # By default, build only static.\n    if test -z \"$enable_shared\"; then\n      enable_shared=no\n    fi\n    # Don't allow both static and DLL.\n    if test \"$enable_shared\" != no && test \"$enable_static\" != no; then\n      AC_MSG_ERROR([cannot build both static and DLL, since mpir.h is different for each.\nUse \"--disable-static --enable-shared\" to build just a DLL.])\n    fi\n\n    # \"-no-undefined\" is required when building a DLL, see documentation on\n    # AC_LIBTOOL_WIN32_DLL.\n    #\n    # \"-Wl,--export-all-symbols\" is a bit of a hack, it gets all libmpir and\n    # libmpirxx functions and variables 
exported.  This is what libtool did\n    # in the past, and it's convenient for us in the test programs.\n    #\n    # Maybe it'd be prudent to check for --export-all-symbols before using\n    # it, but it seems to have been in ld since at least 2000, and there's\n    # not really any alternative we want to take up at the moment.\n    #\n    # \"-Wl,output-def\" is used to get a .def file for use by MS lib to make\n    # a .lib import library, described in the manual.  libmpir-3.dll.def\n    # corresponds to the libmp-3.dll.def generated by libtool (as a result\n    # of -export-symbols on that library).\n    #\n    # Incidentally, libtool does generate an import library libmpir.dll.a,\n    # but it's \"ar\" format and cannot be used by the MS linker.  There\n    # doesn't seem to be any GNU tool for generating or converting to .lib.\n    #\n    # FIXME: The .def files produced by -Wl,output-def include isascii,\n    # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't\n    # inline isascii (used in gmp).  
It gives an extern inline for\n    # __isascii, but for some reason not the plain isascii.\n    #\n    # LDFLAGS=\"$LDFLAGS -Wl,--enable-auto-import\"  this is too general\n    if test \"$enable_shared\" = yes; then\n      GMP_LDFLAGS=\"$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols\"\n      LIBGMP_LDFLAGS=\"$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libmpir-3.dll.def\"\n      LIBGMPXX_LDFLAGS=\"$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libmpirxx-3.dll.def\"\n      LIBGMP_DLL=1\n    fi\n    ;;\nesac\n\n# Generate an error here if attempting to build both shared and static when\n# $libname.a is in $library_names_spec (as mentioned above), rather than\n# wait for ar or ld to fail.\n#\nif test \"$enable_shared\" = yes && test \"$enable_static\" = yes; then\n  case $library_names_spec in\n    *libname.a*)\n      AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])\n      ;;\n  esac\nfi\n\nAM_CONDITIONAL(ENABLE_STATIC, test \"$enable_static\" = yes)\nAM_CONDITIONAL(ENABLE_SHARED, test \"$enable_shared\" = yes)\n\n\n# Many of these library and header checks are for the benefit of\n# supplementary programs.  
libmpir doesn't use anything too weird.\n\nAC_HEADER_STDC\nAC_HEADER_TIME\n\n# Reasons for testing:\n#   float.h - not in SunOS bundled cc\n#   langinfo.h - X/Open standard only, not in djgpp for instance\n#   locale.h - old systems won't have this\n#   nl_types.h - X/Open standard only, not in djgpp for instance\n#       (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)\n#   sys/mman.h - not in Cray Unicos\n#   sys/param.h - not in mingw\n#   sys/processor.h - solaris specific, though also present in macos\n#   sys/pstat.h - HPUX specific\n#   sys/resource.h - not in mingw\n#   sys/sysctl.h - not in mingw\n#   sys/sysinfo.h - OSF specific\n#   sys/systemcfg.h - AIX specific\n#   sys/time.h - autoconf suggests testing, don't know anywhere without it\n#   sys/times.h - not in mingw\n#\n# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf\n# default tests\n#\nAC_CHECK_HEADERS(fenv.h fcntl.h float.h langinfo.h locale.h nl_types.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/systemcfg.h sys/time.h sys/times.h)\n\n# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)\nAC_CHECK_HEADERS(sys/resource.h,,,\n[#if TIME_WITH_SYS_TIME\n# include <sys/time.h>\n# include <time.h>\n#else\n# if HAVE_SYS_TIME_H\n#  include <sys/time.h>\n# else\n#  include <time.h>\n# endif\n#endif])\n\n# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants\nAC_CHECK_HEADERS(sys/sysctl.h,,,\n[#if HAVE_SYS_PARAM_H\n# include <sys/param.h>\n#endif])\n\n# Reasons for testing:\n#   optarg - not declared in mingw\n#   fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4\n#   sys_errlist, sys_nerr - not declared in SunOS 4\n#\n# optarg should be in unistd.h and the rest in stdio.h, both of which are\n# in the autoconf default includes.\n#\n# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according\n# to the man page (but aren't), in glibc they're in stdio.h.\n#\nAC_CHECK_DECLS([fgetc, 
fscanf, optarg, ungetc, vfprintf])\nAC_CHECK_DECLS([sys_errlist, sys_nerr], , ,\n[#include <stdio.h>\n#include <errno.h>])\n\n# Reasons for testing:\n#   intmax_t       - C99\n#   long double    - not in the HP bundled K&R cc\n#   long long      - only in reasonably recent compilers\n#   quad_t         - BSD specific\n#   uint_least32_t - C99\n#\n# the default includes are sufficient for all these types\n#\nAC_CHECK_TYPES([intmax_t, uintmax_t, long double, long long, quad_t, uint_least32_t])\n\nAC_C_STRINGIZE\n\n# FIXME: Really want #ifndef __cplusplus around the #define volatile\n# replacement autoconf gives, since volatile is always available in C++.\n# But we don't use it in C++ currently.\nAC_C_VOLATILE\n\n# AC_C_RESTRICT\n\nGMP_C_STDARG\nGMP_C_ATTRIBUTE_CONST\nGMP_C_ATTRIBUTE_MALLOC\nGMP_C_ATTRIBUTE_MODE\nGMP_C_ATTRIBUTE_NORETURN\n\nGMP_H_EXTERN_INLINE\n\n# from libtool\nLT_LIB_M\nAC_SUBST(LIBM)\n\nGMP_FUNC_ALLOCA\nGMP_OPTION_ALLOCA\n\nGMP_H_HAVE_FILE\n\nAC_C_BIGENDIAN(\n  [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)\n   GMP_DEFINE_RAW(\"define_not_for_expansion(\\`HAVE_LIMB_BIG_ENDIAN')\", POST)],\n  [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)\n   GMP_DEFINE_RAW(\"define_not_for_expansion(\\`HAVE_LIMB_LITTLE_ENDIAN')\", POST)\n  ], [:])\nAH_VERBATIM([HAVE_LIMB],\n[/* Define one of these to 1 for the endianness of `mp_limb_t'.\n   If the endianness is not a simple big or little, or you don't know what\n   it is, then leave both undefined. 
*/\n#undef HAVE_LIMB_BIG_ENDIAN\n#undef HAVE_LIMB_LITTLE_ENDIAN])\n\nGMP_C_DOUBLE_FORMAT\n\n\n# Reasons for testing:\n#   alarm - not in mingw\n#   attr_get - IRIX specific\n#   clock_gettime - not in glibc 2.2.4, only very recent systems\n#   cputime - not in glibc\n#   getsysinfo - OSF specific\n#   getrusage - not in mingw\n#   gettimeofday - not in mingw\n#   mmap - not in mingw, djgpp\n#   nl_langinfo - X/Open standard only, not in djgpp for instance\n#   obstack_vprintf - glibc specific\n#   processor_info - solaris specific\n#   pstat_getprocessor - HPUX specific (10.x and up)\n#   raise - an ANSI-ism, though probably almost universal by now\n#   read_real_time - AIX specific\n#   strerror - not in SunOS\n#   strnlen - glibc extension (some other systems too)\n#   syssgi - IRIX specific\n#   times - not in mingw\n#\n# clock_gettime is in librt on *-*-osf5.1.  We could look for it\n# there, but that's not worth bothering with unless it has a decent\n# resolution (in a quick test clock_getres said only 1 millisecond).\n#\n# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ\n# replacement setups it gives.  
It detects a faulty strnlen on AIX, but\n# missing out on that test is ok since our only use of strnlen is in\n# __gmp_replacement_vsnprintf which is not required on AIX since it has a\n# vsnprintf.\n#\nAC_CHECK_FUNCS(alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv mmap nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time syssgi strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)\n\n# mingw returns OK on mprotect , but it's broken\n#\ncase $host in\n  *-pc-msys|*-*-mingw*)\n    ;;\n  *)\n    AC_CHECK_FUNCS(mprotect)\nesac\n\nGMP_FUNC_VSNPRINTF\nGMP_FUNC_SSCANF_WRITABLE_INPUT\n\n# Reasons for checking:\n#   pst_processor psp_iticksperclktick - not in hpux 9\n#\nAC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,\n                [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,\n[Define to 1 if <sys/pstat.h> `struct pst_processor' exists\nand contains `psp_iticksperclktick'.])],,\n                [#include <sys/pstat.h>])\n\n# C++ tests, when required\n#\nif test $enable_cxx = yes; then\n  AC_LANG_PUSH(C++)\n\n  # Reasons for testing:\n  #   <sstream> - not in g++ 2.95.2\n  #   std::locale - not in g++ 2.95.4\n  #\n  AC_CHECK_HEADERS([sstream])\n  AC_CHECK_TYPES([std::locale],,,[#include <locale>])\n\n  AC_LANG_POP(C++)\nfi\n\n\n# Pick the correct source files in $path and link them to mpn/.\n# $gmp_mpn_functions lists all functions we need.\n#\n# The rule is to find a file with the function name and a .asm, .S,\n# .s, or .c extension.  Certain multi-function files with special names\n# can provide some functions too.  (mpn/Makefile.am passes\n# -DOPERATION_<func> to get them to generate the right code.)\n\n# Note: $gmp_mpn_functions must have mod_1 before preinv_mod_1 so the former\n#       can optionally provide the latter as an extra entrypoint.  
Likewise\n#       divrem_1 and preinv_divrem_1.\n\ngmp_mpn_functions_optional=\"umul udiv copyi copyd com_n neg_n\n  and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n\t\t\t\\\n  invert_limb sqr_diagonal lshift1 rshift1 lshift2 rshift2\t\\\n  mul_2 mul_3 mul_4 store not karaadd karasub half double\t\t\\\n  addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8\t\\\n  addlsh1_n sublsh1_n rsh1add_n rsh1sub_n sumdiff_n lshiftc\t\t\\\n  addlsh_n sublsh_n inclsh_n declsh_n nsumdiff_n\"\n\ngmp_mpn_functions=\"$extra_functions\t\t\t\t\t   \\\n  add add_1 add_n sub sub_1 sub_n mul_1 addmul_1 submul_1 lshift rshift    \\\n  addadd_n addsub_n subadd_n\t\t\\\n  comb_tables divexact_1 divexact_by3c divexact_byff divexact_byfobm1 divisible_p divrem divrem_1 divrem_2       \\\n  divrem_euclidean_qr_1 divrem_euclidean_qr_2 divrem_euclidean_r_1\t\\\n  divrem_hensel_qr_1 divrem_hensel_qr_1_1 divrem_hensel_qr_1_2\t\t\\\n  divrem_hensel_r_1 divrem_hensel_rsh_qr_1 rsh_divrem_hensel_qr_1\t\\\n  rsh_divrem_hensel_qr_1_1 rsh_divrem_hensel_qr_1_2\t\t\t\\\n  add_err1_n add_err2_n sub_err1_n sub_err2_n\t\t\t\t   \\\n  fib2_ui mod_1 mod_34lsub1 modexact_1c_odd preinv_divrem_1 preinv_mod_1 dump\t\t   \\\n  gcd_subdiv_step gcdext_lehmer gcdext_1 hgcd matrix22_mul hgcd2            \\\n  hgcd2_jacobi hgcd_appr hgcd_jacobi hgcd_matrix hgcd_reduce hgcd_step \\\n  matrix22_mul1_inverse_vector \\\n  mod_1_1 mod_1_2 mod_1_3 tdiv_q mp_bases fib_table\t\t\t\\\n  mulmid_basecase mulmid mulmid_n toom42_mulmid\tmulmod_bexpp1 mulmod_2expm1 \\\n  mulmod_2expp1_basecase mul_fft \\\n  mul mul_n mul_basecase sqr_basecase random random2 \t   \\\n  pow_1 powlo powm binvert \\\n  urandomb urandomm randomb rrandom invert \\\n  rootrem sizeinbase sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfect_square_p \\\n  bdivmod gcd gcd_1 gcdext tdiv_qr jacobi_base jacobi jacobi_2 get_d  \\\n  mullow_n mulhigh_n mullow_n_basecase mullow_basecase \\\n  redc_1 redc_2 redc_n \\\n  sb_divappr_q toom3_mul 
toom3_mul_n toom4_mul toom4_mul_n \\\n  dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr dc_div_qr_n inv_divappr_q_n \\\n  inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \\\n  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 toom_eval_pm1 toom_eval_pm2 \\\n  toom_eval_pm2exp toom_eval_pm2rexp toom_interpolate_16pts \\\n  toom8_sqr_n toom8h_mul toom_couple_handling sb_bdiv_q sb_bdiv_qr \\\n  dc_bdiv_q_n dc_bdiv_q dc_bdiv_qr dc_bdiv_qr_n divexact zero zero_p $gmp_mpn_functions_optional\"\n\n# should be able to remove the mulfunc below\ndefine(GMP_MULFUNC_CHOICES,\n[# functions that can be provided by multi-function files\ntmp_mulfunc=\ncase $tmp_fn in\n  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)\n                     tmp_mulfunc=\"logops_n\"  ;;\nesac\n])\n\n# the list of all object files used by mpn/Makefile.in and the\n# top-level Makefile.in, respectively\nmpn_objects=\nmpn_objs_in_libgmp=\n\n# links from the sources, to be removed by \"make distclean\"\ngmp_srclinks=\n\n\n# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn\n# build directory.  If $srcdir is relative then we use a relative path too,\n# so the two trees can be moved together.\ncase $srcdir in\n  [[\\\\/]* | ?:[\\\\/]*])  # absolute, as per autoconf\n    mpn_relative_top_srcdir=$srcdir ;;\n  *)                    # relative\n    mpn_relative_top_srcdir=../$srcdir ;;\nesac\n\n\ndefine(MPN_SUFFIXES,[as asm S s c])\n\n\ndnl  Usage: GMP_FILE_TO_FUNCTION_BASE(func,file)\ndnl\ndnl  Set $func to the function base name for $file, eg. dive_1 gives\ndnl  divexact_1.\ndnl\ndefine(GMP_FILE_TO_FUNCTION,\n[case $$2 in\n  *)         \t$1=$$2 ;;\nesac\n])\n\n# Fat binary setups.\n#\n# We proceed through each $fat_path directory, and look for $fat_function\n# routines there.  
Those found are incorporated in the build by generating a\n# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with\n# suitable function renaming, and adding that to $mpn_objects (the same as a\n# normal mpn file).\n#\n# fat.h is generated with macros to let internal calls to each $fat_function\n# go directly through __gmpn_cpuvec, plus macros and declarations helping to\n# setup that structure, on a per-directory basis ready for\n# mpn/<cpu>/fat/fat.c.\n#\n# fat.h includes thesholds listed in $fat_thresholds, extracted from\n# gmp-mparam.h in each directory.  An overall maximum for each threshold is\n# established, for use in making fixed size arrays of temporary space.\n# (Eg. MUL_TOOM3_THRESHOLD_LIMIT used by mpn/generic/mul.c.)\n#\n# It'd be possible to do some of this manually, but when there's more than a\n# few functions and a few directories it becomes very tedious, and very\n# prone to having some routine accidentally omitted.  On that basis it seems\n# best to automate as much as possible, even if the code to do so is a bit\n# ugly.\n#\n\n\nif test -n \"$fat_path\"; then\n  if test \"$ABI\" = 64; then\n    fat_path=\"$fat_path_64\"\n  fi\n\n  # Usually the mpn build directory is created with mpn/Makefile\n  # instantiation, but we want to write to it sooner.\n  mkdir mpn 2>/dev/null\n\n  echo \"/* fat.h - setups for fat binaries.\" >fat.h\n  echo \"   Generated by configure - DO NOT EDIT.  
*/\" >>fat.h\n\n  AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])\n  GMP_DEFINE(WANT_FAT_BINARY, yes)\n\n  # Don't want normal copies of fat functions\n  for tmp_fn in $fat_functions; do\n    GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)\n    GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)\n  done\n\n  for tmp_fn in $fat_functions; do\n    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)\n    echo \"\n#ifndef OPERATION_$tmp_fn\n#undef  mpn_$tmp_fbase\n#define mpn_$tmp_fbase  (*__gmpn_cpuvec.$tmp_fbase)\n#endif\nDECL_$tmp_fbase (__MPN(${tmp_fbase}_init));\" >>fat.h\n    # encourage various macros to use fat functions\n    AC_DEFINE_UNQUOTED(HAVE_NATIVE_$tmp_fbase)\n  done\n\n  echo \"\" >>fat.h\n  echo \"/* variable thresholds */\" >>fat.h\n  for tmp_tn in $fat_thresholds; do\n    echo \"#undef  $tmp_tn\" >>fat.h\n    echo \"#define $tmp_tn  CPUVEC_THRESHOLD (`echo $tmp_tn | tr '[A-Z]' '[a-z]'`)\" >>fat.h\n  done\n\n  echo \"\n/* Copy all fields into __gmpn_cpuvec.\n   memcpy is not used because it might operate byte-wise (depending on its\n   implemenation), and we need the function pointer writes to be atomic.\n   \"volatile\" discourages the compiler from trying to optimize this.  */\n#define CPUVEC_INSTALL(vec) \\\\\n  do { \\\\\n    volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\\\\" >>fat.h\n  for tmp_fn in $fat_functions; do\n    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)\n    echo \"    p->$tmp_fbase = vec.$tmp_fbase; \\\\\" >>fat.h\n  done\n  for tmp_tn in $fat_thresholds; do\n    tmp_field_name=`echo $tmp_tn | tr '[[A-Z]]' '[[a-z]]'`\n    echo \"    p->$tmp_field_name = vec.$tmp_field_name; \\\\\" >>fat.h\n  done\n  echo \"  } while (0)\" >>fat.h\n\n  echo \"\n/* A helper to check all fields are filled. 
*/\n#define ASSERT_CPUVEC(vec) \\\\\n  do { \\\\\" >>fat.h\n  for tmp_fn in $fat_functions; do\n    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)\n    echo \"    ASSERT (vec.$tmp_fbase != NULL); \\\\\" >>fat.h\n  done\n  for tmp_tn in $fat_thresholds; do\n    tmp_field_name=`echo $tmp_tn | tr '[[A-Z]]' '[[a-z]]'`\n    echo \"    ASSERT (vec.$tmp_field_name != 0); \\\\\" >>fat.h\n  done\n  echo \"  } while (0)\" >>fat.h\n\n  echo \"\n/* Call ITERATE(field) for each fat threshold field. */\n#define ITERATE_FAT_THRESHOLDS() \\\\\n  do { \\\\\" >>fat.h\n  for tmp_tn in $fat_thresholds; do\n    tmp_field_name=`echo $tmp_tn | tr '[[A-Z]]' '[[a-z]]'`\n    echo \"    ITERATE ($tmp_tn, $tmp_field_name); \\\\\" >>fat.h\n  done\n  echo \"  } while (0)\" >>fat.h\n\n  for tmp_dir in $fat_path; do\n    CPUVEC_SETUP=\n    THRESH_ASM_SETUP=\n    echo \"\" >>fat.h\n    GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)\n\n    # In order to keep names unique on a DOS 8.3 filesystem, use a prefix\n    # (rather than a suffix) for the generated file names, and abbreviate.\n    case $tmp_suffix in\n      pentium)       tmp_prefix=p   ;;\n      pentium_mmx)   tmp_prefix=pm  ;;\n      p6_mmx)        tmp_prefix=p2  ;;\n      p6_p3mmx)      tmp_prefix=p3  ;;\n      pentium4)      tmp_prefix=p4  ;;\n      pentium4_mmx)  tmp_prefix=p4m ;;\n      pentium4_sse2) tmp_prefix=p4s ;;\n      k6_mmx)        tmp_prefix=k6m ;;\n      k6_k62mmx)     tmp_prefix=k62 ;;\n      k7_mmx)        tmp_prefix=k7m ;;\n      *)             tmp_prefix=$tmp_suffix ;;\n    esac\n\n    # Extract desired thresholds from gmp-mparam.h file in this directory,\n    # if prsent.\n    tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h\n    if test -f $tmp_mparam; then\n      for tmp_tn in $fat_thresholds; do\n        tmp_thresh=`sed -n \"s/^#define $tmp_tn[ \t]*\\\\([0-9][0-9]*\\\\).*$/\\\\1/p\" $tmp_mparam`\n        if test -n \"$tmp_thresh\"; then\n          THRESH_ASM_SETUP=[\"${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)\n\"]\n          
CPUVEC_SETUP=\"$CPUVEC_SETUP    decided_cpuvec.`echo $tmp_tn | tr '[[A-Z]]' '[[a-z]]'` = $tmp_thresh; \\\\\n\"\n          eval tmp_limit=\\$${tmp_tn}_LIMIT\n          if test -z \"$tmp_limit\"; then\n            tmp_limit=0\n          fi\n          if test $tmp_thresh -gt $tmp_limit; then\n            eval ${tmp_tn}_LIMIT=$tmp_thresh\n          fi\n        fi\n      done\n    fi\n\n    for tmp_fn in $fat_functions; do\n      GMP_MULFUNC_CHOICES\n\n      for tmp_base in $tmp_fn $tmp_mulfunc; do\n        for tmp_ext in MPN_SUFFIXES; do\n          tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext\n          if test -f $tmp_file; then\n\n            mpn_objects=\"$mpn_objects ${tmp_prefix}_$tmp_fn.lo\"\n            mpn_objs_in_libgmp=\"$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo\"\n\n            GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)\n\n            # carry-in variant, eg. divrem_1c or modexact_1c_odd\n            case $tmp_fbase in\n              *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;\n              *)    tmp_fbasec=${tmp_fbase}c ;;\n            esac\n\n            # Create a little file doing an include from srcdir.  
The\n            # OPERATION and renamings aren't all needed all the time, but\n            # they don't hurt if unused.\n            #\n            # FIXME: Should generate these via config.status commands.\n            # Would need them all in one AC_CONFIG_COMMANDS though, since\n            # that macro doesn't accept a set of separate commands generated\n            # by shell code.\n            #\n\t    gmp_srclinks=\"$gmp_srclinks mpn/${tmp_prefix}_$tmp_fn.$tmp_ext\"\n            case $tmp_ext in\n              as)\n                echo [\";  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.\n;  Generated by configure - DO NOT EDIT.\n\n%define OPERATION_$tmp_fn\n%define suffix $tmp_suffix\n\n%include \\\"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.as\\\"\n\"] >mpn/${tmp_prefix}_$tmp_fn.as\n                ;;\n              asm)\n                # hide the d-n-l from autoconf's error checking\n                tmp_d_n_l=d\"\"nl\n                echo [\"$tmp_d_n_l  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.\n$tmp_d_n_l  Generated by configure - DO NOT EDIT.\n$tmp_d_n_l  This line is to help MINGW64\n\ndefine(OPERATION_$tmp_fn)\ndefine(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)\ndefine(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})\ndefine(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})\n\n$tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd\nifdef(\\`__gmpn_modexact_1_odd',,\n\\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')\n\n$THRESH_ASM_SETUP\ninclude][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)\n\"] >mpn/${tmp_prefix}_$tmp_fn.asm\n                ;;\n              c)\n\n                echo [\"/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.\n   Generated by configure - DO NOT EDIT. 
*/\n\n#define OPERATION_$tmp_fn 1\n#define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix\n#define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}\n#define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}\n\n#include \\\"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\\\"\n\"] >mpn/${tmp_prefix}_$tmp_fn.c\n                ;;\n            esac\n\n            # Prototype, and append to CPUVEC_SETUP for this directory.\n            echo \"DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);\" >>fat.h\n            CPUVEC_SETUP=\"$CPUVEC_SETUP    decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\\\\n\"\n            # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).\n            if grep \"^PROLOGUE(mpn_preinv_$tmp_fn)\\|GLOBAL_FUNC mpn_preinv_$tmp_fn\" $tmp_file >/dev/null; then\n              echo \"DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);\" >>fat.h\n              CPUVEC_SETUP=\"$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\\\\n\"\n            fi\n          fi\n        done\n      done\n    done\n\n    # Emit CPUVEC_SETUP for this directory\n    echo \"\" >>fat.h\n    echo \"#define CPUVEC_SETUP_$tmp_suffix \\\\\" >>fat.h\n    echo \"  do { \\\\\" >>fat.h\n    echo \"$CPUVEC_SETUP  } while (0)\" >>fat.h\n  done\n\n  # Emit threshold limits\n  echo \"\" >>fat.h\n  for tmp_tn in $fat_thresholds; do\n    eval tmp_limit=\\$${tmp_tn}_LIMIT\n    echo \"#define ${tmp_tn}_LIMIT  $tmp_limit\" >>fat.h\n  done\nfi\n\n\n# Normal binary setups.\n#\n\nfor tmp_ext in MPN_SUFFIXES; do\n  eval found_$tmp_ext=no\ndone\n\nfor tmp_fn in $gmp_mpn_functions; do\n  for tmp_ext in MPN_SUFFIXES; do\n    test \"$no_create\" = yes || rm -f mpn/$tmp_fn.$tmp_ext\n  done\n\n  # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise\n  # mpn_preinv_mod_1 by mod_1.asm.\n  case $tmp_fn in\n  preinv_divrem_1)\n    if test 
\"$HAVE_NATIVE_mpn_preinv_divrem_1\" = yes; then continue; fi ;;\n  preinv_mod_1)\n    if test \"$HAVE_NATIVE_mpn_preinv_mod_1\" = yes; then continue; fi ;;\n  esac\n\n  GMP_MULFUNC_CHOICES\n\n  found=no\n  for tmp_dir in $path; do\n    for tmp_base in $tmp_fn $tmp_mulfunc; do\n      for tmp_ext in MPN_SUFFIXES; do\n        tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext\n        if test -f $tmp_file; then\n\n          # For a nails build, check if the file supports our nail bits.\n          # Generic code always supports all nails.\n          #\n          # FIXME: When a multi-function file is selected to provide one of\n          # the nails-neutral routines, like logops_n for and_n, the\n          # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for\n          # all functions in that file, even if they haven't all been\n          # nailified.  Not sure what to do about this, it's only really a\n          # problem for logops_n, and it's not too terrible to insist those\n          # get nailified always.\n          #\n          if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then\n            case $tmp_fn in\n              and_n | ior_n | xor_n | andn_n | \\\n              copyi | copyd | \\\n              popcount | hamdist | \\\n              udiv | udiv_w_sdiv | umul | \\\n              cntlz | invert_limb)\n                # these operations are either unaffected by nails or defined\n                # to operate on full limbs\n                ;;\n              *)\n                nails=[`sed -n 's/^[ \t]*NAILS_SUPPORT(\\(.*\\))/\\1/p' $tmp_file `]\n                for n in $nails; do\n                  case $n in\n                  *-*)\n                    n_start=`echo \"$n\" | sed -n 's/\\(.*\\)-.*/\\1/p'`\n                    n_end=`echo \"$n\" | sed -n 's/.*-\\(.*\\)/\\1/p'`\n                    ;;\n                  *)\n                    n_start=$n\n                    n_end=$n\n                    ;;\n                  esac\n   
               if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then\n                    found=yes\n                    break\n                  fi\n                done\n                if test $found != yes; then\n                  continue\n                fi\n                ;;\n            esac\n          fi\n\n          found=yes\n          eval found_$tmp_ext=yes\n\n          if test $tmp_ext = c; then\n            tmp_u='$U'\n          else\n            tmp_u=\n          fi\n\n\t  mpn_objects=\"$mpn_objects $tmp_fn$tmp_u.lo\"\n          mpn_objs_in_libgmp=\"$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo\"\n          AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)\n          gmp_srclinks=\"$gmp_srclinks mpn/$tmp_fn.$tmp_ext\"\n\n          # Duplicate AC_DEFINEs are harmless, so it doesn't matter\n          # that multi-function files get grepped here repeatedly.\n          # The PROLOGUE pattern excludes the optional second parameter.\n          gmp_ep=[`\n            sed -n 's/^[ \t]*MULFUNC_PROLOGUE(\\(.*\\))/\\1/p' $tmp_file ;\n            sed -n 's/^[ \t]*PROLOGUE(\\([^,]*\\).*)/\\1/p' $tmp_file ;\n            sed -n 's/^;[ \t]*PROLOGUE(\\([^,]*\\).*)/\\1/p' $tmp_file ;\n            sed -n 's/[^G]*GLOBAL_FUNC[[:space:]]*\\(.*\\)/\\1/p' $tmp_file ;\n          `]\n          for gmp_tmp in $gmp_ep; do\n            AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)\n            eval HAVE_NATIVE_$gmp_tmp=yes\n          done\n\n          case $tmp_fn in\n          sqr_basecase) sqr_basecase_source=$tmp_file ;;\n          esac\n\n          break\n        fi\n      done\n      if test $found = yes; then break ; fi\n    done\n    if test $found = yes; then break ; fi\n  done\n\n  if test $found = no; then\n    for tmp_optional in $gmp_mpn_functions_optional; do\n      if test $tmp_optional = $tmp_fn; then\n        found=yes\n      fi\n    done\n    if test $found = no; then\n      AC_MSG_ERROR([no version of $tmp_fn found in path: 
$path])\n    fi\n  fi\ndone\n\n# For mingw64 rename all *.asm files to *.as files so we then will use yasm\n# cant move them yet as the mpn directory is not created until the end.\n# Apart from defining old_gmp_srclinks (and we could do without it...),\n# what follows is not strictly needed and just make gmp_srclinks more\n# consistent.\ncase $host in\n  *-pc-msys|*-w64-mingw*|*-*-cygwin*)\n    if test \"$ABI\" = \"64\"; then\n      old_gmp_srclinks=$gmp_srclinks\n      gmp_srclinks=\n      for jay in $old_gmp_srclinks; do\n        jayext=`echo \"$jay\" | awk -F . '{print $NF}'`\n        #jayext=`/bin/echo \"${jay#*.}\"`\n        #jayext=`echo \"$jay\" | sed -n 's/.*\\.//'`\n        base=`echo \"${jay%.*}\"`\n        name=`basename $base`\n        if test \"$jayext\" = \"asm\" && test \"$name\" != \"fat_entry\"; then\n          gmp_srclinks=\"$gmp_srclinks $base.as\"\n        else\n          gmp_srclinks=\"$gmp_srclinks $jay\"\n        fi\n      done\n    fi\n  ;;\nesac\n\n# All cycle counters are .asm files currently\nif test -n \"$SPEED_CYCLECOUNTER_OBJ\"; then\n  found_asm=yes\nfi\n\ndnl  The following list only needs to have templates for those defines which\ndnl  are going to be tested by the code, there's no need to have every\ndnl  possible mpn routine.\n\nAH_VERBATIM([HAVE_NATIVE],\n[/* Define to 1 each of the following for which a native (ie. CPU specific)\n    implementation of the corresponding routine exists.  
*/\n#undef HAVE_NATIVE_mpn_add_n\n#undef HAVE_NATIVE_mpn_add_nc\n#undef HAVE_NATIVE_mpn_addadd_n\n#undef HAVE_NATIVE_mpn_addlsh1_n\n#undef HAVE_NATIVE_mpn_addlsh_n\n#undef HAVE_NATIVE_mpn_addlsh_nc\n#undef HAVE_NATIVE_mpn_addmul_1c\n#undef HAVE_NATIVE_mpn_addmul_2\n#undef HAVE_NATIVE_mpn_addmul_3\n#undef HAVE_NATIVE_mpn_addmul_4\n#undef HAVE_NATIVE_mpn_addmul_5\n#undef HAVE_NATIVE_mpn_addmul_6\n#undef HAVE_NATIVE_mpn_addmul_7\n#undef HAVE_NATIVE_mpn_addmul_8\n#undef HAVE_NATIVE_mpn_addsub_n\n#undef HAVE_NATIVE_mpn_sumdiff_n\n#undef HAVE_NATIVE_mpn_nsumdiff_n\n#undef HAVE_NATIVE_mpn_and_n\n#undef HAVE_NATIVE_mpn_andn_n\n#undef HAVE_NATIVE_mpn_com_n\n#undef HAVE_NATIVE_mpn_not\n#undef HAVE_NATIVE_mpn_copyd\n#undef HAVE_NATIVE_mpn_copyi\n#undef HAVE_NATIVE_mpn_declsh_n\n#undef HAVE_NATIVE_mpn_divexact_1\n#undef HAVE_NATIVE_mpn_divrem_euclidean_qr_1\n#undef HAVE_NATIVE_mpn_divrem_hensel_qr_1\n#undef HAVE_NATIVE_mpn_divrem_hensel_qr_1_1\n#undef HAVE_NATIVE_mpn_divrem_hensel_qr_1_2\n#undef HAVE_NATIVE_mpn_divrem_hensel_r_1\n#undef HAVE_NATIVE_mpn_divrem_hensel_rsh_qr_1\n#undef HAVE_NATIVE_mpn_rsh_divrem_hensel_qr_1\n#undef HAVE_NATIVE_mpn_rsh_divrem_hensel_qr_1_1\n#undef HAVE_NATIVE_mpn_rsh_divrem_hensel_qr_1_2\n#undef HAVE_NATIVE_mpn_divrem_euclidean_qr_2\n#undef HAVE_NATIVE_mpn_divrem_euclidean_r_1\n#undef HAVE_NATIVE_mpn_divrem_1\n#undef HAVE_NATIVE_mpn_divrem_1c\n#undef HAVE_NATIVE_mpn_divrem_2\n#undef HAVE_NATIVE_mpn_double\n#undef HAVE_NATIVE_mpn_gcd_1\n#undef HAVE_NATIVE_mpn_half\n#undef HAVE_NATIVE_mpn_inclsh_n\n#undef HAVE_NATIVE_mpn_invert_limb\n#undef HAVE_NATIVE_mpn_ior_n\n#undef HAVE_NATIVE_mpn_iorn_n\n#undef HAVE_NATIVE_mpn_karaadd\n#undef HAVE_NATIVE_mpn_karasub\n#undef HAVE_NATIVE_mpn_lshift1\n#undef HAVE_NATIVE_mpn_lshift2\n#undef HAVE_NATIVE_mpn_lshiftc\n#undef HAVE_NATIVE_mpn_mod_1\n#undef HAVE_NATIVE_mpn_mod_1_1\n#undef HAVE_NATIVE_mpn_mod_1_2\n#undef HAVE_NATIVE_mpn_mod_1_3\n#undef HAVE_NATIVE_mpn_mod_1c\n#undef HAVE_NATIVE_mpn_modexact_1_odd\n#undef 
HAVE_NATIVE_mpn_modexact_1c_odd\n#undef HAVE_NATIVE_mpn_mul_1c\n#undef HAVE_NATIVE_mpn_mul_2\n#undef HAVE_NATIVE_mpn_mul_3\n#undef HAVE_NATIVE_mpn_mul_4\n#undef HAVE_NATIVE_mpn_mul_basecase\n#undef HAVE_NATIVE_mpn_nand_n\n#undef HAVE_NATIVE_mpn_neg_n\n#undef HAVE_NATIVE_mpn_nior_n\n#undef HAVE_NATIVE_mpn_preinv_divrem_1\n#undef HAVE_NATIVE_mpn_preinv_mod_1\n#undef HAVE_NATIVE_mpn_redc_1\n#undef HAVE_NATIVE_mpn_redc_2\n#undef HAVE_NATIVE_mpn_rsh1add_n\n#undef HAVE_NATIVE_mpn_rsh1sub_n\n#undef HAVE_NATIVE_mpn_rshift1\n#undef HAVE_NATIVE_mpn_rshift2\n#undef HAVE_NATIVE_mpn_sqr_basecase\n#undef HAVE_NATIVE_mpn_sqr_diagonal\n#undef HAVE_NATIVE_mpn_store\n#undef HAVE_NATIVE_mpn_sub_n\n#undef HAVE_NATIVE_mpn_sub_nc\n#undef HAVE_NATIVE_mpn_subadd_n\n#undef HAVE_NATIVE_mpn_sublsh1_n\n#undef HAVE_NATIVE_mpn_sublsh_n\n#undef HAVE_NATIVE_mpn_sublsh_nc\n#undef HAVE_NATIVE_mpn_submul_1c\n#undef HAVE_NATIVE_mpn_umul_ppmm\n#undef HAVE_NATIVE_mpn_umul_ppmm_r\n#undef HAVE_NATIVE_mpn_udiv_qrnnd\n#undef HAVE_NATIVE_mpn_udiv_qrnnd_r\n#undef HAVE_NATIVE_mpn_xor_n\n#undef HAVE_NATIVE_mpn_xnor_n])\n\n\n# Don't demand an m4 unless it's actually needed.\nif test $found_asm = yes; then\n  GMP_PROG_M4\n  GMP_M4_M4WRAP_SPURIOUS\nelse\n  M4=m4-not-needed\nfi\n\n# Only do the GMP_ASM checks if there's a .S or .asm wanting them.\nif test $found_asm = no && test $found_S = no; then\n  gmp_asm_syntax_testing=no\nfi\n\nif test \"$gmp_asm_syntax_testing\" != no; then\n  GMP_ASM_TEXT\n  GMP_ASM_DATA\n  GMP_ASM_LABEL_SUFFIX\n  GMP_ASM_GLOBL\n  GMP_ASM_GLOBL_ATTR\n  GMP_ASM_UNDERSCORE\n  GMP_ASM_RODATA\n  GMP_ASM_TYPE\n  GMP_ASM_SIZE\n  GMP_ASM_LSYM_PREFIX\n  GMP_ASM_W32\n  GMP_ASM_ALIGN_LOG\n  case $host in\n    IA64_PATTERN)\n      GMP_ASM_IA64_ALIGN_OK\n      ;;\n    [powerpc*-*-* | power[3-9]-*-*])\n      GMP_ASM_POWERPC_PIC_ALWAYS\n      GMP_ASM_POWERPC_R_REGISTERS\n      GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)\n      case $host in\n        *-*-aix*)\n\t  case $ABI in\n\t    64 | aix64)  
GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;\n            *)           GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;\n          esac\n          ;;\n        *-*-linux* | *-*-*bsd*)\n\t  case $ABI in\n\t    mode64)      GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;\n\t    mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;\n          esac\n          ;;\n        *-*-darwin*)\n\t  case $ABI in\n\t    mode64)      GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;\n\t    mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;\n          esac\n          ;;\n        *)\n\t  # Assume unrecognized operating system is the powerpc eABI\n          GMP_INCLUDE_MPN(powerpc32/eabi.m4)\n\t  ;;\n      esac\n      ;;\n    power*-*-aix*)\n      GMP_INCLUDE_MPN(powerpc32/aix.m4)\n      ;;\n    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)\n      case $ABI in\n        64)\n          GMP_ASM_SPARC_REGISTER\n          ;;\n      esac\n      ;;\n    X86_PATTERN | X86_64_PATTERN)\n      GMP_ASM_ALIGN_FILL_0x90\n      case $ABI in\n        32)\n          GMP_INCLUDE_MPN(x86/x86-defs.m4)\n          GMP_ASM_COFF_TYPE\n          GMP_ASM_X86_GOT_UNDERSCORE\n          GMP_ASM_X86_SHLDL_CL\n      \t  case $enable_profiling in\n      \t    prof | gprof)  GMP_ASM_X86_MCOUNT ;;\n      \t  esac\n          case $host in\n            *-*-darwin*)\n              OBJECT_FORMAT=\"-f macho32\" ;;\n            *)\n              OBJECT_FORMAT=\"-f elf32\" ;;\n           esac\n          ;;\n\n        64)\n          GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)\n          case $host in\n            *-*-darwin*)\n              # Defined in mpn/x86_64/x86_64-defs.m4, but there currently\n              # hardcoded just for ELF, so redefine it here for Mach-O:\n\t      GMP_DEFINE_RAW([\"define(\\`JUMPTABSECT',\\`    .text')\"],POST)\n              OBJECT_FORMAT=\"-f macho64\" ;;\n            *-pc-msys|*-w64-mingw*|*-*-cygwin*)\n              OBJECT_FORMAT=\"-f x64\"  ;;\n            *)\n              OBJECT_FORMAT=\"-f elf64\" ;;\n          esac\n          ;;\n   
   esac\n      AC_SUBST(OBJECT_FORMAT)\n      ;;\n  esac\nfi\n\n# Create link for gmp-mparam.h.\ngmp_mparam_source=\nfor gmp_mparam_dir in $path; do\n  test \"$no_create\" = yes || rm -f gmp-mparam.h\n  tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h\n  if test -f $tmp_file; then\n    AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)\n    gmp_srclinks=\"$gmp_srclinks gmp-mparam.h\"\n    gmp_mparam_source=$tmp_file\n    break\n  fi\ndone\nif test -z \"$gmp_mparam_source\"; then\n  AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])\nfi\n\n# Create longlong.h from the path\nlonglong_source=\nfor longlong_dir in $path; do\n  test \"$no_create\" = yes || rm -f longlong.h\n  tmp_file=$srcdir/mpn/$longlong_dir/longlong_inc.h\n  if test -f $tmp_file; then\n    AC_CONFIG_FILES(longlong.h:longlong_pre.h:mpn/$longlong_dir/longlong_inc.h:longlong_post.h)\n    gmp_srclinks=\"$gmp_srclinks longlong.h\"\n    longlong_source=$tmp_file\n    break\n  fi\ndone\nif test -z \"$longlong_source\"; then\n  AC_MSG_ERROR([no version of longlong_inc.h found in path: $path])\nfi\n\n# For a helpful message from tune/tuneup.c\ngmp_mparam_suggest=$gmp_mparam_source\nif test \"$gmp_mparam_dir\" = generic; then\n  for i in $path; do break; done\n  if test \"$i\" != generic; then\n    gmp_mparam_suggest=\"new file $srcdir/mpn/$i/gmp-mparam.h\"\n  fi\nfi\nAC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, \"$gmp_mparam_source\",\n[The gmp-mparam.h file (a string) the tune program should suggest updating.])\n\n\n# Copy any SQR_KARATSUBA_THRESHOLD from gmp-mparam.h to config.m4.\n# Some versions of sqr_basecase.asm use this.\n# Fat binaries do this on a per-file basis, so skip in that case.\n#\nif test -z \"$fat_path\"; then\n  tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_KARATSUBA_THRESHOLD[ \t]*\\([0-9][0-9]*\\).*$/\\1/p' $gmp_mparam_source`\n  if test -n \"$tmp_gmp_karatsuba_sqr_threshold\"; then\n    
GMP_DEFINE_RAW([\"define(<SQR_KARATSUBA_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)\"])\n  fi\nfi\n\n\n# Sizes of some types, needed at preprocessing time.\n#\n# FIXME: The assumption that BITS_PER_MP_LIMB is 8*sizeof(mp_limb_t) might\n# be slightly rash, but it's true everwhere we know of and ought to be true\n# of any sensible system.  In a generic C build, grepping LONG_BIT out of\n# <limits.h> might be an alternative, for maximum portability.\n#\nAC_CHECK_SIZEOF(unsigned short)\nAC_CHECK_SIZEOF(unsigned)\nAC_CHECK_SIZEOF(unsigned long)\nAC_CHECK_SIZEOF(uintmax_t)\nAC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)\nif test \"$ac_cv_sizeof_mp_limb_t\" = 0; then\n  AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])\nfi\nAC_SUBST(BITS_PER_MP_LIMB, `expr 8 \\* $ac_cv_sizeof_mp_limb_t`)\nGMP_DEFINE_RAW([\"define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)\"])\n\n# Check compiler limb size matches gmp-mparam.h\n#\n# FIXME: Some of the cycle counter objects in the tune directory depend on\n# the size of ulong, it'd be possible to check that here, though a mismatch\n# probably wouldn't want to be fatal, none of the libmpir assembler code\n# depends on ulong.\n#\nmparam_bits=[`sed -n 's/^#define BITS_PER_MP_LIMB[ \t][ \t]*\\([0-9]*\\).*$/\\1/p' $gmp_mparam_source`]\nif test -n \"$mparam_bits\" && test \"$mparam_bits\" -ne $BITS_PER_MP_LIMB; then\n  if test \"$test_CFLAGS\" = set; then\n    AC_MSG_ERROR([Oops, mp_limb_t is $BITS_PER_MP_LIMB bits, but the assembler code\nin this configuration expects $mparam_bits bits.\nYou appear to have set \\$CFLAGS, perhaps you also need to tell GMP the\nintended ABI, see \"ABI and ISA\" in the manual.])\n  else\n    AC_MSG_ERROR([Oops, mp_limb_t is $BITS_PER_MP_LIMB bits, but the assembler code\nin this configuration expects $mparam_bits bits.])\n  
fi\nfi\n\nGMP_DEFINE_RAW([\"define(<GMP_LIMB_BITS>,$BITS_PER_MP_LIMB)\"])\nGMP_DEFINE_RAW([\"define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)\"])\nGMP_DEFINE_RAW([\"define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))\"])\n\nAC_SUBST(mpn_objects)\nAC_SUBST(mpn_objs_in_libgmp)\n\n\n\nAC_SUBST(gmp_srclinks)\n\n\n# A recompiled sqr_basecase for use in the tune program, if necessary.\nTUNE_SQR_OBJ=\ntest -d tune || mkdir tune\ncase $sqr_basecase_source in\n  *.asm | *.as)\n    sqr_max=[`sed -n 's/^def...(SQR_KARATSUBA_THRESHOLD_MAX, *\\([0-9]*\\))/\\1/p' $sqr_basecase_source`]\n    if test -n \"$sqr_max\"; then\n      TUNE_SQR_OBJ=sqr_asm.o\n      AC_DEFINE_UNQUOTED(TUNE_SQR_KARATSUBA_MAX,$sqr_max,\n      [Maximum size the tune program can test for SQR_KARATSUBA_THRESHOLD])\n    fi\n    cat >tune/sqr_basecase.c <<EOF\n/* not sure that an empty file can compile, so put in a dummy */\nint sqr_basecase_dummy;\nEOF\n    ;;\n  *.c)\n    TUNE_SQR_OBJ=\n    AC_DEFINE(TUNE_SQR_KARATSUBA_MAX,SQR_KARATSUBA_MAX_GENERIC)\n    cat >tune/sqr_basecase.c <<EOF\n#define TUNE_PROGRAM_BUILD 1\n#define TUNE_PROGRAM_BUILD_SQR 1\n#include \"mpn/sqr_basecase.c\"\nEOF\n    ;;\nesac\nAC_SUBST(TUNE_SQR_OBJ)\n\n# Create config.m4.\nGMP_FINISH\n\ncase $host in\n  *-pc-msys|*-w64-mingw*|*-*-cygwin*)\n    if test \"$ABI\" = \"64\"; then\n      if test $enable_fat = yes; then\n        YASM_MAC_INC=yasm_macwin.inc.fat\n      else\n        YASM_MAC_INC=yasm_macwin.inc.nofat\n      fi\n    else\n      if test $enable_fat = yes; then\n        YASM_MAC_INC=yasm_mac.inc.fat\n      else\n        YASM_MAC_INC=yasm_mac.inc.nofat\n      fi\n    fi\n  ;;\n  *-*-*)\n    if test $enable_fat = yes; then\n      YASM_MAC_INC=yasm_mac.inc.fat\n    else\n      YASM_MAC_INC=yasm_mac.inc.nofat\n    fi\n  ;;\nesac\nAC_CONFIG_LINKS(yasm_mac.inc:$YASM_MAC_INC)\n\n# Create Makefiles\n# FIXME: Upcoming version of autoconf/automake don't like broken lines.\n#        Right now automake isn't accepting the new AC_CONFIG_FILES 
scheme.\n\nAC_CONFIG_FILES(Makefile mpf/Makefile mpn/Makefile fft/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/fft/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile)\nAC_CONFIG_FILES(mpir.h:gmp-h.in)\nif test $enable_gmpcompat = yes; then\n  AC_CONFIG_FILES(gmp.h:gmp-h.in)\n  if test $enable_cxx = yes ; then\n    AC_CONFIG_FILES(gmpxx.h:mpirxx.h)\n  fi\nfi\n\nAC_OUTPUT\n\n# now we have created and populated the mpn directory we can rename the files\n# and post process the \"m4 macros\" into \"yasm macros\"\ncase $host in\n  *-pc-msys|*-w64-mingw*|*-*-cygwin*)\n    if test \"$ABI\" = \"64\"; then\n      echo \"Renaming assembly files on Win64...\"\n      for jay in $old_gmp_srclinks; do\n        jayext=`echo \"$jay\" | awk -F . '{print $NF}'`\n        #jayext=`/bin/echo \"${jay#*.}\"`\n        #jayext=`echo \"$jay\" | sed -n 's/.*\\.//'`\n        base=`echo \"${jay%.*}\"`\n        name=`basename $base`\n        if test \"$jayext\" = \"asm\" && test \"$name\" != \"fat_entry\" ; then\n          got=`grep -ce \"d\"\"nl  This line is to help MINGW64\" $jay`\n          if test \"$got\" = \"1\" ; then\n            cat $jay | sed s/\"include(\\(.*\\))\"/\"%include \\\"\\1\\\"\"/ | sed s/\"d\"\"efine(\\(.*\\),\\(.*\\))\"/\"%d\"\"efine \\1 \\2\"/ | sed s/\"d\"\"efine(\\(.*\\))\"/\"%d\"\"efine \\1\"/ | sed s/\"d\"\"nl\\(.*\\)\"/\"; \\1\"/ | sed s/ifdef.*// | sed s/\"\\`.*\"// > $base.as\n            rm -f $jay\n          else\n            mv $jay $base.as\n          fi\n        fi\n      done\n# the file x86_64_divrem_euclidean_qr_1.as should detect the defined macro EXCLUDE_PREINV , but for some reason it doesnt, so we hack it\n      if test $enable_fat = yes; then\n        echo \"%d\"\"efine EXCLUDE_PREINV 1\" > mpn/x86_64_divrem_euclidean_qr_1.tmp\n        cat 
mpn/x86_64_divrem_euclidean_qr_1.as >> mpn/x86_64_divrem_euclidean_qr_1.tmp\n        mv mpn/x86_64_divrem_euclidean_qr_1.tmp mpn/x86_64_divrem_euclidean_qr_1.as\n      fi\n    fi\n  ;;\nesac\n"
  },
  {
    "path": "configure.yasm",
    "content": "./configure\n"
  },
  {
    "path": "cpuid.c",
    "content": "/*\n\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006\nFree Software Foundation, Inc.\n\nCopyright 2008 William Hart.\n\nCopyright 2009,2010,2011 Jason Moxham\n\nCopyright 2010 Gonzalo Tornaria\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#define WANT_FAKE_BUILD_CPU 0\n#define WANT_FAKE_FAT_CPU 0\n\n#define FAKE_BUILD_CPU_VENDOR \"MPIRSNOTFAKE\"\n#define FAKE_BUILD_CPU_FAMILY 0\n#define FAKE_BUILD_CPU_EXTFAMILY 0\n#define FAKE_BUILD_CPU_MODEL 0\n#define FAKE_BUILD_CPU_EXTMODEL 0\n\n#define FAKE_FAT_CPU_VENDOR \"MPIRSNOTFAKE\"\n#define FAKE_FAT_CPU_FAMILY 0\n#define FAKE_FAT_CPU_EXTFAMILY 0\n#define FAKE_FAT_CPU_MODEL 0\n#define FAKE_FAT_CPU_EXTMODEL 0\n\n#if INFAT\n#define WANT_FAKE_CPU\t\t\tWANT_FAKE_FAT_CPU\n#define\tFAKE_CPU_VENDOR\t\t\tFAKE_FAT_CPU_VENDOR\n#define\tFAKE_CPU_FAMILY\t\t\tFAKE_FAT_CPU_FAMILY\n#define FAKE_CPU_EXTFAMILY\t\tFAKE_FAT_CPU_EXTFAMILY\n#define FAKE_CPU_MODEL\t\t\tFAKE_FAT_CPU_MODEL\n#define FAKE_CPU_EXTMODEL\t\tFAKE_FAT_CPU_EXTMODEL\n#endif\n#if CONFIG_GUESS\n#define WANT_FAKE_CPU\t\t\tWANT_FAKE_BUILD_CPU\n#define\tFAKE_CPU_VENDOR\t\t\tFAKE_BUILD_CPU_VENDOR\n#define\tFAKE_CPU_FAMILY\t\t\tFAKE_BUILD_CPU_FAMILY\n#define 
FAKE_CPU_EXTFAMILY\t\tFAKE_BUILD_CPU_EXTFAMILY\n#define FAKE_CPU_MODEL\t\t\tFAKE_BUILD_CPU_MODEL\n#define FAKE_CPU_EXTMODEL\t\tFAKE_BUILD_CPU_EXTMODEL\n#endif\n\n#if WANT_FAKE_CPU\nlong fake_cpuid(char *p,unsigned int level)\n{unsigned int eax,feat801=0,feat2=0,family,extfamily,model,extmodel;\n char *vendor;\n\n// can set feat801=1 for netburstlahf\n// can set feat2=256 for prescott\nvendor=FAKE_CPU_VENDOR;\nfamily=FAKE_CPU_FAMILY;\nextfamily=FAKE_CPU_EXTFAMILY;\nmodel=FAKE_CPU_MODEL;\nextmodel=FAKE_CPU_EXTMODEL;\nmemset(p,0,12);\nif(level==0){strncpy(p,vendor,12);return 1;}\nif(level==1){eax=0+(model<<4)+(family<<8)+(0<<12)+(extmodel<<16)+(extfamily<<20);memcpy(p,&feat2,4);return eax;}\nif(level==0x80000000){return 1;}\nif(level==0x80000001){memcpy(p+8,&feat801,4);return 0;}\nreturn 0;}\n#endif\n\n#if WANT_FAKE_CPU \n#define __gmpn_cpuid fake_cpuid\n#else\n#if CONFIG_GUESS\n#define __gmpn_cpuid(_x,_y)\tcpuid(_x,_y,1,0,0)\n#endif\n#endif\n\n#define FEAT_HAS_AVX 0x10000000\n\n#if CONFIG_GUESS\n// use's the stringinzing directive  #x   ie #x expands to \"x\"\n#define CPUIS(x)\tmodelstr=#x\nchar*\t__gmpn_cpu(int *vector){\n#endif\n#if INFAT\n#define CPUIS(x)\tdo{TRACE(printf(\"  \"#x\"\\n\"));CPUSETUP_##x;}while(0)\nchar*\t__gmpn_cpu(struct cpuvec_t *vector){\nstruct cpuvec_t decided_cpuvec;\n#endif\n  char vendor_string[13];\n  char features[12];\n  long fms;\n  int family, model, stepping;\n  char *modelstr=0;\n\n#if INFAT\nmemset (&decided_cpuvec, '\\0', sizeof (decided_cpuvec));\nCPUVEC_SETUP_fat;\n#if FAT32\nCPUVEC_SETUP_x86;\n#endif\n#if FAT64\nCPUVEC_SETUP_x86_64;\n#endif\n#endif\n  __gmpn_cpuid (vendor_string, 0);\n  vendor_string[12] = 0;\n\n  fms = __gmpn_cpuid (features, 1);\n\n  family = ((fms >> 8) & 15) + ((fms >> 20) & 0xff);\n  model = ((fms >> 4) & 15) + ((fms >> 12) & 0xf0);\n  stepping = fms & 15;\n\n  #if CONFIG_GUESS_64BIT\n  modelstr = \"x86_64\";\n  #else\n  modelstr = \"i486\";// shouldn't we make this x86??\n  #endif\n\n  if (strcmp 
(vendor_string, \"GenuineIntel\") == 0)\n    {\n      switch (family)\n\t{\n\t#if CONFIG_GUESS_32BIT || FAT32\n\tcase 5:\n\t  if (model <= 2) CPUIS(pentium);\n\t  if (model >= 4) CPUIS(pentiummmx);\n\t  break;\n        #endif\n\tcase 6:\n\t  #if CONFIG_GUESS_32BIT || FAT32\n\t  if (model == 1) { CPUIS(pentiumpro);break;}\n\t  if (model <= 6) { CPUIS(pentium2);break;}\n\t  if (model <= 13){ CPUIS(pentium3);break;}\n\t  if (model == 14){ CPUIS(core);break;}\n\t  if (model == 16){ CPUIS(core);break;}\n\t  #endif\n\t  if (model == 15){ CPUIS(core2);break;}\n\t  if (model == 17){ CPUIS(penryn);break;}\n\t  if (model == 22){ CPUIS(core2);break;}\n\t  if (model == 23){ CPUIS(penryn);break;}\n\t  if (model == 25){ CPUIS(westmere);break;}\n\t  if (model == 26){ CPUIS(nehalem);break;}\n\t  if (model == 28){ CPUIS(atom);break;}// 45nm\n\t  if (model == 29){ CPUIS(penryn);break;}\n\t  if (model == 30){ CPUIS(nehalem);break;}\n\t  if (model == 31){ CPUIS(nehalem);break;}\n\t  if (model == 37){ CPUIS(westmere);break;}\n\t  if (model == 38){ CPUIS(atom);break;}// atom z670 tunnel creek\n\t  if (model == 39){ CPUIS(atom);break;}// Intel Atom Z2460 (Medfield platform, Penwell SoC, Saltwell core)\n\t  if (model == 42){\n        int feat = ((int *)features)[2];\n        if (feat & FEAT_HAS_AVX) { CPUIS(sandybridge);break;}\n        else { CPUIS(westmere);break;} /* Really a crippled sandybridge with no avx */\n     }\n\t  if (model == 43){ CPUIS(sandybridge);break;}\n\t  if (model == 44){ CPUIS(westmere);break;}\n\t  if (model == 45){ CPUIS(sandybridge);break;}\n\t  if (model == 46){ CPUIS(nehalem);break;}\n\t  if (model == 47){ CPUIS(westmere);break;}\n\t  if (model == 54){ CPUIS(atom);break;}//DualCore Intel Atom D2700, 2133 MHz (16 x 133) (Cedarview, Saltwell core) 32nm\n\t  if (model == 55){ CPUIS(atom);break;}\n          if (model == 58){ CPUIS(ivybridge);break;}\n\t  if (model == 60){\n          int feat = ((int *)features)[2];\n          if (feat & FEAT_HAS_AVX) { 
CPUIS(haswellavx);break; } /* Core i Haswell */\n          else { CPUIS(haswell);break; } /* Celeron/Pentium Haswell without AVX */\n      }\n          if (model == 61){ CPUIS(broadwell);break;}\n          if (model == 62){ CPUIS(ivybridge);break;}\n          if (model == 63){ CPUIS(haswellavx);break;}\n          if (model == 69){ CPUIS(haswellavx);break;}\n          if (model == 70){ CPUIS(haswellavx);break;}\n          if (model == 71){ CPUIS(broadwell);break;}\n          if (model == 78){ CPUIS(skylakeavx);break;}\n          if (model == 79){ CPUIS(broadwell);break;}\n          if (model == 94){\n              int feat = ((int *)features)[2];\n              if (feat & FEAT_HAS_AVX) { CPUIS(skylakeavx);break; } /* Core i Skylake */\n              else { CPUIS(skylake);break; } /* Celeron/Pentium Skylake without AVX2 */\n          }\n     break;\n   case 15:\n        #if CONFIG_GUESS_64BIT || FAT64\n          __gmpn_cpuid(features,0x80000001);\n          if ( features[8]&1 ){ CPUIS(netburstlahf);break;}\n          CPUIS(netburst);break;\n        #endif\n        #if CONFIG_GUESS_32BIT || FAT32\n\t  if (model <= 6) { CPUIS(pentium4);break;}\n          int feat = ((int *)features)[2];\n          if (feat & 1) { CPUIS(prescott);break;}\n        #endif\n          break;\n\t}\n    }\n  else if (strcmp (vendor_string, \"AuthenticAMD\") == 0)\n    {\n      switch (family)\n\t{\n\t#if CONFIG_GUESS_32BIT || FAT32\n\tcase 5:\n\t  if (model <= 3) { CPUIS(k5);break;}\n\t  if (model <= 7) { CPUIS(k6);break;}\n\t  if (model <= 8) { CPUIS(k62);break;}\n\t  if (model <= 9) { CPUIS(k63);break;}\n\t  break;\n\tcase 6:\n\t  CPUIS(k7);\n\t  break;\n        #endif\n   case 15:\n\t  CPUIS(k8);\n\t  break;\n   case 16:\n\t  if (model == 2) { CPUIS(k10);break; }\n\t  if (model == 4) { CPUIS(k102);break; }\n\t  if (model == 5) { CPUIS(k102);break; }\n\t  if (model == 6) { CPUIS(k102);break; }\n\t  if (model == 8) { CPUIS(k102);break; }\n\t  if (model == 9) { CPUIS(k102);break; }\n\t  if 
(model == 10) { CPUIS(k102);break; }\n\t  break;\n   case 17:\n     CPUIS(k8);// low power k8 \n     break;\n   case 18:\n     CPUIS(k103);// like k102 but with hardware divider, this is lano\n     break;\n   case 20:\n     CPUIS(bobcat);// fusion of bobcat and GPU\n     break;\n   case 21:\n     if (model == 1) { CPUIS(bulldozer); break; }\n     if (model == 2) { CPUIS(piledriver); break; }\n     if (model == 3) { CPUIS(piledriver); break; }\n     if (model == 16) { CPUIS(piledriver); break; }\n     if (model == 18) { CPUIS(piledriver); break; }\n     if (model == 19) { CPUIS(piledriver); break; }\n     break;  \n   /* \n   case 22:\n     CPUIS(jaguar); ?????\n     break;\n   */\n        }\n    }\n  else if (strcmp (vendor_string, \"CentaurHauls\") == 0)\n    {\n      switch (family)\n\t{\n\tcase 6:\n\t  if (model == 15){CPUIS(nano);break;}\n\t#if CONFIG_GUESS_32BIT || FAT32\n\t  if (model < 9) { CPUIS(viac3);break;}\n\t  CPUIS(viac32);break;\n        #endif\n\t}\n    }\n#if INFAT\n*vector=decided_cpuvec;\n#endif\nreturn modelstr;}\n"
  },
  {
    "path": "cxx/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMPXX -I$(top_srcdir)\n\nif WANT_CXX\nnoinst_LTLIBRARIES = libcxx.la\nendif\n\nlibcxx_la_SOURCES = \\\n  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \\\n  osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc\n"
  },
  {
    "path": "cxx/dummy.cc",
    "content": "/* Dummy file to make automake treat libmpirxx.la as C++.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* some compilers reputedly dislike completely empty files */\ntypedef int  foo;\n"
  },
  {
    "path": "cxx/isfuns.cc",
    "content": "/* Auxiliary functions for C++-style input of GMP types.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <cctype>\n#include <iostream>\n#include <string>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\nint\n__gmp_istream_set_base (istream &i, char &c, bool &zero, bool &showbase)\n{\n  int base;\n\n  zero = showbase = false;\n  switch (i.flags() & ios::basefield)\n    {\n    case ios::dec:\n      base = 10;\n      break;\n    case ios::hex:\n      base = 16;\n      break;\n    case ios::oct:\n      base = 8;\n      break;\n    default:\n      showbase = true; // look for initial \"0\" or \"0x\" or \"0X\"\n      if (c == '0')\n\t{\n\t  if (! 
i.get(c))\n\t    c = 0; // reset or we might loop indefinitely\n\n\t  if (c == 'x' || c == 'X')\n\t    {\n\t      base = 16;\n\t      i.get(c);\n\t    }\n\t  else\n\t    {\n\t      base = 8;\n\t      zero = true; // if no other digit is read, the \"0\" counts\n\t    }\n\t}\n      else\n\tbase = 10;\n      break;\n    }\n\n  return base;\n}\n\nvoid\n__gmp_istream_set_digits (string &s, istream &i, char &c, bool &ok, int base)\n{\n  switch (base)\n    {\n    case 10:\n      while (isdigit(c))\n\t{\n\t  ok = true; // at least a valid digit was read\n\t  s += c;\n\t  if (! i.get(c))\n\t    break;\n\t}\n      break;\n    case 8:\n      while (isdigit(c) && c != '8' && c != '9')\n\t{\n\t  ok = true; // at least a valid digit was read\n\t  s += c;\n\t  if (! i.get(c))\n\t    break;\n\t}\n      break;\n    case 16:\n      while (isxdigit(c))\n\t{\n\t  ok = true; // at least a valid digit was read\n\t  s += c;\n\t  if (! i.get(c))\n\t    break;\n\t}\n      break;\n    }\n}\n"
  },
  {
    "path": "cxx/ismpf.cc",
    "content": "/* operator>> -- C++-style input of mpf_t.\n\nCopyright 2001, 2003 Free Software Foundation, Inc.\n\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <cctype>\n#include <iostream>\n#include <string>\n#include <clocale>    // for localeconv\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_float\n// in include/bits/locale_facets.tcc.\n//\n// There are no plans to accept hex or octal floats, not unless the standard\n// C++ library does so.  
Although such formats might be of use, it's\n// considered more important to be compatible with what the normal\n// operator>> does on \"double\"s etc.\n\nistream &\noperator>> (istream &i, mpf_ptr f)\n{\n  int base;\n  char c = 0;\n  string s;\n  bool ok = false;\n\n  // C decimal point, as expected by mpf_set_str\n  const char *lconv_point = localeconv()->decimal_point;\n\n  // C++ decimal point\n#if HAVE_STD__LOCALE && !defined (__sun) && !(defined(__APPLE_CC__) && (__APPLE_CC__ > 1))\n  const locale& loc = i.getloc();\n  char point_char = use_facet< numpunct<char> >(loc).decimal_point();\n#else\n  const char *point = lconv_point;\n  char point_char = *point;\n#endif\n\n  i.get(c); // start reading\n\n  if (i.flags() & ios::skipws) // skip initial whitespace\n    {\n      // C++ isspace\n#if HAVE_STD__LOCALE && !defined(__sun) && !(defined(__APPLE_CC__) && (__APPLE_CC__ > 1))\n      const ctype<char>& ct = use_facet< ctype<char> >(loc);\n#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))\n#else\n#define cxx_isspace(c)  isspace(c)\n#endif\n\n      while (cxx_isspace(c) && i.get(c))\n        ;\n    }\n\n  if (c == '-' || c == '+') // sign\n    {\n      if (c == '-')\n\ts = \"-\";\n      i.get(c);\n    }\n\n  base = 10;\n  __gmp_istream_set_digits(s, i, c, ok, base); // read the number\n\n  // look for the C++ radix point, but put the C one in for mpf_set_str\n  if (c == point_char)\n    {\n#if HAVE_STD__LOCALE\n      i.get(c);\n#else // lconv point can be multi-char\n      for (;;)\n        {\n          i.get(c);\n          point++;\n          if (*point == '\\0')\n            break;\n          if (c != *point)\n            goto fail;\n        }\n#endif\n      s += lconv_point;\n      __gmp_istream_set_digits(s, i, c, ok, base); // read the mantissa\n    }\n\n  if (ok && (c == 'e' || c == 'E')) // exponent\n    {\n      s += c;\n      i.get(c);\n      ok = false; // exponent is mandatory\n\n      if (c == '-' || c == '+') // sign\n\t{\n\t  s += c;\n\t  
i.get(c);\n\t}\n\n      __gmp_istream_set_digits(s, i, c, ok, base); // read the exponent\n    }\n\n  if (i.good()) // last character read was non-numeric\n    i.putback(c);\n  else if (i.eof() && ok) // stopped just before eof\n    i.clear();\n\n  if (ok)\n    ASSERT_NOCARRY (mpf_set_str(f, s.c_str(), base)); // extract the number\n  else\n    {\n#if !HAVE_STD__LOCALE\n    fail:\n#endif\n      i.setstate(ios::failbit); // read failed\n    }\n\n  return i;\n}\n"
  },
  {
    "path": "cxx/ismpq.cc",
    "content": "/* operator>> -- C++-style input of mpq_t.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <cctype>\n#include <iostream>\n#include <string>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\nistream &\noperator>> (istream &i, mpq_ptr q)\n{\n  if (! (i >> mpq_numref(q)))\n    return i;\n\n  char  c = 0;\n  i.get(c); // start reading\n\n  if (c == '/')\n    {\n      // skip slash, read denominator\n      i.get(c);\n      return __gmpz_operator_in_nowhite (i, mpq_denref(q), c);\n    }\n  else\n    {\n      // no denominator, set 1\n      q->_mp_den._mp_size = 1;\n      q->_mp_den._mp_d[0] = 1;\n      if (i.good())\n        i.putback(c);\n      else if (i.eof())\n        i.clear();\n    }\n\n  return i;\n}\n"
  },
  {
    "path": "cxx/ismpz.cc",
    "content": "/* operator>> -- C++-style input of mpz_t.\n\nCopyright 2001, 2003 Free Software Foundation, Inc.\n\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <cctype>\n#include <iostream>\n#include <string>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in\n// include/bits/locale_facets.tcc.\n\nistream &\noperator>> (istream &i, mpz_ptr z)\n{\n  char c = 0;\n  i.get(c); // start reading\n\n  if (i.flags() & ios::skipws) // skip initial whitespace\n    {\n#if HAVE_STD__LOCALE && !defined (__sun) && !(defined(__APPLE_CC__) && (__APPLE_CC__ > 1))\n      const ctype<char>& ct = use_facet< ctype<char> >(i.getloc());\n#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))\n#else\n#define cxx_isspace(c)  isspace(c)\n#endif\n\n      while (cxx_isspace(c) && i.get(c))\n        ;\n    }\n\n  return __gmpz_operator_in_nowhite (i, z, c);\n}\n"
  },
  {
    "path": "cxx/ismpznw.cc",
    "content": "/* __gmpz_operator_in_nowhite -- C++-style input of mpz_t, no whitespace skip.\n\nCopyright 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <cctype>\n#include <iostream>\n#include <string>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in\n// include/bits/locale_facets.tcc.\n\nistream &\n__gmpz_operator_in_nowhite (istream &i, mpz_ptr z, char c)\n{\n  int base;\n  string s;\n  bool ok = false, zero, showbase;\n\n  if (c == '-' || c == '+') // sign\n    {\n      if (c == '-') // mpz_set_str doesn't accept '+'\n\ts = \"-\";\n      i.get(c);\n    }\n\n  base = __gmp_istream_set_base(i, c, zero, showbase); // select the base\n  __gmp_istream_set_digits(s, i, c, ok, base);         // read the number\n\n  if (i.good()) // last character read was non-numeric\n    i.putback(c);\n  else if (i.eof() && (ok || zero)) // stopped just before eof\n    i.clear();\n\n  if (ok)\n    ASSERT_NOCARRY (mpz_set_str (z, s.c_str(), base)); // extract the number\n  else if (zero)\n    mpz_set_ui(z, 0);\n  else\n    i.setstate(ios::failbit); // read failed\n\n  return 
i;\n}\n"
  },
  {
    "path": "cxx/osdoprnti.cc",
    "content": "/* __gmp_doprnt_integer_ios -- integer formatted output to an ostream.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <cstdarg>    /* for va_list and hence doprnt_funs_t */\n#include <cstring>    /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n/* The gmp_asprintf support routines never give an error, so\n   __gmp_doprnt_integer shouldn't fail and it's return can just be checked\n   with an ASSERT.  
*/\n\nostream&\n__gmp_doprnt_integer_ostream (ostream &o, struct doprnt_params_t *p,\n                              char *s)\n{\n  struct gmp_asprintf_t   d;\n  char  *result;\n  int   ret;\n\n  /* don't show leading zeros the way printf does */\n  p->prec = -1;\n\n  GMP_ASPRINTF_T_INIT (d, &result);\n  ret = __gmp_doprnt_integer (&__gmp_asprintf_funs_noformat, &d, p, s);\n  ASSERT (ret != -1);\n  __gmp_asprintf_final (&d);\n  (*__gmp_free_func) (s, strlen(s)+1);\n\n  gmp_allocated_string  t (result);\n  return o.write (t.str, t.len);\n}\n"
  },
  {
    "path": "cxx/osfuns.cc",
    "content": "/* Support for operator<< routines.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n/* Don't need \"format\" for operator<< routines, just \"memory\" and \"reps\".\n   Omitting gmp_asprintf_format lets us avoid dragging vsnprintf into the\n   link.  __gmp_asprintf_final will be called directly and doesn't need to\n   be in the struct.  */\n\nconst struct doprnt_funs_t  __gmp_asprintf_funs_noformat = {\n  NULL,\n  (doprnt_memory_t) __gmp_asprintf_memory,\n  (doprnt_reps_t)   __gmp_asprintf_reps,\n  NULL\n};\n\n\nvoid\n__gmp_doprnt_params_from_ios (struct doprnt_params_t *p, ios &o)\n{\n  if ((o.flags() & ios::basefield) == ios::hex)\n    {\n      p->expfmt = \"@%c%02d\";\n      p->base = (o.flags() & ios::uppercase ? 
-16 : 16);\n    }\n  else\n    {\n      p->expfmt = (o.flags() & ios::uppercase ? \"E%c%02d\" : \"e%c%02d\");\n      if ((o.flags() & ios::basefield) == ios::oct)\n        p->base = 8;\n      else\n        p->base = 10;\n    }\n\n  /* \"general\" if none or more than one bit set */\n  if ((o.flags() & ios::floatfield) == ios::fixed)\n    p->conv = DOPRNT_CONV_FIXED;\n  else if ((o.flags() & ios::floatfield) == ios::scientific)\n    p->conv = DOPRNT_CONV_SCIENTIFIC;\n  else\n    p->conv = DOPRNT_CONV_GENERAL;\n\n  p->exptimes4 = 0;\n\n  p->fill = o.fill();\n\n  /* \"right\" if more than one bit set */\n  if ((o.flags() & ios::adjustfield) == ios::left)\n    p->justify = DOPRNT_JUSTIFY_LEFT;\n  else if ((o.flags() & ios::adjustfield) == ios::internal)\n    p->justify = DOPRNT_JUSTIFY_INTERNAL;\n  else\n    p->justify = DOPRNT_JUSTIFY_RIGHT;\n\n  /* ios::fixed allows prec==0, others take 0 as the default 6.\n     Don't allow negatives (they do bad things to __gmp_doprnt_float_cxx).  */\n  p->prec = MAX (0, o.precision());\n  if (p->prec == 0 && p->conv != DOPRNT_CONV_FIXED)\n    p->prec = 6;\n\n  /* for hex showbase is always, for octal only non-zero */\n  if (o.flags() & ios::showbase)\n    p->showbase = ((o.flags() & ios::basefield) == ios::hex\n                   ? DOPRNT_SHOWBASE_YES : DOPRNT_SHOWBASE_NONZERO);\n  else\n    p->showbase = DOPRNT_SHOWBASE_NO;\n\n  p->showpoint = ((o.flags() & ios::showpoint) != 0);\n\n  /* in fixed and scientific always show trailing zeros, in general format\n     show them if showpoint is set (or so it seems) */\n  if ((o.flags() & ios::floatfield) == ios::fixed\n      || (o.flags() & ios::floatfield) == ios::scientific)\n    p->showtrailing = 1;\n  else\n    p->showtrailing = p->showpoint;\n\n  p->sign = (o.flags() & ios::showpos ? '+' : '\\0');\n\n  p->width = o.width();\n\n  /* reset on each output */\n  o.width (0);\n}\n"
  },
  {
    "path": "cxx/osmpf.cc",
    "content": "/* operator<< -- mpf formatted output to an ostream.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <clocale>\n#include <iostream>\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\n/* The gmp_asprintf support routines never give an error, so\n   __gmp_doprnt_mpf shouldn't fail and it's return can just be checked with\n   an ASSERT.  
*/\n\nostream&\noperator<< (ostream &o, mpf_srcptr f)\n{\n  struct doprnt_params_t  param;\n  struct gmp_asprintf_t   d;\n  char  *result;\n  int   ret;\n\n  __gmp_doprnt_params_from_ios (&param, o);\n\n#if HAVE_STD__LOCALE && !defined (__sun) && !(defined(__APPLE_CC__) && (__APPLE_CC__ > 1))\n  char  point[2];\n  point[0] = use_facet< numpunct<char> >(o.getloc()).decimal_point();\n  point[1] = '\\0';\n#else\n  const char *point = localeconv()->decimal_point;\n#endif\n\n  GMP_ASPRINTF_T_INIT (d, &result);\n  ret = __gmp_doprnt_mpf (&__gmp_asprintf_funs_noformat, &d, &param, point, f);\n  ASSERT (ret != -1);\n  __gmp_asprintf_final (&d);\n\n  gmp_allocated_string  t (result);\n  return o.write (t.str, t.len);\n}\n"
  },
  {
    "path": "cxx/osmpq.cc",
    "content": "/* operator<< -- mpq formatted output to an ostream.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\nostream&\noperator<< (ostream &o, mpq_srcptr q)\n{\n  struct doprnt_params_t  param;\n  __gmp_doprnt_params_from_ios (&param, o);\n  return __gmp_doprnt_integer_ostream (o, &param,\n                                       mpq_get_str (NULL, param.base, q));\n}\n"
  },
  {
    "path": "cxx/osmpz.cc",
    "content": "/* operator<< -- mpz formatted output to an ostream.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nusing namespace std;\n\n\nostream&\noperator<< (ostream &o, mpz_srcptr z)\n{\n  struct doprnt_params_t  param;\n  __gmp_doprnt_params_from_ios (&param, o);\n  return __gmp_doprnt_integer_ostream (o, &param,\n                                       mpz_get_str (NULL, param.base, z));\n}\n"
  },
  {
    "path": "devel/configfsf.sub.diff",
    "content": "\n\nWe can use the standard fsf config.sub , but we need change this (~line 1200)\n\n\n\tpmac | pmac-mpw)\n\t\tbasic_machine=powerpc-apple\n\t\t;;\n\t*-unknown)\n\t\t# Make sure to match an already-canonicalized machine name.\n\t\t;;\n\t*)\n\t\techo Invalid configuration \\`$1\\': machine \\`$basic_machine\\' not recognized 1>&2\n\t\texit 1\n\n\n\tpmac | pmac-mpw)\n\t\tbasic_machine=powerpc-apple\n\t\t;;\n\t*-unknown | *-pc | *-apple | *-w64)\n\t\t# Make sure to match an already-canonicalized machine name.\n\t\t;;\n\t*)\n\t\techo Invalid configuration \\`$1\\': machine \\`$basic_machine\\' not recognized 1>&2\n\t\texit 1\n\n\nso that we recognize more default already-canonicalized machine names , because\nwe have so many more cpu names\n\n"
  },
  {
    "path": "devel/gen-bases.c",
    "content": "/* Generate mp_bases data.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <math.h>\n#include <stdio.h>\n#include \"mpir.h\"\n\n\nint    chars_per_limb;\ndouble chars_per_bit_exactly;\nmpz_t  big_base;\nint    normalization_steps;\nmpz_t  big_base_inverted;\n\nmpz_t  t;\n\n#define POW2_P(n)  (((n) & ((n) - 1)) == 0)\n\nunsigned int\nulog2 (unsigned int x)\n{\n  unsigned int i;\n  for (i = 0;  x != 0;  i++)\n    x >>= 1;\n  return i;\n}\n\nvoid\ngenerate (int limb_bits, int nail_bits, int base)\n{\n  int  numb_bits = limb_bits - nail_bits;\n\n  mpz_set_ui (t, 1L);\n  mpz_mul_2exp (t, t, numb_bits);\n  mpz_set_ui (big_base, 1L);\n  chars_per_limb = 0;\n  for (;;)\n    {\n      mpz_mul_ui (big_base, big_base, (long) base);\n      if (mpz_cmp (big_base, t) > 0)\n        break;\n      chars_per_limb++;\n    }\n\n  chars_per_bit_exactly = 0.69314718055994530942 / log ((double) base);\n\n  mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);\n\n  normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);\n\n  mpz_set_ui (t, 1L);\n  mpz_mul_2exp (t, t, 2*limb_bits - normalization_steps);\n  
mpz_tdiv_q (big_base_inverted, t, big_base);\n  mpz_set_ui (t, 1L);\n  mpz_mul_2exp (t, t, limb_bits);\n  mpz_sub (big_base_inverted, big_base_inverted, t);\n}\n\nvoid\nheader (int limb_bits, int nail_bits)\n{\n  int  numb_bits = limb_bits - nail_bits;\n\n  generate (limb_bits, nail_bits, 10);\n\n  printf (\"/* This file generated by gen-bases.c - DO NOT EDIT. */\\n\");\n  printf (\"\\n\");\n  printf (\"#if GMP_NUMB_BITS != %d\\n\", numb_bits);\n  printf (\"Error, error, this data is for %d bits\\n\", numb_bits);\n  printf (\"#endif\\n\");\n  printf (\"\\n\");\n  printf (\"/* mp_bases[10] data, as literal values */\\n\");\n  printf (\"#define MP_BASES_CHARS_PER_LIMB_10      %d\\n\", chars_per_limb);\n  printf (\"#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x\");\n  mpz_out_str (stdout, 16, big_base);\n  printf (\")\\n\");\n  printf (\"#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x\");\n  mpz_out_str (stdout, 16, big_base_inverted);\n  printf (\")\\n\");\n  printf (\"#define MP_BASES_NORMALIZATION_STEPS_10 %d\\n\", normalization_steps);\n}\n\nvoid\ntable (int limb_bits, int nail_bits)\n{\n  int  numb_bits = limb_bits - nail_bits;\n  int  base;\n\n  printf (\"/* This file generated by gen-bases.c - DO NOT EDIT. 
*/\\n\");\n  printf (\"\\n\");\n  printf (\"#include \\\"mpir.h\\\"\\n\");\n  printf (\"#include \\\"gmp-impl.h\\\"\\n\");\n  printf (\"\\n\");\n  printf (\"#if GMP_NUMB_BITS != %d\\n\", numb_bits);\n  printf (\"Error, error, this data is for %d bits\\n\", numb_bits);\n  printf (\"#endif\\n\");\n  printf (\"\\n\");\n  puts (\"const struct bases mp_bases[257] =\\n{\");\n  puts (\"  /*   0 */ { 0, 0.0, 0 },\");\n  puts (\"  /*   1 */ { 0, 1e37, 0 },\");\n  for (base = 2; base <= 256; base++)\n    {\n      generate (limb_bits, nail_bits, base);\n\n      printf (\"  /* %3u */ { \", base);\n      if (POW2_P (base))\n\t{\n          printf (\"%u, %.16f, 0x%x },\\n\",\n                  chars_per_limb, chars_per_bit_exactly, ulog2 (base) - 1);\n\t}\n      else\n\t{\n          printf (\"%u, %.16f, CNST_LIMB(0x\",\n                  chars_per_limb, chars_per_bit_exactly);\n\t  mpz_out_str (stdout, 16, big_base);\n          printf (\"), CNST_LIMB(0x\");\n\t  mpz_out_str (stdout, 16, big_base_inverted);\n          printf (\") },\\n\");\n\t}\n    }\n\n  puts (\"};\");\n}\n\nint\nmain (int argc, char **argv)\n{\n  int  limb_bits, nail_bits;\n\n  mpz_init (big_base);\n  mpz_init (big_base_inverted);\n  mpz_init (t);\n\n  if (argc != 4)\n    {\n      fprintf (stderr, \"Usage: gen-bases <header|table> <limbbits> <nailbits>\\n\");\n      exit (1);\n    }\n\n  limb_bits = atoi (argv[2]);\n  nail_bits = atoi (argv[3]);\n\n  if (limb_bits <= 0\n      || nail_bits < 0\n      || nail_bits >= limb_bits)\n    {\n      fprintf (stderr, \"Invalid limb/nail bits: %d %d\\n\",\n               limb_bits, nail_bits);\n      exit (1);\n    }\n\n  if (strcmp (argv[1], \"header\") == 0)\n    header (limb_bits, nail_bits);\n  else if (strcmp (argv[1], \"table\") == 0)\n    table (limb_bits, nail_bits);\n  else\n    {\n      fprintf (stderr, \"Invalid header/table choice: %s\\n\", argv[1]);\n      exit (1);\n    }\n\n  return 0;\n}\n\n"
  },
  {
    "path": "devel/gen-fac_ui.c",
    "content": "/* Generate mpz_fac_ui data.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n\n\n/* sets x=y*(y+2)*(y+4)*....*(y+2*(z-1))\t*/\nvoid\nodd_products (mpz_t x, mpz_t y, int z)\n{\n  mpz_t t;\n\n  mpz_init_set (t, y);\n  mpz_set_ui (x, 1);\n  for (; z != 0; z--)\n    {\n      mpz_mul (x, x, t);\n      mpz_add_ui (t, t, 2);\n    }\n  mpz_clear (t);\n  return;\n}\n\n/* returns 0 on success\t\t*/\nint\ngen_consts (int numb, int nail, int limb)\n{\n  mpz_t x, y, z, t;\n  unsigned long a, b, first = 1;\n\n  printf (\"/* This file is automatically generated by gen-fac_ui.c */\\n\\n\");\n  printf (\"#if GMP_NUMB_BITS != %d\\n\", numb);\n  printf (\"Error , error this data is for %d GMP_NUMB_BITS only\\n\", numb);\n  printf (\"#endif\\n\");\n  printf (\"#if GMP_LIMB_BITS != %d\\n\", limb);\n  printf (\"Error , error this data is for %d GMP_LIMB_BITS only\\n\", limb);\n  printf (\"#endif\\n\");\n\n  printf\n    (\"/* This table is 0!,1!,2!,3!,...,n! where n! 
has <= GMP_NUMB_BITS bits */\\n\");\n  printf\n    (\"#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),\");\n  mpz_init_set_ui (x, 2);\n  for (b = 3;; b++)\n    {\n      mpz_mul_ui (x, x, b);\t/* so b!=a       */\n      if (mpz_sizeinbase (x, 2) > numb)\n\tbreak;\n      if (first)\n\t{\n\t  first = 0;\n\t}\n      else\n\t{\n\t  printf (\"),\");\n\t}\n      printf (\"CNST_LIMB(0x\");\n      mpz_out_str (stdout, 16, x);\n    }\n  printf (\")\\n\");\n\n\n  mpz_set_ui (x, 1);\n  mpz_mul_2exp (x, x, limb + 1);\t/* x=2^(limb+1)        */\n  mpz_init (y);\n  mpz_set_ui (y, 10000);\n  mpz_mul (x, x, y);\t\t/* x=2^(limb+1)*10^4     */\n  mpz_set_ui (y, 27182);\t/* exp(1)*10^4      */\n  mpz_tdiv_q (x, x, y);\t\t/* x=2^(limb+1)/exp(1)        */\n  printf (\"\\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\\n\");\n  printf (\"#define FAC2OVERE CNST_LIMB(0x\");\n  mpz_out_str (stdout, 16, x);\n  printf (\")\\n\");\n\n\n  printf\n    (\"\\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\\n\\n\");\n  mpz_init (z);\n  mpz_init (t);\n  for (a = 2; a <= 4; a++)\n    {\n      mpz_set_ui (x, 1);\n      mpz_mul_2exp (x, x, numb);\n      mpz_root (x, x, a);\n      /* so x is approx sol       */\n      if (mpz_even_p (x))\n\tmpz_sub_ui (x, x, 1);\n      mpz_set_ui (y, 1);\n      mpz_mul_2exp (y, y, numb);\n      mpz_sub_ui (y, y, 1);\n      /* decrement x until we are <= real sol     */\n      do\n\t{\n\t  mpz_sub_ui (x, x, 2);\n\t  odd_products (t, x, a);\n\t  if (mpz_cmp (t, y) <= 0)\n\t    break;\n\t}\n      while (1);\n      /* increment x until > real sol     */\n      do\n\t{\n\t  mpz_add_ui (x, x, 2);\n\t  odd_products (t, x, a);\n\t  if (mpz_cmp (t, y) > 0)\n\t    break;\n\t}\n      while (1);\n      /* dec once to get real sol */\n      mpz_sub_ui (x, x, 2);\n      printf (\"#define FACMUL%lu CNST_LIMB(0x\", a);\n      mpz_out_str (stdout, 16, x);\n      printf (\")\\n\");\n    }\n\n  return 0;\n}\n\nint\nmain (int argc, 
char *argv[])\n{\n  int nail_bits, limb_bits, numb_bits;\n\n  if (argc != 3)\n    {\n      fprintf (stderr, \"Usage: gen-fac_ui limbbits nailbits\\n\");\n      exit (1);\n    }\n  limb_bits = atoi (argv[1]);\n  nail_bits = atoi (argv[2]);\n  numb_bits = limb_bits - nail_bits;\n  if (limb_bits < 0 || nail_bits < 0 || numb_bits < 0)\n    {\n      fprintf (stderr, \"Invalid limb/nail bits %d,%d\\n\", limb_bits,\n\t       nail_bits);\n      exit (1);\n    }\n  gen_consts (numb_bits, nail_bits, limb_bits);\n  return 0;\n}\n"
  },
  {
    "path": "devel/gen-fib.c",
    "content": "/* Generate Fibonacci table data.\n\nCopyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n\nmpz_t  *f;\nint    fnum, fib_limit, luc_limit;\n\nvoid\ngenerate (int numb_bits)\n{\n  mpz_t  limit, l;\n  int    falloc, i;\n\n  mpz_init_set_ui (limit, 1L);\n  mpz_mul_2exp (limit, limit, numb_bits);\n\n  /* fib(2n) > 2^n, so use 2n as a limit for the table size */\n  falloc = 2 * numb_bits;\n  f = (mpz_t *) malloc (falloc * sizeof (*f));\n\n  mpz_init_set_ui (f[0], 1L);  /* F[-1] */\n  mpz_init_set_ui (f[1], 0L);  /* F[0] */\n\n  mpz_init (l);\n\n  for (i = 2; ; i++)\n    {\n      //ASSERT (i < falloc);\n\n      /* F[i] = F[i-1] + F[i-2] */\n      mpz_init (f[i]);\n      mpz_add (f[i], f[i-1], f[i-2]);\n      if (mpz_cmp (f[i], limit) >= 0)\n        break;\n\n      fnum = i+1;\n      fib_limit = i-1;\n\n      /* L[i] = F[i]+2*F[i-1] */\n      mpz_add (l, f[i], f[i-1]);\n      mpz_add (l, l, f[i-1]);\n\n      if (mpz_cmp (l, limit) < 0)\n        luc_limit = i-1;\n    }\n\n  mpz_clear (limit);\n}\n\n\nvoid\nheader (int numb_bits)\n{\n  printf (\"/* This file generated by gen-fib.c - DO NOT EDIT. 
*/\\n\");\n  printf (\"\\n\");\n  printf (\"#if GMP_NUMB_BITS != %d\\n\", numb_bits);\n  printf (\"Error, error, this data is for %d bits\\n\", numb_bits);\n  printf (\"#endif\\n\");\n  printf (\"\\n\");\n  printf (\"#define FIB_TABLE_LIMIT         %d\\n\", fib_limit);\n  printf (\"#define FIB_TABLE_LUCNUM_LIMIT  %d\\n\", luc_limit);\n}\n\nvoid\ntable (int numb_bits)\n{\n  int  i;\n\n  printf (\"/* This file generated by gen-fib.c - DO NOT EDIT. */\\n\");\n  printf (\"\\n\");\n  printf (\"#include \\\"mpir.h\\\"\\n\");\n  printf (\"#include \\\"gmp-impl.h\\\"\\n\");\n  printf (\"\\n\");\n  printf (\"#if GMP_NUMB_BITS != %d\\n\", numb_bits);\n  printf (\"Error, error, this data is for %d bits\\n\", numb_bits);\n  printf (\"#endif\\n\");\n  printf (\"\\n\");\n  printf (\"const mp_limb_t\\n\");\n  printf (\"__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\\n\");\n\n  for (i = 0; i < fnum; i++)\n    {\n      printf (\"  CNST_LIMB (0x\");\n      mpz_out_str (stdout, 16, f[i]);\n      printf (\"),  /* %d */\\n\", i-1);\n    }\n  printf (\"};\\n\");\n}\n\nint\nmain (int argc, char *argv[])\n{\n  int  limb_bits, nail_bits, numb_bits;\n\n  if (argc != 4)\n    {\n      fprintf (stderr, \"Usage: gen-bases <header|table> <limbbits> <nailbits>\\n\");\n      exit (1);\n    }\n\n  limb_bits = atoi (argv[2]);\n  nail_bits = atoi (argv[3]);\n\n  if (limb_bits <= 0\n      || nail_bits < 0\n      || nail_bits >= limb_bits)\n    {\n      fprintf (stderr, \"Invalid limb/nail bits: %d %d\\n\",\n               limb_bits, nail_bits);\n      exit (1);\n    }\n  numb_bits = limb_bits - nail_bits;\n\n  generate (numb_bits);\n\n  if (strcmp (argv[1], \"header\") == 0)\n    header (numb_bits);\n  else if (strcmp (argv[1], \"table\") == 0)\n    table (numb_bits);\n  else\n    {\n      fprintf (stderr, \"Invalid header/table choice: %s\\n\", argv[1]);\n      exit (1);\n    }\n\n  return 0;\n}\n"
  },
  {
    "path": "devel/gen-psqr.c",
    "content": "/* Generate perfect square testing data.\n\nCopyright 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n\n\n/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1\n   (plus a PERFSQR_PP modulus), and generate tables indicating quadratic\n   residues and non-residues modulo small factors of that modulus.\n\n   For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That\n   function exists specifically because 2^24-1 and 2^48-1 have nice sets of\n   prime factors.  For other limb sizes it's considered, but if it doesn't\n   have good factors then mpn_mod_1 will be used instead.\n\n   When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a\n   selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,\n   with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=\n   GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its\n   calculation within a single limb.\n\n   In either case primes can be combined to make divisors.  
The table data\n   then effectively indicates remainders which are quadratic residues mod\n   all the primes.  This sort of combining reduces the number of steps\n   needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.\n   Nothing is gained or lost in terms of detections, the same total fraction\n   of non-residues will be identified.\n\n   Nothing particularly sophisticated is attempted for combining factors to\n   make divisors.  This is probably a kind of knapsack problem so it'd be\n   too hard to attempt anything completely general.  For the usual 32 and 64\n   bit limbs we get a good enough result just pairing the biggest and\n   smallest which fit together, repeatedly.\n\n   Another aim is to get powerful combinations, ie. divisors which identify\n   biggest fraction of non-residues, and have those run first.  Again for\n   the usual 32 and 64 bits it seems good enough just to pair for big\n   divisors then sort according to the resulting fraction of non-residues\n   identified.\n\n   Also in this program, a table sq_res_0x100 of residues modulo 256 is\n   generated.  This simply fills bits into limbs of the appropriate\n   build-time GMP_LIMB_BITS each.\n\n*/\n\n\n/* Normally we aren't using const in gen*.c programs, so as not to have to\n   bother figuring out if it works, but using it with f_cmp_divisor and\n   f_cmp_fraction avoids warnings from the qsort calls. */\n\n/* Same tests as mpir.h. */\n#if  defined (__STDC__)                                 \\\n  || defined (__cplusplus)                              \\\n  || defined (_AIX)                                     \\\n  || defined (__DECC)                                   \\\n  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \\\n  || defined (_MSC_VER)                                 \\\n  || defined (_WIN32)\n#define HAVE_CONST        1\n#endif\n\n#if ! HAVE_CONST\n#define const\n#endif\n\n#define MIN(l,o) ((l) < (o) ? 
(l) : (o))\n\nmpz_t  *sq_res_0x100;          /* table of limbs */\nint    nsq_res_0x100;          /* elements in sq_res_0x100 array */\nint    sq_res_0x100_num;       /* squares in sq_res_0x100 */\ndouble sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */\n\nint     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */\nint     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */\nint     max_divisor;       /* all divisors <= max_divisor */\nint     max_divisor_bits;  /* ceil(log2(max_divisor)) */\ndouble  total_fraction;    /* of squares */\nmpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */\nmpz_t   pp_norm;           /* pp shifted so NUMB high bit set */\nmpz_t   pp_inverted;       /* invert_limb style inverse */\nmpz_t   mod_mask;          /* 2^mod_bits-1 */\nchar    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */\n\n/* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */\nstruct rawfactor_t {\n  int     divisor;\n  int     multiplicity;\n};\nstruct rawfactor_t  *rawfactor;\nint                 nrawfactor;\n\n/* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */\nstruct factor_t {\n  int     divisor;\n  mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */\n  mpz_t   mask;      /* indicating squares mod divisor */\n  double  fraction;  /* squares/total */\n};\nstruct factor_t  *factor;\nint              nfactor;       /* entries in use in factor array */\nint              factor_alloc;  /* entries allocated to factor array */\n\nint\nlog2_ceil (int n)\n{\n  int  e;\n  //ASSERT (n >= 1);\n  for (e = 0; ; e++)\n    if ((1 << e) >= n)\n      break;\n  return e;\n}\n\nint\nisprime (int n)\n{\n  int  i;\n  if (n < 2)\n    return 0;\n  for (i = 2; i < n; i++)\n    if ((n % i) == 0)\n      return 0;\n  return 1;\n}\n\n/* Set inv to the inverse of d, in the style of invert_limb, ie. for\n   udiv_qrnnd_preinv.  
*/\nvoid\nmpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)\n{\n  mpz_t  t;\n  int    norm;\n  //ASSERT (SIZ(d) > 0);\n\n  norm = numb_bits - mpz_sizeinbase (d, 2);\n  //ASSERT (norm >= 0);\n  mpz_init_set_ui (t, 1L);\n  mpz_mul_2exp (t, t, 2*numb_bits - norm);\n  mpz_tdiv_q (inv, t, d);\n  mpz_set_ui (t, 1L);\n  mpz_mul_2exp (t, t, numb_bits);\n  mpz_sub (inv, inv, t);\n\n  mpz_clear (t);\n}\n\nvoid\nmem_copyi (char *dst, char *src, int size)\n{\n  int  i;\n  for (i = 0; i < size; i++)\n    dst[i] = src[i];\n}\n\n/* Calculate r satisfying r*d == 1 mod 2^n. */\nvoid\nmpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)\n{\n  unsigned long  i;\n  mpz_t  inv, prod;\n\n  //ASSERT (mpz_odd_p (a));\n\n  mpz_init_set_ui (inv, 1L);\n  mpz_init (prod);\n\n  for (i = 1; i < n; i++)\n    {\n      mpz_mul (prod, inv, a);\n      if (mpz_tstbit (prod, i) != 0)\n        mpz_setbit (inv, i);\n    }\n\n  mpz_mul (prod, inv, a);\n  mpz_tdiv_r_2exp (prod, prod, n);\n  //ASSERT (mpz_cmp_ui (prod, 1L) == 0);\n\n  mpz_set (r, inv);\n\n  mpz_clear (inv);\n  mpz_clear (prod);\n}\n\n/* Calculate inv satisfying r*a == 1 mod 2^n. */\nvoid\nmpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)\n{\n  mpz_t  az;\n  mpz_init_set_ui (az, a);\n  mpz_invert_2exp (r, az, n);\n  mpz_clear (az);\n}\n\nint\nf_cmp_divisor (const void *parg, const void *qarg)\n{\n  const struct factor_t *p, *q;\n  p = parg;\n  q = qarg;\n  if (p->divisor > q->divisor)\n    return 1;\n  else if (p->divisor < q->divisor)\n    return -1;\n  else\n    return 0;\n}\n\nint\nf_cmp_fraction (const void *parg, const void *qarg)\n{\n  const struct factor_t *p, *q;\n  p = parg;\n  q = qarg;\n  if (p->fraction > q->fraction)\n    return 1;\n  else if (p->fraction < q->fraction)\n    return -1;\n  else\n    return 0;\n}\n\n/* Remove array[idx] by copying the remainder down, and adjust narray\n   accordingly.  
*/\n#define COLLAPSE_ELEMENT(array, idx, narray)                    \\\n  do {                                                          \\\n    mem_copyi ((char *) &(array)[idx],                          \\\n               (char *) &(array)[idx+1],                        \\\n               ((narray)-((idx)+1)) * sizeof (array[0]));       \\\n    (narray)--;                                                 \\\n  } while (0)\n\n\n/* return n*2^p mod m */\nint\nmul_2exp_mod (int n, int p, int m)\n{\n  int  i;\n  for (i = 0; i < p; i++)\n    n = (2 * n) % m;\n  return n;\n}\n\n/* return -n mod m */\nint\nneg_mod (int n, int m)\n{\n  //ASSERT (n >= 0 && n < m);\n  return (n == 0 ? 0 : m-n);\n}\n\n/* Set \"mask\" to a value such that \"mask & (1<<idx)\" is non-zero if\n   \"-(idx<<mod_bits)\" can be a square modulo m.  */\nvoid\nsquare_mask (mpz_t mask, int m)\n{\n  int    p, i, r, idx;\n\n  p = mul_2exp_mod (1, mod_bits, m);\n  p = neg_mod (p, m);\n\n  mpz_set_ui (mask, 0L);\n  for (i = 0; i < m; i++)\n    {\n      r = (i * i) % m;\n      idx = (r * p) % m;\n      mpz_setbit (mask, (unsigned long) idx);\n    }\n}\n\nvoid\ngenerate_sq_res_0x100 (int limb_bits)\n{\n  int  i, res;\n\n  nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;\n  sq_res_0x100 = (mpz_t *) malloc (nsq_res_0x100 * sizeof (*sq_res_0x100));\n\n  for (i = 0; i < nsq_res_0x100; i++)\n    mpz_init_set_ui (sq_res_0x100[i], 0L);\n\n  for (i = 0; i < 0x100; i++)\n    {\n      res = (i * i) % 0x100;\n      mpz_setbit (sq_res_0x100[res / limb_bits],\n                  (unsigned long) (res % limb_bits));\n    }\n\n  sq_res_0x100_num = 0;\n  for (i = 0; i < nsq_res_0x100; i++)\n    sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);\n  sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;\n}\n\nvoid\ngenerate_mod (int limb_bits, int nail_bits)\n{\n  int    numb_bits = limb_bits - nail_bits;\n  int    i, divisor;\n\n  mpz_init_set_ui (pp, 0L);\n  mpz_init_set_ui (pp_norm, 0L);\n  mpz_init_set_ui 
(pp_inverted, 0L);\n\n  /* no more than limb_bits many factors in a one limb modulus (and of\n     course in reality nothing like that many) */\n  factor_alloc = limb_bits;\n  factor = (struct factor_t *) malloc (factor_alloc * sizeof (*factor));\n  rawfactor = (struct rawfactor_t *)\n    malloc (factor_alloc * sizeof (*rawfactor));\n\n  if (numb_bits % 4 != 0)\n    {\n      strcpy (mod34_excuse, \"GMP_NUMB_BITS % 4 != 0\");\n      goto use_pp;\n    }\n\n  max_divisor = 2*limb_bits;\n  max_divisor_bits = log2_ceil (max_divisor);\n\n  if (numb_bits / 4 < max_divisor_bits)\n    {\n      /* Wind back to one limb worth of max_divisor, if that will let us use\n         mpn_mod_34lsub1.  */\n      max_divisor = limb_bits;\n      max_divisor_bits = log2_ceil (max_divisor);\n\n      if (numb_bits / 4 < max_divisor_bits)\n        {\n          strcpy (mod34_excuse, \"GMP_NUMB_BITS / 4 too small\");\n          goto use_pp;\n        }\n    }\n\n  {\n    /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */\n    mpz_t  m, q, r;\n    int    multiplicity;\n\n    mod34_bits = (numb_bits / 4) * 3;\n\n    /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at\n       the mod34_bits mark, adding the two halves for a remainder of at most\n       mod34_bits+1 many bits */\n    mod_bits = mod34_bits + 1;\n\n    mpz_init_set_ui (m, 1L);\n    mpz_mul_2exp (m, m, mod34_bits);\n    mpz_sub_ui (m, m, 1L);\n\n    mpz_init (q);\n    mpz_init (r);\n\n    for (i = 3; i <= max_divisor; i++)\n      {\n        if (! 
isprime (i))\n          continue;\n\n        mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);\n        if (mpz_sgn (r) != 0)\n          continue;\n\n        /* if a repeated prime is found it's used as an i^n in one factor */\n        divisor = 1;\n        multiplicity = 0;\n        do\n          {\n            if (divisor > max_divisor / i)\n              break;\n            multiplicity++;\n            mpz_set (m, q);\n            mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);\n          }\n        while (mpz_sgn (r) == 0);\n\n        //ASSERT (nrawfactor < factor_alloc);\n        rawfactor[nrawfactor].divisor = i;\n        rawfactor[nrawfactor].multiplicity = multiplicity;\n        nrawfactor++;\n      }\n\n    mpz_clear (m);\n    mpz_clear (q);\n    mpz_clear (r);\n  }\n\n  if (nrawfactor <= 2)\n    {\n      mpz_t  new_pp;\n\n      sprintf (mod34_excuse, \"only %d small factor%s\",\n               nrawfactor, nrawfactor == 1 ? \"\" : \"s\");\n\n    use_pp:\n      /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code\n         tried with just one */\n      max_divisor = 2*limb_bits;\n      max_divisor_bits = log2_ceil (max_divisor);\n\n      mpz_init (new_pp);\n      nrawfactor = 0;\n      mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);\n\n      /* one copy of each small prime */\n      mpz_set_ui (pp, 1L);\n      for (i = 3; i <= max_divisor; i++)\n        {\n          if (! isprime (i))\n            continue;\n\n          mpz_mul_ui (new_pp, pp, (unsigned long) i);\n          if (mpz_sizeinbase (new_pp, 2) > mod_bits)\n            break;\n          mpz_set (pp, new_pp);\n\n          //ASSERT (nrawfactor < factor_alloc);\n          rawfactor[nrawfactor].divisor = i;\n          rawfactor[nrawfactor].multiplicity = 1;\n          nrawfactor++;\n        }\n\n      /* Plus an extra copy of one or more of the primes selected, if that\n         still fits in max_divisor and the total in mod_bits.  
Usually only\n         3 or 5 will be candidates */\n      for (i = nrawfactor-1; i >= 0; i--)\n        {\n          if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)\n            continue;\n          mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);\n          if (mpz_sizeinbase (new_pp, 2) > mod_bits)\n            continue;\n          mpz_set (pp, new_pp);\n\n          rawfactor[i].multiplicity++;\n        }\n\n      mod_bits = mpz_sizeinbase (pp, 2);\n\n      mpz_set (pp_norm, pp);\n      while (mpz_sizeinbase (pp_norm, 2) < numb_bits)\n        mpz_add (pp_norm, pp_norm, pp_norm);\n\n      mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);\n\n      mpz_clear (new_pp);\n    }\n\n  /* start the factor array */\n  for (i = 0; i < nrawfactor; i++)\n    {\n      int  j;\n      //ASSERT (nfactor < factor_alloc);\n      factor[nfactor].divisor = 1;\n      for (j = 0; j < rawfactor[i].multiplicity; j++)\n        factor[nfactor].divisor *= rawfactor[i].divisor;\n      nfactor++;\n    }\n\n combine:\n  /* Combine entries in the factor array.  Combine the smallest entry with\n     the biggest one that will fit with it (ie. under max_divisor), then\n     repeat that with the new smallest entry. 
*/\n  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);\n  for (i = nfactor-1; i >= 1; i--)\n    {\n      if (factor[i].divisor <= max_divisor / factor[0].divisor)\n        {\n          factor[0].divisor *= factor[i].divisor;\n          COLLAPSE_ELEMENT (factor, i, nfactor);\n          goto combine;\n        }\n    }\n\n  total_fraction = 1.0;\n  for (i = 0; i < nfactor; i++)\n    {\n      mpz_init (factor[i].inverse);\n      mpz_invert_ui_2exp (factor[i].inverse,\n                          (unsigned long) factor[i].divisor,\n                          (unsigned long) mod_bits);\n\n      mpz_init (factor[i].mask);\n      square_mask (factor[i].mask, factor[i].divisor);\n\n      /* fraction of possible squares */\n      factor[i].fraction = (double) mpz_popcount (factor[i].mask)\n        / factor[i].divisor;\n\n      /* total fraction of possible squares */\n      total_fraction *= factor[i].fraction;\n    }\n\n  /* best tests first (ie. smallest fraction) */\n  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);\n}\n\nvoid\nprint (int limb_bits, int nail_bits)\n{\n  int    i;\n  mpz_t  mhi, mlo;\n\n  printf (\"/* This file generated by gen-psqr.c - DO NOT EDIT. */\\n\");\n  printf (\"\\n\");\n\n  printf (\"#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\\n\",\n          limb_bits, nail_bits);\n  printf (\"Error, error, this data is for %d bit limb and %d bit nail\\n\",\n          limb_bits, nail_bits);\n  printf (\"#endif\\n\");\n  printf (\"\\n\");\n\n  printf (\"/* Non-zero bit indicates a quadratic residue mod 0x100.\\n\");\n  printf (\"   This test identifies %.2f%% as non-squares (%d/256). 
*/\\n\",\n          (1.0 - sq_res_0x100_fraction) * 100.0,\n          0x100 - sq_res_0x100_num);\n  printf (\"static const mp_limb_t\\n\");\n  printf (\"sq_res_0x100[%d] = {\\n\", nsq_res_0x100);\n  for (i = 0; i < nsq_res_0x100; i++)\n    {\n      printf (\"  CNST_LIMB(0x\");\n      mpz_out_str (stdout, 16, sq_res_0x100[i]);\n      printf (\"),\\n\");\n    }\n  printf (\"};\\n\");\n  printf (\"\\n\");\n\n  if (mpz_sgn (pp) != 0)\n    {\n      printf (\"/* mpn_mod_34lsub1 not used due to %s */\\n\", mod34_excuse);\n      printf (\"/* PERFSQR_PP = \");\n    }\n  else\n    printf (\"/* 2^%d-1 = \", mod34_bits);\n  for (i = 0; i < nrawfactor; i++)\n    {\n      if (i != 0)\n        printf (\" * \");\n      printf (\"%d\", rawfactor[i].divisor);\n      if (rawfactor[i].multiplicity != 1)\n        printf (\"^%d\", rawfactor[i].multiplicity);\n    }\n  printf (\" %s*/\\n\", mpz_sgn (pp) == 0 ? \"... \" : \"\");\n\n  printf (\"#define PERFSQR_MOD_BITS  %d\\n\", mod_bits);\n  if (mpz_sgn (pp) != 0)\n    {\n      printf (\"#define PERFSQR_PP            CNST_LIMB(0x\");\n      mpz_out_str (stdout, 16, pp);\n      printf (\")\\n\");\n      printf (\"#define PERFSQR_PP_NORM       CNST_LIMB(0x\");\n      mpz_out_str (stdout, 16, pp_norm);\n      printf (\")\\n\");\n      printf (\"#define PERFSQR_PP_INVERTED   CNST_LIMB(0x\");\n      mpz_out_str (stdout, 16, pp_inverted);\n      printf (\")\\n\");\n    }\n  printf (\"\\n\");\n\n  mpz_init (mhi);\n  mpz_init (mlo);\n\n  printf (\"/* This test identifies %.2f%% as non-squares. 
*/\\n\",\n          (1.0 - total_fraction) * 100.0);\n  printf (\"#define PERFSQR_MOD_TEST(up, usize) \\\\\\n\");\n  printf (\"  do {                              \\\\\\n\");\n  printf (\"    mp_limb_t  r;                   \\\\\\n\");\n  if (mpz_sgn (pp) != 0)\n    printf (\"    PERFSQR_MOD_PP (r, up, usize);  \\\\\\n\");\n  else\n    printf (\"    PERFSQR_MOD_34 (r, up, usize);  \\\\\\n\");\n\n  for (i = 0; i < nfactor; i++)\n    {\n      printf (\"                                    \\\\\\n\");\n      printf (\"    /* %5.2f%% */                    \\\\\\n\",\n              (1.0 - factor[i].fraction) * 100.0);\n\n      printf (\"    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x\",\n              factor[i].divisor <= limb_bits ? 1 : 2,\n              factor[i].divisor);\n      mpz_out_str (stdout, 16, factor[i].inverse);\n      printf (\"), \\\\\\n\");\n      printf (\"                   CNST_LIMB(0x\");\n\n      if ( factor[i].divisor <= limb_bits)\n        {\n          mpz_out_str (stdout, 16, factor[i].mask);\n        }\n      else\n        {\n          mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);\n          mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);\n          mpz_out_str (stdout, 16, mhi);\n          printf (\"), CNST_LIMB(0x\");\n          mpz_out_str (stdout, 16, mlo);\n        }\n      printf (\")); \\\\\\n\");\n    }\n\n  printf (\"  } while (0)\\n\");\n  printf (\"\\n\");\n\n  printf (\"/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. 
*/\\n\",\n          (1.0 - (total_fraction * 44.0/256.0)) * 100.0);\n  printf (\"\\n\");\n\n  printf (\"/* helper for tests/mpz/t-perfsqr.c */\\n\");\n  printf (\"#define PERFSQR_DIVISORS  { 256,\");\n  for (i = 0; i < nfactor; i++)\n      printf (\" %d,\", factor[i].divisor);\n  printf (\" }\\n\");\n\n\n  mpz_clear (mhi);\n  mpz_clear (mlo);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  int  limb_bits, nail_bits;\n\n  if (argc != 3)\n    {\n      fprintf (stderr, \"Usage: gen-psqr <limbbits> <nailbits>\\n\");\n      exit (1);\n    }\n\n  limb_bits = atoi (argv[1]);\n  nail_bits = atoi (argv[2]);\n\n  if (limb_bits <= 0\n      || nail_bits < 0\n      || nail_bits >= limb_bits)\n    {\n      fprintf (stderr, \"Invalid limb/nail bits: %d %d\\n\",\n               limb_bits, nail_bits);\n      exit (1);\n    }\n\n  generate_sq_res_0x100 (limb_bits);\n  generate_mod (limb_bits, nail_bits);\n\n  print (limb_bits, nail_bits);\n\n  return 0;\n}\n"
  },
  {
    "path": "devel/regen",
    "content": "#!/bin/bash\n# run this from the devel directory\n\n# mpz/Makefile.am\ncd ../mpz\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libmpz_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libmpz_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"mpz\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"MPZ_OBJECTS[[:space:]]*=.*\"/\"MPZ_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile ../Makefile.am\ncd ../devel\n\n# fft/Makefile.am\ncd ../fft\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libfft_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libfft_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"fft\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"FFT_OBJECTS[[:space:]]*=.*\"/\"FFT_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile ../Makefile.am\ncd ../devel\n\n# mpf/Makefile.am\ncd ../mpf\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libmpf_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libmpf_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"mpf\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"MPF_OBJECTS[[:space:]]*=.*\"/\"MPF_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile ../Makefile.am\ncd ../devel\n\n# mpq/Makefile.am Makefile.am\ncd ../mpq\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libmpq_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libmpq_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"mpq\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"MPQ_OBJECTS[[:space:]]*=.*\"/\"MPQ_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile 
../Makefile.am\ncd ../devel\n\n# printf/Makefile.am Makefile.am\ncd ../printf\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libprintf_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libprintf_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"printf\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"PRINTF_OBJECTS[[:space:]]*=.*\"/\"PRINTF_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile ../Makefile.am\ncd ../devel\n\n# scanf/Makefile.am Makefile.am\ncd ../scanf\nsrcs=$(ls *.h *.c 2>/dev/null)\nsrcs=$(echo -n $srcs)\ncat Makefile.am | sed s/\"libscanf_la_SOURCES[[:space:]]*=[[:space:]]*.*\"/\"libscanf_la_SOURCES = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"scanf\\\\\\\\\\/\\1\\$U.lo \"/g)\ncat ../Makefile.am | sed s/\"SCANF_OBJECTS[[:space:]]*=.*\"/\"SCANF_OBJECTS = $srcs\"/ > tmpfile\nmv tmpfile ../Makefile.am\ncd ../devel\n\n# tests/mpz/Makefile.am\ncd ../tests/mpz\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/fft/Makefile.am\ncd ../tests/fft\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/mpf/Makefile.am\ncd ../tests/mpf\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/mpq/Makefile.am\ncd ../tests/mpq\nsrcs=$(ls *.c)\nsrcs=$(echo -n 
$srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/misc/Makefile.am\ncd ../tests/misc\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/mpn/Makefile.am\ncd ../tests/mpn\nsrcs=$(ls *.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/rand/Makefile.am\ncd ../tests/rand\nsrcs=$(ls t-*.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# tests/Makefile.am\ncd ../tests\nsrcs=$(ls t-*.c)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.c[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../devel\n\n# tests/cxx/Makefile.am , this is not complete\ncd ../tests/cxx\nsrcs=$(ls *.cc)\n#cxxsrcs=$(echo -n $srcs | sed s/\"t-\\([^[:space:]]\\+\\)\\(\\.cc[[:space:]]*\\)\"/\"t_\\1_SOURCES = t-\\1.cc\\n\"/g)\nsrcs=$(echo -n $srcs | sed s/\"\\([^[:space:]]\\+\\)\\(\\.cc[[:space:]]*\\)\"/\"\\1 \"/g)\ncat Makefile.am | sed s/\"check_PROGRAMS[[:space:]]*=.*\"/\"check_PROGRAMS = $srcs\"/ > tmpfile\nmv tmpfile Makefile.am\ncd ../../devel\n\n# CRLF to LF on all C files\ncd ..\nfor i in $(file $(find mpz mpq mpf mpn printf scanf tests tune -name \\*.c) | grep -e CRLF | cut -f 1 -d \" \" | cut -f 1 -d :) ; do cat $i | fromdos > crap ; mv crap $i  ; 
done\nfor i in $(file $(find mpz mpq mpf mpn printf scanf tests tune -name \\*.h) | grep -e CRLF | cut -f 1 -d \" \" | cut -f 1 -d :) ; do cat $i | fromdos > crap ; mv crap $i  ; done\nfor i in $(file $(find -name \\*.c -maxdepth 1) | grep -e CRLF | cut -f 1 -d \" \" | cut -f 1 -d :) ; do cat $i | fromdos > crap ; mv crap $i  ; done\nfor i in $(file $(find -name \\*.h -maxdepth 1) | grep -e CRLF | cut -f 1 -d \" \" | cut -f 1 -d :) ; do cat $i | fromdos > crap ; mv crap $i  ; done\ncd devel\n"
  },
  {
    "path": "devel/setversion",
    "content": "#!/bin/bash\n\n# run this script from the devel directory\n\n# set version number\nVERSION=3\nVERSION_MINOR=0\nVERSION_PATCHLEVEL=0\n\n# these are the library soname numbers see\n# http://sources.redhat.com/autobook/autobook/autobook_91.html\n# or the start of Makefile.am to see what was done before\n\nLIBGMP_LT_CURRENT=23\nLIBGMP_LT_REVISION=3\nLIBGMP_LT_AGE=0\n\nLIBGMPXX_LT_CURRENT=12\nLIBGMPXX_LT_REVISION=3\nLIBGMPXX_LT_AGE=4\n\necho \"Setting MPIR to\"\necho \"MPIR-$VERSION.$VERSION_MINOR.$VERSION_PATCHLEVEL\"\necho \"GMP-soname-$LIBGMP_LT_CURRENT.$LIBGMP_LT_REVISION.$LIBGMP_LT_AGE\"\necho \"GMPXX-soname-$LIBGMPXX_LT_CURRENT.$LIBGMPXX_LT_REVISION.$LIBGMPXX_LT_AGE\"\n\n###############################################################\n##############################################################\n\nVERSION_STR=$VERSION.$VERSION_MINOR.$VERSION_PATCHLEVEL\nTMPFILE=$(mktemp mpir.XXXXXX)\n\n#update gmp-h.in\ncat ../gmp-h.in \\\n| sed s/\"#define[[:space:]]\\+__MPIR_VERSION[[:space:]]\\+.*\"/\"#define __MPIR_VERSION \"$VERSION/ \\\n| sed s/\"#define[[:space:]]\\+__MPIR_VERSION_MINOR[[:space:]]\\+.*\"/\"#define __MPIR_VERSION_MINOR \"$VERSION_MINOR/ \\\n| sed s/\"#define[[:space:]]\\+__MPIR_VERSION_PATCHLEVEL[[:space:]]\\+.*\"/\"#define __MPIR_VERSION_PATCHLEVEL \"$VERSION_PATCHLEVEL/ \\\n| sed s/\"#define[[:space:]]\\+_MSC_MPIR_VERSION[[:space:]]\\+.*\"/\"#define _MSC_MPIR_VERSION \"\\\"$VERSION_STR\\\"/ > $TMPFILE\nmv $TMPFILE ../gmp-h.in\n\n#update Makefile.am\ncat ../Makefile.am \\\n| sed s/\"LIBGMP_LT_CURRENT[[:space:]]*=[[:space:]]*.*\"/\"LIBGMP_LT_CURRENT = \"$LIBGMP_LT_CURRENT/ \\\n| sed s/\"LIBGMP_LT_REVISION[[:space:]]*=[[:space:]]*.*\"/\"LIBGMP_LT_REVISION = \"$LIBGMP_LT_REVISION/ \\\n| sed s/\"LIBGMP_LT_AGE[[:space:]]*=[[:space:]]*.*\"/\"LIBGMP_LT_AGE = \"$LIBGMP_LT_AGE/ \\\n| sed s/\"LIBGMPXX_LT_CURRENT[[:space:]]*=[[:space:]]*.*\"/\"LIBGMPXX_LT_CURRENT = \"$LIBGMPXX_LT_CURRENT/ \\\n| sed 
s/\"LIBGMPXX_LT_REVISION[[:space:]]*=[[:space:]]*.*\"/\"LIBGMPXX_LT_REVISION = \"$LIBGMPXX_LT_REVISION/ \\\n| sed s/\"LIBGMPXX_LT_AGE[[:space:]]*=[[:space:]]*.*\"/\"LIBGMPXX_LT_AGE = \"$LIBGMPXX_LT_AGE/ > $TMPFILE\nmv $TMPFILE ../Makefile.am\n\n#update doc/version\necho -n \"@set UPDATED \" > ../doc/version.texi\ndate \"+%d %B %Y\" >> ../doc/version.texi\necho -n \"@set UPDATED-MONTH \" >> ../doc/version.texi\ndate \"+%B %Y\" >> ../doc/version.texi\necho \"@set EDITION $VERSION_STR\" >> ../doc/version.texi\necho \"@set VERSION $VERSION_STR\" >> ../doc/version.texi\n\n#update makeinfo\ncd ../doc\nmakeinfo mpir.texi\nmakeinfo version.texi\ncd ..\n\nautoreconf -fiv --no-recursive\nrm -f $TMPFILE\ncd devel\n"
  },
  {
    "path": "devel/yasm.diff",
    "content": "###\n###\tTo upgrade mpir's yasm.\n###\n###\tchange to the mpir directory\n###\n###\tsvn del yasm\n###\tsvn commit -m \"delete old yasm\"\n###\tsvn co http://www.tortall.net/svn/yasm/trunk/yasm yasm   or  get yasm-1.2.tar.gz and untar and rename directory to yasm\n###\tcd yasm\n###\trm -r $(find -name \\\\.svn)\n###\tpatch -Np1 < ../devel/yasm.diff\n###\t./autogen.sh\t\tor run autoreconf\n###\t./configure\n###\tmake\n###\tmake distclean\n###\trm -r autom4te.cache\n###\n###\tand replace any symbolic links in the config directory with real files\n###\n###\tcd ..\n###\tsvn add yasm\n###\tsvn commit -m \"new yasm installed version number ????\"\n###\n###\n### NOTE we can get rid of the readline part , and nail part? , and add in profiling\n\ndiff -Naur yasm2227/config/config.sub mpiryasm/config/config.sub\n--- yasm2227/config/config.sub\t2010-06-25 12:59:22.000000000 +0100\n+++ mpiryasm/config/config.sub\t2010-06-25 13:00:42.000000000 +0100\n@@ -1073,7 +1073,7 @@\n \tpmac | pmac-mpw)\n \t\tbasic_machine=powerpc-apple\n \t\t;;\n-\t*-unknown)\n+\t*-unknown | *-pc | *-apple | *-w64)\n \t\t# Make sure to match an already-canonicalized machine name.\n \t\t;;\n \t*)\ndiff -Naur yasm2227/configure.ac mpiryasm/configure.ac\n--- yasm2227/configure.ac\t2010-06-25 12:59:23.000000000 +0100\n+++ mpiryasm/configure.ac\t2010-06-25 13:00:42.000000000 +0100\n@@ -20,6 +20,22 @@\n #\n # autoconf command-line options\n #\n+AC_ARG_ENABLE(assert,,,)\n+\n+AC_ARG_ENABLE(alloca,,,)\n+\n+AC_ARG_ENABLE(cxx,,,)\n+\n+AC_ARG_ENABLE(fft,,,)\n+\n+AC_ARG_ENABLE(nails,,,)\n+\n+AC_ARG_ENABLE(gmpcompat,,,)\n+\n+AC_ARG_WITH(readline,,,)\n+\n+AC_ARG_ENABLE(fat,,,)\n+\n AC_ARG_ENABLE(debug,\n AC_HELP_STRING([--enable-debug],\n \t       [Turn on debugging and compile time warnings]),\n"
  },
  {
    "path": "doc/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n\n# Copyright 2003 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nEXTRA_DIST = isa_abi_headache\n\ninfo_TEXINFOS = mpir.texi\ngmp_TEXINFOS = fdl.texi\n"
  },
  {
    "path": "doc/devel/Configure.txt",
    "content": "Copyright 2008 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n-define MPIR_COPYRIGHT to GPL text\n\n-Initialise autotools\n\n-Various command line options to configure, including:\n\n--enable-assert enables asserts\n--enable_alloca use alloca for memory allocation\n--enable-cxx want c++ headers\n--enable-fft want FFT for large integer multiplication\n--enable-mpbsd enable berkeley mp functions\n--enable-nails enable nails\n--enable-profiling enable profiling (prof, gprof or instrumentation)\n\nset fomit_frame_pointer to -fomit-frame-pointer unless profiling is gprof\n\n--with-readline (mainly for readline support in calc demo)\n--enable-fat build a fat binary\n\n-set tmp_host to $host_cpu with .'s replaced by _'s\n\n-#undef all the HAVE_HOST_CPU possibilities (that will be tested)\n\n-set cclist to \"gcc cc\"\n-set gcc-cflags and gcc_64_cflags to -O3\n-set cc-cflags and cc_64_cflags to -O\n-set cyclecounter_size 2\n\n-substitute HAVE_HOST_CPU_FAMILY_power and HAVE_HOST_CPU_FAMILY_powerpc with 0\n\n-go through each architecture and set things like:\n\nX86_PATTERN | athlon64-*-* | core2-*-* | x86_64-*-*)\n-abilist 32\n-cclist gcc icc cc\n-gcc_cflags -O2 $fomit_frame_pointer\n-gcc_32_cflags_maybe 
-m32\nicc_cflags -no-gcc\nicc_cflags_optlist opt\nicc_cflags_opt -O3 -O2 -O1\nany_32_testlist sizeof-long-4\nCALLING_CONVENTIONS_OBJS x86call.lo c86check$U.lo\nSPEED_CYCLECOUNTER_OBJ pentium.lo\n\n-set gcc_cflags_cpu and gcc_cflags_arch for each of $host_cpu in i386*, i486*, i586, pentiummmx, i686, pentiumpro, pentium2, pentium3, k6, k62, k63, athlon, i787, pentium4, viac32, viac3*, athlon64, core2, x86_64 (default to i486 options)\n\n-set path to correct mpn directory according to type processor\n\n-override the settings above if a fat binary was selected (path becomes x86/fat)\n\n- for $host_cpu in athlon64, core2, pentium4, x86_64 (i.e. 64 bit machines) set 64 bit only options\ncclist_64 gcc\ngcc_64_cflags -O2 -m64\ngcc_64_cflags_optlist cpu\nCALLING_CONVENTIONS_OBJS_6 amd64call.lo amd64check$U.lo\nSPEED_CYCLECOUNTER_OBJ_64 x86_64.lo\ncyclecounter_size_64 2\nabilist 64 32\npath x86_64 / x86_64/pentium4 / x86_64/core2\non pentium4 only gcc is available for cclist_64\n\n-set options for none-*-* for generic C only\n\n-if $host in *-*-mingw set gcc_cflags_optlist gcc_cflags_nocygwin appropriately\n\n-write a file(?) 
AC_FD_CC with User options ABI, CC, CFLAGS, CPPFLAGS, MPN_PATH and GMP: abilist, cclist\n\n-test that the CFLAGS work\n\n-set abi_last to the last thing in abilist\n\n-set ABI to the ABI the user specified if it is a valid option\n\n-set abi1, abi2 to the different possibilities\n\n-set cclist_chosen to the right cclist for this abi and default to what the user specified if they specified one\n\n-set up the CC and CFLAGS and check it works\n\n-check if longlong works for a limb if we are supposed to be using it\n\n-do various test on the CC (not relevant to x86)\n\n-exclude various bad compilers and operating systems that don't save XMM registers properly\n\n-#define HAVE_ABI_32 or 64 according to the ABI\n\n-set extra_functions according to ABI\n\n-set SPEED_CYCLECOUNTER_OBJ and cyclecounter_size according to ABI\n\n-if SPEED_CYCLECOUNTER_OBJ is defined #define HAVE_SPEED_CYCLECOUNTER to cyclecounter_size\n\n-set CALLING_CONVENTIONS_OBJS according to ABI\n\n-if CALLING_CONVENTIONS_OBJS is defined #define #HAVE_CALLING_CONVENTIONS 1\n\n-choose MPN_PATH according to ABI or use the user defined path\n\n-if limbchosen is longlong then DEFN_LONG_LONG_LIMB is set to #define _LONG_LONG_LIMB 1 else to #undef _LONG_LONG_LIMB\n\n-run the standard autoconf things for CC, etc\n\n-GMP_H_ANSI puts C compiler into ANSI mode if possible\n\n-various GMP macros which check whether CC, CPP work for build, PROG_EXTEXT works for build (??), whether ANSI C works, whether LIBM is  available for build\n\n-the assembler is invoked via CC or we use yasm for .as files\n\n-check CPP compiler works and set WANT_CXX if C++ is wanted\n\n-some path setups for the Cray\n\n-if asked for a NAILS build add the nails directory to path\n\n-if there is any sse2 or mmx in the path, remove it if the assembler doesn't support it\n\n-update the file AC_FD_CC with: ABI, CC, CFLAGS, CPPFLAGS, GMP_LDFLAGS, CXX, CXXFLAGS, path\n\n-configuration for windows dlls\n\n-enable CXX in libtool only if we want it, by 
setting with_tags appropriately\n\n-error if trying to build static and shared libraries at the same time when $libname.a is in $library_names_spec\n\n-check for a whole load of system specific header files, like float.h, sys/time.h, etc\n\n-check various C functions are available\n\n-check various types are available\n\n-AC_C_STRINGIZE \n-AC_C_VOLATILE \n-AC_C_RESTRICT \n\n-GMP_C_STDARG\n-GMP_C_ATTRIBUTE_CONST\n-GMP_C_ATTRIBUTE_MALLOC\n-GMP_C_ATTRIBUTE_MODE\n-GMP_C_ATTRIBUTE_NORETURN\n-GMP_H_EXTERN_INLINE\n\n-These various macros above set various things that end up being substituted into files, e.g. all instances of _extern_inline_ get replaced with something like inline where they occur in header files, etc.\n\n-AC_CHECK_LIBM\n-AC_SUBST(LIBM)\n\n-GMP_FUNC_ALLOCA\n-GMP_OPTION_ALLOCA\n\n-GMP_H_HAVE_FILE\n\n-AC_C_BIGENDIAN sets HAVE_LIMB_BIG_ENDIAN or HAVE_LIMB_LITTLE_ENDIAN\n\n-GMP_C_DOUBLE_FORMAT\n\n-check for a pile of functions known to be missing in certain configurations\n\n-enable C++ tests when required\n\n-create symlinks to the required assembly files in path and link them to mpn/\n\n-take care of functions that can be provided by multifunction files, e.g. 
aorsmul_1\n\n-define possible mpn suffixes to S s c as\n\n-GMP_FILE_TO_FUNCTION associates the various mpn functions to actual files\n\n-pile of setups for fat binaries\n\n-for each mpn suffix, set tmp_ext to no\n\n-for each gmp_mpn_function do:\nGMP_MULFUNC_CHOICES\nfind the appropriate file\ncheck if it supports the nails specified\nadd the appropriate object file name to mpn_objects and mpn_objs_in_libgmp\nAC_CONFIG_LINKS\nadd the filename to gmp_srclinks\ndefine the right HAVE_NATIVE_fn's\ndeal with sqr_basecase if found\n\n-set up autoheader stuff to undef all the HAVE_NATIVE_*\n\n-if assembler is going to be needed, check for M4, or use yasm\n\n-the following macros are for ASM syntax checking and for various macros defined for .asm assembly files\n the macros are defined in acinclude.m4:\n  GMP_ASM_TEXT\n  GMP_ASM_DATA\n  GMP_ASM_LABEL_SUFFIX\n  GMP_ASM_GLOBL - global symbol prefix\n  GMP_ASM_GLOBL_ATTR\n  GMP_ASM_UNDERSCORE\n  GMP_ASM_RODATA\n  GMP_ASM_TYPE\n  GMP_ASM_SIZE\n  GMP_ASM_LSYM_PREFIX - local symbol prefix\n  GMP_ASM_W32\n  GMP_ASM_ALIGN_LOG - whether alignment is logarithmic\n\n-include various m4 files and do various assembly defines per host cpu\n\n-create a link to the appropriate gmp-mparam.h file and set gmp_mparam_source and gmp_srclinks appropriately\n\n-copy SQR_KARATSUBA_THRESHOLD into config.m4\n\n-check size of some types, e.g. unsigned short, and appropriately define BITS_PER_MP_LIMB and SIZEOF_UNSIGNED\n\n-check compiler limb size matches gmp-mparam.h\n\n-define GMP_LIMB_BITS, GMP_NAIL_BITS and GMP_NUMB_BITS\n\n-exclude the mpn random functions from mpbsd\n\n-make the file tune/sqr_basecase.c if needed according to whether sqr_basecase is an assembly or C file\n\n-various configurations for the demo programs\n\n-GMP_FINISH creates config.m4\n\n-create makefiles from the Makefile.in's\n\nThis document, configure.txt can be found in /doc/devel of the MPIR source tree."
  },
  {
    "path": "doc/devel/FILES.txt",
    "content": "Copyright 2008, William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n.gdbinit - three basic macros for printing mpz, mpq and mpf's in gdb\nacinclude - many GMP specific script macros to be used by autoconf\naclocal - scripts for setting up the tools used to build MPIR on a wide variety of platforms - part of autotools\nansi2knr - a script belonging to ansi2knr\nansi2knr.c - C program for converting from Ansi C to Kernighan and Ritchie C\nassert.c - two MPIR wide assert functions\nAUTHORS - a list of authors and what they did\nCHANGELOG - a changelog going back to March 1991\ncompat.c - some function entry points for three deprecated functions\nconfig.in - piles of undefs for all environment variables used by MPIR\nconfig.guess - MPIR script to try and guess what the CPU-VENDOR-OS is \nconfig.sub - gives CPU-VENDOR-OS names MPIR recognizes, in some canonical way\nconfigfsf - the GNU CPU-VENDOR-OS guess program, later sharpened by config.guess\nconfigfsf.sub - common to all GNU software, attempts to come up with canonical CPU-VENDOR-OS\nconfigure - guess values for system dependent variables and create Makefiles\nconfigure.in - used by autoconf to produce a configure script\nCOPYING - GPL version 
2\nCOPYING.lib - GPL version 2.1\ndumbgmp.c - very limited subset of MPIR used in some build time computations\nerrno.c - some wrappers for the exception generating code \nextract-dbl.c - convert from a double to an array of mp_limb_t's\ngen-bases.c - auto generates some hard coded macros for base conversion\ngen-facui.c - generates data tables for factorials, e.g. all factorials which fit in a limb\ngen-fib.c - generate data for Fibonnacci number tables\ngen-psqr.c - generates data for perfect square testing\ngmp-h.in - mpir.h with some platform dependent stuff not yet inserted (mpir.h is included by \n  programs using MPIR and by code within MPIR)\ngmp-impl.h - internal include file containing all the macros used MPIR wide\nmpirxx.h - C++ class wrapper for MPIR types\nINSTALL - basic text notes on installing MPIR\nINSTALL.AUTOCONF - more detailed text file on configuring MPIR\ninstall-sh - script for installing MPIR (once built)\ninvalid.c - an extra exception wrapper for invalid operations\nlibmp - a short list of relatively basic mathematical function names (??)\nlonglong.h - assembly code for numerous machines for longlong arithmetic\nltmain.sh - generalised library tool services\nmakefile-in - makefile\nmakefile.am - used by automake to generate makefile-in\nmemory.c - the default MPIR memory allocation functions\nmissing - stubs for missing GNU programs (prints warning messages)\nmp_bpl.c - defines mp_bits_per_limb, __gmp_0 (= (int) 0) and __gmp_junk\nmp_clz_tab.c - defines __clz_tab[128] for counting leading zeroes\nmp_dv_tab.c - defines __gmp_digit_value_tab for giving values to ascii chars for bases\nmp_get_fns.c - contains mp_get_memory_functions(...)\nmp_minv_tab.c - defines modlimb_invert_table[128] for inverses of 2*i+1 mod 256\nmp_set_fns.c - contains mp_set_memory_functions(...)\nmp-h.in - MINT is an __mpz_struct, gives protos for a handful of Berkeley mp functions\nNEWS - details the major changes between all the released versions of 
GMP/MPIR\nrand.c - defines the function gmp_randinit (which is essentially a wrapper)\nrandbui.c - defines the function gmp_urandomb_ui which is a wrapper\nrandclr.c - defines gmp_randclear which is a wrapper\nranddef.c - defines gmp_randinit_default which is a wrapper\nrandiset.c - defines gmp_randinit_set which is a wrapper\nrandlc2s.c - initialises a random state with a linear congruential generator of given size\nrandlc2x.c - implementation of linear congruential pseudorandom functions\nrandmt.c - implementation of mersenne twister pseudorandom functions\nrandmt.h - header file for randmt.c\nrandmts.h - functions for managing the seed, etc, for the mersenne twister\nrandmui.c - defines gmp_urandomm_ui which is a wrapper\nrands.c - global random state for old style random functions\nrandsd.c - defines gmp_randseed which is a wrapper\nrandsdui.c - defines gmp_randseed_ui which is a wrapper\nREADME - a very short overview of the mpir library and how to report bugs\ntal-debug.c - debug versions of TMP_ALLOC\ntal-notreent.c - non-reentrant versions of the TMP_ALLOC functions\ntal-reent.c - reentrant versions of TMP_ALLOC functions\nversion.c - defines gmp_version and mpir_version\n\nThis document, FILES, is contained in the /doc/devel directory of the MPIR source tree."
  },
  {
    "path": "doc/devel/configuration",
    "content": "/* doc/configuration (in Emacs -*-outline-*- format). */\n\nCopyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\nCopyright 2008 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n* Adding a new file\n\n** Adding a top-level file\n\n  i) Add it to libgmp_la_SOURCES in Makefile.am.\n\n  ii) If libmp.la needs it (usually doesn't), then add it to\n      libmp_la_SOURCES too.\n\n** Adding a subdirectory file\n\nFor instance for mpz,\n\n  i) Add file.c to libmpz_la_SOURCES in mpz/Makefile.am.\n\n  ii) Add mpz/file$U.lo to MPZ_OBJECTS in the top-level Makefile.am\n\n  iii) If for some reason libmp.la needs it (usually doesn't) then add\n       mpz/file$U.lo to libmp_la_DEPENDENCIES in the top-level\n       Makefile.am too.\n\nThe same applies to mpf, mpq, scanf and printf.\n\n** Adding an mpn file\n\nThe way we build libmpn (in the `mpn' subdirectory) is quite special.\n\nCurrently only mpn/mp_bases.c is truely generic and included in every\nconfiguration.  
All other files are linked at build time into the mpn\nbuild directory from one of the CPU specific sub-directories, or from\nthe mpn/generic directory.\n\nThere are five types of mpn source files.\n\n  .asm\t  Assembly code preprocessed with m4\n  .S\t  Assembly code preprocessed with cpp\n  .s\t  Assembly code not preprocessed at all\n  .c\t  C code\n  .as   Yasm format assembly file - yasm macros only, no preprocessing\n\nThere are two types of .asm files.\n\n  i) ``Normal'' files containing one function, though possibly with\n     more than one entry point.\n\n  ii) Multi-function files that generate one of a set of functions\n      according to build options.\n\nTo add a new implementation of an existing function,\n\n  i) Put it in the appropriate CPU-specific mpn subdirectory, it'll be\n     detected and used (if it is for an architecture supported by yasm\n     simply call it .as and yasm will automatically be used to assemble \n     it).\n\n  ii) Any entrypoints tested by HAVE_NATIVE_func in other code must\n      have PROLOGUE(func) for configure to grep.  This is normal for\n      .asm or .S files, but for .c and .as files a dummy comment like \n      the following will be needed.\n\n              /*\n              PROLOGUE(func)\n              */\n\n              OR\n\n              ;\n              ; PROLOGUE(func)\n              ;\n\nTo add a new implementation using a multi-function file, in addition\ndo the following,\n\n  i) Use a MULFUNC_PROLOGUE(func1 func2 ...) 
in the .asm, declaring\n     all the functions implemented, including carry-in variants.\n\n     If there's a separate PROLOGUE(func) for each possible function\n     (but this is usually not the case), then MULFUNC_PROLOGUE isn't\n     necessary.\n\n     Currently we don't use multifunction files with yasm\n\nTo add a new style of multi-function file, in addition do the\nfollowing,\n\n  i) Add to the GMP_MULFUNC_CHOICES \"case\" statement in configure.in\n     which lists each multi-function filename and what function files\n     it can provide.\n\nTo add a completely new mpn function file, do the following,\n\n  i) Ensure the filename is a valid C identifier, due to the\n     -DOPERATION_$* used to support multi-function files.  This means\n     \"-\" can't be used (but \"_\" can).\n\n  ii) Add it to configure.in under one of the following\n\n      a) `gmp_mpn_functions' if it exists for every target.  This\n         means there must be a C version in mpn/generic.  (Eg. mul_1)\n\n      b) `gmp_mpn_functions_optional' if it's a standard function, but\n         doesn't need to exist for every target.  Code wanting to use\n         this will test HAVE_NATIVE_func to see if it's available.\n         (Eg. copyi)\n\n      c) `extra_functions' for some targets, if it's a special\n         function that only ever needs to exist for certain targets.\n         Code wanting to use it can test either HAVE_NATIVE_func or\n         HAVE_HOST_CPU_foo, as desired.\n\n  iii) If HAVE_NATIVE_func is going to be used, then add a #undef to\n       the AH_VERBATIM([HAVE_NATIVE] block in configure.in.\n\n  iv) Add file.c to nodist_libdummy_la_SOURCES in mpn/Makefile.am (in\n      order to get an ansi2knr rule).  
If the file is only in\n      assembler then this step is unnecessary, but do it anyway so as\n      not to forget if later a .c version is added.\n\n  v) If the function can be provided by a multi-function file, then\n     add to the \"case\" statement in configure.in which lists each\n     multi-function filename and what function files it can provide.\n\n\n** Adding a test program\n\n  i) Tests to be run early in the testing can be added to the main\n     \"tests\" sub-directory.\n\n  ii) Tests for mpn, mpz, mpq and mpf can be added under the\n      corresponding tests subdirectory.\n\n  iii) Generic tests for late in the testing can be added to\n       \"tests/misc\".  printf and scanf tests currently live there too.\n\n  iv) Random number function tests can be added to \"tests/rand\".  That\n      directory has some development-time programs too.\n\n  v) C++ test programs can be added to \"tests/cxx\".  A line like the\n     following must be added for each, since by default automake looks\n     for a .c file.\n\n             t_foo_SOURCES = t-foo.cc\n\nIn all cases the name of the program should be added to check_PROGRAMS\nin the Makefile.am.  TESTS is equal to check_PROGRAMS, so all those\nprograms get run.\n\n\"tests/devel\" has a number of programs which are only for development\npurposes and are not for use in \"make check\".  These should be listed\nin EXTRA_PROGRAMS to get Makefile rules created, but they're never\nbuilt or run unless an explicit \"make someprog\" is used.\n\n* Adding a new CPU\n\nIn general it's policy to use proper names for each CPU type\nsupported.  
If two CPUs are quite similar and perhaps don't have any\nactual differences in MPIR then they're still given separate names, for\nexample alphaev67 and alphaev68.\n\nCanonical names:\n\n  i) Decide the canonical CPU names MPIR will accept.\n\n  ii) Add these to the config.sub wrapper if configfsf.sub doesn't\n      already accept them.\n\n  iii) Document the names in gmp.texi.\n\nAliases (optional):\n\n  i) Any aliases can be added to the config.sub wrapper, unless\n     configfsf.sub already does the right thing with them.\n\n  ii) Leave configure.in and everywhere else using only the canonical\n      names.  Aliases shouldn't appear anywhere except config.sub.\n\n  iii) Document in gmp.texi, if desired.  Usually this isn't a good\n       idea, better encourage users to know just the canonical\n       names.\n\nConfigure:\n\n  i) Add patterns to configure.in for the new CPU names.  Include the\n     following (see configure.in for the variables to set up),\n\n     a) ABI choices (if any).\n     b) Compiler choices.\n     c) mpn path for CPU specific code.\n     d) Good default CFLAGS for each likely compiler.\n     d) Any special tests necessary on the compiler or assembler\n        capabilities.\n\n  ii) M4 macros to be shared by asm files in a CPU family are by\n      convention in a foo-defs.m4 like mpn/x86/x86-defs.m4.  
They're\n      likely to use settings from config.m4 generated by configure.\n\nFat binaries:\n\n  i) In configure.in, add CPU specific directory(s) to fat_path.\n\n  ii) In mpn/<cpu>/fat.c, identify the CPU at runtime and use suitable\n      CPUVEC_SETUP_subdir macros to select the function pointers for it.\n\n  iii) For the x86s, add to the \"$tmp_prefix\" setups in configure.in\n       which abbreviates subdirectory names to fit an 8.3 filesystem.\n       (No need to restrict to 8.3, just ensure uniqueness when\n       truncated.)\n\n\n* The configure system\n\n** Installing tools\n\nThe current versions of automake, autoconf and libtool in use can be\nchecked in the ChangeLog.  Look for \"Update to ...\".  Patches may have\nbeen applied, look for \"Regenerate ...\".\n\nThe MPIR build system is in places somewhat dependent on the internals\nof the build tools.  Obviously that's avoided as much as possible, but\nwhere it can't it creates a problem when upgrading or attempting to\nuse different tools versions.\n\n** Updating mpir\n\nThe following files need to be updated when going to a new version of\nthe build tools.  Unfortunately the tools generally don't identify\nwhen an out-of-date version is present.\n\naclocal.m4 is updated by running \"aclocal\".  (Only needed for a new\nautomake or libtool.)\n\nINSTALL.autoconf can be copied from INSTALL in autoconf.\n\nltmain.sh comes from libtool.  Remove it and run \"libtoolize --copy\",\nor just copy the file by hand.\n\nansi2knr.c, ansi2knr.1, install-sh and doc/mdate-sh come from automake\nand can be updated by copying or by removing and running \"automake\n--add-missing --copy\".\n\ntexinfo.tex can be updated from ftp.gnu.org.  Check it still works\nwith \"make gmp.dvi\", \"make gmp.ps\" and \"make gmp.pdf\".\n\nconfigfsf.guess and configfsf.sub can be updated from ftp.gnu.org (or\nfrom the \"config\" cvs module at subversions.gnu.org).  
The gmp\nconfig.guess and config.sub wrappers are supposed to make such an\nupdate fairly painless.\n\ndepcomp from automake is not needed because configure.in specifies\nautomake with \"no-dependencies\".\n\n** How it works\n\nDuring development:\n\n    Input files                       Tool       Output files\n    ---------------------------------------------------------\n                                 \n                                     aclocal\n    $prefix/share/aclocal*/*.m4 ----------------> aclocal.m4\n\n                   \n    configure.in \\                   autoconf\n    aclocal.m4   / -----------------------------> configure\n\n                  \n    */Makefile.am \\                  automake\n    configure.in  | ----------------------------> Makefile.in\n    aclocal.m4    /\n                    \n    configure.in \\                  autoheader\n    aclocal.m4   / -----------------------------> config.in\n\nAt build time:\n\n    Input files          Tool       Output files\n    --------------------------------------------\n\n    */Makefile.in  \\   configure    / */Makefile\n    config.in      | -------------> | config.h\n    gmp-h.in       |                | config.m4\n    mp-h.in        /                | mpir.h\n                                    | mp.h\n                                    \\ fat.h  (fat binary build only)\n\nWhen configured with --enable-maintainer-mode the Makefiles include\nrules to re-run the necessary tools if the input files are changed.\nThis can end up running a lot more things than are really necessary.\n\nIf a build tree is in too much of a mess for those rules to work\nproperly then a bootstrap can be done from the source directory with\n\n\taclocal\n\tautoconf\n\tautomake\n\tautoheader\n\nThe autom4te.cache directory is created by autoconf to save some work\nin subsequent automake or autoheader runs.  
It's recreated\nautomatically if removed, it doesn't get distributed.\n\n** C++ configuration\n\nIt's intended that the contents of libmpir.la won't vary according to\nwhether --enable-cxx is selected.  This means that if C++ shared\nlibraries don't work properly then a shared+static with --disable-cxx\ncan be done for the C parts, then a static-only with --enable-cxx to\nget libmpirxx.\n\nlibmpirxx.la uses some internals from libmpir.la, in order to share code\nbetween C and C++.  It's intended that libmpirxx can only be expected\nto work with libmpir from the same version of MPIR.  If some of the\nshared internals change their interface, then it's proposed to rename\nthem, for instance __gmp_doprint2 or the like, so as to provoke link\nerrors rather than mysterious failures from a mismatch.\n\n* Development setups\n\n** General\n\n--disable-shared will make builds go much faster, though of course\nshared or shared+static should be tested too.\n\n--enable-mpbsd grabs various bits of mpz, which might need to be\nadjusted if things in those routines are changed.  Building mpbsd all\nthe time doesn't cost much.\n\n--prefix to a dummy directory followed by \"make install\" will show\nwhat's installed.\n\n\"make check\" acts on the libmpir just built, and will ignore any other\n/usr/lib/libmpir, or at least it should do.  Libtool does various hairy\nthings to ensure it hits the just-built library.\n\n** Long long limb testing\n\nOn systems where gcc supports long long, but a limb is normally just a\nlong, the following can be used to force long long for testing\npurposes.  
It will probably run quite slowly.\n\n\t./configure --host=none ABI=longlong\n\n** Function argument conversions\n\nWhen using gcc, configuring with something like\n\n\t./configure CFLAGS=\"-g -Wall -Wconversion -Wno-sign-compare\"\n\ncan show where function parameters are being converted due to having\nfunction prototypes available, which won't happen in a K&R compiler.\nDoing this in combination with the long long limb setups above is\ngood.\n\nConversions between int and long aren't warned about by gcc when\nthey're the same size, which is unfortunate because casts should be\nused in such cases, for the benefit of K&R compilers with int!=long\nand where the difference matters in function calls.\n\n** K&R support\n\nFunction definitions must be in the GNU stylized form to work.  See\nthe ansi2knr.1 man page (included in the MPIR sources).\n\n__GMP_PROTO is used for function prototypes, other ANSI / K&R\ndifferences are conditionalized in various places.\n\nProper testing of the K&R support requires a compiler which gives an\nerror for ANSI-isms.  Configuring with --host=none is a good idea, to\ntest all the generic C code.\n\nWhen using an ANSI compiler, the ansi2knr setups can be partially\ntested with\n\n\t./configure am_cv_prog_cc_stdc=no ac_cv_prog_cc_stdc=no\n\nThis will test the use of $U and the like in the makefiles, but not\nmuch else.\n\nForcing the cache variables can be used with a compiler like HP C\nwhich is K&R by default but to which configure normally adds ANSI mode\nflags.  
This then should be a good full K&R test.\n\n* Other Notes\n\n** Compatibility\n\ncompat.c is the home of functions retained for binary compatibility,\n    but now done by other means (like a macro).\n\nstruct __mpz_struct etc - this must be retained for C++ compatibility.\n    C++ applications defining functions taking mpz_t etc parameters\n    will get this in the mangled name because C++ \"sees though\" the\n    typedef mpz_t to the underlying struct.\n\n    Incidentally, this probably means for C++ that our mp.h is not\n    compatible with an original BSD mp.h, since we use struct\n    __mpz_struct for MINT in ours.  Maybe we could change to whatever\n    the original did, but it seems unlikely anyone would be using C++\n    with mp.h.\n\n__gmpn - note that glibc defines some __mpn symbols, old versions of\n    some mpn routines, which it uses for floating point printfs.\n\n\n\n\nLocal variables:\nmode: outline\nfill-column: 70\nEnd:\n/* eof doc/devel/configuration */\n"
  },
  {
    "path": "doc/devel/gmp-h.txt",
    "content": "Copyright 2008, William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\nSome compilers with hacks:\n====================================\n- GNUC\n- DEC C\n- SCO OpenUNIX 8 cc\n- __cplusplus\n\nUseful/undocumented macros in mpir.h:\n====================================\n__GMPN_ABS(x) - absolute value\n__GMPN_MAX(h, i) - max of h and i\n\n__GMPN_UINT_MAX - (int) ~0\n__GMPN_ULONG_MAX - (unsigned long) ~0\n__GMPN_USHRT_MAX - unsigned short) ~0\n\n__GMPN_LIKELY(cond) - branch likely hint\n__GMPN_UNLIKELY(cond) - branch unlikely hint\n  - e.g. 
if UNLIKELY(c >= 0L)....\n\nNote: the __GMPN_ macros are redefined with nicer names in gmp-impl.h\n\nmp_bits_per_limb - number of bits per limb\ngmp_version - GMP version number i.j or i.j.k as a null terminated string\nmpir_version - MPIR version number i.j or i.j.k as a null terminated string\ngmp_errno - integer corresponding to a GMP error condition:\n  0 none\n  1 unsupported argument\n  2 division by zero\n  4 sqrt of negative\n  8 invalid argument\n\nUndocumented mpn functions prototyped in mpir.h:\n===============================================\nmp_limb_t mpn_preinv_mod_1(mp_srcptr sp, mp_size_t sn, mp_limb_t x, mp_limb_t xinv) - \n  compute {sp, sn} mod x given a precomputed inverse xinv of x\n\n\n"
  },
  {
    "path": "doc/devel/gmp-impl-h.txt",
    "content": "Copyright 2008, William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\nMACROS in gmp_impl.h\n===========================================\nLIKELY(cond) - branch hint (likely), e.g. if LIKELY(x > 0)....\nUNLIKELY(cond) - branch hint (unlikely)\n\nABS(x) - absolute value of x\nMIN(x, y) - minimum of x and y (operands may be signed or unsigned)\nMAX(x, y) - maximum of x and y\nnumberof(x) - assumes x is a pointer to an array, returns the number of elements\n  of the array that fit into a type the same size as a pointer (??)\n\nSIZ(z) - _mp_size field of z\nABSIZ(z) - abs val of SIZ(z)\nPTR(z) - _mp_d field of z\nLIMBS(z) - same as PTR(z)\nEXP(z) -  _mp_exp field of x\nPREC(x) - _mp_prec field of x\nALLOC(x) - _mp_alloc field of x\n\nMP_LIMB_T_SWAP(x, y) - swap two mp_limb_t's\nMP_SIZE_T_SWAP(x, y) - swap two mp_size_t's\nMP_PTR_SWAP(x, y) - swap two mp_ptr's\nMP_SRCPTR_SWAP(x, y) - swap two mp_srcptr's\nMPN_PTR_SWAP(xp, xs, yp, ys) - swap {xp, xs} and {yp, ys}\nMPN_SRCPTR_SWAP(xp, xs, yp, ys) - swap {xp, xs} and {yp, ys}\nMPZ_PTR_SWAP(x, y) - swap two mpz_ptr's\nMPZ_SRCPTR_SWAP(x, y) - swap two mpz_srcptr's\n\nMPN_CMP(result, xp, yp, size) - compare {xp, size} with {yp, size} and set \n  result to -1, 0, 
1, size may be zero\nMPN_COPY_INCR(dst, src, size) - copy size limbs from src to dst incrementing\n  memory locations throughout copy (i.e. src must come after dst if they overlap)\n  size == 0 is allowed\nMPN_COPY_DECR(dst, src, size) - as for MPN_COPY_INCR except ptrs are decremented\n  (i.e. src must come before dst if they overlap)\nMPN_COPY(dst, src, size) - copy size limbs from src to dst which must be same\n  or separate\nMPN_REVERSE(dst, src, size) - set {dst, size} to the limbs of {src, size} in \n  reverse order\nMPN_ZERO(dst, n) - set n limbs at dst to zero\nmpn_com_n(dst, src, size) - set {dst, size} to the complement of {src, size}\nmpn_and_n(dst, src1, src2, n) - set {dst, n} to {src1, n} & {src2, n}\nmpn_andn_n(dst, src1, src2, n) - set {dst, n} to {src1, n} & ~{src2, n}\nmpn_nand_n(dst, src1, src2, n) - set {dst, n} to {src1, n} nand {src2, n}\nmpn_ior_n(dst, src1, src2, n) - set {dst, n} to {src1, n} | {src2, n}\nmpn_iorn_n(dst, src1, src2, n) - set {dst, n} to {src1, n} | ~{src2, n}\nmpn_nior_n(dst, src1, src2, n) - set {dst, n} to {src1, n} nor {src2, n}\nmpn_xor_n(dst, src1, src2, n) - set {dst, n} to {src1, n} xor {src2, n}\nmpn_xnor_n(dst, src1, src2, n) - set {dst, n} to {src1, n} xnor {src2, n}\nMPN_NORMALIZE(dst, nlimbs) - normalise the mpn at dst, i.e. set nlimbs to number\n  of non-zero limbs\nMPN_NORMALIZE_NOT_ZERO(dst, nlimbs) - normalise the mpn at dst, i.e. 
set \n  nlimbs to number of non-zero limbs, assuming it isn't zero\nMPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) - strip least sig zero limbs from \n  {ptr, size} by incr ptr and decr size, low should be ptr[0] and will be set\n  to new ptr[0] upon return, function assumes {ptr, size} is not zero\nMPN_OVERLAP_P(xp, xsize, yp, ysize) - nonzero if the given mpns overlap\nMPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) - nonzero if ok for incr algorithm\nMPN_SAME_OR_INCR_P(dst, src, size) - as for INCR2 variant, but same sizes\nMPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) - nonzero if ok for decr algorithm\nMPN_SAME_OR_DECR_P(dst, src, size) - as for DECR2 variant, but same sizes\nMPN_LOGOPS_N_INLINE(dst, src1, src2, n, operation) - do n operations involving\n  any of src1, src2, dst\nMPN_INCR_U(ptr, size, incr) - {ptr, size} += n expect no carry\nMPN_DECR_U(ptr, size, incr) - {ptr, size} -= n expect no carry\nmpn_incr_u(ptr, incr) - {ptr, size} += n carry possible\nmpn_decr_u(ptr, incr) - {ptr, size} -= n carry possible\nMPN_SIZEINBASE(result, ptr, size, base) - set result to number of numerals \n  {ptr, size} has, in the given base. 
For power of 2 bases it is exact, otherwise\n  it may sometimes be one too big - this is done for efficiency so not all limbs\n  have to be checked\nMPN_SIZEINBASE_16(result, ptr, size, base) - special optimisation for base 16\nMPN_SET_UI(zp, zn, u) - set {zp, zn} to the given ui, where zn must be big enough\n  to accomodate a ulong  \nMPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) - set {qp, xsize} to\n  {ap, size} divided by d with dinv an inverse of d and shift the number of bits\n  d has to be shifted, only uses the preinv if this will be better on this arch\nMPN_MOD_OR_PREINV_MOD_1(ap,size,d,dinv) - return {ap, size} mod d with d an \n  inverse of d - using preinv only if better on this arch\nMPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) - divide {src, size} by      divisor assuming no remainder if this will be faster\nMPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) - mpn_modexact_1_odd or \n  mpn_mod_1 depending on which is faster on this arch\nMPN_BSWAP(dst, src, size) - byte swap limbs from {src,size} and store \n  at {dst,size}\nMPN_BSWAP_REVERSE(dst, src, size) - byte swap limbs from {dst,size} and \n  store in reverse order at {src,size}\n\nADDC_LIMB(cout, w, x, y) - w = x + y with cout set to 1 for carry from add\nADDC_LIMB(cout, w, x, y) - w = x - y with cout set to 1 for borrow from sub\n\nBITS_TO_LIMBS(n) - number of limbs required to store the given number of bits\nLIMB_HIGHBIT_TO_MASK(n) - platform independently set a mask to 0 or 0xFF..FF\n  depending on whether high bit of n is 0 or 1\nLOW_ZEROS_MASK(n) - bit mask of all the least significant zero bits of n\nULONG_PARITY(p, n) - sets p to 1 if the number of 1 bits in n is odd\nNEG_MOD(r, a, d) - r = -a mod d (a >= d is allowed) may return r > d (all limbs)\nBSWAP_LIMB(dst, src) - reverse bytes in a limb\nBSWAP_LIMB_FETCH(dst, src) - set dst to the reverse of the bytes in the limb \n  pointed to by src\nBSWAP_LIMB_STORE(dst, src) - set limb pointed to by dst to the reverse of 
the       bytes in src\npopc_limb(result, input) - population count of a limb (fast!!)\nPOW2_P(n) - return 1 if n is a power of 2 (or 0)\nCNST_LIMB(C) - platform independent way of writing e.g. 1L if C was 1 say\n\nMEM_OVERLAP_P(xp, xsize, yp, ysize) - nonzero if given arrays of bytes overlap\n\ninvert_limb(invxl, xl) - set invxl to the \"inverse\" of the limb xl for functions\n  that take a precomputed \"inverse\" limb\n  technically this isn't an inverse, but it sets invxl to the largest limb\n  not larger than (2^(2*BITS_PER_MP_LIMB))/xl - (2^BITS_PER_MP_LIMB) and all\n  1's if this would give an overflow\nudiv_qrnnd_preinv1(q, r, nh, nl, d, di) - nh, nl divided by d with di an \"inverse\"\n  of d. The most significant bit of d has to be set\nudiv_qrnnd_preinv2(q, r, nh, nl, d, di) - as for preinv1 but branch free\nudiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) - as for preinv2 but for\n  any d. dnorm is d shifted left so its msb is set, lgup is ceil_log2(d)\n\nmodlimb_invert(inv, n) - true limb invert, i.e. inv*n == 1 mod 2^GMP_NUMB_BITS\nMODLIMB_INVERSE_3 - special case of modlimb_invert for n == 3\n\nMPZ_TMP_INIT(X, NLIMBS) - Allocates a temporary mpz_t with space for NLIMBS, space\n  will automatically be cleared out upon calling function's return\nMPZ_REALLOC(z, n) - reallocate mpz_t to n limbs if it doesn't have enough\nMPZ_EQUAL_1_P(z) - returns 1 if z is 1, otherwise 0\nMPZ_FAKE_UI(z, zp, u) - creates a fake mpz_t z from an ui. zp must have space for\n  an unsigned long\n\nABOVE_THRESHOLD(size, thresh) - decide whether to use algorithm A or B depending     whether size >= thresh. 
thresh == MP_SIZE_T_MAX means only ever use A, \n  thresh == 0 means only ever use B\nBELOW_THRESHOLD(size, thresh) - !ABOVE_THRESHOLD\n\nMacro constants defined in gmp_impl.h\n===========================================\n\nBYTES_PER_MP_LIMB - bytes per limb\nBITS_PER_MP_LIMB - bits per limb\nBITS_PER_ULONG - bits in an unsigned long\nLIMBS_PER_ULONG - number of limbs per unsigned long\n\nULONG_MAX - unsigned long ~0\nUINT_MAX - unsigned in ~0\nUSHRT_MAX - unsigned short ~0\nMP_LIMB_T_MAX - mp_limb_t ~0\n\nULONG_HIGHBIT - unsigned long high bit set\nUINT_HIGHBIT - unsigned int high bit set\nUSHRT_HIGHBIT - unsigned short high bit set\nGMP_LIMB_HIGHBIT - mp_limb_t high bit set\n\nLONG_MIN - smallest negative long\nLONG_MAX - largest positive long\nLONG_HIGHBIT - LONG_MIN\nINT_MIN - smallest negative int\nINT_MAX - largest positive int\nINT_HIGHBIT - INT_MIN\nSHRT_MIN - smallest negative short\nSHRT_MAX - largest positive short\nSHRT_HIGHBIT - SHRT_MIN\nMP_SIZE_T_MIN - smallest negative mp_size_t\nMP_SIZE_T_MAX - largest positive mp_size_t\nMP_EXP_T_MIN - smallest negative mp_exp_t\nMP_EXP_T_MAX - largest positive mp_exp_t\nGMP_NUMB_HIGHBIT - highest bit of a limb (not including nail bits)\n\nGMP_NUMB_CEIL_MAX_DIV3 - ceil(GMP_NUMB_MAX/3)\nGMP_NUMB_CEIL_2MAX_DIV3 - ceil(2*GMP_NUMB_MAX/3)\n\nPP - product of odd primes that will fit in a limb 3x5x...\nPP_FIRST_OMITTED - first odd prime omitted\nPP_INVERTED - inverse of PP\n\nException macros\n===========================================\n\nGMP_ERROR(code) - raise exception with given code\nDIVIDE_BY_ZERO - raise divide by zero exception\nSQRT_OF_NEGATIVE - raise square root of negative exception\n\nSome types defined in gmp_impl.h\n===========================================\n\ngmp_uint_least32_t - unsigned integer type with at least 32 bits\n\nMacros used in prototypes\n===========================================\n\nATTRIBUTE_CONST - function examines its arguments and returns a value but no \n   memory is read or 
written to and function has no side effects\n\nATTRIBUTE_NORETURN - signifies a function that won't ever return (supresses\n   \"code unreachable\" compile time errors)\n\nATTRIBUTE_MALLOC - function returns a pointer that can't alias anything, \n   just as malloc does\n\nREGPARM_2_1(a,b,x) - reorder parameters to x,a,b so that x can be put in a reg\nREGPARM_3_1(a,b,c,x) - reorder parameters to x,a,b,c so that x can be put in a reg\nREGPARM_ATTR(n) - make parameter a register parameter\n\nStack based memory manager\n==========================\nUsage: TMP_DECL;\n       TMP_MARK;\n       ptr = TMP_ALLOC(bytes);\n       TMP_FREE;\n\nThis pushes handling of the stack allocation onto the calling function, which is\nwhat Pari does. It is faster than malloc and other stack based methods (!) A \nreentrant version is available by doing #define WANT_TMP_REENTRANT 1\n\nVariants: TMP_SDECL, TMP_SMARK, TMP_SALLOC, TMP_SFREE - for small allocations\n  (uses alloca if available)\n          TMP_SDECL, TMP_SMARK, TMP_SALLOC, TMP_SFREE - for large allocations\n  \nNote: TEMP_ALLOC calls TMP_SALLOC if bytes < 65536, otherwise TMP_BALLOC\n\nTMP_B/S/ALLOC_TYPE(n, type) - alloc space for n variables of the given type\nTMP_B/S/ALLOC_LIMBS(n) - alloc space for n limbs\n\nTMP_B/S/ALLOC_MP_PTRS(n) - alloc space for n mp_ptr's\n\nTMP_ALLOC_LIMBS_2(xp, xsize, yp, ysize) - makes two allocations at once (faster)\n\nMemory manager\n==========================\n\nalloca(size) - allocate size bytes of space in the stack frame of the calling\n  function. 
Space is automatically freed when the calling function returns\n\n__gmp_default_allocate(bytes) - alloc bytes using GMP default memory manager\n__gmp_default_reallocate(ptr,bytes) - realloc bytes using default memory manager\n__gmp_default_free(ptr, size) - free the size bytes allocated at ptr\n\n__GMP_ALLOCATE_FUNC_TYPE(n, type) - allocate space for n vars of given type using\n  user defined memory management function\n__GMP_ALLOCATE_FUNC_LIMBS(n) - allocate n limbs, user defined mem man\n\n__GMP_REALLOCATE_FUNC_TYPE(p, old, new, type) - reallocate from old to new size\n  user defined memory management function\n__GMP_REALLOCATE_FUNC_LIMBS(p, old, new) - realloc from old to new limbs, \n  user defined mem man\n\n__GMP_FREE_FUNC_TYPE(n, type) - free space for n vars of given type using\n  user defined memory management function\n__GMP_FREE_FUNC_LIMBS(n) - free n limbs, user defined mem man\n\n__GMP_REALLOCATE_FUNC_MAYBE(p, old, new) - reallocate, oldsize can equal newsize,\n  user defined memory management function\n\n__GMP_REALLOCATE_FUNC_MAYBE_TYPE(p, old, new, type) - reallocate type, \n  oldsize can equal newsize, user defined memory management function\n\nAssembly macros\n====================================\n\nASM_L(name) - local label for a gcc asm block\n\nDebugging\n====================================\n\nMPZ_CHECK_FORMAT(z) - tries to check an mpz_t to see if it is broken\nMPZ_PROVOKE_REALLOC(z) - cause z to be realloc'd just because ou feel like it\n\nMisc\n====================================\n\n- Macros to do with the random generators\n- Lot's of undocumented mpn functions presumably defined in mpn/generic\n- macros for temp space required by kara and toom3 mul and sqr \n- space for fibonnacci functions, fn for n-th fib num from table and biggest n\n  such that L_n fits in a long\n- default values for mullow, kara, toom, fft params and thresholds, div divide and\n  conquer thresholds\n- piles of different assert macros\n- defines for longlong.h\n- 
stuff for handling doubles\n- lots of macros for v. fast computation of jacobi symbols\n- stuff for mpf's\n- stuff for formatted printing, reading\n"
  },
  {
    "path": "doc/devel/mpn_functions.txt",
    "content": "New assembly mpn functions since MPIR 0.9 = GMP 4.2.1 + Gaudry + Martin patches\n================================================================\n/* \n   {rp, n + 1} = {rp, n} + {up, n}*(vl1, vl2)  and return carry limb\n*/\nmp_limb_t mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl1, mp_limb_t vl2)\n\n/* {qp, n} = {xp, n}/3 if ci = 0\n   {xp, n} = {qp, n}*3 - return*B^n for 0 <= return < 3\n   ci can be used to chain these together\n*/ \nmp_limb_t mpn_divexact_by3c(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t ci)\n\n/* \n   {qp, n} = {xp, n}/(B-1) \n   {xp, n} = {qp, n}*(B-1) - return*B^n for 0 <= return < B - 1\n*/ \nmp_limb_t\nmpn_divexact_byff (mp_ptr qp, mp_srcptr xp, mp_size_t n)\n\n/* \n   {qp, n} = {xp, n}/((B-1)/f) \n   Caller must set Bm1of to (B-1)/f\n   {xp, n} = {qp, n}*((B-1)/f) - return*B^n for 0 <= return < (B - 1)/f\n*/ \nmp_limb_t\t mpn_divexact_byBm1of(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t f, mp_limb_t Bm1of)\n\n/*\n   {qp,n} = {xp,n}/d\n   {xp,n} = {qp,n}*d + return  0 <= return < d\n*/\nmp_limb_t mpn_divrem_euclidean_qr_1(mp_ptr qp, mp_srcptr xp,mp_size_t n,mp_limb_t d)\n\n/*\n  {rp, n} = {up, n}*vl and return carry limb\n*/\nmp_limb_t mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n\n/*\n  {rp, n+1} = {up, n}*(vl1, vl2) and return carry limb\n*/\nmp_limb_t mpn_mul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl1, mp_limb_t vl2)\n\n/*\n  {rp, n} = {up, n} + {vp, n} + {wp, n} and return carry limb\n*/\nmp_limb_t mpn_addadd_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_srcptr wp, mp_size_t n)\n\n/*\n  {rp, n} = {up, n} + {vp, n} - {wp, n} and return carry/borrow limb (1 for carry, -1 for borrow)\n*/\nmp_limb_t mpn_addsub_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_srcptr wp, mp_size_t n)\n\n/*\n  {rp, n} = (twos) complement of {up, n}\n*/\nvoid mpn_com_n (mp_ptr rp, mp_srcptr up, mp_size_t n)\n\n/*\n  {rp, n} = {up, n} + 2*{vp, n} and return carry\n*/\nmp_limb_t mpn_addlsh1(mp_ptr 
rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n\n/*\n  {rp, n} = 2^shift*{up, n} with 0 < shift < BITS_PER_LIMB and return carry\n*/\nmp_limb_t mpn_lshift(mp_ptr rp, mp_srcptr up, mp_size_t n, mp_size_t shift)\n\n/*\n  {rp, n} = 2*{up, n} and return carry\n*/\nmp_limb_t mpn_lshift1(mp_ptr rp, mp_srcptr up, mp_size_t n)\n\n/* Set cp[] <- tp[]/B^n mod mp[].  Clobber tp[].\n   mp[] is n limbs; tp[] is 2n limbs.  where Nprim*mp[0] == 1 mod B\n*/\nvoid mpn_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim)\n\n/*\n  {rp, n} = {up, n}/2^shift with 0 < shift < BITS_PER_LIMB and return msl\n*/\nmp_limb_t mpn_rshift(mp_ptr rp, mp_srcptr up, mp_size_t n, mp_size_t shift)\n\n/*\n  {rp, n} = {up, n}/2 and return msl\n*/\nmp_limb_t mpn_rshift1(mp_ptr rp, mp_srcptr up, mp_size_t n)\n\n/*\n  {rp, n} = {up, n} - ({vp, n} + {wp, n}) and return borrow\n*/\nmp_limb_t mpn_subadd_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_srcptr wp, mp_size_t n)\n\n/*\n  {rp, n} = {up, n} - 2*{vp, n} and return borrow\n*/\nmp_limb_t mpn_sublsh1(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n\n/*\n  {rp1, n} = {up, n} + {vp, n} with carry = c\n  {rp2, n} = {up, n} - {vp, n} with borrow = b\n  return 2*c+b\n  \n*/\nmp_limb_t mpn_sumdiff_n(mp_ptr rp1, mp_ptr rp2, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n\n/*\n  {rp, n} = {up, n} xxx {vp, n} \n  where (xxx) is one of:\n  and = u and v, andn = u and (not v), nand = not (u and v), ior = u inclusive or v, iorn = u inclusive or (not b), \n  nior = not (u inclusive or v), xnor = u exclusive or (not v), xor = u exclusive or v  \n*/\nvoid mpn_xxx_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n\nNew C level mpn functions since MPIR 0.9\n================================\n\n/* \n   Multiply {up, n} by {vp, n} and store the result at {rp, 2n} using Toom 4 algorithm \n*/\nvoid\n mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n\n"
  },
  {
    "path": "doc/devel/projects.html",
    "content": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n<html>\n<head>\n  <title>GMP Development Projects</title>\n  <link rel=\"shortcut icon\" href=\"favicon.ico\">\n  <link rel=\"stylesheet\" href=\"gmp.css\">\n  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n</head>\n\n<center>\n  <h1>\n    GMP Development Projects\n  </h1>\n</center>\n\n<font size=-1>\nCopyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation,\nInc. <br><br>\nCopyright 2008 William Hart. <br><br>\nThis file is part of the MPIR Library. <br><br>\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version. <br><br>\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details. <br><br>\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n</font>\n\n<hr>\n<!-- NB. timestamp updated automatically by emacs -->\n  This file current as of 21 Apr 2006. Please send comments \n  to http://groups.google.co.uk/group/mpir-devel/.\n\n<p> This file lists projects suitable for volunteers.  Please see the\n    <a href=\"tasks.html\">tasks file</a> for smaller tasks.\n\n<p> If you want to work on any of the projects below, please let us know at\n    http://groups.google.co.uk/group/mpir-devel/.  If you want to help with a project\n    that already somebody else is working on, we will help you will get in touch.  
\n    (There are no email addresses of\n    volunteers below, due to spamming problems.)\n\n<ul>\n<li> <strong>Faster multiplication</strong>\n\n  <p> The current multiplication code uses Karatsuba, 3-way Toom, and Fermat\n      FFT.  Several new developments are desirable:\n\n  <ol>\n\n    <li> Handle multiplication of operands with different digit count better\n\t than today.  We now split the operands in a very inefficient way, see\n\t mpn/generic/mul.c.  The best operands splitting strategy depends on\n\t the underlying multiplication algorithm to be used.\n\n    <li> Implement an FFT variant computing the coefficients mod m different\n\t limb size primes of the form l*2^k+1. i.e., compute m separate FFTs.\n\t The wanted coefficients will at the end be found by lifting with CRT\n\t (Chinese Remainder Theorem).  If we let m = 3, i.e., use 3 primes, we\n\t can split the operands into coefficients at limb boundaries, and if\n\t our machine uses b-bit limbs, we can multiply numbers with close to\n\t 2^b limbs without coefficient overflow.  For smaller multiplication,\n\t we might perhaps let m = 1, and instead of splitting our operands at\n\t limb boundaries, split them in much smaller pieces.  We might also use\n\t 4 or more primes, and split operands into bigger than b-bit chunks.\n\t By using more primes, the gain in shorter transform length, but lose\n\t in having to do more FFTs, but that is a slight total save.  We then\n\t lose in more expensive CRT. <br><br>\n\n         An nearly complete implementation has been done by Tommy Färnqvist.\n\n    <li> Perhaps consider N-way Toom, N > 3.  See Knuth's Seminumerical\n\t Algorithms for details on the method.  Code implementing it exists.\n\t This is asymptotically inferior to FFTs, but is finer grained.  A\n\t Toom-4 might fit in between Toom-3 and the FFTs (or it might not).\n\n    <li> Add support for partial products, either a given number of low limbs\n         or high limbs of the result.  
A high partial product can be used by\n         <code>mpf_mul</code> and by Newton approximations, a low half partial\n         product might be of use in a future sub-quadratic REDC.  On small\n         sizes a partial product will be faster simply through fewer\n         cross-products, similar to the way squaring is faster.  But work by\n         Thom Mulders shows that for Karatsuba and higher order algorithms the\n         advantage is progressively lost, so for large sizes partial products\n         turn out to be no faster.\n\n  </ol>\n\n  <p> Another possibility would be an optimized cube.  In the basecase that\n      should definitely be able to save cross-products in a similar fashion to\n      squaring, but some investigation might be needed for how best to adapt\n      the higher-order algorithms.  Not sure whether cubing or further small\n      powers have any particularly important uses though.\n\n\n<li> <strong>Assembly routines</strong>\n\n  <p> Write new and improve existing assembly routines.  The tests/devel\n      programs and the tune/speed.c and tune/many.pl programs are useful for\n      testing and timing the routines you write.  See the README files in those\n      directories for more information.\n\n  <p> Please make sure your new routines are fast for these three situations:\n      <ol>\n        <li> Operands that fit into the cache.\n        <li> Small operands of less than, say, 10 limbs.\n        <li> Huge operands that do not fit into the cache.\n      </ol>\n\n  <p> The most important routines are mpn_addmul_1, mpn_mul_basecase and\n      mpn_sqr_basecase.  The latter two don't exist for all machines, while\n      mpn_addmul_1 exists for almost all machines.\n\n  <p> Standard techniques for these routines are unrolling, software\n      pipelining, and specialization for common operand values.  
For machines\n      with poor integer multiplication, it is often possible to improve the\n      performance using floating-point operations, or SIMD operations such as\n      MMX or Sun's VIS.\n\n  <p> Using floating-point operations is interesting but somewhat tricky.\n      Since IEEE double has 53 bits of mantissa, one has to split the operands\n      in small pieces, so that no result is greater than 2^53.  For 32-bit\n      computers, splitting one operand into 16-bit pieces works.  For 64-bit\n      machines, one operand can be split into 21-bit pieces and the other into\n      32-bit pieces.  (A 64-bit operand can be split into just three 21-bit\n      pieces if one allows the split operands to be negative!)\n\n\n<li> <strong>Faster GCD</strong>\n\n  <p> Work on Schönhage GCD and GCDEXT for large numbers is in progress.\n      Contact Niels Möller if you want to help.\n\n\n<li> <strong>Math functions for the mpf layer</strong>\n\n  <p> Implement the functions of math.h for the GMP mpf layer!  Check the book\n      \"Pi and the AGM\" by Borwein and Borwein for ideas how to do this.  
These\n      functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,\n      cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.\n\n\n<li> <strong>Faster sqrt</strong>\n\n  <p> The current code uses divisions, which are reasonably fast, but it'd be\n      possible to use only multiplications by computing 1/sqrt(A) using this\n      formula:\n      <pre>\n                                    2\n                   x   = x  (3 &minus; A x )/2\n                    i+1   i         i  </pre>\n      The square root can then be computed like this:\n      <pre>\n                     sqrt(A) = A x\n                                  n  </pre>\n      <p> That final multiply might be the full size of the input (though it might\n      only need the high half of that), so there may or may not be any speedup\n      overall.\n\n\n<li> <strong>Nth root</strong>\n\n  <p> Improve mpn_rootrem.  The current code is really naive, using full\n      precision from the first iteration.  Also, calling mpn_pow_1 isn't very\n      clever, as only 1/n of the result bits will be used; truncation after\n      each multiplication would be better.  Avoiding division might also be\n      possible.\n\n      Work mostly done, see\n      <a href=\"http://gmplib.org/devel/\">http://gmplib.org/devel/</a>.\n\n\n<li> <strong>Quotient-Only Division</strong>\n\n  <p> Some work can be saved when only the quotient is required in a division,\n      basically the necessary correction -0, -1 or -2 to the estimated quotient\n      can almost always be determined from only a few limbs of multiply and\n      subtract, rather than forming a complete remainder.  The greatest savings\n      are when the quotient is small compared to the dividend and divisor.\n\n  <p> Some code along these lines can be found in the current\n      <code>mpn_tdiv_qr</code>, though perhaps calculating bigger chunks of\n      remainder than might be strictly necessary.  
That function in its current\n      form actually then always goes on to calculate a full remainder.\n      Burnikel and Ziegler describe a similar approach for the divide and\n      conquer case.\n\n\n<li> <strong>Sub-Quadratic REDC and Exact Division</strong>\n\n  <p> See also\n      <a href=\"http://gmplib.org/devel/\">http://gmplib.org/devel/</a>\n      for some new code for divexact.\n\n  <p> <code>mpn_bdivmod</code> and the <code>redc</code> in\n      <code>mpz_powm</code> should use some sort of divide and conquer\n      algorithm.  This would benefit <code>mpz_divexact</code>, and\n      <code>mpn_gcd</code> on large unequal size operands.  See \"Exact Division\n      with Karatsuba Complexity\" by Jebelean for a (brief) description.\n\n  <p> Failing that, some sort of <code>DIVEXACT_THRESHOLD</code> could be added\n      to control whether <code>mpz_divexact</code> uses\n      <code>mpn_bdivmod</code> or <code>mpn_tdiv_qr</code>, since the latter is\n      faster on large divisors.\n\n  <p> For the REDC, basically all that's needed is Montgomery's algorithm done\n      in multi-limb integers.  R is multiple limbs, and the inverse and\n      operands are multi-precision.\n\n  <p> For exact division the time to calculate a multi-limb inverse is not\n      amortized across many modular operations, but instead will probably\n      create a threshold below which the current style <code>mpn_bdivmod</code>\n      is best.  There's also Krandick and Jebelean, \"Bidirectional Exact\n      Integer Division\" to basically use a low to high exact division for the\n      low half quotient, and a quotient-only division for the high half.\n\n  <p> It will be noted that low-half and high-half multiplies, and a low-half\n      square, can be used.  
These ought to each take as little as half the time\n      of a full multiply, or square, though work by Thom Mulders shows the\n      advantage is progressively lost as Karatsuba and higher algorithms are\n      applied.\n\n\n<li> <strong>Exceptions</strong>\n\n  <p> Some sort of scheme for exceptions handling would be desirable.\n      Presently the only thing documented is that divide by zero in GMP\n      functions provokes a deliberate machine divide by zero (on those systems\n      where such a thing exists at least).  The global <code>gmp_errno</code>\n      is not actually documented, except for the old <code>gmp_randinit</code>\n      function.  Being currently just a plain global means it's not\n      thread-safe.\n\n  <p> The basic choices for exceptions are returning an error code or having a\n      handler function to be called.  The disadvantage of error returns is they\n      have to be checked, leading to tedious and rarely executed code, and\n      strictly speaking such a scheme wouldn't be source or binary compatible.\n      The disadvantage of a handler function is that a <code>longjmp</code> or\n      similar recovery from it may be difficult.  A combination would be\n      possible, for instance by allowing the handler to return an error code.\n\n  <p> Divide-by-zero, sqrt-of-negative, and similar operand range errors can\n      normally be detected at the start of functions, so exception handling\n      would have a clean state.  What's worth considering though is that the\n      GMP function detecting the exception may have been called via some third\n      party library or self contained application module, and hence have\n      various bits of state to be cleaned up above it.  
It'd be highly\n      desirable for an exceptions scheme to allow for such cleanups.\n\n  <p> The C++ destructor mechanism could help with cleanups both internally and\n      externally, but being a plain C library we don't want to depend on that.\n\n  <p> A C++ <code>throw</code> might be a good optional extra exceptions\n      mechanism, perhaps under a build option.  For GCC\n      <code>-fexceptions</code> will add the necessary frame information to\n      plain C code, or GMP could be compiled as C++.\n\n  <p> Out-of-memory exceptions are expected to be handled by the\n      <code>mp_set_memory_functions</code> routines, rather than being a\n      prospective part of divide-by-zero etc.  Some similar considerations\n      apply but what differs is that out-of-memory can arise deep within GMP\n      internals.  Even fundamental routines like <code>mpn_add_n</code> and\n      <code>mpn_addmul_1</code> can use temporary memory (for instance on Cray\n      vector systems).  Allowing for an error code return would require an\n      awful lot of checking internally.  Perhaps it'd still be worthwhile, but\n      it'd be a lot of changes and the extra code would probably be rather\n      rarely executed in normal usages.\n\n  <p> A <code>longjmp</code> recovery for out-of-memory will currently, in\n      general, lead to memory leaks and may leave GMP variables operated on in\n      inconsistent states.  Maybe it'd be possible to record recovery\n      information for use by the relevant allocate or reallocate function, but\n      that too would be a lot of changes.\n\n  <p> One scheme for out-of-memory would be to note that all GMP allocations go\n      through the <code>mp_set_memory_functions</code> routines.  So if the\n      application has an intended <code>setjmp</code> recovery point it can\n      record memory activity by GMP and abandon space allocated and variables\n      initialized after that point.  
This might be as simple as directing the\n      allocation functions to a separate pool, but in general would have the\n      disadvantage of needing application-level bookkeeping on top of the\n      normal system <code>malloc</code>.  An advantage however is that it needs\n      nothing from GMP itself and on that basis doesn't burden applications not\n      needing recovery.  Note that there's probably some details to be worked\n      out here about reallocs of existing variables, and perhaps about copying\n      or swapping between \"permanent\" and \"temporary\" variables.\n\n  <p> Applications desiring a fine-grained error control, for instance a\n      language interpreter, would very possibly not be well served by a scheme\n      requiring <code>longjmp</code>.  Wrapping every GMP function call with a\n      <code>setjmp</code> would be very inconvenient.\n\n  <p> Another option would be to let <code>mpz_t</code> etc hold a sort of NaN,\n      a special value indicating an out-of-memory or other failure.  This would\n      be similar to NaNs in mpfr.  Unfortunately such a scheme could only be\n      used by programs prepared to handle such special values, since for\n      instance a program waiting for some condition to be satisfied could\n      become an infinite loop if it wasn't also watching for NaNs.  The work to\n      implement this would be significant too, lots of checking of inputs and\n      intermediate results.  And if <code>mpn</code> routines were to\n      participate in this (which they would have to internally) a lot of new\n      return values would need to be added, since of course there's no\n      <code>mpz_t</code> etc structure for them to indicate failure in.\n\n  <p> Stack overflow is another possible exception, but perhaps not one that\n      can be easily detected in general.  On i386 GNU/Linux for instance GCC\n      normally doesn't generate stack probes for an <code>alloca</code>, but\n      merely adjusts <code>%esp</code>.  
A big enough <code>alloca</code> can\n      miss the stack redzone and hit arbitrary data.  GMP stack usage is\n      normally a function of operand size, which might be enough for some\n      applications to know they'll be safe.  Otherwise a fixed maximum usage\n      can probably be obtained by building with\n      <code>--enable-alloca=malloc-reentrant</code> (or\n      <code>notreentrant</code>).  Arranging the default to be\n      <code>alloca</code> only on blocks up to a certain size and\n      <code>malloc</code> thereafter might be a better approach and would have\n      the advantage of not having calculations limited by available stack.\n\n  <p> Actually recovering from stack overflow is of course another problem.  It\n      might be possible to catch a <code>SIGSEGV</code> in the stack redzone\n      and do something in a <code>sigaltstack</code>, on systems which have\n      that, but recovery might otherwise not be possible.  This is worth\n      bearing in mind because there's no point worrying about tight and careful\n      out-of-memory recovery if an out-of-stack is fatal.\n\n  <p> Operand overflow is another exception to be addressed.  It's easy for\n      instance to ask <code>mpz_pow_ui</code> for a result bigger than an\n      <code>mpz_t</code> can possibly represent.  Currently overflows in limb\n      or byte count calculations will go undetected.  Often they'll still end\n      up asking the memory functions for blocks bigger than available memory,\n      but that's by no means certain and results are unpredictable in general.\n      It'd be desirable to tighten up such size calculations.  Probably only\n      selected routines would need checks, if it's assumed say that no input\n      will be more than half of all memory and hence size additions like say\n      <code>mpz_mul</code> won't overflow.\n\n\n<li> <strong>Performance Tool</strong>\n\n  <p> It'd be nice to have some sort of tool for getting an overview of\n      performance.  
Clearly a great many things could be done, but some primary\n      uses would be,\n\n      <ol>\n        <li> Checking speed variations between compilers.\n        <li> Checking relative performance between systems or CPUs.\n      </ol>\n\n  <p> A combination of measuring some fundamental routines and some\n      representative application routines might satisfy these.\n\n  <p> The tune/time.c routines would be the easiest way to get good accurate\n      measurements on lots of different systems.  The high level\n      <code>speed_measure</code> may or may not suit, but the basic\n      <code>speed_starttime</code> and <code>speed_endtime</code> would cover\n      lots of portability and accuracy questions.\n\n\n<li> <strong>Using <code>restrict</code></strong>\n\n  <p> There might be some value in judicious use of C99 style\n      <code>restrict</code> on various pointers, but this would need some\n      careful thought about what it implies for the various operand overlaps\n      permitted in GMP.\n\n  <p> Rumour has it some pre-C99 compilers had <code>restrict</code>, but\n      expressing tighter (or perhaps looser) requirements.  Might be worth\n      investigating that before using <code>restrict</code> unconditionally.\n\n  <p> Loops are presumably where the greatest benefit would be had, by allowing\n      the compiler to advance reads ahead of writes, perhaps as part of loop\n      unrolling.  However critical loops are generally coded in assembler, so\n      there might not be very much to gain.  And on Cray systems the explicit\n      use of <code>_Pragma</code> gives an equivalent effect.\n\n  <p> One thing to note is that Microsoft C headers (on ia64 at least) contain\n      <code>__declspec(restrict)</code>, so a <code>#define</code> of\n      <code>restrict</code> should be avoided.  
It might be wisest to setup a\n      <code>gmp_restrict</code>.\n\n\n<li> <strong>Nx1 Division</strong>\n\n  <p> The limb-by-limb dependencies in the existing Nx1 division (and\n      remainder) code means that chips with multiple execution units or\n      pipelined multipliers are not fully utilized.\n\n  <p> One possibility is to follow the current preinv method but taking two\n      limbs at a time.  That means a 2x2-&gt;4 and a 2x1-&gt;2 multiply for\n      each two limbs processed, and because the 2x2 and 2x1 can each be done in\n      parallel the latency will be not much more than 2 multiplies for two\n      limbs, whereas the single limb method has a 2 multiply latency for just\n      one limb.  A version of <code>mpn_divrem_1</code> doing this has been\n      written in C, but not yet tested on likely chips.  Clearly this scheme\n      would extend to 3x3-&gt;9 and 3x1-&gt;3 etc, though with diminishing\n      returns.\n\n  <p> For <code>mpn_mod_1</code>, Peter L. Montgomery proposes the following\n      scheme.  For a limb R=2^<code>bits_per_mp_limb</code>, pre-calculate\n      values R mod N, R^2 mod N, R^3 mod N, R^4 mod N.  Then take dividend\n      limbs and multiply them by those values, thereby reducing them (moving\n      them down) by the corresponding factor.  The products can be added to\n      produce an intermediate remainder of 2 or 3 limbs to be similarly\n      included in the next step.  The point is that such multiplies can be done\n      in parallel, meaning as little as 1 multiply worth of latency for 4\n      limbs.  If the modulus N is less than R/4 (or is it R/5?) the summed\n      products will fit in 2 limbs, otherwise 3 will be required, but with the\n      high only being small.  Clearly this extends to as many factors of R as a\n      chip can efficiently apply.\n\n  <p> The logical conclusion for powers R^i is a whole array \"p[i] = R^i mod N\"\n      for i up to k, the size of the dividend.  
This could then be applied at\n      multiplier throughput speed like an inner product.  If the powers took\n      roughly k divide steps to calculate then there'd be an advantage any time\n      the same N was used three or more times.  Suggested by Victor Shoup in\n      connection with chinese-remainder style decompositions, but perhaps with\n      other uses.\n\n  <p> <code>mpn_modexact_1_odd</code> calculates an x in the range 0&lt;=x&lt;d\n      satisfying a = q*d + x*b^n, where b=2^bits_per_limb.  The factor b^n\n      needed to get the true remainder r could be calculated by a powering\n      algorithm, allowing <code>mpn_modexact_1_odd</code> to be pressed into\n      service for an <code>mpn_mod_1</code>.  <code>modexact_1</code> is\n      simpler and on some chips can run noticeably faster than plain\n      <code>mod_1</code>, on Athlon for instance 11 cycles/limb instead of 17.\n      Such a difference could soon overcome the time to calculate b^n.  The\n      requirement for an odd divisor in <code>modexact</code> can be handled by\n      some shifting on-the-fly, or perhaps by an extra partial-limb step at the\n      end.\n\n\n<li> <strong>Factorial</strong>\n\n  <p> The removal of twos in the current code could be extended to factors of 3\n      or 5.  Taking this to its logical conclusion would be a complete\n      decomposition into powers of primes.  The power for a prime p is of\n      course floor(n/p)+floor(n/p^2)+...  Conrad Curry found this is quite fast\n      (using simultaneous powering as per Handbook of Applied Cryptography\n      algorithm 14.88).\n\n  <p> A difficulty with using all primes is that quite large n can be\n      calculated on a system with enough memory, larger than we'd probably want\n      for a table of primes, so some sort of sieving would be wanted.  
Perhaps\n      just taking out the factors of 3 and 5 would give most of the speedup\n      that a prime decomposition can offer.\n\n\n<li> <strong>Binomial Coefficients</strong>\n\n  <p> An obvious improvement to the current code would be to strip factors of 2\n      from each multiplier and divisor and count them separately, to be applied\n      with a bit shift at the end.  Factors of 3 and perhaps 5 could even be\n      handled similarly.\n\n  <p> Conrad Curry reports a big speedup for binomial coefficients using a\n      prime powering scheme, at least for k near n/2.  Of course this is only\n      practical for moderate size n since again it requires primes up to n.\n\n  <p> When k is small the current (n-k+1)...n/1...k will be fastest.  Some sort\n      of rule would be needed for when to use this or when to use prime\n      powering.  Such a rule will be a function of both n and k.  Some\n      investigation is needed to see what sort of shape the crossover line will\n      have, the usual parameter tuning can of course find machine dependent\n      constants to fill in where necessary.\n\n  <p> An easier possibility also reported by Conrad Curry is that it may be\n      faster not to divide out the denominator (1...k) one-limb at a time, but\n      do one big division at the end.  Is this because a big divisor in\n      <code>mpn_bdivmod</code> trades the latency of\n      <code>mpn_divexact_1</code> for the throughput of\n      <code>mpn_submul_1</code>?  Overheads must hurt though.\n\n  <p> Another reason a big divisor might help is that\n      <code>mpn_divexact_1</code> won't be getting a full limb in\n      <code>mpz_bin_uiui</code>.  It's called when the n accumulator is full\n      but the k may be far from full.  Perhaps the two could be decoupled so k\n      is applied when full.  
It'd be necessary to delay consideration of k\n      terms until the corresponding n terms had been applied though, since\n      otherwise the division won't be exact.\n\n\n<li> <strong>Perfect Power Testing</strong>\n\n  <p> <code>mpz_perfect_power_p</code> could be improved in a number of ways,\n      for instance p-adic arithmetic to find possible roots.\n\n  <p> Non-powers can be quickly identified by checking for Nth power residues\n      modulo small primes, like <code>mpn_perfect_square_p</code> does for\n      squares.  The residues to each power N for a given remainder could be\n      grouped into a bit mask, the masks for the remainders to each divisor\n      would then be \"and\"ed together to hopefully leave only a few candidate\n      powers.  Need to think about how wide to make such masks, ie. how many\n      powers to examine in this way.\n\n  <p> Any zero remainders found in residue testing reveal factors which can be\n      divided out, with the multiplicity restricting the powers that need to be\n      considered, as per the current code.  Further prime dividing should be\n      grouped into limbs like <code>PP</code>.  Need to think about how much\n      dividing to do like that, probably more for bigger inputs, less for\n      smaller inputs.\n\n  <p> <code>mpn_gcd_1</code> would probably be better than the current private\n      GCD routine.  The use it's put to isn't time-critical, and it might help\n      ensure correctness to just use the main GCD routine.\n\n\n<li> <strong>Prime Testing</strong>\n\n  <p> GMP is not really a number theory library and probably shouldn't have\n      large amounts of code dedicated to sophisticated prime testing\n      algorithms, but basic things well-implemented would suit.  Tests offering\n      certainty are probably all too big or too slow (or both!) to justify\n      inclusion in the main library.  
Demo programs showing some possibilities\n      would be good though.\n\n  <p> The present \"repetitions\" argument to <code>mpz_probab_prime_p</code> is\n      rather specific to the Miller-Rabin tests of the current implementation.\n      Better would be some sort of parameter asking perhaps for a maximum\n      chance 1/2^x of a probable prime in fact being composite.  If\n      applications follow the advice that the present reps gives 1/4^reps\n      chance then perhaps such a change is unnecessary, but an explicitly\n      described 1/2^x would allow for changes in the implementation or even for\n      new proofs about the theory.\n\n  <p> <code>mpz_probab_prime_p</code> always initializes a new\n      <code>gmp_randstate_t</code> for randomized tests, which unfortunately\n      means it's not really very random and in particular always runs the same\n      tests for a given input.  Perhaps a new interface could accept an rstate\n      to use, so successive tests could increase confidence in the result.\n\n  <p> <code>mpn_mod_34lsub1</code> is an obvious and easy improvement to the\n      trial divisions.  And since the various prime factors are constants, the\n      remainder can be tested with something like\n<pre>\n#define MP_LIMB_DIVISIBLE_7_P(n) \\\n  ((n) * MODLIMB_INVERSE_7 &lt;= MP_LIMB_T_MAX/7)\n</pre>\n      Which would help compilers that don't know how to optimize divisions by\n      constants, and is even an improvement on current gcc 3.2 code.  This\n      technique works for any modulus, see Granlund and Montgomery \"Division by\n      Invariant Integers\" section 9.\n\n  <p> The trial divisions are done with primes generated and grouped at\n      runtime.  This could instead be a table of data, with pre-calculated\n      inverses too.  Storing deltas, ie. amounts to add, rather than actual\n      primes would save space.  <code>udiv_qrnnd_preinv</code> style inverses\n      can be made to exist by adding dummy factors of 2 if necessary.  
Some\n      thought needs to be given as to how big such a table should be, based on\n      how much dividing would be profitable for what sort of size inputs.  The\n      data could be shared by the perfect power testing.\n\n  <p> Jason Moxham points out that if a sqrt(-1) mod N exists then any factor\n      of N must be == 1 mod 4, saving half the work in trial dividing.  (If\n      x^2==-1 mod N then for a prime factor p we have x^2==-1 mod p and so the\n      jacobi symbol (-1/p)=1.  But also (-1/p)=(-1)^((p-1)/2), hence must have\n      p==1 mod 4.)  But knowing whether sqrt(-1) mod N exists is not too easy.\n      A strong pseudoprime test can reveal one, so perhaps such a test could be\n      inserted part way though the dividing.\n\n  <p> Jon Grantham \"Frobenius Pseudoprimes\" (www.pseudoprime.com) describes a\n      quadratic pseudoprime test taking about 3x longer than a plain test, but\n      with only a 1/7710 chance of error (whereas 3 plain Miller-Rabin tests\n      would offer only (1/4)^3 == 1/64).  Such a test needs completely random\n      parameters to satisfy the theory, though single-limb values would run\n      faster.  It's probably best to do at least one plain Miller-Rabin before\n      any quadratic tests, since that can identify composites in less total\n      time.\n\n  <p> Some thought needs to be given to the structure of which tests (trial\n      division, Miller-Rabin, quadratic) and how many are done, based on what\n      sort of inputs we expect, with a view to minimizing average time.\n\n  <p> It might be a good idea to break out subroutines for the various tests,\n      so that an application can combine them in ways it prefers, if sensible\n      defaults in <code>mpz_probab_prime_p</code> don't suit.  
In particular\n      this would let applications skip tests it knew would be unprofitable,\n      like trial dividing when an input is already known to have no small\n      factors.\n\n  <p> For small inputs, combinations of theory and explicit search make it\n      relatively easy to offer certainty.  For instance numbers up to 2^32\n      could be handled with a strong pseudoprime test and table lookup.  But\n      it's rather doubtful whether a smallnum prime test belongs in a bignum\n      library.  Perhaps if it had other internal uses.\n\n  <p> An <code>mpz_nthprime</code> might be cute, but is almost certainly\n      impractical for anything but small n.\n\n\n<li> <strong>Intra-Library Calls</strong>\n\n  <p> On various systems, calls within libmpir still go through the PLT, TOC or\n      other mechanism, which makes the code bigger and slower than it needs to\n      be.\n\n  <p> The theory would be to have all GMP intra-library calls resolved directly\n      to the routines in the library.  
An application wouldn't be able to\n      replace a routine, the way it can normally, but there seems no good\n      reason to do that, in normal circumstances.\n\n  <p> The <code>visibility</code> attribute in recent gcc is good for this,\n      because it lets gcc omit unnecessary GOT pointer setups or whatever if it\n      finds all calls are local and there's no global data references.\n      Documented entrypoints would be <code>protected</code>, and purely\n      internal things not wanted by test programs or anything can be\n      <code>internal</code>.\n\n  <p> Unfortunately, on i386 it seems <code>protected</code> ends up causing\n      text segment relocations within libmpir.so, meaning the library code can't\n      be shared between processes, defeating the purpose of a shared library.\n      Perhaps this is just a gremlin in binutils (debian packaged\n      2.13.90.0.16-1).\n\n  <p> The linker can be told directly (with a link script, or options) to do\n      the same sort of thing.  This doesn't change the code emitted by gcc of\n      course, but it does mean calls are resolved directly to their targets,\n      avoiding a PLT entry.\n\n  <p> Keeping symbols private to libmpir.so is probably a good thing in general\n      too, to stop anyone even attempting to access them.  But some\n      undocumented things will need or want to be kept visible, for use by\n      mpfr, or the test and tune programs.  Libtool has a standard option for\n      selecting public symbols (used now for libmp).\n\n\n</ul>\n<hr>\n\n</body>\n</html>\n\n<!--\nLocal variables:\neval: (add-hook 'write-file-hooks 'time-stamp)\ntime-stamp-start: \"This file current as of \"\ntime-stamp-format: \"%:d %3b %:y\"\ntime-stamp-end: \"\\\\.\"\ntime-stamp-line-limit: 50\nEnd:\n-->\n"
  },
  {
    "path": "doc/devel/tasks.html",
    "content": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n<html>\n<head>\n  <title>GMP Itemized Development Tasks</title>\n  <link rel=\"shortcut icon\" href=\"favicon.ico\">\n  <link rel=\"stylesheet\" href=\"gmp.css\">\n  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n</head>\n\n<center>\n  <h1>\n    GMP Itemized Development Tasks\n  </h1>\n</center>\n\n<font size=-1>\nCopyright 2000, 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc. <br><br>\nCopyright 2008 William Hart.<br><br>\nThis file is part of the MPIR Library. <br><br>\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version. <br><br>\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details. <br><br>\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n</font>\n\n<hr>\n<!-- NB. timestamp updated automatically by emacs -->\n  This file current as of 21 Apr 2006.  \n  Please send comments to\n  <a href=\"http://groups.google.co.uk/group/mpir-devel/\">http://groups.google.co.uk/group/mpir-devel/</a>.\n\n<p> These are itemized GMP development tasks.  Not all the tasks\n    listed here are suitable for volunteers, but many of them are.\n    Please see the <a href=\"projects.html\">projects file</a> for more\n    sizeable projects.\n\n<h4>Correctness and Completeness</h4>\n<ul>\n<li> <code>_LONG_LONG_LIMB</code> in mpir.h is not namespace clean.  
Reported\n     by Patrick Pelissier.\n     <br>\n     We sort of mentioned <code>_LONG_LONG_LIMB</code> in past releases, so\n     need to be careful about changing it.  It used to be a define\n     applications had to set for long long limb systems, but that in\n     particular is no longer relevant now that it's established automatically.\n<li> The various reuse.c tests need to force reallocation by calling\n     <code>_mpz_realloc</code> with a small (1 limb) size.\n<li> One reuse case is missing from mpX/tests/reuse.c:\n     <code>mpz_XXX(a,a,a)</code>.\n<li> When printing <code>mpf_t</code> numbers with exponents &gt;2^53 on\n     machines with 64-bit <code>mp_exp_t</code>, the precision of\n     <code>__mp_bases[base].chars_per_bit_exactly</code> is insufficient and\n     <code>mpf_get_str</code> aborts.  Detect and compensate.  Alternately,\n     think seriously about using some sort of fixed-point integer value.\n     Avoiding unnecessary floating point is probably a good thing in general,\n     and it might be faster on some CPUs.\n<li> Make the string reading functions allow the `0x' prefix when the base is\n     explicitly 16.  They currently only allow that prefix when the base is\n     unspecified (zero).\n<li> <code>mpf_eq</code> is not always correct, when one operand is\n     1000000000... and the other operand is 0111111111..., i.e., extremely\n     close.  There is a special case in <code>mpf_sub</code> for this\n     situation; put similar code in <code>mpf_eq</code>.  [In progress.]\n<li> <code>mpf_eq</code> doesn't implement what gmp.texi specifies.  It should\n     not use just whole limbs, but partial limbs.  
[In progress.]\n<li> <code>mpf_set_str</code> doesn't validate its exponent, for instance\n     garbage 123.456eX789X is accepted (and an exponent 0 used), and overflow\n     of a <code>long</code> is not detected.\n<li> <code>mpf_add</code> doesn't check for a carry from truncated portions of\n     the inputs, and in that respect doesn't implement the \"infinite precision\n     followed by truncate\" specified in the manual.\n<li> Windows DLLs: tests/mpz/reuse.c and tests/mpf/reuse.c initialize global\n     variables with pointers to <code>mpz_add</code> etc, which doesn't work\n     when those routines are coming from a DLL (because they're effectively\n     function pointer global variables themselves).  Need to rearrange perhaps\n     to a set of calls to a test function rather than iterating over an array.\n<li> <code>mpz_pow_ui</code>: Detect when the result would be more memory than\n     a <code>size_t</code> can represent and raise some suitable exception,\n     probably an alloc call asking for <code>SIZE_T_MAX</code>, and if that\n     somehow succeeds then an <code>abort</code>.  Various size overflows of\n     this kind are not handled gracefully, probably resulting in segvs.\n     <br>\n     In <code>mpz_n_pow_ui</code>, detect when the count of low zero bits\n     exceeds an <code>unsigned long</code>.  
There's a (small) chance of this\n     happening but still having enough memory to represent the value.\n     Reported by Winfried Dreckmann in for instance <code>mpz_ui_pow_ui (x,\n     4UL, 1431655766UL)</code>.\n<li> <code>mpf</code>: Detect exponent overflow and raise some exception.\n     It'd be nice to allow the full <code>mp_exp_t</code> range since that's\n     how it's been in the past, but maybe dropping one bit would make it\n     easier to test if e1+e2 goes out of bounds.\n</ul>\n\n\n\n<h4>Machine Independent Optimization</h4>\n<ul>\n<li> <code>mpf_cmp</code>: For better cache locality, don't test for low zero\n     limbs until the high limbs fail to give an ordering.  Reduce code size by\n     turning the three <code>mpn_cmp</code>'s into a single loop stopping when\n     the end of one operand is reached (and then looking for a non-zero in the\n     rest of the other).\n<li> <code>mpf_mul_2exp</code>, <code>mpf_div_2exp</code>: The use of\n     <code>mpn_lshift</code> for any size&lt;=prec means repeated\n     <code>mul_2exp</code> and <code>div_2exp</code> calls accumulate low zero\n     limbs until size==prec+1 is reached.  
Those zeros will slow down\n     subsequent operations, especially if the value is otherwise only small.\n     If low bits of the low limb are zero, use <code>mpn_rshift</code> so as\n     to not increase the size.\n<li> <code>mpn_dc_sqrtrem</code>: Don't use <code>mpn_addmul_1</code> with\n     multiplier==2, instead either <code>mpn_addlsh1_n</code> when available,\n     or <code>mpn_lshift</code>+<code>mpn_add_n</code> if not.\n<li> <code>mpn_dc_sqrtrem</code>, <code>mpn_sqrtrem2</code>: Don't use\n     <code>mpn_add_1</code> and <code>mpn_sub_1</code> for 1 limb operations,\n     instead <code>ADDC_LIMB</code> and <code>SUBC_LIMB</code>.\n<li> <code>mpn_sqrtrem2</code>: Use plain variables for <code>sp[0]</code> and\n     <code>rp[0]</code> calculations, so the compiler needn't worry about\n     aliasing between <code>sp</code> and <code>rp</code>.\n<li> <code>mpn_sqrtrem</code>: Some work can be saved in the last step when\n     the remainder is not required, as noted in Paul's paper.\n<li> <code>mpq_add</code>, <code>mpq_add</code>: The division \"op1.den / gcd\"\n     is done twice, where of course only once is necessary.  Reported by Larry\n     Lambe.\n<li> <code>mpq_add</code>, <code>mpq_sub</code>: The gcd fits a single limb\n     with high probability and in this case <code>modlimb_invert</code> could\n     be used to calculate the inverse just once for the two exact divisions\n     \"op1.den / gcd\" and \"op2.den / gcd\", rather than letting\n     <code>mpn_divexact_1</code> do it each time.  This would require a new\n     <code>mpn_preinv_divexact_1</code> interface.  Not sure if it'd be worth\n     the trouble.\n<li> <code>mpq_add</code>, <code>mpq_sub</code>: The use of\n     <code>mpz_mul(x,y,x)</code> causes temp allocation or copying in\n     <code>mpz_mul</code> which can probably be avoided.  
A rewrite using\n     <code>mpn</code> might be best.\n<li> <code>mpn_gcdext</code>: Don't test <code>count_leading_zeros</code> for\n     zero, instead check the high bit of the operand and avoid invoking\n     <code>count_leading_zeros</code>.  This is an optimization on all\n     machines, and significant on machines with slow\n     <code>count_leading_zeros</code>, though it's possible an already\n     normalized operand might not be encountered very often.\n<li> Rewrite <code>umul_ppmm</code> to use floating-point for generating the\n     most significant limb (if <code>BITS_PER_MP_LIMB</code> &lt;= 52 bits).\n     (Peter Montgomery has some ideas on this subject.)\n<li> Improve the default <code>umul_ppmm</code> code in longlong.h: Add partial\n     products with fewer operations.\n<li> Consider inlining <code>mpz_set_ui</code>.  This would be both small and\n     fast, especially for compile-time constants, but would make application\n     binaries depend on having 1 limb allocated to an <code>mpz_t</code>,\n     preventing the \"lazy\" allocation scheme below.\n<li> Consider inlining <code>mpz_[cft]div_ui</code> and maybe\n     <code>mpz_[cft]div_r_ui</code>.  A <code>__gmp_divide_by_zero</code>\n     would be needed for the divide by zero test, unless that could be left to\n     <code>mpn_mod_1</code> (not sure currently whether all the risc chips\n     provoke the right exception there if using mul-by-inverse).\n<li> Consider inlining: <code>mpz_fits_s*_p</code>.  
The setups for\n     <code>LONG_MAX</code> etc would need to go into mpir.h, and on Cray it\n     might, unfortunately, be necessary to forcibly include &lt;limits.h&gt;\n     since there's no apparent way to get <code>SHRT_MAX</code> with an\n     expression (since <code>short</code> and <code>unsigned short</code> can\n     be different sizes).\n<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very\n     fast on one or two limb moduli, due to a lot of function call\n     overheads.  These could perhaps be handled as special cases.\n<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> want better\n     algorithm selection, and the latter should use REDC.  Both could\n     change to use an <code>mpn_powm</code> and <code>mpn_redc</code>.\n<li> <code>mpz_powm</code> REDC should do multiplications by <code>g[]</code>\n     using the division method when they're small, since the REDC form of a\n     small multiplier is normally a full size product.  Probably would need a\n     new tuned parameter to say what size multiplier is \"small\", as a function\n     of the size of the modulus.\n<li> <code>mpz_powm</code> REDC should handle even moduli if possible.  Maybe\n     this would mean for m=n*2^k doing mod n using REDC and an auxiliary\n     calculation mod 2^k, then putting them together at the end.\n<li> <code>mpn_gcd</code> might be able to be sped up on small to\n     moderate sizes by improving <code>find_a</code>, possibly just by\n     providing an alternate implementation for CPUs with slowish\n     <code>count_leading_zeros</code>.\n<li> Toom3 could use a low to high cache localized evaluate and interpolate.\n     The necessary <code>mpn_divexact_by3c</code> exists.\n<li> <code>mpf_set_str</code> produces low zero limbs when a string has a\n     fraction but is exactly representable, eg. 0.5 in decimal.  
These could be\n     stripped to save work in later operations.\n<li> <code>mpz_and</code>, <code>mpz_ior</code> and <code>mpz_xor</code> should\n     use <code>mpn_and_n</code> etc for the benefit of the small number of\n     targets with native versions of those routines.  Need to be careful not to\n     pass size==0.  Is some code sharing possible between the <code>mpz</code>\n     routines?\n<li> <code>mpf_add</code>: Don't do a copy to avoid overlapping operands\n     unless it's really necessary (currently only sizes are tested, not\n     whether r really is u or v).\n<li> <code>mpf_add</code>: Under the check for v having no effect on the\n     result, perhaps test for r==u and do nothing in that case, rather than\n     currently it looks like an <code>MPN_COPY_INCR</code> will be done to\n     reduce prec+1 limbs to prec.\n<li> <code>mpf_div_ui</code>: Instead of padding with low zeros, call\n     <code>mpn_divrem_1</code> asking for fractional quotient limbs.\n<li> <code>mpf_div_ui</code>: Eliminate <code>TMP_ALLOC</code>.  When r!=u\n     there's no overlap and the division can be called on those operands.\n     When r==u and is prec+1 limbs, then it's an in-place division.  If r==u\n     and not prec+1 limbs, then move the available limbs up to prec+1 and do\n     an in-place there.\n<li> <code>mpf_div_ui</code>: Whether the high quotient limb is zero can be\n     determined by testing the dividend for high&lt;divisor.  When non-zero, the\n     division can be done on prec dividend limbs instead of prec+1.  
The result\n     size is also known before the division, so that can be a tail call (once\n     the <code>TMP_ALLOC</code> is eliminated).\n<li> <code>mpn_divrem_2</code> could usefully accept unnormalized divisors and\n     shift the dividend on-the-fly, since this should cost nothing on\n     superscalar processors and avoid the need for temporary copying in\n     <code>mpn_tdiv_qr</code>.\n<li> <code>mpf_sqrt</code>: If r!=u, and if u doesn't need to be padded with\n     zeros, then there's no need for the tp temporary.\n<li> <code>mpq_cmp_ui</code> could form the <code>num1*den2</code> and\n     <code>num2*den1</code> products limb-by-limb from high to low and look at\n     each step for values differing by more than the possible carry bit from\n     the uncalculated portion.\n<li> <code>mpq_cmp</code> could do the same high-to-low progressive multiply\n     and compare.  The benefits of karatsuba and higher multiplication\n     algorithms are lost, but if it's assumed only a few high limbs will be\n     needed to determine an order then that's fine.\n<li> <code>mpn_add_1</code>, <code>mpn_sub_1</code>, <code>mpn_add</code>,\n     <code>mpn_sub</code>: Internally use <code>__GMPN_ADD_1</code> etc\n     instead of the functions, so they get inlined on all compilers, not just\n     gcc and others with <code>inline</code> recognised in mpir.h.\n     <code>__GMPN_ADD_1</code> etc are meant mostly to support application\n     inline <code>mpn_add_1</code> etc and if they don't come out good for\n     internal uses then special forms can be introduced, for instance many\n     internal uses are in-place.  
Sometimes a block of code is executed based\n     on the carry-out, rather than using it arithmetically, and those places\n     might want to do their own loops entirely.\n<li> <code>__gmp_extract_double</code> on 64-bit systems could use just one\n     bitfield for the mantissa extraction, not two, when endianness permits.\n     Might depend on the compiler allowing <code>long long</code> bit fields\n     when that's the only actual 64-bit type.\n<li> tal-notreent.c could keep a block of memory permanently allocated.\n     Currently the last nested <code>TMP_FREE</code> releases all memory, so\n     there's an allocate and free every time a top-level function using\n     <code>TMP</code> is called.  Would need\n     <code>mp_set_memory_functions</code> to tell tal-notreent.c to release\n     any cached memory when changing allocation functions though.\n<li> <code>__gmp_tmp_alloc</code> from tal-notreent.c could be partially\n     inlined.  If the current chunk has enough room then a couple of pointers\n     can be updated.  Only if more space is required then a call to some sort\n     of <code>__gmp_tmp_increase</code> would be needed.  The requirement that\n     <code>TMP_ALLOC</code> is an expression might make the implementation a\n     bit ugly and/or a bit sub-optimal.\n<pre>\n#define TMP_ALLOC(n)\n  ((ROUND_UP(n) &gt; current-&gt;end - current-&gt;point ?\n     __gmp_tmp_increase (ROUND_UP (n)) : 0),\n     current-&gt;point += ROUND_UP (n),\n     current-&gt;point - ROUND_UP (n))\n</pre>\n<li> <code>__mp_bases</code> has a lot of data for bases which are pretty much\n     never used.  Perhaps the table should just go up to base 16, and have\n     code to generate data above that, if and when required.  
Naturally this\n     assumes the code would be smaller than the data saved.\n<li> <code>__mp_bases</code> field <code>big_base_inverted</code> is only used\n     if <code>USE_PREINV_DIVREM_1</code> is true, and could be omitted\n     otherwise, to save space.\n<li> <code>mpz_get_str</code>, <code>mtox</code>: For power-of-2 bases, which\n     are of course fast, it seems a little silly to make a second pass over\n     the <code>mpn_get_str</code> output to convert to ASCII.  Perhaps combine\n     that with the bit extractions.\n<li> <code>mpz_gcdext</code>: If the caller requests only the S cofactor (of\n     A), and A&lt;B, then the code ends up generating the cofactor T (of B) and\n     deriving S from that.  Perhaps it'd be possible to arrange to get S in\n     the first place by calling <code>mpn_gcdext</code> with A+B,B.  This\n     might only be an advantage if A and B are about the same size.\n<li> <code>mpz_n_pow_ui</code> does a good job with small bases and stripping\n     powers of 2, but it's perhaps a bit too complicated for what it gains.\n     The simpler <code>mpn_pow_1</code> is a little faster on small exponents.\n     (Note some of the ugliness in <code>mpz_n_pow_ui</code> is due to\n     supporting <code>mpn_mul_2</code>.)\n     <br>\n     Perhaps the stripping of 2s in <code>mpz_n_pow_ui</code> should be\n     confined to single limb operands for simplicity and since that's where\n     the greatest gain would be.\n     <br>\n     Ideally <code>mpn_pow_1</code> and <code>mpz_n_pow_ui</code> would be\n     merged.  The reason <code>mpz_n_pow_ui</code> writes to an\n     <code>mpz_t</code> is that its callers leave it to make a good estimate\n     of the result size.  
Callers of <code>mpn_pow_1</code> already know the\n     size by separate means (<code>mp_bases</code>).\n<li> <code>mpz_invert</code> should call <code>mpn_gcdext</code> directly.\n</ul>\n\n\n<h4>Machine Dependent Optimization</h4>\n<ul>\n<li> <code>invert_limb</code> on various processors might benefit from the\n     little Newton iteration done for alpha and ia64.\n<li> Alpha 21264: <code>mpn_addlsh1_n</code> could be implemented with\n     <code>mpn_addmul_1</code>, since that code at 3.5 is a touch faster than\n     a separate <code>lshift</code> and <code>add_n</code> at\n     1.75+2.125=3.875.  Or very likely some specific <code>addlsh1_n</code>\n     code could beat both.\n<li> Alpha 21264: Improve feed-in code for <code>mpn_mul_1</code>,\n     <code>mpn_addmul_1</code>, and <code>mpn_submul_1</code>.\n<li> Alpha 21164: Rewrite <code>mpn_mul_1</code>, <code>mpn_addmul_1</code>,\n     and <code>mpn_submul_1</code> for the 21164.  This should use both integer\n     multiplies and floating-point multiplies.  For the floating-point\n     operations, the single-limb multiplier should be split into three 21-bit\n     chunks, or perhaps even better in four 16-bit chunks.  Probably possible\n     to reach 9 cycles/limb.\n<li> Alpha: GCC 3.4 will introduce <code>__builtin_ctzl</code>,\n     <code>__builtin_clzl</code> and <code>__builtin_popcountl</code> using\n     the corresponding CIX <code>ct</code> instructions, and\n     <code>__builtin_alpha_cmpbge</code>.  These should give GCC more\n     information about scheduling etc than the <code>asm</code> blocks\n     currently used in longlong.h and gmp-impl.h.\n<li> Alpha Unicos: Apparently there's no <code>alloca</code> on this system,\n     making <code>configure</code> choose the slower\n     <code>malloc-reentrant</code> allocation method.  Is there a better way?\n     Maybe variable-length arrays per notes below.\n<li> Alpha Unicos 21164, 21264: <code>.align</code> is not used since it pads\n     with garbage.  
Does the code get the intended slotting required for the\n     claimed speeds?  <code>.align</code> at the start of a function would\n     presumably be safe no matter how it pads.\n<li> ARM V5: <code>count_leading_zeros</code> can use the <code>clz</code>\n     instruction.  For GCC 3.4 and up, do this via <code>__builtin_clzl</code>\n     since then gcc knows it's \"predicable\".\n<li> Itanium: GCC 3.4 introduces <code>__builtin_popcount</code> which can be\n     used instead of an <code>asm</code> block.  The builtin should give gcc\n     more opportunities for scheduling, bundling and predication.\n     <code>__builtin_ctz</code> similarly (it just uses popcount as per\n     current longlong.h).\n<li> UltraSPARC/64: Optimize <code>mpn_mul_1</code>, <code>mpn_addmul_1</code>,\n     for s2 &lt; 2^32 (or perhaps for any zero 16-bit s2 chunk).  Not sure how\n     much this can improve the speed, though, since the symmetry that we rely\n     on is lost.  Perhaps we can just gain cycles when s2 &lt; 2^16, or more\n     accurately, when two 16-bit s2 chunks which are 16 bits apart are zero.\n<li> UltraSPARC/64: Write native <code>mpn_submul_1</code>, analogous to\n     <code>mpn_addmul_1</code>.\n<li> UltraSPARC/64: Write <code>umul_ppmm</code>.  Using four\n     \"<code>mulx</code>\"s either with an asm block or via the generic C code is\n     about 90 cycles.  Try using fp operations, and also try using karatsuba\n     for just three \"<code>mulx</code>\"s.\n<li> UltraSPARC/32: Rewrite <code>mpn_lshift</code>, <code>mpn_rshift</code>.\n     Will give 2 cycles/limb.  Trivial modifications of mpn/sparc64 should do.\n<li> UltraSPARC/32: Write special mpn_Xmul_1 loops for s2 &lt; 2^16.\n<li> UltraSPARC/32: Use <code>mulx</code> for <code>umul_ppmm</code> if\n     possible (see commented out code in longlong.h).  
This is unlikely to\n     save more than a couple of cycles, so perhaps isn't worth bothering with.\n<li> UltraSPARC/32: On Solaris gcc doesn't give us <code>__sparc_v9__</code>\n     or anything to indicate V9 support when -mcpu=v9 is selected.  See\n     gcc/config/sol2-sld-64.h.  Will need to pass something through from\n     ./configure to select the right code in longlong.h.  (Currently nothing\n     is lost because <code>mulx</code> for multiplying is commented out.)\n<li> UltraSPARC/32: <code>mpn_divexact_1</code> and\n     <code>mpn_modexact_1c_odd</code> can use a 64-bit inverse and take\n     64-bits at a time from the dividend, as per the 32-bit divisor case in\n     mpn/sparc64/mode1o.c.  This must be done in assembler, since the full\n     64-bit registers (<code>%gN</code>) are not available from C.\n<li> UltraSPARC/32: <code>mpn_divexact_by3c</code> can work 64-bits at a time\n     using <code>mulx</code>, in assembler.  This would be the same as for\n     sparc64.\n<li> UltraSPARC: <code>modlimb_invert</code> might save a few cycles from\n     masking down to just the useful bits at each point in the calculation,\n     since <code>mulx</code> speed depends on the highest bit set.  Either\n     explicit masks or small types like <code>short</code> and\n     <code>int</code> ought to work.\n<li> Sparc64 HAL R1 <code>popc</code>: This chip reputedly implements\n     <code>popc</code> properly (see gcc sparc.md).  Would need to recognise\n     it as <code>sparchalr1</code> or something in configure / config.sub /\n     config.guess.  <code>popc_limb</code> in gmp-impl.h could use this (per\n     commented out code).  <code>count_trailing_zeros</code> could use it too.\n<li> PA64: Improve <code>mpn_addmul_1</code>, <code>mpn_submul_1</code>, and\n     <code>mpn_mul_1</code>.  The current code runs at 11 cycles/limb.  It\n     should be possible to saturate the cache, which will happen at 8\n     cycles/limb (7.5 for mpn_mul_1).  
Write special loops for s2 &lt; 2^32;\n     it should be possible to make them run at about 5 cycles/limb.\n<li> PPC601: See which of the power or powerpc32 code runs better.  Currently\n     the powerpc32 is used, but only because it's the default for\n     <code>powerpc*</code>.\n<li> PPC630: Rewrite <code>mpn_addmul_1</code>, <code>mpn_submul_1</code>, and\n     <code>mpn_mul_1</code>.  Use both integer and floating-point operations,\n     possibly two floating-point and one integer limb per loop.  Split operands\n     into four 16-bit chunks for fast fp operations.  Should easily reach 9\n     cycles/limb (using one int + one fp), but perhaps even 7 cycles/limb\n     (using one int + two fp).\n<li> PPC630: <code>mpn_rshift</code> could do the same sort of unrolled loop\n     as <code>mpn_lshift</code>.  Some judicious use of m4 might let the two\n     share source code, or with a register to control the loop direction\n     perhaps even share object code.\n<li> Implement <code>mpn_mul_basecase</code> and <code>mpn_sqr_basecase</code>\n     for important machines.  Helping the generic sqr_basecase.c with an\n     <code>mpn_sqr_diagonal</code> might be enough for some of the RISCs.\n<li> POWER2/POWER2SC: Schedule <code>mpn_lshift</code>/<code>mpn_rshift</code>.\n     Will bring time from 1.75 to 1.25 cycles/limb.\n<li> X86: Optimize non-MMX <code>mpn_lshift</code> for shifts by 1.  (See\n     Pentium code.)\n<li> X86: Good authority has it that in the past an inline <code>rep\n     movs</code> would upset GCC register allocation for the whole function.\n     Is this still true in GCC 3?  It uses <code>rep movs</code> itself for\n     <code>__builtin_memcpy</code>.  Examine the code for some simple and\n     complex functions to find out.  
Inlining <code>rep movs</code> would be\n     desirable, it'd be both smaller and faster.\n<li> Pentium P54: <code>mpn_lshift</code> and <code>mpn_rshift</code> can come\n     down from 6.0 c/l to 5.5 or 5.375 by paying attention to pairing after\n     <code>shrdl</code> and <code>shldl</code>, see mpn/x86/pentium/README.\n<li> Pentium P55 MMX: <code>mpn_lshift</code> and <code>mpn_rshift</code>\n     might benefit from some destination prefetching.\n<li> PentiumPro: <code>mpn_divrem_1</code> might be able to use a\n     mul-by-inverse, hoping for maybe 30 c/l.\n<li> K7: <code>mpn_lshift</code> and <code>mpn_rshift</code> might be able to\n     do something branch-free for unaligned startups, and shaving one insn\n     from the loop with alternative indexing might save a cycle.\n<li> PPC32: Try using fewer registers in the current <code>mpn_lshift</code>.\n     The pipeline is now extremely deep, perhaps unnecessarily deep.\n<li> Fujitsu VPP: Vectorize main functions, perhaps in assembly language.\n<li> Fujitsu VPP: Write <code>mpn_mul_basecase</code> and\n     <code>mpn_sqr_basecase</code>.  This should use a \"vertical multiplication\n     method\", to avoid carry propagation.  splitting one of the operands in\n     11-bit chunks.\n<li> Pentium: <code>mpn_lshift</code> by 31 should use the special rshift\n     by 1 code, and vice versa <code>mpn_rshift</code> by 31 should use the\n     special lshift by 1.  This would be best as a jump across to the other\n     routine, could let both live in lshift.asm and omit rshift.asm on finding\n     <code>mpn_rshift</code> already provided.\n<li> Cray T3E: Experiment with optimization options.  In particular,\n     -hpipeline3 seems promising.  
We should at least up -O to -O2 or -O3.\n<li> Cray: <code>mpn_com_n</code> and <code>mpn_and_n</code> etc very probably\n     wants a pragma like <code>MPN_COPY_INCR</code>.\n<li> Cray vector systems: <code>mpn_lshift</code>, <code>mpn_rshift</code>,\n     <code>mpn_popcount</code> and <code>mpn_hamdist</code> are nice and small\n     and could be inlined to avoid function calls.\n<li> Cray: Variable length arrays seem to be faster than the tal-notreent.c\n     scheme.  Not sure why, maybe they merely give the compiler more\n     information about aliasing (or the lack thereof).  Would like to modify\n     <code>TMP_ALLOC</code> to use them, or introduce a new scheme.  Memory\n     blocks wanted unconditionally are easy enough, those wanted only\n     sometimes are a problem.  Perhaps a special size calculation to ask for a\n     dummy length 1 when unwanted, or perhaps an inlined subroutine\n     duplicating code under each conditional.  Don't really want to turn\n     everything into a dog's dinner just because Cray don't offer an\n     <code>alloca</code>.\n<li> Cray: <code>mpn_get_str</code> on power-of-2 bases ought to vectorize.\n     Does it?  <code>bits_per_digit</code> and the inner loop over bits in a\n     limb might prevent it.  Perhaps special cases for binary, octal and hex\n     would be worthwhile (very possibly for all processors too).\n<li> S390: <code>BSWAP_LIMB_FETCH</code> looks like it could be done with\n     <code>lrvg</code>, as per glibc sysdeps/s390/s390-64/bits/byteswap.h.\n     This is only for 64-bit mode or something is it, since 32-bit mode has\n     other code?  Also, is it worth using for <code>BSWAP_LIMB</code> too, or\n     would that mean a store and re-fetch?  Presumably that's what comes out\n     in glibc.\n<li> Improve <code>count_leading_zeros</code> for 64-bit machines:\n  <pre>\n\t   if ((x &gt;&gt; 32) == 0) { x &lt;&lt;= 32; cnt += 32; }\n\t   if ((x &gt;&gt; 48) == 0) { x &lt;&lt;= 16; cnt += 16; }\n\t   ... 
</pre>\n<li> IRIX 6 MIPSpro compiler has an <code>__inline</code> which could perhaps\n     be used in <code>__GMP_EXTERN_INLINE</code>.  What would be the right way\n     to identify suitable versions of that compiler?\n<li> IRIX <code>cc</code> is rumoured to have an <code>_int_mult_upper</code>\n     (in <code>&lt;intrinsics.h&gt;</code> like Cray), but it didn't seem to\n     exist on some IRIX 6.5 systems tried.  If it does actually exist\n     somewhere it would very likely be an improvement over a function call to\n     umul.asm.\n<li> <code>mpn_get_str</code> final divisions by the base with\n     <code>udiv_qrnd_unnorm</code> could use some sort of multiply-by-inverse\n     on suitable machines.  This ends up happening for decimal by presenting\n     the compiler with a run-time constant, but the same for other bases would\n     be good.  Perhaps use could be made of the fact base&lt;256.\n<li> <code>mpn_umul_ppmm</code>, <code>mpn_udiv_qrnnd</code>: Return a\n     structure like <code>div_t</code> to avoid going through memory, in\n     particular helping RISCs that don't do store-to-load forwarding.  Clearly\n     this is only possible if the ABI returns a structure of two\n     <code>mp_limb_t</code>s in registers.\n     <br>\n     On PowerPC, structures are returned in memory on AIX and Darwin.  In SVR4\n     they're returned in registers, except that draft SVR4 had said memory, so\n     it'd be prudent to check which is done.  
We can jam the compiler into the\n     right mode if we know how, since all this is purely internal to libmpir.\n     (gcc has an option, though of course gcc doesn't matter since we use\n     inline asm there.)\n</ul>\n\n<h4>New Functionality</h4>\n<ul>\n<li> Maybe add <code>mpz_crr</code> (Chinese Remainder Reconstruction).\n<li> Let `0b' and `0B' mean binary input everywhere.\n<li> <code>mpz_init</code> and <code>mpq_init</code> could do lazy allocation.\n     Set <code>ALLOC(var)</code> to 0 to indicate nothing allocated, and let\n     <code>_mpz_realloc</code> do the initial alloc.  Set\n     <code>z-&gt;_mp_d</code> to a dummy that <code>mpz_get_ui</code> and\n     similar can unconditionally fetch from.  Niels Möller has had a go at\n     this.\n     <br>\n     The advantages of the lazy scheme would be:\n     <ul>\n     <li> Initial allocate would be the size required for the first value\n          stored, rather than getting 1 limb in <code>mpz_init</code> and then\n          more or less immediately reallocating.\n     <li> <code>mpz_init</code> would only store magic values in the\n          <code>mpz_t</code> fields, and could be inlined.\n     <li> A fixed initializer could even be used by applications, like\n          <code>mpz_t z = MPZ_INITIALIZER;</code>, which might be convenient\n          for globals.\n     </ul>\n     The advantages of the current scheme are:\n     <ul>\n     <li> <code>mpz_set_ui</code> and other similar routines needn't check the\n          size allocated and can just store unconditionally.\n     <li> <code>mpz_set_ui</code> and perhaps others like\n          <code>mpz_tdiv_r_ui</code> and a prospective\n          <code>mpz_set_ull</code> could be inlined.\n     </ul>\n<li> Add <code>mpf_out_raw</code> and <code>mpf_inp_raw</code>.  Make sure\n     format is portable between 32-bit and 64-bit machines, and between\n     little-endian and big-endian machines.  
A format which MPFR can use too\n     would be good.\n<li> <code>mpn_and_n</code> ... <code>mpn_copyd</code>: Perhaps make the mpn\n     logops and copys available in mpir.h, either as library functions or\n     inlines, with the availability of library functions instantiated in the\n     generated mpir.h at build time.\n<li> <code>mpz_set_str</code> etc variants taking string lengths rather than\n     null-terminators.\n<li> <code>mpz_andn</code>, <code>mpz_iorn</code>, <code>mpz_nand</code>,\n     <code>mpz_nior</code>, <code>mpz_xnor</code> might be useful additions,\n     if they could share code with the current such functions (which should be\n     possible).\n<li> <code>mpz_and_ui</code> etc might be of use sometimes.  Suggested by\n     Niels Möller.\n<li> <code>mpf_set_str</code> and <code>mpf_inp_str</code> could usefully\n     accept 0x, 0b etc when base==0.  Perhaps the exponent could default to\n     decimal in this case, with a further 0x, 0b etc allowed there.\n     Eg. 0xFFAA@0x5A.  A leading \"0\" for octal would match the integers, but\n     probably something like \"0.123\" ought not mean octal.\n<li> <code>GMP_LONG_LONG_LIMB</code> or some such could become a documented\n     feature of mpir.h, so applications could know whether to\n     <code>printf</code> a limb using <code>%lu</code> or <code>%Lu</code>.\n<li> <code>GMP_PRIdMP_LIMB</code> and similar defines following C99\n     &lt;inttypes.h&gt; might be of use to applications printing limbs.  But\n     if <code>GMP_LONG_LONG_LIMB</code> or whatever is added then perhaps this\n     can easily enough be left to applications.\n<li> <code>gmp_printf</code> could accept <code>%b</code> for binary output.\n     It'd be nice if it worked for plain <code>int</code> etc too, not just\n     <code>mpz_t</code> etc.\n<li> <code>gmp_printf</code> in fact could usefully accept an arbitrary base,\n     for both integer and float conversions.  
A base either in the format\n     string or as a parameter with <code>*</code> should be allowed.  Maybe\n     <code>&amp;13b</code> (b for base) or something like that.\n<li> <code>gmp_printf</code> could perhaps accept <code>mpq_t</code> for float\n     conversions, eg. <code>\"%.4Qf\"</code>.  This would be merely for\n     convenience, but still might be useful.  Rounding would be the same as\n     for an <code>mpf_t</code> (ie. currently round-to-nearest, but not\n     actually documented).  Alternately, perhaps a separate\n     <code>mpq_get_str_point</code> or some such might be more use.  Suggested\n     by Pedro Gimeno.\n<li> <code>mpz_rscan0</code> or <code>mpz_revscan0</code> or some such\n     searching towards the low end of an integer might match\n     <code>mpz_scan0</code> nicely.  Likewise for <code>scan1</code>.\n     Suggested by Roberto Bagnara.\n<li> <code>mpz_bit_subset</code> or some such to test whether one integer is a\n     bitwise subset of another might be of use.  Some sort of return value\n     indicating whether it's a proper or non-proper subset would be good and\n     wouldn't cost anything in the implementation.  Suggested by Roberto\n     Bagnara.\n<li> <code>mpf_get_ld</code>, <code>mpf_set_ld</code>: Conversions between\n     <code>mpf_t</code> and <code>long double</code>, suggested by Dan\n     Christensen.  Other <code>long double</code> routines might be desirable\n     too, but <code>mpf</code> would be a start.\n     <br>\n     <code>long double</code> is an ANSI-ism, so everything involving it would\n     need to be suppressed on a K&amp;R compiler.\n     <br>\n     There'd be some work to be done by <code>configure</code> to recognise\n     the format in use, MPFR has a start on this.  Often <code>long\n     double</code> is the same as <code>double</code>, which is easy but\n     pretty pointless.  
A single float format detector macro could look at\n     <code>double</code> then <code>long double</code>\n     <br>\n     Sometimes there's a compiler option for the size of a <code>long\n     double</code>, eg. xlc on AIX can use either 64-bit or 128-bit.  It's\n     probably simplest to regard this as a compiler compatibility issue, and\n     leave it to users or sysadmins to ensure application and library code is\n     built the same.\n<li> <code>mpz_sqrt_if_perfect_square</code>: When\n     <code>mpz_perfect_square_p</code> does its tests it calculates a square\n     root and then discards it.  For some applications it might be useful to\n     return that root.  Suggested by Jason Moxham.\n<li> <code>mpz_get_ull</code>, <code>mpz_set_ull</code>,\n     <code>mpz_get_sll</code>, <code>mpz_set_sll</code>: Conversions for\n     <code>long long</code>.  These would aid interoperability, though a\n     mixture of GMP and <code>long long</code> would probably not be too\n     common.  Since <code>long long</code> is not always available (it's in\n     C99 and GCC though), disadvantages of using <code>long long</code> in\n     libmpir.a would be\n     <ul>\n     <li> Library contents vary according to the build compiler.\n     <li> mpir.h would need an ugly <code>#ifdef</code> block to decide if the\n          application compiler could take the <code>long long</code>\n          prototypes.\n     <li> Some sort of <code>LIBGMP_HAS_LONGLONG</code> might be wanted to\n          indicate whether the functions are available.  (Applications using\n          autoconf could probe the library too.)\n     </ul>\n     It'd be possible to defer the need for <code>long long</code> to\n     application compile time, by having something like\n     <code>mpz_set_2ui</code> called with two halves of a <code>long\n     long</code>.  
Disadvantages of this would be,\n     <ul>\n     <li> Bigger code in the application, though perhaps not if a <code>long\n          long</code> is normally passed as two halves anyway.\n     <li> <code>mpz_get_ull</code> would be a rather big inline, or would have\n          to be two function calls.\n     <li> <code>mpz_get_sll</code> would be a worse inline, and would put the\n          treatment of <code>-0x10..00</code> into applications (see\n          <code>mpz_get_si</code> correctness above).\n     <li> Although having libmpir.a independent of the build compiler is nice,\n          it sort of sacrifices the capabilities of a good compiler to\n          uniformity with inferior ones.\n     </ul>\n     Plain use of <code>long long</code> is probably the lesser evil, if only\n     because it makes best use of gcc.  In fact perhaps it would suffice to\n     guarantee <code>long long</code> conversions only when using GCC for both\n     application and library.  That would cover free software, and we can\n     worry about selected vendor compilers later.\n     <br>\n     In C++ the situation is probably clearer, we demand fairly recent C++ so\n     <code>long long</code> should be available always.  We'd probably prefer\n     to have the C and C++ the same in respect of <code>long long</code>\n     support, but it would be possible to have it unconditionally in mpirxx.h,\n     by some means or another.\n<li> <code>mpz_strtoz</code> parsing the same as <code>strtol</code>.\n     Suggested by Alexander Kruppa.\n</ul>\n\n\n<h4>Configuration</h4>\n\n<ul>\n<li> Alpha ev7, ev79: Add code to config.guess to detect these.  Believe ev7\n     will be \"3-1307\" in the current switch, but need to verify that.  
(On\n     OSF, current configfsf.guess identifies ev7 using psrinfo, we need to do\n     it ourselves for other systems.)\n<li> Alpha OSF: Libtool (version 1.5) doesn't seem to recognise this system is\n     \"pic always\" and ends up running gcc twice with the same options.  This\n     is wasteful, but harmless.  Perhaps a newer libtool will be better.\n<li> ARM: <code>umul_ppmm</code> in longlong.h always uses <code>umull</code>,\n     but is that available only for M series chips or some such?  Perhaps it\n     should be configured in some way.\n<li> HPPA: config.guess should recognize 7000, 7100, 7200, and 8x00.\n<li> HPPA: gcc 3.2 introduces a <code>-mschedule=7200</code> etc parameter,\n     which could be driven by an exact hppa cpu type.\n<li> Mips: config.guess should say mipsr3000, mipsr4000, mipsr10000, etc.\n     \"hinv -c processor\" gives lots of information on Irix.  Standard\n     config.guess appends \"el\" to indicate endianness, but\n     <code>AC_C_BIGENDIAN</code> seems the best way to handle that for GMP.\n<li> PowerPC: The function descriptor nonsense for AIX is currently driven by\n     <code>*-*-aix*</code>.  It might be more reliable to do some sort of\n     feature test, examining the compiler output perhaps.  It might also be\n     nice to merge the aix.m4 files into powerpc-defs.m4.\n<li> config.m4 is generated only by the configure script, it won't be\n     regenerated by config.status.  Creating it as an <code>AC_OUTPUT</code>\n     would work, but it might upset \"make\" to have things like <code>L$</code>\n     get into the Makefiles through <code>AC_SUBST</code>.\n     <code>AC_CONFIG_COMMANDS</code> would be the alternative.  With some\n     careful m4 quoting the <code>changequote</code> calls might not be\n     needed, which might free up the order in which things had to be output.\n<li> Automake: Latest automake has a <code>CCAS</code>, <code>CCASFLAGS</code>\n     scheme.  
Though we probably wouldn't be using its assembler support we\n     could try to use those variables in compatible ways.\n<li> <code>GMP_LDFLAGS</code> could probably be done with plain\n     <code>LDFLAGS</code> already used by automake for all linking.  But with\n     a bit of luck the next libtool will pass pretty much all\n     <code>CFLAGS</code> through to the compiler when linking, making\n     <code>GMP_LDFLAGS</code> unnecessary.\n<li> mpn/Makeasm.am uses <code>-c</code> and <code>-o</code> together in the\n     .S and .asm rules, but apparently that isn't completely portable (there's\n     an autoconf <code>AC_PROG_CC_C_O</code> test for it).  So far we've not\n     had problems, but perhaps the rules could be rewritten to use \"foo.s\" as\n     the temporary, or to do a suitable \"mv\" of the result.  The only danger\n     from using foo.s would be if a compile failed and the temporary foo.s\n     then looked like the primary source.  Hopefully if the\n     <code>SUFFIXES</code> are ordered to have .S and .asm ahead of .s that\n     wouldn't happen.  Might need to check.\n</ul>\n\n\n<h4>Random Numbers</h4>\n<ul>\n<li> <code>_gmp_rand</code> is not particularly fast on the linear\n     congruential algorithm and could stand various improvements.\n     <ul>\n     <li> Make a second seed area within <code>gmp_randstate_t</code> (or\n          <code>_mp_algdata</code> rather) to save some copying.\n     <li> Make a special case for a single limb <code>2exp</code> modulus, to\n          avoid <code>mpn_mul</code> calls.  
Perhaps the same for two limbs.\n     <li> Inline the <code>lc</code> code, to avoid a function call and\n          <code>TMP_ALLOC</code> for every chunk.\n     <li> Perhaps the <code>2exp</code> and general LC cases should be split,\n          for clarity (if the general case is retained).\n     </ul>\n<li> <code>gmp_randstate_t</code> used for parameters perhaps should become\n     <code>gmp_randstate_ptr</code> the same as other types.\n<li> Some of the empirical randomness tests could be included in a \"make\n     check\".  They ought to work everywhere, for a given seed at least.\n</ul>\n\n\n<h4>C++</h4>\n<ul>\n<li> <code>mpz_class(string)</code>, etc: Use the C++ global locale to\n     identify whitespace.\n     <br>\n     <code>mpf_class(string)</code>: Use the C++ global locale decimal point,\n     rather than the C one.\n     <br>\n     Consider making these variant <code>mpz_set_str</code> etc forms\n     available for <code>mpz_t</code> too, not just <code>mpz_class</code>\n     etc.\n<li> <code>mpq_class operator+=</code>: Don't emit an unnecessary\n     <code>mpq_set(q,q)</code> before <code>mpz_addmul</code> etc.\n<li> Put various bits of mpirxx.h into libmpirxx, to avoid excessive inlining.\n     Candidates for this would be,\n     <ul>\n     <li> <code>mpz_class(const char *)</code>, etc: since they're normally\n          not fast anyway, and we can hide the exception <code>throw</code>.\n     <li> <code>mpz_class(string)</code>, etc: to hide the <code>cstr</code>\n          needed to get to the C conversion function.\n     <li> <code>mpz_class string, char*</code> etc constructors: likewise to\n          hide the throws and conversions.\n     <li> <code>mpz_class::get_str</code>, etc: to hide the <code>char*</code>\n          to <code>string</code> conversion and free.  
Perhaps\n          <code>mpz_get_str</code> can write directly into a\n          <code>string</code>, to avoid copying.\n          <br>\n          Consider making such <code>string</code> returning variants\n          available for use with plain <code>mpz_t</code> etc too.\n     </ul>\n</ul>\n\n<h4>Miscellaneous</h4>\n<ul>\n<li> <code>mpz_gcdext</code> and <code>mpn_gcdext</code> ought to document\n     what range of values the generated cofactors can take, and preferably\n     ensure the definition uniquely specifies the cofactors for given inputs.\n     A basic extended Euclidean algorithm or multi-step variant leads to\n     |x|&lt;|b| and |y|&lt;|a| or something like that, but there's probably\n     two solutions under just those restrictions.\n<li> demos/factorize.c: use <code>mpz_divisible_ui_p</code> rather than\n     <code>mpz_tdiv_qr_ui</code>.  (Of course dividing multiple primes at a\n     time would be better still.)\n<li> The various test programs use quite a bit of the main\n     <code>libmpir</code>.  This establishes good cross-checks, but it might be\n     better to use simple reference routines where possible.  Where it's not\n     possible some attention could be paid to the order of the tests, so a\n     <code>libmpir</code> routine is only used for tests once it seems to be\n     good.\n<li> <code>MUL_FFT_THRESHOLD</code> etc: the FFT thresholds should allow a\n     return to a previous k at certain sizes.  This arises basically due to\n     the step effect caused by size multiples effectively used for each k.\n     Looking at a graph makes it fairly clear.\n<li> <code>__gmp_doprnt_mpf</code> does a rather unattractive round-to-nearest\n     on the string returned by <code>mpf_get_str</code>.  
Perhaps some variant\n     of <code>mpf_get_str</code> could be made which would better suit.\n</ul>\n\n\n<h4>Aids to Development</h4>\n<ul>\n<li> Add <code>ASSERT</code>s at the start of each user-visible mpz/mpq/mpf\n     function to check the validity of each <code>mp?_t</code> parameter, in\n     particular to check they've been <code>mp?_init</code>ed.  This might\n     catch elementary mistakes in user programs.  Care would need to be taken\n     over <code>MPZ_TMP_INIT</code>ed variables used internally.  If nothing\n     else then consistency checks like size&lt;=alloc, ptr not\n     <code>NULL</code> and ptr+size not wrapping around the address space,\n     would be possible.  A more sophisticated scheme could track\n     <code>_mp_d</code> pointers and ensure only a valid one is used.  Such a\n     scheme probably wouldn't be reentrant, not without some help from the\n     system.\n<li> tune/time.c could try to determine at runtime whether\n     <code>getrusage</code> and <code>gettimeofday</code> are reliable.\n     Currently we pretend in configure that the dodgy m68k netbsd 1.4.1\n     <code>getrusage</code> doesn't exist.  If a test might take a long time\n     to run then perhaps cache the result in a file somewhere.\n<li> tune/time.c could choose the default precision based on the\n     <code>speed_unittime</code> determined, independent of the method in use.\n<li> Cray vector systems: CPU frequency could be determined from\n     <code>sysconf(_SC_CLK_TCK)</code>, since it seems to be clock cycle\n     based.  Is this true for all Cray systems?  Would like some documentation\n     or something to confirm.\n</ul>\n\n\n<h4>Documentation</h4>\n<ul>\n<li> <code>mpz_inp_str</code> (etc) doesn't say when it stops reading digits.\n<li> <code>mpn_get_str</code> isn't terribly clear about how many digits it\n     produces.  It'd probably be possible to say at most one leading zero,\n     which is what both it and <code>mpz_get_str</code> currently do.  
But\n     want to be careful not to bind ourselves to something that might not suit\n     another implementation.\n<li> <code>va_arg</code> doesn't do the right thing with <code>mpz_t</code>\n     etc directly, but instead needs a pointer type like <code>MP_INT*</code>.\n     It'd be good to show how to do this, but we'd either need to document\n     <code>mpz_ptr</code> and friends, or perhaps fallback on something\n     slightly nasty with <code>void*</code>.\n</ul>\n\n\n<h4>Bright Ideas</h4>\n\n<p> The following may or may not be feasible, and aren't likely to get done in the\nnear future, but are at least worth thinking about.\n\n<ul>\n<li> Reorganize longlong.h so that we can inline the operations even for the\n     system compiler.  When there is no such compiler feature, make calls to\n     stub functions.  Write such stub functions for as many machines as\n     possible.\n<li> longlong.h could declare when it's using, or would like to use,\n     <code>mpn_umul_ppmm</code>, and the corresponding umul.asm file could be\n     included in libmpir only in that case, the same as is effectively done for\n     <code>__clz_tab</code>.  Likewise udiv.asm and perhaps cntlz.asm.  This\n     would only be a very small space saving, so perhaps not worth the\n     complexity.\n<li> longlong.h could be built at configure time by concatenating or\n     #including fragments from each directory in the mpn path.  This would\n     select CPU specific macros the same way as CPU specific assembler code.\n     Code used would no longer depend on cpp predefines, and the current\n     nested conditionals could be flattened out.\n<li> <code>mpz_get_si</code> returns 0x80000000 for -0x100000000, whereas it's\n     sort of supposed to return the low 31 (or 63) bits.  
But this is\n     undocumented, and perhaps not too important.\n<li> <code>mpz_init_set*</code> and <code>mpz_realloc</code> could allocate\n     say an extra 16 limbs over what's needed, so as to reduce the chance of\n     having to do a reallocate if the <code>mpz_t</code> grows a bit more.\n     This could only be an option, since it'd badly bloat memory usage in\n     applications using many small values.\n<li> <code>mpq</code> functions could perhaps check for numerator or\n     denominator equal to 1, on the assumption that integers or\n     denominator-only values might be expected to occur reasonably often.\n<li> <code>count_trailing_zeros</code> is used on more or less uniformly\n     distributed numbers in a couple of places.  For some CPUs\n     <code>count_trailing_zeros</code> is slow and it's probably worth handling\n     the frequently occurring 0 to 2 trailing zeros cases specially.\n<li> <code>mpf_t</code> might like to let the exponent be undefined when\n     size==0, instead of requiring it 0 as now.  It should be possible to do\n     size==0 tests before paying attention to the exponent.  The advantage is\n     not needing to set exp in the various places a zero result can arise,\n     which avoids some tedium but is otherwise perhaps not too important.\n     Currently <code>mpz_set_f</code> and <code>mpf_cmp_ui</code> depend on\n     exp==0, maybe elsewhere too.\n<li> <code>__gmp_allocate_func</code>: Could use GCC <code>__attribute__\n     ((malloc))</code> on this, though don't know if it'd do much.  GCC 3.0\n     allows that attribute on functions, but not function pointers (see info\n     node \"Attribute Syntax\"), so would need a new autoconf test.  This can\n     wait until there's a GCC that supports it.\n<li> <code>mpz_add_ui</code> contains two <code>__GMPN_COPY</code>s, one from\n     <code>mpn_add_1</code> and one from <code>mpn_sub_1</code>.  If those two\n     routines were opened up a bit maybe that code could be shared.  
When a\n     copy needs to be done there's no carry to append for the add, and if the\n     copy is non-empty no high zero for the sub.\n</ul>\n\n\n<h4>Old and Obsolete Stuff</h4>\n\n<p> The following tasks apply to chips or systems that are old and/or obsolete.\nIt's unlikely anything will be done about them unless anyone is actively using\nthem.\n\n<ul>\n<li> Sparc32: The integer based udiv_nfp.asm used to be selected by\n     <code>configure --nfp</code> but that option is gone now that autoconf is\n     used.  The file could go somewhere suitable in the mpn search if any\n     chips might benefit from it, though it's possible we don't currently\n     differentiate enough exact cpu types to do this properly.\n<li> VAX D and G format <code>double</code> floats are straightforward and\n     could perhaps be handled directly in <code>__gmp_extract_double</code>\n     and maybe in <code>mpn_get_d</code>, rather than falling back on the\n     generic code.  (Both formats are detected by <code>configure</code>.)\n</ul>\n\n\n<hr>\n\n</body>\n</html>\n\n<!--\nLocal variables:\neval: (add-hook 'write-file-hooks 'time-stamp)\ntime-stamp-start: \"This file current as of \"\ntime-stamp-format: \"%:d %3b %:y\"\ntime-stamp-end: \"\\\\.\"\ntime-stamp-line-limit: 50\nEnd:\n-->\n"
  },
  {
    "path": "doc/fdl.texi",
    "content": "@c The GNU Free Documentation License.\n@center Version 1.3, 3 November 2008\n\n@c This file is intended to be included within another document,\n@c hence no sectioning command or @node.\n\n@display\nCopyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.\n@uref{http://fsf.org/}\n\nEveryone is permitted to copy and distribute verbatim copies\nof this license document, but changing it is not allowed.\n@end display\n\n@enumerate 0\n@item\nPREAMBLE\n\nThe purpose of this License is to make a manual, textbook, or other\nfunctional and useful document @dfn{free} in the sense of freedom: to\nassure everyone the effective freedom to copy and redistribute it,\nwith or without modifying it, either commercially or noncommercially.\nSecondarily, this License preserves for the author and publisher a way\nto get credit for their work, while not being considered responsible\nfor modifications made by others.\n\nThis License is a kind of ``copyleft'', which means that derivative\nworks of the document must themselves be free in the same sense.  It\ncomplements the GNU General Public License, which is a copyleft\nlicense designed for free software.\n\nWe have designed this License in order to use it for manuals for free\nsoftware, because free software needs free documentation: a free\nprogram should come with manuals providing the same freedoms that the\nsoftware does.  But this License is not limited to software manuals;\nit can be used for any textual work, regardless of subject matter or\nwhether it is published as a printed book.  We recommend this License\nprincipally for works whose purpose is instruction or reference.\n\n@item\nAPPLICABILITY AND DEFINITIONS\n\nThis License applies to any manual or other work, in any medium, that\ncontains a notice placed by the copyright holder saying it can be\ndistributed under the terms of this License.  
Such a notice grants a\nworld-wide, royalty-free license, unlimited in duration, to use that\nwork under the conditions stated herein.  The ``Document'', below,\nrefers to any such manual or work.  Any member of the public is a\nlicensee, and is addressed as ``you''.  You accept the license if you\ncopy, modify or distribute the work in a way requiring permission\nunder copyright law.\n\nA ``Modified Version'' of the Document means any work containing the\nDocument or a portion of it, either copied verbatim, or with\nmodifications and/or translated into another language.\n\nA ``Secondary Section'' is a named appendix or a front-matter section\nof the Document that deals exclusively with the relationship of the\npublishers or authors of the Document to the Document's overall\nsubject (or to related matters) and contains nothing that could fall\ndirectly within that overall subject.  (Thus, if the Document is in\npart a textbook of mathematics, a Secondary Section may not explain\nany mathematics.)  The relationship could be a matter of historical\nconnection with the subject or with related matters, or of legal,\ncommercial, philosophical, ethical or political position regarding\nthem.\n\nThe ``Invariant Sections'' are certain Secondary Sections whose titles\nare designated, as being those of Invariant Sections, in the notice\nthat says that the Document is released under this License.  If a\nsection does not fit the above definition of Secondary then it is not\nallowed to be designated as Invariant.  The Document may contain zero\nInvariant Sections.  If the Document does not identify any Invariant\nSections then there are none.\n\nThe ``Cover Texts'' are certain short passages of text that are listed,\nas Front-Cover Texts or Back-Cover Texts, in the notice that says that\nthe Document is released under this License.  
A Front-Cover Text may\nbe at most 5 words, and a Back-Cover Text may be at most 25 words.\n\nA ``Transparent'' copy of the Document means a machine-readable copy,\nrepresented in a format whose specification is available to the\ngeneral public, that is suitable for revising the document\nstraightforwardly with generic text editors or (for images composed of\npixels) generic paint programs or (for drawings) some widely available\ndrawing editor, and that is suitable for input to text formatters or\nfor automatic translation to a variety of formats suitable for input\nto text formatters.  A copy made in an otherwise Transparent file\nformat whose markup, or absence of markup, has been arranged to thwart\nor discourage subsequent modification by readers is not Transparent.\nAn image format is not Transparent if used for any substantial amount\nof text.  A copy that is not ``Transparent'' is called ``Opaque''.\n\nExamples of suitable formats for Transparent copies include plain\n@sc{ascii} without markup, Texinfo input format, La@TeX{} input\nformat, @acronym{SGML} or @acronym{XML} using a publicly available\n@acronym{DTD}, and standard-conforming simple @acronym{HTML},\nPostScript or @acronym{PDF} designed for human modification.  Examples\nof transparent image formats include @acronym{PNG}, @acronym{XCF} and\n@acronym{JPG}.  Opaque formats include proprietary formats that can be\nread and edited only by proprietary word processors, @acronym{SGML} or\n@acronym{XML} for which the @acronym{DTD} and/or processing tools are\nnot generally available, and the machine-generated @acronym{HTML},\nPostScript or @acronym{PDF} produced by some word processors for\noutput purposes only.\n\nThe ``Title Page'' means, for a printed book, the title page itself,\nplus such following pages as are needed to hold, legibly, the material\nthis License requires to appear in the title page.  
For works in\nformats which do not have any title page as such, ``Title Page'' means\nthe text near the most prominent appearance of the work's title,\npreceding the beginning of the body of the text.\n\nThe ``publisher'' means any person or entity that distributes copies\nof the Document to the public.\n\nA section ``Entitled XYZ'' means a named subunit of the Document whose\ntitle either is precisely XYZ or contains XYZ in parentheses following\ntext that translates XYZ in another language.  (Here XYZ stands for a\nspecific section name mentioned below, such as ``Acknowledgements'',\n``Dedications'', ``Endorsements'', or ``History''.)  To ``Preserve the Title''\nof such a section when you modify the Document means that it remains a\nsection ``Entitled XYZ'' according to this definition.\n\nThe Document may include Warranty Disclaimers next to the notice which\nstates that this License applies to the Document.  These Warranty\nDisclaimers are considered to be included by reference in this\nLicense, but only as regards disclaiming warranties: any other\nimplication that these Warranty Disclaimers may have is void and has\nno effect on the meaning of this License.\n\n@item\nVERBATIM COPYING\n\nYou may copy and distribute the Document in any medium, either\ncommercially or noncommercially, provided that this License, the\ncopyright notices, and the license notice saying this License applies\nto the Document are reproduced in all copies, and that you add no other\nconditions whatsoever to those of this License.  You may not use\ntechnical measures to obstruct or control the reading or further\ncopying of the copies you make or distribute.  However, you may accept\ncompensation in exchange for copies.  
If you distribute a large enough\nnumber of copies you must also follow the conditions in section 3.\n\nYou may also lend copies, under the same conditions stated above, and\nyou may publicly display copies.\n\n@item\nCOPYING IN QUANTITY\n\nIf you publish printed copies (or copies in media that commonly have\nprinted covers) of the Document, numbering more than 100, and the\nDocument's license notice requires Cover Texts, you must enclose the\ncopies in covers that carry, clearly and legibly, all these Cover\nTexts: Front-Cover Texts on the front cover, and Back-Cover Texts on\nthe back cover.  Both covers must also clearly and legibly identify\nyou as the publisher of these copies.  The front cover must present\nthe full title with all words of the title equally prominent and\nvisible.  You may add other material on the covers in addition.\nCopying with changes limited to the covers, as long as they preserve\nthe title of the Document and satisfy these conditions, can be treated\nas verbatim copying in other respects.\n\nIf the required texts for either cover are too voluminous to fit\nlegibly, you should put the first ones listed (as many as fit\nreasonably) on the actual cover, and continue the rest onto adjacent\npages.\n\nIf you publish or distribute Opaque copies of the Document numbering\nmore than 100, you must either include a machine-readable Transparent\ncopy along with each Opaque copy, or state in or with each Opaque copy\na computer-network location from which the general network-using\npublic has access to download using public-standard network protocols\na complete Transparent copy of the Document, free of added material.\nIf you use the latter option, you must take reasonably prudent steps,\nwhen you begin distribution of Opaque copies in quantity, to ensure\nthat this Transparent copy will remain thus accessible at the stated\nlocation until at least one year after the last time you distribute an\nOpaque copy (directly or through your agents or 
retailers) of that\nedition to the public.\n\nIt is requested, but not required, that you contact the authors of the\nDocument well before redistributing any large number of copies, to give\nthem a chance to provide you with an updated version of the Document.\n\n@item\nMODIFICATIONS\n\nYou may copy and distribute a Modified Version of the Document under\nthe conditions of sections 2 and 3 above, provided that you release\nthe Modified Version under precisely this License, with the Modified\nVersion filling the role of the Document, thus licensing distribution\nand modification of the Modified Version to whoever possesses a copy\nof it.  In addition, you must do these things in the Modified Version:\n\n@enumerate A\n@item\nUse in the Title Page (and on the covers, if any) a title distinct\nfrom that of the Document, and from those of previous versions\n(which should, if there were any, be listed in the History section\nof the Document).  You may use the same title as a previous version\nif the original publisher of that version gives permission.\n\n@item\nList on the Title Page, as authors, one or more persons or entities\nresponsible for authorship of the modifications in the Modified\nVersion, together with at least five of the principal authors of the\nDocument (all of its principal authors, if it has fewer than five),\nunless they release you from this requirement.\n\n@item\nState on the Title page the name of the publisher of the\nModified Version, as the publisher.\n\n@item\nPreserve all the copyright notices of the Document.\n\n@item\nAdd an appropriate copyright notice for your modifications\nadjacent to the other copyright notices.\n\n@item\nInclude, immediately after the copyright notices, a license notice\ngiving the public permission to use the Modified Version under the\nterms of this License, in the form shown in the Addendum below.\n\n@item\nPreserve in that license notice the full lists of Invariant Sections\nand required Cover Texts given in the 
Document's license notice.\n\n@item\nInclude an unaltered copy of this License.\n\n@item\nPreserve the section Entitled ``History'', Preserve its Title, and add\nto it an item stating at least the title, year, new authors, and\npublisher of the Modified Version as given on the Title Page.  If\nthere is no section Entitled ``History'' in the Document, create one\nstating the title, year, authors, and publisher of the Document as\ngiven on its Title Page, then add an item describing the Modified\nVersion as stated in the previous sentence.\n\n@item\nPreserve the network location, if any, given in the Document for\npublic access to a Transparent copy of the Document, and likewise\nthe network locations given in the Document for previous versions\nit was based on.  These may be placed in the ``History'' section.\nYou may omit a network location for a work that was published at\nleast four years before the Document itself, or if the original\npublisher of the version it refers to gives permission.\n\n@item\nFor any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve\nthe Title of the section, and preserve in the section all the\nsubstance and tone of each of the contributor acknowledgements and/or\ndedications given therein.\n\n@item\nPreserve all the Invariant Sections of the Document,\nunaltered in their text and in their titles.  Section numbers\nor the equivalent are not considered part of the section titles.\n\n@item\nDelete any section Entitled ``Endorsements''.  Such a section\nmay not be included in the Modified Version.\n\n@item\nDo not retitle any existing section to be Entitled ``Endorsements'' or\nto conflict in title with any Invariant Section.\n\n@item\nPreserve any Warranty Disclaimers.\n@end enumerate\n\nIf the Modified Version includes new front-matter sections or\nappendices that qualify as Secondary Sections and contain no material\ncopied from the Document, you may at your option designate some or all\nof these sections as invariant.  
To do this, add their titles to the\nlist of Invariant Sections in the Modified Version's license notice.\nThese titles must be distinct from any other section titles.\n\nYou may add a section Entitled ``Endorsements'', provided it contains\nnothing but endorsements of your Modified Version by various\nparties---for example, statements of peer review or that the text has\nbeen approved by an organization as the authoritative definition of a\nstandard.\n\nYou may add a passage of up to five words as a Front-Cover Text, and a\npassage of up to 25 words as a Back-Cover Text, to the end of the list\nof Cover Texts in the Modified Version.  Only one passage of\nFront-Cover Text and one of Back-Cover Text may be added by (or\nthrough arrangements made by) any one entity.  If the Document already\nincludes a cover text for the same cover, previously added by you or\nby arrangement made by the same entity you are acting on behalf of,\nyou may not add another; but you may replace the old one, on explicit\npermission from the previous publisher that added the old one.\n\nThe author(s) and publisher(s) of the Document do not by this License\ngive permission to use their names for publicity for or to assert or\nimply endorsement of any Modified Version.\n\n@item\nCOMBINING DOCUMENTS\n\nYou may combine the Document with other documents released under this\nLicense, under the terms defined in section 4 above for modified\nversions, provided that you include in the combination all of the\nInvariant Sections of all of the original documents, unmodified, and\nlist them all as Invariant Sections of your combined work in its\nlicense notice, and that you preserve all their Warranty Disclaimers.\n\nThe combined work need only contain one copy of this License, and\nmultiple identical Invariant Sections may be replaced with a single\ncopy.  
If there are multiple Invariant Sections with the same name but\ndifferent contents, make the title of each such section unique by\nadding at the end of it, in parentheses, the name of the original\nauthor or publisher of that section if known, or else a unique number.\nMake the same adjustment to the section titles in the list of\nInvariant Sections in the license notice of the combined work.\n\nIn the combination, you must combine any sections Entitled ``History''\nin the various original documents, forming one section Entitled\n``History''; likewise combine any sections Entitled ``Acknowledgements'',\nand any sections Entitled ``Dedications''.  You must delete all\nsections Entitled ``Endorsements.''\n\n@item\nCOLLECTIONS OF DOCUMENTS\n\nYou may make a collection consisting of the Document and other documents\nreleased under this License, and replace the individual copies of this\nLicense in the various documents with a single copy that is included in\nthe collection, provided that you follow the rules of this License for\nverbatim copying of each of the documents in all other respects.\n\nYou may extract a single document from such a collection, and distribute\nit individually under this License, provided you insert a copy of this\nLicense into the extracted document, and follow this License in all\nother respects regarding verbatim copying of that document.\n\n@item\nAGGREGATION WITH INDEPENDENT WORKS\n\nA compilation of the Document or its derivatives with other separate\nand independent documents or works, in or on a volume of a storage or\ndistribution medium, is called an ``aggregate'' if the copyright\nresulting from the compilation is not used to limit the legal rights\nof the compilation's users beyond what the individual works permit.\nWhen the Document is included in an aggregate, this License does not\napply to the other works in the aggregate which are not themselves\nderivative works of the Document.\n\nIf the Cover Text requirement of section 3 is 
applicable to these\ncopies of the Document, then if the Document is less than one half of\nthe entire aggregate, the Document's Cover Texts may be placed on\ncovers that bracket the Document within the aggregate, or the\nelectronic equivalent of covers if the Document is in electronic form.\nOtherwise they must appear on printed covers that bracket the whole\naggregate.\n\n@item\nTRANSLATION\n\nTranslation is considered a kind of modification, so you may\ndistribute translations of the Document under the terms of section 4.\nReplacing Invariant Sections with translations requires special\npermission from their copyright holders, but you may include\ntranslations of some or all Invariant Sections in addition to the\noriginal versions of these Invariant Sections.  You may include a\ntranslation of this License, and all the license notices in the\nDocument, and any Warranty Disclaimers, provided that you also include\nthe original English version of this License and the original versions\nof those notices and disclaimers.  In case of a disagreement between\nthe translation and the original version of this License or a notice\nor disclaimer, the original version will prevail.\n\nIf a section in the Document is Entitled ``Acknowledgements'',\n``Dedications'', or ``History'', the requirement (section 4) to Preserve\nits Title (section 1) will typically require changing the actual\ntitle.\n\n@item\nTERMINATION\n\nYou may not copy, modify, sublicense, or distribute the Document\nexcept as expressly provided under this License.  
Any attempt\notherwise to copy, modify, sublicense, or distribute it is void, and\nwill automatically terminate your rights under this License.\n\nHowever, if you cease all violation of this License, then your license\nfrom a particular copyright holder is reinstated (a) provisionally,\nunless and until the copyright holder explicitly and finally\nterminates your license, and (b) permanently, if the copyright holder\nfails to notify you of the violation by some reasonable means prior to\n60 days after the cessation.\n\nMoreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\nTermination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  If your rights have been terminated and not permanently\nreinstated, receipt of a copy of some or all of the same material does\nnot give you any rights to use it.\n\n@item\nFUTURE REVISIONS OF THIS LICENSE\n\nThe Free Software Foundation may publish new, revised versions\nof the GNU Free Documentation License from time to time.  Such new\nversions will be similar in spirit to the present version, but may\ndiffer in detail to address new problems or concerns.  See\n@uref{http://www.gnu.org/copyleft/}.\n\nEach version of the License is given a distinguishing version number.\nIf the Document specifies that a particular numbered version of this\nLicense ``or any later version'' applies to it, you have the option of\nfollowing the terms and conditions either of that specified version or\nof any later version that has been published (not as a draft) by the\nFree Software Foundation.  
If the Document does not specify a version\nnumber of this License, you may choose any version ever published (not\nas a draft) by the Free Software Foundation.  If the Document\nspecifies that a proxy can decide which future versions of this\nLicense can be used, that proxy's public statement of acceptance of a\nversion permanently authorizes you to choose that version for the\nDocument.\n\n@item\nRELICENSING\n\n``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any\nWorld Wide Web server that publishes copyrightable works and also\nprovides prominent facilities for anybody to edit those works.  A\npublic wiki that anybody can edit is an example of such a server.  A\n``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the\nsite means any set of copyrightable works thus published on the MMC\nsite.\n\n``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0\nlicense published by Creative Commons Corporation, a not-for-profit\ncorporation with a principal place of business in San Francisco,\nCalifornia, as well as future copyleft versions of that license\npublished by that same organization.\n\n``Incorporate'' means to publish or republish a Document, in whole or\nin part, as part of another Document.\n\nAn MMC is ``eligible for relicensing'' if it is licensed under this\nLicense, and if all works that were first published under this License\nsomewhere other than this MMC, and subsequently incorporated in whole\nor in part into the MMC, (1) had no cover texts or invariant sections,\nand (2) were thus incorporated prior to November 1, 2008.\n\nThe operator of an MMC Site may republish an MMC contained in the site\nunder CC-BY-SA on the same site at any time before August 1, 2009,\nprovided the MMC is eligible for relicensing.\n\n@end enumerate\n\n@page\n@heading ADDENDUM: How to use this License for your documents\n\nTo use this License in a document you have written, include a copy of\nthe License in the document and put the 
following copyright and\nlicense notices just after the title page:\n\n@smallexample\n@group\n  Copyright (C)  @var{year}  @var{your name}.\n  Permission is granted to copy, distribute and/or modify this document\n  under the terms of the GNU Free Documentation License, Version 1.3\n  or any later version published by the Free Software Foundation;\n  with no Invariant Sections, no Front-Cover Texts, and no Back-Cover\n  Texts.  A copy of the license is included in the section entitled ``GNU\n  Free Documentation License''.\n@end group\n@end smallexample\n\nIf you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,\nreplace the ``with@dots{}Texts.'' line with this:\n\n@smallexample\n@group\n    with the Invariant Sections being @var{list their titles}, with\n    the Front-Cover Texts being @var{list}, and with the Back-Cover Texts\n    being @var{list}.\n@end group\n@end smallexample\n\nIf you have Invariant Sections without Cover Texts, or some other\ncombination of the three, merge those two alternatives to suit the\nsituation.\n\nIf your document contains nontrivial examples of program code, we\nrecommend releasing these examples in parallel under your choice of\nfree software license, such as the GNU General Public License,\nto permit their use in free software.\n\n@c Local Variables:\n@c ispell-local-pdict: \"ispell-dict\"\n@c End:\n\n"
  },
  {
    "path": "doc/isa_abi_headache",
    "content": "Copyright 2000 Free Software Foundation, Inc.\nCopyright 2008 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\nTerms Used In This Document:\n  ISA = Instruction Set Architecture.   The instructions the current\n        processor provides.\n  ABI = Application Binary Interface.  Specifies calling convention,\n        type sizes, etc.\n  AR64 = Arithmetic operations are 64-bit using 64-bit instructions\n\t (E.g., addition, subtraction, load, store, of 64-bit integer types\n\t are done with single instructions, not 32 bits at a time.)\n  Environment = The operating system and compiler.\n\nMPIR is a very complex package to build since its speed is very\nsensitive to the ISA and ABI.  For example, if the ISA provides 64-bit\ninstructions, it is crucial that MPIR is configured to use them.\n\nMost environments that run on a 64-bit ISA provide more than one ABI.\nTypically one of the supported ABI's is a backward compatible 32-bit\nABI, and one ABI provides 64-bit addressing and `long' (sometimes\nknown as LP64).  But a few environments (IRIX, HP-UX) provide\nintermediate ABI's using 32-bit addressing but allow efficient 64-bit\noperations through a `long long' type.  
For the latter to be useful to\nMPIR, the ABI must allow operations using the native 64-bit\ninstructions provided by the ISA, and allow passing of 64-bit\nquantities atomically.\n\nThe ABI is typically chosen by means of command line options to the\ncompiler tools (gcc, cc, c89, nm, ar, ld, as).  Different environments\nuse different defaults, but as of this writing (May 2000) the\ndominating default is to the plain 32-bit ABI in its most arcane form.\n\nThe GMP 3.0.x approach was to compile using the ABI that gives the\nbest performance.  That places the burden on users to pass special\noptions to the compiler when they compile their MPIR applications.\nThat approach has its advantages and disadvantages.  The main\nadvantage is that users don't unknowingly get bad MPIR performance.\nThe main disadvantage is that users' compiles (actually links) will\nfail unless they pass special compiler options.\n\n** SPARC\n\nSystem vendors often confuse ABI, ISA, and implementation.  In the \ncase of Solaris, the unbundled compiler confuses ISA and ABI, and\nthe options have very confusing names.\n\n     option\t\tinterpretation\n     ======\t\t==============\ncc   -xarch=v8plus\tISA=sparcv9, ABI=V8plus (PTR=32, see below)\ngcc  -mv8plus\t\tISA=sparcv9, ABI=V8plus (see below)\ncc   -xarch=v9\t\tISA=sparcv9, ABI=V9 (implying AR=64, PTR=64)\n\nIt's hard to believe, but the option v8plus really means ISA=V9!\n\nSolaris releases prior to version 7 running on a V9 CPU fails to\nsave/restore the upper 32 bits of the `i' and `l' registers.  The\n`v8plus' option generates code that uses as many V9 features as\npossible under such circumstances.\n\n** MIPS\n\nThe IRIX 6 compilers gets things right.  They have a clear\nunderstanding of the differences between ABI and ISA.  
The option\nnames are descriptive.\n\n     option\t\tinterpretation\n     ======\t\t==============\ncc   -n32\t\tABI=n32 (implying AR=64, PTR=32)\ngcc  -mabi=n32\t\tABI=n32 (implying AR=64, PTR=32)\ncc   -64\t\tABI=64 (implying AR=64, PTR=64)\ngcc  -mabi=64\t\tABI=64 (implying AR=64, PTR=64)\ncc   -mips3\t\tISA=mips3\ngcc  -mips3\t\tISA=mips3\ncc   -mips4\t\tISA=mips4\ngcc  -mips4\t\tISA=mips4\n\n** HP-PA\n\nHP-UX is somewhat weird, but easier to deal with than Solaris.\n\n     option\t\tinterpretation\n     ======\t\t==============\ncc   +DA2.0\t\tABI=32bit (implying AR=64, PTR=32)\ncc   +DD64\t\tABI=64bit (implying AR=64, PTR=64)\n\nCode performing 64-bit arithmetic in the HP-UX 32-bit is not\ncompatible with the 64-bit ABI; the former has a calling convention\nthat passes/returns 64-bit integer quantities as two 32-bit chunks.\n\n** PowerPC\n\nWhile the PowerPC ABI's are capable of supporting 64-bit\nregisters/operations, the compilers under AIX are similar to Solaris'\ncc in that they don't currently provide any 32-bit addressing with\n64-bit arithmetic.\n\n     option\t\t\tinterpretation\n     ======\t\t\t==============\ncc   -q64\t\t\tABI=64bit (implying AR=64, PTR=64)\ngcc  -maix64 -mpowerpc64\tABI=64bit (implying AR=64, PTR=64)\n"
  },
  {
    "path": "doc/mdate-sh",
    "content": "#!/bin/sh\n# Get modification time of a file or directory and pretty-print it.\n\nscriptversion=2015-04-09.19; # UTC\n\n# Copyright (C) 1995-2014 Free Software Foundation, Inc.\n# written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995\n#\n# This program is free software; you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation; either version 2, or (at your option)\n# any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program.  If not, see <http://www.gnu.org/licenses/>.\n\n# As a special exception to the GNU General Public License, if you\n# distribute this file as part of a program that contains a\n# configuration script generated by Autoconf, you may include it under\n# the same distribution terms that you use for the rest of that program.\n\n# This file is maintained in Automake, please report\n# bugs to <bug-automake@gnu.org> or send patches to\n# <automake-patches@gnu.org>.\n\nif test -n \"${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then\n  emulate sh\n  NULLCMD=:\n  # Pre-4.2 versions of Zsh do word splitting on ${1+\"$@\"}, which\n  # is contrary to our usage.  Disable this feature.\n  alias -g '${1+\"$@\"}'='\"$@\"'\n  setopt NO_GLOB_SUBST\nfi\n\ncase $1 in\n  '')\n     echo \"$0: No file.  
Try '$0 --help' for more information.\" 1>&2\n     exit 1;\n     ;;\n  -h | --h*)\n    cat <<\\EOF\nUsage: mdate-sh [--help] [--version] FILE\n\nPretty-print the modification day of FILE, in the format:\n1 January 1970\n\nReport bugs to <bug-automake@gnu.org>.\nEOF\n    exit $?\n    ;;\n  -v | --v*)\n    echo \"mdate-sh $scriptversion\"\n    exit $?\n    ;;\nesac\n\nerror ()\n{\n  echo \"$0: $1\" >&2\n  exit 1\n}\n\n\n# Prevent date giving response in another language.\nLANG=C\nexport LANG\nLC_ALL=C\nexport LC_ALL\nLC_TIME=C\nexport LC_TIME\n\n# Use UTC to get reproducible result\nTZ=UTC\nexport TZ\n\n# GNU ls changes its time format in response to the TIME_STYLE\n# variable.  Since we cannot assume 'unset' works, revert this\n# variable to its documented default.\nif test \"${TIME_STYLE+set}\" = set; then\n  TIME_STYLE=posix-long-iso\n  export TIME_STYLE\nfi\n\nsave_arg1=$1\n\n# Find out how to get the extended ls output of a file or directory.\nif ls -L /dev/null 1>/dev/null 2>&1; then\n  ls_command='ls -L -l -d'\nelse\n  ls_command='ls -l -d'\nfi\n# Avoid user/group names that might have spaces, when possible.\nif ls -n /dev/null 1>/dev/null 2>&1; then\n  ls_command=\"$ls_command -n\"\nfi\n\n# A 'ls -l' line looks as follows on OS/2.\n#  drwxrwx---        0 Aug 11  2001 foo\n# This differs from Unix, which adds ownership information.\n#  drwxrwx---   2 root  root      4096 Aug 11  2001 foo\n#\n# To find the date, we split the line on spaces and iterate on words\n# until we find a month.  This cannot work with files whose owner is a\n# user named \"Jan\", or \"Feb\", etc.  However, it's unlikely that '/'\n# will be owned by a user whose name is a month.  
So we first look at\n# the extended ls output of the root directory to decide how many\n# words should be skipped to get the date.\n\n# On HPUX /bin/sh, \"set\" interprets \"-rw-r--r--\" as options, so the \"x\" below.\nset x`$ls_command /`\n\n# Find which argument is the month.\nmonth=\ncommand=\nuntil test $month\ndo\n  test $# -gt 0 || error \"failed parsing '$ls_command /' output\"\n  shift\n  # Add another shift to the command.\n  command=\"$command shift;\"\n  case $1 in\n    Jan) month=January; nummonth=1;;\n    Feb) month=February; nummonth=2;;\n    Mar) month=March; nummonth=3;;\n    Apr) month=April; nummonth=4;;\n    May) month=May; nummonth=5;;\n    Jun) month=June; nummonth=6;;\n    Jul) month=July; nummonth=7;;\n    Aug) month=August; nummonth=8;;\n    Sep) month=September; nummonth=9;;\n    Oct) month=October; nummonth=10;;\n    Nov) month=November; nummonth=11;;\n    Dec) month=December; nummonth=12;;\n  esac\ndone\n\ntest -n \"$month\" || error \"failed parsing '$ls_command /' output\"\n\n# Get the extended ls output of the file or directory.\nset dummy x`eval \"$ls_command \\\"\\\\\\$save_arg1\\\"\"`\n\n# Remove all preceding arguments\neval $command\n\n# Because of the dummy argument above, month is in $2.\n#\n# On a POSIX system, we should have\n#\n# $# = 5\n# $1 = file size\n# $2 = month\n# $3 = day\n# $4 = year or time\n# $5 = filename\n#\n# On Darwin 7.7.0 and 7.6.0, we have\n#\n# $# = 4\n# $1 = day\n# $2 = month\n# $3 = year or time\n# $4 = filename\n\n# Get the month.\ncase $2 in\n  Jan) month=January; nummonth=1;;\n  Feb) month=February; nummonth=2;;\n  Mar) month=March; nummonth=3;;\n  Apr) month=April; nummonth=4;;\n  May) month=May; nummonth=5;;\n  Jun) month=June; nummonth=6;;\n  Jul) month=July; nummonth=7;;\n  Aug) month=August; nummonth=8;;\n  Sep) month=September; nummonth=9;;\n  Oct) month=October; nummonth=10;;\n  Nov) month=November; nummonth=11;;\n  Dec) month=December; nummonth=12;;\nesac\n\ncase $3 in\n  ???*) day=$1;;\n  *) 
day=$3; shift;;\nesac\n\n# Here we have to deal with the problem that the ls output gives either\n# the time of day or the year.\ncase $3 in\n  *:*) set `date`; eval year=\\$$#\n       case $2 in\n\t Jan) nummonthtod=1;;\n\t Feb) nummonthtod=2;;\n\t Mar) nummonthtod=3;;\n\t Apr) nummonthtod=4;;\n\t May) nummonthtod=5;;\n\t Jun) nummonthtod=6;;\n\t Jul) nummonthtod=7;;\n\t Aug) nummonthtod=8;;\n\t Sep) nummonthtod=9;;\n\t Oct) nummonthtod=10;;\n\t Nov) nummonthtod=11;;\n\t Dec) nummonthtod=12;;\n       esac\n       # For the first six month of the year the time notation can also\n       # be used for files modified in the last year.\n       if (expr $nummonth \\> $nummonthtod) > /dev/null;\n       then\n\t year=`expr $year - 1`\n       fi;;\n  *) year=$3;;\nesac\n\n# The result.\necho $day $month $year\n\n# Local Variables:\n# mode: shell-script\n# sh-indentation: 2\n# eval: (add-hook 'write-file-hooks 'time-stamp)\n# time-stamp-start: \"scriptversion=\"\n# time-stamp-format: \"%:y-%02m-%02d.%02H\"\n# time-stamp-time-zone: \"UTC\"\n# time-stamp-end: \"; # UTC\"\n# End:\n"
  },
  {
    "path": "doc/mpir.texi",
    "content": "\\input texinfo    @c -*-texinfo-*-\n@c %**start of header\n@setfilename mpir.info\n@include version.texi\n@settitle MPIR @value{VERSION}\n@synindex tp fn\n@iftex\n@afourpaper\n@end iftex\n@comment %**end of header\n\n@copying\nThis manual describes how to install and use MPIR, the Multiple Precision Integers and Rationals\nlibrary, version @value{VERSION}.\n\nCopyright 1991, 1993-2016 Free Software Foundation, Inc.\n\nCopyright 2008, 2009, 2010 William Hart\n\nPermission is granted to copy, distribute and/or modify this document under\nthe terms of the GNU Free Documentation License, Version 1.3 or any later\nversion published by the Free Software Foundation; with no Invariant Sections,\nwith the Front-Cover Texts being ``A GNU Manual'', and with the Back-Cover\nTexts being ``You have freedom to copy and modify this GNU Manual, like GNU\nsoftware''.  A copy of the license is included in\n@ref{GNU Free Documentation License}.\n@end copying\n@c  Note the @ref above must be on one line, a line break in an @ref within\n@c  @copying will bomb in recent texinfo.tex (eg. 2004-04-07.08 which comes\n@c  with texinfo 4.7), with messages about missing @endcsname.\n\n\n@c  Texinfo version 4.2 or up will be needed to process this file.\n@c\n@c  The version number and edition number are taken from version.texi provided\n@c  by automake (note that it's regenerated only if you configure with\n@c  --enable-maintainer-mode).\n@c\n@c  Notes discussing the present version number of GMP/MPIR in relation to previous\n@c  ones (for instance in the \"Compatibility\" section) must be updated\n@c  manually though.\n@c\n@c  @cindex entries have been made for function categories and programming\n@c  topics.  
The \"mpn\" section is not included in this, because a beginner\n@c  looking for \"GCD\" or something is only going to be confused by pointers to\n@c  low level routines.\n@c\n@c  @cindex entries are present for processors and systems when there's\n@c  particular notes concerning them, but not just for everything MPIR\n@c  supports.\n@c\n@c  Index entries for files use @code rather than @file, @samp or @option,\n@c  since the latter come out with quotes in TeX, which are nice in the text\n@c  but don't look so good in index columns.\n@c\n@c  Tex:\n@c\n@c  A suitable texinfo.tex is supplied, a newer one should work equally well.\n@c\n@c  HTML:\n@c\n@c  Nothing special is done for links to external manuals, they just come out\n@c  in the usual makeinfo style, eg. \"../libc/Locales.html\".  If you have\n@c  local copies of such manuals then this is a good thing, if not then you\n@c  may want to search-and-replace to some online source.\n@c\n\n@dircategory GNU libraries\n@direntry\n* mpir: (mpir).                   
MPIR Multiple Precision Integers and Rationals Library.\n@end direntry\n\n@c  html <meta name=\"description\" content=\"...\">\n@documentdescription\nHow to install and use the MPIR multiple precision arithmetic library, version @value{VERSION}.\n@end documentdescription\n\n@c smallbook\n@finalout\n@setchapternewpage on\n\n@ifnottex\n@node Top, Copying, (dir), (dir)\n@top MPIR\n@end ifnottex\n\n@iftex\n@titlepage\n@title MPIR\n@subtitle The Multiple Precision Integers and Rationals Library\n@subtitle Edition @value{EDITION}\n@subtitle @value{UPDATED}\n\n@author Original Authors: Torbjorn Granlund and the GMP Development Team \n@author Subsequent modifications: William Hart and the MPIR Team\n@c @email{goodwillhart@gmail.com}\n\n@c Include the Distribution inside the titlepage so\n@c that headings are turned off.\n\n@tex\n\\global\\parindent=0pt\n\\global\\parskip=8pt\n\\global\\baselineskip=13pt\n@end tex\n\n@page\n@vskip 0pt plus 1filll\n@end iftex\n\n@insertcopying\n@ifnottex\n@sp 1\n@end ifnottex\n\n@iftex\n@end titlepage\n@headings double\n@end iftex\n\n@c  Don't bother with contents for html, the menus seem adequate.\n@ifnothtml\n@contents\n@end ifnothtml\n\n@menu\n* Copying::                    MPIR Copying Conditions (LGPL).\n* Introduction to MPIR::       Brief introduction to MPIR.\n* Installing MPIR::            How to configure and compile the MPIR library.\n* MPIR Basics::                What every MPIR user should know.\n* Reporting Bugs::             How to usefully report bugs.\n* Integer Functions::          Functions for arithmetic on signed integers.\n* Rational Number Functions::  Functions for arithmetic on rational numbers.\n* Floating-point Functions::   Functions for arithmetic on floats.\n* Low-level Functions::        Fast functions for natural numbers.\n* Random Number Functions::    Functions for generating random numbers.\n* Formatted Output::           @code{printf} style output.\n* Formatted Input::            @code{scanf} style 
input.\n* C++ Class Interface::        Class wrappers around MPIR types.\n* .Net Interface::             Managed .Net wrappers for MPIR types.\n* Custom Allocation::          How to customize the internal allocation.\n* Language Bindings::          Using MPIR from other languages.\n* Algorithms::                 What happens behind the scenes.\n* Internals::                  How values are represented behind the scenes.\n\n* Contributors::\t       Who brings you this library?\n* References::                 Some useful papers and books to read.\n* GNU Free Documentation License::\n* Concept Index::\n* Function Index::\n@end menu\n\n\n@c  @m{T,N} is $T$ in tex or @math{N} otherwise.  This is an easy way to give\n@c  different forms for math in tex and info.  Commas in N or T don't work,\n@c  but @C{} can be used instead.  \\, works in info but not in tex.\n@iftex\n@macro m {T,N}\n@tex$\\T\\$@end tex\n@end macro\n@end iftex\n@ifnottex\n@macro m {T,N}\n@math{\\N\\}\n@end macro\n@end ifnottex\n\n@macro C {}\n,\n@end macro\n\n@c  @ms{V,N} is $V_N$ in tex or just vn otherwise.  This suits simple\n@c  subscripts like @ms{x,0}.\n@iftex\n@macro ms {V,N}\n@tex$\\V\\_{\\N\\}$@end tex\n@end macro\n@end iftex\n@ifnottex\n@macro ms {V,N}\n\\V\\\\N\\\n@end macro\n@end ifnottex\n\n@c  @nicode{S} is plain S in info, or @code{S} elsewhere.  This can be used\n@c  when the quotes that @code{} gives in info aren't wanted, but the\n@c  fontification in tex or html is wanted.  Doesn't work as @nicode{'\\\\0'}\n@c  though (gives two backslashes in tex).\n@ifinfo\n@macro nicode {S}\n\\S\\\n@end macro\n@end ifinfo\n@ifnotinfo\n@macro nicode {S}\n@code{\\S\\}\n@end macro\n@end ifnotinfo\n\n@c  @nisamp{S} is plain S in info, or @samp{S} elsewhere.  
This can be used\n@c  when the quotes that @samp{} gives in info aren't wanted, but the\n@c  fontification in tex or html is wanted.\n@ifinfo\n@macro nisamp {S}\n\\S\\\n@end macro\n@end ifinfo\n@ifnotinfo\n@macro nisamp {S}\n@samp{\\S\\}\n@end macro\n@end ifnotinfo\n\n@c  Usage: @GMPtimes{}\n@c  Give either \\times or the word \"times\".\n@tex\n\\gdef\\GMPtimes{\\times}\n@end tex\n@ifnottex\n@macro GMPtimes\ntimes\n@end macro\n@end ifnottex\n\n@c  Usage: @GMPmultiply{}\n@c  Give * in info, or nothing in tex.\n@tex\n\\gdef\\GMPmultiply{}\n@end tex\n@ifnottex\n@macro GMPmultiply\n*\n@end macro\n@end ifnottex\n\n@c  Usage: @GMPabs{x}\n@c  Give either |x| in tex, or abs(x) in info or html.\n@tex\n\\gdef\\GMPabs#1{|#1|}\n@end tex\n@ifnottex\n@macro GMPabs {X}\n@abs{}(\\X\\)\n@end macro\n@end ifnottex\n\n@c  Usage: @GMPfloor{x}\n@c  Give either \\lfloor x\\rfloor in tex, or floor(x) in info or html.\n@tex\n\\gdef\\GMPfloor#1{\\lfloor #1\\rfloor}\n@end tex\n@ifnottex\n@macro GMPfloor {X}\nfloor(\\X\\)\n@end macro\n@end ifnottex\n\n@c  Usage: @GMPceil{x}\n@c  Give either \\lceil x\\rceil in tex, or ceil(x) in info or html.\n@tex\n\\gdef\\GMPceil#1{\\lceil #1 \\rceil}\n@end tex\n@ifnottex\n@macro GMPceil {X}\nceil(\\X\\)\n@end macro\n@end ifnottex\n\n@c  Math operators already available in tex, made available in info too.\n@c  For example @bmod{} can be used in both tex and info.\n@ifnottex\n@macro bmod\nmod\n@end macro\n@macro gcd\ngcd\n@end macro\n@macro ge\n>=\n@end macro\n@macro le\n<=\n@end macro\n@macro log\nlog\n@end macro\n@macro min\nmin\n@end macro\n@macro leftarrow\n<-\n@end macro\n@macro rightarrow\n->\n@end macro\n@end ifnottex\n\n@c  New math operators.\n@c  @abs{} can be used in both tex and info, or just \\abs in tex.\n@tex\n\\gdef\\abs{\\mathop{\\rm abs}}\n@end tex\n@ifnottex\n@macro abs\nabs\n@end macro\n@end ifnottex\n\n@c  @cross{} is a \\times symbol in tex, or an \"x\" in info.  
In tex it works\n@c  inside or outside $ $.\n@tex\n\\gdef\\cross{\\ifmmode\\times\\else$\\times$\\fi}\n@end tex\n@ifnottex\n@macro cross\nx\n@end macro\n@end ifnottex\n\n@c  @times{} made available as a \"*\" in info and html (already works in tex).\n@ifnottex\n@macro times\n*\n@end macro\n@end ifnottex\n\n@c  Usage: @W{text}\n@c  Like @w{} but working in math mode too.\n@tex\n\\gdef\\W#1{\\ifmmode{#1}\\else\\w{#1}\\fi}\n@end tex\n@ifnottex\n@macro W {S}\n@w{\\S\\}\n@end macro\n@end ifnottex\n\n@c  Usage: \\GMPdisplay{text}\n@c  Put the given text in an @display style indent, but without turning off\n@c  paragraph reflow etc.\n@tex\n\\gdef\\GMPdisplay#1{%\n\\noindent\n\\advance\\leftskip by \\lispnarrowing\n#1\\par}\n@end tex\n\n@c  Usage: \\GMPhat\n@c  A new \\hat that will work in math mode, unlike the texinfo redefined\n@c  version.\n@tex\n\\gdef\\GMPhat{\\mathaccent\"705E}\n@end tex\n\n@c  Usage: \\GMPraise{text}\n@c  For use in a $ $ math expression as an alternative to \"^\".  This is good\n@c  for @code{} in an exponent, since there seems to be no superscript font\n@c  for that.\n@tex\n\\gdef\\GMPraise#1{\\mskip0.5\\thinmuskip\\hbox{\\raise0.8ex\\hbox{#1}}}\n@end tex\n\n@c  Usage: @texlinebreak{}\n@c  A line break as per @*, but only in tex.\n@iftex\n@macro texlinebreak\n@*\n@end macro\n@end iftex\n@ifnottex\n@macro texlinebreak\n@end macro\n@end ifnottex\n\n@c  Usage: @maybepagebreak\n@c  Allow tex to insert a page break, if it feels the urge.\n@c  Normally blocks of @deftypefun/funx are kept together, which can lead to\n@c  some poor page break positioning if it's a big block, like the sets of\n@c  division functions etc.\n@tex\n\\gdef\\maybepagebreak{\\penalty0}\n@end tex\n@ifnottex\n@macro maybepagebreak\n@end macro\n@end ifnottex\n\n@c  Usage: @GMPreftop{info,title}\n@c  Usage: @GMPpxreftop{info,title}\n@c\n@c  Like @ref{} and @pxref{}, but designed for a reference to the top of a\n@c  document, not a particular section.  
The TeX output for plain @ref insists\n@c  on printing a particular section, GMPreftop gives just the title.\n@c\n@c  The texinfo manual recommends putting a likely section name in references\n@c  like this, eg. \"Introduction\", but it seems better to just give the title.\n@c\n@iftex\n@macro GMPreftop{info,title}\n@i{\\title\\}\n@end macro\n@macro GMPpxreftop{info,title}\nsee @i{\\title\\}\n@end macro\n@end iftex\n@c\n@ifnottex\n@macro GMPreftop{info,title}\n@ref{Top,\\title\\,\\title\\,\\info\\,\\title\\}\n@end macro\n@macro GMPpxreftop{info,title}\n@pxref{Top,\\title\\,\\title\\,\\info\\,\\title\\}\n@end macro\n@end ifnottex\n\n\n@node Copying, Introduction to MPIR, Top, Top\n@comment  node-name, next, previous,  up\n@unnumbered MPIR Copying Conditions\n@cindex Copying conditions\n@cindex Conditions for copying MPIR\n@cindex License conditions\n\nThis library is @dfn{free}; this means that everyone is free to use it and\nfree to redistribute it on a free basis.  The library is not in the public\ndomain; it is copyrighted and there are restrictions on its distribution, but\nthese restrictions are designed to permit everything that a good cooperating\ncitizen would want to do.  What is not allowed is to try to prevent others\nfrom further sharing any version of this library that they might get from\nyou.@refill\n\nSpecifically, we want to make sure that you have the right to give away copies\nof the library, that you receive source code or else can get it if you want\nit, that you can change this library or use pieces of it in new free programs,\nand that you know you can do these things.@refill\n\nTo make sure that everyone has such rights, we have to forbid you to deprive\nanyone else of these rights.  For example, if you distribute copies of the MPIR\nlibrary, you must give the recipients all the rights that you have.  You\nmust make sure that they, too, receive or can get the source code.  
And you\nmust tell them their rights.@refill\n\nAlso, for our own protection, we must make certain that everyone finds out\nthat there is no warranty for the MPIR library.  If it is modified by\nsomeone else and passed on, we want their recipients to know that what they\nhave is not what we distributed, so that any problems introduced by others\nwill not reflect on our reputation.@refill\n\nThe precise conditions of the license for the MPIR library are found in the\nLesser General Public License version 3 that accompanies the source code,\nsee @file{COPYING.LIB}.\n\n\n@node Introduction to MPIR, Installing MPIR, Copying, Top\n@comment  node-name,  next,  previous,  up\n@chapter Introduction to MPIR\n@cindex Introduction\n\nMPIR is a portable library written in C for arbitrary precision arithmetic\non integers, rational numbers, and floating-point numbers.  It aims to provide\nthe fastest possible arithmetic for all applications that need higher\nprecision than is directly supported by the basic C types.\n\nMany applications use just a few hundred bits of precision; but some\napplications may need thousands or even millions of bits.  
MPIR is designed to\ngive good performance for both, by choosing algorithms based on the sizes of\nthe operands, and by carefully keeping the overhead at a minimum.\n\nThe speed of MPIR is achieved by using fullwords as the basic arithmetic type,\nby using sophisticated algorithms, by including carefully optimized assembly\ncode for the most common inner loops for many different CPUs, and by a general\nemphasis on speed (as opposed to simplicity or elegance).\n\nThere is assembly code for these CPUs:\n@cindex CPU types\nARM,\nDEC Alpha 21064, 21164, and 21264,\nAMD K6, K6-2, Athlon, K8 and K10,\nIntel Pentium, Pentium Pro/II/III, Pentium 4, generic x86,\nIntel IA-64, Core 2, i7, Atom,\nMotorola/IBM PowerPC 32 and 64,\nMIPS R3000, R4000,\nSPARCv7, SuperSPARC, generic SPARCv8, UltraSPARC.\n\n@cindex Home page\n@cindex Web page\n@noindent\nFor up-to-date information on, and latest version of, MPIR, please see the MPIR web pages at\n\n@display\n@uref{http://www.mpir.org/}\n@end display\n\n@cindex Mailing lists\nThere are a number of public mailing lists of interest.  The development list is\n\n@display\n@uref{http://groups.google.com/group/mpir-devel/}.\n@end display\n\nThe proper place for bug reports is @uref{http://groups.google.com/group/mpir-devel}.  See\n@ref{Reporting Bugs} for information about reporting bugs.\n\n@sp 1\n@section How to use this Manual\n@cindex About this manual\n\nEveryone should read @ref{MPIR Basics}.  If you need to install the library\nyourself, then read @ref{Installing MPIR}.  
If you have a system with multiple\nABIs, then read @ref{ABI and ISA}, for the compiler options that must be used\non applications.\nIn addition to the usual compilation tools, MPIR requires Yasm in order to be built.\nIf Yasm is not available on your system, you can download its sources at\n\n@display\n@uref{http://yasm.tortall.net/Download.html}\n@end display\n\nbuild your own version and make it available to MPIR's configuration system\nby passing its path to @file{configure} through the @samp{--with-yasm} option.\nSee @ref{Build Options} for further details.\n\nThe rest of the manual can be used for later reference, although it is\nprobably a good idea to glance through it.\n\n\n@node Installing MPIR, MPIR Basics, Introduction to MPIR, Top\n@comment  node-name,  next,  previous,  up\n@chapter Installing MPIR\n@cindex Installing MPIR\n@cindex Configuring MPIR\n@cindex Building MPIR\n\n\nMPIR has an autoconf/automake/libtool based configuration system.  On a\nUnix-like system a basic build can be done with\n\n@example\n./configure\nmake\n@end example\n\n@noindent\nSome self-tests can be run with\n\n@example\nmake check\n@end example\n\n@noindent\nAnd you can install (under @file{/usr/local} by default) with\n\n@example\nmake install\n@end example\n\nImportant note: by default MPIR produces libraries named libmpir, etc., and the header file \n@file{mpir.h}. If you wish to have MPIR build a library named libgmp as well, etc., and a \n@file{gmp.h} header file, so that you can use mpir with programs designed to only work with GMP, \nthen use the @samp{--enable-gmpcompat} option when invoking configure:\n\n@example\n./configure --enable-gmpcompat\n@end example\n\nNote @file{gmp.h} is only created upon running make install. \n\nMPIR is compatible with GMP when the @samp{--enable-gmpcompat} option is used, except that the GMP secure cryptographic functions are not available. \n\nSome deprecated GMP functionality may be unavailable if this option is not selected. 
\n\nIf you experience problems, please report them to \n\n@uref{http://groups.google.com/group/mpir-devel}.\n\nSee @ref{Reporting Bugs}, for information on what to include in useful bug\nreports.\n\n@menu\n* Build Options::\n* ABI and ISA::\n* Notes for Package Builds::\n* Building with Microsoft Visual Studio::\n* Notes for Particular Systems::\n* Known Build Problems::\n* Performance optimization::\n@end menu\n\n\n@node Build Options, ABI and ISA, Installing MPIR, Installing MPIR\n@section Build Options\n@cindex Build options\n\nAll the usual autoconf configure options are available, run @samp{./configure\n--help} for a summary.  The file @file{INSTALL.autoconf} has some generic\ninstallation information too.\n\n@table @asis\n@item Tools\n@cindex Non-Unix systems\n@samp{configure} requires various Unix-like tools.  See @ref{Notes for\nParticular Systems}, for some options on non-Unix systems.\n\nIt might be possible to build without the help of @samp{configure}, certainly\nall the code is there, but unfortunately you'll be on your own.\n\n@item Build Directory\n@cindex Build directory\nTo compile in a separate build directory, @command{cd} to that directory, and\nprefix the configure command with the path to the MPIR source directory.  For\nexample\n\n@example\ncd /my/build/dir\n/my/sources/mpir-@value{VERSION}/configure\n@end example\n\nNot all @samp{make} programs have the necessary features (@code{VPATH}) to\nsupport this.  In particular, SunOS and Solaris @command{make} have bugs that\nmake them unable to build in a separate directory.  Use GNU @command{make}\ninstead.\n\n@item @option{--prefix} and @option{--exec-prefix}\n@cindex Prefix\n@cindex Exec prefix\n@cindex Install prefix\n@cindex @code{--prefix}\n@cindex @code{--exec-prefix}\nThe @option{--prefix} option can be used in the normal way to direct MPIR to\ninstall under a particular tree.  
The default is @samp{/usr/local}.\n\n@option{--exec-prefix} can be used to direct architecture-dependent files like\n@file{libmpir.a} to a different location.  This can be used to share\narchitecture-independent parts like the documentation, but separate the\ndependent parts.  Note however that @file{mpir.h} and @file{mp.h} are\narchitecture-dependent since they encode certain aspects of @file{libmpir}, so\nit will be necessary to ensure both @file{$prefix/include} and\n@file{$exec_prefix/include} are available to the compiler.\n\n@item @option{--enable-gmpcompat}\n@cindex @code{--enable-gmpcompat}\nBy default make builds libmpir library files (and libmpirxx if C++ headers are requested) and the @file{mpir.h} header file. This option allows you to specify that you want additional libraries created called libgmp (and libgmpxx), etc., for libraries and gmp.h for compatibility with GMP (except for GMP secure cryptographic functions, which are not available in MPIR).\n\n@item @option{--disable-shared}, @option{--disable-static}\n@cindex @code{--disable-shared}\n@cindex @code{--disable-static}\nBy default both shared and static libraries are built (where possible), but\none or other can be disabled.  Shared libraries result in smaller executables\nand permit code sharing between separate running processes, but on some CPUs\nare slightly slower, having a small cost on each function call.\n\n@item Native Compilation, @option{--build=CPU-VENDOR-OS}\n@cindex Native compilation\n@cindex Build system\n@cindex @code{--build}\nFor normal native compilation, the system can be specified with\n@samp{--build}.  By default @samp{./configure} uses the output from running\n@samp{./config.guess}.  On some systems @samp{./config.guess} can determine\nthe exact CPU type, on others it will be necessary to give it explicitly.  
For\nexample,\n\n@example\n./configure --build=ultrasparc-sun-solaris2.7\n@end example\n\nIn all cases the @samp{OS} part is important, since it controls how libtool\ngenerates shared libraries.  Running @samp{./config.guess} is the simplest way\nto see what it should be, if you don't know already.\n\n@item Cross Compilation, @option{--host=CPU-VENDOR-OS}\n@cindex Cross compiling\n@cindex Host system\n@cindex @code{--host}\nWhen cross-compiling, the system used for compiling is given by @samp{--build}\nand the system where the library will run is given by @samp{--host}.  For\nexample when using a FreeBSD Athlon system to build GNU/Linux m68k binaries,\n\n@example\n./configure --build=athlon-pc-freebsd3.5 --host=m68k-mac-linux-gnu\n@end example\n\nCompiler tools are sought first with the host system type as a prefix.  For\nexample @command{m68k-mac-linux-gnu-ranlib} is tried, then plain\n@command{ranlib}.  This makes it possible for a set of cross-compiling tools\nto co-exist with native tools.  The prefix is the argument to @samp{--host},\nand this can be an alias, such as @samp{m68k-linux}.  But note that tools\ndon't have to be setup this way, it's enough to just have a @env{PATH} with a\nsuitable cross-compiling @command{cc} etc.\n\nCompiling for a different CPU in the same family as the build system is a form\nof cross-compilation, though very possibly this would merely be special\noptions on a native compiler.  In any case @samp{./configure} avoids depending\non being able to run code on the build system, which is important when\ncreating binaries for a newer CPU since they very possibly won't run on the\nbuild system.\n\nIn all cases the compiler must be able to produce an executable (of whatever\nformat) from a standard C @code{main}.  
Although only object files will go to\nmake up @file{libmpir}, @samp{./configure} uses linking tests for various\npurposes, such as determining what functions are available on the host system.\n\nCurrently a warning is given unless an explicit @samp{--build} is used when\ncross-compiling, because it may not be possible to correctly guess the build\nsystem type if the @env{PATH} has only a cross-compiling @command{cc}.\n\nNote that the @samp{--target} option is not appropriate for MPIR@.  It's for use\nwhen building compiler tools, with @samp{--host} being where they will run,\nand @samp{--target} what they'll produce code for.  Ordinary programs or\nlibraries like MPIR are only interested in the @samp{--host} part, being where\nthey'll run.  \n\n@item CPU types\n@cindex CPU types\nIn general, if you want a library that runs as fast as possible, you should\nconfigure MPIR for the exact CPU type your system uses.  However, this may mean\nthe binaries won't run on older members of the family, and might run slower on\nother members, older or newer.  The best idea is always to build MPIR for the\nexact machine type you intend to run it on.\n\nThe following CPUs have specific support.  
See @file{configure.in} for details\nof what code and compiler options they select.\n\n@itemize @bullet\n\n@c Keep this formatting, it's easy to read and it can be grepped to\n@c automatically test that CPUs listed get through ./config.sub\n\n@item\nAlpha:\n@nisamp{alpha},\n@nisamp{alphaev5},\n@nisamp{alphaev56},\n@nisamp{alphapca56},\n@nisamp{alphapca57},\n@nisamp{alphaev6},\n@nisamp{alphaev67},\n@nisamp{alphaev68}\n@nisamp{alphaev7}\n\n@item\nIA-64:\n@nisamp{ia64},\n@nisamp{itanium},\n@nisamp{itanium2}\n\n@item\nMIPS:\n@nisamp{mips},\n@nisamp{mips3},\n@nisamp{mips64}\n\n@item\nPowerPC:\n@nisamp{powerpc},\n@nisamp{powerpc64},\n@nisamp{powerpc401},\n@nisamp{powerpc403},\n@nisamp{powerpc405},\n@nisamp{powerpc505},\n@nisamp{powerpc601},\n@nisamp{powerpc602},\n@nisamp{powerpc603},\n@nisamp{powerpc603e},\n@nisamp{powerpc604},\n@nisamp{powerpc604e},\n@nisamp{powerpc620},\n@nisamp{powerpc630},\n@nisamp{powerpc740},\n@nisamp{powerpc7400},\n@nisamp{powerpc7450},\n@nisamp{powerpc750},\n@nisamp{powerpc801},\n@nisamp{powerpc821},\n@nisamp{powerpc823},\n@nisamp{powerpc860},\n@nisamp{powerpc970}\n\n@item\nSPARC:\n@nisamp{sparc},\n@nisamp{sparcv8},\n@nisamp{microsparc},\n@nisamp{supersparc},\n@nisamp{sparcv9},\n@nisamp{ultrasparc},\n@nisamp{ultrasparc2},\n@nisamp{ultrasparc2i},\n@nisamp{ultrasparc3},\n@nisamp{sparc64}\n\n@item\nx86 family:\n@nisamp{pentium},\n@nisamp{pentiummmx},\n@nisamp{pentiumpro},\n@nisamp{pentium2},\n@nisamp{pentium3},\n@nisamp{pentium4},\n@nisamp{netburst},\n@nisamp{netburstlahf},\n@nisamp{prescott},\n@nisamp{core},\n@nisamp{core2},\n@nisamp{penryn},\n@nisamp{nehalem},\n@nisamp{westmere}\n@nisamp{sandybridge}\n@nisamp{haswell}\n@nisamp{nano}\n@nisamp{atom},\n@nisamp{k5},\n@nisamp{k6},\n@nisamp{k62},\n@nisamp{k63},\n@nisamp{k7},\n@nisamp{k8},\n@nisamp{k10}\n@nisamp{k102}\n@nisamp{piledriver}\n@nisamp{bulldozer}\n@nisamp{bobcat}\n@nisamp{viac3},\n@nisamp{viac32}\n\n@item\nOther:\n@nisamp{arm},\n@end itemize\n\nCPUs not listed will use generic C 
code.\n\n@item Generic C Build\n@cindex Generic C\nIf some of the assembly code causes problems, or if otherwise desired, the\ngeneric C code can be selected with CPU @samp{none}.  For example,\n\n@example\n./configure --host=none-unknown-freebsd3.5\n@end example\n\nNote that this will run quite slowly, but it should be portable and should at\nleast make it possible to get something running if all else fails.\n\n@item Fat binary, @option{--enable-fat}\n@cindex Fat binary\n@cindex @option{--enable-fat}\nUsing @option{--enable-fat} selects a ``fat binary'' build on x86 or x86_64 \nsystems, where optimized low level subroutines are chosen at runtime according \nto the CPU detected.  This means more code, but gives reasonable performance \nfrom a single binary for all x86 chips, or similarly for all x86_64 chips. \n(This option might become available for more architectures in the future.)\n\n@item @option{ABI}\n@cindex ABI\nOn some systems MPIR supports multiple ABIs (application binary interfaces),\nmeaning data type sizes and calling conventions.  By default MPIR chooses the\nbest ABI available, but a particular ABI can be selected.  
For example\n\n@example\n./configure --host=mips64-sgi-irix6 ABI=n32\n@end example\n\nSee @ref{ABI and ISA}, for the available choices on relevant CPUs, and what\napplications need to do.\n\n@item @option{--with-yasm}\n@cindex Yasm\n@cindex @option{--with-yasm}\nBy default MPIR will look for a system-wide Yasm using the @command{which} command.\nPassing @option{--with-yasm} lets MPIR use a version of Yasm at a non-standard\nlocation.\nThis is useful if none is available in @env{PATH}.\nWith this option a full path to Yasm's binary should be given, for example\n\n@example\n./configure --with-yasm=/usr/local/bin/yasm\n@end example\n\n@item @option{CC}, @option{CFLAGS}\n@cindex C compiler\n@cindex @code{CC}\n@cindex @code{CFLAGS}\nBy default the C compiler used is chosen from among some likely candidates,\nwith @command{gcc} normally preferred if it's present.  The usual\n@samp{CC=whatever} can be passed to @samp{./configure} to choose something\ndifferent.\n\nFor various systems, default compiler flags are set based on the CPU and\ncompiler.  The usual @samp{CFLAGS=\"-whatever\"} can be passed to\n@samp{./configure} to use something different or to set good flags for systems\nMPIR doesn't otherwise know.\n\nThe @samp{CC} and @samp{CFLAGS} used are printed during @samp{./configure},\nand can be found in each generated @file{Makefile}.  This is the easiest way\nto check the defaults when considering changing or adding something.\n\nNote that when @samp{CC} and @samp{CFLAGS} are specified on a system\nsupporting multiple ABIs it's important to give an explicit\n@samp{ABI=whatever}, since MPIR can't determine the ABI just from the flags and\nwon't be able to select the correct assembler code.\n\nIf just @samp{CC} is selected then normal default @samp{CFLAGS} for that\ncompiler will be used (if MPIR recognises it).  
For example @samp{CC=gcc} can\nbe used to force the use of GCC, with default flags (and default ABI).\n\n@item @option{CPPFLAGS}\n@cindex @code{CPPFLAGS}\nAny flags like @samp{-D} defines or @samp{-I} includes required by the\npreprocessor should be set in @samp{CPPFLAGS} rather than @samp{CFLAGS}.\nCompiling is done with both @samp{CPPFLAGS} and @samp{CFLAGS}, but\npreprocessing uses just @samp{CPPFLAGS}.  This distinction is because most\npreprocessors won't accept all the flags the compiler does.  Preprocessing is\ndone separately in some configure tests, and in the @samp{ansi2knr} support\nfor K&R compilers.\n\n@item @option{CC_FOR_BUILD}\n@cindex @code{CC_FOR_BUILD}\nSome build-time programs are compiled and run to generate host-specific data\ntables.  @samp{CC_FOR_BUILD} is the compiler used for this.  It doesn't need\nto be in any particular ABI or mode, it merely needs to generate executables\nthat can run.  The default is to try the selected @samp{CC} and some likely\ncandidates such as @samp{cc} and @samp{gcc}, looking for something that works.\n\nNo flags are used with @samp{CC_FOR_BUILD} because a simple invocation like\n@samp{cc foo.c} should be enough.  If some particular options are required\nthey can be included as for instance @samp{CC_FOR_BUILD=\"cc -whatever\"}.\n\n@item C++ Support, @option{--enable-cxx}\n@cindex C++ support\n@cindex @code{--enable-cxx}\nC++ support in MPIR can be enabled with @samp{--enable-cxx}, in which case a\nC++ compiler will be required.  As a convenience @samp{--enable-cxx=detect}\ncan be used to enable C++ support only if a compiler can be found.  
The C++\nsupport consists of a library @file{libmpirxx.la} and header file\n@file{mpirxx.h} (@pxref{Headers and Libraries}).\n\nA separate @file{libmpirxx.la} has been adopted rather than having C++ objects\nwithin @file{libmpir.la} in order to ensure dynamic linked C programs aren't\nbloated by a dependency on the C++ standard library, and to avoid any chance\nthat the C++ compiler could be required when linking plain C programs.\n\n@file{libmpirxx.la} will use certain internals from @file{libmpir.la} and can\nonly be expected to work with @file{libmpir.la} from the same MPIR version.\nFuture changes to the relevant internals will be accompanied by renaming, so a\nmismatch will cause unresolved symbols rather than perhaps mysterious\nmisbehaviour.\n\nIn general @file{libmpirxx.la} will be usable only with the C++ compiler that\nbuilt it, since name mangling and runtime support are usually incompatible\nbetween different compilers.\n\n@item @option{CXX}, @option{CXXFLAGS}\n@cindex C++ compiler\n@cindex @code{CXX}\n@cindex @code{CXXFLAGS}\nWhen C++ support is enabled, the C++ compiler and its flags can be set with\nvariables @samp{CXX} and @samp{CXXFLAGS} in the usual way.  The default for\n@samp{CXX} is the first compiler that works from a list of likely candidates,\nwith @command{g++} normally preferred when available.  The default for\n@samp{CXXFLAGS} is to try @samp{CFLAGS}, @samp{CFLAGS} without @samp{-g}, then\nfor @command{g++} either @samp{-g -O2} or @samp{-O2}, or for other compilers\n@samp{-g} or nothing.  Trying @samp{CFLAGS} this way is convenient when using\n@samp{gcc} and @samp{g++} together, since the flags for @samp{gcc} will\nusually suit @samp{g++}.\n\nIt's important that the C and C++ compilers match, meaning their startup and\nruntime support routines are compatible and that they generate code in the\nsame ABI (if there's a choice of ABIs on the system).  
@samp{./configure}\nisn't currently able to check these things very well itself, so for that\nreason @samp{--disable-cxx} is the default, to avoid a build failure due to a\ncompiler mismatch.  Perhaps this will change in the future.\n\nIncidentally, it's normally not good enough to set @samp{CXX} to the same as\n@samp{CC}.  Although @command{gcc} for instance recognises @file{foo.cc} as\nC++ code, only @command{g++} will invoke the linker the right way when\nbuilding an executable or shared library from C++ object files.\n\n@item Temporary Memory, @option{--enable-alloca=<choice>}\n@cindex Temporary memory\n@cindex Stack overflow\n@cindex @code{alloca}\n@cindex @code{--enable-alloca}\nMPIR allocates temporary workspace using one of the following three methods,\nwhich can be selected with for instance\n@samp{--enable-alloca=malloc-reentrant}.\n\n@itemize @bullet\n@item\n@samp{alloca} - C library or compiler builtin.\n@item\n@samp{malloc-reentrant} - the heap, in a re-entrant fashion.\n@item\n@samp{malloc-notreentrant} - the heap, with global variables.\n@end itemize\n\nFor convenience, the following choices are also available.\n@samp{--disable-alloca} is the same as @samp{no}.\n\n@itemize @bullet\n@item\n@samp{yes} - a synonym for @samp{alloca}.\n@item\n@samp{no} - a synonym for @samp{malloc-reentrant}.\n@item\n@samp{reentrant} - @code{alloca} if available, otherwise\n@samp{malloc-reentrant}.  This is the default.\n@item\n@samp{notreentrant} - @code{alloca} if available, otherwise\n@samp{malloc-notreentrant}.\n@end itemize\n\n@code{alloca} is reentrant and fast, and is recommended.  
It actually allocates\njust small blocks on the stack; larger ones use malloc-reentrant.\n\n@samp{malloc-reentrant} is, as the name suggests, reentrant and thread safe,\nbut @samp{malloc-notreentrant} is faster and should be used if reentrancy is\nnot required.\n\nThe two malloc methods in fact use the memory allocation functions selected by\n@code{mp_set_memory_functions}, these being @code{malloc} and friends by\ndefault.  @xref{Custom Allocation}.\n\nAn additional choice @samp{--enable-alloca=debug} is available, to help when\ndebugging memory related problems (@pxref{Debugging}).\n\n@item FFT Multiplication, @option{--disable-fft}\n@cindex FFT multiplication\n@cindex @code{--disable-fft}\nBy default multiplications are done using Karatsuba, Toom, and\nFFT algorithms@.  The FFT is only used on large to very large operands and \ncan be disabled to save code size if desired.\n\n@item Assertion Checking, @option{--enable-assert}\n@cindex Assertion checking\n@cindex @code{--enable-assert}\nThis option enables some consistency checking within the library.  This can be\nof use while debugging, @pxref{Debugging}.\n\n@item Execution Profiling, @option{--enable-profiling=prof/gprof/instrument}\n@cindex Execution profiling\n@cindex @code{--enable-profiling}\nEnable profiling support, in one of various styles, @pxref{Profiling}.\n\n@item @option{MPN_PATH}\n@cindex @code{MPN_PATH}\nVarious assembler versions of mpn subroutines are provided.  For a given\nCPU, a search is made through a path to choose a version of each.  For example\n@samp{sparcv8} has\n\n@example\nMPN_PATH="sparc32/v8 sparc32 generic"\n@end example\n\nwhich means look first for v8 code, then plain sparc32 (which is v7), and\nfinally fall back on generic C@.  Knowledgeable users with special requirements\ncan specify a different path.  
Normally this is completely unnecessary.\n\n@item Documentation\n@cindex Documentation formats\n@cindex Texinfo\nThe source for the document you're now reading is @file{doc/mpir.texi}, in\nTexinfo format, see @GMPreftop{texinfo, Texinfo}.\n\n@cindex Postscript\n@cindex DVI\n@cindex PDF\nInfo format @samp{doc/mpir.info} is included in the distribution.  The usual\nautomake targets are available to make PostScript, DVI, PDF and HTML (these\nwill require various @TeX{} and Texinfo tools).\n\n@cindex DocBook\n@cindex XML\nDocBook and XML can be generated by the Texinfo @command{makeinfo} program\ntoo, see @ref{makeinfo options,, Options for @command{makeinfo}, texinfo,\nTexinfo}.\n\nSome supplementary notes can also be found in the @file{doc} subdirectory.\n\n@end table\n\n\n@need 2000\n@node ABI and ISA, Notes for Package Builds, Build Options, Installing MPIR\n@section ABI and ISA\n@cindex ABI\n@cindex Application Binary Interface\n@cindex ISA\n@cindex Instruction Set Architecture\n\nABI (Application Binary Interface) refers to the calling conventions between\nfunctions, meaning what registers are used and what sizes the various C data\ntypes are.  ISA (Instruction Set Architecture) refers to the instructions and\nregisters a CPU has available.\n\nSome 64-bit ISA CPUs have both a 64-bit ABI and a 32-bit ABI defined, the\nlatter for compatibility with older CPUs in the family.  MPIR supports some\nCPUs like this in both ABIs.  In fact within MPIR @samp{ABI} means a\ncombination of chip ABI, plus how MPIR chooses to use it.  For example in some\n32-bit ABIs, MPIR may support a limb as either a 32-bit @code{long} or a 64-bit\n@code{long long}.\n\nBy default MPIR chooses the best ABI available for a given system, and this\ngenerally gives significantly greater speed.  But an ABI can be chosen\nexplicitly to make MPIR compatible with other libraries, or particular\napplication requirements.  
For example,\n\n@example\n./configure ABI=32\n@end example\n\nIn all cases it's vital that all object code used in a given program is\ncompiled for the same ABI.\n\nUsually a limb is implemented as a @code{long}.  When a @code{long long} limb\nis used this is encoded in the generated @file{mpir.h}.  This is convenient for\napplications, but it does mean that @file{mpir.h} will vary, and can't be just\ncopied around.  @file{mpir.h} remains compiler independent though, since all\ncompilers for a particular ABI will be expected to use the same limb type.\n\nCurrently no attempt is made to follow whatever conventions a system has for\ninstalling library or header files built for a particular ABI@.  This will\nprobably only matter when installing multiple builds of MPIR, and it might be\nas simple as configuring with a special @samp{libdir}, or it might require\nmore than that.  Note that builds for different ABIs need to be done separately,\nwith a fresh (@command{make distclean}), @command{./configure} and @command{make}.\n\n@sp 1\n@table @asis\n@need 1000\n@item AMD64 (@samp{x86_64})\n@cindex AMD64\nOn AMD64 systems supporting both 32-bit and 64-bit modes for applications, the\nfollowing ABI choices are available.\n\n@table @asis\n@item @samp{ABI=64}\nThe 64-bit ABI uses 64-bit limbs and pointers and makes full use of the chip\narchitecture.  This is the default.  Applications will usually not need\nspecial compiler flags, but for reference the option is\n\n@example\ngcc  -m64\n@end example\n\n@item @samp{ABI=32}\nThe 32-bit ABI is the usual i386 conventions.  
This will be slower, and is not\nrecommended except for inter-operating with other code not yet 64-bit capable.\nApplications must be compiled with\n\n@example\ngcc  -m32\n@end example\n\n(In GCC 2.95 and earlier there's no @samp{-m32} option, it's the only mode.)\n@end table\n\n@sp 1\n@need 1500\n@item IA-64 under HP-UX (@samp{ia64*-*-hpux*}, @samp{itanium*-*-hpux*})\n@cindex IA-64\n@cindex HP-UX\nHP-UX supports two ABIs for IA-64.  MPIR performance is the same in both.\n\n@table @asis\n@item @samp{ABI=32}\nIn the 32-bit ABI, pointers, @code{int}s and @code{long}s are 32 bits and MPIR\nuses a 64 bit @code{long long} for a limb.  Applications can be compiled\nwithout any special flags since this ABI is the default in both HP C and GCC,\nbut for reference the flags are\n\n@example\ngcc  -milp32\ncc   +DD32\n@end example\n\n@item @samp{ABI=64}\nIn the 64-bit ABI, @code{long}s and pointers are 64 bits and MPIR uses a\n@code{long} for a limb.  Applications must be compiled with\n\n@example\ngcc  -mlp64\ncc   +DD64\n@end example\n@end table\n\nOn other IA-64 systems, GNU/Linux for instance, @samp{ABI=64} is the only\nchoice.\n\n@sp 1\n@need 1000\n@item PowerPC 64 (@samp{powerpc64}, @samp{powerpc620}, @samp{powerpc630}, @samp{powerpc970})\n@cindex PowerPC\n@table @asis\n@item @samp{ABI=aix64}\n@cindex AIX\nThe AIX 64 ABI uses 64-bit limbs and pointers and is the default on PowerPC 64\n@samp{*-*-aix*} systems.  Applications must be compiled with\n\n@example\ngcc  -maix64\nxlc  -q64\n@end example\n\n@item @samp{ABI=mode32}\n@cindex AIX\nThe @samp{mode32} ABI uses a 64-bit @code{long long} limb but with the chip\nstill in 32-bit mode and using 32-bit calling conventions.  This is the\ndefault on PowerPC 64 @samp{*-*-darwin*} systems.  No special compiler options\nare needed for applications.\n\n@item @samp{ABI=32}\nThis is the basic 32-bit PowerPC ABI, with a 32-bit limb.  
No special compiler\noptions are needed for applications.\n@end table\n\nMPIR speed is greatest in @samp{aix64} and @samp{mode32}.  In @samp{ABI=32}\nonly the 32-bit ISA is used and this doesn't make full use of a 64-bit chip.\nOn a suitable system we could perhaps use more of the ISA, but there are no\nplans to do so.\n\n@sp 1\n@need 1000\n@item Sparc V9 (@samp{sparc64}, @samp{sparcv9}, @samp{ultrasparc*})\n@cindex Sparc V9\n@cindex Solaris\n@cindex Sun\n@table @asis\n@item @samp{ABI=64}\nThe 64-bit V9 ABI is available on the various BSD sparc64 ports, recent\nversions of Sparc64 GNU/Linux, and Solaris 2.7 and up (when the kernel is in\n64-bit mode).  GCC 3.2 or higher, or Sun @command{cc} is required.  On\nGNU/Linux, depending on the default @command{gcc} mode, applications must be\ncompiled with\n\n@example\ngcc  -m64\n@end example\n\nOn Solaris applications must be compiled with\n\n@example\ngcc  -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9\ncc   -xarch=v9\n@end example\n\nOn the BSD sparc64 systems no special options are required, since 64-bits is\nthe only ABI available.\n\n@item @samp{ABI=32}\nFor the basic 32-bit ABI, MPIR still uses as much of the V9 ISA as it can.  In\nthe Sun documentation this combination is known as ``v8plus''.  On GNU/Linux,\ndepending on the default @command{gcc} mode, applications may need to be\ncompiled with\n\n@example\ngcc  -m32\n@end example\n\nOn Solaris, no special compiler options are required for applications, though\nusing something like the following is recommended.  
(@command{gcc} 2.8 and\nearlier only support @samp{-mv8} though.)\n\n@example\ngcc  -mv8plus\ncc   -xarch=v8plus\n@end example\n@end table\n\nMPIR speed is greatest in @samp{ABI=64}, so it's the default where available.\nThe speed is partly because there are extra registers available and partly\nbecause 64-bits is considered the more important case and has therefore had\nbetter code written for it.\n\nDon't be confused by the names of the @samp{-m} and @samp{-x} compiler\noptions, they're called @samp{arch} but effectively control both ABI and ISA@.\n\nOn Solaris 2.6 and earlier, only @samp{ABI=32} is available since the kernel\ndoesn't save all registers.\n\nOn Solaris 2.7 with the kernel in 32-bit mode, a normal native build will\nreject @samp{ABI=64} because the resulting executables won't run.\n@samp{ABI=64} can still be built if desired by making it look like a\ncross-compile, for example\n\n@example\n./configure --build=none --host=sparcv9-sun-solaris2.7 ABI=64\n@end example\n@end table\n\n\n@need 2000\n@node Notes for Package Builds, Building with Microsoft Visual Studio, ABI and ISA, Installing MPIR\n@section Notes for Package Builds\n@cindex Build notes for binary packaging\n@cindex Packaged builds\n\nMPIR should present no great difficulties for packaging in a binary\ndistribution.\n\n@cindex Libtool versioning\n@cindex Shared library versioning\nLibtool is used to build the library and @samp{-version-info} is set\nappropriately, having started from @samp{3:0:0} in GMP 3.0 (@pxref{Versioning,\nLibrary interface versions, Library interface versions, libtool, GNU\nLibtool}).\n\nThe GMP 4 series and MPIR 1 series will be upwardly binary compatible in each \nrelease and will be upwardly binary compatible with all of the GMP 3 series.  
\nAdditional function interfaces may be added in each release, so on systems where \nlibtool versioning is not fully checked by the loader an auxiliary mechanism may be\nneeded to express that a dynamic linked application depends on a new enough\nMPIR.\n\nFrom MPIR 2.0.0 binary compatibility with the GMP 5 series will be maintained\nwith the exception of the availability of secure functions for cryptography,\nwhich will not be supported in MPIR. For full GMP compatibility, including\ndeprecated functionality, the @samp{--enable-gmpcompat} configuration option\nmust be used.\n\nAn auxiliary mechanism may also be needed to express that @file{libmpirxx.la}\n(from @option{--enable-cxx}, @pxref{Build Options}) requires @file{libmpir.la}\nfrom the same MPIR version, since this is not done by the libtool versioning,\nnor otherwise.  A mismatch will result in unresolved symbols from the linker,\nor perhaps the loader.\n\nWhen building a package for a CPU family, care should be taken to use\n@samp{--host} (or @samp{--build}) to choose the least common denominator among\nthe CPUs which might use the package.  For example this might mean plain\n@samp{sparc} (meaning V7) for SPARCs.\n\nFor x86s, @option{--enable-fat} sets things up for a fat binary build, making a\nruntime selection of optimized low level routines.  This is a good choice for\npackaging to run on a range of x86 chips.\n\nUsers who care about speed will want MPIR built for their exact CPU type, to\nmake best use of the available optimizations.  Providing a way to suitably\nrebuild a package may be useful.  This could be as simple as making it\npossible for a user to omit @samp{--build} (and @samp{--host}) so\n@samp{./config.guess} will detect the CPU@.  But a way to manually specify a\n@samp{--build} will be wanted for systems where @samp{./config.guess} is\ninexact.\n\nOn systems with multiple ABIs, a packaged build will need to decide which\namong the choices is to be provided, see @ref{ABI and ISA}.  
A given run of\n@samp{./configure} etc will only build one ABI@.  If a second ABI is also\nrequired then a second run of @samp{./configure} etc must be made, starting\nfrom a clean directory tree (@samp{make distclean}).\n\nAs noted under ``ABI and ISA'', currently no attempt is made to follow system\nconventions for install locations that vary with ABI, such as\n@file{/usr/lib/sparcv9} for @samp{ABI=64} as opposed to @file{/usr/lib} for\n@samp{ABI=32}.  A package build can override @samp{libdir} and other standard\nvariables as necessary.\n\nNote that @file{mpir.h} is a generated file, and will be architecture and ABI\ndependent.  When attempting to install two ABIs simultaneously it will be\nimportant that an application compile gets the correct @file{mpir.h} for its\ndesired ABI@.  If compiler include paths don't vary with ABI options then it\nmight be necessary to create a @file{/usr/include/mpir.h} which tests\npreprocessor symbols and chooses the correct actual @file{mpir.h}.\n\n@need 2000\n@node Building with Microsoft Visual Studio, Notes for Particular Systems, Notes for Package Builds, Installing MPIR\n@section Building with Microsoft Visual Studio \n@cindex Build notes for MSVC\n@cindex MSVC\n@cindex Visual Studio\n\nMPIR can be built with the professional and higher versions of Visual Studio\n2013, 2015 and 2017.  It can also be built with the community editions of\nVisual Studio 2015 and 2017. If the assembler optimised versions of MPIR\nare required, then Python 3 and the YASM assembler also need to be\ninstalled.  
MPIR can also be built with the Intel C/C++ compiler that can be\nintegrated into versions of Visual Studio.\n\nPython 3 can be obtained from:\n\n@display\n@uref{https://www.python.org/}\n@end display\n\nand the YASM assembler from:\n\n@display\n@uref{http://yasm.tortall.net/Download.html}\n@end display\n\nThis assembler (@file{vsyasm.exe}, NOT @file{yasm.exe}) should be placed in the \ndirectory @file{C:\\Program Files\\yasm}.\n\nAlternatively @file{vsyasm.exe} can be placed elsewhere provided that the\nenvironment variable @samp{YASMPATH} gives its location.\n\n@table @asis\n\n@item Building MPIR\n\nA build of MPIR is started by double clicking on the file @file{mpir.sln}\nin the appropriate sub-directory within the MPIR root directory:\n\n@example\n    Visual Studio 2013:  mpir/msvc/vs13/mpir.sln\n    Visual Studio 2015:  mpir/msvc/vs15/mpir.sln\n    Visual Studio 2017:  mpir/msvc/vs17/mpir.sln\n@end example\n    \nVisual Studio will then display a list of individual build projects\nfrom which an appropriate version of MPIR can be built. For example,\na typical list of projects is:\n\n@example\n    dll_mpir_gc     standard DLL, no assembler (win32 and x64)\n    dll_mpir_p6     assembly optimised DLL for pentium 6 (win32)\n    lib_mpir_p6     assembly optimised static library for \n                    pentium 6 (win32)\n    lib_mpir_core2  assembly optimised static library for \n                    core2 (x64)\n    dll_mpir_core2  assembly optimised DLL for core2 (x64)\n@end example\n\nMPIR can be built either as a static library or as a DLL. 
A DLL will\ninclude both the C and C++ features of MPIR but a static library will include\nonly the C features so in this case the project:\n\n@example\n  lib_mpir_cxx     the MPIRXX C++ static library (win32 and x64)\n@end example\n\nshould also be built to provide the MPIR C++ static library (@samp{MPIRXX}).\n\nBefore a project is built, Visual Studio should be set to the required \nconfiguration (Release or Debug) and the required target architecture \n(win32 or x64).  The build process puts the output files into one of the\ntwo sub-directories:\n\n@display\n@file{mpir/lib} \n@file{mpir/dll} \n@end display\n    \ndepending on whether static library or DLL versions have been built.\n\n@item Additional Assembler Optimised Versions\n\nThe Visual Studio builds for MPIR are initially provided with a small set\nof assembler optimised projects but many more are available and can be\nobtained by running the Python program @command{mpir_config.py <N> [<SDK>]} \nthat is in the mpir\\msvc directory. The value of <N> required depends on the\nversion of Visual Studio in use as follows:\n\n@example\n    Visual Studio 2013:  13\n    Visual Studio 2015:  15\n    Visual Studio 2017:  17\n@end example\n\nThe parameter <SDK> is optional and is the version number of the Windows SDK\nfor which the builds will be targeted (e.g. 10.0.16299.0).  This program,\nwhich has to be run before Visual Studio, provides a list of all the assembler\noptimised versions of MPIR that are available.  Any number of versions can be\nchosen and these builds will then be available when Visual Studio is subsequently\nopened by double clicking on @file{mpir.sln}.  
It is advisable not to have Visual\nStudio running when new builds are being added in this way.\n\n@item Testing Visual Studio versions of MPIR \n\nTesting a version of the library once it has been built is started by double\nclicking on the appropriate solution file:\n\n@example\n    Visual Studio 2013:  mpir/msvc/vs13/mpir-tests.sln\n    Visual Studio 2015:  mpir/msvc/vs15/mpir-tests.sln\n    Visual Studio 2017:  mpir/msvc/vs17/mpir-tests.sln\n@end example\n\nThe tests are always run on the last version of MPIR built and it is important\nthat the configuration set for building the tests (Release or Debug, win32 or\nx64) is the same as that used to build MPIR.  When testing the static library\nversions of MPIR, both the C (mpir.lib) and C++ (mpirxx.lib) must be built.\nAfter loading there will be a large list of test projects starting:\n\n@example\n  Solution `mpir-tests' (202 projects)\n        add-test-lib \n        bswap\n        constants\n        ....\n@end example\n      \nThe project @samp{add-test-lib} should be selected and built first, after which \nthe solution as a whole (i.e. the first line shown above) can be selected to \nbuild all the tests.  After the build has completed, the tests are run by \nexecuting the Python program @command{run-tests.py} in the appropriate Visual \nStudio build sub-directory, for example, for Visual Studio 2017:\n\n@example\n   mpir/msvc/vs17/mpir-tests/run-tests.py \n@end example\n\n@end table\n\n@need 2000\n@node Notes for Particular Systems, Known Build Problems, Building with Microsoft Visual Studio, Installing MPIR\n@section Notes for Particular Systems\n@cindex Build notes for particular systems\n@cindex Particular systems\n@cindex Systems\n@table @asis\n\n@c This section is more or less meant for notes about performance or about\n@c build problems that have been worked around but might leave a user\n@c scratching their head.  
Fun with different ABIs on a system belongs in the\n@c above section.\n\n@item ARM\n@cindex ARM\nOn systems @samp{arm*-*-*}, versions of GCC up to and including 2.95.3 have a\nbug in unsigned division, giving wrong results for some operands.  MPIR\n@samp{./configure} will demand GCC 2.95.4 or later.\n\n@item Floating Point Mode\n@cindex Floating point mode\n@cindex Hardware floating point mode\n@cindex Precision of hardware floating point\n@cindex x87\nOn some systems, the hardware floating point has a control mode which can set\nall operations to be done in a particular precision, for instance single,\ndouble or extended on x86 systems (x87 floating point).  The MPIR functions\ninvolving a @code{double} cannot be expected to operate to their full\nprecision when the hardware is in single precision mode.  Of course this\naffects all code, including application code, not just MPIR.\n\n@item MS-DOS and MS Windows\n@cindex MS-DOS\n@cindex MS Windows\n@cindex Windows\n@cindex Cygwin\n@cindex MINGW\nOn an MS Windows system Cygwin and Cygwin64 and Msys2/Mingw can be used, they \nare ports of GCC and the various GNU tools.\n\n@display\n@uref{http://www.cygwin.com/}\n@uref{https://msys2.github.io/}\n@end display\n\nBoth 32 and 64 bit versions of Msys2/Mingw and Cygwin are supported. Building\non these systems is very similar to building on Linux. \n\nWe strongly recommend using recent versions of Cygwin/Msys2.\n\n@item MS Windows DLLs\n@cindex DLLs\n@cindex MS Windows\n@cindex Windows\nOn systems @samp{*-*-cygwin*} and @samp{*-*-mingw*} and @samp{*-*-msys} static\nand DLL libraries can't both be built, since certain export directives in \n@file{mpir.h} must be different. Therefore you must specify whether you want\na shared library or a static library. 
For example if you want just a shared\nlibrary you can type the following.\n\n@example\n./configure --disable-static --enable-shared\n@end example\n\nLibtool doesn't install a @file{.lib} format import library, but it can be \ncreated with MS @command{lib} as follows, and copied to the install directory.\nSimilarly for @file{libmpir} and @file{libmpirxx}.\n\n@example\ncd .libs\nlib /def:libgmp-3.dll.def /out:libgmp-3.lib\n@end example\n\n@item Sparc CPU Types\n@cindex Sparc\n@samp{sparcv8} or @samp{supersparc} on relevant systems will give a\nsignificant performance increase over the V7 code selected by plain\n@samp{sparc}.\n\n@item Sparc App Regs\n@cindex Sparc\nThe MPIR assembler code for both 32-bit and 64-bit Sparc clobbers the\n``application registers'' @code{g2}, @code{g3} and @code{g4}, the same way\nthat the GCC default @samp{-mapp-regs} does (@pxref{SPARC Options,, SPARC\nOptions, gcc, Using the GNU Compiler Collection (GCC)}).\n\nThis makes that code unsuitable for use with the special V9\n@samp{-mcmodel=embmedany} (which uses @code{g4} as a data segment pointer),\nand for applications wanting to use those registers for special purposes.  In\nthese cases the only suggestion currently is to build MPIR with CPU @samp{none}\nto avoid the assembler code.\n\n@item SPARC Solaris\n@cindex Sparc\nBuilding applications against MPIR on SPARC Solaris (including @code{make \ncheck}) requires the @code{LD_LIBRARY_PATH} to be set appropriately. In \nparticular if one is building with @code{ABI=64} the linker needs to know\nwhere to find @code{libgcc} (often @code{/usr/lib/sparcv9} \nor @code{/usr/local/lib/sparcv9} or @code{/lib/sparcv9}).\n\nIt is not enough to specify the location in @code{LD_LIBRARY_PATH_64} unless\n@code{LD_LIBRARY_PATH_64} is added to @code{LD_LIBRARY_PATH}. Specifically\nthe 64 bit @code{libgcc} path needs to be in @code{LD_LIBRARY_PATH}. 
\n\nThe linker is able to automatically distinguish 32 and 64 bit libraries, \nso it is safe to include paths to both the 32 and 64 bit libraries in the \n@code{LD_LIBRARY_PATH}.\n\n@item Solaris 10 First Release on SPARC\n@cindex Solaris\nMPIR fails to build with Solaris 10 first release. Patch 123647-01 for SPARC, released by Sun in August 2006 fixes this problem.\n\n@item x86 CPU Types\n@cindex x86\n@cindex 80x86\n@cindex i386\n@samp{i586}, @samp{pentium} or @samp{pentiummmx} code is good for its intended\nP5 Pentium chips, but quite slow when run on Intel P6 class chips (PPro, P-II,\nP-III)@.  @samp{i386} is a better choice when making binaries that must run on\nboth.\n\n@item x86 MMX and SSE2 Code\n@cindex MMX\n@cindex SSE2\nIf the CPU selected has MMX code but the assembler doesn't support it, a\nwarning is given and non-MMX code is used instead.  This will be an inferior\nbuild, since the MMX code that's present is there because it's faster than the\ncorresponding plain integer code.  
The same applies to SSE2.\n\nOld versions of @samp{gas} don't support MMX instructions, in particular\nversion 1.92.3 that comes with FreeBSD 2.2.8 or the more recent OpenBSD 3.1\ndoesn't.\n\nSolaris 2.6 and 2.7 @command{as} generate incorrect object code for register\nto register @code{movq} instructions, and so can't be used for MMX code.\nInstall a recent @command{gas} if MMX code is wanted on these systems.\n@end table\n\n\n@need 2000\n@node Known Build Problems, Performance optimization, Notes for Particular Systems, Installing MPIR\n@section Known Build Problems\n@cindex Build problems known\n\n@c This section is more or less meant for known build problems that are not\n@c otherwise worked around and require some sort of manual intervention.\n\nYou might find more up-to-date information at @uref{http://www.mpir.org/}.\n\n@table @asis\n@item GCC XOP issues\n@cindex GCC\n@cindex XOP\nGCC from version 4.6.0 to 4.8.x have a problem with the XOP instruction, \nespecially with @samp{-O3} on at least AMD Opteron @samp{62xx/63xx}, \n@samp{FX-(4,6,8)[13]xx} and the Devil's Canyon @samp{9xxx} and the Kaveri APUs.\n\nA workaround is to pass @samp{-mno-xop} when compiling with @samp{-O3}.\n@end table\n\n\n@need 2000\n@node Performance optimization, , Known Build Problems, Installing MPIR\n@section Performance optimization\n@cindex Optimizing performance\n\n@c At some point, this should perhaps move to a separate chapter on optimizing\n@c performance.\n\nFor optimal performance, build MPIR for the exact CPU type of the target\ncomputer, see @ref{Build Options}.\n\nUnlike what is the case for most other programs, the compiler typically\ndoesn't matter much, since MPIR uses assembly language for the most critical\noperations.\n\nIn particular for long-running MPIR applications, and applications demanding\nextremely large numbers, building and running the @code{tuneup} program in the\n@file{tune} subdirectory, can be important.  
For example,\n\n@example\ncd tune\nmake tuneup\n./tuneup\n@end example\n\nwill generate better contents for the @file{gmp-mparam.h} parameter file.\n\nTo use the results, put the output in the file indicated in the\n@samp{Parameters for ...} header.  Then recompile from scratch.\n\nThe @code{tuneup} program takes one useful parameter, @samp{-f NNN}, which\ninstructs the program how long to check FFT multiply parameters.  If you're\ngoing to use MPIR for extremely large numbers, you may want to run @code{tuneup}\nwith a large NNN value.\n\n\n@node MPIR Basics, Reporting Bugs, Installing MPIR, Top\n@comment  node-name,  next,  previous,  up\n@chapter MPIR Basics\n@cindex Basics\n\n@strong{Using functions, macros, data types, etc.@: not documented in this\nmanual is strongly discouraged.  If you do so your application is guaranteed\nto be incompatible with future versions of MPIR.}\n\n@menu\n* Headers and Libraries::\n* Nomenclature and Types::\n* MPIR on Windows x64::\n* Function Classes::\n* Variable Conventions::\n* Parameter Conventions::\n* Memory Management::\n* Reentrancy::\n* Useful Macros and Constants::\n* Compatibility with older versions::\n* Efficiency::\n* Debugging::\n* Profiling::\n* Autoconf::\n* Emacs::\n@end menu\n\n@node Headers and Libraries, Nomenclature and Types, MPIR Basics, MPIR Basics\n@section Headers and Libraries\n@cindex Headers\n\n@cindex @file{mpir.h}\n@cindex Include files\n@cindex @code{#include}\nAll declarations needed to use MPIR are collected in the include file\n@file{mpir.h}.  
It is designed to work with both C and C++ compilers.\n\n@example\n#include <mpir.h>\n@end example\n\n@cindex @code{stdio.h}\nNote however that prototypes for MPIR functions with @code{FILE *} parameters\nare only provided if @code{<stdio.h>} is included too.\n\n@example\n#include <stdio.h>\n#include <mpir.h>\n@end example\n\n@cindex @code{stdarg.h}\nLikewise @code{<stdarg.h>} (or @code{<varargs.h>}) is required for prototypes\nwith @code{va_list} parameters, such as @code{gmp_vprintf}.  And\n@code{<obstack.h>} for prototypes with @code{struct obstack} parameters, such\nas @code{gmp_obstack_printf}, when available.\n\n@cindex Libraries\n@cindex Linking\n@cindex @code{libmpir}\nAll programs using MPIR must link against the @file{libmpir} library.  On a\ntypical Unix-like system this can be done with @samp{-lmpir} respectively, for example\n\n@example\ngcc myprogram.c -lmpir\n@end example\n\n@cindex @code{libmpirxx}\nMPIR C++ functions are in a separate @file{libmpirxx} library.  This is built\nand installed if C++ support has been enabled (@pxref{Build Options}).  For\nexample,\n\n@example\ng++ mycxxprog.cc -lmpirxx -lmpir\n@end example\n\n@cindex Libtool\nMPIR is built using Libtool and an application can use that to link if desired,\n@GMPpxreftop{libtool, GNU Libtool}\n\nIf MPIR has been installed to a non-standard location then it may be necessary\nto use @samp{-I} and @samp{-L} compiler options to point to the right\ndirectories, and some sort of run-time path for a shared library.\n\n\n@node Nomenclature and Types, MPIR on Windows x64, Headers and Libraries, MPIR Basics\n@section Nomenclature and Types\n@cindex Nomenclature\n@cindex Types\n\n@cindex Integer\n@tindex @code{mpz_t}\nIn this manual, @dfn{integer} usually means a multiple precision integer, as\ndefined by the MPIR library.  
The C data type for such integers is @code{mpz_t}.\nHere are some examples of how to declare such integers:\n\n@example\nmpz_t sum;\n\nstruct foo @{ mpz_t x, y; @};\n\nmpz_t vec[20];\n@end example\n\n@cindex Rational number\n@tindex @code{mpq_t}\n@dfn{Rational number} means a multiple precision fraction.  The C data type\nfor these fractions is @code{mpq_t}.  For example:\n\n@example\nmpq_t quotient;\n@end example\n\n@cindex Floating-point number\n@tindex @code{mpf_t}\n@dfn{Floating point number} or @dfn{Float} for short, is an arbitrary precision\nmantissa with a limited precision exponent.  The C data type for such objects\nis @code{mpf_t}.  For example:\n\n@example\nmpf_t fp;\n@end example\n\n@tindex @code{mp_exp_t}\nThe floating point functions accept and return exponents in the C type\n@code{mp_exp_t}.  Currently this is usually a @code{long}, but on some systems\nit's an @code{int} for efficiency.\n\n@cindex Limb\n@tindex @code{mp_limb_t}\nA @dfn{limb} means the part of a multi-precision number that fits in a single\nmachine word.  (We chose this word because a limb of the human body is\nanalogous to a digit, only larger, and containing several digits.)  Normally a\nlimb is 32 or 64 bits.  The C data type for a limb is @code{mp_limb_t}.\n\n@tindex @code{mp_size_t}\nCounts of limbs are represented in the C type @code{mp_size_t}.  Currently\nthis is normally a @code{long}, but on some systems it's an @code{int} for\nefficiency.\n\n@tindex @code{mp_bitcnt_t}\nCounts of bits of a multi-precision number are represented in the C type\n@code{mp_bitcnt_t}.  Currently this is always an @code{unsigned long}, but on\nsome systems it will be an @code{unsigned long long} in the future.\n\n@cindex Random state\n@tindex @code{gmp_randstate_t}\n@dfn{Random state} means an algorithm selection and current state data.  The C\ndata type for such objects is @code{gmp_randstate_t}.  
For example:\n\n@example\ngmp_randstate_t rstate;\n@end example\n\nAlso, in general @code{mp_bitcnt_t} is used for bit counts and ranges, and\n@code{size_t} is used for byte or character counts.\n\n@node MPIR on Windows x64, Function Classes, Nomenclature and Types, MPIR Basics\n@section MPIR on Windows x64\n@cindex Windows\n\nAlthough Windows x64 is a 64-bit operating system, Microsoft has decided to\nmake long integers 32-bits, which is inconsistent when compared with almost\nall other 64-bit operating systems.   This has caused many subtle bugs when   \nopen source code is ported to Windows x64 because many developers reasonably\nexpect to find that long integers on a 64-bit operating system will be 64 \nbits long.  \n\nMPIR contains functions with suffixes of @code{_ui} and @code{_si} that are \nused to input unsigned and signed integers into and convert them for use \nwith MPIR's multiple precision integers (mpz types).   For example, the \nfollowing functions set an @code{mpz_t} integer from @code{unsigned} and \n@code{signed long} integers respectively.\n\n@deftypefun void mpz_set_ui (mpz_t, unsigned long int)\n@end deftypefun\n\n@deftypefun void mpz_set_si (mpz_t, signed long int)\n@end deftypefun\n\n@sp 1 \nAlso, the following functions obtain @code{unsigned} and \n@code{signed long int} values from an MPIR multiple precision integer \n(@code{mpz_t}).\n\n@deftypefun unsigned long int mpz_get_ui (mpz_t)\n@end deftypefun\n\n@deftypefun signed long int mpz_get_si (mpz_t)\n@end deftypefun\n\n@sp 1\nTo bring MPIR on Windows x64 into line with other 64-bit operating systems\ntwo new types have been introduced throughout MPIR:\n\n@itemize @bullet\n@item @code{mpir_ui} defined as @code{unsigned long int} on all but Windows x64, defined as @code{unsigned long long int} on Windows x64\n\n@item @code{mpir_si} defined as @code{signed long int} on all but Windows x64, defined as @code{signed long long int} on Windows x64\n@end itemize\n\n@sp 1 \nThe above prototypes in 
MPIR 2.6.0 are changed to:\n \n@deftypefun void mpz_set_ui (mpz_t, mpir_ui)\n@end deftypefun\n\n@deftypefun void mpz_set_si (mpz_t, mpir_si)\n@end deftypefun\n\n@deftypefun mpir_ui mpz_get_ui (mpz_t)\n@end deftypefun\n\n@deftypefun mpir_si mpz_get_si (mpz_t)\n@end deftypefun\n\n@sp 1\nThese changes are applied to all MPIR functions with @code{_ui} and \n@code{_si} suffixes.    \n\n@node Function Classes, Variable Conventions, MPIR on Windows x64, MPIR Basics\n@section Function Classes\n@cindex Function classes\n\nThere are five classes of functions in the MPIR library:\n\n@enumerate\n@item\nFunctions for signed integer arithmetic, with names beginning with\n@code{mpz_}.  The associated type is @code{mpz_t}.  There are about 150\nfunctions in this class.  (@pxref{Integer Functions})\n\n@item\nFunctions for rational number arithmetic, with names beginning with\n@code{mpq_}.  The associated type is @code{mpq_t}.  There are about 40\nfunctions in this class, but the integer functions can be used for arithmetic\non the numerator and denominator separately.  (@pxref{Rational Number\nFunctions})\n\n@item\nFunctions for floating-point arithmetic, with names beginning with\n@code{mpf_}.  The associated type is @code{mpf_t}.  There are about 60\nfunctions in this class.  (@pxref{Floating-point Functions})\n\n@item\nFast low-level functions that operate on natural numbers.  These are used by\nthe functions in the preceding groups, and you can also call them directly\nfrom very time-critical user programs.  These functions' names begin with\n@code{mpn_}.  The associated type is array of @code{mp_limb_t}.  There are\nabout 30 (hard-to-use) functions in this class.  (@pxref{Low-level Functions})\n\n@item\nMiscellaneous functions.  Functions for setting up custom allocation and\nfunctions for generating random numbers.  
(@pxref{Custom Allocation}, and\n@pxref{Random Number Functions})\n@end enumerate\n\n\n@node Variable Conventions, Parameter Conventions, Function Classes, MPIR Basics\n@section Variable Conventions\n@cindex Variable conventions\n@cindex Conventions for variables\n\nMPIR functions generally have output arguments before input arguments.  This\nnotation is by analogy with the assignment operator.\n\nMPIR lets you use the same variable for both input and output in one call.  For\nexample, the main function for integer multiplication, @code{mpz_mul}, can be\nused to square @code{x} and put the result back in @code{x} with\n\n@example\nmpz_mul (x, x, x);\n@end example\n\nBefore you can assign to an MPIR variable, you need to initialize it by calling\none of the special initialization functions.  When you're done with a\nvariable, you need to clear it out, using one of the functions for that\npurpose.  Which function to use depends on the type of variable.  See the\nchapters on integer functions, rational number functions, and floating-point\nfunctions for details.\n\nA variable should only be initialized once, or at least cleared between each\ninitialization.  After a variable has been initialized, it may be assigned to\nany number of times.\n\nFor efficiency reasons, avoid excessive initializing and clearing.  In\ngeneral, initialize near the start of a function and clear near the end.  
For\nexample,\n\n@example\nvoid\nfoo (void)\n@{\n  mpz_t  n;\n  int    i;\n  mpz_init (n);\n  for (i = 1; i < 100; i++)\n    @{\n      mpz_mul (n, @dots{});\n      mpz_fdiv_q (n, @dots{});\n      @dots{}\n    @}\n  mpz_clear (n);\n@}\n@end example\n\n\n@node Parameter Conventions, Memory Management, Variable Conventions, MPIR Basics\n@section Parameter Conventions\n@cindex Parameter conventions\n@cindex Conventions for parameters\n\nWhen an MPIR variable is used as a function parameter, it's effectively a\ncall-by-reference, meaning if the function stores a value there it will change\nthe original in the caller.  Parameters which are input-only can be designated\n@code{const} to provoke a compiler error or warning on attempting to modify\nthem.\n\nWhen a function is going to return an MPIR result, it should designate a\nparameter that it sets, like the library functions do.  More than one value\ncan be returned by having more than one output parameter, again like the\nlibrary functions.  A @code{return} of an @code{mpz_t} etc doesn't return the\nobject, only a pointer, and this is almost certainly not what's wanted.\n\nHere's an example accepting an @code{mpz_t} parameter, doing a calculation,\nand storing the result to the indicated parameter.\n\n@example\nvoid\nfoo (mpz_t result, const mpz_t param, mpir_ui n)\n@{\n  mpir_ui  i;\n  mpz_mul_ui (result, param, n);\n  for (i = 1; i < n; i++)\n    mpz_add_ui (result, result, i*7);\n@}\n\nint\nmain (void)\n@{\n  mpz_t  r, n;\n  mpz_init (r);\n  mpz_init_set_str (n, \"123456\", 0);\n  foo (r, n, 20L);\n  gmp_printf (\"%Zd\\n\", r);\n  return 0;\n@}\n@end example\n\n@code{foo} works even if the mainline passes the same variable for\n@code{param} and @code{result}, just like the library functions.  
But\nsometimes it's tricky to make that work, and an application might not want to\nbother supporting that sort of thing.\n\nFor interest, the MPIR types @code{mpz_t} etc are implemented as one-element\narrays of certain structures.  This is why declaring a variable creates an\nobject with the fields MPIR needs, but then using it as a parameter passes a\npointer to the object.  Note that the actual fields in each @code{mpz_t} etc\nare for internal use only and should not be accessed directly by code that\nexpects to be compatible with future MPIR releases.\n\n\n@need 1000\n@node Memory Management, Reentrancy, Parameter Conventions, MPIR Basics\n@section Memory Management\n@cindex Memory management\n\nThe MPIR types like @code{mpz_t} are small, containing only a couple of sizes,\nand pointers to allocated data.  Once a variable is initialized, MPIR takes\ncare of all space allocation.  Additional space is allocated whenever a\nvariable doesn't have enough.\n\n@code{mpz_t} and @code{mpq_t} variables never reduce their allocated space.\nNormally this is the best policy, since it avoids frequent reallocation.\nApplications that need to return memory to the heap at some particular point\ncan use @code{mpz_realloc2}, or clear variables no longer needed.\n\n@code{mpf_t} variables, in the current implementation, use a fixed amount of\nspace, determined by the chosen precision and allocated at initialization, so\ntheir size doesn't change.\n\nAll memory is allocated using @code{malloc} and friends by default, but this\ncan be changed, see @ref{Custom Allocation}.  
Temporary memory on the stack is\nalso used (via @code{alloca}), but this can be changed at build-time if\ndesired, see @ref{Build Options}.\n\n\n@node Reentrancy, Useful Macros and Constants, Memory Management, MPIR Basics\n@section Reentrancy\n@cindex Reentrancy\n@cindex Thread safety\n@cindex Multi-threading\n\n@noindent\nMPIR is reentrant and thread-safe, with some exceptions:\n\n@itemize @bullet\n@item\nIf configured with @option{--enable-alloca=malloc-notreentrant} (or with\n@option{--enable-alloca=notreentrant} when @code{alloca} is not available),\nthen naturally MPIR is not reentrant.\n\n@item\n@code{mpf_set_default_prec} and @code{mpf_init} use a global variable for the\nselected precision.  @code{mpf_init2} can be used instead, and in the C++\ninterface an explicit precision to the @code{mpf_class} constructor.\n\n@item\n@code{mp_set_memory_functions} uses global variables to store the selected\nmemory allocation functions.\n\n@item\nIf the memory allocation functions set by a call to\n@code{mp_set_memory_functions} (or @code{malloc} and friends by default) are\nnot reentrant, then MPIR will not be reentrant either.\n\n@item\nIf the standard I/O functions such as @code{fwrite} are not reentrant then the\nMPIR I/O functions using them will not be reentrant either.\n\n@item\nIt's safe for two threads to read from the same MPIR variable simultaneously,\nbut it's not safe for one to read while another might be writing, nor for\ntwo threads to write simultaneously.  
It's not safe for two threads to\ngenerate a random number from the same @code{gmp_randstate_t} simultaneously,\nsince this involves an update of that variable.\n@end itemize\n\n\n@need 2000\n@node Useful Macros and Constants, Compatibility with older versions, Reentrancy, MPIR Basics\n@section Useful Macros and Constants\n@cindex Useful macros and constants\n@cindex Constants\n\n@deftypevr {Global Constant} {const int} mp_bits_per_limb\n@findex mp_bits_per_limb\n@cindex Bits per limb\n@cindex Limb size\nThe number of bits per limb.\n@end deftypevr\n\n@defmac __GNU_MP_VERSION\n@defmacx __GNU_MP_VERSION_MINOR\n@defmacx __GNU_MP_VERSION_PATCHLEVEL\n@cindex Version number\n@cindex MPIR version number\nThe major and minor GMP version, and patch level, respectively, as integers.\nFor GMP i.j.k, these numbers will be i, j, and k, respectively.\nThese numbers represent the version of GMP fully supported by this version of MPIR.\n@end defmac\n\n@defmac __MPIR_VERSION\n@defmacx __MPIR_VERSION_MINOR\n@defmacx __MPIR_VERSION_PATCHLEVEL\n@cindex Version number\n@cindex MPIR version number\nThe major and minor MPIR version, and patch level, respectively, as integers.\nFor MPIR i.j.k, these numbers will be i, j, and k, respectively.\n@end defmac\n\n@deftypevr {Global Constant} {const char * const} gmp_version\n@findex gmp_version\nThe GNU MP version number, as a null-terminated string, in the form \n``i.j.k''.\n@end deftypevr\n\n@defmac __GMP_CC\n@defmacx __GMP_CFLAGS\nThe compiler and compiler flags, respectively, used when compiling GMP, as\nstrings.\n@end defmac\n\n@deftypevr {Global Constant} {const char * const} mpir_version\n@findex mpir_version\nThe MPIR version number, as a null-terminated string, in the form\n``i.j.k''.  
This release is @nicode{\"@value{VERSION}\"}.\n@end deftypevr\n\n@node Compatibility with older versions, Efficiency, Useful Macros and Constants, MPIR Basics\n@section Compatibility with older versions\n@cindex Compatibility with older versions\n@cindex Past GMP/MPIR versions\n@cindex Upward compatibility\n\nThis version of MPIR is upwardly binary compatible with all GMP 5.x, 4.x and 3.x\nversions, and upwardly compatible at the source level with all 2.x versions,\nwith the following exceptions.\n\n@itemize @bullet\n@item\n@code{mpn_gcd} had its source arguments swapped as of GMP 3.0, for consistency\nwith other @code{mpn} functions.\n\n@item\n@code{mpf_get_prec} counted precision slightly differently in GMP 3.0 and\n3.0.1, but in 3.1 reverted to the 2.x style.\n\n@item\n@code{mpn_bdivmod} provided provisionally in the past has been removed from MPIR\n2.7.0.\n\n@item\nMPIR does not support the secure cryptographic functions provided by GMP.\n\n@item\nFull GMP compatibility is only available when the @samp{--enable-gmpcompat}\nconfigure option is used.\n@end itemize\n\nThere are a number of compatibility issues between GMP 1 and GMP 2 that of\ncourse also apply when porting applications from GMP 1 to GMP 4 and MPIR 1 \nand 2.  Please see the GMP 2 manual for details.\n\n@need 1000\n@node Efficiency, Debugging, Compatibility with older versions, MPIR Basics\n@section Efficiency\n@cindex Efficiency\n\n@table @asis\n@item Small Operands\n@cindex Small operands\nOn small operands, the time for function call overheads and memory allocation\ncan be significant in comparison to actual calculation.  
This is unavoidable\nin a general purpose variable precision library, although MPIR attempts to be\nas efficient as it can on both large and small operands.\n\n@item Static Linking\n@cindex Static linking\nOn some CPUs, in particular the x86s, the static @file{libmpir.a} should be\nused for maximum speed, since the PIC code in the shared @file{libmpir.so} will\nhave a small overhead on each function call and global data address.  For many\nprograms this will be insignificant, but for long calculations there's a gain\nto be had.\n\n@item Initializing and Clearing\n@cindex Initializing and clearing\nAvoid excessive initializing and clearing of variables, since this can be\nquite time consuming, especially in comparison to otherwise fast operations\nlike addition.\n\nA language interpreter might want to keep a free list or stack of\ninitialized variables ready for use.  It should be possible to integrate\nsomething like that with a garbage collector too.\n\n@item Reallocations\n@cindex Reallocations\nAn @code{mpz_t} or @code{mpq_t} variable used to hold successively increasing\nvalues will have its memory repeatedly @code{realloc}ed, which could be quite\nslow or could fragment memory, depending on the C library.  If an application\ncan estimate the final size then @code{mpz_init2} or @code{mpz_realloc2} can\nbe called to allocate the necessary space from the beginning\n(@pxref{Initializing Integers}).\n\nIt doesn't matter if a size set with @code{mpz_init2} or @code{mpz_realloc2}\nis too small, since all functions will do a further reallocation if necessary.\nBadly overestimating memory required will waste space though.\n\n@item @code{2exp} Functions\n@cindex @code{2exp} functions\nIt's up to an application to call functions like @code{mpz_mul_2exp} when\nappropriate.  
General purpose functions like @code{mpz_mul} make no attempt to\nidentify powers of two or other special forms, because such inputs will\nusually be very rare and testing every time would be wasteful.\n\n@item @code{ui} and @code{si} Functions\n@cindex @code{ui} and @code{si} functions\nThe @code{ui} functions and the small number of @code{si} functions exist for\nconvenience and should be used where applicable.  But if for example an\n@code{mpz_t} contains a value that fits in an @code{unsigned long} \n(@code{unsigned long long} on Windows x64) there's no need to extract it and \ncall a @code{ui} function, just use the regular @code{mpz} function.\n\n@item In-Place Operations\n@cindex In-place operations\n@code{mpz_abs}, @code{mpq_abs}, @code{mpf_abs}, @code{mpz_neg}, @code{mpq_neg}\nand @code{mpf_neg} are fast when used for in-place operations like\n@code{mpz_abs(x,x)}, since in the current implementation only a single field\nof @code{x} needs changing.  On suitable compilers (GCC for instance) this is\ninlined too.\n\n@code{mpz_add_ui}, @code{mpz_sub_ui}, @code{mpf_add_ui} and @code{mpf_sub_ui}\nbenefit from an in-place operation like @code{mpz_add_ui(x,x,y)}, since\nusually only one or two limbs of @code{x} will need to be changed.  The same\napplies to the full precision @code{mpz_add} etc if @code{y} is small.  If\n@code{y} is big then cache locality may be helped, but that's all.\n\n@code{mpz_mul} is currently the opposite, a separate destination is slightly\nbetter.  A call like @code{mpz_mul(x,x,y)} will, unless @code{y} is only one\nlimb, make a temporary copy of @code{x} before forming the result.  Normally\nthat copying will only be a tiny fraction of the time for the multiply, so\nthis is not a particularly important consideration.\n\n@code{mpz_set}, @code{mpq_set}, @code{mpq_set_num}, @code{mpf_set}, etc, make\nno attempt to recognise a copy of something to itself, so a call like\n@code{mpz_set(x,x)} will be wasteful.  
Naturally that would never be written\ndeliberately, but if it might arise from two pointers to the same object then\na test to avoid it might be desirable.\n\n@example\nif (x != y)\n  mpz_set (x, y);\n@end example\n\nNote that it's never worth introducing extra @code{mpz_set} calls just to get\nin-place operations.  If a result should go to a particular variable then just\ndirect it there and let MPIR take care of data movement.\n\n@item Divisibility Testing (Small Integers)\n@cindex Divisibility testing\n@code{mpz_divisible_ui_p} and @code{mpz_congruent_ui_p} are the best functions\nfor testing whether an @code{mpz_t} is divisible by an individual small\ninteger.  They use an algorithm which is faster than @code{mpz_tdiv_ui}, but\nwhich gives no useful information about the actual remainder, only whether\nit's zero (or a particular value).\n\nHowever when testing divisibility by several small integers, it's best to take\na remainder modulo their product, to save multi-precision operations.  For\ninstance to test whether a number is divisible by any of 23, 29 or 31 take a\nremainder modulo @math{23@times{}29@times{}31 = 20677} and then test that.\n\nThe division functions like @code{mpz_tdiv_q_ui} which give a quotient as well\nas a remainder are generally a little slower than the remainder-only functions\nlike @code{mpz_tdiv_ui}.  If the quotient is only rarely wanted then it's\nprobably best to just take a remainder and then go back and calculate the\nquotient if and when it's wanted (@code{mpz_divexact_ui} can be used if the\nremainder is zero).\n\n@item Rational Arithmetic\n@cindex Rational arithmetic\nThe @code{mpq} functions operate on @code{mpq_t} values with no common factors\nin the numerator and denominator.  Common factors are checked-for and cast out\nas necessary.  
In general, cancelling factors every time is the best approach\nsince it minimizes the sizes for subsequent operations.\n\nHowever, applications that know something about the factorization of the\nvalues they're working with might be able to avoid some of the GCDs used for\ncanonicalization, or swap them for divisions.  For example when multiplying by\na prime it's enough to check for factors of it in the denominator instead of\ndoing a full GCD@.  Or when forming a big product it might be known that very\nlittle cancellation will be possible, and so canonicalization can be left to\nthe end.\n\nThe @code{mpq_numref} and @code{mpq_denref} macros give access to the\nnumerator and denominator to do things outside the scope of the supplied\n@code{mpq} functions.  @xref{Applying Integer Functions}.\n\nThe canonical form for rationals allows mixed-type @code{mpq_t} and integer\nadditions or subtractions to be done directly with multiples of the\ndenominator.  This will be somewhat faster than @code{mpq_add}.  For example,\n\n@example\n/* mpq increment */\nmpz_add (mpq_numref(q), mpq_numref(q), mpq_denref(q));\n\n/* mpq += unsigned long */\nmpz_addmul_ui (mpq_numref(q), mpq_denref(q), 123UL);\n\n/* mpq -= mpz */\nmpz_submul (mpq_numref(q), mpq_denref(q), z);\n@end example\n\n@item Number Sequences\n@cindex Number sequences\nFunctions like @code{mpz_fac_ui}, @code{mpz_fib_ui} and @code{mpz_bin_uiui}\nare designed for calculating isolated values.  If a range of values is wanted\nit's probably best to call to get a starting point and iterate from there.\n\n@item Text Input/Output\n@cindex Text input/output\nHexadecimal or octal are suggested for input or output in text form.\nPower-of-2 bases like these can be converted much more efficiently than other\nbases, like decimal.  
For big numbers there's usually nothing of particular\ninterest to be seen in the digits, so the base doesn't matter much.\n\nMaybe we can hope octal will one day become the normal base for everyday use,\nas proposed by King Charles XII of Sweden and later reformers.\n@c Reference: Knuth volume 2 section 4.1, page 184 of second edition.  :-)\n@end table\n\n\n@node Debugging, Profiling, Efficiency, MPIR Basics\n@section Debugging\n@cindex Debugging\n\n@table @asis\n@item Stack Overflow\n@cindex Stack overflow\n@cindex Segmentation violation\n@cindex Bus error\nDepending on the system, a segmentation violation or bus error might be the\nonly indication of stack overflow.  See @samp{--enable-alloca} choices in\n@ref{Build Options}, for how to address this.\n\nIn new enough versions of GCC, @samp{-fstack-check} may be able to ensure an\noverflow is recognised by the system before too much damage is done, or\n@samp{-fstack-limit-symbol} or @samp{-fstack-limit-register} may be able to\nadd checking if the system itself doesn't do any (@pxref{Code Gen Options,,\nOptions for Code Generation, gcc, Using the GNU Compiler Collection (GCC)}).\nThese options must be added to the @samp{CFLAGS} used in the MPIR build\n(@pxref{Build Options}), adding them just to an application will have no\neffect.  Note also they're a slowdown, adding overhead to each function call\nand each stack allocation.\n\n@item Heap Problems\n@cindex Heap problems\n@cindex Malloc problems\nThe most likely cause of application problems with MPIR is heap corruption.\nFailing to @code{init} MPIR variables will have unpredictable effects, and\ncorruption arising elsewhere in a program may well affect MPIR@.  Initializing\nMPIR variables more than once or failing to clear them will cause memory leaks.\n\n@cindex Malloc debugger\nIn all such cases a @code{malloc} debugger is recommended.  
On a GNU or BSD\nsystem the standard C library @code{malloc} has some diagnostic facilities,\nsee @ref{Allocation Debugging,, Allocation Debugging, libc, The GNU C Library\nReference Manual}, or @samp{man 3 malloc}.  Other possibilities, in no\nparticular order, include\n\n@display\n@uref{http://dmalloc.com/}\n@uref{http://www.perens.com/FreeSoftware/} @ (electric fence)\n@uref{http://www.gnupdate.org/components/leakbug/}\n@uref{http://www.gnome.org/projects/memprof}\n@end display\n\nThe MPIR default allocation routines in @file{memory.c} also have a simple\nsentinel scheme which can be enabled with @code{#define DEBUG} in that file.\nThis is mainly designed for detecting buffer overruns during MPIR development,\nbut might find other uses.\n\n@item Stack Backtraces\n@cindex Stack backtrace\nOn some systems the compiler options MPIR uses by default can interfere with\ndebugging.  In particular on x86 and 68k systems @samp{-fomit-frame-pointer}\nis used and this generally inhibits stack backtracing.  Recompiling without\nsuch options may help while debugging, though the usual caveats about it\npotentially moving a memory problem or hiding a compiler bug will apply.\n\n@item GDB, the GNU Debugger\n@cindex GDB\n@cindex GNU Debugger\nA sample @file{.gdbinit} is included in the distribution, showing how to call\nsome undocumented dump functions to print MPIR variables from within GDB@.  Note\nthat these functions shouldn't be used in final application code since they're\nundocumented and may be subject to incompatible changes in future versions of\nMPIR.\n\n@item Source File Paths\nMPIR has multiple source files with the same name, in different directories.\nFor example @file{mpz}, @file{mpq} and @file{mpf} each have an\n@file{init.c}.  If the debugger can't already determine the right one it may\nhelp to build with absolute paths on each C file.  
One way to do that is to\nuse a separate object directory with an absolute path to the source directory.\n\n@example\ncd /my/build/dir\n/my/source/dir/gmp-@value{VERSION}/configure\n@end example\n\nThis works via @code{VPATH}, and might require GNU @command{make}.\nAlternately it might be possible to change the @code{.c.lo} rules\nappropriately.\n\n@item Assertion Checking\n@cindex Assertion checking\nThe build option @option{--enable-assert} is available to add some consistency\nchecks to the library (see @ref{Build Options}).  These are likely to be of\nlimited value to most applications.  Assertion failures are just as likely to\nindicate memory corruption as a library or compiler bug.\n\nApplications using the low-level @code{mpn} functions, however, will benefit\nfrom @option{--enable-assert} since it adds checks on the parameters of most\nsuch functions, many of which have subtle restrictions on their usage.  Note\nhowever that only the generic C code has checks, not the assembler code, so\nCPU @samp{none} should be used for maximum checking.\n\n@item Temporary Memory Checking\nThe build option @option{--enable-alloca=debug} arranges that each block of\ntemporary memory in MPIR is allocated with a separate call to @code{malloc} (or\nthe allocation function set with @code{mp_set_memory_functions}).\n\nThis can help a malloc debugger detect accesses outside the intended bounds,\nor detect memory not released.  
In a normal build, on the other hand,\ntemporary memory is allocated in blocks which MPIR divides up for its own use,\nor may be allocated with a compiler builtin @code{alloca} which will go\nnowhere near any malloc debugger hooks.\n\n@item Maximum Debuggability\nTo summarize the above, an MPIR build for maximum debuggability would be\n\n@example\n./configure --disable-shared --enable-assert \\\n  --enable-alloca=debug --host=none CFLAGS=-g\n@end example\n\nFor C++, add @samp{--enable-cxx CXXFLAGS=-g}.\n\n@item Checker\n@cindex Checker\n@cindex GCC Checker\nThe GCC checker (@uref{http://savannah.gnu.org/projects/checker/}) can be used\nwith MPIR@.  It contains a stub library which means MPIR applications compiled\nwith checker can use a normal MPIR build.\n\nA build of MPIR with checking within MPIR itself can be made.  This will run\nvery very slowly.  On GNU/Linux for example,\n\n@cindex @command{checkergcc}\n@example\n./configure --host=none-pc-linux-gnu CC=checkergcc\n@end example\n\n@samp{--host=none} must be used, since the MPIR assembler code doesn't support\nthe checking scheme.  The MPIR C++ features cannot be used, since current\nversions of checker (0.9.9.1) don't yet support the standard C++ library.\n\n@item Valgrind\n@cindex Valgrind\nThe valgrind program (@uref{http://valgrind.org/}) is a memory\nchecker for x86s.  
It translates and emulates machine instructions to do\nstrong checks for uninitialized data (at the level of individual bits), memory\naccesses through bad pointers, and memory leaks.\n\nRecent versions of Valgrind are getting support for MMX and SSE/SSE2\ninstructions, for past versions MPIR will need to be configured not to use\nthose, ie.@: for an x86 without them (for instance plain @samp{i486}).\n\n@item Other Problems\nAny suspected bug in MPIR itself should be isolated to make sure it's not an\napplication problem, see @ref{Reporting Bugs}.\n@end table\n\n\n@node Profiling, Autoconf, Debugging, MPIR Basics\n@section Profiling\n@cindex Profiling\n@cindex Execution profiling\n@cindex @code{--enable-profiling}\n\nRunning a program under a profiler is a good way to find where it's spending\nmost time and where improvements can be best sought.  The profiling choices\nfor an MPIR build are as follows.\n\n@table @asis\n@item @samp{--disable-profiling}\nThe default is to add nothing special for profiling.\n\nIt should be possible to just compile the mainline of a program with @code{-p}\nand use @command{prof} to get a profile consisting of timer-based sampling of\nthe program counter.  
Most of the MPIR assembler code has the necessary symbol\ninformation.\n\nThis approach has the advantage of minimizing interference with normal program\noperation, but on most systems the resolution of the sampling is quite low (10\nmilliseconds for instance), requiring long runs to get accurate information.\n\n@item @samp{--enable-profiling=prof}\n@cindex @code{prof}\nBuild with support for the system @command{prof}, which means @samp{-p} added\nto the @samp{CFLAGS}.\n\nThis provides call counting in addition to program counter sampling, which\nallows the most frequently called routines to be identified, and an average\ntime spent in each routine to be determined.\n\nThe x86 assembler code has support for this option, but on other processors\nthe assembler routines will be as if compiled without @samp{-p} and therefore\nwon't appear in the call counts.\n\nOn some systems, such as GNU/Linux, @samp{-p} in fact means @samp{-pg} and in\nthis case @samp{--enable-profiling=gprof} described below should be used\ninstead.\n\n@item @samp{--enable-profiling=gprof}\n@cindex @code{gprof}\nBuild with support for @command{gprof} (@GMPpxreftop{gprof, GNU gprof}), which\nmeans @samp{-pg} added to the @samp{CFLAGS}.\n\nThis provides call graph construction in addition to call counting and program\ncounter sampling, which makes it possible to count calls coming from different\nlocations.  For example the number of calls to @code{mpn_mul} from\n@code{mpz_mul} versus the number from @code{mpf_mul}.  
The program counter\nsampling is still flat though, so only a total time in @code{mpn_mul} would be\naccumulated, not a separate amount for each call site.\n\nThe x86 assembler code has support for this option, but on other processors\nthe assembler routines will be as if compiled without @samp{-pg} and therefore\nnot be included in the call counts.\n\nOn x86 and m68k systems @samp{-pg} and @samp{-fomit-frame-pointer} are\nincompatible, so the latter is omitted from the default flags in that case,\nwhich might result in poorer code generation.\n\nIncidentally, it should be possible to use the @command{gprof} program with a\nplain @samp{--enable-profiling=prof} build.  But in that case only the\n@samp{gprof -p} flat profile and call counts can be expected to be valid, not\nthe @samp{gprof -q} call graph.\n\n@item @samp{--enable-profiling=instrument}\n@cindex @code{-finstrument-functions}\n@cindex @code{instrument-functions}\nBuild with the GCC option @samp{-finstrument-functions} added to the\n@samp{CFLAGS} (@pxref{Code Gen Options,, Options for Code Generation, gcc,\nUsing the GNU Compiler Collection (GCC)}).\n\nThis inserts special instrumenting calls at the start and end of each\nfunction, allowing exact timing and full call graph construction.\n\nThis instrumenting is not normally a standard system feature and will require\nsupport from an external library, such as\n\n@cindex FunctionCheck\n@cindex fnccheck\n@display\n@uref{http://sourceforge.net/projects/fnccheck/}\n@end display\n\nThis should be included in @samp{LIBS} during the MPIR configure so that test\nprograms will link.  For example,\n\n@example\n./configure --enable-profiling=instrument LIBS=-lfc\n@end example\n\nOn a GNU system the C library provides dummy instrumenting functions, so\nprograms compiled with this option will link.  
In this case it's only\nnecessary to ensure the correct library is added when linking an application.\n\nThe x86 assembler code supports this option, but on other processors the\nassembler routines will be as if compiled without\n@samp{-finstrument-functions} meaning time spent in them will effectively be\nattributed to their caller.\n@end table\n\n\n@node Autoconf, Emacs, Profiling, MPIR Basics\n@section Autoconf\n@cindex Autoconf\n\nAutoconf based applications can easily check whether MPIR is installed.  The\nonly thing to be noted is that GMP/MPIR library symbols from version 3 of GMP\nand version 1 of MPIR onwards have prefixes like @code{__gmpz}.  The following \ntherefore would be a simple test,\n\n@cindex @code{AC_CHECK_LIB}\n@example\nAC_CHECK_LIB(mpir, __gmpz_init)\n@end example\n\nThis just uses the default @code{AC_CHECK_LIB} actions for found or not found,\nbut an application that must have MPIR would want to generate an error if not\nfound.  For example,\n\n@example\nAC_CHECK_LIB(mpir, __gmpz_init, ,\n  [AC_MSG_ERROR([MPIR not found, see http://www.mpir.org/])])\n@end example\n\nIf functions added in some particular version of GMP/MPIR are required, then one of\nthose can be used when checking.  For example @code{mpz_mul_si} was added in\nGMP 3.1,\n\n@example\nAC_CHECK_LIB(mpir, __gmpz_mul_si, ,\n  [AC_MSG_ERROR(\n  [GMP/MPIR not found, or not GMP 3.1 or up or MPIR 1.0 or up, see http://www.mpir.org/])])\n@end example\n\nAn alternative would be to test the version number in @file{mpir.h} using say\n@code{AC_EGREP_CPP}.  
That would make it possible to test the exact version,\nif some particular sub-minor release is known to be necessary.\n\nIn general it's recommended that applications should simply demand a new\nenough MPIR rather than trying to provide supplements for features not\navailable in past versions.\n\nOccasionally an application will need or want to know the size of a type at\nconfiguration or preprocessing time, not just with @code{sizeof} in the code.\nThis can be done in the normal way with @code{mp_limb_t} etc, but GMP 4.0 or\nup and MPIR 1.0 and up is best for this, since prior versions needed certain \n@samp{-D} defines on systems using a @code{long long} limb.  The following \nwould suit Autoconf 2.50 or up,\n\n@example\nAC_CHECK_SIZEOF(mp_limb_t, , [#include <mpir.h>])\n@end example\n\n\n@node Emacs,  , Autoconf, MPIR Basics\n@section Emacs\n@cindex Emacs\n@cindex @code{info-lookup-symbol}\n\n@key{C-h C-i} (@code{info-lookup-symbol}) is a good way to find documentation\non C functions while editing (@pxref{Info Lookup, , Info Documentation Lookup,\nemacs, The Emacs Editor}).\n\nThe MPIR manual can be included in such lookups by putting the following in\nyour @file{.emacs},\n\n@c  This isn't pretty, but there doesn't seem to be a better way (in emacs\n@c  21.2 at least).  info-lookup->mode-value could be used for the \"assoc\"s,\n@c  but that function isn't documented, whereas info-lookup-alist is.\n@c\n@example\n(eval-after-load \"info-look\"\n  '(let ((mode-value (assoc 'c-mode (assoc 'symbol info-lookup-alist))))\n     (setcar (nthcdr 3 mode-value)\n             (cons '(\"(gmp)Function Index\" nil \"^ -.* \" \"\\\\>\")\n                   (nth 3 mode-value)))))\n@end example\n\n\n@node Reporting Bugs, Integer Functions, MPIR Basics, Top\n@comment  node-name,  next,  previous,  up\n@chapter Reporting Bugs\n@cindex Reporting bugs\n@cindex Bug reporting\n\nIf you think you have found a bug in the MPIR library, please investigate it\nand report it.  
We have made this library available to you, and it is not too\nmuch to ask you to report the bugs you find.\n\nBefore you report a bug, check it's not already addressed in @ref{Known Build\nProblems}, or perhaps @ref{Notes for Particular Systems}.  You may also want\nto check @uref{http://www.mpir.org/} for patches for this release.\n\nPlease include the following in any report,\n\n@itemize @bullet\n@item\nThe MPIR version number, and if pre-packaged or patched then say so.\n\n@item\nA test program that makes it possible for us to reproduce the bug.  Include\ninstructions on how to run the program.\n\n@item\nA description of what is wrong.  If the results are incorrect, in what way.\nIf you get a crash, say so.\n\n@item\nIf you get a crash, include a stack backtrace from the debugger if it's\ninformative (@samp{where} in @command{gdb}, or @samp{$C} in @command{adb}).\n\n@item\nPlease do not send core dumps, executables or @command{strace}s.\n\n@item\nThe configuration options you used when building MPIR, if any.\n\n@item\nThe name of the compiler and its version.  For @command{gcc}, get the version\nwith @samp{gcc -v}, otherwise perhaps @samp{what `which cc`}, or similar.\n\n@item\nThe output from running @samp{uname -a}.\n\n@item\nThe output from running @samp{./config.guess}, and from running\n@samp{./configfsf.guess} (might be the same).\n\n@item\nIf the bug is related to @samp{configure}, then the contents of\n@file{config.log}.\n\n@item\nIf the bug is related to an @file{asm} file not assembling, then the contents\nof @file{config.m4} and the offending line or lines from the temporary\n@file{mpn/tmp-<file>.s}.\n@end itemize\n\nPlease make an effort to produce a self-contained report, with something\ndefinite that can be tested or debugged.  
Vague queries or piecemeal messages\nare difficult to act on and don't help the development effort.\n\nIt is not uncommon that an observed problem is actually due to a bug in the\ncompiler; the MPIR code tends to explore interesting corners in compilers.\n\nIf your bug report is good, we will do our best to help you get a corrected\nversion of the library; if the bug report is poor, we won't do anything about\nit (except maybe ask you to send a better report).\n\nSend your report to: @uref{http://groups.google.com/group/mpir-devel}.\n\nIf you think something in this manual is unclear, or downright incorrect, or if\nthe language needs to be improved, please send a note to the same address.\n\n\n@node Integer Functions, Rational Number Functions, Reporting Bugs, Top\n@comment  node-name,  next,  previous,  up\n@chapter Integer Functions\n@cindex Integer functions\n\nThis chapter describes the MPIR functions for performing integer arithmetic.\nThese functions start with the prefix @code{mpz_}.\n\nMPIR integers are stored in objects of type @code{mpz_t}.\n\n@menu\n* Initializing Integers::\n* Assigning Integers::\n* Simultaneous Integer Init & Assign::\n* Converting Integers::\n* Integer Arithmetic::\n* Integer Division::\n* Integer Exponentiation::\n* Integer Roots::\n* Number Theoretic Functions::\n* Integer Comparisons::\n* Integer Logic and Bit Fiddling::\n* I/O of Integers::\n* Integer Random Numbers::\n* Integer Import and Export::\n* Miscellaneous Integer Functions::\n* Integer Special Functions::\n@end menu\n\n@node Initializing Integers, Assigning Integers, Integer Functions, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Initialization Functions\n@cindex Integer initialization functions\n@cindex Initialization functions\n\nThe functions for integer arithmetic assume that all integer objects are\ninitialized.  You do that by calling the function @code{mpz_init}.  
For\nexample,\n\n@example\n@{\n  mpz_t integ;\n  mpz_init (integ);\n  @dots{}\n  mpz_add (integ, @dots{});\n  @dots{}\n  mpz_sub (integ, @dots{});\n\n  /* Unless the program is about to exit, do ... */\n  mpz_clear (integ);\n@}\n@end example\n\nAs you can see, you can store new values any number of times, once an\nobject is initialized.\n\n@deftypefun void mpz_init (mpz_t @var{integer})\nInitialize @var{integer}, and set its value to 0.\n@end deftypefun\n\n@deftypefun void mpz_inits (mpz_t @var{x}, ...)\nInitialize a NULL-terminated list of @code{mpz_t} variables, and set their\nvalues to 0.\n@end deftypefun\n\n@deftypefun void mpz_init2 (mpz_t @var{integer}, mp_bitcnt_t @var{n})\nInitialize @var{integer}, with space for @var{n} bits, and set its value to 0.\n\n@var{n} is only the initial space, @var{integer} will grow automatically in\nthe normal way, if necessary, for subsequent values stored.  @code{mpz_init2}\nmakes it possible to avoid such reallocations if a maximum size is known in\nadvance.\n@end deftypefun\n\n@deftypefun void mpz_clear (mpz_t @var{integer})\nFree the space occupied by @var{integer}.  Call this function for all\n@code{mpz_t} variables when you are done with them.\n@end deftypefun\n\n@deftypefun void mpz_clears (mpz_t @var{x}, ...)\nFree the space occupied by a NULL-terminated list of @code{mpz_t} variables.\n@end deftypefun\n\n@deftypefun void mpz_realloc2 (mpz_t @var{integer}, mp_bitcnt_t @var{n})\nChange the space allocated for @var{integer} to @var{n} bits.  
The value in\n@var{integer} is preserved if it fits, or is set to 0 if not.\n\nThis function can be used to increase the space for a variable in order to\navoid repeated automatic reallocations, or to decrease it to give memory back\nto the heap.\n@end deftypefun\n\n\n@node Assigning Integers, Simultaneous Integer Init & Assign, Initializing Integers, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Assignment Functions\n@cindex Integer assignment functions\n@cindex Assignment functions\n\nThese functions assign new values to already initialized integers\n(@pxref{Initializing Integers}).\n\n@deftypefun void mpz_set (mpz_t @var{rop}, mpz_t @var{op})\n@deftypefunx void mpz_set_ui (mpz_t @var{rop}, mpir_ui @var{op})\n@deftypefunx void mpz_set_si (mpz_t @var{rop}, mpir_si @var{op})\n@deftypefunx void mpz_set_ux (mpz_t @var{rop}, uintmax_t @var{op})\n@deftypefunx void mpz_set_sx (mpz_t @var{rop}, intmax_t @var{op})\n@deftypefunx void mpz_set_d (mpz_t @var{rop}, double @var{op})\n@deftypefunx void mpz_set_q (mpz_t @var{rop}, mpq_t @var{op})\n@deftypefunx void mpz_set_f (mpz_t @var{rop}, mpf_t @var{op})\nSet the value of @var{rop} from @var{op}. Note that the @code{(u)intmax} versions are only available\nif @file{stdint.h} header exists. It is included conditionally and automatically on modern\ncompilers supporting @code{__has_include()} macro, otherwise\nyou need to include @file{stdint.h} before including @file{mpir.h}.\n\n@code{mpz_set_d}, @code{mpz_set_q} and @code{mpz_set_f} truncate @var{op} to\nmake it an integer.\n@end deftypefun\n\n@deftypefun int mpz_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})\nSet the value of @var{rop} from @var{str}, a null-terminated C string in base\n@var{base}.  
White space is allowed in the string, and is simply ignored.\n\nThe @var{base} may vary from 2 to 62, or if @var{base} is 0, then the leading\ncharacters are used: @code{0x} and @code{0X} for hexadecimal, @code{0b} and\n@code{0B} for binary, @code{0} for octal, or decimal otherwise.\n\nFor bases up to 36, case is ignored; upper-case and lower-case letters have\nthe same value.  For bases 37 to 62, upper-case letters represent the usual\n10..35 while lower-case letters represent 36..61.\n\nThis function returns 0 if the entire string is a valid number in base\n@var{base}.  Otherwise it returns @minus{}1.\n@c\n@c  It turns out that it is not entirely true that this function ignores\n@c  white-space.  It does ignore it between digits, but not after a minus sign\n@c  or within or after ``0x''.  Some thought was given to disallowing all\n@c  whitespace, but that would be an incompatible change, whitespace has been\n@c  documented as ignored ever since GMP 1.\n@c\n@end deftypefun\n\n@deftypefun void mpz_swap (mpz_t @var{rop1}, mpz_t @var{rop2})\nSwap the values @var{rop1} and @var{rop2} efficiently.\n@end deftypefun\n\n\n@node Simultaneous Integer Init & Assign, Converting Integers, Assigning Integers, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Combined Initialization and Assignment Functions\n@cindex Integer assignment functions\n@cindex Assignment functions\n@cindex Integer initialization functions\n@cindex Initialization functions\n\nFor convenience, MPIR provides a parallel series of initialize-and-set functions\nwhich initialize the output and then store the value there.  
These functions'\nnames have the form @code{mpz_init_set@dots{}}\n\nHere is an example of using one:\n\n@example\n@{\n  mpz_t pie;\n  mpz_init_set_str (pie, \"3141592653589793238462643383279502884\", 10);\n  @dots{}\n  mpz_sub (pie, @dots{});\n  @dots{}\n  mpz_clear (pie);\n@}\n@end example\n\n@noindent\nOnce the integer has been initialized by any of the @code{mpz_init_set@dots{}}\nfunctions, it can be used as the source or destination operand for the ordinary\ninteger functions.  Don't use an initialize-and-set function on a variable\nalready initialized!\n\n@deftypefun void mpz_init_set (mpz_t @var{rop}, mpz_t @var{op})\n@deftypefunx void mpz_init_set_ui (mpz_t @var{rop}, mpir_ui @var{op})\n@deftypefunx void mpz_init_set_si (mpz_t @var{rop}, mpir_si @var{op})\n@deftypefunx void mpz_init_set_ux (mpz_t @var{rop}, uintmax_t @var{op})\n@deftypefunx void mpz_init_set_sx (mpz_t @var{rop}, intmax_t @var{op})\n@deftypefunx void mpz_init_set_d (mpz_t @var{rop}, double @var{op})\nInitialize @var{rop} with limb space and set the initial numeric value from\n@var{op}. Note the intmax versions are only available\nif you include the @file{stdint.h} header before including @file{mpir.h}.\n@end deftypefun\n\n@deftypefun int mpz_init_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})\nInitialize @var{rop} and set its value like @code{mpz_set_str} (see its\ndocumentation above for details).\n\nIf the string is a correct base @var{base} number, the function returns 0;\nif an error occurs it returns @minus{}1.  @var{rop} is initialized even if\nan error occurs.  (I.e., you have to call @code{mpz_clear} for it.)\n@end deftypefun\n\n\n@node Converting Integers, Integer Arithmetic, Simultaneous Integer Init & Assign, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Conversion Functions\n@cindex Integer conversion functions\n@cindex Conversion functions\n\nThis section describes functions for converting MPIR integers to standard C\ntypes.  
Functions for converting @emph{to} MPIR integers are described in\n@ref{Assigning Integers} and @ref{I/O of Integers}.\n\n@deftypefun mpir_ui mpz_get_ui (mpz_t @var{op})\nReturn the value of @var{op} as an @code{mpir_ui}.\n\nIf @var{op} is too big to fit an @code{mpir_ui} then just the least\nsignificant bits that do fit are returned.  The sign of @var{op} is ignored,\nonly the absolute value is used.\n@end deftypefun\n\n@deftypefun mpir_si mpz_get_si (mpz_t @var{op})\nIf @var{op} fits into a @code{mpir_si} return the value of @var{op}.\nOtherwise return the least significant part of @var{op}, with the same sign\nas @var{op}.\n\nIf @var{op} is too big to fit in a @code{mpir_si}, the returned\nresult is probably not very useful.  To find out if the value will fit, use\nthe function @code{mpz_fits_slong_p}.\n@end deftypefun\n\n@deftypefun {uintmax_t} mpz_get_ux (mpz_t @var{op})\nReturn the value of @var{op} as an @code{uintmax_t}.\n\nIf @var{op} is too big to fit an @code{uintmax_t} then just the least\nsignificant bits that do fit are returned.  The sign of @var{op} is ignored,\nonly the absolute value is used. Note this function is only available if you\ninclude @file{stdint.h} before including @file{mpir.h}.\n@end deftypefun\n\n@deftypefun {intmax_t} mpz_get_sx (mpz_t @var{op})\nIf @var{op} fits into a @code{intmax_t} return the value of @var{op}.\nOtherwise return the least significant part of @var{op}, with the same sign\nas @var{op}.\n\nIf @var{op} is too big to fit in a @code{intmax_t}, the returned\nresult is probably not very useful. Note this function is only available if you\ninclude the @file{stdint.h} header before including @file{mpir.h}. \n@end deftypefun\n\n@deftypefun double mpz_get_d (mpz_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero).\n\nIf the exponent from the conversion is too big, the result is system\ndependent.  An infinity is returned where available.  
A hardware overflow trap\nmay or may not occur.\n@end deftypefun\n\n@deftypefun double mpz_get_d_2exp (mpir_si *@var{exp}, mpz_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero), and returning the exponent separately.\n\nThe return value is in the range @math{0.5@le{}@GMPabs{@var{d}}<1} and the\nexponent is stored to @code{*@var{exp}}.  @m{@var{d} * 2^{exp}, @var{d} *\n2^@var{exp}} is the (truncated) @var{op} value.  If @var{op} is zero, the\nreturn is @math{0.0} and 0 is stored to @code{*@var{exp}}.\n\n@cindex @code{frexp}\nThis is similar to the standard C @code{frexp} function (@pxref{Normalization\nFunctions,,, libc, The GNU C Library Reference Manual}).\n@end deftypefun\n\n@deftypefun mpir_si mpz_get_2exp_d (double *@var{rop}, mpz_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero), and returning the exponent separately.\n\nThis function has been added on Windows as an alternative to mpz_get_d_2exp\nto avoid problems that occur on 64-bit Windows systems where pointers to \nintegers point to 32-bit integer variables when MPIR expects to find space\nfor 64-bit integers.\n@end deftypefun\n\n@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, mpz_t @var{op})\nConvert @var{op} to a string of digits in base @var{base}.  The base may vary\nfrom 2 to 62 or from @minus{}2 to @minus{}36.\n\nFor @var{base} in the range 2..36, digits and lower-case letters are used; for\n@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,\ndigits, upper-case letters, and lower-case letters (in that significance order)\nare used.\n\nIf @var{str} is @code{NULL}, the result string is allocated using the current\nallocation function (@pxref{Custom Allocation}).  
The block will be\n@code{strlen(str)+1} bytes, that being exactly enough for the string and\nnull-terminator.\n\nIf @var{str} is not @code{NULL}, it should point to a block of storage large\nenough for the result, that being @code{mpz_sizeinbase (@var{op}, @var{base})\n+ 2}.  The two extra bytes are for a possible minus sign, and the\nnull-terminator.\n\nA pointer to the result string is returned, being either the allocated block,\nor the given @var{str}.\n@end deftypefun\n\n\n@need 2000\n@node Integer Arithmetic, Integer Division, Converting Integers, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Arithmetic Functions\n@cindex Integer arithmetic functions\n@cindex Arithmetic functions\n\n@deftypefun void mpz_add (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_add_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @math{@var{op1} + @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpz_sub (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\n@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, mpir_ui @var{op1}, mpz_t @var{op2})\nSet @var{rop} to @var{op1} @minus{} @var{op2}.\n@end deftypefun\n\n@deftypefun void mpz_mul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_mul_si (mpz_t @var{rop}, mpz_t @var{op1}, mpir_si @var{op2})\n@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpz_addmul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @math{@var{rop} + @var{op1} @GMPtimes{} @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpz_submul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, mpz_t @var{op1}, 
mpir_ui @var{op2})\nSet @var{rop} to @math{@var{rop} - @var{op1} @GMPtimes{} @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, mpz_t @var{op1}, mp_bitcnt_t @var{op2})\n@cindex Bit shift left\nSet @var{rop} to @m{@var{op1} \\times 2^{op2}, @var{op1} times 2 raised to\n@var{op2}}.  This operation can also be defined as a left shift by @var{op2}\nbits.\n@end deftypefun\n\n@deftypefun void mpz_neg (mpz_t @var{rop}, mpz_t @var{op})\nSet @var{rop} to @minus{}@var{op}.\n@end deftypefun\n\n@deftypefun void mpz_abs (mpz_t @var{rop}, mpz_t @var{op})\nSet @var{rop} to the absolute value of @var{op}.\n@end deftypefun\n\n\n@need 2000\n@node Integer Division, Integer Exponentiation, Integer Arithmetic, Integer Functions\n@section Division Functions\n@cindex Integer division functions\n@cindex Division functions\n\nDivision is undefined if the divisor is zero.  Passing a zero divisor to the\ndivision or modulo functions (including the modular powering functions\n@code{mpz_powm} and @code{mpz_powm_ui}), will cause an intentional division by\nzero.  
This lets a program handle arithmetic exceptions in these functions the\nsame way as for normal C @code{int} arithmetic.\n\n@c  Separate deftypefun groups for cdiv, fdiv and tdiv produce a blank line\n@c  between each, and seem to let tex do a better job of page breaks than an\n@c  @sp 1 in the middle of one big set.\n\n@deftypefun void mpz_cdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@maybepagebreak\n@deftypefunx mpir_ui mpz_cdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_cdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_cdiv_ui (mpz_t @var{n}, mpir_ui @var{d})\n@maybepagebreak\n@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@end deftypefun\n\n@deftypefun void mpz_fdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@maybepagebreak\n@deftypefunx mpir_ui mpz_fdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_fdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_fdiv_ui (mpz_t @var{n}, mpir_ui @var{d})\n@maybepagebreak\n@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@end deftypefun\n\n@deftypefun void mpz_tdiv_q (mpz_t @var{q}, 
mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@maybepagebreak\n@deftypefunx mpir_ui mpz_tdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_tdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, mpir_ui @var{d})\n@deftypefunx mpir_ui mpz_tdiv_ui (mpz_t @var{n}, mpir_ui @var{d})\n@maybepagebreak\n@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})\n@cindex Bit shift right\n\n@sp 1\nDivide @var{n} by @var{d}, forming a quotient @var{q} and/or remainder\n@var{r}.  For the @code{2exp} functions, @m{@var{d}=2^b, @var{d}=2^@var{b}}.\nThe rounding is in three styles, each suiting different applications.\n\n@itemize @bullet\n@item\n@code{cdiv} rounds @var{q} up towards @m{+\\infty, +infinity}, and @var{r} will\nhave the opposite sign to @var{d}.  The @code{c} stands for ``ceil''.\n\n@item\n@code{fdiv} rounds @var{q} down towards @m{-\\infty, @minus{}infinity}, and\n@var{r} will have the same sign as @var{d}.  The @code{f} stands for\n``floor''.\n\n@item\n@code{tdiv} rounds @var{q} towards zero, and @var{r} will have the same sign\nas @var{n}.  The @code{t} stands for ``truncate''.\n@end itemize\n\nIn all cases @var{q} and @var{r} will satisfy\n@m{@var{n}=@var{q}@var{d}+@var{r}, @var{n}=@var{q}*@var{d}+@var{r}}, and\n@var{r} will satisfy @math{0@le{}@GMPabs{@var{r}}<@GMPabs{@var{d}}}.\n\nThe @code{q} functions calculate only the quotient, the @code{r} functions\nonly the remainder, and the @code{qr} functions calculate both.  
Note that for\n@code{qr} the same variable cannot be passed for both @var{q} and @var{r}, or\nresults will be unpredictable.\n\nFor the @code{ui} variants the return value is the remainder, and in fact\nreturning the remainder is all the @code{div_ui} functions do.  For\n@code{tdiv} and @code{cdiv} the remainder can be negative, so for those the\nreturn value is the absolute value of the remainder.\n\nFor the @code{2exp} variants the divisor is @m{2^b,2^@var{b}}.  These\nfunctions are implemented as right shifts and bit masks, but of course they\nround the same as the other functions.\n\nFor positive @var{n} both @code{mpz_fdiv_q_2exp} and @code{mpz_tdiv_q_2exp}\nare simple bitwise right shifts.  For negative @var{n}, @code{mpz_fdiv_q_2exp}\nis effectively an arithmetic right shift treating @var{n} as twos complement\nthe same as the bitwise logical functions do, whereas @code{mpz_tdiv_q_2exp}\neffectively treats @var{n} as sign and magnitude.\n@end deftypefun\n\n@deftypefun void mpz_mod (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx mpir_ui mpz_mod_ui (mpz_t @var{r}, mpz_t @var{n}, mpir_ui @var{d})\nSet @var{r} to @var{n} @code{mod} @var{d}.  The sign of the divisor is\nignored; the result is always non-negative.\n\n@code{mpz_mod_ui} is identical to @code{mpz_fdiv_r_ui} above, returning the\nremainder as well as setting @var{r}.  See @code{mpz_fdiv_ui} above if only\nthe return value is wanted.\n@end deftypefun\n\n@deftypefun void mpz_divexact (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, mpz_t @var{n}, mpir_ui @var{d})\n@cindex Exact division functions\nSet @var{q} to @var{n}/@var{d}.  
These functions produce correct results only\nwhen it is known in advance that @var{d} divides @var{n}.\n\nThese routines are much faster than the other division functions, and are the\nbest choice when exact division is known to occur, for example reducing a\nrational to lowest terms.\n@end deftypefun\n\n@deftypefun int mpz_divisible_p (mpz_t @var{n}, mpz_t @var{d})\n@deftypefunx int mpz_divisible_ui_p (mpz_t @var{n}, mpir_ui @var{d})\n@deftypefunx int mpz_divisible_2exp_p (mpz_t @var{n}, mp_bitcnt_t @var{b})\n@cindex Divisibility functions\nReturn non-zero if @var{n} is exactly divisible by @var{d}, or in the case of\n@code{mpz_divisible_2exp_p} by @m{2^b,2^@var{b}}.\n\n@var{n} is divisible by @var{d} if there exists an integer @var{q} satisfying\n@math{@var{n} = @var{q}@GMPmultiply{}@var{d}}.  Unlike the other division\nfunctions, @math{@var{d}=0} is accepted and following the rule it can be seen\nthat only 0 is considered divisible by 0.\n@end deftypefun\n\n@deftypefun int mpz_congruent_p (mpz_t @var{n}, mpz_t @var{c}, mpz_t @var{d})\n@deftypefunx int mpz_congruent_ui_p (mpz_t @var{n}, mpir_ui @var{c}, mpir_ui @var{d})\n@deftypefunx int mpz_congruent_2exp_p (mpz_t @var{n}, mpz_t @var{c}, mp_bitcnt_t @var{b})\n@cindex Divisibility functions\n@cindex Congruence functions\nReturn non-zero if @var{n} is congruent to @var{c} modulo @var{d}, or in the\ncase of @code{mpz_congruent_2exp_p} modulo @m{2^b,2^@var{b}}.\n\n@var{n} is congruent to @var{c} mod @var{d} if there exists an integer @var{q}\nsatisfying @math{@var{n} = @var{c} + @var{q}@GMPmultiply{}@var{d}}.  
Unlike\nthe other division functions, @math{@var{d}=0} is accepted and following the\nrule it can be seen that @var{n} and @var{c} are considered congruent mod 0\nonly when exactly equal.\n@end deftypefun\n\n\n@need 2000\n@node Integer Exponentiation, Integer Roots, Integer Division, Integer Functions\n@section Exponentiation Functions\n@cindex Integer exponentiation functions\n@cindex Exponentiation functions\n@cindex Powering functions\n\n@deftypefun void mpz_powm (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})\n@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, mpz_t @var{base}, mpir_ui @var{exp}, mpz_t @var{mod})\nSet @var{rop} to @m{base^{exp} \\bmod mod, (@var{base} raised to @var{exp})\nmodulo @var{mod}}.\n\nA negative @var{exp} is supported in @code{mpz_powm} if an inverse \n@math{@var{base}^@W{-1} @bmod @var{mod}} exists (see @code{mpz_invert} in \n@ref{Number Theoretic Functions}).\nIf an inverse doesn't exist then a divide by zero is raised.\n@end deftypefun\n\n@deftypefun void mpz_pow_ui (mpz_t @var{rop}, mpz_t @var{base}, mpir_ui @var{exp})\n@deftypefunx void mpz_ui_pow_ui (mpz_t @var{rop}, mpir_ui @var{base}, mpir_ui @var{exp})\nSet @var{rop} to @m{base^{exp}, @var{base} raised to @var{exp}}.  The case\n@math{0^0} yields 1.\n@end deftypefun\n\n\n@need 2000\n@node Integer Roots, Number Theoretic Functions, Integer Exponentiation, Integer Functions\n@section Root Extraction Functions\n@cindex Integer root functions\n@cindex Root extraction functions\n\n@deftypefun int mpz_root (mpz_t @var{rop}, mpz_t @var{op}, mpir_ui @var{n})\nSet @var{rop} to @m{\\lfloor\\root n \\of {op}\\rfloor@C{},} the truncated integer\npart of the @var{n}th root of @var{op}.  
Return non-zero if the computation\nwas exact, i.e., if @var{op} is @var{rop} to the @var{n}th power.\n@end deftypefun\n\n@deftypefun void mpz_nthroot (mpz_t @var{rop}, mpz_t @var{op}, mpir_ui @var{n})\nSet @var{rop} to @m{\\lfloor\\root n \\of {op}\\rfloor@C{},} the truncated integer\npart of the @var{n}th root of @var{op}.\n@end deftypefun\n\n@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, mpz_t @var{u}, mpir_ui @var{n})\nSet @var{root} to @m{\\lfloor\\root n \\of {u}\\rfloor@C{},} the truncated\ninteger part of the @var{n}th root of @var{u}.  Set @var{rem} to the\nremainder, @m{(@var{u} - @var{root}^n),\n@var{u}@minus{}@var{root}**@var{n}}.\n@end deftypefun\n\n@deftypefun void mpz_sqrt (mpz_t @var{rop}, mpz_t @var{op})\nSet @var{rop} to @m{\\lfloor\\sqrt{@var{op}}\\rfloor@C{},} the truncated\ninteger part of the square root of @var{op}.\n@end deftypefun\n\n@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, mpz_t @var{op})\nSet @var{rop1} to @m{\\lfloor\\sqrt{@var{op}}\\rfloor, the truncated integer part\nof the square root of @var{op}}, like @code{mpz_sqrt}.  
Set @var{rop2} to the\nremainder @m{(@var{op} - @var{rop1}^2),\n@var{op}@minus{}@var{rop1}*@var{rop1}}, which will be zero if @var{op} is a\nperfect square.\n\nIf @var{rop1} and @var{rop2} are the same variable, the results are\nundefined.\n@end deftypefun\n\n@deftypefun int mpz_perfect_power_p (mpz_t @var{op})\n@cindex Perfect power functions\n@cindex Root testing functions\nReturn non-zero if @var{op} is a perfect power, i.e., if there exist integers\n@m{a,@var{a}} and @m{b,@var{b}}, with @m{b>1, @var{b}>1}, such that\n@m{@var{op}=a^b, @var{op} equals @var{a} raised to the power @var{b}}.\n\nUnder this definition both 0 and 1 are considered to be perfect powers.\nNegative values of @var{op} are accepted, but of course can only be odd\nperfect powers.\n@end deftypefun\n\n@deftypefun int mpz_perfect_square_p (mpz_t @var{op})\n@cindex Perfect square functions\n@cindex Root testing functions\nReturn non-zero if @var{op} is a perfect square, i.e., if the square root of\n@var{op} is an integer.  
Under this definition both 0 and 1 are considered to\nbe perfect squares.\n@end deftypefun\n\n\n@need 2000\n@node Number Theoretic Functions, Integer Comparisons, Integer Roots, Integer Functions\n@section Number Theoretic Functions\n@cindex Number theoretic functions\n\n@deftypefun int mpz_probable_prime_p (mpz_t @var{n}, gmp_randstate_t @var{state}, int @var{prob}, mpir_ui @var{div})\n@cindex Prime testing functions\n@cindex Probable prime testing functions\nDetermine whether @var{n} is a probable prime with the chance of error being at most 1 in 2^prob.\nThe return value is 1 if @var{n} is probably prime, or 0 if\n@var{n} is definitely composite.\n\nThis function does some trial divisions to speed up the average case, then some probabilistic\nprimality tests to achieve the desired level of error.\n\n@var{div} can be used to inform the function that trial division up to @var{div} has\nalready been performed on @var{n} and so @var{n} has NO divisors <= @var{div}.  Use 0 to\ninform the function that no trial division has been done.\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n@deftypefun int mpz_likely_prime_p (mpz_t @var{n}, gmp_randstate_t @var{state}, mpir_ui @var{div})\n@cindex Prime testing functions\n@cindex Probable prime testing functions\nDetermine whether @var{n} is likely a prime, i.e. you can consider it a prime for practical purposes.\nThe return value is 1 if @var{n} can be considered prime, or 0 if\n@var{n} is definitely composite.\n\nThis function does some trial divisions to speed up the average case, then some probabilistic\nprimality tests. 
The term ``likely'' refers to the fact that the number will not have small factors.\n\n@var{div} can be used to inform the function that trial division up to @var{div} has\nalready been performed on @var{n} and so @var{n} has NO divisors <= @var{div}\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n@deftypefun int mpz_probab_prime_p (mpz_t @var{n}, int @var{reps})\n@cindex Prime testing functions\n@cindex Probable prime testing functions\nDetermine whether @var{n} is prime.  Return 2 if @var{n} is definitely prime,\nreturn 1 if @var{n} is probably prime (without being certain), or return 0 if\n@var{n} is definitely composite.\n\nThis function does some trial divisions, then some Miller-Rabin probabilistic\nprimality tests.  @var{reps} controls how many such tests are done, 5 to 10 is\na reasonable number, more will reduce the chances of a composite being\nreturned as ``probably prime''.\n\nMiller-Rabin and similar tests can be more properly called compositeness\ntests.  Numbers which fail are known to be composite but those which pass\nmight be prime or might be composite.  Only a few composites pass, hence those\nwhich pass are considered probably prime.\n\n@strong{This function is obsolete.  It will disappear from future MPIR releases.}\n@end deftypefun\n\n@deftypefun void mpz_nextprime (mpz_t @var{rop}, mpz_t @var{op})\n@cindex Next prime function\nSet @var{rop} to the next prime greater than @var{op}.\n\nThis function uses a probabilistic algorithm to identify primes.  For\npractical purposes it's adequate, the chance of a composite passing will be\nextremely small. However, despite the name, it does not guarantee primality.\n\n@strong{This function is obsolete.  
It will disappear from future MPIR releases.}\n@end deftypefun\n\n@deftypefun void mpz_next_prime_candidate (mpz_t @var{rop}, mpz_t @var{op}, gmp_randstate_t @var{state})\n@cindex Next candidate prime function\nSet @var{rop} to the next candidate prime greater than @var{op}. Note that this\nfunction will occasionally return composites. It is designed to give a quick \nmethod for generating numbers which do not have small prime factors (less than\n1000) and which pass a small number of rounds of Miller-Rabin (just two rounds). The test is designed for speed, assuming that a high quality followup test can \nthen be run to ensure primality. \n\nThe variable @var{state} must be initialized by calling one of the\n@code{gmp_randinit} functions (@ref{Random State Initialization})\nbefore invoking this function.\n@end deftypefun\n\n@deftypefun void mpz_gcd (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@cindex Greatest common divisor functions\n@cindex GCD functions\nSet @var{rop} to the greatest common divisor of @var{op1} and @var{op2}.\nThe result is always positive even if one or both input operands\nare negative.\n@end deftypefun\n\n@deftypefun mpir_ui mpz_gcd_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\nCompute the greatest common divisor of @var{op1} and @var{op2}.  If\n@var{rop} is not @code{NULL}, store the result there.\n\nIf the result is small enough to fit in an @code{mpir_ui}, it is\nreturned.  If the result does not fit, 0 is returned, and the result is equal\nto the argument @var{op1}.  
Note that the result will always fit if @var{op2}\nis non-zero.\n@end deftypefun\n\n@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, const mpz_t @var{a}, const mpz_t @var{b})\n@cindex Extended GCD\n@cindex GCD extended\nSet @var{g} to the greatest common divisor of @var{a} and @var{b}, and in\naddition set @var{s} and @var{t} to coefficients satisfying\n@math{@var{a}@GMPmultiply{}@var{s} + @var{b}@GMPmultiply{}@var{t} = @var{g}}.\nThe value in @var{g} is always positive, even if one or both of @var{a} and\n@var{b} are negative (or zero if both inputs are zero).  The values in @var{s}\nand @var{t} are chosen such that normally, @math{@GMPabs{@var{s}} <\n@GMPabs{@var{b}} / (2 @var{g})} and @math{@GMPabs{@var{t}} < @GMPabs{@var{a}}\n/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely.  There\nare a few exceptional cases:\n\nIf @math{@GMPabs{@var{a}} = @GMPabs{@var{b}}}, then @math{@var{s} = 0},\n@math{@var{t} = sgn(@var{b})}.\n\nOtherwise, @math{@var{s} = sgn(@var{a})} if @math{@var{b} = 0} or\n@math{@GMPabs{@var{b}} = 2 @var{g}}, and @math{@var{t} = sgn(@var{b})} if\n@math{@var{a} = 0} or @math{@GMPabs{@var{a}} = 2 @var{g}}.\n\nIn all cases, @math{@var{s} = 0} if and only if @math{@var{g} =\n@GMPabs{@var{b}}}, i.e., if @var{b} divides @var{a} or @math{@var{a} = @var{b}\n= 0}.\n\nIf @var{t} is @code{NULL} then that value is not computed.\n@end deftypefun\n\n@deftypefun void mpz_lcm (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, mpz_t @var{op1}, mpir_ui @var{op2})\n@cindex Least common multiple functions\n@cindex LCM functions\nSet @var{rop} to the least common multiple of @var{op1} and @var{op2}.\n@var{rop} is always positive, irrespective of the signs of @var{op1} and\n@var{op2}.  
@var{rop} will be zero if either @var{op1} or @var{op2} is zero.\n@end deftypefun\n\n@deftypefun int mpz_invert (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\n@cindex Modular inverse functions\n@cindex Inverse modulo functions\nCompute the inverse of @var{op1} modulo @var{op2} and put the result in\n@var{rop}.  If the inverse exists, the return value is non-zero and @var{rop}\nwill satisfy @math{0 @le{} @var{rop} < @var{op2}}.  If an inverse doesn't exist\nthe return value is zero and @var{rop} is undefined.\n@end deftypefun\n\n@deftypefun int mpz_jacobi (mpz_t @var{a}, mpz_t @var{b})\n@cindex Jacobi symbol functions\nCalculate the Jacobi symbol @m{\\left(a \\over b\\right),\n(@var{a}/@var{b})}.  This is defined only for @var{b} odd.\n@end deftypefun\n\n@deftypefun int mpz_legendre (mpz_t @var{a}, mpz_t @var{p})\n@cindex Legendre symbol functions\nCalculate the Legendre symbol @m{\\left(a \\over p\\right),\n(@var{a}/@var{p})}.  This is defined only for @var{p} an odd positive\nprime, and for such @var{p} it's identical to the Jacobi symbol.\n@end deftypefun\n\n@deftypefun int mpz_kronecker (mpz_t @var{a}, mpz_t @var{b})\n@deftypefunx int mpz_kronecker_si (mpz_t @var{a}, mpir_si @var{b})\n@deftypefunx int mpz_kronecker_ui (mpz_t @var{a}, mpir_ui @var{b})\n@deftypefunx int mpz_si_kronecker (mpir_si @var{a}, mpz_t @var{b})\n@deftypefunx int mpz_ui_kronecker (mpir_ui @var{a}, mpz_t @var{b})\n@cindex Kronecker symbol functions\nCalculate the Jacobi symbol @m{\\left(a \\over b\\right),\n(@var{a}/@var{b})} with the Kronecker extension @m{\\left(a \\over\n2\\right) = \\left(2 \\over a\\right), (a/2)=(2/a)} when @math{a} odd, or\n@m{\\left(a \\over 2\\right) = 0, (a/2)=0} when @math{a} even.\n\nWhen @var{b} is odd the Jacobi symbol and Kronecker symbol are\nidentical, so @code{mpz_kronecker_ui} etc can be used for mixed\nprecision Jacobi symbols too.\n\nFor more information see Henri Cohen section 1.4.2 (@pxref{References}),\nor any number theory textbook.  
See also the example program\n@file{demos/qcn.c} which uses @code{mpz_kronecker_ui} on the MPIR website.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, mpz_t @var{op}, mpz_t @var{f})\n@cindex Remove factor functions\n@cindex Factor removal functions\nRemove all occurrences of the factor @var{f} from @var{op} and store the\nresult in @var{rop}.  The return value is how many such occurrences were\nremoved.\n@end deftypefun\n\n@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{n})\n@deftypefunx void mpz_2fac_ui (mpz_t @var{rop}, unsigned long int @var{n})\n@deftypefunx void mpz_mfac_uiui (mpz_t @var{rop}, unsigned long int @var{n}, unsigned long int @var{m})\n@cindex Factorial functions\nSet @var{rop} to the factorial of @var{n}: @code{mpz_fac_ui} computes the plain factorial @var{n}!,\n@code{mpz_2fac_ui} computes the double-factorial @var{n}!!, and @code{mpz_mfac_uiui} the\n@var{m}-multi-factorial @m{n!^{(m)}, @var{n}!^(@var{m})}.\n@end deftypefun\n\n@deftypefun void mpz_primorial_ui (mpz_t @var{rop}, unsigned long int @var{n})\n@cindex Primorial functions\nSet @var{rop} to the primorial of @var{n}, i.e. the product of all positive\nprime numbers @math{@le{}@var{n}}.\n@end deftypefun\n\n@deftypefun void mpz_bin_ui (mpz_t @var{rop}, mpz_t @var{n}, mpir_ui @var{k})\n@deftypefunx void mpz_bin_uiui (mpz_t @var{rop}, mpir_ui @var{n}, mpir_ui @var{k})\n@cindex Binomial coefficient functions\nCompute the binomial coefficient @m{\\left({n}\\atop{k}\\right), @var{n} over\n@var{k}} and store the result in @var{rop}.  
Negative values of @var{n} are\nsupported by @code{mpz_bin_ui}, using the identity\n@m{\left({-n}\atop{k}\right) = (-1)^k \left({n+k-1}\atop{k}\right),\nbin(-n@C{}k) = (-1)^k * bin(n+k-1@C{}k)}, see Knuth volume 1 section 1.2.6\npart G.\n@end deftypefun\n\n@deftypefun void mpz_fib_ui (mpz_t @var{fn}, mpir_ui @var{n})\n@deftypefunx void mpz_fib2_ui (mpz_t @var{fn}, mpz_t @var{fnsub1}, mpir_ui @var{n})\n@cindex Fibonacci sequence functions\n@code{mpz_fib_ui} sets @var{fn} to @m{F_n,F[n]}, the @var{n}'th Fibonacci\nnumber.  @code{mpz_fib2_ui} sets @var{fn} to @m{F_n,F[n]}, and @var{fnsub1} to\n@m{F_{n-1},F[n-1]}.\n\nThese functions are designed for calculating isolated Fibonacci numbers.  When\na sequence of values is wanted it's best to start with @code{mpz_fib2_ui} and\niterate the defining @m{F_{n+1} = F_n + F_{n-1}, F[n+1]=F[n]+F[n-1]} or\nsimilar.\n@end deftypefun\n\n@deftypefun void mpz_lucnum_ui (mpz_t @var{ln}, mpir_ui @var{n})\n@deftypefunx void mpz_lucnum2_ui (mpz_t @var{ln}, mpz_t @var{lnsub1}, mpir_ui @var{n})\n@cindex Lucas number functions\n@code{mpz_lucnum_ui} sets @var{ln} to @m{L_n,L[n]}, the @var{n}'th Lucas\nnumber.  @code{mpz_lucnum2_ui} sets @var{ln} to @m{L_n,L[n]}, and @var{lnsub1}\nto @m{L_{n-1},L[n-1]}.\n\nThese functions are designed for calculating isolated Lucas numbers.  When a\nsequence of values is wanted it's best to start with @code{mpz_lucnum2_ui} and\niterate the defining @m{L_{n+1} = L_n + L_{n-1}, L[n+1]=L[n]+L[n-1]} or\nsimilar.\n\nThe Fibonacci numbers and Lucas numbers are related sequences, so it's never\nnecessary to call both @code{mpz_fib2_ui} and @code{mpz_lucnum2_ui}.  
The\nformulas for going from Fibonacci to Lucas can be found in @ref{Lucas Numbers\nAlgorithm}, the reverse is straightforward too.\n@end deftypefun\n\n\n@node Integer Comparisons, Integer Logic and Bit Fiddling, Number Theoretic Functions, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Comparison Functions\n@cindex Integer comparison functions\n@cindex Comparison functions\n\n@deftypefn Function int mpz_cmp (mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefnx Function int mpz_cmp_d (mpz_t @var{op1}, double @var{op2})\n@deftypefnx Macro int mpz_cmp_si (mpz_t @var{op1}, mpir_si @var{op2})\n@deftypefnx Macro int mpz_cmp_ui (mpz_t @var{op1}, mpir_ui @var{op2})\nCompare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >\n@var{op2}}, zero if @math{@var{op1} = @var{op2}}, or a negative value if\n@math{@var{op1} < @var{op2}}.\n\n@code{mpz_cmp_ui} and @code{mpz_cmp_si} are macros and will evaluate their\narguments more than once.  @code{mpz_cmp_d} can be called with an infinity,\nbut results are undefined for a NaN.\n@end deftypefn\n\n@deftypefn Function int mpz_cmpabs (mpz_t @var{op1}, mpz_t @var{op2})\n@deftypefnx Function int mpz_cmpabs_d (mpz_t @var{op1}, double @var{op2})\n@deftypefnx Function int mpz_cmpabs_ui (mpz_t @var{op1}, mpir_ui @var{op2})\nCompare the absolute values of @var{op1} and @var{op2}.  Return a positive\nvalue if @math{@GMPabs{@var{op1}} > @GMPabs{@var{op2}}}, zero if\n@math{@GMPabs{@var{op1}} = @GMPabs{@var{op2}}}, or a negative value if\n@math{@GMPabs{@var{op1}} < @GMPabs{@var{op2}}}.\n\n@code{mpz_cmpabs_d} can be called with an infinity, but results are undefined\nfor a NaN.\n@end deftypefn\n\n@deftypefn Macro int mpz_sgn (mpz_t @var{op})\n@cindex Sign tests\n@cindex Integer sign tests\nReturn @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and\n@math{-1} if @math{@var{op} < 0}.\n\nThis function is actually implemented as a macro.  
It evaluates its argument\nmultiple times.\n@end deftypefn\n\n\n@node Integer Logic and Bit Fiddling, I/O of Integers, Integer Comparisons, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Logical and Bit Manipulation Functions\n@cindex Logical functions\n@cindex Bit manipulation functions\n@cindex Integer logical functions\n@cindex Integer bit manipulation functions\n\nThese functions behave as if twos complement arithmetic were used (although\nsign-magnitude is the actual implementation).  The least significant bit is\nnumber 0.\n\n@deftypefun void mpz_and (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\nSet @var{rop} to @var{op1} bitwise-and @var{op2}.\n@end deftypefun\n\n@deftypefun void mpz_ior (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\nSet @var{rop} to @var{op1} bitwise inclusive-or @var{op2}.\n@end deftypefun\n\n@deftypefun void mpz_xor (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})\nSet @var{rop} to @var{op1} bitwise exclusive-or @var{op2}.\n@end deftypefun\n\n@deftypefun void mpz_com (mpz_t @var{rop}, mpz_t @var{op})\nSet @var{rop} to the one's complement of @var{op}.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpz_popcount (mpz_t @var{op})\nIf @math{@var{op}@ge{}0}, return the population count of @var{op}, which is\nthe number of 1 bits in the binary representation.  If @math{@var{op}<0}, the\nnumber of 1s is infinite, and the return value is @var{ULONG_MAX}, the largest\npossible @code{mp_bitcnt_t}.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpz_hamdist (mpz_t @var{op1}, mpz_t @var{op2})\nIf @var{op1} and @var{op2} are both @math{@ge{}0} or both @math{<0}, return\nthe hamming distance between the two operands, which is the number of bit\npositions where @var{op1} and @var{op2} have different bit values.  
If one\noperand is @math{@ge{}0} and the other @math{<0} then the number of bits\ndifferent is infinite, and the return value is the largest\npossible @code{mp_bitcnt_t}.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpz_scan0 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})\n@deftypefunx mp_bitcnt_t mpz_scan1 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})\n@cindex Bit scanning functions\n@cindex Scan bit functions\nScan @var{op}, starting from bit @var{starting_bit}, towards more significant\nbits, until the first 0 or 1 bit (respectively) is found.  Return the index of\nthe found bit.\n\nIf the bit at @var{starting_bit} is already what's sought, then\n@var{starting_bit} is returned.\n\nIf there's no bit found, then the largest possible @code{mp_bitcnt_t} is \nreturned.  This will happen in @code{mpz_scan0} past the end of a positive \nnumber, or @code{mpz_scan1} past the end of a nonnegative number.\n@end deftypefun\n\n@deftypefun void mpz_setbit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})\nSet bit @var{bit_index} in @var{rop}.\n@end deftypefun\n\n@deftypefun void mpz_clrbit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})\nClear bit @var{bit_index} in @var{rop}.\n@end deftypefun\n\n@deftypefun void mpz_combit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})\nComplement bit @var{bit_index} in @var{rop}.\n@end deftypefun\n\n@deftypefun int mpz_tstbit (mpz_t @var{op}, mp_bitcnt_t @var{bit_index})\nTest bit @var{bit_index} in @var{op} and return 0 or 1 accordingly.\n@end deftypefun\n\n@node I/O of Integers, Integer Random Numbers, Integer Logic and Bit Fiddling, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Input and Output Functions\n@cindex Integer input and output functions\n@cindex Input functions\n@cindex Output functions\n@cindex I/O functions\n\nFunctions that perform input from a stdio stream, and functions that output to\na stdio stream.  
Passing a @code{NULL} pointer for a @var{stream} argument to any of\nthese functions will make them read from @code{stdin} and write to\n@code{stdout}, respectively.\n\nWhen using any of these functions, it is a good idea to include @file{stdio.h}\nbefore @file{mpir.h}, since that will allow @file{mpir.h} to define prototypes\nfor these functions.\n\n@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, mpz_t @var{op})\nOutput @var{op} on stdio stream @var{stream}, as a string of digits in base\n@var{base}.  The base argument may vary from 2 to 62 or from @minus{}2 to\n@minus{}36.\n\nFor @var{base} in the range 2..36, digits and lower-case letters are used; for\n@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,\ndigits, upper-case letters, and lower-case letters (in that significance order)\nare used.\n\nReturn the number of bytes written, or if an error occurred, return 0.\n@end deftypefun\n\n@deftypefun size_t mpz_inp_str (mpz_t @var{rop}, FILE *@var{stream}, int @var{base})\nInput a possibly white-space preceded string in base @var{base} from stdio\nstream @var{stream}, and put the read integer in @var{rop}.\n\nThe @var{base} may vary from 2 to 62, or if @var{base} is 0, then the leading\ncharacters are used: @code{0x} and @code{0X} for hexadecimal, @code{0b} and\n@code{0B} for binary, @code{0} for octal, or decimal otherwise.\n\nFor bases up to 36, case is ignored; upper-case and lower-case letters have\nthe same value.  For bases 37 to 62, upper-case letter represent the usual\n10..35 while lower-case letter represent 36..61.\n\nReturn the number of bytes read, or if an error occurred, return 0.\n@end deftypefun\n\n@deftypefun size_t mpz_out_raw (FILE *@var{stream}, mpz_t @var{op})\nOutput @var{op} on stdio stream @var{stream}, in raw binary format.  The\ninteger is written in a portable format, with 4 bytes of size information, and\nthat many bytes of limbs.  
Both the size and the limbs are written in\ndecreasing significance order (i.e., in big-endian).\n\nThe output can be read with @code{mpz_inp_raw}.\n\nReturn the number of bytes written, or if an error occurred, return 0.\n\nThe output of this can not be read by @code{mpz_inp_raw} from GMP 1, because\nof changes necessary for compatibility between 32-bit and 64-bit machines.\n@end deftypefun\n\n@deftypefun size_t mpz_inp_raw (mpz_t @var{rop}, FILE *@var{stream})\nInput from stdio stream @var{stream} in the format written by\n@code{mpz_out_raw}, and put the result in @var{rop}.  Return the number of\nbytes read, or if an error occurred, return 0.\n\nThis routine can read the output from @code{mpz_out_raw} also from GMP 1, in\nspite of changes necessary for compatibility between 32-bit and 64-bit\nmachines.\n@end deftypefun\n\n\n@need 2000\n@node Integer Random Numbers, Integer Import and Export, I/O of Integers, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Random Number Functions\n@cindex Integer random number functions\n@cindex Random number functions\n\nThe random number functions of MPIR come in two groups; older function\nthat rely on a global state, and newer functions that accept a state\nparameter that is read and modified.  
Please see the @ref{Random Number\nFunctions} for more information on how to use and not to use random\nnumber functions.\n\n@deftypefun void mpz_urandomb (mpz_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{n})\nGenerate a uniformly distributed random integer in the range 0 to @m{2^n-1,\n2^@var{n}@minus{}1}, inclusive.\n\nThe variable @var{state} must be initialized by calling one of the\n@code{gmp_randinit} functions (@ref{Random State Initialization})\nbefore invoking this function.\n@end deftypefun\n\n@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, mpz_t @var{n})\nGenerate a uniform random integer in the range 0 to @math{@var{n}-1},\ninclusive.\n\nThe variable @var{state} must be initialized by calling one of the\n@code{gmp_randinit} functions (@ref{Random State Initialization})\nbefore invoking this function.\n@end deftypefun\n\n@deftypefun void mpz_rrandomb (mpz_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{n})\nGenerate a random integer with long strings of zeros and ones in the\nbinary representation.  Useful for testing functions and algorithms,\nsince this kind of random numbers have proven to be more likely to\ntrigger corner-case bugs.  
The random number will be in the range\n0 to @m{2^n-1, 2^@var{n}@minus{}1}, inclusive.\n\nThe variable @var{state} must be initialized by calling one of the\n@code{gmp_randinit} functions (@ref{Random State Initialization})\nbefore invoking this function.\n@end deftypefun\n\n@node Integer Import and Export, Miscellaneous Integer Functions, Integer Random Numbers, Integer Functions\n@section Integer Import and Export\n\n@code{mpz_t} variables can be converted to and from arbitrary words of binary\ndata with the following functions.\n\n@deftypefun void mpz_import (mpz_t @var{rop}, size_t @var{count}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, const void *@var{op})\n@cindex Integer import\n@cindex Import\nSet @var{rop} from an array of word data at @var{op}.\n\nThe parameters specify the format of the data.  @var{count} many words are\nread, each @var{size} bytes.  @var{order} can be 1 for most significant word\nfirst or -1 for least significant first.  Within each word @var{endian} can be\n1 for most significant byte first, -1 for least significant first, or 0 for\nthe native endianness of the host CPU@.  The most significant @var{nails} bits\nof each word are skipped, this can be 0 to use the full words.\n\nThere is no sign taken from the data, @var{rop} will simply be a positive\ninteger.  An application can handle any sign itself, and apply it for instance\nwith @code{mpz_neg}.\n\nThere are no data alignment restrictions on @var{op}, any address is allowed.\n\nHere's an example converting an array of @code{mpir_ui} data, most\nsignificant element first, and host byte order within each value.\n\n@example\nmpir_ui  a[20];\nmpz_t          z;\nmpz_import (z, 20, 1, sizeof(a[0]), 0, 0, a);\n@end example\n\nThis example assumes the full @code{sizeof} bytes are used for data in the\ngiven type, which is usually true, and certainly true for @code{mpir_ui}\neverywhere we know of.  
However on Cray vector systems it may be noted that\n@code{short} and @code{int} are always stored in 8 bytes (and with\n@code{sizeof} indicating that) but use only 32 or 46 bits.  The @var{nails}\nfeature can account for this, by passing for instance\n@code{8*sizeof(int)-INT_BIT}.\n@end deftypefun\n\n@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, mpz_t @var{op})\n@cindex Integer export\n@cindex Export\nFill @var{rop} with word data from @var{op}.\n\nThe parameters specify the format of the data produced.  Each word will be\n@var{size} bytes and @var{order} can be 1 for most significant word first or\n-1 for least significant first.  Within each word @var{endian} can be 1 for\nmost significant byte first, -1 for least significant first, or 0 for the\nnative endianness of the host CPU@.  The most significant @var{nails} bits of\neach word are unused and set to zero, this can be 0 to produce full words.\n\nThe number of words produced is written to @code{*@var{countp}}, or\n@var{countp} can be @code{NULL} to discard the count.  @var{rop} must have\nenough space for the data, or if @var{rop} is @code{NULL} then a result array\nof the necessary size is allocated using the current MPIR allocation function\n(@pxref{Custom Allocation}).  In either case the return value is the\ndestination used, either @var{rop} or the allocated block.\n\nIf @var{op} is non-zero then the most significant word produced will be\nnon-zero.  If @var{op} is zero then the count returned will be zero and\nnothing written to @var{rop}.  If @var{rop} is @code{NULL} in this case, no\nblock is allocated, just @code{NULL} is returned.\n\nThe sign of @var{op} is ignored, just the absolute value is exported.  
An\napplication can use @code{mpz_sgn} to get the sign and handle it as desired.\n(@pxref{Integer Comparisons})\n\nThere are no data alignment restrictions on @var{rop}, any address is allowed.\n\nWhen an application is allocating space itself the required size can be\ndetermined with a calculation like the following.  Since @code{mpz_sizeinbase}\nalways returns at least 1, @code{count} here will be at least one, which\navoids any portability problems with @code{malloc(0)}, though if @code{z} is\nzero no space at all is actually needed (or written).\n\n@example\nnumb = 8*size - nail;\ncount = (mpz_sizeinbase (z, 2) + numb-1) / numb;\np = malloc (count * size);\n@end example\n@end deftypefun\n\n\n@need 2000\n@node Miscellaneous Integer Functions, Integer Special Functions, Integer Import and Export, Integer Functions\n@comment  node-name,  next,  previous,  up\n@section Miscellaneous Functions\n@cindex Miscellaneous integer functions\n@cindex Integer miscellaneous functions\n\n@deftypefun int mpz_fits_ulong_p (mpz_t @var{op})\n@deftypefunx int mpz_fits_slong_p (mpz_t @var{op})\n@deftypefunx int mpz_fits_uint_p (mpz_t @var{op})\n@deftypefunx int mpz_fits_sint_p (mpz_t @var{op})\n@deftypefunx int mpz_fits_ushort_p (mpz_t @var{op})\n@deftypefunx int mpz_fits_sshort_p (mpz_t @var{op})\nReturn non-zero iff the value of @var{op} fits in an @code{unsigned long},\n@code{long}, @code{unsigned int}, @code{signed int}, @code{unsigned\nshort int}, or @code{signed short int}, respectively.  Otherwise, return zero.\n@end deftypefun\n\n@deftypefn Macro int mpz_odd_p (mpz_t @var{op})\n@deftypefnx Macro int mpz_even_p (mpz_t @var{op})\nDetermine whether @var{op} is odd or even, respectively.  Return non-zero if\nyes, zero if no.  
These macros evaluate their argument more than once.\n@end deftypefn\n\n@deftypefun size_t mpz_sizeinbase (mpz_t @var{op}, int @var{base})\n@cindex Size in digits\n@cindex Digits in an integer\nReturn the size of @var{op} measured in number of digits in the given\n@var{base}.  @var{base} can vary from 2 to 36.  The sign of @var{op} is\nignored, just the absolute value is used.  The result will be either exact or\n1 too big.  If @var{base} is a power of 2, the result is always exact.  If\n@var{op} is zero the return value is always 1.\n\nThis function can be used to determine the space required when converting\n@var{op} to a string.  The right amount of allocation is normally two more\nthan the value returned by @code{mpz_sizeinbase}, one extra for a minus sign\nand one for the null-terminator.\n\n@cindex Most significant bit\nIt will be noted that @code{mpz_sizeinbase(@var{op},2)} can be used to locate\nthe most significant 1 bit in @var{op}, counting from 1.  (Unlike the bitwise\nfunctions which start from 0, @xref{Integer Logic and Bit Fiddling,, Logical\nand Bit Manipulation Functions}.)\n@end deftypefun\n\n\n@node Integer Special Functions,  , Miscellaneous Integer Functions, Integer Functions\n@section Special Functions\n@cindex Special integer functions\n@cindex Integer special functions\n\nThe functions in this section are for various special purposes.  Most\napplications will not need them.\n\n@deftypefun void mpz_array_init (mpz_t @var{integer_array}, size_t @var{array_size}, @w{mp_size_t @var{fixed_num_bits}})\nThis is a special type of initialization.  @strong{Fixed} space of\n@var{fixed_num_bits} is allocated to each of the @var{array_size} integers in\n@var{integer_array}.  There is no way to free the storage allocated by this\nfunction.  Don't call @code{mpz_clear}!\n\nThe @var{integer_array} parameter is the first @code{mpz_t} in the array.  
For\nexample,\n\n@example\nmpz_t  arr[20000];\nmpz_array_init (arr[0], 20000, 512);\n@end example\n\n@c  In case anyone's wondering, yes this parameter style is a bit anomalous,\n@c  it'd probably be nicer if it was \"arr\" instead of \"arr[0]\".  Obviously the\n@c  two differ only in the declaration, not the pointer value, but changing is\n@c  not possible since it'd provoke warnings or errors in existing sources.\n\nThis function is only intended for programs that create a large number\nof integers and need to reduce memory usage by avoiding the overheads of\nallocating and reallocating lots of small blocks.  In normal programs this\nfunction is not recommended.\n\nThe space allocated to each integer by this function will not be automatically\nincreased, unlike the normal @code{mpz_init}, so an application must ensure it\nis sufficient for any value stored.  The following space requirements apply to\nvarious routines,\n\n@itemize @bullet\n@item\n@code{mpz_abs}, @code{mpz_neg}, @code{mpz_set}, @code{mpz_set_si} and\n@code{mpz_set_ui} need room for the value they store.\n\n@item\n@code{mpz_add}, @code{mpz_add_ui}, @code{mpz_sub} and @code{mpz_sub_ui} need\nroom for the larger of the two operands, plus an extra\n@code{mp_bits_per_limb}.\n\n@item\n@code{mpz_mul}, @code{mpz_mul_ui} and @code{mpz_mul_si} need room for the sum\nof the number of bits in their operands, but each rounded up to a multiple of\n@code{mp_bits_per_limb}.\n\n@item\n@code{mpz_swap} can be used between two array variables, but not between an\narray and a normal variable.\n@end itemize\n\nFor other functions, or if in doubt, the suggestion is to calculate in a\nregular @code{mpz_init} variable and copy the result to an array variable with\n@code{mpz_set}.\n\n@strong{This function is obsolete.  
It will disappear from future MPIR releases.}\n@end deftypefun\n\n@deftypefun {void *} _mpz_realloc (mpz_t @var{integer}, mp_size_t @var{new_alloc})\nChange the space for @var{integer} to @var{new_alloc} limbs.  The value in\n@var{integer} is preserved if it fits, or is set to 0 if not.  The return\nvalue is not useful to applications and should be ignored.\n\n@code{mpz_realloc2} is the preferred way to accomplish allocation changes like\nthis.  @code{mpz_realloc2} and @code{_mpz_realloc} are the same except that\n@code{_mpz_realloc} takes its size in limbs.\n@end deftypefun\n\n@deftypefun mp_limb_t mpz_getlimbn (mpz_t @var{op}, mp_size_t @var{n})\nReturn limb number @var{n} from @var{op}.  The sign of @var{op} is ignored,\njust the absolute value is used.  The least significant limb is number 0.\n\n@code{mpz_size} can be used to find how many limbs make up @var{op}.\n@code{mpz_getlimbn} returns zero if @var{n} is outside the range 0 to\n@code{mpz_size(@var{op})-1}.\n@end deftypefun\n\n@deftypefun size_t mpz_size (mpz_t @var{op})\nReturn the size of @var{op} measured in number of limbs.  If @var{op} is zero,\nthe returned value will be zero.\n@c (@xref{Nomenclature}, for an explanation of the concept @dfn{limb}.)\n@end deftypefun\n\n@deftypefun {const mp_limb_t *} mpz_limbs_read (const mpz_t @var{x})\nReturn a pointer to the limb array representing the absolute value of @var{x}.\nThe size of the array is @code{mpz_size(@var{x})}. Intended for read access\nonly.\n@end deftypefun\n\n@deftypefun {mp_limb_t *} mpz_limbs_write (mpz_t @var{x}, mp_size_t @var{n})\n@deftypefunx {mp_limb_t *} mpz_limbs_modify (mpz_t @var{x}, mp_size_t @var{n})\nReturn a pointer to the limb array, intended for write access. The array is\nreallocated as needed, to make room for @var{n} limbs. Requires @math{@var{n}\n> 0}. 
The @code{mpz_limbs_modify} function returns an array that holds the old\nabsolute value of @var{x}, while @code{mpz_limbs_write} may destroy the old\nvalue and return an array with unspecified contents.\n@end deftypefun\n\n@deftypefun void mpz_limbs_finish (mpz_t @var{x}, mp_size_t @var{s})\nUpdates the internal size field of @var{x}. Used after writing to the limb\narray pointer returned by @code{mpz_limbs_write} or @code{mpz_limbs_modify} is\ncompleted. The array should contain @math{@GMPabs{@var{s}}} valid limbs,\nrepresenting the new absolute value for @var{x}, and the sign of @var{x} is\ntaken from the sign of @var{s}. This function never reallocates @var{x}, so\nthe limb pointer remains valid.\n@end deftypefun\n\n@c FIXME: Some more useful and less silly example?\n@example\nvoid foo (mpz_t x)\n@{\n  mp_size_t n, i;\n  mp_limb_t *xp;\n\n  n = mpz_size (x);\n  xp = mpz_limbs_modify (x, 2*n);\n  for (i = 0; i < n; i++)\n    xp[n+i] = xp[n-1-i];\n  mpz_limbs_finish (x, mpz_sgn (x) < 0 ? - 2*n : 2*n);\n@}\n@end example\n\n@deftypefun mpz_srcptr mpz_roinit_n (mpz_t @var{x}, const mp_limb_t *@var{xp}, mp_size_t @var{xs})\nSpecial initialization of @var{x}, using the given limb array and size.\n@var{x} should be treated as read-only: it can be passed safely as input to\nany mpz function, but not as an output. The array @var{xp} must point to at\nleast a readable limb, its size is\n@math{@GMPabs{@var{xs}}}, and the sign of @var{x} is the sign of @var{xs}. For\nconvenience, the function returns @var{x}, but cast to a const pointer type.\n@end deftypefun\n\n@example\nvoid foo (mpz_t x)\n@{\n  static const mp_limb_t y[3] = @{ 0x1, 0x2, 0x3 @};\n  mpz_t tmp;\n  mpz_add (x, x, mpz_roinit_n (tmp, y, 3));\n@}\n@end example\n\n@deftypefn Macro mpz_t MPZ_ROINIT_N (mp_limb_t *@var{xp}, mp_size_t @var{xs})\nThis macro expands to an initializer which can be assigned to an mpz_t\nvariable. 
The limb array @var{xp} must point to at least a readable limb,\nmoreover, unlike the @code{mpz_roinit_n} function, the array must be\nnormalized: if @var{xs} is non-zero, then\n@code{@var{xp}[@math{@GMPabs{@var{xs}}-1}]} must be non-zero. Intended\nprimarily for constant values. Using it for non-constant values requires a C\ncompiler supporting C99.\n@end deftypefn\n\n@example\nvoid foo (mpz_t x)\n@{\n  static const mp_limb_t ya[3] = @{ 0x1, 0x2, 0x3 @};\n  static const mpz_t y = MPZ_ROINIT_N ((mp_limb_t *) ya, 3);\n\n  mpz_add (x, x, y);\n@}\n@end example\n\n@node Rational Number Functions, Floating-point Functions, Integer Functions, Top\n@comment  node-name,  next,  previous,  up\n@chapter Rational Number Functions\n@cindex Rational number functions\n\nThis chapter describes the MPIR functions for performing arithmetic on rational\nnumbers.  These functions start with the prefix @code{mpq_}.\n\nRational numbers are stored in objects of type @code{mpq_t}.\n\nAll rational arithmetic functions assume operands have a canonical form, and\ncanonicalize their result.  The canonical form means that the denominator and\nthe numerator have no common factors, and that the denominator is positive.\nZero has the unique representation 0/1.\n\nPure assignment functions do not canonicalize the assigned variable.  
It is\nthe responsibility of the user to canonicalize the assigned variable before\nany arithmetic operations are performed on that variable.\n\n@deftypefun void mpq_canonicalize (mpq_t @var{op})\nRemove any factors that are common to the numerator and denominator of\n@var{op}, and make the denominator positive.\n@end deftypefun\n\n@menu\n* Initializing Rationals::\n* Rational Conversions::\n* Rational Arithmetic::\n* Comparing Rationals::\n* Applying Integer Functions::\n* I/O of Rationals::\n@end menu\n\n@node Initializing Rationals, Rational Conversions, Rational Number Functions, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Initialization and Assignment Functions\n@cindex Rational assignment functions\n@cindex Assignment functions\n@cindex Rational initialization functions\n@cindex Initialization functions\n\n@deftypefun void mpq_init (mpq_t @var{dest_rational})\nInitialize @var{dest_rational} and set it to 0/1.  Each variable should\nnormally only be initialized once, or at least cleared out (using the function\n@code{mpq_clear}) between each initialization.\n@end deftypefun\n\n@deftypefun void mpq_inits (mpq_t @var{x}, ...)\nInitialize a NULL-terminated list of @code{mpq_t} variables, and set their\nvalues to 0/1.\n@end deftypefun\n\n@deftypefun void mpq_clear (mpq_t @var{rational_number})\nFree the space occupied by @var{rational_number}.  
Make sure to call this\nfunction for all @code{mpq_t} variables when you are done with them.\n@end deftypefun\n\n@deftypefun void mpq_clears (mpq_t @var{x}, ...)\nFree the space occupied by a NULL-terminated list of @code{mpq_t} variables.\n@end deftypefun\n\n@deftypefun void mpq_set (mpq_t @var{rop}, mpq_t @var{op})\n@deftypefunx void mpq_set_z (mpq_t @var{rop}, mpz_t @var{op})\nAssign @var{rop} from @var{op}.\n@end deftypefun\n\n@deftypefun void mpq_set_ui (mpq_t @var{rop}, mpir_ui @var{op1}, mpir_ui @var{op2})\n@deftypefunx void mpq_set_si (mpq_t @var{rop}, mpir_si @var{op1}, mpir_ui @var{op2})\nSet the value of @var{rop} to @var{op1}/@var{op2}.  Note that if @var{op1} and\n@var{op2} have common factors, @var{rop} has to be passed to\n@code{mpq_canonicalize} before any operations are performed on @var{rop}.\n@end deftypefun\n\n@deftypefun int mpq_set_str (mpq_t @var{rop}, char *@var{str}, int @var{base})\nSet @var{rop} from a null-terminated string @var{str} in the given @var{base}.\n\nThe string can be an integer like ``41'' or a fraction like ``41/152''.  The\nfraction must be in canonical form (@pxref{Rational Number Functions}), or if\nnot then @code{mpq_canonicalize} must be called.\n\nThe numerator and optional denominator are parsed the same as in\n@code{mpz_set_str} (@pxref{Assigning Integers}).  White space is allowed in\nthe string, and is simply ignored.  The @var{base} can vary from 2 to 62, or\nif @var{base} is 0 then the leading characters are used: @code{0x} or @code{0X} for hex,\n@code{0b} or @code{0B} for binary,\n@code{0} for octal, or decimal otherwise.  
Note that this is done separately\nfor the numerator and denominator, so for instance @code{0xEF/100} is 239/100,\nwhereas @code{0xEF/0x100} is 239/256.\n\nThe return value is 0 if the entire string is a valid number, or @minus{}1 if\nnot.\n@end deftypefun\n\n@deftypefun void mpq_swap (mpq_t @var{rop1}, mpq_t @var{rop2})\nSwap the values @var{rop1} and @var{rop2} efficiently.\n@end deftypefun\n\n\n@need 2000\n@node Rational Conversions, Rational Arithmetic, Initializing Rationals, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Conversion Functions\n@cindex Rational conversion functions\n@cindex Conversion functions\n\n@deftypefun double mpq_get_d (mpq_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero).\n\nIf the exponent from the conversion is too big or too small to fit a\n@code{double} then the result is system dependent.  For too big an infinity is\nreturned when available.  For too small @math{0.0} is normally returned.\nHardware overflow, underflow and denorm traps may or may not occur.\n@end deftypefun\n\n@deftypefun void mpq_set_d (mpq_t @var{rop}, double @var{op})\n@deftypefunx void mpq_set_f (mpq_t @var{rop}, mpf_t @var{op})\nSet @var{rop} to the value of @var{op}.  There is no rounding, this conversion\nis exact.\n@end deftypefun\n\n@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, mpq_t @var{op})\nConvert @var{op} to a string of digits in base @var{base}.  The base may vary\nfrom 2 to 36.  The string will be of the form @samp{num/den}, or if the\ndenominator is 1 then just @samp{num}.\n\nIf @var{str} is @code{NULL}, the result string is allocated using the current\nallocation function (@pxref{Custom Allocation}).  
The block will be\n@code{strlen(str)+1} bytes, that being exactly enough for the string and\nnull-terminator.\n\nIf @var{str} is not @code{NULL}, it should point to a block of storage large\nenough for the result, that being\n\n@example\nmpz_sizeinbase (mpq_numref(@var{op}), @var{base})\n+ mpz_sizeinbase (mpq_denref(@var{op}), @var{base}) + 3\n@end example\n\nThe three extra bytes are for a possible minus sign, possible slash, and the\nnull-terminator.\n\nA pointer to the result string is returned, being either the allocated block,\nor the given @var{str}.\n@end deftypefun\n\n\n@node Rational Arithmetic, Comparing Rationals, Rational Conversions, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Arithmetic Functions\n@cindex Rational arithmetic functions\n@cindex Arithmetic functions\n\n@deftypefun void mpq_add (mpq_t @var{sum}, mpq_t @var{addend1}, mpq_t @var{addend2})\nSet @var{sum} to @var{addend1} + @var{addend2}.\n@end deftypefun\n\n@deftypefun void mpq_sub (mpq_t @var{difference}, mpq_t @var{minuend}, mpq_t @var{subtrahend})\nSet @var{difference} to @var{minuend} @minus{} @var{subtrahend}.\n@end deftypefun\n\n@deftypefun void mpq_mul (mpq_t @var{product}, mpq_t @var{multiplier}, mpq_t @var{multiplicand})\nSet @var{product} to @math{@var{multiplier} @GMPtimes{} @var{multiplicand}}.\n@end deftypefun\n\n@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})\nSet @var{rop} to @m{@var{op1} \\times 2^{op2}, @var{op1} times 2 raised to\n@var{op2}}.\n@end deftypefun\n\n@deftypefun void mpq_div (mpq_t @var{quotient}, mpq_t @var{dividend}, mpq_t @var{divisor})\n@cindex Division functions\nSet @var{quotient} to @var{dividend}/@var{divisor}.\n@end deftypefun\n\n@deftypefun void mpq_div_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})\nSet @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to\n@var{op2}}.\n@end deftypefun\n\n@deftypefun void mpq_neg (mpq_t @var{negated_operand}, 
mpq_t @var{operand})\nSet @var{negated_operand} to @minus{}@var{operand}.\n@end deftypefun\n\n@deftypefun void mpq_abs (mpq_t @var{rop}, mpq_t @var{op})\nSet @var{rop} to the absolute value of @var{op}.\n@end deftypefun\n\n@deftypefun void mpq_inv (mpq_t @var{inverted_number}, mpq_t @var{number})\nSet @var{inverted_number} to 1/@var{number}.  If the new denominator is\nzero, this routine will divide by zero.\n@end deftypefun\n\n@node Comparing Rationals, Applying Integer Functions, Rational Arithmetic, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Comparison Functions\n@cindex Rational comparison functions\n@cindex Comparison functions\n\n@deftypefun int mpq_cmp (mpq_t @var{op1}, mpq_t @var{op2})\n@deftypefunx int mpq_cmp_z (const mpq_t @var{op1}, const mpz_t @var{op2})\nCompare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >\n@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if\n@math{@var{op1} < @var{op2}}.\n\nTo determine if two rationals are equal, @code{mpq_equal} is faster than\n@code{mpq_cmp}.\n@end deftypefun\n\n@deftypefn Macro int mpq_cmp_ui (mpq_t @var{op1}, mpir_ui @var{num2}, mpir_ui @var{den2})\n@deftypefnx Macro int mpq_cmp_si (mpq_t @var{op1}, mpir_si @var{num2}, mpir_ui @var{den2})\nCompare @var{op1} and @var{num2}/@var{den2}.  Return a positive value if\n@math{@var{op1} > @var{num2}/@var{den2}}, zero if @math{@var{op1} =\n@var{num2}/@var{den2}}, and a negative value if @math{@var{op1} <\n@var{num2}/@var{den2}}.\n\n@var{num2} and @var{den2} are allowed to have common factors.\n\nThese functions are implemented as macros and evaluate their arguments\nmultiple times.\n@end deftypefn\n\n@deftypefn Macro int mpq_sgn (mpq_t @var{op})\n@cindex Sign tests\n@cindex Rational sign tests\nReturn @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and\n@math{-1} if @math{@var{op} < 0}.\n\nThis function is actually implemented as a macro.  
It evaluates its\narguments multiple times.\n@end deftypefn\n\n@deftypefun int mpq_equal (mpq_t @var{op1}, mpq_t @var{op2})\nReturn non-zero if @var{op1} and @var{op2} are equal, zero if they are\nnon-equal.  Although @code{mpq_cmp} can be used for the same purpose, this\nfunction is much faster.\n@end deftypefun\n\n@node Applying Integer Functions, I/O of Rationals, Comparing Rationals, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Applying Integer Functions to Rationals\n@cindex Rational numerator and denominator\n@cindex Numerator and denominator\n\nThe set of @code{mpq} functions is quite small.  In particular, there are few\nfunctions for either input or output.  The following functions give direct\naccess to the numerator and denominator of an @code{mpq_t}.\n\nNote that if an assignment to the numerator and/or denominator could take an\n@code{mpq_t} out of the canonical form described at the start of this chapter\n(@pxref{Rational Number Functions}) then @code{mpq_canonicalize} must be\ncalled before any other @code{mpq} functions are applied to that @code{mpq_t}.\n\n@deftypefn Macro mpz_t mpq_numref (mpq_t @var{op})\n@deftypefnx Macro mpz_t mpq_denref (mpq_t @var{op})\nReturn a reference to the numerator and denominator of @var{op}, respectively.\nThe @code{mpz} functions can be used on the result of these macros.\n@end deftypefn\n\n@deftypefun void mpq_get_num (mpz_t @var{numerator}, mpq_t @var{rational})\n@deftypefunx void mpq_get_den (mpz_t @var{denominator}, mpq_t @var{rational})\n@deftypefunx void mpq_set_num (mpq_t @var{rational}, mpz_t @var{numerator})\n@deftypefunx void mpq_set_den (mpq_t @var{rational}, mpz_t @var{denominator})\nGet or set the numerator or denominator of a rational.  These functions are\nequivalent to calling @code{mpz_set} with an appropriate @code{mpq_numref} or\n@code{mpq_denref}.  
Direct use of @code{mpq_numref} or @code{mpq_denref} is\nrecommended instead of these functions.\n@end deftypefun\n\n\n@need 2000\n@node I/O of Rationals,  , Applying Integer Functions, Rational Number Functions\n@comment  node-name,  next,  previous,  up\n@section Input and Output Functions\n@cindex Rational input and output functions\n@cindex Input functions\n@cindex Output functions\n@cindex I/O functions\n\nWhen using any of these functions, it's a good idea to include @file{stdio.h}\nbefore @file{mpir.h}, since that will allow @file{mpir.h} to define prototypes\nfor these functions.\n\nPassing a @code{NULL} pointer for a @var{stream} argument to any of these\nfunctions will make them read from @code{stdin} and write to @code{stdout},\nrespectively.\n\n@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, mpq_t @var{op})\nOutput @var{op} on stdio stream @var{stream}, as a string of digits in base\n@var{base}.  The base may vary from 2 to 36.  Output is in the form\n@samp{num/den} or if the denominator is 1 then just @samp{num}.\n\nReturn the number of bytes written, or if an error occurred, return 0.\n@end deftypefun\n\n@deftypefun size_t mpq_inp_str (mpq_t @var{rop}, FILE *@var{stream}, int @var{base})\nRead a string of digits from @var{stream} and convert them to a rational in\n@var{rop}.  Any initial white-space characters are read and discarded.  Return\nthe number of characters read (including white space), or 0 if a rational\ncould not be read.\n\nThe input can be a fraction like @samp{17/63} or just an integer like\n@samp{123}.  Reading stops at the first character not in this form, and white\nspace is not permitted within the string.  
If the input might not be in\ncanonical form, then @code{mpq_canonicalize} must be called (@pxref{Rational\nNumber Functions}).\n\nThe @var{base} can be between 2 and 36, or can be 0 in which case the leading\ncharacters of the string determine the base, @samp{0x} or @samp{0X} for\nhexadecimal, @samp{0} for octal, or decimal otherwise.  The leading characters\nare examined separately for the numerator and denominator of a fraction, so\nfor instance @samp{0x10/11} is @math{16/11}, whereas @samp{0x10/0x11} is\n@math{16/17}.\n@end deftypefun\n\n\n@node Floating-point Functions, Low-level Functions, Rational Number Functions, Top\n@comment  node-name,  next,  previous,  up\n@chapter Floating-point Functions\n@cindex Floating-point functions\n@cindex Float functions\n@cindex User-defined precision\n@cindex Precision of floats\n\nMPIR floating point numbers are stored in objects of type @code{mpf_t} and\nfunctions operating on them have an @code{mpf_} prefix.\n\nThe mantissa of each float has a user-selectable precision, limited only by\navailable memory.  Each variable has its own precision, and that can be\nincreased or decreased at any time.\n\nThe exponent of each float is a fixed precision, one machine word on most\nsystems.  In the current implementation the exponent is a count of limbs, so\nfor example on a 32-bit system this means a range of roughly\n@math{2^@W{-68719476768}} to @math{2^@W{68719476736}}, or on a 64-bit system\nthis will be greater.  Note however @code{mpf_get_str} can only return an\nexponent which fits an @code{mp_exp_t} and currently @code{mpf_set_str}\ndoesn't accept exponents bigger than a @code{mpir_si}.\n\nEach variable keeps a size for the mantissa data actually in use.  
This means\nthat if a float is exactly represented in only a few bits then only those bits\nwill be used in a calculation, even if the selected precision is high.\n\nAll calculations are performed to the precision of the destination variable.\nEach function is defined to calculate with ``infinite precision'' followed by\na truncation to the destination precision, but of course the work done is only\nwhat's needed to determine a result under that definition.\n\nThe precision selected for a variable is a minimum value, MPIR may increase it\na little to facilitate efficient calculation.  Currently this means rounding\nup to a whole limb, and then sometimes having a further partial limb,\ndepending on the high limb of the mantissa.  But applications shouldn't be\nconcerned by such details.\n\nThe mantissa is stored in binary, as might be imagined from the fact\nprecisions are expressed in bits.  One consequence of this is that decimal\nfractions like @math{0.1} cannot be represented exactly.  The same is true of\nplain IEEE @code{double} floats.  This makes both highly unsuitable for\ncalculations involving money or other values that should be exact decimal\nfractions.  (Suitably scaled integers, or perhaps rationals, are better\nchoices.)\n\n@code{mpf} functions and variables have no special notion of infinity or\nnot-a-number, and applications must take care not to overflow the exponent or\nresults will be unpredictable.  This might change in a future release.\n\nNote that the @code{mpf} functions are @emph{not} intended as a smooth\nextension to IEEE P754 arithmetic.  
In particular results obtained on one\ncomputer often differ from the results on a computer with a different word\nsize.\n\n@menu\n* Initializing Floats::\n* Assigning Floats::\n* Simultaneous Float Init & Assign::\n* Converting Floats::\n* Float Arithmetic::\n* Float Comparison::\n* I/O of Floats::\n* Miscellaneous Float Functions::\n@end menu\n\n@node Initializing Floats, Assigning Floats, Floating-point Functions, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Initialization Functions\n@cindex Float initialization functions\n@cindex Initialization functions\n\n@deftypefun void mpf_set_default_prec (mp_bitcnt_t @var{prec})\nSet the default precision to be @strong{at least} @var{prec} bits.  All\nsubsequent calls to @code{mpf_init} will use this precision, but previously\ninitialized variables are unaffected.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpf_get_default_prec (void)\nReturn the default precision actually used.\n@end deftypefun\n\nAn @code{mpf_t} object must be initialized before storing the first value in\nit.  The functions @code{mpf_init} and @code{mpf_init2} are used for that\npurpose.\n\n@deftypefun void mpf_init (mpf_t @var{x})\nInitialize @var{x} to 0.  Normally, a variable should be initialized once only\nor at least be cleared, using @code{mpf_clear}, between initializations.  The\nprecision of @var{x} is undefined unless a default precision has already been\nestablished by a call to @code{mpf_set_default_prec}.\n@end deftypefun\n\n@deftypefun void mpf_init2 (mpf_t @var{x}, mp_bitcnt_t @var{prec})\nInitialize @var{x} to 0 and set its precision to be @strong{at least}\n@var{prec} bits.  Normally, a variable should be initialized once only or at\nleast be cleared, using @code{mpf_clear}, between initializations.\n@end deftypefun\n\n@deftypefun void mpf_inits (mpf_t @var{x}, ...)\nInitialize a NULL-terminated list of @code{mpf_t} variables, and set their\nvalues to 0.  
The precision of the initialized variables is undefined unless a\ndefault precision has already been established by a call to\n@code{mpf_set_default_prec}.\n@end deftypefun\n\n@deftypefun void mpf_clear (mpf_t @var{x})\nFree the space occupied by @var{x}.  Make sure to call this function for all\n@code{mpf_t} variables when you are done with them.\n@end deftypefun\n\n@deftypefun void mpf_clears (mpf_t @var{x}, ...)\nFree the space occupied by a NULL-terminated list of @code{mpf_t} variables.\n@end deftypefun\n\n@need 2000\nHere is an example on how to initialize floating-point variables:\n@example\n@{\n  mpf_t x, y;\n  mpf_init (x);           /* use default precision */\n  mpf_init2 (y, 256);     /* precision @emph{at least} 256 bits */\n  @dots{}\n  /* Unless the program is about to exit, do ... */\n  mpf_clear (x);\n  mpf_clear (y);\n@}\n@end example\n\nThe following three functions are useful for changing the precision during a\ncalculation.  A typical use would be for adjusting the precision gradually in\niterative algorithms like Newton-Raphson, making the computation precision\nclosely match the actual accurate part of the numbers.\n\n@deftypefun {mp_bitcnt_t} mpf_get_prec (mpf_t @var{op})\nReturn the current precision of @var{op}, in bits.\n@end deftypefun\n\n@deftypefun void mpf_set_prec (mpf_t @var{rop}, mp_bitcnt_t @var{prec})\nSet the precision of @var{rop} to be @strong{at least} @var{prec} bits.  
The\nvalue in @var{rop} will be truncated to the new precision.\n\nThis function requires a call to @code{realloc}, and so should not be used in\na tight loop.\n@end deftypefun\n\n@deftypefun void mpf_set_prec_raw (mpf_t @var{rop}, mp_bitcnt_t @var{prec})\nSet the precision of @var{rop} to be @strong{at least} @var{prec} bits,\nwithout changing the memory allocated.\n\n@var{prec} must be no more than the allocated precision for @var{rop}, that\nbeing the precision when @var{rop} was initialized, or in the most recent\n@code{mpf_set_prec}.\n\nThe value in @var{rop} is unchanged, and in particular if it had a higher\nprecision than @var{prec} it will retain that higher precision.  New values\nwritten to @var{rop} will use the new @var{prec}.\n\nBefore calling @code{mpf_clear} or the full @code{mpf_set_prec}, another\n@code{mpf_set_prec_raw} call must be made to restore @var{rop} to its original\nallocated precision.  Failing to do so will have unpredictable results.\n\n@code{mpf_get_prec} can be used before @code{mpf_set_prec_raw} to get the\noriginal allocated precision.  
After @code{mpf_set_prec_raw} it reflects the\n@var{prec} value set.\n\n@code{mpf_set_prec_raw} is an efficient way to use an @code{mpf_t} variable at\ndifferent precisions during a calculation, perhaps to gradually increase\nprecision in an iteration, or just to use various different precisions for\ndifferent purposes during a calculation.\n@end deftypefun\n\n\n@need 2000\n@node Assigning Floats, Simultaneous Float Init & Assign, Initializing Floats, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Assignment Functions\n@cindex Float assignment functions\n@cindex Assignment functions\n\nThese functions assign new values to already initialized floats\n(@pxref{Initializing Floats}).\n\n@deftypefun void mpf_set (mpf_t @var{rop}, mpf_t @var{op})\n@deftypefunx void mpf_set_ui (mpf_t @var{rop}, mpir_ui @var{op})\n@deftypefunx void mpf_set_si (mpf_t @var{rop}, mpir_si @var{op})\n@deftypefunx void mpf_set_d (mpf_t @var{rop}, double @var{op})\n@deftypefunx void mpf_set_z (mpf_t @var{rop}, mpz_t @var{op})\n@deftypefunx void mpf_set_q (mpf_t @var{rop}, mpq_t @var{op})\nSet the value of @var{rop} from @var{op}.\n@end deftypefun\n\n@deftypefun int mpf_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})\nSet the value of @var{rop} from the string in @var{str}.  The string is of the\nform @samp{M@@N} or, if the base is 10 or less, alternatively @samp{MeN}.\n@samp{M} is the mantissa and @samp{N} is the exponent.  The mantissa is always\nin the specified base.  The exponent is either in the specified base or, if\n@var{base} is negative, in decimal.  The decimal point expected is taken from\nthe current locale, on systems providing @code{localeconv}.\n\nThe argument @var{base} may be in the ranges 2 to 62, or @minus{}62 to\n@minus{}2.  
Negative values are used to specify that the exponent is in\ndecimal.\n\nFor bases up to 36, case is ignored; upper-case and lower-case letters have\nthe same value; for bases 37 to 62, upper-case letter represent the usual\n10..35 while lower-case letter represent 36..61.\n\nUnlike the corresponding @code{mpz} function, the base will not be determined\nfrom the leading characters of the string if @var{base} is 0.  This is so that\nnumbers like @samp{0.23} are not interpreted as octal.\n\nWhite space is allowed in the string, and is simply ignored.  [This is not\nreally true; white-space is ignored in the beginning of the string and within\nthe mantissa, but not in other places, such as after a minus sign or in the\nexponent.  We are considering changing the definition of this function, making\nit fail when there is any white-space in the input, since that makes a lot of\nsense.  Please tell us your opinion about this change.  Do you really want it\nto accept @nicode{\"3 14\"} as meaning 314 as it does now?]\n\nThis function returns 0 if the entire string is a valid number in base\n@var{base}.  Otherwise it returns @minus{}1.\n@end deftypefun\n\n@deftypefun void mpf_swap (mpf_t @var{rop1}, mpf_t @var{rop2})\nSwap @var{rop1} and @var{rop2} efficiently.  Both the values and the\nprecisions of the two variables are swapped.\n@end deftypefun\n\n\n@node Simultaneous Float Init & Assign, Converting Floats, Assigning Floats, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Combined Initialization and Assignment Functions\n@cindex Float assignment functions\n@cindex Assignment functions\n@cindex Float initialization functions\n@cindex Initialization functions\n\nFor convenience, MPIR provides a parallel series of initialize-and-set functions\nwhich initialize the output and then store the value there.  
These functions'\nnames have the form @code{mpf_init_set@dots{}}\n\nOnce the float has been initialized by any of the @code{mpf_init_set@dots{}}\nfunctions, it can be used as the source or destination operand for the ordinary\nfloat functions.  Don't use an initialize-and-set function on a variable\nalready initialized!\n\n@deftypefun void mpf_init_set (mpf_t @var{rop}, mpf_t @var{op})\n@deftypefunx void mpf_init_set_ui (mpf_t @var{rop}, mpir_ui @var{op})\n@deftypefunx void mpf_init_set_si (mpf_t @var{rop}, mpir_si @var{op})\n@deftypefunx void mpf_init_set_d (mpf_t @var{rop}, double @var{op})\nInitialize @var{rop} and set its value from @var{op}.\n\nThe precision of @var{rop} will be taken from the active default precision, as\nset by @code{mpf_set_default_prec}.\n@end deftypefun\n\n@deftypefun int mpf_init_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})\nInitialize @var{rop} and set its value from the string in @var{str}.  See\n@code{mpf_set_str} above for details on the assignment operation.\n\nNote that @var{rop} is initialized even if an error occurs.  (I.e., you have to\ncall @code{mpf_clear} for it.)\n\nThe precision of @var{rop} will be taken from the active default precision, as\nset by @code{mpf_set_default_prec}.\n@end deftypefun\n\n\n@node Converting Floats, Float Arithmetic, Simultaneous Float Init & Assign, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Conversion Functions\n@cindex Float conversion functions\n@cindex Conversion functions\n\n@deftypefun double mpf_get_d (mpf_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero).\n\nIf the exponent in @var{op} is too big or too small to fit a @code{double}\nthen the result is system dependent.  For too big an infinity is returned when\navailable.  For too small @math{0.0} is normally returned.  
Hardware overflow,\nunderflow and denorm traps may or may not occur.\n@end deftypefun\n\n@deftypefun double mpf_get_d_2exp (mpir_si *@var{exp}, mpf_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero), and with an exponent returned separately.\n\nThe return value is in the range @math{0.5@le{}@GMPabs{@var{d}}<1} and the\nexponent is stored to @code{*@var{exp}}.  @m{@var{d} * 2^{exp}, @var{d} *\n2^@var{exp}} is the (truncated) @var{op} value.  If @var{op} is zero, the\nreturn is @math{0.0} and 0 is stored to @code{*@var{exp}}.\n\n@cindex @code{frexp}\nThis is similar to the standard C @code{frexp} function (@pxref{Normalization\nFunctions,,, libc, The GNU C Library Reference Manual}).\n@end deftypefun\n\n@deftypefun mpir_si mpf_get_2exp_d (double *@var{rop}, mpf_t @var{op})\nConvert @var{op} to a @code{double}, truncating if necessary (ie.@: rounding\ntowards zero), and with an exponent returned separately.\n\nThis function has been added on Windows as an alternative to mpf_get_d_2exp\nto avoid problems that occur on 64-bit Windows systems where pointers to \nintegers point to 32-bit integer variables when MPIR expects to find space\nfor 64-bit integers.\n@end deftypefun\n\n@deftypefun mpir_si mpf_get_si (mpf_t @var{op})\n@deftypefunx mpir_ui mpf_get_ui (mpf_t @var{op})\nConvert @var{op} to a @code{mpir_si} or @code{mpir_ui}, truncating any\nfraction part.  If @var{op} is too big for the return type, the result is\nundefined.\n\nSee also @code{mpf_fits_slong_p} and @code{mpf_fits_ulong_p}\n(@pxref{Miscellaneous Float Functions}).\n@end deftypefun\n\n@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})\nConvert @var{op} to a string of digits in base @var{base}.  @var{base} can vary\nfrom 2 to 62 or from @minus{}2 to @minus{}36.  Up to @var{n_digits} digits \nwill be generated.  Trailing zeros are not returned.  
No more digits than can\nbe accurately represented by @var{op} are ever generated.  If @var{n_digits} \nis 0 then that accurate maximum number of digits are generated.\n\nFor @var{base} in the range 2..36, digits and lower-case letters are used; for\n@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,\ndigits, upper-case letters, and lower-case letters (in that significance order)\nare used.\n\nIf @var{str} is @code{NULL}, the result string is allocated using the current\nallocation function (@pxref{Custom Allocation}).  The block will be\n@code{strlen(str)+1} bytes, that being exactly enough for the string and\nnull-terminator.\n\nIf @var{str} is not @code{NULL}, it should point to a block of\n@math{@var{n_digits} + 2} bytes, that being enough for the mantissa, a\npossible minus sign, and a null-terminator.  When @var{n_digits} is 0 to get\nall significant digits, an application won't be able to know the space\nrequired, and @var{str} should be @code{NULL} in that case.\n\nThe generated string is a fraction, with an implicit radix point immediately\nto the left of the first digit.  The applicable exponent is written through\nthe @var{expptr} pointer.  
For example, the number 3.1416 would be returned as\nstring @nicode{\"31416\"} and exponent 1.\n\nWhen @var{op} is zero, an empty string is produced and the exponent returned\nis 0.\n\nA pointer to the result string is returned, being either the allocated block\nor the given @var{str}.\n@end deftypefun\n\n\n@node Float Arithmetic, Float Comparison, Converting Floats, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Arithmetic Functions\n@cindex Float arithmetic functions\n@cindex Arithmetic functions\n\n@deftypefun void mpf_add (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_add_ui (mpf_t @var{rop}, mpf_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @math{@var{op1} + @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpf_sub (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, mpir_ui @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, mpf_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @var{op1} @minus{} @var{op2}.\n@end deftypefun\n\n@deftypefun void mpf_mul (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, mpf_t @var{op1}, mpir_ui @var{op2})\nSet @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.\n@end deftypefun\n\nDivision is undefined if the divisor is zero, and passing a zero divisor to the\ndivide functions will make these functions intentionally divide by zero.  
This\nlets the user handle arithmetic exceptions in these functions in the same\nmanner as other arithmetic exceptions.\n\n@deftypefun void mpf_div (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_ui_div (mpf_t @var{rop}, mpir_ui @var{op1}, mpf_t @var{op2})\n@deftypefunx void mpf_div_ui (mpf_t @var{rop}, mpf_t @var{op1}, mpir_ui @var{op2})\n@cindex Division functions\nSet @var{rop} to @var{op1}/@var{op2}.\n@end deftypefun\n\n@deftypefun void mpf_sqrt (mpf_t @var{rop}, mpf_t @var{op})\n@deftypefunx void mpf_sqrt_ui (mpf_t @var{rop}, mpir_ui @var{op})\n@cindex Root extraction functions\nSet @var{rop} to @m{\\sqrt{@var{op}}, the square root of @var{op}}.\n@end deftypefun\n\n@deftypefun void mpf_pow_ui (mpf_t @var{rop}, mpf_t @var{op1}, mpir_ui @var{op2})\n@cindex Exponentiation functions\n@cindex Powering functions\nSet @var{rop} to @m{@var{op1}^{op2}, @var{op1} raised to the power @var{op2}}.\n@end deftypefun\n\n@deftypefun void mpf_neg (mpf_t @var{rop}, mpf_t @var{op})\nSet @var{rop} to @minus{}@var{op}.\n@end deftypefun\n\n@deftypefun void mpf_abs (mpf_t @var{rop}, mpf_t @var{op})\nSet @var{rop} to the absolute value of @var{op}.\n@end deftypefun\n\n@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})\nSet @var{rop} to @m{@var{op1} \\times 2^{op2}, @var{op1} times 2 raised to\n@var{op2}}.\n@end deftypefun\n\n@deftypefun void mpf_div_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})\nSet @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to\n@var{op2}}.\n@end deftypefun\n\n@node Float Comparison, I/O of Floats, Float Arithmetic, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Comparison Functions\n@cindex Float comparison functions\n@cindex Comparison functions\n\n@deftypefun int mpf_cmp (mpf_t @var{op1}, mpf_t @var{op2})\n@deftypefunx int mpf_cmp_z (const mpf_t @var{op1}, const mpz_t @var{op2})\n@deftypefunx int mpf_cmp_d (mpf_t @var{op1}, double 
@var{op2})\n@deftypefunx int mpf_cmp_ui (mpf_t @var{op1}, mpir_ui @var{op2})\n@deftypefunx int mpf_cmp_si (mpf_t @var{op1}, mpir_si @var{op2})\nCompare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >\n@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if\n@math{@var{op1} < @var{op2}}.\n\n@code{mpf_cmp_d} can be called with an infinity, but results are undefined for\na NaN.\n@end deftypefun\n\n@deftypefun int mpf_eq (mpf_t @var{op1}, mpf_t @var{op2}, mp_bitcnt_t op3)\nReturn non-zero if the first @var{op3} bits of @var{op1} and @var{op2} are\nequal, zero otherwise.  I.e., test if @var{op1} and @var{op2} are approximately\nequal.\n\nIn the future values like 1000 and 0111 may be considered the same\nto 3 bits (on the basis that their difference is that small).\n@end deftypefun\n\n@deftypefun void mpf_reldiff (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})\nCompute the relative difference between @var{op1} and @var{op2} and store the\nresult in @var{rop}.  This is @math{@GMPabs{@var{op1}-@var{op2}}/@var{op1}}.\n@end deftypefun\n\n@deftypefn Macro int mpf_sgn (mpf_t @var{op})\n@cindex Sign tests\n@cindex Float sign tests\nReturn @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and\n@math{-1} if @math{@var{op} < 0}.\n\nThis function is actually implemented as a macro.  It evaluates its arguments\nmultiple times.\n@end deftypefn\n\n@node I/O of Floats, Miscellaneous Float Functions, Float Comparison, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Input and Output Functions\n@cindex Float input and output functions\n@cindex Input functions\n@cindex Output functions\n@cindex I/O functions\n\nFunctions that perform input from a stdio stream, and functions that output to\na stdio stream.  
Passing a @code{NULL} pointer for a @var{stream} argument to\nany of these functions will make them read from @code{stdin} and write to\n@code{stdout}, respectively.\n\nWhen using any of these functions, it is a good idea to include @file{stdio.h}\nbefore @file{mpir.h}, since that will allow @file{mpir.h} to define prototypes\nfor these functions.\n\n@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})\nPrint @var{op} to @var{stream}, as a string of digits.  Return the number of\nbytes written, or if an error occurred, return 0.\n\nThe mantissa is prefixed with an @samp{0.} and is in the given @var{base},\nwhich may vary from 2 to 36.  An exponent is then printed, separated by an\n@samp{e}, or if @var{base} is greater than 10 then by an @samp{@@}.  The\nexponent is always in decimal.  The decimal point follows the current locale,\non systems providing @code{localeconv}.\n\nFor @var{base} in the range 2..36, digits and lower-case letters are used; for\n@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,\ndigits, upper-case letters, and lower-case letters (in that significance order)\nare used.\n\nUp to @var{n_digits} will be printed from the mantissa, except that no more\ndigits than are accurately representable by @var{op} will be printed.\n@var{n_digits} can be 0 to select that accurate maximum.\n@end deftypefun\n\n@deftypefun size_t mpf_inp_str (mpf_t @var{rop}, FILE *@var{stream}, int @var{base})\nRead a string in base @var{base} from @var{stream}, and put the read float in\n@var{rop}.  The string is of the form @samp{M@@N} or, if the base is 10 or\nless, alternatively @samp{MeN}.  @samp{M} is the mantissa and @samp{N} is the\nexponent.  The mantissa is always in the specified base.  The exponent is\neither in the specified base or, if @var{base} is negative, in decimal.  
The\ndecimal point expected is taken from the current locale, on systems providing\n@code{localeconv}.\n\nThe argument @var{base} may be in the ranges 2 to 36, or @minus{}36 to\n@minus{}2.  Negative values are used to specify that the exponent is in\ndecimal.\n\nUnlike the corresponding @code{mpz} function, the base will not be determined\nfrom the leading characters of the string if @var{base} is 0.  This is so that\nnumbers like @samp{0.23} are not interpreted as octal.\n\nReturn the number of bytes read, or if an error occurred, return 0.\n@end deftypefun\n\n@c @deftypefun void mpf_out_raw (FILE *@var{stream}, mpf_t @var{float})\n@c Output @var{float} on stdio stream @var{stream}, in raw binary\n@c format.  The float is written in a portable format, with 4 bytes of\n@c size information, and that many bytes of limbs.  Both the size and the\n@c limbs are written in decreasing significance order.\n@c @end deftypefun\n\n@c @deftypefun void mpf_inp_raw (mpf_t @var{float}, FILE *@var{stream})\n@c Input from stdio stream @var{stream} in the format written by\n@c @code{mpf_out_raw}, and put the result in @var{float}.\n@c @end deftypefun\n\n\n@node Miscellaneous Float Functions,  , I/O of Floats, Floating-point Functions\n@comment  node-name,  next,  previous,  up\n@section Miscellaneous Functions\n@cindex Miscellaneous float functions\n@cindex Float miscellaneous functions\n\n@deftypefun void mpf_ceil (mpf_t @var{rop}, mpf_t @var{op})\n@deftypefunx void mpf_floor (mpf_t @var{rop}, mpf_t @var{op})\n@deftypefunx void mpf_trunc (mpf_t @var{rop}, mpf_t @var{op})\n@cindex Rounding functions\n@cindex Float rounding functions\nSet @var{rop} to @var{op} rounded to an integer.  
@code{mpf_ceil} rounds to the\nnext higher integer, @code{mpf_floor} to the next lower, and @code{mpf_trunc}\nto the integer towards zero.\n@end deftypefun\n\n@deftypefun int mpf_integer_p (mpf_t @var{op})\nReturn non-zero if @var{op} is an integer.\n@end deftypefun\n\n@deftypefun int mpf_fits_ulong_p (mpf_t @var{op})\n@deftypefunx int mpf_fits_slong_p (mpf_t @var{op})\n@deftypefunx int mpf_fits_uint_p (mpf_t @var{op})\n@deftypefunx int mpf_fits_sint_p (mpf_t @var{op})\n@deftypefunx int mpf_fits_ushort_p (mpf_t @var{op})\n@deftypefunx int mpf_fits_sshort_p (mpf_t @var{op})\nReturn non-zero if @var{op} would fit in the respective C data type, when\ntruncated to an integer.\n@end deftypefun\n\n@deftypefun void mpf_urandomb (mpf_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{nbits})\n@cindex Random number functions\n@cindex Float random number functions\nGenerate a uniformly distributed random float in @var{rop}, such that @math{0\n@le{} @var{rop} < 1}, with @var{nbits} significant bits in the mantissa.\n\nThe variable @var{state} must be initialized by calling one of the\n@code{gmp_randinit} functions (@ref{Random State Initialization}) before\ninvoking this function.\n@end deftypefun\n\n@deftypefun void mpf_rrandomb (mpf_t @var{rop}, gmp_randstate_t @var{state}, mp_size_t @var{max_size}, mp_exp_t @var{exp})\nGenerate a random float of at most @var{max_size} limbs, with long strings of\nzeros and ones in the binary representation.  The exponent of the number is in\nthe interval @minus{}@var{exp} to @var{exp} (in limbs).  This function is\nuseful for testing functions and algorithms, since these kind of random\nnumbers have proven to be more likely to trigger corner-case bugs.  Negative\nrandom numbers are generated when @var{max_size} is negative.\n\n@strong{This interface is preliminary.  
It might change incompatibly in future revisions.}\n@end deftypefun\n\n@deftypefun void mpf_random2 (mpf_t @var{rop}, mp_size_t @var{max_size}, mp_exp_t @var{exp})\nGenerate a random float of at most @var{max_size} limbs, with long strings of\nzeros and ones in the binary representation.  The exponent of the number is in\nthe interval @minus{}@var{exp} to @var{exp} (in limbs).  This function is\nuseful for testing functions and algorithms, since these kind of random\nnumbers have proven to be more likely to trigger corner-case bugs.  Negative\nrandom numbers are generated when @var{max_size} is negative.\n\n@strong{This function is obsolete.  It will disappear from future MPIR releases.}\n@end deftypefun\n\n@node Low-level Functions, Random Number Functions, Floating-point Functions, Top\n@comment  node-name,  next,  previous,  up\n@chapter Low-level Functions\n@cindex Low-level functions\n\nThis chapter describes low-level MPIR functions, used to implement the\nhigh-level MPIR functions, but also intended for time-critical user code.\n\nThese functions start with the prefix @code{mpn_}.\n\n@c 1. Some of these function clobber input operands.\n@c\n\nThe @code{mpn} functions are designed to be as fast as possible, @strong{not}\nto provide a coherent calling interface.  The different functions have somewhat\nsimilar interfaces, but there are variations that make them hard to use.  These\nfunctions do as little as possible apart from the real multiple precision\ncomputation, so that no time is spent on things that not all callers need.\n\nA source operand is specified by a pointer to the least significant limb and a\nlimb count.  A destination operand is specified by just a pointer.  
It is the\nresponsibility of the caller to ensure that the destination has enough space\nfor storing the result.\n\nWith this way of specifying operands, it is possible to perform computations on\nsubranges of an argument, and store the result into a subrange of a\ndestination.\n\nA common requirement for all functions is that each source area needs at least\none limb.  No size argument may be zero.  Unless otherwise stated, in-place\noperations are allowed where source and destination are the same, but not where\nthey only partly overlap.\n\nThe @code{mpn} functions are the base for the implementation of the\n@code{mpz_}, @code{mpf_}, and @code{mpq_} functions.\n\nThis example adds the number beginning at @var{s1p} and the number beginning at\n@var{s2p} and writes the sum at @var{destp}.  All areas have @var{n} limbs.\n\n@example\ncy = mpn_add_n (destp, s1p, s2p, n)\n@end example\n\nIt should be noted that the @code{mpn} functions make no attempt to identify\nhigh or low zero limbs on their operands, or other special forms.  On random\ndata such cases will be unlikely and it'd be wasteful for every function to\ncheck every time.  An application knowing something about its data can take\nsteps to trim or perhaps split its calculations.\n@c\n@c  For reference, within gmp mpz_t operands never have high zero limbs, and\n@c  we rate low zero limbs as unlikely too (or something an application should\n@c  handle).  This is a prime motivation for not stripping zero limbs in say\n@c  mpn_mul_n etc.\n@c\n@c  Other applications doing variable-length calculations will quite likely do\n@c  something similar to mpz.  And even if not then it's highly likely zero\n@c  limb stripping can be done at just a few judicious points, which will be\n@c  more efficient than having lots of mpn functions checking every time.\n\n@sp 1\n@noindent\nIn the notation used below, a source operand is identified by the pointer to\nthe least significant limb, and the limb count in braces.  
For example,\n@{@var{s1p}, @var{s1n}@}.\n\n@deftypefun mp_limb_t mpn_add_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nAdd @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@}, and write the @var{n}\nleast significant limbs of the result to @var{rp}.  Return carry, either 0 or\n1.\n\nThis is the lowest-level function for addition.  It is the preferred function\nfor addition, since it is written in assembly for most CPUs.  For addition of\na variable to itself (i.e., @var{s1p} equals @var{s2p}), use @code{mpn_lshift}\nwith a count of 1 for optimal speed.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_add_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})\nAdd @{@var{s1p}, @var{n}@} and @var{s2limb}, and write the @var{n} least\nsignificant limbs of the result to @var{rp}.  Return carry, either 0 or 1.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_add (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})\nAdd @{@var{s1p}, @var{s1n}@} and @{@var{s2p}, @var{s2n}@}, and write the\n@var{s1n} least significant limbs of the result to @var{rp}.  Return carry,\neither 0 or 1.\n\nThis function requires that @var{s1n} is greater than or equal to @var{s2n}.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_sub_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nSubtract @{@var{s2p}, @var{n}@} from @{@var{s1p}, @var{n}@}, and write the\n@var{n} least significant limbs of the result to @var{rp}.  Return borrow,\neither 0 or 1.\n\nThis is the lowest-level function for subtraction.  
It is the preferred\nfunction for subtraction, since it is written in assembly for most CPUs.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_sub_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})\nSubtract @var{s2limb} from @{@var{s1p}, @var{n}@}, and write the @var{n} least\nsignificant limbs of the result to @var{rp}.  Return borrow, either 0 or 1.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_sub (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})\nSubtract @{@var{s2p}, @var{s2n}@} from @{@var{s1p}, @var{s1n}@}, and write the\n@var{s1n} least significant limbs of the result to @var{rp}.  Return borrow,\neither 0 or 1.\n\nThis function requires that @var{s1n} is greater than or equal to\n@var{s2n}.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})\nPerform the negation of @{@var{sp}, @var{n}@}, and write the result to\n@{@var{rp}, @var{n}@}.  Return carry-out.\n@end deftypefun\n\n@deftypefun void mpn_mul_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nMultiply @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@}, and write the\n2*@var{n}-limb result to @var{rp}.\n\nThe destination has to have space for 2*@var{n} limbs, even if the product's\nmost significant limb is zero.  No overlap is permitted between the\ndestination and either source.\n\nIf the input operands are the same, @code{mpn_sqr} will generally be faster.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_mul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})\nMultiply @{@var{s1p}, @var{n}@} by @var{s2limb}, and write the @var{n} least\nsignificant limbs of the product to @var{rp}.  Return the most significant\nlimb of the product.  
@{@var{s1p}, @var{n}@} and @{@var{rp}, @var{n}@} are\nallowed to overlap provided @math{@var{rp} @le{} @var{s1p}}.\n\nThis is a low-level function that is a building block for general\nmultiplication as well as other operations in MPIR@.  It is written in assembly\nfor most CPUs.\n\nDon't call this function if @var{s2limb} is a power of 2; use @code{mpn_lshift}\nwith a count equal to the logarithm of @var{s2limb} instead, for optimal speed.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_addmul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})\nMultiply @{@var{s1p}, @var{n}@} and @var{s2limb}, and add the @var{n} least\nsignificant limbs of the product to @{@var{rp}, @var{n}@} and write the result\nto @var{rp}.  Return the most significant limb of the product, plus carry-out\nfrom the addition.\n\nThis is a low-level function that is a building block for general\nmultiplication as well as other operations in MPIR@.  It is written in assembly\nfor most CPUs.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_submul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})\nMultiply @{@var{s1p}, @var{n}@} and @var{s2limb}, and subtract the @var{n}\nleast significant limbs of the product from @{@var{rp}, @var{n}@} and write the\nresult to @var{rp}.  Return the most significant limb of the product, minus\nborrow-out from the subtraction.\n\nThis is a low-level function that is a building block for general\nmultiplication and division as well as other operations in MPIR@.  It is written\nin assembly for most CPUs.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_mul (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})\nMultiply @{@var{s1p}, @var{s1n}@} and @{@var{s2p}, @var{s2n}@}, and write the\nresult to @var{rp}.  
Return the most significant limb of the result.\n\nThe destination has to have space for @var{s1n} + @var{s2n} limbs, even if the\nresult might be one limb smaller.\n\nThis function requires that @var{s1n} is greater than or equal to\n@var{s2n}.  The destination must be distinct from both input operands.\n@end deftypefun\n\n@deftypefun void mpn_sqr (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})\nCompute the square of @{@var{s1p}, @var{n}@} and write the 2*@var{n}-limb\nresult to @var{rp}.\n\nThe destination has to have space for 2*@var{n} limbs, even if the result's\nmost significant limb is zero.  No overlap is permitted between the\ndestination and the source.\n@end deftypefun\n\n@deftypefun void mpn_tdiv_qr (mp_limb_t *@var{qp}, mp_limb_t *@var{rp}, mp_size_t @var{qxn}, const mp_limb_t *@var{np}, mp_size_t @var{nn}, const mp_limb_t *@var{dp}, mp_size_t @var{dn})\nDivide @{@var{np}, @var{nn}@} by @{@var{dp}, @var{dn}@} and put the quotient\nat @{@var{qp}, @var{nn}@minus{}@var{dn}+1@} and the remainder at @{@var{rp},\n@var{dn}@}.  The quotient is rounded towards 0.\n\nNo overlap is permitted between arguments.  @var{nn} must be greater than or\nequal to @var{dn}.  The most significant limb of @var{dp} must be non-zero.\nThe @var{qxn} operand must be zero.\n@comment FIXME: Relax overlap requirements!\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_divrem (mp_limb_t *@var{r1p}, mp_size_t @var{qxn}, mp_limb_t *@var{rs2p}, mp_size_t @var{rs2n}, const mp_limb_t *@var{s3p}, mp_size_t @var{s3n})\n[This function is obsolete.  Please call @code{mpn_tdiv_qr} instead for best\nperformance.]\n\nDivide @{@var{rs2p}, @var{rs2n}@} by @{@var{s3p}, @var{s3n}@}, and write the\nquotient at @var{r1p}, with the exception of the most significant limb, which\nis returned.  
The remainder replaces the dividend at @var{rs2p}; it will be\n@var{s3n} limbs long (i.e., as many limbs as the divisor).\n\nIn addition to an integer quotient, @var{qxn} fraction limbs are developed, and\nstored after the integral limbs.  For most usages, @var{qxn} will be zero.\n\nIt is required that @var{rs2n} is greater than or equal to @var{s3n}.  It is\nrequired that the most significant bit of the divisor is set.\n\nIf the quotient is not needed, pass @var{rs2p} + @var{s3n} as @var{r1p}.  Aside\nfrom that special case, no overlap between arguments is permitted.\n\nReturn the most significant limb of the quotient, either 0 or 1.\n\nThe area at @var{r1p} needs to be @var{rs2n} @minus{} @var{s3n} + @var{qxn}\nlimbs large.\n@end deftypefun\n\n\n@deftypefn Function mp_limb_t mpn_divrem_1 (mp_limb_t *@var{r1p}, mp_size_t @var{qxn}, @w{mp_limb_t *@var{s2p}}, mp_size_t @var{s2n}, mp_limb_t @var{s3limb})\n@deftypefnx Macro mp_limb_t mpn_divmod_1 (mp_limb_t *@var{r1p}, mp_limb_t *@var{s2p}, @w{mp_size_t @var{s2n}}, @w{mp_limb_t @var{s3limb}})\nDivide @{@var{s2p}, @var{s2n}@} by @var{s3limb}, and write the quotient at\n@var{r1p}.  Return the remainder.\n\nThe integer quotient is written to @{@var{r1p}+@var{qxn}, @var{s2n}@} and in\naddition @var{qxn} fraction limbs are developed and written to @{@var{r1p},\n@var{qxn}@}.  Either or both @var{s2n} and @var{qxn} can be zero.  
For most\nusages, @var{qxn} will be zero.\n\n@code{mpn_divmod_1} exists for upward source compatibility and is simply a\nmacro calling @code{mpn_divrem_1} with a @var{qxn} of 0.\n\nThe areas at @var{r1p} and @var{s2p} have to be identical or completely\nseparate, not partially overlapping.\n@end deftypefn\n\n@deftypefn Macro mp_limb_t mpn_divexact_by3 (mp_limb_t *@var{rp}, mp_limb_t *@var{sp}, @w{mp_size_t @var{n}})\n@deftypefnx Function mp_limb_t mpn_divexact_by3c (mp_limb_t *@var{rp}, mp_limb_t *@var{sp}, @w{mp_size_t @var{n}}, mp_limb_t @var{carry})\nDivide @{@var{sp}, @var{n}@} by 3, expecting it to divide exactly, and writing\nthe result to @{@var{rp}, @var{n}@}.  If 3 divides exactly, the return value is\nzero and the result is the quotient.  If not, the return value is non-zero and\nthe result won't be anything useful.\n\n@code{mpn_divexact_by3c} takes an initial carry parameter, which can be the\nreturn value from a previous call, so a large calculation can be done piece by\npiece from low to high.  @code{mpn_divexact_by3} is simply a macro calling\n@code{mpn_divexact_by3c} with a 0 carry parameter.\n\nThese routines use a multiply-by-inverse and will be faster than\n@code{mpn_divrem_1} on CPUs with fast multiplication but slow division.\n\nThe source @math{a}, result @math{q}, size @math{n}, initial carry @math{i},\nand return value @math{c} satisfy @m{cb^n+a-i=3q, c*b^n + a-i = 3*q}, where\n@m{b=2\\GMPraise{@code{GMP\\_NUMB\\_BITS}}, b=2^GMP_NUMB_BITS}.  The\nreturn @math{c} is always 0, 1 or 2, and the initial carry @math{i} must also\nbe 0, 1 or 2 (these are both borrows really).  When @math{c=0} clearly\n@math{q=(a-i)/3}.  
When @m{c \\neq 0, c!=0}, the remainder @math{(a-i) @bmod{}\n3} is given by @math{3-c}, because @math{b @equiv{} 1 @bmod{} 3} (when\n@code{mp_bits_per_limb} is even, which is always so currently).\n@end deftypefn\n\n@deftypefun mp_limb_t mpn_mod_1 (mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, mp_limb_t @var{s2limb})\nDivide @{@var{s1p}, @var{s1n}@} by @var{s2limb}, and return the remainder.\n@var{s1n} can be zero.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_lshift (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}, unsigned int @var{count})\nShift @{@var{sp}, @var{n}@} left by @var{count} bits, and write the result to\n@{@var{rp}, @var{n}@}.  The bits shifted out at the left are returned in the\nleast significant @var{count} bits of the return value (the rest of the return\nvalue is zero).\n\n@var{count} must be in the range 1 to @nicode{mp_bits_per_limb}@minus{}1.  The\nregions @{@var{sp}, @var{n}@} and @{@var{rp}, @var{n}@} may overlap, provided\n@math{@var{rp} @ge{} @var{sp}}.\n\nThis function is written in assembly for most CPUs.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_rshift (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}, unsigned int @var{count})\nShift @{@var{sp}, @var{n}@} right by @var{count} bits, and write the result to\n@{@var{rp}, @var{n}@}.  The bits shifted out at the right are returned in the\nmost significant @var{count} bits of the return value (the rest of the return\nvalue is zero).\n\n@var{count} must be in the range 1 to @nicode{mp_bits_per_limb}@minus{}1.  
The\nregions @{@var{sp}, @var{n}@} and @{@var{rp}, @var{n}@} may overlap, provided\n@math{@var{rp} @le{} @var{sp}}.\n\nThis function is written in assembly for most CPUs.\n@end deftypefun\n\n@deftypefun int mpn_cmp (const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nCompare @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@} and return a\npositive value if @math{@var{s1} > @var{s2}}, 0 if they are equal, or a\nnegative value if @math{@var{s1} < @var{s2}}.\n@end deftypefun\n\n@deftypefun int mpn_zero_p (const mp_limb_t *@var{sp}, mp_size_t @var{n})\nTest @{@var{sp}, @var{n}@} and return 1 if the operand is zero, 0 otherwise.\n@end deftypefun\n\n@deftypefun mp_size_t mpn_gcd (mp_limb_t *@var{rp}, mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, mp_limb_t *@var{s2p}, mp_size_t @var{s2n})\nSet @{@var{rp}, @var{retval}@} to the greatest common divisor of @{@var{s1p},\n@var{s1n}@} and @{@var{s2p}, @var{s2n}@}.  The result can be up to @var{s2n}\nlimbs, the return value is the actual number produced.  Both source operands\nare destroyed.\n\n@{@var{s1p}, @var{s1n}@} must have at least as many bits as @{@var{s2p},\n@var{s2n}@}.  @{@var{s2p}, @var{s2n}@} must be odd.  Both operands must have\nnon-zero most significant limbs.  No overlap is permitted between @{@var{s1p},\n@var{s1n}@} and @{@var{s2p}, @var{s2n}@}.\n@end deftypefun\n\n@deftypefun mp_limb_t mpn_gcd_1 (const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, mp_limb_t @var{s2limb})\nReturn the greatest common divisor of @{@var{s1p}, @var{s1n}@} and\n@var{s2limb}.  Both operands must be non-zero.\n@end deftypefun\n\n@deftypefun mp_size_t mpn_gcdext (mp_limb_t *@var{gp}, mp_limb_t *@var{sp}, mp_size_t *@var{sn}, mp_limb_t *@var{xp}, mp_size_t @var{xn}, mp_limb_t *@var{yp}, mp_size_t @var{yn})\nLet @m{U,@var{U}} be defined by @{@var{xp}, @var{xn}@} and let @m{V,@var{V}} be\ndefined by @{@var{yp}, @var{yn}@}.\n\nCompute the greatest common divisor @math{G} of @math{U} and @math{V}.  
Compute\na cofactor @math{S} such that @math{G = US + VT}.  The second cofactor @var{T}\nis not computed but can easily be obtained from @m{(G - US) / V, (@var{G} -\n@var{U}*@var{S}) / @var{V}} (the division will be exact).  It is required that\n@math{U @ge{} V > 0}.\n\n@math{S} satisfies @math{S = 1} or @math{@GMPabs{S} < V / (2 G)}. @math{S =\n0} if and only if @math{V} divides @math{U} (i.e., @math{G = V}).\n\nStore @math{G} at @var{gp} and let the return value define its limb count.\nStore @math{S} at @var{sp} and let |*@var{sn}| define its limb count.  @math{S}\ncan be negative; when this happens *@var{sn} will be negative.  The areas at\n@var{gp} and @var{sp} should each have room for @math{@var{xn}+1} limbs.\n\nThe areas @{@var{xp}, @math{@var{xn}+1}@} and @{@var{yp}, @math{@var{yn}+1}@}\nare destroyed (i.e.@: the input operands plus an extra limb past the end of\neach).\n\nCompatibility note: MPIR versions 1.3,2.0 and GMP versions 4.3.0,4.3.1 defined @math{S} less strictly.\nEarlier as well as later GMP releases define @math{S} as described here.\n@end deftypefun\n\n@deftypefun mp_size_t mpn_sqrtrem (mp_limb_t *@var{r1p}, mp_limb_t *@var{r2p}, const mp_limb_t *@var{sp}, mp_size_t @var{n})\nCompute the square root of @{@var{sp}, @var{n}@} and put the result at\n@{@var{r1p}, @math{@GMPceil{@var{n}/2}}@} and the remainder at @{@var{r2p},\n@var{retval}@}.  @var{r2p} needs space for @var{n} limbs, but the return value\nindicates how many are produced.\n\nThe most significant limb of @{@var{sp}, @var{n}@} must be non-zero.  The\nareas @{@var{r1p}, @math{@GMPceil{@var{n}/2}}@} and @{@var{sp}, @var{n}@} must\nbe completely separate.  
The areas @{@var{r2p}, @var{n}@} and @{@var{sp},\n@var{n}@} must be either identical or completely separate.\n\nIf the remainder is not wanted then @var{r2p} can be @code{NULL}, and in this\ncase the return value is zero or non-zero according to whether the remainder\nwould have been zero or non-zero.\n\nA return value of zero indicates a perfect square.  See also\n@code{mpz_perfect_square_p}.\n@end deftypefun\n\n@deftypefun mp_size_t mpn_get_str (unsigned char *@var{str}, int @var{base}, mp_limb_t *@var{s1p}, mp_size_t @var{s1n})\nConvert @{@var{s1p}, @var{s1n}@} to a raw unsigned char array at @var{str} in\nbase @var{base}, and return the number of characters produced.  There may be\nleading zeros in the string.  The string is not in ASCII; to convert it to\nprintable format, add the ASCII codes for @samp{0} or @samp{A}, depending on\nthe base and range.  @var{base} can vary from 2 to 256.\n\nThe most significant limb of the input @{@var{s1p}, @var{s1n}@} must be\nnon-zero.  The input @{@var{s1p}, @var{s1n}@} is clobbered, except when\n@var{base} is a power of 2, in which case it's unchanged.\n\nThe area at @var{str} has to have space for the largest possible number\nrepresented by a @var{s1n} long limb array, plus one extra character.\n@end deftypefun\n\n@deftypefun mp_size_t mpn_set_str (mp_limb_t *@var{rp}, const unsigned char *@var{str}, size_t @var{strsize}, int @var{base})\nConvert bytes @{@var{str},@var{strsize}@} in the given @var{base} to limbs at\n@var{rp}.\n\n@math{@var{str}[0]} is the most significant byte and\n@math{@var{str}[@var{strsize}-1]} is the least significant.  Each byte should\nbe a value in the range 0 to @math{@var{base}-1}, not an ASCII character.\n@var{base} can vary from 2 to 256.\n\nThe return value is the number of limbs written to @var{rp}.  
If the most\nsignificant input byte is non-zero then the high limb at @var{rp} will be\nnon-zero, and only that exact number of limbs will be required there.\n\nIf the most significant input byte is zero then there may be high zero limbs\nwritten to @var{rp} and included in the return value.\n\n@var{strsize} must be at least 1, and no overlap is permitted between\n@{@var{str},@var{strsize}@} and the result at @var{rp}.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpn_scan0 (const mp_limb_t *@var{s1p}, mp_bitcnt_t @var{bit})\nScan @var{s1p} from bit position @var{bit} for the next clear bit.\n\nIt is required that there be a clear bit within the area at @var{s1p} at or\nbeyond bit position @var{bit}, so that the function has something to return.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpn_scan1 (const mp_limb_t *@var{s1p}, mp_bitcnt_t @var{bit})\nScan @var{s1p} from bit position @var{bit} for the next set bit.\n\nIt is required that there be a set bit within the area at @var{s1p} at or\nbeyond bit position @var{bit}, so that the function has something to return.\n@end deftypefun\n\n@deftypefun void mpn_random (mp_limb_t *@var{r1p}, mp_size_t @var{r1n})\n@deftypefunx void mpn_random2 (mp_limb_t *@var{r1p}, mp_size_t @var{r1n})\nGenerate a random number of length @var{r1n} and store it at @var{r1p}.  The\nmost significant limb is always non-zero.  @code{mpn_random} generates\nuniformly distributed limb data, @code{mpn_random2} generates long strings of\nzeros and ones in the binary representation.\n\n@code{mpn_random2} is intended for testing the correctness of the @code{mpn}\nroutines.\n\n@strong{These functions are obsolete. 
They will disappear from future MPIR releases.}\n@end deftypefun\n\n@deftypefun void mpn_urandomb (mp_limb_t *@var{rp}, gmp_randstate_t @var{state}, mpir_ui @var{n})\nGenerate a uniform random number of length @var{n} bits and store it at @var{rp}.\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n@deftypefun void mpn_urandomm (mp_limb_t *@var{rp}, gmp_randstate_t @var{state}, const mp_limb_t *@var{mp}, mp_size_t @var{n})\nGenerate a uniform random number modulo (@var{mp},@var{n}) of length @var{n} limbs and store it at @var{rp}.\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n@deftypefun void mpn_randomb (mp_limb_t *@var{rp}, gmp_randstate_t @var{state}, mp_size_t @var{n})\nGenerate a random number of length @var{n} limbs and store it at @var{rp}.\nThe most significant limb is always non-zero.\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n@deftypefun void mpn_rrandom (mp_limb_t *@var{rp}, gmp_randstate_t @var{state}, mp_size_t @var{n})\nGenerate a random number of length @var{n} limbs and store it at @var{rp}.\nThe most significant limb is always non-zero. 
Generates long strings of\nzeros and ones in the binary representation and is intended for testing the correctness of the @code{mpn}\nroutines.\n\n@strong{This function interface is preliminary and may change in the future.}\n@end deftypefun\n\n\n@deftypefun {mp_bitcnt_t} mpn_popcount (const mp_limb_t *@var{s1p}, mp_size_t @var{n})\nCount the number of set bits in @{@var{s1p}, @var{n}@}.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpn_hamdist (const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nCompute the hamming distance between @{@var{s1p}, @var{n}@} and @{@var{s2p},\n@var{n}@}, which is the number of bit positions where the two operands have\ndifferent bit values.\n@end deftypefun\n\n@deftypefun int mpn_perfect_square_p (const mp_limb_t *@var{s1p}, mp_size_t @var{n})\nReturn non-zero iff @{@var{s1p}, @var{n}@} is a perfect square.\n@end deftypefun\n\n@deftypefun void mpn_and_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical and of @{@var{s1p}, @var{n}@} and @{@var{s2p},\n@var{n}@}, and write the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_ior_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and\n@{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_xor_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical exclusive or of @{@var{s1p}, @var{n}@} and\n@{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_andn_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical and of @{@var{s1p}, @var{n}@} and the bitwise\ncomplement of @{@var{s2p}, @var{n}@}, and write 
the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_iorn_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and the bitwise\ncomplement of @{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_nand_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical and of @{@var{s1p}, @var{n}@} and @{@var{s2p},\n@var{n}@}, and write the bitwise complement of the result to @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_nior_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and\n@{@var{s2p}, @var{n}@}, and write the bitwise complement of the result to\n@{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_xnor_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})\nPerform the bitwise logical exclusive or of @{@var{s1p}, @var{n}@} and\n@{@var{s2p}, @var{n}@}, and write the bitwise complement of the result to\n@{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_com (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})\nPerform the bitwise complement of @{@var{sp}, @var{n}@}, and write the result\nto @{@var{rp}, @var{n}@}.\n@end deftypefun\n\n@deftypefun void mpn_copyi (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})\nCopy from @{@var{s1p}, @var{n}@} to @{@var{rp}, @var{n}@}, increasingly.\n@end deftypefun\n\n@deftypefun void mpn_copyd (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})\nCopy from @{@var{s1p}, @var{n}@} to @{@var{rp}, @var{n}@}, decreasingly.\n@end deftypefun\n\n@deftypefun void mpn_zero (mp_limb_t *@var{rp}, mp_size_t @var{n})\nZero @{@var{rp}, 
@var{n}@}.\n@end deftypefun\n\n@sp 1\n@section Nails\n@cindex Nails\n\n@strong{Everything in this section is highly experimental and may disappear or\nbe subject to incompatible changes in a future version of MPIR.}\n\nN.B: Nails are currently disabled and not supported in MPIR. They may or may not return in a future version of MPIR.\n\nNails are an experimental feature whereby a few bits are left unused at the\ntop of each @code{mp_limb_t}.  This can significantly improve carry handling\non some processors.\n\nAll the @code{mpn} functions accepting limb data will expect the nail bits to\nbe zero on entry, and will return data with the nails similarly all zero.\nThis applies both to limb vectors and to single limb arguments.\n\nNails can be enabled by configuring with @samp{--enable-nails}.  By default\nthe number of bits will be chosen according to what suits the host processor,\nbut a particular number can be selected with @samp{--enable-nails=N}.\n\nAt the mpn level, a nail build is neither source nor binary compatible with a\nnon-nail build, strictly speaking.  But programs acting on limbs only through\nthe mpn functions are likely to work equally well with either build, and\njudicious use of the definitions below should make any program compatible with\neither build, at the source level.\n\nFor the higher level routines, meaning @code{mpz} etc, a nail build should be\nfully source and binary compatible with a non-nail build.\n\n@defmac GMP_NAIL_BITS\n@defmacx GMP_NUMB_BITS\n@defmacx GMP_LIMB_BITS\n@code{GMP_NAIL_BITS} is the number of nail bits, or 0 when nails are not in\nuse.  @code{GMP_NUMB_BITS} is the number of data bits in a limb.\n@code{GMP_LIMB_BITS} is the total number of bits in an @code{mp_limb_t}.  In\nall cases\n\n@example\nGMP_LIMB_BITS == GMP_NAIL_BITS + GMP_NUMB_BITS\n@end example\n@end defmac\n\n@defmac GMP_NAIL_MASK\n@defmacx GMP_NUMB_MASK\nBit masks for the nail and number parts of a limb.  
@code{GMP_NAIL_MASK} is 0\nwhen nails are not in use.\n\n@code{GMP_NAIL_MASK} is not often needed, since the nail part can be obtained\nwith @code{x >> GMP_NUMB_BITS}, and that means one less large constant, which\ncan help various RISC chips.\n@end defmac\n\n@defmac GMP_NUMB_MAX\nThe maximum value that can be stored in the number part of a limb.  This is\nthe same as @code{GMP_NUMB_MASK}, but can be used for clarity when doing\ncomparisons rather than bit-wise operations.\n@end defmac\n\nThe term ``nails'' comes from finger or toe nails, which are at the ends of a\nlimb (arm or leg).  ``numb'' is short for number, but is also how the\ndevelopers felt after trying for a long time to come up with sensible names\nfor these things.\n\nIn the future (the distant future most likely) a non-zero nail might be\npermitted, giving non-unique representations for numbers in a limb vector.\nThis would help vector processors since carries would only ever need to\npropagate one or two limbs.\n\n\n@node Random Number Functions, Formatted Output, Low-level Functions, Top\n@chapter Random Number Functions\n@cindex Random number functions\n\nSequences of pseudo-random numbers in MPIR are generated using a variable of\ntype @code{gmp_randstate_t}, which holds an algorithm selection and a current\nstate.  Such a variable must be initialized by a call to one of the\n@code{gmp_randinit} functions, and can be seeded with one of the\n@code{gmp_randseed} functions.\n\nThe functions actually generating random numbers are described in @ref{Integer\nRandom Numbers}, and @ref{Miscellaneous Float Functions}.\n\nThe older style random number functions don't accept a @code{gmp_randstate_t}\nparameter but instead share a global variable of that type.  They use a\ndefault algorithm and are currently not seeded (though perhaps that will\nchange in the future).  
The new functions accepting a @code{gmp_randstate_t}\nare recommended for applications that care about randomness.\n\n@menu\n* Random State Initialization::\n* Random State Seeding::\n* Random State Miscellaneous::\n@end menu\n\n@node Random State Initialization, Random State Seeding, Random Number Functions, Random Number Functions\n@section Random State Initialization\n@cindex Random number state\n@cindex Initialization functions\n\n@deftypefun void gmp_randinit_default (gmp_randstate_t @var{state})\nInitialize @var{state} with a default algorithm.  This will be a compromise\nbetween speed and randomness, and is recommended for applications with no\nspecial requirements.  Currently this is @code{gmp_randinit_mt}.\n@end deftypefun\n\n@deftypefun void gmp_randinit_mt (gmp_randstate_t @var{state})\n@cindex Mersenne twister random numbers\nInitialize @var{state} for a Mersenne Twister algorithm.  This algorithm is\nfast and has good randomness properties.\n@end deftypefun\n\n@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, mpz_t @var{a}, @w{mpir_ui @var{c}}, @w{mp_bitcnt_t @var{m2exp}})\n@cindex Linear congruential random numbers\nInitialize @var{state} with a linear congruential algorithm @m{X = (@var{a}X +\n@var{c}) @bmod 2^{m2exp}, X = (@var{a}*X + @var{c}) mod 2^@var{m2exp}}.\n\nThe low bits of @math{X} in this algorithm are not very random.  The least\nsignificant bit will have a period no more than 2, and the second bit no more\nthan 4, etc.  For this reason only the high half of each @math{X} is actually\nused.\n\nWhen a random number of more than @math{@var{m2exp}/2} bits is to be\ngenerated, multiple iterations of the recurrence are used and the results\nconcatenated.\n@end deftypefun\n\n@deftypefun int gmp_randinit_lc_2exp_size (gmp_randstate_t @var{state}, mp_bitcnt_t @var{size})\n@cindex Linear congruential random numbers\nInitialize @var{state} for a linear congruential algorithm as per\n@code{gmp_randinit_lc_2exp}.  
@var{a}, @var{c} and @var{m2exp} are selected\nfrom a table, chosen so that @var{size} bits (or more) of each @math{X} will\nbe used, ie.@: @math{@var{m2exp}/2 @ge{} @var{size}}.\n\nIf successful the return value is non-zero.  If @var{size} is bigger than the\ntable data provides then the return value is zero.  The maximum @var{size}\ncurrently supported is 128.\n@end deftypefun\n\n@deftypefun int gmp_randinit_set (gmp_randstate_t @var{rop}, gmp_randstate_t @var{op})\nInitialize @var{rop} with a copy of the algorithm and state from @var{op}.\n@end deftypefun\n\n@deftypefun void gmp_randclear (gmp_randstate_t @var{state})\nFree all memory occupied by @var{state}.\n@end deftypefun\n\n\n@node Random State Seeding, Random State Miscellaneous, Random State Initialization, Random Number Functions\n@section Random State Seeding\n@cindex Random number seeding\n@cindex Seeding random numbers\n\n@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, mpz_t @var{seed})\n@deftypefunx void gmp_randseed_ui (gmp_randstate_t @var{state}, mpir_ui @var{seed})\nSet an initial seed value into @var{state}.\n\nThe size of a seed determines how many different sequences of random numbers\nthat it's possible to generate.  The ``quality'' of the seed is the randomness\nof a given seed compared to the previous seed used, and this affects the\nrandomness of separate number sequences.  The method for choosing a seed is\ncritical if the generated numbers are to be used for important applications,\nsuch as generating cryptographic keys.\n\nTraditionally the system time has been used to seed, but care needs to be\ntaken with this.  
If an application seeds often and the resolution of the\nsystem clock is low, then the same sequence of numbers might be repeated.\nAlso, the system time is quite easy to guess, so if unpredictability is\nrequired then it should definitely not be the only source for the seed value.\nOn some systems there's a special device @file{/dev/random} which provides\nrandom data better suited for use as a seed.\n@end deftypefun\n\n\n@node Random State Miscellaneous,  , Random State Seeding, Random Number Functions\n@section Random State Miscellaneous\n\n@deftypefun mpir_ui gmp_urandomb_ui (gmp_randstate_t @var{state}, mpir_ui @var{n})\nReturn a uniformly distributed random number of @var{n} bits, ie.@: in the\nrange 0 to @m{2^n-1,2^@var{n}-1} inclusive.  @var{n} must be less than or\nequal to the number of bits in an @code{mpir_ui}.\n@end deftypefun\n\n@deftypefun mpir_ui gmp_urandomm_ui (gmp_randstate_t @var{state}, mpir_ui @var{n})\nReturn a uniformly distributed random number in the range 0 to\n@math{@var{n}-1}, inclusive.\n@end deftypefun\n\n\n@node Formatted Output, Formatted Input, Random Number Functions, Top\n@chapter Formatted Output\n@cindex Formatted output\n@cindex @code{printf} formatted output\n\n@menu\n* Formatted Output Strings::\n* Formatted Output Functions::\n* C++ Formatted Output::\n@end menu\n\n@node Formatted Output Strings, Formatted Output Functions, Formatted Output, Formatted Output\n@section Format Strings\n\n@code{gmp_printf} and friends accept format strings similar to the standard C\n@code{printf} (@pxref{Formatted Output,, Formatted Output, libc, The GNU C\nLibrary Reference Manual}).  A format specification is of the form\n\n@example\n% [flags] [width] [.[precision]] [type] conv\n@end example\n\nMPIR adds types @samp{Z}, @samp{Q} and @samp{F} for @code{mpz_t}, @code{mpq_t}\nand @code{mpf_t} respectively, @samp{M} for @code{mp_limb_t}, and @samp{N} for\nan @code{mp_limb_t} array.  
@samp{Z}, @samp{Q}, @samp{M} and @samp{N} behave\nlike integers.  @samp{Q} will print a @samp{/} and a denominator, if needed.\n@samp{F} behaves like a float.  For example,\n\n@example\nmpz_t z;\ngmp_printf (\"%s is an mpz %Zd\\n\", \"here\", z);\n\nmpq_t q;\ngmp_printf (\"a hex rational: %#40Qx\\n\", q);\n\nmpf_t f;\nint   n;\ngmp_printf (\"fixed point mpf %.*Ff with %d digits\\n\", n, f, n);\n\nmp_limb_t l;\ngmp_printf (\"limb %Mu\\n\", l);\n\nconst mp_limb_t *ptr;\nmp_size_t       size;\ngmp_printf (\"limb array %Nx\\n\", ptr, size);\n@end example\n\nFor @samp{N} the limbs are expected least significant first, as per the\n@code{mpn} functions (@pxref{Low-level Functions}).  A negative size can be\ngiven to print the value as a negative.\n\nAll the standard C @code{printf} types behave the same as the C library\n@code{printf}, and can be freely intermixed with the MPIR extensions.  In the\ncurrent implementation the standard parts of the format string are simply\nhanded to @code{printf} and only the MPIR extensions handled directly.\n\nThe flags accepted are as follows.  GLIBC style @nisamp{'} is only for the\nstandard C types (not the MPIR types), and only if the C library supports it.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{0} @tab pad with zeros (rather than spaces)\n@item @nicode{#} @tab show the base with @samp{0x}, @samp{0X} or @samp{0}\n@item @nicode{+} @tab always show a sign\n@item (space)    @tab show a space or a @samp{-} sign\n@item @nicode{'} @tab group digits, GLIBC style (not MPIR types)\n@end multitable\n@end quotation\n\nThe optional width and precision can be given as a number within the format\nstring, or as a @samp{*} to take an extra parameter of type @code{int}, the\nsame as the standard @code{printf}.\n\nThe standard types accepted are as follows.  
@samp{h} and @samp{l} are\nportable, the rest will depend on the compiler (or include files) for the type\nand the C library for the output.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{h}  @tab @nicode{short}\n@item @nicode{hh} @tab @nicode{char}\n@item @nicode{j}  @tab @nicode{intmax_t} or @nicode{uintmax_t}\n@item @nicode{l}  @tab @nicode{long} or @nicode{wchar_t}\n@item @nicode{ll} @tab @nicode{long long}\n@item @nicode{L}  @tab @nicode{long double}\n@item @nicode{q}  @tab @nicode{quad_t} or @nicode{u_quad_t}\n@item @nicode{t}  @tab @nicode{ptrdiff_t}\n@item @nicode{z}  @tab @nicode{size_t}\n@end multitable\n@end quotation\n\n@noindent\nThe MPIR types are\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{F}  @tab @nicode{mpf_t}, float conversions\n@item @nicode{Q}  @tab @nicode{mpq_t}, integer conversions\n@item @nicode{M}  @tab @nicode{mp_limb_t}, integer conversions\n@item @nicode{N}  @tab @nicode{mp_limb_t} array, integer conversions\n@item @nicode{Z}  @tab @nicode{mpz_t}, integer conversions\n@end multitable\n@end quotation\n\nThe conversions accepted are as follows.  @samp{a} and @samp{A} are always\nsupported for @code{mpf_t} but depend on the C library for standard C float\ntypes.  
@samp{m} and @samp{p} depend on the C library.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{a} @nicode{A} @tab hex floats, C99 style\n@item @nicode{c}            @tab character\n@item @nicode{d}            @tab decimal integer\n@item @nicode{e} @nicode{E} @tab scientific format float\n@item @nicode{f}            @tab fixed point float\n@item @nicode{i}            @tab same as @nicode{d}\n@item @nicode{g} @nicode{G} @tab fixed or scientific float\n@item @nicode{m}            @tab @code{strerror} string, GLIBC style\n@item @nicode{n}            @tab store characters written so far\n@item @nicode{o}            @tab octal integer\n@item @nicode{p}            @tab pointer\n@item @nicode{s}            @tab string\n@item @nicode{u}            @tab unsigned integer\n@item @nicode{x} @nicode{X} @tab hex integer\n@end multitable\n@end quotation\n\n@samp{o}, @samp{x} and @samp{X} are unsigned for the standard C types, but for\ntypes @samp{Z}, @samp{Q} and @samp{N} they are signed.  @samp{u} is not\nmeaningful for @samp{Z}, @samp{Q} and @samp{N}.\n\n@samp{M} is a proxy for the C library @samp{l} or @samp{L}, according to the\nsize of @code{mp_limb_t}.  
Unsigned conversions will be usual, but a signed\nconversion can be used and will interpret the value as a twos complement\nnegative.\n\n@samp{n} can be used with any type, even the MPIR types.\n\nOther types or conversions that might be accepted by the C library\n@code{printf} cannot be used through @code{gmp_printf}, this includes for\ninstance extensions registered with GLIBC @code{register_printf_function}.\nAlso currently there's no support for POSIX @samp{$} style numbered arguments\n(perhaps this will be added in the future).\n\nThe precision field has its usual meaning for integer @samp{Z} and float\n@samp{F} types, but is currently undefined for @samp{Q} and should not be used\nwith that.\n\n@code{mpf_t} conversions only ever generate as many digits as can be\naccurately represented by the operand, the same as @code{mpf_get_str} does.\nZeros will be used if necessary to pad to the requested precision.  This\nhappens even for an @samp{f} conversion of an @code{mpf_t} which is an\ninteger, for instance @math{2^@W{1024}} in an @code{mpf_t} of 128 bits\nprecision will only produce about 40 digits, then pad with zeros to the\ndecimal point.  An empty precision field like @samp{%.Fe} or @samp{%.Ff} can\nbe used to specifically request just the significant digits.\n\nThe decimal point character (or string) is taken from the current locale\nsettings on systems which provide @code{localeconv} (@pxref{Locales,, Locales\nand Internationalization, libc, The GNU C Library Reference Manual}).  The C\nlibrary will normally do the same for standard float output.\n\nThe format string is only interpreted as plain @code{char}s, multibyte\ncharacters are not recognised.  Perhaps this will change in the future.\n\n\n@node Formatted Output Functions, C++ Formatted Output, Formatted Output Strings, Formatted Output\n@section Functions\n@cindex Output functions\n\nEach of the following functions is similar to the corresponding C library\nfunction.  
The basic @code{printf} forms take a variable argument list.  The\n@code{vprintf} forms take an argument pointer, see @ref{Variadic Functions,,\nVariadic Functions, libc, The GNU C Library Reference Manual}, or @samp{man 3\nva_start}.\n\nIt should be emphasised that if a format string is invalid, or the arguments\ndon't match what the format specifies, then the behaviour of any of these\nfunctions will be unpredictable.  GCC format string checking is not available,\nsince it doesn't recognise the MPIR extensions.\n\nThe file based functions @code{gmp_printf} and @code{gmp_fprintf} will return\n@math{-1} to indicate a write error.  Output is not ``atomic'', so partial\noutput may be produced if a write error occurs.  All the functions can return\n@math{-1} if the C library @code{printf} variant in use returns @math{-1}, but\nthis shouldn't normally occur.\n\n@deftypefun int gmp_printf (const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vprintf (const char *@var{fmt}, va_list @var{ap})\nPrint to the standard output @code{stdout}.  Return the number of characters\nwritten, or @math{-1} if an error occurred.\n@end deftypefun\n\n@deftypefun int gmp_fprintf (FILE *@var{fp}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vfprintf (FILE *@var{fp}, const char *@var{fmt}, va_list @var{ap})\nPrint to the stream @var{fp}.  Return the number of characters written, or\n@math{-1} if an error occurred.\n@end deftypefun\n\n@deftypefun int gmp_sprintf (char *@var{buf}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vsprintf (char *@var{buf}, const char *@var{fmt}, va_list @var{ap})\nForm a null-terminated string in @var{buf}.  
Return the number of characters\nwritten, excluding the terminating null.\n\nNo overlap is permitted between the space at @var{buf} and the string\n@var{fmt}.\n\nThese functions are not recommended, since there's no protection against\nexceeding the space available at @var{buf}.\n@end deftypefun\n\n@deftypefun int gmp_snprintf (char *@var{buf}, size_t @var{size}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vsnprintf (char *@var{buf}, size_t @var{size}, const char *@var{fmt}, va_list @var{ap})\nForm a null-terminated string in @var{buf}.  No more than @var{size} bytes\nwill be written.  To get the full output, @var{size} must be enough for the\nstring and null-terminator.\n\nThe return value is the total number of characters which ought to have been\nproduced, excluding the terminating null.  If @math{@var{retval} @ge{}\n@var{size}} then the actual output has been truncated to the first\n@math{@var{size}-1} characters, and a null appended.\n\nNo overlap is permitted between the region @{@var{buf},@var{size}@} and the\n@var{fmt} string.\n\nNotice the return value is in ISO C99 @code{snprintf} style.  This is so even\nif the C library @code{vsnprintf} is the older GLIBC 2.0.x style.\n@end deftypefun\n\n@deftypefun int gmp_asprintf (char **@var{pp}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vasprintf (char **@var{pp}, const char *@var{fmt}, va_list @var{ap})\nForm a null-terminated string in a block of memory obtained from the current\nmemory allocation function (@pxref{Custom Allocation}).  The block will be the\nsize of the string and null-terminator.  The address of the block is stored to\n*@var{pp}.  
The return value is the number of characters produced, excluding\nthe null-terminator.\n\nUnlike the C library @code{asprintf}, @code{gmp_asprintf} doesn't return\n@math{-1} if there's no more memory available, it lets the current allocation\nfunction handle that.\n@end deftypefun\n\n@deftypefun int gmp_obstack_printf (struct obstack *@var{ob}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_obstack_vprintf (struct obstack *@var{ob}, const char *@var{fmt}, va_list @var{ap})\n@cindex @code{obstack} output\nAppend to the current object in @var{ob}.  The return value is the number of\ncharacters written.  A null-terminator is not written.\n\n@var{fmt} cannot be within the current object in @var{ob}, since that object\nmight move as it grows.\n\nThese functions are available only when the C library provides the obstack\nfeature, which probably means only on GNU systems, see @ref{Obstacks,,\nObstacks, libc, The GNU C Library Reference Manual}.\n@end deftypefun\n\n\n@node C++ Formatted Output,  , Formatted Output Functions, Formatted Output\n@section C++ Formatted Output\n@cindex C++ @code{ostream} output\n@cindex @code{ostream} output\n\nThe following functions are provided in @file{libmpirxx} (@pxref{Headers and\nLibraries}), which is built if C++ support is enabled (@pxref{Build Options}).\nPrototypes are available from @code{<mpir.h>}.\n\n@deftypefun ostream& operator<< (ostream& @var{stream}, mpz_t @var{op})\nPrint @var{op} to @var{stream}, using its @code{ios} formatting settings.\n@code{ios::width} is reset to 0 after output, the same as the standard\n@code{ostream operator<<} routines do.\n\nIn hex or octal, @var{op} is printed as a signed number, the same as for\ndecimal.  
This is unlike the standard @code{operator<<} routines on @code{int}\netc, which instead give twos complement.\n@end deftypefun\n\n@deftypefun ostream& operator<< (ostream& @var{stream}, mpq_t @var{op})\nPrint @var{op} to @var{stream}, using its @code{ios} formatting settings.\n@code{ios::width} is reset to 0 after output, the same as the standard\n@code{ostream operator<<} routines do.\n\nOutput will be a fraction like @samp{5/9}, or if the denominator is 1 then\njust a plain integer like @samp{123}.\n\nIn hex or octal, @var{op} is printed as a signed value, the same as for\ndecimal.  If @code{ios::showbase} is set then a base indicator is shown on\nboth the numerator and denominator (if the denominator is required).\n@end deftypefun\n\n@deftypefun ostream& operator<< (ostream& @var{stream}, mpf_t @var{op})\nPrint @var{op} to @var{stream}, using its @code{ios} formatting settings.\n@code{ios::width} is reset to 0 after output, the same as the standard\n@code{ostream operator<<} routines do.\n\nThe decimal point follows the standard library float @code{operator<<}, which\non recent systems means the @code{std::locale} imbued on @var{stream}.\n\nHex and octal are supported, unlike the standard @code{operator<<} on\n@code{double}.  The mantissa will be in hex or octal, the exponent will be in\ndecimal.  For hex the exponent delimiter is an @samp{@@}.  
This is as per\n@code{mpf_out_str}.\n\n@code{ios::showbase} is supported, and will put a base on the mantissa, for\nexample hex @samp{0x1.8} or @samp{0x0.8}, or octal @samp{01.4} or @samp{00.4}.\nThis last form is slightly strange, but at least differentiates itself from\ndecimal.\n@end deftypefun\n\nThese operators mean that MPIR types can be printed in the usual C++ way, for\nexample,\n\n@example\nmpz_t  z;\nint    n;\n...\ncout << \"iteration \" << n << \" value \" << z << \"\\n\";\n@end example\n\nBut note that @code{ostream} output (and @code{istream} input, @pxref{C++\nFormatted Input}) is the only overloading available for the MPIR types and that\nfor instance using @code{+} with an @code{mpz_t} will have unpredictable\nresults.  For classes with overloading, see @ref{C++ Class Interface}.\n\n\n@node Formatted Input, C++ Class Interface, Formatted Output, Top\n@chapter Formatted Input\n@cindex Formatted input\n@cindex @code{scanf} formatted input\n\n@menu\n* Formatted Input Strings::\n* Formatted Input Functions::\n* C++ Formatted Input::\n@end menu\n\n\n@node Formatted Input Strings, Formatted Input Functions, Formatted Input, Formatted Input\n@section Formatted Input Strings\n\n@code{gmp_scanf} and friends accept format strings similar to the standard C\n@code{scanf} (@pxref{Formatted Input,, Formatted Input, libc, The GNU C\nLibrary Reference Manual}).  A format specification is of the form\n\n@example\n% [flags] [width] [type] conv\n@end example\n\nMPIR adds types @samp{Z}, @samp{Q} and @samp{F} for @code{mpz_t}, @code{mpq_t}\nand @code{mpf_t} respectively.  @samp{Z} and @samp{Q} behave like integers.\n@samp{Q} will read a @samp{/} and a denominator, if present.  @samp{F} behaves\nlike a float.\n\nMPIR variables don't require an @code{&} when passed to @code{gmp_scanf}, since\nthey're already ``call-by-reference''.  
For example,\n\n@example\n/* to read say \"a(5) = 1234\" */\nint   n;\nmpz_t z;\ngmp_scanf (\"a(%d) = %Zd\\n\", &n, z);\n\nmpq_t q1, q2;\ngmp_sscanf (\"0377 + 0x10/0x11\", \"%Qi + %Qi\", q1, q2);\n\n/* to read say \"topleft (1.55,-2.66)\" */\nmpf_t x, y;\nchar  buf[32];\ngmp_scanf (\"%31s (%Ff,%Ff)\", buf, x, y);\n@end example\n\nAll the standard C @code{scanf} types behave the same as in the C library\n@code{scanf}, and can be freely intermixed with the MPIR extensions.  In the\ncurrent implementation the standard parts of the format string are simply\nhanded to @code{scanf} and only the MPIR extensions handled directly.\n\nThe flags accepted are as follows.  @samp{a} and @samp{'} will depend on\nsupport from the C library, and @samp{'} cannot be used with MPIR types.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{*} @tab read but don't store\n@item @nicode{a} @tab allocate a buffer (string conversions)\n@item @nicode{'} @tab grouped digits, GLIBC style (not MPIR types)\n@end multitable\n@end quotation\n\nThe standard types accepted are as follows.  
@samp{h} and @samp{l} are\nportable, the rest will depend on the compiler (or include files) for the type\nand the C library for the input.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{h}  @tab @nicode{short}\n@item @nicode{hh} @tab @nicode{char}\n@item @nicode{j}  @tab @nicode{intmax_t} or @nicode{uintmax_t}\n@item @nicode{l}  @tab @nicode{long int}, @nicode{double} or @nicode{wchar_t}\n@item @nicode{ll} @tab @nicode{long long}\n@item @nicode{L}  @tab @nicode{long double}\n@item @nicode{q}  @tab @nicode{quad_t} or @nicode{u_quad_t}\n@item @nicode{t}  @tab @nicode{ptrdiff_t}\n@item @nicode{z}  @tab @nicode{size_t}\n@end multitable\n@end quotation\n\n@noindent\nThe MPIR types are\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{F}  @tab @nicode{mpf_t}, float conversions\n@item @nicode{Q}  @tab @nicode{mpq_t}, integer conversions\n@item @nicode{Z}  @tab @nicode{mpz_t}, integer conversions\n@end multitable\n@end quotation\n\nThe conversions accepted are as follows.  
@samp{p} and @samp{[} will depend on\nsupport from the C library, the rest are standard.\n\n@quotation\n@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item @nicode{c}            @tab character or characters\n@item @nicode{d}            @tab decimal integer\n@item @nicode{e} @nicode{E} @nicode{f} @nicode{g} @nicode{G}\n                            @tab float\n@item @nicode{i}            @tab integer with base indicator\n@item @nicode{n}            @tab characters read so far\n@item @nicode{o}            @tab octal integer\n@item @nicode{p}            @tab pointer\n@item @nicode{s}            @tab string of non-whitespace characters\n@item @nicode{u}            @tab decimal integer\n@item @nicode{x} @nicode{X} @tab hex integer\n@item @nicode{[}            @tab string of characters in a set\n@end multitable\n@end quotation\n\n@samp{e}, @samp{E}, @samp{f}, @samp{g} and @samp{G} are identical, they all\nread either fixed point or scientific format, and either upper or lower case\n@samp{e} for the exponent in scientific format.\n\nC99 style hex float format (@code{printf %a}, @pxref{Formatted Output\nStrings}) is always accepted for @code{mpf_t}, but for the standard float\ntypes it will depend on the C library.\n\n@samp{x} and @samp{X} are identical, both accept both upper and lower case\nhexadecimal.\n\n@samp{o}, @samp{u}, @samp{x} and @samp{X} all read positive or negative\nvalues.  For the standard C types these are described as ``unsigned''\nconversions, but that merely affects certain overflow handling, negatives are\nstill allowed (per @code{strtoul}, @pxref{Parsing of Integers,, Parsing of\nIntegers, libc, The GNU C Library Reference Manual}).  For MPIR types there are\nno overflows, so @samp{d} and @samp{u} are identical.\n\n@samp{Q} type reads the numerator and (optional) denominator as given.  
If the\nvalue might not be in canonical form then @code{mpq_canonicalize} must be\ncalled before using it in any calculations (@pxref{Rational Number\nFunctions}).\n\n@samp{Qi} will read a base specification separately for the numerator and\ndenominator.  For example @samp{0x10/11} would be 16/11, whereas\n@samp{0x10/0x11} would be 16/17.\n\n@samp{n} can be used with any of the types above, even the MPIR types.\n@samp{*} to suppress assignment is allowed, though in that case it would do\nnothing at all.\n\nOther conversions or types that might be accepted by the C library\n@code{scanf} cannot be used through @code{gmp_scanf}.\n\nWhitespace is read and discarded before a field, except for @samp{c} and\n@samp{[} conversions.\n\nFor float conversions, the decimal point character (or string) expected is\ntaken from the current locale settings on systems which provide\n@code{localeconv} (@pxref{Locales,, Locales and Internationalization, libc,\nThe GNU C Library Reference Manual}).  The C library will normally do the same\nfor standard float input.\n\nThe format string is only interpreted as plain @code{char}s, multibyte\ncharacters are not recognised.  Perhaps this will change in the future.\n\n\n@node Formatted Input Functions, C++ Formatted Input, Formatted Input Strings, Formatted Input\n@section Formatted Input Functions\n@cindex Input functions\n\nEach of the following functions is similar to the corresponding C library\nfunction.  The plain @code{scanf} forms take a variable argument list.  The\n@code{vscanf} forms take an argument pointer, see @ref{Variadic Functions,,\nVariadic Functions, libc, The GNU C Library Reference Manual}, or @samp{man 3\nva_start}.\n\nIt should be emphasised that if a format string is invalid, or the arguments\ndon't match what the format specifies, then the behaviour of any of these\nfunctions will be unpredictable.  
GCC format string checking is not available,\nsince it doesn't recognise the MPIR extensions.\n\nNo overlap is permitted between the @var{fmt} string and any of the results\nproduced.\n\n@deftypefun int gmp_scanf (const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vscanf (const char *@var{fmt}, va_list @var{ap})\nRead from the standard input @code{stdin}.\n@end deftypefun\n\n@deftypefun int gmp_fscanf (FILE *@var{fp}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vfscanf (FILE *@var{fp}, const char *@var{fmt}, va_list @var{ap})\nRead from the stream @var{fp}.\n@end deftypefun\n\n@deftypefun int gmp_sscanf (const char *@var{s}, const char *@var{fmt}, @dots{})\n@deftypefunx int gmp_vsscanf (const char *@var{s}, const char *@var{fmt}, va_list @var{ap})\nRead from a null-terminated string @var{s}.\n@end deftypefun\n\nThe return value from each of these functions is the same as the standard C99\n@code{scanf}, namely the number of fields successfully parsed and stored.\n@samp{%n} fields and fields read but suppressed by @samp{*} don't count\ntowards the return value.\n\nIf end of input (or a file error) is reached before a character for a field or\na literal, and if no previous non-suppressed fields have matched, then the\nreturn value is @code{EOF} instead of 0.  A whitespace character in the format\nstring is only an optional match and doesn't induce an @code{EOF} in this\nfashion.  Leading whitespace read and discarded for a field don't count as\ncharacters for that field.\n\nFor the MPIR types, input parsing follows C99 rules, namely one character of\nlookahead is used and characters are read while they continue to meet the\nformat requirements.  If this doesn't provide a complete number then the\nfunction terminates, with that field not stored nor counted towards the return\nvalue.  For instance with @code{mpf_t} an input @samp{1.23e-XYZ} would be read\nup to the @samp{X} and that character pushed back since it's not a digit.  
The\nstring @samp{1.23e-} would then be considered invalid since an @samp{e} must\nbe followed by at least one digit.\n\nFor the standard C types, in the current implementation MPIR calls the C\nlibrary @code{scanf} functions, which might have looser rules about what\nconstitutes a valid input.\n\nNote that @code{gmp_sscanf} is the same as @code{gmp_fscanf} and only does one\ncharacter of lookahead when parsing.  Although clearly it could look at its\nentire input, it is deliberately made identical to @code{gmp_fscanf}, the same\nway C99 @code{sscanf} is the same as @code{fscanf}.\n\n\n@node C++ Formatted Input,  , Formatted Input Functions, Formatted Input\n@section C++ Formatted Input\n@cindex C++ @code{istream} input\n@cindex @code{istream} input\n\nThe following functions are provided in @file{libmpirxx} (@pxref{Headers and\nLibraries}), which is built only if C++ support is enabled (@pxref{Build\nOptions}).  Prototypes are available from @code{<mpir.h>}.\n\n@deftypefun istream& operator>> (istream& @var{stream}, mpz_t @var{rop})\nRead @var{rop} from @var{stream}, using its @code{ios} formatting settings.\n@end deftypefun\n\n@deftypefun istream& operator>> (istream& @var{stream}, mpq_t @var{rop})\nAn integer like @samp{123} will be read, or a fraction like @samp{5/9}.  No\nwhitespace is allowed around the @samp{/}.  If the fraction is not in\ncanonical form then @code{mpq_canonicalize} must be called (@pxref{Rational\nNumber Functions}) before operating on it.\n\nAs per integer input, an @samp{0} or @samp{0x} base indicator is read when\nnone of @code{ios::dec}, @code{ios::oct} or @code{ios::hex} are set.  
This is\ndone separately for numerator and denominator, so that for instance\n@samp{0x10/11} is @math{16/11} and @samp{0x10/0x11} is @math{16/17}.\n@end deftypefun\n\n@deftypefun istream& operator>> (istream& @var{stream}, mpf_t @var{rop})\nRead @var{rop} from @var{stream}, using its @code{ios} formatting settings.\n\nHex or octal floats are not supported, but might be in the future, or perhaps\nit's best to accept only what the standard float @code{operator>>} does.\n@end deftypefun\n\nNote that digit grouping specified by the @code{istream} locale is currently\nnot accepted.  Perhaps this will change in the future.\n\n@sp 1\nThese operators mean that MPIR types can be read in the usual C++ way, for\nexample,\n\n@example\nmpz_t  z;\n...\ncin >> z;\n@end example\n\nBut note that @code{istream} input (and @code{ostream} output, @pxref{C++\nFormatted Output}) is the only overloading available for the MPIR types and\nthat for instance using @code{+} with an @code{mpz_t} will have unpredictable\nresults.  For classes with overloading, see @ref{C++ Class Interface}.\n\n\n\n@node C++ Class Interface, .Net Interface, Formatted Input, Top\n@chapter C++ Class Interface\n@cindex C++ interface\n\nThis chapter describes the C++ class based interface to MPIR.\n\nAll MPIR C language types and functions can be used in C++ programs, since\n@file{mpir.h} has @code{extern \"C\"} qualifiers, but the class interface offers\noverloaded functions and operators which may be more convenient.\n\nDue to the implementation of this interface, a reasonably recent C++ compiler\nis required, one supporting namespaces, partial specialization of templates\nand member templates.  
For GCC this means version 2.91 or later.\n\n@strong{Everything described in this chapter is to be considered preliminary\nand might be subject to incompatible changes if some unforeseen difficulty\nreveals itself.}\n\n@menu\n* C++ Interface General::\n* C++ Interface Integers::\n* C++ Interface Rationals::\n* C++ Interface Floats::\n* C++ Interface Random Numbers::\n* C++ Interface Limitations::\n@end menu\n\n\n@node C++ Interface General, C++ Interface Integers, C++ Class Interface, C++ Class Interface\n@section C++ Interface General\n\n@noindent\nAll the C++ classes and functions are available with\n\n@cindex @code{mpirxx.h}\n@example\n#include <mpirxx.h>\n@end example\n\nPrograms should be linked with the @file{libmpirxx} and @file{libmpir}\nlibraries.  For example,\n\n@example\ng++ mycxxprog.cc -lmpirxx -lmpir\n@end example\n\n@noindent\nThe classes defined are\n\n@deftp Class mpz_class\n@deftpx Class mpq_class\n@deftpx Class mpf_class\n@end deftp\n\nThe standard operators and various standard functions are overloaded to allow\narithmetic with these classes.  For example,\n\n@example\nint\nmain (void)\n@{\n  mpz_class a, b, c;\n\n  a = 1234;\n  b = \"-5678\";\n  c = a+b;\n  cout << \"sum is \" << c << \"\\n\";\n  cout << \"absolute value is \" << abs(c) << \"\\n\";\n\n  return 0;\n@}\n@end example\n\nAn important feature of the implementation is that an expression like\n@code{a=b+c} results in a single call to the corresponding @code{mpz_add},\nwithout using a temporary for the @code{b+c} part.  Expressions which by their\nnature imply intermediate values, like @code{a=b*c+d*e}, still use temporaries\nthough.\n\nThe classes can be freely intermixed in expressions, as can the classes and\nthe standard types @code{mpir_si}, @code{mpir_ui} and @code{double}.\nSmaller types like @code{int} or @code{float} can also be intermixed, since\nC++ will promote them.\n\nNote that @code{bool} is not accepted directly, but must be explicitly cast to\nan @code{int} first.  
This is because C++ will automatically convert any\npointer to a @code{bool}, so if MPIR accepted @code{bool} it would make all\nsorts of invalid class and pointer combinations compile but almost certainly\nnot do anything sensible.\n\nConversions back from the classes to standard C++ types aren't done\nautomatically, instead member functions like @code{get_si} are provided (see\nthe following sections for details).\n\nAlso there are no automatic conversions from the classes to the corresponding\nMPIR C types, instead a reference to the underlying C object can be obtained\nwith the following functions,\n\n@deftypefun mpz_t mpz_class::get_mpz_t ()\n@deftypefunx mpq_t mpq_class::get_mpq_t ()\n@deftypefunx mpf_t mpf_class::get_mpf_t ()\n@end deftypefun\n\nThese can be used to call a C function which doesn't have a C++ class\ninterface.  For example to set @code{a} to the GCD of @code{b} and @code{c},\n\n@example\nmpz_class a, b, c;\n...\nmpz_gcd (a.get_mpz_t(), b.get_mpz_t(), c.get_mpz_t());\n@end example\n\nIn the other direction, a class can be initialized from the corresponding MPIR\nC type, or assigned to if an explicit constructor is used.  In both cases this\nmakes a copy of the value, it doesn't create any sort of association.  For\nexample,\n\n@example\nmpz_t z;\n// ... init and calculate z ...\nmpz_class x(z);\nmpz_class y;\ny = mpz_class (z);\n@end example\n\nThere are no namespace setups in @file{mpirxx.h}, all types and functions are\nsimply put into the global namespace.  This is what @file{mpir.h} has done in\nthe past, and continues to do for compatibility.  The extras provided by\n@file{mpirxx.h} follow MPIR naming conventions and are unlikely to clash with\nanything.\n\n\n@node C++ Interface Integers, C++ Interface Rationals, C++ Interface General, C++ Class Interface\n@section C++ Interface Integers\n\n@deftypefun void mpz_class::mpz_class (type @var{n})\nConstruct an @code{mpz_class}.  
All the standard C++ types may be used, except\n@code{long double}, and all the MPIR C++ classes can be\nused.  Any necessary conversion follows the corresponding C function, for\nexample @code{double} follows @code{mpz_set_d} (@pxref{Assigning Integers}).\n@end deftypefun\n\n@deftypefun void mpz_class::mpz_class (mpz_t @var{z})\nConstruct an @code{mpz_class} from an @code{mpz_t}.  The value in @var{z} is\ncopied into the new @code{mpz_class}, there won't be any permanent association\nbetween it and @var{z}.\n@end deftypefun\n\n@deftypefun void mpz_class::mpz_class (const char *@var{s})\n@deftypefunx void mpz_class::mpz_class (const char *@var{s}, int @var{base} = 0)\n@deftypefunx void mpz_class::mpz_class (const string& @var{s})\n@deftypefunx void mpz_class::mpz_class (const string& @var{s}, int @var{base} = 0)\nConstruct an @code{mpz_class} converted from a string using @code{mpz_set_str}\n(@pxref{Assigning Integers}).\n\nIf the string is not a valid integer, an @code{std::invalid_argument}\nexception is thrown.  The same applies to @code{operator=}.\n@end deftypefun\n\n@deftypefun mpz_class operator\"\" _mpz (const char *@var{str})\nWith C++11 compilers, integers can be constructed with the syntax\n@code{123_mpz} which is equivalent to @code{mpz_class(\"123\")}.\n@end deftypefun\n\n@deftypefun mpz_class operator/ (mpz_class @var{a}, mpz_class @var{d})\n@deftypefunx mpz_class operator% (mpz_class @var{a}, mpz_class @var{d})\nDivisions involving @code{mpz_class} round towards zero, as per the\n@code{mpz_tdiv_q} and @code{mpz_tdiv_r} functions (@pxref{Integer Division}).\nThis is the same as the C99 @code{/} and @code{%} operators.\n\nThe @code{mpz_fdiv@dots{}} or @code{mpz_cdiv@dots{}} functions can always be called\ndirectly if desired.  
For example,\n\n@example\nmpz_class q, a, d;\n...\nmpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());\n@end example\n@end deftypefun\n\n@deftypefun mpz_class abs (mpz_class @var{op1})\n@deftypefunx int cmp (mpz_class @var{op1}, type @var{op2})\n@deftypefunx int cmp (type @var{op1}, mpz_class @var{op2})\n@maybepagebreak\n@deftypefunx bool mpz_class::fits_sint_p (void)\n@deftypefunx bool mpz_class::fits_slong_p (void)\n@deftypefunx bool mpz_class::fits_sshort_p (void)\n@maybepagebreak\n@deftypefunx bool mpz_class::fits_uint_p (void)\n@deftypefunx bool mpz_class::fits_ulong_p (void)\n@deftypefunx bool mpz_class::fits_ushort_p (void)\n@maybepagebreak\n@deftypefunx double mpz_class::get_d (void)\n@deftypefunx mpir_si mpz_class::get_si (void)\n@deftypefunx string mpz_class::get_str (int @var{base} = 10)\n@deftypefunx mpir_ui mpz_class::get_ui (void)\n@maybepagebreak\n@deftypefunx int mpz_class::set_str (const char *@var{str}, int @var{base})\n@deftypefunx int mpz_class::set_str (const string& @var{str}, int @var{base})\n@deftypefunx int sgn (mpz_class @var{op})\n@deftypefunx mpz_class sqrt (mpz_class @var{op})\n@deftypefunx void mpz_class::swap (mpz_class& @var{op})\n@deftypefunx void swap (mpz_class& @var{op1}, mpz_class& @var{op2})\nThese functions provide a C++ class interface to the corresponding MPIR C\nroutines.\n\n@code{cmp} can be used with any of the classes or the standard C++ types,\nexcept @code{long double}.\n@end deftypefun\n\n@sp 1\nOverloaded operators for combinations of @code{mpz_class} and @code{double}\nare provided for completeness, but it should be noted that if the given\n@code{double} is not an integer then the way any rounding is done is currently\nunspecified.  
The rounding might take place at the start, in the middle, or at\nthe end of the operation, and it might change in the future.\n\nConversions between @code{mpz_class} and @code{double}, however, are defined\nto follow the corresponding C functions @code{mpz_get_d} and @code{mpz_set_d}.\nAnd comparisons are always made exactly, as per @code{mpz_cmp_d}.\n\n\n@node C++ Interface Rationals, C++ Interface Floats, C++ Interface Integers, C++ Class Interface\n@section C++ Interface Rationals\n\nIn all the following constructors, if a fraction is given then it should be in\ncanonical form, or if not then @code{mpq_class::canonicalize} must be called.\n\n@deftypefun void mpq_class::mpq_class (type @var{op})\n@deftypefunx void mpq_class::mpq_class (integer @var{num}, integer @var{den})\nConstruct an @code{mpq_class}.  The initial value can be a single value of any\ntype, or a pair of integers (@code{mpz_class} or standard C++ integer types)\nrepresenting a fraction, except that @code{long double}\nis not supported.  For example,\n\n@example\nmpq_class q (99);\nmpq_class q (1.75);\nmpq_class q (1, 3);\n@end example\n@end deftypefun\n\n@deftypefun void mpq_class::mpq_class (mpq_t @var{q})\nConstruct an @code{mpq_class} from an @code{mpq_t}.  The value in @var{q} is\ncopied into the new @code{mpq_class}, there won't be any permanent association\nbetween it and @var{q}.\n@end deftypefun\n\n@deftypefun void mpq_class::mpq_class (const char *@var{s})\n@deftypefunx void mpq_class::mpq_class (const char *@var{s}, int @var{base} = 0)\n@deftypefunx void mpq_class::mpq_class (const string& @var{s})\n@deftypefunx void mpq_class::mpq_class (const string& @var{s}, int @var{base} = 0)\nConstruct an @code{mpq_class} converted from a string using @code{mpq_set_str}\n(@pxref{Initializing Rationals}).\n\nIf the string is not a valid rational, an @code{std::invalid_argument}\nexception is thrown.  
The same applies to @code{operator=}.\n@end deftypefun\n\n@deftypefun mpq_class operator\"\" _mpq (const char *@var{str})\nWith C++11 compilers, integral rationals can be constructed with the syntax\n@code{123_mpq} which is equivalent to @code{mpq_class(123_mpz)}. Other\nrationals can be built as @code{-1_mpq/2} or @code{0xb_mpq/123456_mpz}.\n@end deftypefun\n\n@deftypefun void mpq_class::canonicalize ()\nPut an @code{mpq_class} into canonical form, as per @ref{Rational Number\nFunctions}.  All arithmetic operators require their operands in canonical\nform, and will return results in canonical form.\n@end deftypefun\n\n@deftypefun mpq_class abs (mpq_class @var{op})\n@deftypefunx int cmp (mpq_class @var{op1}, type @var{op2})\n@deftypefunx int cmp (type @var{op1}, mpq_class @var{op2})\n@maybepagebreak\n@deftypefunx double mpq_class::get_d (void)\n@deftypefunx string mpq_class::get_str (int @var{base} = 10)\n@maybepagebreak\n@deftypefunx int mpq_class::set_str (const char *@var{str}, int @var{base})\n@deftypefunx int mpq_class::set_str (const string& @var{str}, int @var{base})\n@deftypefunx int sgn (mpq_class @var{op})\n@deftypefunx void mpq_class::swap (mpq_class& @var{op})\n@deftypefunx void swap (mpq_class& @var{op1}, mpq_class& @var{op2})\nThese functions provide a C++ class interface to the corresponding MPIR C\nroutines.\n\n@code{cmp} can be used with any of the classes or the standard C++ types,\nexcept @code{long double}.\n@end deftypefun\n\n@deftypefun {mpz_class&} mpq_class::get_num ()\n@deftypefunx {mpz_class&} mpq_class::get_den ()\nGet a reference to an @code{mpz_class} which is the numerator or denominator\nof an @code{mpq_class}.  This can be used both for read and write access.  
If\nthe object returned is modified, it modifies the original @code{mpq_class}.\n\nIf direct manipulation might produce a non-canonical value, then\n@code{mpq_class::canonicalize} must be called before further operations.\n@end deftypefun\n\n@deftypefun mpz_t mpq_class::get_num_mpz_t ()\n@deftypefunx mpz_t mpq_class::get_den_mpz_t ()\nGet a reference to the underlying @code{mpz_t} numerator or denominator of an\n@code{mpq_class}.  This can be passed to C functions expecting an\n@code{mpz_t}.  Any modifications made to the @code{mpz_t} will modify the\noriginal @code{mpq_class}.\n\nIf direct manipulation might produce a non-canonical value, then\n@code{mpq_class::canonicalize} must be called before further operations.\n@end deftypefun\n\n@deftypefun istream& operator>> (istream& @var{stream}, mpq_class& @var{rop});\nRead @var{rop} from @var{stream}, using its @code{ios} formatting settings,\nthe same as @code{mpq_t operator>>} (@pxref{C++ Formatted Input}).\n\nIf the @var{rop} read might not be in canonical form then\n@code{mpq_class::canonicalize} must be called.\n@end deftypefun\n\n\n@node C++ Interface Floats, C++ Interface Random Numbers, C++ Interface Rationals, C++ Class Interface\n@section C++ Interface Floats\n\nWhen an expression requires the use of temporary intermediate @code{mpf_class}\nvalues, like @code{f=g*h+x*y}, those temporaries will have the same precision\nas the destination @code{f}.  Explicit constructors can be used if this\ndoesn't suit.\n\n@deftypefun {} mpf_class::mpf_class (type @var{op})\n@deftypefunx {} mpf_class::mpf_class (type @var{op}, mpir_ui @var{prec})\nConstruct an @code{mpf_class}.  Any standard C++ type can be used, except\n@code{long double}, and any of the MPIR C++ classes can be\nused.\n\nIf @var{prec} is given, the initial precision is that value, in bits.  If\n@var{prec} is not given, then the initial precision is determined by the type\nof @var{op} given.  
An @code{mpz_class}, @code{mpq_class}, or C++\nbuiltin type will give the default @code{mpf} precision (@pxref{Initializing\nFloats}).  An @code{mpf_class} or expression will give the precision of that\nvalue.  The precision of a binary expression is the higher of the two\noperands.\n\n@example\nmpf_class f(1.5);        // default precision\nmpf_class f(1.5, 500);   // 500 bits (at least)\nmpf_class f(x);          // precision of x\nmpf_class f(abs(x));     // precision of x\nmpf_class f(-g, 1000);   // 1000 bits (at least)\nmpf_class f(x+y);        // greater of precisions of x and y\n@end example\n@end deftypefun\n\n@deftypefun void mpf_class::mpf_class (const char *@var{s})\n@deftypefunx void mpf_class::mpf_class (const char *@var{s}, mpir_ui @var{prec}, int @var{base} = 0)\n@deftypefunx void mpf_class::mpf_class (const string& @var{s})\n@deftypefunx void mpf_class::mpf_class (const string& @var{s}, mpir_ui @var{prec}, int @var{base} = 0)\nConstruct an @code{mpf_class} converted from a string using @code{mpf_set_str}\n(@pxref{Assigning Floats}).  If @var{prec} is given, the initial precision is\nthat value, in bits.  If not, the default @code{mpf} precision\n(@pxref{Initializing Floats}) is used.\n\nIf the string is not a valid float, an @code{std::invalid_argument} exception\nis thrown.  The same applies to @code{operator=}.\n@end deftypefun\n\n@deftypefun mpf_class operator\"\" _mpf (const char *@var{str})\nWith C++11 compilers, floats can be constructed with the syntax\n@code{1.23e-1_mpf} which is equivalent to @code{mpf_class(\"1.23e-1\")}.\n@end deftypefun\n\n@deftypefun {mpf_class&} mpf_class::operator= (type @var{op})\nConvert and store the given @var{op} value to an @code{mpf_class} object.  The\nsame types are accepted as for the constructors above.\n\nNote that @code{operator=} only stores a new value, it doesn't copy or change\nthe precision of the destination, instead the value is truncated if necessary.\nThis is the same as @code{mpf_set} etc.  
Note in particular this means for\n@code{mpf_class} a copy constructor is not the same as a default constructor\nplus assignment.\n\n@example\nmpf_class x (y);   // x created with precision of y\n\nmpf_class x;       // x created with default precision\nx = y;             // value truncated to that precision\n@end example\n\nApplications using templated code may need to be careful about the assumptions\nthe code makes in this area, when working with @code{mpf_class} values of\nvarious different or non-default precisions.  For instance implementations of\nthe standard @code{complex} template have been seen in both styles above,\nthough of course @code{complex} is normally only actually specified for use\nwith the builtin float types.\n@end deftypefun\n\n@deftypefun mpf_class abs (mpf_class @var{op})\n@deftypefunx mpf_class ceil (mpf_class @var{op})\n@deftypefunx int cmp (mpf_class @var{op1}, type @var{op2})\n@deftypefunx int cmp (type @var{op1}, mpf_class @var{op2})\n@maybepagebreak\n@deftypefunx bool mpf_class::fits_sint_p (void)\n@deftypefunx bool mpf_class::fits_slong_p (void)\n@deftypefunx bool mpf_class::fits_sshort_p (void)\n@maybepagebreak\n@deftypefunx bool mpf_class::fits_uint_p (void)\n@deftypefunx bool mpf_class::fits_ulong_p (void)\n@deftypefunx bool mpf_class::fits_ushort_p (void)\n@maybepagebreak\n@deftypefunx mpf_class floor (mpf_class @var{op})\n@deftypefunx mpf_class hypot (mpf_class @var{op1}, mpf_class @var{op2})\n@maybepagebreak\n@deftypefunx double mpf_class::get_d (void)\n@deftypefunx mpir_si mpf_class::get_si (void)\n@deftypefunx string mpf_class::get_str (mp_exp_t& @var{exp}, int @var{base} = 10, size_t @var{digits} = 0)\n@deftypefunx mpir_ui mpf_class::get_ui (void)\n@maybepagebreak\n@deftypefunx int mpf_class::set_str (const char *@var{str}, int @var{base})\n@deftypefunx int mpf_class::set_str (const string& @var{str}, int @var{base})\n@deftypefunx int sgn (mpf_class @var{op})\n@deftypefunx mpf_class sqrt (mpf_class @var{op})\n@deftypefunx 
void mpf_class::swap (mpf_class& @var{op})\n@deftypefunx void swap (mpf_class& @var{op1}, mpf_class& @var{op2})\n@deftypefunx mpf_class trunc (mpf_class @var{op})\nThese functions provide a C++ class interface to the corresponding MPIR C\nroutines.\n\n@code{cmp} can be used with any of the classes or the standard C++ types,\nexcept @code{long double}.\n\nThe accuracy provided by @code{hypot} is not currently guaranteed.\n@end deftypefun\n\n@deftypefun {mp_bitcnt_t} mpf_class::get_prec ()\n@deftypefunx void mpf_class::set_prec (mp_bitcnt_t @var{prec})\n@deftypefunx void mpf_class::set_prec_raw (mp_bitcnt_t @var{prec})\nGet or set the current precision of an @code{mpf_class}.\n\nThe restrictions described for @code{mpf_set_prec_raw} (@pxref{Initializing\nFloats}) apply to @code{mpf_class::set_prec_raw}.  Note in particular that the\n@code{mpf_class} must be restored to its allocated precision before being\ndestroyed.  This must be done by application code, there's no automatic\nmechanism for it.\n@end deftypefun\n\n\n@node C++ Interface Random Numbers, C++ Interface Limitations, C++ Interface Floats, C++ Class Interface\n@section C++ Interface Random Numbers\n\n@deftp Class gmp_randclass\nThe C++ class interface to the MPIR random number functions uses\n@code{gmp_randclass} to hold an algorithm selection and current state, as per\n@code{gmp_randstate_t}.\n@end deftp\n\n@deftypefun {} gmp_randclass::gmp_randclass (void (*@var{randinit}) (gmp_randstate_t, @dots{}), @dots{})\nConstruct a @code{gmp_randclass}, using a call to the given @var{randinit}\nfunction (@pxref{Random State Initialization}).  
The arguments expected are\nthe same as @var{randinit}, but with @code{mpz_class} instead of @code{mpz_t}.\nFor example,\n\n@example\ngmp_randclass r1 (gmp_randinit_default);\ngmp_randclass r2 (gmp_randinit_lc_2exp_size, 32);\ngmp_randclass r3 (gmp_randinit_lc_2exp, a, c, m2exp);\ngmp_randclass r4 (gmp_randinit_mt);\n@end example\n\n@code{gmp_randinit_lc_2exp_size} will fail if the size requested is too big,\nan @code{std::length_error} exception is thrown in that case.\n@end deftypefun\n\n@deftypefun void gmp_randclass::seed (mpir_ui @var{s})\n@deftypefunx void gmp_randclass::seed (mpz_class @var{s})\nSeed a random number generator.  @xref{Random Number Functions}, for how\nto choose a good seed.\n@end deftypefun\n\n@deftypefun mpz_class gmp_randclass::get_z_bits (mpir_ui @var{bits})\n@deftypefunx mpz_class gmp_randclass::get_z_bits (mpz_class @var{bits})\nGenerate a random integer with a specified number of bits.\n@end deftypefun\n\n@deftypefun mpz_class gmp_randclass::get_z_range (mpz_class @var{n})\nGenerate a random integer in the range 0 to @math{@var{n}-1} inclusive.\n@end deftypefun\n\n@deftypefun mpf_class gmp_randclass::get_f ()\n@deftypefunx mpf_class gmp_randclass::get_f (mpir_ui @var{prec})\nGenerate a random float @var{f} in the range @math{0 <= @var{f} < 1}.  @var{f}\nwill be to @var{prec} bits precision, or if @var{prec} is not given then to\nthe precision of the destination.  For example,\n\n@example\ngmp_randclass  r;\n...\nmpf_class  f (0, 512);   // 512 bits precision\nf = r.get_f();           // random number, 512 bits\n@end example\n@end deftypefun\n\n@deftypefun gmp_randstate_t gmp_randclass::get_randstate_t ()\nGet the underlying @code{gmp_randstate_t} from the object. This can be used to call a\nC function that doesn't have a C++ interface. For example, to check if an integer\nis prime,\n@example\ngmp_randclass r(gmp_randinit_mt);\nmpz_class p;\n// ... 
seed r and generate p ...\nis_prime = mpz_likely_prime_p(p.get_mpz_t(), r.get_randstate_t(), 0);\n@end example\n@end deftypefun\n\n\n\n@node C++ Interface Limitations,  , C++ Interface Random Numbers, C++ Class Interface\n@section C++ Interface Limitations\n\n@table @asis\n@item @code{mpq_class} and Templated Reading\nA generic piece of template code probably won't know that @code{mpq_class}\nrequires a @code{canonicalize} call if inputs read with @code{operator>>}\nmight be non-canonical.  This can lead to incorrect results.\n\n@code{operator>>} behaves as it does for reasons of efficiency.  A\ncanonicalize can be quite time consuming on large operands, and is best\navoided if it's not necessary.\n\nBut this potential difficulty reduces the usefulness of @code{mpq_class}.\nPerhaps a mechanism to tell @code{operator>>} what to do will be adopted in\nthe future, maybe a preprocessor define, a global flag, or an @code{ios} flag\npressed into service.  Or maybe, at the risk of inconsistency, the\n@code{mpq_class} @code{operator>>} could canonicalize and leave @code{mpq_t}\n@code{operator>>} not doing so, for use on those occasions when that's\nacceptable.  Send feedback or alternate ideas to @uref{http://groups.google.com/group/mpir-devel}.\n\n@item Subclassing\nSubclassing the MPIR C++ classes works, but is not currently recommended.\n\nExpressions involving subclasses resolve correctly (or seem to), but in normal\nC++ fashion the subclass doesn't inherit constructors and assignments.\nThere are many of those in the MPIR classes, and a good way to reestablish them\nin a subclass is not yet provided.\n\n@item Templated Expressions\nA subtle difficulty exists when using expressions together with\napplication-defined template functions.  
Consider the following, with @code{T}\nintended to be some numeric type,\n\n@example\ntemplate <class T>\nT fun (const T &, const T &);\n@end example\n\n@noindent\nWhen used with, say, plain @code{mpz_class} variables, it works fine: @code{T}\nis resolved as @code{mpz_class}.\n\n@example\nmpz_class f(1), g(2);\nfun (f, g);    // Good\n@end example\n\n@noindent\nBut when one of the arguments is an expression, it doesn't work.\n\n@example\nmpz_class f(1), g(2), h(3);\nfun (f, g+h);  // Bad\n@end example\n\nThis is because @code{g+h} ends up being a certain expression template type\ninternal to @code{mpirxx.h}, which the C++ template resolution rules are unable\nto automatically convert to @code{mpz_class}.  The workaround is simply to add\nan explicit cast.\n\n@example\nmpz_class f(1), g(2), h(3);\nfun (f, mpz_class(g+h));  // Good\n@end example\n\nSimilarly, within @code{fun} it may be necessary to cast an expression to type\n@code{T} when calling a templated @code{fun2}.\n\n@example\ntemplate <class T>\nvoid fun (T f, T g)\n@{\n  fun2 (f, f+g);     // Bad\n@}\n\ntemplate <class T>\nvoid fun (T f, T g)\n@{\n  fun2 (f, T(f+g));  // Good\n@}\n@end example\n@end table\n\n@node .Net Interface, Custom Allocation, C++ Class Interface, Top\n@comment  node-name,  next,  previous,  up\n@chapter .Net Interface\n@cindex .Net Interface\n@cindex Microsoft.Net\n@cindex Managed Interface\n\nThis chapter describes the Microsoft.Net wrapper around MPIR.\n\nIf you are a .Net developer on MS Windows, using MPIR is possible\nvia the basic managed-to-native interop tooling provided by .Net.\nWhile this would allow access to the full MPIR interface, \nyou would essentially be embedding C code inside whatever .Net language you are using.\nThis would virtually require familiarity with C/C++, \nthe interop artefacts in your code would be distractingly evident,\nand it would be hard to maintain a smooth code style around managed/native transitions.\n\nMPIR offers an alternative that 
addresses these issues: @strong{MPIR.Net}.\nMPIR.Net is a Microsoft Visual Studio solution that interoperates with MPIR\nand exposes a full managed interface built from scratch, for consumption in any .Net language.\nIt internalizes all C-rooted idiosyncrasies and allows you to work with MPIR objects\nthrough managed classes that perform all necessary marshaling behind the scenes.\nIt strives to provide maximum performance by implementing MPIR operations\nwith direct calls to the native routines while not requiring\nyou to sacrifice any of your code style.  It eliminates any requirement of fluency in C,\nyet delivers the performance of native MPIR.  In fact, it can consume any native MPIR build,\nincluding all supported processor-specific builds, and can thus take advantage of the\nentire wealth of assembly-optimized MPIR routines.\n\nMPIR.Net is, however, limited to MS Windows and Visual Studio at this time.  The managed\ninterface is written in Microsoft C++/CLI, which ties you to that specific environment.\nIf you use .Net on Linux and use a compiler other than Visual Studio, MPIR.Net will not\nwork for you, but then again, you may already have better native interop facilities \navailable to you than your Windows colleagues, making MPIR.Net rather moot.\n\nMPIR.Net is bundled with MPIR as an optional feature.  To build it, you still\nneed to build the native MPIR library first.  As you do, you can select the best\nprocessor architecture that matches your requirements.  
Then you build MPIR.Net, and it\nis linked statically to the native MPIR library, producing a managed assembly.\nThus, to build MPIR.Net, you need to be familiar with the MPIR build process on Windows,\nand have a recent version of Visual Studio available (a community edition will suffice).\n\n@menu\n* MPIR.Net Feature Overview::\n* Building MPIR.Net::\n* MPIR.Net Integers::\n* MPIR.Net Rationals::\n* MPIR.Net Floats::\n* MPIR.Net Random Numbers::\n* MPIR.Net Settings::\n@end menu\n\n@node MPIR.Net Feature Overview, Building MPIR.Net, .Net Interface, .Net Interface\n@section MPIR.Net Feature Overview\n\n@noindent\nMPIR.Net exposes the following main classes:\n\n@deftp Class HugeInt\n@deftpx Class HugeRational\n@deftpx Class HugeFloat\n@deftpx Class MpirRandom\n@end deftp\n\nThe standard operators are overloaded to allow arithmetic with these classes.  For example,\n\n@example\nvoid Calculate()\n@{\n  using (var a = new HugeInt(1234))\n  using (var b = new HugeInt(\"-5678\"))\n  using (var c = new HugeInt(a + b))\n  @{\n    Debug.WriteLine(\"Result: @{0@}\", c);\n  @}\n@}\n@end example\n\nMPIR.Net's multi-precision classes implement @code{IDisposable}, and the recommended usage\nfor local instances is as shown above, within a @code{using} clause \nto guarantee native memory clean-up when a variable is disposed.\n\nReferences that go out of scope without having been disposed are subject to the normal\n.Net garbage collection, which in most cases invokes object finalizers, and those in turn\ndeallocate native memory. Applications that don't have memory pressure should\nwork just fine either way, although deterministic disposal is a best practice.\n\nLike MPIR's native @ref{C++ Class Interface}, MPIR.Net implements an expression like\n@code{a.Value = b + c} with a single call to the corresponding native @code{mpz_add},\nwithout using a temporary for the @code{b + c} part.  
More complex expressions that do not have \na single-call native implementation like @code{a.Value = b*c + d*e}, still use temporary variables.\nImportantly, @code{a.Value = a + b*c} and the like will utilize the native @code{mpz_addmul}, etc.\nNote that in all of the above cases the assignment syntax is to set the @code{Value} property; more on that below.\n\nAnother similarity of MPIR.Net with the C++ interface is the deferral of evaluation.\nAll arithmetic operations and many methods produce an expression object rather than an immediate result.\nThis allows expressions of arbitrary complexity to be built.  They are not evaluated until the expression\nis assigned to a destination variable, or when calling a method that produces a primitive (non-MPIR.Net type) result. For example:\n\n@example\nvoid Calculate()\n@{\n  var a = new HugeInt(12345);\n  var b = new HugeInt(67890);\n  var sum = a + b;                // produces an expression\n  var doubleSum = sum * 2;        // produces a new expression\n  bool positive = doubleSum > 0;  // evaluates the doubleSum expression\n  int sumSign = doubleSum.Sign(); // evaluates the doubleSum expression\n  a.Value = doubleSum - 4;        // evaluates the doubleSum expression\n@}\n@end example\n\nHere the addition and multiplication in @code{(a + b) * 2} are computed three times\nbecause they are part of an expression that is consumed\nby three destinations, @code{positive}, @code{sumSign}, and @code{a}.  
\nTo avoid the triple addition, this method should be re-written as:\n\n@example\nvoid Calculate()\n@{\n  var a = new HugeInt(12345);\n  var b = new HugeInt(67890);\n  var sum = a + b;                      // produces an expression\n  var doubleSum = new HugeInt(sum * 2); // evaluates the expression\n  bool positive = doubleSum > 0;        // evaluates the > comparison\n  int sumSign = doubleSum.Sign();       // computes the sign\n  a.Value = doubleSum - 4;              // computes the subtraction\n@}\n@end example\n\nNow the result of @code{(a + b) * 2} is computed once and stored in an intermediate variable,\nwhose value is used in subsequent statements.  \nThis code can be shortened as follows without changing the internal calculation:\n\n@example\nvoid Calculate()\n@{\n  var a = new HugeInt(12345);\n  var b = new HugeInt(67890);\n  var doubleSum = new HugeInt((a + b) * 2); // evaluates the expression\n  var positive = doubleSum > 0;             // evaluates the > comparison\n  var sumSign = doubleSum.Sign();           // computes the sign\n  a.Value = doubleSum - 4;                  // computes the subtraction\n@}\n@end example\n\nThe main idiosyncrasy of MPIR.Net is its assignment pattern.  \nMPIR.Net types are implemented as reference types with value semantics.\nLike .Net Strings, the objects themselves are just lightweight pointers to data allocated elsewhere.\nIn this case, the data is in native memory.\nUnlike Strings, MPIR types are mutable.\n\nValue semantics requires you to be able to code statements like @code{a = b + c}.\nHowever, .Net (outside of C++) does not allow overloading the assignment operator,\nwhile assigning references would necessitate some unnecessary duplication and extra memory allocations,\nrequire reliance on the garbage collector, and prevent the use of @code{mpz_addmul} and the like.\n\nTo solve this problem, MPIR.Net uses the property assignment.\nAll MPIR.Net types have a @code{Value} property.  
\nThe magic of this property is in its setter, which does what an overloaded assignment operator would do in C++.\nSo you write @code{a.Value = b + c} to calculate the sum of @code{b} and @code{c} and store the result in the existing variable @code{a}.\nThis seems to be as close to an overloaded assignment as you can get in .Net, but is fluent enough to become a quick habit,\nand additionally reinforces the concept that an existing object can change its value while reusing internally allocated memory.\n\nSetting @code{Value} evaluates the expression being assigned.  Since at this point the destination is known,\n@code{mpz_addmul} and similar can be recognized and invoked.\n\nReading this property is less interesting,\nas it's equivalent to but wordier than using the reference itself, i.e. @code{a + b} is equivalent to @code{a.Value + b.Value}.\nHowever it is still useful for making possible constructs such as @code{a.Value += 5}, @code{a.Value *= 10}, etc.\n\nIf you absent-mindedly type @code{a = b + c} or @code{a *= 10}, these will not compile \nbecause there is no implicit conversion from an expression.\nIf an implicit conversion were defined, such code would incur an extra allocation plus garbage collection,\nmaking it potentially slower than performing the same operations on @code{a.Value}. \nIt would also not compile if the destination were a local variable defined in a @code{using} clause, \nas is the recommended practice for method-local instances.\n\nCare should be taken with the construct @code{var a = b;}. While perfectly legal (and cannot be made otherwise) in .Net, \nthis only creates a copy of the managed reference to the same MPIR.Net object, without any copying of the data.\nIf @code{b} is subsequently disposed, referencing @code{a} will throw an error.\n\nMPIR classes can be intermixed in expressions to some degree.  For example, most arithmetic operations with \nrational operands will accept integers.  
Where mixed operations are defined in MPIR, they are also implemented in MPIR.Net.\nFloats, on the other hand, typically don't accept operands of other types.  There is some cost associated with\ncreating a floating point instance out of an integer, which would not be evident if automatic promotion existed.\nUse explicit constructors to convert instances of one type to new instances of other types,\nor one of the @code{SetTo()} overloads to save the result into an existing instance.\n\nMPIR classes can also be intermixed in expressions with primitive types.  For 64-bit builds, this includes \n@code{long} and @code{ulong}, which correspond to an MPIR limb.  For 32-bit builds, @code{int} and @code{uint}\nare the largest primitive types you can use.  Smaller integer primitives can always be used because they will be promoted by .Net.\n\nConversions back from MPIR classes to primitive types aren't done automatically,\ninstead methods @code{ToLong()}/@code{ToUlong()} for 64-bit builds or @code{ToInt()}/@code{ToUint()} are provided.  \nIntegers also implement @code{GetLimb()}.\n\n@node Building MPIR.Net, MPIR.Net Integers, MPIR.Net Feature Overview, .Net Interface\n@section Building MPIR.Net\n\nTo build MPIR.Net, follow the steps below:\n\n@enumerate\n@item\nGet the sources\n\n@item\nBuild MPIR\n\n@item\nRun MPIR unit tests\n\n@item\nBuild MPIR.Net\n\n@item\nRun MPIR.Net unit tests\n\n@item\nReference MPIR.Net in your managed project\n@end enumerate\n\n@strong{Get the sources}: Clone the MPIR repository on GitHub to get the latest stable MPIR release.\nThis repository includes MPIR.Net.\nOr you can clone the MPIR.Net fork, which will get you the development repository.\n\n@strong{Build MPIR}: Once you have the sources, you will need to build MPIR first. \nRead the MPIR manual, available as a Documentation link on the MPIR page, for full details. 
\nSince MPIR.Net currently requires Windows, you will need to build MPIR for Windows using Microsoft Visual Studio.\nMPIR provides solutions for the three latest versions of Visual Studio, and includes full build instructions.\nYou can select either a generic C build or an optimized build for a specific processor.\nYou must also select the Windows architecture desired (32-bit or 64-bit), and build configuration (debug/release).\nYou will need to build MPIR as Lib, not DLL, to use it with MPIR.Net.\n\n@strong{Run MPIR unit tests}: MPIR contains a full suite of unit tests that you can (and should) execute to validate your build.\nIt is a large and complex project, and many things can go wrong while building from sources.\nBuilding and running the tests only takes a few minutes and might save you a lot of headache.\nNote that you must also build MPIR's C++ interface to run unit tests, however it is not a dependency for MPIR.Net.\n\n@strong{Build MPIR.Net}: Next, load the MPIR.Net solution in Visual Studio. \nIt is located in the MPIR.Net folder, under which there are folders for the different supported Visual Studio versions.\nThe projects are set up to look for the previously built MPIR library in its normal location in the Lib folder.\nYou will need to select the same architecture (x64 or x86) and configuration (debug/release) as when you built MPIR.\nThen simply build the solution, and you are good to go.\n\n@strong{Run MPIR.Net unit tests}: MPIR.Net includes its own suite of unit tests. 
\nBecause MPIR.Net is a wrapper around MPIR, these tests simply ensure that the right routines in MPIR are being called,\nbut do not validate the robustness of the MPIR build itself.\nThus, it is necessary to run both MPIR tests and MPIR.Net tests.\nMPIR.Net tests, though, are easier to run because they are included right in the MPIR.Net solution.\n\nThrough binary compatibility with GMP 5.x, MPIR 2.x inherits a known issue that causes a few\nMPIR.Net tests (2 for x86, 3 for x64) to fail. The issue has been corrected in GMP 6.x, and is expected to be\ncorrected correspondingly in MPIR 3.x. Because this behavior is not intuitive,\nthese tests remain in their current failing state until this is resolved.\n\n@strong{Reference MPIR.Net}: With the MPIR.Net assembly built, you're ready to create your own project \nin a .Net language of your choice, add a reference to MPIR.Net, and take advantage of the great mathematical powers of MPIR!\n\n@node MPIR.Net Integers, MPIR.Net Rationals, Building MPIR.Net, .Net Interface\n@section MPIR.Net Integers\n\n@deftp Class HugeInt : IntegerExpression, IDisposable\nThe MPIR.Net type for the MPIR multi-precision integer is @code{HugeInt}.  \nA closely related type is @code{IntegerExpression}, which is returned from all operators and methods whose\nvalue semantics are to compute another number from the source instance and any arguments.\n@code{HugeInt} derives from @code{IntegerExpression}, and many operations are defined on the expression class.\nOperations defined on @code{HugeInt} but not on @code{IntegerExpression} are typically those that modify the value\nof the source number itself, and thus performing them on an expression is meaningless.\nBecause through inheritance all operations are available on @code{HugeInt}, the descriptions below \ndo not specifically indicate whether each operator or method is defined for expressions, \nor just for @code{HugeInt} instances. 
For the sake of brevity, they are listed as if they were methods of the @code{HugeInt} class.\nVisual Studio provides Intellisense and immediate feedback to help sort out which operations are available\non expressions.\n@end deftp\n\nBelow is a brief summary of the supported multi-precision integer methods and operators.  \nTo avoid repetition, implementation details are omitted.  Since MPIR native functions are called behind the scenes,\nreview @ref{Integer Functions} for further details about the native implementations.\n\n@deftypefn Constructor {} HugeInt ()\n@deftypefnx Constructor {} HugeInt (int/long @var{n})\n@deftypefnx Constructor {} HugeInt (uint/ulong @var{n})\n@deftypefnx Constructor {} HugeInt (double @var{n})\nConstructs a @code{HugeInt} object.  Single-limb constructors vary by architecture,\n32-bit builds take an @code{int} or @code{uint} argument, 64-bit builds take a @code{long} or @code{ulong}.\nAny necessary conversion follows the corresponding C function, for\nexample @code{double} follows @code{mpz_set_d} (@pxref{Assigning Integers}).\n@end deftypefn\n\n@deftypefn Constructor {} HugeInt (string @var{s})\n@deftypefnx Constructor {} HugeInt (string @var{s}, int @var{base})\nConstructs a @code{HugeInt} converted from a string using @code{mpz_set_str}\n(@pxref{Assigning Integers}).  If the string is not a valid integer, an exception is thrown.\n@end deftypefn\n\n@deftypefn Constructor {} HugeInt (IntegerExpression @var{e})\nEvaluates the supplied expression and saves its result to the new instance.\nBecause @code{HugeInt} is derived from @code{IntegerExpression}, this constructor \ncan be used to make a copy of an existing variable, i.e. 
@code{HugeInt a = new HugeInt(b);}\nwithout creating any permanent association between them.\n@end deftypefn\n\n@deftypefn {Static Method} {static HugeInt} Allocate (uint/ulong @var{bits})\n@deftypefnx Method void Reallocate (uint/ulong @var{bits})\nControls the capacity in bits of the allocated integer.\n@end deftypefn\n\n@deftypefn Property int AllocatedSize\nReturns the number of limbs currently allocated.\n@end deftypefn\n\n@deftypefn Method ulong Size ()\nReturns the number of limbs currently used.\n@end deftypefn\n\n@deftypefn Method long GetLimb (mp_size_t @var{index})\nReturns the specified limb.\n@end deftypefn\n\n@deftypefn Method bool FitsUlong () //64-bit builds only\n@deftypefnx Method bool FitsLong () //64-bit builds only\n@deftypefnx Method bool FitsUint ()\n@deftypefnx Method bool FitsInt ()\n@deftypefnx Method bool FitsUshort ()\n@deftypefnx Method bool FitsShort ()\n@deftypefnx Method long ApproximateSizeInBase (int @var{base})\nChecks whether the number would fit in one of the built-in .Net types.\n@end deftypefn\n\n@deftypefn Method string ToString ()\n@deftypefnx Method string ToString (int @var{base})\n@deftypefnx Method string ToString (int @var{base}, bool @var{lowercase})\nReturns the string representation of the number.  The default @code{base} is 10,\nand the parameterless overload is limited to 256 least significant digits by default,\nproducing a leading ellipsis (i.e. ...12345) when the number has more digits. This is done\nto prevent huge numbers from unexpectedly consuming large amounts of memory in the debugger.\nThe maximum number of digits output is configurable via the @code{MpirSettings.ToStringDigits} property,\nwhere zero means unlimited.  
The other overloads always output all digits.\n@end deftypefn\n\n@deftypefn Method int ToInt () //32-bit builds\n@deftypefnx Method uint ToUint () //32-bit builds\n@deftypefnx Method long ToLong () //64-bit builds\n@deftypefnx Method ulong ToUlong () //64-bit builds\n@deftypefnx Method double ToDouble ()\n@deftypefnx Method double ToDouble (@code{out} int/long @var{exp})\nConverts the number to a primitive (built-in) .Net type, assuming it fits,\nwhich can be determined by calling one of the @code{Fits...} methods.\n@end deftypefn\n\n@deftypefn Property IntegerExpression Value\nGetting this property is essentially a no-op, as it returns the object instance itself.\nThis never needs to be done explicitly, but is used implicitly in statements like @code{a.Value += 5;}\n\nSetting the @code{Value} property evaluates the assigned expression and saves the result to the object.\n@end deftypefn\n\n@deftypefn Method void SetTo (int/long @var{value}) // 32/64-bit builds\n@deftypefnx Method void SetTo (uint/ulong @var{value}) // 32/64-bit builds\n@deftypefnx Method void SetTo (double @var{value})\n@deftypefnx Method void SetTo (string @var{value})\n@deftypefnx Method void SetTo (string @var{value}, int @var{base})\n@deftypefnx Method void SetTo (RationalExpression @var{value})\n@deftypefnx Method void SetTo (FloatExpression @var{value})\nSets the value of existing variable from types other than @code{IntegerExpression}.\n@end deftypefn\n\n@deftypefn Method void Swap (HugeInt @var{a})\nSwaps the values of the two objects. This is an @math{O(1)} operation.\n@end deftypefn\n\nArithmetic operators (@code{+}, @code{-}, @code{*}, @code{/}, @code{%}) are overloaded to allow integers to participate\nin expressions much like primitive integers can.  Single-limb primitive types can be used.\nThese operators will also accept @code{RationalExpression} arguments, producing a @code{RationalExpression} result.\nSome expression types expose additional methods, these are listed below.  
\nInvoking these methods does not prevent the expression from participating in further expressions.\n\nExpressions resulting from division or computing a modulo allow setting an explicit rounding mode:\n@example\nc.Value = (a / b).Rounding(RoundingModes.Ceiling) + 4;\nd.Value = (a % b).Rounding(RoundingModes.Floor) + 4;\n@end example\n\nDivision expressions optionally allow the remainder to be saved:\n@example\nc.Value = (a / b).SavingRemainderTo(e) + 4;\n@end example\n\nWhen dividing by a limb, the remainder is a single limb and is saved to an unsigned limb variable.\nHowever, passing this variable as an @code{out} argument would not work because\nof the deferred evaluation.  Instead, a delegate is passed which is called during evaluation:\n@example\nulong/uint remainder; // 64/32-bit builds\nd.Value = (a / 100).SettingRemainderTo(x => remainder = x) + 4;\n@end example\n\nSymmetrically, the modulo expressions (@code{%}) allow the quotient to be saved:\n@example\nc.Value = (a % b).SavingQuotientTo(e).RoundingMode(RoundingModes.Ceiling) + 4;\nulong/uint quotient; // 64/32-bit builds\nd.Value = (a % 100).SettingQuotientTo(x => quotient = x) + 4;\n@end example\n\n@deftypefn Method uint/ulong Mod (uint/ulong @var{divisor})\n@deftypefnx Method uint/ulong Mod (uint/ulong @var{divisor}, RoundingModes @var{roundingMode})\nComputes the absolute value of the remainder from division of the source number by the specified @code{divisor}.\nThis operation differs from using the @code{%} operator by where the result is saved.  
\nThe @code{%} operator returns an expression, and a @code{HugeInt} variable is required to receive\nthe result when the expression is assigned to its @code{Value} property.\nThe @code{Mod} method, on the other hand, computes and returns the remainder immediately\nsince it's a primitive type (single limb), and no destination @code{HugeInt} variable is needed.\n@end deftypefn\n\nOperator @code{^} serves dual purposes: when the right operand is a single limb, it raises the source number to a power,\nif the right operand is an @code{IntegerExpression} it performs a bitwise XOR.\n\nComparison operators (@code{==}, @code{!=}, @code{<}, @code{<=}, @code{>}, @code{>=}) accept @code{IntegerExpression},\nsingle-limb, or double arguments, but do not accept @code{RationalExpression} because that would require an awkward explicit cast\nwhen comparing with null.\n\n@deftypefn Method int CompareTo (IntegerExpression @var{a})\n@deftypefnx Method bool Equals (IntegerExpression @var{a})\nImplement @code{IComparable<IntegerExpression>} and @code{IEquatable<IntegerExpression>} for strongly-typed comparisons.\n@end deftypefn\n\n@deftypefn Method int CompareTo (object @var{a})\n@deftypefnx Method bool Equals (object @var{a})\nImplement @code{IComparable} and equality check for any object.  These accept a @code{RationalExpression} as an argument, \nallowing cross-type comparisons not possible with operators.\n@end deftypefn\n\n@deftypefn Method int GetHashCode ()\nThis @code{object} override computes the hash code.  
This is an @math{O(N)} operation where @math{N} is the number of limbs in use.\nChanging a number's @code{Value} changes its hash code, so this should not be done on any object that has been added\nto a hash table or dictionary.\n@end deftypefn\n\n@deftypefn Method int CompareAbsTo (IntegerExpression @var{a})\n@deftypefnx Method int CompareAbsTo (uint/ulong @var{a})\n@deftypefnx Method int CompareAbsTo (double @var{a})\nCompares the absolute value of the number with the operand.\n@end deftypefn\n\n@deftypefn Method int Sign ()\nReturns the number's sign.\n@end deftypefn\n\nBit shift operators (@code{<<}, @code{>>}) accept an unsigned limb operand.\n\nThe right shift (@code{>>}) expression provides a method to compute the modulo, rather than the default quotient:\n@example\nvar a = new HugeInt(\"0x1357\");\nDebug.WriteLine((a >> 8).ToString(16)); //prints 13\nDebug.WriteLine((a >> 8).Remainder().ToString(16)); //prints 57\n@end example\n\nBitwise operators (@code{&}, @code{|}, @code{^}, @code{~}) are defined for @code{IntegerExpression} operands only.\nNote that operator @code{^} is also defined for a limb operand, and in that case computes a power.\n\n@deftypefn Method bool GetBit (uint/ulong @var{position})\n@deftypefnx Method void SetBit (uint/ulong @var{position}, bool @var{value})\n@deftypefnx Method void ComplementBit (uint/ulong @var{position})\nAllows access to individual bits of the number, using a \"virtual\" two's complement representation.\n@end deftypefn\n\n@deftypefn Method uint/ulong PopCount () // 32/64-bit builds\nGets the number of set bits in the number.\n@end deftypefn\n\n@deftypefn Method uint/ulong HammingDistance (IntegerExpression @var{target}) // 32/64-bit builds\nGets the Hamming distance between this number and @code{target}.\n@end deftypefn\n\n@deftypefn Method uint/ulong FindBit (bool @var{value}, uint/ulong @var{start}) // 32/64-bit builds\nScans the number for the next set or cleared bit (depending on @code{value}).\n@end 
deftypefn\n\n@deftypefn Method IntegerExpression Abs ()\nReturns an expression that computes the absolute value of the number.\n@end deftypefn\n\n@deftypefn Method IntegerExpression DivideExactly (IntegerExpression @var{divisor})\n@deftypefnx Method IntegerExpression DivideExactly (uint/ulong @var{divisor}) // 32/64-bit builds\nReturns an expression that performs a fast division where it is known that there is no remainder.\n@end deftypefn\n\n@deftypefn Method IntegerExpression PowerMod (IntegerExpression @var{power}, IntegerExpression @var{modulo})\n@deftypefnx Method IntegerExpression PowerMod (uint/ulong @var{power}, IntegerExpression @var{modulo}) // 32/64-bit builds\nReturns an expression that raises the source to the specified @code{power} modulo @code{modulo}.\n@end deftypefn\n\n@deftypefn Method bool IsDivisibleBy (IntegerExpression @var{a})\n@deftypefnx Method bool IsDivisibleBy (uint/ulong @var{a})\n@deftypefnx Method bool IsDivisibleByPowerOf2 (uint/ulong @var{power})\n@deftypefnx Method bool IsCongruentTo (IntegerExpression @var{a}, IntegerExpression @var{modulo})\n@deftypefnx Method bool IsCongruentTo (uint/ulong @var{a}, uint/ulong @var{modulo})\n@deftypefnx Method bool IsCongruentToModPowerOf2 (IntegerExpression @var{a}, uint/ulong @var{power})\n@deftypefnx Method bool IsPerfectPower ()\n@deftypefnx Method bool IsPerfectSquare ()\nPerforms various divisibility checks.  
These methods return a bool result, and therefore are executed immediately.\nIf they are called on an expression, the expression is evaluated to a temporary which is discarded immediately afterwards.\nIf you will need this result again, assign the expression to a @code{HugeInt} variable and call the method on it.\n@end deftypefn\n\n@deftypefn Method long Write (Stream @var{stream})\n@deftypefnx Method long Read (Stream @var{stream})\nWrites and reads integers to/from streams using the raw binary format.\n@end deftypefn\n\n@deftypefn Method long Write (TextWriter @var{writer})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base}, bool @var{lowercase})\n@deftypefnx Method long Read (TextReader @var{reader})\n@deftypefnx Method long Read (TextReader @var{reader}, int @var{base})\nWrites and reads integers as text.\n@end deftypefn\n\n@deftypefn Method void Import<T> (T[] @var{data}, long @var{limbCount}, int @var{bytesPerLimb}, LimbOrder @var{limbOrder}, Endianness @var{endianness}, int @var{nails})\n@deftypefnx Method long Export<T> (T[] @var{data}, int @var{bytesPerLimb}, LimbOrder @var{limbOrder}, Endianness @var{endianness}, int @var{nails})\n@deftypefnx Method T[] Export<T> (int @var{bytesPerLimb}, LimbOrder @var{limbOrder}, Endianness @var{endianness}, int @var{nails})\nImports/exports the absolute value of the number to/from arbitrary words of data.\n@end deftypefn\n\n@deftypefn Method bool IsProbablePrime (MpirRandom @var{random}, int @var{probability}, ulong/uint @var{pretested})\n@deftypefnx Method bool IsLikelyPrime (MpirRandom @var{random}, ulong/uint @var{pretested})\n@deftypefnx {Static Method} {static int} Jacobi (HugeInt @var{a}, HugeInt @var{b})\n@deftypefnx {Static Method} {static int} Legendre (HugeInt @var{a}, HugeInt @var{b})\n@deftypefnx {Static Method} {static int} Kronecker (HugeInt @var{a}, HugeInt @var{b})\n@deftypefnx {Static 
Method} {static int} Kronecker (HugeInt @var{a}, int/long @var{b})\n@deftypefnx {Static Method} {static int} Kronecker (HugeInt @var{a}, uint/ulong @var{b})\n@deftypefnx {Static Method} {static int} Kronecker (int/long @var{a}, HugeInt @var{b})\n@deftypefnx {Static Method} {static int} Kronecker (uint/ulong @var{a}, HugeInt @var{b})\n@deftypefnx {Static Method} {static IntegerExpression} Power (uint/ulong @var{value}, uint/ulong @var{power})\n@deftypefnx {Static Method} {static IntegerExpression} Factorial (uint/ulong @var{value})\n@deftypefnx {Static Method} {static IntegerExpression} Factorial (uint/ulong @var{value}, uint/ulong @var{order})\n@deftypefnx {Static Method} {static IntegerExpression} Primorial (uint/ulong @var{value})\n@deftypefnx {Static Method} {static IntegerExpression} Binomial (uint/ulong @var{n}, uint/ulong @var{k})\n@deftypefnx {Static Method} {static IntegerExpression} Binomial (IntegerExpression @var{n}, uint/ulong @var{k})\nPerforms various number-theoretic computations.\n@end deftypefn\n\n@deftypefn {Static Method} {static IntegerSequenceExpression} Fibonacci (int/long @var{n})\n@deftypefnx {Static Method} {static IntegerSequenceExpression} Lucas (int/long @var{n})\nThese two methods return a specialized expression that provides an additional method to optionally\nsave the previous number in the sequence, in addition to the number requested, for example:\n@example\nvar b = new HugeInt();\nvar c = new HugeInt(HugeInt.Fibonacci(300).SavingPreviousTo(b));\n@end example\n@end deftypefn\n\n@deftypefn Method IntegerSquareRootExpression SquareRoot ()\nReturns an expression that evaluates to the square root of the number.  
The expression provides a method\nto optionally save the remainder to a second variable:\n@example\na.Value = b.SquareRoot().SavingRemainderTo(c);\n@end example\n@end deftypefn\n\n@deftypefn Method IntegerRootExpression Root (ulong/uint @var{power})\nReturns an expression that evaluates to the root of the specified @code{power} of the number.  The expression provides two \noptional methods.  One allows to save the remainder to a second variable, and the other allows to set a boolean flag\nindicating whether the root operation was exact. Note that computing the remainder is more costly than just getting an exact flag.\n@example\nbool exact = false;\na.Value = b.Root(3).SavingRemainderTo(r);\nc.Value = d.Root(4).SettingExactTo(x => exact = x);\ne.Value = f.Root(5).SavingRemainderTo(r).SettingExactTo(x => exact = x);\n@end example\n@end deftypefn\n\n@deftypefn Method IntegerExpression NextPrimeCandidate (MpirRandom @var{random})\nReturns an expression that looks for the next possible prime greater than the source number.\n@end deftypefn\n\n@deftypefn Method uint/ulong Gcd (uint/ulong @var{a})\nComputes the greatest common divisor with the specified single-limb number.\n@end deftypefn\n\n@deftypefn Method IntegerGcdExpression Gcd (IntegerExpression @var{a})\nReturns an expression that computes the greatest common divisor of the source number and @code{a}. 
\nProvides a method to optionally calculate the related Diophantine equation multiplier(s):\n@example\nc.Value = a.Gcd(b).SavingDiophantineMultipliersTo(s, t);\n@end example\nIf either @code{s} or @code{t} is null, that coefficient is not computed.\n@end deftypefn\n\n@deftypefn Method IntegerExpression Lcm (IntegerExpression @var{a})\n@deftypefnx Method IntegerExpression Lcm (uint/ulong @var{a})\nComputes the least common multiple with @code{a}.\n@end deftypefn\n\n@deftypefn Method IntegerExpression Invert (IntegerExpression @var{modulo})\nReturns an expression to compute the inverse of the source number modulo @code{modulo}.\n@end deftypefn\n\n@deftypefn Method IntegerRemoveFactorsExpression RemoveFactors (IntegerExpression @var{factor})\nReturns an expression that evaluates to the result of removing all occurrences of the specified @code{factor} from the source number.\nProvides a method to optionally save the number of factors that were removed:\n@example\nulong/uint numberRemoved; // 64/32-bit builds\na.Value = b.RemoveFactors(c);\nd.Value = e.RemoveFactors(f).SavingCountRemovedTo(x => numberRemoved = x);\n@end example\n@end deftypefn\n\n@node MPIR.Net Rationals, MPIR.Net Floats, MPIR.Net Integers, .Net Interface\n@section MPIR.Net Rationals\n\n@deftp Class HugeRational : RationalExpression, IDisposable\nMPIR multi-precision rational numbers are represented by the @code{HugeRational} class,\nalong with its corresponding expression class @code{RationalExpression},\nwhich is returned from all operators and methods whose\nvalue semantics are to compute another number from the source instance and any arguments.\nOperations defined on @code{HugeRational} but not on @code{RationalExpression} are typically those that modify the value\nof the source number itself, and thus performing them on an expression is meaningless.\nBecause through inheritance all operations are available on @code{HugeRational}, the descriptions below \ndo not specifically indicate whether each 
operator or method is defined for expressions, \nor just for @code{HugeRational} instances. For the sake of brevity, they are listed as if they were methods of the @code{HugeRational} class.\nVisual Studio provides Intellisense and immediate feedback to help sort out which operations are available\non expressions.\n@end deftp\n\nBelow is a brief summary of the supported multi-precision rational methods and operators.  \nTo avoid repetition, implementation details are omitted.  Since MPIR native functions are called behind the scenes,\nreview @ref{Rational Number Functions} for further details about the native implementations.\n\n@deftypefn Constructor {} HugeRational ()\n@deftypefnx Constructor {} HugeRational (int/long @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Constructor {} HugeRational (uint/ulong @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Constructor {} HugeRational (IntegerExpression @var{numerator}, IntegerExpression @var{denominator})\n@deftypefnx Constructor {} HugeRational (double @var{n})\nConstructs a @code{HugeRational} object.  Single-limb constructors vary by architecture,\n32-bit builds take @code{int} or @code{uint} arguments, 64-bit builds take @code{long} or @code{ulong}.\nAny necessary conversion follows the corresponding C function, for\nexample @code{double} follows @code{mpq_set_d} (@pxref{Initializing Rationals}).\n@end deftypefn\n\n@deftypefn Constructor {} HugeRational (string @var{s})\n@deftypefnx Constructor {} HugeRational (string @var{s}, int @var{base})\nConstructs a @code{HugeRational} converted from a string using @code{mpq_set_str}\n(@pxref{Initializing Rationals}).  
If the string is not a valid integer or rational, an exception is thrown.\n@end deftypefn\n\nWhen constructing a rational number from a numerator and denominator, including\nthe string constructors where both numerator and denominator are specified, the fraction \nshould be in canonical form, or if not then @code{Canonicalize()} should be called.\n\n@deftypefn Constructor {} HugeRational (IntegerExpression @var{e})\n@deftypefnx Constructor {} HugeRational (RationalExpression @var{e})\n@deftypefnx Constructor {} HugeRational (FloatExpression @var{e})\nEvaluates the supplied expression and saves its result to the new instance.\nBecause multi-precision classes are derived from their corresponding expression classes,\nthese constructors can be used to make a copy of an existing variable, i.e. @code{HugeRational a = new HugeRational(b);}\nwithout creating any permanent association between them.\n@end deftypefn\n\n@deftypefn {Static Method} {static HugeRational} Allocate (uint/ulong @var{numeratorBits}, uint/ulong @var{denominatorBits})\nControls the capacity in bits of the allocated rational.  HugeRational does not have a @code{Reallocate} method,\nbut its numerator and denominator are derived from HugeInt and can thus be reallocated separately.\n@end deftypefn\n\n@deftypefn Method void Canonicalize ()\nPuts a @code{HugeRational} into canonical form, as per @ref{Rational Number\nFunctions}.  All arithmetic operators require their operands in canonical\nform, and will return results in canonical form.\n@end deftypefn\n\n@deftypefn Property HugeInt Numerator\n@deftypefnx Property HugeInt Denominator\nThese read-only properties expose the numerator and denominator for direct manipulation.\nThey return specialized instances of the @code{HugeInt} class\nthat do not own their limb data.  
They override the @code{Dispose()} method with a no-op, so they can be safely passed\naround as normal integers, even to code that tries to dispose of them.\n\nOnce a numerator or denominator is obtained, it remains valid for the life of the @code{HugeRational} instance.\nIt references live data, so for example, if the @code{Value} of the rational is modified, \nit will be visible through a previously obtained numerator/denominator instance.\nConversely, setting the @code{Value} of a numerator or denominator modifies the @code{Value} of its owning rational,\nand if this cannot be known to keep the rational in canonical form, @code{Canonicalize()} must be called\nbefore performing any further MPIR operations on the rational.\n\nMultiple copies can be safely obtained, and reference the same internal structures.\nOnce the @code{HugeRational} is disposed, any numerator and denominator instances obtained from it\nare no longer valid.\n@end deftypefn\n\n@deftypefn Method long ApproximateSizeInBase (int @var{base})\nReturns the number of digits the absolute value of number would take if written in the specified base.\nThe result can be at most 2 characters too long, and allows for a numerator, a division sign, \nand a denominator, but excludes the leading minus sign.\n@end deftypefn\n\n@deftypefn Method string ToString ()\n@deftypefnx Method string ToString (int @var{base})\n@deftypefnx Method string ToString (int @var{base}, bool @var{lowercase})\nReturns the string representation of the number.  The default @code{base} is 10,\nand the parameterless overload is limited to 256 least significant digits by default,\neach for a numerator and a denominator, producing a leading ellipsis (i.e. 
...12345) \nwhen either component has more digits.\nThis is done to prevent huge numbers from unexpectedly consuming large amounts of memory in the debugger.\nThe maximum number of digits output is configurable via the @code{MpirSettings.ToStringDigits} property,\nwhere zero means unlimited.  The other overloads always output all digits.\n@end deftypefn\n\n@deftypefn Method double ToDouble ()\nConverts the number to a double, possibly truncated.\n@end deftypefn\n\n@deftypefn Property RationalExpression Value\nGetting this property is essentially a no-op, as it returns the object instance itself.\nThis never needs to be done explicitly, but is used implicitly in statements like @code{a.Value += 5;}\n\nSetting the @code{Value} property evaluates the assigned expression and saves the result to the object.\n@end deftypefn\n\n@deftypefn Method void SetTo (int/long @var{value}) // 32/64-bit builds\n@deftypefnx Method void SetTo (int/long @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Method void SetTo (uint/ulong @var{value})\n@deftypefnx Method void SetTo (uint/ulong @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Method void SetTo (double @var{value})\n@deftypefnx Method void SetTo (string @var{value})\n@deftypefnx Method void SetTo (string @var{value}, int @var{base})\n@deftypefnx Method void SetTo (IntegerExpression @var{value})\n@deftypefnx Method void SetTo (IntegerExpression @var{numerator}, IntegerExpression @var{denominator})\n@deftypefnx Method void SetTo (FloatExpression @var{value})\nSets the value of existing variable from types other than @code{RationalExpression}.\nWhen setting both the numerator and denominator, canonicalization must be\nmanaged explicitly.\n@end deftypefn\n\n@deftypefn Method void Swap (HugeRational @var{a})\nSwaps the values of the two objects. This is an @math{O(1)} operation. 
Any existing numerators and denominators\nremain associated with the object on which they were obtained, and reflect its new value.\n@end deftypefn\n\nArithmetic operators (@code{+}, @code{-}, @code{*}, @code{/}) are overloaded to allow rationals to participate\nin expressions much like primitive integers can.  Single-limb primitive types can be used.\nThese operators will also accept @code{IntegerExpression} arguments, \nand will automatically promote them.  In expressions, promotion of an @code{IntegerExpression} \nto a @code{RationalExpression} is an @math{O(1)} operation.  Of course, when constructing a rational\nfrom an integer, a copy is made so this becomes @math{O(N)}.\n\nDue to the rationals' nature, division is always exact (there is no rounding)\nand the modulo operator (@code{%}) is not defined.  \nAlso not defined are the bit shift operators (@code{<<}, @code{>>}),\nand the bitwise operators (@code{&}, @code{|}, @code{^}, @code{~}).\n\nOperator @code{^} raises the source number to the specified power.\n\nComparison operators (@code{==}, @code{!=}, @code{<}, @code{<=}, @code{>}, @code{>=}) accept @code{RationalExpression},\nsingle-limb, or double arguments, but do not accept integer or float expressions because that would require an awkward explicit cast\nwhen comparing with null.  Use the @code{CompareTo(object)} method for cross-comparisons.\n\n@deftypefn Method int CompareTo (RationalExpression @var{a})\n@deftypefnx Method bool Equals (RationalExpression @var{a})\nImplement @code{IComparable<RationalExpression>} and @code{IEquatable<RationalExpression>} for strongly-typed comparisons.\n@end deftypefn\n\n@deftypefn Method int CompareTo (object @var{a})\n@deftypefnx Method bool Equals (object @var{a})\nImplement @code{IComparable} and equality check for any object.  
\nFor rationals, these methods support any expression type (integer, rational, or float).\n@end deftypefn\n\n@deftypefn Method bool Equals (int/long @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Method bool Equals (uint/ulong @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Method int CompareTo (int/long @var{numerator}, uint/ulong @var{denominator})\n@deftypefnx Method int CompareTo (uint/ulong @var{numerator}, uint/ulong @var{denominator})\nSingle-limb comparisons for rationals take two arguments.\n@end deftypefn\n\n@deftypefn Method int GetHashCode ()\nThis @code{object} override computes the hash code.  This is an @math{O(N)} operation where @math{N} is the number of limbs in use\nin the numerator and denominator combined.\nChanging a number's @code{Value} changes its hash code, so this should not be done on any object that has been added\nto a hash table or dictionary.\n@end deftypefn\n\n@deftypefn Method int Sign ()\nReturns the number's sign.\n@end deftypefn\n\n@deftypefn Method RationalExpression Abs ()\nReturns an expression that computes the absolute value of the number.\n@end deftypefn\n\n@deftypefn Method RationalExpression Invert ()\nReturns an expression that computes the inverse of the number.\n@end deftypefn\n\n@deftypefn Method long Write (Stream @var{stream})\n@deftypefnx Method long Read (Stream @var{stream})\nWrites and reads rationals to/from streams using the raw binary format.\n@end deftypefn\n\n@deftypefn Method long Write (TextWriter @var{writer})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base}, bool @var{lowercase})\n@deftypefnx Method long Read (TextReader @var{reader})\n@deftypefnx Method long Read (TextReader @var{reader}, int @var{base})\nWrites and reads rationals as text.\n@end deftypefn\n\nThere are no @code{Import}/@code{Export} methods, but they can of course be invoked\non the numerator and/or 
denominator.\n\n@code{RationalExpression} does not have any specialized subclasses, as there are no\noperations on the rational type that require additional inputs beyond the left and right \noperator operands.\n\n@node MPIR.Net Floats, MPIR.Net Random Numbers, MPIR.Net Rationals, .Net Interface\n@section MPIR.Net Floats\n\n@deftp Class HugeFloat : FloatExpression, IDisposable\nThe MPIR.Net class for multi-precision floating point numbers is @code{HugeFloat}, and its\ncorresponding expression class is @code{FloatExpression},\nwhich is returned from all operators and methods whose\nvalue semantics are to compute another number from the source instance and any arguments. \n@code{HugeFloat} derives from @code{FloatExpression}, and many operations are defined on the expression class.\nOperations defined on @code{HugeFloat} but not on @code{FloatExpression} are typically those that modify the value\nof the source number itself, and thus performing them on an expression is meaningless.\nBecause through inheritance all operations are available on HugeFloat, the descriptions below \ndo not specifically indicate whether each operator or method is defined for expressions, \nor just for @code{HugeFloat} instances. For the sake of brevity,\nthey are listed as if they were methods of the @code{HugeFloat} class.\nVisual Studio provides Intellisense and immediate feedback to help sort out which operations are available\non expressions.\n@end deftp\n\nBelow is a brief summary of the supported multi-precision float methods and operators.  \nTo avoid repetition, implementation details are omitted.  Since MPIR native functions are called behind the scenes,\nreview @ref{Floating-point Functions} for further details about the native implementations.\n\n@deftypefn {Static Property} {static uint/ulong} DefaultPrecision\nGets or sets the default precision of the floating point mantissa, in bits.\nIf the value is not a multiple of limb size, the actual precision will be\nrounded up.  
All newly constructed @code{HugeFloat} objects that don't\nexplicitly specify precision will use this default.  Previously constructed\nobjects are unaffected.  The initial default precision is 2 limbs.\n\nWhen an expression is evaluated, it is either because it is being assigned to some\ndestination variable (e.g. @code{a.Value = b + c;}) or a primitive-computing method\nis being called (e.g. @code{int s = (b + c).Sign();}) In the former case,\nthe precision of the destination is used for all computations and temporaries\nduring expression evaluation.  In the latter case, there is no destination\nso the @code{DefaultPrecision} is used.\n@end deftypefn\n\n@deftypefn Constructor {} HugeFloat ()\n@deftypefnx Constructor {} HugeFloat (int/long @var{value})\n@deftypefnx Constructor {} HugeFloat (uint/ulong @var{value})\n@deftypefnx Constructor {} HugeFloat (double @var{n})\nConstructs a @code{HugeFloat} object.  Single-limb constructors vary by architecture,\n32-bit builds take @code{int} or @code{uint} arguments, 64-bit builds take @code{long} or @code{ulong}.\nAny necessary conversion follows the corresponding C function, for\nexample @code{double} follows @code{mpf_set_d} (@pxref{Initializing Floats}).\n@end deftypefn\n\n@deftypefn Constructor {} HugeFloat (string @var{s})\n@deftypefnx Constructor {} HugeFloat (string @var{s}, int @var{base})\n@deftypefnx Constructor {} HugeFloat (string @var{s}, int @var{base}, bool @var{exponentInDecimal})\nConstructs a @code{HugeFloat} converted from a string using @code{mpf_set_str}\n(@pxref{Initializing Floats}).  
If the string is not a valid integer or floating point number, an exception is thrown.\n@end deftypefn\n\n@deftypefn Constructor {} HugeFloat (IntegerExpression @var{value})\n@deftypefnx Constructor {} HugeFloat (RationalExpression @var{value})\n@deftypefnx Constructor {} HugeFloat (FloatExpression @var{value})\nEvaluates the supplied expression and saves its result to the new instance.\nBecause multi-precision classes are derived from their corresponding expression classes,\nthese constructors can be used to make a copy of an existing variable, i.e. @code{HugeFloat a = new HugeFloat(b);}\nwithout creating any permanent association between them.\n@end deftypefn\n\n@deftypefn {Static Method} {static HugeFloat} Allocate (uint/ulong @var{precision})\n@deftypefnx Method void Reallocate (uint/ulong @var{precision})\nControls the allocated precision in bits of the new or existing @code{HugeFloat}.\n@end deftypefn\n\n@deftypefn Property uint/ulong AllocatedPrecision\nGets the precision in bits that is currently allocated for internal storage of the mantissa.\nThe precision actually in effect, used in calculations, is initially the same \nbut may be reduced by setting the @code{Precision} property.\n@end deftypefn\n\n@deftypefn Property uint/ulong Precision\nGets or sets the effective precision of the number without changing the memory allocated.\nThe number of bits cannot exceed the precision with which the variable was initialized or last reallocated.\nThe value of the number is unchanged, and in particular if it \npreviously had a higher precision it will retain that higher precision.\nNew values assigned to the @code{Value} property will use the new precision.\nThe number can be safely disposed after modifying its @code{Precision}\n(unlike the native MPIR, which requires you to restore the precision\nto the allocated value before the memory can be freed).\n@end deftypefn\n\n@deftypefn Method bool FitsUlong () //64-bit builds only\n@deftypefnx Method bool FitsLong () 
//64-bit builds only\n@deftypefnx Method bool FitsUint ()\n@deftypefnx Method bool FitsInt ()\n@deftypefnx Method bool FitsUshort ()\n@deftypefnx Method bool FitsShort ()\nChecks whether the number would fit in one of the built-in .Net types.\n@end deftypefn\n\n@deftypefn Method bool IsInteger ()\nChecks whether the number is a whole integer.\n@end deftypefn\n\n@deftypefn Method string ToString ()\n@deftypefnx Method string ToString (int @var{base})\n@deftypefnx Method string ToString (int @var{base}, bool @var{lowercase})\n@deftypefnx Method string ToString (int @var{base}, bool @var{lowercase}, bool @var{exponentInDecimal})\nReturns the string representation of the number.  The default @code{base} is 10,\nand the parameterless overload is limited to 256 mantissa digits by default. This is done\nto prevent huge numbers from unexpectedly consuming large amounts of memory in the debugger.\nThe maximum number of digits output is configurable via the @code{MpirSettings.ToStringDigits} property,\nwhere zero means unlimited.  
@code{MpirSettings.ToStringDigits} applies to integers and rationals as well.\nThe other overloads always output all digits.\n@end deftypefn\n\n@deftypefn Method int ToInt () //32-bit builds\n@deftypefnx Method uint ToUint () //32-bit builds\n@deftypefnx Method long ToLong () //64-bit builds\n@deftypefnx Method ulong ToUlong () //64-bit builds\n@deftypefnx Method double ToDouble ()\n@deftypefnx Method double ToDouble (@code{out} int/long @var{exp})\nConverts the number to a primitive (built-in) .Net type, assuming it fits,\nwhich can be determined by calling one of the @code{Fits...} methods.\n@end deftypefn\n\n@deftypefn Property FloatExpression Value\nGetting this property is essentially a no-op, as it returns the object instance itself.\nThis never needs to be done explicitly, but is used implicitly in statements like @code{a.Value += 5;}\n\nSetting the @code{Value} property evaluates the assigned expression and saves the result to the object.\n@end deftypefn\n\n@deftypefn Method void SetTo (int/long @var{value}) // 32/64-bit builds\n@deftypefnx Method void SetTo (uint/ulong @var{value}) // 32/64-bit builds\n@deftypefnx Method void SetTo (double @var{value})\n@deftypefnx Method void SetTo (string @var{value})\n@deftypefnx Method void SetTo (string @var{value}, int @var{base})\n@deftypefnx Method void SetTo (string @var{value}, int @var{base}, bool @var{exponentInDecimal})\n@deftypefnx Method void SetTo (IntegerExpression @var{value})\n@deftypefnx Method void SetTo (RationalExpression @var{value})\nSets the value of existing variable from types other than @code{FloatExpression}.\n@end deftypefn\n\n@deftypefn Method void Swap (HugeFloat @var{a})\nSwaps the values (and precisions) of the two objects. This is an @math{O(1)} operation.\n@end deftypefn\n\nArithmetic operators (@code{+}, @code{-}, @code{*}, @code{/}) and bit shifts (@code{<<}, @code{>>})\nare overloaded to allow floats to participate\nin expressions much like primitive types can.  
Single-limb primitive types can be used.\nThese operators do not accept integer or rational expressions.  \nThere is some cost of instantiating a floating point number from another multi-precision type, \nso to make this point clear MPIR.Net forces you to use explicit constructors or assignments for this conversion.\n\nThe modulo operator (@code{%}) and the bitwise operators (@code{&}, @code{|}, @code{^}, @code{~}) are not defined.\n\nOperator @code{^} raises the source number to the specified power.\n\nComparison operators (@code{==}, @code{!=}, @code{<}, @code{<=}, @code{>}, @code{>=}) accept @code{FloatExpression},\nsingle-limb, or double arguments, but do not accept integer or rational expressions\nbecause that would require an awkward explicit cast when comparing with null.\n\n@deftypefn Method int CompareTo (FloatExpression @var{a})\n@deftypefnx Method bool Equals (FloatExpression @var{a})\nImplement @code{IComparable<FloatExpression>} and @code{IEquatable<FloatExpression>} for strongly-typed comparisons.\n@end deftypefn\n\n@deftypefn Method int CompareTo (object @var{a})\n@deftypefnx Method bool Equals (object @var{a})\nImplement @code{IComparable} and equality check for any object.  These support only float expressions or .Net primitive types. \nWhen this method is called on a @code{HugeFloat} object, comparison is performed to the\nprecision of the object.  When called on an expression, comparison is performed to the \ndefault precision.\n@end deftypefn\n\n@deftypefn Method int GetHashCode ()\nThis @code{object} override computes the hash code.  This is an @math{O(N)} operation where @math{N} is the number of limbs allocated.\nChanging a number's @code{Value} changes its hash code, so this should not be done on any object that has been added\nto a hash table or dictionary.\n@end deftypefn\n\n@deftypefn Method bool Equals (object @var{a}, uint/ulong @var{precision})\nChecks for equality using the specified precision.  
The argument @code{a} can be\na @code{FloatExpression} or a primitive type. \n@end deftypefn\n\n@deftypefn Method FloatExpression RelativeDifferenceFrom (FloatExpression @var{a})\nReturns an expression that computes @math{@GMPabs{@var{this}-@var{a}}/@var{this}}\n@end deftypefn\n\n@deftypefn Method FloatExpression Abs ()\n@deftypefnx Method FloatExpression SquareRoot ()\n@deftypefnx {Static Method} {static FloatExpression} SquareRoot (uint/ulong a)\n@deftypefnx Method FloatExpression Floor ()\n@deftypefnx Method FloatExpression Ceiling ()\n@deftypefnx Method FloatExpression Truncate ()\n@deftypefnx Method int Sign ()\nPerform various floating-point operations.\n@end deftypefn\n\n@deftypefn Method long Write (TextWriter @var{writer})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base})\n@deftypefnx Method long Write (TextWriter @var{writer}, int @var{base}, int @var{maxDigits}, bool @var{lowercase}, bool @var{exponentInDecimal})\n@deftypefnx Method long Read (TextReader @var{reader})\n@deftypefnx Method long Read (TextReader @var{reader}, int @var{base})\n@deftypefnx Method long Read (TextReader @var{reader}, int @var{base}, bool @var{exponentInDecimal})\nWrites and reads floats as text.\n@end deftypefn\n\n@node MPIR.Net Random Numbers, MPIR.Net Settings, MPIR.Net Floats, .Net Interface\n@section MPIR.Net Random Numbers\n\n@deftp Class MpirRandom : IDisposable\nThe MPIR.Net class that wraps the MPIR random number functions is @code{MpirRandom}.\nIt holds an algorithm selection and current state, as per @code{gmp_randstate_t}.\nAs the multi-precision classes, @code{MpirRandom} allocates unmanaged memory,\nand should be disposed of via its @code{IDisposable} implementation when no longer in use.\n@end deftp\n\n@deftypefn {Static Method} {static MpirRandom} Default ()\n@deftypefnx {Static Method} {static MpirRandom} MersenneTwister ()\n@deftypefnx {Static Method} {static MpirRandom} LinearCongruential (HugeInt @var{a}, ulong/uint @var{c}, ulong/uint 
@var{m})\n@deftypefnx {Static Method} {static MpirRandom} LinearCongruential (ulong/uint @var{size})\nIn lieu of constructors, @code{MpirRandom} uses more descriptive static factory methods to create new instances\nof specific random number generator algorithms.\n@end deftypefn\n\n@deftypefn Method MpirRandom Copy ()\nCreates a new random number generator with a copy of the algorithm and state from the source instance.\n@end deftypefn\n\n@deftypefn Method void Seed (ulong/uint @var{seed})\n@deftypefnx Method void Seed (HugeInt @var{seed})\nSets an initial seed value into the random number generator.\n@end deftypefn\n\n@deftypefn Method ulong/uint GetLimbBits (ulong/uint @var{bitCount})\nGenerates a uniformly distributed random number of @code{bitCount} bits,\ni.e. in the range 0 to @math{2^@W{bitCount-1}} inclusive.\n@end deftypefn\n\n@deftypefn Method ulong/uint GetLimb (ulong/uint @var{max})\nGenerates a uniformly distributed random number in the range 0 to @math{@W{max-1}} inclusive.\n@end deftypefn\n\n@deftypefn Method IntegerExpression GetIntBits (ulong/uint @var{bitCount})\n@deftypefnx Method IntegerExpression GetIntBitsChunky (ulong/uint @var{bitCount})\nReturns an expression that generates a uniformly distributed random integer in the range 0 to @math{2^@W{bitCount-1}}, inclusive.\n@end deftypefn\n\n@deftypefn Method IntegerExpression GetInt (IntegerExpression @var{max})\nReturns an expression that generates a uniformly distributed random number in the range 0 to @math{@W{max-1}} inclusive.\n@end deftypefn\n\n@deftypefn Method FloatExpression GetFloat ()\nReturns an expression that generates a uniformly distributed random float\nin the range @math{0 @le{} n < 1}.  
As with all float expressions, precision of the destination is used when available.\n@end deftypefn\n\n@deftypefn Method FloatExpression GetFloatBits (ulong/uint @var{bitCount})\nReturns an expression that generates a uniformly distributed random float\nin the range @math{0 @le{} n < 1}, with the specified number of significant mantissa bits.\n@end deftypefn\n\n@deftypefn Method FloatExpression GetFloatChunky (int @var{maxExponent})\nReturns an expression that generates a random float with long strings of zeros and ones in the binary representation,\nusing the precision of the destination. The argument is the maximum absolute value for the\nexponent of the generated number, expressed in limbs.\n@end deftypefn\n\n@deftypefn Method FloatExpression GetFloatLimbsChunky (long @var{limbCount}, int @var{maxExponent})\nReturns an expression that generates a random float with long strings of zeros and ones in the binary representation, \nand the specified number of significant limbs in the mantissa.\n@end deftypefn\n\n\n@node MPIR.Net Settings, , MPIR.Net Random Numbers, .Net Interface\n@section MPIR.Net Settings\n\n@deftp {Static Class} MpirSettings\nThis static class contains several members that describe or control \nvarious default behaviors of the other MPIR.Net classes.\n@end deftp\n\n@deftypefn Constant int BITS_PER_LIMB\nRepresents the total number of bits in a single MPIR limb, including data bits and nail bits.\nThis will be either 32 or 64, depending on your selected build architecture.\n@end deftypefn\n\n@deftypefn Constant int NAIL_BITS_PER_LIMB\nRepresents the number of nail bits in a single MPIR limb.  
Nail bits are used internally by MPIR.\n@end deftypefn\n\n@deftypefn Constant int USABLE_BITS_PER_LIMB\nRepresents the number of data bits in a single MPIR limb.\n@end deftypefn\n\n@deftypefn Constant Version MPIR_VERSION\nRepresents the version of the underlying MPIR library\n@end deftypefn\n\n@deftypefn Constant Version GMP_VERSION\nRepresents the version of GMP with which the underlying MPIR library is compatible\n@end deftypefn\n\n@deftypefn {Static Property} RoundingModes RoundingMode\nGets or sets the default rounding mode used for MPIR integer division operations that don't explicitly specify a rounding mode.\nDoes not affect rational or float operations.  The default value is @code{Truncate}.\n@end deftypefn\n\n@deftypefn {Static Property} int ToStringDigits\nGets or sets the maximum number of digits the @code{object.ToString()} method override will output.\nIf an integer number is longer than this number of digits, it will be output as \"@code{[-]...NNNNN}\" with the least significant digits shown.\nRational numbers apply the limit separately to the numerator and denominator.  \nFloats output the most significant digits, and there is no ellipsis.\n\nThe primary purpose of this setting is to prevent accidental allocation of large memory blocks while inspecting variables in the debugger.\nThe default value is 256.  
Setting this property to 0 causes all digits to be output.\n@end deftypefn\n\n@node Custom Allocation, Language Bindings, .Net Interface, Top\n@comment  node-name,  next,  previous,  up\n@chapter Custom Allocation\n@cindex Custom allocation\n@cindex Memory allocation\n@cindex Allocation of memory\n\nBy default MPIR uses @code{malloc}, @code{realloc} and @code{free} for memory\nallocation, and if they fail MPIR prints a message to the standard error output\nand terminates the program.\n\nAlternate functions can be specified, to allocate memory in a different way or\nto have a different error action on running out of memory.\n\n@deftypefun void mp_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (*@var{free_func_ptr}) (void *, size_t))\nReplace the current allocation functions from the arguments.  If an argument\nis @code{NULL}, the corresponding default function is used.\n\nThese functions will be used for all memory allocation done by MPIR, apart from\ntemporary space from @code{alloca} if that function is available and MPIR is\nconfigured to use it (@pxref{Build Options}).\n\n@strong{Be sure to call @code{mp_set_memory_functions} only when there are no\nactive MPIR objects allocated using the previous memory functions!  
Usually\nthat means calling it before any other MPIR function.}\n@end deftypefun\n\nThe functions supplied should fit the following declarations:\n\n@deftypevr Function {void *} allocate_function (size_t @var{alloc_size})\nReturn a pointer to newly allocated space with at least @var{alloc_size}\nbytes.\n@end deftypevr\n\n@deftypevr Function {void *} reallocate_function (void *@var{ptr}, size_t @var{old_size}, size_t @var{new_size})\nResize a previously allocated block @var{ptr} of @var{old_size} bytes to be\n@var{new_size} bytes.\n\nThe block may be moved if necessary or if desired, and in that case the\nsmaller of @var{old_size} and @var{new_size} bytes must be copied to the new\nlocation.  The return value is a pointer to the resized block, that being the\nnew location if moved or just @var{ptr} if not.\n\n@var{ptr} is never @code{NULL}, it's always a previously allocated block.\n@var{new_size} may be bigger or smaller than @var{old_size}.\n@end deftypevr\n\n@deftypevr Function void free_function (void *@var{ptr}, size_t @var{size})\nDe-allocate the space pointed to by @var{ptr}.\n\n@var{ptr} is never @code{NULL}, it's always a previously allocated block of\n@var{size} bytes.\n@end deftypevr\n\nA @dfn{byte} here means the unit used by the @code{sizeof} operator.\n\nThe @var{old_size} parameters to @var{reallocate_function} and\n@var{free_function} are passed for convenience, but of course can be ignored\nif not needed.  The default functions using @code{malloc} and friends for\ninstance don't use them.\n\nNo error return is allowed from any of these functions, if they return then\nthey must have performed the specified operation.  In particular note that\n@var{allocate_function} or @var{reallocate_function} mustn't return\n@code{NULL}.\n\nGetting a different fatal error action is a good use for custom allocation\nfunctions, for example giving a graphical dialog rather than the default print\nto @code{stderr}.  
How much is possible when genuinely out of memory is\nanother question though.\n\nThere's currently no defined way for the allocation functions to recover from\nan error such as out of memory, they must terminate program execution.  A\n@code{longjmp} or throwing a C++ exception will have undefined results.  This\nmay change in the future.\n\nMPIR may use allocated blocks to hold pointers to other allocated blocks.  This\nwill limit the assumptions a conservative garbage collection scheme can make.\n\nAny custom allocation functions must align pointers to limb boundaries. Thus if a limb is eight bytes (e.g. on x86_64), then all blocks must be aligned to eight byte boundaries. Check the configuration options for the custom allocation library in use. It is not necessary to align blocks to SSE boundaries even when SSE code is used. All MPIR assembly routines assume limb boundary alignment only (which is the default for most standard memory managers).\n\nSince the default MPIR allocation uses @code{malloc} and friends, those\nfunctions will be linked in even if the first thing a program does is an\n@code{mp_set_memory_functions}.  It's necessary to change the MPIR sources if\nthis is a problem.\n\n@sp 1\n@deftypefun void mp_get_memory_functions (@* void *(**@var{alloc_func_ptr}) (size_t), @* void *(**@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (**@var{free_func_ptr}) (void *, size_t))\nGet the current allocation functions, storing function pointers to the\nlocations given by the arguments.  
If an argument is @code{NULL}, that\nfunction pointer is not stored.\n\n@need 1000\nFor example, to get just the current free function,\n\n@example\nvoid (*freefunc) (void *, size_t);\n\nmp_get_memory_functions (NULL, NULL, &freefunc);\n@end example\n@end deftypefun\n\n@node Language Bindings, Algorithms, Custom Allocation, Top\n@chapter Language Bindings\n@cindex Language bindings\n@cindex Other languages\n\nThe following packages and projects offer access to MPIR from languages other\nthan C, though perhaps with varying levels of functionality and efficiency.\n\n@c  @spaceuref{U} is the same as @uref{U}, but with a couple of extra spaces\n@c  in tex, just to separate the URL from the preceding text a bit.\n@iftex\n@macro spaceuref {U}\n@ @ @uref{\\U\\}\n@end macro\n@end iftex\n@ifnottex\n@macro spaceuref {U}\n@uref{\\U\\}\n@end macro\n@end ifnottex\n\n@sp 1\n@table @asis\n@item C++\n@itemize @bullet\n@item\nMPIR C++ class interface, @pxref{C++ Class Interface} @* Straightforward\ninterface, expression templates to eliminate temporaries.\n@item\nALP @spaceuref{http://www-sop.inria.fr/saga/logiciels/ALP/} @* Linear algebra and\npolynomials using templates.\n@item\nCLN @spaceuref{http://www.ginac.de/CLN/} @* High level classes for arithmetic.\n@item\nLiDIA @spaceuref{http://www.informatik.tu-darmstadt.de/TI/LiDIA/} @* A C++\nlibrary for computational number theory.\n@item\nLinbox @spaceuref{http://www.linalg.org/} @* Sparse vectors and matrices.\n@item\nNTL @spaceuref{http://www.shoup.net/ntl/} @* A C++ number theory library.\n@end itemize\n\n@item Eiffel\n@itemize @bullet\n@item\nEiffel Interface @spaceuref{http://www.eiffelroom.org/node/407} @* An Eiffel Interface to MPFR, MPC and MPIR by Chris Saunders.\n@end itemize\n\n@item Fortran\n@itemize @bullet\n@item\nOmni F77 @spaceuref{http://phase.hpcc.jp/Omni/home.html} @* Arbitrary\nprecision floats.\n@end itemize\n\n@item Haskell\n@itemize @bullet\n@item\nGlasgow Haskell Compiler 
@spaceuref{http://www.haskell.org/ghc/}\n@end itemize\n\n@item Java\n@itemize @bullet\n@item\nKaffe @spaceuref{http://www.kaffe.org/}\n@end itemize\n\n@item Lisp\n@itemize @bullet\n@item\nEmbeddable Common Lisp @spaceuref{http://ecls.sourceforge.net/download.html}\n@item\nGNU Common Lisp @spaceuref{http://www.gnu.org/software/gcl/gcl.html}\n@item\nLibrep @spaceuref{http://librep.sourceforge.net/}\n@item\n@c  FIXME: When there's a stable release with gmp support, just refer to it\n@c  rather than bothering to talk about betas.\nXEmacs (21.5.18 beta and up) @spaceuref{http://www.xemacs.org} @* Optional\nbig integers, rationals and floats using MPIR.\n@end itemize\n\n@item M4\n@itemize @bullet\n@item\n@c  FIXME: When there's a stable release with gmp support, just refer to it\n@c  rather than bothering to talk about betas.\nGNU m4 betas @spaceuref{http://www.seindal.dk/rene/gnu/} @* Optionally provides\nan arbitrary precision @code{mpeval}.\n@end itemize\n\n@item ML\n@itemize @bullet\n@item\nMLton compiler @spaceuref{http://mlton.org/}\n@end itemize\n\n@item Objective Caml\n@itemize @bullet\n@item\nNumerix @spaceuref{http://pauillac.inria.fr/~quercia/} @* Optionally using\nGMP.\n@end itemize\n\n@item Oz\n@itemize @bullet\n@item\nMozart @spaceuref{http://www.mozart-oz.org/}\n@end itemize\n\n@item Pascal\n@itemize @bullet\n@item\nGNU Pascal Compiler @spaceuref{http://www.gnu-pascal.de/} @* GMP unit.\n@item\nNumerix @spaceuref{http://pauillac.inria.fr/~quercia/} @* For Free Pascal,\noptionally using GMP.\n@end itemize\n\n@item Perl\n@itemize @bullet\n@item\nGMP module, see @file{demos/perl} on the MPIR website.\n@item\nMath::GMP @spaceuref{http://www.cpan.org/} @* Compatible with Math::BigInt, but\nnot as many functions as the GMP module above.\n@item\nMath::BigInt::GMP @spaceuref{http://www.cpan.org/} @* Plug Math::GMP into\nnormal Math::BigInt operations.\n@end itemize\n\n@item PHP\n@itemize @bullet\n@item\nmpz module in the main distribution, 
@uref{http://php.net/}\n@end itemize\n\n@need 1000\n@item Pike\n@itemize @bullet\n@item\nmpz module in the standard distribution, @uref{http://pike.ida.liu.se/}\n@end itemize\n\n@need 500\n@item Prolog\n@itemize @bullet\n@item\nSWI Prolog @spaceuref{http://www.swi-prolog.org/} @*\nArbitrary precision floats.\n@end itemize\n\n@item Python\n@itemize @bullet\n@item\nmpz module in the standard distribution, @uref{http://www.python.org/}\n@item\nGMPY @uref{http://gmpy.sourceforge.net/}\n@end itemize\n\n@item Scheme\n@itemize @bullet\n@item\nGNU Guile (upcoming 1.8) @spaceuref{http://www.gnu.org/software/guile/guile.html}\n@item\nRScheme @spaceuref{http://www.rscheme.org/}\n@c\n@c  For reference, MzScheme uses some of gmp, but (as of version 205) it only\n@c  has copies of some of the generic C code, and we don't consider that a\n@c  language binding to gmp.\n@c\n@end itemize\n\n@item Smalltalk\n@itemize @bullet\n@item\nGNU Smalltalk @spaceuref{http://www.smalltalk.org/versions/GNUSmalltalk.html}\n@end itemize\n\n@item Other\n@itemize @bullet\n@item\nALGLIB @uref{http://www.alglib.net/} @* Numerical analysis and data processing\nlibrary.\n@item\nAxiom @uref{http://savannah.nongnu.org/projects/axiom} @* Computer algebra\nusing GCL.\n@item\nGiNaC @spaceuref{http://www.ginac.de/} @* C++ computer algebra using CLN.\n@item\nGOO @spaceuref{http://www.googoogaga.org/} @* Dynamic object oriented\nlanguage.\n@item\nMaxima @uref{http://www.ma.utexas.edu/users/wfs/maxima.html} @* Macsyma\ncomputer algebra using GCL.\n@item\nQ @spaceuref{http://q-lang.sourceforge.net/} @* Equational programming system.\n@item\nRegina @spaceuref{http://regina.sourceforge.net/} @* Topological calculator.\n@item\nSage @spaceuref{http://www.sagemath.org/} @* Computer Algebra System written \nin Python and Cython.\n@item\nYacas @spaceuref{http://yacas.sourceforge.net/homepage.html} @* Yet another\ncomputer algebra system.\n@end itemize\n\n@end table\n\n\n@node Algorithms, Internals, Language Bindings, 
Top\n@chapter Algorithms\n@cindex Algorithms\n\nThis chapter is an introduction to some of the algorithms used for various MPIR\noperations.  The code is likely to be hard to understand without knowing\nsomething about the algorithms.\n\nSome MPIR internals are mentioned, but applications that expect to be\ncompatible with future MPIR releases should take care to use only the\ndocumented functions.\n\n@menu\n* Multiplication Algorithms::\n* Division Algorithms::\n* Greatest Common Divisor Algorithms::\n* Powering Algorithms::\n* Root Extraction Algorithms::\n* Radix Conversion Algorithms::\n* Other Algorithms::\n* Assembler Coding::\n@end menu\n\n\n@node Multiplication Algorithms, Division Algorithms, Algorithms, Algorithms\n@section Multiplication\n@cindex Multiplication algorithms\n\nN@cross{}N limb multiplications and squares are done using one of six \nalgorithms, as the size N increases.\n\n@quotation\n@multitable {KaratsubaMMM} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item Algorithm  @tab Mul Threshold                   \n@item Basecase   @tab (none)                         \n@item Karatsuba  @tab @code{MUL_KARATSUBA_THRESHOLD}  \n@item Toom-3     @tab @code{MUL_TOOM3_THRESHOLD}      \n@item Toom-4     @tab @code{MUL_TOOM4_THRESHOLD}      \n@item Toom-8(.5) @tab @code{MUL_TOOM8H_THRESHOLD}     \n@item FFT        @tab @code{MUL_FFT_FULL_THRESHOLD}   \n@end multitable\n@end quotation\n\n@quotation\n@multitable {KaratsubaMMM} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item Algorithm  @tab Sqr Threshold\n@item Basecase   @tab (none)\n@item Karatsuba  @tab @code{SQR_KARATSUBA_THRESHOLD}\n@item Toom-3     @tab @code{SQR_TOOM3_THRESHOLD}\n@item Toom-4     @tab @code{SQR_TOOM4_THRESHOLD}\n@item Toom-8     @tab @code{SQR_TOOM8_THRESHOLD}\n@item FFT        @tab @code{SQR_FFT_FULL_THRESHOLD}\n@end multitable\n@end quotation\n\nN@cross{}M multiplications of operands with different sizes above\n@code{MUL_KARATSUBA_THRESHOLD} are done using unbalanced Toom algorithms or\nwith the FFT. 
@xref{Unbalanced Multiplication}.\n\n@menu\n* Basecase Multiplication::\n* Karatsuba Multiplication::\n* Toom 3-Way Multiplication::\n* Toom 4-Way Multiplication::\n* FFT Multiplication::\n* Other Multiplication::\n* Unbalanced Multiplication::\n@end menu\n\n\n@node Basecase Multiplication, Karatsuba Multiplication, Multiplication Algorithms, Multiplication Algorithms\n@subsection Basecase Multiplication\n\nBasecase N@cross{}M multiplication is a straightforward rectangular set of\ncross-products, the same as long multiplication done by hand and for that\nreason sometimes known as the schoolbook or grammar school method.  This is an\n@m{O(NM),O(N*M)} algorithm.  See Knuth section 4.3.1 algorithm M\n(@pxref{References}), and the @file{mpn/generic/mul_basecase.c} code.\n\nAssembler implementations of @code{mpn_mul_basecase} are essentially the same\nas the generic C code, but have all the usual assembler tricks and\nobscurities introduced for speed.\n\nA square can be done in roughly half the time of a multiply, by using the fact\nthat the cross products above and below the diagonal are the same.  A triangle\nof products below the diagonal is formed, doubled (left shift by one bit), and\nthen the products on the diagonal added.  This can be seen in\n@file{mpn/generic/sqr_basecase.c}.  
Again the assembler implementations take\nessentially the same approach.\n\n@tex\n\\def\\GMPline#1#2#3#4#5#6{%\n  \\hbox {%\n    \\vrule height 2.5ex depth 1ex\n           \\hbox to 2em {\\hfil{#2}\\hfil}%\n    \\vrule \\hbox to 2em {\\hfil{#3}\\hfil}%\n    \\vrule \\hbox to 2em {\\hfil{#4}\\hfil}%\n    \\vrule \\hbox to 2em {\\hfil{#5}\\hfil}%\n    \\vrule \\hbox to 2em {\\hfil{#6}\\hfil}%\n    \\vrule}}\n\\GMPdisplay{\n  \\hbox{%\n    \\vbox{%\n      \\hbox to 1.5em {\\vrule height 2.5ex depth 1ex width 0pt}%\n      \\hbox {\\vrule height 2.5ex depth 1ex width 0pt u0\\hfil}%\n      \\hbox {\\vrule height 2.5ex depth 1ex width 0pt u1\\hfil}%\n      \\hbox {\\vrule height 2.5ex depth 1ex width 0pt u2\\hfil}%\n      \\hbox {\\vrule height 2.5ex depth 1ex width 0pt u3\\hfil}%\n      \\hbox {\\vrule height 2.5ex depth 1ex width 0pt u4\\hfil}%\n      \\vfill}%\n    \\vbox{%\n      \\hbox{%\n        \\hbox to 2em {\\hfil u0\\hfil}%\n        \\hbox to 2em {\\hfil u1\\hfil}%\n        \\hbox to 2em {\\hfil u2\\hfil}%\n        \\hbox to 2em {\\hfil u3\\hfil}%\n        \\hbox to 2em {\\hfil u4\\hfil}}%\n      \\vskip 0.7ex\n      \\hrule\n      \\GMPline{u0}{d}{}{}{}{}%\n      \\hrule\n      \\GMPline{u1}{}{d}{}{}{}%\n      \\hrule\n      \\GMPline{u2}{}{}{d}{}{}%\n      \\hrule\n      \\GMPline{u3}{}{}{}{d}{}%\n      \\hrule\n      \\GMPline{u4}{}{}{}{}{d}%\n      \\hrule}}}\n@end tex\n@ifnottex\n@example\n@group\n     u0  u1  u2  u3  u4\n   +---+---+---+---+---+\nu0 | d |   |   |   |   |\n   +---+---+---+---+---+\nu1 |   | d |   |   |   |\n   +---+---+---+---+---+\nu2 |   |   | d |   |   |\n   +---+---+---+---+---+\nu3 |   |   |   | d |   |\n   +---+---+---+---+---+\nu4 |   |   |   |   | d |\n   +---+---+---+---+---+\n@end group\n@end example\n@end ifnottex\n\nIn practice squaring isn't a full 2@cross{} faster than multiplying, it's\nusually around 1.5@cross{}.  
Less than 1.5@cross{} probably indicates\n@code{mpn_sqr_basecase} wants improving on that CPU.\n\nOn some CPUs @code{mpn_mul_basecase} can be faster than the generic C\n@code{mpn_sqr_basecase} on some small sizes.  @code{SQR_BASECASE_THRESHOLD} is\nthe size at which to use @code{mpn_sqr_basecase}, this will be zero if that\nroutine should be used always.\n\n\n@node Karatsuba Multiplication, Toom 3-Way Multiplication, Basecase Multiplication, Multiplication Algorithms\n@subsection Karatsuba Multiplication\n@cindex Karatsuba multiplication\n\nThe Karatsuba multiplication algorithm is described in Knuth section 4.3.3\npart A, and various other textbooks.  A brief description is given here.\n\nThe inputs @math{x} and @math{y} are treated as each split into two parts of\nequal length (or the most significant part one limb shorter if N is odd).\n\n@tex\n% GMPboxwidth used for all the multiplication pictures\n\\global\\newdimen\\GMPboxwidth \\global\\GMPboxwidth=5em\n% GMPboxdepth and GMPboxheight are also used for the float pictures\n\\global\\newdimen\\GMPboxdepth  \\global\\GMPboxdepth=1ex\n\\global\\newdimen\\GMPboxheight \\global\\GMPboxheight=2ex\n\\gdef\\GMPvrule{\\vrule height \\GMPboxheight depth \\GMPboxdepth}\n\\def\\GMPbox#1#2{%\n  \\vbox {%\n    \\hrule\n    \\hbox to 2\\GMPboxwidth{%\n      \\GMPvrule \\hfil $#1$\\hfil \\vrule \\hfil $#2$\\hfil \\vrule}%\n    \\hrule}}\n\\GMPdisplay{%\n\\vbox{%\n  \\hbox to 2\\GMPboxwidth {high \\hfil low}\n  \\vskip 0.7ex\n  \\GMPbox{x_1}{x_0}\n  \\vskip 0.5ex\n  \\GMPbox{y_1}{y_0}\n}}\n@end tex\n@ifnottex\n@example\n@group\n high              low\n+----------+----------+\n|    x1    |    x0    |\n+----------+----------+\n\n+----------+----------+\n|    y1    |    y0    |\n+----------+----------+\n@end group\n@end example\n@end ifnottex\n\nLet @math{b} be the power of 2 where the split occurs, ie.@: if @ms{x,0} is\n@math{k} limbs (@ms{y,0} the same) then\n@m{b=2\\GMPraise{$k*$@code{mp\\_bits\\_per\\_limb}}, 
b=2^(k*mp_bits_per_limb)}.\nWith that @m{x=x_1b+x_0,x=x1*b+x0} and @m{y=y_1b+y_0,y=y1*b+y0}, and the\nfollowing holds,\n\n@display\n@m{xy = (b^2+b)x_1y_1 - b(x_1-x_0)(y_1-y_0) + (b+1)x_0y_0,\n  x*y = (b^2+b)*x1*y1 - b*(x1-x0)*(y1-y0) + (b+1)*x0*y0}\n@end display\n\nThis formula means doing only three multiplies of (N/2)@cross{}(N/2) limbs,\nwhereas a basecase multiply of N@cross{}N limbs is equivalent to four\nmultiplies of (N/2)@cross{}(N/2).  The factors @math{(b^2+b)} etc represent\nthe positions where the three products must be added.\n\n@tex\n\\def\\GMPboxA#1#2{%\n  \\vbox{%\n    \\hrule\n    \\hbox{%\n      \\GMPvrule\n      \\hbox to 2\\GMPboxwidth {\\hfil\\hbox{$#1$}\\hfil}%\n      \\vrule\n      \\hbox to 2\\GMPboxwidth {\\hfil\\hbox{$#2$}\\hfil}%\n      \\vrule}\n    \\hrule}}\n\\def\\GMPboxB#1#2{%\n  \\hbox{%\n    \\raise \\GMPboxdepth \\hbox to \\GMPboxwidth {\\hfil #1\\hskip 0.5em}%\n    \\vbox{%\n      \\hrule\n      \\hbox{%\n        \\GMPvrule\n        \\hbox to 2\\GMPboxwidth {\\hfil\\hbox{$#2$}\\hfil}%\n        \\vrule}%\n      \\hrule}}}\n\\GMPdisplay{%\n\\vbox{%\n  \\hbox to 4\\GMPboxwidth {high \\hfil low}\n  \\vskip 0.7ex\n  \\GMPboxA{x_1y_1}{x_0y_0}\n  \\vskip 0.5ex\n  \\GMPboxB{$+$}{x_1y_1}\n  \\vskip 0.5ex\n  \\GMPboxB{$+$}{x_0y_0}\n  \\vskip 0.5ex\n  \\GMPboxB{$-$}{(x_1-x_0)(y_1-y_0)}\n}}\n@end tex\n@ifnottex\n@example\n@group\n high                              low\n+--------+--------+ +--------+--------+\n|      x1*y1      | |      x0*y0      |\n+--------+--------+ +--------+--------+\n          +--------+--------+\n      add |      x1*y1      |\n          +--------+--------+\n          +--------+--------+\n      add |      x0*y0      |\n          +--------+--------+\n          +--------+--------+\n      sub | (x1-x0)*(y1-y0) |\n          +--------+--------+\n@end group\n@end example\n@end ifnottex\n\nThe term @m{(x_1-x_0)(y_1-y_0),(x1-x0)*(y1-y0)} is best calculated as an\nabsolute value, and the sign used to choose to add or subtract.  
Notice the\nsum @m{\\mathop{\\rm high}(x_0y_0)+\\mathop{\\rm low}(x_1y_1),\nhigh(x0*y0)+low(x1*y1)} occurs twice, so it's possible to do @m{5k,5*k} limb\nadditions, rather than @m{6k,6*k}, but in MPIR extra function call overheads\noutweigh the saving.\n\nSquaring is similar to multiplying, but with @math{x=y} the formula reduces to\nan equivalent with three squares,\n\n@display\n@m{x^2 = (b^2+b)x_1^2 - b(x_1-x_0)^2 + (b+1)x_0^2,\n   x^2 = (b^2+b)*x1^2 - b*(x1-x0)^2 + (b+1)*x0^2}\n@end display\n\nThe final result is accumulated from those three squares the same way as for\nthe three multiplies above.  The middle term @m{(x_1-x_0)^2,(x1-x0)^2} is now\nalways positive.\n\nA similar formula for both multiplying and squaring can be constructed with a\nmiddle term @m{(x_1+x_0)(y_1+y_0),(x1+x0)*(y1+y0)}.  But those sums can exceed\n@math{k} limbs, leading to more carry handling and additions than the form\nabove.\n\nKaratsuba multiplication is asymptotically an @math{O(N^@W{1.585})} algorithm,\nthe exponent being @m{\\log3/\\log2,log(3)/log(2)}, representing 3 multiplies\neach @math{1/2} the size of the inputs.  This is a big improvement over the\nbasecase multiply at @math{O(N^2)} and the advantage soon overcomes the extra\nadditions Karatsuba performs.  @code{MUL_KARATSUBA_THRESHOLD} can be as little\nas 10 limbs.  The @code{SQR} threshold is usually about twice the @code{MUL}.\n\nThe basecase algorithm will take a time of the form @m{M(N) = aN^2 + bN + c,\nM(N) = a*N^2 + b*N + c} and the Karatsuba algorithm @m{K(N) = 3M(N/2) + dN +\ne, K(N) = 3*M(N/2) + d*N + e}, which expands to @m{K(N) = {3\\over4} aN^2 +\n{3\\over2} bN + 3c + dN + e, K(N) = 3/4*a*N^2 + 3/2*b*N + 3*c + d*N + e}.  The\nfactor @m{3\\over4, 3/4} for @math{a} means per-crossproduct speedups in the\nbasecase code will increase the threshold since they benefit @math{M(N)} more\nthan @math{K(N)}.  
And conversely the @m{3\\over2, 3/2} for @math{b} means\nlinear style speedups of @math{b} will increase the threshold since they\nbenefit @math{K(N)} more than @math{M(N)}.  The latter can be seen for\ninstance when adding an optimized @code{mpn_sqr_diagonal} to\n@code{mpn_sqr_basecase}.  Of course all speedups reduce total time, and in\nthat sense the algorithm thresholds are merely of academic interest.\n\n\n@node Toom 3-Way Multiplication, Toom 4-Way Multiplication, Karatsuba Multiplication, Multiplication Algorithms\n@subsection Toom 3-Way Multiplication\n@cindex Toom multiplication\n\nThe Karatsuba formula is the simplest case of a general approach to splitting\ninputs that leads to both Toom and FFT algorithms.  A description of\nToom can be found in Knuth section 4.3.3, with an example 3-way\ncalculation after Theorem A@.  The 3-way form used in MPIR is described here.\n\nThe operands are each considered split into 3 pieces of equal length (or the\nmost significant part 1 or 2 limbs shorter than the other two).\n\n@tex\n\\def\\GMPbox#1#2#3{%\n  \\vbox{%\n    \\hrule \\vfil\n    \\hbox to 3\\GMPboxwidth {%\n      \\GMPvrule\n      \\hfil$#1$\\hfil\n      \\vrule\n      \\hfil$#2$\\hfil\n      \\vrule\n      \\hfil$#3$\\hfil\n      \\vrule}%\n    \\vfil \\hrule\n}}\n\\GMPdisplay{%\n\\vbox{%\n  \\hbox to 3\\GMPboxwidth {high \\hfil low}\n  \\vskip 0.7ex\n  \\GMPbox{x_2}{x_1}{x_0}\n  \\vskip 0.5ex\n  \\GMPbox{y_2}{y_1}{y_0}\n  \\vskip 0.5ex\n}}\n@end tex\n@ifnottex\n@example\n@group\n high                         low\n+----------+----------+----------+\n|    x2    |    x1    |    x0    |\n+----------+----------+----------+\n\n+----------+----------+----------+\n|    y2    |    y1    |    y0    |\n+----------+----------+----------+\n@end group\n@end example\n@end ifnottex\n\n@noindent\nThese parts are treated as the coefficients of two polynomials\n\n@display\n@group\n@m{X(t) = x_2t^2 + x_1t + x_0,\n   X(t) = x2*t^2 + x1*t + x0}\n@m{Y(t) = y_2t^2 + y_1t + y_0,\n 
  Y(t) = y2*t^2 + y1*t + y0}\n@end group\n@end display\n\nLet @math{b} equal the power of 2 which is the size of the @ms{x,0}, @ms{x,1},\n@ms{y,0} and @ms{y,1} pieces, ie.@: if they're @math{k} limbs each then\n@m{b=2\\GMPraise{$k*$@code{mp\\_bits\\_per\\_limb}}, b=2^(k*mp_bits_per_limb)}.\nWith this @math{x=X(b)} and @math{y=Y(b)}.\n\nLet a polynomial @m{W(t)=X(t)Y(t),W(t)=X(t)*Y(t)} and suppose its coefficients\nare\n\n@display\n@m{W(t) = w_4t^4 + w_3t^3 + w_2t^2 + w_1t + w_0,\n   W(t) = w4*t^4 + w3*t^3 + w2*t^2 + w1*t + w0}\n@end display\n\nThe @m{w_i,w[i]} are going to be determined, and when they are they'll give\nthe final result using @math{w=W(b)}, since\n@m{xy=X(b)Y(b),x*y=X(b)*Y(b)=W(b)}.  The coefficients will be roughly\n@math{b^2} each, and the final @math{W(b)} will be an addition like,\n\n@tex\n\\def\\GMPbox#1#2{%\n  \\moveright #1\\GMPboxwidth\n  \\vbox{%\n    \\hrule\n    \\hbox{%\n      \\GMPvrule\n      \\hbox to 2\\GMPboxwidth {\\hfil$#2$\\hfil}%\n      \\vrule}%\n    \\hrule\n}}\n\\GMPdisplay{%\n\\vbox{%\n  \\hbox to 6\\GMPboxwidth {high \\hfil low}%\n  \\vskip 0.7ex\n  \\GMPbox{0}{w_4}\n  \\vskip 0.5ex\n  \\GMPbox{1}{w_3}\n  \\vskip 0.5ex\n  \\GMPbox{2}{w_2}\n  \\vskip 0.5ex\n  \\GMPbox{3}{w_1}\n  \\vskip 0.5ex\n  \\GMPbox{4}{w_0}\n}}\n@end tex\n@ifnottex\n@example\n@group\n high                                        low\n+-------+-------+\n|       w4      |\n+-------+-------+\n       +--------+-------+\n       |        w3      |\n       +--------+-------+\n               +--------+-------+\n               |        w2      |\n               +--------+-------+\n                       +--------+-------+\n                       |        w1      |\n                       +--------+-------+\n                                +-------+-------+\n                                |       w0      |\n                                +-------+-------+\n@end group\n@end example\n@end ifnottex\n\nThe @m{w_i,w[i]} coefficients could be formed by a simple set of 
cross\nproducts, like @m{w_4=x_2y_2,w4=x2*y2}, @m{w_3=x_2y_1+x_1y_2,w3=x2*y1+x1*y2},\n@m{w_2=x_2y_0+x_1y_1+x_0y_2,w2=x2*y0+x1*y1+x0*y2} etc, but this would need all\nnine @m{x_iy_j,x[i]*y[j]} for @math{i,j=0,1,2}, and would be equivalent merely\nto a basecase multiply.  Instead the following approach is used.\n\n@math{X(t)} and @math{Y(t)} are evaluated and multiplied at 5 points, giving\nvalues of @math{W(t)} at those points.  In MPIR the following points are used,\n\n@quotation\n@multitable {@m{t=\\infty,t=inf}M} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item Point                 @tab Value\n@item @math{t=0}            @tab @m{x_0y_0,x0 * y0}, which gives @ms{w,0} immediately\n@item @math{t=1}            @tab @m{(x_2+x_1+x_0)(y_2+y_1+y_0),(x2+x1+x0) * (y2+y1+y0)}\n@item @math{t=-1}           @tab @m{(x_2-x_1+x_0)(y_2-y_1+y_0),(x2-x1+x0) * (y2-y1+y0)}\n@item @math{t=2}            @tab @m{(4x_2+2x_1+x_0)(4y_2+2y_1+y_0),(4*x2+2*x1+x0) * (4*y2+2*y1+y0)}\n@item @m{t=\\infty,t=inf}    @tab @m{x_2y_2,x2 * y2}, which gives @ms{w,4} immediately\n@end multitable\n@end quotation\n\nAt @math{t=-1} the values can be negative and that's handled using the\nabsolute values and tracking the sign separately.  
At @m{t=\\infty,t=inf} the\nvalue is actually @m{\\lim_{t\\to\\infty} {X(t)Y(t)\\over t^4}, X(t)*Y(t)/t^4 in\nthe limit as t approaches infinity}, but it's much easier to think of as\nsimply @m{x_2y_2,x2*y2} giving @ms{w,4} immediately (much like\n@m{x_0y_0,x0*y0} at @math{t=0} gives @ms{w,0} immediately).\n\nEach of the points substituted into\n@m{W(t)=w_4t^4+\\cdots+w_0,W(t)=w4*t^4+@dots{}+w0} gives a linear combination\nof the @m{w_i,w[i]} coefficients, and the value of those combinations has just\nbeen calculated.\n\n@tex\n\\GMPdisplay{%\n$\\matrix{%\nW(0)      & = &       &   &      &   &      &   &      &   & w_0 \\cr\nW(1)      & = &   w_4 & + &  w_3 & + &  w_2 & + &  w_1 & + & w_0 \\cr\nW(-1)     & = &   w_4 & - &  w_3 & + &  w_2 & - &  w_1 & + & w_0 \\cr\nW(2)      & = & 16w_4 & + & 8w_3 & + & 4w_2 & + & 2w_1 & + & w_0 \\cr\nW(\\infty) & = &   w_4 \\cr\n}$}\n@end tex\n@ifnottex\n@example\n@group\nW(0)   =                              w0\nW(1)   =    w4 +   w3 +   w2 +   w1 + w0\nW(-1)  =    w4 -   w3 +   w2 -   w1 + w0\nW(2)   = 16*w4 + 8*w3 + 4*w2 + 2*w1 + w0\nW(inf) =    w4\n@end group\n@end example\n@end ifnottex\n\nThis is a set of five equations in five unknowns, and some elementary linear\nalgebra quickly isolates each @m{w_i,w[i]}.  This involves adding or\nsubtracting one @math{W(t)} value from another, and a couple of divisions by\npowers of 2 and one division by 3, the latter using the special\n@code{mpn_divexact_by3} (@pxref{Exact Division}).\n\nThe conversion of @math{W(t)} values to the coefficients is interpolation.  A\npolynomial of degree 4 like @math{W(t)} is uniquely determined by values known\nat 5 different points.  
The points are arbitrary and can be chosen to make the\nlinear equations come out with a convenient set of steps for quickly isolating\nthe @m{w_i,w[i]}.\n\nSquaring follows the same procedure as multiplication, but there's only one\n@math{X(t)} and it's evaluated at the 5 points, and those values squared to\ngive values of @math{W(t)}.  The interpolation is then identical, and in fact\nthe same @code{toom3_interpolate} subroutine is used for both squaring and\nmultiplying.\n\nToom-3 is asymptotically @math{O(N^@W{1.465})}, the exponent being\n@m{\\log5/\\log3,log(5)/log(3)}, representing 5 recursive multiplies of 1/3 the\noriginal size each.  This is an improvement over Karatsuba at\n@math{O(N^@W{1.585})}, though Toom does more work in the evaluation and\ninterpolation and so it only realizes its advantage above a certain size.\n\nNear the crossover between Toom-3 and Karatsuba there's generally a range of\nsizes where the difference between the two is small.\n@code{MUL_TOOM3_THRESHOLD} is a somewhat arbitrary point in that range and\nsuccessive runs of the tune program can give different values due to small\nvariations in measuring.  A graph of time versus size for the two shows the\neffect, see @file{tune/README}.\n\nAt the fairly small sizes where the Toom-3 thresholds occur it's worth\nremembering that the asymptotic behaviour for Karatsuba and Toom-3 can't be\nexpected to make accurate predictions, due of course to the big influence of\nall sorts of overheads, and the fact that only a few recursions of each are\nbeing performed.  
Even at large sizes there's a good chance machine dependent\neffects like cache architecture will mean actual performance deviates from\nwhat might be predicted.\n\nThe formula given for the Karatsuba algorithm (@pxref{Karatsuba\nMultiplication}) has an equivalent for Toom-3 involving only five multiplies,\nbut this would be complicated and unenlightening.\n\nAn alternate view of Toom-3 can be found in Zuras (@pxref{References}), using\na vector to represent the @math{x} and @math{y} splits and a matrix\nmultiplication for the evaluation and interpolation stages.  The matrix\ninverses are not meant to be actually used, and they have elements with values\nmuch greater than in fact arise in the interpolation steps.  The diagram shown\nfor the 3-way is attractive, but again doesn't have to be implemented that way\nand for example with a bit of rearrangement just one division by 6 can be\ndone.\n\n@node Toom 4-Way Multiplication, FFT Multiplication, Toom 3-Way Multiplication, Multiplication Algorithms\n@subsection Toom 4-Way Multiplication\n@cindex Toom multiplication\n\nKaratsuba and Toom-3 split the operands into 2 and 3 coefficients,\nrespectively.  Toom-4 analogously splits the operands into 4 coefficients.\nUsing the notation from the section on Toom-3 multiplication, we form two\npolynomials:\n\n@display\n@group\n@m{X(t) = x_3t^3 + x_2t^2 + x_1t + x_0,\n   X(t) = x3*t^3 + x2*t^2 + x1*t + x0}\n@m{Y(t) = y_3t^3 + y_2t^2 + y_1t + y_0,\n   Y(t) = y3*t^3 + y2*t^2 + y1*t + y0}\n@end group\n@end display\n\n@math{X(t)} and @math{Y(t)} are evaluated and multiplied at 7 points, giving\nvalues of @math{W(t)} at those points.  
In MPIR the following points are used,\n\n@quotation\n@multitable {@m{t=-1/2,t=inf}M} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}\n@item Point              @tab Value\n@item @math{t=0}         @tab @m{x_0y_0,x0 * y0}, which gives @ms{w,0} immediately\n@item @math{t=1/2}       @tab @m{(x_3+2x_2+4x_1+8x_0)(y_3+2y_2+4y_1+8y_0),(x3+2*x2+4*x1+8*x0) * (y3+2*y2+4*y1+8*y0)}\n@item @math{t=-1/2}      @tab @m{(-x_3+2x_2-4x_1+8x_0)(-y_3+2y_2-4y_1+8y_0),(-x3+2*x2-4*x1+8*x0) * (-y3+2*y2-4*y1+8*y0)}\n@item @math{t=1}         @tab @m{(x_3+x_2+x_1+x_0)(y_3+y_2+y_1+y_0),(x3+x2+x1+x0) * (y3+y2+y1+y0)}\n@item @math{t=-1}        @tab @m{(-x_3+x_2-x_1+x_0)(-y_3+y_2-y_1+y_0),(-x3+x2-x1+x0) * (-y3+y2-y1+y0)}\n@item @math{t=2}         @tab @m{(8x_3+4x_2+2x_1+x_0)(8y_3+4y_2+2y_1+y_0),(8*x3+4*x2+2*x1+x0) * (8*y3+4*y2+2*y1+y0)}\n@item @m{t=\\infty,t=inf} @tab @m{x_3y_3,x3 * y3}, which gives @ms{w,6} immediately\n@end multitable\n@end quotation\n\nThe number of additions and subtractions for Toom-4 is much larger than for Toom-3.\nBut several subexpressions occur multiple times, for example @m{x_2+x_0,x2+x0}, occurs\nfor both @math{t=1} and @math{t=-1}.\n\nToom-4 is asymptotically @math{O(N^@W{1.404})}, the exponent being\n@m{\\log7/\\log4,log(7)/log(4)}, representing 7 recursive multiplies of 1/4 the\noriginal size each.\n\n@node FFT Multiplication, Other Multiplication, Toom 4-Way Multiplication, Multiplication Algorithms\n@subsection FFT Multiplication\n@cindex FFT multiplication\n@cindex Fast Fourier Transform\n\nThis section is out-of-date and will be updated when the new FFT is added.\n\nAt large to very large sizes a Fermat style FFT multiplication is used,\nfollowing Sch@\"onhage and Strassen (@pxref{References}).  Descriptions of FFTs\nin various forms can be found in many textbooks, for instance Knuth section\n4.3.3 part C or Lipson chapter IX@.  
A brief description of the form used in\nMPIR is given here.\n\nThe multiplication done is @m{xy \\bmod 2^N+1, x*y mod 2^N+1}, for a given\n@math{N}.  A full product @m{xy,x*y} is obtained by choosing @m{N \\ge\n\\mathop{\\rm bits}(x)+\\mathop{\\rm bits}(y), N>=bits(x)+bits(y)} and padding\n@math{x} and @math{y} with high zero limbs.  The modular product is the native\nform for the algorithm, so padding to get a full product is unavoidable.\n\nThe algorithm follows a split, evaluate, pointwise multiply, interpolate and\ncombine similar to that described above for Karatsuba and Toom-3.  A @math{k}\nparameter controls the split, with an FFT-@math{k} splitting into @math{2^k}\npieces of @math{M=N/2^k} bits each.  @math{N} must be a multiple of\n@m{2^k\\times@code{mp\\_bits\\_per\\_limb}, (2^k)*@nicode{mp_bits_per_limb}} so\nthe split falls on limb boundaries, avoiding bit shifts in the split and\ncombine stages.\n\nThe evaluations, pointwise multiplications, and interpolation, are all done\nmodulo @m{2^{N'}+1, 2^N'+1} where @math{N'} is @math{2M+k+3} rounded up to a\nmultiple of @math{2^k} and of @code{mp_bits_per_limb}.  The results of\ninterpolation will be the following negacyclic convolution of the input\npieces, and the choice of @math{N'} ensures these sums aren't truncated.\n@tex\n$$ w_n = \\sum_{{i+j = b2^k+n}\\atop{b=0,1}} (-1)^b x_i y_j $$\n@end tex\n@ifnottex\n\n@example\n           ---\n           \\         b\nw[n] =     /     (-1) * x[i] * y[j]\n           ---\n       i+j==b*2^k+n\n          b=0,1\n@end example\n\n@end ifnottex\nThe points used for the evaluation are @math{g^i} for @math{i=0} to\n@math{2^k-1} where @m{g=2^{2N'/2^k}, g=2^(2N'/2^k)}.  
@math{g} is a\n@m{2^k,2^k'}th root of unity mod @m{2^{N'}+1,2^N'+1}, which produces necessary\ncancellations at the interpolation stage, and it's also a power of 2 so the\nfast fourier transforms used for the evaluation and interpolation do only\nshifts, adds and negations.\n\nThe pointwise multiplications are done modulo @m{2^{N'}+1, 2^N'+1} and either\nrecurse into a further FFT or use a plain multiplication (Toom-3, Karatsuba or\nbasecase), whichever is optimal at the size @math{N'}.  The interpolation is\nan inverse fast fourier transform.  The resulting set of sums of @m{x_iy_j,\nx[i]*y[j]} are added at appropriate offsets to give the final result.\n\nSquaring is the same, but @math{x} is the only input so it's one transform at\nthe evaluate stage and the pointwise multiplies are squares.  The\ninterpolation is the same.\n\nFor a mod @math{2^N+1} product, an FFT-@math{k} is an @m{O(N^{k/(k-1)}),\nO(N^(k/(k-1)))} algorithm, the exponent representing @math{2^k} recursed\nmodular multiplies each @m{1/2^{k-1},1/2^(k-1)} the size of the original.\nEach successive @math{k} is an asymptotic improvement, but overheads mean each\nis only faster at bigger and bigger sizes.  In the code, @code{MUL_FFT_TABLE}\nand @code{SQR_FFT_TABLE} are the thresholds where each @math{k} is used.  Each\nnew @math{k} effectively swaps some multiplying for some shifts, adds and\noverheads.\n\nA mod @math{2^N+1} product can be formed with a normal\n@math{N@cross{}N@rightarrow{}2N} bit multiply plus a subtraction, so an FFT\nand Toom-3 etc can be compared directly.  A @math{k=4} FFT at\n@math{O(N^@W{1.333})} can be expected to be the first faster than Toom-3 at\n@math{O(N^@W{1.465})}.  In practice this is what's found, with\n@code{MUL_FFT_MODF_THRESHOLD} and @code{SQR_FFT_MODF_THRESHOLD} being between\n300 and 1000 limbs, depending on the CPU@.  
So far it's been found that only\nvery large FFTs recurse into pointwise multiplies above these sizes.\n\nWhen an FFT is to give a full product, the change of @math{N} to @math{2N}\ndoesn't alter the theoretical complexity for a given @math{k}, but for the\npurposes of considering where an FFT might be first used it can be assumed\nthat the FFT is recursing into a normal multiply and that on that basis it's\ndoing @math{2^k} recursed multiplies each @m{1/2^{k-2},1/2^(k-2)} the size of\nthe inputs, making it @m{O(N^{k/(k-2)}), O(N^(k/(k-2)))}.  This would mean\n@math{k=7} at @math{O(N^@W{1.4})} would be the first FFT faster than Toom-3.\nIn practice @code{MUL_FFT_FULL_THRESHOLD} and @code{SQR_FFT_FULL_THRESHOLD} \nhave been found to be in the @math{k=8} range, somewhere between 3000 and \n10000 limbs.\n\nThe way @math{N} is split into @math{2^k} pieces and then @math{2M+k+3} is\nrounded up to a multiple of @math{2^k} and @code{mp_bits_per_limb} means that\nwhen @math{2^k@ge{}@nicode{mp\\_bits\\_per\\_limb}} the effective @math{N} is a\nmultiple of @m{2^{2k-1},2^(2k-1)} bits.  The @math{+k+3} means some values of\n@math{N} just under such a multiple will be rounded to the next.  The\ncomplexity calculations above assume that a favourable size is used, meaning\none which isn't padded through rounding, and it's also assumed that the extra\n@math{+k+3} bits are negligible at typical FFT sizes.\n\nThe practical effect of the @m{2^{2k-1},2^(2k-1)} constraint is to introduce a\nstep-effect into measured speeds.  For example @math{k=8} will round @math{N}\nup to a multiple of 32768 bits, so for a 32-bit limb there'll be 512 limb\ngroups of sizes for which @code{mpn_mul_n} runs at the same speed.  Or for\n@math{k=9} groups of 2048 limbs, @math{k=10} groups of 8192 limbs, etc.  
In\npractice it's been found each @math{k} is used at quite small multiples of its\nsize constraint and so the step effect is quite noticeable in a time versus\nsize graph.\n\nThe threshold determinations currently measure at the mid-points of size\nsteps, but this is sub-optimal since at the start of a new step it can happen\nthat it's better to go back to the previous @math{k} for a while.  Something\nmore sophisticated for @code{MUL_FFT_TABLE} and @code{SQR_FFT_TABLE} will be\nneeded.\n\n\n@node Other Multiplication, Unbalanced Multiplication, FFT Multiplication, Multiplication Algorithms\n@subsection Other Multiplication\n@cindex Toom multiplication\n\nThe Toom algorithms described above (@pxref{Toom 3-Way Multiplication}, \n@pxref{Toom 4-Way Multiplication}) generalize to split into an arbitrary \nnumber of pieces, as per Knuth section 4.3.3 algorithm C@. MPIR currently \nimplements Toom 8 routines. \n\nThese are generated automatically via a technique due to Bodrato \n(@pxref{References}) which mixes evaluation, pointwise multiplication and\ninterpolation phases. The routine used is called Toom 8.5. See Bodrato's\npaper.\n\nFor general Toom-n a split into @math{r+1} pieces is made, and evaluations and\npointwise multiplications done at @m{2r+1,2*r+1} points.  A 4-way split does 7\npointwise multiplies, 5-way does 9, etc.  Asymptotically an @math{(r+1)}-way\nalgorithm is @m{O(N^{log(2r+1)/log(r+1)}), O(N^(log(2*r+1)/log(r+1)))}.  Only\nthe pointwise multiplications count towards big-@math{O} complexity, but the\ntime spent in the evaluate and interpolate stages grows with @math{r} and has\na significant practical impact, with the asymptotic advantage of each @math{r}\nrealized only at bigger and bigger sizes.  
The overheads grow as\n@m{O(Nr),O(N*r)}, whereas in an @math{r=2^k} FFT they grow only as @m{O(N \\log\nr), O(N*log(r))}.\n\nKnuth algorithm C evaluates at points 0,1,2,@dots{},@m{2r,2*r}, but exercise 4\nuses @math{-r},@dots{},0,@dots{},@math{r} and the latter saves some small\nmultiplies in the evaluate stage (or rather trades them for additions), and\nhas a further saving of nearly half the interpolate steps.  The idea is to\nseparate odd and even final coefficients and then perform algorithm C steps C7\nand C8 on them separately.  The divisors at step C7 become @math{j^2} and the\nmultipliers at C8 become @m{2tj-j^2,2*t*j-j^2}.\n\nSplitting odd and even parts through positive and negative points can be\nthought of as using @math{-1} as a square root of unity.  If a 4th root of\nunity was available then a further split and speedup would be possible, but no\nsuch root exists for plain integers.  Going to complex integers with\n@m{i=\\sqrt{-1}, i=sqrt(-1)} doesn't help, essentially because in cartesian\nform it takes three real multiplies to do a complex multiply.  The existence\nof @m{2^k,2^k'}th roots of unity in a suitable ring or field lets the fast\nfourier transform keep splitting and get to @m{O(N \\log r), O(N*log(r))}.\n\nFloating point FFTs use complex numbers approximating Nth roots of unity.\nSome processors have special support for such FFTs.  But these are not used in\nMPIR since it's very difficult to guarantee an exact result (to some number of\nbits).  
An occasional difference of 1 in the last bit might not matter to a\ntypical signal processing algorithm, but is of course of vital importance to\nMPIR.\n\n@node Unbalanced Multiplication,  , Other Multiplication, Multiplication Algorithms\n@subsection Unbalanced Multiplication\n@cindex Unbalanced multiplication\n\nMultiplication of operands with different sizes, both below\n@code{MUL_KARATSUBA_THRESHOLD}, is done with plain schoolbook multiplication\n(@pxref{Basecase Multiplication}).\n\nFor really large operands, we invoke the FFT directly.\n\nFor operands between these sizes, we use Toom inspired algorithms suggested by\nAlberto Zanoni and Marco Bodrato.  The idea is to split the operands into\npolynomials of different degree. These algorithms are denoted ToomMN where\nthe first input is broken into M components and the second operand is broken\ninto N components. MPIR currently implements Toom32, Toom33, Toom44, Toom53 and\nToom8h which deals with a variety of sizes where the product polynomial will\nhave length 15 or 16.\n\n@node Division Algorithms, Greatest Common Divisor Algorithms, Multiplication Algorithms, Algorithms\n@section Division Algorithms\n@cindex Division algorithms\n\n@menu\n* Single Limb Division::\n* Basecase Division::\n* Divide and Conquer Division::\n* Exact Division::\n* Exact Remainder::\n* Small Quotient Division::\n@end menu\n\n\n@node Single Limb Division, Basecase Division, Division Algorithms, Division Algorithms\n@subsection Single Limb Division\n\nN@cross{}1 division is implemented using repeated 2@cross{}1 divisions from\nhigh to low, either with a hardware divide instruction or a multiplication by\ninverse, whichever is best on a given CPU.\n\nThe multiply by inverse follows section 8 of ``Division by Invariant Integers\nusing Multiplication'' by Granlund and Montgomery (@pxref{References}) and is\nimplemented as @code{udiv_qrnnd_preinv} in @file{gmp-impl.h}.  
The idea is to\nhave a fixed-point approximation to @math{1/d} (see @code{invert_limb}) and\nthen multiply by the high limb (plus one bit) of the dividend to get a\nquotient @math{q}.  With @math{d} normalized (high bit set), @math{q} is no\nmore than 1 too small.  Subtracting @m{qd,q*d} from the dividend gives a\nremainder, and reveals whether @math{q} or @math{q-1} is correct.\n\nThe result is a division done with two multiplications and four or five\narithmetic operations.  On CPUs with low latency multipliers this can be much\nfaster than a hardware divide, though the cost of calculating the inverse at\nthe start may mean it's only better on inputs bigger than say 4 or 5 limbs.\n\nWhen a divisor must be normalized, either for the generic C\n@code{__udiv_qrnnd_c} or the multiply by inverse, the division performed is\nactually @m{a2^k,a*2^k} by @m{d2^k,d*2^k} where @math{a} is the dividend and\n@math{k} is the power necessary to have the high bit of @m{d2^k,d*2^k} set.\nThe bit shifts for the dividend are usually accomplished ``on the fly''\nmeaning by extracting the appropriate bits at each step.  Done this way the\nquotient limbs come out aligned ready to store.  When only the remainder is\nwanted, an alternative is to take the dividend limbs unshifted and calculate\n@m{r = a \\bmod d2^k, r = a mod d*2^k} followed by an extra final step @m{r2^k\n\\bmod d2^k, r*2^k mod d*2^k}.  This can help on CPUs with poor bit shifts or\nfew registers.\n\nThe multiply by inverse can be done two limbs at a time.  The calculation is\nbasically the same, but the inverse is two limbs and the divisor treated as if\npadded with a low zero limb.  This means more work, since the inverse will\nneed a 2@cross{}2 multiply, but the four 1@cross{}1s to do that are\nindependent and can therefore be done partly or wholly in parallel.  Likewise\nfor a 2@cross{}1 calculating @m{qd,q*d}.  
The net effect is to process two\nlimbs with roughly the same two multiplies worth of latency that one limb at a\ntime gives.  This extends to 3 or 4 limbs at a time, though the extra work to\napply the inverse will almost certainly soon reach the limits of multiplier\nthroughput.\n\nA similar approach in reverse can be taken to process just half a limb at a\ntime if the divisor is only a half limb.  In this case the 1@cross{}1 multiply\nfor the inverse effectively becomes two @m{{1\\over2}\\times1, (1/2)x1} for each\nlimb, which can be a saving on CPUs with a fast half limb multiply, or in fact\nif the only multiply is a half limb, and especially if it's not pipelined.\n\n\n@node Basecase Division, Divide and Conquer Division, Single Limb Division, Division Algorithms\n@subsection Basecase Division\n\nThis section is out-of-date.\n\nBasecase N@cross{}M division is like long division done by hand, but in base\n@m{2\\GMPraise{@code{mp\\_bits\\_per\\_limb}}, 2^mp_bits_per_limb}.  See Knuth\nsection 4.3.1 algorithm D.\n\nBriefly stated, while the dividend remains larger than the divisor, a high\nquotient limb is formed and the N@cross{}1 product @m{qd,q*d} subtracted at\nthe top end of the dividend.  With a normalized divisor (most significant bit\nset), each quotient limb can be formed with a 2@cross{}1 division and a\n1@cross{}1 multiplication plus some subtractions.  The 2@cross{}1 division is\nby the high limb of the divisor and is done either with a hardware divide or a\nmultiply by inverse (the same as in @ref{Single Limb Division}) whichever is\nfaster.  
Such a quotient is sometimes one too big, requiring an addback of the\ndivisor, but that happens rarely.\n\nWith Q=N@minus{}M being the number of quotient limbs, this is an\n@m{O(QM),O(Q*M)} algorithm and will run at a speed similar to a basecase\nQ@cross{}M multiplication, differing in fact only in the extra multiply and\ndivide for each of the Q quotient limbs.\n\n\n@node Divide and Conquer Division, Exact Division, Basecase Division, Division Algorithms\n@subsection Divide and Conquer Division\n\nThis section is out-of-date\n\nFor divisors larger than @code{DIV_DC_THRESHOLD}, division is done by dividing.\nOr to be precise by a recursive divide and conquer algorithm based on work by\nMoenck and Borodin, Jebelean, and Burnikel and Ziegler (@pxref{References}).\n\nThe algorithm consists essentially of recognising that a 2N@cross{}N division\ncan be done with the basecase division algorithm (@pxref{Basecase Division}),\nbut using N/2 limbs as a base, not just a single limb.  This way the\nmultiplications that arise are (N/2)@cross{}(N/2) and can take advantage of\nKaratsuba and higher multiplication algorithms (@pxref{Multiplication\nAlgorithms}).  The two ``digits'' of the quotient are formed by recursive\nN@cross{}(N/2) divisions.\n\nIf the (N/2)@cross{}(N/2) multiplies are done with a basecase multiplication\nthen the work is about the same as a basecase division, but with more function\ncall overheads and with some subtractions separated from the multiplies.\nThese overheads mean that it's only when N/2 is above\n@code{MUL_KARATSUBA_THRESHOLD} that divide and conquer is of use.\n\n@code{DIV_DC_THRESHOLD} is based on the divisor size N, so it will be somewhere\nabove twice @code{MUL_KARATSUBA_THRESHOLD}, but how much above depends on the\nCPU@.  
An optimized @code{mpn_mul_basecase} can lower @code{DIV_DC_THRESHOLD} a\nlittle by offering a ready-made advantage over repeated @code{mpn_submul_1}\ncalls.\n\nDivide and conquer is asymptotically @m{O(M(N)\\log N),O(M(N)*log(N))} where\n@math{M(N)} is the time for an N@cross{}N multiplication done with FFTs.  The\nactual time is a sum over multiplications of the recursed sizes, as can be\nseen near the end of section 2.2 of Burnikel and Ziegler.  For example, within\nthe Toom-3 range, divide and conquer is @m{2.63M(N), 2.63*M(N)}.  With higher\nalgorithms the @math{M(N)} term improves and the multiplier tends to @m{\\log\nN, log(N)}.  In practice, at moderate to large sizes, a 2N@cross{}N division\nis about 2 to 4 times slower than an N@cross{}N multiplication.\n\nNewton's method used for division is asymptotically @math{O(M(N))} and should\ntherefore be superior to divide and conquer, but it's believed this would only\nbe for large to very large N.\n\n\n@node Exact Division, Exact Remainder, Divide and Conquer Division, Division Algorithms\n@subsection Exact Division\n\nThis section is out-of-date\n\nA so-called exact division is when the dividend is known to be an exact\nmultiple of the divisor.  Jebelean's exact division algorithm uses this\nknowledge to make some significant optimizations (@pxref{References}).\n\nThe idea can be illustrated in decimal for example with 368154 divided by\n543.  Because the low digit of the dividend is 4, the low digit of the\nquotient must be 8.  This is arrived at from @m{4 \\mathord{\\times} 7 \\bmod 10,\n4*7 mod 10}, using the fact 7 is the modular inverse of 3 (the low digit of\nthe divisor), since @m{3 \\mathord{\\times} 7 \\mathop{\\equiv} 1 \\bmod 10, 3*7\n@equiv{} 1 mod 10}.  So @m{8\\mathord{\\times}543 = 4344,8*543=4344} can be\nsubtracted from the dividend leaving 363810.  
Notice the low digit has become\nzero.\n\nThe procedure is repeated at the second digit, with the next quotient digit 7\n(@m{1 \\mathord{\\times} 7 \\bmod 10, 7 @equiv{} 1*7 mod 10}), subtracting\n@m{7\\mathord{\\times}543 = 3801,7*543=3801}, leaving 325800.  And finally at\nthe third digit with quotient digit 6 (@m{8 \\mathord{\\times} 7 \\bmod 10, 8*7\nmod 10}), subtracting @m{6\\mathord{\\times}543 = 3258,6*543=3258} leaving 0.\nSo the quotient is 678.\n\nNotice however that the multiplies and subtractions don't need to extend past\nthe low three digits of the dividend, since that's enough to determine the\nthree quotient digits.  For the last quotient digit no subtraction is needed\nat all.  On a 2N@cross{}N division like this one, only about half the work of\na normal basecase division is necessary.\n\nFor an N@cross{}M exact division producing Q=N@minus{}M quotient limbs, the\nsaving over a normal basecase division is in two parts.  Firstly, each of the\nQ quotient limbs needs only one multiply, not a 2@cross{}1 divide and\nmultiply.  Secondly, the crossproducts are reduced when @math{Q>M} to\n@m{QM-M(M+1)/2,Q*M-M*(M+1)/2}, or when @math{Q@le{}M} to @m{Q(Q-1)/2,\nQ*(Q-1)/2}.  Notice the savings are complementary.  If Q is big then many\ndivisions are saved, or if Q is small then the crossproducts reduce to a small\nnumber.\n\nThe modular inverse used is calculated efficiently by @code{modlimb_invert} in\n@file{gmp-impl.h}.  This does four multiplies for a 32-bit limb, or six for a\n64-bit limb.  @file{tune/modlinv.c} has some alternate implementations that\nmight suit processors better at bit twiddling than multiplying.\n\nThe sub-quadratic exact division described by Jebelean in ``Exact Division\nwith Karatsuba Complexity'' is not currently implemented.  It uses a\nrearrangement similar to the divide and conquer for normal division\n(@pxref{Divide and Conquer Division}), but operating from low to high.  
A\nfurther possibility not currently implemented is ``Bidirectional Exact Integer\nDivision'' by Krandick and Jebelean which forms quotient limbs from both the\nhigh and low ends of the dividend, and can halve once more the number of\ncrossproducts needed in a 2N@cross{}N division.\n\nA special case exact division by 3 exists in @code{mpn_divexact_by3},\nsupporting Toom-3 multiplication and @code{mpq} canonicalizations.  It forms\nquotient digits with a multiply by the modular inverse of 3 (which is\n@code{0xAA..AAB}) and uses two comparisons to determine a borrow for the next\nlimb.  The multiplications don't need to be on the dependent chain, as long as\nthe effect of the borrows is applied, which can help chips with pipelined\nmultipliers.\n\n\n@node Exact Remainder, Small Quotient Division, Exact Division, Division Algorithms\n@subsection Exact Remainder\n@cindex Exact remainder\n\nIf the exact division algorithm is done with a full subtraction at each stage\nand the dividend isn't a multiple of the divisor, then low zero limbs are\nproduced but with a remainder in the high limbs.  For dividend @math{a},\ndivisor @math{d}, quotient @math{q}, and @m{b = 2\n\\GMPraise{@code{mp\\_bits\\_per\\_limb}}, b = 2^mp_bits_per_limb}, this remainder\n@math{r} is of the form\n@tex\n$$ a = qd + r b^n $$\n@end tex\n@ifnottex\n\n@example\na = q*d + r*b^n\n@end example\n\n@end ifnottex\n@math{n} represents the number of zero limbs produced by the subtractions,\nthat being the number of limbs produced for @math{q}.  @math{r} will be in the\nrange @math{0@le{}r<d} and can be viewed as a remainder, but one shifted up by\na factor of @math{b^n}.\n\nCarrying out full subtractions at each stage means the same number of cross\nproducts must be done as a normal division, but there's still some single limb\ndivisions saved.  
When @math{d} is a single limb some simplifications arise,\nproviding good speedups on a number of processors.\n\n@code{mpn_bdivmod}, @code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the\n@code{redc} function in @code{mpz_powm} differ subtly in how they return\n@math{r}, leading to some negations in the above formula, but all are\nessentially the same.\n\n@cindex Divisibility algorithm\n@cindex Congruence algorithm\nClearly @math{r} is zero when @math{a} is a multiple of @math{d}, and this\nleads to divisibility or congruence tests which are potentially more efficient\nthan a normal division.\n\nThe factor of @math{b^n} on @math{r} can be ignored in a GCD when @math{d} is\nodd, hence the use of @code{mpn_bdivmod} in @code{mpn_gcd}, and the use of\n@code{mpn_modexact_1_odd} by @code{mpn_gcd_1} and @code{mpz_kronecker_ui} etc\n(@pxref{Greatest Common Divisor Algorithms}).\n\nMontgomery's REDC method for modular multiplications uses operands of the form\nof @m{xb^{-n}, x*b^-n} and @m{yb^{-n}, y*b^-n} and on calculating @m{(xb^{-n})\n(yb^{-n}), (x*b^-n)*(y*b^-n)} uses the factor of @math{b^n} in the exact\nremainder to reach a product in the same form @m{(xy)b^{-n}, (x*y)*b^-n}\n(@pxref{Modular Powering Algorithm}).\n\nNotice that @math{r} generally gives no useful information about the ordinary\nremainder @math{a @bmod d} since @math{b^n @bmod d} could be anything.  If\nhowever @math{b^n @equiv{} 1 @bmod d}, then @math{r} is the negative of the\nordinary remainder.  This occurs whenever @math{d} is a factor of\n@math{b^n-1}, as for example with 3 in @code{mpn_divexact_by3}.  
For a 32 or\n64 bit limb other such factors include 5, 17 and 257, but no particular use\nhas been found for this.\n\n\n@node Small Quotient Division,  , Exact Remainder, Division Algorithms\n@subsection Small Quotient Division\n\nAn N@cross{}M division where the number of quotient limbs Q=N@minus{}M is\nsmall can be optimized somewhat.\n\nAn ordinary basecase division normalizes the divisor by shifting it to make\nthe high bit set, shifting the dividend accordingly, and shifting the\nremainder back down at the end of the calculation.  This is wasteful if only a\nfew quotient limbs are to be formed.  Instead a division of just the top\n@m{\\rm2Q,2*Q} limbs of the dividend by the top Q limbs of the divisor can be\nused to form a trial quotient.  This requires only those limbs normalized, not\nthe whole of the divisor and dividend.\n\nA multiply and subtract then applies the trial quotient to the M@minus{}Q\nunused limbs of the divisor and N@minus{}Q dividend limbs (which includes Q\nlimbs remaining from the trial quotient division).  The starting trial\nquotient can be 1 or 2 too big, but all cases of 2 too big and most cases of 1\ntoo big are detected by first comparing the most significant limbs that will\narise from the subtraction.  An addback is done if the quotient still turns\nout to be 1 too big.\n\nThis whole procedure is essentially the same as one step of the basecase\nalgorithm done in a Q limb base, though with the trial quotient test done only\nwith the high limbs, not an entire Q limb ``digit'' product.  
The correctness\nof this weaker test can be established by following the argument of Knuth\nsection 4.3.1 exercise 20 but with the @m{v_2 \\GMPhat q > b \\GMPhat r\n+ u_2, v2*q>b*r+u2} condition appropriately relaxed.\n\n\n@need 1000\n@node Greatest Common Divisor Algorithms, Powering Algorithms, Division Algorithms, Algorithms\n@section Greatest Common Divisor\n@cindex Greatest common divisor algorithms\n@cindex GCD algorithms\n\n@menu\n* Binary GCD::\n* Lehmer's GCD::\n* Subquadratic GCD::\n* Extended GCD::\n* Jacobi Symbol::\n@end menu\n\n\n@node Binary GCD, Lehmer's GCD, Greatest Common Divisor Algorithms, Greatest Common Divisor Algorithms\n@subsection Binary GCD\n\nAt small sizes MPIR uses an @math{O(N^2)} binary style GCD@.  This is described\nin many textbooks, for example Knuth section 4.5.2 algorithm B@.  It simply\nconsists of successively reducing odd operands @math{a} and @math{b} using\n\n@quotation\n@math{a,b = @abs{}(a-b),@min{}(a,b)} @*\nstrip factors of 2 from @math{a}\n@end quotation\n\nThe Euclidean GCD algorithm, as per Knuth algorithms E and A, reduces using\n@math{a @bmod b} but this has so far been found to be slower everywhere.  One\nreason the binary method does well is that the implied quotient at each step\nis usually small, so often only one or two subtractions are needed to get the\nsame effect as a division.  Quotients 1, 2 and 3 for example occur 67.7% of\nthe time, see Knuth section 4.5.3 Theorem E.\n\nWhen the implied quotient is large, meaning @math{b} is much smaller than\n@math{a}, then a division is worthwhile.  This is the basis for the initial\n@math{a @bmod b} reductions in @code{mpn_gcd} and @code{mpn_gcd_1} (the latter\nfor both N@cross{}1 and 1@cross{}1 cases).  But after that initial reduction,\nbig quotients occur too rarely to make it worth checking for them.\n\n@sp 1\nThe final @math{1@cross{}1} GCD in @code{mpn_gcd_1} is done in the generic C\ncode as described above.  
For two N-bit operands, the algorithm takes about\n0.68 iterations per bit.  For optimum performance some attention needs to be\npaid to the way the factors of 2 are stripped from @math{a}.\n\nFirstly it may be noted that in twos complement the number of low zero bits on\n@math{a-b} is the same as @math{b-a}, so counting or testing can begin on\n@math{a-b} without waiting for @math{@abs{}(a-b)} to be determined.\n\nA loop stripping low zero bits tends not to branch predict well, since the\ncondition is data dependent.  But on average there's only a few low zeros, so\nan option is to strip one or two bits arithmetically then loop for more (as\ndone for AMD K6).  Or use a lookup table to get a count for several bits then\nloop for more (as done for AMD K7).  An alternative approach is to keep just\none of @math{a} or @math{b} odd and iterate\n\n@quotation\n@math{a,b = @abs{}(a-b), @min{}(a,b)} @*\n@math{a = a/2} if even @*\n@math{b = b/2} if even\n@end quotation\n\nThis requires about 1.25 iterations per bit, but stripping of a single bit at\neach step avoids any branching.  Repeating the bit strip reduces to about 0.9\niterations per bit, which may be a worthwhile tradeoff.\n\nGenerally with the above approaches a speed of perhaps 6 cycles per bit can be\nachieved, which is still not terribly fast with for instance a 64-bit GCD\ntaking nearly 400 cycles.  It's this sort of time which means it's not usually\nadvantageous to combine a set of divisibility tests into a GCD.\n\n\n@node Lehmer's GCD, Subquadratic GCD, Binary GCD, Greatest Common Divisor Algorithms\n@subsection Lehmer's GCD\n\nLehmer's improvement of the Euclidean algorithms is based on the observation\nthat the initial part of the quotient sequence depends only on the most\nsignificant parts of the inputs. The variant of Lehmer's algorithm used in MPIR \nsplits off the most significant two limbs, as suggested, e.g., in ``A\nDouble-Digit Lehmer-Euclid Algorithm'' by Jebelean (@pxref{References}). 
The\nquotients of two double-limb inputs are collected as a 2 by 2 matrix with\nsingle-limb elements. This is done by the function @code{mpn_hgcd2}. The\nresulting matrix is applied to the inputs using @code{mpn_mul_1} and\n@code{mpn_submul_1}. Each iteration usually reduces the inputs by almost one\nlimb. In the rare case of a large quotient, no progress can be made by\nexamining just the most significant two limbs, and the quotient is computed\nusing plain division.\n\nThe resulting algorithm is asymptotically @math{O(N^2)}, just as the Euclidean\nalgorithm and the binary algorithm. The quadratic part of the work is\nthe calls to @code{mpn_mul_1} and @code{mpn_submul_1}. For small sizes, the\nlinear work is also significant. There are roughly @math{N} calls to the\n@code{mpn_hgcd2} function. This function uses a couple of important\noptimizations:\n\n@itemize\n@item\nIt uses the same relaxed notion of correctness as @code{mpn_hgcd} (see next\nsection). This means that when called with the most significant two limbs of\ntwo large numbers, the returned matrix does not always correspond exactly to\nthe initial quotient sequence for the two large numbers; the final quotient\nmay sometimes be one off.\n\n@item\nIt takes advantage of the fact the quotients are usually small. The division\noperator is not used, since the corresponding assembler instruction is very\nslow on most architectures. 
(This code could probably be improved further, it\nuses many branches that are unfriendly to prediction).\n\n@item\nIt switches from double-limb calculations to single-limb calculations half-way\nthrough, when the input numbers have been reduced in size from two limbs to\none and a half.\n\n@end itemize\n\n@node Subquadratic GCD, Extended GCD, Lehmer's GCD, Greatest Common Divisor Algorithms\n@subsection Subquadratic GCD\n\nFor inputs larger than @code{GCD_DC_THRESHOLD}, GCD is computed via the HGCD\n(Half GCD) function, as a generalization to Lehmer's algorithm.\n\nLet the inputs @math{a,b} be of size @math{N} limbs each. Put @m{S=\\lfloor N/2\n\\rfloor + 1, S = floor(N/2) + 1}. Then HGCD(a,b) returns a transformation\nmatrix @math{T} with non-negative elements, and reduced numbers @math{(c;d) =\nT^{-1} (a;b)}. The reduced numbers @math{c,d} must be larger than @math{S}\nlimbs, while their difference @math{abs(c-d)} must fit in @math{S} limbs. The\nmatrix elements will also be of size roughly @math{N/2}.\n\nThe HGCD base case uses Lehmer's algorithm, but with the above stop condition\nthat returns reduced numbers and the corresponding transformation matrix\nhalf-way through. For inputs larger than @code{HGCD_THRESHOLD}, HGCD is\ncomputed recursively, using the divide and conquer algorithm in ``On\nSch@\"onhage's algorithm and subquadratic integer GCD computation'' by M@\"oller\n(@pxref{References}). The recursive algorithm consists of these main\nsteps.\n\n@itemize\n\n@item\nCall HGCD recursively, on the most significant @math{N/2} limbs. Apply the\nresulting matrix @math{T_1} to the full numbers, reducing them to a size just\nabove @math{3N/2}.\n\n@item\nPerform a small number of division or subtraction steps to reduce the numbers\nto size below @math{3N/2}. This is essential mainly for the unlikely case of\nlarge quotients.\n\n@item\nCall HGCD recursively, on the most significant @math{N/2} limbs of the reduced\nnumbers. 
Apply the resulting matrix @math{T_2} to the full numbers, reducing\nthem to a size just above @math{N/2}.\n\n@item\nCompute @math{T = T_1 T_2}.\n\n@item\nPerform a small number of division and subtraction steps to satisfy the\nrequirements, and return.\n@end itemize\n\nGCD is then implemented as a loop around HGCD, similarly to Lehmer's\nalgorithm. Where Lehmer repeatedly chops off the top two limbs, calls\n@code{mpn_hgcd2}, and applies the resulting matrix to the full numbers, the\nsubquadratic GCD chops off the most significant third of the limbs (the\nproportion is a tuning parameter, and @math{1/3} seems to be more efficient\nthan, e.g, @math{1/2}), calls @code{mpn_hgcd}, and applies the resulting\nmatrix. Once the input numbers are reduced to size below\n@code{GCD_DC_THRESHOLD}, Lehmer's algorithm is used for the rest of the work.\n\nThe asymptotic running time of both HGCD and GCD is @m{O(M(N)\\log N),O(M(N)*log(N))},\nwhere @math{M(N)} is the time for multiplying two @math{N}-limb numbers.\n\n@node Extended GCD, Jacobi Symbol, Subquadratic GCD, Greatest Common Divisor Algorithms\n@subsection Extended GCD\n\nThe extended GCD function, or gcdext, calculates @math{@gcd{}(a,b)} and also\none of the cofactors @math{x} and @math{y} satisfying @m{ax+by=\\gcd(a@C{}b),\na*x+b*y=gcd(a@C{}b)}. The algorithms used for plain GCD are extended to\nhandle this case. \n\nLehmer's algorithm is used for sizes up to @code{GCDEXT_DC_THRESHOLD}. Above\nthis threshold, GCDEXT is implemented as a loop around HGCD, but with more\nbook-keeping to keep track of the cofactors. \n\n@node Jacobi Symbol,  , Extended GCD, Greatest Common Divisor Algorithms\n@subsection Jacobi Symbol\n@cindex Jacobi symbol algorithm\n\n@code{mpz_jacobi} and @code{mpz_kronecker} are currently implemented with a\nsimple binary algorithm similar to that described for the GCDs (@pxref{Binary\nGCD}).  They're not very fast when both inputs are large.  
Lehmer's multi-step\nimprovement or a binary based multi-step algorithm is likely to be better.\n\nWhen one operand fits a single limb, and that includes @code{mpz_kronecker_ui}\nand friends, an initial reduction is done with either @code{mpn_mod_1} or\n@code{mpn_modexact_1_odd}, followed by the binary algorithm on a single limb.\nThe binary algorithm is well suited to a single limb, and the whole\ncalculation in this case is quite efficient.\n\nIn all the routines sign changes for the result are accumulated using some bit\ntwiddling, avoiding table lookups or conditional jumps.\n\n\n@need 1000\n@node Powering Algorithms, Root Extraction Algorithms, Greatest Common Divisor Algorithms, Algorithms\n@section Powering Algorithms\n@cindex Powering algorithms\n\n@menu\n* Normal Powering Algorithm::\n* Modular Powering Algorithm::\n@end menu\n\n\n@node Normal Powering Algorithm, Modular Powering Algorithm, Powering Algorithms, Powering Algorithms\n@subsection Normal Powering\n\nNormal @code{mpz} or @code{mpf} powering uses a simple binary algorithm,\nsuccessively squaring and then multiplying by the base when a 1 bit is seen in\nthe exponent, as per Knuth section 4.6.3.  The ``left to right''\nvariant described there is used rather than algorithm A, since it's just as\neasy and can be done with somewhat less temporary memory.\n\n\n@node Modular Powering Algorithm,  , Normal Powering Algorithm, Powering Algorithms\n@subsection Modular Powering\n\nModular powering is implemented using a @math{2^k}-ary sliding window\nalgorithm, as per ``Handbook of Applied Cryptography'' algorithm 14.85\n(@pxref{References}).  @math{k} is chosen according to the size of the\nexponent.  Larger exponents use larger values of @math{k}, the choice being\nmade to minimize the average number of multiplications that must supplement\nthe squaring.\n\nThe modular multiplies and squares use either a simple division or the REDC\nmethod by Montgomery (@pxref{References}).  
REDC is a little faster,\nessentially saving N single limb divisions in a fashion similar to an exact\nremainder (@pxref{Exact Remainder}).  The current REDC has some limitations.\nIt's only @math{O(N^2)} so above @code{POWM_THRESHOLD} division becomes faster\nand is used.  It doesn't attempt to detect small bases, but rather always uses\na REDC form, which is usually a full size operand.  And lastly it's only\napplied to odd moduli.\n\n\n@node Root Extraction Algorithms, Radix Conversion Algorithms, Powering Algorithms, Algorithms\n@section Root Extraction Algorithms\n@cindex Root extraction algorithms\n\n@menu\n* Square Root Algorithm::\n* Nth Root Algorithm::\n* Perfect Square Algorithm::\n* Perfect Power Algorithm::\n@end menu\n\n\n@node Square Root Algorithm, Nth Root Algorithm, Root Extraction Algorithms, Root Extraction Algorithms\n@subsection Square Root\n@cindex Square root algorithm\n@cindex Karatsuba square root algorithm\n\nSquare roots are taken using the ``Karatsuba Square Root'' algorithm by Paul\nZimmermann (@pxref{References}).\n\nAn input @math{n} is split into four parts of @math{k} bits each, so with\n@math{b=2^k} we have @m{n = a_3b^3 + a_2b^2 + a_1b + a_0, n = a3*b^3 + a2*b^2\n+ a1*b + a0}.  Part @ms{a,3} must be ``normalized'' so that either the high or\nsecond highest bit is set.  
In MPIR, @math{k} is kept on a limb boundary and\nthe input is left shifted (by an even number of bits) to normalize.\n\nThe square root of the high two parts is taken, by recursive application of\nthe algorithm (bottoming out in a one-limb Newton's method),\n@tex\n$$ s',r' = \\mathop{\\rm sqrtrem} \\> (a_3b + a_2) $$\n@end tex\n@ifnottex\n\n@example\ns1,r1 = sqrtrem (a3*b + a2)\n@end example\n\n@end ifnottex\nThis is an approximation to the desired root and is extended by a division to\ngive @math{s},@math{r},\n@tex\n$$\\eqalign{\nq,u &= \\mathop{\\rm divrem} \\> (r'b + a_1, 2s') \\cr\ns &= s'b + q \\cr\nr &= ub + a_0 - q^2\n}$$\n@end tex\n@ifnottex\n\n@example\nq,u = divrem (r1*b + a1, 2*s1)\ns = s1*b + q\nr = u*b + a0 - q^2\n@end example\n\n@end ifnottex\nThe normalization requirement on @ms{a,3} means at this point @math{s} is\neither correct or 1 too big.  @math{r} is negative in the latter case, so\n@tex\n$$\\eqalign{\n\\mathop{\\rm if} \\; r &< 0 \\; \\mathop{\\rm then} \\cr\nr &\\leftarrow r + 2s - 1 \\cr\ns &\\leftarrow s - 1\n}$$\n@end tex\n@ifnottex\n\n@example\nif r < 0 then\n  r = r + 2*s - 1\n  s = s - 1\n@end example\n\n@end ifnottex\nThe algorithm is expressed in a divide and conquer form, but as noted in the\npaper it can also be viewed as a discrete variant of Newton's method, or as a\nvariation on the schoolboy method (no longer taught) for square roots two\ndigits at a time.\n\nIf the remainder @math{r} is not required then usually only a few high limbs\nof @math{r} and @math{u} need to be calculated to determine whether an\nadjustment to @math{s} is required.  This optimization is not currently\nimplemented.\n\nIn the Karatsuba multiplication range this algorithm is @m{O({3\\over2}\nM(N/2)),O(1.5*M(N/2))}, where @math{M(n)} is the time to multiply two numbers\nof @math{n} limbs.  In the FFT multiplication range this grows to a bound of\n@m{O(6 M(N/2)),O(6*M(N/2))}.  
In practice a factor of about 1.5 to 1.8 is\nfound in the Karatsuba and Toom-3 ranges, growing to 2 or 3 in the FFT range.\n\nThe algorithm does all its calculations in integers and the resulting\n@code{mpn_sqrtrem} is used for both @code{mpz_sqrt} and @code{mpf_sqrt}.\nThe extended precision given by @code{mpf_sqrt_ui} is obtained by\npadding with zero limbs.\n\n\n@node Nth Root Algorithm, Perfect Square Algorithm, Square Root Algorithm, Root Extraction Algorithms\n@subsection Nth Root\n@cindex Root extraction algorithm\n@cindex Nth root algorithm\n\nInteger Nth roots are taken using Newton's method with the following\niteration, where @math{A} is the input and @math{n} is the root to be taken.\n@tex\n$$a_{i+1} = {1\\over n} \\left({A \\over a_i^{n-1}} + (n-1)a_i \\right)$$\n@end tex\n@ifnottex\n\n@example\n         1         A\na[i+1] = - * ( --------- + (n-1)*a[i] )\n         n     a[i]^(n-1)\n@end example\n\n@end ifnottex\nThe initial approximation @m{a_1,a[1]} is generated bitwise by successively\npowering a trial root with or without new 1 bits, aiming to be just above the\ntrue root.  The iteration converges quadratically when started from a good\napproximation.  When @math{n} is large more initial bits are needed to get\ngood convergence.  The current implementation is not particularly well\noptimized.\n\n\n@node Perfect Square Algorithm, Perfect Power Algorithm, Nth Root Algorithm, Root Extraction Algorithms\n@subsection Perfect Square\n@cindex Perfect square algorithm\n\nA significant fraction of non-squares can be quickly identified by checking\nwhether the input is a quadratic residue modulo small integers.\n\n@code{mpz_perfect_square_p} first tests the input mod 256, which means just\nexamining the low byte.  Only 44 different values occur for squares mod 256,\nso 82.8% of inputs can be immediately identified as non-squares.\n\nOn a 32-bit system similar tests are done mod 9, 5, 7, 13 and 17, for a total\n99.25% of inputs identified as non-squares.  
On a 64-bit system 97 is tested\ntoo, for a total 99.62%.\n\nThese moduli are chosen because they're factors of @math{2^@W{24}-1} (or\n@math{2^@W{48}-1} for 64-bits), and such a remainder can be quickly taken just\nusing additions (see @code{mpn_mod_34lsub1}).\n\nWhen nails are in use moduli are instead selected by the @file{gen-psqr.c}\nprogram and applied with an @code{mpn_mod_1}.  The same @math{2^@W{24}-1} or\n@math{2^@W{48}-1} could be done with nails using some extra bit shifts, but\nthis is not currently implemented.\n\nIn any case each modulus is applied to the @code{mpn_mod_34lsub1} or\n@code{mpn_mod_1} remainder and a table lookup identifies non-squares.  By\nusing a ``modexact'' style calculation, and suitably permuted tables, just one\nmultiply each is required, see the code for details.  Moduli are also combined\nto save operations, so long as the lookup tables don't become too big.\n@file{gen-psqr.c} does all the pre-calculations.\n\nA square root must still be taken for any value that passes these tests, to\nverify it's really a square and not one of the small fraction of non-squares\nthat get through (ie.@: a pseudo-square to all the tested bases).\n\nClearly more residue tests could be done, @code{mpz_perfect_square_p} only\nuses a compact and efficient set.  Big inputs would probably benefit from more\nresidue testing, small inputs might be better off with less.  
The assumed\ndistribution of squares versus non-squares in the input would affect such\nconsiderations.\n\n\n@node Perfect Power Algorithm,  , Perfect Square Algorithm, Root Extraction Algorithms\n@subsection Perfect Power\n@cindex Perfect power algorithm\n\nDetecting perfect powers is required by some factorization algorithms.\nCurrently @code{mpz_perfect_power_p} is implemented using repeated Nth root\nextractions, though naturally only prime roots need to be considered.\n(@xref{Nth Root Algorithm}.)\n\nIf a prime divisor @math{p} with multiplicity @math{e} can be found, then only\nroots which are divisors of @math{e} need to be considered, much reducing the\nwork necessary.  To this end divisibility by a set of small primes is checked.\n\n\n@node Radix Conversion Algorithms, Other Algorithms, Root Extraction Algorithms, Algorithms\n@section Radix Conversion\n@cindex Radix conversion algorithms\n\nRadix conversions are less important than other algorithms.  A program\ndominated by conversions should probably use a different data representation.\n\n@menu\n* Binary to Radix::\n* Radix to Binary::\n@end menu\n\n\n@node Binary to Radix, Radix to Binary, Radix Conversion Algorithms, Radix Conversion Algorithms\n@subsection Binary to Radix\n\nConversions from binary to a power-of-2 radix use a simple and fast\n@math{O(N)} bit extraction algorithm.\n\nConversions from binary to other radices use one of two algorithms.  Sizes\nbelow @code{GET_STR_PRECOMPUTE_THRESHOLD} use a basic @math{O(N^2)} method.\nRepeated divisions by @math{b^n} are made, where @math{b} is the radix and\n@math{n} is the biggest power that fits in a limb.  But instead of simply\nusing the remainder @math{r} from such divisions, an extra divide step is done\nto give a fractional limb representing @math{r/b^n}.  
The digits of @math{r}\ncan then be extracted using multiplications by @math{b} rather than divisions.\nSpecial case code is provided for decimal, allowing multiplications by 10 to\noptimize to shifts and adds.\n\nAbove @code{GET_STR_PRECOMPUTE_THRESHOLD} a sub-quadratic algorithm is used.\nFor an input @math{t}, powers @m{b^{n2^i},b^(n*2^i)} of the radix are\ncalculated, until a power between @math{t} and @m{\\sqrt{t},sqrt(t)} is\nreached.  @math{t} is then divided by that largest power, giving a quotient\nwhich is the digits above that power, and a remainder which is those below.\nThese two parts are in turn divided by the second highest power, and so on\nrecursively.  When a piece has been divided down to less than\n@code{GET_STR_DC_THRESHOLD} limbs, the basecase algorithm described above is\nused.\n\nThe advantage of this algorithm is that big divisions can make use of the\nsub-quadratic divide and conquer division (@pxref{Divide and Conquer\nDivision}), and big divisions tend to have less overheads than lots of\nseparate single limb divisions anyway.  But in any case the cost of\ncalculating the powers @m{b^{n2^i},b^(n*2^i)} must first be overcome.\n\n@code{GET_STR_PRECOMPUTE_THRESHOLD} and @code{GET_STR_DC_THRESHOLD} represent\nthe same basic thing, the point where it becomes worth doing a big division to\ncut the input in half.  @code{GET_STR_PRECOMPUTE_THRESHOLD} includes the cost\nof calculating the radix power required, whereas @code{GET_STR_DC_THRESHOLD}\nassumes that's already available, which is the case when recursing.\n\nSince the base case produces digits from least to most significant but they\nwant to be stored from most to least, it's necessary to calculate in advance\nhow many digits there will be, or at least be sure not to underestimate that.\nFor MPIR the number of input bits is multiplied by @code{chars_per_bit_exactly}\nfrom @code{mp_bases}, rounding up.  
The result is either correct or one too\nbig.\n\nExamining some of the high bits of the input could increase the chance of\ngetting the exact number of digits, but an exact result every time would not\nbe practical, since in general the difference between numbers 100@dots{} and\n99@dots{} is only in the last few bits and the work to identify 99@dots{}\nmight well be almost as much as a full conversion.\n\n@code{mpf_get_str} doesn't currently use the algorithm described here, it\nmultiplies or divides by a power of @math{b} to move the radix point to the\njust above the highest non-zero digit (or at worst one above that location),\nthen multiplies by @math{b^n} to bring out digits.  This is @math{O(N^2)} and\nis certainly not optimal.\n\nThe @math{r/b^n} scheme described above for using multiplications to bring out\ndigits might be useful for more than a single limb.  Some brief experiments\nwith it on the base case when recursing didn't give a noticeable improvement,\nbut perhaps that was only due to the implementation.  Something similar would\nwork for the sub-quadratic divisions too, though there would be the cost of\ncalculating a bigger radix power.\n\nAnother possible improvement for the sub-quadratic part would be to arrange\nfor radix powers that balanced the sizes of quotient and remainder produced,\nie.@: the highest power would be an @m{b^{nk},b^(n*k)} approximately equal to\n@m{\\sqrt{t},sqrt(t)}, not restricted to a @math{2^i} factor.  That ought to\nsmooth out a graph of times against sizes, but may or may not be a net\nspeedup.\n\n\n@node Radix to Binary,  , Binary to Radix, Radix Conversion Algorithms\n@subsection Radix to Binary\n\nThis section is out-of-date.\n\nConversions from a power-of-2 radix into binary use a simple and fast\n@math{O(N)} bitwise concatenation algorithm.\n\nConversions from other radices use one of two algorithms.  Sizes below\n@code{SET_STR_THRESHOLD} use a basic @math{O(N^2)} method.  
Groups of @math{n}\ndigits are converted to limbs, where @math{n} is the biggest power of the base\n@math{b} which will fit in a limb, then those groups are accumulated into the\nresult by multiplying by @math{b^n} and adding.  This saves multi-precision\noperations, as per Knuth section 4.4 part E (@pxref{References}).  Some\nspecial case code is provided for decimal, giving the compiler a chance to\noptimize multiplications by 10.\n\nAbove @code{SET_STR_THRESHOLD} a sub-quadratic algorithm is used.  First\ngroups of @math{n} digits are converted into limbs.  Then adjacent limbs are\ncombined into limb pairs with @m{xb^n+y,x*b^n+y}, where @math{x} and @math{y}\nare the limbs.  Adjacent limb pairs are combined into quads similarly with\n@m{xb^{2n}+y,x*b^(2n)+y}.  This continues until a single block remains, that\nbeing the result.\n\nThe advantage of this method is that the multiplications for each @math{x} are\nbig blocks, allowing Karatsuba and higher algorithms to be used.  But the cost\nof calculating the powers @m{b^{n2^i},b^(n*2^i)} must be overcome.\n@code{SET_STR_THRESHOLD} usually ends up quite big, around 5000 digits, and on\nsome processors much bigger still.\n\n@code{SET_STR_THRESHOLD} is based on the input digits (and tuned for decimal),\nthough it might be better based on a limb count, so as to be independent of\nthe base.  
But that sort of count isn't used by the base case and so would\nneed some sort of initial calculation or estimate.\n\nThe main reason @code{SET_STR_THRESHOLD} is so much bigger than the\ncorresponding @code{GET_STR_PRECOMPUTE_THRESHOLD} is that @code{mpn_mul_1} is\nmuch faster than @code{mpn_divrem_1} (often by a factor of 10, or more).\n\n\n@need 1000\n@node Other Algorithms, Assembler Coding, Radix Conversion Algorithms, Algorithms\n@section Other Algorithms\n\n@menu\n* Prime Testing Algorithm::\n* Factorial Algorithm::\n* Binomial Coefficients Algorithm::\n* Fibonacci Numbers Algorithm::\n* Lucas Numbers Algorithm::\n* Random Number Algorithms::\n@end menu\n\n\n@node Prime Testing Algorithm, Factorial Algorithm, Other Algorithms, Other Algorithms\n@subsection Prime Testing\n@cindex Prime testing algorithms\n\nThis section is somewhat out-of-date.\n\nThe primality testing in @code{mpz_probab_prime_p} (@pxref{Number Theoretic\nFunctions}) first does some trial division by small factors and then uses the\nMiller-Rabin probabilistic primality testing algorithm, as described in Knuth\nsection 4.5.4 algorithm P (@pxref{References}).\n\nFor an odd input @math{n}, and with @math{n = q@GMPmultiply{}2^k+1} where\n@math{q} is odd, this algorithm selects a random base @math{x} and tests\nwhether @math{x^q @bmod{} n} is 1 or @math{-1}, or an @m{x^{q2^j} \\bmod n,\nx^(q*2^j) mod n} is @math{1}, for @math{1@le{}j@le{}k}.  If so then @math{n}\nis probably prime, if not then @math{n} is definitely composite.\n\nAny prime @math{n} will pass the test, but some composites do too.  Such\ncomposites are known as strong pseudoprimes to base @math{x}.  
No @math{n} is\na strong pseudoprime to more than @math{1/4} of all bases (see Knuth exercise\n22), hence with @math{x} chosen at random there's no more than a @math{1/4}\nchance a ``probable prime'' will in fact be composite.\n\nIn fact strong pseudoprimes are quite rare, making the test much more\npowerful than this analysis would suggest, but @math{1/4} is all that's proven\nfor an arbitrary @math{n}.\n\n\n@node Factorial Algorithm, Binomial Coefficients Algorithm, Prime Testing Algorithm, Other Algorithms\n@subsection Factorial\n@cindex Factorial algorithm\n\nThis section is out-of-date.\n\nFactorials are calculated by a combination of removal of twos, powering, and\nbinary splitting.  The procedure can be best illustrated with an example,\n\n@quotation\n@math{23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23}\n@end quotation\n\n@noindent\nhas factors of two removed,\n\n@quotation\n@math{23! = 2^{19}.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23}\n@end quotation\n\n@noindent\nand the resulting terms collected up according to their multiplicity,\n\n@quotation\n@math{23! = 2^{19}.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)}\n@end quotation\n\nEach sequence such as @math{13.15.17.19.21.23} is evaluated by splitting into\nevery second term, as for instance @math{(13.17.21).(15.19.23)}, and the same\nrecursively on each half.  This is implemented iteratively using some bit\ntwiddling.\n\nSuch splitting is more efficient than repeated N@cross{}1 multiplies since it\nforms big multiplies, allowing Karatsuba and higher algorithms to be used.\nAnd even below the Karatsuba threshold a big block of work can be more\nefficient for the basecase algorithm.\n\nSplitting into subsequences of every second term keeps the resulting products\nmore nearly equal in size than would the simpler approach of say taking the\nfirst half and second half of the sequence.  
Nearly equal products are more\nefficient for the current multiply implementation.\n\n\n@node Binomial Coefficients Algorithm, Fibonacci Numbers Algorithm, Factorial Algorithm, Other Algorithms\n@subsection Binomial Coefficients\n@cindex Binomial coefficient algorithm\n\nBinomial coefficients @m{\\left({n}\\atop{k}\\right), C(n@C{}k)} are calculated\nby first arranging @math{k @le{} n/2} using @m{\\left({n}\\atop{k}\\right) =\n\\left({n}\\atop{n-k}\\right), C(n@C{}k) = C(n@C{}n-k)} if necessary, and then\nevaluating the following product simply from @math{i=2} to @math{i=k}.\n@tex\n$$ \\left({n}\\atop{k}\\right) = (n-k+1) \\prod_{i=2}^{k} {{n-k+i} \\over i} $$\n@end tex\n@ifnottex\n\n@example\n                      k  (n-k+i)\nC(n,k) =  (n-k+1) * prod -------\n                     i=2    i\n@end example\n\n@end ifnottex\nIt's easy to show that each denominator @math{i} will divide the product so\nfar, so the exact division algorithm is used (@pxref{Exact Division}).\n\nThe numerators @math{n-k+i} and denominators @math{i} are first accumulated\ninto as many fit a limb, to save multi-precision operations, though for\n@code{mpz_bin_ui} this applies only to the divisors, since @math{n} is an\n@code{mpz_t} and @math{n-k+i} in general won't fit in a limb at all.\n\n\n@node Fibonacci Numbers Algorithm, Lucas Numbers Algorithm, Binomial Coefficients Algorithm, Other Algorithms\n@subsection Fibonacci Numbers\n@cindex Fibonacci number algorithm\n\nThe Fibonacci functions @code{mpz_fib_ui} and @code{mpz_fib2_ui} are designed\nfor calculating isolated @m{F_n,F[n]} or @m{F_n,F[n]},@m{F_{n-1},F[n-1]}\nvalues efficiently.\n\nFor small @math{n}, a table of single limb values in @code{__gmp_fib_table} is\nused.  On a 32-bit limb this goes up to @m{F_{47},F[47]}, or on a 64-bit limb\nup to @m{F_{93},F[93]}.  
For convenience the table starts at @m{F_{-1},F[-1]}.\n\nBeyond the table, values are generated with a binary powering algorithm,\ncalculating a pair @m{F_n,F[n]} and @m{F_{n-1},F[n-1]} working from high to\nlow across the bits of @math{n}.  The formulas used are\n@tex\n$$\\eqalign{\n  F_{2k+1} &= 4F_k^2 - F_{k-1}^2 + 2(-1)^k \\cr\n  F_{2k-1} &=  F_k^2 + F_{k-1}^2           \\cr\n  F_{2k}   &= F_{2k+1} - F_{2k-1}\n}$$\n@end tex\n@ifnottex\n\n@example\nF[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k\nF[2k-1] =   F[k]^2 + F[k-1]^2\n\nF[2k] = F[2k+1] - F[2k-1]\n@end example\n\n@end ifnottex\nAt each step, @math{k} is the high @math{b} bits of @math{n}.  If the next bit\nof @math{n} is 0 then @m{F_{2k},F[2k]},@m{F_{2k-1},F[2k-1]} is used, or if\nit's a 1 then @m{F_{2k+1},F[2k+1]},@m{F_{2k},F[2k]} is used, and the process\nrepeated until all bits of @math{n} are incorporated.  Notice these formulas\nrequire just two squares per bit of @math{n}.\n\nIt'd be possible to handle the first few @math{n} above the single limb table\nwith simple additions, using the defining Fibonacci recurrence @m{F_{k+1} =\nF_k + F_{k-1}, F[k+1]=F[k]+F[k-1]}, but this is not done since it usually\nturns out to be faster for only about 10 or 20 values of @math{n}, and\nincluding a block of code for just those doesn't seem worthwhile.  If they\nreally mattered it'd be better to extend the data table.\n\nUsing a table avoids lots of calculations on small numbers, and makes small\n@math{n} go fast.  A bigger table would make more small @math{n} go fast, it's\njust a question of balancing size against desired speed.  For MPIR the code is\nkept compact, with the emphasis primarily on a good powering algorithm.\n\n@code{mpz_fib2_ui} returns both @m{F_n,F[n]} and @m{F_{n-1},F[n-1]}, but\n@code{mpz_fib_ui} is only interested in @m{F_n,F[n]}.  In this case the last\nstep of the algorithm can become one multiply instead of two squares.  
One of\nthe following two formulas is used, according as @math{n} is odd or even.\n@tex\n$$\\eqalign{\n  F_{2k}   &= F_k (F_k + 2F_{k-1}) \\cr\n  F_{2k+1} &= (2F_k + F_{k-1}) (2F_k - F_{k-1}) + 2(-1)^k\n}$$\n@end tex\n@ifnottex\n\n@example\nF[2k]   = F[k]*(F[k]+2F[k-1])\n\nF[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k\n@end example\n\n@end ifnottex\n@m{F_{2k+1},F[2k+1]} here is the same as above, just rearranged to be a\nmultiply.  For interest, the @m{2(-1)^k, 2*(-1)^k} term both here and above\ncan be applied just to the low limb of the calculation, without a carry or\nborrow into further limbs, which saves some code size.  See comments with\n@code{mpz_fib_ui} and the internal @code{mpn_fib2_ui} for how this is done.\n\n\n@node Lucas Numbers Algorithm, Random Number Algorithms, Fibonacci Numbers Algorithm, Other Algorithms\n@subsection Lucas Numbers\n@cindex Lucas number algorithm\n\n@code{mpz_lucnum2_ui} derives a pair of Lucas numbers from a pair of Fibonacci\nnumbers with the following simple formulas.\n@tex\n$$\\eqalign{\n  L_k     &=  F_k + 2F_{k-1} \\cr\n  L_{k-1} &= 2F_k -  F_{k-1}\n}$$\n@end tex\n@ifnottex\n\n@example\nL[k]   =   F[k] + 2*F[k-1]\nL[k-1] = 2*F[k] -   F[k-1]\n@end example\n\n@end ifnottex\n@code{mpz_lucnum_ui} is only interested in @m{L_n,L[n]}, and some work can be\nsaved.  
Trailing zero bits on @math{n} can be handled with a single square\neach.\n@tex\n$$ L_{2k} = L_k^2 - 2(-1)^k $$\n@end tex\n@ifnottex\n\n@example\nL[2k] = L[k]^2 - 2*(-1)^k\n@end example\n\n@end ifnottex\nAnd the lowest 1 bit can be handled with one multiply of a pair of Fibonacci\nnumbers, similar to what @code{mpz_fib_ui} does.\n@tex\n$$ L_{2k+1} = 5F_{k-1} (2F_k + F_{k-1}) - 4(-1)^k $$\n@end tex\n@ifnottex\n\n@example\nL[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k\n@end example\n\n@end ifnottex\n\n\n@node Random Number Algorithms,  , Lucas Numbers Algorithm, Other Algorithms\n@subsection Random Numbers\n@cindex Random number algorithms\n\nFor the @code{urandomb} functions, random numbers are generated simply by\nconcatenating bits produced by the generator.  As long as the generator has\ngood randomness properties this will produce well-distributed @math{N} bit\nnumbers.\n\nFor the @code{urandomm} functions, random numbers in a range @math{0@le{}R<N}\nare generated by taking values @math{R} of @m{\\lceil \\log_2 N \\rceil,\nceil(log2(N))} bits each until one satisfies @math{R<N}.  This will normally\nrequire only one or two attempts, but the attempts are limited in case the\ngenerator is somehow degenerate and produces only 1 bits or similar.\n\n@cindex Mersenne twister algorithm\nThe Mersenne Twister generator is by Matsumoto and Nishimura\n(@pxref{References}).  It has a non-repeating period of @math{2^@W{19937}-1},\nwhich is a Mersenne prime, hence the name of the generator.  The state is 624\nwords of 32-bits each, which is iterated with one XOR and shift for each\n32-bit word generated, making the algorithm very fast.  Randomness properties\nare also very good and this is the default algorithm used by MPIR.\n\n@cindex Linear congruential algorithm\nLinear congruential generators are described in many text books, for instance\nKnuth volume 2 (@pxref{References}).  
With a modulus @math{M} and parameters\n@math{A} and @math{C}, a integer state @math{S} is iterated by the formula\n@math{S @leftarrow{} A@GMPmultiply{}S+C @bmod{} M}.  At each step the new\nstate is a linear function of the previous, mod @math{M}, hence the name of\nthe generator.\n\nIn MPIR only moduli of the form @math{2^N} are supported, and the current\nimplementation is not as well optimized as it could be.  Overheads are\nsignificant when @math{N} is small, and when @math{N} is large clearly the\nmultiply at each step will become slow.  This is not a big concern, since the\nMersenne Twister generator is better in every respect and is therefore\nrecommended for all normal applications.\n\nFor both generators the current state can be deduced by observing enough\noutput and applying some linear algebra (over GF(2) in the case of the\nMersenne Twister).  This generally means raw output is unsuitable for\ncryptographic applications without further hashing or the like.\n\n\n@node Assembler Coding,  , Other Algorithms, Algorithms\n@section Assembler Coding\n@cindex Assembler coding\n\nThe assembler subroutines in MPIR are the most significant source of speed at\nsmall to moderate sizes.  At larger sizes algorithm selection becomes more\nimportant, but of course speedups in low level routines will still speed up\neverything proportionally.\n\nCarry handling and widening multiplies that are important for MPIR can't be\neasily expressed in C@.  
GCC @code{asm} blocks help a lot and are provided in\n@file{longlong.h}, but hand coding low level routines invariably offers a\nspeedup over generic C by a factor of anything from 2 to 10.\n\n@menu\n* Assembler Code Organisation::\n* Assembler Basics::\n* Assembler Carry Propagation::\n* Assembler Cache Handling::\n* Assembler Functional Units::\n* Assembler Floating Point::\n* Assembler SIMD Instructions::\n* Assembler Software Pipelining::\n* Assembler Loop Unrolling::\n* Assembler Writing Guide::\n@end menu\n\n\n@node Assembler Code Organisation, Assembler Basics, Assembler Coding, Assembler Coding\n@subsection Code Organisation\n@cindex Assembler code organisation\n@cindex Code organisation\n\nThe various @file{mpn} subdirectories contain machine-dependent code, written\nin C or assembler.  The @file{mpn/generic} subdirectory contains default code,\nused when there's no machine-specific version of a particular file.\n\nEach @file{mpn} subdirectory is for an ISA family.  Generally 32-bit and\n64-bit variants in a family cannot share code and have separate directories.\nWithin a family further subdirectories may exist for CPU variants.\n\nIn each directory a @file{nails} subdirectory may exist, holding code with\nnails support for that CPU variant.  A @code{NAILS_SUPPORT} directive in each\nfile indicates the nails values the code handles.  Nails code only exists\nwhere it's faster, or promises to be faster, than plain code.  There's no\neffort put into nails if they're not going to enhance a given CPU.\n\n\n@node Assembler Basics, Assembler Carry Propagation, Assembler Code Organisation, Assembler Coding\n@subsection Assembler Basics\n\n@code{mpn_addmul_1} and @code{mpn_submul_1} are the most important routines\nfor overall MPIR performance.  All multiplications and divisions come down to\nrepeated calls to these.  
@code{mpn_add_n}, @code{mpn_sub_n},\n@code{mpn_lshift} and @code{mpn_rshift} are next most important.\n\nOn some CPUs assembler versions of the internal functions\n@code{mpn_mul_basecase} and @code{mpn_sqr_basecase} give significant speedups,\nmainly through avoiding function call overheads.  They can also potentially\nmake better use of a wide superscalar processor, as can bigger primitives like\n@code{mpn_addmul_2} or @code{mpn_addmul_4}.\n\nThe restrictions on overlaps between sources and destinations\n(@pxref{Low-level Functions}) are designed to facilitate a variety of\nimplementations.  For example, knowing @code{mpn_add_n} won't have partly\noverlapping sources and destination means reading can be done far ahead of\nwriting on superscalar processors, and loops can be vectorized on a vector\nprocessor, depending on the carry handling.\n\n\n@node Assembler Carry Propagation, Assembler Cache Handling, Assembler Basics, Assembler Coding\n@subsection Carry Propagation\n@cindex Assembler carry propagation\n\nThe problem that presents most challenges in MPIR is propagating carries from\none limb to the next.  In functions like @code{mpn_addmul_1} and\n@code{mpn_add_n}, carries are the only dependencies between limb operations.\n\nOn processors with carry flags, a straightforward CISC style @code{adc} is\ngenerally best.  AMD K6 @code{mpn_addmul_1} however is an example of an\nunusual set of circumstances where a branch works out better.\n\nOn RISC processors generally an add and compare for overflow is used.  This\nsort of thing can be seen in @file{mpn/generic/aors_n.c}.  Some carry\npropagation schemes require 4 instructions, meaning at least 4 cycles per\nlimb, but other schemes may use just 1 or 2.  
On wide superscalar processors\nperformance may be completely determined by the number of dependent\ninstructions between carry-in and carry-out for each limb.\n\nOn vector processors good use can be made of the fact that a carry bit only\nvery rarely propagates more than one limb.  When adding a single bit to a\nlimb, there's only a carry out if that limb was @code{0xFF@dots{}FF} which on\nrandom data will be only 1 in @m{2\\GMPraise{@code{mp\\_bits\\_per\\_limb}},\n2^mp_bits_per_limb}.  @file{mpn/cray/add_n.c} is an example of this, it adds\nall limbs in parallel, adds one set of carry bits in parallel and then only\nrarely needs to fall through to a loop propagating further carries.\n\nOn the x86s, GCC (as of version 2.95.2) doesn't generate particularly good code\nfor the RISC style idioms that are necessary to handle carry bits in\nC@.  Often conditional jumps are generated where @code{adc} or @code{sbb} forms\nwould be better.  And so unfortunately almost any loop involving carry bits\nneeds to be coded in assembler for best results.\n\n\n@node Assembler Cache Handling, Assembler Functional Units, Assembler Carry Propagation, Assembler Coding\n@subsection Cache Handling\n@cindex Assembler cache handling\n\nMPIR aims to perform well both on operands that fit entirely in L1 cache and\nthose which don't.\n\nBasic routines like @code{mpn_add_n} or @code{mpn_lshift} are often used on\nlarge operands, so L2 and main memory performance is important for them.\n@code{mpn_mul_1} and @code{mpn_addmul_1} are mostly used for multiply and\nsquare basecases, so L1 performance matters most for them, unless assembler\nversions of @code{mpn_mul_basecase} and @code{mpn_sqr_basecase} exist, in\nwhich case the remaining uses are mostly for larger operands.\n\nFor L2 or main memory operands, memory access times will almost certainly be\nmore than the calculation time.  
The aim therefore is to maximize memory\nthroughput, by starting a load of the next cache line while processing the\ncontents of the previous one.  Clearly this is only possible if the chip has a\nlock-up free cache or some sort of prefetch instruction.  Most current chips\nhave both these features.\n\nPrefetching sources combines well with loop unrolling, since a prefetch can be\ninitiated once per unrolled loop (or more than once if the loop covers more\nthan one cache line).\n\nOn CPUs without write-allocate caches, prefetching destinations will ensure\nindividual stores don't go further down the cache hierarchy, limiting\nbandwidth.  Of course for calculations which are slow anyway, like\n@code{mpn_divrem_1}, write-throughs might be fine.\n\nThe distance ahead to prefetch will be determined by memory latency versus\nthroughput.  The aim of course is to have data arriving continuously, at peak\nthroughput.  Some CPUs have limits on the number of fetches or prefetches in\nprogress.\n\nIf a special prefetch instruction doesn't exist then a plain load can be used,\nbut in that case care must be taken not to attempt to read past the end of an\noperand, since that might produce a segmentation violation.\n\nSome CPUs or systems have hardware that detects sequential memory accesses and\ninitiates suitable cache movements automatically, making life easy.\n\n\n@node Assembler Functional Units, Assembler Floating Point, Assembler Cache Handling, Assembler Coding\n@subsection Functional Units\n\nWhen choosing an approach for an assembler loop, consideration is given to\nwhat operations can execute simultaneously and what throughput can thereby be\nachieved.  In some cases an algorithm can be tweaked to accommodate available\nresources.\n\nLoop control will generally require a counter and pointer updates, costing as\nmuch as 5 instructions, plus any delays a branch introduces.  
CPU addressing\nmodes might reduce pointer updates, perhaps by allowing just one updating\npointer and others expressed as offsets from it, or on CISC chips with all\naddressing done with the loop counter as a scaled index.\n\nThe final loop control cost can be amortised by processing several limbs in\neach iteration (@pxref{Assembler Loop Unrolling}).  This at least ensures loop\ncontrol isn't a big fraction of the work done.\n\nMemory throughput is always a limit.  If perhaps only one load or one store\ncan be done per cycle then 3 cycles/limb will be the top speed for ``binary''\noperations like @code{mpn_add_n}, and any code achieving that is optimal.\n\nInteger resources can be freed up by having the loop counter in a float\nregister, or by pressing the float units into use for some multiplying,\nperhaps doing every second limb on the float side (@pxref{Assembler Floating\nPoint}).\n\nFloat resources can be freed up by doing carry propagation on the integer\nside, or even by doing integer to float conversions in integers using bit\ntwiddling.\n\n\n@node Assembler Floating Point, Assembler SIMD Instructions, Assembler Functional Units, Assembler Coding\n@subsection Floating Point\n@cindex Assembler floating Point\n\nFloating point arithmetic is used in MPIR for multiplications on CPUs with poor\ninteger multipliers.  It's mostly useful for @code{mpn_mul_1},\n@code{mpn_addmul_1} and @code{mpn_submul_1} on 64-bit machines, and\n@code{mpn_mul_basecase} on both 32-bit and 64-bit machines.\n\nWith IEEE 53-bit double precision floats, integer multiplications producing up\nto 53 bits will give exact results.  Breaking a 64@cross{}64 multiplication\ninto eight 16@cross{}@math{32@rightarrow{}48} bit pieces is convenient.  
With\nsome care though six 21@cross{}@math{32@rightarrow{}53} bit products can be\nused, if one of the lower two 21-bit pieces also uses the sign bit.\n\nFor the @code{mpn_mul_1} family of functions on a 64-bit machine, the\ninvariant single limb is split at the start, into 3 or 4 pieces.  Inside the\nloop, the bignum operand is split into 32-bit pieces.  Fast conversion of\nthese unsigned 32-bit pieces to floating point is highly machine-dependent.\nIn some cases, reading the data into the integer unit, zero-extending to\n64-bits, then transferring to the floating point unit back via memory is the\nonly option.\n\nConverting partial products back to 64-bit limbs is usually best done as a\nsigned conversion.  Since all values are smaller than @m{2^{53},2^53}, signed\nand unsigned are the same, but most processors lack unsigned conversions.\n\n@sp 2\n\nHere is a diagram showing 16@cross{}32 bit products for an @code{mpn_mul_1} or\n@code{mpn_addmul_1} with a 64-bit limb.  The single limb operand V is split\ninto four 16-bit parts.  
The multi-limb operand U is split in the loop into\ntwo 32-bit parts.\n\n@tex\n\\global\\newdimen\\GMPbits      \\global\\GMPbits=0.18em\n\\def\\GMPbox#1#2#3{%\n  \\hbox{%\n    \\hbox to 128\\GMPbits{\\hfil\n      \\vbox{%\n        \\hrule\n        \\hbox to 48\\GMPbits {\\GMPvrule \\hfil$#2$\\hfil \\vrule}%\n        \\hrule}%\n      \\hskip #1\\GMPbits}%\n    \\raise \\GMPboxdepth \\hbox{\\hskip 2em #3}}}\n%\n\\GMPdisplay{%\n  \\vbox{%\n    \\hbox{%\n      \\hbox to 128\\GMPbits {\\hfil\n        \\vbox{%\n          \\hrule\n          \\hbox to 64\\GMPbits{%\n            \\GMPvrule \\hfil$v48$\\hfil\n            \\vrule    \\hfil$v32$\\hfil\n            \\vrule    \\hfil$v16$\\hfil\n            \\vrule    \\hfil$v00$\\hfil\n            \\vrule}\n          \\hrule}}%\n       \\raise \\GMPboxdepth \\hbox{\\hskip 2em V Operand}}\n    \\vskip 0.5ex\n    \\hbox{%\n      \\hbox to 128\\GMPbits {\\hfil\n        \\raise \\GMPboxdepth \\hbox{$\\times$\\hskip 1.5em}%\n        \\vbox{%\n          \\hrule\n          \\hbox to 64\\GMPbits {%\n            \\GMPvrule \\hfil$u32$\\hfil\n            \\vrule \\hfil$u00$\\hfil\n            \\vrule}%\n          \\hrule}}%\n       \\raise \\GMPboxdepth \\hbox{\\hskip 2em U Operand (one limb)}}%\n    \\vskip 0.5ex\n    \\hbox{\\vbox to 2ex{\\hrule width 128\\GMPbits}}%\n    \\GMPbox{0}{u00 \\times v00}{$p00$\\hskip 1.5em 48-bit products}%\n    \\vskip 0.5ex\n    \\GMPbox{16}{u00 \\times v16}{$p16$}\n    \\vskip 0.5ex\n    \\GMPbox{32}{u00 \\times v32}{$p32$}\n    \\vskip 0.5ex\n    \\GMPbox{48}{u00 \\times v48}{$p48$}\n    \\vskip 0.5ex\n    \\GMPbox{32}{u32 \\times v00}{$r32$}\n    \\vskip 0.5ex\n    \\GMPbox{48}{u32 \\times v16}{$r48$}\n    \\vskip 0.5ex\n    \\GMPbox{64}{u32 \\times v32}{$r64$}\n    \\vskip 0.5ex\n    \\GMPbox{80}{u32 \\times v48}{$r80$}\n}}\n@end tex\n@ifnottex\n@example\n@group\n                +---+---+---+---+\n                |v48|v32|v16|v00|    V operand\n                +---+---+---+---+\n\n                
+-------+---+---+\n            x   |  u32  |  u00  |    U operand (one limb)\n                +---------------+\n\n---------------------------------\n\n                    +-----------+\n                    | u00 x v00 |    p00    48-bit products\n                    +-----------+\n                +-----------+\n                | u00 x v16 |        p16\n                +-----------+\n            +-----------+\n            | u00 x v32 |            p32\n            +-----------+\n        +-----------+\n        | u00 x v48 |                p48\n        +-----------+\n            +-----------+\n            | u32 x v00 |            r32\n            +-----------+\n        +-----------+\n        | u32 x v16 |                r48\n        +-----------+\n    +-----------+\n    | u32 x v32 |                    r64\n    +-----------+\n+-----------+\n| u32 x v48 |                        r80\n+-----------+\n@end group\n@end example\n@end ifnottex\n\n@math{p32} and @math{r32} can be summed using floating-point addition, and\nlikewise @math{p48} and @math{r48}.  
@math{p00} and @math{p16} can be summed\nwith @math{r64} and @math{r80} from the previous iteration.\n\nFor each loop then, four 49-bit quantities are transferred to the integer unit,\naligned as follows,\n\n@tex\n% GMPbox here should be 49 bits wide, but use 51 to better show p16+r80'\n% crossing into the upper 64 bits.\n\\def\\GMPbox#1#2#3{%\n  \\hbox{%\n    \\hbox to 128\\GMPbits {%\n      \\hfil\n      \\vbox{%\n        \\hrule\n        \\hbox to 51\\GMPbits {\\GMPvrule \\hfil$#2$\\hfil \\vrule}%\n        \\hrule}%\n      \\hskip #1\\GMPbits}%\n    \\raise \\GMPboxdepth \\hbox{\\hskip 1.5em $#3$\\hfil}%\n}}\n\\newbox\\b \\setbox\\b\\hbox{64 bits}%\n\\newdimen\\bw \\bw=\\wd\\b \\advance\\bw by 2em\n\\newdimen\\x \\x=128\\GMPbits\n\\advance\\x by -2\\bw\n\\divide\\x by4\n\\GMPdisplay{%\n  \\vbox{%\n    \\hbox to 128\\GMPbits {%\n      \\GMPvrule\n      \\raise 0.5ex \\vbox{\\hrule \\hbox to \\x {}}%\n      \\hfil 64 bits\\hfil\n      \\raise 0.5ex \\vbox{\\hrule \\hbox to \\x {}}%\n      \\vrule\n      \\raise 0.5ex \\vbox{\\hrule \\hbox to \\x {}}%\n      \\hfil 64 bits\\hfil\n      \\raise 0.5ex \\vbox{\\hrule \\hbox to \\x {}}%\n      \\vrule}%\n    \\vskip 0.7ex\n    \\GMPbox{0}{p00+r64'}{i00}\n    \\vskip 0.5ex\n    \\GMPbox{16}{p16+r80'}{i16}\n    \\vskip 0.5ex\n    \\GMPbox{32}{p32+r32}{i32}\n    \\vskip 0.5ex\n    \\GMPbox{48}{p48+r48}{i48}\n}}\n@end tex\n@ifnottex\n@example\n@group\n|-----64bits----|-----64bits----|\n                   +------------+\n                   | p00 + r64' |    i00\n                   +------------+\n               +------------+\n               | p16 + r80' |        i16\n               +------------+\n           +------------+\n           | p32 + r32  |            i32\n           +------------+\n       +------------+\n       | p48 + r48  |                i48\n       +------------+\n@end group\n@end example\n@end ifnottex\n\nThe challenge then is to sum these efficiently and add in a carry limb,\ngenerating a low 64-bit result 
limb and a high 33-bit carry limb (@math{i48}\nextends 33 bits into the high half).\n\n\n@node Assembler SIMD Instructions, Assembler Software Pipelining, Assembler Floating Point, Assembler Coding\n@subsection SIMD Instructions\n@cindex Assembler SIMD\n\nThe single-instruction multiple-data support in current microprocessors is\naimed at signal processing algorithms where each data point can be treated\nmore or less independently.  There's generally not much support for\npropagating the sort of carries that arise in MPIR.\n\nSIMD multiplications of say four 16@cross{}16 bit multiplies only do as much\nwork as one 32@cross{}32 from MPIR's point of view, and need some shifts and\nadds besides.  But of course if say the SIMD form is fully pipelined and uses\nless instruction decoding then it may still be worthwhile.\n\nOn the x86 chips, MMX has so far found a use in @code{mpn_rshift} and\n@code{mpn_lshift}, and is used in a special case for 16-bit multipliers in the\nP55 @code{mpn_mul_1}.  SSE2 is used for Pentium 4 @code{mpn_mul_1},\n@code{mpn_addmul_1}, and @code{mpn_submul_1}.\n\n\n@node Assembler Software Pipelining, Assembler Loop Unrolling, Assembler SIMD Instructions, Assembler Coding\n@subsection Software Pipelining\n@cindex Assembler software pipelining\n\nSoftware pipelining consists of scheduling instructions around the branch\npoint in a loop.  For example a loop might issue a load not for use in the\npresent iteration but the next, thereby allowing extra cycles for the data to\narrive from memory.\n\nNaturally this is wanted only when doing things like loads or multiplies that\ntake several cycles to complete, and only where a CPU has multiple functional\nunits so that other work can be done in the meantime.\n\nA pipeline with several stages will have a data value in progress at each\nstage and each loop iteration moves them along one stage.  
This is like\njuggling.\n\nIf the latency of some instruction is greater than the loop time then it will\nbe necessary to unroll, so one register has a result ready to use while\nanother (or multiple others) are still in progress.  (@pxref{Assembler Loop\nUnrolling}).\n\n\n@node Assembler Loop Unrolling, Assembler Writing Guide, Assembler Software Pipelining, Assembler Coding\n@subsection Loop Unrolling\n@cindex Assembler loop unrolling\n\nLoop unrolling consists of replicating code so that several limbs are\nprocessed in each loop.  At a minimum this reduces loop overheads by a\ncorresponding factor, but it can also allow better register usage, for example\nalternately using one register combination and then another.  Judicious use of\n@command{m4} macros can help avoid lots of duplication in the source code.\n\nAny amount of unrolling can be handled with a loop counter that's decremented\nby @math{N} each time, stopping when the remaining count is less than the\nfurther @math{N} the loop will process.  Or by subtracting @math{N} at the\nstart, the termination condition becomes when the counter @math{C} is less\nthan 0 (and the count of remaining limbs is @math{C+N}).\n\nAlternately for a power of 2 unroll the loop count and remainder can be\nestablished with a shift and mask.  This is convenient if also making a\ncomputed jump into the middle of a large loop.\n\nThe limbs not a multiple of the unrolling can be handled in various ways, for\nexample\n\n@itemize @bullet\n@item\nA simple loop at the end (or the start) to process the excess.  
Care will be\nwanted that it isn't too much slower than the unrolled part.\n\n@item\nA set of binary tests, for example after an 8-limb unrolling, test for 4 more\nlimbs to process, then a further 2 more or not, and finally 1 more or not.\nThis will probably take more code space than a simple loop.\n\n@item\nA @code{switch} statement, providing separate code for each possible excess,\nfor example an 8-limb unrolling would have separate code for 0 remaining, 1\nremaining, etc, up to 7 remaining.  This might take a lot of code, but may be\nthe best way to optimize all cases in combination with a deep pipelined loop.\n\n@item\nA computed jump into the middle of the loop, thus making the first iteration\nhandle the excess.  This should make times smoothly increase with size, which\nis attractive, but setups for the jump and adjustments for pointers can be\ntricky and could become quite difficult in combination with deep pipelining.\n@end itemize\n\n\n@node Assembler Writing Guide,  , Assembler Loop Unrolling, Assembler Coding\n@subsection Writing Guide\n@cindex Assembler writing guide\n\nThis is a guide to writing software pipelined loops for processing limb\nvectors in assembler.\n\nFirst determine the algorithm and which instructions are needed.  Code it\nwithout unrolling or scheduling, to make sure it works.  On a 3-operand CPU\ntry to write each new value to a new register, this will greatly simplify later\nsteps.\n\nThen note for each instruction the functional unit and/or issue port\nrequirements.  If an instruction can use either of two units, like U0 or U1\nthen make a category ``U0/U1''.  Count the total using each unit (or combined\nunit), and count all instructions.\n\nFigure out from those counts the best possible loop time.  The goal will be to\nfind a perfect schedule where instruction latencies are completely hidden.\nThe total instruction count might be the limiting factor, or perhaps a\nparticular functional unit.  
It might be possible to tweak the instructions to\nhelp the limiting factor.\n\nSuppose the loop time is @math{N}, then make @math{N} issue buckets, with the\nfinal loop branch at the end of the last.  Now fill the buckets with dummy\ninstructions using the functional units desired.  Run this to make sure the\nintended speed is reached.\n\nNow replace the dummy instructions with the real instructions from the slow\nbut correct loop you started with.  The first will typically be a load\ninstruction.  Then the instruction using that value is placed in a bucket an\nappropriate distance down.  Run the loop again, to check it still runs at\ntarget speed.\n\nKeep placing instructions, frequently measuring the loop.  After a few you\nwill need to wrap around from the last bucket back to the top of the loop.  If\nyou used the new-register for new-value strategy above then there will be no\nregister conflicts.  If not then take care not to clobber something already in\nuse.  Changing registers at this time is very error prone.\n\nThe loop will overlap two or more of the original loop iterations, and the\ncomputation of one vector element result will be started in one iteration of\nthe new loop, and completed one or several iterations later.\n\nThe final step is to create feed-in and wind-down code for the loop.  
A good\nway to do this is to make a copy (or copies) of the loop at the start and\ndelete those instructions which don't have valid antecedents, and at the end\nreplicate and delete those whose results are unwanted (including any further\nloads).\n\nThe loop will have a minimum number of limbs loaded and processed, so the\nfeed-in code must test if the request size is smaller and skip either to a\nsuitable part of the wind-down or to special code for small sizes.\n\n\n@node Internals, Contributors, Algorithms, Top\n@chapter Internals\n@cindex Internals\n\n@strong{This chapter is provided only for informational purposes and the\nvarious internals described here may change in future MPIR releases.\nApplications expecting to be compatible with future releases should use only\nthe documented interfaces described in previous chapters.}\n\n@menu\n* Integer Internals::\n* Rational Internals::\n* Float Internals::\n* Raw Output Internals::\n* C++ Interface Internals::\n@end menu\n\n@node Integer Internals, Rational Internals, Internals, Internals\n@section Integer Internals\n@cindex Integer internals\n\n@code{mpz_t} variables represent integers using sign and magnitude, in space\ndynamically allocated and reallocated.  The fields are as follows.\n\n@table @asis\n@item @code{_mp_size}\nThe number of limbs, or the negative of that when representing a negative\ninteger.  Zero is represented by @code{_mp_size} set to zero, in which case\nthe @code{_mp_d} data is unused.\n\n@item @code{_mp_d}\nA pointer to an array of limbs which is the magnitude.  These are stored\n``little endian'' as per the @code{mpn} functions, so @code{_mp_d[0]} is the\nleast significant limb and @code{_mp_d[ABS(_mp_size)-1]} is the most\nsignificant.  
Whenever @code{_mp_size} is non-zero, the most significant limb\nis non-zero.\n\nCurrently there's always at least one limb allocated, so for instance\n@code{mpz_set_ui} never needs to reallocate, and @code{mpz_get_ui} can fetch\n@code{_mp_d[0]} unconditionally (though its value is then only wanted if\n@code{_mp_size} is non-zero).\n\n@item @code{_mp_alloc}\n@code{_mp_alloc} is the number of limbs currently allocated at @code{_mp_d},\nand naturally @code{_mp_alloc >= ABS(_mp_size)}.  When an @code{mpz} routine\nis about to (or might be about to) increase @code{_mp_size}, it checks\n@code{_mp_alloc} to see whether there's enough space, and reallocates if not.\n@code{MPZ_REALLOC} is generally used for this.\n@end table\n\nThe various bitwise logical functions like @code{mpz_and} behave as if\nnegative values were twos complement.  But sign and magnitude is always used\ninternally, and necessary adjustments are made during the calculations.\nSometimes this isn't pretty, but sign and magnitude are best for other\nroutines.\n\nSome internal temporary variables are setup with @code{MPZ_TMP_INIT} and these\nhave @code{_mp_d} space obtained from @code{TMP_ALLOC} rather than the memory\nallocation functions.  Care is taken to ensure that these are big enough that\nno reallocation is necessary (since it would have unpredictable consequences).\n\n@code{_mp_size} and @code{_mp_alloc} are @code{int}, although @code{mp_size_t}\nis usually a @code{long}.  
This is done to make the fields just 32 bits on\nsome 64 bits systems, thereby saving a few bytes of data space but still\nproviding plenty of range.\n\n\n@node Rational Internals, Float Internals, Integer Internals, Internals\n@section Rational Internals\n@cindex Rational internals\n\n@code{mpq_t} variables represent rationals using an @code{mpz_t} numerator and\ndenominator (@pxref{Integer Internals}).\n\nThe canonical form adopted is denominator positive (and non-zero), no common\nfactors between numerator and denominator, and zero uniquely represented as\n0/1.\n\nIt's believed that casting out common factors at each stage of a calculation\nis best in general.  A GCD is an @math{O(N^2)} operation so it's better to do\na few small ones immediately than to delay and have to do a big one later.\nKnowing the numerator and denominator have no common factors can be used for\nexample in @code{mpq_mul} to make only two cross GCDs necessary, not four.\n\nThis general approach to common factors is badly sub-optimal in the presence\nof simple factorizations or little prospect for cancellation, but MPIR has no\nway to know when this will occur.  As per @ref{Efficiency}, that's left to\napplications.  The @code{mpq_t} framework might still suit, with\n@code{mpq_numref} and @code{mpq_denref} for direct access to the numerator and\ndenominator, or of course @code{mpz_t} variables can be used directly.\n\n\n@node Float Internals, Raw Output Internals, Rational Internals, Internals\n@section Float Internals\n@cindex Float internals\n\nEfficient calculation is the primary aim of MPIR floats and the use of whole\nlimbs and simple rounding facilitates this.\n\n@code{mpf_t} floats have a variable precision mantissa and a single machine\nword signed exponent.  
The mantissa is represented using sign and magnitude.\n\n@c FIXME: The arrow heads don't join to the lines exactly.\n@tex\n\\global\\newdimen\\GMPboxwidth \\GMPboxwidth=5em\n\\global\\newdimen\\GMPboxheight \\GMPboxheight=3ex\n\\def\\centreline{\\hbox{\\raise 0.8ex \\vbox{\\hrule \\hbox{\\hfil}}}}\n\\GMPdisplay{%\n\\vbox{%\n  \\hbox to 5\\GMPboxwidth {most significant limb \\hfil least significant limb}\n  \\vskip 0.7ex\n  \\def\\GMPcentreline#1{\\hbox{\\raise 0.5 ex \\vbox{\\hrule \\hbox to #1 {}}}}\n  \\hbox {\n    \\hbox to 3\\GMPboxwidth {%\n      \\setbox 0 = \\hbox{@code{\\_mp\\_exp}}%\n      \\dimen0=3\\GMPboxwidth\n      \\advance\\dimen0 by -\\wd0\n      \\divide\\dimen0 by 2\n      \\advance\\dimen0 by -1em\n      \\setbox1 = \\hbox{$\\rightarrow$}%\n      \\dimen1=\\dimen0\n      \\advance\\dimen1 by -\\wd1\n      \\GMPcentreline{\\dimen0}%\n      \\hfil\n      \\box0%\n      \\hfil\n      \\GMPcentreline{\\dimen1{}}%\n      \\box1}\n    \\hbox to 2\\GMPboxwidth {\\hfil @code{\\_mp\\_d}}}\n  \\vskip 0.5ex\n  \\vbox {%\n    \\hrule\n    \\hbox{%\n      \\vrule height 2ex depth 1ex\n      \\hbox to \\GMPboxwidth {}%\n      \\vrule\n      \\hbox to \\GMPboxwidth {}%\n      \\vrule\n      \\hbox to \\GMPboxwidth {}%\n      \\vrule\n      \\hbox to \\GMPboxwidth {}%\n      \\vrule\n      \\hbox to \\GMPboxwidth {}%\n      \\vrule}\n    \\hrule\n  }\n  \\hbox {%\n    \\hbox to 0.8 pt {}\n    \\hbox to 3\\GMPboxwidth {%\n      \\hfil $\\cdot$} \\hbox {$\\leftarrow$ radix point\\hfil}}\n  \\hbox to 5\\GMPboxwidth{%\n    \\setbox 0 = \\hbox{@code{\\_mp\\_size}}%\n    \\dimen0 = 5\\GMPboxwidth\n    \\advance\\dimen0 by -\\wd0\n    \\divide\\dimen0 by 2\n    \\advance\\dimen0 by -1em\n    \\dimen1 = \\dimen0\n    \\setbox1 = \\hbox{$\\leftarrow$}%\n    \\setbox2 = \\hbox{$\\rightarrow$}%\n    \\advance\\dimen0 by -\\wd1\n    \\advance\\dimen1 by -\\wd2\n    \\hbox to 0.3 em {}%\n    \\box1\n    \\GMPcentreline{\\dimen0}%\n    \\hfil\n    \\box0\n    \\hfil\n    
\\GMPcentreline{\\dimen1}%\n    \\box2}\n}}\n@end tex\n@ifnottex\n@example\n   most                   least\nsignificant            significant\n   limb                   limb\n\n                            _mp_d\n |---- _mp_exp --->           |\n  _____ _____ _____ _____ _____\n |_____|_____|_____|_____|_____|\n                   . <------------ radix point\n\n  <-------- _mp_size --------->\n@sp 1\n@end example\n@end ifnottex\n\n@noindent\nThe fields are as follows.\n\n@table @asis\n@item @code{_mp_size}\nThe number of limbs currently in use, or the negative of that when\nrepresenting a negative value.  Zero is represented by @code{_mp_size} and\n@code{_mp_exp} both set to zero, and in that case the @code{_mp_d} data is\nunused.  (In the future @code{_mp_exp} might be undefined when representing\nzero.)\n\n@item @code{_mp_prec}\nThe precision of the mantissa, in limbs.  In any calculation the aim is to\nproduce @code{_mp_prec} limbs of result (the most significant being non-zero).\n\n@item @code{_mp_d}\nA pointer to the array of limbs which is the absolute value of the mantissa.\nThese are stored ``little endian'' as per the @code{mpn} functions, so\n@code{_mp_d[0]} is the least significant limb and\n@code{_mp_d[ABS(_mp_size)-1]} the most significant.\n\nThe most significant limb is always non-zero, but there are no other\nrestrictions on its value, in particular the highest 1 bit can be anywhere\nwithin the limb.\n\n@code{_mp_prec+1} limbs are allocated to @code{_mp_d}, the extra limb being\nfor convenience (see below).  There are no reallocations during a calculation,\nonly in a change of precision with @code{mpf_set_prec}.\n\n@item @code{_mp_exp}\nThe exponent, in limbs, determining the location of the implied radix point.\nZero means the radix point is just above the most significant limb.  Positive\nvalues mean a radix point offset towards the lower limbs and hence a value\n@math{@ge{} 1}, as for example in the diagram above.  
Negative exponents mean\na radix point further above the highest limb.\n\nNaturally the exponent can be any value, it doesn't have to fall within the\nlimbs as the diagram shows, it can be a long way above or a long way below.\nLimbs other than those included in the @code{@{_mp_d,_mp_size@}} data\nare treated as zero.\n@end table\n\n@code{_mp_size} and @code{_mp_prec} are @code{int}, although @code{mp_size_t}\nis usually a @code{long}.  This is done to make the fields just 32 bits on\nsome 64 bits systems, thereby saving a few bytes of data space but still\nproviding plenty of range.\n\n\n@sp 1\n@noindent\nThe following various points should be noted.\n\n@table @asis\n@item Low Zeros\nThe least significant limbs @code{_mp_d[0]} etc can be zero, though such low\nzeros can always be ignored.  Routines likely to produce low zeros check and\navoid them to save time in subsequent calculations, but for most routines\nthey're quite unlikely and aren't checked.\n\n@item Mantissa Size Range\nThe @code{_mp_size} count of limbs in use can be less than @code{_mp_prec} if\nthe value can be represented in less.  This means low precision values or\nsmall integers stored in a high precision @code{mpf_t} can still be operated\non efficiently.\n\n@code{_mp_size} can also be greater than @code{_mp_prec}.  Firstly a value is\nallowed to use all of the @code{_mp_prec+1} limbs available at @code{_mp_d},\nand secondly when @code{mpf_set_prec_raw} lowers @code{_mp_prec} it leaves\n@code{_mp_size} unchanged and so the size can be arbitrarily bigger than\n@code{_mp_prec}.\n\n@item Rounding\nAll rounding is done on limb boundaries.  
Calculating @code{_mp_prec} limbs\nwith the high non-zero will ensure the application requested minimum precision\nis obtained.\n\nThe use of simple ``trunc'' rounding towards zero is efficient, since there's\nno need to examine extra limbs and increment or decrement.\n\n@item Bit Shifts\nSince the exponent is in limbs, there are no bit shifts in basic operations\nlike @code{mpf_add} and @code{mpf_mul}.  When differing exponents are\nencountered all that's needed is to adjust pointers to line up the relevant\nlimbs.\n\nOf course @code{mpf_mul_2exp} and @code{mpf_div_2exp} will require bit shifts,\nbut the choice is between an exponent in limbs which requires shifts there, or\none in bits which requires them almost everywhere else.\n\n@item Use of @code{_mp_prec+1} Limbs\nThe extra limb on @code{_mp_d} (@code{_mp_prec+1} rather than just\n@code{_mp_prec}) helps when an @code{mpf} routine might get a carry from its\noperation.  @code{mpf_add} for instance will do an @code{mpn_add} of\n@code{_mp_prec} limbs.  If there's no carry then that's the result, but if\nthere is a carry then it's stored in the extra limb of space and\n@code{_mp_size} becomes @code{_mp_prec+1}.\n\nWhenever @code{_mp_prec+1} limbs are held in a variable, the low limb is not\nneeded for the intended precision, only the @code{_mp_prec} high limbs.  But\nzeroing it out or moving the rest down is unnecessary.  Subsequent routines\nreading the value will simply take the high limbs they need, and this will be\n@code{_mp_prec} if their target has that same precision.  This is no more than\na pointer adjustment, and must be checked anyway since the destination\nprecision can be different from the sources.\n\nCopy functions like @code{mpf_set} will retain a full @code{_mp_prec+1} limbs\nif available.  This ensures that a variable which has @code{_mp_size} equal to\n@code{_mp_prec+1} will get its full exact value copied.  
Strictly speaking\nthis is unnecessary since only @code{_mp_prec} limbs are needed for the\napplication's requested precision, but it's considered that an @code{mpf_set}\nfrom one variable into another of the same precision ought to produce an exact\ncopy.\n\n@item Application Precisions\n@code{__GMPF_BITS_TO_PREC} converts an application requested precision to an\n@code{_mp_prec}.  The value in bits is rounded up to a whole limb then an\nextra limb is added since the most significant limb of @code{_mp_d} is only\nnon-zero and therefore might contain only one bit.\n\n@code{__GMPF_PREC_TO_BITS} does the reverse conversion, and removes the extra\nlimb from @code{_mp_prec} before converting to bits.  The net effect of\nreading back with @code{mpf_get_prec} is simply the precision rounded up to a\nmultiple of @code{mp_bits_per_limb}.\n\nNote that the extra limb added here for the high only being non-zero is in\naddition to the extra limb allocated to @code{_mp_d}.  For example with a\n32-bit limb, an application request for 250 bits will be rounded up to 8\nlimbs, then an extra added for the high being only non-zero, giving an\n@code{_mp_prec} of 9.  @code{_mp_d} then gets 10 limbs allocated.  
Reading\nback with @code{mpf_get_prec} will take @code{_mp_prec} subtract 1 limb and\nmultiply by 32, giving 256 bits.\n\nStrictly speaking, the fact the high limb has at least one bit means that a\nfloat with, say, 3 limbs of 32-bits each will be holding at least 65 bits, but\nfor the purposes of @code{mpf_t} it's considered simply to be 64 bits, a nice\nmultiple of the limb size.\n@end table\n\n\n@node Raw Output Internals, C++ Interface Internals, Float Internals, Internals\n@section Raw Output Internals\n@cindex Raw output internals\n\n@noindent\n@code{mpz_out_raw} uses the following format.\n\n@tex\n\\global\\newdimen\\GMPboxwidth \\GMPboxwidth=5em\n\\global\\newdimen\\GMPboxheight \\GMPboxheight=3ex\n\\def\\centreline{\\hbox{\\raise 0.8ex \\vbox{\\hrule \\hbox{\\hfil}}}}\n\\GMPdisplay{%\n\\vbox{%\n  \\def\\GMPcentreline#1{\\hbox{\\raise 0.5 ex \\vbox{\\hrule \\hbox to #1 {}}}}\n  \\vbox {%\n    \\hrule\n    \\hbox{%\n      \\vrule height 2.5ex depth 1.5ex\n      \\hbox to \\GMPboxwidth {\\hfil size\\hfil}%\n      \\vrule\n      \\hbox to 3\\GMPboxwidth {\\hfil data bytes\\hfil}%\n      \\vrule}\n    \\hrule}\n}}\n@end tex\n@ifnottex\n@example\n+------+------------------------+\n| size |       data bytes       |\n+------+------------------------+\n@end example\n@end ifnottex\n\nThe size is 4 bytes written most significant byte first, being the number of\nsubsequent data bytes, or the twos complement negative of that when a negative\ninteger is represented.  The data bytes are the absolute value of the integer,\nwritten most significant byte first.\n\nThe most significant data byte is always non-zero, so the output is the same\non all systems, irrespective of limb size.\n\nIn GMP 1, leading zero bytes were written to pad the data bytes to a multiple\nof the limb size.  
@code{mpz_inp_raw} will still accept this, for\ncompatibility.\n\nThe use of ``big endian'' for both the size and data fields is deliberate, it\nmakes the data easy to read in a hex dump of a file.  Unfortunately it also\nmeans that the limb data must be reversed when reading or writing, so neither\na big endian nor little endian system can just read and write @code{_mp_d}.\n\n\n@node C++ Interface Internals,  , Raw Output Internals, Internals\n@section C++ Interface Internals\n@cindex C++ interface internals\n\nA system of expression templates is used to ensure something like @code{a=b+c}\nturns into a simple call to @code{mpz_add} etc.  For @code{mpf_class}\nthe scheme also ensures the precision of the final\ndestination is used for any temporaries within a statement like\n@code{f=w*x+y*z}.  These are important features which a naive implementation\ncannot provide.\n\nA simplified description of the scheme follows.  The true scheme is\ncomplicated by the fact that expressions have different return types.  For\ndetailed information, refer to the source code.\n\nTo perform an operation, say, addition, we first define a ``function object''\nevaluating it,\n\n@example\nstruct __gmp_binary_plus\n@{\n  static void eval(mpf_t f, mpf_t g, mpf_t h) @{ mpf_add(f, g, h); @}\n@};\n@end example\n\n@noindent\nAnd an ``additive expression'' object,\n\n@example\n__gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >\noperator+(const mpf_class &f, const mpf_class &g)\n@{\n  return __gmp_expr\n    <__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >(f, g);\n@}\n@end example\n\nThe seemingly redundant @code{__gmp_expr<__gmp_binary_expr<@dots{}>>} is used to\nencapsulate any possible kind of expression into a single template type.  
In\nfact even @code{mpf_class} etc are @code{typedef} specializations of\n@code{__gmp_expr}.\n\nNext we define assignment of @code{__gmp_expr} to @code{mpf_class}.\n\n@example\ntemplate <class T>\nmpf_class & mpf_class::operator=(const __gmp_expr<T> &expr)\n@{\n  expr.eval(this->get_mpf_t(), this->precision());\n  return *this;\n@}\n\ntemplate <class Op>\nvoid __gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, Op> >::eval\n(mpf_t f, mp_bitcnt_t precision)\n@{\n  Op::eval(f, expr.val1.get_mpf_t(), expr.val2.get_mpf_t());\n@}\n@end example\n\nwhere @code{expr.val1} and @code{expr.val2} are references to the expression's\noperands (here @code{expr} is the @code{__gmp_binary_expr} stored within the\n@code{__gmp_expr}).\n\nThis way, the expression is actually evaluated only at the time of assignment,\nwhen the required precision (that of @code{f}) is known.  Furthermore the\ntarget @code{mpf_t} is now available, thus we can call @code{mpf_add} directly\nwith @code{f} as the output argument.\n\nCompound expressions are handled by defining operators taking subexpressions\nas their arguments, like this:\n\n@example\ntemplate <class T, class U>\n__gmp_expr\n<__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >\noperator+(const __gmp_expr<T> &expr1, const __gmp_expr<U> &expr2)\n@{\n  return __gmp_expr\n    <__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >\n    (expr1, expr2);\n@}\n@end example\n\nAnd the corresponding specializations of @code{__gmp_expr::eval}:\n\n@example\ntemplate <class T, class U, class Op>\nvoid __gmp_expr\n<__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, Op> >::eval\n(mpf_t f, mp_bitcnt_t precision)\n@{\n  // declare two temporaries\n  mpf_class temp1(expr.val1, precision), temp2(expr.val2, precision);\n  Op::eval(f, temp1.get_mpf_t(), temp2.get_mpf_t());\n@}\n@end example\n\nThe expression is thus recursively evaluated to any level of complexity and\nall subexpressions are evaluated to the precision of 
@code{f}.\n\n\n@node Contributors, References, Internals, Top\n@comment  node-name,  next,  previous,  up\n@appendix Contributors\n@cindex Contributors\n\nTorbjorn Granlund wrote the original GMP library and is still developing and\nmaintaining it.  Several other individuals and organizations have contributed\nto GMP in various ways.  Here is a list in chronological order:\n\nGunnar Sjoedin and Hans Riesel helped with mathematical problems in early\nversions of the library.\n\nRichard Stallman contributed to the interface design and revised the first\nversion of this manual.\n\nBrian Beuning and Doug Lea helped with testing of early versions of the\nlibrary and made creative suggestions.\n\nJohn Amanatides of York University in Canada contributed the function\n@code{mpz_probab_prime_p}.\n\nPaul Zimmermann of Inria sparked the development of GMP 2, with his\ncomparisons between bignum packages.\n\nKen Weber (Kent State University, Universidade Federal do Rio Grande do Sul)\ncontributed @code{mpz_gcd}, @code{mpz_divexact}, @code{mpn_gcd}, and\n@code{mpn_bdivmod}, partially supported by CNPq (Brazil) grant 301314194-2.\n\nPer Bothner of Cygnus Support helped to set up GMP to use Cygnus' configure.\nHe has also made valuable suggestions and tested numerous intermediary\nreleases.\n\nJoachim Hollman was involved in the design of the @code{mpf} interface, and in\nthe @code{mpz} design revisions for version 2.\n\nBennet Yee contributed the initial versions of @code{mpz_jacobi} and\n@code{mpz_legendre}.\n\nAndreas Schwab contributed the files @file{mpn/m68k/lshift.S} and\n@file{mpn/m68k/rshift.S} (now in @file{.asm} form).\n\nThe development of floating point functions of GNU MP 2, were supported in part\nby the ESPRIT-BRA (Basic Research Activities) 6846 project POSSO (POlynomial\nSystem SOlving).\n\nGNU MP 2 was finished and released by SWOX AB, SWEDEN, in cooperation with the\nIDA Center for Computing Sciences, USA.\n\nRobert Harley of Inria, France and David Seal of 
ARM, England, suggested clever\nimprovements for population count.\n\nRobert Harley also wrote highly optimized Karatsuba and 3-way Toom\nmultiplication functions for GMP 3.  He also contributed the ARM assembly\ncode.\n\nTorsten Ekedahl of the Mathematical department of Stockholm University provided\nsignificant inspiration during several phases of the GMP development.  His\nmathematical expertise helped improve several algorithms.\n\nPaul Zimmermann wrote the Divide and Conquer division code, the REDC code, the\nREDC-based mpz_powm code, the FFT multiply code, and the Karatsuba square root\ncode.  He also rewrote the Toom3 code for GMP 4.2.  The ECMNET project Paul is\norganizing was a driving force behind many of the optimizations in GMP 3.\n\nLinus Nordberg wrote the new configure system based on autoconf and\nimplemented the new random functions.\n\nKent Boortz made the Mac OS 9 port.\n\nKevin Ryde worked on a number of things: optimized x86 code, m4 asm macros,\nparameter tuning, speed measuring, the configure system, function inlining,\ndivisibility tests, bit scanning, Jacobi symbols, Fibonacci and Lucas number\nfunctions, printf and scanf functions, perl interface, demo expression parser,\nthe algorithms chapter in the manual, @file{gmpasm-mode.el}, and various\nmiscellaneous improvements elsewhere.\n\nSteve Root helped write the optimized alpha 21264 assembly code.\n\nGerardo Ballabio wrote the @file{gmpxx.h} C++ class interface and the C++\n@code{istream} input routines.\n\nGNU MP 4 was finished and released by Torbjorn Granlund and Kevin Ryde.\nTorbjorn's work was partially funded by the IDA Center for Computing Sciences,\nUSA.\n\nJason Moxham rewrote @code{mpz_fac_ui}.\n\nPedro Gimeno implemented the Mersenne Twister and made other random number\nimprovements.\n\n(This list is chronological, not ordered after significance.  
If you have\ncontributed to GMP/MPIR but are not listed above, please tell \n@uref{http://groups.google.com/group/mpir-devel} about the omission!)\n\nThanks go to Hans Thorsen for donating an SGI system for the GMP test system\nenvironment.\n\nIn 2008 GMP was forked and gave rise to the MPIR (Multiple Precision Integers\nand Rationals) project. In 2010 version 2.0.0 of MPIR switched to LGPL v3+ \nand much code from GMP was again incorporated into MPIR. \n\nThe MPIR project has largely been a collaboration of William Hart, Brian \nGladman and Jason Moxham. MPIR code not obtained from GMP and not specifically\nmentioned elsewhere below is likely written by one of these three.\n\nWilliam Hart did much of the early MPIR coding including build system fixes.\nHis contributions also include Toom 4 and 7 code and variants, extended GCD \nbased on Niels Moller's ngcd work, asymptotically fast division code. He does\nmuch of the release management work.\n\nBrian Gladman wrote and maintains MSVC project files. He has also done much of\nthe conversion of assembly code to yasm format. He rewrote the benchmark \nprogram and developed MSVC ports of tune, speed, try and the benchmark code.\nHe helped with many aspects of the merging of GMP code into MPIR after the \nswitch to LGPL v3+.\n\nJason Moxham has contributed a great deal of x86 assembly code. He has also\ncontributed improved root code and mulhi and mullo routines and implemented \nPeter Montgomery's single limb remainder algorithm. He has also contributed\na command line build system for Windows and numerous build system fixes.\n\nThe following people have either contributed directly to the MPIR project, \nmade code available on their websites or contributed code to the official \nGNU project which has been used in MPIR.\n\nJason Martin wrote some fast assembly patches for Core 2 and converted them to\nintel format. 
He also did the initial merge of Niels Moller's fast GCD patches.\nHe wrote fast addmul functions for Itanium.\n\nGonzalo Tornaria helped patch config.guess and associated files to distinguish\nmodern processors. He also patched mpirbench.\n\nMichael Abshoff helped resolve some build issues on various platforms. He served for a while as release manager for the MPIR project.\n\nMariah Lennox contributed patches to mpirbench and various build failure reports. She has also reported gcc bugs found during MPIR development.\n\nNiels Moller wrote the fast ngcd code for computing integer GCD, the quadratic\nHensel division code and precomputed inverse code for Euclidean division, along\nwith fast jacobi symbols code. \nHe also made contributions to the Toom multiply code, \nespecially helper functions to simplify Toom evaluations.\n\nBurcin Erocal helped with build testing on Pentium-D\n\nPierrick Gaudry provided initial AMD 64 assembly support and revised the FFT code.\n\nPaul Zimmermann provided an mpz implementation of Toom 4, wrote much of the FFT code, wrote some of the rootrem code and contributed invert.c for computing\nprecomputed inverses.\n\nAlexander Kruppa revised the FFT code and helped write and superoptimise\nassembly code for Skylake, Haswell and Bulldozer and helped write a\nsuperoptimiser.\n\nTorbjorn Granlund revised the FFT code and wrote a lot of division code, \nincluding the quadratic Euclidean division code, many parts of the divide\nand conquer division code, both Hensel and Euclidean, and his code was also\nreused for parts of the asymptotically fast division code. He also helped \nwrite the root code and wrote much of the Itanium assembly code and a couple\nof Core 2 assembly functions and part of the basecase middle product assembly\ncode for x86 64 bit. He also wrote the improved string input and output code\nand made improvements to the GCD and extended GCD code. He also contributed the\nnextprime code and coauthored the bin_uiui code. 
He also wrote or maintained\nthe binvert, mullow_n_basecase, powlo, redc_n code and the powm and powm_ui\nimprovements. Torbjorn is also responsible for numerous other bits and pieces\nthat have been used from the GNU project.\n\nMarco Bodrato and Alberto Zanoni suggested the unbalanced multiply strategy\nand found optimal Toom multiplication sequences.\n\nMarco Bodrato wrote an mpz implementation of the Toom 7 code and wrote most of\nthe Toom 8.5 multiply and squaring code. He also helped write the divide and conquer Euclidean division code. He also contributed many improved number\ntheoretical functions including factorial, multi-factorial, primorial, \nn-choose-k.\n\nMarc Glisse improved gmpxx.h\n\nRobert Gerbicz contributed fast factorial code.\n\nMartin Boij made assorted contributions to the nextprime code.\n\nDavid Harvey wrote fast middle product code and divide and conquer approximate \nquotient code for both Euclidean and Hensel division and contributed to the\nquadratic Hensel code.\n\nT. R. Nicely wrote primality tests used in the benchmark code.\n\nJeff Gilchrist assisted with the porting of T. R. 
Nicely's primality code to MPIR and helped with tuning.\n\nDavid Kirkby helped with build testing on Sun servers.\n\nPeter Shrimpton wrote the BPSW primality test used up to GMP_LIMB_BITS.\n\nThanks to Microsoft for supporting Jason Moxham to work on a command line\nbuild system for Windows and some assembly improvements for Windows.\n\nThanks to William Stein for giving us access to his sage.math machines for\ntesting and for hosting the MPIR website, and for supporting us in innumerably \nmany other ways.\n\nMinh Van Nguyen served as release manager for MPIR 2.1.0.\n\nCase Vanhorsen helped with release testing.\n\nDavid Cleaver filed a bug report.\n\nJulien Puydt provided tuning values.\n\nLeif Leonhardy supplied build patches and provided tuning values.\n\nJean-Pierre Flori ported the powm, powm_ui improvements from GMP, supplied many\nbuild system patches and improvements and provided tuning values.\n\nThanks to an anonymous Japanese contributor for assembly improvements.\n\nMarshall Hampton reported an issue on Apple machines.\n\nJens Nurmann contributed significant quantities of Skylake assembly code and\ncontributed assembly improvements that have been used elsewhere.\n\nAlex Best wrote an assembly superoptimiser.\n\nVincent Delecroix ported mpq_cmp_z from GMP.\n\nSisyphus (Rob) submitted tuning values.\n\nsav-ix (Alexander) provided a patch for t-locale on Windows.\n\nIsuru Fernando provided tuning values, numerous build system patches, did\nrelease testing and helped with continuous integration.\n\nAlex Dyachenko wrote mpir.net for interfacing MPIR to .net languages.\n\nTommy Hoffman supplied a sed patch.\n\nAverkhaturau fixed a C++ compilation problem.\n\nMarcell Keller fixed a sign conversion bug.\n\nSergey Taymanov fixed some Windows build file issues.\n\njengelh reported a bug and helped with build testing.\n\n@node References, GNU Free Documentation License, Contributors, Top\n@comment  node-name,  next,  previous,  up\n@appendix References\n@cindex 
References\n\n@c  FIXME: In tex, the @uref's are unhyphenated, which is good for clarity,\n@c  but being long words they upset paragraph formatting (the preceding line\n@c  can get badly stretched).  Would like an conditional @* style line break\n@c  if the uref is too long to fit on the last line of the paragraph, but it's\n@c  not clear how to do that.  For now explicit @texlinebreak{}s are used on\n@c  paragraphs that come out bad.\n\n@section Books\n\n@itemize @bullet\n@item\nJonathan M. Borwein and Peter B. Borwein, ``Pi and the AGM: A Study in\nAnalytic Number Theory and Computational Complexity'', Wiley, 1998.\n\n@item\nHenri Cohen, ``A Course in Computational Algebraic Number Theory'', Graduate\nTexts in Mathematics number 138, Springer-Verlag, 1993.\n@texlinebreak{} @uref{http://www.math.u-bordeaux.fr/~cohen/}\n\n@item\nRichard Crandall, Carl Pomerance, ``Prime Numbers: A Computational Perspective'' 2nd edition, Springer, 2005.\n\n@item\nDonald E. Knuth, ``The Art of Computer Programming'', volume 2,\n``Seminumerical Algorithms'', 3rd edition, Addison-Wesley, 1998.\n@texlinebreak{} @uref{http://www-cs-faculty.stanford.edu/~knuth/taocp.html}\n\n@item\nJohn D. Lipson, ``Elements of Algebra and Algebraic Computing'',\nThe Benjamin Cummings Publishing Company Inc, 1981.\n\n@item\nAlfred J. Menezes, Paul C. van Oorschot and Scott A. Vanstone, ``Handbook of\nApplied Cryptography'', @uref{http://www.cacr.math.uwaterloo.ca/hac/}\n\n@item\nRichard M. Stallman, ``Using and Porting GCC'', Free Software Foundation, 1999,\navailable online @uref{http://gcc.gnu.org/onlinedocs/}, and in\nthe GCC package @uref{ftp://ftp.gnu.org/gnu/gcc/}\n@end itemize\n\n@section Papers\n\n@itemize @bullet\n@item\nDan Bernstein, ``Detecting perfect powers in essentially linear time'', Math. Comp. (67) pp.@: 1253-1283, 1998.\n\n@item\nYves Bertot, Nicolas Magaud and Paul Zimmermann, ``A Proof of GMP Square\nRoot'', Journal of Automated Reasoning, volume 29, 2002, pp.@: 225-252.  
Also\navailable online as INRIA Research Report 4475, June 2001,\n@uref{http://www.inria.fr/rrrt/rr-4475.html}\n\n@item\nMarco Bodrato, Alberto Zanoni, ``Integer and Polynomial Multiplication: Towards optimal Toom-Cook Matrices'', ISSAC 2007 Proceedings, Ontario, Canada, July 29 - August 1, 2007, ACM Press. Available online at @uref{http://ln.bodrato.it/issac2007_pdf}\n\n@item\nMarco Bodrato, ``High degree Toom`n'half for balanced and unbalanced multiplication'', E. Antelo, D. Hough and P. Ienne, editors, Proceedings of the 20th IEEE Symposium on Computer Arithmetic, IEEE, Tubingen, Germany, July 25-27, 2011, pp. 15--22. See @uref{http://bodrato.it/papers}\n\n@item\nRichard Brent and Paul Zimmermann, ``Modern Computer Arithmetic'',\nversion 0.4, November 2009, @uref{http://www.loria.fr/~zimmerma/mca/mca-0.4.pdf}\n\n@item\nChristoph Burnikel and Joachim Ziegler, ``Fast Recursive Division'',\nMax-Planck-Institut fuer Informatik Research Report MPI-I-98-1-022,\n@texlinebreak{} @uref{http://data.mpi-sb.mpg.de/internet/reports.nsf/NumberView/1998-1-022}\n\n@item\nAgner Fog, ``Software optimization resources'', online at @uref{http://www.agner.org/optimize/}\n\n@item\nPierrick Gaudry, Alexander Kruppa, Paul Zimmermann, ``A GMP-based implementation of Schoenhage-Strassen's large integer multiplication algorithm'', ISSAC 2007 Proceedings, Ontario, Canada, July 29 - August 1, 2007, pp.@: 167-174, ACM Press. Full text available at @uref{http://hal.inria.fr/docs/00/14/86/20/PDF/fft.final.pdf}\n\n@item\nTorbjorn Granlund and Peter L. Montgomery, ``Division by Invariant Integers\nusing Multiplication'', in Proceedings of the SIGPLAN PLDI'94 Conference, June\n1994.  
Also available @uref{ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psa4.gz}\n(and .psl.gz).\n\n@item\nNiels M@\"oller and Torbj@\"orn Granlund, ``Improved division by invariant\nintegers'', to appear.\n\n@item\nTorbj@\"orn Granlund and Niels M@\"oller, ``Division of integers large and\nsmall'', to appear.\n\n@item\nDavid Harvey, ``The Karatsuba middle product for integers'', (preprint), 2009. Available at @uref{http://www.cims.nyu.edu/~harvey/mulmid/mulmid.pdf}\n\n@item\nTudor Jebelean,\n``An algorithm for exact division'',\nJournal of Symbolic Computation,\nvolume 15, 1993, pp.@: 169-180.\nResearch report version available @texlinebreak{}\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-35.ps.gz}\n\n@item\nTudor Jebelean, ``Exact Division with Karatsuba Complexity - Extended\nAbstract'', RISC-Linz technical report 96-31, @texlinebreak{}\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-31.ps.gz}\n\n@item\nTudor Jebelean, ``Practical Integer Division with Karatsuba Complexity'',\nISSAC 97, pp.@: 339-341.  Technical report available @texlinebreak{}\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-29.ps.gz}\n\n@item\nTudor Jebelean, ``A Generalization of the Binary GCD Algorithm'', ISSAC 93,\npp.@: 111-116.  Technical report version available @texlinebreak{}\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1993/93-01.ps.gz}\n\n@item\nTudor Jebelean, ``A Double-Digit Lehmer-Euclid Algorithm for Finding the GCD\nof Long Integers'', Journal of Symbolic Computation, volume 19, 1995,\npp.@: 145-157.  Technical report version also available @texlinebreak{}\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz}\n\n@item\nWerner Krandick, Jeremy R. 
Johnson, ``Efficient Multiprecision Floating Point Multiplication with Exact Rounding'', Technical Report, RISC Linz, 1993, available at @uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1993/93-76.ps.gz}\n\n@item\nWerner Krandick and Tudor Jebelean, ``Bidirectional Exact Integer Division'',\nJournal of Symbolic Computation, volume 21, 1996, pp.@: 441-455.  Early\ntechnical report version also available\n@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-50.ps.gz}\n\n@item\nMakoto Matsumoto and Takuji Nishimura, ``Mersenne Twister: A 623-dimensionally\nequidistributed uniform pseudorandom number generator'', ACM Transactions on\nModelling and Computer Simulation, volume 8, January 1998, pp.@: 3-30.\nAvailable online @texlinebreak{}\n@uref{http://www.math.keio.ac.jp/~nisimura/random/doc/mt.ps.gz} (or .pdf)\n\n@item\nR. Moenck and A. Borodin, ``Fast Modular Transforms via Division'',\nProceedings of the 13th Annual IEEE Symposium on Switching and Automata\nTheory, October 1972, pp.@: 90-96.  Reprinted as ``Fast Modular Transforms'',\nJournal of Computer and System Sciences, volume 8, number 3, June 1974,\npp.@: 366-386.\n\n@item\nNiels M@\"oller, ``On Schoenhage's algorithm and subquadratic integer GCD computation'', Math. Comp. 2007. Available online at @uref{http://www.lysator.liu.se/~nisse/archive/S0025-5718-07-02017-0.pdf}\n\n@item\nPeter L. Montgomery, ``Modular Multiplication Without Trial Division'', in\nMathematics of Computation, volume 44, number 170, April 1985.\n\n@item\nThom Mulders, ``On short multiplications and divisions'', Appl. Algebra Engrg. Comm. Comput. 11 (2000), no. 1, pp.@: 69-88. Tech. report No. 276, Dept. of Comp. Sci., ETH Zurich, Nov 1997, available online at @uref{ftp://ftp.inf.ethz.ch/pub/publications/tech-reports/2xx/276.pdf}\n\n@item\nArnold Sch@\"onhage and Volker Strassen, ``Schnelle Multiplikation grosser\nZahlen'', Computing 7, 1971, pp.@: 281-292.\n\n@item\nA. Sch@\"onhage, A. F. W. Grotefeld and E. 
Vetter, ``Fast Algorithms, A Multitape Turing Machine Implementation'' BI Wissenschafts-Verlag, Mannheim, 1994. \n\n@item\nKenneth Weber, ``The accelerated integer GCD algorithm'',\nACM Transactions on Mathematical Software,\nvolume 21, number 1, March 1995, pp.@: 111-122.\n\n@item\nPaul Zimmermann, ``Karatsuba Square Root'', INRIA Research Report 3805,\nNovember 1999, @uref{http://www.inria.fr/rrrt/rr-3805.html}\n\n@item\nPaul Zimmermann, ``A Proof of GMP Fast Division and Square Root\nImplementations'', @texlinebreak{}\n@uref{http://www.loria.fr/~zimmerma/papers/proof-div-sqrt.ps.gz}\n\n@item\nDan Zuras, ``On Squaring and Multiplying Large Integers'', ARITH-11: IEEE\nSymposium on Computer Arithmetic, 1993, pp.@: 260 to 271.  Reprinted as ``More\non Multiplying and Squaring Large Integers'', IEEE Transactions on Computers,\nvolume 43, number 8, August 1994, pp.@: 899-908.\n@end itemize\n\n\n@node GNU Free Documentation License, Concept Index, References, Top\n@appendix GNU Free Documentation License\n@cindex GNU Free Documentation License\n@cindex Free Documentation License\n@cindex Documentation license\n@include fdl.texi\n\n\n@node Concept Index, Function Index, GNU Free Documentation License, Top\n@comment  node-name,  next,  previous,  up\n@unnumbered Concept Index\n@printindex cp\n\n@node Function Index,  , Concept Index, Top\n@comment  node-name,  next,  previous,  up\n@unnumbered Function and Type Index\n@printindex fn\n\n@bye\n\n@c Local variables:\n@c fill-column: 78\n@c compile-command: \"make mpir.info\"\n@c End:\n\n"
  },
  {
    "path": "doc/texinfo.tex",
    "content": "% texinfo.tex -- TeX macros to handle Texinfo files.\n% \n% Load plain if necessary, i.e., if running under initex.\n\\expandafter\\ifx\\csname fmtname\\endcsname\\relax\\input plain\\fi\n%\n\\def\\texinfoversion{2013-02-01.11}\n%\n% Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995,\n% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,\n% 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.\n%\n% This texinfo.tex file is free software: you can redistribute it and/or\n% modify it under the terms of the GNU General Public License as\n% published by the Free Software Foundation, either version 3 of the\n% License, or (at your option) any later version.\n%\n% This texinfo.tex file is distributed in the hope that it will be\n% useful, but WITHOUT ANY WARRANTY; without even the implied warranty\n% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n% General Public License for more details.\n%\n% You should have received a copy of the GNU General Public License\n% along with this program.  If not, see <http://www.gnu.org/licenses/>.\n%\n% As a special exception, when this file is read by TeX when processing\n% a Texinfo source document, you may use the result without\n% restriction. This Exception is an additional permission under section 7\n% of the GNU General Public License, version 3 (\"GPLv3\").\n%\n% Please try the latest version of texinfo.tex before submitting bug\n% reports; you can get the latest version from:\n%   http://ftp.gnu.org/gnu/texinfo/ (the Texinfo release area), or\n%   http://ftpmirror.gnu.org/texinfo/ (same, via a mirror), or\n%   http://www.gnu.org/software/texinfo/ (the Texinfo home page)\n% The texinfo.tex in any given distribution could well be out\n% of date, so if that's what you're using, please check.\n%\n% Send bug reports to bug-texinfo@gnu.org.  Please include including a\n% complete document in each bug report with which we can reproduce the\n% problem.  
Patches are, of course, greatly appreciated.\n%\n% To process a Texinfo manual with TeX, it's most reliable to use the\n% texi2dvi shell script that comes with the distribution.  For a simple\n% manual foo.texi, however, you can get away with this:\n%   tex foo.texi\n%   texindex foo.??\n%   tex foo.texi\n%   tex foo.texi\n%   dvips foo.dvi -o  # or whatever; this makes foo.ps.\n% The extra TeX runs get the cross-reference information correct.\n% Sometimes one run after texindex suffices, and sometimes you need more\n% than two; texi2dvi does it as many times as necessary.\n%\n% It is possible to adapt texinfo.tex for other languages, to some\n% extent.  You can get the existing language-specific files from the\n% full Texinfo distribution.\n%\n% The GNU Texinfo home page is http://www.gnu.org/software/texinfo.\n\n\n\\message{Loading texinfo [version \\texinfoversion]:}\n\n% If in a .fmt file, print the version number\n% and turn on active characters that we couldn't do earlier because\n% they might have appeared in the input file name.\n\\everyjob{\\message{[Texinfo version \\texinfoversion]}%\n  \\catcode`+=\\active \\catcode`\\_=\\active}\n\n\\chardef\\other=12\n\n% We never want plain's \\outer definition of \\+ in Texinfo.\n% For @tex, we can use \\tabalign.\n\\let\\+ = \\relax\n\n% Save some plain tex macros whose names we will 
redefine.\n\\let\\ptexb=\\b\n\\let\\ptexbullet=\\bullet\n\\let\\ptexc=\\c\n\\let\\ptexcomma=\\,\n\\let\\ptexdot=\\.\n\\let\\ptexdots=\\dots\n\\let\\ptexend=\\end\n\\let\\ptexequiv=\\equiv\n\\let\\ptexexclam=\\!\n\\let\\ptexfootnote=\\footnote\n\\let\\ptexgtr=>\n\\let\\ptexhat=^\n\\let\\ptexi=\\i\n\\let\\ptexindent=\\indent\n\\let\\ptexinsert=\\insert\n\\let\\ptexlbrace=\\{\n\\let\\ptexless=<\n\\let\\ptexnewwrite\\newwrite\n\\let\\ptexnoindent=\\noindent\n\\let\\ptexplus=+\n\\let\\ptexraggedright=\\raggedright\n\\let\\ptexrbrace=\\}\n\\let\\ptexslash=\\/\n\\let\\ptexstar=\\*\n\\let\\ptext=\\t\n\\let\\ptextop=\\top\n{\\catcode`\\'=\\active \\global\\let\\ptexquoteright'}% active in plain's math mode\n\n% If this character appears in an error message or help string, it\n% starts a new line in the output.\n\\newlinechar = `^^J\n\n% Use TeX 3.0's \\inputlineno to get the line number, for better error\n% messages, but if we're using an old version of TeX, don't do anything.\n%\n\\ifx\\inputlineno\\thisisundefined\n  \\let\\linenumber = \\empty % Pre-3.0.\n\\else\n  \\def\\linenumber{l.\\the\\inputlineno:\\space}\n\\fi\n\n% Set up fixed words for English if not already set.\n\\ifx\\putwordAppendix\\undefined  \\gdef\\putwordAppendix{Appendix}\\fi\n\\ifx\\putwordChapter\\undefined   \\gdef\\putwordChapter{Chapter}\\fi\n\\ifx\\putworderror\\undefined     \\gdef\\putworderror{error}\\fi\n\\ifx\\putwordfile\\undefined      \\gdef\\putwordfile{file}\\fi\n\\ifx\\putwordin\\undefined        \\gdef\\putwordin{in}\\fi\n\\ifx\\putwordIndexIsEmpty\\undefined       \\gdef\\putwordIndexIsEmpty{(Index is empty)}\\fi\n\\ifx\\putwordIndexNonexistent\\undefined   \\gdef\\putwordIndexNonexistent{(Index is nonexistent)}\\fi\n\\ifx\\putwordInfo\\undefined      \\gdef\\putwordInfo{Info}\\fi\n\\ifx\\putwordInstanceVariableof\\undefined \\gdef\\putwordInstanceVariableof{Instance Variable of}\\fi\n\\ifx\\putwordMethodon\\undefined  \\gdef\\putwordMethodon{Method 
on}\\fi\n\\ifx\\putwordNoTitle\\undefined   \\gdef\\putwordNoTitle{No Title}\\fi\n\\ifx\\putwordof\\undefined        \\gdef\\putwordof{of}\\fi\n\\ifx\\putwordon\\undefined        \\gdef\\putwordon{on}\\fi\n\\ifx\\putwordpage\\undefined      \\gdef\\putwordpage{page}\\fi\n\\ifx\\putwordsection\\undefined   \\gdef\\putwordsection{section}\\fi\n\\ifx\\putwordSection\\undefined   \\gdef\\putwordSection{Section}\\fi\n\\ifx\\putwordsee\\undefined       \\gdef\\putwordsee{see}\\fi\n\\ifx\\putwordSee\\undefined       \\gdef\\putwordSee{See}\\fi\n\\ifx\\putwordShortTOC\\undefined  \\gdef\\putwordShortTOC{Short Contents}\\fi\n\\ifx\\putwordTOC\\undefined       \\gdef\\putwordTOC{Table of Contents}\\fi\n%\n\\ifx\\putwordMJan\\undefined \\gdef\\putwordMJan{January}\\fi\n\\ifx\\putwordMFeb\\undefined \\gdef\\putwordMFeb{February}\\fi\n\\ifx\\putwordMMar\\undefined \\gdef\\putwordMMar{March}\\fi\n\\ifx\\putwordMApr\\undefined \\gdef\\putwordMApr{April}\\fi\n\\ifx\\putwordMMay\\undefined \\gdef\\putwordMMay{May}\\fi\n\\ifx\\putwordMJun\\undefined \\gdef\\putwordMJun{June}\\fi\n\\ifx\\putwordMJul\\undefined \\gdef\\putwordMJul{July}\\fi\n\\ifx\\putwordMAug\\undefined \\gdef\\putwordMAug{August}\\fi\n\\ifx\\putwordMSep\\undefined \\gdef\\putwordMSep{September}\\fi\n\\ifx\\putwordMOct\\undefined \\gdef\\putwordMOct{October}\\fi\n\\ifx\\putwordMNov\\undefined \\gdef\\putwordMNov{November}\\fi\n\\ifx\\putwordMDec\\undefined \\gdef\\putwordMDec{December}\\fi\n%\n\\ifx\\putwordDefmac\\undefined    \\gdef\\putwordDefmac{Macro}\\fi\n\\ifx\\putwordDefspec\\undefined   \\gdef\\putwordDefspec{Special Form}\\fi\n\\ifx\\putwordDefvar\\undefined    \\gdef\\putwordDefvar{Variable}\\fi\n\\ifx\\putwordDefopt\\undefined    \\gdef\\putwordDefopt{User Option}\\fi\n\\ifx\\putwordDeffunc\\undefined   \\gdef\\putwordDeffunc{Function}\\fi\n\n% Since the category of space is not known, we have to be careful.\n\\chardef\\spacecat = 10\n\\def\\spaceisspace{\\catcode`\\ =\\spacecat}\n\n% sometimes characters 
are active, so we need control sequences.\n\\chardef\\ampChar   = `\\&\n\\chardef\\colonChar = `\\:\n\\chardef\\commaChar = `\\,\n\\chardef\\dashChar  = `\\-\n\\chardef\\dotChar   = `\\.\n\\chardef\\exclamChar= `\\!\n\\chardef\\hashChar  = `\\#\n\\chardef\\lquoteChar= `\\`\n\\chardef\\questChar = `\\?\n\\chardef\\rquoteChar= `\\'\n\\chardef\\semiChar  = `\\;\n\\chardef\\slashChar = `\\/\n\\chardef\\underChar = `\\_\n\n% Ignore a token.\n%\n\\def\\gobble#1{}\n\n% The following is used inside several \\edef's.\n\\def\\makecsname#1{\\expandafter\\noexpand\\csname#1\\endcsname}\n\n% Hyphenation fixes.\n\\hyphenation{\n  Flor-i-da Ghost-script Ghost-view Mac-OS Post-Script\n  ap-pen-dix bit-map bit-maps\n  data-base data-bases eshell fall-ing half-way long-est man-u-script\n  man-u-scripts mini-buf-fer mini-buf-fers over-view par-a-digm\n  par-a-digms rath-er rec-tan-gu-lar ro-bot-ics se-vere-ly set-up spa-ces\n  spell-ing spell-ings\n  stand-alone strong-est time-stamp time-stamps which-ever white-space\n  wide-spread wrap-around\n}\n\n% Margin to add to right of even pages, to left of odd pages.\n\\newdimen\\bindingoffset\n\\newdimen\\normaloffset\n\\newdimen\\pagewidth \\newdimen\\pageheight\n\n% For a final copy, take out the rectangles\n% that mark overfull boxes (in case you have decided\n% that the text looks ok even though it passes the margin).\n%\n\\def\\finalout{\\overfullrule=0pt }\n\n% Sometimes it is convenient to have everything in the transcript file\n% and nothing on the terminal.  We don't just call \\tracingall here,\n% since that produces some useless output on the terminal.  We also make\n% some effort to order the tracing commands to reduce output in the log\n% file; cf. 
trace.sty in LaTeX.\n%\n\\def\\gloggingall{\\begingroup \\globaldefs = 1 \\loggingall \\endgroup}%\n\\def\\loggingall{%\n  \\tracingstats2\n  \\tracingpages1\n  \\tracinglostchars2  % 2 gives us more in etex\n  \\tracingparagraphs1\n  \\tracingoutput1\n  \\tracingmacros2\n  \\tracingrestores1\n  \\showboxbreadth\\maxdimen \\showboxdepth\\maxdimen\n  \\ifx\\eTeXversion\\thisisundefined\\else % etex gives us more logging\n    \\tracingscantokens1\n    \\tracingifs1\n    \\tracinggroups1\n    \\tracingnesting2\n    \\tracingassigns1\n  \\fi\n  \\tracingcommands3  % 3 gives us more in etex\n  \\errorcontextlines16\n}%\n\n% @errormsg{MSG}.  Do the index-like expansions on MSG, but if things\n% aren't perfect, it's not the end of the world, being an error message,\n% after all.\n% \n\\def\\errormsg{\\begingroup \\indexnofonts \\doerrormsg}\n\\def\\doerrormsg#1{\\errmessage{#1}}\n\n% add check for \\lastpenalty to plain's definitions.  If the last thing\n% we did was a \\nobreak, we don't want to insert more space.\n%\n\\def\\smallbreak{\\ifnum\\lastpenalty<10000\\par\\ifdim\\lastskip<\\smallskipamount\n  \\removelastskip\\penalty-50\\smallskip\\fi\\fi}\n\\def\\medbreak{\\ifnum\\lastpenalty<10000\\par\\ifdim\\lastskip<\\medskipamount\n  \\removelastskip\\penalty-100\\medskip\\fi\\fi}\n\\def\\bigbreak{\\ifnum\\lastpenalty<10000\\par\\ifdim\\lastskip<\\bigskipamount\n  \\removelastskip\\penalty-200\\bigskip\\fi\\fi}\n\n% Do @cropmarks to get crop marks.\n%\n\\newif\\ifcropmarks\n\\let\\cropmarks = \\cropmarkstrue\n%\n% Dimensions to add cropmarks at corners.\n% Added by P. A. MacKay, 12 Nov. 
1986\n%\n\\newdimen\\outerhsize \\newdimen\\outervsize % set by the paper size routines\n\\newdimen\\cornerlong  \\cornerlong=1pc\n\\newdimen\\cornerthick \\cornerthick=.3pt\n\\newdimen\\topandbottommargin \\topandbottommargin=.75in\n\n% Output a mark which sets \\thischapter, \\thissection and \\thiscolor.\n% We dump everything together because we only have one kind of mark.\n% This works because we only use \\botmark / \\topmark, not \\firstmark.\n%\n% A mark contains a subexpression of the \\ifcase ... \\fi construct.\n% \\get*marks macros below extract the needed part using \\ifcase.\n%\n% Another complication is to let the user choose whether \\thischapter\n% (\\thissection) refers to the chapter (section) in effect at the top\n% of a page, or that at the bottom of a page.  The solution is\n% described on page 260 of The TeXbook.  It involves outputting two\n% marks for the sectioning macros, one before the section break, and\n% one after.  I won't pretend I can describe this better than DEK...\n\\def\\domark{%\n  \\toks0=\\expandafter{\\lastchapterdefs}%\n  \\toks2=\\expandafter{\\lastsectiondefs}%\n  \\toks4=\\expandafter{\\prevchapterdefs}%\n  \\toks6=\\expandafter{\\prevsectiondefs}%\n  \\toks8=\\expandafter{\\lastcolordefs}%\n  \\mark{%\n                   \\the\\toks0 \\the\\toks2\n      \\noexpand\\or \\the\\toks4 \\the\\toks6\n    \\noexpand\\else \\the\\toks8\n  }%\n}\n% \\topmark doesn't work for the very first chapter (after the title\n% page or the contents), so we use \\firstmark there -- this gets us\n% the mark with the chapter defs, unless the user sneaks in, e.g.,\n% @setcolor (or @url, or @link, etc.) 
between @contents and the very\n% first @chapter.\n\\def\\gettopheadingmarks{%\n  \\ifcase0\\topmark\\fi\n  \\ifx\\thischapter\\empty \\ifcase0\\firstmark\\fi \\fi\n}\n\\def\\getbottomheadingmarks{\\ifcase1\\botmark\\fi}\n\\def\\getcolormarks{\\ifcase2\\topmark\\fi}\n\n% Avoid \"undefined control sequence\" errors.\n\\def\\lastchapterdefs{}\n\\def\\lastsectiondefs{}\n\\def\\prevchapterdefs{}\n\\def\\prevsectiondefs{}\n\\def\\lastcolordefs{}\n\n% Main output routine.\n\\chardef\\PAGE = 255\n\\output = {\\onepageout{\\pagecontents\\PAGE}}\n\n\\newbox\\headlinebox\n\\newbox\\footlinebox\n\n% \\onepageout takes a vbox as an argument.  Note that \\pagecontents\n% does insertions, but you have to call it yourself.\n\\def\\onepageout#1{%\n  \\ifcropmarks \\hoffset=0pt \\else \\hoffset=\\normaloffset \\fi\n  %\n  \\ifodd\\pageno  \\advance\\hoffset by \\bindingoffset\n  \\else \\advance\\hoffset by -\\bindingoffset\\fi\n  %\n  % Do this outside of the \\shipout so @code etc. will be expanded in\n  % the headline as they should be, not taken literally (outputting ''code).\n  \\ifodd\\pageno \\getoddheadingmarks \\else \\getevenheadingmarks \\fi\n  \\setbox\\headlinebox = \\vbox{\\let\\hsize=\\pagewidth \\makeheadline}%\n  \\ifodd\\pageno \\getoddfootingmarks \\else \\getevenfootingmarks \\fi\n  \\setbox\\footlinebox = \\vbox{\\let\\hsize=\\pagewidth \\makefootline}%\n  %\n  {%\n    % Have to do this stuff outside the \\shipout because we want it to\n    % take effect in \\write's, yet the group defined by the \\vbox ends\n    % before the \\shipout runs.\n    %\n    \\indexdummies         % don't expand commands in the output.\n    \\normalturnoffactive  % \\ in index entries must not stay \\, e.g., if\n               % the page break happens to be in the middle of an example.\n               % We don't want .vr (or whatever) entries like this:\n               % \\entry{{\\tt \\indexbackslash }acronym}{32}{\\code {\\acronym}}\n               % \"\\acronym\" won't work when 
it's read back in;\n               % it needs to be\n               % {\\code {{\\tt \\backslashcurfont }acronym}\n    \\shipout\\vbox{%\n      % Do this early so pdf references go to the beginning of the page.\n      \\ifpdfmakepagedest \\pdfdest name{\\the\\pageno} xyz\\fi\n      %\n      \\ifcropmarks \\vbox to \\outervsize\\bgroup\n        \\hsize = \\outerhsize\n        \\vskip-\\topandbottommargin\n        \\vtop to0pt{%\n          \\line{\\ewtop\\hfil\\ewtop}%\n          \\nointerlineskip\n          \\line{%\n            \\vbox{\\moveleft\\cornerthick\\nstop}%\n            \\hfill\n            \\vbox{\\moveright\\cornerthick\\nstop}%\n          }%\n          \\vss}%\n        \\vskip\\topandbottommargin\n        \\line\\bgroup\n          \\hfil % center the page within the outer (page) hsize.\n          \\ifodd\\pageno\\hskip\\bindingoffset\\fi\n          \\vbox\\bgroup\n      \\fi\n      %\n      \\unvbox\\headlinebox\n      \\pagebody{#1}%\n      \\ifdim\\ht\\footlinebox > 0pt\n        % Only leave this space if the footline is nonempty.\n        % (We lessened \\vsize for it in \\oddfootingyyy.)\n        % The \\baselineskip=24pt in plain's \\makefootline has no effect.\n        \\vskip 24pt\n        \\unvbox\\footlinebox\n      \\fi\n      %\n      \\ifcropmarks\n          \\egroup % end of \\vbox\\bgroup\n        \\hfil\\egroup % end of (centering) \\line\\bgroup\n        \\vskip\\topandbottommargin plus1fill minus1fill\n        \\boxmaxdepth = \\cornerthick\n        \\vbox to0pt{\\vss\n          \\line{%\n            \\vbox{\\moveleft\\cornerthick\\nsbot}%\n            \\hfill\n            \\vbox{\\moveright\\cornerthick\\nsbot}%\n          }%\n          \\nointerlineskip\n          \\line{\\ewbot\\hfil\\ewbot}%\n        }%\n      \\egroup % \\vbox from first cropmarks clause\n      \\fi\n    }% end of \\shipout\\vbox\n  }% end of group with \\indexdummies\n  \\advancepageno\n  \\ifnum\\outputpenalty>-20000 
\\else\\dosupereject\\fi\n}\n\n\\newinsert\\margin \\dimen\\margin=\\maxdimen\n\n\\def\\pagebody#1{\\vbox to\\pageheight{\\boxmaxdepth=\\maxdepth #1}}\n{\\catcode`\\@ =11\n\\gdef\\pagecontents#1{\\ifvoid\\topins\\else\\unvbox\\topins\\fi\n% marginal hacks, juha@viisa.uucp (Juha Takala)\n\\ifvoid\\margin\\else % marginal info is present\n  \\rlap{\\kern\\hsize\\vbox to\\z@{\\kern1pt\\box\\margin \\vss}}\\fi\n\\dimen@=\\dp#1\\relax \\unvbox#1\\relax\n\\ifvoid\\footins\\else\\vskip\\skip\\footins\\footnoterule \\unvbox\\footins\\fi\n\\ifr@ggedbottom \\kern-\\dimen@ \\vfil \\fi}\n}\n\n% Here are the rules for the cropmarks.  Note that they are\n% offset so that the space between them is truly \\outerhsize or \\outervsize\n% (P. A. MacKay, 12 November, 1986)\n%\n\\def\\ewtop{\\vrule height\\cornerthick depth0pt width\\cornerlong}\n\\def\\nstop{\\vbox\n  {\\hrule height\\cornerthick depth\\cornerlong width\\cornerthick}}\n\\def\\ewbot{\\vrule height0pt depth\\cornerthick width\\cornerlong}\n\\def\\nsbot{\\vbox\n  {\\hrule height\\cornerlong depth\\cornerthick width\\cornerthick}}\n\n% Parse an argument, then pass it to #1.  The argument is the rest of\n% the input line (except we remove a trailing comment).  
#1 should be a\n% macro which expects an ordinary undelimited TeX argument.\n%\n\\def\\parsearg{\\parseargusing{}}\n\\def\\parseargusing#1#2{%\n  \\def\\argtorun{#2}%\n  \\begingroup\n    \\obeylines\n    \\spaceisspace\n    #1%\n    \\parseargline\\empty% Insert the \\empty token, see \\finishparsearg below.\n}\n\n{\\obeylines %\n  \\gdef\\parseargline#1^^M{%\n    \\endgroup % End of the group started in \\parsearg.\n    \\argremovecomment #1\\comment\\ArgTerm%\n  }%\n}\n\n% First remove any @comment, then any @c comment.\n\\def\\argremovecomment#1\\comment#2\\ArgTerm{\\argremovec #1\\c\\ArgTerm}\n\\def\\argremovec#1\\c#2\\ArgTerm{\\argcheckspaces#1\\^^M\\ArgTerm}\n\n% Each occurrence of `\\^^M' or `<space>\\^^M' is replaced by a single space.\n%\n% \\argremovec might leave us with trailing space, e.g.,\n%    @end itemize  @c foo\n% This space token undergoes the same procedure and is eventually removed\n% by \\finishparsearg.\n%\n\\def\\argcheckspaces#1\\^^M{\\argcheckspacesX#1\\^^M \\^^M}\n\\def\\argcheckspacesX#1 \\^^M{\\argcheckspacesY#1\\^^M}\n\\def\\argcheckspacesY#1\\^^M#2\\^^M#3\\ArgTerm{%\n  \\def\\temp{#3}%\n  \\ifx\\temp\\empty\n    % Do not use \\next, perhaps the caller of \\parsearg uses it; reuse \\temp:\n    \\let\\temp\\finishparsearg\n  \\else\n    \\let\\temp\\argcheckspaces\n  \\fi\n  % Put the space token in:\n  \\temp#1 #3\\ArgTerm\n}\n\n% If a _delimited_ argument is enclosed in braces, they get stripped; so\n% to get _exactly_ the rest of the line, we had to prevent such situation.\n% We prepended an \\empty token at the very beginning and we expand it now,\n% just before passing the control to \\argtorun.\n% (Similarly, we have to think about #3 of \\argcheckspacesY above: it is\n% either the null string, or it ends with \\^^M---thus there is no danger\n% that a pair of braces would be stripped.\n%\n% But first, we have to remove the trailing space token.\n%\n\\def\\finishparsearg#1 \\ArgTerm{\\expandafter\\argtorun\\expandafter{#1}}\n\n% 
\\parseargdef\\foo{...}\n%\tis roughly equivalent to\n% \\def\\foo{\\parsearg\\Xfoo}\n% \\def\\Xfoo#1{...}\n%\n% Actually, I use \\csname\\string\\foo\\endcsname, ie. \\\\foo, as it is my\n% favourite TeX trick.  --kasal, 16nov03\n\n\\def\\parseargdef#1{%\n  \\expandafter \\doparseargdef \\csname\\string#1\\endcsname #1%\n}\n\\def\\doparseargdef#1#2{%\n  \\def#2{\\parsearg#1}%\n  \\def#1##1%\n}\n\n% Several utility definitions with active space:\n{\n  \\obeyspaces\n  \\gdef\\obeyedspace{ }\n\n  % Make each space character in the input produce a normal interword\n  % space in the output.  Don't allow a line break at this space, as this\n  % is used only in environments like @example, where each line of input\n  % should produce a line of output anyway.\n  %\n  \\gdef\\sepspaces{\\obeyspaces\\let =\\tie}\n\n  % If an index command is used in an @example environment, any spaces\n  % therein should become regular spaces in the raw index file, not the\n  % expansion of \\tie (\\leavevmode \\penalty \\@M \\ ).\n  \\gdef\\unsepspaces{\\let =\\space}\n}\n\n\n\\def\\flushcr{\\ifx\\par\\lisppar \\def\\next##1{}\\else \\let\\next=\\relax \\fi \\next}\n\n% Define the framework for environments in texinfo.tex.  It's used like this:\n%\n%   \\envdef\\foo{...}\n%   \\def\\Efoo{...}\n%\n% It's the responsibility of \\envdef to insert \\begingroup before the\n% actual body; @end closes the group after calling \\Efoo.  \\envdef also\n% defines \\thisenv, so the current environment is known; @end checks\n% whether the environment name matches.  The \\checkenv macro can also be\n% used to check whether the current environment is the one expected.\n%\n% Non-false conditionals (@iftex, @ifset) don't fit into this, so they\n% are not treated as environments; they don't open a group.  
(The\n% implementation of @end takes care not to call \\endgroup in this\n% special case.)\n\n\n% At run-time, environments start with this:\n\\def\\startenvironment#1{\\begingroup\\def\\thisenv{#1}}\n% initialize\n\\let\\thisenv\\empty\n\n% ... but they get defined via ``\\envdef\\foo{...}'':\n\\long\\def\\envdef#1#2{\\def#1{\\startenvironment#1#2}}\n\\def\\envparseargdef#1#2{\\parseargdef#1{\\startenvironment#1#2}}\n\n% Check whether we're in the right environment:\n\\def\\checkenv#1{%\n  \\def\\temp{#1}%\n  \\ifx\\thisenv\\temp\n  \\else\n    \\badenverr\n  \\fi\n}\n\n% Environment mismatch, #1 expected:\n\\def\\badenverr{%\n  \\errhelp = \\EMsimple\n  \\errmessage{This command can appear only \\inenvironment\\temp,\n    not \\inenvironment\\thisenv}%\n}\n\\def\\inenvironment#1{%\n  \\ifx#1\\empty\n    outside of any environment%\n  \\else\n    in environment \\expandafter\\string#1%\n  \\fi\n}\n\n% @end foo executes the definition of \\Efoo.\n% But first, it executes a specialized version of \\checkenv\n%\n\\parseargdef\\end{%\n  \\if 1\\csname iscond.#1\\endcsname\n  \\else\n    % The general wording of \\badenverr may not be ideal.\n    \\expandafter\\checkenv\\csname#1\\endcsname\n    \\csname E#1\\endcsname\n    \\endgroup\n  \\fi\n}\n\n\\newhelp\\EMsimple{Press RETURN to continue.}\n\n\n% Be sure we're in horizontal mode when doing a tie, since we make space\n% equivalent to this in @example-like environments. 
Otherwise, a space\n% at the beginning of a line will start with \\penalty -- and\n% since \\penalty is valid in vertical mode, we'd end up putting the\n% penalty on the vertical list instead of in the new paragraph.\n{\\catcode`@ = 11\n % Avoid using \\@M directly, because that causes trouble\n % if the definition is written into an index file.\n \\global\\let\\tiepenalty = \\@M\n \\gdef\\tie{\\leavevmode\\penalty\\tiepenalty\\ }\n}\n\n% @: forces normal size whitespace following.\n\\def\\:{\\spacefactor=1000 }\n\n% @* forces a line break.\n\\def\\*{\\unskip\\hfil\\break\\hbox{}\\ignorespaces}\n\n% @/ allows a line break.\n\\let\\/=\\allowbreak\n\n% @. is an end-of-sentence period.\n\\def\\.{.\\spacefactor=\\endofsentencespacefactor\\space}\n\n% @! is an end-of-sentence bang.\n\\def\\!{!\\spacefactor=\\endofsentencespacefactor\\space}\n\n% @? is an end-of-sentence query.\n\\def\\?{?\\spacefactor=\\endofsentencespacefactor\\space}\n\n% @frenchspacing on|off  says whether to put extra space after punctuation.\n%\n\\def\\onword{on}\n\\def\\offword{off}\n%\n\\parseargdef\\frenchspacing{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\onword \\plainfrenchspacing\n  \\else\\ifx\\temp\\offword \\plainnonfrenchspacing\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @frenchspacing option `\\temp', must be on|off}%\n  \\fi\\fi\n}\n\n% @w prevents a word break.  Without the \\leavevmode, @w at the\n% beginning of a paragraph, when TeX is still in vertical mode, would\n% produce a whole line of output instead of starting the paragraph.\n\\def\\w#1{\\leavevmode\\hbox{#1}}\n\n% @group ... @end group forces ... to be all on one page, by enclosing\n% it in a TeX vbox.  We use \\vtop instead of \\vbox to construct the box\n% to keep its height that of a normal line.  According to the rules for\n% \\topskip (p.114 of the TeXbook), the glue inserted is\n% max (\\topskip - \\ht (first item), 0).  
If that height is large,\n% therefore, no glue is inserted, and the space between the headline and\n% the text is small, which looks bad.\n%\n% Another complication is that the group might be very large.  This can\n% cause the glue on the previous page to be unduly stretched, because it\n% does not have much material.  In this case, it's better to add an\n% explicit \\vfill so that the extra space is at the bottom.  The\n% threshold for doing this is if the group is more than \\vfilllimit\n% percent of a page (\\vfilllimit can be changed inside of @tex).\n%\n\\newbox\\groupbox\n\\def\\vfilllimit{0.7}\n%\n\\envdef\\group{%\n  \\ifnum\\catcode`\\^^M=\\active \\else\n    \\errhelp = \\groupinvalidhelp\n    \\errmessage{@group invalid in context where filling is enabled}%\n  \\fi\n  \\startsavinginserts\n  %\n  \\setbox\\groupbox = \\vtop\\bgroup\n    % Do @comment since we are called inside an environment such as\n    % @example, where each end-of-line in the input causes an\n    % end-of-line in the output.  We don't want the end-of-line after\n    % the `@group' to put extra space in the output.  Since @group\n    % should appear on a line by itself (according to the Texinfo\n    % manual), we don't worry about eating any user text.\n    \\comment\n}\n%\n% The \\vtop produces a box with normal height and large depth; thus, TeX puts\n% \\baselineskip glue before it, and (when the next line of text is done)\n% \\lineskip glue after it.  Thus, space below is not quite equal to space\n% above.  
But it's pretty close.\n\\def\\Egroup{%\n    % To get correct interline space between the last line of the group\n    % and the first line afterwards, we have to propagate \\prevdepth.\n    \\endgraf % Not \\par, as it may have been set to \\lisppar.\n    \\global\\dimen1 = \\prevdepth\n  \\egroup           % End the \\vtop.\n  % \\dimen0 is the vertical size of the group's box.\n  \\dimen0 = \\ht\\groupbox  \\advance\\dimen0 by \\dp\\groupbox\n  % \\dimen2 is how much space is left on the page (more or less).\n  \\dimen2 = \\pageheight   \\advance\\dimen2 by -\\pagetotal\n  % if the group doesn't fit on the current page, and it's a big big\n  % group, force a page break.\n  \\ifdim \\dimen0 > \\dimen2\n    \\ifdim \\pagetotal < \\vfilllimit\\pageheight\n      \\page\n    \\fi\n  \\fi\n  \\box\\groupbox\n  \\prevdepth = \\dimen1\n  \\checkinserts\n}\n%\n% TeX puts in an \\escapechar (i.e., `@') at the beginning of the help\n% message, so this ends up printing `@group can only ...'.\n%\n\\newhelp\\groupinvalidhelp{%\ngroup can only be used in environments such as @example,^^J%\nwhere each line of input produces a line of output.}\n\n% @need space-in-mils\n% forces a page break if there is not space-in-mils remaining.\n\n\\newdimen\\mil  \\mil=0.001in\n\n\\parseargdef\\need{%\n  % Ensure vertical mode, so we don't make a big box in the middle of a\n  % paragraph.\n  \\par\n  %\n  % If the @need value is less than one line space, it's useless.\n  \\dimen0 = #1\\mil\n  \\dimen2 = \\ht\\strutbox\n  \\advance\\dimen2 by \\dp\\strutbox\n  \\ifdim\\dimen0 > \\dimen2\n    %\n    % Do a \\strut just to make the height of this box be normal, so the\n    % normal leading is inserted relative to the preceding line.\n    % And a page break here is fine.\n    \\vtop to #1\\mil{\\strut\\vfil}%\n    %\n    % TeX does not even consider page breaks if a penalty added to the\n    % main vertical list is 10000 or more.  
But in order to see if the\n    % empty box we just added fits on the page, we must make it consider\n    % page breaks.  On the other hand, we don't want to actually break the\n    % page after the empty box.  So we use a penalty of 9999.\n    %\n    % There is an extremely small chance that TeX will actually break the\n    % page at this \\penalty, if there are no other feasible breakpoints in\n    % sight.  (If the user is using lots of big @group commands, which\n    % almost-but-not-quite fill up a page, TeX will have a hard time doing\n    % good page breaking, for example.)  However, I could not construct an\n    % example where a page broke at this \\penalty; if it happens in a real\n    % document, then we can reconsider our strategy.\n    \\penalty9999\n    %\n    % Back up by the size of the box, whether we did a page break or not.\n    \\kern -#1\\mil\n    %\n    % Do not allow a page break right after this kern.\n    \\nobreak\n  \\fi\n}\n\n% @br   forces paragraph break (and is undocumented).\n\n\\let\\br = \\par\n\n% @page forces the start of a new page.\n%\n\\def\\page{\\par\\vfill\\supereject}\n\n% @exdent text....\n% outputs text on separate line in roman font, starting at standard page margin\n\n% This records the amount of indent in the innermost environment.\n% That's how much \\exdent should take out.\n\\newskip\\exdentamount\n\n% This defn is used inside fill environments such as @defun.\n\\parseargdef\\exdent{\\hfil\\break\\hbox{\\kern -\\exdentamount{\\rm#1}}\\hfil\\break}\n\n% This defn is used inside nofill environments such as @example.\n\\parseargdef\\nofillexdent{{\\advance \\leftskip by -\\exdentamount\n  \\leftline{\\hskip\\leftskip{\\rm#1}}}}\n\n% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current\n% paragraph.  For more general purposes, use the \\margin insertion\n% class.  WHICH is `l' or `r'.  
Not documented, written for gawk manual.\n%\n\\newskip\\inmarginspacing \\inmarginspacing=1cm\n\\def\\strutdepth{\\dp\\strutbox}\n%\n\\def\\doinmargin#1#2{\\strut\\vadjust{%\n  \\nobreak\n  \\kern-\\strutdepth\n  \\vtop to \\strutdepth{%\n    \\baselineskip=\\strutdepth\n    \\vss\n    % if you have multiple lines of stuff to put here, you'll need to\n    % make the vbox yourself of the appropriate size.\n    \\ifx#1l%\n      \\llap{\\ignorespaces #2\\hskip\\inmarginspacing}%\n    \\else\n      \\rlap{\\hskip\\hsize \\hskip\\inmarginspacing \\ignorespaces #2}%\n    \\fi\n    \\null\n  }%\n}}\n\\def\\inleftmargin{\\doinmargin l}\n\\def\\inrightmargin{\\doinmargin r}\n%\n% @inmargin{TEXT [, RIGHT-TEXT]}\n% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right;\n% else use TEXT for both).\n%\n\\def\\inmargin#1{\\parseinmargin #1,,\\finish}\n\\def\\parseinmargin#1,#2,#3\\finish{% not perfect, but better than nothing.\n  \\setbox0 = \\hbox{\\ignorespaces #2}%\n  \\ifdim\\wd0 > 0pt\n    \\def\\lefttext{#1}%  have both texts\n    \\def\\righttext{#2}%\n  \\else\n    \\def\\lefttext{#1}%  have only one text\n    \\def\\righttext{#1}%\n  \\fi\n  %\n  \\ifodd\\pageno\n    \\def\\temp{\\inrightmargin\\righttext}% odd page -> outside is right margin\n  \\else\n    \\def\\temp{\\inleftmargin\\lefttext}%\n  \\fi\n  \\temp\n}\n\n% @| inserts a changebar to the left of the current line.  It should\n% surround any changed text.  This approach does *not* work if the\n% change spans more than two lines of output.  To handle that, we would\n% have adopt a much more difficult approach (putting marks into the main\n% vertical list for the beginning and end of each change).  
This command\n% is not documented, not supported, and doesn't work.\n%\n\\def\\|{%\n  % \\vadjust can only be used in horizontal mode.\n  \\leavevmode\n  %\n  % Append this vertical mode material after the current line in the output.\n  \\vadjust{%\n    % We want to insert a rule with the height and depth of the current\n    % leading; that is exactly what \\strutbox is supposed to record.\n    \\vskip-\\baselineskip\n    %\n    % \\vadjust-items are inserted at the left edge of the type.  So\n    % the \\llap here moves out into the left-hand margin.\n    \\llap{%\n      %\n      % For a thicker or thinner bar, change the `1pt'.\n      \\vrule height\\baselineskip width1pt\n      %\n      % This is the space between the bar and the text.\n      \\hskip 12pt\n    }%\n  }%\n}\n\n% @include FILE -- \\input text of FILE.\n%\n\\def\\include{\\parseargusing\\filenamecatcodes\\includezzz}\n\\def\\includezzz#1{%\n  \\pushthisfilestack\n  \\def\\thisfile{#1}%\n  {%\n    \\makevalueexpandable  % we want to expand any @value in FILE.\n    \\turnoffactive        % and allow special characters in the expansion\n    \\indexnofonts         % Allow `@@' and other weird things in file names.\n    \\wlog{texinfo.tex: doing @include of #1^^J}%\n    \\edef\\temp{\\noexpand\\input #1 }%\n    %\n    % This trickery is to read FILE outside of a group, in case it makes\n    % definitions, etc.\n    \\expandafter\n  }\\temp\n  \\popthisfilestack\n}\n\\def\\filenamecatcodes{%\n  \\catcode`\\\\=\\other\n  \\catcode`~=\\other\n  \\catcode`^=\\other\n  \\catcode`_=\\other\n  \\catcode`|=\\other\n  \\catcode`<=\\other\n  \\catcode`>=\\other\n  \\catcode`+=\\other\n  \\catcode`-=\\other\n  \\catcode`\\`=\\other\n  \\catcode`\\'=\\other\n}\n\n\\def\\pushthisfilestack{%\n  \\expandafter\\pushthisfilestackX\\popthisfilestack\\StackTerm\n}\n\\def\\pushthisfilestackX{%\n  \\expandafter\\pushthisfilestackY\\thisfile\\StackTerm\n}\n\\def\\pushthisfilestackY #1\\StackTerm #2\\StackTerm {%\n  
\\gdef\\popthisfilestack{\\gdef\\thisfile{#1}\\gdef\\popthisfilestack{#2}}%\n}\n\n\\def\\popthisfilestack{\\errthisfilestackempty}\n\\def\\errthisfilestackempty{\\errmessage{Internal error:\n  the stack of filenames is empty.}}\n%\n\\def\\thisfile{}\n\n% @center line\n% outputs that line, centered.\n%\n\\parseargdef\\center{%\n  \\ifhmode\n    \\let\\centersub\\centerH\n  \\else\n    \\let\\centersub\\centerV\n  \\fi\n  \\centersub{\\hfil \\ignorespaces#1\\unskip \\hfil}%\n  \\let\\centersub\\relax % don't let the definition persist, just in case\n}\n\\def\\centerH#1{{%\n  \\hfil\\break\n  \\advance\\hsize by -\\leftskip\n  \\advance\\hsize by -\\rightskip\n  \\line{#1}%\n  \\break\n}}\n%\n\\newcount\\centerpenalty\n\\def\\centerV#1{%\n  % The idea here is the same as in \\startdefun, \\cartouche, etc.: if\n  % @center is the first thing after a section heading, we need to wipe\n  % out the negative parskip inserted by \\sectionheading, but still\n  % prevent a page break here.\n  \\centerpenalty = \\lastpenalty\n  \\ifnum\\centerpenalty>10000 \\vskip\\parskip \\fi\n  \\ifnum\\centerpenalty>9999 \\penalty\\centerpenalty \\fi\n  \\line{\\kern\\leftskip #1\\kern\\rightskip}%\n}\n\n% @sp n   outputs n lines of vertical space\n%\n\\parseargdef\\sp{\\vskip #1\\baselineskip}\n\n% @comment ...line which is ignored...\n% @c is the same as @comment\n% @ignore ... 
@end ignore  is another way to write a comment\n%\n\\def\\comment{\\begingroup \\catcode`\\^^M=\\other%\n\\catcode`\\@=\\other \\catcode`\\{=\\other \\catcode`\\}=\\other%\n\\commentxxx}\n{\\catcode`\\^^M=\\other \\gdef\\commentxxx#1^^M{\\endgroup}}\n%\n\\let\\c=\\comment\n\n% @paragraphindent NCHARS\n% We'll use ems for NCHARS, close enough.\n% NCHARS can also be the word `asis' or `none'.\n% We cannot feasibly implement @paragraphindent asis, though.\n%\n\\def\\asisword{asis} % no translation, these are keywords\n\\def\\noneword{none}\n%\n\\parseargdef\\paragraphindent{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\asisword\n  \\else\n    \\ifx\\temp\\noneword\n      \\defaultparindent = 0pt\n    \\else\n      \\defaultparindent = #1em\n    \\fi\n  \\fi\n  \\parindent = \\defaultparindent\n}\n\n% @exampleindent NCHARS\n% We'll use ems for NCHARS like @paragraphindent.\n% It seems @exampleindent asis isn't necessary, but\n% I preserve it to make it similar to @paragraphindent.\n\\parseargdef\\exampleindent{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\asisword\n  \\else\n    \\ifx\\temp\\noneword\n      \\lispnarrowing = 0pt\n    \\else\n      \\lispnarrowing = #1em\n    \\fi\n  \\fi\n}\n\n% @firstparagraphindent WORD\n% If WORD is `none', then suppress indentation of the first paragraph\n% after a section heading.  
If WORD is `insert', then do indent at such\n% paragraphs.\n%\n% The paragraph indentation is suppressed or not by calling\n% \\suppressfirstparagraphindent, which the sectioning commands do.\n% We switch the definition of this back and forth according to WORD.\n% By default, we suppress indentation.\n%\n\\def\\suppressfirstparagraphindent{\\dosuppressfirstparagraphindent}\n\\def\\insertword{insert}\n%\n\\parseargdef\\firstparagraphindent{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\noneword\n    \\let\\suppressfirstparagraphindent = \\dosuppressfirstparagraphindent\n  \\else\\ifx\\temp\\insertword\n    \\let\\suppressfirstparagraphindent = \\relax\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @firstparagraphindent option `\\temp'}%\n  \\fi\\fi\n}\n\n% Here is how we actually suppress indentation.  Redefine \\everypar to\n% \\kern backwards by \\parindent, and then reset itself to empty.\n%\n% We also make \\indent itself not actually do anything until the next\n% paragraph.\n%\n\\gdef\\dosuppressfirstparagraphindent{%\n  \\gdef\\indent{%\n    \\restorefirstparagraphindent\n    \\indent\n  }%\n  \\gdef\\noindent{%\n    \\restorefirstparagraphindent\n    \\noindent\n  }%\n  \\global\\everypar = {%\n    \\kern -\\parindent\n    \\restorefirstparagraphindent\n  }%\n}\n\n\\gdef\\restorefirstparagraphindent{%\n  \\global \\let \\indent = \\ptexindent\n  \\global \\let \\noindent = \\ptexnoindent\n  \\global \\everypar = {}%\n}\n\n\n% @refill is a no-op.\n\\let\\refill=\\relax\n\n% If working on a large document in chapters, it is convenient to\n% be able to disable indexing, cross-referencing, and contents, for test runs.\n% This is done with @novalidate (before @setfilename).\n%\n\\newif\\iflinks \\linkstrue % by default we want the aux files.\n\\let\\novalidate = \\linksfalse\n\n% @setfilename is done at the beginning of every texinfo file.\n% So open here the files we need to have open while reading the input.\n% This makes it possible to make a .fmt file 
for texinfo.\n\\def\\setfilename{%\n   \\fixbackslash  % Turn off hack to swallow `\\input texinfo'.\n   \\iflinks\n     \\tryauxfile\n     % Open the new aux file.  TeX will close it automatically at exit.\n     \\immediate\\openout\\auxfile=\\jobname.aux\n   \\fi % \\openindices needs to do some work in any case.\n   \\openindices\n   \\let\\setfilename=\\comment % Ignore extra @setfilename cmds.\n   %\n   % If texinfo.cnf is present on the system, read it.\n   % Useful for site-wide @afourpaper, etc.\n   \\openin 1 texinfo.cnf\n   \\ifeof 1 \\else \\input texinfo.cnf \\fi\n   \\closein 1\n   %\n   \\comment % Ignore the actual filename.\n}\n\n% Called from \\setfilename.\n%\n\\def\\openindices{%\n  \\newindex{cp}%\n  \\newcodeindex{fn}%\n  \\newcodeindex{vr}%\n  \\newcodeindex{tp}%\n  \\newcodeindex{ky}%\n  \\newcodeindex{pg}%\n}\n\n% @bye.\n\\outer\\def\\bye{\\pagealignmacro\\tracingstats=1\\ptexend}\n\n\n\\message{pdf,}\n% adobe `portable' document format\n\\newcount\\tempnum\n\\newcount\\lnkcount\n\\newtoks\\filename\n\\newcount\\filenamelength\n\\newcount\\pgn\n\\newtoks\\toksA\n\\newtoks\\toksB\n\\newtoks\\toksC\n\\newtoks\\toksD\n\\newbox\\boxA\n\\newcount\\countA\n\\newif\\ifpdf\n\\newif\\ifpdfmakepagedest\n\n% when pdftex is run in dvi mode, \\pdfoutput is defined (so \\pdfoutput=1\n% can be set).  So we test for \\relax and 0 as well as being undefined.\n\\ifx\\pdfoutput\\thisisundefined\n\\else\n  \\ifx\\pdfoutput\\relax\n  \\else\n    \\ifcase\\pdfoutput\n    \\else\n      \\pdftrue\n    \\fi\n  \\fi\n\\fi\n\n% PDF uses PostScript string constants for the names of xref targets,\n% for display in the outlines, and in other places.  Thus, we have to\n% double any backslashes.  Otherwise, a name like \"\\node\" will be\n% interpreted as a newline (\\n), followed by o, d, e.  Not good.\n% \n% See http://www.ntg.nl/pipermail/ntg-pdftex/2004-July/000654.html and\n% related messages.  
The final outcome is that it is up to the TeX user\n% to double the backslashes and otherwise make the string valid, so\n% that's what we do.  pdftex 1.30.0 (ca.2005) introduced a primitive to\n% do this reliably, so we use it.\n\n% #1 is a control sequence in which to do the replacements,\n% which we \\xdef.\n\\def\\txiescapepdf#1{%\n  \\ifx\\pdfescapestring\\thisisundefined\n    % No primitive available; should we give a warning or log?\n    % Many times it won't matter.\n  \\else\n    % The expandable \\pdfescapestring primitive escapes parentheses,\n    % backslashes, and other special chars.\n    \\xdef#1{\\pdfescapestring{#1}}%\n  \\fi\n}\n\n\\newhelp\\nopdfimagehelp{Texinfo supports .png, .jpg, .jpeg, and .pdf images\nwith PDF output, and none of those formats could be found.  (.eps cannot\nbe supported due to the design of the PDF format; use regular TeX (DVI\noutput) for that.)}\n\n\\ifpdf\n  %\n  % Color manipulation macros based on pdfcolor.tex,\n  % except using rgb instead of cmyk; the latter is said to render as a\n  % very dark gray on-screen and a very dark halftone in print, instead\n  % of actual black.\n  \\def\\rgbDarkRed{0.50 0.09 0.12}\n  \\def\\rgbBlack{0 0 0}\n  %\n  % k sets the color for filling (usual text, etc.);\n  % K sets the color for stroking (thin rules, e.g., normal _'s).\n  \\def\\pdfsetcolor#1{\\pdfliteral{#1 rg  #1 RG}}\n  %\n  % Set color, and create a mark which defines \\thiscolor accordingly,\n  % so that \\makeheadline knows which color to restore.\n  \\def\\setcolor#1{%\n    \\xdef\\lastcolordefs{\\gdef\\noexpand\\thiscolor{#1}}%\n    \\domark\n    \\pdfsetcolor{#1}%\n  }\n  %\n  \\def\\maincolor{\\rgbBlack}\n  \\pdfsetcolor{\\maincolor}\n  \\edef\\thiscolor{\\maincolor}\n  \\def\\lastcolordefs{}\n  %\n  \\def\\makefootline{%\n    \\baselineskip24pt\n    \\line{\\pdfsetcolor{\\maincolor}\\the\\footline}%\n  }\n  %\n  \\def\\makeheadline{%\n    \\vbox to 0pt{%\n      \\vskip-22.5pt\n      \\line{%\n        \\vbox 
to8.5pt{}%\n        % Extract \\thiscolor definition from the marks.\n        \\getcolormarks\n        % Typeset the headline with \\maincolor, then restore the color.\n        \\pdfsetcolor{\\maincolor}\\the\\headline\\pdfsetcolor{\\thiscolor}%\n      }%\n      \\vss\n    }%\n    \\nointerlineskip\n  }\n  %\n  %\n  \\pdfcatalog{/PageMode /UseOutlines}\n  %\n  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).\n  \\def\\dopdfimage#1#2#3{%\n    \\def\\pdfimagewidth{#2}\\setbox0 = \\hbox{\\ignorespaces #2}%\n    \\def\\pdfimageheight{#3}\\setbox2 = \\hbox{\\ignorespaces #3}%\n    %\n    % pdftex (and the PDF format) support .pdf, .png, .jpg (among\n    % others).  Let's try in that order, PDF first since if\n    % someone has a scalable image, presumably better to use that than a\n    % bitmap.\n    \\let\\pdfimgext=\\empty\n    \\begingroup\n      \\openin 1 #1.pdf \\ifeof 1\n        \\openin 1 #1.PDF \\ifeof 1\n          \\openin 1 #1.png \\ifeof 1\n            \\openin 1 #1.jpg \\ifeof 1\n              \\openin 1 #1.jpeg \\ifeof 1\n                \\openin 1 #1.JPG \\ifeof 1\n                  \\errhelp = \\nopdfimagehelp\n                  \\errmessage{Could not find image file #1 for pdf}%\n                \\else \\gdef\\pdfimgext{JPG}%\n                \\fi\n              \\else \\gdef\\pdfimgext{jpeg}%\n              \\fi\n            \\else \\gdef\\pdfimgext{jpg}%\n            \\fi\n          \\else \\gdef\\pdfimgext{png}%\n          \\fi\n        \\else \\gdef\\pdfimgext{PDF}%\n        \\fi\n      \\else \\gdef\\pdfimgext{pdf}%\n      \\fi\n      \\closein 1\n    \\endgroup\n    %\n    % without \\immediate, ancient pdftex seg faults when the same image is\n    % included twice.  
(Version 3.14159-pre-1.0-unofficial-20010704.)\n    \\ifnum\\pdftexversion < 14\n      \\immediate\\pdfimage\n    \\else\n      \\immediate\\pdfximage\n    \\fi\n      \\ifdim \\wd0 >0pt width \\pdfimagewidth \\fi\n      \\ifdim \\wd2 >0pt height \\pdfimageheight \\fi\n      \\ifnum\\pdftexversion<13\n         #1.\\pdfimgext\n       \\else\n         {#1.\\pdfimgext}%\n       \\fi\n    \\ifnum\\pdftexversion < 14 \\else\n      \\pdfrefximage \\pdflastximage\n    \\fi}\n  %\n  \\def\\pdfmkdest#1{{%\n    % We have to set dummies so commands such as @code, and characters\n    % such as \\, aren't expanded when present in a section title.\n    \\indexnofonts\n    \\turnoffactive\n    \\makevalueexpandable\n    \\def\\pdfdestname{#1}%\n    \\txiescapepdf\\pdfdestname\n    \\safewhatsit{\\pdfdest name{\\pdfdestname} xyz}%\n  }}\n  %\n  % used to mark target names; must be expandable.\n  \\def\\pdfmkpgn#1{#1}\n  %\n  % by default, use a color that is dark enough to print on paper as\n  % nearly black, but still distinguishable for online viewing.\n  \\def\\urlcolor{\\rgbDarkRed}\n  \\def\\linkcolor{\\rgbDarkRed}\n  \\def\\endlink{\\setcolor{\\maincolor}\\pdfendlink}\n  %\n  % Adding outlines to PDF; macros for calculating structure of outlines\n  % come from Petr Olsak\n  \\def\\expnumber#1{\\expandafter\\ifx\\csname#1\\endcsname\\relax 0%\n    \\else \\csname#1\\endcsname \\fi}\n  \\def\\advancenumber#1{\\tempnum=\\expnumber{#1}\\relax\n    \\advance\\tempnum by 1\n    \\expandafter\\xdef\\csname#1\\endcsname{\\the\\tempnum}}\n  %\n  % #1 is the section text, which is what will be displayed in the\n  % outline by the pdf viewer.  #2 is the pdf expression for the number\n  % of subentries (or empty, for subsubsections).  #3 is the node text,\n  % which might be empty if this toc entry had no corresponding node.\n  % #4 is the page number\n  %\n  \\def\\dopdfoutline#1#2#3#4{%\n    % Generate a link to the node text if that exists; else, use the\n    % page number.  
We could generate a destination for the section\n    % text in the case where a section has no node, but it doesn't\n    % seem worth the trouble, since most documents are normally structured.\n    \\edef\\pdfoutlinedest{#3}%\n    \\ifx\\pdfoutlinedest\\empty\n      \\def\\pdfoutlinedest{#4}%\n    \\else\n      \\txiescapepdf\\pdfoutlinedest\n    \\fi\n    %\n    % Also escape PDF chars in the display string.\n    \\edef\\pdfoutlinetext{#1}%\n    \\txiescapepdf\\pdfoutlinetext\n    %\n    \\pdfoutline goto name{\\pdfmkpgn{\\pdfoutlinedest}}#2{\\pdfoutlinetext}%\n  }\n  %\n  \\def\\pdfmakeoutlines{%\n    \\begingroup\n      % Read toc silently, to get counts of subentries for \\pdfoutline.\n      \\def\\partentry##1##2##3##4{}% ignore parts in the outlines\n      \\def\\numchapentry##1##2##3##4{%\n\t\\def\\thischapnum{##2}%\n\t\\def\\thissecnum{0}%\n\t\\def\\thissubsecnum{0}%\n      }%\n      \\def\\numsecentry##1##2##3##4{%\n\t\\advancenumber{chap\\thischapnum}%\n\t\\def\\thissecnum{##2}%\n\t\\def\\thissubsecnum{0}%\n      }%\n      \\def\\numsubsecentry##1##2##3##4{%\n\t\\advancenumber{sec\\thissecnum}%\n\t\\def\\thissubsecnum{##2}%\n      }%\n      \\def\\numsubsubsecentry##1##2##3##4{%\n\t\\advancenumber{subsec\\thissubsecnum}%\n      }%\n      \\def\\thischapnum{0}%\n      \\def\\thissecnum{0}%\n      \\def\\thissubsecnum{0}%\n      %\n      % use \\def rather than \\let here because we redefine \\chapentry et\n      % al. 
a second time, below.\n      \\def\\appentry{\\numchapentry}%\n      \\def\\appsecentry{\\numsecentry}%\n      \\def\\appsubsecentry{\\numsubsecentry}%\n      \\def\\appsubsubsecentry{\\numsubsubsecentry}%\n      \\def\\unnchapentry{\\numchapentry}%\n      \\def\\unnsecentry{\\numsecentry}%\n      \\def\\unnsubsecentry{\\numsubsecentry}%\n      \\def\\unnsubsubsecentry{\\numsubsubsecentry}%\n      \\readdatafile{toc}%\n      %\n      % Read toc second time, this time actually producing the outlines.\n      % The `-' means take the \\expnumber as the absolute number of\n      % subentries, which we calculated on our first read of the .toc above.\n      %\n      % We use the node names as the destinations.\n      \\def\\numchapentry##1##2##3##4{%\n        \\dopdfoutline{##1}{count-\\expnumber{chap##2}}{##3}{##4}}%\n      \\def\\numsecentry##1##2##3##4{%\n        \\dopdfoutline{##1}{count-\\expnumber{sec##2}}{##3}{##4}}%\n      \\def\\numsubsecentry##1##2##3##4{%\n        \\dopdfoutline{##1}{count-\\expnumber{subsec##2}}{##3}{##4}}%\n      \\def\\numsubsubsecentry##1##2##3##4{% count is always zero\n        \\dopdfoutline{##1}{}{##3}{##4}}%\n      %\n      % PDF outlines are displayed using system fonts, instead of\n      % document fonts.  Therefore we cannot use special characters,\n      % since the encoding is unknown.  For example, the eogonek from\n      % Latin 2 (0xea) gets translated to a | character.  Info from\n      % Staszek Wawrykiewicz, 19 Jan 2004 04:09:24 +0100.\n      %\n      % TODO this right, we have to translate 8-bit characters to\n      % their \"best\" equivalent, based on the @documentencoding.  Too\n      % much work for too little return.  Just use the ASCII equivalents\n      % we use for the index sort strings.\n      % \n      \\indexnofonts\n      \\setupdatafile\n      % We can have normal brace characters in the PDF outlines, unlike\n      % Texinfo index files.  
So set that up.\n      \\def\\{{\\lbracecharliteral}%\n      \\def\\}{\\rbracecharliteral}%\n      \\catcode`\\\\=\\active \\otherbackslash\n      \\input \\tocreadfilename\n    \\endgroup\n  }\n  {\\catcode`[=1 \\catcode`]=2\n   \\catcode`{=\\other \\catcode`}=\\other\n   \\gdef\\lbracecharliteral[{]%\n   \\gdef\\rbracecharliteral[}]%\n  ]\n  %\n  \\def\\skipspaces#1{\\def\\PP{#1}\\def\\D{|}%\n    \\ifx\\PP\\D\\let\\nextsp\\relax\n    \\else\\let\\nextsp\\skipspaces\n      \\addtokens{\\filename}{\\PP}%\n      \\advance\\filenamelength by 1\n    \\fi\n    \\nextsp}\n  \\def\\getfilename#1{%\n    \\filenamelength=0\n    % If we don't expand the argument now, \\skipspaces will get\n    % snagged on things like \"@value{foo}\".\n    \\edef\\temp{#1}%\n    \\expandafter\\skipspaces\\temp|\\relax\n  }\n  \\ifnum\\pdftexversion < 14\n    \\let \\startlink \\pdfannotlink\n  \\else\n    \\let \\startlink \\pdfstartlink\n  \\fi\n  % make a live url in pdf output.\n  \\def\\pdfurl#1{%\n    \\begingroup\n      % it seems we really need yet another set of dummies; have not\n      % tried to figure out what each command should do in the context\n      % of @url.  
for now, just make @/ a no-op, that's the only one\n      % people have actually reported a problem with.\n      %\n      \\normalturnoffactive\n      \\def\\@{@}%\n      \\let\\/=\\empty\n      \\makevalueexpandable\n      % do we want to go so far as to use \\indexnofonts instead of just\n      % special-casing \\var here?\n      \\def\\var##1{##1}%\n      %\n      \\leavevmode\\setcolor{\\urlcolor}%\n      \\startlink attr{/Border [0 0 0]}%\n        user{/Subtype /Link /A << /S /URI /URI (#1) >>}%\n    \\endgroup}\n  \\def\\pdfgettoks#1.{\\setbox\\boxA=\\hbox{\\toksA={#1.}\\toksB={}\\maketoks}}\n  \\def\\addtokens#1#2{\\edef\\addtoks{\\noexpand#1={\\the#1#2}}\\addtoks}\n  \\def\\adn#1{\\addtokens{\\toksC}{#1}\\global\\countA=1\\let\\next=\\maketoks}\n  \\def\\poptoks#1#2|ENDTOKS|{\\let\\first=#1\\toksD={#1}\\toksA={#2}}\n  \\def\\maketoks{%\n    \\expandafter\\poptoks\\the\\toksA|ENDTOKS|\\relax\n    \\ifx\\first0\\adn0\n    \\else\\ifx\\first1\\adn1 \\else\\ifx\\first2\\adn2 \\else\\ifx\\first3\\adn3\n    \\else\\ifx\\first4\\adn4 \\else\\ifx\\first5\\adn5 \\else\\ifx\\first6\\adn6\n    \\else\\ifx\\first7\\adn7 \\else\\ifx\\first8\\adn8 \\else\\ifx\\first9\\adn9\n    \\else\n      \\ifnum0=\\countA\\else\\makelink\\fi\n      \\ifx\\first.\\let\\next=\\done\\else\n        \\let\\next=\\maketoks\n        \\addtokens{\\toksB}{\\the\\toksD}\n        \\ifx\\first,\\addtokens{\\toksB}{\\space}\\fi\n      \\fi\n    \\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\n    \\next}\n  \\def\\makelink{\\addtokens{\\toksB}%\n    {\\noexpand\\pdflink{\\the\\toksC}}\\toksC={}\\global\\countA=0}\n  \\def\\pdflink#1{%\n    \\startlink attr{/Border [0 0 0]} goto name{\\pdfmkpgn{#1}}\n    \\setcolor{\\linkcolor}#1\\endlink}\n  \\def\\done{\\edef\\st{\\global\\noexpand\\toksA={\\the\\toksB}}\\st}\n\\else\n  % non-pdf mode\n  \\let\\pdfmkdest = \\gobble\n  \\let\\pdfurl = \\gobble\n  \\let\\endlink = \\relax\n  \\let\\setcolor = \\gobble\n  \\let\\pdfsetcolor = \\gobble\n  
\\let\\pdfmakeoutlines = \\relax\n\\fi  % \\ifx\\pdfoutput\n\n\n\\message{fonts,}\n\n% Change the current font style to #1, remembering it in \\curfontstyle.\n% For now, we do not accumulate font styles: @b{@i{foo}} prints foo in\n% italics, not bold italics.\n%\n\\def\\setfontstyle#1{%\n  \\def\\curfontstyle{#1}% not as a control sequence, because we are \\edef'd.\n  \\csname ten#1\\endcsname  % change the current font\n}\n\n% Select #1 fonts with the current style.\n%\n\\def\\selectfonts#1{\\csname #1fonts\\endcsname \\csname\\curfontstyle\\endcsname}\n\n\\def\\rm{\\fam=0 \\setfontstyle{rm}}\n\\def\\it{\\fam=\\itfam \\setfontstyle{it}}\n\\def\\sl{\\fam=\\slfam \\setfontstyle{sl}}\n\\def\\bf{\\fam=\\bffam \\setfontstyle{bf}}\\def\\bfstylename{bf}\n\\def\\tt{\\fam=\\ttfam \\setfontstyle{tt}}\n\n% Unfortunately, we have to override this for titles and the like, since\n% in those cases \"rm\" is bold.  Sigh.\n\\def\\rmisbold{\\rm\\def\\curfontstyle{bf}}\n\n% Texinfo sort of supports the sans serif font style, which plain TeX does not.\n% So we set up a \\sf.\n\\newfam\\sffam\n\\def\\sf{\\fam=\\sffam \\setfontstyle{sf}}\n\\let\\li = \\sf % Sometimes we call it \\li, not \\sf.\n\n% We don't need math for this font style.\n\\def\\ttsl{\\setfontstyle{ttsl}}\n\n\n% Set the baselineskip to #1, and the lineskip and strut size\n% correspondingly.  
There is no deep meaning behind these magic numbers\n% used as factors; they just match (closely enough) what Knuth defined.\n%\n\\def\\lineskipfactor{.08333}\n\\def\\strutheightpercent{.70833}\n\\def\\strutdepthpercent {.29167}\n%\n% can get a sort of poor man's double spacing by redefining this.\n\\def\\baselinefactor{1}\n%\n\\newdimen\\textleading\n\\def\\setleading#1{%\n  \\dimen0 = #1\\relax\n  \\normalbaselineskip = \\baselinefactor\\dimen0\n  \\normallineskip = \\lineskipfactor\\normalbaselineskip\n  \\normalbaselines\n  \\setbox\\strutbox =\\hbox{%\n    \\vrule width0pt height\\strutheightpercent\\baselineskip\n                    depth \\strutdepthpercent \\baselineskip\n  }%\n}\n\n% PDF CMaps.  See also LaTeX's t1.cmap.\n%\n% do nothing with this by default.\n\\expandafter\\let\\csname cmapOT1\\endcsname\\gobble\n\\expandafter\\let\\csname cmapOT1IT\\endcsname\\gobble\n\\expandafter\\let\\csname cmapOT1TT\\endcsname\\gobble\n\n% if we are producing pdf, and we have \\pdffontattr, then define cmaps.\n% (\\pdffontattr was introduced many years ago, but people still run\n% older pdftex's; it's easy to conditionalize, so we do.)\n\\ifpdf \\ifx\\pdffontattr\\thisisundefined \\else\n  \\begingroup\n    \\catcode`\\^^M=\\active \\def^^M{^^J}% Output line endings as the ^^J char.\n    \\catcode`\\%=12 \\immediate\\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap\n%%DocumentNeededResources: ProcSet (CIDInit)\n%%IncludeResource: ProcSet (CIDInit)\n%%BeginResource: CMap (TeX-OT1-0)\n%%Title: (TeX-OT1-0 TeX OT1 0)\n%%Version: 1.000\n%%EndComments\n/CIDInit /ProcSet findresource begin\n12 dict begin\nbegincmap\n/CIDSystemInfo\n<< /Registry (TeX)\n/Ordering (OT1)\n/Supplement 0\n>> def\n/CMapName /TeX-OT1-0 def\n/CMapType 2 def\n1 begincodespacerange\n<00> <7F>\nendcodespacerange\n8 beginbfrange\n<00> <01> <0393>\n<09> <0A> <03A8>\n<23> <26> <0023>\n<28> <3B> <0028>\n<3F> <5B> <003F>\n<5D> <5E> <005D>\n<61> <7A> <0061>\n<7B> <7C> <2013>\nendbfrange\n40 beginbfchar\n<02> 
<0398>\n<03> <039B>\n<04> <039E>\n<05> <03A0>\n<06> <03A3>\n<07> <03D2>\n<08> <03A6>\n<0B> <00660066>\n<0C> <00660069>\n<0D> <0066006C>\n<0E> <006600660069>\n<0F> <00660066006C>\n<10> <0131>\n<11> <0237>\n<12> <0060>\n<13> <00B4>\n<14> <02C7>\n<15> <02D8>\n<16> <00AF>\n<17> <02DA>\n<18> <00B8>\n<19> <00DF>\n<1A> <00E6>\n<1B> <0153>\n<1C> <00F8>\n<1D> <00C6>\n<1E> <0152>\n<1F> <00D8>\n<21> <0021>\n<22> <201D>\n<27> <2019>\n<3C> <00A1>\n<3D> <003D>\n<3E> <00BF>\n<5C> <201C>\n<5F> <02D9>\n<60> <2018>\n<7D> <02DD>\n<7E> <007E>\n<7F> <00A8>\nendbfchar\nendcmap\nCMapName currentdict /CMap defineresource pop\nend\nend\n%%EndResource\n%%EOF\n    }\\endgroup\n  \\expandafter\\edef\\csname cmapOT1\\endcsname#1{%\n    \\pdffontattr#1{/ToUnicode \\the\\pdflastobj\\space 0 R}%\n  }%\n%\n% \\cmapOT1IT\n  \\begingroup\n    \\catcode`\\^^M=\\active \\def^^M{^^J}% Output line endings as the ^^J char.\n    \\catcode`\\%=12 \\immediate\\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap\n%%DocumentNeededResources: ProcSet (CIDInit)\n%%IncludeResource: ProcSet (CIDInit)\n%%BeginResource: CMap (TeX-OT1IT-0)\n%%Title: (TeX-OT1IT-0 TeX OT1IT 0)\n%%Version: 1.000\n%%EndComments\n/CIDInit /ProcSet findresource begin\n12 dict begin\nbegincmap\n/CIDSystemInfo\n<< /Registry (TeX)\n/Ordering (OT1IT)\n/Supplement 0\n>> def\n/CMapName /TeX-OT1IT-0 def\n/CMapType 2 def\n1 begincodespacerange\n<00> <7F>\nendcodespacerange\n8 beginbfrange\n<00> <01> <0393>\n<09> <0A> <03A8>\n<25> <26> <0025>\n<28> <3B> <0028>\n<3F> <5B> <003F>\n<5D> <5E> <005D>\n<61> <7A> <0061>\n<7B> <7C> <2013>\nendbfrange\n42 beginbfchar\n<02> <0398>\n<03> <039B>\n<04> <039E>\n<05> <03A0>\n<06> <03A3>\n<07> <03D2>\n<08> <03A6>\n<0B> <00660066>\n<0C> <00660069>\n<0D> <0066006C>\n<0E> <006600660069>\n<0F> <00660066006C>\n<10> <0131>\n<11> <0237>\n<12> <0060>\n<13> <00B4>\n<14> <02C7>\n<15> <02D8>\n<16> <00AF>\n<17> <02DA>\n<18> <00B8>\n<19> <00DF>\n<1A> <00E6>\n<1B> <0153>\n<1C> <00F8>\n<1D> <00C6>\n<1E> <0152>\n<1F> <00D8>\n<21> 
<0021>\n<22> <201D>\n<23> <0023>\n<24> <00A3>\n<27> <2019>\n<3C> <00A1>\n<3D> <003D>\n<3E> <00BF>\n<5C> <201C>\n<5F> <02D9>\n<60> <2018>\n<7D> <02DD>\n<7E> <007E>\n<7F> <00A8>\nendbfchar\nendcmap\nCMapName currentdict /CMap defineresource pop\nend\nend\n%%EndResource\n%%EOF\n    }\\endgroup\n  \\expandafter\\edef\\csname cmapOT1IT\\endcsname#1{%\n    \\pdffontattr#1{/ToUnicode \\the\\pdflastobj\\space 0 R}%\n  }%\n%\n% \\cmapOT1TT\n  \\begingroup\n    \\catcode`\\^^M=\\active \\def^^M{^^J}% Output line endings as the ^^J char.\n    \\catcode`\\%=12 \\immediate\\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap\n%%DocumentNeededResources: ProcSet (CIDInit)\n%%IncludeResource: ProcSet (CIDInit)\n%%BeginResource: CMap (TeX-OT1TT-0)\n%%Title: (TeX-OT1TT-0 TeX OT1TT 0)\n%%Version: 1.000\n%%EndComments\n/CIDInit /ProcSet findresource begin\n12 dict begin\nbegincmap\n/CIDSystemInfo\n<< /Registry (TeX)\n/Ordering (OT1TT)\n/Supplement 0\n>> def\n/CMapName /TeX-OT1TT-0 def\n/CMapType 2 def\n1 begincodespacerange\n<00> <7F>\nendcodespacerange\n5 beginbfrange\n<00> <01> <0393>\n<09> <0A> <03A8>\n<21> <26> <0021>\n<28> <5F> <0028>\n<61> <7E> <0061>\nendbfrange\n32 beginbfchar\n<02> <0398>\n<03> <039B>\n<04> <039E>\n<05> <03A0>\n<06> <03A3>\n<07> <03D2>\n<08> <03A6>\n<0B> <2191>\n<0C> <2193>\n<0D> <0027>\n<0E> <00A1>\n<0F> <00BF>\n<10> <0131>\n<11> <0237>\n<12> <0060>\n<13> <00B4>\n<14> <02C7>\n<15> <02D8>\n<16> <00AF>\n<17> <02DA>\n<18> <00B8>\n<19> <00DF>\n<1A> <00E6>\n<1B> <0153>\n<1C> <00F8>\n<1D> <00C6>\n<1E> <0152>\n<1F> <00D8>\n<20> <2423>\n<27> <2019>\n<60> <2018>\n<7F> <00A8>\nendbfchar\nendcmap\nCMapName currentdict /CMap defineresource pop\nend\nend\n%%EndResource\n%%EOF\n    }\\endgroup\n  \\expandafter\\edef\\csname cmapOT1TT\\endcsname#1{%\n    \\pdffontattr#1{/ToUnicode \\the\\pdflastobj\\space 0 R}%\n  }%\n\\fi\\fi\n\n\n% Set the font macro #1 to the font named \\fontprefix#2.\n% #3 is the font's design size, #4 is a scale factor, #5 is the CMap\n% encoding (only OT1, 
OT1IT and OT1TT are allowed, or empty to omit).\n% Example:\n% #1 = \\textrm\n% #2 = \\rmshape\n% #3 = 10\n% #4 = \\mainmagstep\n% #5 = OT1\n%\n\\def\\setfont#1#2#3#4#5{%\n  \\font#1=\\fontprefix#2#3 scaled #4\n  \\csname cmap#5\\endcsname#1%\n}\n% This is what gets called when #5 of \\setfont is empty.\n\\let\\cmap\\gobble\n%\n% (end of cmaps)\n\n% Use cm as the default font prefix.\n% To specify the font prefix, you must define \\fontprefix\n% before you read in texinfo.tex.\n\\ifx\\fontprefix\\thisisundefined\n\\def\\fontprefix{cm}\n\\fi\n% Support font families that don't use the same naming scheme as CM.\n\\def\\rmshape{r}\n\\def\\rmbshape{bx}               % where the normal face is bold\n\\def\\bfshape{b}\n\\def\\bxshape{bx}\n\\def\\ttshape{tt}\n\\def\\ttbshape{tt}\n\\def\\ttslshape{sltt}\n\\def\\itshape{ti}\n\\def\\itbshape{bxti}\n\\def\\slshape{sl}\n\\def\\slbshape{bxsl}\n\\def\\sfshape{ss}\n\\def\\sfbshape{ss}\n\\def\\scshape{csc}\n\\def\\scbshape{csc}\n\n% Definitions for a main text size of 11pt.  
(The default in Texinfo.)\n%\n\\def\\definetextfontsizexi{%\n% Text fonts (11.2pt, magstep1).\n\\def\\textnominalsize{11pt}\n\\edef\\mainmagstep{\\magstephalf}\n\\setfont\\textrm\\rmshape{10}{\\mainmagstep}{OT1}\n\\setfont\\texttt\\ttshape{10}{\\mainmagstep}{OT1TT}\n\\setfont\\textbf\\bfshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textit\\itshape{10}{\\mainmagstep}{OT1IT}\n\\setfont\\textsl\\slshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textsf\\sfshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textsc\\scshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textttsl\\ttslshape{10}{\\mainmagstep}{OT1TT}\n\\font\\texti=cmmi10 scaled \\mainmagstep\n\\font\\textsy=cmsy10 scaled \\mainmagstep\n\\def\\textecsize{1095}\n\n% A few fonts for @defun names and args.\n\\setfont\\defbf\\bfshape{10}{\\magstep1}{OT1}\n\\setfont\\deftt\\ttshape{10}{\\magstep1}{OT1TT}\n\\setfont\\defttsl\\ttslshape{10}{\\magstep1}{OT1TT}\n\\def\\df{\\let\\tentt=\\deftt \\let\\tenbf = \\defbf \\let\\tenttsl=\\defttsl \\bf}\n\n% Fonts for indices, footnotes, small examples (9pt).\n\\def\\smallnominalsize{9pt}\n\\setfont\\smallrm\\rmshape{9}{1000}{OT1}\n\\setfont\\smalltt\\ttshape{9}{1000}{OT1TT}\n\\setfont\\smallbf\\bfshape{10}{900}{OT1}\n\\setfont\\smallit\\itshape{9}{1000}{OT1IT}\n\\setfont\\smallsl\\slshape{9}{1000}{OT1}\n\\setfont\\smallsf\\sfshape{9}{1000}{OT1}\n\\setfont\\smallsc\\scshape{10}{900}{OT1}\n\\setfont\\smallttsl\\ttslshape{10}{900}{OT1TT}\n\\font\\smalli=cmmi9\n\\font\\smallsy=cmsy9\n\\def\\smallecsize{0900}\n\n% Fonts for small examples 
(8pt).\n\\def\\smallernominalsize{8pt}\n\\setfont\\smallerrm\\rmshape{8}{1000}{OT1}\n\\setfont\\smallertt\\ttshape{8}{1000}{OT1TT}\n\\setfont\\smallerbf\\bfshape{10}{800}{OT1}\n\\setfont\\smallerit\\itshape{8}{1000}{OT1IT}\n\\setfont\\smallersl\\slshape{8}{1000}{OT1}\n\\setfont\\smallersf\\sfshape{8}{1000}{OT1}\n\\setfont\\smallersc\\scshape{10}{800}{OT1}\n\\setfont\\smallerttsl\\ttslshape{10}{800}{OT1TT}\n\\font\\smalleri=cmmi8\n\\font\\smallersy=cmsy8\n\\def\\smallerecsize{0800}\n\n% Fonts for title page (20.4pt):\n\\def\\titlenominalsize{20pt}\n\\setfont\\titlerm\\rmbshape{12}{\\magstep3}{OT1}\n\\setfont\\titleit\\itbshape{10}{\\magstep4}{OT1IT}\n\\setfont\\titlesl\\slbshape{10}{\\magstep4}{OT1}\n\\setfont\\titlett\\ttbshape{12}{\\magstep3}{OT1TT}\n\\setfont\\titlettsl\\ttslshape{10}{\\magstep4}{OT1TT}\n\\setfont\\titlesf\\sfbshape{17}{\\magstep1}{OT1}\n\\let\\titlebf=\\titlerm\n\\setfont\\titlesc\\scbshape{10}{\\magstep4}{OT1}\n\\font\\titlei=cmmi12 scaled \\magstep3\n\\font\\titlesy=cmsy10 scaled \\magstep4\n\\def\\titleecsize{2074}\n\n% Chapter (and unnumbered) fonts (17.28pt).\n\\def\\chapnominalsize{17pt}\n\\setfont\\chaprm\\rmbshape{12}{\\magstep2}{OT1}\n\\setfont\\chapit\\itbshape{10}{\\magstep3}{OT1IT}\n\\setfont\\chapsl\\slbshape{10}{\\magstep3}{OT1}\n\\setfont\\chaptt\\ttbshape{12}{\\magstep2}{OT1TT}\n\\setfont\\chapttsl\\ttslshape{10}{\\magstep3}{OT1TT}\n\\setfont\\chapsf\\sfbshape{17}{1000}{OT1}\n\\let\\chapbf=\\chaprm\n\\setfont\\chapsc\\scbshape{10}{\\magstep3}{OT1}\n\\font\\chapi=cmmi12 scaled \\magstep2\n\\font\\chapsy=cmsy10 scaled \\magstep3\n\\def\\chapecsize{1728}\n\n% Section fonts 
(14.4pt).\n\\def\\secnominalsize{14pt}\n\\setfont\\secrm\\rmbshape{12}{\\magstep1}{OT1}\n\\setfont\\secit\\itbshape{10}{\\magstep2}{OT1IT}\n\\setfont\\secsl\\slbshape{10}{\\magstep2}{OT1}\n\\setfont\\sectt\\ttbshape{12}{\\magstep1}{OT1TT}\n\\setfont\\secttsl\\ttslshape{10}{\\magstep2}{OT1TT}\n\\setfont\\secsf\\sfbshape{12}{\\magstep1}{OT1}\n\\let\\secbf\\secrm\n\\setfont\\secsc\\scbshape{10}{\\magstep2}{OT1}\n\\font\\seci=cmmi12 scaled \\magstep1\n\\font\\secsy=cmsy10 scaled \\magstep2\n\\def\\sececsize{1440}\n\n% Subsection fonts (13.15pt).\n\\def\\ssecnominalsize{13pt}\n\\setfont\\ssecrm\\rmbshape{12}{\\magstephalf}{OT1}\n\\setfont\\ssecit\\itbshape{10}{1315}{OT1IT}\n\\setfont\\ssecsl\\slbshape{10}{1315}{OT1}\n\\setfont\\ssectt\\ttbshape{12}{\\magstephalf}{OT1TT}\n\\setfont\\ssecttsl\\ttslshape{10}{1315}{OT1TT}\n\\setfont\\ssecsf\\sfbshape{12}{\\magstephalf}{OT1}\n\\let\\ssecbf\\ssecrm\n\\setfont\\ssecsc\\scbshape{10}{1315}{OT1}\n\\font\\sseci=cmmi12 scaled \\magstephalf\n\\font\\ssecsy=cmsy10 scaled 1315\n\\def\\ssececsize{1200}\n\n% Reduced fonts for @acro in text (10pt).\n\\def\\reducednominalsize{10pt}\n\\setfont\\reducedrm\\rmshape{10}{1000}{OT1}\n\\setfont\\reducedtt\\ttshape{10}{1000}{OT1TT}\n\\setfont\\reducedbf\\bfshape{10}{1000}{OT1}\n\\setfont\\reducedit\\itshape{10}{1000}{OT1IT}\n\\setfont\\reducedsl\\slshape{10}{1000}{OT1}\n\\setfont\\reducedsf\\sfshape{10}{1000}{OT1}\n\\setfont\\reducedsc\\scshape{10}{1000}{OT1}\n\\setfont\\reducedttsl\\ttslshape{10}{1000}{OT1TT}\n\\font\\reducedi=cmmi10\n\\font\\reducedsy=cmsy10\n\\def\\reducedecsize{1000}\n\n\\textleading = 13.2pt % line spacing for 11pt CM\n\\textfonts            % reset the current fonts\n\\rm\n} % end of 11pt text font size definitions, \\definetextfontsizexi\n\n\n% Definitions to make the main text be 10pt Computer Modern, with\n% section, chapter, etc., sizes following suit.  This is for the GNU\n% Press printing of the Emacs 22 manual.  Maybe other manuals in the\n% future.  
Used with @smallbook, which sets the leading to 12pt.\n%\n\\def\\definetextfontsizex{%\n% Text fonts (10pt).\n\\def\\textnominalsize{10pt}\n\\edef\\mainmagstep{1000}\n\\setfont\\textrm\\rmshape{10}{\\mainmagstep}{OT1}\n\\setfont\\texttt\\ttshape{10}{\\mainmagstep}{OT1TT}\n\\setfont\\textbf\\bfshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textit\\itshape{10}{\\mainmagstep}{OT1IT}\n\\setfont\\textsl\\slshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textsf\\sfshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textsc\\scshape{10}{\\mainmagstep}{OT1}\n\\setfont\\textttsl\\ttslshape{10}{\\mainmagstep}{OT1TT}\n\\font\\texti=cmmi10 scaled \\mainmagstep\n\\font\\textsy=cmsy10 scaled \\mainmagstep\n\\def\\textecsize{1000}\n\n% A few fonts for @defun names and args.\n\\setfont\\defbf\\bfshape{10}{\\magstephalf}{OT1}\n\\setfont\\deftt\\ttshape{10}{\\magstephalf}{OT1TT}\n\\setfont\\defttsl\\ttslshape{10}{\\magstephalf}{OT1TT}\n\\def\\df{\\let\\tentt=\\deftt \\let\\tenbf = \\defbf \\let\\tenttsl=\\defttsl \\bf}\n\n% Fonts for indices, footnotes, small examples (9pt).\n\\def\\smallnominalsize{9pt}\n\\setfont\\smallrm\\rmshape{9}{1000}{OT1}\n\\setfont\\smalltt\\ttshape{9}{1000}{OT1TT}\n\\setfont\\smallbf\\bfshape{10}{900}{OT1}\n\\setfont\\smallit\\itshape{9}{1000}{OT1IT}\n\\setfont\\smallsl\\slshape{9}{1000}{OT1}\n\\setfont\\smallsf\\sfshape{9}{1000}{OT1}\n\\setfont\\smallsc\\scshape{10}{900}{OT1}\n\\setfont\\smallttsl\\ttslshape{10}{900}{OT1TT}\n\\font\\smalli=cmmi9\n\\font\\smallsy=cmsy9\n\\def\\smallecsize{0900}\n\n% Fonts for small examples 
(8pt).\n\\def\\smallernominalsize{8pt}\n\\setfont\\smallerrm\\rmshape{8}{1000}{OT1}\n\\setfont\\smallertt\\ttshape{8}{1000}{OT1TT}\n\\setfont\\smallerbf\\bfshape{10}{800}{OT1}\n\\setfont\\smallerit\\itshape{8}{1000}{OT1IT}\n\\setfont\\smallersl\\slshape{8}{1000}{OT1}\n\\setfont\\smallersf\\sfshape{8}{1000}{OT1}\n\\setfont\\smallersc\\scshape{10}{800}{OT1}\n\\setfont\\smallerttsl\\ttslshape{10}{800}{OT1TT}\n\\font\\smalleri=cmmi8\n\\font\\smallersy=cmsy8\n\\def\\smallerecsize{0800}\n\n% Fonts for title page (20.4pt):\n\\def\\titlenominalsize{20pt}\n\\setfont\\titlerm\\rmbshape{12}{\\magstep3}{OT1}\n\\setfont\\titleit\\itbshape{10}{\\magstep4}{OT1IT}\n\\setfont\\titlesl\\slbshape{10}{\\magstep4}{OT1}\n\\setfont\\titlett\\ttbshape{12}{\\magstep3}{OT1TT}\n\\setfont\\titlettsl\\ttslshape{10}{\\magstep4}{OT1TT}\n\\setfont\\titlesf\\sfbshape{17}{\\magstep1}{OT1}\n\\let\\titlebf=\\titlerm\n\\setfont\\titlesc\\scbshape{10}{\\magstep4}{OT1}\n\\font\\titlei=cmmi12 scaled \\magstep3\n\\font\\titlesy=cmsy10 scaled \\magstep4\n\\def\\titleecsize{2074}\n\n% Chapter fonts (14.4pt).\n\\def\\chapnominalsize{14pt}\n\\setfont\\chaprm\\rmbshape{12}{\\magstep1}{OT1}\n\\setfont\\chapit\\itbshape{10}{\\magstep2}{OT1IT}\n\\setfont\\chapsl\\slbshape{10}{\\magstep2}{OT1}\n\\setfont\\chaptt\\ttbshape{12}{\\magstep1}{OT1TT}\n\\setfont\\chapttsl\\ttslshape{10}{\\magstep2}{OT1TT}\n\\setfont\\chapsf\\sfbshape{12}{\\magstep1}{OT1}\n\\let\\chapbf\\chaprm\n\\setfont\\chapsc\\scbshape{10}{\\magstep2}{OT1}\n\\font\\chapi=cmmi12 scaled \\magstep1\n\\font\\chapsy=cmsy10 scaled \\magstep2\n\\def\\chapecsize{1440}\n\n% Section fonts 
(12pt).\n\\def\\secnominalsize{12pt}\n\\setfont\\secrm\\rmbshape{12}{1000}{OT1}\n\\setfont\\secit\\itbshape{10}{\\magstep1}{OT1IT}\n\\setfont\\secsl\\slbshape{10}{\\magstep1}{OT1}\n\\setfont\\sectt\\ttbshape{12}{1000}{OT1TT}\n\\setfont\\secttsl\\ttslshape{10}{\\magstep1}{OT1TT}\n\\setfont\\secsf\\sfbshape{12}{1000}{OT1}\n\\let\\secbf\\secrm\n\\setfont\\secsc\\scbshape{10}{\\magstep1}{OT1}\n\\font\\seci=cmmi12\n\\font\\secsy=cmsy10 scaled \\magstep1\n\\def\\sececsize{1200}\n\n% Subsection fonts (10pt).\n\\def\\ssecnominalsize{10pt}\n\\setfont\\ssecrm\\rmbshape{10}{1000}{OT1}\n\\setfont\\ssecit\\itbshape{10}{1000}{OT1IT}\n\\setfont\\ssecsl\\slbshape{10}{1000}{OT1}\n\\setfont\\ssectt\\ttbshape{10}{1000}{OT1TT}\n\\setfont\\ssecttsl\\ttslshape{10}{1000}{OT1TT}\n\\setfont\\ssecsf\\sfbshape{10}{1000}{OT1}\n\\let\\ssecbf\\ssecrm\n\\setfont\\ssecsc\\scbshape{10}{1000}{OT1}\n\\font\\sseci=cmmi10\n\\font\\ssecsy=cmsy10\n\\def\\ssececsize{1000}\n\n% Reduced fonts for @acro in text (9pt).\n\\def\\reducednominalsize{9pt}\n\\setfont\\reducedrm\\rmshape{9}{1000}{OT1}\n\\setfont\\reducedtt\\ttshape{9}{1000}{OT1TT}\n\\setfont\\reducedbf\\bfshape{10}{900}{OT1}\n\\setfont\\reducedit\\itshape{9}{1000}{OT1IT}\n\\setfont\\reducedsl\\slshape{9}{1000}{OT1}\n\\setfont\\reducedsf\\sfshape{9}{1000}{OT1}\n\\setfont\\reducedsc\\scshape{10}{900}{OT1}\n\\setfont\\reducedttsl\\ttslshape{10}{900}{OT1TT}\n\\font\\reducedi=cmmi9\n\\font\\reducedsy=cmsy9\n\\def\\reducedecsize{0900}\n\n\\divide\\parskip by 2  % reduce space between paragraphs\n\\textleading = 12pt   % line spacing for 10pt CM\n\\textfonts            % reset the current fonts\n\\rm\n} % end of 10pt text font size definitions, \\definetextfontsizex\n\n\n% We provide the user-level command\n%   @fonttextsize 10\n% (or 11) to redefine the text font size.  
pt is assumed.\n%\n\\def\\xiword{11}\n\\def\\xword{10}\n\\def\\xwordpt{10pt}\n%\n\\parseargdef\\fonttextsize{%\n  \\def\\textsizearg{#1}%\n  %\\wlog{doing @fonttextsize \\textsizearg}%\n  %\n  % Set \\globaldefs so that documents can use this inside @tex, since\n  % makeinfo 4.8 does not support it, but we need it nonetheless.\n  %\n \\begingroup \\globaldefs=1\n  \\ifx\\textsizearg\\xword \\definetextfontsizex\n  \\else \\ifx\\textsizearg\\xiword \\definetextfontsizexi\n  \\else\n    \\errhelp=\\EMsimple\n    \\errmessage{@fonttextsize only supports `10' or `11', not `\\textsizearg'}\n  \\fi\\fi\n \\endgroup\n}\n\n\n% In order for the font changes to affect most math symbols and letters,\n% we have to define the \\textfont of the standard families.  Since\n% texinfo doesn't allow for producing subscripts and superscripts except\n% in the main text, we don't bother to reset \\scriptfont and\n% \\scriptscriptfont (which would also require loading a lot more fonts).\n%\n\\def\\resetmathfonts{%\n  \\textfont0=\\tenrm \\textfont1=\\teni \\textfont2=\\tensy\n  \\textfont\\itfam=\\tenit \\textfont\\slfam=\\tensl \\textfont\\bffam=\\tenbf\n  \\textfont\\ttfam=\\tentt \\textfont\\sffam=\\tensf\n}\n\n% The font-changing commands redefine the meanings of \\tenSTYLE, instead\n% of just \\STYLE.  We do this because \\STYLE needs to also set the\n% current \\fam for math mode.  Our \\STYLE (e.g., \\rm) commands hardwire\n% \\tenSTYLE to set the current font.\n%\n% Each font-changing command also sets the names \\lsize (one size lower)\n% and \\lllsize (three sizes lower).  
These relative commands are used in\n% the LaTeX logo and acronyms.\n%\n% This all needs generalizing, badly.\n%\n\\def\\textfonts{%\n  \\let\\tenrm=\\textrm \\let\\tenit=\\textit \\let\\tensl=\\textsl\n  \\let\\tenbf=\\textbf \\let\\tentt=\\texttt \\let\\smallcaps=\\textsc\n  \\let\\tensf=\\textsf \\let\\teni=\\texti \\let\\tensy=\\textsy\n  \\let\\tenttsl=\\textttsl\n  \\def\\curfontsize{text}%\n  \\def\\lsize{reduced}\\def\\lllsize{smaller}%\n  \\resetmathfonts \\setleading{\\textleading}}\n\\def\\titlefonts{%\n  \\let\\tenrm=\\titlerm \\let\\tenit=\\titleit \\let\\tensl=\\titlesl\n  \\let\\tenbf=\\titlebf \\let\\tentt=\\titlett \\let\\smallcaps=\\titlesc\n  \\let\\tensf=\\titlesf \\let\\teni=\\titlei \\let\\tensy=\\titlesy\n  \\let\\tenttsl=\\titlettsl\n  \\def\\curfontsize{title}%\n  \\def\\lsize{chap}\\def\\lllsize{subsec}%\n  \\resetmathfonts \\setleading{27pt}}\n\\def\\titlefont#1{{\\titlefonts\\rmisbold #1}}\n\\def\\chapfonts{%\n  \\let\\tenrm=\\chaprm \\let\\tenit=\\chapit \\let\\tensl=\\chapsl\n  \\let\\tenbf=\\chapbf \\let\\tentt=\\chaptt \\let\\smallcaps=\\chapsc\n  \\let\\tensf=\\chapsf \\let\\teni=\\chapi \\let\\tensy=\\chapsy\n  \\let\\tenttsl=\\chapttsl\n  \\def\\curfontsize{chap}%\n  \\def\\lsize{sec}\\def\\lllsize{text}%\n  \\resetmathfonts \\setleading{19pt}}\n\\def\\secfonts{%\n  \\let\\tenrm=\\secrm \\let\\tenit=\\secit \\let\\tensl=\\secsl\n  \\let\\tenbf=\\secbf \\let\\tentt=\\sectt \\let\\smallcaps=\\secsc\n  \\let\\tensf=\\secsf \\let\\teni=\\seci \\let\\tensy=\\secsy\n  \\let\\tenttsl=\\secttsl\n  \\def\\curfontsize{sec}%\n  \\def\\lsize{subsec}\\def\\lllsize{reduced}%\n  \\resetmathfonts \\setleading{16pt}}\n\\def\\subsecfonts{%\n  \\let\\tenrm=\\ssecrm \\let\\tenit=\\ssecit \\let\\tensl=\\ssecsl\n  \\let\\tenbf=\\ssecbf \\let\\tentt=\\ssectt \\let\\smallcaps=\\ssecsc\n  \\let\\tensf=\\ssecsf \\let\\teni=\\sseci \\let\\tensy=\\ssecsy\n  \\let\\tenttsl=\\ssecttsl\n  \\def\\curfontsize{ssec}%\n  \\def\\lsize{text}\\def\\lllsize{small}%\n  
\\resetmathfonts \\setleading{15pt}}\n\\let\\subsubsecfonts = \\subsecfonts\n\\def\\reducedfonts{%\n  \\let\\tenrm=\\reducedrm \\let\\tenit=\\reducedit \\let\\tensl=\\reducedsl\n  \\let\\tenbf=\\reducedbf \\let\\tentt=\\reducedtt \\let\\reducedcaps=\\reducedsc\n  \\let\\tensf=\\reducedsf \\let\\teni=\\reducedi \\let\\tensy=\\reducedsy\n  \\let\\tenttsl=\\reducedttsl\n  \\def\\curfontsize{reduced}%\n  \\def\\lsize{small}\\def\\lllsize{smaller}%\n  \\resetmathfonts \\setleading{10.5pt}}\n\\def\\smallfonts{%\n  \\let\\tenrm=\\smallrm \\let\\tenit=\\smallit \\let\\tensl=\\smallsl\n  \\let\\tenbf=\\smallbf \\let\\tentt=\\smalltt \\let\\smallcaps=\\smallsc\n  \\let\\tensf=\\smallsf \\let\\teni=\\smalli \\let\\tensy=\\smallsy\n  \\let\\tenttsl=\\smallttsl\n  \\def\\curfontsize{small}%\n  \\def\\lsize{smaller}\\def\\lllsize{smaller}%\n  \\resetmathfonts \\setleading{10.5pt}}\n\\def\\smallerfonts{%\n  \\let\\tenrm=\\smallerrm \\let\\tenit=\\smallerit \\let\\tensl=\\smallersl\n  \\let\\tenbf=\\smallerbf \\let\\tentt=\\smallertt \\let\\smallcaps=\\smallersc\n  \\let\\tensf=\\smallersf \\let\\teni=\\smalleri \\let\\tensy=\\smallersy\n  \\let\\tenttsl=\\smallerttsl\n  \\def\\curfontsize{smaller}%\n  \\def\\lsize{smaller}\\def\\lllsize{smaller}%\n  \\resetmathfonts \\setleading{9.5pt}}\n\n% Fonts for short table of contents.\n\\setfont\\shortcontrm\\rmshape{12}{1000}{OT1}\n\\setfont\\shortcontbf\\bfshape{10}{\\magstep1}{OT1}  % no cmb12\n\\setfont\\shortcontsl\\slshape{12}{1000}{OT1}\n\\setfont\\shortconttt\\ttshape{12}{1000}{OT1TT}\n\n% Define these just so they can be easily changed for other fonts.\n\\def\\angleleft{$\\langle$}\n\\def\\angleright{$\\rangle$}\n\n% Set the fonts to use with the @small... environments.\n\\let\\smallexamplefonts = \\smallfonts\n\n% About \\smallexamplefonts.  
If we use \\smallfonts (9pt), @smallexample\n% can fit this many characters:\n%   8.5x11=86   smallbook=72  a4=90  a5=69\n% If we use \\scriptfonts (8pt), then we can fit this many characters:\n%   8.5x11=90+  smallbook=80  a4=90+  a5=77\n% For me, subjectively, the few extra characters that fit aren't worth\n% the additional smallness of 8pt.  So I'm making the default 9pt.\n%\n% By the way, for comparison, here's what fits with @example (10pt):\n%   8.5x11=71  smallbook=60  a4=75  a5=58\n% --karl, 24jan03.\n\n% Set up the default fonts, so we can use them for creating boxes.\n%\n\\definetextfontsizexi\n\n\n\\message{markup,}\n\n% Check if we are currently using a typewriter font.  Since all the\n% Computer Modern typewriter fonts have zero interword stretch (and\n% shrink), and it is reasonable to expect all typewriter fonts to have\n% this property, we can check that font parameter.\n%\n\\def\\ifmonospace{\\ifdim\\fontdimen3\\font=0pt }\n\n% Markup style infrastructure.  \\defmarkupstylesetup\\INITMACRO will\n% define and register \\INITMACRO to be called on markup style changes.\n% \\INITMACRO can check \\currentmarkupstyle for the innermost\n% style and the set of \\ifmarkupSTYLE switches for all styles\n% currently in effect.\n\\newif\\ifmarkupvar\n\\newif\\ifmarkupsamp\n\\newif\\ifmarkupkey\n%\\newif\\ifmarkupfile % @file == @samp.\n%\\newif\\ifmarkupoption % @option == @samp.\n\\newif\\ifmarkupcode\n\\newif\\ifmarkupkbd\n%\\newif\\ifmarkupenv % @env == @code.\n%\\newif\\ifmarkupcommand % @command == @code.\n\\newif\\ifmarkuptex % @tex (and part of @math, for now).\n\\newif\\ifmarkupexample\n\\newif\\ifmarkupverb\n\\newif\\ifmarkupverbatim\n\n\\let\\currentmarkupstyle\\empty\n\n\\def\\setupmarkupstyle#1{%\n  \\csname markup#1true\\endcsname\n  \\def\\currentmarkupstyle{#1}%\n  \\markupstylesetup\n}\n\n\\let\\markupstylesetup\\empty\n\n\\def\\defmarkupstylesetup#1{%\n  \\expandafter\\def\\expandafter\\markupstylesetup\n    \\expandafter{\\markupstylesetup 
#1}%\n  \\def#1%\n}\n\n% Markup style setup for left and right quotes.\n\\defmarkupstylesetup\\markupsetuplq{%\n  \\expandafter\\let\\expandafter \\temp\n    \\csname markupsetuplq\\currentmarkupstyle\\endcsname\n  \\ifx\\temp\\relax \\markupsetuplqdefault \\else \\temp \\fi\n}\n\n\\defmarkupstylesetup\\markupsetuprq{%\n  \\expandafter\\let\\expandafter \\temp\n    \\csname markupsetuprq\\currentmarkupstyle\\endcsname\n  \\ifx\\temp\\relax \\markupsetuprqdefault \\else \\temp \\fi\n}\n\n{\n\\catcode`\\'=\\active\n\\catcode`\\`=\\active\n\n\\gdef\\markupsetuplqdefault{\\let`\\lq}\n\\gdef\\markupsetuprqdefault{\\let'\\rq}\n\n\\gdef\\markupsetcodequoteleft{\\let`\\codequoteleft}\n\\gdef\\markupsetcodequoteright{\\let'\\codequoteright}\n}\n\n\\let\\markupsetuplqcode \\markupsetcodequoteleft\n\\let\\markupsetuprqcode \\markupsetcodequoteright\n%\n\\let\\markupsetuplqexample \\markupsetcodequoteleft\n\\let\\markupsetuprqexample \\markupsetcodequoteright\n%\n\\let\\markupsetuplqkbd     \\markupsetcodequoteleft\n\\let\\markupsetuprqkbd     \\markupsetcodequoteright\n%\n\\let\\markupsetuplqsamp \\markupsetcodequoteleft\n\\let\\markupsetuprqsamp \\markupsetcodequoteright\n%\n\\let\\markupsetuplqverb \\markupsetcodequoteleft\n\\let\\markupsetuprqverb \\markupsetcodequoteright\n%\n\\let\\markupsetuplqverbatim \\markupsetcodequoteleft\n\\let\\markupsetuprqverbatim \\markupsetcodequoteright\n\n% Allow an option to not use regular directed right quote/apostrophe\n% (char 0x27), but instead the undirected quote from cmtt (char 0x0d).\n% The undirected quote is ugly, so don't make it the default, but it\n% works for pasting with more pdf viewers (at least evince), the\n% lilypond developers report.  
xpdf does work with the regular 0x27.\n%\n\\def\\codequoteright{%\n  \\expandafter\\ifx\\csname SETtxicodequoteundirected\\endcsname\\relax\n    \\expandafter\\ifx\\csname SETcodequoteundirected\\endcsname\\relax\n      '%\n    \\else \\char'15 \\fi\n  \\else \\char'15 \\fi\n}\n%\n% and a similar option for the left quote char vs. a grave accent.\n% Modern fonts display ASCII 0x60 as a grave accent, so some people like\n% the code environments to do likewise.\n%\n\\def\\codequoteleft{%\n  \\expandafter\\ifx\\csname SETtxicodequotebacktick\\endcsname\\relax\n    \\expandafter\\ifx\\csname SETcodequotebacktick\\endcsname\\relax\n      % [Knuth] pp. 380,381,391\n      % \\relax disables Spanish ligatures ?` and !` of \\tt font.\n      \\relax`%\n    \\else \\char'22 \\fi\n  \\else \\char'22 \\fi\n}\n\n% Commands to set the quote options.\n% \n\\parseargdef\\codequoteundirected{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\onword\n    \\expandafter\\let\\csname SETtxicodequoteundirected\\endcsname\n      = t%\n  \\else\\ifx\\temp\\offword\n    \\expandafter\\let\\csname SETtxicodequoteundirected\\endcsname\n      = \\relax\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @codequoteundirected value `\\temp', must be on|off}%\n  \\fi\\fi\n}\n%\n\\parseargdef\\codequotebacktick{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\onword\n    \\expandafter\\let\\csname SETtxicodequotebacktick\\endcsname\n      = t%\n  \\else\\ifx\\temp\\offword\n    \\expandafter\\let\\csname SETtxicodequotebacktick\\endcsname\n      = \\relax\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @codequotebacktick value `\\temp', must be on|off}%\n  \\fi\\fi\n}\n\n% [Knuth] pp. 
380,381,391, disable Spanish ligatures ?` and !` of \\tt font.\n\\def\\noligaturesquoteleft{\\relax\\lq}\n\n% Count depth in font-changes, for error checks\n\\newcount\\fontdepth \\fontdepth=0\n\n% Font commands.\n\n% #1 is the font command (\\sl or \\it), #2 is the text to slant.\n% If we are in a monospaced environment, however, 1) always use \\ttsl,\n% and 2) do not add an italic correction.\n\\def\\dosmartslant#1#2{%\n  \\ifusingtt \n    {{\\ttsl #2}\\let\\next=\\relax}%\n    {\\def\\next{{#1#2}\\futurelet\\next\\smartitaliccorrection}}%\n  \\next\n}\n\\def\\smartslanted{\\dosmartslant\\sl}\n\\def\\smartitalic{\\dosmartslant\\it}\n\n% Output an italic correction unless \\next (presumed to be the following\n% character) is such as not to need one.\n\\def\\smartitaliccorrection{%\n  \\ifx\\next,%\n  \\else\\ifx\\next-%\n  \\else\\ifx\\next.%\n  \\else\\ptexslash\n  \\fi\\fi\\fi\n  \\aftersmartic\n}\n\n% Unconditional use \\ttsl, and no ic.  @var is set to this for defuns.\n\\def\\ttslanted#1{{\\ttsl #1}}\n\n% @cite is like \\smartslanted except unconditionally use \\sl.  We never want\n% ttsl for book titles, do we?\n\\def\\cite#1{{\\sl #1}\\futurelet\\next\\smartitaliccorrection}\n\n\\def\\aftersmartic{}\n\\def\\var#1{%\n  \\let\\saveaftersmartic = \\aftersmartic\n  \\def\\aftersmartic{\\null\\let\\aftersmartic=\\saveaftersmartic}%\n  \\smartslanted{#1}%\n}\n\n\\let\\i=\\smartitalic\n\\let\\slanted=\\smartslanted\n\\let\\dfn=\\smartslanted\n\\let\\emph=\\smartitalic\n\n% Explicit font changes: @r, @sc, undocumented @ii.\n\\def\\r#1{{\\rm #1}}              % roman font\n\\def\\sc#1{{\\smallcaps#1}}       % smallcaps font\n\\def\\ii#1{{\\it #1}}             % italic font\n\n% @b, explicit bold.  Also @strong.\n\\def\\b#1{{\\bf #1}}\n\\let\\strong=\\b\n\n% @sansserif, explicit sans.\n\\def\\sansserif#1{{\\sf #1}}\n\n% We can't just use \\exhyphenpenalty, because that only has effect at\n% the end of a paragraph.  
Restore normal hyphenation at the end of the\n% group within which \\nohyphenation is presumably called.\n%\n\\def\\nohyphenation{\\hyphenchar\\font = -1  \\aftergroup\\restorehyphenation}\n\\def\\restorehyphenation{\\hyphenchar\\font = `- }\n\n% Set sfcode to normal for the chars that usually have another value.\n% Can't use plain's \\frenchspacing because it uses the `\\x notation, and\n% sometimes \\x has an active definition that messes things up.\n%\n\\catcode`@=11\n  \\def\\plainfrenchspacing{%\n    \\sfcode\\dotChar  =\\@m \\sfcode\\questChar=\\@m \\sfcode\\exclamChar=\\@m\n    \\sfcode\\colonChar=\\@m \\sfcode\\semiChar =\\@m \\sfcode\\commaChar =\\@m\n    \\def\\endofsentencespacefactor{1000}% for @. and friends\n  }\n  \\def\\plainnonfrenchspacing{%\n    \\sfcode`\\.3000\\sfcode`\\?3000\\sfcode`\\!3000\n    \\sfcode`\\:2000\\sfcode`\\;1500\\sfcode`\\,1250\n    \\def\\endofsentencespacefactor{3000}% for @. and friends\n  }\n\\catcode`@=\\other\n\\def\\endofsentencespacefactor{3000}% default\n\n% @t, explicit typewriter.\n\\def\\t#1{%\n  {\\tt \\rawbackslash \\plainfrenchspacing #1}%\n  \\null\n}\n\n% @samp.\n\\def\\samp#1{{\\setupmarkupstyle{samp}\\lq\\tclose{#1}\\rq\\null}}\n\n% @indicateurl is \\samp, that is, with quotes.\n\\let\\indicateurl=\\samp\n\n% @code (and similar) prints in typewriter, but with spaces the same\n% size as normal in the surrounding text, without hyphenation, etc.\n% This is a subroutine for that.\n\\def\\tclose#1{%\n  {%\n    % Change normal interword space to be same as for the current font.\n    \\spaceskip = \\fontdimen2\\font\n    %\n    % Switch to typewriter.\n    \\tt\n    %\n    % But `\\ ' produces the large typewriter interword space.\n    \\def\\ {{\\spaceskip = 0pt{} }}%\n    %\n    % Turn off hyphenation.\n    \\nohyphenation\n    %\n    \\rawbackslash\n    \\plainfrenchspacing\n    #1%\n  }%\n  \\null % reset spacefactor to 1000\n}\n\n% We *must* turn on hyphenation at `-' and `_' in @code.\n% Otherwise, it is too 
hard to avoid overfull hboxes\n% in the Emacs manual, the Library manual, etc.\n%\n% Unfortunately, TeX uses one parameter (\\hyphenchar) to control\n% both hyphenation at - and hyphenation within words.\n% We must therefore turn them both off (\\tclose does that)\n% and arrange explicitly to hyphenate at a dash.\n%  -- rms.\n{\n  \\catcode`\\-=\\active \\catcode`\\_=\\active\n  \\catcode`\\'=\\active \\catcode`\\`=\\active\n  \\global\\let'=\\rq \\global\\let`=\\lq  % default definitions\n  %\n  \\global\\def\\code{\\begingroup\n    \\setupmarkupstyle{code}%\n    % The following should really be moved into \\setupmarkupstyle handlers.\n    \\catcode\\dashChar=\\active  \\catcode\\underChar=\\active\n    \\ifallowcodebreaks\n     \\let-\\codedash\n     \\let_\\codeunder\n    \\else\n     \\let-\\normaldash\n     \\let_\\realunder\n    \\fi\n    \\codex\n  }\n}\n\n\\def\\codex #1{\\tclose{#1}\\endgroup}\n\n\\def\\normaldash{-}\n\\def\\codedash{-\\discretionary{}{}{}}\n\\def\\codeunder{%\n  % this is all so @math{@code{var_name}+1} can work.  In math mode, _\n  % is \"active\" (mathcode\"8000) and \\normalunderscore (or \\char95, etc.)\n  % will therefore expand the active definition of _, which is us\n  % (inside @code that is), therefore an endless loop.\n  \\ifusingtt{\\ifmmode\n               \\mathchar\"075F % class 0=ordinary, family 7=ttfam, pos 0x5F=_.\n             \\else\\normalunderscore \\fi\n             \\discretionary{}{}{}}%\n            {\\_}%\n}\n\n% An additional complication: the above will allow breaks after, e.g.,\n% each of the four underscores in __typeof__.  
This is bad.\n% @allowcodebreaks provides a document-level way to turn breaking at -\n% and _ on and off.\n%\n\\newif\\ifallowcodebreaks  \\allowcodebreakstrue\n\n\\def\\keywordtrue{true}\n\\def\\keywordfalse{false}\n\n\\parseargdef\\allowcodebreaks{%\n  \\def\\txiarg{#1}%\n  \\ifx\\txiarg\\keywordtrue\n    \\allowcodebreakstrue\n  \\else\\ifx\\txiarg\\keywordfalse\n    \\allowcodebreaksfalse\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @allowcodebreaks option `\\txiarg', must be true|false}%\n  \\fi\\fi\n}\n\n% For @command, @env, @file, @option quotes seem unnecessary,\n% so use \\code rather than \\samp.\n\\let\\command=\\code\n\\let\\env=\\code\n\\let\\file=\\code\n\\let\\option=\\code\n\n% @uref (abbreviation for `urlref') takes an optional (comma-separated)\n% second argument specifying the text to display and an optional third\n% arg as text to display instead of (rather than in addition to) the url\n% itself.  First (mandatory) arg is the url.\n% (This \\urefnobreak definition isn't used now, leaving it for a while\n% for comparison.)\n\\def\\urefnobreak#1{\\dourefnobreak #1,,,\\finish}\n\\def\\dourefnobreak#1,#2,#3,#4\\finish{\\begingroup\n  \\unsepspaces\n  \\pdfurl{#1}%\n  \\setbox0 = \\hbox{\\ignorespaces #3}%\n  \\ifdim\\wd0 > 0pt\n    \\unhbox0 % third arg given, show only that\n  \\else\n    \\setbox0 = \\hbox{\\ignorespaces #2}%\n    \\ifdim\\wd0 > 0pt\n      \\ifpdf\n        \\unhbox0             % PDF: 2nd arg given, show only it\n      \\else\n        \\unhbox0\\ (\\code{#1})% DVI: 2nd arg given, show both it and url\n      \\fi\n    \\else\n      \\code{#1}% only url given, so show it\n    \\fi\n  \\fi\n  \\endlink\n\\endgroup}\n\n% This \\urefbreak definition is the active one.\n\\def\\urefbreak{\\begingroup \\urefcatcodes \\dourefbreak}\n\\let\\uref=\\urefbreak\n\\def\\dourefbreak#1{\\urefbreakfinish #1,,,\\finish}\n\\def\\urefbreakfinish#1,#2,#3,#4\\finish{% doesn't work in @example\n  \\unsepspaces\n  \\pdfurl{#1}%\n  
\\setbox0 = \\hbox{\\ignorespaces #3}%\n  \\ifdim\\wd0 > 0pt\n    \\unhbox0 % third arg given, show only that\n  \\else\n    \\setbox0 = \\hbox{\\ignorespaces #2}%\n    \\ifdim\\wd0 > 0pt\n      \\ifpdf\n        \\unhbox0             % PDF: 2nd arg given, show only it\n      \\else\n        \\unhbox0\\ (\\urefcode{#1})% DVI: 2nd arg given, show both it and url\n      \\fi\n    \\else\n      \\urefcode{#1}% only url given, so show it\n    \\fi\n  \\fi\n  \\endlink\n\\endgroup}\n\n% Allow line breaks around only a few characters (only).\n\\def\\urefcatcodes{%\n  \\catcode\\ampChar=\\active   \\catcode\\dotChar=\\active\n  \\catcode\\hashChar=\\active  \\catcode\\questChar=\\active\n  \\catcode\\slashChar=\\active\n}\n{\n  \\urefcatcodes\n  %\n  \\global\\def\\urefcode{\\begingroup\n    \\setupmarkupstyle{code}%\n    \\urefcatcodes\n    \\let&\\urefcodeamp\n    \\let.\\urefcodedot\n    \\let#\\urefcodehash\n    \\let?\\urefcodequest\n    \\let/\\urefcodeslash\n    \\codex\n  }\n  %\n  % By default, they are just regular characters.\n  \\global\\def&{\\normalamp}\n  \\global\\def.{\\normaldot}\n  \\global\\def#{\\normalhash}\n  \\global\\def?{\\normalquest}\n  \\global\\def/{\\normalslash}\n}\n\n% we put a little stretch before and after the breakable chars, to help\n% line breaking of long url's.  
The unequal skips make look better in\n% cmtt at least, especially for dots.\n\\def\\urefprestretch{\\urefprebreak \\hskip0pt plus.13em }\n\\def\\urefpoststretch{\\urefpostbreak \\hskip0pt plus.1em }\n%\n\\def\\urefcodeamp{\\urefprestretch \\&\\urefpoststretch}\n\\def\\urefcodedot{\\urefprestretch .\\urefpoststretch}\n\\def\\urefcodehash{\\urefprestretch \\#\\urefpoststretch}\n\\def\\urefcodequest{\\urefprestretch ?\\urefpoststretch}\n\\def\\urefcodeslash{\\futurelet\\next\\urefcodeslashfinish}\n{\n  \\catcode`\\/=\\active\n  \\global\\def\\urefcodeslashfinish{%\n    \\urefprestretch \\slashChar\n    % Allow line break only after the final / in a sequence of\n    % slashes, to avoid line break between the slashes in http://.\n    \\ifx\\next/\\else \\urefpoststretch \\fi\n  }\n}\n\n% One more complication: by default we'll break after the special\n% characters, but some people like to break before the special chars, so\n% allow that.  Also allow no breaking at all, for manual control.\n% \n\\parseargdef\\urefbreakstyle{%\n  \\def\\txiarg{#1}%\n  \\ifx\\txiarg\\wordnone\n    \\def\\urefprebreak{\\nobreak}\\def\\urefpostbreak{\\nobreak}\n  \\else\\ifx\\txiarg\\wordbefore\n    \\def\\urefprebreak{\\allowbreak}\\def\\urefpostbreak{\\nobreak}\n  \\else\\ifx\\txiarg\\wordafter\n    \\def\\urefprebreak{\\nobreak}\\def\\urefpostbreak{\\allowbreak}\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @urefbreakstyle setting `\\txiarg'}%\n  \\fi\\fi\\fi\n}\n\\def\\wordafter{after}\n\\def\\wordbefore{before}\n\\def\\wordnone{none}\n\n\\urefbreakstyle after\n\n% @url synonym for @uref, since that's how everyone uses it.\n%\n\\let\\url=\\uref\n\n% rms does not like angle brackets --karl, 17may97.\n% So now @email is just like @uref, unless we are pdf.\n%\n%\\def\\email#1{\\angleleft{\\tt #1}\\angleright}\n\\ifpdf\n  \\def\\email#1{\\doemail#1,,\\finish}\n  \\def\\doemail#1,#2,#3\\finish{\\begingroup\n    \\unsepspaces\n    \\pdfurl{mailto:#1}%\n    \\setbox0 = 
\\hbox{\\ignorespaces #2}%\n    \\ifdim\\wd0>0pt\\unhbox0\\else\\code{#1}\\fi\n    \\endlink\n  \\endgroup}\n\\else\n  \\let\\email=\\uref\n\\fi\n\n% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always),\n%   `example' (@kbd uses ttsl only inside of @example and friends),\n%   or `code' (@kbd uses normal tty font always).\n\\parseargdef\\kbdinputstyle{%\n  \\def\\txiarg{#1}%\n  \\ifx\\txiarg\\worddistinct\n    \\gdef\\kbdexamplefont{\\ttsl}\\gdef\\kbdfont{\\ttsl}%\n  \\else\\ifx\\txiarg\\wordexample\n    \\gdef\\kbdexamplefont{\\ttsl}\\gdef\\kbdfont{\\tt}%\n  \\else\\ifx\\txiarg\\wordcode\n    \\gdef\\kbdexamplefont{\\tt}\\gdef\\kbdfont{\\tt}%\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @kbdinputstyle setting `\\txiarg'}%\n  \\fi\\fi\\fi\n}\n\\def\\worddistinct{distinct}\n\\def\\wordexample{example}\n\\def\\wordcode{code}\n\n% Default is `distinct'.\n\\kbdinputstyle distinct\n\n% @kbd is like @code, except that if the argument is just one @key command,\n% then @kbd has no effect.\n\\def\\kbd#1{{\\def\\look{#1}\\expandafter\\kbdsub\\look??\\par}}\n\n\\def\\xkey{\\key}\n\\def\\kbdsub#1#2#3\\par{%\n  \\def\\one{#1}\\def\\three{#3}\\def\\threex{??}%\n  \\ifx\\one\\xkey\\ifx\\threex\\three \\key{#2}%\n  \\else{\\tclose{\\kbdfont\\setupmarkupstyle{kbd}\\look}}\\fi\n  \\else{\\tclose{\\kbdfont\\setupmarkupstyle{kbd}\\look}}\\fi\n}\n\n% definition of @key that produces a lozenge.  Doesn't adjust to text size.\n%\\setfont\\keyrm\\rmshape{8}{1000}{OT1}\n%\\font\\keysy=cmsy9\n%\\def\\key#1{{\\keyrm\\textfont2=\\keysy \\leavevmode\\hbox{%\n%  \\raise0.4pt\\hbox{\\angleleft}\\kern-.08em\\vtop{%\n%    \\vbox{\\hrule\\kern-0.4pt\n%     \\hbox{\\raise0.4pt\\hbox{\\vphantom{\\angleleft}}#1}}%\n%    \\kern-0.4pt\\hrule}%\n%  \\kern-.06em\\raise0.4pt\\hbox{\\angleright}}}}\n\n% definition of @key with no lozenge.  If the current font is already\n% monospace, don't change it; that way, we respect @kbdinputstyle.  
But\n% if it isn't monospace, then use \\tt.\n%\n\\def\\key#1{{\\setupmarkupstyle{key}%\n  \\nohyphenation\n  \\ifmonospace\\else\\tt\\fi\n  #1}\\null}\n\n% @clicksequence{File @click{} Open ...}\n\\def\\clicksequence#1{\\begingroup #1\\endgroup}\n\n% @clickstyle @arrow   (by default)\n\\parseargdef\\clickstyle{\\def\\click{#1}}\n\\def\\click{\\arrow}\n\n% Typeset a dimension, e.g., `in' or `pt'.  The only reason for the\n% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt.\n%\n\\def\\dmn#1{\\thinspace #1}\n\n% @l was never documented to mean ``switch to the Lisp font'',\n% and it is not used as such in any manual I can find.  We need it for\n% Polish suppressed-l.  --karl, 22sep96.\n%\\def\\l#1{{\\li #1}\\null}\n\n% @acronym for \"FBI\", \"NATO\", and the like.\n% We print this one point size smaller, since it's intended for\n% all-uppercase.\n%\n\\def\\acronym#1{\\doacronym #1,,\\finish}\n\\def\\doacronym#1,#2,#3\\finish{%\n  {\\selectfonts\\lsize #1}%\n  \\def\\temp{#2}%\n  \\ifx\\temp\\empty \\else\n    \\space ({\\unsepspaces \\ignorespaces \\temp \\unskip})%\n  \\fi\n  \\null % reset \\spacefactor=1000\n}\n\n% @abbr for \"Comput. J.\" and the like.\n% No font change, but don't do end-of-sentence spacing.\n%\n\\def\\abbr#1{\\doabbr #1,,\\finish}\n\\def\\doabbr#1,#2,#3\\finish{%\n  {\\plainfrenchspacing #1}%\n  \\def\\temp{#2}%\n  \\ifx\\temp\\empty \\else\n    \\space ({\\unsepspaces \\ignorespaces \\temp \\unskip})%\n  \\fi\n  \\null % reset \\spacefactor=1000\n}\n\n% @asis just yields its argument.  Used with @table, for example.\n%\n\\def\\asis#1{#1}\n\n% @math outputs its argument in math mode.\n%\n% One complication: _ usually means subscripts, but it could also mean\n% an actual _ character, as in @math{@var{some_variable} + 1}.  
So make\n% _ active, and distinguish by seeing if the current family is \\slfam,\n% which is what @var uses.\n{\n  \\catcode`\\_ = \\active\n  \\gdef\\mathunderscore{%\n    \\catcode`\\_=\\active\n    \\def_{\\ifnum\\fam=\\slfam \\_\\else\\sb\\fi}%\n  }\n}\n% Another complication: we want \\\\ (and @\\) to output a math (or tt) \\.\n% FYI, plain.tex uses \\\\ as a temporary control sequence (for no\n% particular reason), but this is not advertised and we don't care.\n%\n% The \\mathchar is class=0=ordinary, family=7=ttfam, position=5C=\\.\n\\def\\mathbackslash{\\ifnum\\fam=\\ttfam \\mathchar\"075C \\else\\backslash \\fi}\n%\n\\def\\math{%\n  \\tex\n  \\mathunderscore\n  \\let\\\\ = \\mathbackslash\n  \\mathactive\n  % make the texinfo accent commands work in math mode\n  \\let\\\"=\\ddot\n  \\let\\'=\\acute\n  \\let\\==\\bar\n  \\let\\^=\\hat\n  \\let\\`=\\grave\n  \\let\\u=\\breve\n  \\let\\v=\\check\n  \\let\\~=\\tilde\n  \\let\\dotaccent=\\dot\n  $\\finishmath\n}\n\\def\\finishmath#1{#1$\\endgroup}  % Close the group opened by \\tex.\n\n% Some active characters (such as <) are spaced differently in math.\n% We have to reset their definitions in case the @math was an argument\n% to a command which sets the catcodes (such as @item or @section).\n%\n{\n  \\catcode`^ = \\active\n  \\catcode`< = \\active\n  \\catcode`> = \\active\n  \\catcode`+ = \\active\n  \\catcode`' = \\active\n  \\gdef\\mathactive{%\n    \\let^ = \\ptexhat\n    \\let< = \\ptexless\n    \\let> = \\ptexgtr\n    \\let+ = \\ptexplus\n    \\let' = \\ptexquoteright\n  }\n}\n\n% ctrl is no longer a Texinfo command, but leave this definition for fun.\n\\def\\ctrl #1{{\\tt \\rawbackslash \\hat}#1}\n\n% @inlinefmt{FMTNAME,PROCESSED-TEXT} and @inlineraw{FMTNAME,RAW-TEXT}.\n% Ignore unless FMTNAME == tex; then it is like @iftex and @tex,\n% except specified as a normal braced arg, so no newlines to worry about.\n% \n\\def\\outfmtnametex{tex}\n%\n\\long\\def\\inlinefmt#1{\\doinlinefmt 
#1,\\finish}\n\\long\\def\\doinlinefmt#1,#2,\\finish{%\n  \\def\\inlinefmtname{#1}%\n  \\ifx\\inlinefmtname\\outfmtnametex \\ignorespaces #2\\fi\n}\n% For raw, must switch into @tex before parsing the argument, to avoid\n% setting catcodes prematurely.  Doing it this way means that, for\n% example, @inlineraw{html, foo{bar} gets a parse error instead of being\n% ignored.  But this isn't important because if people want a literal\n% *right* brace they would have to use a command anyway, so they may as\n% well use a command to get a left brace too.  We could re-use the\n% delimiter character idea from \\verb, but it seems like overkill.\n% \n\\long\\def\\inlineraw{\\tex \\doinlineraw}\n\\long\\def\\doinlineraw#1{\\doinlinerawtwo #1,\\finish}\n\\def\\doinlinerawtwo#1,#2,\\finish{%\n  \\def\\inlinerawname{#1}%\n  \\ifx\\inlinerawname\\outfmtnametex \\ignorespaces #2\\fi\n  \\endgroup % close group opened by \\tex.\n}\n\n\n\\message{glyphs,}\n% and logos.\n\n% @@ prints an @, as does @atchar{}.\n\\def\\@{\\char64 }\n\\let\\atchar=\\@\n\n% @{ @} @lbracechar{} @rbracechar{} all generate brace characters.\n% Unless we're in typewriter, use \\ecfont because the CM text fonts do\n% not have braces, and we don't want to switch into math.\n\\def\\mylbrace{{\\ifmonospace\\else\\ecfont\\fi \\char123}}\n\\def\\myrbrace{{\\ifmonospace\\else\\ecfont\\fi \\char125}}\n\\let\\{=\\mylbrace \\let\\lbracechar=\\{\n\\let\\}=\\myrbrace \\let\\rbracechar=\\}\n\\begingroup\n  % Definitions to produce \\{ and \\} commands for indices,\n  % and @{ and @} for the aux/toc files.\n  \\catcode`\\{ = \\other \\catcode`\\} = \\other\n  \\catcode`\\[ = 1 \\catcode`\\] = 2\n  \\catcode`\\! 
= 0 \\catcode`\\\\ = \\other\n  !gdef!lbracecmd[\\{]%\n  !gdef!rbracecmd[\\}]%\n  !gdef!lbraceatcmd[@{]%\n  !gdef!rbraceatcmd[@}]%\n!endgroup\n\n% @comma{} to avoid , parsing problems.\n\\let\\comma = ,\n\n% Accents: @, @dotaccent @ringaccent @ubaraccent @udotaccent\n% Others are defined by plain TeX: @` @' @\" @^ @~ @= @u @v @H.\n\\let\\, = \\ptexc\n\\let\\dotaccent = \\ptexdot\n\\def\\ringaccent#1{{\\accent23 #1}}\n\\let\\tieaccent = \\ptext\n\\let\\ubaraccent = \\ptexb\n\\let\\udotaccent = \\d\n\n% Other special characters: @questiondown @exclamdown @ordf @ordm\n% Plain TeX defines: @AA @AE @O @OE @L (plus lowercase versions) @ss.\n\\def\\questiondown{?`}\n\\def\\exclamdown{!`}\n\\def\\ordf{\\leavevmode\\raise1ex\\hbox{\\selectfonts\\lllsize \\underbar{a}}}\n\\def\\ordm{\\leavevmode\\raise1ex\\hbox{\\selectfonts\\lllsize \\underbar{o}}}\n\n% Dotless i and dotless j, used for accents.\n\\def\\imacro{i}\n\\def\\jmacro{j}\n\\def\\dotless#1{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\imacro \\ifmmode\\imath \\else\\ptexi \\fi\n  \\else\\ifx\\temp\\jmacro \\ifmmode\\jmath \\else\\j \\fi\n  \\else \\errmessage{@dotless can be used only with i or j}%\n  \\fi\\fi\n}\n\n% The \\TeX{} logo, as in plain, but resetting the spacing so that a\n% period following counts as ending a sentence.  (Idea found in latex.)\n%\n\\edef\\TeX{\\TeX \\spacefactor=1000 }\n\n% @LaTeX{} logo.  
Not quite the same results as the definition in\n% latex.ltx, since we use a different font for the raised A; it's most\n% convenient for us to use an explicitly smaller font, rather than using\n% the \\scriptstyle font (since we don't reset \\scriptstyle and\n% \\scriptscriptstyle).\n%\n\\def\\LaTeX{%\n  L\\kern-.36em\n  {\\setbox0=\\hbox{T}%\n   \\vbox to \\ht0{\\hbox{%\n     \\ifx\\textnominalsize\\xwordpt\n       % for 10pt running text, \\lllsize (8pt) is too small for the A in LaTeX.\n       % Revert to plain's \\scriptsize, which is 7pt.\n       \\count255=\\the\\fam $\\fam\\count255 \\scriptstyle A$%\n     \\else\n       % For 11pt, we can use our lllsize.\n       \\selectfonts\\lllsize A%\n     \\fi\n     }%\n     \\vss\n  }}%\n  \\kern-.15em\n  \\TeX\n}\n\n% Some math mode symbols.\n\\def\\bullet{$\\ptexbullet$}\n\\def\\geq{\\ifmmode \\ge\\else $\\ge$\\fi}\n\\def\\leq{\\ifmmode \\le\\else $\\le$\\fi}\n\\def\\minus{\\ifmmode -\\else $-$\\fi}\n\n% @dots{} outputs an ellipsis using the current font.\n% We do .5em per period so that it has the same spacing in the cm\n% typewriter fonts as three actual period characters; on the other hand,\n% in other typewriter fonts three periods are wider than 1.5em.  So do\n% whichever is larger.\n%\n\\def\\dots{%\n  \\leavevmode\n  \\setbox0=\\hbox{...}% get width of three periods\n  \\ifdim\\wd0 > 1.5em\n    \\dimen0 = \\wd0\n  \\else\n    \\dimen0 = 1.5em\n  \\fi\n  \\hbox to \\dimen0{%\n    \\hskip 0pt plus.25fil\n    .\\hskip 0pt plus1fil\n    .\\hskip 0pt plus1fil\n    .\\hskip 0pt plus.5fil\n  }%\n}\n\n% @enddots{} is an end-of-sentence ellipsis.\n%\n\\def\\enddots{%\n  \\dots\n  \\spacefactor=\\endofsentencespacefactor\n}\n\n% @point{}, @result{}, @expansion{}, @print{}, @equiv{}.\n%\n% Since these characters are used in examples, they should be an even number of\n% \\tt widths. 
Each \\tt character is 1en, so two makes it 1em.\n%\n\\def\\point{$\\star$}\n\\def\\arrow{\\leavevmode\\raise.05ex\\hbox to 1em{\\hfil$\\rightarrow$\\hfil}}\n\\def\\result{\\leavevmode\\raise.05ex\\hbox to 1em{\\hfil$\\Rightarrow$\\hfil}}\n\\def\\expansion{\\leavevmode\\hbox to 1em{\\hfil$\\mapsto$\\hfil}}\n\\def\\print{\\leavevmode\\lower.1ex\\hbox to 1em{\\hfil$\\dashv$\\hfil}}\n\\def\\equiv{\\leavevmode\\hbox to 1em{\\hfil$\\ptexequiv$\\hfil}}\n\n% The @error{} command.\n% Adapted from the TeXbook's \\boxit.\n%\n\\newbox\\errorbox\n%\n{\\tentt \\global\\dimen0 = 3em}% Width of the box.\n\\dimen2 = .55pt % Thickness of rules\n% The text. (`r' is open on the right, `e' somewhat less so on the left.)\n\\setbox0 = \\hbox{\\kern-.75pt \\reducedsf \\putworderror\\kern-1.5pt}\n%\n\\setbox\\errorbox=\\hbox to \\dimen0{\\hfil\n   \\hsize = \\dimen0 \\advance\\hsize by -5.8pt % Space to left+right.\n   \\advance\\hsize by -2\\dimen2 % Rules.\n   \\vbox{%\n      \\hrule height\\dimen2\n      \\hbox{\\vrule width\\dimen2 \\kern3pt          % Space to left of text.\n         \\vtop{\\kern2.4pt \\box0 \\kern2.4pt}% Space above/below.\n         \\kern3pt\\vrule width\\dimen2}% Space to right.\n      \\hrule height\\dimen2}\n    \\hfil}\n%\n\\def\\error{\\leavevmode\\lower.7ex\\copy\\errorbox}\n\n% @pounds{} is a sterling sign, which Knuth put in the CM italic font.\n%\n\\def\\pounds{{\\it\\$}}\n\n% @euro{} comes from a separate font, depending on the current style.\n% We use the free feym* fonts from the eurosym package by Henrik\n% Theiling, which support regular, slanted, bold and bold slanted (and\n% \"outlined\" (blackboard board, sort of) versions, which we don't need).\n% It is available from http://www.ctan.org/tex-archive/fonts/eurosym.\n%\n% Although only regular is the truly official Euro symbol, we ignore\n% that.  
The Euro is designed to be slightly taller than the regular\n% font height.\n%\n% feymr - regular\n% feymo - slanted\n% feybr - bold\n% feybo - bold slanted\n%\n% There is no good (free) typewriter version, to my knowledge.\n% A feymr10 euro is ~7.3pt wide, while a normal cmtt10 char is ~5.25pt wide.\n% Hmm.\n%\n% Also doesn't work in math.  Do we need to do math with euro symbols?\n% Hope not.\n%\n%\n\\def\\euro{{\\eurofont e}}\n\\def\\eurofont{%\n  % We set the font at each command, rather than predefining it in\n  % \\textfonts and the other font-switching commands, so that\n  % installations which never need the symbol don't have to have the\n  % font installed.\n  %\n  % There is only one designed size (nominal 10pt), so we always scale\n  % that to the current nominal size.\n  %\n  % By the way, simply using \"at 1em\" works for cmr10 and the like, but\n  % does not work for cmbx10 and other extended/shrunken fonts.\n  %\n  \\def\\eurosize{\\csname\\curfontsize nominalsize\\endcsname}%\n  %\n  \\ifx\\curfontstyle\\bfstylename\n    % bold:\n    \\font\\thiseurofont = \\ifusingit{feybo10}{feybr10} at \\eurosize\n  \\else\n    % regular:\n    \\font\\thiseurofont = \\ifusingit{feymo10}{feymr10} at \\eurosize\n  \\fi\n  \\thiseurofont\n}\n\n% Glyphs from the EC fonts.  
We don't use \\let for the aliases, because\n% sometimes we redefine the original macro, and the alias should reflect\n% the redefinition.\n%\n% Use LaTeX names for the Icelandic letters.\n\\def\\DH{{\\ecfont \\char\"D0}} % Eth\n\\def\\dh{{\\ecfont \\char\"F0}} % eth\n\\def\\TH{{\\ecfont \\char\"DE}} % Thorn\n\\def\\th{{\\ecfont \\char\"FE}} % thorn\n%\n\\def\\guillemetleft{{\\ecfont \\char\"13}}\n\\def\\guillemotleft{\\guillemetleft}\n\\def\\guillemetright{{\\ecfont \\char\"14}}\n\\def\\guillemotright{\\guillemetright}\n\\def\\guilsinglleft{{\\ecfont \\char\"0E}}\n\\def\\guilsinglright{{\\ecfont \\char\"0F}}\n\\def\\quotedblbase{{\\ecfont \\char\"12}}\n\\def\\quotesinglbase{{\\ecfont \\char\"0D}}\n%\n% This positioning is not perfect (see the ogonek LaTeX package), but\n% we have the precomposed glyphs for the most common cases.  We put the\n% tests to use those glyphs in the single \\ogonek macro so we have fewer\n% dummy definitions to worry about for index entries, etc.\n%\n% ogonek is also used with other letters in Lithuanian (IOU), but using\n% the precomposed glyphs for those is not so easy since they aren't in\n% the same EC font.\n\\def\\ogonek#1{{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\macrocharA\\Aogonek\n  \\else\\ifx\\temp\\macrochara\\aogonek\n  \\else\\ifx\\temp\\macrocharE\\Eogonek\n  \\else\\ifx\\temp\\macrochare\\eogonek\n  \\else\n    \\ecfont \\setbox0=\\hbox{#1}%\n    \\ifdim\\ht0=1ex\\accent\"0C #1%\n    \\else\\ooalign{\\unhbox0\\crcr\\hidewidth\\char\"0C \\hidewidth}%\n    \\fi\n  \\fi\\fi\\fi\\fi\n  }%\n}\n\\def\\Aogonek{{\\ecfont \\char\"81}}\\def\\macrocharA{A}\n\\def\\aogonek{{\\ecfont \\char\"A1}}\\def\\macrochara{a}\n\\def\\Eogonek{{\\ecfont \\char\"86}}\\def\\macrocharE{E}\n\\def\\eogonek{{\\ecfont \\char\"A6}}\\def\\macrochare{e}\n%\n% Use the ec* fonts (cm-super in outline format) for non-CM glyphs.\n\\def\\ecfont{%\n  % We can't distinguish serif/sans and italic/slanted, but this\n  % is used for crude hacks anyway (like adding 
French and German\n  % quotes to documents typeset with CM, where we lose kerning), so\n  % hopefully nobody will notice/care.\n  \\edef\\ecsize{\\csname\\curfontsize ecsize\\endcsname}%\n  \\edef\\nominalsize{\\csname\\curfontsize nominalsize\\endcsname}%\n  \\ifmonospace\n    % typewriter:\n    \\font\\thisecfont = ectt\\ecsize \\space at \\nominalsize\n  \\else\n    \\ifx\\curfontstyle\\bfstylename\n      % bold:\n      \\font\\thisecfont = ecb\\ifusingit{i}{x}\\ecsize \\space at \\nominalsize\n    \\else\n      % regular:\n      \\font\\thisecfont = ec\\ifusingit{ti}{rm}\\ecsize \\space at \\nominalsize\n    \\fi\n  \\fi\n  \\thisecfont\n}\n\n% @registeredsymbol - R in a circle.  The font for the R should really\n% be smaller yet, but lllsize is the best we can do for now.\n% Adapted from the plain.tex definition of \\copyright.\n%\n\\def\\registeredsymbol{%\n  $^{{\\ooalign{\\hfil\\raise.07ex\\hbox{\\selectfonts\\lllsize R}%\n               \\hfil\\crcr\\Orb}}%\n    }$%\n}\n\n% @textdegree - the normal degrees sign.\n%\n\\def\\textdegree{$^\\circ$}\n\n% Laurent Siebenmann reports \\Orb undefined with:\n%  Textures 1.7.7 (preloaded format=plain 93.10.14)  (68K)  16 APR 2004 02:38\n% so we'll define it if necessary.\n%\n\\ifx\\Orb\\thisisundefined\n\\def\\Orb{\\mathhexbox20D}\n\\fi\n\n% Quotes.\n\\chardef\\quotedblleft=\"5C\n\\chardef\\quotedblright=`\\\"\n\\chardef\\quoteleft=`\\`\n\\chardef\\quoteright=`\\'\n\n\n\\message{page headings,}\n\n\\newskip\\titlepagetopglue \\titlepagetopglue = 1.5in\n\\newskip\\titlepagebottomglue \\titlepagebottomglue = 2pc\n\n% First the title page.  
Must do @settitle before @titlepage.\n\\newif\\ifseenauthor\n\\newif\\iffinishedtitlepage\n\n% Do an implicit @contents or @shortcontents after @end titlepage if the\n% user says @setcontentsaftertitlepage or @setshortcontentsaftertitlepage.\n%\n\\newif\\ifsetcontentsaftertitlepage\n \\let\\setcontentsaftertitlepage = \\setcontentsaftertitlepagetrue\n\\newif\\ifsetshortcontentsaftertitlepage\n \\let\\setshortcontentsaftertitlepage = \\setshortcontentsaftertitlepagetrue\n\n\\parseargdef\\shorttitlepage{%\n  \\begingroup \\hbox{}\\vskip 1.5in \\chaprm \\centerline{#1}%\n  \\endgroup\\page\\hbox{}\\page}\n\n\\envdef\\titlepage{%\n  % Open one extra group, as we want to close it in the middle of \\Etitlepage.\n  \\begingroup\n    \\parindent=0pt \\textfonts\n    % Leave some space at the very top of the page.\n    \\vglue\\titlepagetopglue\n    % No rule at page bottom unless we print one at the top with @title.\n    \\finishedtitlepagetrue\n    %\n    % Most title ``pages'' are actually two pages long, with space\n    % at the top of the second.  
We don't want the ragged left on the second.\n    \\let\\oldpage = \\page\n    \\def\\page{%\n      \\iffinishedtitlepage\\else\n\t \\finishtitlepage\n      \\fi\n      \\let\\page = \\oldpage\n      \\page\n      \\null\n    }%\n}\n\n\\def\\Etitlepage{%\n    \\iffinishedtitlepage\\else\n\t\\finishtitlepage\n    \\fi\n    % It is important to do the page break before ending the group,\n    % because the headline and footline are only empty inside the group.\n    % If we use the new definition of \\page, we always get a blank page\n    % after the title page, which we certainly don't want.\n    \\oldpage\n  \\endgroup\n  %\n  % Need this before the \\...aftertitlepage checks so that if they are\n  % in effect the toc pages will come out with page numbers.\n  \\HEADINGSon\n  %\n  % If they want short, they certainly want long too.\n  \\ifsetshortcontentsaftertitlepage\n    \\shortcontents\n    \\contents\n    \\global\\let\\shortcontents = \\relax\n    \\global\\let\\contents = \\relax\n  \\fi\n  %\n  \\ifsetcontentsaftertitlepage\n    \\contents\n    \\global\\let\\contents = \\relax\n    \\global\\let\\shortcontents = \\relax\n  \\fi\n}\n\n\\def\\finishtitlepage{%\n  \\vskip4pt \\hrule height 2pt width \\hsize\n  \\vskip\\titlepagebottomglue\n  \\finishedtitlepagetrue\n}\n\n% Settings used for typesetting titles: no hyphenation, no indentation,\n% don't worry much about spacing, ragged right.  This should be used\n% inside a \\vbox, and fonts need to be set appropriately first.  Because\n% it is always used for titles, nothing else, we call \\rmisbold.  
\\par\n% should be specified before the end of the \\vbox, since a vbox is a group.\n% \n\\def\\raggedtitlesettings{%\n  \\rmisbold\n  \\hyphenpenalty=10000\n  \\parindent=0pt\n  \\tolerance=5000\n  \\ptexraggedright\n}\n\n% Macros to be used within @titlepage:\n\n\\let\\subtitlerm=\\tenrm\n\\def\\subtitlefont{\\subtitlerm \\normalbaselineskip = 13pt \\normalbaselines}\n\n\\parseargdef\\title{%\n  \\checkenv\\titlepage\n  \\vbox{\\titlefonts \\raggedtitlesettings #1\\par}%\n  % print a rule at the page bottom also.\n  \\finishedtitlepagefalse\n  \\vskip4pt \\hrule height 4pt width \\hsize \\vskip4pt\n}\n\n\\parseargdef\\subtitle{%\n  \\checkenv\\titlepage\n  {\\subtitlefont \\rightline{#1}}%\n}\n\n% @author should come last, but may come many times.\n% It can also be used inside @quotation.\n%\n\\parseargdef\\author{%\n  \\def\\temp{\\quotation}%\n  \\ifx\\thisenv\\temp\n    \\def\\quotationauthor{#1}% printed in \\Equotation.\n  \\else\n    \\checkenv\\titlepage\n    \\ifseenauthor\\else \\vskip 0pt plus 1filll \\seenauthortrue \\fi\n    {\\secfonts\\rmisbold \\leftline{#1}}%\n  \\fi\n}\n\n\n% Set up page headings and footings.\n\n\\let\\thispage=\\folio\n\n\\newtoks\\evenheadline    % headline on even pages\n\\newtoks\\oddheadline     % headline on odd pages\n\\newtoks\\evenfootline    % footline on even pages\n\\newtoks\\oddfootline     % footline on odd pages\n\n% Now make TeX use those variables\n\\headline={{\\textfonts\\rm \\ifodd\\pageno \\the\\oddheadline\n                            \\else \\the\\evenheadline \\fi}}\n\\footline={{\\textfonts\\rm \\ifodd\\pageno \\the\\oddfootline\n                            \\else \\the\\evenfootline \\fi}\\HEADINGShook}\n\\let\\HEADINGShook=\\relax\n\n% Commands to set those variables.\n% For example, this is what  @headings on  does\n% @evenheading @thistitle|@thispage|@thischapter\n% @oddheading @thischapter|@thispage|@thistitle\n% @evenfooting @thisfile||\n% @oddfooting 
||@thisfile\n\n\n\\def\\evenheading{\\parsearg\\evenheadingxxx}\n\\def\\evenheadingxxx #1{\\evenheadingyyy #1\\|\\|\\|\\|\\finish}\n\\def\\evenheadingyyy #1\\|#2\\|#3\\|#4\\finish{%\n\\global\\evenheadline={\\rlap{\\centerline{#2}}\\line{#1\\hfil#3}}}\n\n\\def\\oddheading{\\parsearg\\oddheadingxxx}\n\\def\\oddheadingxxx #1{\\oddheadingyyy #1\\|\\|\\|\\|\\finish}\n\\def\\oddheadingyyy #1\\|#2\\|#3\\|#4\\finish{%\n\\global\\oddheadline={\\rlap{\\centerline{#2}}\\line{#1\\hfil#3}}}\n\n\\parseargdef\\everyheading{\\oddheadingxxx{#1}\\evenheadingxxx{#1}}%\n\n\\def\\evenfooting{\\parsearg\\evenfootingxxx}\n\\def\\evenfootingxxx #1{\\evenfootingyyy #1\\|\\|\\|\\|\\finish}\n\\def\\evenfootingyyy #1\\|#2\\|#3\\|#4\\finish{%\n\\global\\evenfootline={\\rlap{\\centerline{#2}}\\line{#1\\hfil#3}}}\n\n\\def\\oddfooting{\\parsearg\\oddfootingxxx}\n\\def\\oddfootingxxx #1{\\oddfootingyyy #1\\|\\|\\|\\|\\finish}\n\\def\\oddfootingyyy #1\\|#2\\|#3\\|#4\\finish{%\n  \\global\\oddfootline = {\\rlap{\\centerline{#2}}\\line{#1\\hfil#3}}%\n  %\n  % Leave some space for the footline.  
Hopefully ok to assume\n  % @evenfooting will not be used by itself.\n  \\global\\advance\\pageheight by -12pt\n  \\global\\advance\\vsize by -12pt\n}\n\n\\parseargdef\\everyfooting{\\oddfootingxxx{#1}\\evenfootingxxx{#1}}\n\n% @evenheadingmarks top     \\thischapter <- chapter at the top of a page\n% @evenheadingmarks bottom  \\thischapter <- chapter at the bottom of a page\n%\n% The same set of arguments for:\n%\n% @oddheadingmarks\n% @evenfootingmarks\n% @oddfootingmarks\n% @everyheadingmarks\n% @everyfootingmarks\n\n\\def\\evenheadingmarks{\\headingmarks{even}{heading}}\n\\def\\oddheadingmarks{\\headingmarks{odd}{heading}}\n\\def\\evenfootingmarks{\\headingmarks{even}{footing}}\n\\def\\oddfootingmarks{\\headingmarks{odd}{footing}}\n\\def\\everyheadingmarks#1 {\\headingmarks{even}{heading}{#1}\n                          \\headingmarks{odd}{heading}{#1} }\n\\def\\everyfootingmarks#1 {\\headingmarks{even}{footing}{#1}\n                          \\headingmarks{odd}{footing}{#1} }\n% #1 = even/odd, #2 = heading/footing, #3 = top/bottom.\n\\def\\headingmarks#1#2#3 {%\n  \\expandafter\\let\\expandafter\\temp \\csname get#3headingmarks\\endcsname\n  \\global\\expandafter\\let\\csname get#1#2marks\\endcsname \\temp\n}\n\n\\everyheadingmarks bottom\n\\everyfootingmarks bottom\n\n% @headings double      turns headings on for double-sided printing.\n% @headings single      turns headings on for single-sided printing.\n% @headings off         turns them off.\n% @headings on          same as @headings double, retained for compatibility.\n% @headings after       turns on double-sided headings after this page.\n% @headings doubleafter turns on double-sided headings after this page.\n% @headings singleafter turns on single-sided headings after this page.\n% By default, they are off at the start of a document,\n% and turned `on' after @end titlepage.\n\n\\def\\headings #1 {\\csname HEADINGS#1\\endcsname}\n\n\\def\\headingsoff{% non-global headings elimination\n  
\\evenheadline={\\hfil}\\evenfootline={\\hfil}%\n   \\oddheadline={\\hfil}\\oddfootline={\\hfil}%\n}\n\n\\def\\HEADINGSoff{{\\globaldefs=1 \\headingsoff}} % global setting\n\\HEADINGSoff  % it's the default\n\n% When we turn headings on, set the page number to 1.\n% For double-sided printing, put current file name in lower left corner,\n% chapter name on inside top of right hand pages, document\n% title on inside top of left hand pages, and page numbers on outside top\n% edge of all pages.\n\\def\\HEADINGSdouble{%\n\\global\\pageno=1\n\\global\\evenfootline={\\hfil}\n\\global\\oddfootline={\\hfil}\n\\global\\evenheadline={\\line{\\folio\\hfil\\thistitle}}\n\\global\\oddheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\let\\contentsalignmacro = \\chapoddpage\n}\n\\let\\contentsalignmacro = \\chappager\n\n% For single-sided printing, chapter title goes across top left of page,\n% page number on top right.\n\\def\\HEADINGSsingle{%\n\\global\\pageno=1\n\\global\\evenfootline={\\hfil}\n\\global\\oddfootline={\\hfil}\n\\global\\evenheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\oddheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\let\\contentsalignmacro = \\chappager\n}\n\\def\\HEADINGSon{\\HEADINGSdouble}\n\n\\def\\HEADINGSafter{\\let\\HEADINGShook=\\HEADINGSdoublex}\n\\let\\HEADINGSdoubleafter=\\HEADINGSafter\n\\def\\HEADINGSdoublex{%\n\\global\\evenfootline={\\hfil}\n\\global\\oddfootline={\\hfil}\n\\global\\evenheadline={\\line{\\folio\\hfil\\thistitle}}\n\\global\\oddheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\let\\contentsalignmacro = \\chapoddpage\n}\n\n\\def\\HEADINGSsingleafter{\\let\\HEADINGShook=\\HEADINGSsinglex}\n\\def\\HEADINGSsinglex{%\n\\global\\evenfootline={\\hfil}\n\\global\\oddfootline={\\hfil}\n\\global\\evenheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\oddheadline={\\line{\\thischapter\\hfil\\folio}}\n\\global\\let\\contentsalignmacro = \\chappager\n}\n\n% Subroutines used in generating headings\n% 
This produces Day Month Year style of output.\n% Only define if not already defined, in case a txi-??.tex file has set\n% up a different format (e.g., txi-cs.tex does this).\n\\ifx\\today\\thisisundefined\n\\def\\today{%\n  \\number\\day\\space\n  \\ifcase\\month\n  \\or\\putwordMJan\\or\\putwordMFeb\\or\\putwordMMar\\or\\putwordMApr\n  \\or\\putwordMMay\\or\\putwordMJun\\or\\putwordMJul\\or\\putwordMAug\n  \\or\\putwordMSep\\or\\putwordMOct\\or\\putwordMNov\\or\\putwordMDec\n  \\fi\n  \\space\\number\\year}\n\\fi\n\n% @settitle line...  specifies the title of the document, for headings.\n% It generates no output of its own.\n\\def\\thistitle{\\putwordNoTitle}\n\\def\\settitle{\\parsearg{\\gdef\\thistitle}}\n\n\n\\message{tables,}\n% Tables -- @table, @ftable, @vtable, @item(x).\n\n% default indentation of table text\n\\newdimen\\tableindent \\tableindent=.8in\n% default indentation of @itemize and @enumerate text\n\\newdimen\\itemindent  \\itemindent=.3in\n% margin between end of table item and start of table text.\n\\newdimen\\itemmargin  \\itemmargin=.1in\n\n% used internally for \\itemindent minus \\itemmargin\n\\newdimen\\itemmax\n\n% Note @table, @ftable, and @vtable define @item, @itemx, etc., with\n% these defs.\n% They also define \\itemindex\n% to index the item name in whatever manner is desired (perhaps none).\n\n\\newif\\ifitemxneedsnegativevskip\n\n\\def\\itemxpar{\\par\\ifitemxneedsnegativevskip\\nobreak\\vskip-\\parskip\\nobreak\\fi}\n\n\\def\\internalBitem{\\smallbreak \\parsearg\\itemzzz}\n\\def\\internalBitemx{\\itemxpar \\parsearg\\itemzzz}\n\n\\def\\itemzzz #1{\\begingroup %\n  \\advance\\hsize by -\\rightskip\n  \\advance\\hsize by -\\tableindent\n  \\setbox0=\\hbox{\\itemindicate{#1}}%\n  \\itemindex{#1}%\n  \\nobreak % This prevents a break before @itemx.\n  %\n  % If the item text does not fit in the space we have, put it on a line\n  % by itself, and do not allow a page break either before or after that\n  % line.  
We do not start a paragraph here because then if the next\n  % command is, e.g., @kindex, the whatsit would get put into the\n  % horizontal list on a line by itself, resulting in extra blank space.\n  \\ifdim \\wd0>\\itemmax\n    %\n    % Make this a paragraph so we get the \\parskip glue and wrapping,\n    % but leave it ragged-right.\n    \\begingroup\n      \\advance\\leftskip by-\\tableindent\n      \\advance\\hsize by\\tableindent\n      \\advance\\rightskip by0pt plus1fil\\relax\n      \\leavevmode\\unhbox0\\par\n    \\endgroup\n    %\n    % We're going to be starting a paragraph, but we don't want the\n    % \\parskip glue -- logically it's part of the @item we just started.\n    \\nobreak \\vskip-\\parskip\n    %\n    % Stop a page break at the \\parskip glue coming up.  However, if\n    % what follows is an environment such as @example, there will be no\n    % \\parskip glue; then the negative vskip we just inserted would\n    % cause the example and the item to crash together.  So we use this\n    % bizarre value of 10001 as a signal to \\aboveenvbreak to insert\n    % \\parskip glue after all.  Section titles are handled this way also.\n    %\n    \\penalty 10001\n    \\endgroup\n    \\itemxneedsnegativevskipfalse\n  \\else\n    % The item text fits into the space.  
Start a paragraph, so that the\n    % following text (if any) will end up on the same line.\n    \\noindent\n    % Do this with kerns and \\unhbox so that if there is a footnote in\n    % the item text, it can migrate to the main vertical list and\n    % eventually be printed.\n    \\nobreak\\kern-\\tableindent\n    \\dimen0 = \\itemmax  \\advance\\dimen0 by \\itemmargin \\advance\\dimen0 by -\\wd0\n    \\unhbox0\n    \\nobreak\\kern\\dimen0\n    \\endgroup\n    \\itemxneedsnegativevskiptrue\n  \\fi\n}\n\n\\def\\item{\\errmessage{@item while not in a list environment}}\n\\def\\itemx{\\errmessage{@itemx while not in a list environment}}\n\n% @table, @ftable, @vtable.\n\\envdef\\table{%\n  \\let\\itemindex\\gobble\n  \\tablecheck{table}%\n}\n\\envdef\\ftable{%\n  \\def\\itemindex ##1{\\doind {fn}{\\code{##1}}}%\n  \\tablecheck{ftable}%\n}\n\\envdef\\vtable{%\n  \\def\\itemindex ##1{\\doind {vr}{\\code{##1}}}%\n  \\tablecheck{vtable}%\n}\n\\def\\tablecheck#1{%\n  \\ifnum \\the\\catcode`\\^^M=\\active\n    \\endgroup\n    \\errmessage{This command won't work in this context; perhaps the problem is\n      that we are \\inenvironment\\thisenv}%\n    \\def\\next{\\doignore{#1}}%\n  \\else\n    \\let\\next\\tablex\n  \\fi\n  \\next\n}\n\\def\\tablex#1{%\n  \\def\\itemindicate{#1}%\n  \\parsearg\\tabley\n}\n\\def\\tabley#1{%\n  {%\n    \\makevalueexpandable\n    \\edef\\temp{\\noexpand\\tablez #1\\space\\space\\space}%\n    \\expandafter\n  }\\temp \\endtablez\n}\n\\def\\tablez #1 #2 #3 #4\\endtablez{%\n  \\aboveenvbreak\n  \\ifnum 0#1>0 \\advance \\leftskip by #1\\mil \\fi\n  \\ifnum 0#2>0 \\tableindent=#2\\mil \\fi\n  \\ifnum 0#3>0 \\advance \\rightskip by #3\\mil \\fi\n  \\itemmax=\\tableindent\n  \\advance \\itemmax by -\\itemmargin\n  \\advance \\leftskip by \\tableindent\n  \\exdentamount=\\tableindent\n  \\parindent = 0pt\n  \\parskip = \\smallskipamount\n  \\ifdim \\parskip=0pt \\parskip=2pt \\fi\n  \\let\\item = \\internalBitem\n  \\let\\itemx = 
\\internalBitemx\n}\n\\def\\Etable{\\endgraf\\afterenvbreak}\n\\let\\Eftable\\Etable\n\\let\\Evtable\\Etable\n\\let\\Eitemize\\Etable\n\\let\\Eenumerate\\Etable\n\n% This is the counter used by @enumerate, which is really @itemize\n\n\\newcount \\itemno\n\n\\envdef\\itemize{\\parsearg\\doitemize}\n\n\\def\\doitemize#1{%\n  \\aboveenvbreak\n  \\itemmax=\\itemindent\n  \\advance\\itemmax by -\\itemmargin\n  \\advance\\leftskip by \\itemindent\n  \\exdentamount=\\itemindent\n  \\parindent=0pt\n  \\parskip=\\smallskipamount\n  \\ifdim\\parskip=0pt \\parskip=2pt \\fi\n  %\n  % Try typesetting the item mark that if the document erroneously says\n  % something like @itemize @samp (intending @table), there's an error\n  % right away at the @itemize.  It's not the best error message in the\n  % world, but it's better than leaving it to the @item.  This means if\n  % the user wants an empty mark, they have to say @w{} not just @w.\n  \\def\\itemcontents{#1}%\n  \\setbox0 = \\hbox{\\itemcontents}%\n  %\n  % @itemize with no arg is equivalent to @itemize @bullet.\n  \\ifx\\itemcontents\\empty\\def\\itemcontents{\\bullet}\\fi\n  %\n  \\let\\item=\\itemizeitem\n}\n\n% Definition of @item while inside @itemize and @enumerate.\n%\n\\def\\itemizeitem{%\n  \\advance\\itemno by 1  % for enumerations\n  {\\let\\par=\\endgraf \\smallbreak}% reasonable place to break\n  {%\n   % If the document has an @itemize directly after a section title, a\n   % \\nobreak will be last on the list, and \\sectionheading will have\n   % done a \\vskip-\\parskip.  In that case, we don't want to zero\n   % parskip, or the item text will crash with the heading.  On the\n   % other hand, when there is normal text preceding the item (as there\n   % usually is), we do want to zero parskip, or there would be too much\n   % space.  In that case, we won't have a \\nobreak before.  
At least\n   % that's the theory.\n   \\ifnum\\lastpenalty<10000 \\parskip=0in \\fi\n   \\noindent\n   \\hbox to 0pt{\\hss \\itemcontents \\kern\\itemmargin}%\n   %\n   \\vadjust{\\penalty 1200}}% not good to break after first line of item.\n  \\flushcr\n}\n\n% \\splitoff TOKENS\\endmark defines \\first to be the first token in\n% TOKENS, and \\rest to be the remainder.\n%\n\\def\\splitoff#1#2\\endmark{\\def\\first{#1}\\def\\rest{#2}}%\n\n% Allow an optional argument of an uppercase letter, lowercase letter,\n% or number, to specify the first label in the enumerated list.  No\n% argument is the same as `1'.\n%\n\\envparseargdef\\enumerate{\\enumeratey #1  \\endenumeratey}\n\\def\\enumeratey #1 #2\\endenumeratey{%\n  % If we were given no argument, pretend we were given `1'.\n  \\def\\thearg{#1}%\n  \\ifx\\thearg\\empty \\def\\thearg{1}\\fi\n  %\n  % Detect if the argument is a single token.  If so, it might be a\n  % letter.  Otherwise, the only valid thing it can be is a number.\n  % (We will always have one token, because of the test we just made.\n  % This is a good thing, since \\splitoff doesn't work given nothing at\n  % all -- the first parameter is undelimited.)\n  \\expandafter\\splitoff\\thearg\\endmark\n  \\ifx\\rest\\empty\n    % Only one token in the argument.  
It could still be anything.\n    % A ``lowercase letter'' is one whose \\lccode is nonzero.\n    % An ``uppercase letter'' is one whose \\lccode is both nonzero, and\n    %   not equal to itself.\n    % Otherwise, we assume it's a number.\n    %\n    % We need the \\relax at the end of the \\ifnum lines to stop TeX from\n    % continuing to look for a <number>.\n    %\n    \\ifnum\\lccode\\expandafter`\\thearg=0\\relax\n      \\numericenumerate % a number (we hope)\n    \\else\n      % It's a letter.\n      \\ifnum\\lccode\\expandafter`\\thearg=\\expandafter`\\thearg\\relax\n        \\lowercaseenumerate % lowercase letter\n      \\else\n        \\uppercaseenumerate % uppercase letter\n      \\fi\n    \\fi\n  \\else\n    % Multiple tokens in the argument.  We hope it's a number.\n    \\numericenumerate\n  \\fi\n}\n\n% An @enumerate whose labels are integers.  The starting integer is\n% given in \\thearg.\n%\n\\def\\numericenumerate{%\n  \\itemno = \\thearg\n  \\startenumeration{\\the\\itemno}%\n}\n\n% The starting (lowercase) letter is in \\thearg.\n\\def\\lowercaseenumerate{%\n  \\itemno = \\expandafter`\\thearg\n  \\startenumeration{%\n    % Be sure we're not beyond the end of the alphabet.\n    \\ifnum\\itemno=0\n      \\errmessage{No more lowercase letters in @enumerate; get a bigger\n                  alphabet}%\n    \\fi\n    \\char\\lccode\\itemno\n  }%\n}\n\n% The starting (uppercase) letter is in \\thearg.\n\\def\\uppercaseenumerate{%\n  \\itemno = \\expandafter`\\thearg\n  \\startenumeration{%\n    % Be sure we're not beyond the end of the alphabet.\n    \\ifnum\\itemno=0\n      \\errmessage{No more uppercase letters in @enumerate; get a bigger\n                  alphabet}\n    \\fi\n    \\char\\uccode\\itemno\n  }%\n}\n\n% Call \\doitemize, adding a period to the first argument and supplying the\n% common last two arguments.  
Also subtract one from the initial value in\n% \\itemno, since @item increments \\itemno.\n%\n\\def\\startenumeration#1{%\n  \\advance\\itemno by -1\n  \\doitemize{#1.}\\flushcr\n}\n\n% @alphaenumerate and @capsenumerate are abbreviations for giving an arg\n% to @enumerate.\n%\n\\def\\alphaenumerate{\\enumerate{a}}\n\\def\\capsenumerate{\\enumerate{A}}\n\\def\\Ealphaenumerate{\\Eenumerate}\n\\def\\Ecapsenumerate{\\Eenumerate}\n\n\n% @multitable macros\n% Amy Hendrickson, 8/18/94, 3/6/96\n%\n% @multitable ... @end multitable will make as many columns as desired.\n% Contents of each column will wrap at width given in preamble.  Width\n% can be specified either with sample text given in a template line,\n% or in percent of \\hsize, the current width of text on page.\n\n% Table can continue over pages but will only break between lines.\n\n% To make preamble:\n%\n% Either define widths of columns in terms of percent of \\hsize:\n%   @multitable @columnfractions .25 .3 .45\n%   @item ...\n%\n%   Numbers following @columnfractions are the percent of the total\n%   current hsize to be used for each column. You may use as many\n%   columns as desired.\n\n\n% Or use a template:\n%   @multitable {Column 1 template} {Column 2 template} {Column 3 template}\n%   @item ...\n%   using the widest term desired in each column.\n\n% Each new table line starts with @item, each subsequent new column\n% starts with @tab. 
Empty columns may be produced by supplying @tab's\n% with nothing between them for as many times as empty columns are needed,\n% ie, @tab@tab@tab will produce two empty columns.\n\n% @item, @tab do not need to be on their own lines, but it will not hurt\n% if they are.\n\n% Sample multitable:\n\n%   @multitable {Column 1 template} {Column 2 template} {Column 3 template}\n%   @item first col stuff @tab second col stuff @tab third col\n%   @item\n%   first col stuff\n%   @tab\n%   second col stuff\n%   @tab\n%   third col\n%   @item first col stuff @tab second col stuff\n%   @tab Many paragraphs of text may be used in any column.\n%\n%         They will wrap at the width determined by the template.\n%   @item@tab@tab This will be in third column.\n%   @end multitable\n\n% Default dimensions may be reset by user.\n% @multitableparskip is vertical space between paragraphs in table.\n% @multitableparindent is paragraph indent in table.\n% @multitablecolmargin is horizontal space to be left between columns.\n% @multitablelinespace is space to leave between table items, baseline\n%                                                            to baseline.\n%   0pt means it depends on current normal line spacing.\n%\n\\newskip\\multitableparskip\n\\newskip\\multitableparindent\n\\newdimen\\multitablecolspace\n\\newskip\\multitablelinespace\n\\multitableparskip=0pt\n\\multitableparindent=6pt\n\\multitablecolspace=12pt\n\\multitablelinespace=0pt\n\n% Macros used to set up halign preamble:\n%\n\\let\\endsetuptable\\relax\n\\def\\xendsetuptable{\\endsetuptable}\n\\let\\columnfractions\\relax\n\\def\\xcolumnfractions{\\columnfractions}\n\\newif\\ifsetpercent\n\n% #1 is the @columnfraction, usually a decimal number like .5, but might\n% be just 1.  
We just use it, whatever it is.\n%\n\\def\\pickupwholefraction#1 {%\n  \\global\\advance\\colcount by 1\n  \\expandafter\\xdef\\csname col\\the\\colcount\\endcsname{#1\\hsize}%\n  \\setuptable\n}\n\n\\newcount\\colcount\n\\def\\setuptable#1{%\n  \\def\\firstarg{#1}%\n  \\ifx\\firstarg\\xendsetuptable\n    \\let\\go = \\relax\n  \\else\n    \\ifx\\firstarg\\xcolumnfractions\n      \\global\\setpercenttrue\n    \\else\n      \\ifsetpercent\n         \\let\\go\\pickupwholefraction\n      \\else\n         \\global\\advance\\colcount by 1\n         \\setbox0=\\hbox{#1\\unskip\\space}% Add a normal word space as a\n                   % separator; typically that is always in the input, anyway.\n         \\expandafter\\xdef\\csname col\\the\\colcount\\endcsname{\\the\\wd0}%\n      \\fi\n    \\fi\n    \\ifx\\go\\pickupwholefraction\n      % Put the argument back for the \\pickupwholefraction call, so\n      % we'll always have a period there to be parsed.\n      \\def\\go{\\pickupwholefraction#1}%\n    \\else\n      \\let\\go = \\setuptable\n    \\fi%\n  \\fi\n  \\go\n}\n\n% multitable-only commands.\n%\n% @headitem starts a heading row, which we typeset in bold.\n% Assignments have to be global since we are inside the implicit group\n% of an alignment entry.  \\everycr resets \\everytab so we don't have to\n% undo it ourselves.\n\\def\\headitemfont{\\b}% for people to use in the template row; not changeable\n\\def\\headitem{%\n  \\checkenv\\multitable\n  \\crcr\n  \\global\\everytab={\\bf}% can't use \\headitemfont since the parsing differs\n  \\the\\everytab % for the first item\n}%\n%\n% A \\tab used to include \\hskip1sp.  But then the space in a template\n% line is not enough.  That is bad.  So let's go back to just `&' until\n% we again encounter the problem the 1sp was intended to solve.\n%\t\t\t\t\t--karl, nathan@acm.org, 20apr99.\n\\def\\tab{\\checkenv\\multitable &\\the\\everytab}%\n\n% @multitable ... 
@end multitable definitions:\n%\n\\newtoks\\everytab  % insert after every tab.\n%\n\\envdef\\multitable{%\n  \\vskip\\parskip\n  \\startsavinginserts\n  %\n  % @item within a multitable starts a normal row.\n  % We use \\def instead of \\let so that if one of the multitable entries\n  % contains an @itemize, we don't choke on the \\item (seen as \\crcr aka\n  % \\endtemplate) expanding \\doitemize.\n  \\def\\item{\\crcr}%\n  %\n  \\tolerance=9500\n  \\hbadness=9500\n  \\setmultitablespacing\n  \\parskip=\\multitableparskip\n  \\parindent=\\multitableparindent\n  \\overfullrule=0pt\n  \\global\\colcount=0\n  %\n  \\everycr = {%\n    \\noalign{%\n      \\global\\everytab={}%\n      \\global\\colcount=0 % Reset the column counter.\n      % Check for saved footnotes, etc.\n      \\checkinserts\n      % Keeps underfull box messages off when table breaks over pages.\n      %\\filbreak\n\t% Maybe so, but it also creates really weird page breaks when the\n\t% table breaks over pages. Wouldn't \\vfil be better?  
Wait until the\n\t% problem manifests itself, so it can be fixed for real --karl.\n    }%\n  }%\n  %\n  \\parsearg\\domultitable\n}\n\\def\\domultitable#1{%\n  % To parse everything between @multitable and @item:\n  \\setuptable#1 \\endsetuptable\n  %\n  % This preamble sets up a generic column definition, which will\n  % be used as many times as user calls for columns.\n  % \\vtop will set a single line and will also let text wrap and\n  % continue for many paragraphs if desired.\n  \\halign\\bgroup &%\n    \\global\\advance\\colcount by 1\n    \\multistrut\n    \\vtop{%\n      % Use the current \\colcount to find the correct column width:\n      \\hsize=\\expandafter\\csname col\\the\\colcount\\endcsname\n      %\n      % In order to keep entries from bumping into each other\n      % we will add a \\leftskip of \\multitablecolspace to all columns after\n      % the first one.\n      %\n      % If a template has been used, we will add \\multitablecolspace\n      % to the width of each template entry.\n      %\n      % If the user has set preamble in terms of percent of \\hsize we will\n      % use that dimension as the width of the column, and the \\leftskip\n      % will keep entries from bumping into each other.  
Table will start at\n      % left margin and final column will justify at right margin.\n      %\n      % Make sure we don't inherit \\rightskip from the outer environment.\n      \\rightskip=0pt\n      \\ifnum\\colcount=1\n\t% The first column will be indented with the surrounding text.\n\t\\advance\\hsize by\\leftskip\n      \\else\n\t\\ifsetpercent \\else\n\t  % If user has not set preamble in terms of percent of \\hsize\n\t  % we will advance \\hsize by \\multitablecolspace.\n\t  \\advance\\hsize by \\multitablecolspace\n\t\\fi\n       % In either case we will make \\leftskip=\\multitablecolspace:\n      \\leftskip=\\multitablecolspace\n      \\fi\n      % Ignoring space at the beginning and end avoids an occasional spurious\n      % blank line, when TeX decides to break the line at the space before the\n      % box from the multistrut, so the strut ends up on a line by itself.\n      % For example:\n      % @multitable @columnfractions .11 .89\n      % @item @code{#}\n      % @tab Legal holiday which is valid in major parts of the whole country.\n      % Is automatically provided with highlighting sequences respectively\n      % marking characters.\n      \\noindent\\ignorespaces##\\unskip\\multistrut\n    }\\cr\n}\n\\def\\Emultitable{%\n  \\crcr\n  \\egroup % end the \\halign\n  \\global\\setpercentfalse\n}\n\n\\def\\setmultitablespacing{%\n  \\def\\multistrut{\\strut}% just use the standard line spacing\n  %\n  % Compute \\multitablelinespace (if not defined by user) for use in\n  % \\multitableparskip calculation.  We used define \\multistrut based on\n  % this, but (ironically) that caused the spacing to be off.\n  % See bug-texinfo report from Werner Lemberg, 31 Oct 2004 12:52:20 +0100.\n\\ifdim\\multitablelinespace=0pt\n\\setbox0=\\vbox{X}\\global\\multitablelinespace=\\the\\baselineskip\n\\global\\advance\\multitablelinespace by-\\ht0\n\\fi\n% Test to see if parskip is larger than space between lines of\n% table. 
If not, do nothing.\n%        If so, set to same dimension as multitablelinespace.\n\\ifdim\\multitableparskip>\\multitablelinespace\n\\global\\multitableparskip=\\multitablelinespace\n\\global\\advance\\multitableparskip-7pt % to keep parskip somewhat smaller\n                                      % than skip between lines in the table.\n\\fi%\n\\ifdim\\multitableparskip=0pt\n\\global\\multitableparskip=\\multitablelinespace\n\\global\\advance\\multitableparskip-7pt % to keep parskip somewhat smaller\n                                      % than skip between lines in the table.\n\\fi}\n\n\n\\message{conditionals,}\n\n% @iftex, @ifnotdocbook, @ifnothtml, @ifnotinfo, @ifnotplaintext,\n% @ifnotxml always succeed.  They currently do nothing; we don't\n% attempt to check whether the conditionals are properly nested.  But we\n% have to remember that they are conditionals, so that @end doesn't\n% attempt to close an environment group.\n%\n\\def\\makecond#1{%\n  \\expandafter\\let\\csname #1\\endcsname = \\relax\n  \\expandafter\\let\\csname iscond.#1\\endcsname = 1\n}\n\\makecond{iftex}\n\\makecond{ifnotdocbook}\n\\makecond{ifnothtml}\n\\makecond{ifnotinfo}\n\\makecond{ifnotplaintext}\n\\makecond{ifnotxml}\n\n% Ignore @ignore, @ifhtml, @ifinfo, and the like.\n%\n\\def\\direntry{\\doignore{direntry}}\n\\def\\documentdescription{\\doignore{documentdescription}}\n\\def\\docbook{\\doignore{docbook}}\n\\def\\html{\\doignore{html}}\n\\def\\ifdocbook{\\doignore{ifdocbook}}\n\\def\\ifhtml{\\doignore{ifhtml}}\n\\def\\ifinfo{\\doignore{ifinfo}}\n\\def\\ifnottex{\\doignore{ifnottex}}\n\\def\\ifplaintext{\\doignore{ifplaintext}}\n\\def\\ifxml{\\doignore{ifxml}}\n\\def\\ignore{\\doignore{ignore}}\n\\def\\menu{\\doignore{menu}}\n\\def\\xml{\\doignore{xml}}\n\n% Ignore text until a line `@end #1', keeping track of nested conditionals.\n%\n% A count to remember the depth of nesting.\n\\newcount\\doignorecount\n\n\\def\\doignore#1{\\begingroup\n  % Scan in ``verbatim'' mode:\n  
\\obeylines\n  \\catcode`\\@ = \\other\n  \\catcode`\\{ = \\other\n  \\catcode`\\} = \\other\n  %\n  % Make sure that spaces turn into tokens that match what \\doignoretext wants.\n  \\spaceisspace\n  %\n  % Count number of #1's that we've seen.\n  \\doignorecount = 0\n  %\n  % Swallow text until we reach the matching `@end #1'.\n  \\dodoignore{#1}%\n}\n\n{ \\catcode`_=11 % We want to use \\_STOP_ which cannot appear in texinfo source.\n  \\obeylines %\n  %\n  \\gdef\\dodoignore#1{%\n    % #1 contains the command name as a string, e.g., `ifinfo'.\n    %\n    % Define a command to find the next `@end #1'.\n    \\long\\def\\doignoretext##1^^M@end #1{%\n      \\doignoretextyyy##1^^M@#1\\_STOP_}%\n    %\n    % And this command to find another #1 command, at the beginning of a\n    % line.  (Otherwise, we would consider a line `@c @ifset', for\n    % example, to count as an @ifset for nesting.)\n    \\long\\def\\doignoretextyyy##1^^M@#1##2\\_STOP_{\\doignoreyyy{##2}\\_STOP_}%\n    %\n    % And now expand that command.\n    \\doignoretext ^^M%\n  }%\n}\n\n\\def\\doignoreyyy#1{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\empty\t\t\t% Nothing found.\n    \\let\\next\\doignoretextzzz\n  \\else\t\t\t\t\t% Found a nested condition, ...\n    \\advance\\doignorecount by 1\n    \\let\\next\\doignoretextyyy\t\t% ..., look for another.\n    % If we're here, #1 ends with ^^M\\ifinfo (for example).\n  \\fi\n  \\next #1% the token \\_STOP_ is present just after this macro.\n}\n\n% We have to swallow the remaining \"\\_STOP_\".\n%\n\\def\\doignoretextzzz#1{%\n  \\ifnum\\doignorecount = 0\t% We have just found the outermost @end.\n    \\let\\next\\enddoignore\n  \\else\t\t\t\t% Still inside a nested condition.\n    \\advance\\doignorecount by -1\n    \\let\\next\\doignoretext      % Look for the next @end.\n  \\fi\n  \\next\n}\n\n% Finish off ignored text.\n{ \\obeylines%\n  % Ignore anything after the last `@end #1'; this matters in verbatim\n  % environments, where otherwise the newline 
after an ignored conditional\n  % would result in a blank line in the output.\n  \\gdef\\enddoignore#1^^M{\\endgroup\\ignorespaces}%\n}\n\n\n% @set VAR sets the variable VAR to an empty value.\n% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE.\n%\n% Since we want to separate VAR from REST-OF-LINE (which might be\n% empty), we can't just use \\parsearg; we have to insert a space of our\n% own to delimit the rest of the line, and then take it out again if we\n% didn't need it.\n% We rely on the fact that \\parsearg sets \\catcode`\\ =10.\n%\n\\parseargdef\\set{\\setyyy#1 \\endsetyyy}\n\\def\\setyyy#1 #2\\endsetyyy{%\n  {%\n    \\makevalueexpandable\n    \\def\\temp{#2}%\n    \\edef\\next{\\gdef\\makecsname{SET#1}}%\n    \\ifx\\temp\\empty\n      \\next{}%\n    \\else\n      \\setzzz#2\\endsetzzz\n    \\fi\n  }%\n}\n% Remove the trailing space \\setxxx inserted.\n\\def\\setzzz#1 \\endsetzzz{\\next{#1}}\n\n% @clear VAR clears (i.e., unsets) the variable VAR.\n%\n\\parseargdef\\clear{%\n  {%\n    \\makevalueexpandable\n    \\global\\expandafter\\let\\csname SET#1\\endcsname=\\relax\n  }%\n}\n\n% @value{foo} gets the text saved in variable foo.\n\\def\\value{\\begingroup\\makevalueexpandable\\valuexxx}\n\\def\\valuexxx#1{\\expandablevalue{#1}\\endgroup}\n{\n  \\catcode`\\- = \\active \\catcode`\\_ = \\active\n  %\n  \\gdef\\makevalueexpandable{%\n    \\let\\value = \\expandablevalue\n    % We don't want these characters active, ...\n    \\catcode`\\-=\\other \\catcode`\\_=\\other\n    % ..., but we might end up with active ones in the argument if\n    % we're called from @code, as @code{@value{foo-bar_}}, though.\n    % So \\let them to their normal equivalents.\n    \\let-\\normaldash \\let_\\normalunderscore\n  }\n}\n\n% We have this subroutine so that we can handle at least some @value's\n% properly in indexes (we call \\makevalueexpandable in \\indexdummies).\n% The command has to be fully expandable (if the variable is set), since\n% the result winds up in 
the index file.  This means that if the\n% variable's value contains other Texinfo commands, it's almost certain\n% it will fail (although perhaps we could fix that with sufficient work\n% to do a one-level expansion on the result, instead of complete).\n%\n\\def\\expandablevalue#1{%\n  \\expandafter\\ifx\\csname SET#1\\endcsname\\relax\n    {[No value for ``#1'']}%\n    \\message{Variable `#1', used in @value, is not set.}%\n  \\else\n    \\csname SET#1\\endcsname\n  \\fi\n}\n\n% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined\n% with @set.\n%\n% To get special treatment of `@end ifset,' call \\makeond and the redefine.\n%\n\\makecond{ifset}\n\\def\\ifset{\\parsearg{\\doifset{\\let\\next=\\ifsetfail}}}\n\\def\\doifset#1#2{%\n  {%\n    \\makevalueexpandable\n    \\let\\next=\\empty\n    \\expandafter\\ifx\\csname SET#2\\endcsname\\relax\n      #1% If not set, redefine \\next.\n    \\fi\n    \\expandafter\n  }\\next\n}\n\\def\\ifsetfail{\\doignore{ifset}}\n\n% @ifclear VAR ... @end executes the `...' iff VAR has never been\n% defined with @set, or has been undefined with @clear.\n%\n% The `\\else' inside the `\\doifset' parameter is a trick to reuse the\n% above code: if the variable is not set, do nothing, if it is set,\n% then redefine \\next to \\ifclearfail.\n%\n\\makecond{ifclear}\n\\def\\ifclear{\\parsearg{\\doifset{\\else \\let\\next=\\ifclearfail}}}\n\\def\\ifclearfail{\\doignore{ifclear}}\n\n% @ifcommandisdefined CMD ... @end executes the `...' if CMD (written\n% without the @) is in fact defined.  
We can only feasibly check at the\n% TeX level, so something like `mathcode' is going to considered\n% defined even though it is not a Texinfo command.\n% \n\\makecond{ifcommanddefined}\n\\def\\ifcommanddefined{\\parsearg{\\doifcmddefined{\\let\\next=\\ifcmddefinedfail}}}\n%\n\\def\\doifcmddefined#1#2{{%\n    \\makevalueexpandable\n    \\let\\next=\\empty\n    \\expandafter\\ifx\\csname #2\\endcsname\\relax\n      #1% If not defined, \\let\\next as above.\n    \\fi\n    \\expandafter\n  }\\next\n}\n\\def\\ifcmddefinedfail{\\doignore{ifcommanddefined}}\n\n% @ifcommandnotdefined CMD ... handled similar to @ifclear above.\n\\makecond{ifcommandnotdefined}\n\\def\\ifcommandnotdefined{%\n  \\parsearg{\\doifcmddefined{\\else \\let\\next=\\ifcmdnotdefinedfail}}}\n\\def\\ifcmdnotdefinedfail{\\doignore{ifcommandnotdefined}}\n\n% Set the `txicommandconditionals' variable, so documents have a way to\n% test if the @ifcommand...defined conditionals are available.\n\\set txicommandconditionals\n\n% @dircategory CATEGORY  -- specify a category of the dir file\n% which this file should belong to.  Ignore this in TeX.\n\\let\\dircategory=\\comment\n\n% @defininfoenclose.\n\\let\\definfoenclose=\\comment\n\n\n\\message{indexing,}\n% Index generation facilities\n\n% Define \\newwrite to be identical to plain tex's \\newwrite\n% except not \\outer, so it can be used within macros and \\if's.\n\\edef\\newwrite{\\makecsname{ptexnewwrite}}\n\n% \\newindex {foo} defines an index named foo.\n% It automatically defines \\fooindex such that\n% \\fooindex ...rest of line... puts an entry in the index foo.\n% It also defines \\fooindfile to be the number of the output channel for\n% the file that accumulates this index.  
The file's extension is foo.\n% The name of an index should be no more than 2 characters long\n% for the sake of vms.\n%\n\\def\\newindex#1{%\n  \\iflinks\n    \\expandafter\\newwrite \\csname#1indfile\\endcsname\n    \\openout \\csname#1indfile\\endcsname \\jobname.#1 % Open the file\n  \\fi\n  \\expandafter\\xdef\\csname#1index\\endcsname{%     % Define @#1index\n    \\noexpand\\doindex{#1}}\n}\n\n% @defindex foo  ==  \\newindex{foo}\n%\n\\def\\defindex{\\parsearg\\newindex}\n\n% Define @defcodeindex, like @defindex except put all entries in @code.\n%\n\\def\\defcodeindex{\\parsearg\\newcodeindex}\n%\n\\def\\newcodeindex#1{%\n  \\iflinks\n    \\expandafter\\newwrite \\csname#1indfile\\endcsname\n    \\openout \\csname#1indfile\\endcsname \\jobname.#1\n  \\fi\n  \\expandafter\\xdef\\csname#1index\\endcsname{%\n    \\noexpand\\docodeindex{#1}}%\n}\n\n\n% @synindex foo bar    makes index foo feed into index bar.\n% Do this instead of @defindex foo if you don't want it as a separate index.\n%\n% @syncodeindex foo bar   similar, but put all entries made for index foo\n% inside @code.\n%\n\\def\\synindex#1 #2 {\\dosynindex\\doindex{#1}{#2}}\n\\def\\syncodeindex#1 #2 {\\dosynindex\\docodeindex{#1}{#2}}\n\n% #1 is \\doindex or \\docodeindex, #2 the index getting redefined (foo),\n% #3 the target index (bar).\n\\def\\dosynindex#1#2#3{%\n  % Only do \\closeout if we haven't already done it, else we'll end up\n  % closing the target index.\n  \\expandafter \\ifx\\csname donesynindex#2\\endcsname \\relax\n    % The \\closeout helps reduce unnecessary open files; the limit on the\n    % Acorn RISC OS is a mere 16 files.\n    \\expandafter\\closeout\\csname#2indfile\\endcsname\n    \\expandafter\\let\\csname donesynindex#2\\endcsname = 1\n  \\fi\n  % redefine \\fooindfile:\n  \\expandafter\\let\\expandafter\\temp\\expandafter=\\csname#3indfile\\endcsname\n  \\expandafter\\let\\csname#2indfile\\endcsname=\\temp\n  % redefine \\fooindex:\n  
\\expandafter\\xdef\\csname#2index\\endcsname{\\noexpand#1{#3}}%\n}\n\n% Define \\doindex, the driver for all \\fooindex macros.\n% Argument #1 is generated by the calling \\fooindex macro,\n%  and it is \"foo\", the name of the index.\n\n% \\doindex just uses \\parsearg; it calls \\doind for the actual work.\n% This is because \\doind is more useful to call from other macros.\n\n% There is also \\dosubind {index}{topic}{subtopic}\n% which makes an entry in a two-level index such as the operation index.\n\n\\def\\doindex#1{\\edef\\indexname{#1}\\parsearg\\singleindexer}\n\\def\\singleindexer #1{\\doind{\\indexname}{#1}}\n\n% like the previous two, but they put @code around the argument.\n\\def\\docodeindex#1{\\edef\\indexname{#1}\\parsearg\\singlecodeindexer}\n\\def\\singlecodeindexer #1{\\doind{\\indexname}{\\code{#1}}}\n\n% Take care of Texinfo commands that can appear in an index entry.\n% Since there are some commands we want to expand, and others we don't,\n% we have to laboriously prevent expansion for those that we don't.\n%\n\\def\\indexdummies{%\n  \\escapechar = `\\\\     % use backslash in output files.\n  \\def\\@{@}% change to @@ when we switch to @ as escape char in index files.\n  \\def\\ {\\realbackslash\\space }%\n  %\n  % Need these unexpandable (because we define \\tt as a dummy)\n  % definitions when @{ or @} appear in index entry text.  Also, more\n  % complicated, when \\tex is in effect and \\{ is a \\delimiter again.\n  % We can't use \\lbracecmd and \\rbracecmd because texindex assumes\n  % braces and backslashes are used only as delimiters.  Perhaps we\n  % should define @lbrace and @rbrace commands a la @comma.\n  \\def\\{{{\\tt\\char123}}%\n  \\def\\}{{\\tt\\char125}}%\n  %\n  % I don't entirely understand this, but when an index entry is\n  % generated from a macro call, the \\endinput which \\scanmacro inserts\n  % causes processing to be prematurely terminated.  
This is,\n  % apparently, because \\indexsorttmp is fully expanded, and \\endinput\n  % is an expandable command.  The redefinition below makes \\endinput\n  % disappear altogether for that purpose -- although logging shows that\n  % processing continues to some further point.  On the other hand, it\n  % seems \\endinput does not hurt in the printed index arg, since that\n  % is still getting written without apparent harm.\n  %\n  % Sample source (mac-idx3.tex, reported by Graham Percival to\n  % help-texinfo, 22may06):\n  % @macro funindex {WORD}\n  % @findex xyz\n  % @end macro\n  % ...\n  % @funindex commtest\n  %\n  % The above is not enough to reproduce the bug, but it gives the flavor.\n  %\n  % Sample whatsit resulting:\n  % .@write3{\\entry{xyz}{@folio }{@code {xyz@endinput }}}\n  %\n  % So:\n  \\let\\endinput = \\empty\n  %\n  % Do the redefinitions.\n  \\commondummies\n}\n\n% For the aux and toc files, @ is the escape character.  So we want to\n% redefine everything using @ as the escape character (instead of\n% \\realbackslash, still used for index files).  When everything uses @,\n% this will be simpler.\n%\n\\def\\atdummies{%\n  \\def\\@{@@}%\n  \\def\\ {@ }%\n  \\let\\{ = \\lbraceatcmd\n  \\let\\} = \\rbraceatcmd\n  %\n  % Do the redefinitions.\n  \\commondummies\n  \\otherbackslash\n}\n\n% Called from \\indexdummies and \\atdummies.\n%\n\\def\\commondummies{%\n  %\n  % \\definedummyword defines \\#1 as \\string\\#1\\space, thus effectively\n  % preventing its expansion.  This is used only for control words,\n  % not control letters, because the \\space would be incorrect for\n  % control characters, but is needed to separate the control word\n  % from whatever follows.\n  %\n  % For control letters, we have \\definedummyletter, which omits the\n  % space.\n  %\n  % These can be used both for control words that take an argument and\n  % those that do not.  
If it is followed by {arg} in the input, then\n  % that will dutifully get written to the index (or wherever).\n  %\n  \\def\\definedummyword  ##1{\\def##1{\\string##1\\space}}%\n  \\def\\definedummyletter##1{\\def##1{\\string##1}}%\n  \\let\\definedummyaccent\\definedummyletter\n  %\n  \\commondummiesnofonts\n  %\n  \\definedummyletter\\_%\n  \\definedummyletter\\-%\n  %\n  % Non-English letters.\n  \\definedummyword\\AA\n  \\definedummyword\\AE\n  \\definedummyword\\DH\n  \\definedummyword\\L\n  \\definedummyword\\O\n  \\definedummyword\\OE\n  \\definedummyword\\TH\n  \\definedummyword\\aa\n  \\definedummyword\\ae\n  \\definedummyword\\dh\n  \\definedummyword\\exclamdown\n  \\definedummyword\\l\n  \\definedummyword\\o\n  \\definedummyword\\oe\n  \\definedummyword\\ordf\n  \\definedummyword\\ordm\n  \\definedummyword\\questiondown\n  \\definedummyword\\ss\n  \\definedummyword\\th\n  %\n  % Although these internal commands shouldn't show up, sometimes they do.\n  \\definedummyword\\bf\n  \\definedummyword\\gtr\n  \\definedummyword\\hat\n  \\definedummyword\\less\n  \\definedummyword\\sf\n  \\definedummyword\\sl\n  \\definedummyword\\tclose\n  \\definedummyword\\tt\n  %\n  \\definedummyword\\LaTeX\n  \\definedummyword\\TeX\n  %\n  % Assorted special characters.\n  \\definedummyword\\arrow\n  \\definedummyword\\bullet\n  \\definedummyword\\comma\n  \\definedummyword\\copyright\n  \\definedummyword\\registeredsymbol\n  \\definedummyword\\dots\n  \\definedummyword\\enddots\n  \\definedummyword\\entrybreak\n  \\definedummyword\\equiv\n  \\definedummyword\\error\n  \\definedummyword\\euro\n  \\definedummyword\\expansion\n  \\definedummyword\\geq\n  \\definedummyword\\guillemetleft\n  \\definedummyword\\guillemetright\n  \\definedummyword\\guilsinglleft\n  \\definedummyword\\guilsinglright\n  \\definedummyword\\lbracechar\n  \\definedummyword\\leq\n  \\definedummyword\\minus\n  \\definedummyword\\ogonek\n  \\definedummyword\\pounds\n  \\definedummyword\\point\n  
\\definedummyword\\print\n  \\definedummyword\\quotedblbase\n  \\definedummyword\\quotedblleft\n  \\definedummyword\\quotedblright\n  \\definedummyword\\quoteleft\n  \\definedummyword\\quoteright\n  \\definedummyword\\quotesinglbase\n  \\definedummyword\\rbracechar\n  \\definedummyword\\result\n  \\definedummyword\\textdegree\n  %\n  % We want to disable all macros so that they are not expanded by \\write.\n  \\macrolist\n  %\n  \\normalturnoffactive\n  %\n  % Handle some cases of @value -- where it does not contain any\n  % (non-fully-expandable) commands.\n  \\makevalueexpandable\n}\n\n% \\commondummiesnofonts: common to \\commondummies and \\indexnofonts.\n%\n\\def\\commondummiesnofonts{%\n  % Control letters and accents.\n  \\definedummyletter\\!%\n  \\definedummyaccent\\\"%\n  \\definedummyaccent\\'%\n  \\definedummyletter\\*%\n  \\definedummyaccent\\,%\n  \\definedummyletter\\.%\n  \\definedummyletter\\/%\n  \\definedummyletter\\:%\n  \\definedummyaccent\\=%\n  \\definedummyletter\\?%\n  \\definedummyaccent\\^%\n  \\definedummyaccent\\`%\n  \\definedummyaccent\\~%\n  \\definedummyword\\u\n  \\definedummyword\\v\n  \\definedummyword\\H\n  \\definedummyword\\dotaccent\n  \\definedummyword\\ogonek\n  \\definedummyword\\ringaccent\n  \\definedummyword\\tieaccent\n  \\definedummyword\\ubaraccent\n  \\definedummyword\\udotaccent\n  \\definedummyword\\dotless\n  %\n  % Texinfo font commands.\n  \\definedummyword\\b\n  \\definedummyword\\i\n  \\definedummyword\\r\n  \\definedummyword\\sansserif\n  \\definedummyword\\sc\n  \\definedummyword\\slanted\n  \\definedummyword\\t\n  %\n  % Commands that take arguments.\n  \\definedummyword\\abbr\n  \\definedummyword\\acronym\n  \\definedummyword\\anchor\n  \\definedummyword\\cite\n  \\definedummyword\\code\n  \\definedummyword\\command\n  \\definedummyword\\dfn\n  \\definedummyword\\dmn\n  \\definedummyword\\email\n  \\definedummyword\\emph\n  \\definedummyword\\env\n  \\definedummyword\\file\n  \\definedummyword\\image\n  
\\definedummyword\\indicateurl\n  \\definedummyword\\inforef\n  \\definedummyword\\kbd\n  \\definedummyword\\key\n  \\definedummyword\\math\n  \\definedummyword\\option\n  \\definedummyword\\pxref\n  \\definedummyword\\ref\n  \\definedummyword\\samp\n  \\definedummyword\\strong\n  \\definedummyword\\tie\n  \\definedummyword\\uref\n  \\definedummyword\\url\n  \\definedummyword\\var\n  \\definedummyword\\verb\n  \\definedummyword\\w\n  \\definedummyword\\xref\n}\n\n% \\indexnofonts is used when outputting the strings to sort the index\n% by, and when constructing control sequence names.  It eliminates all\n% control sequences and just writes whatever the best ASCII sort string\n% would be for a given command (usually its argument).\n%\n\\def\\indexnofonts{%\n  % Accent commands should become @asis.\n  \\def\\definedummyaccent##1{\\let##1\\asis}%\n  % We can just ignore other control letters.\n  \\def\\definedummyletter##1{\\let##1\\empty}%\n  % All control words become @asis by default; overrides below.\n  \\let\\definedummyword\\definedummyaccent\n  %\n  \\commondummiesnofonts\n  %\n  % Don't no-op \\tt, since it isn't a user-level command\n  % and is used in the definitions of the active chars like <, >, |, etc.\n  % Likewise with the other plain tex font commands.\n  %\\let\\tt=\\asis\n  %\n  \\def\\ { }%\n  \\def\\@{@}%\n  \\def\\_{\\normalunderscore}%\n  \\def\\-{}% @- shouldn't affect sorting\n  %\n  % Unfortunately, texindex is not prepared to handle braces in the\n  % content at all.  
So for index sorting, we map @{ and @} to strings\n  % starting with |, since that ASCII character is between ASCII { and }.\n  \\def\\{{|a}%\n  \\def\\lbracechar{|a}%\n  %\n  \\def\\}{|b}%\n  \\def\\rbracechar{|b}%\n  %\n  % Non-English letters.\n  \\def\\AA{AA}%\n  \\def\\AE{AE}%\n  \\def\\DH{DZZ}%\n  \\def\\L{L}%\n  \\def\\OE{OE}%\n  \\def\\O{O}%\n  \\def\\TH{ZZZ}%\n  \\def\\aa{aa}%\n  \\def\\ae{ae}%\n  \\def\\dh{dzz}%\n  \\def\\exclamdown{!}%\n  \\def\\l{l}%\n  \\def\\oe{oe}%\n  \\def\\ordf{a}%\n  \\def\\ordm{o}%\n  \\def\\o{o}%\n  \\def\\questiondown{?}%\n  \\def\\ss{ss}%\n  \\def\\th{zzz}%\n  %\n  \\def\\LaTeX{LaTeX}%\n  \\def\\TeX{TeX}%\n  %\n  % Assorted special characters.\n  % (The following {} will end up in the sort string, but that's ok.)\n  \\def\\arrow{->}%\n  \\def\\bullet{bullet}%\n  \\def\\comma{,}%\n  \\def\\copyright{copyright}%\n  \\def\\dots{...}%\n  \\def\\enddots{...}%\n  \\def\\equiv{==}%\n  \\def\\error{error}%\n  \\def\\euro{euro}%\n  \\def\\expansion{==>}%\n  \\def\\geq{>=}%\n  \\def\\guillemetleft{<<}%\n  \\def\\guillemetright{>>}%\n  \\def\\guilsinglleft{<}%\n  \\def\\guilsinglright{>}%\n  \\def\\leq{<=}%\n  \\def\\minus{-}%\n  \\def\\point{.}%\n  \\def\\pounds{pounds}%\n  \\def\\print{-|}%\n  \\def\\quotedblbase{\"}%\n  \\def\\quotedblleft{\"}%\n  \\def\\quotedblright{\"}%\n  \\def\\quoteleft{`}%\n  \\def\\quoteright{'}%\n  \\def\\quotesinglbase{,}%\n  \\def\\registeredsymbol{R}%\n  \\def\\result{=>}%\n  \\def\\textdegree{o}%\n  %\n  \\expandafter\\ifx\\csname SETtxiindexlquoteignore\\endcsname\\relax\n  \\else \\indexlquoteignore \\fi\n  %\n  % We need to get rid of all macros, leaving only the arguments (if present).\n  % Of course this is not nearly correct, but it is the best we can do for now.\n  % makeinfo does not expand macros in the argument to @deffn, which ends up\n  % writing an index entry, and texindex isn't prepared for an index sort entry\n  % that starts with \\.\n  %\n  % Since macro invocations are followed by 
braces, we can just redefine them\n  % to take a single TeX argument.  The case of a macro invocation that\n  % goes to end-of-line is not handled.\n  %\n  \\macrolist\n}\n\n% Undocumented (for FSFS 2nd ed.): @set txiindexlquoteignore makes us\n% ignore left quotes in the sort term.\n{\\catcode`\\`=\\active\n \\gdef\\indexlquoteignore{\\let`=\\empty}}\n\n\\let\\indexbackslash=0  %overridden during \\printindex.\n\\let\\SETmarginindex=\\relax % put index entries in margin (undocumented)?\n\n% Most index entries go through here, but \\dosubind is the general case.\n% #1 is the index name, #2 is the entry text.\n\\def\\doind#1#2{\\dosubind{#1}{#2}{}}\n\n% Workhorse for all \\fooindexes.\n% #1 is name of index, #2 is stuff to put there, #3 is subentry --\n% empty if called from \\doind, as we usually are (the main exception\n% is with most defuns, which call us directly).\n%\n\\def\\dosubind#1#2#3{%\n  \\iflinks\n  {%\n    % Store the main index entry text (including the third arg).\n    \\toks0 = {#2}%\n    % If third arg is present, precede it with a space.\n    \\def\\thirdarg{#3}%\n    \\ifx\\thirdarg\\empty \\else\n      \\toks0 = \\expandafter{\\the\\toks0 \\space #3}%\n    \\fi\n    %\n    \\edef\\writeto{\\csname#1indfile\\endcsname}%\n    %\n    \\safewhatsit\\dosubindwrite\n  }%\n  \\fi\n}\n\n% Write the entry in \\toks0 to the index file:\n%\n\\def\\dosubindwrite{%\n  % Put the index entry in the margin if desired.\n  \\ifx\\SETmarginindex\\relax\\else\n    \\insert\\margin{\\hbox{\\vrule height8pt depth3pt width0pt \\the\\toks0}}%\n  \\fi\n  %\n  % Remember, we are within a group.\n  \\indexdummies % Must do this here, since \\bf, etc expand at this stage\n  \\def\\backslashcurfont{\\indexbackslash}% \\indexbackslash isn't defined now\n      % so it will be output as is; and it will print as backslash.\n  %\n  % Process the index entry with all font commands turned off, to\n  % get the string to sort by.\n  {\\indexnofonts\n   \\edef\\temp{\\the\\toks0}% 
need full expansion\n   \\xdef\\indexsorttmp{\\temp}%\n  }%\n  %\n  % Set up the complete index entry, with both the sort key and\n  % the original text, including any font commands.  We write\n  % three arguments to \\entry to the .?? file (four in the\n  % subentry case), texindex reduces to two when writing the .??s\n  % sorted result.\n  \\edef\\temp{%\n    \\write\\writeto{%\n      \\string\\entry{\\indexsorttmp}{\\noexpand\\folio}{\\the\\toks0}}%\n  }%\n  \\temp\n}\n\n% Take care of unwanted page breaks/skips around a whatsit:\n%\n% If a skip is the last thing on the list now, preserve it\n% by backing up by \\lastskip, doing the \\write, then inserting\n% the skip again.  Otherwise, the whatsit generated by the\n% \\write or \\pdfdest will make \\lastskip zero.  The result is that\n% sequences like this:\n% @end defun\n% @tindex whatever\n% @defun ...\n% will have extra space inserted, because the \\medbreak in the\n% start of the @defun won't see the skip inserted by the @end of\n% the previous defun.\n%\n% But don't do any of this if we're not in vertical mode.  We\n% don't want to do a \\vskip and prematurely end a paragraph.\n%\n% Avoid page breaks due to these extra skips, too.\n%\n% But wait, there is a catch there:\n% We'll have to check whether \\lastskip is zero skip.  \\ifdim is not\n% sufficient for this purpose, as it ignores stretch and shrink parts\n% of the skip.  
The only way seems to be to check the textual\n% representation of the skip.\n%\n% The following is almost like \\def\\zeroskipmacro{0.0pt} except that\n% the ``p'' and ``t'' characters have catcode \\other, not 11 (letter).\n%\n\\edef\\zeroskipmacro{\\expandafter\\the\\csname z@skip\\endcsname}\n%\n\\newskip\\whatsitskip\n\\newcount\\whatsitpenalty\n%\n% ..., ready, GO:\n%\n\\def\\safewhatsit#1{\\ifhmode\n  #1%\n \\else\n  % \\lastskip and \\lastpenalty cannot both be nonzero simultaneously.\n  \\whatsitskip = \\lastskip\n  \\edef\\lastskipmacro{\\the\\lastskip}%\n  \\whatsitpenalty = \\lastpenalty\n  %\n  % If \\lastskip is nonzero, that means the last item was a\n  % skip.  And since a skip is discardable, that means this\n  % -\\whatsitskip glue we're inserting is preceded by a\n  % non-discardable item, therefore it is not a potential\n  % breakpoint, therefore no \\nobreak needed.\n  \\ifx\\lastskipmacro\\zeroskipmacro\n  \\else\n    \\vskip-\\whatsitskip\n  \\fi\n  %\n  #1%\n  %\n  \\ifx\\lastskipmacro\\zeroskipmacro\n    % If \\lastskip was zero, perhaps the last item was a penalty, and\n    % perhaps it was >=10000, e.g., a \\nobreak.  In that case, we want\n    % to re-insert the same penalty (values >10000 are used for various\n    % signals); since we just inserted a non-discardable item, any\n    % following glue (such as a \\parskip) would be a breakpoint.  
For example:\n    %   @deffn deffn-whatever\n    %   @vindex index-whatever\n    %   Description.\n    % would allow a break between the index-whatever whatsit\n    % and the \"Description.\" paragraph.\n    \\ifnum\\whatsitpenalty>9999 \\penalty\\whatsitpenalty \\fi\n  \\else\n    % On the other hand, if we had a nonzero \\lastskip,\n    % this make-up glue would be preceded by a non-discardable item\n    % (the whatsit from the \\write), so we must insert a \\nobreak.\n    \\nobreak\\vskip\\whatsitskip\n  \\fi\n\\fi}\n\n% The index entry written in the file actually looks like\n%  \\entry {sortstring}{page}{topic}\n% or\n%  \\entry {sortstring}{page}{topic}{subtopic}\n% The texindex program reads in these files and writes files\n% containing these kinds of lines:\n%  \\initial {c}\n%     before the first topic whose initial is c\n%  \\entry {topic}{pagelist}\n%     for a topic that is used without subtopics\n%  \\primary {topic}\n%     for the beginning of a topic that is used with subtopics\n%  \\secondary {subtopic}{pagelist}\n%     for each subtopic.\n\n% Define the user-accessible indexing commands\n% @findex, @vindex, @kindex, @cindex.\n\n\\def\\findex {\\fnindex}\n\\def\\kindex {\\kyindex}\n\\def\\cindex {\\cpindex}\n\\def\\vindex {\\vrindex}\n\\def\\tindex {\\tpindex}\n\\def\\pindex {\\pgindex}\n\n\\def\\cindexsub {\\begingroup\\obeylines\\cindexsub}\n{\\obeylines %\n\\gdef\\cindexsub \"#1\" #2^^M{\\endgroup %\n\\dosubind{cp}{#2}{#1}}}\n\n% Define the macros used in formatting output of the sorted index material.\n\n% @printindex causes a particular index (the ??s file) to get printed.\n% It does not print any chapter heading (usually an @unnumbered).\n%\n\\parseargdef\\printindex{\\begingroup\n  \\dobreak \\chapheadingskip{10000}%\n  %\n  \\smallfonts \\rm\n  \\tolerance = 9500\n  \\plainfrenchspacing\n  \\everypar = {}% don't want the \\kern\\-parindent from indentation suppression.\n  %\n  % See if the index file exists and is nonempty.\n  % Change 
catcode of @ here so that if the index file contains\n  % \\initial {@}\n  % as its first line, TeX doesn't complain about mismatched braces\n  % (because it thinks @} is a control sequence).\n  \\catcode`\\@ = 11\n  \\openin 1 \\jobname.#1s\n  \\ifeof 1\n    % \\enddoublecolumns gets confused if there is no text in the index,\n    % and it loses the chapter title and the aux file entries for the\n    % index.  The easiest way to prevent this problem is to make sure\n    % there is some text.\n    \\putwordIndexNonexistent\n  \\else\n    %\n    % If the index file exists but is empty, then \\openin leaves \\ifeof\n    % false.  We have to make TeX try to read something from the file, so\n    % it can discover if there is anything in it.\n    \\read 1 to \\temp\n    \\ifeof 1\n      \\putwordIndexIsEmpty\n    \\else\n      % Index files are almost Texinfo source, but we use \\ as the escape\n      % character.  It would be better to use @, but that's too big a change\n      % to make right now.\n      \\def\\indexbackslash{\\backslashcurfont}%\n      \\catcode`\\\\ = 0\n      \\escapechar = `\\\\\n      \\begindoublecolumns\n      \\input \\jobname.#1s\n      \\enddoublecolumns\n    \\fi\n  \\fi\n  \\closein 1\n\\endgroup}\n\n% These macros are used by the sorted index file itself.\n% Change them to control the appearance of the index.\n\n\\def\\initial#1{{%\n  % Some minor font changes for the special characters.\n  \\let\\tentt=\\sectt \\let\\tt=\\sectt \\let\\sf=\\sectt\n  %\n  % Remove any glue we may have, we'll be inserting our own.\n  \\removelastskip\n  %\n  % We like breaks before the index initials, so insert a bonus.\n  \\nobreak\n  \\vskip 0pt plus 3\\baselineskip\n  \\penalty 0\n  \\vskip 0pt plus -3\\baselineskip\n  %\n  % Typeset the initial.  Making this add up to a whole number of\n  % baselineskips increases the chance of the dots lining up from column\n  % to column.  
It still won't often be perfect, because of the stretch\n  % we need before each entry, but it's better.\n  %\n  % No shrink because it confuses \\balancecolumns.\n  \\vskip 1.67\\baselineskip plus .5\\baselineskip\n  \\leftline{\\secbf #1}%\n  % Do our best not to break after the initial.\n  \\nobreak\n  \\vskip .33\\baselineskip plus .1\\baselineskip\n}}\n\n% \\entry typesets a paragraph consisting of the text (#1), dot leaders, and\n% then page number (#2) flushed to the right margin.  It is used for index\n% and table of contents entries.  The paragraph is indented by \\leftskip.\n%\n% A straightforward implementation would start like this:\n%\t\\def\\entry#1#2{...\n% But this freezes the catcodes in the argument, and can cause problems to\n% @code, which sets - active.  This problem was fixed by a kludge---\n% ``-'' was active throughout whole index, but this isn't really right.\n% The right solution is to prevent \\entry from swallowing the whole text.\n%                                 --kasal, 21nov03\n\\def\\entry{%\n  \\begingroup\n    %\n    % Start a new paragraph if necessary, so our assignments below can't\n    % affect previous text.\n    \\par\n    %\n    % Do not fill out the last line with white space.\n    \\parfillskip = 0in\n    %\n    % No extra space above this paragraph.\n    \\parskip = 0in\n    %\n    % Do not prefer a separate line ending with a hyphen to fewer lines.\n    \\finalhyphendemerits = 0\n    %\n    % \\hangindent is only relevant when the entry text and page number\n    % don't both fit on one line.  In that case, bob suggests starting the\n    % dots pretty far over on the line.  Unfortunately, a large\n    % indentation looks wrong when the entry text itself is broken across\n    % lines.  
So we use a small indentation and put up with long leaders.\n    %\n    % \\hangafter is reset to 1 (which is the value we want) at the start\n    % of each paragraph, so we need not do anything with that.\n    \\hangindent = 2em\n    %\n    % When the entry text needs to be broken, just fill out the first line\n    % with blank space.\n    \\rightskip = 0pt plus1fil\n    %\n    % A bit of stretch before each entry for the benefit of balancing\n    % columns.\n    \\vskip 0pt plus1pt\n    %\n    % When reading the text of entry, convert explicit line breaks\n    % from @* into spaces.  The user might give these in long section\n    % titles, for instance.\n    \\def\\*{\\unskip\\space\\ignorespaces}%\n    \\def\\entrybreak{\\hfil\\break}%\n    %\n    % Swallow the left brace of the text (first parameter):\n    \\afterassignment\\doentry\n    \\let\\temp =\n}\n\\def\\entrybreak{\\unskip\\space\\ignorespaces}%\n\\def\\doentry{%\n    \\bgroup % Instead of the swallowed brace.\n      \\noindent\n      \\aftergroup\\finishentry\n      % And now comes the text of the entry.\n}\n\\def\\finishentry#1{%\n    % #1 is the page number.\n    %\n    % The following is kludged to not output a line of dots in the index if\n    % there are no page numbers.  The next person who breaks this will be\n    % cursed by a Unix daemon.\n    \\setbox\\boxA = \\hbox{#1}%\n    \\ifdim\\wd\\boxA = 0pt\n      \\ %\n    \\else\n      %\n      % If we must, put the page number on a line of its own, and fill out\n      % this line with blank space.  (The \\hfil is overwhelmed with the\n      % fill leaders glue in \\indexdotfill if the page number does fit.)\n      \\hfil\\penalty50\n      \\null\\nobreak\\indexdotfill % Have leaders before the page number.\n      %\n      % The `\\ ' here is removed by the implicit \\unskip that TeX does as\n      % part of (the primitive) \\par.  
Without it, a spurious underfull\n      % \\hbox ensues.\n      \\ifpdf\n\t\\pdfgettoks#1.%\n\t\\ \\the\\toksA\n      \\else\n\t\\ #1%\n      \\fi\n    \\fi\n    \\par\n  \\endgroup\n}\n\n% Like plain.tex's \\dotfill, except uses up at least 1 em.\n\\def\\indexdotfill{\\cleaders\n  \\hbox{$\\mathsurround=0pt \\mkern1.5mu.\\mkern1.5mu$}\\hskip 1em plus 1fill}\n\n\\def\\primary #1{\\line{#1\\hfil}}\n\n\\newskip\\secondaryindent \\secondaryindent=0.5cm\n\\def\\secondary#1#2{{%\n  \\parfillskip=0in\n  \\parskip=0in\n  \\hangindent=1in\n  \\hangafter=1\n  \\noindent\\hskip\\secondaryindent\\hbox{#1}\\indexdotfill\n  \\ifpdf\n    \\pdfgettoks#2.\\ \\the\\toksA % The page number ends the paragraph.\n  \\else\n    #2\n  \\fi\n  \\par\n}}\n\n% Define two-column mode, which we use to typeset indexes.\n% Adapted from the TeXbook, page 416, which is to say,\n% the manmac.tex format used to print the TeXbook itself.\n\\catcode`\\@=11\n\n\\newbox\\partialpage\n\\newdimen\\doublecolumnhsize\n\n\\def\\begindoublecolumns{\\begingroup % ended by \\enddoublecolumns\n  % Grab any single-column material above us.\n  \\output = {%\n    %\n    % Here is a possibility not foreseen in manmac: if we accumulate a\n    % whole lot of material, we might end up calling this \\output\n    % routine twice in a row (see the doublecol-lose test, which is\n    % essentially a couple of indexes with @setchapternewpage off).  In\n    % that case we just ship out what is in \\partialpage with the normal\n    % output routine.  Generally, \\partialpage will be empty when this\n    % runs and this will be a no-op.  
See the indexspread.tex test case.\n    \\ifvoid\\partialpage \\else\n      \\onepageout{\\pagecontents\\partialpage}%\n    \\fi\n    %\n    \\global\\setbox\\partialpage = \\vbox{%\n      % Unvbox the main output page.\n      \\unvbox\\PAGE\n      \\kern-\\topskip \\kern\\baselineskip\n    }%\n  }%\n  \\eject % run that output routine to set \\partialpage\n  %\n  % Use the double-column output routine for subsequent pages.\n  \\output = {\\doublecolumnout}%\n  %\n  % Change the page size parameters.  We could do this once outside this\n  % routine, in each of @smallbook, @afourpaper, and the default 8.5x11\n  % format, but then we repeat the same computation.  Repeating a couple\n  % of assignments once per index is clearly meaningless for the\n  % execution time, so we may as well do it in one place.\n  %\n  % First we halve the line length, less a little for the gutter between\n  % the columns.  We compute the gutter based on the line length, so it\n  % changes automatically with the paper format.  The magic constant\n  % below is chosen so that the gutter has the same value (well, +-<1pt)\n  % as it did when we hard-coded it.\n  %\n  % We put the result in a separate register, \\doublecolumhsize, so we\n  % can restore it in \\pagesofar, after \\hsize itself has (potentially)\n  % been clobbered.\n  %\n  \\doublecolumnhsize = \\hsize\n    \\advance\\doublecolumnhsize by -.04154\\hsize\n    \\divide\\doublecolumnhsize by 2\n  \\hsize = \\doublecolumnhsize\n  %\n  % Double the \\vsize as well.  
(We don't need a separate register here,\n  % since nobody clobbers \\vsize.)\n  \\vsize = 2\\vsize\n}\n\n% The double-column output routine for all double-column pages except\n% the last.\n%\n\\def\\doublecolumnout{%\n  \\splittopskip=\\topskip \\splitmaxdepth=\\maxdepth\n  % Get the available space for the double columns -- the normal\n  % (undoubled) page height minus any material left over from the\n  % previous page.\n  \\dimen@ = \\vsize\n  \\divide\\dimen@ by 2\n  \\advance\\dimen@ by -\\ht\\partialpage\n  %\n  % box0 will be the left-hand column, box2 the right.\n  \\setbox0=\\vsplit255 to\\dimen@ \\setbox2=\\vsplit255 to\\dimen@\n  \\onepageout\\pagesofar\n  \\unvbox255\n  \\penalty\\outputpenalty\n}\n%\n% Re-output the contents of the output page -- any previous material,\n% followed by the two boxes we just split, in box0 and box2.\n\\def\\pagesofar{%\n  \\unvbox\\partialpage\n  %\n  \\hsize = \\doublecolumnhsize\n  \\wd0=\\hsize \\wd2=\\hsize\n  \\hbox to\\pagewidth{\\box0\\hfil\\box2}%\n}\n%\n% All done with double columns.\n\\def\\enddoublecolumns{%\n  % The following penalty ensures that the page builder is exercised\n  % _before_ we change the output routine.  This is necessary in the\n  % following situation:\n  %\n  % The last section of the index consists only of a single entry.\n  % Before this section, \\pagetotal is less than \\pagegoal, so no\n  % break occurs before the last section starts.  However, the last\n  % section, consisting of \\initial and the single \\entry, does not\n  % fit on the page and has to be broken off.  Without the following\n  % penalty the page builder will not be exercised until \\eject\n  % below, and by that time we'll already have changed the output\n  % routine to the \\balancecolumns version, so the next-to-last\n  % double-column page will be processed with \\balancecolumns, which\n  % is wrong:  The two columns will go to the main vertical list, with\n  % the broken-off section in the recent contributions.  
As soon as\n  % the output routine finishes, TeX starts reconsidering the page\n  % break.  The two columns and the broken-off section both fit on the\n  % page, because the two columns now take up only half of the page\n  % goal.  When TeX sees \\eject from below which follows the final\n  % section, it invokes the new output routine that we've set after\n  % \\balancecolumns below; \\onepageout will try to fit the two columns\n  % and the final section into the vbox of \\pageheight (see\n  % \\pagebody), causing an overfull box.\n  %\n  % Note that glue won't work here, because glue does not exercise the\n  % page builder, unlike penalties (see The TeXbook, pp. 280-281).\n  \\penalty0\n  %\n  \\output = {%\n    % Split the last of the double-column material.  Leave it on the\n    % current page, no automatic page break.\n    \\balancecolumns\n    %\n    % If we end up splitting too much material for the current page,\n    % though, there will be another page break right after this \\output\n    % invocation ends.  Having called \\balancecolumns once, we do not\n    % want to call it again.  Therefore, reset \\output to its normal\n    % definition right away.  (We hope \\balancecolumns will never be\n    % called on to balance too much material, but if it is, this makes\n    % the output somewhat more palatable.)\n    \\global\\output = {\\onepageout{\\pagecontents\\PAGE}}%\n  }%\n  \\eject\n  \\endgroup % started in \\begindoublecolumns\n  %\n  % \\pagegoal was set to the doubled \\vsize above, since we restarted\n  % the current page.  
We're now back to normal single-column\n  % typesetting, so reset \\pagegoal to the normal \\vsize (after the\n  % \\endgroup where \\vsize got restored).\n  \\pagegoal = \\vsize\n}\n%\n% Called at the end of the double column material.\n\\def\\balancecolumns{%\n  \\setbox0 = \\vbox{\\unvbox255}% like \\box255 but more efficient, see p.120.\n  \\dimen@ = \\ht0\n  \\advance\\dimen@ by \\topskip\n  \\advance\\dimen@ by-\\baselineskip\n  \\divide\\dimen@ by 2 % target to split to\n  %debug\\message{final 2-column material height=\\the\\ht0, target=\\the\\dimen@.}%\n  \\splittopskip = \\topskip\n  % Loop until we get a decent breakpoint.\n  {%\n    \\vbadness = 10000\n    \\loop\n      \\global\\setbox3 = \\copy0\n      \\global\\setbox1 = \\vsplit3 to \\dimen@\n    \\ifdim\\ht3>\\dimen@\n      \\global\\advance\\dimen@ by 1pt\n    \\repeat\n  }%\n  %debug\\message{split to \\the\\dimen@, column heights: \\the\\ht1, \\the\\ht3.}%\n  \\setbox0=\\vbox to\\dimen@{\\unvbox1}%\n  \\setbox2=\\vbox to\\dimen@{\\unvbox3}%\n  %\n  \\pagesofar\n}\n\\catcode`\\@ = \\other\n\n\n\\message{sectioning,}\n% Chapters, sections, etc.\n\n% Let's start with @part.\n\\outer\\parseargdef\\part{\\partzzz{#1}}\n\\def\\partzzz#1{%\n  \\chapoddpage\n  \\null\n  \\vskip.3\\vsize  % move it down on the page a bit\n  \\begingroup\n    \\noindent \\titlefonts\\rmisbold #1\\par % the text\n    \\let\\lastnode=\\empty      % no node to associate with\n    \\writetocentry{part}{#1}{}% but put it in the toc\n    \\headingsoff              % no headline or footline on the part page\n    \\chapoddpage\n  \\endgroup\n}\n\n% \\unnumberedno is an oxymoron.  But we count the unnumbered\n% sections so that we can refer to them unambiguously in the pdf\n% outlines by their \"section number\".  We avoid collisions with chapter\n% numbers by starting them at 10000.  
(If a document ever has 10000\n% chapters, we're in trouble anyway, I'm sure.)\n\\newcount\\unnumberedno \\unnumberedno = 10000\n\\newcount\\chapno\n\\newcount\\secno        \\secno=0\n\\newcount\\subsecno     \\subsecno=0\n\\newcount\\subsubsecno  \\subsubsecno=0\n\n% This counter is funny since it counts through charcodes of letters A, B, ...\n\\newcount\\appendixno  \\appendixno = `\\@\n%\n% \\def\\appendixletter{\\char\\the\\appendixno}\n% We do the following ugly conditional instead of the above simple\n% construct for the sake of pdftex, which needs the actual\n% letter in the expansion, not just typeset.\n%\n\\def\\appendixletter{%\n  \\ifnum\\appendixno=`A A%\n  \\else\\ifnum\\appendixno=`B B%\n  \\else\\ifnum\\appendixno=`C C%\n  \\else\\ifnum\\appendixno=`D D%\n  \\else\\ifnum\\appendixno=`E E%\n  \\else\\ifnum\\appendixno=`F F%\n  \\else\\ifnum\\appendixno=`G G%\n  \\else\\ifnum\\appendixno=`H H%\n  \\else\\ifnum\\appendixno=`I I%\n  \\else\\ifnum\\appendixno=`J J%\n  \\else\\ifnum\\appendixno=`K K%\n  \\else\\ifnum\\appendixno=`L L%\n  \\else\\ifnum\\appendixno=`M M%\n  \\else\\ifnum\\appendixno=`N N%\n  \\else\\ifnum\\appendixno=`O O%\n  \\else\\ifnum\\appendixno=`P P%\n  \\else\\ifnum\\appendixno=`Q Q%\n  \\else\\ifnum\\appendixno=`R R%\n  \\else\\ifnum\\appendixno=`S S%\n  \\else\\ifnum\\appendixno=`T T%\n  \\else\\ifnum\\appendixno=`U U%\n  \\else\\ifnum\\appendixno=`V V%\n  \\else\\ifnum\\appendixno=`W W%\n  \\else\\ifnum\\appendixno=`X X%\n  \\else\\ifnum\\appendixno=`Y Y%\n  \\else\\ifnum\\appendixno=`Z Z%\n  % The \\the is necessary, despite appearances, because \\appendixletter is\n  % expanded while writing the .toc file.  
\\char\\appendixno is not\n  % expandable, thus it is written literally, thus all appendixes come out\n  % with the same letter (or @) in the toc without it.\n  \\else\\char\\the\\appendixno\n  \\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\n  \\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi\\fi}\n\n% Each @chapter defines these (using marks) as the number+name, number\n% and name of the chapter.  Page headings and footings can use\n% these.  @section does likewise.\n\\def\\thischapter{}\n\\def\\thischapternum{}\n\\def\\thischaptername{}\n\\def\\thissection{}\n\\def\\thissectionnum{}\n\\def\\thissectionname{}\n\n\\newcount\\absseclevel % used to calculate proper heading level\n\\newcount\\secbase\\secbase=0 % @raisesections/@lowersections modify this count\n\n% @raisesections: treat @section as chapter, @subsection as section, etc.\n\\def\\raisesections{\\global\\advance\\secbase by -1}\n\\let\\up=\\raisesections % original BFox name\n\n% @lowersections: treat @chapter as section, @section as subsection, etc.\n\\def\\lowersections{\\global\\advance\\secbase by 1}\n\\let\\down=\\lowersections % original BFox name\n\n% we only have subsub.\n\\chardef\\maxseclevel = 3\n%\n% A numbered section within an unnumbered changes to unnumbered too.\n% To achieve this, remember the \"biggest\" unnum. sec. we are currently in:\n\\chardef\\unnlevel = \\maxseclevel\n%\n% Trace whether the current chapter is an appendix or not:\n% \\chapheadtype is \"N\" or \"A\", unnumbered chapters are ignored.\n\\def\\chapheadtype{N}\n\n% Choose a heading macro\n% #1 is heading type\n% #2 is heading level\n% #3 is text for heading\n\\def\\genhead#1#2#3{%\n  % Compute the abs. sec. 
level:\n  \\absseclevel=#2\n  \\advance\\absseclevel by \\secbase\n  % Make sure \\absseclevel doesn't fall outside the range:\n  \\ifnum \\absseclevel < 0\n    \\absseclevel = 0\n  \\else\n    \\ifnum \\absseclevel > 3\n      \\absseclevel = 3\n    \\fi\n  \\fi\n  % The heading type:\n  \\def\\headtype{#1}%\n  \\if \\headtype U%\n    \\ifnum \\absseclevel < \\unnlevel\n      \\chardef\\unnlevel = \\absseclevel\n    \\fi\n  \\else\n    % Check for appendix sections:\n    \\ifnum \\absseclevel = 0\n      \\edef\\chapheadtype{\\headtype}%\n    \\else\n      \\if \\headtype A\\if \\chapheadtype N%\n\t\\errmessage{@appendix... within a non-appendix chapter}%\n      \\fi\\fi\n    \\fi\n    % Check for numbered within unnumbered:\n    \\ifnum \\absseclevel > \\unnlevel\n      \\def\\headtype{U}%\n    \\else\n      \\chardef\\unnlevel = 3\n    \\fi\n  \\fi\n  % Now print the heading:\n  \\if \\headtype U%\n    \\ifcase\\absseclevel\n\t\\unnumberedzzz{#3}%\n    \\or \\unnumberedseczzz{#3}%\n    \\or \\unnumberedsubseczzz{#3}%\n    \\or \\unnumberedsubsubseczzz{#3}%\n    \\fi\n  \\else\n    \\if \\headtype A%\n      \\ifcase\\absseclevel\n\t  \\appendixzzz{#3}%\n      \\or \\appendixsectionzzz{#3}%\n      \\or \\appendixsubseczzz{#3}%\n      \\or \\appendixsubsubseczzz{#3}%\n      \\fi\n    \\else\n      \\ifcase\\absseclevel\n\t  \\chapterzzz{#3}%\n      \\or \\seczzz{#3}%\n      \\or \\numberedsubseczzz{#3}%\n      \\or \\numberedsubsubseczzz{#3}%\n      \\fi\n    \\fi\n  \\fi\n  \\suppressfirstparagraphindent\n}\n\n% an interface:\n\\def\\numhead{\\genhead N}\n\\def\\apphead{\\genhead A}\n\\def\\unnmhead{\\genhead U}\n\n% @chapter, @appendix, @unnumbered.  Increment top-level counter, reset\n% all lower-level sectioning counters to zero.\n%\n% Also set \\chaplevelprefix, which we prepend to @float sequence numbers\n% (e.g., figures), q.v.  
By default (before any chapter), that is empty.\n\\let\\chaplevelprefix = \\empty\n%\n\\outer\\parseargdef\\chapter{\\numhead0{#1}} % normally numhead0 calls chapterzzz\n\\def\\chapterzzz#1{%\n  % section resetting is \\global in case the chapter is in a group, such\n  % as an @include file.\n  \\global\\secno=0 \\global\\subsecno=0 \\global\\subsubsecno=0\n    \\global\\advance\\chapno by 1\n  %\n  % Used for \\float.\n  \\gdef\\chaplevelprefix{\\the\\chapno.}%\n  \\resetallfloatnos\n  %\n  % \\putwordChapter can contain complex things in translations.\n  \\toks0=\\expandafter{\\putwordChapter}%\n  \\message{\\the\\toks0 \\space \\the\\chapno}%\n  %\n  % Write the actual heading.\n  \\chapmacro{#1}{Ynumbered}{\\the\\chapno}%\n  %\n  % So @section and the like are numbered underneath this chapter.\n  \\global\\let\\section = \\numberedsec\n  \\global\\let\\subsection = \\numberedsubsec\n  \\global\\let\\subsubsection = \\numberedsubsubsec\n}\n\n\\outer\\parseargdef\\appendix{\\apphead0{#1}} % normally calls appendixzzz\n%\n\\def\\appendixzzz#1{%\n  \\global\\secno=0 \\global\\subsecno=0 \\global\\subsubsecno=0\n    \\global\\advance\\appendixno by 1\n  \\gdef\\chaplevelprefix{\\appendixletter.}%\n  \\resetallfloatnos\n  %\n  % \\putwordAppendix can contain complex things in translations.\n  \\toks0=\\expandafter{\\putwordAppendix}%\n  \\message{\\the\\toks0 \\space \\appendixletter}%\n  %\n  \\chapmacro{#1}{Yappendix}{\\appendixletter}%\n  %\n  \\global\\let\\section = \\appendixsec\n  \\global\\let\\subsection = \\appendixsubsec\n  \\global\\let\\subsubsection = \\appendixsubsubsec\n}\n\n% normally unnmhead0 calls unnumberedzzz:\n\\outer\\parseargdef\\unnumbered{\\unnmhead0{#1}}\n\\def\\unnumberedzzz#1{%\n  \\global\\secno=0 \\global\\subsecno=0 \\global\\subsubsecno=0\n    \\global\\advance\\unnumberedno by 1\n  %\n  % Since an unnumbered has no number, no prefix for figures.\n  \\global\\let\\chaplevelprefix = \\empty\n  \\resetallfloatnos\n  %\n  % This used to 
be simply \\message{#1}, but TeX fully expands the\n  % argument to \\message.  Therefore, if #1 contained @-commands, TeX\n  % expanded them.  For example, in `@unnumbered The @cite{Book}', TeX\n  % expanded @cite (which turns out to cause errors because \\cite is meant\n  % to be executed, not expanded).\n  %\n  % Anyway, we don't want the fully-expanded definition of @cite to appear\n  % as a result of the \\message, we just want `@cite' itself.  We use\n  % \\the<toks register> to achieve this: TeX expands \\the<toks> only once,\n  % simply yielding the contents of <toks register>.  (We also do this for\n  % the toc entries.)\n  \\toks0 = {#1}%\n  \\message{(\\the\\toks0)}%\n  %\n  \\chapmacro{#1}{Ynothing}{\\the\\unnumberedno}%\n  %\n  \\global\\let\\section = \\unnumberedsec\n  \\global\\let\\subsection = \\unnumberedsubsec\n  \\global\\let\\subsubsection = \\unnumberedsubsubsec\n}\n\n% @centerchap is like @unnumbered, but the heading is centered.\n\\outer\\parseargdef\\centerchap{%\n  % Well, we could do the following in a group, but that would break\n  % an assumption that \\chapmacro is called at the outermost level.\n  % Thus we are safer this way:\t\t--kasal, 24feb04\n  \\let\\centerparametersmaybe = \\centerparameters\n  \\unnmhead0{#1}%\n  \\let\\centerparametersmaybe = \\relax\n}\n\n% @top is like @unnumbered.\n\\let\\top\\unnumbered\n\n% Sections.\n% \n\\outer\\parseargdef\\numberedsec{\\numhead1{#1}} % normally calls seczzz\n\\def\\seczzz#1{%\n  \\global\\subsecno=0 \\global\\subsubsecno=0  \\global\\advance\\secno by 1\n  \\sectionheading{#1}{sec}{Ynumbered}{\\the\\chapno.\\the\\secno}%\n}\n\n% normally calls appendixsectionzzz:\n\\outer\\parseargdef\\appendixsection{\\apphead1{#1}}\n\\def\\appendixsectionzzz#1{%\n  \\global\\subsecno=0 \\global\\subsubsecno=0  \\global\\advance\\secno by 1\n  \\sectionheading{#1}{sec}{Yappendix}{\\appendixletter.\\the\\secno}%\n}\n\\let\\appendixsec\\appendixsection\n\n% normally calls 
unnumberedseczzz:\n\\outer\\parseargdef\\unnumberedsec{\\unnmhead1{#1}}\n\\def\\unnumberedseczzz#1{%\n  \\global\\subsecno=0 \\global\\subsubsecno=0  \\global\\advance\\secno by 1\n  \\sectionheading{#1}{sec}{Ynothing}{\\the\\unnumberedno.\\the\\secno}%\n}\n\n% Subsections.\n% \n% normally calls numberedsubseczzz:\n\\outer\\parseargdef\\numberedsubsec{\\numhead2{#1}}\n\\def\\numberedsubseczzz#1{%\n  \\global\\subsubsecno=0  \\global\\advance\\subsecno by 1\n  \\sectionheading{#1}{subsec}{Ynumbered}{\\the\\chapno.\\the\\secno.\\the\\subsecno}%\n}\n\n% normally calls appendixsubseczzz:\n\\outer\\parseargdef\\appendixsubsec{\\apphead2{#1}}\n\\def\\appendixsubseczzz#1{%\n  \\global\\subsubsecno=0  \\global\\advance\\subsecno by 1\n  \\sectionheading{#1}{subsec}{Yappendix}%\n                 {\\appendixletter.\\the\\secno.\\the\\subsecno}%\n}\n\n% normally calls unnumberedsubseczzz:\n\\outer\\parseargdef\\unnumberedsubsec{\\unnmhead2{#1}}\n\\def\\unnumberedsubseczzz#1{%\n  \\global\\subsubsecno=0  \\global\\advance\\subsecno by 1\n  \\sectionheading{#1}{subsec}{Ynothing}%\n                 {\\the\\unnumberedno.\\the\\secno.\\the\\subsecno}%\n}\n\n% Subsubsections.\n% \n% normally numberedsubsubseczzz:\n\\outer\\parseargdef\\numberedsubsubsec{\\numhead3{#1}}\n\\def\\numberedsubsubseczzz#1{%\n  \\global\\advance\\subsubsecno by 1\n  \\sectionheading{#1}{subsubsec}{Ynumbered}%\n                 {\\the\\chapno.\\the\\secno.\\the\\subsecno.\\the\\subsubsecno}%\n}\n\n% normally appendixsubsubseczzz:\n\\outer\\parseargdef\\appendixsubsubsec{\\apphead3{#1}}\n\\def\\appendixsubsubseczzz#1{%\n  \\global\\advance\\subsubsecno by 1\n  \\sectionheading{#1}{subsubsec}{Yappendix}%\n                 {\\appendixletter.\\the\\secno.\\the\\subsecno.\\the\\subsubsecno}%\n}\n\n% normally unnumberedsubsubseczzz:\n\\outer\\parseargdef\\unnumberedsubsubsec{\\unnmhead3{#1}}\n\\def\\unnumberedsubsubseczzz#1{%\n  \\global\\advance\\subsubsecno by 1\n  \\sectionheading{#1}{subsubsec}{Ynothing}%\n  
               {\\the\\unnumberedno.\\the\\secno.\\the\\subsecno.\\the\\subsubsecno}%\n}\n\n% These macros control what the section commands do, according\n% to what kind of chapter we are in (ordinary, appendix, or unnumbered).\n% Define them by default for a numbered chapter.\n\\let\\section = \\numberedsec\n\\let\\subsection = \\numberedsubsec\n\\let\\subsubsection = \\numberedsubsubsec\n\n% Define @majorheading, @heading and @subheading\n\n\\def\\majorheading{%\n  {\\advance\\chapheadingskip by 10pt \\chapbreak }%\n  \\parsearg\\chapheadingzzz\n}\n\n\\def\\chapheading{\\chapbreak \\parsearg\\chapheadingzzz}\n\\def\\chapheadingzzz#1{%\n  \\vbox{\\chapfonts \\raggedtitlesettings #1\\par}%\n  \\nobreak\\bigskip \\nobreak\n  \\suppressfirstparagraphindent\n}\n\n% @heading, @subheading, @subsubheading.\n\\parseargdef\\heading{\\sectionheading{#1}{sec}{Yomitfromtoc}{}\n  \\suppressfirstparagraphindent}\n\\parseargdef\\subheading{\\sectionheading{#1}{subsec}{Yomitfromtoc}{}\n  \\suppressfirstparagraphindent}\n\\parseargdef\\subsubheading{\\sectionheading{#1}{subsubsec}{Yomitfromtoc}{}\n  \\suppressfirstparagraphindent}\n\n% These macros generate a chapter, section, etc. heading only\n% (including whitespace, linebreaking, etc. around it),\n% given all the information in convenient, parsed form.\n\n% Args are the skip and penalty (usually negative)\n\\def\\dobreak#1#2{\\par\\ifdim\\lastskip<#1\\removelastskip\\penalty#2\\vskip#1\\fi}\n\n% Parameter controlling skip before chapter headings (if needed)\n\\newskip\\chapheadingskip\n\n% Define plain chapter starts, and page on/off switching for it.\n\\def\\chapbreak{\\dobreak \\chapheadingskip {-4000}}\n\\def\\chappager{\\par\\vfill\\supereject}\n% Because \\domark is called before \\chapoddpage, the filler page will\n% get the headings for the next chapter, which is wrong.  
But we don't\n% care -- we just disable all headings on the filler page.\n\\def\\chapoddpage{%\n  \\chappager\n  \\ifodd\\pageno \\else\n    \\begingroup\n      \\headingsoff\n      \\null\n      \\chappager\n    \\endgroup\n  \\fi\n}\n\n\\def\\setchapternewpage #1 {\\csname CHAPPAG#1\\endcsname}\n\n\\def\\CHAPPAGoff{%\n\\global\\let\\contentsalignmacro = \\chappager\n\\global\\let\\pchapsepmacro=\\chapbreak\n\\global\\let\\pagealignmacro=\\chappager}\n\n\\def\\CHAPPAGon{%\n\\global\\let\\contentsalignmacro = \\chappager\n\\global\\let\\pchapsepmacro=\\chappager\n\\global\\let\\pagealignmacro=\\chappager\n\\global\\def\\HEADINGSon{\\HEADINGSsingle}}\n\n\\def\\CHAPPAGodd{%\n\\global\\let\\contentsalignmacro = \\chapoddpage\n\\global\\let\\pchapsepmacro=\\chapoddpage\n\\global\\let\\pagealignmacro=\\chapoddpage\n\\global\\def\\HEADINGSon{\\HEADINGSdouble}}\n\n\\CHAPPAGon\n\n% Chapter opening.\n%\n% #1 is the text, #2 is the section type (Ynumbered, Ynothing,\n% Yappendix, Yomitfromtoc), #3 the chapter number.\n%\n% To test against our argument.\n\\def\\Ynothingkeyword{Ynothing}\n\\def\\Yomitfromtockeyword{Yomitfromtoc}\n\\def\\Yappendixkeyword{Yappendix}\n%\n\\def\\chapmacro#1#2#3{%\n  % Insert the first mark before the heading break (see notes for \\domark).\n  \\let\\prevchapterdefs=\\lastchapterdefs\n  \\let\\prevsectiondefs=\\lastsectiondefs\n  \\gdef\\lastsectiondefs{\\gdef\\thissectionname{}\\gdef\\thissectionnum{}%\n                        \\gdef\\thissection{}}%\n  %\n  \\def\\temptype{#2}%\n  \\ifx\\temptype\\Ynothingkeyword\n    \\gdef\\lastchapterdefs{\\gdef\\thischaptername{#1}\\gdef\\thischapternum{}%\n                          \\gdef\\thischapter{\\thischaptername}}%\n  \\else\\ifx\\temptype\\Yomitfromtockeyword\n    \\gdef\\lastchapterdefs{\\gdef\\thischaptername{#1}\\gdef\\thischapternum{}%\n                          \\gdef\\thischapter{}}%\n  \\else\\ifx\\temptype\\Yappendixkeyword\n    \\toks0={#1}%\n    \\xdef\\lastchapterdefs{%\n      
\\gdef\\noexpand\\thischaptername{\\the\\toks0}%\n      \\gdef\\noexpand\\thischapternum{\\appendixletter}%\n      % \\noexpand\\putwordAppendix avoids expanding indigestible\n      % commands in some of the translations.\n      \\gdef\\noexpand\\thischapter{\\noexpand\\putwordAppendix{}\n                                 \\noexpand\\thischapternum:\n                                 \\noexpand\\thischaptername}%\n    }%\n  \\else\n    \\toks0={#1}%\n    \\xdef\\lastchapterdefs{%\n      \\gdef\\noexpand\\thischaptername{\\the\\toks0}%\n      \\gdef\\noexpand\\thischapternum{\\the\\chapno}%\n      % \\noexpand\\putwordChapter avoids expanding indigestible\n      % commands in some of the translations.\n      \\gdef\\noexpand\\thischapter{\\noexpand\\putwordChapter{}\n                                 \\noexpand\\thischapternum:\n                                 \\noexpand\\thischaptername}%\n    }%\n  \\fi\\fi\\fi\n  %\n  % Output the mark.  Pass it through \\safewhatsit, to take care of\n  % the preceding space.\n  \\safewhatsit\\domark\n  %\n  % Insert the chapter heading break.\n  \\pchapsepmacro\n  %\n  % Now the second mark, after the heading break.  No break points\n  % between here and the heading.\n  \\let\\prevchapterdefs=\\lastchapterdefs\n  \\let\\prevsectiondefs=\\lastsectiondefs\n  \\domark\n  %\n  {%\n    \\chapfonts \\rmisbold\n    %\n    % Have to define \\lastsection before calling \\donoderef, because the\n    % xref code eventually uses it.  
On the other hand, it has to be called\n    % after \\pchapsepmacro, or the headline will change too soon.\n    \\gdef\\lastsection{#1}%\n    %\n    % Only insert the separating space if we have a chapter/appendix\n    % number, and don't print the unnumbered ``number''.\n    \\ifx\\temptype\\Ynothingkeyword\n      \\setbox0 = \\hbox{}%\n      \\def\\toctype{unnchap}%\n    \\else\\ifx\\temptype\\Yomitfromtockeyword\n      \\setbox0 = \\hbox{}% contents like unnumbered, but no toc entry\n      \\def\\toctype{omit}%\n    \\else\\ifx\\temptype\\Yappendixkeyword\n      \\setbox0 = \\hbox{\\putwordAppendix{} #3\\enspace}%\n      \\def\\toctype{app}%\n    \\else\n      \\setbox0 = \\hbox{#3\\enspace}%\n      \\def\\toctype{numchap}%\n    \\fi\\fi\\fi\n    %\n    % Write the toc entry for this chapter.  Must come before the\n    % \\donoderef, because we include the current node name in the toc\n    % entry, and \\donoderef resets it to empty.\n    \\writetocentry{\\toctype}{#1}{#3}%\n    %\n    % For pdftex, we have to write out the node definition (aka, make\n    % the pdfdest) after any page break, but before the actual text has\n    % been typeset.  If the destination for the pdf outline is after the\n    % text, then jumping from the outline may wind up with the text not\n    % being visible, for instance under high magnification.\n    \\donoderef{#2}%\n    %\n    % Typeset the actual heading.\n    \\nobreak % Avoid page breaks at the interline glue.\n    \\vbox{\\raggedtitlesettings \\hangindent=\\wd0 \\centerparametersmaybe\n          \\unhbox0 #1\\par}%\n  }%\n  \\nobreak\\bigskip % no page break after a chapter title\n  \\nobreak\n}\n\n% @centerchap -- centered and unnumbered.\n\\let\\centerparametersmaybe = \\relax\n\\def\\centerparameters{%\n  \\advance\\rightskip by 3\\rightskip\n  \\leftskip = \\rightskip\n  \\parfillskip = 0pt\n}\n\n\n% I don't think this chapter style is supported any more, so I'm not\n% updating it with the new noderef stuff.  We'll see.  
--karl, 11aug03.\n%\n\\def\\setchapterstyle #1 {\\csname CHAPF#1\\endcsname}\n%\n\\def\\unnchfopen #1{%\n  \\chapoddpage\n  \\vbox{\\chapfonts \\raggedtitlesettings #1\\par}%\n  \\nobreak\\bigskip\\nobreak\n}\n\\def\\chfopen #1#2{\\chapoddpage {\\chapfonts\n\\vbox to 3in{\\vfil \\hbox to\\hsize{\\hfil #2} \\hbox to\\hsize{\\hfil #1} \\vfil}}%\n\\par\\penalty 5000 %\n}\n\\def\\centerchfopen #1{%\n  \\chapoddpage\n  \\vbox{\\chapfonts \\raggedtitlesettings \\hfill #1\\hfill}%\n  \\nobreak\\bigskip \\nobreak\n}\n\\def\\CHAPFopen{%\n  \\global\\let\\chapmacro=\\chfopen\n  \\global\\let\\centerchapmacro=\\centerchfopen}\n\n\n% Section titles.  These macros combine the section number parts and\n% call the generic \\sectionheading to do the printing.\n%\n\\newskip\\secheadingskip\n\\def\\secheadingbreak{\\dobreak \\secheadingskip{-1000}}\n\n% Subsection titles.\n\\newskip\\subsecheadingskip\n\\def\\subsecheadingbreak{\\dobreak \\subsecheadingskip{-500}}\n\n% Subsubsection titles.\n\\def\\subsubsecheadingskip{\\subsecheadingskip}\n\\def\\subsubsecheadingbreak{\\subsecheadingbreak}\n\n\n% Print any size, any type, section title.\n%\n% #1 is the text, #2 is the section level (sec/subsec/subsubsec), #3 is\n% the section type for xrefs (Ynumbered, Ynothing, Yappendix), #4 is the\n% section number.\n%\n\\def\\seckeyword{sec}\n%\n\\def\\sectionheading#1#2#3#4{%\n  {%\n    \\checkenv{}% should not be in an environment.\n    %\n    % Switch to the right set of fonts.\n    \\csname #2fonts\\endcsname \\rmisbold\n    %\n    \\def\\sectionlevel{#2}%\n    \\def\\temptype{#3}%\n    %\n    % Insert first mark before the heading break (see notes for \\domark).\n    \\let\\prevsectiondefs=\\lastsectiondefs\n    \\ifx\\temptype\\Ynothingkeyword\n      \\ifx\\sectionlevel\\seckeyword\n        \\gdef\\lastsectiondefs{\\gdef\\thissectionname{#1}\\gdef\\thissectionnum{}%\n                              \\gdef\\thissection{\\thissectionname}}%\n      \\fi\n    
\\else\\ifx\\temptype\\Yomitfromtockeyword\n      % Don't redefine \\thissection.\n    \\else\\ifx\\temptype\\Yappendixkeyword\n      \\ifx\\sectionlevel\\seckeyword\n        \\toks0={#1}%\n        \\xdef\\lastsectiondefs{%\n          \\gdef\\noexpand\\thissectionname{\\the\\toks0}%\n          \\gdef\\noexpand\\thissectionnum{#4}%\n          % \\noexpand\\putwordSection avoids expanding indigestible\n          % commands in some of the translations.\n          \\gdef\\noexpand\\thissection{\\noexpand\\putwordSection{}\n                                     \\noexpand\\thissectionnum:\n                                     \\noexpand\\thissectionname}%\n        }%\n      \\fi\n    \\else\n      \\ifx\\sectionlevel\\seckeyword\n        \\toks0={#1}%\n        \\xdef\\lastsectiondefs{%\n          \\gdef\\noexpand\\thissectionname{\\the\\toks0}%\n          \\gdef\\noexpand\\thissectionnum{#4}%\n          % \\noexpand\\putwordSection avoids expanding indigestible\n          % commands in some of the translations.\n          \\gdef\\noexpand\\thissection{\\noexpand\\putwordSection{}\n                                     \\noexpand\\thissectionnum:\n                                     \\noexpand\\thissectionname}%\n        }%\n      \\fi\n    \\fi\\fi\\fi\n    %\n    % Go into vertical mode.  Usually we'll already be there, but we\n    % don't want the following whatsit to end up in a preceding paragraph\n    % if the document didn't happen to have a blank line.\n    \\par\n    %\n    % Output the mark.  Pass it through \\safewhatsit, to take care of\n    % the preceding space.\n    \\safewhatsit\\domark\n    %\n    % Insert space above the heading.\n    \\csname #2headingbreak\\endcsname\n    %\n    % Now the second mark, after the heading break.  
No break points\n    % between here and the heading.\n    \\let\\prevsectiondefs=\\lastsectiondefs\n    \\domark\n    %\n    % Only insert the space after the number if we have a section number.\n    \\ifx\\temptype\\Ynothingkeyword\n      \\setbox0 = \\hbox{}%\n      \\def\\toctype{unn}%\n      \\gdef\\lastsection{#1}%\n    \\else\\ifx\\temptype\\Yomitfromtockeyword\n      % for @headings -- no section number, don't include in toc,\n      % and don't redefine \\lastsection.\n      \\setbox0 = \\hbox{}%\n      \\def\\toctype{omit}%\n      \\let\\sectionlevel=\\empty\n    \\else\\ifx\\temptype\\Yappendixkeyword\n      \\setbox0 = \\hbox{#4\\enspace}%\n      \\def\\toctype{app}%\n      \\gdef\\lastsection{#1}%\n    \\else\n      \\setbox0 = \\hbox{#4\\enspace}%\n      \\def\\toctype{num}%\n      \\gdef\\lastsection{#1}%\n    \\fi\\fi\\fi\n    %\n    % Write the toc entry (before \\donoderef).  See comments in \\chapmacro.\n    \\writetocentry{\\toctype\\sectionlevel}{#1}{#4}%\n    %\n    % Write the node reference (= pdf destination for pdftex).\n    % Again, see comments in \\chapmacro.\n    \\donoderef{#3}%\n    %\n    % Interline glue will be inserted when the vbox is completed.\n    % That glue will be a valid breakpoint for the page, since it'll be\n    % preceded by a whatsit (usually from the \\donoderef, or from the\n    % \\writetocentry if there was no node).  We don't want to allow that\n    % break, since then the whatsits could end up on page n while the\n    % section is on page n+1, thus toc/etc. are wrong.  
Debian bug 276000.\n    \\nobreak\n    %\n    % Output the actual section heading.\n    \\vbox{\\hyphenpenalty=10000 \\tolerance=5000 \\parindent=0pt \\ptexraggedright\n          \\hangindent=\\wd0  % zero if no section number\n          \\unhbox0 #1}%\n  }%\n  % Add extra space after the heading -- half of whatever came above it.\n  % Don't allow stretch, though.\n  \\kern .5 \\csname #2headingskip\\endcsname\n  %\n  % Do not let the kern be a potential breakpoint, as it would be if it\n  % was followed by glue.\n  \\nobreak\n  %\n  % We'll almost certainly start a paragraph next, so don't let that\n  % glue accumulate.  (Not a breakpoint because it's preceded by a\n  % discardable item.)  However, when a paragraph is not started next\n  % (\\startdefun, \\cartouche, \\center, etc.), this needs to be wiped out\n  % or the negative glue will cause weirdly wrong output, typically\n  % obscuring the section heading with something else.\n  \\vskip-\\parskip\n  %\n  % This is so the last item on the main vertical list is a known\n  % \\penalty > 10000, so \\startdefun, etc., can recognize the situation\n  % and do the needful.\n  \\penalty 10001\n}\n\n\n\\message{toc,}\n% Table of contents.\n\\newwrite\\tocfile\n\n% Write an entry to the toc file, opening it if necessary.\n% Called from @chapter, etc.\n%\n% Example usage: \\writetocentry{sec}{Section Name}{\\the\\chapno.\\the\\secno}\n% We append the current node name (if any) and page number as additional\n% arguments for the \\{chap,sec,...}entry macros which will eventually\n% read this.  The node name is used in the pdf outlines as the\n% destination to jump to.\n%\n% We open the .toc file for writing here instead of at @setfilename (or\n% any other fixed time) so that @contents can be anywhere in the document.\n% But if #1 is `omit', then we don't do anything.  
This is used for the\n% table of contents chapter openings themselves.\n%\n\\newif\\iftocfileopened\n\\def\\omitkeyword{omit}%\n%\n\\def\\writetocentry#1#2#3{%\n  \\edef\\writetoctype{#1}%\n  \\ifx\\writetoctype\\omitkeyword \\else\n    \\iftocfileopened\\else\n      \\immediate\\openout\\tocfile = \\jobname.toc\n      \\global\\tocfileopenedtrue\n    \\fi\n    %\n    \\iflinks\n      {\\atdummies\n       \\edef\\temp{%\n         \\write\\tocfile{@#1entry{#2}{#3}{\\lastnode}{\\noexpand\\folio}}}%\n       \\temp\n      }%\n    \\fi\n  \\fi\n  %\n  % Tell \\shipout to create a pdf destination on each page, if we're\n  % writing pdf.  These are used in the table of contents.  We can't\n  % just write one on every page because the title pages are numbered\n  % 1 and 2 (the page numbers aren't printed), and so are the first\n  % two pages of the document.  Thus, we'd have two destinations named\n  % `1', and two named `2'.\n  \\ifpdf \\global\\pdfmakepagedesttrue \\fi\n}\n\n\n% These characters do not print properly in the Computer Modern roman\n% fonts, so we must take special care.  This is more or less redundant\n% with the Texinfo input format setup at the end of this file.\n%\n\\def\\activecatcodes{%\n  \\catcode`\\\"=\\active\n  \\catcode`\\$=\\active\n  \\catcode`\\<=\\active\n  \\catcode`\\>=\\active\n  \\catcode`\\\\=\\active\n  \\catcode`\\^=\\active\n  \\catcode`\\_=\\active\n  \\catcode`\\|=\\active\n  \\catcode`\\~=\\active\n}\n\n\n% Read the toc file, which is essentially Texinfo input.\n\\def\\readtocfile{%\n  \\setupdatafile\n  \\activecatcodes\n  \\input \\tocreadfilename\n}\n\n\\newskip\\contentsrightmargin \\contentsrightmargin=1in\n\\newcount\\savepageno\n\\newcount\\lastnegativepageno \\lastnegativepageno = -1\n\n% Prepare to read what we've written to \\tocfile.\n%\n\\def\\startcontents#1{%\n  % If @setchapternewpage on, and @headings double, the contents should\n  % start on an odd page, unlike chapters.  
Thus, we maintain\n  % \\contentsalignmacro in parallel with \\pagealignmacro.\n  % From: Torbjorn Granlund <tege@matematik.su.se>\n  \\contentsalignmacro\n  \\immediate\\closeout\\tocfile\n  %\n  % Don't need to put `Contents' or `Short Contents' in the headline.\n  % It is abundantly clear what they are.\n  \\chapmacro{#1}{Yomitfromtoc}{}%\n  %\n  \\savepageno = \\pageno\n  \\begingroup                  % Set up to handle contents files properly.\n    \\raggedbottom              % Worry more about breakpoints than the bottom.\n    \\advance\\hsize by -\\contentsrightmargin % Don't use the full line length.\n    %\n    % Roman numerals for page numbers.\n    \\ifnum \\pageno>0 \\global\\pageno = \\lastnegativepageno \\fi\n}\n\n% redefined for the two-volume lispref.  We always output on\n% \\jobname.toc even if this is redefined.\n%\n\\def\\tocreadfilename{\\jobname.toc}\n\n% Normal (long) toc.\n%\n\\def\\contents{%\n  \\startcontents{\\putwordTOC}%\n    \\openin 1 \\tocreadfilename\\space\n    \\ifeof 1 \\else\n      \\readtocfile\n    \\fi\n    \\vfill \\eject\n    \\contentsalignmacro % in case @setchapternewpage odd is in effect\n    \\ifeof 1 \\else\n      \\pdfmakeoutlines\n    \\fi\n    \\closein 1\n  \\endgroup\n  \\lastnegativepageno = \\pageno\n  \\global\\pageno = \\savepageno\n}\n\n% And just the chapters.\n\\def\\summarycontents{%\n  \\startcontents{\\putwordShortTOC}%\n    %\n    \\let\\partentry = \\shortpartentry\n    \\let\\numchapentry = \\shortchapentry\n    \\let\\appentry = \\shortchapentry\n    \\let\\unnchapentry = \\shortunnchapentry\n    % We want a true roman here for the page numbers.\n    \\secfonts\n    \\let\\rm=\\shortcontrm \\let\\bf=\\shortcontbf\n    \\let\\sl=\\shortcontsl \\let\\tt=\\shortconttt\n    \\rm\n    \\hyphenpenalty = 10000\n    \\advance\\baselineskip by 1pt % Open it up a little.\n    \\def\\numsecentry##1##2##3##4{}\n    \\let\\appsecentry = \\numsecentry\n    \\let\\unnsecentry = \\numsecentry\n    
\\let\\numsubsecentry = \\numsecentry\n    \\let\\appsubsecentry = \\numsecentry\n    \\let\\unnsubsecentry = \\numsecentry\n    \\let\\numsubsubsecentry = \\numsecentry\n    \\let\\appsubsubsecentry = \\numsecentry\n    \\let\\unnsubsubsecentry = \\numsecentry\n    \\openin 1 \\tocreadfilename\\space\n    \\ifeof 1 \\else\n      \\readtocfile\n    \\fi\n    \\closein 1\n    \\vfill \\eject\n    \\contentsalignmacro % in case @setchapternewpage odd is in effect\n  \\endgroup\n  \\lastnegativepageno = \\pageno\n  \\global\\pageno = \\savepageno\n}\n\\let\\shortcontents = \\summarycontents\n\n% Typeset the label for a chapter or appendix for the short contents.\n% The arg is, e.g., `A' for an appendix, or `3' for a chapter.\n%\n\\def\\shortchaplabel#1{%\n  % This space should be enough, since a single number is .5em, and the\n  % widest letter (M) is 1em, at least in the Computer Modern fonts.\n  % But use \\hss just in case.\n  % (This space doesn't include the extra space that gets added after\n  % the label; that gets put in by \\shortchapentry above.)\n  %\n  % We'd like to right-justify chapter numbers, but that looks strange\n  % with appendix letters.  And right-justifying numbers and\n  % left-justifying letters looks strange when there is less than 10\n  % chapters.  Have to read the whole toc once to know how many chapters\n  % there are before deciding ...\n  \\hbox to 1em{#1\\hss}%\n}\n\n% These macros generate individual entries in the table of contents.\n% The first argument is the chapter or section name.\n% The last argument is the page number.\n% The arguments in between are the chapter number, section number, ...\n\n% Parts, in the main contents.  Replace the part number, which doesn't\n% exist, with an empty box.  
Let's hope all the numbers have the same width.\n% Also ignore the page number, which is conventionally not printed.\n\\def\\numeralbox{\\setbox0=\\hbox{8}\\hbox to \\wd0{\\hfil}}\n\\def\\partentry#1#2#3#4{\\dochapentry{\\numeralbox\\labelspace#1}{}}\n%\n% Parts, in the short toc.\n\\def\\shortpartentry#1#2#3#4{%\n  \\penalty-300\n  \\vskip.5\\baselineskip plus.15\\baselineskip minus.1\\baselineskip\n  \\shortchapentry{{\\bf #1}}{\\numeralbox}{}{}%\n}\n\n% Chapters, in the main contents.\n\\def\\numchapentry#1#2#3#4{\\dochapentry{#2\\labelspace#1}{#4}}\n%\n% Chapters, in the short toc.\n% See comments in \\dochapentry re vbox and related settings.\n\\def\\shortchapentry#1#2#3#4{%\n  \\tocentry{\\shortchaplabel{#2}\\labelspace #1}{\\doshortpageno\\bgroup#4\\egroup}%\n}\n\n% Appendices, in the main contents.\n% Need the word Appendix, and a fixed-size box.\n%\n\\def\\appendixbox#1{%\n  % We use M since it's probably the widest letter.\n  \\setbox0 = \\hbox{\\putwordAppendix{} M}%\n  \\hbox to \\wd0{\\putwordAppendix{} #1\\hss}}\n%\n\\def\\appentry#1#2#3#4{\\dochapentry{\\appendixbox{#2}\\labelspace#1}{#4}}\n\n% Unnumbered chapters.\n\\def\\unnchapentry#1#2#3#4{\\dochapentry{#1}{#4}}\n\\def\\shortunnchapentry#1#2#3#4{\\tocentry{#1}{\\doshortpageno\\bgroup#4\\egroup}}\n\n% Sections.\n\\def\\numsecentry#1#2#3#4{\\dosecentry{#2\\labelspace#1}{#4}}\n\\let\\appsecentry=\\numsecentry\n\\def\\unnsecentry#1#2#3#4{\\dosecentry{#1}{#4}}\n\n% Subsections.\n\\def\\numsubsecentry#1#2#3#4{\\dosubsecentry{#2\\labelspace#1}{#4}}\n\\let\\appsubsecentry=\\numsubsecentry\n\\def\\unnsubsecentry#1#2#3#4{\\dosubsecentry{#1}{#4}}\n\n% And subsubsections.\n\\def\\numsubsubsecentry#1#2#3#4{\\dosubsubsecentry{#2\\labelspace#1}{#4}}\n\\let\\appsubsubsecentry=\\numsubsubsecentry\n\\def\\unnsubsubsecentry#1#2#3#4{\\dosubsubsecentry{#1}{#4}}\n\n% This parameter controls the indentation of the various levels.\n% Same as \\defaultparindent.\n\\newdimen\\tocindent \\tocindent = 15pt\n\n% Now for the 
actual typesetting. In all these, #1 is the text and #2 is the\n% page number.\n%\n% If the toc has to be broken over pages, we want it to be at chapters\n% if at all possible; hence the \\penalty.\n\\def\\dochapentry#1#2{%\n   \\penalty-300 \\vskip1\\baselineskip plus.33\\baselineskip minus.25\\baselineskip\n   \\begingroup\n     \\chapentryfonts\n     \\tocentry{#1}{\\dopageno\\bgroup#2\\egroup}%\n   \\endgroup\n   \\nobreak\\vskip .25\\baselineskip plus.1\\baselineskip\n}\n\n\\def\\dosecentry#1#2{\\begingroup\n  \\secentryfonts \\leftskip=\\tocindent\n  \\tocentry{#1}{\\dopageno\\bgroup#2\\egroup}%\n\\endgroup}\n\n\\def\\dosubsecentry#1#2{\\begingroup\n  \\subsecentryfonts \\leftskip=2\\tocindent\n  \\tocentry{#1}{\\dopageno\\bgroup#2\\egroup}%\n\\endgroup}\n\n\\def\\dosubsubsecentry#1#2{\\begingroup\n  \\subsubsecentryfonts \\leftskip=3\\tocindent\n  \\tocentry{#1}{\\dopageno\\bgroup#2\\egroup}%\n\\endgroup}\n\n% We use the same \\entry macro as for the index entries.\n\\let\\tocentry = \\entry\n\n% Space between chapter (or whatever) number and the title.\n\\def\\labelspace{\\hskip1em \\relax}\n\n\\def\\dopageno#1{{\\rm #1}}\n\\def\\doshortpageno#1{{\\rm #1}}\n\n\\def\\chapentryfonts{\\secfonts \\rm}\n\\def\\secentryfonts{\\textfonts}\n\\def\\subsecentryfonts{\\textfonts}\n\\def\\subsubsecentryfonts{\\textfonts}\n\n\n\\message{environments,}\n% @foo ... @end foo.\n\n% @tex ... 
@end tex    escapes into raw TeX temporarily.\n% One exception: @ is still an escape character, so that @end tex works.\n% But \\@ or @@ will get a plain @ character.\n\n\\envdef\\tex{%\n  \\setupmarkupstyle{tex}%\n  \\catcode `\\\\=0 \\catcode `\\{=1 \\catcode `\\}=2\n  \\catcode `\\$=3 \\catcode `\\&=4 \\catcode `\\#=6\n  \\catcode `\\^=7 \\catcode `\\_=8 \\catcode `\\~=\\active \\let~=\\tie\n  \\catcode `\\%=14\n  \\catcode `\\+=\\other\n  \\catcode `\\\"=\\other\n  \\catcode `\\|=\\other\n  \\catcode `\\<=\\other\n  \\catcode `\\>=\\other\n  \\catcode`\\`=\\other\n  \\catcode`\\'=\\other\n  \\escapechar=`\\\\\n  %\n  % ' is active in math mode (mathcode\"8000).  So reset it, and all our\n  % other math active characters (just in case), to plain's definitions.\n  \\mathactive\n  %\n  \\let\\b=\\ptexb\n  \\let\\bullet=\\ptexbullet\n  \\let\\c=\\ptexc\n  \\let\\,=\\ptexcomma\n  \\let\\.=\\ptexdot\n  \\let\\dots=\\ptexdots\n  \\let\\equiv=\\ptexequiv\n  \\let\\!=\\ptexexclam\n  \\let\\i=\\ptexi\n  \\let\\indent=\\ptexindent\n  \\let\\noindent=\\ptexnoindent\n  \\let\\{=\\ptexlbrace\n  \\let\\+=\\tabalign\n  \\let\\}=\\ptexrbrace\n  \\let\\/=\\ptexslash\n  \\let\\*=\\ptexstar\n  \\let\\t=\\ptext\n  \\expandafter \\let\\csname top\\endcsname=\\ptextop  % outer\n  \\let\\frenchspacing=\\plainfrenchspacing\n  %\n  \\def\\endldots{\\mathinner{\\ldots\\ldots\\ldots\\ldots}}%\n  \\def\\enddots{\\relax\\ifmmode\\endldots\\else$\\mathsurround=0pt \\endldots\\,$\\fi}%\n  \\def\\@{@}%\n}\n% There is no need to define \\Etex.\n\n% Define @lisp ... @end lisp.\n% @lisp environment forms a group so it can rebind things,\n% including the definition of @end lisp (which normally is erroneous).\n\n% Amount to narrow the margins by for @lisp.\n\\newskip\\lispnarrowing \\lispnarrowing=0.4in\n\n% This is the definition that ^^M gets inside @lisp, @example, and other\n% such environments.  
\\null is better than a space, since it doesn't\n% have any width.\n\\def\\lisppar{\\null\\endgraf}\n\n% This space is always present above and below environments.\n\\newskip\\envskipamount \\envskipamount = 0pt\n\n% Make spacing and below environment symmetrical.  We use \\parskip here\n% to help in doing that, since in @example-like environments \\parskip\n% is reset to zero; thus the \\afterenvbreak inserts no space -- but the\n% start of the next paragraph will insert \\parskip.\n%\n\\def\\aboveenvbreak{{%\n  % =10000 instead of <10000 because of a special case in \\itemzzz and\n  % \\sectionheading, q.v.\n  \\ifnum \\lastpenalty=10000 \\else\n    \\advance\\envskipamount by \\parskip\n    \\endgraf\n    \\ifdim\\lastskip<\\envskipamount\n      \\removelastskip\n      % it's not a good place to break if the last penalty was \\nobreak\n      % or better ...\n      \\ifnum\\lastpenalty<10000 \\penalty-50 \\fi\n      \\vskip\\envskipamount\n    \\fi\n  \\fi\n}}\n\n\\let\\afterenvbreak = \\aboveenvbreak\n\n% \\nonarrowing is a flag.  If \"set\", @lisp etc don't narrow margins; it will\n% also clear it, so that its embedded environments do the narrowing again.\n\\let\\nonarrowing=\\relax\n\n% @cartouche ... 
@end cartouche: draw rectangle w/rounded corners around\n% environment contents.\n\\font\\circle=lcircle10\n\\newdimen\\circthick\n\\newdimen\\cartouter\\newdimen\\cartinner\n\\newskip\\normbskip\\newskip\\normpskip\\newskip\\normlskip\n\\circthick=\\fontdimen8\\circle\n%\n\\def\\ctl{{\\circle\\char'013\\hskip -6pt}}% 6pt from pl file: 1/2charwidth\n\\def\\ctr{{\\hskip 6pt\\circle\\char'010}}\n\\def\\cbl{{\\circle\\char'012\\hskip -6pt}}\n\\def\\cbr{{\\hskip 6pt\\circle\\char'011}}\n\\def\\carttop{\\hbox to \\cartouter{\\hskip\\lskip\n        \\ctl\\leaders\\hrule height\\circthick\\hfil\\ctr\n        \\hskip\\rskip}}\n\\def\\cartbot{\\hbox to \\cartouter{\\hskip\\lskip\n        \\cbl\\leaders\\hrule height\\circthick\\hfil\\cbr\n        \\hskip\\rskip}}\n%\n\\newskip\\lskip\\newskip\\rskip\n\n\\envdef\\cartouche{%\n  \\ifhmode\\par\\fi  % can't be in the midst of a paragraph.\n  \\startsavinginserts\n  \\lskip=\\leftskip \\rskip=\\rightskip\n  \\leftskip=0pt\\rightskip=0pt % we want these *outside*.\n  \\cartinner=\\hsize \\advance\\cartinner by-\\lskip\n  \\advance\\cartinner by-\\rskip\n  \\cartouter=\\hsize\n  \\advance\\cartouter by 18.4pt\t% allow for 3pt kerns on either\n\t\t\t\t% side, and for 6pt waste from\n\t\t\t\t% each corner char, and rule thickness\n  \\normbskip=\\baselineskip \\normpskip=\\parskip \\normlskip=\\lineskip\n  % Flag to tell @lisp, etc., not to narrow margin.\n  \\let\\nonarrowing = t%\n  %\n  % If this cartouche directly follows a sectioning command, we need the\n  % \\parskip glue (backspaced over by default) or the cartouche can\n  % collide with the section heading.\n  \\ifnum\\lastpenalty>10000 \\vskip\\parskip \\penalty\\lastpenalty \\fi\n  %\n  \\vbox\\bgroup\n      \\baselineskip=0pt\\parskip=0pt\\lineskip=0pt\n      \\carttop\n      \\hbox\\bgroup\n\t  \\hskip\\lskip\n\t  \\vrule\\kern3pt\n\t  \\vbox\\bgroup\n\t      \\kern3pt\n\t      \\hsize=\\cartinner\n\t      \\baselineskip=\\normbskip\n\t      \\lineskip=\\normlskip\n\t  
    \\parskip=\\normpskip\n\t      \\vskip -\\parskip\n\t      \\comment % For explanation, see the end of def\\group.\n}\n\\def\\Ecartouche{%\n              \\ifhmode\\par\\fi\n\t      \\kern3pt\n\t  \\egroup\n\t  \\kern3pt\\vrule\n\t  \\hskip\\rskip\n      \\egroup\n      \\cartbot\n  \\egroup\n  \\checkinserts\n}\n\n\n% This macro is called at the beginning of all the @example variants,\n% inside a group.\n\\newdimen\\nonfillparindent\n\\def\\nonfillstart{%\n  \\aboveenvbreak\n  \\hfuzz = 12pt % Don't be fussy\n  \\sepspaces % Make spaces be word-separators rather than space tokens.\n  \\let\\par = \\lisppar % don't ignore blank lines\n  \\obeylines % each line of input is a line of output\n  \\parskip = 0pt\n  % Turn off paragraph indentation but redefine \\indent to emulate\n  % the normal \\indent.\n  \\nonfillparindent=\\parindent\n  \\parindent = 0pt\n  \\let\\indent\\nonfillindent\n  %\n  \\emergencystretch = 0pt % don't try to avoid overfull boxes\n  \\ifx\\nonarrowing\\relax\n    \\advance \\leftskip by \\lispnarrowing\n    \\exdentamount=\\lispnarrowing\n  \\else\n    \\let\\nonarrowing = \\relax\n  \\fi\n  \\let\\exdent=\\nofillexdent\n}\n\n\\begingroup\n\\obeyspaces\n% We want to swallow spaces (but not other tokens) after the fake\n% @indent in our nonfill-environments, where spaces are normally\n% active and set to @tie, resulting in them not being ignored after\n% @indent.\n\\gdef\\nonfillindent{\\futurelet\\temp\\nonfillindentcheck}%\n\\gdef\\nonfillindentcheck{%\n\\ifx\\temp %\n\\expandafter\\nonfillindentgobble%\n\\else%\n\\leavevmode\\nonfillindentbox%\n\\fi%\n}%\n\\endgroup\n\\def\\nonfillindentgobble#1{\\nonfillindent}\n\\def\\nonfillindentbox{\\hbox to \\nonfillparindent{\\hss}}\n\n% If you want all examples etc. 
small: @set dispenvsize small.\n% If you want even small examples the full size: @set dispenvsize nosmall.\n% This affects the following displayed environments:\n%    @example, @display, @format, @lisp\n%\n\\def\\smallword{small}\n\\def\\nosmallword{nosmall}\n\\let\\SETdispenvsize\\relax\n\\def\\setnormaldispenv{%\n  \\ifx\\SETdispenvsize\\smallword\n    % end paragraph for sake of leading, in case document has no blank\n    % line.  This is redundant with what happens in \\aboveenvbreak, but\n    % we need to do it before changing the fonts, and it's inconvenient\n    % to change the fonts afterward.\n    \\ifnum \\lastpenalty=10000 \\else \\endgraf \\fi\n    \\smallexamplefonts \\rm\n  \\fi\n}\n\\def\\setsmalldispenv{%\n  \\ifx\\SETdispenvsize\\nosmallword\n  \\else\n    \\ifnum \\lastpenalty=10000 \\else \\endgraf \\fi\n    \\smallexamplefonts \\rm\n  \\fi\n}\n\n% We often define two environments, @foo and @smallfoo.\n% Let's do it in one command.  #1 is the env name, #2 the definition.\n\\def\\makedispenvdef#1#2{%\n  \\expandafter\\envdef\\csname#1\\endcsname {\\setnormaldispenv #2}%\n  \\expandafter\\envdef\\csname small#1\\endcsname {\\setsmalldispenv #2}%\n  \\expandafter\\let\\csname E#1\\endcsname \\afterenvbreak\n  \\expandafter\\let\\csname Esmall#1\\endcsname \\afterenvbreak\n}\n\n% Define two environment synonyms (#1 and #2) for an environment.\n\\def\\maketwodispenvdef#1#2#3{%\n  \\makedispenvdef{#1}{#3}%\n  \\makedispenvdef{#2}{#3}%\n}\n%\n% @lisp: indented, narrowed, typewriter font;\n% @example: same as @lisp.\n%\n% @smallexample and @smalllisp: use smaller fonts.\n% Originally contributed by Pavel@xerox.\n%\n\\maketwodispenvdef{lisp}{example}{%\n  \\nonfillstart\n  \\tt\\setupmarkupstyle{example}%\n  \\let\\kbdfont = \\kbdexamplefont % Allow @kbd to do something special.\n  \\gobble % eat return\n}\n% @display/@smalldisplay: same as @lisp except keep current font.\n%\n\\makedispenvdef{display}{%\n  \\nonfillstart\n  \\gobble\n}\n\n% 
@format/@smallformat: same as @display except don't narrow margins.\n%\n\\makedispenvdef{format}{%\n  \\let\\nonarrowing = t%\n  \\nonfillstart\n  \\gobble\n}\n\n% @flushleft: same as @format, but doesn't obey \\SETdispenvsize.\n\\envdef\\flushleft{%\n  \\let\\nonarrowing = t%\n  \\nonfillstart\n  \\gobble\n}\n\\let\\Eflushleft = \\afterenvbreak\n\n% @flushright.\n%\n\\envdef\\flushright{%\n  \\let\\nonarrowing = t%\n  \\nonfillstart\n  \\advance\\leftskip by 0pt plus 1fill\\relax\n  \\gobble\n}\n\\let\\Eflushright = \\afterenvbreak\n\n\n% @raggedright does more-or-less normal line breaking but no right\n% justification.  From plain.tex.\n\\envdef\\raggedright{%\n  \\rightskip0pt plus2em \\spaceskip.3333em \\xspaceskip.5em\\relax\n}\n\\let\\Eraggedright\\par\n\n\\envdef\\raggedleft{%\n  \\parindent=0pt \\leftskip0pt plus2em\n  \\spaceskip.3333em \\xspaceskip.5em \\parfillskip=0pt\n  \\hbadness=10000 % Last line will usually be underfull, so turn off\n                  % badness reporting.\n}\n\\let\\Eraggedleft\\par\n\n\\envdef\\raggedcenter{%\n  \\parindent=0pt \\rightskip0pt plus1em \\leftskip0pt plus1em\n  \\spaceskip.3333em \\xspaceskip.5em \\parfillskip=0pt\n  \\hbadness=10000 % Last line will usually be underfull, so turn off\n                  % badness reporting.\n}\n\\let\\Eraggedcenter\\par\n\n\n% @quotation does normal linebreaking (hence we can't use \\nonfillstart)\n% and narrows the margins.  We keep \\parskip nonzero in general, since\n% we're doing normal filling.  
So, when using \\aboveenvbreak and\n% \\afterenvbreak, temporarily make \\parskip 0.\n%\n\\makedispenvdef{quotation}{\\quotationstart}\n%\n\\def\\quotationstart{%\n  \\indentedblockstart % same as \\indentedblock, but increase right margin too.\n  \\ifx\\nonarrowing\\relax\n    \\advance\\rightskip by \\lispnarrowing\n  \\fi\n  \\parsearg\\quotationlabel\n}\n\n% We have retained a nonzero parskip for the environment, since we're\n% doing normal filling.\n%\n\\def\\Equotation{%\n  \\par\n  \\ifx\\quotationauthor\\thisisundefined\\else\n    % indent a bit.\n    \\leftline{\\kern 2\\leftskip \\sl ---\\quotationauthor}%\n  \\fi\n  {\\parskip=0pt \\afterenvbreak}%\n}\n\\def\\Esmallquotation{\\Equotation}\n\n% If we're given an argument, typeset it in bold with a colon after.\n\\def\\quotationlabel#1{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\empty \\else\n    {\\bf #1: }%\n  \\fi\n}\n\n% @indentedblock is like @quotation, but indents only on the left and\n% has no optional argument.\n% \n\\makedispenvdef{indentedblock}{\\indentedblockstart}\n%\n\\def\\indentedblockstart{%\n  {\\parskip=0pt \\aboveenvbreak}% because \\aboveenvbreak inserts \\parskip\n  \\parindent=0pt\n  %\n  % @cartouche defines \\nonarrowing to inhibit narrowing at next level down.\n  \\ifx\\nonarrowing\\relax\n    \\advance\\leftskip by \\lispnarrowing\n    \\exdentamount = \\lispnarrowing\n  \\else\n    \\let\\nonarrowing = \\relax\n  \\fi\n}\n\n% Keep a nonzero parskip for the environment, since we're doing normal filling.\n%\n\\def\\Eindentedblock{%\n  \\par\n  {\\parskip=0pt \\afterenvbreak}%\n}\n\\def\\Esmallindentedblock{\\Eindentedblock}\n\n\n% LaTeX-like @verbatim...@end verbatim and @verb{<char>...<char>}\n% If we want to allow any <char> as delimiter,\n% we need the curly braces so that makeinfo sees the @verb command, eg:\n% `@verbx...x' would look like the '@verbx' command.  --janneke@gnu.org\n%\n% [Knuth]: Donald Ervin Knuth, 1996.  
The TeXbook.\n%\n% [Knuth] p.344; only we need to do the other characters Texinfo sets\n% active too.  Otherwise, they get lost as the first character on a\n% verbatim line.\n\\def\\dospecials{%\n  \\do\\ \\do\\\\\\do\\{\\do\\}\\do\\$\\do\\&%\n  \\do\\#\\do\\^\\do\\^^K\\do\\_\\do\\^^A\\do\\%\\do\\~%\n  \\do\\<\\do\\>\\do\\|\\do\\@\\do+\\do\\\"%\n  % Don't do the quotes -- if we do, @set txicodequoteundirected and\n  % @set txicodequotebacktick will not have effect on @verb and\n  % @verbatim, and ?` and !` ligatures won't get disabled.\n  %\\do\\`\\do\\'%\n}\n%\n% [Knuth] p. 380\n\\def\\uncatcodespecials{%\n  \\def\\do##1{\\catcode`##1=\\other}\\dospecials}\n%\n% Setup for the @verb command.\n%\n% Eight spaces for a tab\n\\begingroup\n  \\catcode`\\^^I=\\active\n  \\gdef\\tabeightspaces{\\catcode`\\^^I=\\active\\def^^I{\\ \\ \\ \\ \\ \\ \\ \\ }}\n\\endgroup\n%\n\\def\\setupverb{%\n  \\tt  % easiest (and conventionally used) font for verbatim\n  \\def\\par{\\leavevmode\\endgraf}%\n  \\setupmarkupstyle{verb}%\n  \\tabeightspaces\n  % Respect line breaks,\n  % print special symbols as themselves, and\n  % make each space count\n  % must do in this order:\n  \\obeylines \\uncatcodespecials \\sepspaces\n}\n\n% Setup for the @verbatim environment\n%\n% Real tab expansion.\n\\newdimen\\tabw \\setbox0=\\hbox{\\tt\\space} \\tabw=8\\wd0 % tab amount\n%\n% We typeset each line of the verbatim in an \\hbox, so we can handle\n% tabs.  The \\global is in case the verbatim line starts with an accent,\n% or some other command that starts with a begin-group.  Otherwise, the\n% entire \\verbbox would disappear at the corresponding end-group, before\n% it is typeset.  
Meanwhile, we can't have nested verbatim commands\n% (can we?), so the \\global won't be overwriting itself.\n\\newbox\\verbbox\n\\def\\starttabbox{\\global\\setbox\\verbbox=\\hbox\\bgroup}\n%\n\\begingroup\n  \\catcode`\\^^I=\\active\n  \\gdef\\tabexpand{%\n    \\catcode`\\^^I=\\active\n    \\def^^I{\\leavevmode\\egroup\n      \\dimen\\verbbox=\\wd\\verbbox % the width so far, or since the previous tab\n      \\divide\\dimen\\verbbox by\\tabw\n      \\multiply\\dimen\\verbbox by\\tabw % compute previous multiple of \\tabw\n      \\advance\\dimen\\verbbox by\\tabw  % advance to next multiple of \\tabw\n      \\wd\\verbbox=\\dimen\\verbbox \\box\\verbbox \\starttabbox\n    }%\n  }\n\\endgroup\n\n% start the verbatim environment.\n\\def\\setupverbatim{%\n  \\let\\nonarrowing = t%\n  \\nonfillstart\n  \\tt % easiest (and conventionally used) font for verbatim\n  % The \\leavevmode here is for blank lines.  Otherwise, we would\n  % never \\starttabox and the \\egroup would end verbatim mode.\n  \\def\\par{\\leavevmode\\egroup\\box\\verbbox\\endgraf}%\n  \\tabexpand\n  \\setupmarkupstyle{verbatim}%\n  % Respect line breaks,\n  % print special symbols as themselves, and\n  % make each space count.\n  % Must do in this order:\n  \\obeylines \\uncatcodespecials \\sepspaces\n  \\everypar{\\starttabbox}%\n}\n\n% Do the @verb magic: verbatim text is quoted by unique\n% delimiter characters.  Before first delimiter expect a\n% right brace, after last delimiter expect closing brace:\n%\n%    \\def\\doverb'{'<char>#1<char>'}'{#1}\n%\n% [Knuth] p. 
382; only eat outer {}\n\\begingroup\n  \\catcode`[=1\\catcode`]=2\\catcode`\\{=\\other\\catcode`\\}=\\other\n  \\gdef\\doverb{#1[\\def\\next##1#1}[##1\\endgroup]\\next]\n\\endgroup\n%\n\\def\\verb{\\begingroup\\setupverb\\doverb}\n%\n%\n% Do the @verbatim magic: define the macro \\doverbatim so that\n% the (first) argument ends when '@end verbatim' is reached, ie:\n%\n%     \\def\\doverbatim#1@end verbatim{#1}\n%\n% For Texinfo it's a lot easier than for LaTeX,\n% because texinfo's \\verbatim doesn't stop at '\\end{verbatim}':\n% we need not redefine '\\', '{' and '}'.\n%\n% Inspired by LaTeX's verbatim command set [latex.ltx]\n%\n\\begingroup\n  \\catcode`\\ =\\active\n  \\obeylines %\n  % ignore everything up to the first ^^M, that's the newline at the end\n  % of the @verbatim input line itself.  Otherwise we get an extra blank\n  % line in the output.\n  \\xdef\\doverbatim#1^^M#2@end verbatim{#2\\noexpand\\end\\gobble verbatim}%\n  % We really want {...\\end verbatim} in the body of the macro, but\n  % without the active space; thus we have to use \\xdef and \\gobble.\n\\endgroup\n%\n\\envdef\\verbatim{%\n    \\setupverbatim\\doverbatim\n}\n\\let\\Everbatim = \\afterenvbreak\n\n\n% @verbatiminclude FILE - insert text of file in verbatim environment.\n%\n\\def\\verbatiminclude{\\parseargusing\\filenamecatcodes\\doverbatiminclude}\n%\n\\def\\doverbatiminclude#1{%\n  {%\n    \\makevalueexpandable\n    \\setupverbatim\n    \\indexnofonts       % Allow `@@' and other weird things in file names.\n    \\wlog{texinfo.tex: doing @verbatiminclude of #1^^J}%\n    \\input #1\n    \\afterenvbreak\n  }%\n}\n\n% @copying ... @end copying.\n% Save the text away for @insertcopying later.\n%\n% We save the uninterpreted tokens, rather than creating a box.\n% Saving the text in a box would be much easier, but then all the\n% typesetting commands (@smallbook, font changes, etc.) 
have to be done\n% beforehand -- and a) we want @copying to be done first in the source\n% file; b) letting users define the frontmatter in as flexible order as\n% possible is very desirable.\n%\n\\def\\copying{\\checkenv{}\\begingroup\\scanargctxt\\docopying}\n\\def\\docopying#1@end copying{\\endgroup\\def\\copyingtext{#1}}\n%\n\\def\\insertcopying{%\n  \\begingroup\n    \\parindent = 0pt  % paragraph indentation looks wrong on title page\n    \\scanexp\\copyingtext\n  \\endgroup\n}\n\n\n\\message{defuns,}\n% @defun etc.\n\n\\newskip\\defbodyindent \\defbodyindent=.4in\n\\newskip\\defargsindent \\defargsindent=50pt\n\\newskip\\deflastargmargin \\deflastargmargin=18pt\n\\newcount\\defunpenalty\n\n% Start the processing of @deffn:\n\\def\\startdefun{%\n  \\ifnum\\lastpenalty<10000\n    \\medbreak\n    \\defunpenalty=10003 % Will keep this @deffn together with the\n                        % following @def command, see below.\n  \\else\n    % If there are two @def commands in a row, we'll have a \\nobreak,\n    % which is there to keep the function description together with its\n    % header.  But if there's nothing but headers, we need to allow a\n    % break somewhere.  
Check specifically for penalty 10002, inserted\n    % by \\printdefunline, instead of 10000, since the sectioning\n    % commands also insert a nobreak penalty, and we don't want to allow\n    % a break between a section heading and a defun.\n    %\n    % As a further refinement, we avoid \"club\" headers by signalling\n    % with penalty of 10003 after the very first @deffn in the\n    % sequence (see above), and penalty of 10002 after any following\n    % @def command.\n    \\ifnum\\lastpenalty=10002 \\penalty2000 \\else \\defunpenalty=10002 \\fi\n    %\n    % Similarly, after a section heading, do not allow a break.\n    % But do insert the glue.\n    \\medskip  % preceded by discardable penalty, so not a breakpoint\n  \\fi\n  %\n  \\parindent=0in\n  \\advance\\leftskip by \\defbodyindent\n  \\exdentamount=\\defbodyindent\n}\n\n\\def\\dodefunx#1{%\n  % First, check whether we are in the right environment:\n  \\checkenv#1%\n  %\n  % As above, allow line break if we have multiple x headers in a row.\n  % It's not a great place, though.\n  \\ifnum\\lastpenalty=10002 \\penalty3000 \\else \\defunpenalty=10002 \\fi\n  %\n  % And now, it's time to reuse the body of the original defun:\n  \\expandafter\\gobbledefun#1%\n}\n\\def\\gobbledefun#1\\startdefun{}\n\n% \\printdefunline \\deffnheader{text}\n%\n\\def\\printdefunline#1#2{%\n  \\begingroup\n    % call \\deffnheader:\n    #1#2 \\endheader\n    % common ending:\n    \\interlinepenalty = 10000\n    \\advance\\rightskip by 0pt plus 1fil\\relax\n    \\endgraf\n    \\nobreak\\vskip -\\parskip\n    \\penalty\\defunpenalty  % signal to \\startdefun and \\dodefunx\n    % Some of the @defun-type tags do not enable magic parentheses,\n    % rendering the following check redundant.  
But we don't optimize.\n    \\checkparencounts\n  \\endgroup\n}\n\n\\def\\Edefun{\\endgraf\\medbreak}\n\n% \\makedefun{deffn} creates \\deffn, \\deffnx and \\Edeffn;\n% the only thing remaining is to define \\deffnheader.\n%\n\\def\\makedefun#1{%\n  \\expandafter\\let\\csname E#1\\endcsname = \\Edefun\n  \\edef\\temp{\\noexpand\\domakedefun\n    \\makecsname{#1}\\makecsname{#1x}\\makecsname{#1header}}%\n  \\temp\n}\n\n% \\domakedefun \\deffn \\deffnx \\deffnheader\n%\n% Define \\deffn and \\deffnx, without parameters.\n% \\deffnheader has to be defined explicitly.\n%\n\\def\\domakedefun#1#2#3{%\n  \\envdef#1{%\n    \\startdefun\n    \\doingtypefnfalse    % distinguish typed functions from all else\n    \\parseargusing\\activeparens{\\printdefunline#3}%\n  }%\n  \\def#2{\\dodefunx#1}%\n  \\def#3%\n}\n\n\\newif\\ifdoingtypefn       % doing typed function?\n\\newif\\ifrettypeownline    % typeset return type on its own line?\n\n% @deftypefnnewline on|off says whether the return type of typed functions\n% are printed on their own line.  
This affects @deftypefn, @deftypefun,\n% @deftypeop, and @deftypemethod.\n% \n\\parseargdef\\deftypefnnewline{%\n  \\def\\temp{#1}%\n  \\ifx\\temp\\onword\n    \\expandafter\\let\\csname SETtxideftypefnnl\\endcsname\n      = \\empty\n  \\else\\ifx\\temp\\offword\n    \\expandafter\\let\\csname SETtxideftypefnnl\\endcsname\n      = \\relax\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @txideftypefnnl value `\\temp',\n                must be on|off}%\n  \\fi\\fi\n}\n\n% Untyped functions:\n\n% @deffn category name args\n\\makedefun{deffn}{\\deffngeneral{}}\n\n% @deffn category class name args\n\\makedefun{defop}#1 {\\defopon{#1\\ \\putwordon}}\n\n% \\defopon {category on}class name args\n\\def\\defopon#1#2 {\\deffngeneral{\\putwordon\\ \\code{#2}}{#1\\ \\code{#2}} }\n\n% \\deffngeneral {subind}category name args\n%\n\\def\\deffngeneral#1#2 #3 #4\\endheader{%\n  % Remember that \\dosubind{fn}{foo}{} is equivalent to \\doind{fn}{foo}.\n  \\dosubind{fn}{\\code{#3}}{#1}%\n  \\defname{#2}{}{#3}\\magicamp\\defunargs{#4\\unskip}%\n}\n\n% Typed functions:\n\n% @deftypefn category type name args\n\\makedefun{deftypefn}{\\deftypefngeneral{}}\n\n% @deftypeop category class type name args\n\\makedefun{deftypeop}#1 {\\deftypeopon{#1\\ \\putwordon}}\n\n% \\deftypeopon {category on}class type name args\n\\def\\deftypeopon#1#2 {\\deftypefngeneral{\\putwordon\\ \\code{#2}}{#1\\ \\code{#2}} }\n\n% \\deftypefngeneral {subind}category type name args\n%\n\\def\\deftypefngeneral#1#2 #3 #4 #5\\endheader{%\n  \\dosubind{fn}{\\code{#4}}{#1}%\n  \\doingtypefntrue\n  \\defname{#2}{#3}{#4}\\defunargs{#5\\unskip}%\n}\n\n% Typed variables:\n\n% @deftypevr category type var args\n\\makedefun{deftypevr}{\\deftypecvgeneral{}}\n\n% @deftypecv category class type var args\n\\makedefun{deftypecv}#1 {\\deftypecvof{#1\\ \\putwordof}}\n\n% \\deftypecvof {category of}class type var args\n\\def\\deftypecvof#1#2 {\\deftypecvgeneral{\\putwordof\\ \\code{#2}}{#1\\ \\code{#2}} }\n\n% 
\\deftypecvgeneral {subind}category type var args\n%\n\\def\\deftypecvgeneral#1#2 #3 #4 #5\\endheader{%\n  \\dosubind{vr}{\\code{#4}}{#1}%\n  \\defname{#2}{#3}{#4}\\defunargs{#5\\unskip}%\n}\n\n% Untyped variables:\n\n% @defvr category var args\n\\makedefun{defvr}#1 {\\deftypevrheader{#1} {} }\n\n% @defcv category class var args\n\\makedefun{defcv}#1 {\\defcvof{#1\\ \\putwordof}}\n\n% \\defcvof {category of}class var args\n\\def\\defcvof#1#2 {\\deftypecvof{#1}#2 {} }\n\n% Types:\n\n% @deftp category name args\n\\makedefun{deftp}#1 #2 #3\\endheader{%\n  \\doind{tp}{\\code{#2}}%\n  \\defname{#1}{}{#2}\\defunargs{#3\\unskip}%\n}\n\n% Remaining @defun-like shortcuts:\n\\makedefun{defun}{\\deffnheader{\\putwordDeffunc} }\n\\makedefun{defmac}{\\deffnheader{\\putwordDefmac} }\n\\makedefun{defspec}{\\deffnheader{\\putwordDefspec} }\n\\makedefun{deftypefun}{\\deftypefnheader{\\putwordDeffunc} }\n\\makedefun{defvar}{\\defvrheader{\\putwordDefvar} }\n\\makedefun{defopt}{\\defvrheader{\\putwordDefopt} }\n\\makedefun{deftypevar}{\\deftypevrheader{\\putwordDefvar} }\n\\makedefun{defmethod}{\\defopon\\putwordMethodon}\n\\makedefun{deftypemethod}{\\deftypeopon\\putwordMethodon}\n\\makedefun{defivar}{\\defcvof\\putwordInstanceVariableof}\n\\makedefun{deftypeivar}{\\deftypecvof\\putwordInstanceVariableof}\n\n% \\defname, which formats the name of the @def (not the args).\n% #1 is the category, such as \"Function\".\n% #2 is the return type, if any.\n% #3 is the function name.\n%\n% We are followed by (but not passed) the arguments, if any.\n%\n\\def\\defname#1#2#3{%\n  \\par\n  % Get the values of \\leftskip and \\rightskip as they were outside the @def...\n  \\advance\\leftskip by -\\defbodyindent\n  %\n  % Determine if we are typesetting the return type of a typed function\n  % on a line by itself.\n  \\rettypeownlinefalse\n  \\ifdoingtypefn  % doing a typed function specifically?\n    % then check user option for putting return type on its own line:\n    
\\expandafter\\ifx\\csname SETtxideftypefnnl\\endcsname\\relax \\else\n      \\rettypeownlinetrue\n    \\fi\n  \\fi\n  %\n  % How we'll format the category name.  Putting it in brackets helps\n  % distinguish it from the body text that may end up on the next line\n  % just below it.\n  \\def\\temp{#1}%\n  \\setbox0=\\hbox{\\kern\\deflastargmargin \\ifx\\temp\\empty\\else [\\rm\\temp]\\fi}\n  %\n  % Figure out line sizes for the paragraph shape.  We'll always have at\n  % least two.\n  \\tempnum = 2\n  %\n  % The first line needs space for \\box0; but if \\rightskip is nonzero,\n  % we need only space for the part of \\box0 which exceeds it:\n  \\dimen0=\\hsize  \\advance\\dimen0 by -\\wd0  \\advance\\dimen0 by \\rightskip\n  %\n  % If doing a return type on its own line, we'll have another line.\n  \\ifrettypeownline\n    \\advance\\tempnum by 1\n    \\def\\maybeshapeline{0in \\hsize}%\n  \\else\n    \\def\\maybeshapeline{}%\n  \\fi\n  %\n  % The continuations:\n  \\dimen2=\\hsize  \\advance\\dimen2 by -\\defargsindent\n  %\n  % The final paragraph shape:\n  \\parshape \\tempnum  0in \\dimen0  \\maybeshapeline  \\defargsindent \\dimen2\n  %\n  % Put the category name at the right margin.\n  \\noindent\n  \\hbox to 0pt{%\n    \\hfil\\box0 \\kern-\\hsize\n    % \\hsize has to be shortened this way:\n    \\kern\\leftskip\n    % Intentionally do not respect \\rightskip, since we need the space.\n  }%\n  %\n  % Allow all lines to be underfull without complaint:\n  \\tolerance=10000 \\hbadness=10000\n  \\exdentamount=\\defbodyindent\n  {%\n    % defun fonts. We use typewriter by default (used to be bold) because:\n    % . we're printing identifiers, they should be in tt in principle.\n    % . in languages with many accents, such as Czech or French, it's\n    %   common to leave accents off identifiers.  The result looks ok in\n    %   tt, but exceedingly strange in rm.\n    % . we don't want -- and --- to be treated as ligatures.\n    % . 
this still does not fix the ?` and !` ligatures, but so far no\n    %   one has made identifiers using them :).\n    \\df \\tt\n    \\def\\temp{#2}% text of the return type\n    \\ifx\\temp\\empty\\else\n      \\tclose{\\temp}% typeset the return type\n      \\ifrettypeownline\n        % put return type on its own line; prohibit line break following:\n        \\hfil\\vadjust{\\nobreak}\\break  \n      \\else\n        \\space  % type on same line, so just followed by a space\n      \\fi\n    \\fi           % no return type\n    #3% output function name\n  }%\n  {\\rm\\enskip}% hskip 0.5 em of \\tenrm\n  %\n  \\boldbrax\n  % arguments will be output next, if any.\n}\n\n% Print arguments in slanted roman (not ttsl), inconsistently with using\n% tt for the name.  This is because literal text is sometimes needed in\n% the argument list (groff manual), and ttsl and tt are not very\n% distinguishable.  Prevent hyphenation at `-' chars.\n%\n\\def\\defunargs#1{%\n  % use sl by default (not ttsl),\n  % tt for the names.\n  \\df \\sl \\hyphenchar\\font=0\n  %\n  % On the other hand, if an argument has two dashes (for instance), we\n  % want a way to get ttsl.  We used to recommend @var for that, so\n  % leave the code in, but it's strange for @var to lead to typewriter.\n  % Nowadays we recommend @code, since the difference between a ttsl hyphen\n  % and a tt hyphen is pretty tiny.  @code also disables ?` !`.\n  \\def\\var##1{{\\setupmarkupstyle{var}\\ttslanted{##1}}}%\n  #1%\n  \\sl\\hyphenchar\\font=45\n}\n\n% We want ()&[] to print specially on the defun line.\n%\n\\def\\activeparens{%\n  \\catcode`\\(=\\active \\catcode`\\)=\\active\n  \\catcode`\\[=\\active \\catcode`\\]=\\active\n  \\catcode`\\&=\\active\n}\n\n% Make control sequences which act like normal parenthesis chars.\n\\let\\lparen = ( \\let\\rparen = )\n\n% Be sure that we always have a definition for `(', etc.  
For example,\n% if the fn name has parens in it, \\boldbrax will not be in effect yet,\n% so TeX would otherwise complain about undefined control sequence.\n{\n  \\activeparens\n  \\global\\let(=\\lparen \\global\\let)=\\rparen\n  \\global\\let[=\\lbrack \\global\\let]=\\rbrack\n  \\global\\let& = \\&\n\n  \\gdef\\boldbrax{\\let(=\\opnr\\let)=\\clnr\\let[=\\lbrb\\let]=\\rbrb}\n  \\gdef\\magicamp{\\let&=\\amprm}\n}\n\n\\newcount\\parencount\n\n% If we encounter &foo, then turn on ()-hacking afterwards\n\\newif\\ifampseen\n\\def\\amprm#1 {\\ampseentrue{\\bf\\&#1 }}\n\n\\def\\parenfont{%\n  \\ifampseen\n    % At the first level, print parens in roman,\n    % otherwise use the default font.\n    \\ifnum \\parencount=1 \\rm \\fi\n  \\else\n    % The \\sf parens (in \\boldbrax) actually are a little bolder than\n    % the contained text.  This is especially needed for [ and ] .\n    \\sf\n  \\fi\n}\n\\def\\infirstlevel#1{%\n  \\ifampseen\n    \\ifnum\\parencount=1\n      #1%\n    \\fi\n  \\fi\n}\n\\def\\bfafterword#1 {#1 \\bf}\n\n\\def\\opnr{%\n  \\global\\advance\\parencount by 1\n  {\\parenfont(}%\n  \\infirstlevel \\bfafterword\n}\n\\def\\clnr{%\n  {\\parenfont)}%\n  \\infirstlevel \\sl\n  \\global\\advance\\parencount by -1\n}\n\n\\newcount\\brackcount\n\\def\\lbrb{%\n  \\global\\advance\\brackcount by 1\n  {\\bf[}%\n}\n\\def\\rbrb{%\n  {\\bf]}%\n  \\global\\advance\\brackcount by -1\n}\n\n\\def\\checkparencounts{%\n  \\ifnum\\parencount=0 \\else \\badparencount \\fi\n  \\ifnum\\brackcount=0 \\else \\badbrackcount \\fi\n}\n% these should not use \\errmessage; the glibc manual, at least, actually\n% has such constructs (when documenting function pointers).\n\\def\\badparencount{%\n  \\message{Warning: unbalanced parentheses in @def...}%\n  \\global\\parencount=0\n}\n\\def\\badbrackcount{%\n  \\message{Warning: unbalanced square brackets in @def...}%\n  \\global\\brackcount=0\n}\n\n\n\\message{macros,}\n% @macro.\n\n% To do this right we need a feature of e-TeX, 
\\scantokens,\n% which we arrange to emulate with a temporary file in ordinary TeX.\n\\ifx\\eTeXversion\\thisisundefined\n  \\newwrite\\macscribble\n  \\def\\scantokens#1{%\n    \\toks0={#1}%\n    \\immediate\\openout\\macscribble=\\jobname.tmp\n    \\immediate\\write\\macscribble{\\the\\toks0}%\n    \\immediate\\closeout\\macscribble\n    \\input \\jobname.tmp\n  }\n\\fi\n\n\\def\\scanmacro#1{\\begingroup\n  \\newlinechar`\\^^M\n  \\let\\xeatspaces\\eatspaces\n  %\n  % Undo catcode changes of \\startcontents and \\doprintindex\n  % When called from @insertcopying or (short)caption, we need active\n  % backslash to get it printed correctly.  Previously, we had\n  % \\catcode`\\\\=\\other instead.  We'll see whether a problem appears\n  % with macro expansion.\t\t\t\t--kasal, 19aug04\n  \\catcode`\\@=0 \\catcode`\\\\=\\active \\escapechar=`\\@\n  %\n  % ... and for \\example:\n  \\spaceisspace\n  %\n  % The \\empty here causes a following catcode 5 newline to be eaten as\n  % part of reading whitespace after a control sequence.  It does not\n  % eat a catcode 13 newline.  There's no good way to handle the two\n  % cases (untried: maybe e-TeX's \\everyeof could help, though plain TeX\n  % would then have different behavior).  
See the Macro Details node in\n  % the manual for the workaround we recommend for macros and\n  % line-oriented commands.\n  % \n  \\scantokens{#1\\empty}%\n\\endgroup}\n\n\\def\\scanexp#1{%\n  \\edef\\temp{\\noexpand\\scanmacro{#1}}%\n  \\temp\n}\n\n\\newcount\\paramno   % Count of parameters\n\\newtoks\\macname    % Macro name\n\\newif\\ifrecursive  % Is it recursive?\n\n% List of all defined macros in the form\n%    \\definedummyword\\macro1\\definedummyword\\macro2...\n% Currently is also contains all @aliases; the list can be split\n% if there is a need.\n\\def\\macrolist{}\n\n% Add the macro to \\macrolist\n\\def\\addtomacrolist#1{\\expandafter \\addtomacrolistxxx \\csname#1\\endcsname}\n\\def\\addtomacrolistxxx#1{%\n     \\toks0 = \\expandafter{\\macrolist\\definedummyword#1}%\n     \\xdef\\macrolist{\\the\\toks0}%\n}\n\n% Utility routines.\n% This does \\let #1 = #2, with \\csnames; that is,\n%   \\let \\csname#1\\endcsname = \\csname#2\\endcsname\n% (except of course we have to play expansion games).\n%\n\\def\\cslet#1#2{%\n  \\expandafter\\let\n  \\csname#1\\expandafter\\endcsname\n  \\csname#2\\endcsname\n}\n\n% Trim leading and trailing spaces off a string.\n% Concepts from aro-bend problem 15 (see CTAN).\n{\\catcode`\\@=11\n\\gdef\\eatspaces #1{\\expandafter\\trim@\\expandafter{#1 }}\n\\gdef\\trim@ #1{\\trim@@ @#1 @ #1 @ @@}\n\\gdef\\trim@@ #1@ #2@ #3@@{\\trim@@@\\empty #2 @}\n\\def\\unbrace#1{#1}\n\\unbrace{\\gdef\\trim@@@ #1 } #2@{#1}\n}\n\n% Trim a single trailing ^^M off a string.\n{\\catcode`\\^^M=\\other \\catcode`\\Q=3%\n\\gdef\\eatcr #1{\\eatcra #1Q^^MQ}%\n\\gdef\\eatcra#1^^MQ{\\eatcrb#1Q}%\n\\gdef\\eatcrb#1Q#2Q{#1}%\n}\n\n% Macro bodies are absorbed as an argument in a context where\n% all characters are catcode 10, 11 or 12, except \\ which is active\n% (as in normal texinfo). 
It is necessary to change the definition of \\\n% to recognize macro arguments; this is the job of \\mbodybackslash.\n%\n% Non-ASCII encodings make 8-bit characters active, so un-activate\n% them to avoid their expansion.  Must do this non-globally, to\n% confine the change to the current group.\n%\n% It's necessary to have hard CRs when the macro is executed. This is\n% done by making ^^M (\\endlinechar) catcode 12 when reading the macro\n% body, and then making it the \\newlinechar in \\scanmacro.\n%\n\\def\\scanctxt{% used as subroutine\n  \\catcode`\\\"=\\other\n  \\catcode`\\+=\\other\n  \\catcode`\\<=\\other\n  \\catcode`\\>=\\other\n  \\catcode`\\@=\\other\n  \\catcode`\\^=\\other\n  \\catcode`\\_=\\other\n  \\catcode`\\|=\\other\n  \\catcode`\\~=\\other\n  \\ifx\\declaredencoding\\ascii \\else \\setnonasciicharscatcodenonglobal\\other \\fi\n}\n\n\\def\\scanargctxt{% used for copying and captions, not macros.\n  \\scanctxt\n  \\catcode`\\\\=\\other\n  \\catcode`\\^^M=\\other\n}\n\n\\def\\macrobodyctxt{% used for @macro definitions\n  \\scanctxt\n  \\catcode`\\{=\\other\n  \\catcode`\\}=\\other\n  \\catcode`\\^^M=\\other\n  \\usembodybackslash\n}\n\n\\def\\macroargctxt{% used when scanning invocations\n  \\scanctxt\n  \\catcode`\\\\=0\n}\n% why catcode 0 for \\ in the above?  To recognize \\\\ \\{ \\} as \"escapes\"\n% for the single characters \\ { }.  Thus, we end up with the \"commands\"\n% that would be written @\\ @{ @} in a Texinfo document.\n% \n% We already have @{ and @}.  For @\\, we define it here, and only for\n% this purpose, to produce a typewriter backslash (so, the @\\ that we\n% define for @math can't be used with @macro calls):\n%\n\\def\\\\{\\normalbackslash}%\n% \n% We would like to do this for \\, too, since that is what makeinfo does.\n% But it is not possible, because Texinfo already has a command @, for a\n% cedilla accent.  
Documents must use @comma{} instead.\n%\n% \\anythingelse will almost certainly be an error of some kind.\n\n\n% \\mbodybackslash is the definition of \\ in @macro bodies.\n% It maps \\foo\\ => \\csname macarg.foo\\endcsname => #N\n% where N is the macro parameter number.\n% We define \\csname macarg.\\endcsname to be \\realbackslash, so\n% \\\\ in macro replacement text gets you a backslash.\n%\n{\\catcode`@=0 @catcode`@\\=@active\n @gdef@usembodybackslash{@let\\=@mbodybackslash}\n @gdef@mbodybackslash#1\\{@csname macarg.#1@endcsname}\n}\n\\expandafter\\def\\csname macarg.\\endcsname{\\realbackslash}\n\n\\def\\margbackslash#1{\\char`\\#1 }\n\n\\def\\macro{\\recursivefalse\\parsearg\\macroxxx}\n\\def\\rmacro{\\recursivetrue\\parsearg\\macroxxx}\n\n\\def\\macroxxx#1{%\n  \\getargs{#1}% now \\macname is the macname and \\argl the arglist\n  \\ifx\\argl\\empty       % no arguments\n     \\paramno=0\\relax\n  \\else\n     \\expandafter\\parsemargdef \\argl;%\n     \\if\\paramno>256\\relax\n       \\ifx\\eTeXversion\\thisisundefined\n         \\errhelp = \\EMsimple\n         \\errmessage{You need eTeX to compile a file with macros with more than 256 arguments}\n       \\fi\n     \\fi\n  \\fi\n  \\if1\\csname ismacro.\\the\\macname\\endcsname\n     \\message{Warning: redefining \\the\\macname}%\n  \\else\n     \\expandafter\\ifx\\csname \\the\\macname\\endcsname \\relax\n     \\else \\errmessage{Macro name \\the\\macname\\space already defined}\\fi\n     \\global\\cslet{macsave.\\the\\macname}{\\the\\macname}%\n     \\global\\expandafter\\let\\csname ismacro.\\the\\macname\\endcsname=1%\n     \\addtomacrolist{\\the\\macname}%\n  \\fi\n  \\begingroup \\macrobodyctxt\n  \\ifrecursive \\expandafter\\parsermacbody\n  \\else \\expandafter\\parsemacbody\n  \\fi}\n\n\\parseargdef\\unmacro{%\n  \\if1\\csname ismacro.#1\\endcsname\n    \\global\\cslet{#1}{macsave.#1}%\n    \\global\\expandafter\\let \\csname ismacro.#1\\endcsname=0%\n    % Remove the macro name from 
\\macrolist:\n    \\begingroup\n      \\expandafter\\let\\csname#1\\endcsname \\relax\n      \\let\\definedummyword\\unmacrodo\n      \\xdef\\macrolist{\\macrolist}%\n    \\endgroup\n  \\else\n    \\errmessage{Macro #1 not defined}%\n  \\fi\n}\n\n% Called by \\do from \\dounmacro on each macro.  The idea is to omit any\n% macro definitions that have been changed to \\relax.\n%\n\\def\\unmacrodo#1{%\n  \\ifx #1\\relax\n    % remove this\n  \\else\n    \\noexpand\\definedummyword \\noexpand#1%\n  \\fi\n}\n\n% This makes use of the obscure feature that if the last token of a\n% <parameter list> is #, then the preceding argument is delimited by\n% an opening brace, and that opening brace is not consumed.\n\\def\\getargs#1{\\getargsxxx#1{}}\n\\def\\getargsxxx#1#{\\getmacname #1 \\relax\\getmacargs}\n\\def\\getmacname#1 #2\\relax{\\macname={#1}}\n\\def\\getmacargs#1{\\def\\argl{#1}}\n\n% For macro processing make @ a letter so that we can make Texinfo private macro names.\n\\edef\\texiatcatcode{\\the\\catcode`\\@}\n\\catcode `@=11\\relax\n\n% Parse the optional {params} list.  Set up \\paramno and \\paramlist\n% so \\defmacro knows what to do.  Define \\macarg.BLAH for each BLAH\n% in the params list to some hook where the argument si to be expanded.  If\n% there are less than 10 arguments that hook is to be replaced by ##N where N\n% is the position in that list, that is to say the macro arguments are to be\n% defined `a la TeX in the macro body.  
\n%\n% That gets used by \\mbodybackslash (above).\n%\n% We need to get `macro parameter char #' into several definitions.\n% The technique used is stolen from LaTeX: let \\hash be something\n% unexpandable, insert that wherever you need a #, and then redefine\n% it to # just before using the token list produced.\n%\n% The same technique is used to protect \\eatspaces till just before\n% the macro is used.\n%\n% If there are 10 or more arguments, a different technique is used, where the\n% hook remains in the body, and when macro is to be expanded the body is\n% processed again to replace the arguments.\n%\n% In that case, the hook is \\the\\toks N-1, and we simply set \\toks N-1 to the\n% argument N value and then \\edef  the body (nothing else will expand because of\n% the catcode regime underwhich the body was input).\n%\n% If you compile with TeX (not eTeX), and you have macros with 10 or more\n% arguments, you need that no macro has more than 256 arguments, otherwise an\n% error is produced.\n\\def\\parsemargdef#1;{%\n  \\paramno=0\\def\\paramlist{}%\n  \\let\\hash\\relax\n  \\let\\xeatspaces\\relax\n  \\parsemargdefxxx#1,;,%\n  % In case that there are 10 or more arguments we parse again the arguments\n  % list to set new definitions for the \\macarg.BLAH macros corresponding to\n  % each BLAH argument. 
It was anyhow needed to parse already once this list\n  % in order to count the arguments, and as macros with at most 9 arguments\n  % are by far more frequent than macro with 10 or more arguments, defining\n  % twice the \\macarg.BLAH macros does not cost too much processing power.\n  \\ifnum\\paramno<10\\relax\\else\n    \\paramno0\\relax\n    \\parsemmanyargdef@@#1,;,% 10 or more arguments\n  \\fi\n}\n\\def\\parsemargdefxxx#1,{%\n  \\if#1;\\let\\next=\\relax\n  \\else \\let\\next=\\parsemargdefxxx\n    \\advance\\paramno by 1\n    \\expandafter\\edef\\csname macarg.\\eatspaces{#1}\\endcsname\n        {\\xeatspaces{\\hash\\the\\paramno}}%\n    \\edef\\paramlist{\\paramlist\\hash\\the\\paramno,}%\n  \\fi\\next}\n\n\\def\\parsemmanyargdef@@#1,{%\n  \\if#1;\\let\\next=\\relax\n  \\else \n    \\let\\next=\\parsemmanyargdef@@\n    \\edef\\tempb{\\eatspaces{#1}}%\n    \\expandafter\\def\\expandafter\\tempa\n       \\expandafter{\\csname macarg.\\tempb\\endcsname}%\n    % Note that we need some extra \\noexpand\\noexpand, this is because we\n    % don't want \\the  to be expanded in the \\parsermacbody  as it uses an\n    % \\xdef .\n    \\expandafter\\edef\\tempa\n      {\\noexpand\\noexpand\\noexpand\\the\\toks\\the\\paramno}%\n    \\advance\\paramno by 1\\relax\n  \\fi\\next}\n\n% These two commands read recursive and nonrecursive macro bodies.\n% (They're different since rec and nonrec macros end differently.)\n%\n\n\\catcode `\\@\\texiatcatcode\n\\long\\def\\parsemacbody#1@end macro%\n{\\xdef\\temp{\\eatcr{#1}}\\endgroup\\defmacro}%\n\\long\\def\\parsermacbody#1@end rmacro%\n{\\xdef\\temp{\\eatcr{#1}}\\endgroup\\defmacro}%\n\\catcode `\\@=11\\relax\n\n\\let\\endargs@\\relax\n\\let\\nil@\\relax\n\\def\\nilm@{\\nil@}%\n\\long\\def\\nillm@{\\nil@}%\n\n% This macro is expanded during the Texinfo macro expansion, not during its\n% definition.  
It gets all the arguments values and assigns them to macros\n% macarg.ARGNAME\n%\n% #1 is the macro name\n% #2 is the list of argument names\n% #3 is the list of argument values\n\\def\\getargvals@#1#2#3{%\n  \\def\\macargdeflist@{}%\n  \\def\\saveparamlist@{#2}% Need to keep a copy for parameter expansion.\n  \\def\\paramlist{#2,\\nil@}%\n  \\def\\macroname{#1}%\n  \\begingroup\n  \\macroargctxt\n  \\def\\argvaluelist{#3,\\nil@}%\n  \\def\\@tempa{#3}%\n  \\ifx\\@tempa\\empty\n    \\setemptyargvalues@\n  \\else\n    \\getargvals@@\n  \\fi\n}\n\n% \n\\def\\getargvals@@{%\n  \\ifx\\paramlist\\nilm@\n      % Some sanity check needed here that \\argvaluelist is also empty.\n      \\ifx\\argvaluelist\\nillm@\n      \\else\n        \\errhelp = \\EMsimple\n        \\errmessage{Too many arguments in macro `\\macroname'!}%\n      \\fi\n      \\let\\next\\macargexpandinbody@\n  \\else\n    \\ifx\\argvaluelist\\nillm@\n       % No more arguments values passed to macro.  Set remaining named-arg\n       % macros to empty.\n       \\let\\next\\setemptyargvalues@\n    \\else\n      % pop current arg name into \\@tempb\n      \\def\\@tempa##1{\\pop@{\\@tempb}{\\paramlist}##1\\endargs@}%\n      \\expandafter\\@tempa\\expandafter{\\paramlist}%\n       % pop current argument value into \\@tempc\n      \\def\\@tempa##1{\\longpop@{\\@tempc}{\\argvaluelist}##1\\endargs@}%\n      \\expandafter\\@tempa\\expandafter{\\argvaluelist}%\n       % Here \\@tempb is the current arg name and \\@tempc is the current arg value.\n       % First place the new argument macro definition into \\@tempd\n       \\expandafter\\macname\\expandafter{\\@tempc}%\n       \\expandafter\\let\\csname macarg.\\@tempb\\endcsname\\relax\n       \\expandafter\\def\\expandafter\\@tempe\\expandafter{%\n         \\csname macarg.\\@tempb\\endcsname}%\n       \\edef\\@tempd{\\long\\def\\@tempe{\\the\\macname}}%\n       \\push@\\@tempd\\macargdeflist@\n       \\let\\next\\getargvals@@\n    \\fi\n  \\fi\n  
\\next\n}\n\n\\def\\push@#1#2{%\n  \\expandafter\\expandafter\\expandafter\\def\n  \\expandafter\\expandafter\\expandafter#2%\n  \\expandafter\\expandafter\\expandafter{%\n  \\expandafter#1#2}%\n}\n\n% Replace arguments by their values in the macro body, and place the result\n% in macro \\@tempa\n\\def\\macvalstoargs@{%\n  %  To do this we use the property that token registers that are \\the'ed\n  % within an \\edef  expand only once. So we are going to place all argument\n  % values into respective token registers.\n  %\n  % First we save the token context, and initialize argument numbering.\n  \\begingroup\n    \\paramno0\\relax\n    % Then, for each argument number #N, we place the corresponding argument\n    % value into a new token list register \\toks#N\n    \\expandafter\\putargsintokens@\\saveparamlist@,;,%\n    % Then, we expand the body so that argument are replaced by their\n    % values. The trick for values not to be expanded themselves is that they\n    % are within tokens and that tokens expand only once in an \\edef .\n    \\edef\\@tempc{\\csname mac.\\macroname .body\\endcsname}%\n    % Now we restore the token stack pointer to free the token list registers\n    % which we have used, but we make sure that expanded body is saved after\n    % group.\n    \\expandafter\n  \\endgroup\n  \\expandafter\\def\\expandafter\\@tempa\\expandafter{\\@tempc}%\n  }\n\n\\def\\macargexpandinbody@{% \n  %% Define the named-macro outside of this group and then close this group. 
\n  \\expandafter\n  \\endgroup\n  \\macargdeflist@\n  % First the replace in body the macro arguments by their values, the result\n  % is in \\@tempa .\n  \\macvalstoargs@\n  % Then we point at the \\norecurse or \\gobble (for recursive) macro value\n  % with \\@tempb .\n  \\expandafter\\let\\expandafter\\@tempb\\csname mac.\\macroname .recurse\\endcsname\n  % Depending on whether it is recursive or not, we need some tailing\n  % \\egroup .\n  \\ifx\\@tempb\\gobble\n     \\let\\@tempc\\relax\n  \\else\n     \\let\\@tempc\\egroup\n  \\fi\n  % And now we do the real job:\n  \\edef\\@tempd{\\noexpand\\@tempb{\\macroname}\\noexpand\\scanmacro{\\@tempa}\\@tempc}%\n  \\@tempd\n}\n\n\\def\\putargsintokens@#1,{%\n  \\if#1;\\let\\next\\relax\n  \\else\n    \\let\\next\\putargsintokens@\n    % First we allocate the new token list register, and give it a temporary\n    % alias \\@tempb .\n    \\toksdef\\@tempb\\the\\paramno\n    % Then we place the argument value into that token list register.\n    \\expandafter\\let\\expandafter\\@tempa\\csname macarg.#1\\endcsname\n    \\expandafter\\@tempb\\expandafter{\\@tempa}%\n    \\advance\\paramno by 1\\relax\n  \\fi\n  \\next\n}\n\n% Save the token stack pointer into macro #1\n\\def\\texisavetoksstackpoint#1{\\edef#1{\\the\\@cclvi}}\n% Restore the token stack pointer from number in macro #1\n\\def\\texirestoretoksstackpoint#1{\\expandafter\\mathchardef\\expandafter\\@cclvi#1\\relax}\n% newtoks that can be used non \\outer .\n\\def\\texinonouternewtoks{\\alloc@ 5\\toks \\toksdef \\@cclvi}\n\n% Tailing missing arguments are set to empty\n\\def\\setemptyargvalues@{%\n  \\ifx\\paramlist\\nilm@\n    \\let\\next\\macargexpandinbody@\n  \\else\n    \\expandafter\\setemptyargvaluesparser@\\paramlist\\endargs@\n    \\let\\next\\setemptyargvalues@\n  \\fi\n  \\next\n}\n\n\\def\\setemptyargvaluesparser@#1,#2\\endargs@{%\n  \\expandafter\\def\\expandafter\\@tempa\\expandafter{%\n    \\expandafter\\def\\csname macarg.#1\\endcsname{}}%\n  
\\push@\\@tempa\\macargdeflist@\n  \\def\\paramlist{#2}%\n}\n\n% #1 is the element target macro\n% #2 is the list macro\n% #3,#4\\endargs@ is the list value\n\\def\\pop@#1#2#3,#4\\endargs@{%\n   \\def#1{#3}%\n   \\def#2{#4}%\n}\n\\long\\def\\longpop@#1#2#3,#4\\endargs@{%\n   \\long\\def#1{#3}%\n   \\long\\def#2{#4}%\n}\n\n% This defines a Texinfo @macro. There are eight cases: recursive and\n% nonrecursive macros of zero, one, up to nine, and many arguments.\n% Much magic with \\expandafter here.\n% \\xdef is used so that macro definitions will survive the file\n% they're defined in; @include reads the file inside a group.\n%\n\\def\\defmacro{%\n  \\let\\hash=##% convert placeholders to macro parameter chars\n  \\ifrecursive\n    \\ifcase\\paramno\n    % 0\n      \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n        \\noexpand\\scanmacro{\\temp}}%\n    \\or % 1\n      \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n         \\bgroup\\noexpand\\macroargctxt\n         \\noexpand\\braceorline\n         \\expandafter\\noexpand\\csname\\the\\macname xxx\\endcsname}%\n      \\expandafter\\xdef\\csname\\the\\macname xxx\\endcsname##1{%\n         \\egroup\\noexpand\\scanmacro{\\temp}}%\n    \\else\n      \\ifnum\\paramno<10\\relax % at most 9\n        \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n           \\bgroup\\noexpand\\macroargctxt\n           \\noexpand\\csname\\the\\macname xx\\endcsname}%\n        \\expandafter\\xdef\\csname\\the\\macname xx\\endcsname##1{%\n            \\expandafter\\noexpand\\csname\\the\\macname xxx\\endcsname ##1,}%\n        \\expandafter\\expandafter\n        \\expandafter\\xdef\n        \\expandafter\\expandafter\n          \\csname\\the\\macname xxx\\endcsname\n            \\paramlist{\\egroup\\noexpand\\scanmacro{\\temp}}%\n      \\else % 10 or more\n        \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n          \\noexpand\\getargvals@{\\the\\macname}{\\argl}%\n        }%    \n        
\\global\\expandafter\\let\\csname mac.\\the\\macname .body\\endcsname\\temp\n        \\global\\expandafter\\let\\csname mac.\\the\\macname .recurse\\endcsname\\gobble\n      \\fi\n    \\fi\n  \\else\n    \\ifcase\\paramno\n    % 0\n      \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n        \\noexpand\\norecurse{\\the\\macname}%\n        \\noexpand\\scanmacro{\\temp}\\egroup}%\n    \\or % 1\n      \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n         \\bgroup\\noexpand\\macroargctxt\n         \\noexpand\\braceorline\n         \\expandafter\\noexpand\\csname\\the\\macname xxx\\endcsname}%\n      \\expandafter\\xdef\\csname\\the\\macname xxx\\endcsname##1{%\n        \\egroup\n        \\noexpand\\norecurse{\\the\\macname}%\n        \\noexpand\\scanmacro{\\temp}\\egroup}%\n    \\else % at most 9\n      \\ifnum\\paramno<10\\relax\n        \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n           \\bgroup\\noexpand\\macroargctxt\n           \\expandafter\\noexpand\\csname\\the\\macname xx\\endcsname}%\n        \\expandafter\\xdef\\csname\\the\\macname xx\\endcsname##1{%\n            \\expandafter\\noexpand\\csname\\the\\macname xxx\\endcsname ##1,}%\n        \\expandafter\\expandafter\n        \\expandafter\\xdef\n        \\expandafter\\expandafter\n        \\csname\\the\\macname xxx\\endcsname\n        \\paramlist{%\n            \\egroup\n            \\noexpand\\norecurse{\\the\\macname}%\n            \\noexpand\\scanmacro{\\temp}\\egroup}%\n      \\else % 10 or more:\n        \\expandafter\\xdef\\csname\\the\\macname\\endcsname{%\n          \\noexpand\\getargvals@{\\the\\macname}{\\argl}%\n        }%\n        \\global\\expandafter\\let\\csname mac.\\the\\macname .body\\endcsname\\temp\n        \\global\\expandafter\\let\\csname mac.\\the\\macname .recurse\\endcsname\\norecurse\n      \\fi\n    \\fi\n  \\fi}\n\n\\catcode `\\@\\texiatcatcode\\relax\n\n\\def\\norecurse#1{\\bgroup\\cslet{#1}{macsave.#1}}\n\n% \\braceorline decides whether the 
next nonwhitespace character is a\n% {.  If so it reads up to the closing }, if not, it reads the whole\n% line.  Whatever was read is then fed to the next control sequence\n% as an argument (by \\parsebrace or \\parsearg).\n% \n\\def\\braceorline#1{\\let\\macnamexxx=#1\\futurelet\\nchar\\braceorlinexxx}\n\\def\\braceorlinexxx{%\n  \\ifx\\nchar\\bgroup\\else\n    \\expandafter\\parsearg\n  \\fi \\macnamexxx}\n\n\n% @alias.\n% We need some trickery to remove the optional spaces around the equal\n% sign.  Make them active and then expand them all to nothing.\n%\n\\def\\alias{\\parseargusing\\obeyspaces\\aliasxxx}\n\\def\\aliasxxx #1{\\aliasyyy#1\\relax}\n\\def\\aliasyyy #1=#2\\relax{%\n  {%\n    \\expandafter\\let\\obeyedspace=\\empty\n    \\addtomacrolist{#1}%\n    \\xdef\\next{\\global\\let\\makecsname{#1}=\\makecsname{#2}}%\n  }%\n  \\next\n}\n\n\n\\message{cross references,}\n\n\\newwrite\\auxfile\n\\newif\\ifhavexrefs    % True if xref values are known.\n\\newif\\ifwarnedxrefs  % True if we warned once that they aren't known.\n\n% @inforef is relatively simple.\n\\def\\inforef #1{\\inforefzzz #1,,,,**}\n\\def\\inforefzzz #1,#2,#3,#4**{%\n  \\putwordSee{} \\putwordInfo{} \\putwordfile{} \\file{\\ignorespaces #3{}},\n  node \\samp{\\ignorespaces#1{}}}\n\n% @node's only job in TeX is to define \\lastnode, which is used in\n% cross-references.  The @node line might or might not have commas, and\n% might or might not have spaces before the first comma, like:\n% @node foo , bar , ...\n% We don't want such trailing spaces in the node name.\n%\n\\parseargdef\\node{\\checkenv{}\\donode #1 ,\\finishnodeparse}\n%\n% also remove a trailing comma, in case of something like this:\n% @node Help-Cross,  ,  , Cross-refs\n\\def\\donode#1 ,#2\\finishnodeparse{\\dodonode #1,\\finishnodeparse}\n\\def\\dodonode#1,#2\\finishnodeparse{\\gdef\\lastnode{#1}}\n\n\\let\\nwnode=\\node\n\\let\\lastnode=\\empty\n\n% Write a cross-reference definition for the current node.  
#1 is the\n% type (Ynumbered, Yappendix, Ynothing).\n%\n\\def\\donoderef#1{%\n  \\ifx\\lastnode\\empty\\else\n    \\setref{\\lastnode}{#1}%\n    \\global\\let\\lastnode=\\empty\n  \\fi\n}\n\n% @anchor{NAME} -- define xref target at arbitrary point.\n%\n\\newcount\\savesfregister\n%\n\\def\\savesf{\\relax \\ifhmode \\savesfregister=\\spacefactor \\fi}\n\\def\\restoresf{\\relax \\ifhmode \\spacefactor=\\savesfregister \\fi}\n\\def\\anchor#1{\\savesf \\setref{#1}{Ynothing}\\restoresf \\ignorespaces}\n\n% \\setref{NAME}{SNT} defines a cross-reference point NAME (a node or an\n% anchor), which consists of three parts:\n% 1) NAME-title - the current sectioning name taken from \\lastsection,\n%                 or the anchor name.\n% 2) NAME-snt   - section number and type, passed as the SNT arg, or\n%                 empty for anchors.\n% 3) NAME-pg    - the page number.\n%\n% This is called from \\donoderef, \\anchor, and \\dofloat.  In the case of\n% floats, there is an additional part, which is not written here:\n% 4) NAME-lof   - the text as it should appear in a @listoffloats.\n%\n\\def\\setref#1#2{%\n  \\pdfmkdest{#1}%\n  \\iflinks\n    {%\n      \\atdummies  % preserve commands, but don't expand them\n      \\edef\\writexrdef##1##2{%\n\t\\write\\auxfile{@xrdef{#1-% #1 of \\setref, expanded by the \\edef\n\t  ##1}{##2}}% these are parameters of \\writexrdef\n      }%\n      \\toks0 = \\expandafter{\\lastsection}%\n      \\immediate \\writexrdef{title}{\\the\\toks0 }%\n      \\immediate \\writexrdef{snt}{\\csname #2\\endcsname}% \\Ynumbered etc.\n      \\safewhatsit{\\writexrdef{pg}{\\folio}}% will be written later, at \\shipout\n    }%\n  \\fi\n}\n\n% @xrefautosectiontitle on|off says whether @section(ing) names are used\n% automatically in xrefs, if the third arg is not explicitly specified.\n% This was provided as a \"secret\" @set xref-automatic-section-title\n% variable, now it's official.\n% \n\\parseargdef\\xrefautomaticsectiontitle{%\n  \\def\\temp{#1}%\n  
\\ifx\\temp\\onword\n    \\expandafter\\let\\csname SETxref-automatic-section-title\\endcsname\n      = \\empty\n  \\else\\ifx\\temp\\offword\n    \\expandafter\\let\\csname SETxref-automatic-section-title\\endcsname\n      = \\relax\n  \\else\n    \\errhelp = \\EMsimple\n    \\errmessage{Unknown @xrefautomaticsectiontitle value `\\temp',\n                must be on|off}%\n  \\fi\\fi\n}\n\n% \f\n% @xref, @pxref, and @ref generate cross-references.  For \\xrefX, #1 is\n% the node name, #2 the name of the Info cross-reference, #3 the printed\n% node name, #4 the name of the Info file, #5 the name of the printed\n% manual.  All but the node name can be omitted.\n%\n\\def\\pxref#1{\\putwordsee{} \\xrefX[#1,,,,,,,]}\n\\def\\xref#1{\\putwordSee{} \\xrefX[#1,,,,,,,]}\n\\def\\ref#1{\\xrefX[#1,,,,,,,]}\n%\n\\newbox\\toprefbox\n\\newbox\\printedrefnamebox\n\\newbox\\infofilenamebox\n\\newbox\\printedmanualbox\n%\n\\def\\xrefX[#1,#2,#3,#4,#5,#6]{\\begingroup\n  \\unsepspaces\n  %\n  % Get args without leading/trailing spaces.\n  \\def\\printedrefname{\\ignorespaces #3}%\n  \\setbox\\printedrefnamebox = \\hbox{\\printedrefname\\unskip}%\n  %\n  \\def\\infofilename{\\ignorespaces #4}%\n  \\setbox\\infofilenamebox = \\hbox{\\infofilename\\unskip}%\n  %\n  \\def\\printedmanual{\\ignorespaces #5}%\n  \\setbox\\printedmanualbox  = \\hbox{\\printedmanual\\unskip}%\n  %\n  % If the printed reference name (arg #3) was not explicitly given in\n  % the @xref, figure out what we want to use.\n  \\ifdim \\wd\\printedrefnamebox = 0pt\n    % No printed node name was explicitly given.\n    \\expandafter\\ifx\\csname SETxref-automatic-section-title\\endcsname \\relax\n      % Not auto section-title: use node name inside the square brackets.\n      \\def\\printedrefname{\\ignorespaces #1}%\n    \\else\n      % Auto section-title: use chapter/section title inside\n      % the square brackets if we have it.\n      \\ifdim \\wd\\printedmanualbox > 0pt\n        % It is in another manual, so we 
don't have it; use node name.\n        \\def\\printedrefname{\\ignorespaces #1}%\n      \\else\n        \\ifhavexrefs\n          % We (should) know the real title if we have the xref values.\n          \\def\\printedrefname{\\refx{#1-title}{}}%\n        \\else\n          % Otherwise just copy the Info node name.\n          \\def\\printedrefname{\\ignorespaces #1}%\n        \\fi%\n      \\fi\n    \\fi\n  \\fi\n  %\n  % Make link in pdf output.\n  \\ifpdf\n    {\\indexnofonts\n     \\turnoffactive\n     \\makevalueexpandable\n     % This expands tokens, so do it after making catcode changes, so _\n     % etc. don't get their TeX definitions.  This ignores all spaces in\n     % #4, including (wrongly) those in the middle of the filename.\n     \\getfilename{#4}%\n     %\n     % This (wrongly) does not take account of leading or trailing\n     % spaces in #1, which should be ignored.\n     \\edef\\pdfxrefdest{#1}%\n     \\ifx\\pdfxrefdest\\empty\n       \\def\\pdfxrefdest{Top}% no empty targets\n     \\else\n       \\txiescapepdf\\pdfxrefdest  % escape PDF special chars\n     \\fi\n     %\n     \\leavevmode\n     \\startlink attr{/Border [0 0 0]}%\n     \\ifnum\\filenamelength>0\n       goto file{\\the\\filename.pdf} name{\\pdfxrefdest}%\n     \\else\n       goto name{\\pdfmkpgn{\\pdfxrefdest}}%\n     \\fi\n    }%\n    \\setcolor{\\linkcolor}%\n  \\fi\n  %\n  % Float references are printed completely differently: \"Figure 1.2\"\n  % instead of \"[somenode], p.3\".  
We distinguish them by the\n  % LABEL-title being set to a magic string.\n  {%\n    % Have to otherify everything special to allow the \\csname to\n    % include an _ in the xref name, etc.\n    \\indexnofonts\n    \\turnoffactive\n    \\expandafter\\global\\expandafter\\let\\expandafter\\Xthisreftitle\n      \\csname XR#1-title\\endcsname\n  }%\n  \\iffloat\\Xthisreftitle\n    % If the user specified the print name (third arg) to the ref,\n    % print it instead of our usual \"Figure 1.2\".\n    \\ifdim\\wd\\printedrefnamebox = 0pt\n      \\refx{#1-snt}{}%\n    \\else\n      \\printedrefname\n    \\fi\n    %\n    % If the user also gave the printed manual name (fifth arg), append\n    % \"in MANUALNAME\".\n    \\ifdim \\wd\\printedmanualbox > 0pt\n      \\space \\putwordin{} \\cite{\\printedmanual}%\n    \\fi\n  \\else\n    % node/anchor (non-float) references.\n    % \n    % If we use \\unhbox to print the node names, TeX does not insert\n    % empty discretionaries after hyphens, which means that it will not\n    % find a line break at a hyphen in a node names.  Since some manuals\n    % are best written with fairly long node names, containing hyphens,\n    % this is a loss.  Therefore, we give the text of the node name\n    % again, so it is as if TeX is seeing it for the first time.\n    % \n    \\ifdim \\wd\\printedmanualbox > 0pt\n      % Cross-manual reference with a printed manual name.\n      % \n      \\crossmanualxref{\\cite{\\printedmanual\\unskip}}%\n    %\n    \\else\\ifdim \\wd\\infofilenamebox > 0pt\n      % Cross-manual reference with only an info filename (arg 4), no\n      % printed manual name (arg 5).  
This is essentially the same as\n      % the case above; we output the filename, since we have nothing else.\n      % \n      \\crossmanualxref{\\code{\\infofilename\\unskip}}%\n    %\n    \\else\n      % Reference within this manual.\n      %\n      % _ (for example) has to be the character _ for the purposes of the\n      % control sequence corresponding to the node, but it has to expand\n      % into the usual \\leavevmode...\\vrule stuff for purposes of\n      % printing. So we \\turnoffactive for the \\refx-snt, back on for the\n      % printing, back off for the \\refx-pg.\n      {\\turnoffactive\n       % Only output a following space if the -snt ref is nonempty; for\n       % @unnumbered and @anchor, it won't be.\n       \\setbox2 = \\hbox{\\ignorespaces \\refx{#1-snt}{}}%\n       \\ifdim \\wd2 > 0pt \\refx{#1-snt}\\space\\fi\n      }%\n      % output the `[mynode]' via the macro below so it can be overridden.\n      \\xrefprintnodename\\printedrefname\n      %\n      % But we always want a comma and a space:\n      ,\\space\n      %\n      % output the `page 3'.\n      \\turnoffactive \\putwordpage\\tie\\refx{#1-pg}{}%\n    \\fi\\fi\n  \\fi\n  \\endlink\n\\endgroup}\n\n% Output a cross-manual xref to #1.  Used just above (twice).\n% \n% Only include the text \"Section ``foo'' in\" if the foo is neither\n% missing or Top.  Thus, @xref{,,,foo,The Foo Manual} outputs simply\n% \"see The Foo Manual\", the idea being to refer to the whole manual.\n% \n% But, this being TeX, we can't easily compare our node name against the\n% string \"Top\" while ignoring the possible spaces before and after in\n% the input.  By adding the arbitrary 7sp below, we make it much less\n% likely that a real node name would have the same width as \"Top\" (e.g.,\n% in a monospaced font).  
Hopefully it will never happen in practice.\n% \n% For the same basic reason, we retypeset the \"Top\" at every\n% reference, since the current font is indeterminate.\n% \n\\def\\crossmanualxref#1{%\n  \\setbox\\toprefbox = \\hbox{Top\\kern7sp}%\n  \\setbox2 = \\hbox{\\ignorespaces \\printedrefname \\unskip \\kern7sp}%\n  \\ifdim \\wd2 > 7sp  % nonempty?\n    \\ifdim \\wd2 = \\wd\\toprefbox \\else  % same as Top?\n      \\putwordSection{} ``\\printedrefname'' \\putwordin{}\\space\n    \\fi\n  \\fi\n  #1%\n}\n\n% This macro is called from \\xrefX for the `[nodename]' part of xref\n% output.  It's a separate macro only so it can be changed more easily,\n% since square brackets don't work well in some documents.  Particularly\n% one that Bob is working on :).\n%\n\\def\\xrefprintnodename#1{[#1]}\n\n% Things referred to by \\setref.\n%\n\\def\\Ynothing{}\n\\def\\Yomitfromtoc{}\n\\def\\Ynumbered{%\n  \\ifnum\\secno=0\n    \\putwordChapter@tie \\the\\chapno\n  \\else \\ifnum\\subsecno=0\n    \\putwordSection@tie \\the\\chapno.\\the\\secno\n  \\else \\ifnum\\subsubsecno=0\n    \\putwordSection@tie \\the\\chapno.\\the\\secno.\\the\\subsecno\n  \\else\n    \\putwordSection@tie \\the\\chapno.\\the\\secno.\\the\\subsecno.\\the\\subsubsecno\n  \\fi\\fi\\fi\n}\n\\def\\Yappendix{%\n  \\ifnum\\secno=0\n     \\putwordAppendix@tie @char\\the\\appendixno{}%\n  \\else \\ifnum\\subsecno=0\n     \\putwordSection@tie @char\\the\\appendixno.\\the\\secno\n  \\else \\ifnum\\subsubsecno=0\n    \\putwordSection@tie @char\\the\\appendixno.\\the\\secno.\\the\\subsecno\n  \\else\n    \\putwordSection@tie\n      @char\\the\\appendixno.\\the\\secno.\\the\\subsecno.\\the\\subsubsecno\n  \\fi\\fi\\fi\n}\n\n% Define \\refx{NAME}{SUFFIX} to reference a cross-reference string named NAME.\n% If its value is nonempty, SUFFIX is output afterward.\n%\n\\def\\refx#1#2{%\n  {%\n    \\indexnofonts\n    \\otherbackslash\n    \\expandafter\\global\\expandafter\\let\\expandafter\\thisrefX\n      \\csname 
XR#1\\endcsname\n  }%\n  \\ifx\\thisrefX\\relax\n    % If not defined, say something at least.\n    \\angleleft un\\-de\\-fined\\angleright\n    \\iflinks\n      \\ifhavexrefs\n        {\\toks0 = {#1}% avoid expansion of possibly-complex value\n         \\message{\\linenumber Undefined cross reference `\\the\\toks0'.}}%\n      \\else\n        \\ifwarnedxrefs\\else\n          \\global\\warnedxrefstrue\n          \\message{Cross reference values unknown; you must run TeX again.}%\n        \\fi\n      \\fi\n    \\fi\n  \\else\n    % It's defined, so just use it.\n    \\thisrefX\n  \\fi\n  #2% Output the suffix in any case.\n}\n\n% This is the macro invoked by entries in the aux file.  Usually it's\n% just a \\def (we prepend XR to the control sequence name to avoid\n% collisions).  But if this is a float type, we have more work to do.\n%\n\\def\\xrdef#1#2{%\n  {% The node name might contain 8-bit characters, which in our current\n   % implementation are changed to commands like @'e.  Don't let these\n   % mess up the control sequence name.\n    \\indexnofonts\n    \\turnoffactive\n    \\xdef\\safexrefname{#1}%\n  }%\n  %\n  \\expandafter\\gdef\\csname XR\\safexrefname\\endcsname{#2}% remember this xref\n  %\n  % Was that xref control sequence that we just defined for a float?\n  \\expandafter\\iffloat\\csname XR\\safexrefname\\endcsname\n    % it was a float, and we have the (safe) float type in \\iffloattype.\n    \\expandafter\\let\\expandafter\\floatlist\n      \\csname floatlist\\iffloattype\\endcsname\n    %\n    % Is this the first time we've seen this float type?\n    \\expandafter\\ifx\\floatlist\\relax\n      \\toks0 = {\\do}% yes, so just \\do\n    \\else\n      % had it before, so preserve previous elements in list.\n      \\toks0 = \\expandafter{\\floatlist\\do}%\n    \\fi\n    %\n    % Remember this xref in the control sequence \\floatlistFLOATTYPE,\n    % for later use in \\listoffloats.\n    \\expandafter\\xdef\\csname 
floatlist\\iffloattype\\endcsname{\\the\\toks0\n      {\\safexrefname}}%\n  \\fi\n}\n\n% Read the last existing aux file, if any.  No error if none exists.\n%\n\\def\\tryauxfile{%\n  \\openin 1 \\jobname.aux\n  \\ifeof 1 \\else\n    \\readdatafile{aux}%\n    \\global\\havexrefstrue\n  \\fi\n  \\closein 1\n}\n\n\\def\\setupdatafile{%\n  \\catcode`\\^^@=\\other\n  \\catcode`\\^^A=\\other\n  \\catcode`\\^^B=\\other\n  \\catcode`\\^^C=\\other\n  \\catcode`\\^^D=\\other\n  \\catcode`\\^^E=\\other\n  \\catcode`\\^^F=\\other\n  \\catcode`\\^^G=\\other\n  \\catcode`\\^^H=\\other\n  \\catcode`\\^^K=\\other\n  \\catcode`\\^^L=\\other\n  \\catcode`\\^^N=\\other\n  \\catcode`\\^^P=\\other\n  \\catcode`\\^^Q=\\other\n  \\catcode`\\^^R=\\other\n  \\catcode`\\^^S=\\other\n  \\catcode`\\^^T=\\other\n  \\catcode`\\^^U=\\other\n  \\catcode`\\^^V=\\other\n  \\catcode`\\^^W=\\other\n  \\catcode`\\^^X=\\other\n  \\catcode`\\^^Z=\\other\n  \\catcode`\\^^[=\\other\n  \\catcode`\\^^\\=\\other\n  \\catcode`\\^^]=\\other\n  \\catcode`\\^^^=\\other\n  \\catcode`\\^^_=\\other\n  % It was suggested to set the catcode of ^ to 7, which would allow ^^e4 etc.\n  % in xref tags, i.e., node names.  But since ^^e4 notation isn't\n  % supported in the main text, it doesn't seem desirable.  Furthermore,\n  % that is not enough: for node names that actually contain a ^\n  % character, we would end up writing a line like this: 'xrdef {'hat\n  % b-title}{'hat b} and \\xrdef does a \\csname...\\endcsname on the first\n  % argument, and \\hat is not an expandable control sequence.  It could\n  % all be worked out, but why?  Either we support ^^ or we don't.\n  %\n  % The other change necessary for this was to define \\auxhat:\n  % \\def\\auxhat{\\def^{'hat }}% extra space so ok if followed by letter\n  % and then to call \\auxhat in \\setq.\n  %\n  \\catcode`\\^=\\other\n  %\n  % Special characters.  
Should be turned off anyway, but...\n  \\catcode`\\~=\\other\n  \\catcode`\\[=\\other\n  \\catcode`\\]=\\other\n  \\catcode`\\\"=\\other\n  \\catcode`\\_=\\other\n  \\catcode`\\|=\\other\n  \\catcode`\\<=\\other\n  \\catcode`\\>=\\other\n  \\catcode`\\$=\\other\n  \\catcode`\\#=\\other\n  \\catcode`\\&=\\other\n  \\catcode`\\%=\\other\n  \\catcode`+=\\other % avoid \\+ for paranoia even though we've turned it off\n  %\n  % This is to support \\ in node names and titles, since the \\\n  % characters end up in a \\csname.  It's easier than\n  % leaving it active and making its active definition an actual \\\n  % character.  What I don't understand is why it works in the *value*\n  % of the xrdef.  Seems like it should be a catcode12 \\, and that\n  % should not typeset properly.  But it works, so I'm moving on for\n  % now.  --karl, 15jan04.\n  \\catcode`\\\\=\\other\n  %\n  % Make the characters 128-255 be printing characters.\n  {%\n    \\count1=128\n    \\def\\loop{%\n      \\catcode\\count1=\\other\n      \\advance\\count1 by 1\n      \\ifnum \\count1<256 \\loop \\fi\n    }%\n  }%\n  %\n  % @ is our escape character in .aux files, and we need braces.\n  \\catcode`\\{=1\n  \\catcode`\\}=2\n  \\catcode`\\@=0\n}\n\n\\def\\readdatafile#1{%\n\\begingroup\n  \\setupdatafile\n  \\input\\jobname.#1\n\\endgroup}\n\n\n\\message{insertions,}\n% including footnotes.\n\n\\newcount \\footnoteno\n\n% The trailing space in the following definition for supereject is\n% vital for proper filling; pages come out unaligned when you do a\n% pagealignmacro call if that space before the closing brace is\n% removed. (Generally, numeric constants should always be followed by a\n% space to prevent strange expansion errors.)\n\\def\\supereject{\\par\\penalty -20000\\footnoteno =0 }\n\n% @footnotestyle is meaningful for Info output only.\n\\let\\footnotestyle=\\comment\n\n{\\catcode `\\@=11\n%\n% Auto-number footnotes.  
Otherwise like plain.\n\\gdef\\footnote{%\n  \\let\\indent=\\ptexindent\n  \\let\\noindent=\\ptexnoindent\n  \\global\\advance\\footnoteno by \\@ne\n  \\edef\\thisfootno{$^{\\the\\footnoteno}$}%\n  %\n  % In case the footnote comes at the end of a sentence, preserve the\n  % extra spacing after we do the footnote number.\n  \\let\\@sf\\empty\n  \\ifhmode\\edef\\@sf{\\spacefactor\\the\\spacefactor}\\ptexslash\\fi\n  %\n  % Remove inadvertent blank space before typesetting the footnote number.\n  \\unskip\n  \\thisfootno\\@sf\n  \\dofootnote\n}%\n\n% Don't bother with the trickery in plain.tex to not require the\n% footnote text as a parameter.  Our footnotes don't need to be so general.\n%\n% Oh yes, they do; otherwise, @ifset (and anything else that uses\n% \\parseargline) fails inside footnotes because the tokens are fixed when\n% the footnote is read.  --karl, 16nov96.\n%\n\\gdef\\dofootnote{%\n  \\insert\\footins\\bgroup\n  % We want to typeset this text as a normal paragraph, even if the\n  % footnote reference occurs in (for example) a display environment.\n  % So reset some parameters.\n  \\hsize=\\pagewidth\n  \\interlinepenalty\\interfootnotelinepenalty\n  \\splittopskip\\ht\\strutbox % top baseline for broken footnotes\n  \\splitmaxdepth\\dp\\strutbox\n  \\floatingpenalty\\@MM\n  \\leftskip\\z@skip\n  \\rightskip\\z@skip\n  \\spaceskip\\z@skip\n  \\xspaceskip\\z@skip\n  \\parindent\\defaultparindent\n  %\n  \\smallfonts \\rm\n  %\n  % Because we use hanging indentation in footnotes, a @noindent appears\n  % to exdent this text, so make it be a no-op.  makeinfo does not use\n  % hanging indentation so @noindent can still be needed within footnote\n  % text after an @example or the like (not that this is good style).\n  \\let\\noindent = \\relax\n  %\n  % Hang the footnote text off the number.  
Use \\everypar in case the\n  % footnote extends for more than one paragraph.\n  \\everypar = {\\hang}%\n  \\textindent{\\thisfootno}%\n  %\n  % Don't crash into the line above the footnote text.  Since this\n  % expands into a box, it must come within the paragraph, lest it\n  % provide a place where TeX can split the footnote.\n  \\footstrut\n  %\n  % Invoke rest of plain TeX footnote routine.\n  \\futurelet\\next\\fo@t\n}\n}%end \\catcode `\\@=11\n\n% In case a @footnote appears in a vbox, save the footnote text and create\n% the real \\insert just after the vbox finished.  Otherwise, the insertion\n% would be lost.\n% Similarly, if a @footnote appears inside an alignment, save the footnote\n% text to a box and make the \\insert when a row of the table is finished.\n% And the same can be done for other insert classes.  --kasal, 16nov03.\n\n% Replace the \\insert primitive by a cheating macro.\n% Deeper inside, just make sure that the saved insertions are not spilled\n% out prematurely.\n%\n\\def\\startsavinginserts{%\n  \\ifx \\insert\\ptexinsert\n    \\let\\insert\\saveinsert\n  \\else\n    \\let\\checkinserts\\relax\n  \\fi\n}\n\n% This \\insert replacement works for both \\insert\\footins{foo} and\n% \\insert\\footins\\bgroup foo\\egroup, but it doesn't work for \\insert27{foo}.\n%\n\\def\\saveinsert#1{%\n  \\edef\\next{\\noexpand\\savetobox \\makeSAVEname#1}%\n  \\afterassignment\\next\n  % swallow the left brace\n  \\let\\temp =\n}\n\\def\\makeSAVEname#1{\\makecsname{SAVE\\expandafter\\gobble\\string#1}}\n\\def\\savetobox#1{\\global\\setbox#1 = \\vbox\\bgroup \\unvbox#1}\n\n\\def\\checksaveins#1{\\ifvoid#1\\else \\placesaveins#1\\fi}\n\n\\def\\placesaveins#1{%\n  \\ptexinsert \\csname\\expandafter\\gobblesave\\string#1\\endcsname\n    {\\box#1}%\n}\n\n% eat @SAVE -- beware, all of them have catcode \\other:\n{\n  \\def\\dospecials{\\do S\\do A\\do V\\do E} \\uncatcodespecials  %  ;-)\n  \\gdef\\gobblesave @SAVE{}\n}\n\n% initialization:\n\\def\\newsaveins 
#1{%\n  \\edef\\next{\\noexpand\\newsaveinsX \\makeSAVEname#1}%\n  \\next\n}\n\\def\\newsaveinsX #1{%\n  \\csname newbox\\endcsname #1%\n  \\expandafter\\def\\expandafter\\checkinserts\\expandafter{\\checkinserts\n    \\checksaveins #1}%\n}\n\n% initialize:\n\\let\\checkinserts\\empty\n\\newsaveins\\footins\n\\newsaveins\\margin\n\n\n% @image.  We use the macros from epsf.tex to support this.\n% If epsf.tex is not installed and @image is used, we complain.\n%\n% Check for and read epsf.tex up front.  If we read it only at @image\n% time, we might be inside a group, and then its definitions would get\n% undone and the next image would fail.\n\\openin 1 = epsf.tex\n\\ifeof 1 \\else\n  % Do not bother showing banner with epsf.tex v2.7k (available in\n  % doc/epsf.tex and on ctan).\n  \\def\\epsfannounce{\\toks0 = }%\n  \\input epsf.tex\n\\fi\n\\closein 1\n%\n% We will only complain once about lack of epsf.tex.\n\\newif\\ifwarnednoepsf\n\\newhelp\\noepsfhelp{epsf.tex must be installed for images to\n  work.  It is also included in the Texinfo distribution, or you can get\n  it from ftp://tug.org/tex/epsf.tex.}\n%\n\\def\\image#1{%\n  \\ifx\\epsfbox\\thisisundefined\n    \\ifwarnednoepsf \\else\n      \\errhelp = \\noepsfhelp\n      \\errmessage{epsf.tex not found, images will be ignored}%\n      \\global\\warnednoepsftrue\n    \\fi\n  \\else\n    \\imagexxx #1,,,,,\\finish\n  \\fi\n}\n%\n% Arguments to @image:\n% #1 is (mandatory) image filename; we tack on .eps extension.\n% #2 is (optional) width, #3 is (optional) height.\n% #4 is (ignored optional) html alt text.\n% #5 is (ignored optional) extension.\n% #6 is just the usual extra ignored arg for parsing stuff.\n\\newif\\ifimagevmode\n\\def\\imagexxx#1,#2,#3,#4,#5,#6\\finish{\\begingroup\n  \\catcode`\\^^M = 5     % in case we're inside an example\n  \\normalturnoffactive  % allow _ et al. 
in names\n  % If the image is by itself, center it.\n  \\ifvmode\n    \\imagevmodetrue\n  \\else \\ifx\\centersub\\centerV\n    % for @center @image, we need a vbox so we can have our vertical space\n    \\imagevmodetrue\n    \\vbox\\bgroup % vbox has better behavior than vtop here\n  \\fi\\fi\n  %\n  \\ifimagevmode\n    \\nobreak\\medskip\n    % Usually we'll have text after the image which will insert\n    % \\parskip glue, so insert it here too to equalize the space\n    % above and below.\n    \\nobreak\\vskip\\parskip\n    \\nobreak\n  \\fi\n  %\n  % Leave vertical mode so that indentation from an enclosing\n  %  environment such as @quotation is respected.\n  % However, if we're at the top level, we don't want the\n  %  normal paragraph indentation.\n  % On the other hand, if we are in the case of @center @image, we don't\n  %  want to start a paragraph, which will create a hsize-width box and\n  %  eradicate the centering.\n  \\ifx\\centersub\\centerV\\else \\noindent \\fi\n  %\n  % Output the image.\n  \\ifpdf\n    \\dopdfimage{#1}{#2}{#3}%\n  \\else\n    % \\epsfbox itself resets \\epsf?size at each figure.\n    \\setbox0 = \\hbox{\\ignorespaces #2}\\ifdim\\wd0 > 0pt \\epsfxsize=#2\\relax \\fi\n    \\setbox0 = \\hbox{\\ignorespaces #3}\\ifdim\\wd0 > 0pt \\epsfysize=#3\\relax \\fi\n    \\epsfbox{#1.eps}%\n  \\fi\n  %\n  \\ifimagevmode\n    \\medskip  % space after a standalone image\n  \\fi  \n  \\ifx\\centersub\\centerV \\egroup \\fi\n\\endgroup}\n\n\n% @float FLOATTYPE,LABEL,LOC ... @end float for displayed figures, tables,\n% etc.  We don't actually implement floating yet, we always include the\n% float \"here\".  
But it seemed the best name for the future.\n%\n\\envparseargdef\\float{\\eatcommaspace\\eatcommaspace\\dofloat#1, , ,\\finish}\n\n% There may be a space before second and/or third parameter; delete it.\n\\def\\eatcommaspace#1, {#1,}\n\n% #1 is the optional FLOATTYPE, the text label for this float, typically\n% \"Figure\", \"Table\", \"Example\", etc.  Can't contain commas.  If omitted,\n% this float will not be numbered and cannot be referred to.\n%\n% #2 is the optional xref label.  Also must be present for the float to\n% be referable.\n%\n% #3 is the optional positioning argument; for now, it is ignored.  It\n% will somehow specify the positions allowed to float to (here, top, bottom).\n%\n% We keep a separate counter for each FLOATTYPE, which we reset at each\n% chapter-level command.\n\\let\\resetallfloatnos=\\empty\n%\n\\def\\dofloat#1,#2,#3,#4\\finish{%\n  \\let\\thiscaption=\\empty\n  \\let\\thisshortcaption=\\empty\n  %\n  % don't lose footnotes inside @float.\n  %\n  % BEWARE: when the floats start float, we have to issue warning whenever an\n  % insert appears inside a float which could possibly float. --kasal, 26may04\n  %\n  \\startsavinginserts\n  %\n  % We can't be used inside a paragraph.\n  \\par\n  %\n  \\vtop\\bgroup\n    \\def\\floattype{#1}%\n    \\def\\floatlabel{#2}%\n    \\def\\floatloc{#3}% we do nothing with this yet.\n    %\n    \\ifx\\floattype\\empty\n      \\let\\safefloattype=\\empty\n    \\else\n      {%\n        % the floattype might have accents or other special characters,\n        % but we need to use it in a control sequence name.\n        \\indexnofonts\n        \\turnoffactive\n        \\xdef\\safefloattype{\\floattype}%\n      }%\n    \\fi\n    %\n    % If label is given but no type, we handle that as the empty type.\n    \\ifx\\floatlabel\\empty \\else\n      % We want each FLOATTYPE to be numbered separately (Figure 1,\n      % Table 1, Figure 2, ...).  
(And if no label, no number.)\n      %\n      \\expandafter\\getfloatno\\csname\\safefloattype floatno\\endcsname\n      \\global\\advance\\floatno by 1\n      %\n      {%\n        % This magic value for \\lastsection is output by \\setref as the\n        % XREFLABEL-title value.  \\xrefX uses it to distinguish float\n        % labels (which have a completely different output format) from\n        % node and anchor labels.  And \\xrdef uses it to construct the\n        % lists of floats.\n        %\n        \\edef\\lastsection{\\floatmagic=\\safefloattype}%\n        \\setref{\\floatlabel}{Yfloat}%\n      }%\n    \\fi\n    %\n    % start with \\parskip glue, I guess.\n    \\vskip\\parskip\n    %\n    % Don't suppress indentation if a float happens to start a section.\n    \\restorefirstparagraphindent\n}\n\n% we have these possibilities:\n% @float Foo,lbl & @caption{Cap}: Foo 1.1: Cap\n% @float Foo,lbl & no caption:    Foo 1.1\n% @float Foo & @caption{Cap}:     Foo: Cap\n% @float Foo & no caption:        Foo\n% @float ,lbl & Caption{Cap}:     1.1: Cap\n% @float ,lbl & no caption:       1.1\n% @float & @caption{Cap}:         Cap\n% @float & no caption:\n%\n\\def\\Efloat{%\n    \\let\\floatident = \\empty\n    %\n    % In all cases, if we have a float type, it comes first.\n    \\ifx\\floattype\\empty \\else \\def\\floatident{\\floattype}\\fi\n    %\n    % If we have an xref label, the number comes next.\n    \\ifx\\floatlabel\\empty \\else\n      \\ifx\\floattype\\empty \\else % if also had float type, need tie first.\n        \\appendtomacro\\floatident{\\tie}%\n      \\fi\n      % the number.\n      \\appendtomacro\\floatident{\\chaplevelprefix\\the\\floatno}%\n    \\fi\n    %\n    % Start the printed caption with what we've constructed in\n    % \\floatident, but keep it separate; we need \\floatident again.\n    \\let\\captionline = \\floatident\n    %\n    \\ifx\\thiscaption\\empty \\else\n      \\ifx\\floatident\\empty \\else\n\t\\appendtomacro\\captionline{: 
}% had ident, so need a colon between\n      \\fi\n      %\n      % caption text.\n      \\appendtomacro\\captionline{\\scanexp\\thiscaption}%\n    \\fi\n    %\n    % If we have anything to print, print it, with space before.\n    % Eventually this needs to become an \\insert.\n    \\ifx\\captionline\\empty \\else\n      \\vskip.5\\parskip\n      \\captionline\n      %\n      % Space below caption.\n      \\vskip\\parskip\n    \\fi\n    %\n    % If have an xref label, write the list of floats info.  Do this\n    % after the caption, to avoid chance of it being a breakpoint.\n    \\ifx\\floatlabel\\empty \\else\n      % Write the text that goes in the lof to the aux file as\n      % \\floatlabel-lof.  Besides \\floatident, we include the short\n      % caption if specified, else the full caption if specified, else nothing.\n      {%\n        \\atdummies\n        %\n        % since we read the caption text in the macro world, where ^^M\n        % is turned into a normal character, we have to scan it back, so\n        % we don't write the literal three characters \"^^M\" into the aux file.\n\t\\scanexp{%\n\t  \\xdef\\noexpand\\gtemp{%\n\t    \\ifx\\thisshortcaption\\empty\n\t      \\thiscaption\n\t    \\else\n\t      \\thisshortcaption\n\t    \\fi\n\t  }%\n\t}%\n        \\immediate\\write\\auxfile{@xrdef{\\floatlabel-lof}{\\floatident\n\t  \\ifx\\gtemp\\empty \\else : \\gtemp \\fi}}%\n      }%\n    \\fi\n  \\egroup  % end of \\vtop\n  %\n  % place the captured inserts\n  %\n  % BEWARE: when the floats start floating, we have to issue warning\n  % whenever an insert appears inside a float which could possibly\n  % float. 
--kasal, 26may04\n  %\n  \\checkinserts\n}\n\n% Append the tokens #2 to the definition of macro #1, not expanding either.\n%\n\\def\\appendtomacro#1#2{%\n  \\expandafter\\def\\expandafter#1\\expandafter{#1#2}%\n}\n\n% @caption, @shortcaption\n%\n\\def\\caption{\\docaption\\thiscaption}\n\\def\\shortcaption{\\docaption\\thisshortcaption}\n\\def\\docaption{\\checkenv\\float \\bgroup\\scanargctxt\\defcaption}\n\\def\\defcaption#1#2{\\egroup \\def#1{#2}}\n\n% The parameter is the control sequence identifying the counter we are\n% going to use.  Create it if it doesn't exist and assign it to \\floatno.\n\\def\\getfloatno#1{%\n  \\ifx#1\\relax\n      % Haven't seen this figure type before.\n      \\csname newcount\\endcsname #1%\n      %\n      % Remember to reset this floatno at the next chap.\n      \\expandafter\\gdef\\expandafter\\resetallfloatnos\n        \\expandafter{\\resetallfloatnos #1=0 }%\n  \\fi\n  \\let\\floatno#1%\n}\n\n% \\setref calls this to get the XREFLABEL-snt value.  We want an @xref\n% to the FLOATLABEL to expand to \"Figure 3.1\".  We call \\setref when we\n% first read the @float command.\n%\n\\def\\Yfloat{\\floattype@tie \\chaplevelprefix\\the\\floatno}%\n\n% Magic string used for the XREFLABEL-title value, so \\xrefX can\n% distinguish floats from other xref types.\n\\def\\floatmagic{!!float!!}\n\n% #1 is the control sequence we are passed; we expand into a conditional\n% which is true if #1 represents a float ref.  That is, the magic\n% \\lastsection value which we \\setref above.\n%\n\\def\\iffloat#1{\\expandafter\\doiffloat#1==\\finish}\n%\n% #1 is (maybe) the \\floatmagic string.  If so, #2 will be the\n% (safe) float type for this float.  
We set \\iffloattype to #2.\n%\n\\def\\doiffloat#1=#2=#3\\finish{%\n  \\def\\temp{#1}%\n  \\def\\iffloattype{#2}%\n  \\ifx\\temp\\floatmagic\n}\n\n% @listoffloats FLOATTYPE - print a list of floats like a table of contents.\n%\n\\parseargdef\\listoffloats{%\n  \\def\\floattype{#1}% floattype\n  {%\n    % the floattype might have accents or other special characters,\n    % but we need to use it in a control sequence name.\n    \\indexnofonts\n    \\turnoffactive\n    \\xdef\\safefloattype{\\floattype}%\n  }%\n  %\n  % \\xrdef saves the floats as a \\do-list in \\floatlistSAFEFLOATTYPE.\n  \\expandafter\\ifx\\csname floatlist\\safefloattype\\endcsname \\relax\n    \\ifhavexrefs\n      % if the user said @listoffloats foo but never @float foo.\n      \\message{\\linenumber No `\\safefloattype' floats to list.}%\n    \\fi\n  \\else\n    \\begingroup\n      \\leftskip=\\tocindent  % indent these entries like a toc\n      \\let\\do=\\listoffloatsdo\n      \\csname floatlist\\safefloattype\\endcsname\n    \\endgroup\n  \\fi\n}\n\n% This is called on each entry in a list of floats.  We're passed the\n% xref label, in the form LABEL-title, which is how we save it in the\n% aux file.  We strip off the -title and look up \\XRLABEL-lof, which\n% has the text we're supposed to typeset here.\n%\n% Figures without xref labels will not be included in the list (since\n% they won't appear in the aux file).\n%\n\\def\\listoffloatsdo#1{\\listoffloatsdoentry#1\\finish}\n\\def\\listoffloatsdoentry#1-title\\finish{{%\n  % Can't fully expand XR#1-lof because it can contain anything.  Just\n  % pass the control sequence.  
On the other hand, XR#1-pg is just the\n  % page number, and we want to fully expand that so we can get a link\n  % in pdf output.\n  \\toksA = \\expandafter{\\csname XR#1-lof\\endcsname}%\n  %\n  % use the same \\entry macro we use to generate the TOC and index.\n  \\edef\\writeentry{\\noexpand\\entry{\\the\\toksA}{\\csname XR#1-pg\\endcsname}}%\n  \\writeentry\n}}\n\n\n\\message{localization,}\n\n% For single-language documents, @documentlanguage is usually given very\n% early, just after @documentencoding.  Single argument is the language\n% (de) or locale (de_DE) abbreviation.\n%\n{\n  \\catcode`\\_ = \\active\n  \\globaldefs=1\n\\parseargdef\\documentlanguage{\\begingroup\n  \\let_=\\normalunderscore  % normal _ character for filenames\n  \\tex % read txi-??.tex file in plain TeX.\n    % Read the file by the name they passed if it exists.\n    \\openin 1 txi-#1.tex\n    \\ifeof 1\n      \\documentlanguagetrywithoutunderscore{#1_\\finish}%\n    \\else\n      \\globaldefs = 1  % everything in the txi-LL files needs to persist\n      \\input txi-#1.tex\n    \\fi\n    \\closein 1\n  \\endgroup % end raw TeX\n\\endgroup}\n%\n% If they passed de_DE, and txi-de_DE.tex doesn't exist,\n% try txi-de.tex.\n%\n\\gdef\\documentlanguagetrywithoutunderscore#1_#2\\finish{%\n  \\openin 1 txi-#1.tex\n  \\ifeof 1\n    \\errhelp = \\nolanghelp\n    \\errmessage{Cannot read language file txi-#1.tex}%\n  \\else\n    \\globaldefs = 1  % everything in the txi-LL files needs to persist\n    \\input txi-#1.tex\n  \\fi\n  \\closein 1\n}\n}% end of special _ catcode\n%\n\\newhelp\\nolanghelp{The given language definition file cannot be found or\nis empty.  Maybe you need to install it?  
Putting it in the current\ndirectory should work if nowhere else does.}\n\n% This macro is called from txi-??.tex files; the first argument is the\n% \\language name to set (without the \"\\lang@\" prefix), the second and\n% third args are \\{left,right}hyphenmin.\n%\n% The language names to pass are determined when the format is built.\n% See the etex.log file created at that time, e.g.,\n% /usr/local/texlive/2008/texmf-var/web2c/pdftex/etex.log.\n%\n% With TeX Live 2008, etex now includes hyphenation patterns for all\n% available languages.  This means we can support hyphenation in\n% Texinfo, at least to some extent.  (This still doesn't solve the\n% accented characters problem.)\n%\n\\catcode`@=11\n\\def\\txisetlanguage#1#2#3{%\n  % do not set the language if the name is undefined in the current TeX.\n  \\expandafter\\ifx\\csname lang@#1\\endcsname \\relax\n    \\message{no patterns for #1}%\n  \\else\n    \\global\\language = \\csname lang@#1\\endcsname\n  \\fi\n  % but there is no harm in adjusting the hyphenmin values regardless.\n  \\global\\lefthyphenmin = #2\\relax\n  \\global\\righthyphenmin = #3\\relax\n}\n\n% Helpers for encodings.\n% Set the catcode of characters 128 through 255 to the specified number.\n%\n\\def\\setnonasciicharscatcode#1{%\n   \\count255=128\n   \\loop\\ifnum\\count255<256\n      \\global\\catcode\\count255=#1\\relax\n      \\advance\\count255 by 1\n   \\repeat\n}\n\n\\def\\setnonasciicharscatcodenonglobal#1{%\n   \\count255=128\n   \\loop\\ifnum\\count255<256\n      \\catcode\\count255=#1\\relax\n      \\advance\\count255 by 1\n   \\repeat\n}\n\n% @documentencoding sets the definition of non-ASCII characters\n% according to the specified encoding.\n%\n\\parseargdef\\documentencoding{%\n  % Encoding being declared for the document.\n  \\def\\declaredencoding{\\csname #1.enc\\endcsname}%\n  %\n  % Supported encodings: names converted to tokens in order to be able\n  % to compare them with \\ifx.\n  \\def\\ascii{\\csname 
US-ASCII.enc\\endcsname}%\n  \\def\\latnine{\\csname ISO-8859-15.enc\\endcsname}%\n  \\def\\latone{\\csname ISO-8859-1.enc\\endcsname}%\n  \\def\\lattwo{\\csname ISO-8859-2.enc\\endcsname}%\n  \\def\\utfeight{\\csname UTF-8.enc\\endcsname}%\n  %\n  \\ifx \\declaredencoding \\ascii\n     \\asciichardefs\n  %\n  \\else \\ifx \\declaredencoding \\lattwo\n     \\setnonasciicharscatcode\\active\n     \\lattwochardefs\n  %\n  \\else \\ifx \\declaredencoding \\latone\n     \\setnonasciicharscatcode\\active\n     \\latonechardefs\n  %\n  \\else \\ifx \\declaredencoding \\latnine\n     \\setnonasciicharscatcode\\active\n     \\latninechardefs\n  %\n  \\else \\ifx \\declaredencoding \\utfeight\n     \\setnonasciicharscatcode\\active\n     \\utfeightchardefs\n  %\n  \\else\n    \\message{Unknown document encoding #1, ignoring.}%\n  %\n  \\fi % utfeight\n  \\fi % latnine\n  \\fi % latone\n  \\fi % lattwo\n  \\fi % ascii\n}\n\n% A message to be logged when using a character that isn't available\n% the default font encoding (OT1).\n%\n\\def\\missingcharmsg#1{\\message{Character missing in OT1 encoding: #1.}}\n\n% Take account of \\c (plain) vs. 
\\, (Texinfo) difference.\n\\def\\cedilla#1{\\ifx\\c\\ptexc\\c{#1}\\else\\,{#1}\\fi}\n\n% First, make active non-ASCII characters in order for them to be\n% correctly categorized when TeX reads the replacement text of\n% macros containing the character definitions.\n\\setnonasciicharscatcode\\active\n%\n% Latin1 (ISO-8859-1) character definitions.\n\\def\\latonechardefs{%\n  \\gdef^^a0{\\tie}\n  \\gdef^^a1{\\exclamdown}\n  \\gdef^^a2{\\missingcharmsg{CENT SIGN}}\n  \\gdef^^a3{{\\pounds}}\n  \\gdef^^a4{\\missingcharmsg{CURRENCY SIGN}}\n  \\gdef^^a5{\\missingcharmsg{YEN SIGN}}\n  \\gdef^^a6{\\missingcharmsg{BROKEN BAR}}\n  \\gdef^^a7{\\S}\n  \\gdef^^a8{\\\"{}}\n  \\gdef^^a9{\\copyright}\n  \\gdef^^aa{\\ordf}\n  \\gdef^^ab{\\guillemetleft}\n  \\gdef^^ac{$\\lnot$}\n  \\gdef^^ad{\\-}\n  \\gdef^^ae{\\registeredsymbol}\n  \\gdef^^af{\\={}}\n  %\n  \\gdef^^b0{\\textdegree}\n  \\gdef^^b1{$\\pm$}\n  \\gdef^^b2{$^2$}\n  \\gdef^^b3{$^3$}\n  \\gdef^^b4{\\'{}}\n  \\gdef^^b5{$\\mu$}\n  \\gdef^^b6{\\P}\n  %\n  \\gdef^^b7{$^.$}\n  \\gdef^^b8{\\cedilla\\ }\n  \\gdef^^b9{$^1$}\n  \\gdef^^ba{\\ordm}\n  %\n  \\gdef^^bb{\\guillemetright}\n  \\gdef^^bc{$1\\over4$}\n  \\gdef^^bd{$1\\over2$}\n  \\gdef^^be{$3\\over4$}\n  \\gdef^^bf{\\questiondown}\n  %\n  \\gdef^^c0{\\`A}\n  \\gdef^^c1{\\'A}\n  \\gdef^^c2{\\^A}\n  \\gdef^^c3{\\~A}\n  \\gdef^^c4{\\\"A}\n  \\gdef^^c5{\\ringaccent A}\n  \\gdef^^c6{\\AE}\n  \\gdef^^c7{\\cedilla C}\n  \\gdef^^c8{\\`E}\n  \\gdef^^c9{\\'E}\n  \\gdef^^ca{\\^E}\n  \\gdef^^cb{\\\"E}\n  \\gdef^^cc{\\`I}\n  \\gdef^^cd{\\'I}\n  \\gdef^^ce{\\^I}\n  \\gdef^^cf{\\\"I}\n  %\n  \\gdef^^d0{\\DH}\n  \\gdef^^d1{\\~N}\n  \\gdef^^d2{\\`O}\n  \\gdef^^d3{\\'O}\n  \\gdef^^d4{\\^O}\n  \\gdef^^d5{\\~O}\n  \\gdef^^d6{\\\"O}\n  \\gdef^^d7{$\\times$}\n  \\gdef^^d8{\\O}\n  \\gdef^^d9{\\`U}\n  \\gdef^^da{\\'U}\n  \\gdef^^db{\\^U}\n  \\gdef^^dc{\\\"U}\n  \\gdef^^dd{\\'Y}\n  \\gdef^^de{\\TH}\n  \\gdef^^df{\\ss}\n  %\n  \\gdef^^e0{\\`a}\n  \\gdef^^e1{\\'a}\n  \\gdef^^e2{\\^a}\n  
\\gdef^^e3{\\~a}\n  \\gdef^^e4{\\\"a}\n  \\gdef^^e5{\\ringaccent a}\n  \\gdef^^e6{\\ae}\n  \\gdef^^e7{\\cedilla c}\n  \\gdef^^e8{\\`e}\n  \\gdef^^e9{\\'e}\n  \\gdef^^ea{\\^e}\n  \\gdef^^eb{\\\"e}\n  \\gdef^^ec{\\`{\\dotless i}}\n  \\gdef^^ed{\\'{\\dotless i}}\n  \\gdef^^ee{\\^{\\dotless i}}\n  \\gdef^^ef{\\\"{\\dotless i}}\n  %\n  \\gdef^^f0{\\dh}\n  \\gdef^^f1{\\~n}\n  \\gdef^^f2{\\`o}\n  \\gdef^^f3{\\'o}\n  \\gdef^^f4{\\^o}\n  \\gdef^^f5{\\~o}\n  \\gdef^^f6{\\\"o}\n  \\gdef^^f7{$\\div$}\n  \\gdef^^f8{\\o}\n  \\gdef^^f9{\\`u}\n  \\gdef^^fa{\\'u}\n  \\gdef^^fb{\\^u}\n  \\gdef^^fc{\\\"u}\n  \\gdef^^fd{\\'y}\n  \\gdef^^fe{\\th}\n  \\gdef^^ff{\\\"y}\n}\n\n% Latin9 (ISO-8859-15) encoding character definitions.\n\\def\\latninechardefs{%\n  % Encoding is almost identical to Latin1.\n  \\latonechardefs\n  %\n  \\gdef^^a4{\\euro}\n  \\gdef^^a6{\\v S}\n  \\gdef^^a8{\\v s}\n  \\gdef^^b4{\\v Z}\n  \\gdef^^b8{\\v z}\n  \\gdef^^bc{\\OE}\n  \\gdef^^bd{\\oe}\n  \\gdef^^be{\\\"Y}\n}\n\n% Latin2 (ISO-8859-2) character definitions.\n\\def\\lattwochardefs{%\n  \\gdef^^a0{\\tie}\n  \\gdef^^a1{\\ogonek{A}}\n  \\gdef^^a2{\\u{}}\n  \\gdef^^a3{\\L}\n  \\gdef^^a4{\\missingcharmsg{CURRENCY SIGN}}\n  \\gdef^^a5{\\v L}\n  \\gdef^^a6{\\'S}\n  \\gdef^^a7{\\S}\n  \\gdef^^a8{\\\"{}}\n  \\gdef^^a9{\\v S}\n  \\gdef^^aa{\\cedilla S}\n  \\gdef^^ab{\\v T}\n  \\gdef^^ac{\\'Z}\n  \\gdef^^ad{\\-}\n  \\gdef^^ae{\\v Z}\n  \\gdef^^af{\\dotaccent Z}\n  %\n  \\gdef^^b0{\\textdegree}\n  \\gdef^^b1{\\ogonek{a}}\n  \\gdef^^b2{\\ogonek{ }}\n  \\gdef^^b3{\\l}\n  \\gdef^^b4{\\'{}}\n  \\gdef^^b5{\\v l}\n  \\gdef^^b6{\\'s}\n  \\gdef^^b7{\\v{}}\n  \\gdef^^b8{\\cedilla\\ }\n  \\gdef^^b9{\\v s}\n  \\gdef^^ba{\\cedilla s}\n  \\gdef^^bb{\\v t}\n  \\gdef^^bc{\\'z}\n  \\gdef^^bd{\\H{}}\n  \\gdef^^be{\\v z}\n  \\gdef^^bf{\\dotaccent z}\n  %\n  \\gdef^^c0{\\'R}\n  \\gdef^^c1{\\'A}\n  \\gdef^^c2{\\^A}\n  \\gdef^^c3{\\u A}\n  \\gdef^^c4{\\\"A}\n  \\gdef^^c5{\\'L}\n  \\gdef^^c6{\\'C}\n  \\gdef^^c7{\\cedilla C}\n  \\gdef^^c8{\\v 
C}\n  \\gdef^^c9{\\'E}\n  \\gdef^^ca{\\ogonek{E}}\n  \\gdef^^cb{\\\"E}\n  \\gdef^^cc{\\v E}\n  \\gdef^^cd{\\'I}\n  \\gdef^^ce{\\^I}\n  \\gdef^^cf{\\v D}\n  %\n  \\gdef^^d0{\\DH}\n  \\gdef^^d1{\\'N}\n  \\gdef^^d2{\\v N}\n  \\gdef^^d3{\\'O}\n  \\gdef^^d4{\\^O}\n  \\gdef^^d5{\\H O}\n  \\gdef^^d6{\\\"O}\n  \\gdef^^d7{$\\times$}\n  \\gdef^^d8{\\v R}\n  \\gdef^^d9{\\ringaccent U}\n  \\gdef^^da{\\'U}\n  \\gdef^^db{\\H U}\n  \\gdef^^dc{\\\"U}\n  \\gdef^^dd{\\'Y}\n  \\gdef^^de{\\cedilla T}\n  \\gdef^^df{\\ss}\n  %\n  \\gdef^^e0{\\'r}\n  \\gdef^^e1{\\'a}\n  \\gdef^^e2{\\^a}\n  \\gdef^^e3{\\u a}\n  \\gdef^^e4{\\\"a}\n  \\gdef^^e5{\\'l}\n  \\gdef^^e6{\\'c}\n  \\gdef^^e7{\\cedilla c}\n  \\gdef^^e8{\\v c}\n  \\gdef^^e9{\\'e}\n  \\gdef^^ea{\\ogonek{e}}\n  \\gdef^^eb{\\\"e}\n  \\gdef^^ec{\\v e}\n  \\gdef^^ed{\\'{\\dotless{i}}}\n  \\gdef^^ee{\\^{\\dotless{i}}}\n  \\gdef^^ef{\\v d}\n  %\n  \\gdef^^f0{\\dh}\n  \\gdef^^f1{\\'n}\n  \\gdef^^f2{\\v n}\n  \\gdef^^f3{\\'o}\n  \\gdef^^f4{\\^o}\n  \\gdef^^f5{\\H o}\n  \\gdef^^f6{\\\"o}\n  \\gdef^^f7{$\\div$}\n  \\gdef^^f8{\\v r}\n  \\gdef^^f9{\\ringaccent u}\n  \\gdef^^fa{\\'u}\n  \\gdef^^fb{\\H u}\n  \\gdef^^fc{\\\"u}\n  \\gdef^^fd{\\'y}\n  \\gdef^^fe{\\cedilla t}\n  \\gdef^^ff{\\dotaccent{}}\n}\n\n% UTF-8 character definitions.\n%\n% This code to support UTF-8 is based on LaTeX's utf8.def, with some\n% changes for Texinfo conventions.  
It is included here under the GPL by\n% permission from Frank Mittelbach and the LaTeX team.\n%\n\\newcount\\countUTFx\n\\newcount\\countUTFy\n\\newcount\\countUTFz\n\n\\gdef\\UTFviiiTwoOctets#1#2{\\expandafter\n   \\UTFviiiDefined\\csname u8:#1\\string #2\\endcsname}\n%\n\\gdef\\UTFviiiThreeOctets#1#2#3{\\expandafter\n   \\UTFviiiDefined\\csname u8:#1\\string #2\\string #3\\endcsname}\n%\n\\gdef\\UTFviiiFourOctets#1#2#3#4{\\expandafter\n   \\UTFviiiDefined\\csname u8:#1\\string #2\\string #3\\string #4\\endcsname}\n\n\\gdef\\UTFviiiDefined#1{%\n  \\ifx #1\\relax\n    \\message{\\linenumber Unicode char \\string #1 not defined for Texinfo}%\n  \\else\n    \\expandafter #1%\n  \\fi\n}\n\n\\begingroup\n  \\catcode`\\~13\n  \\catcode`\\\"12\n\n  \\def\\UTFviiiLoop{%\n    \\global\\catcode\\countUTFx\\active\n    \\uccode`\\~\\countUTFx\n    \\uppercase\\expandafter{\\UTFviiiTmp}%\n    \\advance\\countUTFx by 1\n    \\ifnum\\countUTFx < \\countUTFy\n      \\expandafter\\UTFviiiLoop\n    \\fi}\n\n  \\countUTFx = \"C2\n  \\countUTFy = \"E0\n  \\def\\UTFviiiTmp{%\n    \\xdef~{\\noexpand\\UTFviiiTwoOctets\\string~}}\n  \\UTFviiiLoop\n\n  \\countUTFx = \"E0\n  \\countUTFy = \"F0\n  \\def\\UTFviiiTmp{%\n    \\xdef~{\\noexpand\\UTFviiiThreeOctets\\string~}}\n  \\UTFviiiLoop\n\n  \\countUTFx = \"F0\n  \\countUTFy = \"F4\n  \\def\\UTFviiiTmp{%\n    \\xdef~{\\noexpand\\UTFviiiFourOctets\\string~}}\n  \\UTFviiiLoop\n\\endgroup\n\n\\begingroup\n  \\catcode`\\\"=12\n  \\catcode`\\<=12\n  \\catcode`\\.=12\n  \\catcode`\\,=12\n  \\catcode`\\;=12\n  \\catcode`\\!=12\n  \\catcode`\\~=13\n\n  \\gdef\\DeclareUnicodeCharacter#1#2{%\n    \\countUTFz = \"#1\\relax\n    %\\wlog{\\space\\space defining Unicode char U+#1 (decimal \\the\\countUTFz)}%\n    \\begingroup\n      \\parseXMLCharref\n      \\def\\UTFviiiTwoOctets##1##2{%\n        \\csname u8:##1\\string ##2\\endcsname}%\n      \\def\\UTFviiiThreeOctets##1##2##3{%\n        \\csname u8:##1\\string ##2\\string ##3\\endcsname}%\n      
\\def\\UTFviiiFourOctets##1##2##3##4{%\n        \\csname u8:##1\\string ##2\\string ##3\\string ##4\\endcsname}%\n      \\expandafter\\expandafter\\expandafter\\expandafter\n       \\expandafter\\expandafter\\expandafter\n       \\gdef\\UTFviiiTmp{#2}%\n    \\endgroup}\n\n  \\gdef\\parseXMLCharref{%\n    \\ifnum\\countUTFz < \"A0\\relax\n      \\errhelp = \\EMsimple\n      \\errmessage{Cannot define Unicode char value < 00A0}%\n    \\else\\ifnum\\countUTFz < \"800\\relax\n      \\parseUTFviiiA,%\n      \\parseUTFviiiB C\\UTFviiiTwoOctets.,%\n    \\else\\ifnum\\countUTFz < \"10000\\relax\n      \\parseUTFviiiA;%\n      \\parseUTFviiiA,%\n      \\parseUTFviiiB E\\UTFviiiThreeOctets.{,;}%\n    \\else\n      \\parseUTFviiiA;%\n      \\parseUTFviiiA,%\n      \\parseUTFviiiA!%\n      \\parseUTFviiiB F\\UTFviiiFourOctets.{!,;}%\n    \\fi\\fi\\fi\n  }\n\n  \\gdef\\parseUTFviiiA#1{%\n    \\countUTFx = \\countUTFz\n    \\divide\\countUTFz by 64\n    \\countUTFy = \\countUTFz\n    \\multiply\\countUTFz by 64\n    \\advance\\countUTFx by -\\countUTFz\n    \\advance\\countUTFx by 128\n    \\uccode `#1\\countUTFx\n    \\countUTFz = \\countUTFy}\n\n  \\gdef\\parseUTFviiiB#1#2#3#4{%\n    \\advance\\countUTFz by \"#10\\relax\n    \\uccode `#3\\countUTFz\n    \\uppercase{\\gdef\\UTFviiiTmp{#2#3#4}}}\n\\endgroup\n\n\\def\\utfeightchardefs{%\n  \\DeclareUnicodeCharacter{00A0}{\\tie}\n  \\DeclareUnicodeCharacter{00A1}{\\exclamdown}\n  \\DeclareUnicodeCharacter{00A3}{\\pounds}\n  \\DeclareUnicodeCharacter{00A8}{\\\"{ }}\n  \\DeclareUnicodeCharacter{00A9}{\\copyright}\n  \\DeclareUnicodeCharacter{00AA}{\\ordf}\n  \\DeclareUnicodeCharacter{00AB}{\\guillemetleft}\n  \\DeclareUnicodeCharacter{00AD}{\\-}\n  \\DeclareUnicodeCharacter{00AE}{\\registeredsymbol}\n  \\DeclareUnicodeCharacter{00AF}{\\={ }}\n\n  \\DeclareUnicodeCharacter{00B0}{\\ringaccent{ }}\n  \\DeclareUnicodeCharacter{00B4}{\\'{ }}\n  \\DeclareUnicodeCharacter{00B8}{\\cedilla{ }}\n  \\DeclareUnicodeCharacter{00BA}{\\ordm}\n  
\\DeclareUnicodeCharacter{00BB}{\\guillemetright}\n  \\DeclareUnicodeCharacter{00BF}{\\questiondown}\n\n  \\DeclareUnicodeCharacter{00C0}{\\`A}\n  \\DeclareUnicodeCharacter{00C1}{\\'A}\n  \\DeclareUnicodeCharacter{00C2}{\\^A}\n  \\DeclareUnicodeCharacter{00C3}{\\~A}\n  \\DeclareUnicodeCharacter{00C4}{\\\"A}\n  \\DeclareUnicodeCharacter{00C5}{\\AA}\n  \\DeclareUnicodeCharacter{00C6}{\\AE}\n  \\DeclareUnicodeCharacter{00C7}{\\cedilla{C}}\n  \\DeclareUnicodeCharacter{00C8}{\\`E}\n  \\DeclareUnicodeCharacter{00C9}{\\'E}\n  \\DeclareUnicodeCharacter{00CA}{\\^E}\n  \\DeclareUnicodeCharacter{00CB}{\\\"E}\n  \\DeclareUnicodeCharacter{00CC}{\\`I}\n  \\DeclareUnicodeCharacter{00CD}{\\'I}\n  \\DeclareUnicodeCharacter{00CE}{\\^I}\n  \\DeclareUnicodeCharacter{00CF}{\\\"I}\n\n  \\DeclareUnicodeCharacter{00D0}{\\DH}\n  \\DeclareUnicodeCharacter{00D1}{\\~N}\n  \\DeclareUnicodeCharacter{00D2}{\\`O}\n  \\DeclareUnicodeCharacter{00D3}{\\'O}\n  \\DeclareUnicodeCharacter{00D4}{\\^O}\n  \\DeclareUnicodeCharacter{00D5}{\\~O}\n  \\DeclareUnicodeCharacter{00D6}{\\\"O}\n  \\DeclareUnicodeCharacter{00D8}{\\O}\n  \\DeclareUnicodeCharacter{00D9}{\\`U}\n  \\DeclareUnicodeCharacter{00DA}{\\'U}\n  \\DeclareUnicodeCharacter{00DB}{\\^U}\n  \\DeclareUnicodeCharacter{00DC}{\\\"U}\n  \\DeclareUnicodeCharacter{00DD}{\\'Y}\n  \\DeclareUnicodeCharacter{00DE}{\\TH}\n  \\DeclareUnicodeCharacter{00DF}{\\ss}\n\n  \\DeclareUnicodeCharacter{00E0}{\\`a}\n  \\DeclareUnicodeCharacter{00E1}{\\'a}\n  \\DeclareUnicodeCharacter{00E2}{\\^a}\n  \\DeclareUnicodeCharacter{00E3}{\\~a}\n  \\DeclareUnicodeCharacter{00E4}{\\\"a}\n  \\DeclareUnicodeCharacter{00E5}{\\aa}\n  \\DeclareUnicodeCharacter{00E6}{\\ae}\n  \\DeclareUnicodeCharacter{00E7}{\\cedilla{c}}\n  \\DeclareUnicodeCharacter{00E8}{\\`e}\n  \\DeclareUnicodeCharacter{00E9}{\\'e}\n  \\DeclareUnicodeCharacter{00EA}{\\^e}\n  \\DeclareUnicodeCharacter{00EB}{\\\"e}\n  \\DeclareUnicodeCharacter{00EC}{\\`{\\dotless{i}}}\n  
\\DeclareUnicodeCharacter{00ED}{\\'{\\dotless{i}}}\n  \\DeclareUnicodeCharacter{00EE}{\\^{\\dotless{i}}}\n  \\DeclareUnicodeCharacter{00EF}{\\\"{\\dotless{i}}}\n\n  \\DeclareUnicodeCharacter{00F0}{\\dh}\n  \\DeclareUnicodeCharacter{00F1}{\\~n}\n  \\DeclareUnicodeCharacter{00F2}{\\`o}\n  \\DeclareUnicodeCharacter{00F3}{\\'o}\n  \\DeclareUnicodeCharacter{00F4}{\\^o}\n  \\DeclareUnicodeCharacter{00F5}{\\~o}\n  \\DeclareUnicodeCharacter{00F6}{\\\"o}\n  \\DeclareUnicodeCharacter{00F8}{\\o}\n  \\DeclareUnicodeCharacter{00F9}{\\`u}\n  \\DeclareUnicodeCharacter{00FA}{\\'u}\n  \\DeclareUnicodeCharacter{00FB}{\\^u}\n  \\DeclareUnicodeCharacter{00FC}{\\\"u}\n  \\DeclareUnicodeCharacter{00FD}{\\'y}\n  \\DeclareUnicodeCharacter{00FE}{\\th}\n  \\DeclareUnicodeCharacter{00FF}{\\\"y}\n\n  \\DeclareUnicodeCharacter{0100}{\\=A}\n  \\DeclareUnicodeCharacter{0101}{\\=a}\n  \\DeclareUnicodeCharacter{0102}{\\u{A}}\n  \\DeclareUnicodeCharacter{0103}{\\u{a}}\n  \\DeclareUnicodeCharacter{0104}{\\ogonek{A}}\n  \\DeclareUnicodeCharacter{0105}{\\ogonek{a}}\n  \\DeclareUnicodeCharacter{0106}{\\'C}\n  \\DeclareUnicodeCharacter{0107}{\\'c}\n  \\DeclareUnicodeCharacter{0108}{\\^C}\n  \\DeclareUnicodeCharacter{0109}{\\^c}\n  \\DeclareUnicodeCharacter{0118}{\\ogonek{E}}\n  \\DeclareUnicodeCharacter{0119}{\\ogonek{e}}\n  \\DeclareUnicodeCharacter{010A}{\\dotaccent{C}}\n  \\DeclareUnicodeCharacter{010B}{\\dotaccent{c}}\n  \\DeclareUnicodeCharacter{010C}{\\v{C}}\n  \\DeclareUnicodeCharacter{010D}{\\v{c}}\n  \\DeclareUnicodeCharacter{010E}{\\v{D}}\n\n  \\DeclareUnicodeCharacter{0112}{\\=E}\n  \\DeclareUnicodeCharacter{0113}{\\=e}\n  \\DeclareUnicodeCharacter{0114}{\\u{E}}\n  \\DeclareUnicodeCharacter{0115}{\\u{e}}\n  \\DeclareUnicodeCharacter{0116}{\\dotaccent{E}}\n  \\DeclareUnicodeCharacter{0117}{\\dotaccent{e}}\n  \\DeclareUnicodeCharacter{011A}{\\v{E}}\n  \\DeclareUnicodeCharacter{011B}{\\v{e}}\n  \\DeclareUnicodeCharacter{011C}{\\^G}\n  \\DeclareUnicodeCharacter{011D}{\\^g}\n  
\\DeclareUnicodeCharacter{011E}{\\u{G}}\n  \\DeclareUnicodeCharacter{011F}{\\u{g}}\n\n  \\DeclareUnicodeCharacter{0120}{\\dotaccent{G}}\n  \\DeclareUnicodeCharacter{0121}{\\dotaccent{g}}\n  \\DeclareUnicodeCharacter{0124}{\\^H}\n  \\DeclareUnicodeCharacter{0125}{\\^h}\n  \\DeclareUnicodeCharacter{0128}{\\~I}\n  \\DeclareUnicodeCharacter{0129}{\\~{\\dotless{i}}}\n  \\DeclareUnicodeCharacter{012A}{\\=I}\n  \\DeclareUnicodeCharacter{012B}{\\={\\dotless{i}}}\n  \\DeclareUnicodeCharacter{012C}{\\u{I}}\n  \\DeclareUnicodeCharacter{012D}{\\u{\\dotless{i}}}\n\n  \\DeclareUnicodeCharacter{0130}{\\dotaccent{I}}\n  \\DeclareUnicodeCharacter{0131}{\\dotless{i}}\n  \\DeclareUnicodeCharacter{0132}{IJ}\n  \\DeclareUnicodeCharacter{0133}{ij}\n  \\DeclareUnicodeCharacter{0134}{\\^J}\n  \\DeclareUnicodeCharacter{0135}{\\^{\\dotless{j}}}\n  \\DeclareUnicodeCharacter{0139}{\\'L}\n  \\DeclareUnicodeCharacter{013A}{\\'l}\n\n  \\DeclareUnicodeCharacter{0141}{\\L}\n  \\DeclareUnicodeCharacter{0142}{\\l}\n  \\DeclareUnicodeCharacter{0143}{\\'N}\n  \\DeclareUnicodeCharacter{0144}{\\'n}\n  \\DeclareUnicodeCharacter{0147}{\\v{N}}\n  \\DeclareUnicodeCharacter{0148}{\\v{n}}\n  \\DeclareUnicodeCharacter{014C}{\\=O}\n  \\DeclareUnicodeCharacter{014D}{\\=o}\n  \\DeclareUnicodeCharacter{014E}{\\u{O}}\n  \\DeclareUnicodeCharacter{014F}{\\u{o}}\n\n  \\DeclareUnicodeCharacter{0150}{\\H{O}}\n  \\DeclareUnicodeCharacter{0151}{\\H{o}}\n  \\DeclareUnicodeCharacter{0152}{\\OE}\n  \\DeclareUnicodeCharacter{0153}{\\oe}\n  \\DeclareUnicodeCharacter{0154}{\\'R}\n  \\DeclareUnicodeCharacter{0155}{\\'r}\n  \\DeclareUnicodeCharacter{0158}{\\v{R}}\n  \\DeclareUnicodeCharacter{0159}{\\v{r}}\n  \\DeclareUnicodeCharacter{015A}{\\'S}\n  \\DeclareUnicodeCharacter{015B}{\\'s}\n  \\DeclareUnicodeCharacter{015C}{\\^S}\n  \\DeclareUnicodeCharacter{015D}{\\^s}\n  \\DeclareUnicodeCharacter{015E}{\\cedilla{S}}\n  \\DeclareUnicodeCharacter{015F}{\\cedilla{s}}\n\n  \\DeclareUnicodeCharacter{0160}{\\v{S}}\n  
\\DeclareUnicodeCharacter{0161}{\\v{s}}\n  \\DeclareUnicodeCharacter{0162}{\\cedilla{T}}\n  \\DeclareUnicodeCharacter{0163}{\\cedilla{t}}\n  \\DeclareUnicodeCharacter{0164}{\\v{T}}\n\n  \\DeclareUnicodeCharacter{0168}{\\~U}\n  \\DeclareUnicodeCharacter{0169}{\\~u}\n  \\DeclareUnicodeCharacter{016A}{\\=U}\n  \\DeclareUnicodeCharacter{016B}{\\=u}\n  \\DeclareUnicodeCharacter{016C}{\\u{U}}\n  \\DeclareUnicodeCharacter{016D}{\\u{u}}\n  \\DeclareUnicodeCharacter{016E}{\\ringaccent{U}}\n  \\DeclareUnicodeCharacter{016F}{\\ringaccent{u}}\n\n  \\DeclareUnicodeCharacter{0170}{\\H{U}}\n  \\DeclareUnicodeCharacter{0171}{\\H{u}}\n  \\DeclareUnicodeCharacter{0174}{\\^W}\n  \\DeclareUnicodeCharacter{0175}{\\^w}\n  \\DeclareUnicodeCharacter{0176}{\\^Y}\n  \\DeclareUnicodeCharacter{0177}{\\^y}\n  \\DeclareUnicodeCharacter{0178}{\\\"Y}\n  \\DeclareUnicodeCharacter{0179}{\\'Z}\n  \\DeclareUnicodeCharacter{017A}{\\'z}\n  \\DeclareUnicodeCharacter{017B}{\\dotaccent{Z}}\n  \\DeclareUnicodeCharacter{017C}{\\dotaccent{z}}\n  \\DeclareUnicodeCharacter{017D}{\\v{Z}}\n  \\DeclareUnicodeCharacter{017E}{\\v{z}}\n\n  \\DeclareUnicodeCharacter{01C4}{D\\v{Z}}\n  \\DeclareUnicodeCharacter{01C5}{D\\v{z}}\n  \\DeclareUnicodeCharacter{01C6}{d\\v{z}}\n  \\DeclareUnicodeCharacter{01C7}{LJ}\n  \\DeclareUnicodeCharacter{01C8}{Lj}\n  \\DeclareUnicodeCharacter{01C9}{lj}\n  \\DeclareUnicodeCharacter{01CA}{NJ}\n  \\DeclareUnicodeCharacter{01CB}{Nj}\n  \\DeclareUnicodeCharacter{01CC}{nj}\n  \\DeclareUnicodeCharacter{01CD}{\\v{A}}\n  \\DeclareUnicodeCharacter{01CE}{\\v{a}}\n  \\DeclareUnicodeCharacter{01CF}{\\v{I}}\n\n  \\DeclareUnicodeCharacter{01D0}{\\v{\\dotless{i}}}\n  \\DeclareUnicodeCharacter{01D1}{\\v{O}}\n  \\DeclareUnicodeCharacter{01D2}{\\v{o}}\n  \\DeclareUnicodeCharacter{01D3}{\\v{U}}\n  \\DeclareUnicodeCharacter{01D4}{\\v{u}}\n\n  \\DeclareUnicodeCharacter{01E2}{\\={\\AE}}\n  \\DeclareUnicodeCharacter{01E3}{\\={\\ae}}\n  \\DeclareUnicodeCharacter{01E6}{\\v{G}}\n  
\\DeclareUnicodeCharacter{01E7}{\\v{g}}\n  \\DeclareUnicodeCharacter{01E8}{\\v{K}}\n  \\DeclareUnicodeCharacter{01E9}{\\v{k}}\n\n  \\DeclareUnicodeCharacter{01F0}{\\v{\\dotless{j}}}\n  \\DeclareUnicodeCharacter{01F1}{DZ}\n  \\DeclareUnicodeCharacter{01F2}{Dz}\n  \\DeclareUnicodeCharacter{01F3}{dz}\n  \\DeclareUnicodeCharacter{01F4}{\\'G}\n  \\DeclareUnicodeCharacter{01F5}{\\'g}\n  \\DeclareUnicodeCharacter{01F8}{\\`N}\n  \\DeclareUnicodeCharacter{01F9}{\\`n}\n  \\DeclareUnicodeCharacter{01FC}{\\'{\\AE}}\n  \\DeclareUnicodeCharacter{01FD}{\\'{\\ae}}\n  \\DeclareUnicodeCharacter{01FE}{\\'{\\O}}\n  \\DeclareUnicodeCharacter{01FF}{\\'{\\o}}\n\n  \\DeclareUnicodeCharacter{021E}{\\v{H}}\n  \\DeclareUnicodeCharacter{021F}{\\v{h}}\n\n  \\DeclareUnicodeCharacter{0226}{\\dotaccent{A}}\n  \\DeclareUnicodeCharacter{0227}{\\dotaccent{a}}\n  \\DeclareUnicodeCharacter{0228}{\\cedilla{E}}\n  \\DeclareUnicodeCharacter{0229}{\\cedilla{e}}\n  \\DeclareUnicodeCharacter{022E}{\\dotaccent{O}}\n  \\DeclareUnicodeCharacter{022F}{\\dotaccent{o}}\n\n  \\DeclareUnicodeCharacter{0232}{\\=Y}\n  \\DeclareUnicodeCharacter{0233}{\\=y}\n  \\DeclareUnicodeCharacter{0237}{\\dotless{j}}\n\n  \\DeclareUnicodeCharacter{02DB}{\\ogonek{ }}\n\n  \\DeclareUnicodeCharacter{1E02}{\\dotaccent{B}}\n  \\DeclareUnicodeCharacter{1E03}{\\dotaccent{b}}\n  \\DeclareUnicodeCharacter{1E04}{\\udotaccent{B}}\n  \\DeclareUnicodeCharacter{1E05}{\\udotaccent{b}}\n  \\DeclareUnicodeCharacter{1E06}{\\ubaraccent{B}}\n  \\DeclareUnicodeCharacter{1E07}{\\ubaraccent{b}}\n  \\DeclareUnicodeCharacter{1E0A}{\\dotaccent{D}}\n  \\DeclareUnicodeCharacter{1E0B}{\\dotaccent{d}}\n  \\DeclareUnicodeCharacter{1E0C}{\\udotaccent{D}}\n  \\DeclareUnicodeCharacter{1E0D}{\\udotaccent{d}}\n  \\DeclareUnicodeCharacter{1E0E}{\\ubaraccent{D}}\n  \\DeclareUnicodeCharacter{1E0F}{\\ubaraccent{d}}\n\n  \\DeclareUnicodeCharacter{1E1E}{\\dotaccent{F}}\n  \\DeclareUnicodeCharacter{1E1F}{\\dotaccent{f}}\n\n  \\DeclareUnicodeCharacter{1E20}{\\=G}\n  
\\DeclareUnicodeCharacter{1E21}{\\=g}\n  \\DeclareUnicodeCharacter{1E22}{\\dotaccent{H}}\n  \\DeclareUnicodeCharacter{1E23}{\\dotaccent{h}}\n  \\DeclareUnicodeCharacter{1E24}{\\udotaccent{H}}\n  \\DeclareUnicodeCharacter{1E25}{\\udotaccent{h}}\n  \\DeclareUnicodeCharacter{1E26}{\\\"H}\n  \\DeclareUnicodeCharacter{1E27}{\\\"h}\n\n  \\DeclareUnicodeCharacter{1E30}{\\'K}\n  \\DeclareUnicodeCharacter{1E31}{\\'k}\n  \\DeclareUnicodeCharacter{1E32}{\\udotaccent{K}}\n  \\DeclareUnicodeCharacter{1E33}{\\udotaccent{k}}\n  \\DeclareUnicodeCharacter{1E34}{\\ubaraccent{K}}\n  \\DeclareUnicodeCharacter{1E35}{\\ubaraccent{k}}\n  \\DeclareUnicodeCharacter{1E36}{\\udotaccent{L}}\n  \\DeclareUnicodeCharacter{1E37}{\\udotaccent{l}}\n  \\DeclareUnicodeCharacter{1E3A}{\\ubaraccent{L}}\n  \\DeclareUnicodeCharacter{1E3B}{\\ubaraccent{l}}\n  \\DeclareUnicodeCharacter{1E3E}{\\'M}\n  \\DeclareUnicodeCharacter{1E3F}{\\'m}\n\n  \\DeclareUnicodeCharacter{1E40}{\\dotaccent{M}}\n  \\DeclareUnicodeCharacter{1E41}{\\dotaccent{m}}\n  \\DeclareUnicodeCharacter{1E42}{\\udotaccent{M}}\n  \\DeclareUnicodeCharacter{1E43}{\\udotaccent{m}}\n  \\DeclareUnicodeCharacter{1E44}{\\dotaccent{N}}\n  \\DeclareUnicodeCharacter{1E45}{\\dotaccent{n}}\n  \\DeclareUnicodeCharacter{1E46}{\\udotaccent{N}}\n  \\DeclareUnicodeCharacter{1E47}{\\udotaccent{n}}\n  \\DeclareUnicodeCharacter{1E48}{\\ubaraccent{N}}\n  \\DeclareUnicodeCharacter{1E49}{\\ubaraccent{n}}\n\n  \\DeclareUnicodeCharacter{1E54}{\\'P}\n  \\DeclareUnicodeCharacter{1E55}{\\'p}\n  \\DeclareUnicodeCharacter{1E56}{\\dotaccent{P}}\n  \\DeclareUnicodeCharacter{1E57}{\\dotaccent{p}}\n  \\DeclareUnicodeCharacter{1E58}{\\dotaccent{R}}\n  \\DeclareUnicodeCharacter{1E59}{\\dotaccent{r}}\n  \\DeclareUnicodeCharacter{1E5A}{\\udotaccent{R}}\n  \\DeclareUnicodeCharacter{1E5B}{\\udotaccent{r}}\n  \\DeclareUnicodeCharacter{1E5E}{\\ubaraccent{R}}\n  \\DeclareUnicodeCharacter{1E5F}{\\ubaraccent{r}}\n\n  \\DeclareUnicodeCharacter{1E60}{\\dotaccent{S}}\n  
\\DeclareUnicodeCharacter{1E61}{\\dotaccent{s}}\n  \\DeclareUnicodeCharacter{1E62}{\\udotaccent{S}}\n  \\DeclareUnicodeCharacter{1E63}{\\udotaccent{s}}\n  \\DeclareUnicodeCharacter{1E6A}{\\dotaccent{T}}\n  \\DeclareUnicodeCharacter{1E6B}{\\dotaccent{t}}\n  \\DeclareUnicodeCharacter{1E6C}{\\udotaccent{T}}\n  \\DeclareUnicodeCharacter{1E6D}{\\udotaccent{t}}\n  \\DeclareUnicodeCharacter{1E6E}{\\ubaraccent{T}}\n  \\DeclareUnicodeCharacter{1E6F}{\\ubaraccent{t}}\n\n  \\DeclareUnicodeCharacter{1E7C}{\\~V}\n  \\DeclareUnicodeCharacter{1E7D}{\\~v}\n  \\DeclareUnicodeCharacter{1E7E}{\\udotaccent{V}}\n  \\DeclareUnicodeCharacter{1E7F}{\\udotaccent{v}}\n\n  \\DeclareUnicodeCharacter{1E80}{\\`W}\n  \\DeclareUnicodeCharacter{1E81}{\\`w}\n  \\DeclareUnicodeCharacter{1E82}{\\'W}\n  \\DeclareUnicodeCharacter{1E83}{\\'w}\n  \\DeclareUnicodeCharacter{1E84}{\\\"W}\n  \\DeclareUnicodeCharacter{1E85}{\\\"w}\n  \\DeclareUnicodeCharacter{1E86}{\\dotaccent{W}}\n  \\DeclareUnicodeCharacter{1E87}{\\dotaccent{w}}\n  \\DeclareUnicodeCharacter{1E88}{\\udotaccent{W}}\n  \\DeclareUnicodeCharacter{1E89}{\\udotaccent{w}}\n  \\DeclareUnicodeCharacter{1E8A}{\\dotaccent{X}}\n  \\DeclareUnicodeCharacter{1E8B}{\\dotaccent{x}}\n  \\DeclareUnicodeCharacter{1E8C}{\\\"X}\n  \\DeclareUnicodeCharacter{1E8D}{\\\"x}\n  \\DeclareUnicodeCharacter{1E8E}{\\dotaccent{Y}}\n  \\DeclareUnicodeCharacter{1E8F}{\\dotaccent{y}}\n\n  \\DeclareUnicodeCharacter{1E90}{\\^Z}\n  \\DeclareUnicodeCharacter{1E91}{\\^z}\n  \\DeclareUnicodeCharacter{1E92}{\\udotaccent{Z}}\n  \\DeclareUnicodeCharacter{1E93}{\\udotaccent{z}}\n  \\DeclareUnicodeCharacter{1E94}{\\ubaraccent{Z}}\n  \\DeclareUnicodeCharacter{1E95}{\\ubaraccent{z}}\n  \\DeclareUnicodeCharacter{1E96}{\\ubaraccent{h}}\n  \\DeclareUnicodeCharacter{1E97}{\\\"t}\n  \\DeclareUnicodeCharacter{1E98}{\\ringaccent{w}}\n  \\DeclareUnicodeCharacter{1E99}{\\ringaccent{y}}\n\n  \\DeclareUnicodeCharacter{1EA0}{\\udotaccent{A}}\n  \\DeclareUnicodeCharacter{1EA1}{\\udotaccent{a}}\n\n  
\\DeclareUnicodeCharacter{1EB8}{\\udotaccent{E}}\n  \\DeclareUnicodeCharacter{1EB9}{\\udotaccent{e}}\n  \\DeclareUnicodeCharacter{1EBC}{\\~E}\n  \\DeclareUnicodeCharacter{1EBD}{\\~e}\n\n  \\DeclareUnicodeCharacter{1ECA}{\\udotaccent{I}}\n  \\DeclareUnicodeCharacter{1ECB}{\\udotaccent{i}}\n  \\DeclareUnicodeCharacter{1ECC}{\\udotaccent{O}}\n  \\DeclareUnicodeCharacter{1ECD}{\\udotaccent{o}}\n\n  \\DeclareUnicodeCharacter{1EE4}{\\udotaccent{U}}\n  \\DeclareUnicodeCharacter{1EE5}{\\udotaccent{u}}\n\n  \\DeclareUnicodeCharacter{1EF2}{\\`Y}\n  \\DeclareUnicodeCharacter{1EF3}{\\`y}\n  \\DeclareUnicodeCharacter{1EF4}{\\udotaccent{Y}}\n\n  \\DeclareUnicodeCharacter{1EF8}{\\~Y}\n  \\DeclareUnicodeCharacter{1EF9}{\\~y}\n\n  \\DeclareUnicodeCharacter{2013}{--}\n  \\DeclareUnicodeCharacter{2014}{---}\n  \\DeclareUnicodeCharacter{2018}{\\quoteleft}\n  \\DeclareUnicodeCharacter{2019}{\\quoteright}\n  \\DeclareUnicodeCharacter{201A}{\\quotesinglbase}\n  \\DeclareUnicodeCharacter{201C}{\\quotedblleft}\n  \\DeclareUnicodeCharacter{201D}{\\quotedblright}\n  \\DeclareUnicodeCharacter{201E}{\\quotedblbase}\n  \\DeclareUnicodeCharacter{2022}{\\bullet}\n  \\DeclareUnicodeCharacter{2026}{\\dots}\n  \\DeclareUnicodeCharacter{2039}{\\guilsinglleft}\n  \\DeclareUnicodeCharacter{203A}{\\guilsinglright}\n  \\DeclareUnicodeCharacter{20AC}{\\euro}\n\n  \\DeclareUnicodeCharacter{2192}{\\expansion}\n  \\DeclareUnicodeCharacter{21D2}{\\result}\n\n  \\DeclareUnicodeCharacter{2212}{\\minus}\n  \\DeclareUnicodeCharacter{2217}{\\point}\n  \\DeclareUnicodeCharacter{2261}{\\equiv}\n}% end of \\utfeightchardefs\n\n\n% US-ASCII character definitions.\n\\def\\asciichardefs{% nothing need be done\n   \\relax\n}\n\n% Make non-ASCII characters printable again for compatibility with\n% existing Texinfo documents that may use them, even without declaring a\n% document encoding.\n%\n\\setnonasciicharscatcode \\other\n\n\n\\message{formatting,}\n\n\\newdimen\\defaultparindent \\defaultparindent = 
15pt\n\n\\chapheadingskip = 15pt plus 4pt minus 2pt\n\\secheadingskip = 12pt plus 3pt minus 2pt\n\\subsecheadingskip = 9pt plus 2pt minus 2pt\n\n% Prevent underfull vbox error messages.\n\\vbadness = 10000\n\n% Don't be very finicky about underfull hboxes, either.\n\\hbadness = 6666\n\n% Following George Bush, get rid of widows and orphans.\n\\widowpenalty=10000\n\\clubpenalty=10000\n\n% Use TeX 3.0's \\emergencystretch to help line breaking, but if we're\n% using an old version of TeX, don't do anything.  We want the amount of\n% stretch added to depend on the line length, hence the dependence on\n% \\hsize.  We call this whenever the paper size is set.\n%\n\\def\\setemergencystretch{%\n  \\ifx\\emergencystretch\\thisisundefined\n    % Allow us to assign to \\emergencystretch anyway.\n    \\def\\emergencystretch{\\dimen0}%\n  \\else\n    \\emergencystretch = .15\\hsize\n  \\fi\n}\n\n% Parameters in order: 1) textheight; 2) textwidth;\n% 3) voffset; 4) hoffset; 5) binding offset; 6) topskip;\n% 7) physical page height; 8) physical page width.\n%\n% We also call \\setleading{\\textleading}, so the caller should define\n% \\textleading.  
The caller should also set \\parskip.\n%\n\\def\\internalpagesizes#1#2#3#4#5#6#7#8{%\n  \\voffset = #3\\relax\n  \\topskip = #6\\relax\n  \\splittopskip = \\topskip\n  %\n  \\vsize = #1\\relax\n  \\advance\\vsize by \\topskip\n  \\outervsize = \\vsize\n  \\advance\\outervsize by 2\\topandbottommargin\n  \\pageheight = \\vsize\n  %\n  \\hsize = #2\\relax\n  \\outerhsize = \\hsize\n  \\advance\\outerhsize by 0.5in\n  \\pagewidth = \\hsize\n  %\n  \\normaloffset = #4\\relax\n  \\bindingoffset = #5\\relax\n  %\n  \\ifpdf\n    \\pdfpageheight #7\\relax\n    \\pdfpagewidth #8\\relax\n    % if we don't reset these, they will remain at \"1 true in\" of\n    % whatever layout pdftex was dumped with.\n    \\pdfhorigin = 1 true in\n    \\pdfvorigin = 1 true in\n  \\fi\n  %\n  \\setleading{\\textleading}\n  %\n  \\parindent = \\defaultparindent\n  \\setemergencystretch\n}\n\n% @letterpaper (the default).\n\\def\\letterpaper{{\\globaldefs = 1\n  \\parskip = 3pt plus 2pt minus 1pt\n  \\textleading = 13.2pt\n  %\n  % If page is nothing but text, make it come out even.\n  \\internalpagesizes{607.2pt}{6in}% that's 46 lines\n                    {\\voffset}{.25in}%\n                    {\\bindingoffset}{36pt}%\n                    {11in}{8.5in}%\n}}\n\n% Use @smallbook to reset parameters for 7x9.25 trim size.\n\\def\\smallbook{{\\globaldefs = 1\n  \\parskip = 2pt plus 1pt\n  \\textleading = 12pt\n  %\n  \\internalpagesizes{7.5in}{5in}%\n                    {-.2in}{0in}%\n                    {\\bindingoffset}{16pt}%\n                    {9.25in}{7in}%\n  %\n  \\lispnarrowing = 0.3in\n  \\tolerance = 700\n  \\hfuzz = 1pt\n  \\contentsrightmargin = 0pt\n  \\defbodyindent = .5cm\n}}\n\n% Use @smallerbook to reset parameters for 6x9 trim size.\n% (Just testing, parameters still in flux.)\n\\def\\smallerbook{{\\globaldefs = 1\n  \\parskip = 1.5pt plus 1pt\n  \\textleading = 12pt\n  %\n  \\internalpagesizes{7.4in}{4.8in}%\n                    {-.2in}{-.4in}%\n                    
{0pt}{14pt}%\n                    {9in}{6in}%\n  %\n  \\lispnarrowing = 0.25in\n  \\tolerance = 700\n  \\hfuzz = 1pt\n  \\contentsrightmargin = 0pt\n  \\defbodyindent = .4cm\n}}\n\n% Use @afourpaper to print on European A4 paper.\n\\def\\afourpaper{{\\globaldefs = 1\n  \\parskip = 3pt plus 2pt minus 1pt\n  \\textleading = 13.2pt\n  %\n  % Double-side printing via postscript on Laserjet 4050\n  % prints double-sided nicely when \\bindingoffset=10mm and \\hoffset=-6mm.\n  % To change the settings for a different printer or situation, adjust\n  % \\normaloffset until the front-side and back-side texts align.  Then\n  % do the same for \\bindingoffset.  You can set these for testing in\n  % your texinfo source file like this:\n  % @tex\n  % \\global\\normaloffset = -6mm\n  % \\global\\bindingoffset = 10mm\n  % @end tex\n  \\internalpagesizes{673.2pt}{160mm}% that's 51 lines\n                    {\\voffset}{\\hoffset}%\n                    {\\bindingoffset}{44pt}%\n                    {297mm}{210mm}%\n  %\n  \\tolerance = 700\n  \\hfuzz = 1pt\n  \\contentsrightmargin = 0pt\n  \\defbodyindent = 5mm\n}}\n\n% Use @afivepaper to print on European A5 paper.\n% From romildo@urano.iceb.ufop.br, 2 July 2000.\n% He also recommends making @example and @lisp be small.\n\\def\\afivepaper{{\\globaldefs = 1\n  \\parskip = 2pt plus 1pt minus 0.1pt\n  \\textleading = 12.5pt\n  %\n  \\internalpagesizes{160mm}{120mm}%\n                    {\\voffset}{\\hoffset}%\n                    {\\bindingoffset}{8pt}%\n                    {210mm}{148mm}%\n  %\n  \\lispnarrowing = 0.2in\n  \\tolerance = 800\n  \\hfuzz = 1.2pt\n  \\contentsrightmargin = 0pt\n  \\defbodyindent = 2mm\n  \\tableindent = 12mm\n}}\n\n% A specific text layout, 24x15cm overall, intended for A4 paper.\n\\def\\afourlatex{{\\globaldefs = 1\n  \\afourpaper\n  \\internalpagesizes{237mm}{150mm}%\n                    {\\voffset}{4.6mm}%\n                    {\\bindingoffset}{7mm}%\n                    {297mm}{210mm}%\n  %\n  % Must 
explicitly reset to 0 because we call \\afourpaper.\n  \\globaldefs = 0\n}}\n\n% Use @afourwide to print on A4 paper in landscape format.\n\\def\\afourwide{{\\globaldefs = 1\n  \\afourpaper\n  \\internalpagesizes{241mm}{165mm}%\n                    {\\voffset}{-2.95mm}%\n                    {\\bindingoffset}{7mm}%\n                    {297mm}{210mm}%\n  \\globaldefs = 0\n}}\n\n% @pagesizes TEXTHEIGHT[,TEXTWIDTH]\n% Perhaps we should allow setting the margins, \\topskip, \\parskip,\n% and/or leading, also. Or perhaps we should compute them somehow.\n%\n\\parseargdef\\pagesizes{\\pagesizesyyy #1,,\\finish}\n\\def\\pagesizesyyy#1,#2,#3\\finish{{%\n  \\setbox0 = \\hbox{\\ignorespaces #2}\\ifdim\\wd0 > 0pt \\hsize=#2\\relax \\fi\n  \\globaldefs = 1\n  %\n  \\parskip = 3pt plus 2pt minus 1pt\n  \\setleading{\\textleading}%\n  %\n  \\dimen0 = #1\\relax\n  \\advance\\dimen0 by \\voffset\n  %\n  \\dimen2 = \\hsize\n  \\advance\\dimen2 by \\normaloffset\n  %\n  \\internalpagesizes{#1}{\\hsize}%\n                    {\\voffset}{\\normaloffset}%\n                    {\\bindingoffset}{44pt}%\n                    {\\dimen0}{\\dimen2}%\n}}\n\n% Set default to letter.\n%\n\\letterpaper\n\n\n\\message{and turning on texinfo input format.}\n\n\\def^^L{\\par} % remove \\outer, so ^L can appear in an @comment\n\n% DEL is a comment character, in case @c does not suffice.\n\\catcode`\\^^? 
= 14\n\n% Define macros to output various characters with catcode for normal text.\n\\catcode`\\\"=\\other \\def\\normaldoublequote{\"}\n\\catcode`\\$=\\other \\def\\normaldollar{$}%$ font-lock fix\n\\catcode`\\+=\\other \\def\\normalplus{+}\n\\catcode`\\<=\\other \\def\\normalless{<}\n\\catcode`\\>=\\other \\def\\normalgreater{>}\n\\catcode`\\^=\\other \\def\\normalcaret{^}\n\\catcode`\\_=\\other \\def\\normalunderscore{_}\n\\catcode`\\|=\\other \\def\\normalverticalbar{|}\n\\catcode`\\~=\\other \\def\\normaltilde{~}\n\n% This macro is used to make a character print one way in \\tt\n% (where it can probably be output as-is), and another way in other fonts,\n% where something hairier probably needs to be done.\n%\n% #1 is what to print if we are indeed using \\tt; #2 is what to print\n% otherwise.  Since all the Computer Modern typewriter fonts have zero\n% interword stretch (and shrink), and it is reasonable to expect all\n% typewriter fonts to have this, we can check that font parameter.\n%\n\\def\\ifusingtt#1#2{\\ifdim \\fontdimen3\\font=0pt #1\\else #2\\fi}\n\n% Same as above, but check for italic font.  Actually this also catches\n% non-italic slanted fonts since it is impossible to distinguish them from\n% italic fonts.  
But since this is only used by $ and it uses \\sl anyway\n% this is not a problem.\n\\def\\ifusingit#1#2{\\ifdim \\fontdimen1\\font>0pt #1\\else #2\\fi}\n\n% Turn off all special characters except @\n% (and those which the user can use as if they were ordinary).\n% Most of these we simply print from the \\tt font, but for some, we can\n% use math or other variants that look better in normal text.\n\n\\catcode`\\\"=\\active\n\\def\\activedoublequote{{\\tt\\char34}}\n\\let\"=\\activedoublequote\n\\catcode`\\~=\\active\n\\def~{{\\tt\\char126}}\n\\chardef\\hat=`\\^\n\\catcode`\\^=\\active\n\\def^{{\\tt \\hat}}\n\n\\catcode`\\_=\\active\n\\def_{\\ifusingtt\\normalunderscore\\_}\n\\let\\realunder=_\n% Subroutine for the previous macro.\n\\def\\_{\\leavevmode \\kern.07em \\vbox{\\hrule width.3em height.1ex}\\kern .07em }\n\n\\catcode`\\|=\\active\n\\def|{{\\tt\\char124}}\n\\chardef \\less=`\\<\n\\catcode`\\<=\\active\n\\def<{{\\tt \\less}}\n\\chardef \\gtr=`\\>\n\\catcode`\\>=\\active\n\\def>{{\\tt \\gtr}}\n\\catcode`\\+=\\active\n\\def+{{\\tt \\char 43}}\n\\catcode`\\$=\\active\n\\def${\\ifusingit{{\\sl\\$}}\\normaldollar}%$ font-lock fix\n\n% If a .fmt file is being used, characters that might appear in a file\n% name cannot be active until we have parsed the command line.\n% So turn them off again, and have \\everyjob (or @setfilename) turn them on.\n% \\otherifyactive is called near the end of this file.\n\\def\\otherifyactive{\\catcode`+=\\other \\catcode`\\_=\\other}\n\n% Used sometimes to turn off (effectively) the active characters even after\n% parsing them.\n\\def\\turnoffactive{%\n  \\normalturnoffactive\n  \\otherbackslash\n}\n\n\\catcode`\\@=0\n\n% \\backslashcurfont outputs one backslash character in current font,\n% as in \\char`\\\\.\n\\global\\chardef\\backslashcurfont=`\\\\\n\\global\\let\\rawbackslashxx=\\backslashcurfont  % let existing .??s files work\n\n% \\realbackslash is an actual character `\\' with catcode other, and\n% \\doublebackslash is two 
of them (for the pdf outlines).\n{\\catcode`\\\\=\\other @gdef@realbackslash{\\} @gdef@doublebackslash{\\\\}}\n\n% In texinfo, backslash is an active character; it prints the backslash\n% in fixed width font.\n\\catcode`\\\\=\\active  % @ for escape char from now on.\n\n% The story here is that in math mode, the \\char of \\backslashcurfont\n% ends up printing the roman \\ from the math symbol font (because \\char\n% in math mode uses the \\mathcode, and plain.tex sets\n% \\mathcode`\\\\=\"026E).  It seems better for @backslashchar{} to always\n% print a typewriter backslash, hence we use an explicit \\mathchar,\n% which is the decimal equivalent of \"715c (class 7, e.g., use \\fam;\n% ignored family value; char position \"5C).  We can't use \" for the\n% usual hex value because it has already been made active.\n@def@normalbackslash{{@tt @ifmmode @mathchar29020 @else @backslashcurfont @fi}}\n@let@backslashchar = @normalbackslash % @backslashchar{} is for user documents.\n\n% On startup, @fixbackslash assigns:\n%  @let \\ = @normalbackslash\n% \\rawbackslash defines an active \\ to do \\backslashcurfont.\n% \\otherbackslash defines an active \\ to be a literal `\\' character with\n% catcode other.  We switch back and forth between these.\n@gdef@rawbackslash{@let\\=@backslashcurfont}\n@gdef@otherbackslash{@let\\=@realbackslash}\n\n% Same as @turnoffactive except outputs \\ as {\\tt\\char`\\\\} instead of\n% the literal character `\\'.  
Also revert - to its normal character, in\n% case the active - from code has slipped in.\n%\n{@catcode`- = @active\n @gdef@normalturnoffactive{%\n   @let-=@normaldash\n   @let\"=@normaldoublequote\n   @let$=@normaldollar %$ font-lock fix\n   @let+=@normalplus\n   @let<=@normalless\n   @let>=@normalgreater\n   @let\\=@normalbackslash\n   @let^=@normalcaret\n   @let_=@normalunderscore\n   @let|=@normalverticalbar\n   @let~=@normaltilde\n   @markupsetuplqdefault\n   @markupsetuprqdefault\n   @unsepspaces\n }\n}\n\n% Make _ and + \\other characters, temporarily.\n% This is canceled by @fixbackslash.\n@otherifyactive\n\n% If a .fmt file is being used, we don't want the `\\input texinfo' to show up.\n% That is what \\eatinput is for; after that, the `\\' should revert to printing\n% a backslash.\n%\n@gdef@eatinput input texinfo{@fixbackslash}\n@global@let\\ = @eatinput\n\n% On the other hand, perhaps the file did not have a `\\input texinfo'. Then\n% the first `\\' in the file would cause an error. 
This macro tries to fix\n% that, assuming it is called before the first `\\' could plausibly occur.\n% Also turn back on active characters that might appear in the input\n% file name, in case not using a pre-dumped format.\n%\n@gdef@fixbackslash{%\n  @ifx\\@eatinput @let\\ = @normalbackslash @fi\n  @catcode`+=@active\n  @catcode`@_=@active\n}\n\n% Say @foo, not \\foo, in error messages.\n@escapechar = `@@\n\n% These (along with & and #) are made active for url-breaking, so need\n% active definitions as the normal characters.\n@def@normaldot{.}\n@def@normalquest{?}\n@def@normalslash{/}\n\n% These look ok in all fonts, so just make them not special.\n% @hashchar{} gets its own user-level command, because of #line.\n@catcode`@& = @other @def@normalamp{&}\n@catcode`@# = @other @def@normalhash{#}\n@catcode`@% = @other @def@normalpercent{%}\n\n@let @hashchar = @normalhash\n\n@c Finally, make ` and ' active, so that txicodequoteundirected and\n@c txicodequotebacktick work right in, e.g., @w{@code{`foo'}}.  If we\n@c don't make ` and ' active, @code will not get them as active chars.\n@c Do this last of all since we use ` in the previous @catcode assignments.\n@catcode`@'=@active\n@catcode`@`=@active\n@markupsetuplqdefault\n@markupsetuprqdefault\n\n@c Local variables:\n@c eval: (add-hook 'write-file-hooks 'time-stamp)\n@c page-delimiter: \"^\\\\\\\\message\"\n@c time-stamp-start: \"def\\\\\\\\texinfoversion{\"\n@c time-stamp-format: \"%:y-%02m-%02d.%02H\"\n@c time-stamp-end: \"}\"\n@c End:\n\n@c vim:sw=2:\n\n@ignore\n   arch-tag: e1b36e32-c96e-4135-a41a-0b2efa2ea115\n@end ignore\n"
  },
  {
    "path": "doc/version.texi",
    "content": "@set UPDATED 18 February 2017\n@set UPDATED-MONTH February 2017\n@set EDITION 3.0.0\n@set VERSION 3.0.0\n"
  },
  {
    "path": "errno.c",
    "content": "/* gmp_errno, __gmp_exception -- exception handling and reporting.\n\n   THE FUNCTIONS IN THIS FILE, APART FROM gmp_errno, ARE FOR INTERNAL USE\n   ONLY.  THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR\n   DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint gmp_errno = 0;\n\n/* The deliberate divide by zero triggers an exception on most systems.  On\n   those where it doesn't, for example power and powerpc, use abort instead.\n\n   Enhancement: Perhaps raise(SIGFPE) (or the same with kill()) would be\n   better than abort.  Perhaps it'd be possible to get the BSD style\n   FPE_INTDIV_TRAP parameter in there too.  */\n\nvoid\n__gmp_exception (int error_bit)\n{\n  __gmp_junk = 10 / __gmp_0;\n  abort ();\n}\n\n\n/* These functions minimize the amount of code required in functions raising\n   exceptions.  Since they're \"noreturn\" and don't take any parameters, a\n   test and call might even come out as a simple conditional jump.  
*/\nvoid\n__gmp_sqrt_of_negative (void)\n{\n  __gmp_exception (GMP_ERROR_SQRT_OF_NEGATIVE);\n}\nvoid\n__gmp_divide_by_zero (void)\n{\n  __gmp_exception (GMP_ERROR_DIVISION_BY_ZERO);\n}\n"
  },
  {
    "path": "extract-dbl.c",
    "content": "/* __gmp_extract_double -- convert from double to array of mp_limb_t.\n\nCopyright 1996, 1999, 2000, 2001, 2002, 2006 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define BITS_IN_MANTISSA 53\n\n/* Extract a non-negative double in d.  */\n\nint\n__gmp_extract_double (mp_ptr rp, double d)\n{\n  long exp;\n  unsigned sc;\n#ifdef _LONG_LONG_LIMB\n#define BITS_PER_PART 64\t/* somewhat bogus */\n  unsigned long long int manl;\n#else\n#define BITS_PER_PART GMP_LIMB_BITS\n  unsigned long int manh, manl;\n#endif\n\n  /* BUGS\n\n     1. Should handle Inf and NaN in IEEE specific code.\n     2. Handle Inf and NaN also in default code, to avoid hangs.\n     3. Generalize to handle all GMP_LIMB_BITS >= 32.\n     4. 
This lits is incomplete and misspelled.\n   */\n\n  ASSERT (d >= 0.0);\n\n  if (d == 0.0)\n    {\n      MPN_ZERO (rp, LIMBS_PER_DOUBLE);\n      return 0;\n    }\n\n#if 1\n  {\n    union ieee_double_extract x;\n    x.d = d;\n    exp = x.s.exp;\n#if BITS_PER_PART == 64\t\t/* generalize this to BITS_PER_PART > BITS_IN_MANTISSA */\n    manl = (((mp_limb_t) 1 << 63)\n\t    | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11));\n    if (exp == 0)\n      {\n\t/* Denormalized number.  Don't try to be clever about this,\n\t   since it is not an important case to make fast.  */\n\texp = 1;\n\tdo\n\t  {\n\t    manl = manl << 1;\n\t    exp--;\n\t  }\n\twhile ((manl & GMP_LIMB_HIGHBIT) == 0);\n      }\n#endif\n#if BITS_PER_PART == 32\n    manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21);\n    manl = x.s.manl << 11;\n    if (exp == 0)\n      {\n\t/* Denormalized number.  Don't try to be clever about this,\n\t   since it is not an important case to make fast.  */\n\texp = 1;\n\tdo\n\t  {\n\t    manh = (manh << 1) | (manl >> 31);\n\t    manl = manl << 1;\n\t    exp--;\n\t  }\n\twhile ((manh & GMP_LIMB_HIGHBIT) == 0);\n      }\n#endif\n#if BITS_PER_PART != 32 && BITS_PER_PART != 64\n  You need to generalize the code above to handle this.\n#endif\n    exp -= 1022;\t\t/* Remove IEEE bias.  */\n  }\n#else\n  {\n    /* Unknown (or known to be non-IEEE) double format.  
*/\n    exp = 0;\n    if (d >= 1.0)\n      {\n\tASSERT_ALWAYS (d * 0.5 != d);\n\n\twhile (d >= 32768.0)\n\t  {\n\t    d *= (1.0 / 65536.0);\n\t    exp += 16;\n\t  }\n\twhile (d >= 1.0)\n\t  {\n\t    d *= 0.5;\n\t    exp += 1;\n\t  }\n      }\n    else if (d < 0.5)\n      {\n\twhile (d < (1.0 / 65536.0))\n\t  {\n\t    d *=  65536.0;\n\t    exp -= 16;\n\t  }\n\twhile (d < 0.5)\n\t  {\n\t    d *= 2.0;\n\t    exp -= 1;\n\t  }\n      }\n\n    d *= (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));\n#if BITS_PER_PART == 64\n    manl = d;\n#else\n    manh = d;\n    manl = (d - manh) * (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));\n#endif\n  }\n#endif /* IEEE */\n\n  /* Up until here, we have ignored the actual limb size.  Remains\n     to split manh,,manl into an array of LIMBS_PER_DOUBLE limbs.\n  */\n\n  sc = (unsigned) (exp + 64 * GMP_NUMB_BITS) % GMP_NUMB_BITS;\n\n  /* We add something here to get rounding right.  */\n  exp = (exp + 64 * GMP_NUMB_BITS) / GMP_NUMB_BITS - 64 * GMP_NUMB_BITS / GMP_NUMB_BITS + 1;\n\n#if LIMBS_PER_DOUBLE == 2\n#if GMP_NAIL_BITS == 0\n  if (sc != 0)\n    {\n      rp[1] = manl >> (GMP_LIMB_BITS - sc);\n      rp[0] = manl << sc;\n    }\n  else\n    {\n      rp[1] = manl;\n      rp[0] = 0;\n      exp--;\n    }\n#else\n  if (sc > GMP_NAIL_BITS)\n    {\n      rp[1] = manl >> (GMP_LIMB_BITS - sc);\n      rp[0] = (manl << (sc - GMP_NAIL_BITS)) & GMP_NUMB_MASK;\n    }\n  else\n    {\n      if (sc == 0)\n\t{\n\t  rp[1] = manl >> GMP_NAIL_BITS;\n\t  rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS) & GMP_NUMB_MASK;\n\t  exp--;\n\t}\n      else\n\t{\n\t  rp[1] = manl >> (GMP_LIMB_BITS - sc);\n\t  rp[0] = (manl >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;\n\t}\n    }\n#endif\n#endif\n\n#if LIMBS_PER_DOUBLE == 3\n#if GMP_NAIL_BITS == 0\n  if (sc != 0)\n    {\n      rp[2] = manh >> (GMP_LIMB_BITS - sc);\n      rp[1] = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));\n      rp[0] = manl << sc;\n    }\n  else\n    {\n      rp[2] = manh;\n      
rp[1] = manl;\n      rp[0] = 0;\n      exp--;\n    }\n#else\n  if (sc > GMP_NAIL_BITS)\n    {\n      rp[2] = (manh >> (GMP_LIMB_BITS - sc));\n      rp[1] = ((manh << (sc - GMP_NAIL_BITS)) |\n\t       (manl >> (GMP_LIMB_BITS - sc + GMP_NAIL_BITS))) & GMP_NUMB_MASK;\n      if (sc >= 2 * GMP_NAIL_BITS)\n\trp[0] = (manl << sc - 2 * GMP_NAIL_BITS) & GMP_NUMB_MASK;\n      else\n\trp[0] = manl >> (2 * GMP_NAIL_BITS - sc) & GMP_NUMB_MASK;\n    }\n  else\n    {\n      if (sc == 0)\n\t{\n\t  rp[2] = manh >> GMP_NAIL_BITS;\n\t  rp[1] = ((manh << GMP_NUMB_BITS - GMP_NAIL_BITS) | (manl >> 2 * GMP_NAIL_BITS)) & GMP_NUMB_MASK;\n\t  rp[0] = 0;\n\t  exp--;\n\t}\n      else\n\t{\n\t  rp[2] = (manh >> (GMP_LIMB_BITS - sc));\n\t  rp[1] = (manh >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;\n\t  rp[0] = ((manh << (GMP_NUMB_BITS - GMP_NAIL_BITS + sc))\n\t\t   | (manl >> (GMP_LIMB_BITS - (GMP_NUMB_BITS - GMP_NAIL_BITS + sc)))) & GMP_NUMB_MASK;\n\t}\n    }\n#endif\n#endif\n\n#if LIMBS_PER_DOUBLE > 3\n  if (sc == 0)\n    {\n      int i;\n\n      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)\n\t{\n\t  rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);\n\t  manh = ((manh << GMP_NUMB_BITS)\n\t\t  | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));\n\t  manl = manl << GMP_NUMB_BITS;\n\t}\n      exp--;\n    }\n  else\n    {\n      int i;\n\n      rp[LIMBS_PER_DOUBLE - 1] = (manh >> (GMP_LIMB_BITS - sc));\n      manh = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));\n      manl = (manl << sc);\n      for (i = LIMBS_PER_DOUBLE - 2; i >= 0; i--)\n\t{\n\t  rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);\n\t  manh = ((manh << GMP_NUMB_BITS)\n\t\t  | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));\n\t  manl = manl << GMP_NUMB_BITS;\n\t}\n  }\n#endif\n\n  return exp;\n}\n"
  },
  {
    "path": "fft/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software\n# Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nEXTRA_DIST = \n\nnoinst_LTLIBRARIES = libfft.la\nlibfft_la_SOURCES = adjust.c adjust_sqrt2.c butterfly_lshB.c butterfly_rshB.c combine_bits.c div_2expmod_2expp1.c fermat_to_mpz.c fft_mfa_trunc_sqrt2.c fft_mfa_trunc_sqrt2_inner.c fft_negacyclic.c fft_radix2.c fft_trunc.c fft_trunc_sqrt2.c ifft_mfa_trunc_sqrt2.c ifft_negacyclic.c ifft_radix2.c ifft_trunc.c ifft_trunc_sqrt2.c mul_2expmod_2expp1.c mul_fft_main.c mul_mfa_trunc_sqrt2.c mul_trunc_sqrt2.c mulmod_2expp1.c normmod_2expp1.c revbin.c split_bits.c\n"
  },
  {
    "path": "fft/adjust.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n      \nvoid mpir_fft_adjust(mp_ptr r, mp_ptr i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)\n{\n   mp_bitcnt_t b1;\n   mp_limb_t cy;\n   mp_size_t x;\n\n   b1 = i*w;\n   x  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n\n   if (x)\n   {\n      mpn_copyi(r + x, i1, limbs - x);\n      r[limbs] = 0;\n      cy = mpn_neg_n(r, i1 + limbs - x, x);\n      mpn_addmod_2expp1_1(r + 
x, limbs - x, -i1[limbs]);\n      mpn_sub_1(r + x, r + x, limbs - x + 1, cy); \n      mpn_mul_2expmod_2expp1(r, r, limbs, b1);\n   } else\n      mpn_mul_2expmod_2expp1(r, i1, limbs, b1);\n}\n"
  },
  {
    "path": "fft/adjust_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n      \nvoid mpir_fft_adjust_sqrt2(mp_ptr r, mp_ptr i1, \n            mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp)\n{\n   mp_bitcnt_t wn = limbs*GMP_LIMB_BITS;\n   mp_limb_t cy;\n   mp_size_t j = i/2, k = w/2;\n   mp_size_t y;\n   mp_bitcnt_t b1;\n   int negate = 0;\n\n   b1 = j + wn/4 + i*k;\n   if (b1 >= wn) \n   {\n      negate = 1;\n      b1 -= wn;\n   }\n   y  
= b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n \n   /* multiply by 2^{j + wn/4 + i*k} */\n   if (y)\n   {\n      mpn_copyi(temp + y, i1, limbs - y);\n      cy = mpn_neg_n(temp, i1 + limbs - y, y);\n      temp[limbs] = 0;\n      mpn_addmod_2expp1_1(temp + y, limbs - y, -i1[limbs]);\n      mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); \n      mpn_mul_2expmod_2expp1(r, temp, limbs, b1);\n   } else\n      mpn_mul_2expmod_2expp1(r, i1, limbs, b1);\n\n   /* multiply by 2^{wn/2} */\n   y = limbs/2;  \n   cy = 0;\n\n   mpn_copyi(temp + y, r, limbs - y);\n   temp[limbs] = 0;\n   if(y) cy = mpn_neg_n(temp, r + limbs - y, y);\n   mpn_addmod_2expp1_1(temp + y, limbs - y, -r[limbs]);\n   mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); \n   \n   /* shift by an additional half limb (rare) */\n   if (limbs & 1) \n       mpn_mul_2expmod_2expp1(temp, temp, limbs, GMP_LIMB_BITS/2);\n\n   /* subtract */\n   if (negate)\n      mpn_sub_n(r, r, temp, limbs + 1);\n   else\n      mpn_sub_n(r, temp, r, limbs + 1);\n}\n\n"
  },
  {
    "path": "fft/butterfly_lshB.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_butterfly_lshB(mp_ptr t, mp_ptr u, mp_ptr i1, \n                       mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y)\n{\n   mp_limb_t cy, cy1;\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n   const mp_limb_t cy2 = 0;\n#else\n   mp_limb_t cy2;\n#endif\n\n   if (x == 0)\n   {\n      if (y == 0)\n         cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs + 1);\n      else\n      {\n         cy = 
mpn_sumdiff_n(t, u + y, i1, i2, limbs - y);\n         u[limbs] = -(cy&1);\n         cy1 = cy>>1;\n         cy = mpn_sumdiff_n(t + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y);\n         t[limbs] = cy>>1;\n         mpn_add_1(t + limbs - y, t + limbs - y, y + 1, cy1);\n         cy1 = -(cy&1) + (i2[limbs] - i1[limbs]);\n         mpn_addmod_2expp1_1(u + y, limbs - y, cy1);\n         cy1 = -(i1[limbs] + i2[limbs]);\n         mpn_addmod_2expp1_1(t, limbs, cy1);\n      }\n   } else if (y == 0)\n   {\n      cy = mpn_sumdiff_n(t + x, u, i1, i2, limbs - x);\n      t[limbs] = cy>>1;\n      cy1 = cy&1;\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t, u + limbs - x, i1 + limbs - x, i2 + limbs - x, x);\n#else\n      cy = mpn_sumdiff_n(t, u + limbs - x, i1 + limbs - x, i2 + limbs - x, x);\n      cy2 = mpn_neg_n(t, t, x);\n#endif\n      u[limbs] = -(cy&1);\n      mpn_sub_1(u + limbs - x, u + limbs - x, x + 1, cy1);\n      cy1 = -(cy>>1) - cy2;\n      cy1 -= (i1[limbs] + i2[limbs]);\n      mpn_addmod_2expp1_1(t + x, limbs - x, cy1);\n      cy1 = (i2[limbs] - i1[limbs]);\n      mpn_addmod_2expp1_1(u, limbs, cy1);\n   } else if (x > y)\n   {\n      cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - x);\n      t[limbs] = cy>>1;\n      cy1 = cy&1;\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t, u + y + limbs - x, i1 + limbs - x, i2 + limbs - x, x - y);\n#else\n      cy = mpn_sumdiff_n(t, u + y + limbs - x, i1 + limbs - x, i2 + limbs - x, x - y);\n      cy2 = mpn_neg_n(t, t, x - y);\n#endif\n      u[limbs] = -(cy&1);\n      mpn_sub_1(u + y + limbs - x, u + y + limbs - x, x - y + 1, cy1);\n      cy1 = (cy>>1) + cy2;\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t + x - y, u, i2 + limbs - y, i1 + limbs - y, y);\n#else\n      cy = mpn_sumdiff_n(t + x - y, u, i2 + limbs - y, i1 + limbs - y, y);\n      cy2 = mpn_neg_n(t + x - y, t + x - y, y);\n#endif\n      cy1 = -(cy>>1) - mpn_sub_1(t + x - y, t + x - y, y, cy1) - cy2;\n      cy1 -= 
(i1[limbs] + i2[limbs]);\n      mpn_addmod_2expp1_1(t + x, limbs - x, cy1);\n      cy1 = -(cy&1) + (i2[limbs] - i1[limbs]);\n      mpn_addmod_2expp1_1(u + y, limbs - y, cy1);\n   } else if (x < y)\n   {\n      cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - y);\n      u[limbs] = -(cy&1);\n      cy1 = cy>>1;\n      cy = mpn_sumdiff_n(t + x + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y - x);\n      t[limbs] = cy>>1;\n      mpn_add_1(t + x + limbs - y, t + x + limbs - y, y - x + 1, cy1);\n      cy1 = cy&1;\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t, u + y - x, i2 + limbs - x, i1 + limbs - x, x);\n#else\n      cy = mpn_sumdiff_n(t, u + y - x, i2 + limbs - x, i1 + limbs - x, x);\n#endif\n      cy1 = -(cy&1) - mpn_sub_1(u + y - x, u + y - x, x, cy1);\n      cy1 += (i2[limbs] - i1[limbs]);\n      mpn_addmod_2expp1_1(u + y, limbs - y, cy1);\n#ifndef HAVE_NATIVE_mpn_nsumdiff_n\n      cy2 = mpn_neg_n(t, t, x);\n#endif\n      cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2;\n      mpn_addmod_2expp1_1(t + x, limbs - x, cy1);\n   } else /* x == y */\n   {\n      cy = mpn_sumdiff_n(t + x, u + x, i1, i2, limbs - x);\n      t[limbs] = cy>>1;\n      u[limbs] = -(cy&1);\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t, u, i2 + limbs - x, i1 + limbs - x, x);\n#else\n      cy = mpn_sumdiff_n(t, u, i2 + limbs - x, i1 + limbs - x, x);\n      cy2 = mpn_neg_n(t, t, x);\n#endif\n      cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2;\n      mpn_addmod_2expp1_1(t + x, limbs - x, cy1);\n      cy1 = -(cy&1) + i2[limbs] - i1[limbs];\n      mpn_addmod_2expp1_1(u + x, limbs - x, cy1);\n  }\n}\n\n"
  },
  {
    "path": "fft/butterfly_rshB.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_butterfly_rshB(mp_ptr t, mp_ptr u, mp_ptr i1, \n                       mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y)\n{\n   mp_limb_t cy, cy1, cy2, cy3;\n\n   if (x == 0)\n   {\n      if (y == 0)\n      {\n         cy = mpn_sumdiff_n(t, u, i1, i2, limbs + 1);     \n      } \n      else /* y != 0 */\n      {\n         cy = mpn_sumdiff_n(t, u, i1, i2 + y, limbs - y);\n         cy1 = 
(cy>>1);\n         cy2 = -(cy&1);\n         cy = mpn_sumdiff_n(u + limbs - y, t + limbs - y, i1 + limbs - y, i2, y);\n         u[limbs] = (cy>>1) + i1[limbs];\n         t[limbs] = i1[limbs] - (cy&1);\n         mpn_addmod_2expp1_1(t + limbs - y, y, cy1 + i2[limbs]);\n         mpn_addmod_2expp1_1(u + limbs - y, y, cy2 - i2[limbs]);\n      }\n   } else if (y == 0) /* x != 0 */\n   {\n      cy = mpn_sumdiff_n(t, u, i1 + x, i2, limbs - x);\n      cy1 = (cy>>1);\n      cy2 = -(cy&1);\n      cy3 = mpn_neg_n(i1, i1, x);\n      cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x, x);\n      u[limbs] = -cy3 - (cy&1) - i2[limbs];\n      t[limbs] = -cy3 + i2[limbs] + (cy>>1);\n      mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs]);\n      mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs]);\n   } else if (x == y)\n   {\n      cy = mpn_sumdiff_n(t, u, i1 + x, i2 + x, limbs - x);\n      cy1 = (cy>>1);\n      cy2 = -(cy&1);\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t + limbs - x, u + limbs - x, i2, i1, x);\n      cy3 = 0;\n#else\n      cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2, i1, x);\n      cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x);\n#endif\n      u[limbs] = -(cy&1);\n      t[limbs] = -(cy>>1) - cy3;\n      mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs] + i2[limbs]);\n      mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs] - i2[limbs]);\n   } else if (x > y)\n   {\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t + limbs - y, u + limbs - y, i2, i1 + x - y, y);\n      cy3 = 0;\n#else\n      cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i2, i1 + x - y, y);\n      cy3 = mpn_neg_n(t + limbs - y, t + limbs - y, y);\n#endif\n      t[limbs] = -(cy>>1) - cy3;\n      u[limbs] = -(cy&1);\n      cy3 = mpn_neg_n(i1, i1, x - y);\n      cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x + y, x - y);\n      mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs] - cy3);\n      
mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs] - cy3);\n      cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - x);\n      mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs]);\n      mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs]);\n   } else /* x < y */\n   {\n#ifdef HAVE_NATIVE_mpn_nsumdiff_n\n      cy = mpn_nsumdiff_n(t + limbs - x, u + limbs - x, i2 + y - x, i1, x);\n      cy3 = 0;\n#else\n      cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2 + y - x, i1, x);\n      cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x);\n#endif\n      t[limbs] = -(cy>>1) - cy3;\n      u[limbs] = -(cy&1);\n      cy3 = mpn_neg_n(i2, i2, y - x);\n      cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i1 + limbs - y + x, i2, y - x);\n      mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs] - cy3);\n      mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs] + cy3);\n      cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - y);\n      mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs]);\n      mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs]);      \n   }\n}\n"
  },
  {
    "path": "fft/combine_bits.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"stdlib.h\"\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid \nmpir_fft_combine_limbs(mp_ptr res, const mp_ptr * poly, long length, mp_size_t coeff_limbs, mp_size_t output_limbs, mp_size_t total_limbs)\n{\n   mp_size_t skip, i;\n   \n   for (skip = 0, i = 0; i < length && skip + output_limbs + 1 <= total_limbs; i++, skip += coeff_limbs)\n      mpn_add(res + skip, res + skip, output_limbs + 1, poly[i], output_limbs); \n\n   while 
((skip < total_limbs) && (i < length))\n   {\n      mpn_add(res + skip, res + skip, total_limbs - skip, poly[i], MIN(total_limbs - skip, output_limbs));\n      \n      i++;\n      \n      skip += coeff_limbs;\n   }  \n}\n\nvoid \nmpir_fft_combine_bits(mp_ptr res, const mp_ptr * poly, long length, mp_bitcnt_t bits, mp_size_t output_limbs, mp_size_t total_limbs)\n{\n   mp_bitcnt_t shift_bits, top_bits = ((GMP_LIMB_BITS - 1) & bits);\n   mp_size_t coeff_limbs, i;\n   mp_limb_t * temp, * limb_ptr, * end;\n   TMP_DECL;\n\n   if (top_bits == 0)\n   {\n      mpir_fft_combine_limbs(res, poly, length, bits/GMP_LIMB_BITS, output_limbs, total_limbs);\n      return;\n   }\n   \n   TMP_MARK;\n   coeff_limbs = (bits/GMP_LIMB_BITS) + 1;\n   temp = TMP_BALLOC_LIMBS(output_limbs + 1);\n   shift_bits = 0;\n   limb_ptr = res;\n   end = res + total_limbs;\n   \n   for (i = 0; i < length && limb_ptr + output_limbs + 1 < end; i++)\n   { \n      if (shift_bits)\n      {\n         mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits);\n         mpn_add_n(limb_ptr, limb_ptr, temp, output_limbs + 1);\n      } else\n         mpn_add(limb_ptr, limb_ptr, output_limbs + 1, poly[i], output_limbs);\n\n      shift_bits += top_bits;\n      limb_ptr += (coeff_limbs - 1);\n\n      if (shift_bits >= GMP_LIMB_BITS)\n      {\n         limb_ptr++;\n         shift_bits -= GMP_LIMB_BITS;\n      }      \n   } \n\n   while (limb_ptr < end && i < length)\n   {\n      if (shift_bits)\n      {\n         mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits);\n         mpn_add_n(limb_ptr, limb_ptr, temp, end - limb_ptr);\n      } else\n         mpn_add_n(limb_ptr, limb_ptr, poly[i], end - limb_ptr);\n\n      shift_bits += top_bits;\n      limb_ptr += (coeff_limbs - 1);\n\n      if (shift_bits >= GMP_LIMB_BITS)\n      {\n         limb_ptr++;\n         shift_bits -= GMP_LIMB_BITS;\n      }  \n\n      i++;    \n   }\n   \n   TMP_FREE;\n}\n"
  },
  {
    "path": "fft/div_2expmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* WARNING: relies on GCC's handling of >> as arithmetic shift right */\n\nvoid mpn_div_2expmod_2expp1(mp_ptr t, mp_srcptr i1, mp_size_t limbs, mp_bitcnt_t d)\n{\n   mp_limb_t lo;\n   mp_limb_t * ptr;\n   mp_limb_signed_t hi;\n   \n   if (d == 0)\n   {   \n      if (t != i1)\n         mpn_copyi(t, i1, limbs + 1);\n   } else\n   {\n      hi = i1[limbs];\n      lo = 
mpn_rshift(t, i1, limbs + 1, d);\n      t[limbs] = (hi >> d);\n      ptr = t + limbs - 1;\n      sub_ddmmss(ptr[1], ptr[0], ptr[1], ptr[0], (mp_limb_t) 0, lo);\n   }\n}\n"
  },
  {
    "path": "fft/fermat_to_mpz.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_fermat_to_mpz(mpz_t m, mp_ptr i, mp_size_t limbs)\n{\n   mp_limb_signed_t hi;\n   \n   mpz_realloc(m, limbs + 1);\n   mpn_copyi(m->_mp_d, i, limbs + 1);\n   hi = i[limbs];\n   if (hi < 0L)\n   {\n      mpn_neg_n(m->_mp_d, m->_mp_d, limbs + 1);\n      m->_mp_size = limbs + 1;\n      while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) \n         m->_mp_size--;\n      m->_mp_size = 
-m->_mp_size;\n   } else\n   {\n      m->_mp_size = limbs + 1;\n      while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) \n         m->_mp_size--;\n   }\n}\n"
  },
  {
    "path": "fft/fft_mfa_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_fft_butterfly_twiddle(mp_ptr u, mp_ptr v, \n    mp_ptr s, mp_ptr t, mp_size_t limbs, mp_bitcnt_t b1, mp_bitcnt_t b2)\n{\n   mp_limb_t nw = limbs*GMP_LIMB_BITS;\n   mp_size_t x, y;\n   int negate1 = 0;\n   int negate2 = 0;\n   \n   if (b1 >= nw) \n   {\n      negate2 = 1;\n      b1 -= nw;\n   }\n   x  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n\n   if (b2 >= nw) \n   {\n      
negate1 = 1;\n      b2 -= nw;\n   }\n   y  = b2/GMP_LIMB_BITS;\n   b2 = b2%GMP_LIMB_BITS;\n \n   mpir_butterfly_lshB(u, v, s, t, limbs, x, y);\n   mpn_mul_2expmod_2expp1(u, u, limbs, b1);\n   if (negate2) mpn_neg_n(u, u, limbs + 1);\n   mpn_mul_2expmod_2expp1(v, v, limbs, b2);\n   if (negate1) mpn_neg_n(v, v, limbs + 1);\n}\n\nvoid mpir_fft_radix2_twiddle(mp_ptr * ii, mp_size_t is,\n      mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n      mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs)\n{\n   mp_size_t i;\n   mp_size_t limbs;\n\n#if 0\nstart:\n#endif\n\n   limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if (n == 1) \n   {\n      mp_size_t tw1 = r*c;\n      mp_size_t tw2 = tw1 + rs*c;\n\n      mpir_fft_butterfly_twiddle(*t1, *t2, ii[0], ii[is], limbs, tw1*ws, tw2*ws);\n      MP_PTR_SWAP(ii[0],  *t1);\n      MP_PTR_SWAP(ii[is], *t2);\n\n      return;\n   }\n\n   for (i = 0; i < n; i++) \n   {   \n      mpir_fft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);\n   \n      MP_PTR_SWAP(ii[i*is],     *t1);\n      MP_PTR_SWAP(ii[(n+i)*is], *t2);\n   }\n\n   mpir_fft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);\n#if 0\n   ii += n * is;\n   n /= 2;\n   w += w;\n   r += rs;\n   rs += rs;\n   goto start;\n#else\n   mpir_fft_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, ws, r + rs, c, 2*rs);\n#endif\n}\n\nvoid mpir_fft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,\n      mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n      mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if (trunc == 2*n)\n      mpir_fft_radix2_twiddle(ii, is, n, w, t1, t2, ws, r, c, rs);\n   else if (trunc <= n)\n   {\n      for (i = 0; i < n; i++)\n         mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], limbs + 1);\n      \n      mpir_fft_trunc1_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs, trunc);\n   } else\n   {\n      for (i = 0; i < n; i++) \n      {   \n         
mpir_fft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);\n   \n         MP_PTR_SWAP(ii[i*is],     *t1);\n         MP_PTR_SWAP(ii[(n+i)*is], *t2);\n      }\n\n      mpir_fft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);  \n      mpir_fft_trunc1_twiddle(ii + n*is, is, n/2, 2*w, \n                                     t1, t2, ws, r + rs, c, 2*rs, trunc - n);\n   }\n}\n\nvoid mpir_fft_mfa_trunc_sqrt2(mp_ptr * ii, mp_size_t n, \n                   mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2, \n                             mp_ptr * temp, mp_size_t n1, mp_size_t trunc)\n{\n   mp_size_t i, j, s;\n   mp_size_t n2 = (2*n)/n1;\n   mp_size_t trunc2 = (trunc - 2*n)/n1;\n   mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n   mp_bitcnt_t depth = 0;\n   mp_bitcnt_t depth2 = 0;\n   \n   while ((((mp_size_t)1)<<depth) < n2) depth++;\n   while ((((mp_size_t)1)<<depth2) < n1) depth2++;\n\n   /* first half matrix fourier FFT : n2 rows, n1 cols */\n   \n   /* FFTs on columns */\n   for (i = 0; i < n1; i++)\n   {   \n      /* relevant part of first layer of full sqrt2 FFT */\n      if (w & 1)\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            if (j & 1)\n               mpir_fft_butterfly_sqrt2(*t1, *t2, ii[j], ii[2*n+j], j, limbs, w, *temp);\n            else\n               mpir_fft_butterfly(*t1, *t2, ii[j], ii[2*n+j], j/2, limbs, w);     \n\n            MP_PTR_SWAP(ii[j],     *t1);\n            MP_PTR_SWAP(ii[2*n+j], *t2);\n         }\n\n         for ( ; j < 2*n; j+=n1)\n         {\n             if (i & 1)\n                mpir_fft_adjust_sqrt2(ii[j + 2*n], ii[j], j, limbs, w, *temp); \n             else\n                mpir_fft_adjust(ii[j + 2*n], ii[j], j/2, limbs, w); \n         }\n      } else\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            mpir_fft_butterfly(*t1, *t2, ii[j], ii[2*n+j], j, limbs, w/2);\n   \n            MP_PTR_SWAP(ii[j],     *t1);\n            MP_PTR_SWAP(ii[2*n+j], *t2);\n         
}\n\n         for ( ; j < 2*n; j+=n1)\n            mpir_fft_adjust(ii[j + 2*n], ii[j], j, limbs, w/2);\n      }\n   \n      /* \n         FFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      \n      mpir_fft_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1);\n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n   }\n   \n   /* FFTs on rows */\n   for (i = 0; i < n2; i++)\n   {\n      mpir_fft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t t = mpir_revbin(j, depth2);\n         if (j < t) MP_PTR_SWAP(ii[i*n1+j], ii[i*n1+t]);\n      }\n   }\n   \n   /* second half matrix fourier FFT : n2 rows, n1 cols */\n   ii += 2*n;\n\n   /* FFTs on columns */\n   for (i = 0; i < n1; i++)\n   {   \n      /*\n         FFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      \n      mpir_fft_trunc1_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1, trunc2);\n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n   }\n\n   /* FFTs on relevant rows */\n   for (s = 0; s < trunc2; s++)\n   {\n      i = mpir_revbin(s, depth);\n      mpir_fft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n      \n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t t = mpir_revbin(j, depth2);\n         if (j < t) MP_PTR_SWAP(ii[i*n1+j], ii[i*n1+t]);\n      }\n   }\n}\n\nvoid mpir_fft_mfa_trunc_sqrt2_outer(mp_ptr * ii, mp_size_t n, \n                   mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2, \n                             mp_ptr * temp, mp_size_t n1, mp_size_t trunc)\n{\n   mp_size_t i, j;\n   mp_size_t n2 = (2*n)/n1;\n   mp_size_t trunc2 = (trunc - 2*n)/n1;\n   
mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n   mp_bitcnt_t depth = 0;\n   mp_bitcnt_t depth2 = 0;\n   \n   while ((((mp_size_t)1)<<depth) < n2) depth++;\n   while ((((mp_size_t)1)<<depth2) < n1) depth2++;\n\n   /* first half matrix fourier FFT : n2 rows, n1 cols */\n   \n   /* FFTs on columns */\n   for (i = 0; i < n1; i++)\n   {   \n      /* relevant part of first layer of full sqrt2 FFT */\n      if (w & 1)\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            if (j & 1)\n               mpir_fft_butterfly_sqrt2(*t1, *t2, ii[j], ii[2*n+j], j, limbs, w, *temp);\n            else\n               mpir_fft_butterfly(*t1, *t2, ii[j], ii[2*n+j], j/2, limbs, w);     \n\n            MP_PTR_SWAP(ii[j],     *t1);\n            MP_PTR_SWAP(ii[2*n+j], *t2);\n         }\n\n         for ( ; j < 2*n; j+=n1)\n         {\n             if (i & 1)\n                mpir_fft_adjust_sqrt2(ii[j + 2*n], ii[j], j, limbs, w, *temp); \n             else\n                mpir_fft_adjust(ii[j + 2*n], ii[j], j/2, limbs, w); \n         }\n      } else\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            mpir_fft_butterfly(*t1, *t2, ii[j], ii[2*n+j], j, limbs, w/2);\n   \n            MP_PTR_SWAP(ii[j],     *t1);\n            MP_PTR_SWAP(ii[2*n+j], *t2);\n         }\n\n         for ( ; j < 2*n; j+=n1)\n            mpir_fft_adjust(ii[j + 2*n], ii[j], j, limbs, w/2);\n      }\n   \n      /* \n         FFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      \n      mpir_fft_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1);\n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n   }\n      \n   /* second half matrix fourier FFT : n2 rows, n1 cols */\n   ii += 2*n;\n\n   /* FFTs on columns */\n   for (i = 0; i < n1; i++)\n   {   \n      /*\n         
FFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      \n      mpir_fft_trunc1_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1, trunc2);\n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n   }\n}\n"
  },
  {
    "path": "fft/fft_mfa_trunc_sqrt2_inner.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_fft_mfa_trunc_sqrt2_inner(mp_ptr * ii, mp_ptr * jj, mp_size_t n, \n                   mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2, \n                  mp_ptr * temp, mp_size_t n1, mp_size_t trunc, mp_ptr tt)\n{\n   mp_size_t i, j, s;\n   mp_size_t n2 = (2*n)/n1;\n   mp_size_t trunc2 = (trunc - 2*n)/n1;\n   mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n   mp_bitcnt_t depth = 0;\n   mp_bitcnt_t depth2 
= 0;\n   \n   while ((((mp_size_t)1)<<depth) < n2) depth++;\n   while ((((mp_size_t)1)<<depth2) < n1) depth2++;\n\n   ii += 2*n;\n   jj += 2*n;\n\n   /* convolutions on relevant rows */\n   for (s = 0; s < trunc2; s++)\n   {\n      i = mpir_revbin(s, depth);\n      mpir_fft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n      if (ii != jj) mpir_fft_radix2(jj + i*n1, n1/2, w*n2, t1, t2);\n      \n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t t = i*n1 + j;\n         mpn_normmod_2expp1(ii[t], limbs);\n         if (ii != jj) mpn_normmod_2expp1(jj[t], limbs);\n         mpn_mulmod_Bexpp1(ii[t], ii[t], jj[t], n * w / GMP_LIMB_BITS, tt);\n      }      \n      \n      mpir_ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n   }\n\n   ii -= 2*n;\n   jj -= 2*n;\n\n   /* convolutions on rows */\n   for (i = 0; i < n2; i++)\n   {\n      mpir_fft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n      if (ii != jj) mpir_fft_radix2(jj + i*n1, n1/2, w*n2, t1, t2);\n\n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t t = i*n1 + j;\n         mpn_normmod_2expp1(ii[t], limbs);\n         if (ii != jj) mpn_normmod_2expp1(jj[t], limbs);\n         mpn_mulmod_Bexpp1(ii[t], ii[t], jj[t], n * w / GMP_LIMB_BITS, tt);\n      }      \n      \n      mpir_ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n   }\n}\n\n"
  },
  {
    "path": "fft/fft_negacyclic.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_fft_negacyclic(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n                    mp_ptr * t1, mp_ptr * t2, mp_ptr * temp)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   /* first apply twiddle factors corresponding to shifts of w*i/2 bits */\n   if (w & 1)\n   {\n      for (i = 0; i < n; i++) \n      {   \n          mpir_fft_adjust(*t1, ii[i], i/2, limbs, w);\n    
      MP_PTR_SWAP(ii[i], *t1);\n            \n          mpir_fft_adjust(*t2, ii[n+i], (n+i)/2, limbs, w);\n          MP_PTR_SWAP(ii[n+i], *t2);\n\n          mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n\n          i++;\n          \n          mpir_fft_adjust_sqrt2(*t1, ii[i], i, limbs, w, *temp);\n          MP_PTR_SWAP(ii[i], *t1);\n          \n          mpir_fft_adjust_sqrt2(*t2, ii[n+i], n+i, limbs, w, *temp);\n          MP_PTR_SWAP(ii[n+i], *t2);\n          \n          mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n       }\n   } else\n   {\n       for (i = 0; i < n; i++) \n       {   \n          mpir_fft_adjust(*t1, ii[i], i, limbs, w/2);\n          MP_PTR_SWAP(ii[i], *t1);\n            \n          mpir_fft_adjust(*t2, ii[n+i], n+i, limbs, w/2);\n          MP_PTR_SWAP(ii[n+i], *t2);\n      \n          mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n       }\n   }\n\n   mpir_fft_radix2(ii, n/2, 2*w, t1, t2);\n   mpir_fft_radix2(ii+n, n/2, 2*w, t1, t2);\n}\n"
  },
  {
    "path": "fft/fft_radix2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_fft_butterfly(mp_ptr s, mp_ptr t, mp_ptr i1, \n                   mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)\n{\n   mp_size_t y;\n   mp_bitcnt_t b1;\n\n   b1 = i*w;\n   y  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n \n   mpir_butterfly_lshB(s, t, i1, i2, limbs, 0, y);\n   mpn_mul_2expmod_2expp1(t, t, limbs, b1);\n}\n\nvoid mpir_fft_radix2(mp_ptr * ii, \n      
mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   ASSERT(n!=0);\n   if (n == 1) \n   {\n      mpir_fft_butterfly(*t1, *t2, ii[0], ii[1], 0, limbs, w);\n\n      MP_PTR_SWAP(ii[0], *t1);\n      MP_PTR_SWAP(ii[1], *t2);\n      \n      return;\n   }\n\n   for (i = 0; i < n; i++) \n   {   \n      mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n      MP_PTR_SWAP(ii[i],   *t1);\n      MP_PTR_SWAP(ii[n+i], *t2);\n   }\n\n   mpir_fft_radix2(ii,     n/2, 2*w, t1, t2);\n   mpir_fft_radix2(ii + n, n/2, 2*w, t1, t2);\n}\n"
  },
  {
    "path": "fft/fft_trunc.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_fft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n                   mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n    if (trunc == 2*n)\n        mpir_fft_radix2(ii, n, w, t1, t2);\n    else if (trunc <= n)\n    {\n        for (i = 0; i < n; i++)\n            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);\n      \n  
      mpir_fft_trunc1(ii, n/2, 2*w, t1, t2, trunc);\n    } else\n    {\n        for (i = 0; i < n; i++) \n        {   \n            mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n            MP_PTR_SWAP(ii[i],   *t1);\n            MP_PTR_SWAP(ii[n+i], *t2);\n        }\n\n        mpir_fft_radix2(ii, n/2, 2*w, t1, t2);\n        mpir_fft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);\n   }\n}\n\nvoid mpir_fft_trunc(mp_ptr * ii,  mp_size_t n, mp_bitcnt_t w, \n                  mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n    if (trunc == 2*n)\n       mpir_fft_radix2(ii, n, w, t1, t2);\n    else if (trunc <= n)\n       mpir_fft_trunc(ii, n/2, 2*w, t1, t2, trunc);\n    else\n    {\n        for (i = 0; i < trunc - n; i++) \n        {   \n            mpir_fft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n            MP_PTR_SWAP(ii[i],   *t1);\n            MP_PTR_SWAP(ii[n+i], *t2);\n        }\n\n        for ( ; i < n; i++)\n            mpir_fft_adjust(ii[i+n], ii[i], i, limbs, w); \n   \n        mpir_fft_radix2(ii, n/2, 2*w, t1, t2);\n        mpir_fft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);\n   }\n}\n"
  },
  {
    "path": "fft/fft_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_fft_butterfly_sqrt2(mp_ptr s, mp_ptr t, \n                    mp_ptr i1, mp_ptr i2, mp_size_t i, \n                         mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp)\n{\n   mp_bitcnt_t wn = limbs*GMP_LIMB_BITS;\n   mp_limb_t cy = 0;\n   mp_size_t j = i/2, k = w/2;\n   mp_size_t y;\n   mp_bitcnt_t b1;\n   int negate = 0;\n\n   b1 = j + wn/4 + i*k;\n   if (b1 >= wn) \n   {\n      
negate = 1;\n      b1 -= wn;\n   }\n   y  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n \n   /* sumdiff and multiply by 2^{j + wn/4 + i*k} */\n   mpir_butterfly_lshB(s, t, i1, i2, limbs, 0, y);\n   mpn_mul_2expmod_2expp1(t, t, limbs, b1);\n   \n   /* multiply by 2^{wn/2} */\n   y = limbs/2;\n   \n   mpn_copyi(temp + y, t, limbs - y);\n   temp[limbs] = 0;\n   if (y) cy = mpn_neg_n(temp, t + limbs - y, y);\n   mpn_addmod_2expp1_1(temp + y, limbs - y, -t[limbs]);\n   mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); \n   \n   /* shift by an additional half limb (rare) */\n   if (limbs & 1) \n       mpn_mul_2expmod_2expp1(temp, temp, limbs, GMP_LIMB_BITS/2);\n\n   /* subtract */\n   if (negate)\n       mpn_sub_n(t, t, temp, limbs + 1);\n   else\n       mpn_sub_n(t, temp, t, limbs + 1);\n}\n\nvoid mpir_fft_trunc_sqrt2(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n       mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n    if ((w & 1) == 0)\n    {\n        mpir_fft_trunc(ii, 2*n, w/2, t1, t2, trunc);\n        return;\n    }\n   \n    for (i = 0; i < trunc - 2*n; i++) \n    {   \n        mpir_fft_butterfly(*t1, *t2, ii[i], ii[2*n+i], i/2, limbs, w);\n   \n        MP_PTR_SWAP(ii[i],     *t1);\n        MP_PTR_SWAP(ii[i+2*n], *t2);\n \n        i++;\n      \n        mpir_fft_butterfly_sqrt2(*t1, *t2, ii[i], ii[2*n+i], i, limbs, w, *temp);\n\n        MP_PTR_SWAP(ii[i],     *t1);\n        MP_PTR_SWAP(ii[2*n+i], *t2);\n    }\n\n    for (i = trunc - 2*n; i < 2*n; i++)\n    {\n        mpir_fft_adjust(ii[i+2*n], ii[i], i/2, limbs, w); \n         \n        i++;\n\n        mpir_fft_adjust_sqrt2(ii[i+2*n], ii[i], i, limbs, w, *temp); \n    }\n   \n    mpir_fft_radix2(ii, n, w, t1, t2);\n    mpir_fft_trunc1(ii + 2*n, n, w, t1, t2, trunc - 2*n);\n}\n"
  },
  {
    "path": "fft/ifft_mfa_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_ifft_butterfly_twiddle(mp_ptr u, mp_ptr v, \n   mp_ptr s, mp_ptr t, mp_size_t limbs, mp_bitcnt_t b1, mp_bitcnt_t b2)\n{\n   mp_limb_t nw = limbs*GMP_LIMB_BITS;\n   mp_size_t x, y;\n   int negate1 = 0;\n   int negate2 = 0;\n   \n   if (b1 >= nw)\n   {\n      negate1 = 1;\n      b1 -= nw;\n   }\n   x  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n\n   if (b2 >= nw)\n   {\n      negate2 = 
1;\n      b2 -= nw;\n   }\n   y  = b2/GMP_LIMB_BITS;\n   b2 = b2%GMP_LIMB_BITS;\n\n   if (negate1) mpn_neg_n(s, s, limbs + 1);\n   mpn_div_2expmod_2expp1(s, s, limbs, b1);\n   if (negate2) mpn_neg_n(t, t, limbs + 1);\n   mpn_div_2expmod_2expp1(t, t, limbs, b2);\n   mpir_butterfly_rshB(u, v, s, t, limbs, x, y);\n}\n\nvoid mpir_ifft_radix2_twiddle(mp_ptr * ii, mp_size_t is,\n        mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                            mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if (n == 1) \n   {\n      mp_size_t tw1, tw2;\n      tw1 = r*c;\n      tw2 = tw1 + rs*c;\n\n      mpir_ifft_butterfly_twiddle(*t1, *t2, ii[0], ii[is], limbs, tw1*ws, tw2*ws);\n      MP_PTR_SWAP(ii[0],  *t1);\n      MP_PTR_SWAP(ii[is], *t2);\n      return;\n   }\n\n   mpir_ifft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);\n   mpir_ifft_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, ws, r + rs, c, 2*rs);\n\n   for (i = 0; i < n; i++) \n   {   \n      mpir_ifft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);\n   \n      MP_PTR_SWAP(ii[i*is], *t1);\n      MP_PTR_SWAP(ii[(n+i)*is], *t2);\n   }\n}\n\nvoid mpir_ifft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,\n        mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n           mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if (trunc == 2*n)\n      mpir_ifft_radix2_twiddle(ii, is, n, w, t1, t2, ws, r, c, rs);\n   else if (trunc <= n)\n   {\n      for (i = trunc; i < n; i++)\n      {\n         mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], limbs + 1);\n         mpn_div_2expmod_2expp1(ii[i*is], ii[i*is], limbs, 1);\n      }\n      \n      mpir_ifft_trunc1_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs, trunc);\n\n      for (i = 0; i < trunc; i++)\n      {\n#if  HAVE_NATIVE_mpn_addsub_n\n         mpn_addsub_n(ii[i*is], 
ii[i*is], ii[i*is], ii[(n+i)*is], limbs + 1);\n#else\n         mpn_add_n(ii[i*is], ii[i*is], ii[i*is], limbs + 1);\n         mpn_sub_n(ii[i*is], ii[i*is], ii[(n+i)*is], limbs + 1);\n#endif\n      }\n   } else\n   {\n      mpir_ifft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);\n      \n      for (i = trunc - n; i < n; i++)\n      {\n          mpn_sub_n(ii[(i+n)*is], ii[i*is], ii[(i+n)*is], limbs + 1);\n          mpir_fft_adjust(*t1, ii[(i+n)*is], i, limbs, w);\n          mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], limbs + 1);\n          MP_PTR_SWAP(ii[(i+n)*is], *t1);\n      }\n\n      mpir_ifft_trunc1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, ws, r + rs, c, 2*rs, trunc - n);\n\n      for (i = 0; i < trunc - n; i++) \n      {   \n         mpir_ifft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);\n   \n         MP_PTR_SWAP(ii[i*is],     *t1);\n         MP_PTR_SWAP(ii[(n+i)*is], *t2);\n      }\n   }\n}\n\nvoid mpir_ifft_mfa_trunc_sqrt2(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n   mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t n1, mp_size_t trunc)\n{\n   mp_size_t i, j, s;\n   mp_size_t n2 = (2*n)/n1;\n   mp_size_t trunc2 = (trunc - 2*n)/n1;\n   mp_bitcnt_t depth = 0;\n   mp_bitcnt_t depth2 = 0;\n   mp_bitcnt_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   while ((((mp_size_t)1)<<depth) < n2) depth++;\n   while ((((mp_size_t)1)<<depth2) < n1) depth2++;\n\n   /* first half mfa IFFT : n2 rows, n1 cols */\n\n   /* row IFFTs */\n   for (i = 0; i < n2; i++)\n   {\n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth2);\n         if (j < s) MP_PTR_SWAP(ii[i*n1+j], ii[i*n1+s]);\n      }      \n      \n      mpir_ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n   }\n   \n   /* column IFFTs */\n   for (i = 0; i < n1; i++)\n   {   \n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n      \n      /*\n         IFFT of length n2 on 
column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      mpir_ifft_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1);\n   }\n   \n   /* second half IFFT : n2 rows, n1 cols */\n   ii += 2*n;\n\n   /* row IFFTs */\n   for (s = 0; s < trunc2; s++)\n   {\n      i = mpir_revbin(s, depth);\n      for (j = 0; j < n1; j++)\n      {\n         mp_size_t t = mpir_revbin(j, depth2);\n         if (j < t) MP_PTR_SWAP(ii[i*n1+j], ii[i*n1+t]);\n      }      \n      \n      mpir_ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);\n   }\n\n   /* column IFFTs with relevant sqrt2 layer butterflies combined */\n   for (i = 0; i < n1; i++)\n   {   \n      for (j = 0; j < trunc2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n\n      for ( ; j < n2; j++)\n      {\n         mp_size_t u = i + j*n1;\n         if (w & 1)\n         {\n            if (i & 1)\n               mpir_fft_adjust_sqrt2(ii[i + j*n1], ii[u - 2*n], u, limbs, w, *temp); \n            else\n               mpir_fft_adjust(ii[i + j*n1], ii[u - 2*n], u/2, limbs, w); \n         } else\n            mpir_fft_adjust(ii[i + j*n1], ii[u - 2*n], u, limbs, w/2);\n      }\n\n      /* \n         IFFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      mpir_ifft_trunc1_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1, trunc2);\n      \n      /* relevant components of final sqrt2 layer of IFFT */\n      if (w & 1)\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            if (j & 1)\n               mpir_ifft_butterfly_sqrt2(*t1, *t2, ii[j - 2*n], ii[j], j, limbs, w, *temp); \n            else\n               mpir_ifft_butterfly(*t1, *t2, ii[j - 2*n], ii[j], j/2, limbs, w);\n\n            MP_PTR_SWAP(ii[j-2*n], *t1);\n            MP_PTR_SWAP(ii[j],     *t2);\n         }\n      
} else\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            mpir_ifft_butterfly(*t1, *t2, ii[j - 2*n], ii[j], j, limbs, w/2);\n   \n            MP_PTR_SWAP(ii[j-2*n], *t1);\n            MP_PTR_SWAP(ii[j],     *t2);\n         }\n      }\n\n      for (j = trunc + i - 2*n; j < 2*n; j+=n1)\n           mpn_add_n(ii[j - 2*n], ii[j - 2*n], ii[j - 2*n], limbs + 1);\n   }\n}\n\nvoid mpir_ifft_mfa_trunc_sqrt2_outer(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n   mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t n1, mp_size_t trunc)\n{\n   mp_size_t i, j;\n   mp_size_t n2 = (2*n)/n1;\n   mp_size_t trunc2 = (trunc - 2*n)/n1;\n   mp_bitcnt_t depth = 0;\n   mp_bitcnt_t depth2 = 0;\n   mp_bitcnt_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   while ((((mp_size_t)1)<<depth) < n2) depth++;\n   while ((((mp_size_t)1)<<depth2) < n1) depth2++;\n\n   /* first half mfa IFFT : n2 rows, n1 cols */\n   \n   /* column IFFTs */\n   for (i = 0; i < n1; i++)\n   {   \n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n      \n      /*\n         IFFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      mpir_ifft_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1);\n   }\n   \n   /* second half IFFT : n2 rows, n1 cols */\n   ii += 2*n;\n\n   /* column IFFTs with relevant sqrt2 layer butterflies combined */\n   for (i = 0; i < n1; i++)\n   {   \n      for (j = 0; j < trunc2; j++)\n      {\n         mp_size_t s = mpir_revbin(j, depth);\n         if (j < s) MP_PTR_SWAP(ii[i+j*n1], ii[i+s*n1]);\n      }\n\n      for ( ; j < n2; j++)\n      {\n         mp_size_t u = i + j*n1;\n         if (w & 1)\n         {\n            if (i & 1)\n               mpir_fft_adjust_sqrt2(ii[i + j*n1], ii[u - 2*n], u, limbs, w, *temp); \n            else\n               mpir_fft_adjust(ii[i + j*n1], ii[u 
- 2*n], u/2, limbs, w); \n         } else\n            mpir_fft_adjust(ii[i + j*n1], ii[u - 2*n], u, limbs, w/2);\n      }\n\n      /* \n         IFFT of length n2 on column i, applying z^{r*i} for rows going up in steps \n         of 1 starting at row 0, where z => w bits\n      */\n      mpir_ifft_trunc1_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, w, 0, i, 1, trunc2);\n      \n      /* relevant components of final sqrt2 layer of IFFT */\n      if (w & 1)\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            if (j & 1)\n               mpir_ifft_butterfly_sqrt2(*t1, *t2, ii[j - 2*n], ii[j], j, limbs, w, *temp); \n            else\n               mpir_ifft_butterfly(*t1, *t2, ii[j - 2*n], ii[j], j/2, limbs, w);\n\n            MP_PTR_SWAP(ii[j-2*n], *t1);\n            MP_PTR_SWAP(ii[j],     *t2);\n         }\n      } else\n      {\n         for (j = i; j < trunc - 2*n; j+=n1) \n         {   \n            mpir_ifft_butterfly(*t1, *t2, ii[j - 2*n], ii[j], j, limbs, w/2);\n   \n            MP_PTR_SWAP(ii[j-2*n], *t1);\n            MP_PTR_SWAP(ii[j],     *t2);\n         }\n      }\n\n      for (j = trunc + i - 2*n; j < 2*n; j+=n1)\n           mpn_add_n(ii[j - 2*n], ii[j - 2*n], ii[j - 2*n], limbs + 1);\n\n      for (j = 0; j < trunc2; j++)\n      {\n         mp_size_t t = j*n1 + i;\n         mpn_div_2expmod_2expp1(ii[t], ii[t], limbs, depth + depth2 + 1);\n         mpn_normmod_2expp1(ii[t], limbs);\n      }\n\n      for (j = 0; j < n2; j++)\n      {\n         mp_size_t t = j*n1 + i - 2*n;\n         mpn_div_2expmod_2expp1(ii[t], ii[t], limbs, depth + depth2 + 1);\n         mpn_normmod_2expp1(ii[t], limbs);\n      }\n   }\n}\n"
  },
  {
    "path": "fft/ifft_negacyclic.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_ifft_negacyclic(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n                     mp_ptr * t1, mp_ptr * t2, mp_ptr * temp)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n\n   mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);\n   mpir_ifft_radix2(ii+n, n/2, 2*w, t1, t2);\n\n   if (w & 1)\n   {\n      for (i = 0; i < n; i++) \n      {   \n          mpir_ifft_butterfly(*t1, *t2, 
ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n\n          mpir_fft_adjust(*t1, ii[i], n - i/2, limbs, w);\n          mpn_neg_n(*t1, *t1, limbs + 1);\n          MP_PTR_SWAP(ii[i], *t1);\n            \n          mpir_fft_adjust(*t2, ii[n+i], n - (n+i)/2, limbs, w);\n          mpn_neg_n(*t2, *t2, limbs + 1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n            \n          i++;\n\n          mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n\n          mpir_fft_adjust_sqrt2(*t1, ii[i], 2*n-i, limbs, w, *temp);\n          mpn_neg_n(*t1, *t1, limbs + 1);\n          MP_PTR_SWAP(ii[i], *t1);\n          \n          mpir_fft_adjust_sqrt2(*t2, ii[n+i], n-i, limbs, w, *temp);\n          mpn_neg_n(*t2, *t2, limbs + 1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n       }\n   } else\n   {\n       for (i = 0; i < n; i++) \n       {   \n          mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n          MP_PTR_SWAP(ii[i],   *t1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n\n          mpir_fft_adjust(*t1, ii[i], 2*n-i, limbs, w/2);\n          mpn_neg_n(*t1, *t1, limbs + 1);\n          MP_PTR_SWAP(ii[i], *t1);\n            \n          mpir_fft_adjust(*t2, ii[n+i], n-i, limbs, w/2);\n          mpn_neg_n(*t2, *t2, limbs + 1);\n          MP_PTR_SWAP(ii[n+i], *t2);\n       }\n   }\n}\n\n"
  },
  {
    "path": "fft/ifft_radix2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_ifft_butterfly(mp_ptr s, mp_ptr t, mp_ptr i1, \n                    mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)\n{\n   mp_size_t y;\n   mp_bitcnt_t b1;\n   \n   b1 = i*w;\n   y  = b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n\n   mpn_div_2expmod_2expp1(i2, i2, limbs, b1);\n   mpir_butterfly_rshB(s, t, i1, i2, limbs, 0, y);\n}\n\nvoid mpir_ifft_radix2(mp_ptr * ii, 
mp_size_t n, \n                 mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2)\n{\n   mp_size_t i;\n   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if (n == 1) \n   {\n      mpir_ifft_butterfly(*t1, *t2, ii[0], ii[1], 0, limbs, w);\n      \n      MP_PTR_SWAP(ii[0], *t1);\n      MP_PTR_SWAP(ii[1], *t2);\n      \n      return;\n   }\n\n   mpir_ifft_radix2(ii,   n/2, 2*w, t1, t2);\n   mpir_ifft_radix2(ii+n, n/2, 2*w, t1, t2);\n\n   for (i = 0; i < n; i++) \n   {   \n      mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n      MP_PTR_SWAP(ii[i],   *t1);\n      MP_PTR_SWAP(ii[n+i], *t2);\n   }\n}\n"
  },
  {
    "path": "fft/ifft_trunc.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n                    mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n    \n    if (trunc == 2*n)\n        mpir_ifft_radix2(ii, n, w, t1, t2);\n    else if (trunc <= n)\n    {\n        for (i = trunc; i < n; i++)\n        {\n            mpn_add_n(ii[i], ii[i], ii[i+n], 
limbs + 1);\n            mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, 1);\n        }\n      \n        mpir_ifft_trunc1(ii, n/2, 2*w, t1, t2, trunc);\n\n        for (i = 0; i < trunc; i++)\n        {\n#if  HAVE_NATIVE_mpn_addsub_n\n            mpn_addsub_n(ii[i], ii[i], ii[i], ii[n+i], limbs + 1);\n#else\n            mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);\n            mpn_sub_n(ii[i], ii[i], ii[n+i], limbs + 1);\n#endif\n        }\n    } else\n    {\n        mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);\n\n        for (i = trunc - n; i < n; i++)\n        {\n            mpn_sub_n(ii[i+n], ii[i], ii[i+n], limbs + 1);\n            mpir_fft_adjust(*t1, ii[i+n], i, limbs, w);\n            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);\n            MP_PTR_SWAP(ii[i+n], *t1);\n        }\n   \n        mpir_ifft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);\n\n        for (i = 0; i < trunc - n; i++) \n        {   \n            mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n            MP_PTR_SWAP(ii[i],   *t1);\n            MP_PTR_SWAP(ii[n+i], *t2);\n        }\n    }\n}\n\nvoid mpir_ifft_trunc(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n                   mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n    if (trunc == 2*n)\n        mpir_ifft_radix2(ii, n, w, t1, t2);\n    else if (trunc <= n)\n    {\n        mpir_ifft_trunc(ii, n/2, 2*w, t1, t2, trunc);\n\n        for (i = 0; i < trunc; i++)\n            mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);\n    } else\n    {\n        mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);\n\n        for (i = trunc - n; i < n; i++)\n            mpir_fft_adjust(ii[i+n], ii[i], i, limbs, w);\n        \n        mpir_ifft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);\n\n        for (i = 0; i < trunc - n; i++) \n        {   \n            mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);\n   \n            MP_PTR_SWAP(ii[i],   *t1);\n            MP_PTR_SWAP(ii[n+i], *t2);\n  
      }\n\n        for (i = trunc - n; i < n; i++)\n            mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);\n    }\n}\n"
  },
  {
    "path": "fft/ifft_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpir_ifft_butterfly_sqrt2(mp_ptr s, mp_ptr t, mp_ptr i1, \n   mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp)\n{\n   mp_bitcnt_t wn = limbs*GMP_LIMB_BITS;\n   mp_limb_t cy = 0;\n   mp_size_t j = i/2, k = w/2;\n   mp_size_t y2, y;\n   mp_size_t b1;\n   int negate = 1;\n\n   b1 = wn - j - i*k - 1 + wn/4;\n   if (b1 >= wn) \n   {\n      negate = 0;\n      b1 -= wn;\n   }\n   y2 
= b1/GMP_LIMB_BITS;\n   b1 = b1%GMP_LIMB_BITS;\n\n   /* multiply by small part of 2^{2*wn - j - ik - 1 + wn/4} */\n   if (b1) mpn_mul_2expmod_2expp1(i2, i2, limbs, b1);\n   \n   /* multiply by 2^{wn/2} */\n   y = limbs/2;\n   \n   mpn_copyi(temp + y, i2, limbs - y);\n   temp[limbs] = 0;\n   if (y) cy = mpn_neg_n(temp, i2 + limbs - y, y);\n   mpn_addmod_2expp1_1(temp + y, limbs - y, -i2[limbs]);\n   mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); \n   \n   /* shift by an additional half limb (rare) */\n   if (limbs & 1) \n      mpn_mul_2expmod_2expp1(temp, temp, limbs, GMP_LIMB_BITS/2);\n\n   /* subtract and negate... */\n   if (negate) mpn_sub_n(i2, temp, i2, limbs + 1);\n   else mpn_sub_n(i2, i2, temp, limbs + 1);\n\n   /* ...negate and shift **left** by y2 limbs (i.e. shift right by \n   (size - y2) limbs) and sumdiff */\n   mpir_butterfly_rshB(s, t, i1, i2, limbs, 0, limbs - y2);\n}\n\n\nvoid mpir_ifft_trunc_sqrt2(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, \n            mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t trunc)\n{\n    mp_size_t i;\n    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;\n   \n   if ((w & 1) == 0)\n   {\n      mpir_ifft_trunc(ii, 2*n, w/2, t1, t2, trunc);\n      return;\n   }\n\n   mpir_ifft_radix2(ii, n, w, t1, t2);\n\n   for (i = trunc - 2*n; i < 2*n; i++)\n   {\n      mpir_fft_adjust(ii[i+2*n], ii[i], i/2, limbs, w);\n\n      i++;\n\n      mpir_fft_adjust_sqrt2(ii[i+2*n], ii[i], i, limbs, w, *temp);\n   }\n   \n   mpir_ifft_trunc1(ii + 2*n, n, w, t1, t2, trunc - 2*n);\n\n   for (i = 0; i < trunc - 2*n; i++) \n   {   \n      mpir_ifft_butterfly(*t1, *t2, ii[i], ii[2*n+i], i/2, limbs, w);\n   \n      MP_PTR_SWAP(ii[i], *t1);\n      MP_PTR_SWAP(ii[2*n+i], *t2);\n\n      i++;\n\n      mpir_ifft_butterfly_sqrt2(*t1, *t2, ii[i], ii[2*n+i], i, limbs, w, *temp);\n   \n      MP_PTR_SWAP(ii[i], *t1);\n      MP_PTR_SWAP(ii[2*n+i], *t2);\n   }\n\n  for (i = trunc - 2*n; i < 2*n; i++)\n     mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);\n}\n"
  },
  {
    "path": "fft/mul_2expmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* WARNING: relies on GCC's handling of >> as arithmetic shift right */\n\nvoid mpn_mul_2expmod_2expp1(mp_ptr t, mp_ptr i1, mp_size_t limbs, mp_bitcnt_t d)\n{\n   mp_limb_signed_t hi1, hi2;\n   \n   if (d == 0)\n   {   \n      if (t != i1)\n         mpn_copyi(t, i1, limbs + 1);\n   } else\n   {\n      hi1 = ((mp_limb_signed_t) i1[limbs] >> (GMP_LIMB_BITS - d)); \n      mpn_lshift(t, i1, limbs + 1, 
d);\n      hi2 = t[limbs];\n      t[limbs] = 0;\n      mpn_sub_1(t, t, limbs + 1, hi2);\n      mpn_addmod_2expp1_1(t + 1, limbs - 1, -hi1);\n   }\n}\n\n"
  },
  {
    "path": "fft/mul_fft_main.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic int mpir_fft_tuning_table[5][2] = FFT_TAB;\n\nvoid \nmpn_mul_fft_main(mp_ptr r1, mp_srcptr i1, mp_size_t n1, mp_srcptr i2, mp_size_t n2)\n{\n   mp_size_t off, depth = 6;\n   mp_size_t w = 1;\n   mp_size_t n = ((mp_size_t) 1 << depth);\n   mp_bitcnt_t bits = (n*w - (depth+1))/2;\n\n   mp_bitcnt_t bits1 = n1*GMP_LIMB_BITS;\n   mp_bitcnt_t bits2 = n2*GMP_LIMB_BITS;\n\n   
mp_size_t j1 = (bits1 - 1)/bits + 1;\n   mp_size_t j2 = (bits2 - 1)/bits + 1;\n\n   ASSERT(n1 > 0);\n   ASSERT(n2 > 0);\n   ASSERT(j1 + j2 - 1 > 2*n);\n\n   while (j1 + j2 - 1 > 4*n) /* find initial n, w */\n   {\n      if (w == 1) w = 2;\n      else \n      {\n         depth++;\n         w = 1;\n         n *= 2;\n      }\n\n      bits = (n*w - (depth+1))/2;\n      j1 = (bits1 - 1)/bits + 1;\n      j2 = (bits2 - 1)/bits + 1;\n   }\n\n   if (depth < 11)\n   {\n      mp_size_t wadj = 1;\n      \n      off = mpir_fft_tuning_table[depth - 6][w - 1]; /* adjust n and w */\n      depth -= off;\n      n = ((mp_size_t) 1 << depth);\n      w *= ((mp_size_t) 1 << (2*off));\n      \n      if (depth < 6) wadj = ((mp_size_t) 1 << (6 - depth));\n\n      if (w > wadj)\n      {\n         do { /* see if a smaller w will work */\n            w -= wadj;\n            bits = (n*w - (depth+1))/2;\n            j1 = (bits1 - 1)/bits + 1;\n            j2 = (bits2 - 1)/bits + 1;\n         } while (j1 + j2 - 1 <= 4*n && w > wadj);  \n         w += wadj;\n      }\n\n      mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth, w);\n   } else \n   {   \n      if (j1 + j2 - 1 <= 3*n)\n      {\n         depth--;\n         w *= 3;\n      }\n      \n      mpn_mul_mfa_trunc_sqrt2(r1, i1, n1, i2, n2, depth, w);\n   }\n}\n"
  },
  {
    "path": "fft/mul_mfa_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid \nmpn_mul_mfa_trunc_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1, \n                        mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w)\n{\n   mp_size_t n = (((mp_size_t)1)<<depth);\n   mp_bitcnt_t bits1 = (n*w - (depth+1))/2; \n   mp_size_t sqrt = (((mp_size_t)1)<<(depth/2));\n\n   mp_size_t r_limbs = n1 + n2;\n   mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n   mp_size_t size = 
limbs + 1;\n\n   mp_size_t j1 = (n1*GMP_LIMB_BITS - 1)/bits1 + 1;\n   mp_size_t j2 = (n2*GMP_LIMB_BITS - 1)/bits1 + 1;\n   \n   mp_size_t i, j, trunc;\n\n   mp_ptr * ii, * jj, t1, t2, s1, ptr;\n   mp_limb_t * tt;\n   TMP_DECL;\n\n   TMP_MARK;\n   ii = TMP_BALLOC_MP_PTRS(4*(n + n*size) + 5*size);\n   for (i = 0, ptr = (mp_ptr) ii + 4*n; i < 4*n; i++, ptr += size) \n   {\n      ii[i] = ptr;\n   }\n   t1 = ptr;\n   t2 = t1 + size;\n   s1 = t2 + size;\n   tt = s1 + size;\n   \n   if (i1 != i2)\n   {\n      jj = TMP_BALLOC_MP_PTRS(4*(n + n*size));\n      for (i = 0, ptr = (mp_ptr) jj + 4*n; i < 4*n; i++, ptr += size) \n      {\n         jj[i] = ptr;\n      }\n   } \n   else \n       jj = ii;\n\n   trunc = j1 + j2 - 1;\n   if (trunc <= 2*n) trunc = 2*n + 1; /* trunc must be greater than 2n */\n   trunc = 2*sqrt*((trunc + 2*sqrt - 1)/(2*sqrt)); /* trunc must be divisible by 2*sqrt */\n\n   j1 = mpir_fft_split_bits(ii, i1, n1, bits1, limbs);\n   for (j = j1 ; j < 4*n; j++)\n      mpn_zero(ii[j], limbs + 1);\n   \n   mpir_fft_mfa_trunc_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc);\n   \n   if (i1 != i2)\n   {\n      j2 = mpir_fft_split_bits(jj, i2, n2, bits1, limbs);\n      for (j = j2 ; j < 4*n; j++)\n         mpn_zero(jj[j], limbs + 1); \n      mpir_fft_mfa_trunc_sqrt2_outer(jj, n, w, &t1, &t2, &s1, sqrt, trunc);\n   } \n   else \n       j2 = j1;\n\n   mpir_fft_mfa_trunc_sqrt2_inner(ii, jj, n, w, &t1, &t2, &s1, sqrt, trunc, tt);\n   mpir_ifft_mfa_trunc_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc);\n       \n   mpn_zero(r1, r_limbs);\n   mpir_fft_combine_bits(r1, ii, j1 + j2 - 1, bits1, limbs, r_limbs);\n     \n   TMP_FREE;\n}\n"
  },
  {
    "path": "fft/mul_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n      \nvoid \nmpn_mul_trunc_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1, \n                        mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w)\n{\n   mp_size_t n = (((mp_size_t)1)<<depth);\n   mp_bitcnt_t bits1 = (n*w - (depth+1))/2; \n   \n   mp_size_t r_limbs = n1 + n2;\n   mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n   mp_size_t size = limbs + 1;\n\n   mp_size_t j1 = (n1*GMP_LIMB_BITS 
- 1)/bits1 + 1;\n   mp_size_t j2 = (n2*GMP_LIMB_BITS - 1)/bits1 + 1;\n   \n   mp_size_t i, j, trunc;\n\n   mp_limb_t ** ii, ** jj, * t1, * t2, * s1, * tt, * ptr;\n   mp_limb_t c;\n   TMP_DECL;\n\n   TMP_MARK;\n   ii = TMP_BALLOC_MP_PTRS(4*(n + n*size) + 5*size);\n   for (i = 0, ptr = (mp_ptr) ii + 4*n; i < 4*n; i++, ptr += size) \n   {\n      ii[i] = ptr;\n   }\n   t1 = ptr;\n   t2 = t1 + size;\n   s1 = t2 + size;\n   tt = s1 + size;\n\n   if (i1 != i2)\n   {\n      jj = TMP_BALLOC_MP_PTRS(4*(n + n*size));\n      for (i = 0, ptr = (mp_ptr) jj + 4*n; i < 4*n; i++, ptr += size) \n      {\n         jj[i] = ptr;\n      }\n   } \n   else\n      jj = ii;\n\n   trunc = j1 + j2 - 1;\n   if (trunc <= 2*n) trunc = 2*n + 1; /* trunc must be greater than 2n */\n   trunc = 2*((trunc + 1)/2); /* trunc must be divisible by 2 */\n\n   j1 = mpir_fft_split_bits(ii, i1, n1, bits1, limbs);\n   for (j = j1 ; j < 4*n; j++)\n      mpn_zero(ii[j], limbs + 1);\n   \n   mpir_fft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);\n    \n   if (i1 != i2)\n   {\n      j2 = mpir_fft_split_bits(jj, i2, n2, bits1, limbs);\n      for (j = j2 ; j < 4*n; j++)\n         mpn_zero(jj[j], limbs + 1);\n      mpir_fft_trunc_sqrt2(jj, n, w, &t1, &t2, &s1, trunc);      \n   } \n   else \n       j2 = j1;\n\n   for (j = 0; j < trunc; j++)\n   {\n      mpn_normmod_2expp1(ii[j], limbs);\n      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);\n      c = 2*ii[j][limbs] + jj[j][limbs];\n\n      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);\n   }\n\n   mpir_ifft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);\n   for (j = 0; j < trunc; j++)\n   {\n      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 2);\n      mpn_normmod_2expp1(ii[j], limbs);\n   }\n   \n   mpn_zero(r1, r_limbs);\n   mpir_fft_combine_bits(r1, ii, j1 + j2 - 1, bits1, limbs, r_limbs);\n     \n   TMP_FREE;\n}\n"
  },
  {
    "path": "fft/mulmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_size_t mulmod_2expp1_table_n[FFT_N_NUM] = MULMOD_TAB;\n\nvoid mpir_fft_naive_convolution_1(mp_ptr r, mp_srcptr ii, mp_srcptr jj, mp_size_t m)\n{\n   mp_size_t i, j;\n\n   for (i = 0; i < m; i++)\n      r[i] = ii[0]*jj[i];\n\n   for (i = 1; i < m; i++)\n   {\n      for (j = 0; j < m - i; j++)\n         r[i+j] += ii[i]*jj[j];\n\n      for ( ; j < m; j++)\n         
r[i+j-m] -=ii[i]*jj[j];\n   }\n}\n\nvoid mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, \n                 mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)\n{\n   mp_size_t n = (((mp_size_t)1)<<depth);\n   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);\n   \n   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;\n   mp_size_t size = limbs + 1;\n   mp_size_t i, j, ll;\n\n   mp_limb_t * ptr;\n   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;\n   mp_limb_t c;\n   TMP_DECL;\n\n   TMP_MARK;\n   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);\n   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size) \n   {\n      ii[i] = ptr;\n   }\n   ii0 = ptr;\n   t1 = ii0 + 2*n;\n   t2 = t1 + size;\n   s1 = t2 + size;\n   r = s1 + size;\n   tt = r + 2*n;\n   \n   if (i1 != i2)\n   {\n      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);\n      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size) \n      {\n         jj[i] = ptr;\n      }\n      jj0 = ptr;\n   } else\n   {\n      jj = ii;\n      jj0 = ii0;\n   }\n\n   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);\n   for ( ; j < 2*n; j++)\n      mpn_zero(ii[j], limbs + 1);\n\n   for (i = 0; i < 2*n; i++)\n      ii0[i] = ii[i][0];\n \n   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);\n   for (j = 0; j < 2*n; j++)\n      mpn_normmod_2expp1(ii[j], limbs);\n\n   if (i1 != i2)\n   {\n      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);\n      for ( ; j < 2*n; j++)\n          mpn_zero(jj[j], limbs + 1);\n\n      for (i = 0; i < 2*n; i++)\n         jj0[i] = jj[i][0];\n\n      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);\n   }\n      \n   for (j = 0; j < 2*n; j++)\n   {\n      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);\n      c = 2*ii[j][limbs] + jj[j][limbs];\n\n      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);\n   }\n   \n   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);\n   \n   mpir_fft_naive_convolution_1(r, ii0, jj0, 
2*n);\n\n   for (j = 0; j < 2*n; j++)\n   {\n      mp_limb_t t, cy2;\n      \n      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);\n      mpn_normmod_2expp1(ii[j], limbs);\n\n      t = ii[j][limbs];\n      ii[j][limbs] = r[j] - ii[j][0];\n      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);\n      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);\n      if (cy2) r[j]++;\n   }\n   \n   mpn_zero(r1, r_limbs + 1);\n   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);\n   \n   /* \n      as the negacyclic convolution has effectively done subtractions\n      some of the coefficients will be negative, so need to subtract p\n   */\n   ll = 0;\n   limb_add = bits1/GMP_LIMB_BITS;\n   \n   for (j = 0; j < 2*n - 2; j++)\n   {   \n      if (r[j]) \n         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);\n      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */\n      {\n         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);\n         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);\n      }\n\n      ll += limb_add;\n   }\n   /* penultimate coefficient, top bit was already ignored */\n   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */\n      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);\n   \n   /* final coefficient wraps around */\n   if (limb_add)\n      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);\n   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);\n   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);\n   mpn_normmod_2expp1(r1, r_limbs);\n   \n   TMP_FREE;\n}\n\nmpir_si mpir_fft_adjust_limbs(mp_size_t limbs)\n{\n   mp_size_t bits1 = limbs*GMP_LIMB_BITS, bits2;\n   mp_size_t depth = 1, limbs2, depth1 = 1, depth2 = 1, adj;\n   mp_size_t off1, off2;\n\n   if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF) return limbs;\n         \n   while 
((((mpir_ui)1)<<depth)<limbs) depth++;\n   limbs2 = (((mpir_si)1)<<depth); /* within a factor of 2 of limbs */\n   bits2 = limbs2*GMP_LIMB_BITS;\n\n   while ((((mpir_ui)1)<<depth1) < bits1) depth1++;\n   if (depth1 < 12) off1 = mulmod_2expp1_table_n[0];\n   else off1 = mulmod_2expp1_table_n[MIN(depth1, FFT_N_NUM + 11) - 12];\n   depth1 = depth1/2 - off1;\n   \n   while ((((mpir_ui)1)<<depth2) < bits2) depth2++;\n   if (depth2 < 12) off2 = mulmod_2expp1_table_n[0];\n   else off2 = mulmod_2expp1_table_n[MIN(depth2, FFT_N_NUM + 11) - 12];\n   depth2 = depth2/2 - off2;\n   \n   depth1 = MAX(depth1, depth2);\n   adj = (((mpir_si)1)<<(depth1 + 1));\n   limbs2 = adj*((limbs + adj - 1)/adj); /* round up number of limbs */\n   bits1 = limbs2*GMP_LIMB_BITS;\n   bits2 = (((mpir_si)1)<<(depth1*2));\n   bits1 = bits2*((bits1 + bits2 - 1)/bits2); /* round up bits */\n   limbs = bits1/GMP_LIMB_BITS;\n\n   return limbs;\n}\n\nint\nmpn_mulmod_Bexpp1_fft (mp_ptr op, mp_size_t pl,\n\t     mp_srcptr n, mp_size_t nl,\n\t     mp_srcptr m, mp_size_t ml)\n{\n   mp_ptr a, b, tt;\n   mp_limb_t cy;\n\n   TMP_DECL;\n   TMP_MARK;\n\n   /* temporary space */\n   tt = TMP_ALLOC_LIMBS(2*pl);\n   \n   /* make copies of inputs, padded out to pl limbs */\n   a = TMP_ALLOC_LIMBS(pl + 1);\n   mpn_copyi(a, n, nl);\n   MPN_ZERO(a + nl, pl + 1 - nl);\n   \n   b = TMP_ALLOC_LIMBS(pl + 1);\n   mpn_copyi(b, m, ml);\n   MPN_ZERO(b + ml, pl + 1 - ml);\n   \n   /* this function only cares about the product, limbs = pl*GMP_LIMB_BITS */\n   cy = mpn_mulmod_Bexpp1(op, a, b, pl, tt);\n   \n   TMP_FREE;\n\n   return cy;\n}\n"
  },
  {
    "path": "fft/normmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid \nmpn_normmod_2expp1(mp_ptr t, mp_size_t limbs)\n{\n   mp_limb_signed_t hi = t[limbs];\n   \n   if (hi)\n   {\n      t[limbs] = 0;\n\n      mpn_addmod_2expp1_1(t, limbs, -hi);\n\n      /* hi will now be in [-1,1] */\n      if ((hi = t[limbs]))\n      {\n         t[limbs] = 0;\n\n         mpn_addmod_2expp1_1(t, limbs, -hi);\n\n         if (t[limbs] == ~(mp_limb_signed_t) 0) /* if we now have -1 
(very unlikely) */\n         {\n            t[limbs] = 0;\n            mpn_addmod_2expp1_1(t, limbs, 1);\n         }\n      }\n   }\n}\n"
  },
  {
    "path": "fft/revbin.c",
    "content": "/* \n\nCopyright 2009 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nconst mp_limb_t revtab0[1] = { 0 };\nconst mp_limb_t revtab1[2] = { 0, 1 };\nconst mp_limb_t revtab2[4] = { 0, 2, 1, 3 };\nconst mp_limb_t revtab3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };\nconst mp_limb_t revtab4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };\n\nconst mp_limb_t * revtab[5] = { revtab0, revtab1, revtab2, revtab3, revtab4 };\n\n/*\n   computes the reverse binary of a binary number 
of the given number of bits\n */\nmp_limb_t mpir_revbin(mp_limb_t in, mp_limb_t bits)\n{\n    mp_limb_t out = 0, i;\n    \n    if (bits <= 4)\n        return revtab[bits][in];\n\n    for (i = 0; i < bits; i++)\n    {   \n       out <<= 1;\n       out += (in & 1);\n       in >>= 1;\n    }\n\n    return out;\n}\n"
  },
  {
    "path": "fft/split_bits.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_size_t \nmpir_fft_split_limbs(mp_ptr * poly, mp_srcptr limbs, \n                mp_size_t total_limbs, mp_size_t coeff_limbs, mp_size_t output_limbs)\n{\n   mp_size_t i, skip, length = (total_limbs - 1)/coeff_limbs + 1;\n   \n   for (skip = 0, i = 0; skip + coeff_limbs <= total_limbs; skip += coeff_limbs, i++)\n   {\n      mpn_zero(poly[i], output_limbs + 1);\n      mpn_copyi(poly[i], limbs + 
skip, coeff_limbs);\n   }\n   \n   if (i < length) \n      mpn_zero(poly[i], output_limbs + 1);\n   \n   if (total_limbs > skip) \n      mpn_copyi(poly[i], limbs + skip, total_limbs - skip);\n   \n   return length;\n}\n\nmp_size_t mpir_fft_split_bits(mp_ptr * poly, mp_srcptr limbs, \n               mp_size_t total_limbs, mp_bitcnt_t bits, mp_size_t output_limbs)\n{\n   mp_size_t i, coeff_limbs, limbs_left, length = (GMP_LIMB_BITS*total_limbs - 1)/bits + 1;\n   mp_bitcnt_t shift_bits, top_bits = ((GMP_LIMB_BITS - 1) & bits);\n   mp_srcptr limb_ptr;\n   mp_limb_t mask;\n   \n   if (top_bits == 0)\n      return mpir_fft_split_limbs(poly, limbs, total_limbs, bits/GMP_LIMB_BITS, output_limbs);\n\n   coeff_limbs = (bits/GMP_LIMB_BITS) + 1;\n   mask = (((mp_limb_t)1)<<top_bits) - 1;\n   shift_bits = 0L;\n   limb_ptr = limbs;                      \n    \n   for (i = 0; i < length - 1; i++)\n   {\n      mpn_zero(poly[i], output_limbs + 1);\n      \n      if (!shift_bits)\n      {\n         mpn_copyi(poly[i], limb_ptr, coeff_limbs);\n         poly[i][coeff_limbs - 1] &= mask;\n         limb_ptr += (coeff_limbs - 1);\n         shift_bits += top_bits;\n      } else\n      {\n         mpn_rshift(poly[i], limb_ptr, coeff_limbs, shift_bits);\n         limb_ptr += (coeff_limbs - 1);\n         shift_bits += top_bits;\n\n         if (shift_bits >= GMP_LIMB_BITS)\n         {\n            limb_ptr++;\n            poly[i][coeff_limbs - 1] += (limb_ptr[0] << (GMP_LIMB_BITS - (shift_bits - top_bits)));\n            shift_bits -= GMP_LIMB_BITS; \n         }\n         \n         poly[i][coeff_limbs - 1] &= mask;\n         \n      } \n   }\n   \n   mpn_zero(poly[i], output_limbs + 1);\n   \n   limbs_left = total_limbs - (limb_ptr - limbs);\n   \n   if (!shift_bits)\n      mpn_copyi(poly[i], limb_ptr, limbs_left);\n   else\n      mpn_rshift(poly[i], limb_ptr, limbs_left, shift_bits);                   \n     \n   return length;\n}\n\n"
  },
  {
    "path": "gmp-impl.h",
    "content": "/* Include file for internal GNU MP types and definitions.\n\n   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO\n   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.\n\nCopyright 1991, 1993-1997, 1999, 2000-2015 Free Software Foundation, Inc.\n\nCopyright 2009, 2013 William Hart\n\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n\n/* __GMP_DECLSPEC must be given on any global data that will be accessed\n   from outside libmpir, meaning from the test or development programs, or\n   from libmpirxx.  Failing to do this will result in an incorrect address\n   being used for the accesses.  On functions __GMP_DECLSPEC makes calls\n   from outside libmpir more efficient, but they'll still work fine without\n   it.  */\n\n\n#ifndef __GMP_IMPL_H__\n#define __GMP_IMPL_H__\n\n/* limits.h is not used in general, since it's an ANSI-ism, and since on\n   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong\n   values (the ABI=64 values).\n\n   On Cray vector systems, however, we need the system limits.h since sizes\n   of signed and unsigned types can differ there, depending on compiler\n   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  
For\n   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and\n   short can be 24, 32, 46 or 64 bits, and different for ushort.  */\n\n#if defined _WIN64\n#include <limits.h>\n#endif\n\n/* For fat.h and other fat binary stuff.\n   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions\n   declared this way are only used to set function pointers in __gmp_cpuvec,\n   they're not called directly.  */\n#define DECL_add_err1_n(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_add_err2_n(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_add_n(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n#define DECL_addmul_1(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_copyd(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t)\n#define DECL_copyi(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t)\n#define DECL_divexact_1(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_divexact_by3c(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_divexact_byfobm1(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t,mp_limb_t)\n#define DECL_divrem_1(name) \\\n  mp_limb_t name(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_divrem_2(name) \\\n  mp_limb_t name(mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)\n#define DECL_divrem_euclidean_qr_1(name) \\\n  mp_limb_t name(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_divrem_euclidean_qr_2(name) \\\n  mp_limb_t name(mp_ptr, mp_ptr, mp_size_t, mp_srcptr)\n#define DECL_gcd_1(name) \\\n  mp_limb_t name(mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_lshift(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_size_t, unsigned)\n#define DECL_mod_1(name) \\\n  mp_limb_t name(mp_srcptr, mp_size_t, mp_limb_t)\n#define 
DECL_mod_34lsub1(name) \\\n  mp_limb_t name(mp_srcptr, mp_size_t)\n#define DECL_modexact_1c_odd(name) \\\n  mp_limb_t name(mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)\n#define DECL_mul_1(name) \\\n  DECL_addmul_1 (name)\n#define DECL_mul_basecase(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)\n#define DECL_mulmid_basecase(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)\n#define DECL_preinv_divrem_1(name) \\\n  mp_limb_t name(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)\n#define DECL_preinv_mod_1(name) \\\n  mp_limb_t name(mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)\n#define DECL_redc_1(name) \\\n  void name(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_rshift(name) \\\n  DECL_lshift (name)\n#define DECL_sqr_basecase(name) \\\n  void name(mp_ptr, mp_srcptr, mp_size_t)\n#define DECL_sub_err1_n(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_sub_err2_n(name) \\\n  mp_limb_t name(mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n#define DECL_sub_n(name) \\\n  DECL_add_n (name)\n#define DECL_submul_1(name) \\\n  DECL_addmul_1 (name)\n#define DECL_sumdiff_n(name) \\\n  mp_limb_t name(mp_ptr, mp_ptr, mp_srcptr,mp_srcptr,mp_size_t)\n#define DECL_nsumdiff_n(name) \\\n  DECL_sumdiff_n(name)\n\n#if ! 
__GMP_WITHIN_CONFIGURE\n#include \"config.h\"\n#include \"gmp-mparam.h\"\n\n/* These constants are generated by gen-fib.c header limbbits nailbits */\n#if GMP_NUMB_BITS == 32\n#define FIB_TABLE_LIMIT         47\n#define FIB_TABLE_LUCNUM_LIMIT  46\n#endif /* 32 bits */\n#if GMP_NUMB_BITS == 64\n#define FIB_TABLE_LIMIT         93\n#define FIB_TABLE_LUCNUM_LIMIT  92\n#endif /* 64 bits */\n\n/* These constants are generated by gen-bases.c header limbbits nailbits */\n#if GMP_NUMB_BITS == 32\n#define MP_BASES_CHARS_PER_LIMB_10      9\n#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)\n#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)\n#define MP_BASES_NORMALIZATION_STEPS_10 2\n#endif /* 32 bits */\n#if GMP_NUMB_BITS == 64\n#define MP_BASES_CHARS_PER_LIMB_10      19\n#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x8ac7230489e80000)\n#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xd83c94fb6d2ac34a)\n#define MP_BASES_NORMALIZATION_STEPS_10 0\n#endif /* 64 bits */\n\n#if defined _LONG_LONG_LIMB\n#if __GMP_HAVE_TOKEN_PASTE\n#define CNST_LIMB(C) ((mp_limb_t) C##LL)\n#else\n#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)\n#endif\n#else /* not _LONG_LONG_LIMB */\n#if __GMP_HAVE_TOKEN_PASTE\n#define CNST_LIMB(C) ((mp_limb_t) C##L)\n#else\n#define CNST_LIMB(C) ((mp_limb_t) C/**/L)\n#endif\n#endif /* _LONG_LONG_LIMB */\n\n/* These constants and defines are generated by gen-psqr limbbits nailbits */\n#if GMP_LIMB_BITS == 32 && GMP_NAIL_BITS == 0\n/* Non-zero bit indicates a quadratic residue mod 0x100.\n   This test identifies 82.81% as non-squares (212/256). */\nstatic const mp_limb_t\nsq_res_0x100[8] = {\n  CNST_LIMB(0x2030213),\n  CNST_LIMB(0x2020212),\n  CNST_LIMB(0x2020213),\n  CNST_LIMB(0x2020212),\n  CNST_LIMB(0x2030212),\n  CNST_LIMB(0x2020212),\n  CNST_LIMB(0x2020212),\n  CNST_LIMB(0x2020212),\n};\n\n/* 2^24-1 = 3^2 * 5 * 7 * 13 * 17 ... */\n#define PERFSQR_MOD_BITS  25\n\n/* This test identifies 95.66% as non-squares. 
*/\n#define PERFSQR_MOD_TEST(up, usize) \\\n  do {                              \\\n    mp_limb_t  r;                   \\\n    PERFSQR_MOD_34 (r, up, usize);  \\\n                                    \\\n    /* 73.33% */                    \\\n    PERFSQR_MOD_2 (r, CNST_LIMB(45), CNST_LIMB(0xfa4fa5), \\\n                   CNST_LIMB(0x920), CNST_LIMB(0x1a442481)); \\\n                                    \\\n    /* 47.06% */                    \\\n    PERFSQR_MOD_1 (r, CNST_LIMB(17), CNST_LIMB(0xf0f0f1), \\\n                   CNST_LIMB(0x1a317)); \\\n                                    \\\n    /* 46.15% */                    \\\n    PERFSQR_MOD_1 (r, CNST_LIMB(13), CNST_LIMB(0xec4ec5), \\\n                   CNST_LIMB(0x9e5)); \\\n                                    \\\n    /* 42.86% */                    \\\n    PERFSQR_MOD_1 (r, CNST_LIMB( 7), CNST_LIMB(0xdb6db7), \\\n                   CNST_LIMB(0x69)); \\\n  } while (0)\n\n/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, 99.25% non-squares. */\n\n/* helper for tests/mpz/t-perfsqr.c */\n#define PERFSQR_DIVISORS  { 256, 45, 17, 13, 7, }\n\n#elif GMP_LIMB_BITS == 64 && GMP_NAIL_BITS == 0\n\n/* Non-zero bit indicates a quadratic residue mod 0x100.\n   This test identifies 82.81% as non-squares (212/256). */\nstatic const mp_limb_t\nsq_res_0x100[4] = {\n  CNST_LIMB(0x202021202030213),\n  CNST_LIMB(0x202021202020213),\n  CNST_LIMB(0x202021202030212),\n  CNST_LIMB(0x202021202020212),\n};\n\n/* 2^48-1 = 3^2 * 5 * 7 * 13 * 17 * 97 ... */\n#define PERFSQR_MOD_BITS  49\n\n/* This test identifies 97.81% as non-squares. 
*/\n#define PERFSQR_MOD_TEST(up, usize) \\\n  do {                              \\\n    mp_limb_t  r;                   \\\n    PERFSQR_MOD_34 (r, up, usize);  \\\n                                    \\\n    /* 69.23% */                    \\\n    PERFSQR_MOD_2 (r, CNST_LIMB(91), CNST_LIMB(0xfd2fd2fd2fd3), \\\n                   CNST_LIMB(0x2191240), CNST_LIMB(0x8850a206953820e1)); \\\n                                    \\\n    /* 68.24% */                    \\\n    PERFSQR_MOD_2 (r, CNST_LIMB(85), CNST_LIMB(0xfcfcfcfcfcfd), \\\n                   CNST_LIMB(0x82158), CNST_LIMB(0x10b48c4b4206a105)); \\\n                                    \\\n    /* 55.56% */                    \\\n    PERFSQR_MOD_1 (r, CNST_LIMB( 9), CNST_LIMB(0xe38e38e38e39), \\\n                   CNST_LIMB(0x93)); \\\n                                    \\\n    /* 49.48% */                    \\\n    PERFSQR_MOD_2 (r, CNST_LIMB(97), CNST_LIMB(0xfd5c5f02a3a1), \\\n                   CNST_LIMB(0x1eb628b47), CNST_LIMB(0x6067981b8b451b5f)); \\\n  } while (0)\n\n/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, 99.62% non-squares. */\n\n/* helper for tests/mpz/t-perfsqr.c */\n#define PERFSQR_DIVISORS  { 256, 91, 85, 9, 97, }\n\n#else\n#error no data available for this limb size in perfsqr.h\n#endif\n\n#if WANT_FAT_BINARY\n#include \"fat.h\"\n#endif\n#endif\n\n#if HAVE_INTTYPES_H      /* for uint_least32_t */\n# include <inttypes.h>\n#else\n# ifdef HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#ifdef __cplusplus\n#include <cstring>  /* for strlen */\n#include <string>   /* for std::string */\n#endif\n\n\n#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */\n#define WANT_TMP_DEBUG 0\n#endif\n\n/* The following tries to get a good version of alloca.  
The tests are\n   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.\n   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will\n   be setup appropriately.\n\n   ifndef alloca - a cpp define might already exist.\n       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.\n       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.\n\n   GCC __builtin_alloca - preferred whenever available.\n\n   _AIX pragma - IBM compilers need a #pragma in \"each module that needs to\n       use alloca\".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was\n       used in past versions of GMP, retained still in case it matters.\n\n       The autoconf manual says this pragma needs to be at the start of a C\n       file, apart from comments and preprocessor directives.  Is that true?\n       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc\n       from mpir.h.\n*/\n\n#ifndef alloca\n# ifdef __GNUC__\n#  define alloca __builtin_alloca\n# else\n#  ifdef __DECC\n#   define alloca(x) __ALLOCA(x)\n#  else\n#   ifdef _MSC_VER\n#    include <malloc.h>\n#    define alloca _alloca\n#   else\n#    if HAVE_ALLOCA_H\n#     include <alloca.h>\n#    else\n#     if defined (_AIX) || defined (_IBMR2)\n #pragma alloca\n#     else\n       char *alloca ();\n#     endif\n#    endif\n#   endif\n#  endif\n# endif\n#endif\n\n\n/* if not provided by gmp-mparam.h */\n#ifndef BYTES_PER_MP_LIMB\n#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T\n#endif\n#ifndef BITS_PER_MP_LIMB\n#define BITS_PER_MP_LIMB  (8 * SIZEOF_MP_LIMB_T)\n#endif\n\n#define BITS_PER_ULONG   (8 * SIZEOF_UNSIGNED_LONG)\n#ifdef HAVE_STDINT_H\n#define BITS_PER_UINTMAX (8 * SIZEOF_UINTMAX_T)\n#endif\n\n/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. 
*/\n#if HAVE_UINT_LEAST32_T\ntypedef uint_least32_t      gmp_uint_least32_t;\n#else\n#if SIZEOF_UNSIGNED_SHORT >= 4\ntypedef unsigned short      gmp_uint_least32_t;\n#else\n#if SIZEOF_UNSIGNED >= 4\ntypedef unsigned            gmp_uint_least32_t;\n#else\ntypedef unsigned long       gmp_uint_least32_t;\n#endif\n#endif\n#endif\n\n/* pre-inverse types for truncating division and modulo */\ntypedef struct {mp_limb_t inv32;} gmp_pi1_t;\ntypedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;\n\n\n\n/* const and signed must match __gmp_const and __gmp_signed, so follow the\n   decision made for those in mpir.h.    */\n#if ! __GMP_HAVE_CONST\n#define const   /* empty */\n#define signed  /* empty */\n#endif\n\n/* \"const\" basically means a function does nothing but examine its arguments\n   and give a return value, it doesn't read or write any memory (neither\n   global nor pointed to by arguments), and has no other side-effects.  This\n   is more restrictive than \"pure\".  See info node \"(gcc)Function\n   Attributes\".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn\n   this off when trying to write timing loops.  */\n#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE) && !( defined (__cplusplus) && defined (__sun))\n#define ATTRIBUTE_CONST  __attribute__ ((const))\n#else\n#define ATTRIBUTE_CONST\n#endif\n\n#if HAVE_ATTRIBUTE_NORETURN && !( defined (__cplusplus) && defined (__sun))\n#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))\n#else\n#define ATTRIBUTE_NORETURN\n#endif\n\n/* \"malloc\" means a function behaves like malloc in that the pointer it\n   returns doesn't alias anything.  */\n#if HAVE_ATTRIBUTE_MALLOC && !( defined (__cplusplus) && defined (__sun))\n#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))\n#else\n#define ATTRIBUTE_MALLOC\n#endif\n\n\n/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89\n   mode.  
Falling back to a memcpy will give maximum portability, since it\n   works no matter whether va_list is a pointer, struct or array.  */\n#if ! defined (va_copy) && defined (__va_copy)\n#define va_copy(dst,src)  __va_copy(dst,src)\n#endif\n#if ! defined (va_copy)\n#define va_copy(dst,src) \\\n  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)\n#endif\n\n#if defined (__cplusplus)\nextern \"C\" {\n#endif\n\n\n/* Usage: TMP_DECL;\n          TMP_MARK;\n          ptr = TMP_ALLOC (bytes);\n          TMP_FREE;\n\n   Small allocations should use TMP_SALLOC, big allocations should use\n   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.\n\n   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and\n   TMP_SFREE.\n\n   TMP_DECL just declares a variable, but might be empty and so must be last\n   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.\n   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a\n   TMP_MARK was made, but then no TMP_ALLOCs.  */\n\n/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or\n   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC\n   isn't used for \"double\"s, so that's not in the union.  */\nunion tmp_align_t {\n  mp_limb_t  l;\n  char       *p;\n};\n#define __TMP_ALIGN  sizeof (union tmp_align_t)\n\n/* Return \"a\" rounded upwards to a multiple of \"m\", if it isn't already.\n   \"a\" must be an unsigned type.\n   This is designed for use with a compile-time constant \"m\".\n   The POW2 case is expected to be usual, and gcc 3.0 and up recognises\n   \"(-(8*n))%8\" or the like is always zero, which means the rounding up in\n   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */\n#define ROUND_UP_MULTIPLE(a,m)          \\\n  (POW2_P(m) ? 
(a) + (-(a))%(m)         \\\n   : (a)+(m)-1 - (((a)+(m)-1) % (m)))\n\n#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)\nstruct tmp_reentrant_t {\n  struct tmp_reentrant_t  *next;\n  size_t\t\t  size;\t  /* bytes, including header */\n};\n__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc(struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;\n__GMP_DECLSPEC void  __gmp_tmp_reentrant_free(struct tmp_reentrant_t *);\n#endif\n\n#if WANT_TMP_ALLOCA\n#define TMP_SDECL\n#define TMP_DECL\t\tstruct tmp_reentrant_t *__tmp_marker\n#define TMP_SMARK\n#define TMP_MARK\t\t__tmp_marker = 0\n#define TMP_SALLOC(n)\t\talloca(n)\n#define TMP_BALLOC(n)\t\t__gmp_tmp_reentrant_alloc (&__tmp_marker, n)\n#define TMP_ALLOC(n)\t\t\t\t\t\t\t\\\n  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))\n#define TMP_SFREE\n#define TMP_FREE\t\t\t\t\t\t\t   \\\n  do {\t\t\t\t\t\t\t\t\t   \\\n    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \\\n  } while (0)\n#endif\n\n#if WANT_TMP_REENTRANT\n#define TMP_SDECL\t\tTMP_DECL\n#define TMP_DECL\t\tstruct tmp_reentrant_t *__tmp_marker\n#define TMP_SMARK\t\tTMP_MARK\n#define TMP_MARK\t\t__tmp_marker = 0\n#define TMP_SALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_BALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_ALLOC(n)\t\t__gmp_tmp_reentrant_alloc (&__tmp_marker, n)\n#define TMP_SFREE\t\tTMP_FREE\n#define TMP_FREE\t\t__gmp_tmp_reentrant_free (__tmp_marker)\n#endif\n\n#if WANT_TMP_NOTREENTRANT\nstruct tmp_marker\n{\n  struct tmp_stack *which_chunk;\n  void *alloc_point;\n};\n__GMP_DECLSPEC void *__gmp_tmp_alloc(unsigned long) ATTRIBUTE_MALLOC;\n__GMP_DECLSPEC void __gmp_tmp_mark(struct tmp_marker *);\n__GMP_DECLSPEC void __gmp_tmp_free(struct tmp_marker *);\n#define TMP_SDECL\t\tTMP_DECL\n#define TMP_DECL\t\tstruct tmp_marker __tmp_marker\n#define TMP_SMARK\t\tTMP_MARK\n#define TMP_MARK\t\t__gmp_tmp_mark (&__tmp_marker)\n#define TMP_SALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_BALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_ALLOC(n)\t\t\t\t\t\t\t\\\n 
 __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))\n#define TMP_SFREE\t\tTMP_FREE\n#define TMP_FREE\t\t__gmp_tmp_free (&__tmp_marker)\n#endif\n\n#if WANT_TMP_DEBUG\n/* See tal-debug.c for some comments. */\nstruct tmp_debug_t {\n  struct tmp_debug_entry_t  *list;\n  const char                *file;\n  int                       line;\n};\nstruct tmp_debug_entry_t {\n  struct tmp_debug_entry_t  *next;\n  char                      *block;\n  size_t                    size;\n};\n__GMP_DECLSPEC void  __gmp_tmp_debug_mark(const char *, int, struct tmp_debug_t **,\n                                     struct tmp_debug_t *,\n                                     const char *, const char *);\n__GMP_DECLSPEC void *__gmp_tmp_debug_alloc(const char *, int, int,\n                                     struct tmp_debug_t **, const char *,\n                                     size_t) ATTRIBUTE_MALLOC;\n__GMP_DECLSPEC void  __gmp_tmp_debug_free(const char *, int, int,\n                                     struct tmp_debug_t **,\n                                     const char *, const char *);\n#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, \"__tmp_marker\")\n#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, \"__tmp_marker\")\n#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, \"__tmp_marker\")\n#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, \"__tmp_marker\")\n#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, \"__tmp_marker\")\n#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, \"__tmp_marker\")\n/* The marker variable is designed to provoke an uninitialized variable\n   warning from the compiler if TMP_FREE is used without a TMP_MARK.\n   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick\n   these things up too.  
*/\n#define TMP_DECL_NAME(marker, marker_name)                      \\\n  int marker;                                                   \\\n  int __tmp_marker_inscope;                                     \\\n  const char *__tmp_marker_name = marker_name;                  \\\n  struct tmp_debug_t  __tmp_marker_struct;                      \\\n  /* don't demand NULL, just cast a zero */                     \\\n  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0\n#define TMP_MARK_NAME(marker, marker_name)                      \\\n  do {                                                          \\\n    marker = 1;                                                 \\\n    __tmp_marker_inscope = 1;                                   \\\n    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \\\n                           &__tmp_marker, &__tmp_marker_struct, \\\n                           __tmp_marker_name, marker_name);     \\\n  } while (0)\n#define TMP_SALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_BALLOC(n)\t\tTMP_ALLOC(n)\n#define TMP_ALLOC(size)                                                 \\\n  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \\\n                         __tmp_marker_inscope,                          \\\n                         &__tmp_marker, __tmp_marker_name, size)\n#define TMP_FREE_NAME(marker, marker_name)                      \\\n  do {                                                          \\\n    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \\\n                           marker, &__tmp_marker,               \\\n                           __tmp_marker_name, marker_name);     \\\n  } while (0)\n#endif /* WANT_TMP_DEBUG */\n\n\n/* Allocating various types. 
*/\n#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))\n#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))\n#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))\n#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)\n#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)\n#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)\n#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)\n#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)\n#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)\n\n/* It's more efficient to allocate one block than two.  This is certainly\n   true of the malloc methods, but it can even be true of alloca if that\n   involves copying a chunk of stack (various RISCs), or a call to a stack\n   bounds check (mingw).  In any case, when debugging keep separate blocks\n   so a redzoning malloc debugger can protect each individually.  */\n#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \\\n  do {                                                  \\\n    if (WANT_TMP_DEBUG)                                 \\\n      {                                                 \\\n        (xp) = TMP_ALLOC_LIMBS (xsize);                 \\\n        (yp) = TMP_ALLOC_LIMBS (ysize);                 \\\n      }                                                 \\\n    else                                                \\\n      {                                                 \\\n        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \\\n        (yp) = (xp) + (xsize);                          \\\n      }                                                 \\\n  } while (0)\n\n\n/* From mpir.h, nicer names for internal use. */\n#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)\n#define LIKELY(cond)                   __GMP_LIKELY(cond)\n#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)\n\n#define ABS(x) ((x) >= 0 ? 
(x) : -(x))\n#undef MIN\n#define MIN(l,o) ((l) < (o) ? (l) : (o))\n#undef MAX\n#define MAX(h,i) ((h) > (i) ? (h) : (i))\n#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))\n\n/* Field access macros.  */\n#define SIZ(x) ((x)->_mp_size)\n#define ABSIZ(x) ABS (SIZ (x))\n#define PTR(x) ((x)->_mp_d)\n#define LIMBS(x) ((x)->_mp_d)\n#define EXP(x) ((x)->_mp_exp)\n#define PREC(x) ((x)->_mp_prec)\n#define ALLOC(x) ((x)->_mp_alloc)\n#define NUM(x) mpq_numref(x)\n#define DEN(x) mpq_denref(x)\n\n/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero\n   then that lowest one bit must have been the only bit set.  n==0 will\n   return true though, so avoid that.  */\n#define POW2_P(n)  (((n) & ((n) - 1)) == 0)\n\n/* This is intended for constant THRESHOLDs only, where the compiler\n   can completely fold the result.  */\n#define LOG2C(n) \\\n (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \\\n  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \\\n  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \\\n  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))\n\n/* The \"short\" defines are a bit different because shorts are promoted to\n   ints by ~ or >> etc.\n\n   #ifndef's are used since on some systems (HP?) header files other than\n   limits.h setup these defines.  We could forcibly #undef in that case, but\n   there seems no need to worry about that.  */\n\n#ifndef ULONG_MAX\n#define ULONG_MAX   __GMP_ULONG_MAX\n#endif\n#ifndef UINT_MAX\n#define UINT_MAX    __GMP_UINT_MAX\n#endif\n#ifndef USHRT_MAX\n#define USHRT_MAX   __GMP_USHRT_MAX\n#endif\n#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)\n\n/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be\n   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc\n   treats the plain decimal values in <limits.h> as signed.  
*/\n#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))\n#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))\n#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))\n#define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))\n#ifdef HAVE_STDINT_H\n#define UINTMAX_HIGHBIT   (UINTMAX_MAX ^ (UINTMAX_MAX >> 1))\n#endif\n\n#ifndef LONG_MIN\n#define LONG_MIN           ((long) ULONG_HIGHBIT)\n#endif\n#ifndef LONG_MAX\n#define LONG_MAX           (-(LONG_MIN+1))\n#endif\n\n#ifndef INT_MIN\n#define INT_MIN            ((int) UINT_HIGHBIT)\n#endif\n#ifndef INT_MAX\n#define INT_MAX            (-(INT_MIN+1))\n#endif\n\n#ifndef SHRT_MIN\n#define SHRT_MIN           ((short) USHRT_HIGHBIT)\n#endif\n#ifndef SHRT_MAX\n#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))\n#endif\n\n#if defined( _WIN64 )\n#define MP_SIZE_T_MAX      _I64_MAX\n#define MP_SIZE_T_MIN      _I64_MIN\n#elif __GMP_MP_SIZE_T_INT\n#define MP_SIZE_T_MAX      INT_MAX\n#define MP_SIZE_T_MIN      INT_MIN\n#else\n#define MP_SIZE_T_MAX      LONG_MAX\n#define MP_SIZE_T_MIN      LONG_MIN\n#endif\n\n/* mp_exp_t is the same as mp_size_t */\n#if defined( _WIN64 )\n#define MP_EXP_T_MAX   LONG_MAX\n#define MP_EXP_T_MIN   LONG_MIN\n#else\n#define MP_EXP_T_MAX   MP_SIZE_T_MAX\n#define MP_EXP_T_MIN   MP_SIZE_T_MIN\n#endif\n\n#define LONG_HIGHBIT       LONG_MIN\n#define INT_HIGHBIT        INT_MIN\n#define SHRT_HIGHBIT       SHRT_MIN\n\n\n#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))\n\n#if GMP_NAIL_BITS == 0\n#define GMP_NAIL_LOWBIT   CNST_LIMB(0)\n#else\n#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)\n#endif\n\n/* Swap macros. 
*/\n\n#define MP_LIMB_T_SWAP(x, y)                    \\\n  do {                                          \\\n    mp_limb_t __mp_limb_t_swap__tmp = (x);      \\\n    (x) = (y);                                  \\\n    (y) = __mp_limb_t_swap__tmp;                \\\n  } while (0)\n#define MP_SIZE_T_SWAP(x, y)                    \\\n  do {                                          \\\n    mp_size_t __mp_size_t_swap__tmp = (x);      \\\n    (x) = (y);                                  \\\n    (y) = __mp_size_t_swap__tmp;                \\\n  } while (0)\n\n#define MP_PTR_SWAP(x, y)               \\\n  do {                                  \\\n    mp_ptr __mp_ptr_swap__tmp = (x);    \\\n    (x) = (y);                          \\\n    (y) = __mp_ptr_swap__tmp;           \\\n  } while (0)\n#define MP_SRCPTR_SWAP(x, y)                    \\\n  do {                                          \\\n    mp_srcptr __mp_srcptr_swap__tmp = (x);      \\\n    (x) = (y);                                  \\\n    (y) = __mp_srcptr_swap__tmp;                \\\n  } while (0)\n\n#define MPN_PTR_SWAP(xp,xs, yp,ys)      \\\n  do {                                  \\\n    MP_PTR_SWAP (xp, yp);               \\\n    MP_SIZE_T_SWAP (xs, ys);            \\\n  } while(0)\n#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \\\n  do {                                  \\\n    MP_SRCPTR_SWAP (xp, yp);            \\\n    MP_SIZE_T_SWAP (xs, ys);            \\\n  } while(0)\n\n#define MPZ_PTR_SWAP(x, y)              \\\n  do {                                  \\\n    mpz_ptr __mpz_ptr_swap__tmp = (x);  \\\n    (x) = (y);                          \\\n    (y) = __mpz_ptr_swap__tmp;          \\\n  } while (0)\n#define MPZ_SRCPTR_SWAP(x, y)                   \\\n  do {                                          \\\n    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \\\n    (x) = (y);                                  \\\n    (y) = __mpz_srcptr_swap__tmp;               \\\n  } while (0)\n\n\n/* Enhancement: 
__gmp_allocate_func could have \"__attribute__ ((malloc))\",\n   but current gcc (3.0) doesn't seem to support that.  */\n__GMP_DECLSPEC extern void * (*__gmp_allocate_func)(size_t);\n__GMP_DECLSPEC extern void * (*__gmp_reallocate_func)(void *, size_t, size_t);\n__GMP_DECLSPEC extern void   (*__gmp_free_func)(void *, size_t);\n\n__GMP_DECLSPEC void *__gmp_default_allocate(size_t);\n__GMP_DECLSPEC void *__gmp_default_reallocate(void *, size_t, size_t);\n__GMP_DECLSPEC void __gmp_default_free(void *, size_t);\n\n#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \\\n  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))\n#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)\n\n#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \\\n  ((type *) (*__gmp_reallocate_func)                            \\\n   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))\n#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \\\n  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)\n\n#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))\n#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)\n\n#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \\\n  do {                                                          \\\n    if ((oldsize) != (newsize))                                 \\\n      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \\\n  } while (0)\n\n#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \\\n  do {                                                                  \\\n    if ((oldsize) != (newsize))                                         \\\n      (ptr) = (type *) (*__gmp_reallocate_func)                         \\\n        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \\\n  } while (0)\n\n\n/* Dummy for non-gcc, code involving it will go dead. */\n#if ! 
defined (__GNUC__) || __GNUC__ < 2\n#define __builtin_constant_p(x)   0\n#endif\n\n\n/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the\n   stack usage is compatible.  __attribute__ ((regparm (N))) helps by\n   putting leading parameters in registers, avoiding extra stack.\n\n   regparm cannot be used with calls going through the PLT, because the\n   binding code there may clobber the registers (%eax, %edx, %ecx) used for\n   the regparm parameters.  Calls to local (ie. static) functions could\n   still use this, if we cared to differentiate locals and globals.\n\n   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with\n   -p or -pg profiling, since that version of gcc doesn't realize the\n   .mcount calls will clobber the parameter registers.  Other systems are\n   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't\n   bother to try to detect this.  regparm is only an optimization so we just\n   disable it when profiling (profiling being a slowdown anyway).  */\n\n#define USE_LEADING_REGPARM 0\n\n\n/* Macros for altering parameter order according to regparm usage. 
*/\n#if USE_LEADING_REGPARM\n#define REGPARM_2_1(a,b,x)    x,a,b\n#define REGPARM_3_1(a,b,c,x)  x,a,b,c\n#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))\n#else\n#define REGPARM_2_1(a,b,x)    a,b,x\n#define REGPARM_3_1(a,b,c,x)  a,b,c,x\n#define REGPARM_ATTR(n)\n#endif\n\n__GMP_DECLSPEC int mpir_is_likely_prime_BPSW(mp_limb_t n);\n\n__GMP_DECLSPEC mp_limb_t mpir_sqrt(mp_limb_t r);\n\n__GMP_DECLSPEC void __gmpz_aorsmul_1(REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub)) REGPARM_ATTR(1);\n#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))\n\n#define mpz_n_pow_ui __gmpz_n_pow_ui\n__GMP_DECLSPEC void    mpz_n_pow_ui(mpz_ptr, mp_srcptr, mp_size_t, mpir_ui);\n\n\n#define mpn_add_nc __MPN(add_nc)\n__GMP_DECLSPEC mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);\n\n#define mpn_addmul_1c __MPN(addmul_1c)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_1c(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);\n\n#define mpn_addmul_2 __MPN(addmul_2)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_2(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_3 __MPN(addmul_3)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_3(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_4 __MPN(addmul_4)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_4(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_5 __MPN(addmul_5)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_5(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_6 __MPN(addmul_6)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_6(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_7 __MPN(addmul_7)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_7(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addmul_8 __MPN(addmul_8)\n__GMP_DECLSPEC mp_limb_t mpn_addmul_8(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_addlsh_n __MPN(addlsh_n)\n__GMP_DECLSPEC mp_limb_t mpn_addlsh_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,unsigned int);\n\n#define mpn_sublsh_n 
__MPN(sublsh_n)\n__GMP_DECLSPEC mp_limb_t mpn_sublsh_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,unsigned int);\n\n#define mpn_addlsh_nc __MPN(addlsh_nc)\n__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,unsigned int, mp_limb_t);\n\n#define mpn_sublsh_nc __MPN(sublsh_nc)\n__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,unsigned int, mp_limb_t);\n\n/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,\n   and returns the bit rshifted out (0 or 1).  */\n#define mpn_rsh1add_n __MPN(rsh1add_n)\n__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,\n   and returns the bit rshifted out (0 or 1).  If there's a borrow from the\n   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos\n   complement negative.  */\n#define mpn_rsh1sub_n __MPN(rsh1sub_n)\n__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#if HAVE_NATIVE_mpn_lshiftc\n#define mpn_lshiftc __MPN(lshiftc)\n__GMP_DECLSPEC mp_limb_t mpn_lshiftc(mp_ptr, mp_srcptr, mp_size_t,unsigned int);\n#endif\n\n#define mpn_addadd_n __MPN(addadd_n)\n__GMP_DECLSPEC mp_limb_t mpn_addadd_n(mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_addsub_n __MPN(addsub_n)\n__GMP_DECLSPEC int mpn_addsub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_subadd_n __MPN(subadd_n)\n__GMP_DECLSPEC mp_limb_t mpn_subadd_n(mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#if HAVE_NATIVE_mpn_karaadd\n#define mpn_karaadd __MPN(karaadd)\n__GMP_DECLSPEC void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t);\n#endif\n\n#if HAVE_NATIVE_mpn_karasub\n#define mpn_karasub __MPN(karasub)\n__GMP_DECLSPEC void mpn_karasub(mp_ptr, mp_ptr, mp_size_t);\n#endif\n\n#ifndef mpn_sumdiff_n  /* if not done with cpuvec in a fat binary */\n#define mpn_sumdiff_n __MPN(sumdiff_n)\n__GMP_DECLSPEC mp_limb_t 
mpn_sumdiff_n(mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n#endif\n\n#ifndef mpn_nsumdiff_n\n#define mpn_nsumdiff_n __MPN(nsumdiff_n)\n__GMP_DECLSPEC mp_limb_t mpn_nsumdiff_n(mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n#endif\n\n#define mpn_sumdiff_nc __MPN(sumdiff_nc)\n__GMP_DECLSPEC mp_limb_t mpn_sumdiff_nc(mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);\n\n#define mpn_divexact_byff __MPN(divexact_byff)\n__GMP_DECLSPEC mp_limb_t mpn_divexact_byff(mp_ptr, mp_srcptr, mp_size_t);\n\n#ifndef mpn_divexact_byfobm1      /* if not done with cpuvec in a fat binary */\n#define mpn_divexact_byfobm1 __MPN(divexact_byfobm1)\n__GMP_DECLSPEC mp_limb_t mpn_divexact_byfobm1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,mp_limb_t);\n#endif\n\n#ifndef mpn_add_err1_n      /* if not done with cpuvec in a fat binary */\n#define mpn_add_err1_n  __MPN(add_err1_n)\n__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\n#endif\n\n#ifndef mpn_sub_err1_n      /* if not done with cpuvec in a fat binary */\n#define mpn_sub_err1_n  __MPN(sub_err1_n)\n__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\n#endif\n\n#ifndef mpn_add_err2_n      /* if not done with cpuvec in a fat binary */\n#define mpn_add_err2_n  __MPN(add_err2_n)\n__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);\n#endif\n\n#ifndef mpn_sub_err2_n      /* if not done with cpuvec in a fat binary */\n#define mpn_sub_err2_n  __MPN(sub_err2_n)\n__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);\n#endif\n\n#define mpn_divrem_1c __MPN(divrem_1c)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_1c(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);\n\n#define mpn_dump __MPN(dump)\n__GMP_DECLSPEC void mpn_dump(mp_srcptr, mp_size_t);\n\n#define mpn_fib2_ui 
__MPN(fib2_ui)\n__GMP_DECLSPEC mp_size_t mpn_fib2_ui(mp_ptr, mp_ptr, mpir_ui);\n\n/* Remap names of internal mpn functions.  */\n#define __clz_tab               __MPN(clz_tab)\n#define mpn_udiv_w_sdiv\t\t__MPN(udiv_w_sdiv)\n\n#define mpn_jacobi_base __MPN(jacobi_base)\n__GMP_DECLSPEC int mpn_jacobi_base(mp_limb_t a, mp_limb_t b, int result_bit1) ATTRIBUTE_CONST;\n\n#define mpn_jacobi_2 __MPN(jacobi_2)\n__GMP_DECLSPEC int mpn_jacobi_2(mp_srcptr, mp_srcptr, unsigned);\n\n#define mpn_jacobi_n __MPN(jacobi_n)\n__GMP_DECLSPEC int mpn_jacobi_n(mp_ptr, mp_ptr, mp_size_t, unsigned);\n\n#define mpn_mod_1c __MPN(mod_1c)\n__GMP_DECLSPEC mp_limb_t mpn_mod_1c(mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;\n\n#define mpn_mul_1c __MPN(mul_1c)\n__GMP_DECLSPEC mp_limb_t mpn_mul_1c(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);\n\n#define mpn_mul_2 __MPN(mul_2)\n/* __GMP_DECLSPEC added for consistency with sibling mpn prototypes (DLL import/export) */\n__GMP_DECLSPEC mp_limb_t mpn_mul_2(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);\n\n#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */\n#define mpn_mul_basecase __MPN(mul_basecase)\n__GMP_DECLSPEC void mpn_mul_basecase(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n#endif\n\n#define mpn_mullow_n __MPN(mullow_n)\n__GMP_DECLSPEC void mpn_mullow_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_mullow_n_basecase __MPN(mullow_n_basecase)\n__GMP_DECLSPEC void mpn_mullow_n_basecase(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_mulhigh_n __MPN(mulhigh_n)\n__GMP_DECLSPEC void mpn_mulhigh_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_mullow_basecase __MPN(mullow_basecase)\n__GMP_DECLSPEC void mpn_mullow_basecase(mp_ptr, mp_srcptr,mp_size_t, mp_srcptr, mp_size_t,mp_size_t);\n\n#ifndef mpn_mulmid_basecase      /* if not done with cpuvec in a fat binary */\n#define mpn_mulmid_basecase __MPN(mulmid_basecase)\n__GMP_DECLSPEC void mpn_mulmid_basecase(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n#endif\n\n#define mpn_mod_1_1 __MPN(mod_1_1)\n__GMP_DECLSPEC void 
mpn_mod_1_1(mp_ptr,mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_mod_1_2 __MPN(mod_1_2)\n__GMP_DECLSPEC void mpn_mod_1_2(mp_ptr,mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_mod_1_3 __MPN(mod_1_3)\n__GMP_DECLSPEC void mpn_mod_1_3(mp_ptr,mp_srcptr, mp_size_t, mp_srcptr);\n\n#define mpn_mod_1_k __MPN(mod_1_k)\n__GMP_DECLSPEC mp_limb_t mpn_mod_1_k(mp_srcptr, mp_size_t, mp_limb_t,mp_size_t);\n\n#define mpn_mulmid __MPN(mulmid)\n__GMP_DECLSPEC void mpn_mulmid(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n\n#define mpn_mulmid_n __MPN(mulmid_n)\n__GMP_DECLSPEC void mpn_mulmid_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */\n#define mpn_sqr_basecase __MPN(sqr_basecase)\n__GMP_DECLSPEC void mpn_sqr_basecase(mp_ptr, mp_srcptr, mp_size_t);\n#endif\n\n#define mpz_trial_division __gmpz_trial_division\n__GMP_DECLSPEC unsigned long mpz_trial_division(mpz_srcptr,unsigned long, unsigned long);\n\n#define mpn_sub_nc __MPN(sub_nc)\n__GMP_DECLSPEC mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);\n\n#define mpn_submul_1c __MPN(submul_1c)\n__GMP_DECLSPEC mp_limb_t mpn_submul_1c(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);\n\n#define mpn_invert_2exp __MPN(invert_2exp)\n__GMP_DECLSPEC void mpn_invert_2exp(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_is_invert __MPN(is_invert)\n__GMP_DECLSPEC int mpn_is_invert(mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_invert_trunc __MPN(invert_trunc)\n__GMP_DECLSPEC void mpn_invert_trunc(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr);\n\n#define   mpn_binvert __MPN(binvert)\n__GMP_DECLSPEC void      mpn_binvert(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n#define   mpn_binvert_itch __MPN(binvert_itch)\n__GMP_DECLSPEC mp_size_t mpn_binvert_itch(mp_size_t) ATTRIBUTE_CONST;\n\n#define   mpn_powm __MPN(powm)\n__GMP_DECLSPEC void      mpn_powm(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, 
mp_ptr);\n#define   mpn_powlo __MPN(powlo)\n__GMP_DECLSPEC void      mpn_powlo(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);\n\n#ifndef mpn_divrem_euclidean_qr_1    /* if not done with cpuvec in a fat binary */\n#define mpn_divrem_euclidean_qr_1 __MPN(divrem_euclidean_qr_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_euclidean_qr_1(mp_ptr, mp_size_t, mp_srcptr, mp_size_t,mp_limb_t);\n#endif\n\n#ifndef mpn_divrem_euclidean_qr_2    /* if not done with cpuvec in a fat binary */\n#define mpn_divrem_euclidean_qr_2 __MPN(divrem_euclidean_qr_2)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_euclidean_qr_2(mp_ptr, mp_ptr, mp_size_t,mp_srcptr);\n#endif\n\n#define mpn_divrem_euclidean_r_1 __MPN(divrem_euclidean_r_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_euclidean_r_1(mp_srcptr, mp_size_t,mp_limb_t);\n\n#define mpn_divrem_hensel_qr_1 __MPN(divrem_hensel_qr_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_qr_1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t);\n\n#define mpn_divrem_hensel_qr_1_1 __MPN(divrem_hensel_qr_1_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_qr_1_1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t);\n\n#define mpn_divrem_hensel_qr_1_2 __MPN(divrem_hensel_qr_1_2)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t);\n\n#define mpn_divrem_hensel_rsh_qr_1_preinv __MPN(divrem_hensel_rsh_qr_1_preinv)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_rsh_qr_1_preinv(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,mp_limb_t,int);\n\n#define mpn_divrem_hensel_rsh_qr_1 __MPN(divrem_hensel_rsh_qr_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_rsh_qr_1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,int);\n\n#define mpn_rsh_divrem_hensel_qr_1 __MPN(rsh_divrem_hensel_qr_1)\n__GMP_DECLSPEC mp_limb_t mpn_rsh_divrem_hensel_qr_1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,int,mp_limb_t);\n\n#define mpn_rsh_divrem_hensel_qr_1_1 __MPN(rsh_divrem_hensel_qr_1_1)\n__GMP_DECLSPEC mp_limb_t mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,int,mp_limb_t);\n\n#define 
mpn_rsh_divrem_hensel_qr_1_2 __MPN(rsh_divrem_hensel_qr_1_2)\n__GMP_DECLSPEC mp_limb_t mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_srcptr, mp_size_t,mp_limb_t,int,mp_limb_t);\n\n#define mpn_divrem_hensel_r_1 __MPN(divrem_hensel_r_1)\n__GMP_DECLSPEC mp_limb_t mpn_divrem_hensel_r_1(mp_srcptr, mp_size_t,mp_limb_t);\n\n#define mpir_random_fermat(nn, state, limbs) \\\n   do { mp_limb_t t; \\\n      mpn_rrandom(nn, state, limbs); \\\n      mpn_rrandom(&nn[limbs], state, 1); \\\n      nn[limbs] %= 1024; \\\n      mpn_rrandom(&t, state, 1); \\\n      if (t % 2) \\\n         nn[limbs] = -nn[limbs]; \\\n   } while (0)\n\n#define mpn_addmod_2expp1_1(r, limbs, c)                    \\\ndo {                                                        \\\n   mp_limb_t __sum = (r)[0] + (mp_limb_signed_t)(c);        \\\n   /* check if adding c causes carry propagation */         \\\n   if ((mp_limb_signed_t)(__sum ^ (r)[0]) >= 0)             \\\n      (r)[0] = __sum;                                       \\\n   else                                                     \\\n   {                                                        \\\n      if ((mp_limb_signed_t) (c) >= 0) mpn_add_1((r),       \\\n          (r), (limbs) + 1, (mp_limb_signed_t) (c));        \\\n      else mpn_sub_1((r), (r), (limbs) + 1,                 \\\n                         -(mp_limb_signed_t) (c));          \\\n   }                                                        \\\n} while (0)\n\n#define mpn_mul_2expmod_2expp1 __MPN(mul_2expmod_2expp1)\n__GMP_DECLSPEC void mpn_mul_2expmod_2expp1(mp_ptr t, mp_ptr i1, mp_size_t limbs, mp_bitcnt_t d);\n\n#define mpir_revbin __mpir_revbin\n__GMP_DECLSPEC mp_limb_t mpir_revbin(mp_limb_t in, mp_limb_t bits);\n\n#define  mpir_fft_adjust_limbs  __mpir_fft_adjust_limbs\n__GMP_DECLSPEC mpir_si mpir_fft_adjust_limbs(mp_size_t limbs);\n\n#define mpir_fft_combine_bits __mpir_fft_combine_bits\n__GMP_DECLSPEC void mpir_fft_combine_bits(mp_ptr res, const mp_ptr * poly,\n                
long length, mp_bitcnt_t bits, mp_size_t output_limbs, mp_size_t total_limbs);\n\n#define mpir_fft_split_bits __mpir_fft_split_bits\n__GMP_DECLSPEC mp_size_t mpir_fft_split_bits(mp_ptr * poly, mp_srcptr limbs,\n                 mp_size_t total_limbs, mp_bitcnt_t bits, mp_size_t output_limbs);\n\n#define mpir_fft_adjust __mpir_fft_adjust\n__GMP_DECLSPEC void mpir_fft_adjust(mp_ptr r, mp_ptr i1,\n                                     mp_size_t i, mp_size_t limbs, mp_bitcnt_t w);\n\n#define mpir_fft_adjust_sqrt2 __mpir_fft_adjust_sqrt2\n__GMP_DECLSPEC void mpir_fft_adjust_sqrt2(mp_ptr r, mp_ptr i1,\n                   mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp);\n\n#define mpir_butterfly_lshB __mpir_butterfly_lshB\n__GMP_DECLSPEC void mpir_butterfly_lshB(mp_ptr t, mp_ptr u, mp_ptr i1,\n                       mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y);\n\n#define mpir_butterfly_rshB __mpir_butterfly_rshB\n__GMP_DECLSPEC void mpir_butterfly_rshB(mp_ptr t, mp_ptr u, mp_ptr i1,\n                       mp_ptr i2, mp_size_t limbs, mp_size_t x, mp_size_t y);\n\n#define mpir_fermat_to_mpz __fermat_to_mpz\n__GMP_DECLSPEC void mpir_fermat_to_mpz(mpz_t m, mp_ptr i, mp_size_t limbs);\n\n#define mpir_fft_butterfly_twiddle __mpir_fft_butterfly_twiddle\n__GMP_DECLSPEC void mpir_fft_butterfly_twiddle(mp_ptr u, mp_ptr v,\n   mp_ptr s, mp_ptr t, mp_size_t limbs, mp_bitcnt_t b1, mp_bitcnt_t b2);\n\n#define mpir_ifft_butterfly_twiddle __mpir_ifft_butterfly_twiddle\n__GMP_DECLSPEC void mpir_ifft_butterfly_twiddle(mp_ptr u, mp_ptr v,\n   mp_ptr s, mp_ptr t, mp_size_t limbs, mp_bitcnt_t b1, mp_bitcnt_t b2);\n\n#define mpir_fft_butterfly_sqrt2 __mpir_fft_butterfly_sqrt2\n__GMP_DECLSPEC void mpir_fft_butterfly_sqrt2(mp_ptr s, mp_ptr t,\n                         mp_ptr i1, mp_ptr i2, mp_size_t i,\n                                mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp);\n\n#define mpir_ifft_butterfly_sqrt2 __mpir_ifft_butterfly_sqrt2\n__GMP_DECLSPEC void 
mpir_ifft_butterfly_sqrt2(mp_ptr s, mp_ptr t, mp_ptr i1,\n   mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_ptr temp);\n\n#define mpir_fft_butterfly __mpir_fft_butterfly\n__GMP_DECLSPEC void mpir_fft_butterfly(mp_ptr s, mp_ptr t, mp_ptr i1,\n                     mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w);\n\n#define mpir_ifft_butterfly __mpir_ifft_butterfly\n__GMP_DECLSPEC void mpir_ifft_butterfly(mp_ptr s, mp_ptr t, mp_ptr i1,\n                     mp_ptr i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w);\n\n#define mpir_fft_combine_limbs __combine_limbs\n__GMP_DECLSPEC void mpir_fft_combine_limbs(mp_ptr res, const mp_ptr * poly, long length,\n            mp_size_t coeff_limbs, mp_size_t output_limbs, mp_size_t total_limbs);\n\n#define mpir_fft_split_limbs __mpir_fft_split_limbs\n__GMP_DECLSPEC mp_size_t mpir_fft_split_limbs(mp_ptr * poly, mp_srcptr limbs,\n            mp_size_t total_limbs, mp_size_t coeff_limbs, mp_size_t output_limbs);\n\n#define mpir_fft_radix2 __mpir_fft_radix2\n__GMP_DECLSPEC void mpir_fft_radix2(mp_ptr * ii,\n                    mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2);\n\n#define mpir_ifft_radix2 __mpir_ifft_radix2\n__GMP_DECLSPEC void mpir_ifft_radix2(mp_ptr * ii, mp_size_t n,\n                                 mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2);\n\n#define mpir_fft_trunc __mpir_fft_trunc\n__GMP_DECLSPEC void mpir_fft_trunc(mp_ptr * ii,  mp_size_t n, mp_bitcnt_t w,\n                               mp_ptr * t1, mp_ptr * t2, mp_size_t trunc);\n\n#define mpir_ifft_trunc __mpir_ifft_trunc\n__GMP_DECLSPEC void mpir_ifft_trunc(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n                               mp_ptr * t1, mp_ptr * t2, mp_size_t trunc);\n\n#define mpir_fft_trunc_sqrt2 __mpir_fft_trunc_sqrt2\n__GMP_DECLSPEC void mpir_fft_trunc_sqrt2(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n            mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t trunc);\n\n#define mpir_ifft_trunc_sqrt2 
__mpir_ifft_trunc_sqrt2\n__GMP_DECLSPEC void mpir_ifft_trunc_sqrt2(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n            mp_ptr * t1, mp_ptr * t2, mp_ptr * temp, mp_size_t trunc);\n\n#define mpir_fft_mfa_trunc_sqrt2 __mpir_fft_mfa_trunc_sqrt2\n__GMP_DECLSPEC void mpir_fft_mfa_trunc_sqrt2(mp_ptr * ii, mp_size_t n,\n                       mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                                mp_ptr * temp, mp_size_t n1, mp_size_t trunc);\n\n#define mpir_ifft_mfa_trunc_sqrt2 __mpir_ifft_mfa_trunc_sqrt2\n__GMP_DECLSPEC void mpir_ifft_mfa_trunc_sqrt2(mp_ptr * ii, mp_size_t n,\n                      mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                                mp_ptr * temp, mp_size_t n1, mp_size_t trunc);\n\n#define mpir_fft_negacyclic __mpir_fft_negacyclic\n__GMP_DECLSPEC void mpir_fft_negacyclic(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n                             mp_ptr * t1, mp_ptr * t2, mp_ptr * temp);\n\n#define mpir_ifft_negacyclic __mpir_ifft_negacyclic\n__GMP_DECLSPEC void mpir_ifft_negacyclic(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n                             mp_ptr * t1, mp_ptr * t2, mp_ptr * temp);\n\n#define mpir_fft_mfa_trunc_sqrt2_outer __mpir_fft_mfa_trunc_sqrt2_outer\n__GMP_DECLSPEC void mpir_fft_mfa_trunc_sqrt2_outer(mp_ptr * ii, mp_size_t n,\n                      mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                                mp_ptr * temp, mp_size_t n1, mp_size_t trunc);\n\n#define mpir_ifft_mfa_trunc_sqrt2_outer __mpir_ifft_mfa_trunc_sqrt2_outer\n__GMP_DECLSPEC void mpir_ifft_mfa_trunc_sqrt2_outer(mp_ptr * ii, mp_size_t n,\n                        mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                                mp_ptr * temp, mp_size_t n1, mp_size_t trunc);\n\n#define mpir_fft_mfa_trunc_sqrt2_inner __mpir_fft_mfa_trunc_sqrt2_inner\n__GMP_DECLSPEC void mpir_fft_mfa_trunc_sqrt2_inner(mp_ptr * ii, mp_ptr * jj,\n            mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                
mp_ptr * temp, mp_size_t n1, mp_size_t trunc, mp_ptr tt);\n\n#define mpir_fft_mulmod_2expp1 __mpir_fft_mulmod_2expp1\n__GMP_DECLSPEC void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2,\n                 mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w);\n\n#define mpir_fft_trunc1 __mpir_fft_trunc1\n__GMP_DECLSPEC void mpir_fft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n                               mp_ptr * t1, mp_ptr * t2, mp_size_t trunc);\n\n#define mpir_ifft_trunc1 __mpir_ifft_trunc1\n__GMP_DECLSPEC void mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w,\n                               mp_ptr * t1, mp_ptr * t2, mp_size_t trunc);\n\n#define mpir_fft_radix2_twiddle __mpir_fft_radix2_twiddle\n__GMP_DECLSPEC void mpir_fft_radix2_twiddle(mp_ptr * ii, mp_size_t is,\n      mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                            mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs);\n\n#define mpir_ifft_radix2_twiddle __mpir_ifft_radix2_twiddle\n__GMP_DECLSPEC void mpir_ifft_radix2_twiddle(mp_ptr * ii, mp_size_t is,\n        mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n                            mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs);\n\n#define mpir_fft_trunc1_twiddle __mpir_fft_trunc1_twiddle\n__GMP_DECLSPEC void mpir_fft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,\n        mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n           mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc);\n\n#define mpir_ifft_trunc1_twiddle __mpir_ifft_trunc1_twiddle\n__GMP_DECLSPEC void mpir_ifft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,\n        mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,\n           mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc);\n\n#define mpir_fft_naive_convolution_1 __mpir_fft_naive_convolution_1\n__GMP_DECLSPEC void mpir_fft_naive_convolution_1(mp_ptr r, mp_srcptr ii,\n                                                     
mp_srcptr jj, mp_size_t m);\n\n#define mpn_mulmod_2expp1_basecase __MPN(mulmod_2expp1_basecase)\n__GMP_DECLSPEC int mpn_mulmod_2expp1_basecase(mp_ptr, mp_srcptr, mp_srcptr, int, mpir_ui, mp_ptr);\n\ntypedef __gmp_randstate_struct *gmp_randstate_ptr;\ntypedef const __gmp_randstate_struct *gmp_randstate_srcptr;\n\n#define mpn_normmod_2expp1 __MPN(normmod_2expp1)\n__GMP_DECLSPEC void mpn_normmod_2expp1(mp_ptr t, mp_size_t limbs);\n\n#define mpn_div_2expmod_2expp1 __MPN(div_2expmod_2expp1)\n__GMP_DECLSPEC void mpn_div_2expmod_2expp1(mp_ptr t, mp_srcptr i1, mp_size_t limbs, mp_bitcnt_t d);\n\n#define mpn_mul_trunc_sqrt2 __MPN(mul_trunc_sqrt2)\n__GMP_DECLSPEC void mpn_mul_trunc_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1,\n                  mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w);\n\n#define mpn_mul_mfa_trunc_sqrt2 __MPN(mul_mfa_trunc_sqrt2)\n__GMP_DECLSPEC void mpn_mul_mfa_trunc_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1,\n                  mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w);\n\n/* Pseudo-random number generator function pointers structure.  */\ntypedef struct {\n  void (*randseed_fn)(gmp_randstate_t rstate, mpz_srcptr seed);\n  void (*randget_fn)(gmp_randstate_t rstate, mp_ptr dest, mpir_ui nbits);\n  void (*randclear_fn)(gmp_randstate_t rstate);\n  void (*randiset_fn)(gmp_randstate_ptr, gmp_randstate_srcptr);\n} gmp_randfnptr_t;\n\n/* Macro to obtain a void pointer to the function pointers structure.  */\n#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)\n\n/* Macro to obtain a pointer to the generator's state.\n   When used as a lvalue the rvalue needs to be cast to mp_ptr.  */\n#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)\n\n/* Write a given number of random bits to rp.  
*/\n#define _gmp_rand(rp, state, bits)                              \\\n  do {                                                          \\\n    gmp_randstate_ptr  __rstate = (state);                      \\\n    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \\\n       (__rstate, rp, bits);                                    \\\n  } while (0)\n\n__GMP_DECLSPEC void __gmp_randinit_mt_noseed(gmp_randstate_t);\n\n\n/* __gmp_rands is the global state for the old-style random functions, and\n   is also used in the test programs (hence the __GMP_DECLSPEC).\n\n   There's no seeding here, so mpz_random etc will generate the same\n   sequence every time.  This is not unlike the C library random functions\n   if you don't seed them, so perhaps it's acceptable.  Digging up a seed\n   from /dev/random or the like would work on many systems, but might\n   encourage a false confidence, since it'd be pretty much impossible to do\n   something that would work reliably everywhere.  In any case the new style\n   functions are recommended to applications which care about randomness, so\n   the old functions aren't too important.  */\n\n__GMP_DECLSPEC extern char             __gmp_rands_initialized;\n__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;\n\n#define RANDS                                       \\\n  ((__gmp_rands_initialized ? 
0                     \\\n    : (__gmp_rands_initialized = 1,                 \\\n       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \\\n   __gmp_rands)\n\n/* this is used by the test programs, to free memory */\n#define RANDS_CLEAR()                   \\\n  do {                                  \\\n    if (__gmp_rands_initialized)        \\\n      {                                 \\\n        __gmp_rands_initialized = 0;    \\\n        gmp_randclear (__gmp_rands);    \\\n      }                                 \\\n  } while (0)\n\n#define mpn_toom42_mulmid_itch(n) (3*(n) + 64)\n\n/* kara uses n+1 limbs of temporary space and then recurses with the balance,\n   so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  This can be solved to\n   2n + o(n).  Since n is very limited, o(n) in practice could be around 15.\n   For now, assume n is arbitrarily large.  */\n#define MPN_KARA_MUL_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)\n#define MPN_KARA_SQR_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)\n\n/* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is\n   unavailable, but just 2n + o(n) if mpn_sublsh1_n is available.  It is hard\n   to pin down the value of o(n), since it is a complex function of\n   MUL_TOOM3_THRESHOLD and n.  Normally toom3 is used between kara and fft; in\n   that case o(n) will be really limited.  If toom3 is used for arbitrarily\n   large operands, o(n) will be larger.  These definitions handle operands of\n   up to 8956264246117233 limbs.  A single multiplication using toom3 on the\n   fastest hardware currently (2003) would need 100 million years, which\n   suggests that these limits are acceptable.  
*/\n#if WANT_FFT\n#if HAVE_NATIVE_mpn_sublsh1_n\n#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 63)\n#define MPN_TOOM3_MUL_TSIZE(n)    (3*(n) + 63)\n#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 63)\n#else\n#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)\n#define MPN_TOOM3_MUL_TSIZE(n)    (3*(n) + 3*(n/3) + 63)\n#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)\n#endif\n#else /* WANT_FFT */\n#if HAVE_NATIVE_mpn_sublsh1_n\n#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 255)\n#define MPN_TOOM3_MUL_TSIZE(n)    (3*(n) + 255)\n#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 255)\n#else\n#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)\n#define MPN_TOOM3_MUL_TSIZE(n)    (3*(n) + 3*(n/3) + 255)\n#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)\n#endif\n#define MPN_TOOM3_MAX_N 285405\n#endif /* WANT_FFT */\n\n/* need 2 so that n2>=1 */\n#if defined(HAVE_NATIVE_mpn_karaadd) || defined(HAVE_NATIVE_mpn_karasub)\n#define MPN_KARA_MUL_N_MINSIZE    8\n#define MPN_KARA_SQR_N_MINSIZE    8\n#else\n#define MPN_KARA_MUL_N_MINSIZE    2\n#define MPN_KARA_SQR_N_MINSIZE    2\n#endif\n\n/* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */\n#define MPN_TOOM3_MUL_N_MINSIZE   17\n#define MPN_TOOM4_MUL_N_MINSIZE   32\n#define MPN_TOOM8H_MUL_MINSIZE    86\n#define MPN_TOOM3_SQR_N_MINSIZE   17\n#define MPN_TOOM4_SQR_N_MINSIZE   32\n#define MPN_TOOM8_SQR_N_MINSIZE   58\n#define MPN_FFT_MUL_N_MINSIZE     64\n\n#define mpn_sqr_diagonal __MPN(sqr_diagonal)\n__GMP_DECLSPEC void mpn_sqr_diagonal(mp_ptr, mp_srcptr, mp_size_t);\n\n#define mpn_kara_mul_n\t__MPN(kara_mul_n)\n__GMP_DECLSPEC void mpn_kara_mul_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_kara_sqr_n  __MPN(kara_sqr_n)\n__GMP_DECLSPEC void mpn_kara_sqr_n(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_toom3_mul_n  __MPN(toom3_mul_n)\n__GMP_DECLSPEC void mpn_toom3_mul_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr);\n\n#define mpn_toom3_mul  __MPN(toom3_mul)\n__GMP_DECLSPEC 
void mpn_toom3_mul(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,\n                                                           mp_size_t,mp_ptr);\n\n#define mpn_toom3_interpolate __MPN(toom3_interpolate)\n__GMP_DECLSPEC void mpn_toom3_interpolate(mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1,\n                     mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa,\n                                          mp_limb_t vinf0, mp_ptr ws);\n\n#define mpn_toom32_mul __MPN(toom32_mul)\n__GMP_DECLSPEC void mpn_toom32_mul(mp_ptr c, mp_srcptr a, mp_size_t an, mp_srcptr b,\n                                                      mp_size_t bn, mp_ptr t);\n\n#define mpn_toom42_mul  __MPN(toom42_mul)\n__GMP_DECLSPEC void mpn_toom42_mul(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t,mp_ptr);\n\n\n#define mpn_toom4_mul_n  __MPN(toom4_mul_n)\n__GMP_DECLSPEC void mpn_toom4_mul_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n\n#define mpn_toom4_mul  __MPN(toom4_mul)\n__GMP_DECLSPEC void mpn_toom4_mul(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,\n                                                           mp_size_t);\n\n#define mpn_toom53_mul __MPN(toom53_mul)\n__GMP_DECLSPEC void mpn_toom53_mul(mp_ptr rp, mp_srcptr up, mp_size_t un,\n                                           mp_srcptr vp, mp_size_t vn);\n\n#define mpn_toom4_interpolate __MPN(toom4_interpolate)\n__GMP_DECLSPEC void mpn_toom4_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn,\n               mp_ptr tp, mp_size_t s4, mp_size_t n4, mp_size_t n6,\n               mp_limb_t r30);\n\n#define mpn_toom_eval_dgr3_pm1  __MPN(toom_eval_dgr3_pm1)\n__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1(mp_ptr xp1, mp_ptr xm1,\n            mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp);\n\n#define mpn_toom_eval_dgr3_pm2  __MPN(toom_eval_dgr3_pm2)\n__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2(mp_ptr xp2, mp_ptr xm2,\n            mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp);\n\n#define mpn_toom_eval_pm1  __MPN(toom_eval_pm1)\n__GMP_DECLSPEC int 
mpn_toom_eval_pm1(mp_ptr xp1, mp_ptr xm1, unsigned k,\n           mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp);\n\n#define mpn_toom_eval_pm2  __MPN(toom_eval_pm2)\n__GMP_DECLSPEC int mpn_toom_eval_pm2(mp_ptr xp2, mp_ptr xm2, unsigned k,\n           mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp);\n\n#define mpn_toom_eval_pm2exp  __MPN(toom_eval_pm2exp)\n__GMP_DECLSPEC int mpn_toom_eval_pm2exp(mp_ptr xp2, mp_ptr xm2, unsigned k,\n              mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,\n              mp_ptr tp);\n\n#define mpn_toom_eval_pm2rexp  __MPN(toom_eval_pm2rexp)\n__GMP_DECLSPEC int mpn_toom_eval_pm2rexp(mp_ptr rp, mp_ptr rm,\n              unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,\n              unsigned int s, mp_ptr ws);\n\n#define mpn_toom_interpolate_16pts  __MPN(toom_interpolate_16pts)\n__GMP_DECLSPEC void mpn_toom_interpolate_16pts(mp_ptr pp, mp_ptr r1, mp_ptr r3,\n                   mp_ptr r5, mp_ptr r7, mp_size_t n, mp_size_t spt,\n                         int half, mp_ptr wsi);\n\n#define mpn_toom_couple_handling  __MPN(toom_couple_handling)\n__GMP_DECLSPEC void mpn_toom_couple_handling(mp_ptr pp, mp_size_t n, mp_ptr np,\n              int nsign, mp_size_t off, int ps, int ns);\n\n#define mpn_toom8h_mul  __MPN(toom8h_mul)\n__GMP_DECLSPEC void mpn_toom8h_mul(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,\n                                                             mp_size_t);\n\n#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)\n__GMP_DECLSPEC void mpn_toom3_sqr_n(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_toom4_sqr_n  __MPN(toom4_sqr_n)\n__GMP_DECLSPEC void mpn_toom4_sqr_n(mp_ptr, mp_srcptr, mp_size_t);\n\n#define mpn_toom8_sqr_n  __MPN(toom8_sqr_n)\n__GMP_DECLSPEC void mpn_toom8_sqr_n(mp_ptr, mp_srcptr, mp_size_t);\n\n#define   mpn_toom42_mulmid __MPN(toom42_mulmid)\n__GMP_DECLSPEC void      mpn_toom42_mulmid(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_mulmod_Bexpp1_fft 
__MPN(mulmod_Bexpp1_fft)\n__GMP_DECLSPEC int mpn_mulmod_Bexpp1_fft(mp_ptr op, mp_size_t pl,\n\t     mp_srcptr n, mp_size_t nl,\n\t     mp_srcptr m, mp_size_t ml);\n\n#define DC_DIVAPPR_Q_N_ITCH(n) ((n)*4 + 64)\n#define DC_BDIV_Q_N_ITCH(n) ((n)/2 + 2)\n#define DC_BDIV_QR_N_ITCH(n) (n)\n\n/* #define mpn_tdiv_q  __MPN(tdiv_q) */\n/* void mpn_tdiv_q(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); */\n\n#define mpz_divexact_gcd  __gmpz_divexact_gcd\n__GMP_DECLSPEC void mpz_divexact_gcd(mpz_ptr q, mpz_srcptr a, mpz_srcptr d);\n\n#define mpz_prodlimbs  __gmpz_prodlimbs\n__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);\n\n#define mpz_oddfac_1  __gmpz_oddfac_1\n__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);\n\n#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite\n#ifdef _GMP_H_HAVE_FILE\n__GMP_DECLSPEC size_t mpz_inp_str_nowhite(mpz_ptr x, FILE *stream, int base, int c, size_t nread);\n#endif\n\n#define mpn_divisible_p __MPN(divisible_p)\n__GMP_DECLSPEC int mpn_divisible_p(mp_srcptr ap, mp_size_t asize,\n                                 mp_srcptr dp, mp_size_t dsize) __GMP_ATTRIBUTE_PURE;\n\n#define mpn_rootrem __MPN(rootrem)\n__GMP_DECLSPEC mp_size_t mpn_rootrem(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\n\n#define mpn_rootrem_basecase __MPN(rootrem_basecase)\n__GMP_DECLSPEC mp_size_t mpn_rootrem_basecase(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\n\n#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi\n#define MPN_COPY_INCR(dst, src, size)                   \\\n  do {                                                  \\\n    ASSERT ((size) >= 0);                               \\\n    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \\\n    mpn_copyi (dst, src, size);                         \\\n  } while (0)\n#endif\n\n/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */\n#if ! 
defined (MPN_COPY_INCR)\n#define MPN_COPY_INCR(dst, src, n)                      \\\n  do {                                                  \\\n    ASSERT ((n) >= 0);                                  \\\n    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \\\n    if ((n) != 0)                                       \\\n      {                                                 \\\n    mp_size_t __n = (n) - 1;                        \\\n    mp_ptr __dst = (dst);                           \\\n    mp_srcptr __src = (src);                        \\\n    mp_limb_t __x;                                  \\\n    __x = *__src++;                                 \\\n    if (__n != 0)                                   \\\n      {                                             \\\n        do                                          \\\n          {                                         \\\n        *__dst++ = __x;                         \\\n        __x = *__src++;                         \\\n          }                                         \\\n        while (--__n);                              \\\n      }                                             \\\n    *__dst++ = __x;                                 \\\n      }                                                 \\\n  } while (0)\n#endif\n\n#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd\n#define MPN_COPY_DECR(dst, src, size)                   \\\n  do {                                                  \\\n    ASSERT ((size) >= 0);                               \\\n    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \\\n    mpn_copyd (dst, src, size);                         \\\n  } while (0)\n#endif\n\n/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */\n#if ! 
defined (MPN_COPY_DECR)\n#define MPN_COPY_DECR(dst, src, n)                      \\\n  do {                                                  \\\n    ASSERT ((n) >= 0);                                  \\\n    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \\\n    if ((n) != 0)                                       \\\n      {                                                 \\\n    mp_size_t __n = (n) - 1;                        \\\n    mp_ptr __dst = (dst) + __n;                     \\\n    mp_srcptr __src = (src) + __n;                  \\\n    mp_limb_t __x;                                  \\\n    __x = *__src--;                                 \\\n    if (__n != 0)                                   \\\n      {                                             \\\n        do                                          \\\n          {                                         \\\n        *__dst-- = __x;                         \\\n        __x = *__src--;                         \\\n          }                                         \\\n        while (--__n);                              \\\n      }                                             \\\n    *__dst-- = __x;                                 \\\n      }                                                 \\\n  } while (0)\n#endif\n\n\n#ifndef MPN_COPY\n#define MPN_COPY(d,s,n)                         \\\n  do {                                          \\\n    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \\\n    MPN_COPY_INCR (d, s, n);                    \\\n  } while (0)\n#endif\n\n\n/* Set {dst,size} to the limbs of {src,size} in reverse order. 
*/\n#define MPN_REVERSE(dst, src, size)                     \\\n  do {                                                  \\\n    mp_ptr     __dst = (dst);                           \\\n    mp_size_t  __size = (size);                         \\\n    mp_srcptr  __src = (src) + __size - 1;              \\\n    mp_size_t  __i;                                     \\\n    ASSERT ((size) >= 0);                               \\\n    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \\\n    for (__i = 0; __i < __size; __i++)                  \\\n      {                                                 \\\n        *__dst = *__src;                                \\\n        __dst++;                                        \\\n        __src--;                                        \\\n      }                                                 \\\n  } while (0)\n\n\n\n/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to\n   start up and would need to strip a lot of zeros before it'd be faster\n   than a simple cmpl loop.  
Here are some times in cycles for\n   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping\n   low zeros).\n\n                std   cld\n           P5    18    16\n           P6    46    38\n           K6    36    13\n           K7    21    20\n*/\n#ifndef MPN_NORMALIZE\n#define MPN_NORMALIZE(DST, NLIMBS) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    while ((NLIMBS) > 0)                                                \\\n      {\t\t\t\t\t\t\t\t\t\\\n    if ((DST)[(NLIMBS) - 1] != 0)\t\t\t\t\t\\\n      break;\t\t\t\t\t\t\t\\\n    (NLIMBS)--;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n#ifndef MPN_NORMALIZE_NOT_ZERO\n#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \\\n  do {                                          \\\n    ASSERT ((NLIMBS) >= 1);                     \\\n    while (1)                                   \\\n      {                                         \\\n    if ((DST)[(NLIMBS) - 1] != 0)           \\\n      break;                                \\\n    (NLIMBS)--;                             \\\n      }                                         \\\n  } while (0)\n#endif\n\n/* Strip least significant zero limbs from {ptr,size} by incrementing ptr\n   and decrementing size.  low should be ptr[0], and will be the new ptr[0]\n   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and\n   somewhere a non-zero limb.  
*/\n#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \\\n  do {                                                  \\\n    ASSERT ((size) >= 1);                               \\\n    ASSERT ((low) == (ptr)[0]);                         \\\n                                                        \\\n    while ((low) == 0)                                  \\\n      {                                                 \\\n        (size)--;                                       \\\n        ASSERT ((size) >= 1);                           \\\n        (ptr)++;                                        \\\n        (low) = *(ptr);                                 \\\n      }                                                 \\\n  } while (0)\n\n/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a\n   temporary variable; it will be automatically cleared out at function\n   return.  We use __x here to make it possible to accept both mpz_ptr and\n   mpz_t arguments.  */\n#define MPZ_TMP_INIT(X, NLIMBS)                                         \\\n  do {                                                                  \\\n    mpz_ptr __x = (X);                                                  \\\n    ASSERT ((NLIMBS) >= 1);                                             \\\n    __x->_mp_alloc = (NLIMBS);                                          \\\n    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \\\n  } while (0)\n\n\n/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */\n#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \\\n                          ? 
(mp_ptr) _mpz_realloc(z,n)  \\\n                          : PTR(z))\n\n#define MPZ_NEWALLOC MPZ_REALLOC\n\n#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)\n\n\n/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and\n   f1p.\n\n   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the\n   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the\n   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.\n\n   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs\n   without good floating point.  There's +2 for rounding up, and a further\n   +2 since at the last step x limbs are doubled into a 2x+1 limb region\n   whereas the actual F[2k] value might be only 2x-1 limbs.\n\n   Note that a division is done first, since on a 32-bit system it's at\n   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be\n   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and\n   whatever a multiply of two 190Mbyte numbers takes.)\n\n   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be\n   worked into the multiplier.  */\n\n#define MPN_FIB2_SIZE(n) \\\n  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)\n\n\n/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range\n   -1 <= n <= FIB_TABLE_LIMIT\n\n   FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] =\n   F[n] + 2*F[n-1] fits in a limb.  
*/\n\n__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];\n#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])\n\nextern const mp_limb_t __gmp_oddfac_table[];\nextern const mp_limb_t __gmp_odd2fac_table[];\nextern const unsigned char __gmp_fac2cnt_table[];\nextern const mp_limb_t __gmp_limbroots_table[];\n\n/* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */\nstatic inline unsigned\nlog_n_max (mp_limb_t n)\n{\n  unsigned log;\n  for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);\n  return log;\n}\n\n#define SIEVESIZE 512\t\t/* FIXME: Allow gmp_init_primesieve to choose */\ntypedef struct\n{\n  mpir_ui d;\t\t   /* current index in s[] */\n  mpir_ui s0;\t\t   /* number corresponding to s[0] */\n  mpir_ui sqrt_s0;\t   /* misnomer for sqrt(s[SIEVESIZE-1]) */\n  unsigned char s[SIEVESIZE + 1];  /* sieve table */\n} gmp_primesieve_t;\n\n#define gmp_init_primesieve __gmp_init_primesieve\n__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);\n\n#define gmp_nextprime __gmp_nextprime\n__GMP_DECLSPEC mpir_ui gmp_nextprime (gmp_primesieve_t *);\n\n#define gmp_primesieve __gmp_primesieve\n__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);\n\n/* For a threshold between algorithms A and B, size>=thresh is where B\n   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or\n   value 0 means only ever use B.  The tests for these special values will\n   be compile-time constants, so the compiler should be able to eliminate\n   the code for the unwanted algorithm.  */\n\n#define ABOVE_THRESHOLD(size,thresh)    \\\n  ((thresh) == 0                        \\\n   || ((thresh) != MP_SIZE_T_MAX        \\\n       && (size) >= (thresh)))\n#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))\n\n/* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a\n   value which is good on most machines.  
*/\n#ifndef MUL_KARATSUBA_THRESHOLD\n#define MUL_KARATSUBA_THRESHOLD 32\n#endif\n\n#ifndef SQR_KARATSUBA_THRESHOLD\n#define SQR_KARATSUBA_THRESHOLD 32\n#endif\n\n/* If MUL_TOOM3_THRESHOLD is not already defined, define it to a\n   value which is good on most machines.  */\n#ifndef MUL_TOOM3_THRESHOLD\n#define MUL_TOOM3_THRESHOLD 128\n#endif\n\n#ifndef MUL_TOOM4_THRESHOLD\n#define MUL_TOOM4_THRESHOLD 300\n#endif\n\n#ifndef MULMID_TOOM42_THRESHOLD\n#define MULMID_TOOM42_THRESHOLD 36\n#endif\n\n#ifndef MUL_TOOM8H_THRESHOLD\n#define MUL_TOOM8H_THRESHOLD 401\n#endif\n\n#ifndef SQR_TOOM3_THRESHOLD\n#define SQR_TOOM3_THRESHOLD 128\n#endif\n\n#ifndef SQR_TOOM4_THRESHOLD\n#define SQR_TOOM4_THRESHOLD 300\n#endif\n\n#ifndef SQR_TOOM8_THRESHOLD\n#define SQR_TOOM8_THRESHOLD 401\n#endif\n\n#ifndef MULLOW_BASECASE_THRESHOLD\n#define MULLOW_BASECASE_THRESHOLD\t8\n#endif\n\n#ifndef MULLOW_DC_THRESHOLD\n#define MULLOW_DC_THRESHOLD\t32\n#endif\n\n#ifndef MULLOW_MUL_THRESHOLD\n#define MULLOW_MUL_THRESHOLD\t8192\n#endif\n\n#ifndef MULHIGH_BASECASE_THRESHOLD\n#define MULHIGH_BASECASE_THRESHOLD\t16\n#endif\n\n#ifndef MULHIGH_DC_THRESHOLD\n#define MULHIGH_DC_THRESHOLD\t32\n#endif\n\n#ifndef MULHIGH_MUL_THRESHOLD\n#define MULHIGH_MUL_THRESHOLD\t8192\n#endif\n\n#ifndef MULMOD_2EXPM1_THRESHOLD\n#define MULMOD_2EXPM1_THRESHOLD\t16\n#endif\n\n#ifndef FAC_UI_THRESHOLD\n#define FAC_UI_THRESHOLD\t8192\n#endif\n\n#ifndef ROOTREM_THRESHOLD\n#define ROOTREM_THRESHOLD\t8\n#endif\n\n#ifndef DIVREM_HENSEL_QR_1_THRESHOLD\n#define DIVREM_HENSEL_QR_1_THRESHOLD 8\n#endif\n\n#ifndef RSH_DIVREM_HENSEL_QR_1_THRESHOLD\n#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 8\n#endif\n\n#ifndef DIVREM_EUCLID_HENSEL_THRESHOLD\n#define DIVREM_EUCLID_HENSEL_THRESHOLD 32\n#endif\n\n#ifndef MOD_1_1_THRESHOLD\n#define MOD_1_1_THRESHOLD\t16\n#endif\n\n#ifndef MOD_1_2_THRESHOLD\n#define MOD_1_2_THRESHOLD\t32\n#endif\n\n#ifndef MOD_1_3_THRESHOLD\n#define MOD_1_3_THRESHOLD\t64\n#endif\n\n/* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum 
for MUL_KARATSUBA_THRESHOLD.\n   In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.\n   In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a\n   variable and a separate hard limit will have been defined.  Similarly for\n   TOOM3.  */\n#ifndef MUL_KARATSUBA_THRESHOLD_LIMIT\n#define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD\n#endif\n#ifndef MUL_TOOM3_THRESHOLD_LIMIT\n#define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD\n#endif\n#ifndef MUL_TOOM4_THRESHOLD_LIMIT\n#define MUL_TOOM4_THRESHOLD_LIMIT  MUL_TOOM4_THRESHOLD\n#endif\n#ifndef MUL_TOOM8H_THRESHOLD_LIMIT\n#define MUL_TOOM8H_THRESHOLD_LIMIT  MUL_TOOM8H_THRESHOLD\n#endif\n#ifndef MULLOW_BASECASE_THRESHOLD_LIMIT\n#define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD\n#endif\n\n/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from\n   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase\n   always.  (Note that we certainly always want it if there's a native\n   assembler mpn_sqr_basecase.)\n\n   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase\n   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the\n   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises\n   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to\n   which mpn_sqr_basecase should be used, and that may be never.  */\n\n#ifndef SQR_BASECASE_THRESHOLD\n#define SQR_BASECASE_THRESHOLD 0\n#endif\n\n#ifndef SQR_KARATSUBA_THRESHOLD\n#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef SQR_TOOM3_THRESHOLD\n#define SQR_TOOM3_THRESHOLD 128\n#endif\n\n#ifndef SQR_TOOM4_THRESHOLD\n#define SQR_TOOM4_THRESHOLD 300\n#endif\n\n#ifndef SQR_TOOM8_THRESHOLD\n#define SQR_TOOM8_THRESHOLD 400\n#endif\n\n/* See comments above about MUL_TOOM3_THRESHOLD_LIMIT.  
*/\n#ifndef SQR_TOOM3_THRESHOLD_LIMIT\n#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD\n#endif\n\n#ifndef SQR_TOOM4_THRESHOLD_LIMIT\n#define SQR_TOOM4_THRESHOLD_LIMIT  SQR_TOOM4_THRESHOLD\n#endif\n\n#ifndef SQR_TOOM8_THRESHOLD_LIMIT\n#define SQR_TOOM8_THRESHOLD_LIMIT  SQR_TOOM8_THRESHOLD\n#endif\n\n/* points at which fft is used for mul/sqr and mulmod_Bexp resp. */\n#ifndef MUL_FFT_FULL_THRESHOLD\n#define MUL_FFT_FULL_THRESHOLD   (MUL_TOOM8H_THRESHOLD * 10)\n#endif\n#ifndef SQR_FFT_FULL_THRESHOLD\n#define SQR_FFT_FULL_THRESHOLD   (SQR_TOOM8_THRESHOLD * 10)\n#endif\n\n#ifndef MUL_FFT_THRESHOLD\n#define MUL_FFT_THRESHOLD   (MUL_FFT_FULL_THRESHOLD / 2)\n#endif\n#ifndef SQR_FFT_THRESHOLD\n#define SQR_FFT_THRESHOLD   (SQR_FFT_FULL_THRESHOLD / 2)\n#endif\n\n#ifndef FFT_MULMOD_2EXPP1_CUTOFF\n#define FFT_MULMOD_2EXPP1_CUTOFF 128\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2\n\n#ifndef REDC_1_TO_REDC_2_THRESHOLD\n#define REDC_1_TO_REDC_2_THRESHOLD       15\n#endif\n#ifndef REDC_2_TO_REDC_N_THRESHOLD\n#define REDC_2_TO_REDC_N_THRESHOLD      100\n#endif\n\n#else\n\n#ifndef REDC_1_TO_REDC_N_THRESHOLD\n#define REDC_1_TO_REDC_N_THRESHOLD      100\n#endif\n\n#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */\n\n#ifndef DC_DIV_QR_THRESHOLD\n#define DC_DIV_QR_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef DC_DIVAPPR_Q_N_THRESHOLD\n#define DC_DIVAPPR_Q_N_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef DC_BDIV_QR_THRESHOLD\n#define DC_BDIV_QR_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef DC_BDIV_Q_THRESHOLD\n#define DC_BDIV_Q_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef INV_DIV_QR_THRESHOLD\n#define INV_DIV_QR_THRESHOLD    (MUL_FFT_THRESHOLD/3)\n#endif\n\n#ifndef INV_DIVAPPR_Q_N_THRESHOLD\n#define INV_DIVAPPR_Q_N_THRESHOLD    (MUL_FFT_THRESHOLD/3)\n#endif\n\n#ifndef DC_DIV_Q_THRESHOLD\n#define DC_DIV_Q_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)\n#endif\n\n#ifndef 
INV_DIV_Q_THRESHOLD\n#define INV_DIV_Q_THRESHOLD    (MUL_FFT_THRESHOLD/3)\n#endif\n\n#ifndef BINV_NEWTON_THRESHOLD\n#define BINV_NEWTON_THRESHOLD           300\n#endif\n\n#ifndef DC_DIVAPPR_Q_THRESHOLD\n#define DC_DIVAPPR_Q_THRESHOLD    (3 * MUL_TOOM3_THRESHOLD)\n#endif\n\n#ifndef INV_DIVAPPR_Q_THRESHOLD\n#define INV_DIVAPPR_Q_THRESHOLD    (MUL_FFT_THRESHOLD/2)\n#endif\n\n#ifndef GET_STR_DC_THRESHOLD\n#define GET_STR_DC_THRESHOLD             18\n#endif\n\n#ifndef GET_STR_PRECOMPUTE_THRESHOLD\n#define GET_STR_PRECOMPUTE_THRESHOLD     35\n#endif\n\n#ifndef SET_STR_DC_THRESHOLD\n#define SET_STR_DC_THRESHOLD            750\n#endif\n\n#ifndef SET_STR_PRECOMPUTE_THRESHOLD\n#define SET_STR_PRECOMPUTE_THRESHOLD   2000\n#endif\n\n#ifndef FAC_ODD_THRESHOLD\n#define FAC_ODD_THRESHOLD    35\n#endif\n\n#ifndef FAC_DSC_THRESHOLD\n#define FAC_DSC_THRESHOLD   400\n#endif\n\n/* Return non-zero if xp,xsize and yp,ysize overlap.\n   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no\n   overlap.  If both these are false, there's an overlap. */\n#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \\\n  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))\n#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \\\n  (   (char *) (xp) + (xsize) > (char *) (yp)   \\\n   && (char *) (yp) + (ysize) > (char *) (xp))\n\n/* Return non-zero if xp,xsize and yp,ysize are either identical or not\n   overlapping.  Return zero if they're partially overlapping. */\n#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \\\n  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)\n#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \\\n  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))\n\n/* Return non-zero if dst,dsize and src,ssize are either identical or\n   overlapping in a way suitable for an incrementing/decrementing algorithm.\n   Return zero if they're partially overlapping in an unsuitable fashion. */\n#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \\\n  ((dst) <= (src) || ! 
MPN_OVERLAP_P (dst, dsize, src, ssize))\n#define MPN_SAME_OR_INCR_P(dst, src, size)      \\\n  MPN_SAME_OR_INCR2_P(dst, size, src, size)\n#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \\\n  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))\n#define MPN_SAME_OR_DECR_P(dst, src, size)      \\\n  MPN_SAME_OR_DECR2_P(dst, size, src, size)\n\n\n/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.\n   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()\n   does it always.  Generally assertions are meant for development, but\n   might help when looking for a problem later too.\n\n   Note that strings shouldn't be used within the ASSERT expression,\n   eg. ASSERT(strcmp(s,\"notgood\")!=0), since the quotes upset the \"expr\"\n   used in the !HAVE_STRINGIZE case (ie. K&R).  */\n\n#ifdef __LINE__\n#define ASSERT_LINE  __LINE__\n#else\n#define ASSERT_LINE  -1\n#endif\n\n#ifdef __FILE__\n#define ASSERT_FILE  __FILE__\n#else\n#define ASSERT_FILE  \"\"\n#endif\n\n__GMP_DECLSPEC void __gmp_assert_header(const char *filename, int linenum);\n__GMP_DECLSPEC void __gmp_assert_fail(const char *filename, int linenum, const char *expr) ATTRIBUTE_NORETURN;\n\n#if HAVE_STRINGIZE\n#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)\n#else\n#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, \"expr\")\n#endif\n\n#define ASSERT_ALWAYS(expr)     \\\n  do {                          \\\n    if (!(expr))                \\\n      ASSERT_FAIL (expr);       \\\n  } while (0)\n\n#if WANT_ASSERT\n#define ASSERT(expr)   ASSERT_ALWAYS (expr)\n#else\n#define ASSERT(expr)   do {} while (0)\n#endif\n\n\n/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks\n   that it's zero.  In both cases if assertion checking is disabled the\n   expression is still evaluated.  
These macros are meant for use with\n   routines like mpn_add_n() where the return value represents a carry or\n   whatever that should or shouldn't occur in some context.  For example,\n   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */\n#if WANT_ASSERT\n#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)\n#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)\n#else\n#define ASSERT_CARRY(expr)     (expr)\n#define ASSERT_NOCARRY(expr)   (expr)\n#endif\n\n\n/* ASSERT_CODE includes code when assertion checking is wanted.  This is the\n   same as writing \"#if WANT_ASSERT\", but more compact.  */\n#if WANT_ASSERT\n#define ASSERT_CODE(expr)  expr\n#else\n#define ASSERT_CODE(expr)\n#endif\n\n\n/* Test that an mpq_t is in fully canonical form.  This can be used as\n   protection on routines like mpq_equal which give wrong results on\n   non-canonical inputs.  */\n#if WANT_ASSERT\n#define ASSERT_MPQ_CANONICAL(q)                         \\\n  do {                                                  \\\n    ASSERT (q->_mp_den._mp_size > 0);                   \\\n    if (q->_mp_num._mp_size == 0)                       \\\n      {                                                 \\\n        /* zero should be 0/1 */                        \\\n        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \\\n      }                                                 \\\n    else                                                \\\n      {                                                 \\\n        /* no common factors */                         \\\n        mpz_t  __g;                                     \\\n        mpz_init (__g);                                 \\\n        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \\\n        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \\\n        mpz_clear (__g);                                \\\n      }                                                 \\\n  } while (0)\n#else\n#define ASSERT_MPQ_CANONICAL(q)  do {} while 
(0)\n#endif\n\n/* Check that the nail parts are zero. */\n#define ASSERT_ALWAYS_LIMB(limb)                \\\n  do {                                          \\\n    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \\\n    ASSERT_ALWAYS (__nail == 0);                \\\n  } while (0)\n#define ASSERT_ALWAYS_MPN(ptr, size)            \\\n  do {                                          \\\n    /* let whole loop go dead when no nails */  \\\n    if (GMP_NAIL_BITS != 0)                     \\\n      {                                         \\\n        mp_size_t  __i;                         \\\n        for (__i = 0; __i < (size); __i++)      \\\n          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \\\n      }                                         \\\n  } while (0)\n#if WANT_ASSERT\n#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)\n#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)\n#else\n#define ASSERT_LIMB(limb)       do {} while (0)\n#define ASSERT_MPN(ptr, size)   do {} while (0)\n#endif\n\n\n/* Assert that an mpn region {ptr,size} is zero, or non-zero.\n   size==0 is allowed, and in that case {ptr,size} considered to be zero.  
*/\n#if WANT_ASSERT\n#define ASSERT_MPN_ZERO_P(ptr,size)     \\\n  do {                                  \\\n    mp_size_t  __i;                     \\\n    ASSERT ((size) >= 0);               \\\n    for (__i = 0; __i < (size); __i++)  \\\n      ASSERT ((ptr)[__i] == 0);         \\\n  } while (0)\n#define ASSERT_MPN_NONZERO_P(ptr,size)  \\\n  do {                                  \\\n    mp_size_t  __i;                     \\\n    int        __nonzero = 0;           \\\n    ASSERT ((size) >= 0);               \\\n    for (__i = 0; __i < (size); __i++)  \\\n      if ((ptr)[__i] != 0)              \\\n        {                               \\\n          __nonzero = 1;                \\\n          break;                        \\\n        }                               \\\n    ASSERT (__nonzero);                 \\\n  } while (0)\n#else\n#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)\n#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)\n#endif\n\n\n#if HAVE_NATIVE_mpn_com_n\n#define mpn_com_n __MPN(com_n)\n__GMP_DECLSPEC void mpn_com_n(mp_ptr, mp_srcptr, mp_size_t);\n#elif !defined(mpn_com_n)\n#define mpn_com_n(d,s,n)                                \\\n  do {                                                  \\\n    mp_ptr     __d = (d);                               \\\n    mp_srcptr  __s = (s);                               \\\n    mp_size_t  __n = (n);                               \\\n    ASSERT (__n >= 1);                                  \\\n    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \\\n    do                                                  \\\n      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \\\n    while (--__n);                                      \\\n  } while (0)\n#endif\n\n#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \\\n  do {                                                  \\\n    mp_ptr\t __d = (d);                             \\\n    mp_srcptr\t __s1 = (s1);                           \\\n    mp_srcptr\t 
__s2 = (s2);                           \\\n    mp_size_t\t __n = (n);                             \\\n    ASSERT (__n >= 1);                                  \\\n    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \\\n    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \\\n    do                                                  \\\n      operation;                                        \\\n    while (--__n);                                      \\\n  } while (0)\n\n#if !HAVE_NATIVE_mpn_and_n\n#undef mpn_and_n\n#define mpn_and_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)\n#endif\n\n#if !HAVE_NATIVE_mpn_andn_n\n#undef mpn_andn_n\n#define mpn_andn_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)\n#endif\n\n#if !HAVE_NATIVE_mpn_nand_n\n#undef mpn_nand_n\n#define mpn_nand_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)\n#endif\n\n#if !HAVE_NATIVE_mpn_ior_n\n#undef mpn_ior_n\n#define mpn_ior_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)\n#endif\n\n#if !HAVE_NATIVE_mpn_iorn_n\n#undef mpn_iorn_n\n#define mpn_iorn_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)\n#endif\n\n#if !HAVE_NATIVE_mpn_nior_n\n#undef mpn_nior_n\n#define mpn_nior_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)\n#endif\n\n#if !HAVE_NATIVE_mpn_xor_n\n#undef mpn_xor_n\n#define mpn_xor_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)\n#endif\n\n#if !HAVE_NATIVE_mpn_xnor_n\n#undef mpn_xnor_n\n#define mpn_xnor_n(d, s1, s2, n) \\\n  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)\n#endif\n\n#if HAVE_NATIVE_mpn_not\n#define mpn_not __MPN(not)\n__GMP_DECLSPEC void mpn_not(mp_ptr,mp_size_t);\n#else\n#define mpn_not(__xp,__n) 
mpn_com((__xp),(__xp),(__n))\n#endif\n\n#if HAVE_NATIVE_mpn_double\n#define mpn_double __MPN(double)\n__GMP_DECLSPEC mp_limb_t mpn_double(mp_ptr,mp_size_t);\n#else\n#define mpn_double(__xp,__n) mpn_lshift1((__xp),(__xp),(__n))\n#endif\n\n#if HAVE_NATIVE_mpn_half\n#define mpn_half __MPN(half)\n__GMP_DECLSPEC mp_limb_t mpn_half(mp_ptr,mp_size_t);\n#else\n#define mpn_half(__xp,__n) mpn_rshift1((__xp),(__xp),(__n))\n#endif\n\n#if HAVE_NATIVE_mpn_lshift1\n#define mpn_lshift1 __MPN(lshift1)\n__GMP_DECLSPEC mp_limb_t mpn_lshift1(mp_ptr,mp_srcptr,mp_size_t);\n#else\n#define mpn_lshift1(__xp,__yp,__n) mpn_lshift((__xp),(__yp),(__n),1)\n#endif\n\n#if HAVE_NATIVE_mpn_rshift1\n#define mpn_rshift1 __MPN(rshift1)\n__GMP_DECLSPEC mp_limb_t mpn_rshift1(mp_ptr,mp_srcptr,mp_size_t);\n#else\n#define mpn_rshift1(__xp,__yp,__n) mpn_rshift((__xp),(__yp),(__n),1)\n#endif\n\n#if HAVE_NATIVE_mpn_lshift2\n#define mpn_lshift2 __MPN(lshift2)\n__GMP_DECLSPEC mp_limb_t mpn_lshift2(mp_ptr,mp_srcptr,mp_size_t);\n#else\n#define mpn_lshift2(__xp,__yp,__n) mpn_lshift((__xp),(__yp),(__n),2)\n#endif\n\n#if HAVE_NATIVE_mpn_rshift2\n#define mpn_rshift2 __MPN(rshift2)\n__GMP_DECLSPEC mp_limb_t mpn_rshift2(mp_ptr,mp_srcptr,mp_size_t);\n#else\n#define mpn_rshift2(__xp,__yp,__n) mpn_rshift((__xp),(__yp),(__n),2)\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n#define mpn_addlsh1_n __MPN(addlsh1_n)\n__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n#elif HAVE_NATIVE_mpn_addlsh_n\n#define mpn_addlsh1_n(__xp,__yp,__zp,__n) mpn_addlsh_n((__xp),(__yp),(__zp),(__n),1)\n#define HAVE_NATIVE_mpn_addlsh1_n 1\n#endif\n\n#if HAVE_NATIVE_mpn_sublsh1_n\n#define mpn_sublsh1_n __MPN(sublsh1_n)\n__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\n#elif HAVE_NATIVE_mpn_sublsh_n\n#define mpn_sublsh1_n(__xp,__yp,__zp,__n) mpn_sublsh_n((__xp),(__yp),(__zp),(__n),1)\n#define HAVE_NATIVE_mpn_sublsh1_n 1\n#endif\n\n#if HAVE_NATIVE_mpn_inclsh_n\n#define mpn_inclsh_n 
__MPN(inclsh_n)\n__GMP_DECLSPEC mp_limb_t mpn_inclsh_n(mp_ptr, mp_srcptr, mp_size_t, unsigned int);\n#elif HAVE_NATIVE_mpn_addlsh_n\n#define mpn_inclsh_n(__xp,__yp,__n,__c) mpn_addlsh_n((__xp),(__xp),(__yp),(__n),(__c))\n#define HAVE_NATIVE_mpn_inclsh_n 1\n#endif\n\n#if HAVE_NATIVE_mpn_declsh_n\n#define mpn_declsh_n __MPN(declsh_n)\n__GMP_DECLSPEC mp_limb_t mpn_declsh_n(mp_ptr, mp_srcptr, mp_size_t, unsigned int);\n#elif HAVE_NATIVE_mpn_sublsh_n\n#define mpn_declsh_n(__xp,__yp,__n,__c) mpn_sublsh_n((__xp),(__xp),(__yp),(__n),(__c))\n#define HAVE_NATIVE_mpn_declsh_n 1\n#endif\n\n#if HAVE_NATIVE_mpn_store\n#define mpn_store __MPN(store)\n__GMP_DECLSPEC mp_limb_t mpn_store(mp_ptr,mp_size_t,mp_limb_t);\n#else\n#define mpn_store(dst, n,val)\t\t\t\\\n  do {\t\t\t\t\t\t\\\n    ASSERT ((n) >= 0);\t\t\t\t\\\n    if ((n) != 0)\t\t\t\t\\\n      {\t\t\t\t\t\t\\\n    mp_ptr __dst = (dst);\t\t\t\\\n    mp_size_t __n = (n);\t\t\t\\\n    do\t\t\t\t\t\\\n      *__dst++ = val;\t\t\t\\\n    while (--__n);\t\t\t\t\\\n      }\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#define MPN_ZERO(dst,n)\tmpn_store(dst,n,0)\n\n/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */\n#if GMP_NAIL_BITS == 0\n#define ADDC_LIMB(cout, w, x, y)        \\\n  do {                                  \\\n    mp_limb_t  __x = (x);               \\\n    mp_limb_t  __y = (y);               \\\n    mp_limb_t  __w = __x + __y;         \\\n    (w) = __w;                          \\\n    (cout) = __w < __x;                 \\\n  } while (0)\n#else\n#define ADDC_LIMB(cout, w, x, y)        \\\n  do {                                  \\\n    mp_limb_t  __w;                     \\\n    ASSERT_LIMB (x);                    \\\n    ASSERT_LIMB (y);                    \\\n    __w = (x) + (y);                    \\\n    (w) = __w & GMP_NUMB_MASK;          \\\n    (cout) = __w >> GMP_NUMB_BITS;      \\\n  } while (0)\n#endif\n\n/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that\n   subtract.  
*/\n#if GMP_NAIL_BITS == 0\n#define SUBC_LIMB(cout, w, x, y)        \\\n  do {                                  \\\n    mp_limb_t  __x = (x);               \\\n    mp_limb_t  __y = (y);               \\\n    mp_limb_t  __w = __x - __y;         \\\n    (w) = __w;                          \\\n    (cout) = __w > __x;                 \\\n  } while (0)\n#else\n#define SUBC_LIMB(cout, w, x, y)        \\\n  do {                                  \\\n    mp_limb_t  __w = (x) - (y);         \\\n    (w) = __w & GMP_NUMB_MASK;          \\\n    (cout) = __w >> (GMP_LIMB_BITS-1);  \\\n  } while (0)\n#endif\n\n\n/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both\n   expecting no carry (or borrow) from that.\n\n   The size parameter is only for the benefit of assertion checking.  In a\n   normal build it's unused and the carry/borrow is just propagated as far\n   as it needs to go.\n\n   On random data, usually only one or two limbs of {ptr,size} get updated,\n   so there's no need for any sophisticated looping, just something compact\n   and sensible.\n\n   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,\n   declaring their operand sizes, then remove the former.  This is purely\n   for the benefit of assertion checking.  
*/\n\n#if GMP_NAIL_BITS == 0\n#ifndef mpn_incr_u\n#define mpn_incr_u(p,incr)                              \\\n  do {                                                  \\\n    mp_limb_t __x;                                      \\\n    mp_ptr __p = (p);                                   \\\n    if (__builtin_constant_p (incr) && (incr) == 1)     \\\n      {                                                 \\\n        while (++(*(__p++)) == 0)                       \\\n          ;                                             \\\n      }                                                 \\\n    else                                                \\\n      {                                                 \\\n        __x = *__p + (incr);                            \\\n        *__p = __x;                                     \\\n        if (__x < (incr))                               \\\n          while (++(*(++__p)) == 0)                     \\\n            ;                                           \\\n      }                                                 \\\n  } while (0)\n#endif\n#ifndef mpn_decr_u\n#define mpn_decr_u(p,incr)                              \\\n  do {                                                  \\\n    mp_limb_t __x;                                      \\\n    mp_ptr __p = (p);                                   \\\n    if (__builtin_constant_p (incr) && (incr) == 1)     \\\n      {                                                 \\\n        while ((*(__p++))-- == 0)                       \\\n          ;                                             \\\n      }                                                 \\\n    else                                                \\\n      {                                                 \\\n        __x = *__p;                                     \\\n        *__p = __x - (incr);                            \\\n        if (__x < (incr))                               \\\n          while ((*(++__p))-- == 0)            
         \\\n            ;                                           \\\n      }                                                 \\\n  } while (0)\n#endif\n#endif\n\n#if GMP_NAIL_BITS >= 1\n#ifndef mpn_incr_u\n#define mpn_incr_u(p,incr)                              \\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __x;\t\t\t\t\t\\\n    mp_ptr __p = (p);\t\t\t\t\t\\\n    if (__builtin_constant_p (incr) && (incr) == 1)\t\\\n      {\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\\\n        __x = (*__p + 1) & GMP_NUMB_MASK;\t\t\\\n        *__p++ = __x;\t\t\t\t\\\n      }\t\t\t\t\t\t\\\n    while (__x == 0);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\\\n    else\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\\\n    __x = (*__p + (incr));\t\t\t\t\\\n    *__p++ = __x & GMP_NUMB_MASK;\t\t\t\\\n    if (__x >> GMP_NUMB_BITS != 0)\t\t\t\\\n      {\t\t\t\t\t\t\\\n        do\t\t\t\t\t\t\\\n          {\t\t\t\t\t\t\\\n        __x = (*__p + 1) & GMP_NUMB_MASK;\t\\\n        *__p++ = __x;\t\t\t\t\\\n          }\t\t\t\t\t\t\\\n        while (__x == 0);\t\t\t\t\\\n      }\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n#ifndef mpn_decr_u\n#define mpn_decr_u(p,incr)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __x;\t\t\t\t\t\\\n    mp_ptr __p = (p);\t\t\t\t\t\\\n    if (__builtin_constant_p (incr) && (incr) == 1)\t\\\n      {\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\\\n        __x = *__p;\t\t\t\t\t\\\n        *__p++ = (__x - 1) & GMP_NUMB_MASK;\t\t\\\n      }\t\t\t\t\t\t\\\n    while (__x == 0);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\\\n    else\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\\\n    __x = *__p - (incr);\t\t\t\t\\\n    *__p++ = __x & GMP_NUMB_MASK;\t\t\t\\\n    if (__x >> GMP_NUMB_BITS != 0)\t\t\t\\\n      {\t\t\t\t\t\t\\\n        do\t\t\t\t\t\t\\\n          {\t\t\t\t\t\t\\\n        __x = *__p;\t\t\t\t\\\n        *__p++ = (__x - 1) & GMP_NUMB_MASK;\t\\\n          }\t\t\t\t\t\t\\\n        while (__x == 0);\t\t\t\t\\\n      }\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\\\n  
} while (0)\n#endif\n#endif\n\n#ifndef MPN_INCR_U\n#if WANT_ASSERT\n#define MPN_INCR_U(ptr, size, n)                        \\\n  do {                                                  \\\n    ASSERT ((size) >= 1);                               \\\n    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \\\n  } while (0)\n#else\n#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)\n#endif\n#endif\n\n#ifndef MPN_DECR_U\n#if WANT_ASSERT\n#define MPN_DECR_U(ptr, size, n)                        \\\n  do {                                                  \\\n    ASSERT ((size) >= 1);                               \\\n    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \\\n  } while (0)\n#else\n#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)\n#endif\n#endif\n\n\n/* Structure for conversion between internal binary format and\n   strings in base 2..36.  */\nstruct bases\n{\n  /* Number of digits in the conversion base that always fits in an mp_limb_t.\n     For example, for base 10 on a machine where a mp_limb_t has 32 bits this\n     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */\n  int chars_per_limb;\n\n  /* log(2)/log(conversion_base) */\n  double chars_per_bit_exactly;\n\n  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by\n     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),\n     i.e. the number of bits used to represent each digit in the base.  */\n  mp_limb_t big_base;\n\n  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a\n     fixed-point number.  Instead of dividing by big_base an application can\n     choose to multiply by big_base_inverted.  */\n  mp_limb_t big_base_inverted;\n};\n\n#define mp_bases __MPN(bases)\n#define __mp_bases __MPN(bases)\n__GMP_DECLSPEC extern const struct bases mp_bases[257];\n\n/* the following are exposed for the benefit of MPIR.Net. 
*/\n\n__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[480];\n\n/* This struct is used to pass local state between functions that comprise the raw i/o interfaces mpz_inp_raw and mpz_out_raw.\n   Previously monolithic, they were split into several calls for the benefit of MPIR.Net.\n   The separation did not change the contract nor the implementation, merely separating the routines into several steps,\n   in order for MPIR.Net to consume the raw format processing code while substituting its own file I/O. */\ntypedef struct\n{\n    char* allocated;\n    size_t allocatedSize;\n    char* written;\n    size_t writtenSize;\n} __mpir_out_struct;\ntypedef __mpir_out_struct mpir_out_struct[1];\ntypedef __mpir_out_struct *mpir_out_ptr;\n/* Part of mpz_inp_raw that decodes input size and allocates appropriate memory. Does not need _GMP_H_HAVE_FILE. Also used by MPIR.Net. */\n__GMP_DECLSPEC void mpz_inp_raw_p(mpz_ptr x, unsigned char* csize_bytes, mpir_out_ptr out);\n/* Part of mpz_inp_raw that reconstitutes limb data from raw format. Does not need _GMP_H_HAVE_FILE. Also used by MPIR.Net. */\n__GMP_DECLSPEC void mpz_inp_raw_m(mpz_ptr x, mpir_out_ptr out);\n/* Part of mpz_out_raw that performs raw output into memory in preparation for writing out to a file. Does not need _GMP_H_HAVE_FILE. Also used by MPIR.Net. */\n__GMP_DECLSPEC void mpz_out_raw_m(mpir_out_ptr, mpz_srcptr);\n\n/* End of MPIR.Net consumables */\n\n/* For power of 2 bases this is exact.  For other bases the result is either\n   exact or one too big.\n\n   To be exact always it'd be necessary to examine all the limbs of the\n   operand, since numbers like 100..000 and 99...999 generally differ only\n   in the lowest limb.  It'd be possible to examine just a couple of high\n   limbs to increase the probability of being exact, but that doesn't seem\n   worth bothering with.  
*/\n\n#define MPN_SIZEINBASE(result, ptr, size, base)                         \\\n  do {                                                                  \\\n    int       __lb_base, __cnt;                                         \\\n    size_t __totbits;                                                   \\\n                                                                        \\\n    ASSERT ((size) >= 0);                                               \\\n    ASSERT ((base) >= 2);                                               \\\n    ASSERT ((base) < numberof (mp_bases));                              \\\n                                                                        \\\n    /* Special case for X == 0.  */                                     \\\n    if ((size) == 0)                                                    \\\n      (result) = 1;                                                     \\\n    else                                                                \\\n      {                                                                 \\\n        /* Calculate the total number of significant bits of X.  
*/     \\\n        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \\\n        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\\\n                                                                        \\\n        if (POW2_P (base))                                              \\\n          {                                                             \\\n            __lb_base = mp_bases[base].big_base;                        \\\n            (result) = (__totbits + __lb_base - 1) / __lb_base;         \\\n          }                                                             \\\n        else                                                            \\\n          (result) = (size_t)                                           \\\n            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \\\n      }                                                                 \\\n  } while (0)\n\n#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\t\\\n    int          __cnt;\t\t\t\t\t\t\t\t\\\n    mp_bitcnt_t  __totbits;\t\t\t\t\t\t\t\\\n    ASSERT ((size) > 0);\t\t\t\t\t\t\t\\\n    ASSERT ((ptr)[(size)-1] != 0);\t\t\t\t\t\t\\\n    count_leading_zeros (__cnt, (ptr)[(size)-1]);\t\t\t\t\\\n    __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\t\\\n    (result) = (__totbits + (base2exp)-1) / (base2exp);\t\t\t\t\\\n  } while (0)\n\n/* eliminate mp_bases lookups for base==16 */\n#define MPN_SIZEINBASE_16(result, ptr, size)                            \\\n  do {                                                                  \\\n    int       __cnt;                                                    \\\n    mp_size_t __totbits;                                                \\\n                                                                        \\\n    ASSERT ((size) >= 0);                                               \\\n                                             
                           \\\n    /* Special case for X == 0.  */                                     \\\n    if ((size) == 0)                                                    \\\n      (result) = 1;                                                     \\\n    else                                                                \\\n      {                                                                 \\\n        /* Calculate the total number of significant bits of X.  */     \\\n        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \\\n        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\\\n        (result) = (__totbits + 4 - 1) / 4;                             \\\n      }                                                                 \\\n  } while (0)\n\n/* bit count to limb count, rounding up */\n#define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)\n\n/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake\n   mpz_t from ui.  The zp argument must have room for LIMBS_PER_UI limbs\n   in both cases (LIMBS_PER_UI is also defined here.) */\n#if BITS_PER_UI <= GMP_NUMB_BITS /* need one limb per ulong */\n\n#define LIMBS_PER_UI 1\n#define MPN_SET_UI(zp, zn, u)   \\\n  (zp)[0] = (u);                \\\n  (zn) = ((zp)[0] != 0);\n#define MPZ_FAKE_UI(z, zp, u)   \\\n  (zp)[0] = (u);                \\\n  PTR (z) = (zp);               \\\n  SIZ (z) = ((zp)[0] != 0);     \\\n  ASSERT_CODE (ALLOC (z) = 1);\n\n#else /* need two limbs per ulong */\n\n#define LIMBS_PER_UI 2\n#define MPN_SET_UI(zp, zn, u)                          \\\n  (zp)[0] = (u) & GMP_NUMB_MASK;                       \\\n  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \\\n  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 
1 : 0);\n#define MPZ_FAKE_UI(z, zp, u)                          \\\n  (zp)[0] = (u) & GMP_NUMB_MASK;                       \\\n  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \\\n  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \\\n  PTR (z) = (zp);                                      \\\n  ASSERT_CODE (ALLOC (z) = 2);\n\n#endif\n\n/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1\n   or 0 there into a limb 0xFF..FF or 0 respectively.\n\n   On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,\n   but C99 doesn't guarantee signed right shifts are arithmetic, so we have\n   a little compile-time test and a fallback to a \"? :\" form.  The latter is\n   necessary for instance on Cray vector systems.\n\n   Recent versions of gcc (eg. 3.3) will in fact optimize a \"? :\" like this\n   to an arithmetic right shift anyway, but it's good to get the desired\n   shift on past versions too (in particular since an important use of\n   LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */\n\n#define LIMB_HIGHBIT_TO_MASK(n)                                 \\\n  (((mp_limb_signed_t) -1 >> 1) < 0                             \\\n   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \\\n   : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))\n\n\n/* Use a library function for invert_limb, if available. */\n#define mpn_invert_limb  __MPN(invert_limb)\nmp_limb_t mpn_invert_limb(mp_limb_t) ATTRIBUTE_CONST;\n#if ! 
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb\n#define invert_limb(invxl,xl)           \\\n  do {                                  \\\n    (invxl) = mpn_invert_limb (xl);     \\\n  } while (0)\n#endif\n\n#ifndef invert_limb\n#define invert_limb(invxl,xl)                   \\\n  do {                                          \\\n    mp_limb_t dummy;                            \\\n    ASSERT ((xl) != 0);                         \\\n    udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \\\n  } while (0)\n#endif\n\n#define mpir_invert_pi1(dinv, d1, d0)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _v, _p, _t1, _t0, _mask;\t\t\t\t\t\\\n    invert_limb (_v, d1);\t\t\t\t\t\t\\\n    _p = (d1) * _v;\t\t\t\t\t\t\t\\\n    _p += (d0);\t\t\t\t\t\t\t\t\\\n    if (_p < (d0))\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\t_v--;\t\t\t\t\t\t\t\t\\\n\t_mask = -(mp_limb_t) (_p >= (d1));\t\t\t\t\\\n\t_p -= (d1);\t\t\t\t\t\t\t\\\n\t_v += _mask;\t\t\t\t\t\t\t\\\n\t_p -= _mask & (d1);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    umul_ppmm (_t1, _t0, d0, _v);\t\t\t\t\t\\\n    _p += _t1;\t\t\t\t\t\t\t\t\\\n    if (_p < _t1)\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\t_v--;\t\t\t\t\t\t\t\t\\\n\tif (UNLIKELY (_p >= (d1)))\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    if (_p > (d1) || _t0 >= (d0))\t\t\t\t\\\n\t      _v--;\t\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    dinv = _v;\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* For compatibility with GMP only */\n#define invert_pi1(dinv, d1, d0)\t\t\t\t\\\n   mpir_invert_pi1((dinv).inv32, d1, d0)\n\n/* Compute the quotient and remainder for n / d. Requires d\n   >= B^2 / 2 and n < d B. di is the inverse\n\n     floor ((B^3 - 1) / (d0 + d1 B)) - B.\n\n   NOTE: Output variables are updated multiple times. 
Only some inputs\n   and outputs may overlap.\n*/\n#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _q0, _t1, _t0;\t\t\t\t\t\\\n    umul_ppmm ((q), _q0, (n2), (dinv));\t\t\t\t\t\\\n    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));\t\t\t\\\n                                    \\\n    /* Compute the two most significant limbs of n - q'd */\t\t\\\n    (r1) = (n1) - (d1) * (q);\t\t\t\t\t\t\\\n    sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));\t\t\t\\\n    umul_ppmm (_t1, _t0, (d0), (q));\t\t\t\t\t\\\n    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);\t\t\t\\\n    (q)++;\t\t\t\t\t\t\t\t\\\n                                    \\\n    /* Conditionally adjust q and the remainders */\t\t\t\\\n    if ((r1) >= _q0) {\t\t\t\t\\\n       (q)--;\t\t\t\t\t\t\t\\\n       add_ssaaaa ((r1), (r0), (r1), (r0), (d1), (d0));\t} \\\n    if (UNLIKELY ((r1) >= (d1)))\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n    if ((r1) > (d1) || (r0) >= (d0))\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\\\n        (q)++;\t\t\t\t\t\t\t\\\n        sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n#ifndef udiv_qrnnd_preinv\n#define udiv_qrnnd_preinv udiv_qrnnd_preinv2\n#endif\n\n/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest\n   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).\n   If this would yield overflow, DI should be the largest possible number\n   (i.e., only ones).  For correct operation, the most significant bit of D\n   has to be set.  Put the quotient in Q and the remainder in R.  
*/\n#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _q, _ql, _r;\t\t\t\t\t\t\\\n    mp_limb_t _xh, _xl;\t\t\t\t\t\t\t\\\n    ASSERT ((d) != 0);\t\t\t\t\t\t\t\\\n    umul_ppmm (_q, _ql, (nh), (di));\t\t\t\t\t\\\n    _q += (nh);\t/* Compensate, di is 2**GMP_LIMB_BITS too small */\t\\\n    umul_ppmm (_xh, _xl, _q, (d));\t\t\t\t\t\\\n    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);\t\t\t\t\\\n    if (_xh != 0)\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n    sub_ddmmss (_xh, _r, _xh, _r, 0, (d));\t\t\t\t\\\n    _q += 1;\t\t\t\t\t\t\t\\\n    if (_xh != 0)\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\\\n        _r -= (d);\t\t\t\t\t\t\t\\\n        _q += 1;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    if (_r >= (d))\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n    _r -= (d);\t\t\t\t\t\t\t\\\n    _q += 1;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    (r) = _r;\t\t\t\t\t\t\t\t\\\n    (q) = _q;\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* Like udiv_qrnnd_preinv, but branch-free. */\n#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;\t\t\t\t\\\n    mp_limb_t _xh, _xl;\t\t\t\t\t\t\t\\\n    _n2 = (nh);\t\t\t\t\t\t\t\t\\\n    _n10 = (nl);\t\t\t\t\t\t\t\\\n    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);\t\t\t\t\\\n    _nadj = _n10 + (_nmask & (d));\t\t\t\t\t\\\n    umul_ppmm (_xh, _xl, di, _n2 - _nmask);\t\t\t\t\\\n    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);\t\t\t\\\n    _q1 = ~_xh;\t\t\t\t\t\t\t\t\\\n    umul_ppmm (_xh, _xl, _q1, d);\t\t\t\t\t\\\n    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);\t\t\t\t\\\n    _xh -= (d);\t\t\t\t\t/* xh = 0 or -1 */\t\\\n    (r) = _xl + ((d) & _xh);\t\t\t\t\t\t\\\n    (q) = _xh - _q1;\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* Like udiv_qrnnd_preinv2, but for any value D.  DNORM is D shifted left\n   so that its most significant bit is set.  LGUP is ceil(log2(D)).  
*/\n#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;\t\t\t\t\\\n    mp_limb_t _xh, _xl;\t\t\t\t\t\t\t\\\n    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\\\n    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));\t\t\t\t\\\n    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);\t\t\t\t\\\n    _nadj = _n10 + (_nmask & (dnorm));\t\t\t\t\t\\\n    umul_ppmm (_xh, _xl, di, _n2 - _nmask);\t\t\t\t\\\n    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);\t\t\t\\\n    _q1 = ~_xh;\t\t\t\t\t\t\t\t\\\n    umul_ppmm (_xh, _xl, _q1, d);\t\t\t\t\t\\\n    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);\t\t\t\t\\\n    _xh -= (d);\t\t\t\t\t\t\t\t\\\n    (r) = _xl + ((d) & _xh);\t\t\t\t\t\t\\\n    (q) = _xh - _q1;\t\t\t\t\t\t\t\\\n  } while (0)\n\n\n#ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */\n#define mpn_preinv_divrem_1  __MPN(preinv_divrem_1)\n__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1(mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);\n#endif\n\n\n/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to\n   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between\n   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since\n   the few CISC chips where preinv is not good have defines saying so.  
*/\n#ifndef USE_PREINV_DIVREM_1\n#define USE_PREINV_DIVREM_1   1\n#endif\n#ifndef USE_PREINV_MOD_1\n#define USE_PREINV_MOD_1   1\n#endif\n\n#if USE_PREINV_DIVREM_1\n#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \\\n  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)\n#else\n#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \\\n  mpn_divrem_1 (qp, xsize, ap, size, d)\n#endif\n\n#if USE_PREINV_MOD_1\n#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \\\n  mpn_preinv_mod_1 (src, size, divisor, inverse)\n#else\n#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \\\n  mpn_mod_1 (src, size, divisor)\n#endif\n\n\n#ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */\n#define mpn_mod_34lsub1 __MPN(mod_34lsub1)\n__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1(mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;\n#endif\n\n\n/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to\n   plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for\n   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and\n   modexact are faster at all sizes, so the defaults are 0.  Those CPUs\n   where this is not right have a tuned threshold.  
*/\n#ifndef DIVEXACT_1_THRESHOLD\n#define DIVEXACT_1_THRESHOLD  0\n#endif\n#ifndef MODEXACT_1_ODD_THRESHOLD\n#define MODEXACT_1_ODD_THRESHOLD  0\n#endif\n\n#ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */\n#define mpn_divexact_1 __MPN(divexact_1)\n__GMP_DECLSPEC void mpn_divexact_1(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\n#endif\n\n#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \\\n  do {                                                                        \\\n    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \\\n      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \\\n    else                                                                      \\\n      {                                                                       \\\n        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \\\n        mpn_divexact_1 (dst, src, size, divisor);                             \\\n      }                                                                       \\\n  } while (0)\n\n#ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */\n#define mpn_modexact_1c_odd  __MPN(modexact_1c_odd)\n__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd(mp_srcptr src, mp_size_t size,\n                                       mp_limb_t divisor, mp_limb_t c) __GMP_ATTRIBUTE_PURE;\n#endif\n\n#if HAVE_NATIVE_mpn_modexact_1_odd\n#define mpn_modexact_1_odd   __MPN(modexact_1_odd)\n__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd(mp_srcptr src, mp_size_t size,\n                                      mp_limb_t divisor) __GMP_ATTRIBUTE_PURE;\n#else\n#define mpn_modexact_1_odd(src,size,divisor) \\\n  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))\n#endif\n\n#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)\t\t\t\\\n  (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)\t\t\t\\\n   ? 
mpn_modexact_1_odd (src, size, divisor)\t\t\t\t\\\n   : mpn_mod_1 (src, size, divisor))\n\n\n/* modlimb_invert() sets inv to the multiplicative inverse of n modulo\n   2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.\n   n must be odd (otherwise such an inverse doesn't exist).\n\n   This is not to be confused with invert_limb(), which is completely\n   different.\n\n   The table lookup gives an inverse with the low 8 bits valid, and each\n   multiply step doubles the number of bits.  See Jebelean \"An algorithm for\n   exact division\" end of section 4 (reference in gmp.texi).\n\n   Possible enhancement: Could use UHWtype until the last step, if half-size\n   multiplies are faster (might help under _LONG_LONG_LIMB).\n\n   Alternative: As noted in Granlund and Montgomery \"Division by Invariant\n   Integers using Multiplication\" (reference in gmp.texi), n itself gives a\n   3-bit inverse immediately, and could be used instead of a table lookup.\n   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into\n   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  
*/\n\n#define modlimb_invert_table  __gmp_modlimb_invert_table\n__GMP_DECLSPEC extern const unsigned char  modlimb_invert_table[128];\n\n#define modlimb_invert(inv,n)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __n = (n);\t\t\t\t\t\t\\\n    mp_limb_t  __inv;\t\t\t\t\t\t\t\\\n    ASSERT ((__n & 1) == 1);\t\t\t\t\t\t\\\n                                    \\\n    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */\t\t\\\n    if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;\t\\\n    if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;\t\\\n    if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;\t\\\n                                    \\\n    if (GMP_NUMB_BITS > 64)\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n    int  __invbits = 64;\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      __inv = 2 * __inv - __inv * __inv * __n;\t\t\t\\\n      __invbits *= 2;\t\t\t\t\t\t\\\n    } while (__invbits < GMP_NUMB_BITS);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n                                    \\\n    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);\t\t\t\\\n    (inv) = __inv & GMP_NUMB_MASK;\t\t\t\t\t\\\n  } while (0)\n\n/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.\n   Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.\n   GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd\n   we need to start from GMP_NUMB_MAX>>1. */\n#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)\n\n/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).\n   These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */\n#define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)\n#define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)\n\n\n/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  
All should be limbs.\n\n   It's not clear whether this is the best way to do this calculation.\n   Anything congruent to -a would be fine for the one limb congruence\n   tests.  */\n\n#define NEG_MOD(r, a, d)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((d) != 0);\t\t\t\t\t\t\t\\\n    ASSERT_LIMB (a);\t\t\t\t\t\t\t\\\n    ASSERT_LIMB (d);\t\t\t\t\t\t\t\\\n                                    \\\n    if ((a) <= (d))\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n        /* small a is reasonably likely */\t\t\t\t\\\n        (r) = (d) - (a);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n        unsigned   __twos;\t\t\t\t\t\t\\\n        mp_limb_t  __dnorm;\t\t\t\t\t\t\\\n        count_leading_zeros (__twos, d);\t\t\t\t\\\n        __twos -= GMP_NAIL_BITS;\t\t\t\t\t\\\n        __dnorm = (d) << __twos;\t\t\t\t\t\\\n        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n                                    \\\n    ASSERT_LIMB (r);\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */\n#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)\n\n\n/* ULONG_PARITY sets \"p\" to 1 if there's an odd number of 1 bits in \"n\", or\n   to 0 if there's an even number.  \"n\" should be an unsigned long and \"p\"\n   an int.  */\n\n#if ! defined (ULONG_PARITY)\n#define ULONG_PARITY(p, n)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    unsigned long  __n = (n);\t\t\t\t\t\t\\\n    int  __p = 0;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n        __p ^= 0x96696996L >> (__n & 0x1F);\t\t\t\t\\\n        __n >>= 5;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (__n != 0);\t\t\t\t\t\t\t\\\n                                    \\\n    (p) = __p & 1;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if ! 
defined (BSWAP_LIMB)\n#if BITS_PER_MP_LIMB == 8\n#define BSWAP_LIMB(dst, src)            \\\n  do { (dst) = (src); } while (0)\n#endif\n#if BITS_PER_MP_LIMB == 16\n#define BSWAP_LIMB(dst, src)                    \\\n  do {                                          \\\n    (dst) = ((src) << 8) + ((src) >> 8);        \\\n  } while (0)\n#endif\n#if BITS_PER_MP_LIMB == 32\n#define BSWAP_LIMB(dst, src)    \\\n  do {                          \\\n    (dst) =                     \\\n      ((src) << 24)             \\\n      + (((src) & 0xFF00) << 8) \\\n      + (((src) >> 8) & 0xFF00) \\\n      + ((src) >> 24);          \\\n  } while (0)\n#endif\n#if BITS_PER_MP_LIMB == 64\n#define BSWAP_LIMB(dst, src)            \\\n  do {                                  \\\n    (dst) =                             \\\n      ((src) << 56)                     \\\n      + (((src) & 0xFF00) << 40)        \\\n      + (((src) & 0xFF0000) << 24)      \\\n      + (((src) & 0xFF000000) << 8)     \\\n      + (((src) >> 8) & 0xFF000000)     \\\n      + (((src) >> 24) & 0xFF0000)      \\\n      + (((src) >> 40) & 0xFF00)        \\\n      + ((src) >> 56);                  \\\n  } while (0)\n#endif\n#endif\n\n#if ! defined (BSWAP_LIMB)\n#define BSWAP_LIMB(dst, src)                            \\\n  do {                                                  \\\n    mp_limb_t  __bswapl_src = (src);                    \\\n    mp_limb_t  __dst = 0;                               \\\n    int        __i;                                     \\\n    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \\\n      {                                                 \\\n        __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \\\n        __bswapl_src >>= 8;                             \\\n      }                                                 \\\n    (dst) = __dst;                                      \\\n  } while (0)\n#endif\n\n#if ! 
defined (BSWAP_LIMB_FETCH)\n#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))\n#endif\n\n#if ! defined (BSWAP_LIMB_STORE)\n#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)\n#endif\n\n\n/* Byte swap limbs from {src,size} and store at {dst,size}. */\n#define MPN_BSWAP(dst, src, size)                       \\\n  do {                                                  \\\n    mp_ptr     __dst = (dst);                           \\\n    mp_srcptr  __src = (src);                           \\\n    mp_size_t  __size = (size);                         \\\n    mp_size_t  __i;                                     \\\n    ASSERT ((size) >= 0);                               \\\n    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \\\n    for (__i = 0; __i < __size; __i++)                  \\\n      {                                                 \\\n        BSWAP_LIMB_FETCH (*__dst, __src);               \\\n        __dst++;                                        \\\n        __src++;                                        \\\n      }                                                 \\\n  } while (0)\n\n/* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */\n#define MPN_BSWAP_REVERSE(dst, src, size)               \\\n  do {                                                  \\\n    mp_ptr     __dst = (dst);                           \\\n    mp_size_t  __size = (size);                         \\\n    mp_srcptr  __src = (src) + __size - 1;              \\\n    mp_size_t  __i;                                     \\\n    ASSERT ((size) >= 0);                               \\\n    ASSERT (! 
MPN_OVERLAP_P (dst, size, src, size));    \\\n    for (__i = 0; __i < __size; __i++)                  \\\n      {                                                 \\\n        BSWAP_LIMB_FETCH (*__dst, __src);               \\\n        __dst++;                                        \\\n        __src--;                                        \\\n      }                                                 \\\n  } while (0)\n\n/* Cool population count of an mp_limb_t.\n   You have to figure out how this works, We won't tell you!\n\n   The constants could also be expressed as:\n     0x55... = [2^N / 3]     = [(2^N-1)/3]\n     0x33... = [2^N / 5]     = [(2^N-1)/5]\n     0x0f... = [2^N / 17]    = [(2^N-1)/17]\n     (N is GMP_LIMB_BITS, [] denotes truncation.) */\n\n#if ! defined (popc_limb) && GMP_LIMB_BITS == 8\n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __x = (input);\t\t\t\t\t\t\\\n    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t\\\n    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);\t\\\n    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;\t\t\t\\\n    (result) = __x & 0xff;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if ! defined (popc_limb) && GMP_LIMB_BITS == 16\n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __x = (input);\t\t\t\t\t\t\\\n    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t\\\n    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);\t\\\n    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;\t\t\t\\\n    if (GMP_LIMB_BITS > 8)\t\t\t\t\t\t\\\n      __x = ((__x >> 8) + __x);\t\t\t\t\t\t\\\n    (result) = __x & 0xff;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if ! 
defined (popc_limb) && GMP_LIMB_BITS == 32\n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __x = (input);\t\t\t\t\t\t\\\n    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t\\\n    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);\t\\\n    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;\t\t\t\\\n    if (GMP_LIMB_BITS > 8)\t\t\t\t\t\t\\\n      __x = ((__x >> 8) + __x);\t\t\t\t\t\t\\\n    if (GMP_LIMB_BITS > 16)\t\t\t\t\t\t\\\n      __x = ((__x >> 16) + __x);\t\t\t\t\t\\\n    (result) = __x & 0xff;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if ! defined (popc_limb) && GMP_LIMB_BITS == 64\n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __x = (input);\t\t\t\t\t\t\\\n    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t\\\n    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);\t\\\n    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;\t\t\t\\\n    if (GMP_LIMB_BITS > 8)\t\t\t\t\t\t\\\n      __x = ((__x >> 8) + __x);\t\t\t\t\t\t\\\n    if (GMP_LIMB_BITS > 16)\t\t\t\t\t\t\\\n      __x = ((__x >> 16) + __x);\t\t\t\t\t\\\n    if (GMP_LIMB_BITS > 32)\t\t\t\t\t\t\\\n      __x = ((__x >> 32) + __x);\t\t\t\t\t\\\n    (result) = __x & 0xff;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n\n/* Define stuff for longlong.h.  */\n#if HAVE_ATTRIBUTE_MODE && defined (__GNUC__)\ntypedef unsigned int UQItype\t__attribute__ ((mode (QI)));\ntypedef\t\t int SItype\t__attribute__ ((mode (SI)));\ntypedef unsigned int USItype\t__attribute__ ((mode (SI)));\ntypedef\t\t int DItype\t__attribute__ ((mode (DI)));\ntypedef unsigned int UDItype\t__attribute__ ((mode (DI)));\n#else\ntypedef unsigned char UQItype;\ntypedef\t\t long SItype;\ntypedef unsigned long USItype;\n#if HAVE_LONG_LONG\ntypedef\tlong long int DItype;\ntypedef unsigned long long int UDItype;\n#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  
*/\ntypedef long int DItype;\ntypedef unsigned long int UDItype;\n#endif\n#endif\n\ntypedef mp_limb_t UWtype;\ntypedef unsigned int UHWtype;\n#define W_TYPE_SIZE BITS_PER_MP_LIMB\n\n/* Define ieee_double_extract\n\n   Bit field packing is \"implementation defined\" according to C99, which\n   leaves us at the compiler's mercy here.  For some systems packing is\n   defined in the ABI (eg. x86).  In any case so far it seems universal that\n   little endian systems pack from low to high, and big endian from high to\n   low within the given type.\n\n   Within the fields we rely on the integer endianness being the same as the\n   float endianness, this is true everywhere we know of and it'd be a fairly\n   strange system that did anything else.  */\n\n#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED\n#define _GMP_IEEE_FLOATS 1\nunion ieee_double_extract\n{\n  struct\n    {\n      gmp_uint_least32_t manh:20;\n      gmp_uint_least32_t exp:11;\n      gmp_uint_least32_t sig:1;\n      gmp_uint_least32_t manl:32;\n    } s;\n  double d;\n};\n#endif\n\n#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN\n#define _GMP_IEEE_FLOATS 1\nunion ieee_double_extract\n{\n  struct\n    {\n      gmp_uint_least32_t manl:32;\n      gmp_uint_least32_t manh:20;\n      gmp_uint_least32_t exp:11;\n      gmp_uint_least32_t sig:1;\n    } s;\n  double d;\n};\n#endif\n\n#if HAVE_DOUBLE_IEEE_BIG_ENDIAN\n#define _GMP_IEEE_FLOATS 1\nunion ieee_double_extract\n{\n  struct\n    {\n      gmp_uint_least32_t sig:1;\n      gmp_uint_least32_t exp:11;\n      gmp_uint_least32_t manh:20;\n      gmp_uint_least32_t manl:32;\n    } s;\n  double d;\n};\n#endif\n\n\n/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers\n   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */\n#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))\n/* Maximum number of limbs it will take to store any `double'.\n   We assume doubles have 53 mantissam bits.  
*/\n#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)\n\n__GMP_DECLSPEC int __gmp_extract_double(mp_ptr, double);\n\n#define mpn_get_d __gmpn_get_d\n__GMP_DECLSPEC double mpn_get_d(mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;\n\n\n/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes\n   a_inf if x is an infinity.  Both are considered unlikely values, for\n   branch prediction.  */\n\n#if _GMP_IEEE_FLOATS\n#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \\\n  do {                                          \\\n    union ieee_double_extract  u;               \\\n    u.d = (x);                                  \\\n    if (UNLIKELY (u.s.exp == 0x7FF))            \\\n      {                                         \\\n        if (u.s.manl == 0 && u.s.manh == 0)     \\\n          { a_inf; }                            \\\n        else                                    \\\n          { a_nan; }                            \\\n      }                                         \\\n  } while (0)\n#endif\n\n#ifndef DOUBLE_NAN_INF_ACTION\n/* Unknown format, try something generic.\n   NaN should be \"unordered\", so x!=x.\n   Inf should be bigger than DBL_MAX.  
*/\n#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \\\n  do {                                                          \\\n    {                                                           \\\n      if (UNLIKELY ((x) != (x)))                                \\\n        { a_nan; }                                              \\\n      else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))      \\\n        { a_inf; }                                              \\\n    }                                                           \\\n  } while (0)\n#endif\n\n__GMP_DECLSPEC extern int __gmp_junk;\n__GMP_DECLSPEC extern const int __gmp_0;\n__GMP_DECLSPEC void __gmp_exception(int) ATTRIBUTE_NORETURN;\n__GMP_DECLSPEC void __gmp_divide_by_zero(void) ATTRIBUTE_NORETURN;\n__GMP_DECLSPEC void __gmp_sqrt_of_negative(void) ATTRIBUTE_NORETURN;\n__GMP_DECLSPEC void __gmp_invalid_operation(void) ATTRIBUTE_NORETURN;\n#define GMP_ERROR(code)   __gmp_exception (code)\n#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()\n#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()\n\n\n/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */\n#if GMP_NUMB_BITS == 2\n#define PP 0x3\t\t\t\t\t/* 3 */\n#define PP_FIRST_OMITTED 5\n#endif\n#if GMP_NUMB_BITS == 4\n#define PP 0xF\t\t\t\t\t/* 3 x 5 */\n#define PP_FIRST_OMITTED 7\n#endif\n#if GMP_NUMB_BITS == 8\n#define PP 0x69\t\t\t\t\t/* 3 x 5 x 7 */\n#define PP_FIRST_OMITTED 11\n#endif\n#if GMP_NUMB_BITS == 16\n#define PP 0x3AA7\t\t\t\t/* 3 x 5 x 7 x 11 x 13 */\n#define PP_FIRST_OMITTED 17\n#endif\n#if GMP_NUMB_BITS == 32\n#define PP 0xC0CFD797L\t\t\t\t/* 3 x 5 x 7 x 11 x ... x 29 */\n#define PP_INVERTED 0x53E5645CL\n#define PP_FIRST_OMITTED 31\n#endif\n#if GMP_NUMB_BITS == 64\n#define PP CNST_LIMB(0xE221F97C30E94E1D)\t/* 3 x 5 x 7 x 11 x ... 
x 53 */\n#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)\n#define PP_FIRST_OMITTED 59\n#endif\n#ifndef PP_FIRST_OMITTED\n#define PP_FIRST_OMITTED 3\n#endif\n\n/* BIT1 means a result value in bit 1 (second least significant bit), with a\n   zero bit representing +1 and a one bit representing -1.  Bits other than\n   bit 1 are garbage.  These are meant to be kept in \"int\"s, and casts are\n   used to ensure the expressions are \"int\"s even if a and/or b might be\n   other types.\n\n   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base\n   and their speed is important.  Expressions are used rather than\n   conditionals to accumulate sign changes, which effectively means XORs\n   instead of conditional JUMPs. */\n\n/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */\n#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))\n\n/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */\n#define JACOBI_U0(a)   ((a) == 1)\n\n/* (a/0), with a given by low and size;\n   is 1 if a=+/-1, 0 otherwise */\n#define JACOBI_LS0(alow,asize) \\\n  (((asize) == 1 || (asize) == -1) && (alow) == 1)\n\n/* (a/0), with a an mpz_t;\n   fetch of low limb always valid, even if size is zero */\n#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))\n\n/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */\n#define JACOBI_0U(b)   ((b) == 1)\n\n/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */\n#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)\n\n/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */\n#define JACOBI_0LS(blow,bsize) \\\n  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)\n\n/* Convert a bit1 to +1 or -1. 
*/\n#define JACOBI_BIT1_TO_PN(result_bit1) \\\n  (1 - ((int) (result_bit1) & 2))\n\n/* (2/b), with b unsigned and odd;\n   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and\n   hence obtained from (b>>1)^b */\n#define JACOBI_TWO_U_BIT1(b) \\\n  ((int) (((b) >> 1) ^ (b)))\n\n/* (2/b)^twos, with b unsigned and odd */\n#define JACOBI_TWOS_U_BIT1(twos, b) \\\n  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))\n\n/* (2/b)^twos, with b unsigned and odd */\n#define JACOBI_TWOS_U(twos, b) \\\n  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))\n\n/* (-1/b), with b odd (signed or unsigned);\n   is (-1)^((b-1)/2) */\n#define JACOBI_N1B_BIT1(b) \\\n  ((int) (b))\n\n/* (a/b) effect due to sign of a: signed/unsigned, b odd;\n   is (-1/b) if a<0, or +1 if a>=0 */\n#define JACOBI_ASGN_SU_BIT1(a, b) \\\n  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))\n\n/* (a/b) effect due to sign of b: signed/signed;\n   is -1 if a and b both negative, +1 otherwise */\n#define JACOBI_BSGN_SS_BIT1(a, b) \\\n  ((((a)<0) & ((b)<0)) << 1)\n\n/* (a/b) effect due to sign of b: signed/mpz;\n   is -1 if a and b both negative, +1 otherwise */\n#define JACOBI_BSGN_SZ_BIT1(a, b) \\\n  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))\n\n/* (a/b) effect due to sign of b: mpz/signed;\n   is -1 if a and b both negative, +1 otherwise */\n#define JACOBI_BSGN_ZS_BIT1(a, b) \\\n  JACOBI_BSGN_SZ_BIT1 (b, a)\n\n/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;\n   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if\n   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd\n   because this is used in a couple of places with only bit 1 of a or b\n   valid. */\n#define JACOBI_RECIP_UU_BIT1(a, b) \\\n  ((int) ((a) & (b)))\n\n/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and\n   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be\n   updated for the new b_ptr.  
result_bit1 is updated according to the\n   factors of 2 stripped, as per (a/2).  */\n#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \\\n  do {                                                                  \\\n    ASSERT ((b_size) >= 1);                                             \\\n    ASSERT ((b_low) == (b_ptr)[0]);                                     \\\n                                                                        \\\n    while (UNLIKELY ((b_low) == 0))                                     \\\n      {                                                                 \\\n        (b_size)--;                                                     \\\n        ASSERT ((b_size) >= 1);                                         \\\n        (b_ptr)++;                                                      \\\n        (b_low) = *(b_ptr);                                             \\\n                                                                        \\\n        ASSERT (((a) & 1) != 0);                                        \\\n        if ((GMP_NUMB_BITS % 2) == 1)                                   \\\n          (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \\\n      }                                                                 \\\n  } while (0)\n\n/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or\n   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and\n   unsigned.  modexact_1_odd effectively calculates -a mod b, and\n   result_bit1 is adjusted for the factor of -1.\n\n   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and\n   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what\n   factor to introduce into result_bit1, so for that case use mpn_mod_1\n   unconditionally.\n\n   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used\n   for odd GMP_NUMB_BITS would be good.  
Perhaps it could mung its result,\n   or not skip a divide step, or something. */\n\n#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \\\n  do {                                                                     \\\n    mp_srcptr  __a_ptr  = (a_ptr);                                         \\\n    mp_size_t  __a_size = (a_size);                                        \\\n    mp_limb_t  __b      = (b);                                             \\\n                                                                           \\\n    ASSERT (__a_size >= 1);                                                \\\n    ASSERT (__b & 1);                                                      \\\n                                                                           \\\n    if ((GMP_NUMB_BITS % 2) != 0                                           \\\n        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \\\n      {                                                                    \\\n        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \\\n      }                                                                    \\\n    else                                                                   \\\n      {                                                                    \\\n        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \\\n        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \\\n      }                                                                    \\\n  } while (0)\n\n/* State for the Jacobi computation using Lehmer. */\n#define jacobi_table __gmp_jacobi_table\n__GMP_DECLSPEC extern const unsigned char jacobi_table[208];\n\n/* Bit layout for the initial state. 
b must be odd.\n\n      3  2  1 0\n   +--+--+--+--+\n   |a1|a0|b1| s|\n   +--+--+--+--+\n\n */\nstatic inline unsigned\nmpn_jacobi_init (unsigned a, unsigned b, unsigned s)\n{\n  ASSERT (b & 1);\n  ASSERT (s <= 1);\n  return ((a & 3) << 2) + (b & 2) + s;\n}\n\nstatic inline int\nmpn_jacobi_finish (unsigned bits)\n{\n  /* (a, b) = (1,0) or (0,1) */\n  ASSERT ( (bits & 14) == 0);\n\n  return 1-2*(bits & 1);\n}\n\nstatic inline unsigned\nmpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)\n{\n  /* FIXME: Could halve table size by not including the e bit in the\n   * index, and instead xor when updating. Then the lookup would be\n   * like\n   *\n   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];\n   */\n\n  ASSERT (bits < 26);\n  ASSERT (denominator < 2);\n  ASSERT (q < 4);\n\n  /* For almost all calls, denominator is constant and quite often q\n     is constant too. So use addition rather than or, so the compiler\n     can put the constant part can into the offset of an indexed\n     addressing instruction.\n\n     With constant denominator, the below table lookup is compiled to\n\n       C Constant q = 1, constant denominator = 1\n       movzbl table+5(%eax,8), %eax\n\n     or\n\n       C q in %edx, constant denominator = 1\n       movzbl table+4(%edx,%eax,8), %eax\n\n     One could maintain the state preshifted 3 bits, to save a shift\n     here, but at least on x86, that's no real saving.\n  */\n  return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];\n}\n\n\n/* Matrix multiplication */\n#define   mpn_matrix22_mul __MPN(matrix22_mul)\n__GMP_DECLSPEC void      mpn_matrix22_mul(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\n#define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)\n__GMP_DECLSPEC void      mpn_matrix22_mul_strassen(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\n#define   
mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)\n__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch(mp_size_t, mp_size_t);\n\n#ifndef MATRIX22_STRASSEN_THRESHOLD\n#define MATRIX22_STRASSEN_THRESHOLD 30\n#endif\n\n/* HGCD definitions */\n\n/* Extract one numb, shifting count bits left\n    ________  ________\n   |___xh___||___xl___|\n      |____r____|\n   >count <\n\n   The count includes any nail bits, so it should work fine if count\n   is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of\n   xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.\n\n   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for\n   those calls where the count high bits of xh may be non-zero.\n*/\n\n#define MPN_EXTRACT_NUMB(count, xh, xl)\t\t\t\t\\\n  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |\t\\\n   ((xl) >> (GMP_LIMB_BITS - (count))))\n\n\n/* The matrix non-negative M = (u, u'; v,v') keeps track of the\n   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller\n   than a, b. The determinant must always be one, so that M has an\n   inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1\n   bits. 
*/\nstruct hgcd_matrix1\n{\n  mp_limb_t u[2][2];\n};\n\n#define mpn_hgcd2 __MPN (hgcd2)\n__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,\tstruct hgcd_matrix1 *);\n\n#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);\n\n#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)\n__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);\n\n#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)\n__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);\n\nstruct hgcd_matrix\n{\n  mp_size_t alloc;\t\t/* for sanity checking only */\n  mp_size_t n;\n  mp_ptr p[2][2];\n};\n\n#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))\n\n#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)\n__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);\n\n#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)\n__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);\n\n#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)\n__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);\n\n#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)\n__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);\n\n#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);\n\n#define mpn_hgcd_step __MPN(hgcd_step)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);\n\n#define mpn_hgcd_reduce __MPN(hgcd_reduce)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce 
(struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);\n\n#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t);\n\n#define mpn_hgcd_itch __MPN (hgcd_itch)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t);\n\n#define mpn_hgcd __MPN (hgcd)\n__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);\n\n#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t);\n\n#define mpn_hgcd_appr __MPN (hgcd_appr)\n__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);\n\n#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)\n__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);\n\ntypedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);\n\n/* Needs storage for the quotient */\n#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)\n\n#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)\n__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);\n\nstruct gcdext_ctx\n{\n  /* Result parameters. */\n  mp_ptr gp;\n  mp_size_t gn;\n  mp_ptr up;\n  mp_size_t *usize;\n\n  /* Cofactors updated in each step. 
*/\n  mp_size_t un;\n  mp_ptr u0, u1, tp;\n};\n\n#define mpn_gcdext_hook __MPN (gcdext_hook)\ngcd_subdiv_step_hook mpn_gcdext_hook;\n\n#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)\n\n#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)\n__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);\n\n/* 4*(an + 1) + 4*(bn + 1) + an */\n#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)\n\n#ifndef HGCD_THRESHOLD\n#define HGCD_THRESHOLD 400\n#endif\n\n#ifndef HGCD_APPR_THRESHOLD\n#define HGCD_APPR_THRESHOLD 400\n#endif\n\n#ifndef HGCD_REDUCE_THRESHOLD\n#define HGCD_REDUCE_THRESHOLD 1000\n#endif\n\n#ifndef GCD_DC_THRESHOLD\n#define GCD_DC_THRESHOLD 1000\n#endif\n\n#ifndef GCDEXT_DC_THRESHOLD\n#define GCDEXT_DC_THRESHOLD 600\n#endif\n\n#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)\n__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);\n\n#define mpn_mulmod_bnm1_next_size(x) \\\n   ((x) <= 2*FFT_MULMOD_2EXPP1_CUTOFF ? 
(x) : 2*mpir_fft_adjust_limbs(((x) + 1)/2))\n\nstatic inline mp_size_t\nmpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {\n  return 5*rn + 220;\n}\n\n/* Definitions for mpn_set_str and mpn_get_str */\nstruct powers\n{\n  mp_ptr p;\t\t\t/* actual power value */\n  mp_size_t n;\t\t\t/* # of limbs at p */\n  mp_size_t shift;\t\t/* weight of lowest limb, in limb base B */\n  size_t digits_in_base;\t/* number of corresponding digits */\n  int base;\n};\ntypedef struct powers powers_t;\n#define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)\n#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)\n#define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)\n#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)\n\n#define   mpn_dc_set_str __MPN(dc_set_str)\n__GMP_DECLSPEC mp_size_t mpn_dc_set_str(mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);\n#define   mpn_bc_set_str __MPN(bc_set_str)\n__GMP_DECLSPEC mp_size_t mpn_bc_set_str(mp_ptr, const unsigned char *, size_t, int);\n#define   mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)\n__GMP_DECLSPEC void      mpn_set_str_compute_powtab(powers_t *, mp_ptr, mp_size_t, int);\n#define mpn_pre_set_str __MPN(pre_set_str)\n__GMP_DECLSPEC void mpn_pre_set_str(mp_ptr wp, unsigned char *str, size_t str_len, powers_t *powtab, mp_ptr tp);\n\n\nvoid _tc4_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n);\n\nvoid tc4_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n);\n\nvoid _tc4_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n);\n\nvoid tc4_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n);\n\nvoid tc4_sub(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n);\n\nvoid tc4_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_size_t bits);\n\nvoid tc4_rshift_inplace(mp_ptr rp, 
mp_size_t * rn, mp_size_t bits);\n\nvoid tc4_addlsh1_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn);\n\nvoid tc4_divexact_ui(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn, mp_limb_t c);\n\nvoid tc4_divexact_by3(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn);\n\nvoid tc4_divexact_by15(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn);\n\nvoid tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y);\n\nvoid tc4_submul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr x, mp_size_t xn, mp_limb_t y);\n\nvoid tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn);\n\nvoid __divappr_helper(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t qn);\n\n/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole\n   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,\n   hence giving back the user's size in bits rounded up.  Notice that\n   converting prec->bits->prec gives an unchanged value.  */\n#define __GMPF_BITS_TO_PREC(n)\t\t\t\t\t\t\\\n  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))\n#define __GMPF_PREC_TO_BITS(n) \\\n  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)\n\n__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;\n\n\n/* Set n to the number of significant digits an mpf of the given _mp_prec\n   field, in the given base.  This is a rounded up value, designed to ensure\n   there's enough digits to reproduce all the guaranteed part of the value.\n\n   There are prec many limbs, but the high might be only \"1\" so forget it\n   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a\n   further +1 is because the limbs usually won't fall on digit boundaries.\n\n   FIXME: If base is a power of 2 and the bits per digit divides\n   BITS_PER_MP_LIMB then the +2 is unnecessary.  This happens always for\n   base==2, and in base==16 with the current 32 or 64 bit limb sizes. 
*/\n\n#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \\\n  do {                                                                  \\\n    ASSERT (base >= 2 && base < numberof (mp_bases));                   \\\n    (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \\\n                        * mp_bases[(base)].chars_per_bit_exactly);      \\\n  } while (0)\n\n\n/* Decimal point string, from the current C locale.  Needs <langinfo.h> for\n   nl_langinfo and constants, preferrably with _GNU_SOURCE defined to get\n   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under\n   their respective #if HAVE_FOO_H.\n\n   GLIBC recommends nl_langinfo because getting only one facet can be\n   faster, apparently. */\n\n/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */\n#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)\n#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))\n#endif\n/* RADIXCHAR is deprecated, still in unix98 or some such. */\n#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)\n#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))\n#endif\n/* localeconv is slower since it returns all locale stuff */\n#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)\n#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)\n#endif\n#if ! 
defined (GMP_DECIMAL_POINT)\n#define GMP_DECIMAL_POINT  (\".\")\n#endif\n\n\n#define DOPRNT_CONV_FIXED        1\n#define DOPRNT_CONV_SCIENTIFIC   2\n#define DOPRNT_CONV_GENERAL      3\n\n#define DOPRNT_JUSTIFY_NONE      0\n#define DOPRNT_JUSTIFY_LEFT      1\n#define DOPRNT_JUSTIFY_RIGHT     2\n#define DOPRNT_JUSTIFY_INTERNAL  3\n\n#define DOPRNT_SHOWBASE_YES      1\n#define DOPRNT_SHOWBASE_NO       2\n#define DOPRNT_SHOWBASE_NONZERO  3\n\nstruct doprnt_params_t {\n  int         base;          /* negative for upper case */\n  int         conv;          /* choices above */\n  const char  *expfmt;       /* exponent format */\n  int         exptimes4;     /* exponent multiply by 4 */\n  char        fill;          /* character */\n  int         justify;       /* choices above */\n  int         prec;          /* prec field, or -1 for all digits */\n  int         showbase;      /* choices above */\n  int         showpoint;     /* if radix point always shown */\n  int         showtrailing;  /* if trailing zeros wanted */\n  char        sign;          /* '+', ' ', or '\\0' */\n  int         width;         /* width field */\n};\n\n#if _GMP_H_HAVE_VA_LIST\n\ntypedef int (*doprnt_format_t)(void *data, const char *fmt, va_list ap);\ntypedef int (*doprnt_memory_t)(void *data, const char *str, size_t len);\ntypedef int (*doprnt_reps_t)(void *data, int c, int reps);\ntypedef int (*doprnt_final_t)(void *data);\n\nstruct doprnt_funs_t {\n  doprnt_format_t  format;\n  doprnt_memory_t  memory;\n  doprnt_reps_t    reps;\n  doprnt_final_t   final;   /* NULL if not required */\n};\n\nextern const struct doprnt_funs_t  __gmp_fprintf_funs;\nextern const struct doprnt_funs_t  __gmp_sprintf_funs;\nextern const struct doprnt_funs_t  __gmp_snprintf_funs;\nextern const struct doprnt_funs_t  __gmp_obstack_printf_funs;\nextern const struct doprnt_funs_t  __gmp_ostream_funs;\n\n/* \"buf\" is a __gmp_allocate_func block of \"alloc\" many bytes.  The first\n   \"size\" of these have been written.  
\"alloc > size\" is maintained, so\n   there's room to store a '\\0' at the end.  \"result\" is where the\n   application wants the final block pointer.  */\nstruct gmp_asprintf_t {\n  char    **result;\n  char    *buf;\n  size_t  size;\n  size_t  alloc;\n};\n\n#define GMP_ASPRINTF_T_INIT(d, output)                          \\\n  do {                                                          \\\n    (d).result = (output);                                      \\\n    (d).alloc = 256;                                            \\\n    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \\\n    (d).size = 0;                                               \\\n  } while (0)\n\n/* If a realloc is necessary, use twice the size actually required, so as to\n   avoid repeated small reallocs.  */\n#define GMP_ASPRINTF_T_NEED(d, n)                                       \\\n  do {                                                                  \\\n    size_t  alloc, newsize, newalloc;                                   \\\n    ASSERT ((d)->alloc >= (d)->size + 1);                               \\\n                                                                        \\\n    alloc = (d)->alloc;                                                 \\\n    newsize = (d)->size + (n);                                          \\\n    if (alloc <= newsize)                                               \\\n      {                                                                 \\\n        newalloc = 2*newsize;                                           \\\n        (d)->alloc = newalloc;                                          \\\n        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \\\n                                               alloc, newalloc, char);  \\\n      }                                                                 \\\n  } while (0)\n\n__GMP_DECLSPEC int __gmp_asprintf_memory(struct gmp_asprintf_t *d, const char *str, size_t len);\n__GMP_DECLSPEC 
int __gmp_asprintf_reps(struct gmp_asprintf_t *d, int c, int reps);\n__GMP_DECLSPEC int __gmp_asprintf_final(struct gmp_asprintf_t *d);\n\n/* buf is where to write the next output, and size is how much space is left\n   there.  If the application passed size==0 then that's what we'll have\n   here, and nothing at all should be written.  */\nstruct gmp_snprintf_t {\n  char    *buf;\n  size_t  size;\n};\n\n/* Add the bytes printed by the call to the total retval, or bail out on an\n   error.  */\n#define DOPRNT_ACCUMULATE(call) \\\n  do {                          \\\n    int  __ret;                 \\\n    __ret = call;               \\\n    if (__ret == -1)            \\\n      goto error;               \\\n    retval += __ret;            \\\n  } while (0)\n#define DOPRNT_ACCUMULATE_FUN(fun, params)      \\\n  do {                                          \\\n    ASSERT ((fun) != NULL);                     \\\n    DOPRNT_ACCUMULATE ((*(fun)) params);        \\\n  } while (0)\n\n#define DOPRNT_FORMAT(fmt, ap)                          \\\n  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))\n#define DOPRNT_MEMORY(ptr, len)                                 \\\n  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))\n#define DOPRNT_REPS(c, n)                               \\\n  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))\n\n#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))\n\n#define DOPRNT_REPS_MAYBE(c, n) \\\n  do {                          \\\n    if ((n) != 0)               \\\n      DOPRNT_REPS (c, n);       \\\n  } while (0)\n#define DOPRNT_MEMORY_MAYBE(ptr, len)   \\\n  do {                                  \\\n    if ((len) != 0)                     \\\n      DOPRNT_MEMORY (ptr, len);         \\\n  } while (0)\n\n__GMP_DECLSPEC int __gmp_doprnt(const struct doprnt_funs_t *, void *, const char *, va_list);\n__GMP_DECLSPEC int __gmp_doprnt_integer(const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char 
*);\n\n#define __gmp_doprnt_mpf __gmp_doprnt_mpf2\n__GMP_DECLSPEC int __gmp_doprnt_mpf(const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);\n\n__GMP_DECLSPEC int __gmp_replacement_vsnprintf(char *, size_t, const char *, va_list);\n#endif /* _GMP_H_HAVE_VA_LIST */\n\n\ntypedef int (*gmp_doscan_scan_t) (void *, const char *, ...);\ntypedef void *(*gmp_doscan_step_t)(void *, int);\ntypedef int (*gmp_doscan_get_t)  (void *);\ntypedef int (*gmp_doscan_unget_t)(int, void *);\n\nstruct gmp_doscan_funs_t {\n  gmp_doscan_scan_t   scan;\n  gmp_doscan_step_t   step;\n  gmp_doscan_get_t    get;\n  gmp_doscan_unget_t  unget;\n};\nextern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;\nextern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;\n\n#if _GMP_H_HAVE_VA_LIST\n__GMP_DECLSPEC int __gmp_doscan(const struct gmp_doscan_funs_t *, void *,\n                          const char *, va_list);\n#endif\n\n\n/* For testing and debugging.  */\n#define MPZ_CHECK_FORMAT(z)\t\t\t\t\t\\\n  do {                                                          \\\n    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);\t\\\n    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));\t\t\t\\\n    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \\\n  } while (0)\n\n#define MPQ_CHECK_FORMAT(q)                             \\\n  do {                                                  \\\n    MPZ_CHECK_FORMAT (mpq_numref (q));                  \\\n    MPZ_CHECK_FORMAT (mpq_denref (q));                  \\\n    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \\\n                                                        \\\n    if (SIZ(mpq_numref(q)) == 0)                        \\\n      {                                                 \\\n        /* should have zero as 0/1 */                   \\\n        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \\\n                       && PTR(mpq_denref(q))[0] == 1);  \\\n      }                                  
               \\\n    else                                                \\\n      {                                                 \\\n        /* should have no common factors */             \\\n        mpz_t  g;                                       \\\n        mpz_init (g);                                   \\\n        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \\\n        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \\\n        mpz_clear (g);                                  \\\n      }                                                 \\\n  } while (0)\n\n#define MPF_CHECK_FORMAT(f)                             \\\n  do {                                                  \\\n    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \\\n    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \\\n    if (SIZ(f) == 0)                                    \\\n      ASSERT_ALWAYS (EXP(f) == 0);                      \\\n    if (SIZ(f) != 0)                                    \\\n      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \\\n  } while (0)\n\n\n#define MPZ_PROVOKE_REALLOC(z)\t\t\t\t\t\\\n  do { ALLOC(z) = ABSIZ(z); } while (0)\n\n\n/* Enhancement: The \"mod\" and \"gcd_1\" functions below could have\n   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on\n   function pointers, only actual functions.  It probably doesn't make much\n   difference to the gmp code, since hopefully we arrange calls so there's\n   no great need for the compiler to move things around.  */\n\n#if WANT_FAT_BINARY\n/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST\n   in mpn/x86/x86-defs.m4 and in mpn/x86_64/x86_64-defs.m4.  Be sure to\n    update them there when changing here.  
*/\nstruct cpuvec_t {\n  DECL_add_err1_n      ((*add_err1_n));\n  DECL_add_err2_n      ((*add_err2_n));\n  DECL_add_n           ((*add_n));\n  DECL_addmul_1        ((*addmul_1));\n  DECL_copyd           ((*copyd));\n  DECL_copyi           ((*copyi));\n  DECL_divexact_1      ((*divexact_1));\n  DECL_divexact_by3c   ((*divexact_by3c));\n  DECL_divexact_byfobm1   ((*divexact_byfobm1));\n  DECL_divrem_1        ((*divrem_1));\n  DECL_divrem_2        ((*divrem_2));\n  DECL_divrem_euclidean_qr_1        ((*divrem_euclidean_qr_1));\n  DECL_divrem_euclidean_qr_2        ((*divrem_euclidean_qr_2));\n  DECL_gcd_1           ((*gcd_1));\n  DECL_lshift          ((*lshift));\n  DECL_mod_1           ((*mod_1));\n  DECL_mod_34lsub1     ((*mod_34lsub1));\n  DECL_modexact_1c_odd ((*modexact_1c_odd));\n  DECL_mul_1           ((*mul_1));\n  DECL_mul_basecase    ((*mul_basecase));\n  DECL_mulmid_basecase ((*mulmid_basecase));\n  DECL_preinv_divrem_1 ((*preinv_divrem_1));\n  DECL_preinv_mod_1    ((*preinv_mod_1));\n  DECL_redc_1   ((*redc_1));\n  DECL_rshift          ((*rshift));\n  DECL_sqr_basecase    ((*sqr_basecase));\n  DECL_sub_err1_n      ((*sub_err1_n));\n  DECL_sub_err2_n      ((*sub_err2_n));\n  DECL_sub_n           ((*sub_n));\n  DECL_submul_1        ((*submul_1));\n  DECL_sumdiff_n       ((*sumdiff_n));\n\n  int                  initialized;\n  mp_size_t            mul_karatsuba_threshold;\n  mp_size_t            mul_toom3_threshold;\n  mp_size_t            sqr_karatsuba_threshold;\n  mp_size_t            sqr_toom3_threshold;\n};\n__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;\n#endif /* x86 fat binary */\n\n__GMP_DECLSPEC void __gmpn_cpuvec_init(void);\n\n/* Get a threshold \"field\" from __gmpn_cpuvec, running __gmpn_cpuvec_init()\n   if that hasn't yet been done (to establish the right values).  */\n#define CPUVEC_THRESHOLD(field)                                               \\\n  ((LIKELY (__gmpn_cpuvec.initialized) ? 
0 : (__gmpn_cpuvec_init (), 0)),     \\\n   __gmpn_cpuvec.field)\n\n#if TUNE_PROGRAM_BUILD\n/* Some extras wanted when recompiling some .c files for use by the tune\n   program.  Not part of a normal build.\n\n   It's necessary to keep these thresholds as #defines (just to an\n   identically named variable), since various defaults are established based\n   on #ifdef in the .c files.  For some this is not so (the defaults are\n   instead establshed above), but all are done this way for consistency. */\n\n#undef  MUL_KARATSUBA_THRESHOLD\n#define MUL_KARATSUBA_THRESHOLD      mul_karatsuba_threshold\nextern mp_size_t                     mul_karatsuba_threshold;\n\n#undef  MUL_TOOM3_THRESHOLD\n#define MUL_TOOM3_THRESHOLD          mul_toom3_threshold\nextern mp_size_t                     mul_toom3_threshold;\n\n#undef  MUL_TOOM4_THRESHOLD\n#define MUL_TOOM4_THRESHOLD          mul_toom4_threshold\nextern mp_size_t                     mul_toom4_threshold;\n\n#undef  MUL_TOOM8H_THRESHOLD\n#define MUL_TOOM8H_THRESHOLD         mul_toom8h_threshold\nextern mp_size_t                     mul_toom8h_threshold;\n\n#undef  MUL_FFT_THRESHOLD\n#define MUL_FFT_THRESHOLD            mul_fft_threshold\nextern mp_size_t                     mul_fft_threshold;\n\n#undef  MUL_FFT_FULL_THRESHOLD\n#define MUL_FFT_FULL_THRESHOLD       mul_fft_full_threshold\nextern mp_size_t                     mul_fft_full_threshold;\n\n/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should\n   remain as zero (always use it). */\n#if ! 
HAVE_NATIVE_mpn_sqr_basecase\n#undef  SQR_BASECASE_THRESHOLD\n#define SQR_BASECASE_THRESHOLD       sqr_basecase_threshold\nextern mp_size_t                     sqr_basecase_threshold;\n#endif\n\n#if TUNE_PROGRAM_BUILD_SQR\n#undef  SQR_KARATSUBA_THRESHOLD\n#define SQR_KARATSUBA_THRESHOLD      SQR_KARATSUBA_MAX_GENERIC\n#else\n#undef  SQR_KARATSUBA_THRESHOLD\n#define SQR_KARATSUBA_THRESHOLD      sqr_karatsuba_threshold\nextern mp_size_t                     sqr_karatsuba_threshold;\n#endif\n\n#undef  SQR_TOOM3_THRESHOLD\n#define SQR_TOOM3_THRESHOLD          sqr_toom3_threshold\nextern mp_size_t                     sqr_toom3_threshold;\n\n#undef  SQR_TOOM4_THRESHOLD\n#define SQR_TOOM4_THRESHOLD          sqr_toom4_threshold\nextern mp_size_t                     sqr_toom4_threshold;\n\n#undef  SQR_TOOM8_THRESHOLD\n#define SQR_TOOM8_THRESHOLD          sqr_toom8_threshold\nextern mp_size_t                     sqr_toom8_threshold;\n\n#undef SQR_FFT_THRESHOLD\n#define SQR_FFT_THRESHOLD            sqr_fft_threshold\nextern mp_size_t                     sqr_fft_threshold;\n\n#undef SQR_FFT_FULL_THRESHOLD\n#define SQR_FFT_FULL_THRESHOLD       sqr_fft_full_threshold\nextern mp_size_t                     sqr_fft_full_threshold;\n\n#undef  MULLOW_BASECASE_THRESHOLD\n#define MULLOW_BASECASE_THRESHOLD    mullow_basecase_threshold\nextern mp_size_t                     mullow_basecase_threshold;\n\n#undef  MULLOW_DC_THRESHOLD\n#define MULLOW_DC_THRESHOLD          mullow_dc_threshold\nextern mp_size_t                     mullow_dc_threshold;\n\n#undef  MULLOW_MUL_THRESHOLD\n#define MULLOW_MUL_THRESHOLD         mullow_mul_threshold\nextern mp_size_t                     mullow_mul_threshold;\n\n#undef  MULMID_TOOM42_THRESHOLD\n#define MULMID_TOOM42_THRESHOLD      mulmid_toom42_threshold\nextern mp_size_t                     mulmid_toom42_threshold;\n\n#undef  MULHIGH_BASECASE_THRESHOLD\n#define MULHIGH_BASECASE_THRESHOLD   mulhigh_basecase_threshold\nextern mp_size_t                     
mulhigh_basecase_threshold;\n\n#undef  MULHIGH_DC_THRESHOLD\n#define MULHIGH_DC_THRESHOLD         mulhigh_dc_threshold\nextern mp_size_t                     mulhigh_dc_threshold;\n\n#undef  MULHIGH_MUL_THRESHOLD\n#define MULHIGH_MUL_THRESHOLD        mulhigh_mul_threshold\nextern mp_size_t                     mulhigh_mul_threshold;\n\n#undef  MULMOD_2EXPM1_THRESHOLD\n#define MULMOD_2EXPM1_THRESHOLD      mulmod_2expm1_threshold\nextern mp_size_t                     mulmod_2expm1_threshold;\n\n#if ! UDIV_PREINV_ALWAYS\n#undef  DIV_SB_PREINV_THRESHOLD\n#define DIV_SB_PREINV_THRESHOLD      div_sb_preinv_threshold\nextern mp_size_t                     div_sb_preinv_threshold;\n#endif\n\n#undef  DC_DIV_QR_THRESHOLD\n#define DC_DIV_QR_THRESHOLD          dc_div_qr_threshold\nextern mp_size_t                     dc_div_qr_threshold;\n\n#undef  DC_BDIV_QR_THRESHOLD\n#define DC_BDIV_QR_THRESHOLD         dc_bdiv_qr_threshold\nextern mp_size_t                     dc_bdiv_qr_threshold;\n\n#undef  DC_BDIV_Q_THRESHOLD\n#define DC_BDIV_Q_THRESHOLD          dc_bdiv_q_threshold\nextern mp_size_t                     dc_bdiv_q_threshold;\n\n#undef  INV_DIV_QR_THRESHOLD\n#define INV_DIV_QR_THRESHOLD         inv_div_qr_threshold\nextern mp_size_t                     inv_div_qr_threshold;\n\n#undef  INV_DIVAPPR_Q_N_THRESHOLD\n#define INV_DIVAPPR_Q_N_THRESHOLD    inv_divappr_q_n_threshold\nextern mp_size_t                     inv_divappr_q_n_threshold;\n\n#undef  DC_DIV_Q_THRESHOLD\n#define DC_DIV_Q_THRESHOLD           dc_div_q_threshold\nextern mp_size_t                     dc_div_q_threshold;\n\n#undef  INV_DIV_Q_THRESHOLD\n#define INV_DIV_Q_THRESHOLD          inv_div_q_threshold\nextern mp_size_t                     inv_div_q_threshold;\n\n#undef BINV_NEWTON_THRESHOLD\n#define BINV_NEWTON_THRESHOLD        binv_newton_threshold\nextern mp_size_t                     binv_newton_threshold;\n\n#undef  DC_DIVAPPR_Q_THRESHOLD\n#define DC_DIVAPPR_Q_THRESHOLD       dc_divappr_q_threshold\nextern 
mp_size_t                     dc_divappr_q_threshold;\n\n#undef  INV_DIVAPPR_Q_THRESHOLD\n#define INV_DIVAPPR_Q_THRESHOLD      inv_divappr_q_threshold\nextern mp_size_t                     inv_divappr_q_threshold;\n\n#undef  ROOTREM_THRESHOLD\n#define ROOTREM_THRESHOLD            rootrem_threshold\nextern mp_size_t                     rootrem_threshold;\n\n#undef DIVREM_HENSEL_QR_1_THRESHOLD\n#define DIVREM_HENSEL_QR_1_THRESHOLD divrem_hensel_qr_1_threshold\nextern mp_size_t\t\t     divrem_hensel_qr_1_threshold;\n\n#undef RSH_DIVREM_HENSEL_QR_1_THRESHOLD\n#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD rsh_divrem_hensel_qr_1_threshold\nextern mp_size_t\t\t     rsh_divrem_hensel_qr_1_threshold;\n\n#undef DIVREM_EUCLID_HENSEL_THRESHOLD\n#define DIVREM_EUCLID_HENSEL_THRESHOLD divrem_euclid_hensel_threshold\nextern mp_size_t\t\t       divrem_euclid_hensel_threshold;\n\n#undef  MOD_1_1_THRESHOLD\n#define MOD_1_1_THRESHOLD            mod_1_1_threshold\nextern mp_size_t                     mod_1_1_threshold;\n\n#undef  MOD_1_2_THRESHOLD\n#define MOD_1_2_THRESHOLD            mod_1_2_threshold\nextern mp_size_t                     mod_1_2_threshold;\n\n#undef  MOD_1_3_THRESHOLD\n#define MOD_1_3_THRESHOLD            mod_1_3_threshold\nextern mp_size_t                     mod_1_3_threshold;\n\n#undef\tREDC_1_TO_REDC_2_THRESHOLD\n#define REDC_1_TO_REDC_2_THRESHOLD\tredc_1_to_redc_2_threshold\nextern mp_size_t\t\t\tredc_1_to_redc_2_threshold;\n\n#undef\tREDC_2_TO_REDC_N_THRESHOLD\n#define REDC_2_TO_REDC_N_THRESHOLD\tredc_2_to_redc_n_threshold\nextern mp_size_t\t\t\tredc_2_to_redc_n_threshold;\n\n#undef\tREDC_1_TO_REDC_N_THRESHOLD\n#define REDC_1_TO_REDC_N_THRESHOLD\tredc_1_to_redc_n_threshold\nextern mp_size_t\t\t\tredc_1_to_redc_n_threshold;\n\n#undef\tMATRIX22_STRASSEN_THRESHOLD\n#define MATRIX22_STRASSEN_THRESHOLD\tmatrix22_strassen_threshold\nextern mp_size_t\t\t\tmatrix22_strassen_threshold;\n\n#undef\tHGCD_THRESHOLD\n#define HGCD_THRESHOLD\t\t\thgcd_threshold\nextern 
mp_size_t\t\t\thgcd_threshold;\n\n#undef\tHGCD_APPR_THRESHOLD\n#define HGCD_APPR_THRESHOLD\t\thgcd_appr_threshold\nextern mp_size_t\t\t\thgcd_appr_threshold;\n\n#undef\tHGCD_REDUCE_THRESHOLD\n#define HGCD_REDUCE_THRESHOLD\t\thgcd_reduce_threshold\nextern mp_size_t\t\t\thgcd_reduce_threshold;\n\n#undef\tGCD_DC_THRESHOLD\n#define GCD_DC_THRESHOLD\t\tgcd_dc_threshold\nextern mp_size_t\t\t\tgcd_dc_threshold;\n\n#undef  GCDEXT_DC_THRESHOLD\n#define GCDEXT_DC_THRESHOLD\t\tgcdext_dc_threshold\nextern mp_size_t\t\t\tgcdext_dc_threshold;\n\n#undef DIVREM_1_NORM_THRESHOLD\n#define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold\nextern mp_size_t                     divrem_1_norm_threshold;\n\n#undef DIVREM_1_UNNORM_THRESHOLD\n#define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold\nextern mp_size_t                     divrem_1_unnorm_threshold;\n\n#undef MOD_1_NORM_THRESHOLD\n#define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold\nextern mp_size_t                     mod_1_norm_threshold;\n\n#undef MOD_1_UNNORM_THRESHOLD\n#define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold\nextern mp_size_t                     mod_1_unnorm_threshold;\n\n#if ! 
UDIV_PREINV_ALWAYS\n#undef  DIVREM_2_THRESHOLD\n#define DIVREM_2_THRESHOLD           divrem_2_threshold\nextern mp_size_t                     divrem_2_threshold;\n#endif\n\n#undef  GET_STR_DC_THRESHOLD\n#define GET_STR_DC_THRESHOLD         get_str_dc_threshold\nextern mp_size_t                     get_str_dc_threshold;\n\n#undef GET_STR_PRECOMPUTE_THRESHOLD\n#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold\nextern mp_size_t                     get_str_precompute_threshold;\n\n#undef\tSET_STR_DC_THRESHOLD\n#define SET_STR_DC_THRESHOLD\t\tset_str_dc_threshold\nextern mp_size_t\t\t\tset_str_dc_threshold;\n\n#undef  SET_STR_PRECOMPUTE_THRESHOLD\n#define SET_STR_PRECOMPUTE_THRESHOLD\tset_str_precompute_threshold\nextern mp_size_t\t\t\tset_str_precompute_threshold;\n\n#undef  FAC_ODD_THRESHOLD\n#define FAC_ODD_THRESHOLD\t\tfac_odd_threshold\nextern  mp_size_t\t\t\tfac_odd_threshold;\n\n#undef  FAC_DSC_THRESHOLD\n#define FAC_DSC_THRESHOLD\t\tfac_dsc_threshold\nextern  mp_size_t\t\t\tfac_dsc_threshold;\n\n/* Sizes the tune program tests up to, used in a couple of recompilations. 
*/\n#undef MUL_KARATSUBA_THRESHOLD_LIMIT\n#undef MUL_TOOM3_THRESHOLD_LIMIT\n#undef MUL_TOOM4_THRESHOLD_LIMIT\n#undef MUL_TOOM8H_THRESHOLD_LIMIT\n#undef MULLOW_BASECASE_THRESHOLD_LIMIT\n#undef SQR_TOOM3_THRESHOLD_LIMIT\n#undef SQR_TOOM4_THRESHOLD_LIMIT\n#undef SQR_TOOM8_THRESHOLD_LIMIT\n#define SQR_KARATSUBA_MAX_GENERIC       200\n#define MUL_KARATSUBA_THRESHOLD_LIMIT   700\n#define MUL_TOOM3_THRESHOLD_LIMIT       700\n#define MUL_TOOM4_THRESHOLD_LIMIT       1000\n#define MUL_TOOM8H_THRESHOLD_LIMIT      2000\n#define MULLOW_BASECASE_THRESHOLD_LIMIT 200\n#define SQR_TOOM3_THRESHOLD_LIMIT       400\n#define SQR_TOOM4_THRESHOLD_LIMIT       1000\n#define SQR_TOOM8_THRESHOLD_LIMIT       2000\n#define GET_STR_THRESHOLD_LIMIT         150\n#define FAC_DSC_THRESHOLD_LIMIT         2048\n\n#endif /* TUNE_PROGRAM_BUILD */\n\n#if defined (__cplusplus)\n}\n#endif\n\n\n#ifdef __cplusplus\n\n/* A little helper for a null-terminated __gmp_allocate_func string.\n   The destructor ensures it's freed even if an exception is thrown.\n   The len field is needed by the destructor, and can be used by anyone else\n   to avoid a second strlen pass over the data.\n\n   Since our input is a C string, using strlen is correct.  Perhaps it'd be\n   more C++-ish style to use std::char_traits<char>::length, but char_traits\n   isn't available in gcc 2.95.4.  
*/\n\nclass gmp_allocated_string {\n public:\n  char *str;\n  size_t len;\n  gmp_allocated_string(char *arg)\n  {\n    str = arg;\n    len = std::strlen (str);\n  }\n  ~gmp_allocated_string()\n  {\n    (*__gmp_free_func) (str, len+1);\n  }\n};\n\nstd::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);\nint __gmp_istream_set_base (std::istream &, char &, bool &, bool &);\nvoid __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);\nvoid __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);\nstd::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);\nextern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;\n\n#endif /* __cplusplus */\n\n#endif /* __GMP_IMPL_H__ */\n"
  },
  {
    "path": "gpl-2.0.txt",
    "content": "\t\t    GNU GENERAL PUBLIC LICENSE\n\t\t       Version 2, June 1991\n\n Copyright (C) 1989, 1991 Free Software Foundation, Inc.\n 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n\t\t\t    Preamble\n\n  The licenses for most software are designed to take away your\nfreedom to share and change it.  By contrast, the GNU General Public\nLicense is intended to guarantee your freedom to share and change free\nsoftware--to make sure the software is free for all its users.  This\nGeneral Public License applies to most of the Free Software\nFoundation's software and to any other program whose authors commit to\nusing it.  (Some other Free Software Foundation software is covered by\nthe GNU Lesser General Public License instead.)  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthis service if you wish), that you receive source code or can get it\nif you want it, that you can change the software or use pieces of it\nin new free programs; and that you know you can do these things.\n\n  To protect your rights, we need to make restrictions that forbid\nanyone to deny you these rights or to ask you to surrender the rights.\nThese restrictions translate to certain responsibilities for you if you\ndistribute copies of the software, or if you modify it.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must give the recipients all the rights that\nyou have.  You must make sure that they, too, receive or can get the\nsource code.  
And you must show them these terms so they know their\nrights.\n\n  We protect your rights with two steps: (1) copyright the software, and\n(2) offer you this license which gives you legal permission to copy,\ndistribute and/or modify the software.\n\n  Also, for each author's protection and ours, we want to make certain\nthat everyone understands that there is no warranty for this free\nsoftware.  If the software is modified by someone else and passed on, we\nwant its recipients to know that what they have is not the original, so\nthat any problems introduced by others will not reflect on the original\nauthors' reputations.\n\n  Finally, any free program is threatened constantly by software\npatents.  We wish to avoid the danger that redistributors of a free\nprogram will individually obtain patent licenses, in effect making the\nprogram proprietary.  To prevent this, we have made it clear that any\npatent must be licensed for everyone's free use or not licensed at all.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n\t\t    GNU GENERAL PUBLIC LICENSE\n   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n\n  0. This License applies to any program or other work which contains\na notice placed by the copyright holder saying it may be distributed\nunder the terms of this General Public License.  The \"Program\", below,\nrefers to any such program or work, and a \"work based on the Program\"\nmeans either the Program or any derivative work under copyright law:\nthat is to say, a work containing the Program or a portion of it,\neither verbatim or with modifications and/or translated into another\nlanguage.  (Hereinafter, translation is included without limitation in\nthe term \"modification\".)  Each licensee is addressed as \"you\".\n\nActivities other than copying, distribution and modification are not\ncovered by this License; they are outside its scope.  
The act of\nrunning the Program is not restricted, and the output from the Program\nis covered only if its contents constitute a work based on the\nProgram (independent of having been made by running the Program).\nWhether that is true depends on what the Program does.\n\n  1. You may copy and distribute verbatim copies of the Program's\nsource code as you receive it, in any medium, provided that you\nconspicuously and appropriately publish on each copy an appropriate\ncopyright notice and disclaimer of warranty; keep intact all the\nnotices that refer to this License and to the absence of any warranty;\nand give any other recipients of the Program a copy of this License\nalong with the Program.\n\nYou may charge a fee for the physical act of transferring a copy, and\nyou may at your option offer warranty protection in exchange for a fee.\n\n  2. You may modify your copy or copies of the Program or any portion\nof it, thus forming a work based on the Program, and copy and\ndistribute such modifications or work under the terms of Section 1\nabove, provided that you also meet all of these conditions:\n\n    a) You must cause the modified files to carry prominent notices\n    stating that you changed the files and the date of any change.\n\n    b) You must cause any work that you distribute or publish, that in\n    whole or in part contains or is derived from the Program or any\n    part thereof, to be licensed as a whole at no charge to all third\n    parties under the terms of this License.\n\n    c) If the modified program normally reads commands interactively\n    when run, you must cause it, when started running for such\n    interactive use in the most ordinary way, to print or display an\n    announcement including an appropriate copyright notice and a\n    notice that there is no warranty (or else, saying that you provide\n    a warranty) and that users may redistribute the program under\n    these conditions, and telling the user how to view a copy of this\n  
  License.  (Exception: if the Program itself is interactive but\n    does not normally print such an announcement, your work based on\n    the Program is not required to print an announcement.)\n\nThese requirements apply to the modified work as a whole.  If\nidentifiable sections of that work are not derived from the Program,\nand can be reasonably considered independent and separate works in\nthemselves, then this License, and its terms, do not apply to those\nsections when you distribute them as separate works.  But when you\ndistribute the same sections as part of a whole which is a work based\non the Program, the distribution of the whole must be on the terms of\nthis License, whose permissions for other licensees extend to the\nentire whole, and thus to each and every part regardless of who wrote it.\n\nThus, it is not the intent of this section to claim rights or contest\nyour rights to work written entirely by you; rather, the intent is to\nexercise the right to control the distribution of derivative or\ncollective works based on the Program.\n\nIn addition, mere aggregation of another work not based on the Program\nwith the Program (or with a work based on the Program) on a volume of\na storage or distribution medium does not bring the other work under\nthe scope of this License.\n\n  3. 
You may copy and distribute the Program (or a work based on it,\nunder Section 2) in object code or executable form under the terms of\nSections 1 and 2 above provided that you also do one of the following:\n\n    a) Accompany it with the complete corresponding machine-readable\n    source code, which must be distributed under the terms of Sections\n    1 and 2 above on a medium customarily used for software interchange; or,\n\n    b) Accompany it with a written offer, valid for at least three\n    years, to give any third party, for a charge no more than your\n    cost of physically performing source distribution, a complete\n    machine-readable copy of the corresponding source code, to be\n    distributed under the terms of Sections 1 and 2 above on a medium\n    customarily used for software interchange; or,\n\n    c) Accompany it with the information you received as to the offer\n    to distribute corresponding source code.  (This alternative is\n    allowed only for noncommercial distribution and only if you\n    received the program in object code or executable form with such\n    an offer, in accord with Subsection b above.)\n\nThe source code for a work means the preferred form of the work for\nmaking modifications to it.  For an executable work, complete source\ncode means all the source code for all modules it contains, plus any\nassociated interface definition files, plus the scripts used to\ncontrol compilation and installation of the executable.  
However, as a\nspecial exception, the source code distributed need not include\nanything that is normally distributed (in either source or binary\nform) with the major components (compiler, kernel, and so on) of the\noperating system on which the executable runs, unless that component\nitself accompanies the executable.\n\nIf distribution of executable or object code is made by offering\naccess to copy from a designated place, then offering equivalent\naccess to copy the source code from the same place counts as\ndistribution of the source code, even though third parties are not\ncompelled to copy the source along with the object code.\n\n  4. You may not copy, modify, sublicense, or distribute the Program\nexcept as expressly provided under this License.  Any attempt\notherwise to copy, modify, sublicense or distribute the Program is\nvoid, and will automatically terminate your rights under this License.\nHowever, parties who have received copies, or rights, from you under\nthis License will not have their licenses terminated so long as such\nparties remain in full compliance.\n\n  5. You are not required to accept this License, since you have not\nsigned it.  However, nothing else grants you permission to modify or\ndistribute the Program or its derivative works.  These actions are\nprohibited by law if you do not accept this License.  Therefore, by\nmodifying or distributing the Program (or any work based on the\nProgram), you indicate your acceptance of this License to do so, and\nall its terms and conditions for copying, distributing or modifying\nthe Program or works based on it.\n\n  6. Each time you redistribute the Program (or any work based on the\nProgram), the recipient automatically receives a license from the\noriginal licensor to copy, distribute or modify the Program subject to\nthese terms and conditions.  
You may not impose any further\nrestrictions on the recipients' exercise of the rights granted herein.\nYou are not responsible for enforcing compliance by third parties to\nthis License.\n\n  7. If, as a consequence of a court judgment or allegation of patent\ninfringement or for any other reason (not limited to patent issues),\nconditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot\ndistribute so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you\nmay not distribute the Program at all.  For example, if a patent\nlicense would not permit royalty-free redistribution of the Program by\nall those who receive copies directly or indirectly through you, then\nthe only way you could satisfy both it and this License would be to\nrefrain entirely from distribution of the Program.\n\nIf any portion of this section is held invalid or unenforceable under\nany particular circumstance, the balance of the section is intended to\napply and the section as a whole is intended to apply in other\ncircumstances.\n\nIt is not the purpose of this section to induce you to infringe any\npatents or other property right claims or to contest validity of any\nsuch claims; this section has the sole purpose of protecting the\nintegrity of the free software distribution system, which is\nimplemented by public license practices.  Many people have made\ngenerous contributions to the wide range of software distributed\nthrough that system in reliance on consistent application of that\nsystem; it is up to the author/donor to decide if he or she is willing\nto distribute software through any other system and a licensee cannot\nimpose that choice.\n\nThis section is intended to make thoroughly clear what is believed to\nbe a consequence of the rest of this License.\n\n  8. 
If the distribution and/or use of the Program is restricted in\ncertain countries either by patents or by copyrighted interfaces, the\noriginal copyright holder who places the Program under this License\nmay add an explicit geographical distribution limitation excluding\nthose countries, so that distribution is permitted only in or among\ncountries not thus excluded.  In such case, this License incorporates\nthe limitation as if written in the body of this License.\n\n  9. The Free Software Foundation may publish revised and/or new versions\nof the General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\nEach version is given a distinguishing version number.  If the Program\nspecifies a version number of this License which applies to it and \"any\nlater version\", you have the option of following the terms and conditions\neither of that version or of any later version published by the Free\nSoftware Foundation.  If the Program does not specify a version number of\nthis License, you may choose any version ever published by the Free Software\nFoundation.\n\n  10. If you wish to incorporate parts of the Program into other free\nprograms whose distribution conditions are different, write to the author\nto ask for permission.  For software which is copyrighted by the Free\nSoftware Foundation, write to the Free Software Foundation; we sometimes\nmake exceptions for this.  Our decision will be guided by the two goals\nof preserving the free status of all derivatives of our free software and\nof promoting the sharing and reuse of software generally.\n\n\t\t\t    NO WARRANTY\n\n  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\nFOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  
EXCEPT WHEN\nOTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\nPROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\nOR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\nMERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\nTO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\nPROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\nREPAIR OR CORRECTION.\n\n  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\nREDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\nINCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\nOUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\nTO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\nYOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\nPROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\nPOSSIBILITY OF SUCH DAMAGES.\n\n\t\t     END OF TERMS AND CONDITIONS\n\n\t    How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  
It is safest\nto attach them to the start of each source file to most effectively\nconvey the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software; you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation; either version 2 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program; if not, write to the Free Software\n    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\n\n\nAlso add information on how to contact you by electronic and paper mail.\n\nIf the program is interactive, make it output a short notice like this\nwhen it starts in an interactive mode:\n\n    Gnomovision version 69, Copyright (C) year name of author\n    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  Of course, the commands you use may\nbe called something other than `show w' and `show c'; they could even be\nmouse-clicks or menu items--whatever suits your program.\n\nYou should also get your employer (if you work as a programmer) or your\nschool, if any, to sign a \"copyright disclaimer\" for the program, if\nnecessary.  
Here is a sample; alter the names:\n\n  Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n  `Gnomovision' (which makes passes at compilers) written by James Hacker.\n\n  <signature of Ty Coon>, 1 April 1989\n  Ty Coon, President of Vice\n\nThis General Public License does not permit incorporating your program into\nproprietary programs.  If your program is a subroutine library, you may\nconsider it more useful to permit linking proprietary applications with the\nlibrary.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.\n"
  },
  {
    "path": "invalid.c",
    "content": "/* __gmp_invalid_operation -- invalid floating point operation.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <signal.h>\n#include <stdlib.h>\n\n#if HAVE_UNISTD_H\n#include <unistd.h>  /* for getpid */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Incidentally, kill is not available on mingw, but that's ok, it has raise\n   and we'll be using that.  */\n#if ! HAVE_RAISE\n#define raise(sig)   kill (getpid(), sig)\n#endif\n\n\n/* __gmp_invalid_operation is for an invalid floating point operation, like\n   mpz_set_d on a NaN or Inf.  It's done as a subroutine to minimize code in\n   places raising an exception.\n\n   feraiseexcept(FE_INVALID) is not used here, since unfortunately on most\n   systems it would require libm.\n\n   Alternatives:\n\n   It might be possible to check whether a hardware \"invalid operation\" trap\n   is enabled or not before raising a signal.  
This would require all\n   callers to be prepared to continue with some bogus result.  Bogus returns\n   are bad, but presumably an application disabling the trap is prepared for\n   that.\n\n   On some systems (eg. BSD) the signal handler can find out the reason for\n   a SIGFPE (overflow, invalid, div-by-zero, etc).  Perhaps we could get\n   that into our raise too.\n\n   i386 GLIBC implements feraiseexcept(FE_INVALID) with an asm fdiv 0/0.\n   That would both respect the exceptions mask and give a reason code in a\n   BSD signal.  */\n\nvoid\n__gmp_invalid_operation (void)\n{\n  raise (SIGFPE);\n  abort ();\n}\n"
  },
  {
    "path": "lgpl-2.1.txt",
    "content": "\t\t  GNU LESSER GENERAL PUBLIC LICENSE\n\t\t       Version 2.1, February 1999\n\n Copyright (C) 1991, 1999 Free Software Foundation, Inc.\n 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n[This is the first released version of the Lesser GPL.  It also counts\n as the successor of the GNU Library Public License, version 2, hence\n the version number 2.1.]\n\n\t\t\t    Preamble\n\n  The licenses for most software are designed to take away your\nfreedom to share and change it.  By contrast, the GNU General Public\nLicenses are intended to guarantee your freedom to share and change\nfree software--to make sure the software is free for all its users.\n\n  This license, the Lesser General Public License, applies to some\nspecially designated software packages--typically libraries--of the\nFree Software Foundation and other authors who decide to use it.  You\ncan use it too, but we suggest you first think carefully about whether\nthis license or the ordinary General Public License is the better\nstrategy to use in any particular case, based on the explanations below.\n\n  When we speak of free software, we are referring to freedom of use,\nnot price.  Our General Public Licenses are designed to make sure that\nyou have the freedom to distribute copies of free software (and charge\nfor this service if you wish); that you receive source code or can get\nit if you want it; that you can change the software and use pieces of\nit in new free programs; and that you are informed that you can do\nthese things.\n\n  To protect your rights, we need to make restrictions that forbid\ndistributors to deny you these rights or to ask you to surrender these\nrights.  
These restrictions translate to certain responsibilities for\nyou if you distribute copies of the library or if you modify it.\n\n  For example, if you distribute copies of the library, whether gratis\nor for a fee, you must give the recipients all the rights that we gave\nyou.  You must make sure that they, too, receive or can get the source\ncode.  If you link other code with the library, you must provide\ncomplete object files to the recipients, so that they can relink them\nwith the library after making changes to the library and recompiling\nit.  And you must show them these terms so they know their rights.\n\n  We protect your rights with a two-step method: (1) we copyright the\nlibrary, and (2) we offer you this license, which gives you legal\npermission to copy, distribute and/or modify the library.\n\n  To protect each distributor, we want to make it very clear that\nthere is no warranty for the free library.  Also, if the library is\nmodified by someone else and passed on, the recipients should know\nthat what they have is not the original version, so that the original\nauthor's reputation will not be affected by problems that might be\nintroduced by others.\n\f\n  Finally, software patents pose a constant threat to the existence of\nany free program.  We wish to make sure that a company cannot\neffectively restrict the users of a free program by obtaining a\nrestrictive license from a patent holder.  Therefore, we insist that\nany patent license obtained for a version of the library must be\nconsistent with the full freedom of use specified in this license.\n\n  Most GNU software, including some libraries, is covered by the\nordinary GNU General Public License.  This license, the GNU Lesser\nGeneral Public License, applies to certain designated libraries, and\nis quite different from the ordinary General Public License.  
We use\nthis license for certain libraries in order to permit linking those\nlibraries into non-free programs.\n\n  When a program is linked with a library, whether statically or using\na shared library, the combination of the two is legally speaking a\ncombined work, a derivative of the original library.  The ordinary\nGeneral Public License therefore permits such linking only if the\nentire combination fits its criteria of freedom.  The Lesser General\nPublic License permits more lax criteria for linking other code with\nthe library.\n\n  We call this license the \"Lesser\" General Public License because it\ndoes Less to protect the user's freedom than the ordinary General\nPublic License.  It also provides other free software developers Less\nof an advantage over competing non-free programs.  These disadvantages\nare the reason we use the ordinary General Public License for many\nlibraries.  However, the Lesser license provides advantages in certain\nspecial circumstances.\n\n  For example, on rare occasions, there may be a special need to\nencourage the widest possible use of a certain library, so that it becomes\na de-facto standard.  To achieve this, non-free programs must be\nallowed to use the library.  A more frequent case is that a free\nlibrary does the same job as widely used non-free libraries.  In this\ncase, there is little to gain by limiting the free library to free\nsoftware only, so we use the Lesser General Public License.\n\n  In other cases, permission to use a particular library in non-free\nprograms enables a greater number of people to use a large body of\nfree software.  
For example, permission to use the GNU C Library in\nnon-free programs enables many more people to use the whole GNU\noperating system, as well as its variant, the GNU/Linux operating\nsystem.\n\n  Although the Lesser General Public License is Less protective of the\nusers' freedom, it does ensure that the user of a program that is\nlinked with the Library has the freedom and the wherewithal to run\nthat program using a modified version of the Library.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.  Pay close attention to the difference between a\n\"work based on the library\" and a \"work that uses the library\".  The\nformer contains code derived from the library, whereas the latter must\nbe combined with the library in order to run.\n\f\n\t\t  GNU LESSER GENERAL PUBLIC LICENSE\n   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n\n  0. This License Agreement applies to any software library or other\nprogram which contains a notice placed by the copyright holder or\nother authorized party saying it may be distributed under the terms of\nthis Lesser General Public License (also called \"this License\").\nEach licensee is addressed as \"you\".\n\n  A \"library\" means a collection of software functions and/or data\nprepared so as to be conveniently linked with application programs\n(which use some of those functions and data) to form executables.\n\n  The \"Library\", below, refers to any such software library or work\nwhich has been distributed under these terms.  A \"work based on the\nLibrary\" means either the Library or any derivative work under\ncopyright law: that is to say, a work containing the Library or a\nportion of it, either verbatim or with modifications and/or translated\nstraightforwardly into another language.  
(Hereinafter, translation is\nincluded without limitation in the term \"modification\".)\n\n  \"Source code\" for a work means the preferred form of the work for\nmaking modifications to it.  For a library, complete source code means\nall the source code for all modules it contains, plus any associated\ninterface definition files, plus the scripts used to control compilation\nand installation of the library.\n\n  Activities other than copying, distribution and modification are not\ncovered by this License; they are outside its scope.  The act of\nrunning a program using the Library is not restricted, and output from\nsuch a program is covered only if its contents constitute a work based\non the Library (independent of the use of the Library in a tool for\nwriting it).  Whether that is true depends on what the Library does\nand what the program that uses the Library does.\n  \n  1. You may copy and distribute verbatim copies of the Library's\ncomplete source code as you receive it, in any medium, provided that\nyou conspicuously and appropriately publish on each copy an\nappropriate copyright notice and disclaimer of warranty; keep intact\nall the notices that refer to this License and to the absence of any\nwarranty; and distribute a copy of this License along with the\nLibrary.\n\n  You may charge a fee for the physical act of transferring a copy,\nand you may at your option offer warranty protection in exchange for a\nfee.\n\f\n  2. 
You may modify your copy or copies of the Library or any portion\nof it, thus forming a work based on the Library, and copy and\ndistribute such modifications or work under the terms of Section 1\nabove, provided that you also meet all of these conditions:\n\n    a) The modified work must itself be a software library.\n\n    b) You must cause the files modified to carry prominent notices\n    stating that you changed the files and the date of any change.\n\n    c) You must cause the whole of the work to be licensed at no\n    charge to all third parties under the terms of this License.\n\n    d) If a facility in the modified Library refers to a function or a\n    table of data to be supplied by an application program that uses\n    the facility, other than as an argument passed when the facility\n    is invoked, then you must make a good faith effort to ensure that,\n    in the event an application does not supply such function or\n    table, the facility still operates, and performs whatever part of\n    its purpose remains meaningful.\n\n    (For example, a function in a library to compute square roots has\n    a purpose that is entirely well-defined independent of the\n    application.  Therefore, Subsection 2d requires that any\n    application-supplied function or table used by this function must\n    be optional: if the application does not supply it, the square\n    root function must still compute square roots.)\n\nThese requirements apply to the modified work as a whole.  If\nidentifiable sections of that work are not derived from the Library,\nand can be reasonably considered independent and separate works in\nthemselves, then this License, and its terms, do not apply to those\nsections when you distribute them as separate works.  
But when you\ndistribute the same sections as part of a whole which is a work based\non the Library, the distribution of the whole must be on the terms of\nthis License, whose permissions for other licensees extend to the\nentire whole, and thus to each and every part regardless of who wrote\nit.\n\nThus, it is not the intent of this section to claim rights or contest\nyour rights to work written entirely by you; rather, the intent is to\nexercise the right to control the distribution of derivative or\ncollective works based on the Library.\n\nIn addition, mere aggregation of another work not based on the Library\nwith the Library (or with a work based on the Library) on a volume of\na storage or distribution medium does not bring the other work under\nthe scope of this License.\n\n  3. You may opt to apply the terms of the ordinary GNU General Public\nLicense instead of this License to a given copy of the Library.  To do\nthis, you must alter all the notices that refer to this License, so\nthat they refer to the ordinary GNU General Public License, version 2,\ninstead of to this License.  (If a newer version than version 2 of the\nordinary GNU General Public License has appeared, then you can specify\nthat version instead if you wish.)  Do not make any other change in\nthese notices.\n\f\n  Once this change is made in a given copy, it is irreversible for\nthat copy, so the ordinary GNU General Public License applies to all\nsubsequent copies and derivative works made from that copy.\n\n  This option is useful when you wish to copy part of the code of\nthe Library into a program that is not a library.\n\n  4. 
You may copy and distribute the Library (or a portion or\nderivative of it, under Section 2) in object code or executable form\nunder the terms of Sections 1 and 2 above provided that you accompany\nit with the complete corresponding machine-readable source code, which\nmust be distributed under the terms of Sections 1 and 2 above on a\nmedium customarily used for software interchange.\n\n  If distribution of object code is made by offering access to copy\nfrom a designated place, then offering equivalent access to copy the\nsource code from the same place satisfies the requirement to\ndistribute the source code, even though third parties are not\ncompelled to copy the source along with the object code.\n\n  5. A program that contains no derivative of any portion of the\nLibrary, but is designed to work with the Library by being compiled or\nlinked with it, is called a \"work that uses the Library\".  Such a\nwork, in isolation, is not a derivative work of the Library, and\ntherefore falls outside the scope of this License.\n\n  However, linking a \"work that uses the Library\" with the Library\ncreates an executable that is a derivative of the Library (because it\ncontains portions of the Library), rather than a \"work that uses the\nlibrary\".  The executable is therefore covered by this License.\nSection 6 states terms for distribution of such executables.\n\n  When a \"work that uses the Library\" uses material from a header file\nthat is part of the Library, the object code for the work may be a\nderivative work of the Library even though the source code is not.\nWhether this is true is especially significant if the work can be\nlinked without the Library, or if the work is itself a library.  
The\nthreshold for this to be true is not precisely defined by law.\n\n  If such an object file uses only numerical parameters, data\nstructure layouts and accessors, and small macros and small inline\nfunctions (ten lines or less in length), then the use of the object\nfile is unrestricted, regardless of whether it is legally a derivative\nwork.  (Executables containing this object code plus portions of the\nLibrary will still fall under Section 6.)\n\n  Otherwise, if the work is a derivative of the Library, you may\ndistribute the object code for the work under the terms of Section 6.\nAny executables containing that work also fall under Section 6,\nwhether or not they are linked directly with the Library itself.\n\f\n  6. As an exception to the Sections above, you may also combine or\nlink a \"work that uses the Library\" with the Library to produce a\nwork containing portions of the Library, and distribute that work\nunder terms of your choice, provided that the terms permit\nmodification of the work for the customer's own use and reverse\nengineering for debugging such modifications.\n\n  You must give prominent notice with each copy of the work that the\nLibrary is used in it and that the Library and its use are covered by\nthis License.  You must supply a copy of this License.  If the work\nduring execution displays copyright notices, you must include the\ncopyright notice for the Library among them, as well as a reference\ndirecting the user to the copy of this License.  
Also, you must do one\nof these things:\n\n    a) Accompany the work with the complete corresponding\n    machine-readable source code for the Library including whatever\n    changes were used in the work (which must be distributed under\n    Sections 1 and 2 above); and, if the work is an executable linked\n    with the Library, with the complete machine-readable \"work that\n    uses the Library\", as object code and/or source code, so that the\n    user can modify the Library and then relink to produce a modified\n    executable containing the modified Library.  (It is understood\n    that the user who changes the contents of definitions files in the\n    Library will not necessarily be able to recompile the application\n    to use the modified definitions.)\n\n    b) Use a suitable shared library mechanism for linking with the\n    Library.  A suitable mechanism is one that (1) uses at run time a\n    copy of the library already present on the user's computer system,\n    rather than copying library functions into the executable, and (2)\n    will operate properly with a modified version of the library, if\n    the user installs one, as long as the modified version is\n    interface-compatible with the version that the work was made with.\n\n    c) Accompany the work with a written offer, valid for at\n    least three years, to give the same user the materials\n    specified in Subsection 6a, above, for a charge no more\n    than the cost of performing this distribution.\n\n    d) If distribution of the work is made by offering access to copy\n    from a designated place, offer equivalent access to copy the above\n    specified materials from the same place.\n\n    e) Verify that the user has already received a copy of these\n    materials or that you have already sent this user a copy.\n\n  For an executable, the required form of the \"work that uses the\nLibrary\" must include any data and utility programs needed for\nreproducing the executable from it.  
However, as a special exception,\nthe materials to be distributed need not include anything that is\nnormally distributed (in either source or binary form) with the major\ncomponents (compiler, kernel, and so on) of the operating system on\nwhich the executable runs, unless that component itself accompanies\nthe executable.\n\n  It may happen that this requirement contradicts the license\nrestrictions of other proprietary libraries that do not normally\naccompany the operating system.  Such a contradiction means you cannot\nuse both them and the Library together in an executable that you\ndistribute.\n\f\n  7. You may place library facilities that are a work based on the\nLibrary side-by-side in a single library together with other library\nfacilities not covered by this License, and distribute such a combined\nlibrary, provided that the separate distribution of the work based on\nthe Library and of the other library facilities is otherwise\npermitted, and provided that you do these two things:\n\n    a) Accompany the combined library with a copy of the same work\n    based on the Library, uncombined with any other library\n    facilities.  This must be distributed under the terms of the\n    Sections above.\n\n    b) Give prominent notice with the combined library of the fact\n    that part of it is a work based on the Library, and explaining\n    where to find the accompanying uncombined form of the same work.\n\n  8. You may not copy, modify, sublicense, link with, or distribute\nthe Library except as expressly provided under this License.  Any\nattempt otherwise to copy, modify, sublicense, link with, or\ndistribute the Library is void, and will automatically terminate your\nrights under this License.  However, parties who have received copies,\nor rights, from you under this License will not have their licenses\nterminated so long as such parties remain in full compliance.\n\n  9. You are not required to accept this License, since you have not\nsigned it.  
However, nothing else grants you permission to modify or\ndistribute the Library or its derivative works.  These actions are\nprohibited by law if you do not accept this License.  Therefore, by\nmodifying or distributing the Library (or any work based on the\nLibrary), you indicate your acceptance of this License to do so, and\nall its terms and conditions for copying, distributing or modifying\nthe Library or works based on it.\n\n  10. Each time you redistribute the Library (or any work based on the\nLibrary), the recipient automatically receives a license from the\noriginal licensor to copy, distribute, link with or modify the Library\nsubject to these terms and conditions.  You may not impose any further\nrestrictions on the recipients' exercise of the rights granted herein.\nYou are not responsible for enforcing compliance by third parties with\nthis License.\n\f\n  11. If, as a consequence of a court judgment or allegation of patent\ninfringement or for any other reason (not limited to patent issues),\nconditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot\ndistribute so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you\nmay not distribute the Library at all.  
For example, if a patent\nlicense would not permit royalty-free redistribution of the Library by\nall those who receive copies directly or indirectly through you, then\nthe only way you could satisfy both it and this License would be to\nrefrain entirely from distribution of the Library.\n\nIf any portion of this section is held invalid or unenforceable under any\nparticular circumstance, the balance of the section is intended to apply,\nand the section as a whole is intended to apply in other circumstances.\n\nIt is not the purpose of this section to induce you to infringe any\npatents or other property right claims or to contest validity of any\nsuch claims; this section has the sole purpose of protecting the\nintegrity of the free software distribution system which is\nimplemented by public license practices.  Many people have made\ngenerous contributions to the wide range of software distributed\nthrough that system in reliance on consistent application of that\nsystem; it is up to the author/donor to decide if he or she is willing\nto distribute software through any other system and a licensee cannot\nimpose that choice.\n\nThis section is intended to make thoroughly clear what is believed to\nbe a consequence of the rest of this License.\n\n  12. If the distribution and/or use of the Library is restricted in\ncertain countries either by patents or by copyrighted interfaces, the\noriginal copyright holder who places the Library under this License may add\nan explicit geographical distribution limitation excluding those countries,\nso that distribution is permitted only in or among countries not thus\nexcluded.  In such case, this License incorporates the limitation as if\nwritten in the body of this License.\n\n  13. 
The Free Software Foundation may publish revised and/or new\nversions of the Lesser General Public License from time to time.\nSuch new versions will be similar in spirit to the present version,\nbut may differ in detail to address new problems or concerns.\n\nEach version is given a distinguishing version number.  If the Library\nspecifies a version number of this License which applies to it and\n\"any later version\", you have the option of following the terms and\nconditions either of that version or of any later version published by\nthe Free Software Foundation.  If the Library does not specify a\nlicense version number, you may choose any version ever published by\nthe Free Software Foundation.\n\f\n  14. If you wish to incorporate parts of the Library into other free\nprograms whose distribution conditions are incompatible with these,\nwrite to the author to ask for permission.  For software which is\ncopyrighted by the Free Software Foundation, write to the Free\nSoftware Foundation; we sometimes make exceptions for this.  Our\ndecision will be guided by the two goals of preserving the free status\nof all derivatives of our free software and of promoting the sharing\nand reuse of software generally.\n\n\t\t\t    NO WARRANTY\n\n  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO\nWARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.\nEXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR\nOTHER PARTIES PROVIDE THE LIBRARY \"AS IS\" WITHOUT WARRANTY OF ANY\nKIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\nLIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME\nTHE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN\nWRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY\nAND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU\nFOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR\nCONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE\nLIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING\nRENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A\nFAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF\nSUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\nDAMAGES.\n\n\t\t     END OF TERMS AND CONDITIONS\n\f\n           How to Apply These Terms to Your New Libraries\n\n  If you develop a new library, and you want it to be of the greatest\npossible use to the public, we recommend making it free software that\neveryone can redistribute and change.  You can do so by permitting\nredistribution under these terms (or, alternatively, under the terms of the\nordinary General Public License).\n\n  To apply these terms, attach the following notices to the library.  It is\nsafest to attach them to the start of each source file to most effectively\nconvey the exclusion of warranty; and each file should have at least the\n\"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the library's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This library is free software; you can redistribute it and/or\n    modify it under the terms of the GNU Lesser General Public\n    License as published by the Free Software Foundation; either\n    version 2.1 of the License, or (at your option) any later version.\n\n    This library is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU\n    Lesser General Public License for more details.\n\n    You should have received a copy of the GNU Lesser General Public\n    License along with this library; if not, write to the Free Software\n    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\n\nAlso add information on how to contact you by electronic and paper mail.\n\nYou should also get your employer (if you work as a programmer) or your\nschool, if any, to sign a \"copyright disclaimer\" for the library, if\nnecessary.  Here is a sample; alter the names:\n\n  Yoyodyne, Inc., hereby disclaims all copyright interest in the\n  library `Frob' (a library for tweaking knobs) written by James Random Hacker.\n\n  <signature of Ty Coon>, 1 April 1990\n  Ty Coon, President of Vice\n\nThat's all there is to it!\n\n\n"
  },
  {
    "path": "longlong_post.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nCopyright 2013 William Hart\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* You have to define the following before including this file:\n\n   UWtype -- An unsigned type, default type for operations (typically a \"word\")\n   UHWtype -- An unsigned type, at least half the size of UWtype.\n   UDWtype -- An unsigned type, at least twice as large a UWtype\n   W_TYPE_SIZE -- size in bits of UWtype\n\n   SItype, USItype -- Signed and unsigned 32 bit types.\n   DItype, UDItype -- Signed and unsigned 64 bit types.\n\n   On a 32 bit machine UWtype should typically be USItype;\n   on a 64 bit machine, UWtype should typically be UDItype.\n*/\n\n/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist.  The \"_r\"\n   forms have \"reversed\" arguments, meaning the pointer is last, which\n   sometimes allows better parameter passing, in particular on 64-bit\n   hppa. */\n\n#define mpn_umul_ppmm  __MPN(umul_ppmm)\nextern UWtype mpn_umul_ppmm(UWtype *, UWtype, UWtype);\n\n#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \\\n  && ! 
defined (LONGLONG_STANDALONE)\n#define umul_ppmm(wh, wl, u, v)\t\t\t\t\t\t      \\\n  do {\t\t\t\t\t\t\t\t\t      \\\n    UWtype __umul_ppmm__p0;\t\t\t\t\t\t      \\\n    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));      \\\n    (wl) = __umul_ppmm__p0;\t\t\t\t\t\t      \\\n  } while (0)\n#endif\n\n#define mpn_umul_ppmm_r  __MPN(umul_ppmm_r)\nextern UWtype mpn_umul_ppmm_r(UWtype, UWtype, UWtype *);\n\n#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r\t\\\n  && ! defined (LONGLONG_STANDALONE)\n#define umul_ppmm(wh, wl, u, v)\t\t\t\t\t\t      \\\n  do {\t\t\t\t\t\t\t\t\t      \\\n    UWtype __umul_ppmm__p0;\t\t\t\t\t\t      \\\n    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0);    \\\n    (wl) = __umul_ppmm__p0;\t\t\t\t\t\t      \\\n  } while (0)\n#endif\n\n#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)\nextern UWtype mpn_udiv_qrnnd(UWtype *, UWtype, UWtype, UWtype);\n\n#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd\t\\\n  && ! defined (LONGLONG_STANDALONE)\n#define udiv_qrnnd(q, r, n1, n0, d)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __udiv_qrnnd__r;\t\t\t\t\t\t\\\n    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r,\t\t\t\t\\\n\t\t\t  (UWtype) (n1), (UWtype) (n0), (UWtype) d);\t\\\n    (r) = __udiv_qrnnd__r;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#define mpn_udiv_qrnnd_r  __MPN(udiv_qrnnd_r)\nextern UWtype mpn_udiv_qrnnd_r(UWtype, UWtype, UWtype, UWtype *);\n\n#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r\t\\\n  && ! defined (LONGLONG_STANDALONE)\n#define udiv_qrnnd(q, r, n1, n0, d)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __udiv_qrnnd__r;\t\t\t\t\t\t\\\n    (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d,\t\\\n\t\t\t    &__udiv_qrnnd__r);\t\t\t\t\\\n    (r) = __udiv_qrnnd__r;\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n\n/* If this machine has no inline assembler, use C macros.  
*/\n\n#if !defined (add_ssaaaa)\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __x;\t\t\t\t\t\t\t\t\\\n    __x = (al) + (bl);\t\t\t\t\t\t\t\\\n    (sh) = (ah) + (bh) + (__x < (al));\t\t\t\t\t\\\n    (sl) = __x;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if !defined (sub_ddmmss)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __x;\t\t\t\t\t\t\t\t\\\n    __x = (al) - (bl);\t\t\t\t\t\t\t\\\n    (sh) = (ah) - (bh) - ((al) < (bl));                                 \\\n    (sl) = __x;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if !defined (add_333)\n#define add_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n   do { \\\n      UWtype __cy1, __cy2; \\\n      __cy1 = ((al) + (bl) < (al)); \\\n      (sl) = (al) + (bl); \\\n      __cy2 = ((am) + (bm) < (am)); \\\n      (sm) = (am) + (bm); \\\n      __cy2 += ((sm) + __cy1 < (sm)); \\\n      (sm) = (sm) + __cy1; \\\n      (sh) = (ah) + (bh) + __cy2; \\\n   } while (0)\n#endif\n\n#if !defined(sub_333)\n#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n   do { \\\n      UWtype __cy1, __cy2; \\\n      __cy1 = ((al) < (bl)); \\\n      (sl) = (al) - (bl); \\\n      __cy2 = ((am) < (bm)); \\\n      (sm) = (am) - (bm); \\\n      __cy2 += ((sm) < __cy1); \\\n      (sm) = (sm) - __cy1; \\\n      (sh) = (ah) - (bh) - __cy2; \\\n   } while (0)\n#endif\n\n/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of\n   smul_ppmm.  
*/\n#if !defined (umul_ppmm) && defined (smul_ppmm)\n#define umul_ppmm(w1, w0, u, v)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __w1;\t\t\t\t\t\t\t\\\n    UWtype __xm0 = (u), __xm1 = (v);\t\t\t\t\t\\\n    smul_ppmm (__w1, w0, __xm0, __xm1);\t\t\t\t\t\\\n    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)\t\t\\\n\t\t+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);\t\t\\\n  } while (0)\n#endif\n\n/* If we still don't have umul_ppmm, define it using plain C.\n\n   For reference, when this code is used for squaring (ie. u and v identical\n   expressions), gcc recognises __x1 and __x2 are the same and generates 3\n   multiplies, not 4.  The subsequent additions could be optimized a bit,\n   but the only place GMP currently uses such a square is mpn_sqr_basecase,\n   and chips obliged to use this generic C umul will have plenty of worse\n   performance problems than a couple of extra instructions on the diagonal\n   of sqr_basecase.  */\n\n#if !defined (umul_ppmm)\n#define umul_ppmm(w1, w0, u, v)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __x0, __x1, __x2, __x3;\t\t\t\t\t\\\n    UHWtype __ul, __vl, __uh, __vh;\t\t\t\t\t\\\n    UWtype __u = (u), __v = (v);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __ul = __ll_lowpart (__u);\t\t\t\t\t\t\\\n    __uh = __ll_highpart (__u);\t\t\t\t\t\t\\\n    __vl = __ll_lowpart (__v);\t\t\t\t\t\t\\\n    __vh = __ll_highpart (__v);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __x0 = (UWtype) __ul * __vl;\t\t\t\t\t\\\n    __x1 = (UWtype) __ul * __vh;\t\t\t\t\t\\\n    __x2 = (UWtype) __uh * __vl;\t\t\t\t\t\\\n    __x3 = (UWtype) __uh * __vh;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __x1 += __ll_highpart (__x0);/* this can't give carry */\t\t\\\n    __x1 += __x2;\t\t/* but this indeed can */\t\t\\\n    if (__x1 < __x2)\t\t/* did we get it? */\t\t\t\\\n      __x3 += __ll_B;\t\t/* yes, add it in the proper pos. 
*/\t\\\n\t\t\t\t\t\t\t\t\t\\\n    (w1) = __x3 + __ll_highpart (__x1);\t\t\t\t\t\\\n    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\t\t\\\n  } while (0)\n#endif\n\n/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will\n   exist in one form or another.  */\n#if !defined (smul_ppmm)\n#define smul_ppmm(w1, w0, u, v)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __w1;\t\t\t\t\t\t\t\\\n    UWtype __xm0 = (u), __xm1 = (v);\t\t\t\t\t\\\n    umul_ppmm (__w1, w0, __xm0, __xm1);\t\t\t\t\t\\\n    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)\t\t\\\n\t\t- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);\t\t\\\n  } while (0)\n#endif\n\n/* Define this unconditionally, so it can be used for debugging.  */\n#define __udiv_qrnnd_c(q, r, n1, n0, d) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((d) != 0);\t\t\t\t\t\t\t\\\n    ASSERT ((n1) < (d));\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __d1 = __ll_highpart (d);\t\t\t\t\t\t\\\n    __d0 = __ll_lowpart (d);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __q1 = (n1) / __d1;\t\t\t\t\t\t\t\\\n    __r1 = (n1) - __q1 * __d1;\t\t\t\t\t\t\\\n    __m = __q1 * __d0;\t\t\t\t\t\t\t\\\n    __r1 = __r1 * __ll_B | __ll_highpart (n0);\t\t\t\t\\\n    if (__r1 < __m)\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\t__q1--, __r1 += (d);\t\t\t\t\t\t\\\n\tif (__r1 >= (d)) /* i.e. 
we didn't get carry when adding to __r1 */\\\n\t  if (__r1 < __m)\t\t\t\t\t\t\\\n\t    __q1--, __r1 += (d);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    __r1 -= __m;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    __q0 = __r1 / __d1;\t\t\t\t\t\t\t\\\n    __r0 = __r1  - __q0 * __d1;\t\t\t\t\t\t\\\n    __m = __q0 * __d0;\t\t\t\t\t\t\t\\\n    __r0 = __r0 * __ll_B | __ll_lowpart (n0);\t\t\t\t\\\n    if (__r0 < __m)\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\t__q0--, __r0 += (d);\t\t\t\t\t\t\\\n\tif (__r0 >= (d))\t\t\t\t\t\t\\\n\t  if (__r0 < __m)\t\t\t\t\t\t\\\n\t    __q0--, __r0 += (d);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    __r0 -= __m;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    (q) = __q1 * __ll_B | __q0;\t\t\t\t\t\t\\\n    (r) = __r0;\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through\n   __udiv_w_sdiv (defined in libgcc or elsewhere).  */\n#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)\n#define udiv_qrnnd(q, r, nh, nl, d) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __r;\t\t\t\t\t\t\t\t\\\n    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d);\t\t\t\t\\\n    (r) = __r;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */\n#if !defined (udiv_qrnnd)\n#define UDIV_NEEDS_NORMALIZATION 1\n#define udiv_qrnnd __udiv_qrnnd_c\n#endif\n\n#if !defined (count_leading_zeros)\n#define count_leading_zeros(count, x) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __xr = (x);\t\t\t\t\t\t\t\\\n    UWtype __a;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (W_TYPE_SIZE == 32)\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\t__a = __xr < ((UWtype) 1 << 2*__BITS4)\t\t\t\t\\\n\t  ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1)\t\t\\\n\t  : (__xr < ((UWtype) 1 << 3*__BITS4) ? 
2*__BITS4 + 1\t\t\\\n\t  : 3*__BITS4 + 1);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tfor (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)\t\t\t\\\n\t  if (((__xr >> __a) & 0xff) != 0)\t\t\t\t\\\n\t    break;\t\t\t\t\t\t\t\\\n\t++__a;\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];\t\t\\\n  } while (0)\n/* This version gives a well-defined value for zero. */\n#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)\n#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#endif\n\n#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB\nextern const unsigned char __GMP_DECLSPEC __clz_tab[129];\n#endif\n\n#if !defined (count_trailing_zeros)\n/* Define count_trailing_zeros using count_leading_zeros.  The latter might be\n   defined in asm, but if it is not, the C version above is good enough.  */\n#define count_trailing_zeros(count, x) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __ctz_x = (x);\t\t\t\t\t\t\\\n    UWtype __ctz_c;\t\t\t\t\t\t\t\\\n    ASSERT (__ctz_x != 0);\t\t\t\t\t\t\\\n    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);\t\t\t\\\n    (count) = W_TYPE_SIZE - 1 - __ctz_c;\t\t\t\t\\\n  } while (0)\n#endif\n\n#ifndef UDIV_NEEDS_NORMALIZATION\n#define UDIV_NEEDS_NORMALIZATION 0\n#endif\n\n/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and\n   that hence the latter should always be used.  */\n#ifndef UDIV_PREINV_ALWAYS\n#define UDIV_PREINV_ALWAYS 0\n#endif\n"
  },
  {
    "path": "longlong_pre.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* You have to define the following before including this file:\n\n   UWtype -- An unsigned type, default type for operations (typically a \"word\")\n   UHWtype -- An unsigned type, at least half the size of UWtype.\n   UDWtype -- An unsigned type, at least twice as large a UWtype\n   W_TYPE_SIZE -- size in bits of UWtype\n\n   SItype, USItype -- Signed and unsigned 32 bit types.\n   DItype, UDItype -- Signed and unsigned 64 bit types.\n\n   On a 32 bit machine UWtype should typically be USItype;\n   on a 64 bit machine, UWtype should typically be UDItype.\n*/\n\n#define __BITS4 (W_TYPE_SIZE / 4)\n#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))\n#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))\n#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))\n\n/* This is used to make sure no undesirable sharing between different libraries\n   that use this file takes place.  
*/\n#ifndef __MPN\n#define __MPN(x) __##x\n#endif\n\n/* Define auxiliary asm macros.\n\n   1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two\n   UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype\n   word product in HIGH_PROD and LOW_PROD.\n\n   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,\n   denominator) divides a UDWtype, composed by the UWtype integers\n   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient\n   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less\n   than DENOMINATOR for correct operation.  If, in addition, the most\n   significant bit of DENOMINATOR must be 1, then the pre-processor symbol\n   UDIV_NEEDS_NORMALIZATION is defined to 1.\n\n   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,\n   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient\n   is rounded towards 0.\n\n   5) count_leading_zeros(count, x) counts the number of zero-bits from the\n   msb to the first non-zero bit in the UWtype X.  This is the number of\n   steps X needs to be shifted left to set the msb.  Undefined for X == 0,\n   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.\n\n   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts\n   from the least significant end.\n\n   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,\n   high_addend_2, low_addend_2) adds two UWtype integers, composed by\n   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2\n   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow\n   (i.e. carry out) is not stored anywhere, and is lost.\n\n   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,\n   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,\n   composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and\n   LOW_SUBTRAHEND_2 respectively.  
The result is placed in HIGH_DIFFERENCE\n   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,\n   and is lost.\n\n   If any of these macros are left undefined for a particular CPU,\n   C macros are used.\n\n\n   Notes:\n\n   For add_ssaaaa the two high and two low addends can both commute, but\n   unfortunately gcc only supports one \"%\" commutative in each asm block.\n   This has always been so but is only documented in recent versions\n   (eg. pre-release 3.3).  Having two or more \"%\"s can cause an internal\n   compiler error in certain rare circumstances.\n\n   Apparently it was only the last \"%\" that was ever actually respected, so\n   the code has been updated to leave just that.  Clearly there's a free\n   choice whether high or low should get it, if there's a reason to favour\n   one over the other.  Also obviously when the constraints on the two\n   operands are identical there's no benefit to the reloader in any \"%\" at\n   all.\n\n   */\n\n/* The CPUs come in alphabetical order below.\n\n   Please add support for more CPUs here, or improve the current support\n   for the CPUs below!  */\n\n\n/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc\n   3.4 __builtin_clzl or __builtin_clzll, according to our limb size.\n   Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or\n   __builtin_ctzll.\n\n   These builtins are only used when we check what code comes out, on some\n   chips they're merely libgcc calls, where we will instead want an inline\n   in that case (either asm or generic C).\n\n   These builtins are better than an asm block of the same insn, since an\n   asm block doesn't give gcc any information about scheduling or resource\n   usage.  
We keep an asm block for use on prior versions of gcc though.\n\n   For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but\n   it's not used (for count_leading_zeros) because it generally gives extra\n   code to ensure the result is 0 when the input is 0, which we don't need\n   or want.  */\n\n#ifdef _LONG_LONG_LIMB\n#define count_leading_zeros_gcc_clz(count,x)    \\\n  do {                                          \\\n    ASSERT ((x) != 0);                          \\\n    (count) = __builtin_clzll (x);              \\\n  } while (0)\n#else\n#define count_leading_zeros_gcc_clz(count,x)    \\\n  do {                                          \\\n    ASSERT ((x) != 0);                          \\\n    (count) = __builtin_clzl (x);               \\\n  } while (0)\n#endif\n\n#ifdef _LONG_LONG_LIMB\n#define count_trailing_zeros_gcc_ctz(count,x)   \\\n  do {                                          \\\n    ASSERT ((x) != 0);                          \\\n    (count) = __builtin_ctzll (x);              \\\n  } while (0)\n#else\n#define count_trailing_zeros_gcc_ctz(count,x)   \\\n  do {                                          \\\n    ASSERT ((x) != 0);                          \\\n    (count) = __builtin_ctzl (x);               \\\n  } while (0)\n#endif\n"
  },
  {
    "path": "memory.c",
    "content": "/* Memory allocation routines.\n\nCopyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h> /* for malloc, realloc, free */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid *\t(*__gmp_allocate_func)(size_t) = __gmp_default_allocate;\nvoid *\t(*__gmp_reallocate_func)(void *, size_t, size_t) = __gmp_default_reallocate;\nvoid\t(*__gmp_free_func)(void *, size_t) = __gmp_default_free;\n\n\n/* Default allocation functions.  In case of failure to allocate/reallocate\n   an error message is written to stderr and the program aborts.  
*/\n\nvoid *\n__gmp_default_allocate (size_t size)\n{\n  void *ret;\n#ifdef DEBUG\n  size_t req_size = size;\n  size += 2 * BYTES_PER_MP_LIMB;\n#endif\n  ret = malloc (size);\n  if (ret == 0)\n    {\n#if defined _MSC_VER && defined _WIN64\n      fprintf (stderr, \"GNU MP: Cannot allocate memory (size=%llu)\\n\", size);\n#else\n      fprintf (stderr, \"GNU MP: Cannot allocate memory (size=%lu)\\n\", size);\n#endif\n      abort ();\n    }\n\n#ifdef DEBUG\n  {\n    mp_ptr p = ret;\n    p++;\n    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;\n    if (req_size % BYTES_PER_MP_LIMB == 0)\n      p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);\n    ret = p;\n  }\n#endif\n  return ret;\n}\n\nvoid *\n__gmp_default_reallocate (void *oldptr, size_t old_size, size_t new_size)\n{\n  void *ret;\n\n#ifdef DEBUG\n  size_t req_size = new_size;\n\n  if (old_size != 0)\n    {\n      mp_ptr p = oldptr;\n      if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)\n\t{\n\t  fprintf (stderr, \"gmp: (realloc) data clobbered before allocation block\\n\");\n\t  abort ();\n\t}\n      if (old_size % BYTES_PER_MP_LIMB == 0)\n\tif (p[old_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))\n\t  {\n\t    fprintf (stderr, \"gmp: (realloc) data clobbered after allocation block\\n\");\n\t    abort ();\n\t  }\n      oldptr = p - 1;\n    }\n\n  new_size += 2 * BYTES_PER_MP_LIMB;\n#endif\n\n  ret = realloc (oldptr, new_size);\n  if (ret == 0)\n    {\n#if defined _MSC_VER && defined _WIN64\n        fprintf (stderr, \"GNU MP: Cannot reallocate memory (old_size=%llu new_size=%llu)\\n\", old_size, new_size);\n#else\n        fprintf (stderr, \"GNU MP: Cannot reallocate memory (old_size=%lu new_size=%lu)\\n\", old_size, new_size);\n#endif\n      abort ();\n    }\n\n#ifdef DEBUG\n  {\n    mp_ptr p = ret;\n    p++;\n    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;\n    if (req_size % BYTES_PER_MP_LIMB == 0)\n      p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);\n    ret = 
p;\n  }\n#endif\n  return ret;\n}\n\nvoid\n__gmp_default_free (void *blk_ptr, size_t blk_size)\n{\n#ifdef DEBUG\n  {\n    mp_ptr p = blk_ptr;\n    if (blk_size != 0)\n      {\n\tif (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)\n\t  {\n\t    fprintf (stderr, \"gmp: (free) data clobbered before allocation block\\n\");\n\t    abort ();\n\t  }\n\tif (blk_size % BYTES_PER_MP_LIMB == 0)\n\t  if (p[blk_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))\n\t    {\n\t      fprintf (stderr, \"gmp: (free) data clobbered after allocation block\\n\");\n\t      abort ();\n\t    }\n      }\n    blk_ptr = p - 1;\n  }\n#endif\n  free (blk_ptr);\n}\n"
  },
  {
    "path": "mp_bpl.c",
    "content": "/*\nCopyright 1996 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nconst int mp_bits_per_limb = BITS_PER_MP_LIMB;\nconst int __gmp_0 = 0;\nint __gmp_junk;\n"
  },
  {
    "path": "mp_clz_tab.c",
    "content": "/* __clz_tab -- support for longlong.h\n\n   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE\n   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB\nconst\nunsigned char __clz_tab[129] =\n{\n  1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,\n  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,\n  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,\n  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,\n  9\n};\n#endif\n"
  },
  {
    "path": "mp_dv_tab.c",
    "content": "/* __gmp_digit_value_tab -- support for mp*_set_str\n\n   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE\n   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Table to be indexed by character, to get its numerical value.  Assumes ASCII\n   character set.\n\n   First part of table supports common usages, where 'A' and 'a' have the same\n   value; this supports bases 2..36\n\n   At offset 224, values for bases 37..62 start.  Here, 'A' has the value 10\n   (in decimal) and 'a' has the value 36.  
*/\n\n#define X 0xff\nconst unsigned char __gmp_digit_value_tab[] =\n{\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,\n  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,\n  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,\n  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,\n  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,\n  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,\n  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,\n  X,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,\n  51,52,53,54,55,56,57,58,59,60,61,X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,\n  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X\n};\n"
  },
  {
    "path": "mp_get_fns.c",
    "content": "/* mp_get_memory_functions -- Get the allocate, reallocate, and free functions.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmp_get_memory_functions (void *(**alloc_func) (size_t),\n\t\t\t void *(**realloc_func) (void *, size_t, size_t),\n\t\t\t void (**free_func) (void *, size_t))\n{\n  if (alloc_func != NULL)\n    *alloc_func = __gmp_allocate_func;\n\n  if (realloc_func != NULL)\n    *realloc_func = __gmp_reallocate_func;\n\n  if (free_func != NULL)\n    *free_func = __gmp_free_func;\n}\n"
  },
  {
    "path": "mp_minv_tab.c",
    "content": "/* A table of data supporting modlimb_invert().\n\n   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE\n   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* modlimb_invert_table[i] is the multiplicative inverse of 2*i+1 mod 256,\n   ie. 
(modlimb_invert_table[i] * (2*i+1)) % 256 == 1 */\n\nconst unsigned char  modlimb_invert_table[128] = {\n  0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,\n  0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,\n  0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,\n  0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,\n  0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,\n  0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,\n  0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,\n  0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,\n  0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,\n  0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,\n  0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,\n  0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,\n  0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,\n  0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,\n  0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,\n  0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF\n};\n"
  },
  {
    "path": "mp_set_fns.c",
    "content": "/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions\n   for use by the mp package.\n\nCopyright 1991, 1993, 1994, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmp_set_memory_functions (void *(*alloc_func) (size_t),\n\t\t\t void *(*realloc_func) (void *, size_t, size_t),\n\t\t\t void (*free_func) (void *, size_t))\n{\n  if (alloc_func == 0)\n    alloc_func = __gmp_default_allocate;\n  if (realloc_func == 0)\n    realloc_func = __gmp_default_reallocate;\n  if (free_func == 0)\n    free_func = __gmp_default_free;\n\n  __gmp_allocate_func = alloc_func;\n  __gmp_reallocate_func = realloc_func;\n  __gmp_free_func = free_func;\n}\n"
  },
  {
    "path": "mpf/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,\n# Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nnoinst_LTLIBRARIES = libmpf.la\nlibmpf_la_SOURCES = abs.c add.c add_ui.c ceilfloor.c clear.c clears.c cmp.c cmp_d.c cmp_si.c cmp_ui.c cmp_z.c div.c div_2exp.c div_ui.c dump.c eq.c fits_s.h fits_si.c fits_sint.c fits_slong.c fits_sshort.c fits_u.h fits_ui.c fits_uint.c fits_ulong.c fits_ushort.c get_d.c get_d_2exp.c get_dfl_prec.c get_prc.c get_si.c get_str.c get_ui.c init.c init2.c inits.c inp_str.c int_p.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c mul.c mul_2exp.c mul_ui.c neg.c out_str.c pow_ui.c random2.c reldiff.c rrandomb.c set.c set_d.c set_dfl_prec.c set_prc.c set_prc_raw.c set_q.c set_si.c set_str.c set_ui.c set_z.c size.c sqrt.c sqrt_ui.c sub.c sub_ui.c swap.c trunc.c ui_div.c ui_sub.c urandomb.c\n"
  },
  {
    "path": "mpf/abs.c",
    "content": "/* mpf_abs -- Compute the absolute value of a float.\n\nCopyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_abs (mpf_ptr r, mpf_srcptr u)\n{\n  mp_size_t size;\n\n  size = ABS (u->_mp_size);\n  if (r != u)\n    {\n      mp_size_t prec;\n      mp_ptr rp, up;\n\n      prec = r->_mp_prec + 1;\t/* lie not to lose precision in assignment */\n      rp = r->_mp_d;\n      up = u->_mp_d;\n\n      if (size > prec)\n\t{\n\t  up += size - prec;\n\t  size = prec;\n\t}\n\n      MPN_COPY (rp, up, size);\n      r->_mp_exp = u->_mp_exp;\n    }\n  r->_mp_size = size;\n}\n"
  },
  {
    "path": "mpf/add.c",
    "content": "/* mpf_add -- Add two floats.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_add (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_ptr rp, tp;\n  mp_size_t usize, vsize, rsize;\n  mp_size_t prec;\n  mp_exp_t uexp;\n  mp_size_t ediff;\n  mp_limb_t cy;\n  int negate;\n  TMP_DECL;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n\n  /* Handle special cases that don't work in generic code below.  */\n  if (usize == 0)\n    {\n    set_r_v_maybe:\n      if (r != v)\n        mpf_set (r, v);\n      return;\n    }\n  if (vsize == 0)\n    {\n      v = u;\n      goto set_r_v_maybe;\n    }\n\n  /* If signs of U and V are different, perform subtraction.  */\n  if ((usize ^ vsize) < 0)\n    {\n      __mpf_struct v_negated;\n      v_negated._mp_size = -vsize;\n      v_negated._mp_exp = v->_mp_exp;\n      v_negated._mp_d = v->_mp_d;\n      mpf_sub (r, u, &v_negated);\n      return;\n    }\n\n  TMP_MARK;\n\n  /* Signs are now known to be the same.  */\n  negate = usize < 0;\n\n  /* Make U be the operand with the largest exponent.  
*/\n  if (u->_mp_exp < v->_mp_exp)\n    {\n      mpf_srcptr t;\n      t = u; u = v; v = t;\n      usize = u->_mp_size;\n      vsize = v->_mp_size;\n    }\n\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  rp = r->_mp_d;\n  prec = r->_mp_prec;\n  uexp = u->_mp_exp;\n  ediff = u->_mp_exp - v->_mp_exp;\n\n  /* If U extends beyond PREC, ignore the part that does.  */\n  if (usize > prec)\n    {\n      up += usize - prec;\n      usize = prec;\n    }\n\n  /* If V extends beyond PREC, ignore the part that does.\n     Note that this may make vsize negative.  */\n  if (vsize + ediff > prec)\n    {\n      vp += vsize + ediff - prec;\n      vsize = prec - ediff;\n    }\n\n#if 0\n  /* Locate the least significant non-zero limb in (the needed parts\n     of) U and V, to simplify the code below.  */\n  while (up[0] == 0)\n    up++, usize--;\n  while (vp[0] == 0)\n    vp++, vsize--;\n#endif\n\n  /* Allocate temp space for the result.  Allocate\n     just vsize + ediff later???  */\n  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);\n\n  if (ediff >= prec)\n    {\n      /* V completely cancelled.  */\n      if (rp != up)\n\tMPN_COPY_INCR (rp, up, usize);\n      rsize = usize;\n    }\n  else\n    {\n      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */\n      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */\n\n      if (usize > ediff)\n\t{\n\t  /* U and V partially overlaps.  
*/\n\t  if (vsize + ediff <= usize)\n\t    {\n\t      /* uuuu     */\n\t      /*   v      */\n\t      mp_size_t size;\n\t      size = usize - ediff - vsize;\n\t      MPN_COPY (tp, up, size);\n\t      cy = mpn_add (tp + size, up + size, usize - size, vp, vsize);\n\t      rsize = usize;\n\t    }\n\t  else\n\t    {\n\t      /* uuuu     */\n\t      /*   vvvvv  */\n\t      mp_size_t size;\n\t      size = vsize + ediff - usize;\n\t      MPN_COPY (tp, vp, size);\n\t      cy = mpn_add (tp + size, up, usize, vp + size, usize - ediff);\n\t      rsize = vsize + ediff;\n\t    }\n\t}\n      else\n\t{\n\t  /* uuuu     */\n\t  /*      vv  */\n\t  mp_size_t size;\n\t  size = vsize + ediff - usize;\n\t  MPN_COPY (tp, vp, vsize);\n\t  MPN_ZERO (tp + vsize, ediff - usize);\n\t  MPN_COPY (tp + size, up, usize);\n\t  cy = 0;\n\t  rsize = size + usize;\n\t}\n\n      MPN_COPY (rp, tp, rsize);\n      rp[rsize] = cy;\n      rsize += cy;\n      uexp += cy;\n    }\n\n  r->_mp_size = negate ? -rsize : rsize;\n  r->_mp_exp = uexp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/add_ui.c",
    "content": "/* mpf_add_ui -- Add a float and an unsigned integer.\n\nCopyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_add_ui (mpf_ptr sum, mpf_srcptr u, mpir_ui v)\n{\n  mp_srcptr up = u->_mp_d;\n  mp_ptr sump = sum->_mp_d;\n  mp_size_t usize, sumsize;\n  mp_size_t prec = sum->_mp_prec;\n  mp_exp_t uexp = u->_mp_exp;\n\n  usize = u->_mp_size;\n  if (usize <= 0)\n    {\n      if (usize == 0)\n\t{\n\t  mpf_set_ui (sum, v);\n\t  return;\n\t}\n      else\n\t{\n\t  __mpf_struct u_negated;\n\t  u_negated._mp_size = -usize;\n\t  u_negated._mp_exp = u->_mp_exp;\n\t  u_negated._mp_d = u->_mp_d;\n\t  mpf_sub_ui (sum, &u_negated, v);\n\t  sum->_mp_size = -(sum->_mp_size);\n\t  return;\n\t}\n    }\n\n  if (v == 0)\n    {\n    sum_is_u:\n      if (u != sum)\n\t{\n\t  sumsize = MIN (usize, prec + 1);\n\t  MPN_COPY (sum->_mp_d, up + usize - sumsize, sumsize);\n\t  sum->_mp_size = sumsize;\n\t  sum->_mp_exp = u->_mp_exp;\n\t}\n      return;\n    }\n\n  if (uexp > 0)\n    {\n      /* U >= 1.  */\n      if (uexp > prec)\n\t{\n\t  /* U >> V, V is not part of final result.  
*/\n\t  goto sum_is_u;\n\t}\n      else\n\t{\n\t  /* U's \"limb point\" is somewhere between the first limb\n\t     and the PREC:th limb.\n\t     Both U and V are part of the final result.  */\n\t  if (uexp > usize)\n\t    {\n\t      /*   uuuuuu0000. */\n\t      /* +          v. */\n\t      /* We begin with moving U to the top of SUM, to handle\n\t\t samevar(U,SUM).  */\n\t      MPN_COPY_DECR (sump + uexp - usize, up, usize);\n\t      sump[0] = v;\n\t      MPN_ZERO (sump + 1, uexp - usize - 1);\n#if 0 /* What is this??? */\n\t      if (sum == u)\n\t\tMPN_COPY (sum->_mp_d, sump, uexp);\n#endif\n\t      sum->_mp_size = uexp;\n\t      sum->_mp_exp = uexp;\n\t    }\n\t  else\n\t    {\n\t      /*   uuuuuu.uuuu */\n\t      /* +      v.     */\n\t      mp_limb_t cy_limb;\n\t      if (usize > prec)\n\t\t{\n\t\t  /* Ignore excess limbs in U.  */\n\t\t  up += usize - prec;\n\t\t  usize -= usize - prec; /* Eq. usize = prec */\n\t\t}\n\t      if (sump != up)\n\t\tMPN_COPY_INCR (sump, up, usize - uexp);\n\t      cy_limb = mpn_add_1 (sump + usize - uexp, up + usize - uexp,\n\t\t\t\t   uexp, (mp_limb_t) v);\n\t      sump[usize] = cy_limb;\n\t      sum->_mp_size = usize + cy_limb;\n\t      sum->_mp_exp = uexp + cy_limb;\n\t    }\n\t}\n    }\n  else\n    {\n      /* U < 1, so V > U for sure.  */\n      /* v.         */\n      /*  .0000uuuu */\n      if ((-uexp) >= prec)\n\t{\n\t  sump[0] = v;\n\t  sum->_mp_size = 1;\n\t  sum->_mp_exp = 1;\n\t}\n      else\n\t{\n\t  if (usize + (-uexp) + 1 > prec)\n\t    {\n\t      /* Ignore excess limbs in U.  */\n\t      up += usize + (-uexp) + 1 - prec;\n\t      usize -= usize + (-uexp) + 1 - prec;\n\t    }\n\t  if (sump != up)\n\t    MPN_COPY_INCR (sump, up, usize);\n\t  MPN_ZERO (sump + usize, -uexp);\n\t  sump[usize + (-uexp)] = v;\n\t  sum->_mp_size = usize + (-uexp) + 1;\n\t  sum->_mp_exp = 1;\n\t}\n    }\n}\n"
  },
  {
    "path": "mpf/ceilfloor.c",
    "content": "/* mpf_ceil, mpf_floor -- round an mpf to an integer.\n\nCopyright 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* dir==1 for ceil, dir==-1 for floor\n\n   Notice the use of prec+1 ensures mpf_ceil and mpf_floor are equivalent to\n   mpf_set if u is already an integer.  
*/\n\nstatic void __gmpf_ceil_or_floor(REGPARM_2_1 (mpf_ptr r, mpf_srcptr u, int dir)) REGPARM_ATTR (1);\n#define mpf_ceil_or_floor(r,u,dir)  __gmpf_ceil_or_floor (REGPARM_2_1 (r, u, dir))\n\nREGPARM_ATTR (1) static void\nmpf_ceil_or_floor (mpf_ptr r, mpf_srcptr u, int dir)\n{\n  mp_ptr     rp, up, p;\n  mp_size_t  size, asize, prec;\n  mp_exp_t   exp;\n\n  size = SIZ(u);\n  if (size == 0)\n    {\n    zero:\n      SIZ(r) = 0;\n      EXP(r) = 0;\n      return;\n    }\n\n  rp = PTR(r);\n  exp = EXP(u);\n  if (exp <= 0)\n    {\n      /* u is only a fraction */\n      if ((size ^ dir) < 0)\n        goto zero;\n      rp[0] = 1;\n      EXP(r) = 1;\n      SIZ(r) = dir;\n      return;\n    }\n  EXP(r) = exp;\n\n  up = PTR(u);\n  asize = ABS (size);\n  up += asize;\n\n  /* skip fraction part of u */\n  asize = MIN (asize, exp);\n\n  /* don't lose precision in the copy */\n  prec = PREC (r) + 1;\n\n  /* skip excess over target precision */\n  asize = MIN (asize, prec);\n\n  up -= asize;\n\n  if ((size ^ dir) >= 0)\n    {\n      /* rounding direction matches sign, must increment if ignored part is\n         non-zero */\n      for (p = PTR(u); p != up; p++)\n        {\n          if (*p != 0)\n            {\n              if (mpn_add_1 (rp, up, asize, CNST_LIMB(1)))\n                {\n                  /* was all 0xFF..FFs, which have become zeros, giving just\n                     a carry */\n                  rp[0] = 1;\n                  asize = 1;\n                  EXP(r)++;\n                }\n              SIZ(r) = (size >= 0 ? asize : -asize);\n              return;\n            }\n        }\n    }\n\n  SIZ(r) = (size >= 0 ? asize : -asize);\n  if (rp != up)\n    MPN_COPY_INCR (rp, up, asize);\n}\n\n\nvoid\nmpf_ceil (mpf_ptr r, mpf_srcptr u)\n{\n  mpf_ceil_or_floor (r, u, 1);\n}\n\nvoid\nmpf_floor (mpf_ptr r, mpf_srcptr u)\n{\n  mpf_ceil_or_floor (r, u, -1);\n}\n"
  },
  {
    "path": "mpf/clear.c",
    "content": "/* mpf_clear -- de-allocate the space occupied by the dynamic digit space of\n   an integer.\n\nCopyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_clear (mpf_ptr m)\n{\n  (*__gmp_free_func) (m->_mp_d, (m->_mp_prec + 1) * BYTES_PER_MP_LIMB);\n}\n"
  },
  {
    "path": "mpf/clears.c",
    "content": "/* mpf_clears() -- Clear multiple mpf_t variables.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpf_clears (mpf_ptr x, ...)\n#else\nmpf_clears (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpf_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpf_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpf_clear (x);\n      x = va_arg (ap, mpf_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpf/cmp.c",
    "content": "/* mpf_cmp -- Compare two floats.\n\nCopyright 1993, 1994, 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_cmp (mpf_srcptr u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_size_t usize, vsize;\n  mp_exp_t uexp, vexp;\n  int cmp;\n  int usign;\n\n  uexp = u->_mp_exp;\n  vexp = v->_mp_exp;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n\n  /* 1. Are the signs different?  */\n  if ((usize ^ vsize) >= 0)\n    {\n      /* U and V are both non-negative or both negative.  */\n      if (usize == 0)\n\t/* vsize >= 0 */\n\treturn -(vsize != 0);\n      if (vsize == 0)\n\t/* usize >= 0 */\n\treturn usize != 0;\n      /* Fall out.  */\n    }\n  else\n    {\n      /* Either U or V is negative, but not both.  */\n      return usize >= 0 ? 1 : -1;\n    }\n\n  /* U and V have the same sign and are both non-zero.  */\n\n  usign = usize >= 0 ? 1 : -1;\n\n  /* 2. Are the exponents different?  */\n  if (uexp > vexp)\n    return usign;\n  if (uexp < vexp)\n    return -usign;\n\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n\n#define STRICT_MPF_NORMALIZATION 0\n#if ! 
STRICT_MPF_NORMALIZATION\n  /* Ignore zeroes at the low end of U and V.  */\n  while (up[0] == 0)\n    {\n      up++;\n      usize--;\n    }\n  while (vp[0] == 0)\n    {\n      vp++;\n      vsize--;\n    }\n#endif\n\n  if (usize > vsize)\n    {\n      cmp = mpn_cmp (up + usize - vsize, vp, vsize);\n      if (cmp == 0)\n\treturn usign;\n    }\n  else if (vsize > usize)\n    {\n      cmp = mpn_cmp (up, vp + vsize - usize, usize);\n      if (cmp == 0)\n\treturn -usign;\n    }\n  else\n    {\n      cmp = mpn_cmp (up, vp, usize);\n      if (cmp == 0)\n\treturn 0;\n    }\n  return cmp > 0 ? usign : -usign;\n}\n"
  },
  {
    "path": "mpf/cmp_d.c",
    "content": "/* mpf_cmp_d -- compare mpf and double.\n\nCopyright 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_cmp_d (mpf_srcptr f, double d)\n{\n  mp_limb_t  darray[LIMBS_PER_DOUBLE];\n  mpf_t      df;\n\n  /* d=NaN has no sensible return value, so raise an exception.\n     d=Inf or -Inf is always bigger than z.  */\n  DOUBLE_NAN_INF_ACTION (d,\n                         __gmp_invalid_operation (),\n                         return (d < 0.0 ? 1 : -1));\n\n  if (d == 0.0)\n    return SIZ(f);\n\n  PTR(df) = darray;\n  SIZ(df) = (d >= 0.0 ? LIMBS_PER_DOUBLE : -LIMBS_PER_DOUBLE);\n  EXP(df) = __gmp_extract_double (darray, ABS(d));\n\n  return mpf_cmp (f, df);\n}\n"
  },
  {
    "path": "mpf/cmp_si.c",
    "content": "/* mpf_cmp_si -- Compare a float with a signed integer.\n\nCopyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_cmp_si (mpf_srcptr u, mpir_si vval)\n{\n  mp_srcptr up;\n  mp_size_t usize;\n  mp_exp_t uexp;\n  mp_limb_t ulimb;\n  int usign;\n\n  uexp = u->_mp_exp;\n  usize = u->_mp_size;\n\n  /* 1. Are the signs different?  */\n  if ((usize < 0) == (vval < 0)) /* don't use xor, type size may differ */\n    {\n      /* U and V are both non-negative or both negative.  */\n      if (usize == 0)\n\t/* vval >= 0 */\n\treturn -(vval != 0);\n      if (vval == 0)\n\t/* usize >= 0 */\n\treturn usize != 0;\n      /* Fall out.  */\n    }\n  else\n    {\n      /* Either U or V is negative, but not both.  */\n      return usize >= 0 ? 1 : -1;\n    }\n\n  /* U and V have the same sign and are both non-zero.  */\n\n  usign = usize >= 0 ? 1 : -1;\n  usize = ABS (usize);\n  vval = ABS (vval);\n\n  /* 2. Are the exponents different (V's exponent == 1)?  
*/\n#if GMP_NAIL_BITS != 0\n  if (uexp > 1 + ((mpir_ui) vval > GMP_NUMB_MAX))\n    return usign;\n  if (uexp < 1 + ((mpir_ui) vval > GMP_NUMB_MAX))\n    return -usign;\n#else\n  if (uexp > 1)\n    return usign;\n  if (uexp < 1)\n    return -usign;\n#endif\n\n  up = u->_mp_d;\n\n  ulimb = up[usize - 1];\n#if GMP_NAIL_BITS != 0\n  if (usize >= 2 && uexp == 2)\n    {\n      if ((ulimb >> GMP_NAIL_BITS) != 0)\n\treturn usign;\n      ulimb = (ulimb << GMP_NUMB_BITS) | up[usize - 2];\n      usize--;\n    }\n#endif\n  usize--;\n\n  /* 3. Compare the most significant mantissa limb with V.  */\n  if (ulimb > (mpir_ui) vval)\n    return usign;\n  else if (ulimb < (mpir_ui) vval)\n    return -usign;\n\n  /* Ignore zeroes at the low end of U.  */\n  while (*up == 0)\n    {\n      up++;\n      usize--;\n    }\n\n  /* 4. Now, if the number of limbs are different, we have a difference\n     since we have made sure the trailing limbs are not zero.  */\n  if (usize > 0)\n    return usign;\n\n  /* Wow, we got zero even if we tried hard to avoid it.  */\n  return 0;\n}\n"
  },
  {
    "path": "mpf/cmp_ui.c",
    "content": "/* mpf_cmp_ui -- Compare a float with an unsigned integer.\n\nCopyright 1993, 1994, 1995, 1999, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_cmp_ui (mpf_srcptr u, mpir_ui vval)\n{\n  mp_srcptr up;\n  mp_size_t usize;\n  mp_exp_t uexp;\n  mp_limb_t ulimb;\n\n  uexp = u->_mp_exp;\n  usize = u->_mp_size;\n\n  /* 1. Is U negative?  */\n  if (usize < 0)\n    return -1;\n  /* We rely on usize being non-negative in the code that follows.  */\n\n  if (vval == 0)\n    return usize != 0;\n\n  /* 2. Are the exponents different (V's exponent == 1)?  */\n#if GMP_NAIL_BITS != 0\n  if (uexp > 1 + (vval > GMP_NUMB_MAX))\n    return 1;\n  if (uexp < 1 + (vval > GMP_NUMB_MAX))\n    return -1;\n#else\n  if (uexp > 1)\n    return 1;\n  if (uexp < 1)\n    return -1;\n#endif\n\n  up = u->_mp_d;\n\n  ulimb = up[usize - 1];\n#if GMP_NAIL_BITS != 0\n  if (usize >= 2 && uexp == 2)\n    {\n      if ((ulimb >> GMP_NAIL_BITS) != 0)\n\treturn 1;\n      ulimb = (ulimb << GMP_NUMB_BITS) | up[usize - 2];\n      usize--;\n    }\n#endif\n  usize--;\n\n  /* 3. Compare the most significant mantissa limb with V.  
*/\n  if (ulimb > vval)\n    return 1;\n  else if (ulimb < vval)\n    return -1;\n\n  /* Ignore zeroes at the low end of U.  */\n  while (*up == 0)\n    {\n      up++;\n      usize--;\n    }\n\n  /* 4. Now, if the number of limbs are different, we have a difference\n     since we have made sure the trailing limbs are not zero.  */\n  if (usize > 0)\n    return 1;\n\n  /* Wow, we got zero even if we tried hard to avoid it.  */\n  return 0;\n}\n"
  },
  {
    "path": "mpf/cmp_z.c",
    "content": "/* mpf_cmp_z -- Compare a float with an integer.\n\nCopyright 2015 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_cmp_z (mpf_srcptr u, mpz_srcptr v) __GMP_NOTHROW\n{\n  mpf_t vf;\n  mp_size_t size;\n\n  SIZ (vf) = size = SIZ (v);\n  EXP (vf) = size = ABS (size);\n  /* PREC (vf) = size; */ \n  PTR (vf) = PTR (v);\n\n  return mpf_cmp (u, vf);\n}\n"
  },
  {
    "path": "mpf/div.c",
    "content": "/* mpf_div -- Divide two floats.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Not done:\n\n   No attempt is made to identify an overlap u==v.  The result will be\n   correct (1.0), but a full actual division is done whereas of course\n   x/x==1 needs no work.  Such a call is not a sensible thing to make, and\n   it's left to an application to notice and optimize if it might arise\n   somehow through pointer aliasing or whatever.\n\n   Enhancements:\n\n   The high quotient limb is non-zero when high{up,vsize} >= {vp,vsize}.  We\n   could make that comparison and use qsize==prec instead of qsize==prec+1,\n   to save one limb in the division.\n\n   If r==u but the size is enough bigger than prec that there won't be an\n   overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid\n   copying up,usize.  
This would only arise from a prec reduced with\n   mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if\n   it could be worked into the copy_u decision cleanly.\n\n   Future:\n\n   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of\n   padding u with zeros in temporary space.\n\n   If/when a quotient-only division exists it can be used here immediately.\n   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.  */\n\nvoid\nmpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_ptr rp, remp, tp, new_vp;\n  mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros, copy_v_size;\n  mp_size_t sign_quotient, prec, high_zero, chop;\n  mp_exp_t rexp;\n  int copy_u;\n  TMP_DECL;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n  sign_quotient = usize ^ vsize;\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n  prec = r->_mp_prec;\n\n  if (vsize == 0)\n    DIVIDE_BY_ZERO;\n\n  if (usize == 0)\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  TMP_MARK;\n  rexp = u->_mp_exp - v->_mp_exp + 1;\n\n  rp = r->_mp_d;\n  up = u->_mp_d;\n  vp = v->_mp_d;\n\n  prospective_rsize = usize - vsize + 1; /* quot from using given u,v sizes */\n  rsize = prec + 1;                      /* desired quot */\n\n  zeros = rsize - prospective_rsize;     /* padding u to give rsize */\n  copy_u = (zeros > 0 || rp == up);      /* copy u if overlap or padding */\n\n  chop = MAX (-zeros, 0);                /* negative zeros means shorten u */\n  up += chop;\n  usize -= chop;\n  zeros += chop;                         /* now zeros >= 0 */\n\n  tsize = usize + zeros;                 /* size for possible copy of u */\n\n  if (WANT_TMP_DEBUG)\n    {\n      /* separate blocks, for malloc debugging */\n      remp = TMP_ALLOC_LIMBS (vsize);\n      tp = (copy_u ? TMP_ALLOC_LIMBS (tsize) : NULL);\n      new_vp = (rp == vp ? 
TMP_ALLOC_LIMBS (vsize) : NULL);\n    }\n  else\n    {\n      /* one block with conditionalized size, for efficiency */\n      copy_v_size = (rp == vp ? vsize : 0);\n      remp = TMP_ALLOC_LIMBS (vsize + copy_v_size + (copy_u ? tsize : 0));\n      new_vp = remp + vsize;\n      tp = new_vp + copy_v_size;\n    }\n\n  /* copy and possibly extend u if necessary */\n  if (copy_u)\n    {\n      MPN_ZERO (tp, zeros);\n      MPN_COPY (tp+zeros, up, usize);\n      up = tp;\n      usize = tsize;\n    }\n\n  /* ensure divisor doesn't overlap quotient */\n  if (rp == vp)\n    {\n      MPN_COPY (new_vp, vp, vsize);\n      vp = new_vp;\n    }\n\n  ASSERT (usize-vsize+1 == rsize);\n  mpn_tdiv_qr (rp, remp, (mp_size_t) 0, up, usize, vp, vsize);\n\n  /* strip possible zero high limb */\n  high_zero = (rp[rsize-1] == 0);\n  rsize -= high_zero;\n  rexp -= high_zero;\n\n  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;\n  r->_mp_exp = rexp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/div_2exp.c",
    "content": "/* mpf_div_2exp -- Divide a float by 2^n.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Multiples of GMP_NUMB_BITS in exp simply mean an amount subtracted from\n   EXP(u) to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a right\n   shift for the limb data.\n\n   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just\n   do an mpz_set with changed EXP(r).  Like mpz_set we take prec+1 limbs in\n   this case.  Although just prec would suffice, it's nice to have\n   mpf_div_2exp with exp==0 come out the same as mpz_set.\n\n   When shifting we take up to prec many limbs from the input.  Our shift is\n   cy = mpn_rshift (PTR(r)+1, PTR(u)+k, ...), where k is the number of low\n   limbs dropped from u, and the carry out is stored to PTR(r)[0].  We don't\n   try to work extra bits from PTR(u)[k-1] (when k>=1 makes it available)\n   into that low carry limb.  
Just prec limbs (with the high non-zero) from\n   the input is enough bits for the application requested precision, no need\n   to do extra work.\n\n   If r==u the shift will have overlapping operands.  When k>=1 (ie. when\n   usize > prec), the overlap is in the style supported by rshift (ie. dst\n   <= src).\n\n   But when r==u and k==0 (ie. usize <= prec), we would have an invalid\n   overlap (mpn_rshift (rp+1, rp, ...)).  In this case we must instead use\n   mpn_lshift (PTR(r), PTR(u), size, NUMB-shift).  An lshift by NUMB-shift\n   bits gives identical data of course, it's just its overlap restrictions\n   which differ.\n\n   In both shift cases, the resulting data is abs_usize+1 limbs.  \"adj\" is\n   used to add +1 to that size if the high is non-zero (it may of course\n   have become zero by the shifting).  EXP(u) is the exponent just above\n   those abs_usize+1 limbs, so it gets -1+adj, which means -1 if the high is\n   zero, or no change if the high is non-zero.\n\n   Enhancements:\n\n   The way mpn_lshift is used means successive mpf_div_2exp calls on the\n   same operand will accumulate low zero limbs, until prec+1 limbs is\n   reached.  This is wasteful for subsequent operations.  When abs_usize <=\n   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],\n   ie. those which would be shifted out by an mpn_rshift.  If they're zero\n   then use that mpn_rshift.  
*/\n\nvoid\nmpf_div_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)\n{\n  mp_srcptr up;\n  mp_ptr rp = r->_mp_d;\n  mp_size_t usize;\n  mp_size_t abs_usize;\n  mp_size_t prec = r->_mp_prec;\n  mp_exp_t uexp = u->_mp_exp;\n\n  usize = u->_mp_size;\n\n  if (UNLIKELY (usize == 0))\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  abs_usize = ABS (usize);\n  up = u->_mp_d;\n\n  if (exp % GMP_NUMB_BITS == 0)\n    {\n      prec++;\t\t\t/* retain more precision here as we don't need\n\t\t\t\t   to account for carry-out here */\n      if (abs_usize > prec)\n\t{\n\t  up += abs_usize - prec;\n\t  abs_usize = prec;\n\t}\n      if (rp != up)\n\tMPN_COPY_INCR (rp, up, abs_usize);\n      r->_mp_exp = uexp - exp / GMP_NUMB_BITS;\n    }\n  else\n    {\n      mp_limb_t cy_limb;\n      mp_size_t adj;\n      if (abs_usize > prec)\n\t{\n\t  up += abs_usize - prec;\n\t  abs_usize = prec;\n\t  /* Use mpn_rshift since mpn_lshift operates downwards, and we\n\t     therefore would clobber part of U before using that part, in case\n\t     R is the same variable as U.  */\n\t  cy_limb = mpn_rshift (rp + 1, up, abs_usize, exp % GMP_NUMB_BITS);\n\t  rp[0] = cy_limb;\n\t  adj = rp[abs_usize] != 0;\n\t}\n      else\n\t{\n\t  cy_limb = mpn_lshift (rp, up, abs_usize,\n\t\t\t\tGMP_NUMB_BITS - exp % GMP_NUMB_BITS);\n\t  rp[abs_usize] = cy_limb;\n\t  adj = cy_limb != 0;\n\t}\n\n      abs_usize += adj;\n      r->_mp_exp = uexp - exp / GMP_NUMB_BITS - 1 + adj;\n    }\n  r->_mp_size = usize >= 0 ? abs_usize : -abs_usize;\n}\n"
  },
  {
    "path": "mpf/div_ui.c",
    "content": "/* mpf_div_ui -- Divide a float with an unsigned integer.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpf_div_ui (mpf_ptr r, mpf_srcptr u, mpir_ui v)\n{\n  mp_srcptr up;\n  mp_ptr rp, tp, rtp;\n  mp_size_t usize;\n  mp_size_t rsize, tsize;\n  mp_size_t sign_quotient;\n  mp_size_t prec;\n  mp_limb_t q_limb;\n  mp_exp_t rexp;\n  TMP_DECL;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (v > GMP_NUMB_MAX)\n    {\n      mpf_t vf;\n      mp_limb_t vl[2];\n      SIZ(vf) = 2;\n      EXP(vf) = 2;\n      PTR(vf) = vl;\n      vl[0] = v & GMP_NUMB_MASK;\n      vl[1] = v >> GMP_NUMB_BITS;\n      mpf_div (r, u, vf);\n      return;\n    }\n#endif\n\n  usize = u->_mp_size;\n  sign_quotient = usize;\n  usize = ABS (usize);\n  prec = r->_mp_prec;\n\n  if (v == 0)\n    DIVIDE_BY_ZERO;\n\n  if (usize == 0)\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  TMP_MARK;\n\n  rp = r->_mp_d;\n  up = u->_mp_d;\n\n  tsize = 1 + prec;\n  tp = (mp_ptr) TMP_ALLOC ((tsize + 1) * 
BYTES_PER_MP_LIMB);\n\n  if (usize > tsize)\n    {\n      up += usize - tsize;\n      usize = tsize;\n      rtp = tp;\n    }\n  else\n    {\n      MPN_ZERO (tp, tsize - usize);\n      rtp = tp + (tsize - usize);\n    }\n\n  /* Move the dividend to the remainder.  */\n  MPN_COPY (rtp, up, usize);\n\n  mpn_divmod_1 (rp, tp, tsize, (mp_limb_t) v);\n  q_limb = rp[tsize - 1];\n\n  rsize = tsize - (q_limb == 0);\n  rexp = u->_mp_exp - (q_limb == 0);\n  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;\n  r->_mp_exp = rexp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/dump.c",
    "content": "/* mpf_dump -- Dump a float to stdout.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO\n   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS\n   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <string.h> /* for strlen */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_dump (mpf_srcptr u)\n{\n  mp_exp_t exp;\n  char *str;\n\n  str = mpf_get_str (0, &exp, 10, 0, u);\n  if (str[0] == '-')\n    printf (\"-0.%se%ld\\n\", str + 1, exp);\n  else\n    printf (\"0.%se%ld\\n\", str, exp);\n  (*__gmp_free_func) (str, strlen (str) + 1);\n}\n"
  },
  {
    "path": "mpf/eq.c",
    "content": "/* mpf_eq -- Compare two floats up to a specified bit #.\n\nCopyright 1993, 1995, 1996, 2001, 2002 Free Software Foundation, Inc.\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nint\nmpf_eq (mpf_srcptr u, mpf_srcptr v, mp_bitcnt_t n_bits)\n{\n  mp_srcptr up, vp;\n  mp_size_t usize, vsize, size, i, cu, cv, n, k;\n  mp_limb_t uval, vval;\n  mp_exp_t uexp, vexp;\n\n  uexp = u->_mp_exp;\n  vexp = v->_mp_exp;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n\n  /* 1. Are the signs different?  */\n  if ((usize ^ vsize) >= 0)\n    {\n      /* U and V are both non-negative or both negative.  */\n      if (usize == 0)\n\treturn vsize == 0;\n      if (vsize == 0)\n\treturn 0;\n\n      /* Fall out.  */\n    }\n  else\n    {\n      /* Either U or V is negative, but not both.  */\n      return 0;\n    }\n\n  /* U and V have the same sign and are both non-zero.  */\n\n  /* 2. Are the exponents different?  */\n  if (uexp > vexp)\n    return 0;\t\t\t/* ??? handle (uexp = vexp + 1)   */\n  if (vexp > uexp)\n    return 0;\t\t\t/* ??? 
handle (vexp = uexp + 1)   */\n\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n\n  count_leading_zeros (cu, up[usize - 1]);\n  count_leading_zeros (cv, vp[vsize - 1]);\n  if (cu != cv)\n    return 0;\n  n = BITS_TO_LIMBS (n_bits + cu);\n//compare bottom limb\n  k = n * GMP_NUMB_BITS - n_bits - cu;\n  uval = vval = 0;\n  if (usize - n >= 0)\n    uval = up[usize - n];\n  if (vsize - n >= 0)\n    vval = vp[vsize - n];\n  if ((uval >> k) != (vval >> k))\n    return 0;\n// compare rest of limbs\n  for (i = usize - n + 1; i < usize ; i++)\n    {\n      uval = vval = 0;\n      if (i >= 0)\n\tuval = up[i];\n      if (i - usize + vsize >= 0)\n\tvval = vp[i - usize + vsize];\n      if (uval != vval)\n\treturn 0;\n    }\n  return 1;\n}\n"
  },
  {
    "path": "mpf/fits_s.h",
    "content": "/* mpf_fits_s*_p -- test whether an mpf fits a C signed type.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Notice this is equivalent to mpz_set_f + mpz_fits_s*_p.  */\n\nint\nFUNCTION (mpf_srcptr f)\n{\n  mp_size_t  fs, fn;\n  mp_srcptr  fp;\n  mp_exp_t   exp;\n  mp_limb_t  fl;\n\n  fs = SIZ(f);\n  if (fs == 0)\n    return 1;  /* zero fits */\n\n  exp = EXP(f);\n  if (exp < 1)\n    return 1;  /* -1 < f < 1 truncates to zero, so fits */\n\n  fp = PTR(f);\n  fn = ABS (fs);\n\n  if (exp == 1)\n    {\n      fl = fp[fn-1];\n    }\n#if GMP_NAIL_BITS != 0\n  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)\n    {\n      fl = fp[fn-1];\n      if ((fl >> GMP_NAIL_BITS) != 0)\n\treturn 0;\n      fl = (fl << GMP_NUMB_BITS);\n      if (fn >= 2)\n        fl |= fp[fn-2];\n    }\n#endif\n  else\n    return 0;\n\n  return fl <= (fs >= 0 ? (mp_limb_t) MAXIMUM : - (mp_limb_t) MINIMUM);\n}\n"
  },
  {
    "path": "mpf/fits_si.c",
    "content": "/* mpf_fits_si_p -- test whether an mpf fits a mpir_si.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION   mpf_fits_si_p\n#define MAXIMUM    GMP_SI_MAX\n#define MINIMUM    GMP_SI_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpf/fits_sint.c",
    "content": "/* mpf_fits_sint_p -- test whether an mpf fits an int.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION   mpf_fits_sint_p\n#define MAXIMUM    INT_MAX\n#define MINIMUM    INT_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpf/fits_slong.c",
    "content": "/* mpf_fits_slong_p -- test whether an mpf fits a long.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION   mpf_fits_slong_p\n#define MAXIMUM    LONG_MAX\n#define MINIMUM    LONG_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpf/fits_sshort.c",
    "content": "/* mpf_fits_sshort_p -- test whether an mpf fits a short.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION   mpf_fits_sshort_p\n#define MAXIMUM    SHRT_MAX\n#define MINIMUM    SHRT_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpf/fits_u.h",
    "content": "/* mpf_fits_u*_p -- test whether an mpf fits a C unsigned type.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Notice this is equivalent to mpz_set_f + mpz_fits_u*_p.  */\n\nint\nFUNCTION (mpf_srcptr f)\n{\n  mp_size_t  fn;\n  mp_srcptr  fp;\n  mp_exp_t   exp;\n  mp_limb_t  fl;\n\n  exp = EXP(f);\n  if (exp < 1)\n    return 1;  /* -1 < f < 1 truncates to zero, so fits */\n  fn = SIZ(f);\n  if (fn <= 0)\n    return fn == 0;  /* zero fits, negatives don't */\n\n\n  fp = PTR(f);\n\n  if (exp == 1)\n    {\n      fl = fp[fn-1];\n    }\n#if GMP_NAIL_BITS != 0\n  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)\n    {\n      fl = fp[fn-1];\n      if ((fl >> GMP_NAIL_BITS) != 0)\n\treturn 0;\n      fl = (fl << GMP_NUMB_BITS);\n      if (fn >= 2)\n        fl |= fp[fn-2];\n    }\n#endif\n  else\n    return 0;\n\n  return fl <= MAXIMUM;\n}\n"
  },
  {
    "path": "mpf/fits_ui.c",
    "content": "/* mpf_fits_uint_p -- test whether an mpf fits an unsigned int.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpf_fits_ui_p\n#define MAXIMUM   GMP_UI_MAX\n\n#include \"fits_u.h\"\n"
  },
  {
    "path": "mpf/fits_uint.c",
    "content": "/* mpf_fits_uint_p -- test whether an mpf fits an unsigned int.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpf_fits_uint_p\n#define MAXIMUM   UINT_MAX\n\n#include \"fits_u.h\"\n"
  },
  {
    "path": "mpf/fits_ulong.c",
    "content": "/* mpf_fits_ulong_p -- test whether an mpf fits an unsigned long.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpf_fits_ulong_p\n#define MAXIMUM   ULONG_MAX\n\n#include \"fits_u.h\"\n"
  },
  {
    "path": "mpf/fits_ushort.c",
    "content": "/* mpf_fits_ushort_p -- test whether an mpf fits an unsigned short.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpf_fits_ushort_p\n#define MAXIMUM   USHRT_MAX\n\n#include \"fits_u.h\"\n"
  },
  {
    "path": "mpf/get_d.c",
    "content": "/* double mpf_get_d (mpf_t src) -- return SRC truncated to a double.\n\nCopyright 1996, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\ndouble\nmpf_get_d (mpf_srcptr src)\n{\n  mp_size_t  size, abs_size;\n  long       exp;\n\n  size = SIZ (src);\n  if (UNLIKELY (size == 0))\n    return 0.0;\n\n  abs_size = ABS (size);\n  exp = (EXP (src) - abs_size) * GMP_NUMB_BITS;\n  return mpn_get_d (PTR (src), abs_size, size, exp);\n}\n"
  },
  {
    "path": "mpf/get_d_2exp.c",
    "content": "/* double mpf_get_d_2exp (signed long int *exp, mpf_t src).\n\nCopyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef _MSC_VER\nstatic\n#endif\nmpir_si\nmpf_get_2exp_d (double *r, mpf_srcptr src)\n{\n  mp_size_t size, abs_size;\n  mp_srcptr ptr;\n  int cnt;\n\n  size = SIZ(src);\n  if (UNLIKELY (size == 0))\n    {\n\t  *r = 0.0;\n\t  return 0;\n    }\n\n  ptr = PTR(src);\n  abs_size = ABS (size);\n  count_leading_zeros (cnt, ptr[abs_size - 1]);\n  cnt -= GMP_NAIL_BITS;\n\n  *r = mpn_get_d (ptr, abs_size, size,\n                    (long) - (abs_size * GMP_NUMB_BITS - cnt));\n  return EXP(src) * GMP_NUMB_BITS - cnt;\n}\n\ndouble\nmpf_get_d_2exp(signed long *exp2, mpf_srcptr src)\n{\n\tdouble r;\n\t*exp2 = mpf_get_2exp_d(&r, src);\n\treturn r;\n}\n"
  },
  {
    "path": "mpf/get_dfl_prec.c",
    "content": "/* mpf_get_default_prec -- return default precision in bits.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nmp_bitcnt_t\nmpf_get_default_prec (void)\n{\n  return __GMPF_PREC_TO_BITS (__gmp_default_fp_limb_precision);\n}\n"
  },
  {
    "path": "mpf/get_prc.c",
    "content": "/* mpf_get_prec(x) -- Return the precision in bits of x.\n\nCopyright 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_bitcnt_t\nmpf_get_prec (mpf_srcptr x)\n{\n  return __GMPF_PREC_TO_BITS (x->_mp_prec);\n}\n"
  },
  {
    "path": "mpf/get_si.c",
    "content": "/* mpf_get_si -- mpf to long conversion\n\nCopyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Any fraction bits are truncated, meaning simply discarded.\n\n   For values bigger than a long, the low bits are returned, like\n   mpz_get_si, but this isn't documented.\n\n   Notice this is equivalent to mpz_set_f + mpz_get_si.\n\n\n   Implementation:\n\n   fl is established in basically the same way as for mpf_get_ui, see that\n   code for explanations of the conditions.\n\n   However unlike mpf_get_ui we need an explicit return 0 for exp<=0.  When\n   f is a negative fraction (ie. size<0 and exp<=0) we can't let fl==0 go\n   through to the zany final \"~ ((fl - 1) & LONG_MAX)\", that would give\n   -0x80000000 instead of the desired 0.  
*/\n\nmpir_si\nmpf_get_si (mpf_srcptr f)\n{\n  mp_exp_t exp;\n  mp_size_t size, abs_size;\n  mp_srcptr fp;\n  mp_limb_t fl;\n\n  exp = EXP (f);\n  size = SIZ (f);\n  fp = PTR (f);\n\n  /* fraction alone truncates to zero\n     this also covers zero, since we have exp==0 for zero */\n  if (exp <= 0)\n    return 0L;\n\n  /* there are some limbs above the radix point */\n\n  fl = 0;\n  abs_size = ABS (size);\n  if (abs_size >= exp)\n    fl = fp[abs_size-exp];\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n  if (exp > 1 && abs_size+1 >= exp)\n    fl |= fp[abs_size - exp + 1] << GMP_NUMB_BITS;\n#endif\n\n  if (size > 0)\n    return (mpir_si)(fl & GMP_SI_MAX);\n  else\n    /* this form necessary to correctly handle -0x80..00 */\n    return (mpir_si)(~((fl - 1) & GMP_SI_MAX));\n}\n"
  },
  {
    "path": "mpf/get_str.c",
    "content": "/* mpf_get_str (digit_ptr, exp, base, n_digits, a) -- Convert the floating\n  point number A to a base BASE number and store N_DIGITS raw digits at\n  DIGIT_PTR, and the base BASE exponent in the word pointed to by EXP.  For\n  example, the number 3.1416 would be returned as \"31416\" in DIGIT_PTR and\n  1 in EXP.\n\nCopyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdlib.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\t\t/* for count_leading_zeros */\n\n/* Could use some more work.\n\n   1. Don't unconditionally allocate temps on the stack.\n   2. Make one temp alloc block, and split it manually.\n   3. Allocation is excessive.  Try to combine areas.  Perhaps use result\n      string area for temp limb space?\n   4. We generate up to two limbs worth of digits.  This is because we don't\n      check the exact number of bits in the input operand, and from that\n      compute an accurate exponent (variable e in the code).  
It would be\n      cleaner and probably somewhat faster to change this.\n*/\n\n/* Compute base^exp and return the most significant prec limbs in rp[].\n   Put the count of omitted low limbs in *ign.\n   Return the actual size (which might be less than prec).\n   Allocation of rp[] and the temporary tp[] should be 2*prec+2 limbs.  */\nstatic mp_size_t\nmpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,\n\t\t    mp_limb_t base, mpir_ui exp,\n\t\t    mp_size_t prec, mp_ptr tp)\n{\n  mp_size_t ign;\t\t/* counts number of ignored low limbs in r */\n  mp_size_t off;\t\t/* keeps track of offset where value starts */\n  mp_ptr passed_rp = rp;\n  mp_size_t rn;\n  int cnt;\n  int i;\n\n  if (exp == 0)\n    {\n      rp[0] = 1;\n      *ignp = 0;\n      return 1;\n    }\n\n  rp[0] = base;\n  rn = 1;\n  off = 0;\n  ign = 0;\n  count_leading_zeros (cnt, exp);\n  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)\n    {\n      mpn_sqr (tp, rp + off, rn);\n      rn = 2 * rn;\n      rn -= tp[rn - 1] == 0;\n      ign <<= 1;\n\n      off = 0;\n      if (rn > prec)\n\t{\n\t  ign += rn - prec;\n\t  off = rn - prec;\n\t  rn = prec;\n\t}\n      MP_PTR_SWAP (rp, tp);\n\n      if (((exp >> i) & 1) != 0)\n\t{\n\t  mp_limb_t cy;\n\t  cy = mpn_mul_1 (rp, rp + off, rn, base);\n\t  rp[rn] = cy;\n\t  rn += cy != 0;\n\t  off = 0;\n\t}\n    }\n\n  if (rn > prec)\n    {\n      ign += rn - prec;\n      rp += rn - prec;\n      rn = prec;\n    }\n\n  MPN_COPY_INCR (passed_rp, rp + off, rn);\n  *ignp = ign;\n  return rn;\n}\n\nchar *\nmpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)\n{\n  mp_exp_t ue;\n  mp_size_t n_limbs_needed;\n  size_t max_digits;\n  mp_ptr up, pp, tp;\n  mp_size_t un, pn, tn;\n  unsigned char *tstr;\n  mp_exp_t exp_in_base;\n  size_t n_digits_computed;\n  mp_size_t i;\n  const char *num_to_text;\n  size_t alloc_size = 0;\n  char *dp;\n  TMP_DECL;\n\n  up = PTR(u);\n  un = ABSIZ(u);\n  ue = EXP(u);\n\n  if (base >= 0)\n    {\n      num_to_text = 
\"0123456789abcdefghijklmnopqrstuvwxyz\";\n      if (base == 0)\n\tbase = 10;\n      else if (base > 36)\n\t{\n\t  num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\";\n\t  if (base > 62)\n\t    return NULL;\n\t}\n    }\n  else\n    {\n      base = -base;\n      num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\";\n    }\n\n  MPF_SIGNIFICANT_DIGITS (max_digits, base, PREC(u));\n  if (n_digits == 0 || n_digits > max_digits)\n    n_digits = max_digits;\n\n  if (dbuf == 0)\n    {\n      /* We didn't get a string from the user.  Allocate one (and return\n\t a pointer to it) with space for `-' and terminating null.  */\n      alloc_size = n_digits + 2;\n      dbuf = (char *) (*__gmp_allocate_func) (n_digits + 2);\n    }\n\n  if (un == 0)\n    {\n      *exp = 0;\n      *dbuf = 0;\n      n_digits = 0;\n      goto done;\n    }\n\n  TMP_MARK;\n\n  /* Allocate temporary digit space.  We can't put digits directly in the user\n     area, since we generate more digits than requested.  (We allocate\n     2 * GMP_LIMB_BITS extra bytes because of the digit block nature of the\n     conversion.)  */\n  tstr = (unsigned char *) TMP_ALLOC (n_digits + 2 * GMP_LIMB_BITS + 3);\n\n  n_limbs_needed = 2 + ((mp_size_t) (n_digits / mp_bases[base].chars_per_bit_exactly)) / GMP_NUMB_BITS;\n\n  if (ue <= n_limbs_needed)\n    {\n      /* We need to multiply number by base^n to get an n_digits integer part.  
*/\n      mp_size_t n_more_limbs_needed, ign, off;\n      mpir_ui e;\n\n      n_more_limbs_needed = n_limbs_needed - ue;\n      e = (mpir_ui) n_more_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);\n\n      if (un > n_limbs_needed)\n\t{\n\t  up += un - n_limbs_needed;\n\t  un = n_limbs_needed;\n\t}\n      pp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);\n      tp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);\n\n      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed, tp);\n      if (un > pn)\n\tmpn_mul (tp, up, un, pp, pn);\t/* FIXME: mpn_mul_highpart */\n      else\n\tmpn_mul (tp, pp, pn, up, un);\t/* FIXME: mpn_mul_highpart */\n      tn = un + pn;\n      tn -= tp[tn - 1] == 0;\n      off = un - ue - ign;\n      if (off < 0)\n\t{\n\t  MPN_COPY_DECR (tp - off, tp, tn);\n\t  MPN_ZERO (tp, -off);\n\t  tn -= off;\n\t  off = 0;\n\t}\n      n_digits_computed = mpn_get_str (tstr, base, tp + off, tn - off);\n\n      exp_in_base = n_digits_computed - e;\n    }\n  else\n    {\n      /* We need to divide number by base^n to get an n_digits integer part.  
*/\n      mp_size_t n_less_limbs_needed, ign, off, xn;\n      mpir_ui e;\n      mp_ptr dummyp, xp;\n\n      n_less_limbs_needed = ue - n_limbs_needed;\n      e = (mpir_ui) n_less_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);\n\n      if (un > n_limbs_needed)\n\t{\n\t  up += un - n_limbs_needed;\n\t  un = n_limbs_needed;\n\t}\n      pp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);\n      tp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);\n\n      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed, tp);\n\n      xn = n_limbs_needed + (n_less_limbs_needed-ign);\n      xp = TMP_ALLOC_LIMBS (xn);\n      off = xn - un;\n      MPN_ZERO (xp, off);\n      MPN_COPY (xp + off, up, un);\n\n      dummyp = TMP_ALLOC_LIMBS (pn);\n      mpn_tdiv_qr (tp, dummyp, (mp_size_t) 0, xp, xn, pp, pn);\n      tn = xn - pn + 1;\n      tn -= tp[tn - 1] == 0;\n      n_digits_computed = mpn_get_str (tstr, base, tp, tn);\n\n      exp_in_base = n_digits_computed + e;\n    }\n\n  /* We should normally have computed too many digits.  Round the result\n     at the point indicated by n_digits.  */\n  if (n_digits_computed > n_digits)\n    {\n      size_t i;\n      /* Round the result.  */\n      if (tstr[n_digits] * 2 >= base)\n\t{\n\t  n_digits_computed = n_digits;\n\t  for (i = n_digits - 1;; i--)\n\t    {\n\t      unsigned int x;\n\t      x = ++(tstr[i]);\n\t      if (x != base)\n\t\tbreak;\n\t      n_digits_computed--;\n\t      if (i == 0)\n\t\t{\n\t\t  /* We had something like `bbbbbbb...bd', where 2*d >= base\n\t\t     and `b' denotes digit with significance base - 1.\n\t\t     This rounds up to `1', increasing the exponent.  */\n\t\t  tstr[0] = 1;\n\t\t  n_digits_computed = 1;\n\t\t  exp_in_base++;\n\t\t  break;\n\t\t}\n\t    }\n\t}\n    }\n\n  /* We might have fewer digits than requested as a result of rounding above,\n     (i.e. 0.999999 => 1.0) or because we have a number that simply doesn't\n     need many digits in this base (e.g., 0.125 in base 10). 
 */\n  if (n_digits > n_digits_computed)\n    n_digits = n_digits_computed;\n\n  /* Remove trailing 0.  There can be many zeros.  */\n  while (n_digits != 0 && tstr[n_digits - 1] == 0)\n    n_digits--;\n\n  dp = dbuf + (SIZ(u) < 0);\n\n  /* Translate to ASCII and copy to result string.  */\n  for (i = 0; i < n_digits; i++)\n    dp[i] = num_to_text[tstr[i]];\n  dp[n_digits] = 0;\n\n  *exp = exp_in_base;\n\n  if (SIZ(u) < 0)\n    {\n      dbuf[0] = '-';\n      n_digits++;\n    }\n\n  TMP_FREE;\n\n done:\n  /* If the string was alloced then resize it down to the actual space\n     required.  */\n  if (alloc_size != 0)\n    {\n      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (dbuf, alloc_size, n_digits + 1, char);\n    }\n\n  return dbuf;\n}\n"
  },
  {
    "path": "mpf/get_ui.c",
    "content": "/* mpf_get_ui -- mpf to ulong conversion\n\nCopyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Any fraction bits are truncated, meaning simply discarded.\n\n   For values bigger than a ulong, the low bits are returned (the low\n   absolute value bits actually), like mpz_get_ui, but this isn't\n   documented.\n\n   Notice this is equivalent to mpz_set_f + mpz_get_ui.\n\n\n   Implementation:\n\n   The limb just above the radix point for us to extract is ptr[size-exp].\n\n   We need to check that the size-exp index falls in our available data\n   range, 0 to size-1 inclusive.  We test this without risk of an overflow\n   involving exp by requiring size>=exp (giving size-exp >= 0) and exp>0\n   (giving size-exp <= size-1).\n\n   Notice if size==0 there's no fetch, since of course size>=exp and exp>0\n   can only be true if size>0.  
So there's no special handling for size==0,\n   it comes out as 0 the same as any other time we have no data at our\n   target index.\n\n   For nails, the second limb above the radix point is also required, this\n   is ptr[size-exp+1].\n\n   Again we need to check that size-exp+1 falls in our data range, 0 to\n   size-1 inclusive.  We test without risk of overflow by requiring\n   size+1>=exp (giving size-exp+1 >= 0) and exp>1 (giving size-exp+1 <=\n   size-1).\n\n   And again if size==0 these second fetch conditions are not satisfied\n   either since size+1>=exp and exp>1 are only true if size>0.\n\n   The code is arranged with exp>0 wrapping the exp>1 test since exp>1 is\n   mis-compiled by alpha gcc prior to version 3.4.  It re-writes it as\n   exp-1>0, which is incorrect when exp==MP_EXP_T_MIN.  By having exp>0\n   tested first we ensure MP_EXP_T_MIN doesn't reach exp>1.  */\n\nmpir_ui\nmpf_get_ui (mpf_srcptr f)\n{\n  mp_size_t size;\n  mp_exp_t exp;\n  mp_srcptr fp;\n  mp_limb_t fl;\n\n  exp = EXP (f);\n  size = SIZ (f);\n  fp = PTR (f);\n\n  fl = 0;\n  if (exp > 0)\n    {\n      /* there are some limbs above the radix point */\n\n      size = ABS (size);\n      if (size >= exp)\n        fl = fp[size-exp];\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n      if (exp > 1 && size+1 >= exp)\n        fl += (fp[size-exp+1] << GMP_NUMB_BITS);\n#endif\n    }\n\n  return (mpir_ui) fl;\n}\n"
  },
  {
    "path": "mpf/init.c",
    "content": "/* mpf_init() -- Make a new multiple precision number with value 0.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init (mpf_ptr r)\n{\n  mp_size_t prec = __gmp_default_fp_limb_precision;\n  r->_mp_size = 0;\n  r->_mp_exp = 0;\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n}\n"
  },
  {
    "path": "mpf/init2.c",
    "content": "/* mpf_init2() -- Make a new multiple precision number with value 0.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init2 (mpf_ptr r, mp_bitcnt_t prec_in_bits)\n{\n  mp_size_t prec;\n\n  prec = __GMPF_BITS_TO_PREC (prec_in_bits);\n  r->_mp_size = 0;\n  r->_mp_exp = 0;\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n}\n"
  },
  {
    "path": "mpf/inits.c",
    "content": "/* mpf_inits() -- Initialize multiple mpf_t variables and set them to 0.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpf_inits (mpf_ptr x, ...)\n#else\nmpf_inits (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpf_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpf_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpf_init (x);\n      x = va_arg (ap, mpf_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpf/inp_str.c",
    "content": "/* mpf_inp_str(dest_float, stream, base) -- Input a number in base\n   BASE from stdio stream STREAM and store the result in DEST_FLOAT.\n\nCopyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <ctype.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nsize_t\nmpf_inp_str (mpf_ptr rop, FILE *stream, int base)\n{\n  char *str;\n  size_t alloc_size, str_size;\n  int c;\n  int res;\n  size_t nread;\n\n  if (stream == 0)\n    stream = stdin;\n\n  alloc_size = 100;\n  str = (char *) (*__gmp_allocate_func) (alloc_size);\n  str_size = 0;\n  nread = 0;\n\n  /* Skip whitespace.  
*/\n  do\n    {\n      c = getc (stream);\n      nread++;\n    }\n  while (isspace (c));\n\n  for (;;)\n    {\n      if (str_size >= alloc_size)\n\t{\n\t  size_t old_alloc_size = alloc_size;\n\t  alloc_size = alloc_size * 3 / 2;\n\t  str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);\n\t}\n      if (c == EOF || isspace (c))\n\tbreak;\n      str[str_size++] = c;\n      c = getc (stream);\n    }\n  ungetc (c, stream);\n  nread--;\n\n  if (str_size >= alloc_size)\n    {\n      size_t old_alloc_size = alloc_size;\n      alloc_size = alloc_size * 3 / 2;\n      str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);\n    }\n  str[str_size] = 0;\n\n  res = mpf_set_str (rop, str, base);\n  (*__gmp_free_func) (str, alloc_size);\n\n  if (res == -1)\n    return 0;\t\t\t/* error */\n\n  return str_size + nread;\n}\n"
  },
  {
    "path": "mpf/int_p.c",
    "content": "/* mpf_integer_p -- test whether an mpf is an integer */\n\n/*\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\nmpf_integer_p (mpf_srcptr f)\n{\n  mp_srcptr ptr;\n  mp_exp_t exp;\n  mp_size_t size, frac, i;\n\n  size = SIZ (f);\n  if (size == 0)\n    return 1;  /* zero is an integer */\n\n  exp = EXP (f);\n  if (exp <= 0)\n    return 0;  /* has only fraction limbs */\n\n  /* any fraction limbs must be zero */\n  frac = ABS (size) - exp;\n  ptr = PTR (f);\n  for (i = 0; i < frac; i++)\n    if (ptr[i] != 0)\n      return 0;\n\n  return 1;\n}\n"
  },
  {
    "path": "mpf/iset.c",
    "content": "/* mpf_init_set -- Initialize a float and assign it from another float.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init_set (mpf_ptr r, mpf_srcptr s)\n{\n  mp_ptr rp, sp;\n  mp_size_t ssize, size;\n  mp_size_t prec;\n\n  prec = __gmp_default_fp_limb_precision;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n  r->_mp_prec = prec;\n\n  prec++;\t\t/* lie not to lose precision in assignment */\n  ssize = s->_mp_size;\n  size = ABS (ssize);\n\n  rp = r->_mp_d;\n  sp = s->_mp_d;\n\n  if (size > prec)\n    {\n      sp += size - prec;\n      size = prec;\n    }\n\n  r->_mp_exp = s->_mp_exp;\n  r->_mp_size = ssize >= 0 ? size : -size;\n\n  MPN_COPY (rp, sp, size);\n}\n"
  },
  {
    "path": "mpf/iset_d.c",
    "content": "/* mpf_init_set_d -- Initialize a float and assign it from a double.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init_set_d (mpf_ptr r, double val)\n{\n  mp_size_t prec = __gmp_default_fp_limb_precision;\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n\n  mpf_set_d (r, val);\n}\n"
  },
  {
    "path": "mpf/iset_si.c",
    "content": "/* mpf_init_set_si() -- Initialize a float and assign it from a signed int.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init_set_si (mpf_ptr r, mpir_si val)\n{\n  mp_size_t prec = __gmp_default_fp_limb_precision;\n  mp_size_t size;\n  mp_limb_t vl;\n\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n\n  vl = (mp_limb_t) (mpir_ui) (val >= 0 ? val : -val);\n\n  r->_mp_d[0] = vl & GMP_NUMB_MASK;\n  size = vl != 0;\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n  vl >>= GMP_NUMB_BITS;\n  r->_mp_d[1] = vl;\n  size += (vl != 0);\n#endif\n\n  r->_mp_exp = size;\n  r->_mp_size = val >= 0 ? size : -size;\n}\n"
  },
  {
    "path": "mpf/iset_str.c",
    "content": "/* mpf_init_set_str -- Initialize a float and assign it from a string.\n\nCopyright 1995, 1996, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpf_init_set_str (mpf_ptr r, const char *s, int base)\n{\n  mp_size_t prec = __gmp_default_fp_limb_precision;\n  r->_mp_size = 0;\n  r->_mp_exp = 0;\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n\n  return mpf_set_str (r, s, base);\n}\n"
  },
  {
    "path": "mpf/iset_ui.c",
    "content": "/* mpf_init_set_ui() -- Initialize a float and assign it from an unsigned int.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_init_set_ui (mpf_ptr r, mpir_ui val)\n{\n  mp_size_t prec = __gmp_default_fp_limb_precision;\n  mp_size_t size;\n\n  r->_mp_prec = prec;\n  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((prec + 1) * BYTES_PER_MP_LIMB);\n  r->_mp_d[0] = val & GMP_NUMB_MASK;\n  size = (val != 0);\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n  val >>= GMP_NUMB_BITS;\n  r->_mp_d[1] = val;\n  size += (val != 0);\n#endif\n\n  r->_mp_size = size;\n  r->_mp_exp = size;\n}\n"
  },
  {
    "path": "mpf/mul.c",
    "content": "/* mpf_mul -- Multiply two floats.\n\nCopyright 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_mul (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_size_t usize, vsize;\n  mp_size_t sign_product;\n  mp_size_t prec = r->_mp_prec;\n  TMP_DECL;\n\n  TMP_MARK;\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n  sign_product = usize ^ vsize;\n\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  if (usize > prec)\n    {\n      up += usize - prec;\n      usize = prec;\n    }\n  if (vsize > prec)\n    {\n      vp += vsize - prec;\n      vsize = prec;\n    }\n\n  if (usize == 0 || vsize == 0)\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\t\t/* ??? */\n    }\n  else\n    {\n      mp_size_t rsize;\n      mp_limb_t cy_limb;\n      mp_ptr rp, tp;\n      mp_size_t adj;\n\n      rsize = usize + vsize;\n      tp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB);\n      cy_limb = (usize >= vsize\n\t\t ? 
mpn_mul (tp, up, usize, vp, vsize)\n\t\t : mpn_mul (tp, vp, vsize, up, usize));\n\n      adj = cy_limb == 0;\n      rsize -= adj;\n      prec++;\n      if (rsize > prec)\n\t{\n\t  tp += rsize - prec;\n\t  rsize = prec;\n\t}\n      rp = r->_mp_d;\n      MPN_COPY (rp, tp, rsize);\n      r->_mp_exp = u->_mp_exp + v->_mp_exp - adj;\n      r->_mp_size = sign_product >= 0 ? rsize : -rsize;\n    }\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/mul_2exp.c",
    "content": "/* mpf_mul_2exp -- Multiply a float by 2^n.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Multiples of GMP_NUMB_BITS in exp simply mean an amount added to EXP(u)\n   to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a left shift for\n   the limb data.\n\n   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just\n   do an mpz_set with changed EXP(r).  Like mpz_set we take prec+1 limbs in\n   this case.  Although just prec would suffice, it's nice to have\n   mpf_mul_2exp with exp==0 come out the same as mpz_set.\n\n   When shifting we take up to prec many limbs from the input.  Our shift is\n   cy = mpn_lshift (PTR(r), PTR(u)+k, size, ...), where k is the number of\n   low limbs dropped from u, and the carry out is stored to PTR(r)[size].\n\n   It may be noted that the low limb PTR(r)[0] doesn't incorporate bits from\n   PTR(u)[k-1] (when k>=1 makes that limb available).  
Taking just prec\n   limbs from the input (with the high non-zero) is enough bits for the\n   application requested precision, there's no need for extra work.\n\n   If r==u the shift will have overlapping operands.  When k==0 (ie. when\n   usize <= prec), the overlap is supported by lshift (ie. dst == src).\n\n   But when r==u and k>=1 (ie. usize > prec), we would have an invalid\n   overlap (ie. mpn_lshift (rp, rp+k, ...)).  In this case we must instead\n   use mpn_rshift (PTR(r)+1, PTR(u)+k, size, NUMB-shift) with the carry out\n   stored to PTR(r)[0].  An rshift by NUMB-shift bits like this gives\n   identical data, it's just its overlap restrictions which differ.\n\n   Enhancements:\n\n   The way mpn_lshift is used means successive mpf_mul_2exp calls on the\n   same operand will accumulate low zero limbs, until prec+1 limbs is\n   reached.  This is wasteful for subsequent operations.  When abs_usize <=\n   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],\n   ie. those which would be shifted out by an mpn_rshift.  If they're zero\n   then use that mpn_rshift.  
*/\n\nvoid\nmpf_mul_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)\n{\n  mp_srcptr up;\n  mp_ptr rp = r->_mp_d;\n  mp_size_t usize;\n  mp_size_t abs_usize;\n  mp_size_t prec = r->_mp_prec;\n  mp_exp_t uexp = u->_mp_exp;\n\n  usize = u->_mp_size;\n\n  if (UNLIKELY (usize == 0))\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  abs_usize = ABS (usize);\n  up = u->_mp_d;\n\n  if (exp % GMP_NUMB_BITS == 0)\n    {\n      prec++;\t\t\t/* retain more precision here as we don't need\n\t\t\t\t   to account for carry-out here */\n      if (abs_usize > prec)\n\t{\n\t  up += abs_usize - prec;\n\t  abs_usize = prec;\n\t}\n      if (rp != up)\n\tMPN_COPY_INCR (rp, up, abs_usize);\n      r->_mp_exp = uexp + exp / GMP_NUMB_BITS;\n    }\n  else\n    {\n      mp_limb_t cy_limb;\n      mp_size_t adj;\n      if (abs_usize > prec)\n\t{\n\t  up += abs_usize - prec;\n\t  abs_usize = prec;\n\t  /* Use mpn_rshift since mpn_lshift operates downwards, and we\n\t     therefore would clobber part of U before using that part, in case\n\t     R is the same variable as U.  */\n\t  cy_limb = mpn_rshift (rp + 1, up, abs_usize,\n\t\t\t\tGMP_NUMB_BITS - exp % GMP_NUMB_BITS);\n\t  rp[0] = cy_limb;\n\t  adj = rp[abs_usize] != 0;\n\t}\n      else\n\t{\n\t  cy_limb = mpn_lshift (rp, up, abs_usize, exp % GMP_NUMB_BITS);\n\t  rp[abs_usize] = cy_limb;\n\t  adj = cy_limb != 0;\n\t}\n\n      abs_usize += adj;\n      r->_mp_exp = uexp + exp / GMP_NUMB_BITS + adj;\n    }\n  r->_mp_size = usize >= 0 ? abs_usize : -abs_usize;\n}\n"
  },
  {
    "path": "mpf/mul_ui.c",
    "content": "/* mpf_mul_ui -- Multiply a float and an unsigned integer.\n\nCopyright 1993, 1994, 1996, 2001, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* The core operation is a multiply of PREC(r) limbs from u by v, producing\n   either PREC(r) or PREC(r)+1 result limbs.  If u is shorter than PREC(r),\n   then we take only as much as it has.  If u is longer we incorporate a\n   carry from the lower limbs.\n\n   If u has just 1 extra limb, then the carry to add is high(up[0]*v).  That\n   is of course what mpn_mul_1 would do if it was called with PREC(r)+1\n   limbs of input.\n\n   If u has more than 1 extra limb, then there can be a further carry bit\n   out of lower uncalculated limbs (the way the low of one product adds to\n   the high of the product below it).  This is of course what an mpn_mul_1\n   would do if it was called with the full u operand.  
But we instead work\n   downwards explicitly, until a carry occurs or until a value other than\n   GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate\n   across).\n\n   The carry determination normally requires two umul_ppmm's, only rarely\n   will GMP_NUMB_MAX occur and require further products.\n\n   The carry limb is conveniently added into the mul_1 using mpn_mul_1c when\n   that function exists, otherwise a subsequent mpn_add_1 is needed.\n\n   Clearly when mpn_mul_1c is used the carry must be calculated first.  But\n   this is also the case when add_1 is used, since if r==u and ABSIZ(r) >\n   PREC(r) then the mpn_mul_1 overwrites the low part of the input.\n\n   A reuse r==u with size > prec can occur from a size PREC(r)+1 in the\n   usual way, or it can occur from an mpf_set_prec_raw leaving a bigger\n   sized value.  In both cases we can end up calling mpn_mul_1 with\n   overlapping src and dst regions, but this will be with dst < src and such\n   an overlap is permitted.\n\n   Not done:\n\n   No attempt is made to determine in advance whether the result will be\n   PREC(r) or PREC(r)+1 limbs.  If it's going to be PREC(r)+1 then we could\n   take one less limb from u and generate just PREC(r), that of course\n   satisfying application requested precision.  But any test counting bits\n   or forming the high product would almost certainly take longer than the\n   incremental cost of an extra limb in mpn_mul_1.\n\n   Enhancements:\n\n   Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the\n   result, leaving low zero limbs after a while, which it might be nice to\n   strip to save work in subsequent operations.  Calculating the low limb\n   explicitly would let us direct mpn_mul_1 to put the balance at rp when\n   the low is zero (instead of normally rp+1).  But it's not clear whether\n   this would be worthwhile.  
Explicit code for the low limb will probably\n   be slower than having it done in mpn_mul_1, so we need to consider how\n   often a zero will be stripped and how much that's likely to save\n   later.  */\n\nvoid\nmpf_mul_ui (mpf_ptr r, mpf_srcptr u, mpir_ui v)\n{\n  mp_srcptr up;\n  mp_size_t usize;\n  mp_size_t size;\n  mp_size_t prec, excess;\n  mp_limb_t cy_limb, vl, cbit, cin;\n  mp_ptr rp;\n\n  usize = u->_mp_size;\n  if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (v > GMP_NUMB_MAX)\n    {\n      mpf_t     vf;\n      mp_limb_t vp[2];\n      vp[0] = v & GMP_NUMB_MASK;\n      vp[1] = v >> GMP_NUMB_BITS;\n      PTR(vf) = vp;\n      SIZ(vf) = 2;\n      ASSERT_CODE (PREC(vf) = 2);\n      EXP(vf) = 2;\n      mpf_mul (r, u, vf);\n      return;\n    }\n#endif\n\n  size = ABS (usize);\n  prec = r->_mp_prec;\n  up = u->_mp_d;\n  vl = v;\n  excess = size - prec;\n  cin = 0;\n\n  if (excess > 0)\n    {\n      /* up is bigger than desired rp, shorten it to prec limbs and\n         determine a carry-in */\n\n      mp_limb_t  vl_shifted = vl << GMP_NAIL_BITS;\n      mp_limb_t  hi, lo, next_lo, sum;\n      mp_size_t  i;\n\n      /* high limb of top product */\n      i = excess - 1;\n      umul_ppmm (cin, lo, up[i], vl_shifted);\n\n      /* and carry bit out of products below that, if any */\n      for (;;)\n        {\n          i--;\n          if (i < 0)\n            break;\n\n          umul_ppmm (hi, next_lo, up[i], vl_shifted);\n          lo >>= GMP_NAIL_BITS;\n          ADDC_LIMB (cbit, sum, hi, lo);\n          cin += cbit;\n          lo = next_lo;\n\n          /* Continue only if the sum is GMP_NUMB_MAX.  GMP_NUMB_MAX is the\n             only value a carry from below can propagate across.  If we've\n             just seen the carry out (ie. 
cbit!=0) then sum!=GMP_NUMB_MAX,\n             so this test stops us for that case too.  */\n          if (LIKELY (sum != GMP_NUMB_MAX))\n            break;\n        }\n\n      up += excess;\n      size = prec;\n    }\n\n  rp = r->_mp_d;\n#if HAVE_NATIVE_mpn_mul_1c\n  cy_limb = mpn_mul_1c (rp, up, size, vl, cin);\n#else\n  cy_limb = mpn_mul_1 (rp, up, size, vl);\n  __GMPN_ADD_1 (cbit, rp, rp, size, cin);\n  cy_limb += cbit;\n#endif\n  rp[size] = cy_limb;\n  cy_limb = cy_limb != 0;\n  r->_mp_exp = u->_mp_exp + cy_limb;\n  size += cy_limb;\n  r->_mp_size = usize >= 0 ? size : -size;\n}\n"
  },
  {
    "path": "mpf/neg.c",
    "content": "/* mpf_neg -- Negate a float.\n\nCopyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_neg (mpf_ptr r, mpf_srcptr u)\n{\n  mp_size_t size;\n\n  size = -u->_mp_size;\n  if (r != u)\n    {\n      mp_size_t prec;\n      mp_size_t asize;\n      mp_ptr rp, up;\n\n      prec = r->_mp_prec + 1;\t/* lie not to lose precision in assignment */\n      asize = ABS (size);\n      rp = r->_mp_d;\n      up = u->_mp_d;\n\n      if (asize > prec)\n\t{\n\t  up += asize - prec;\n\t  asize = prec;\n\t}\n\n      MPN_COPY (rp, up, asize);\n      r->_mp_exp = u->_mp_exp;\n      size = size >= 0 ? asize : -asize;\n    }\n  r->_mp_size = size;\n}\n"
  },
  {
    "path": "mpf/out_str.c",
    "content": "/* mpf_out_str (stream, base, n_digits, op) -- Print N_DIGITS digits from\n   the float OP to STREAM in base BASE.  Return the number of characters\n   written, or 0 if an error occurred.\n\nCopyright 1996, 1997, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */\n#endif\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <string.h>\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for localeconv */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nsize_t\nmpf_out_str (FILE *stream, int base, size_t n_digits, mpf_srcptr op)\n{\n  char *str;\n  mp_exp_t exp;\n  size_t written;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  if (base == 0)\n    base = 10;\n  if (n_digits == 0)\n    MPF_SIGNIFICANT_DIGITS (n_digits, base, op->_mp_prec);\n\n  if (stream == 0)\n    stream = stdout;\n\n  /* Consider these changes:\n     * Don't allocate memory here for huge n_digits; pass NULL to mpf_get_str.\n     * Make mpf_get_str allocate extra space when passed NULL, to avoid\n       allocating two huge string buffers.\n     * Implement more/other allocation reductions tricks.  */\n\n  str = (char *) TMP_ALLOC (n_digits + 2); /* extra for minus sign and \\0 */\n\n  mpf_get_str (str, &exp, base, n_digits, op);\n  n_digits = strlen (str);\n\n  written = 0;\n\n  /* Write sign */\n  if (str[0] == '-')\n    {\n      str++;\n      fputc ('-', stream);\n      written = 1;\n      n_digits--;\n    }\n\n  {\n    const char  *point = GMP_DECIMAL_POINT;\n    size_t      pointlen = strlen (point);\n    putc ('0', stream);\n    fwrite (point, 1, pointlen, stream);\n    written += pointlen + 1;\n  }\n\n  /* Write mantissa */\n  {\n    size_t fwret;\n    fwret = fwrite (str, 1, n_digits, stream);\n    written += fwret;\n  }\n\n  /* Write exponent */\n  {\n    int fpret;\n    fpret = fprintf (stream, (base <= 10 ? \"e%ld\" : \"@%ld\"), exp);\n    written += fpret;\n  }\n\n  TMP_FREE;\n  return ferror (stream) ? 0 : written;\n}\n"
  },
  {
    "path": "mpf/pow_ui.c",
    "content": "/* mpf_pow_ui -- Compute b^e.\n\nCopyright 1998, 1999, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_pow_ui (mpf_ptr r, mpf_srcptr b, mpir_ui e)\n{\n  mpf_t b2;\n  mpir_ui e2;\n\n  mpf_init2 (b2, mpf_get_prec (r));\n  mpf_set (b2, b);\n  mpf_set_ui (r, 1);\n\n  if ((e & 1) != 0)\n    mpf_set (r, b2);\n  for (e2 = e >> 1; e2 != 0; e2 >>= 1)\n    {\n      mpf_mul (b2, b2, b2);\n      if ((e2 & 1) != 0)\n\tmpf_mul (r, r, b2);\n    }\n\n  mpf_clear (b2);\n}\n"
  },
  {
    "path": "mpf/random2.c",
    "content": "/* mpf_random2 -- Generate a positive random mpf_t of specified size, with\n   long runs of consecutive ones and zeros in the binary representation.\n   Intended for testing of other MP routines.\n\nCopyright 1995, 1996, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n// This function is Obsolete 13/09/2009\nvoid\nmpf_random2 (mpf_ptr x, mp_size_t xs, mp_exp_t exp)\n{\n  mp_size_t xn;\n  mp_size_t prec;\n  mp_limb_t elimb;\n\n  xn = ABS (xs);\n  prec = PREC(x);\n\n  if (xn == 0)\n    {\n      EXP(x) = 0;\n      SIZ(x) = 0;\n      return;\n    }\n\n  if (xn > prec + 1)\n    xn = prec + 1;\n\n  /* General random mantissa.  */\n  mpn_random2 (PTR(x), xn);\n\n  /* Generate random exponent.  */\n  _gmp_rand (&elimb, RANDS, GMP_NUMB_BITS);\n  exp = ABS (exp);\n  exp = elimb % (2 * exp + 1) - exp;\n\n  EXP(x) = exp;\n  SIZ(x) = xs < 0 ? -xn : xn;\n}\n"
  },
  {
    "path": "mpf/reldiff.c",
    "content": "/* mpf_reldiff -- Generate the relative difference of two floats.\n\nCopyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* The precision we use for d = x-y is based on what mpf_div will want from\n   the dividend.  It calls mpn_tdiv_qr to produce a quotient of rprec+1\n   limbs.  So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize.  */\n\nvoid\nmpf_reldiff (mpf_t rdiff, mpf_srcptr x, mpf_srcptr y)\n{\n  if (UNLIKELY (SIZ(x) == 0))\n    {\n      mpf_set_ui (rdiff, (mpir_ui) (mpf_sgn (y) != 0));\n    }\n  else\n    {\n      mp_size_t dprec;\n      mpf_t d;\n      TMP_DECL;\n\n      TMP_MARK;\n      dprec = PREC(rdiff) + ABSIZ(x);\n      ASSERT (PREC(rdiff)+1 == dprec - ABSIZ(x) + 1);\n\n      PREC(d) = dprec;\n      PTR(d) = TMP_ALLOC_LIMBS (dprec + 1);\n\n      mpf_sub (d, x, y);\n      SIZ(d) = ABSIZ(d);\n      mpf_div (rdiff, d, x);\n\n      TMP_FREE;\n    }\n}\n\n"
  },
  {
    "path": "mpf/rrandomb.c",
    "content": "/* mpf_rrandomb -- Generate a positive random mpf_t of specified size, with\n   long runs of consecutive ones and zeros in the binary representation.\n   Intended for testing of other MP routines.\n\nCopyright 1995, 1996, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_rrandomb (mpf_ptr x, gmp_randstate_t rnd,mp_size_t xs, mp_exp_t exp)\n{\n  mp_size_t xn;\n  mp_size_t prec;\n  mp_limb_t elimb;\n\n  xn = ABS (xs);\n  prec = PREC(x);\n\n  if (xn == 0)\n    {\n      EXP(x) = 0;\n      SIZ(x) = 0;\n      return;\n    }\n\n  if (xn > prec + 1)\n    xn = prec + 1;\n\n  /* General random mantissa.  */\n  mpn_rrandom (PTR(x), rnd, xn);\n\n  /* Generate random exponent.  */\n  _gmp_rand (&elimb, rnd, GMP_NUMB_BITS);\n  exp = ABS (exp);\n  exp = elimb % (2 * exp + 1) - exp;\n\n  EXP(x) = exp;\n  SIZ(x) = xs < 0 ? -xn : xn;\n}\n"
  },
  {
    "path": "mpf/set.c",
    "content": "/* mpf_set -- Assign a float from another float.\n\nCopyright 1993, 1994, 1995, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set (mpf_ptr r, mpf_srcptr u)\n{\n  mp_ptr rp, up;\n  mp_size_t size, asize;\n  mp_size_t prec;\n\n  prec = r->_mp_prec + 1;\t\t/* lie not to lose precision in assignment */\n  size = u->_mp_size;\n  asize = ABS (size);\n  rp = r->_mp_d;\n  up = u->_mp_d;\n\n  if (asize > prec)\n    {\n      up += asize - prec;\n      asize = prec;\n    }\n\n  r->_mp_exp = u->_mp_exp;\n  r->_mp_size = size >= 0 ? asize : -asize;\n  MPN_COPY_INCR (rp, up, asize);\n}\n"
  },
  {
    "path": "mpf/set_d.c",
    "content": "/* mpf_set_d -- Assign a float from a double.\n\nCopyright 1993, 1994, 1995, 1996, 2001, 2003, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set_d (mpf_ptr r, double d)\n{\n  int negative;\n\n  DOUBLE_NAN_INF_ACTION (d,\n                         __gmp_invalid_operation (),\n                         __gmp_invalid_operation ());\n\n  if (UNLIKELY (d == 0))\n    {\n      SIZ(r) = 0;\n      EXP(r) = 0;\n      return;\n    }\n  negative = d < 0;\n  d = ABS (d);\n\n  SIZ(r) = negative ? -LIMBS_PER_DOUBLE : LIMBS_PER_DOUBLE;\n  EXP(r) = __gmp_extract_double (PTR(r), d);\n}\n"
  },
  {
    "path": "mpf/set_dfl_prec.c",
    "content": "/* mpf_set_default_prec --\n\nCopyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_size_t __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (53);\n\nvoid\nmpf_set_default_prec (mp_bitcnt_t prec_in_bits)\n{\n  __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (prec_in_bits);\n}\n"
  },
  {
    "path": "mpf/set_prc.c",
    "content": "/* mpf_set_prec(x) -- Change the precision of x.\n\nCopyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* A full new_prec+1 limbs are always retained, even though just new_prec\n   would satisfy the requested precision.  If size==new_prec+1 then\n   certainly new_prec+1 should be kept since no copying is needed in that\n   case.  If just new_prec was kept for size>new_prec+1 it'd be a bit\n   inconsistent.  */\n\nvoid\nmpf_set_prec (mpf_ptr x, mp_bitcnt_t new_prec_in_bits)\n{\n  mp_size_t  old_prec, new_prec, new_prec_plus1;\n  mp_size_t  size, sign;\n  mp_ptr     xp;\n\n  new_prec = __GMPF_BITS_TO_PREC (new_prec_in_bits);\n  old_prec = PREC(x);\n\n  /* do nothing if already the right precision */\n  if (new_prec == old_prec)\n    return;\n\n  PREC(x) = new_prec;\n  new_prec_plus1 = new_prec + 1;\n\n  /* retain most significant limbs */\n  sign = SIZ(x);\n  size = ABS (sign);\n  xp = PTR(x);\n  if (size > new_prec_plus1)\n    {\n      SIZ(x) = (sign >= 0 ? 
new_prec_plus1 : -new_prec_plus1);\n      MPN_COPY_INCR (xp, xp + size - new_prec_plus1, new_prec_plus1);\n    }\n\n  PTR(x) = __GMP_REALLOCATE_FUNC_LIMBS (xp, old_prec+1, new_prec_plus1);\n}\n"
  },
  {
    "path": "mpf/set_prc_raw.c",
    "content": "/* mpf_set_prec_raw(x,bits) -- Change the precision of x without changing\n   allocation.  For proper operation, the original precision need to be reset\n   sooner or later.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set_prec_raw (mpf_ptr x, mp_bitcnt_t prec_in_bits)\n{\n  x->_mp_prec = __GMPF_BITS_TO_PREC (prec_in_bits);\n}\n"
  },
  {
    "path": "mpf/set_q.c",
    "content": "/* mpf_set_q (mpf_t rop, mpq_t op) -- Convert the rational op to the float rop.\n\nCopyright 1996, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* As usual the aim is to produce PREC(r) limbs, with the high non-zero.\n   The basic mpn_tdiv_qr produces a quotient of nsize-dsize+1 limbs, with\n   either the high or second highest limb non-zero.  We arrange for\n   nsize-dsize+1 to equal prec+1, hence giving either prec or prec+1 result\n   limbs at PTR(r).\n\n   nsize-dsize+1 == prec+1 is achieved by adjusting num(q), either dropping\n   low limbs if it's too big, or padding with low zeros if it's too small.\n   The full given den(q) is always used.\n\n   We cannot truncate den(q), because even when it's much bigger than prec\n   the last limbs can still influence the final quotient.  Often they don't,\n   but we leave optimization of that to a prospective quotient-only mpn\n   division.\n\n   Not done:\n\n   If den(q) is a power of 2 then we may end up with low zero limbs on the\n   result.  
But nothing is done about this, since it should be unlikely on\n   random data, and can be left to an application to call mpf_div_2exp if it\n   might occur with any frequency.\n\n   Enhancements:\n\n   The high quotient limb is non-zero when high{np,dsize} >= {dp,dsize}.  We\n   could make that comparison and use qsize==prec instead of qsize==prec+1,\n   to save one limb in the division.\n\n   Future:\n\n   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of\n   padding n with zeros in temporary space.\n\n   If/when a quotient-only division exists it can be used here immediately.\n   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.  */\n\nvoid\nmpf_set_q (mpf_t r, mpq_srcptr q)\n{\n  mp_srcptr np, dp;\n  mp_size_t prec, nsize, dsize, qsize, prospective_qsize, tsize, zeros;\n  mp_size_t sign_quotient, high_zero;\n  mp_ptr qp, tp, remp;\n  mp_exp_t exp;\n  TMP_DECL;\n\n  ASSERT (SIZ(&q->_mp_den) > 0);  /* canonical q */\n\n  nsize = SIZ (&q->_mp_num);\n  dsize = SIZ (&q->_mp_den);\n\n  if (UNLIKELY (nsize == 0))\n    {\n      SIZ (r) = 0;\n      EXP (r) = 0;\n      return;\n    }\n\n  TMP_MARK;\n\n  prec = PREC (r);\n  qp = PTR (r);\n\n  sign_quotient = nsize;\n  nsize = ABS (nsize);\n  np = PTR (&q->_mp_num);\n  dp = PTR (&q->_mp_den);\n\n  prospective_qsize = nsize - dsize + 1;  /* q from using given n,d sizes */\n  exp = prospective_qsize;                /* ie. number of integer limbs */\n  qsize = prec + 1;                       /* desired q */\n\n  zeros = qsize - prospective_qsize;   /* n zeros to get desired qsize */\n  tsize = nsize + zeros;               /* possible copy of n */\n\n  if (WANT_TMP_DEBUG)\n    {\n      /* separate alloc blocks, for malloc debugging */\n      remp = TMP_ALLOC_LIMBS (dsize);\n      tp = NULL;\n      if (zeros > 0)\n        tp = TMP_ALLOC_LIMBS (tsize);\n    }\n  else\n    {\n      /* one alloc with a conditionalized size, for efficiency */\n      mp_size_t size = dsize + (zeros > 0 ? 
tsize : 0);\n      remp = TMP_ALLOC_LIMBS (size);\n      tp = remp + dsize;\n    }\n\n  if (zeros > 0)\n    {\n      /* pad n with zeros into temporary space */\n      MPN_ZERO (tp, zeros);\n      MPN_COPY (tp+zeros, np, nsize);\n      np = tp;\n      nsize = tsize;\n    }\n  else\n    {\n      /* shorten n to get desired qsize */\n      nsize += zeros;\n      np -= zeros;\n    }\n\n  ASSERT (nsize-dsize+1 == qsize);\n  mpn_tdiv_qr (qp, remp, (mp_size_t) 0, np, nsize, dp, dsize);\n\n  /* strip possible zero high limb */\n  high_zero = (qp[qsize-1] == 0);\n  qsize -= high_zero;\n  exp -= high_zero;\n\n  EXP (r) = exp;\n  SIZ (r) = sign_quotient >= 0 ? qsize : -qsize;\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/set_si.c",
    "content": "/* mpf_set_si() -- Assign a float from a signed int.\n\nCopyright 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set_si (mpf_ptr dest, mpir_si val)\n{\n  mp_size_t size;\n  mp_limb_t vl;\n\n  vl = (mp_limb_t) (mpir_ui) (val >= 0 ? val : -val);\n\n  dest->_mp_d[0] = vl & GMP_NUMB_MASK;\n  size = vl != 0;\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n  vl >>= GMP_NUMB_BITS;\n  dest->_mp_d[1] = vl;\n  size += (vl != 0);\n#endif\n\n  dest->_mp_exp = size;\n  dest->_mp_size = val >= 0 ? size : -size;\n}\n"
  },
  {
    "path": "mpf/set_str.c",
    "content": "/* mpf_set_str (dest, string, base) -- Convert the string STRING\n   in base BASE to a float in dest.  If BASE is zero, the leading characters\n   of STRING is used to figure out the base.\n\nCopyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005 Free\nSoftware Foundation, Inc.\n\nCopyright 2009 B R Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/*\n  This still needs work, as suggested by some FIXME comments.\n  1. Don't depend on superfluous mantissa digits.\n  2. Allocate temp space more cleverly.\n  3. 
Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem.\n*/\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */\n#endif\n\n#include \"config.h\"\n\n#include <stdlib.h>\n#include <string.h>\n#include <ctype.h>\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for localeconv */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_limb_t mpn_intdivrem (mp_ptr qp, mp_size_t qxn,\n\t    mp_ptr np, mp_size_t nn,\n\t    mp_srcptr dp, mp_size_t dn)\n{\n  ASSERT (qxn >= 0);\n  ASSERT (nn >= dn);\n  ASSERT (dn >= 1);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));\n  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);\n  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));\n  ASSERT_MPN (np, nn);\n  ASSERT_MPN (dp, dn);\n\n  if (dn == 1)\n    {\n      mp_limb_t ret;\n      mp_ptr q2p;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\n      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);\n      qn = nn + qxn - 1;\n      MPN_COPY (qp, q2p, qn);\n      ret = q2p[qn];\n\n      TMP_FREE;\n      return ret;\n    }\n  else if (dn == 2)\n    {\n      return mpn_divrem_2 (qp, qxn, np, nn, dp);\n    }\n  else\n    {\n      mp_ptr rp, q2p;\n      mp_limb_t qhl;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      if (UNLIKELY (qxn != 0))\n\t{\n\t  mp_ptr n2p;\n\t  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\t  MPN_ZERO (n2p, qxn);\n\t  MPN_COPY (n2p + qxn, np, nn);\n\t  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\n\t  qn = nn - dn + qxn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      else\n\t{\n\t  q2p = (mp_ptr) 
TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\t/* overwrite np area with remainder */\n\t  qn = nn - dn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      TMP_FREE;\n      return qhl;\n    }\n}\n\n\nextern const unsigned char __gmp_digit_value_tab[];\n#define digit_value_tab __gmp_digit_value_tab\n\n/* Compute base^exp and return the most significant prec limbs in rp[].\n   Put the count of omitted low limbs in *ign.\n   Return the actual size (which might be less than prec).  */\nstatic mp_size_t\nmpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,\n\t\t    mp_limb_t base, mp_exp_t exp,\n\t\t    mp_size_t prec, mp_ptr tp)\n{\n  mp_size_t ign;\t\t/* counts number of ignored low limbs in r */\n  mp_size_t off;\t\t/* keeps track of offset where value starts */\n  mp_ptr passed_rp = rp;\n  mp_size_t rn;\n  int cnt;\n  int i;\n\n  rp[0] = base;\n  rn = 1;\n  off = 0;\n  ign = 0;\n  count_leading_zeros (cnt, exp);\n  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)\n    {\n      mpn_sqr (tp, rp + off, rn);\n      rn = 2 * rn;\n      rn -= tp[rn - 1] == 0;\n      ign <<= 1;\n\n      off = 0;\n      if (rn > prec)\n\t{\n\t  ign += rn - prec;\n\t  off = rn - prec;\n\t  rn = prec;\n\t}\n      MP_PTR_SWAP (rp, tp);\n\n      if (((exp >> i) & 1) != 0)\n\t{\n\t  mp_limb_t cy;\n\t  cy = mpn_mul_1 (rp, rp + off, rn, base);\n\t  rp[rn] = cy;\n\t  rn += cy != 0;\n\t  off = 0;\n\t}\n    }\n\n  if (rn > prec)\n    {\n      ign += rn - prec;\n      rp += rn - prec;\n      rn = prec;\n    }\n\n  MPN_COPY_INCR (passed_rp, rp + off, rn);\n  *ignp = ign;\n  return rn;\n}\n\nint\nmpf_set_str (mpf_ptr x, const char *str, int base)\n{\n  size_t str_size;\n  char *s, *begs;\n  size_t i, j;\n  int c;\n  int negative;\n  char *dotpos = 0;\n  const char *expptr;\n  int exp_base;\n  const char  *point = GMP_DECIMAL_POINT;\n  size_t      pointlen = strlen 
(point);\n  const unsigned char *digit_value;\n  TMP_DECL;\n\n  c = (unsigned char) *str;\n\n  /* Skip whitespace.  */\n  while (isspace (c))\n    c = (unsigned char) *++str;\n\n  negative = 0;\n  if (c == '-')\n    {\n      negative = 1;\n      c = (unsigned char) *++str;\n    }\n\n  digit_value = digit_value_tab;\n  exp_base = base;\n  if (base <= 0)\n    {\n      exp_base = 10;\n      base = base ? -base : 10;\n    }\n\n  if(base < 2 || base > 62)\n      return -1;\n  else if(base > 36)\n    {\n      /* For bases > 36, use the collating sequence\n\t 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */\n      digit_value += 224;\n    }\n\n  /* Require at least one digit, possibly after an initial decimal point.  */\n  if (digit_value[c] >= base)\n    {\n      /* not a digit, must be a decimal point */\n      for (i = 0; i < pointlen; i++)\n        if (str[i] != point[i])\n          return -1;\n      if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))\n\treturn -1;\n    }\n\n  /* Locate exponent part of the input.  Look from the right of the string,\n     since the exponent is usually a lot shorter than the mantissa.  */\n  expptr = NULL;\n  str_size = strlen (str);\n  for (i = str_size - 1; i > 0; i--)\n    {\n      c = (unsigned char) str[i];\n      if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))\n\t{\n\t  expptr = str + i + 1;\n\t  str_size = i;\n\t  break;\n\t}\n    }\n\n  TMP_MARK;\n  s = begs = (char *) TMP_ALLOC (str_size + 1);\n\n  /* Loop through mantissa, converting it from ASCII to raw byte values.  
*/\n  for (i = 0; i < str_size; i++)\n    {\n      c = (unsigned char) *str;\n      if (!isspace (c))\n\t{\n\t  int dig;\n\n          for (j = 0; j < pointlen; j++)\n            if (str[j] != point[j])\n              goto not_point;\n          if (1)\n\t    {\n\t      if (dotpos != 0)\n\t\t{\n\t\t  /* already saw a decimal point, another is invalid */\n\t\t  TMP_FREE;\n\t\t  return -1;\n\t\t}\n\t      dotpos = s;\n\t      str += pointlen - 1;\n\t      i += pointlen - 1;\n\t    }\n\t  else\n\t    {\n            not_point:\n\t      dig = digit_value[c];\n\t      if (dig >= base)\n\t\t{\n\t\t  TMP_FREE;\n\t\t  return -1;\n\t\t}\n\t      *s++ = dig;\n\t    }\n\t}\n      c = (unsigned char) *++str;\n    }\n\n  str_size = s - begs;\n\n  {\n    long exp_in_base;\n    mp_size_t ra, ma, rn, mn;\n    int cnt;\n    mp_ptr mp, tp, rp;\n    mp_exp_t exp_in_limbs;\n    mp_size_t prec = PREC(x) + 1;\n    int divflag;\n    mp_size_t madj, radj;\n\n#if 0\n    size_t n_chars_needed;\n\n    /* This breaks things like 0.000...0001.  To safely ignore superfluous\n       digits, we need to skip over leadng zeros.  */\n    /* Just consider the relevant leading digits of the mantissa.  */\n    n_chars_needed = 2 + (size_t)\n      (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly);\n    if (str_size > n_chars_needed)\n      str_size = n_chars_needed;\n#endif\n\n    ma = (mp_size_t) (str_size / mp_bases[base].chars_per_bit_exactly);\n    mp = TMP_ALLOC_LIMBS (ma / GMP_NUMB_BITS + 2);\n    mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);\n\n    if (mn == 0)\n      {\n\tSIZ(x) = 0;\n\tEXP(x) = 0;\n\tTMP_FREE;\n\treturn 0;\n      }\n\n    madj = 0;\n    /* Ignore excess limbs in MP,MSIZE.  
*/\n    if (mn > prec)\n      {\n\tmadj = mn - prec;\n\tmp += mn - prec;\n\tmn = prec;\n      }\n\n    exp_in_base = 0;\n    if (expptr != 0)\n    {   char sgn = '+';\n        int digit = 0, cnt = -1;\n        \n        if(*expptr == '+' || *expptr == '-')\n            sgn = *expptr++;\n\n        do\n        {\n            exp_in_base = exp_in_base * exp_base + digit;\n            digit = digit_value[*(unsigned char*)expptr++];\n            cnt++;\n        }\n        while\n            (digit < exp_base);\n\n        if(!cnt)\n        {\n            TMP_FREE;\n            return -1;\n        }\n\n        if(sgn == '-')\n            exp_in_base = -exp_in_base;\n    }\n\n    if (dotpos != 0)\n      exp_in_base -= s - dotpos;\n    divflag = exp_in_base < 0;\n    exp_in_base = ABS (exp_in_base);\n\n    if (exp_in_base == 0)\n      {\n\tMPN_COPY (PTR(x), mp, mn);\n\tSIZ(x) = negative ? -mn : mn;\n\tEXP(x) = mn + madj;\n\tTMP_FREE;\n\treturn 0;\n      }\n\n    ra = 2 * (prec + 1);\n    rp = TMP_ALLOC_LIMBS (ra);\n    tp = TMP_ALLOC_LIMBS (ra);\n    rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);\n\n    if (divflag)\n      {\n#if 0\n\t/* FIXME: Should use mpn_tdiv here.  */\n\tmpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn);\n#else\n\tmp_ptr qp;\n\tmp_limb_t qlimb;\n\tif (mn < rn)\n\t  {\n\t    /* Pad out MP,MSIZE for current divrem semantics.  
*/\n\t    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);\n\t    MPN_ZERO (tmp, rn - mn);\n\t    MPN_COPY (tmp + rn - mn, mp, mn);\n\t    mp = tmp;\n\t    madj -= rn - mn;\n\t    mn = rn;\n\t  }\n\tif ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)\n\t  {\n\t    mp_limb_t cy;\n\t    count_leading_zeros (cnt, rp[rn - 1]);\n\t    cnt -= GMP_NAIL_BITS;\n\t    mpn_lshift (rp, rp, rn, cnt);\n\t    cy = mpn_lshift (mp, mp, mn, cnt);\n\t    if (cy)\n\t      mp[mn++] = cy;\n\t  }\n\n\tqp = TMP_ALLOC_LIMBS (prec + 1);\n\tqlimb = mpn_intdivrem (qp, prec - (mn - rn), mp, mn, rp, rn);\n\ttp = qp;\n\texp_in_limbs = qlimb + (mn - rn) + (madj - radj);\n\trn = prec;\n\tif (qlimb != 0)\n\t  {\n\t    tp[prec] = qlimb;\n\t    /* Skip the least significant limb not to overrun the destination\n\t       variable.  */\n\t    tp++;\n\t  }\n#endif\n      }\n    else\n      {\n\ttp = TMP_ALLOC_LIMBS (rn + mn);\n\tif (rn > mn)\n\t  mpn_mul (tp, rp, rn, mp, mn);\n\telse\n\t  mpn_mul (tp, mp, mn, rp, rn);\n\trn += mn;\n\trn -= tp[rn - 1] == 0;\n\texp_in_limbs = rn + madj + radj;\n\n\tif (rn > prec)\n\t  {\n\t    tp += rn - prec;\n\t    rn = prec;\n\t    exp_in_limbs += 0;\n\t  }\n      }\n\n    MPN_COPY (PTR(x), tp, rn);\n    SIZ(x) = negative ? -rn : rn;\n    EXP(x) = exp_in_limbs;\n    TMP_FREE;\n    return 0;\n  }\n}\n"
  },
  {
    "path": "mpf/set_ui.c",
    "content": "/* mpf_set_ui() -- Assign a float from an unsigned int.\n\nCopyright 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set_ui (mpf_ptr f, mpir_ui val)\n{\n  mp_size_t size;\n\n  f->_mp_d[0] = val & GMP_NUMB_MASK;\n  size = val != 0;\n\n#if BITS_PER_UI > GMP_NUMB_BITS\n  val >>= GMP_NUMB_BITS;\n  f->_mp_d[1] = val;\n  size += (val != 0);\n#endif\n\n  f->_mp_exp = f->_mp_size = size;\n}\n"
  },
  {
    "path": "mpf/set_z.c",
    "content": "/* mpf_set_z -- Assign a float from an integer.\n\nCopyright 1996, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_set_z (mpf_ptr r, mpz_srcptr u)\n{\n  mp_ptr rp, up;\n  mp_size_t size, asize;\n  mp_size_t prec;\n\n  prec = PREC (r) + 1;\n  size = SIZ (u);\n  asize = ABS (size);\n  rp = PTR (r);\n  up = PTR (u);\n\n  EXP (r) = asize;\n\n  if (asize > prec)\n    {\n      up += asize - prec;\n      asize = prec;\n    }\n\n  SIZ (r) = size >= 0 ? asize : -asize;\n  MPN_COPY (rp, up, asize);\n}\n"
  },
  {
    "path": "mpf/size.c",
    "content": "/* mpf_size(x) -- return the number of limbs currently used by the\n   value of the float X.\n\nCopyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nsize_t\nmpf_size (mpf_srcptr f)\n{\n  return __GMP_ABS (f->_mp_size);\n}\n"
  },
  {
    "path": "mpf/sqrt.c",
    "content": "/* mpf_sqrt -- Compute the square root of a float.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* As usual, the aim is to produce PREC(r) limbs of result, with the high\n   limb non-zero.  This is accomplished by applying mpn_sqrtrem to either\n   2*prec or 2*prec-1 limbs, both such sizes resulting in prec limbs.\n\n   The choice between 2*prec or 2*prec-1 limbs is based on the input\n   exponent.  With b=2^GMP_NUMB_BITS the limb base then we can think of\n   effectively taking out a factor b^(2k), for suitable k, to get to an\n   integer input of the desired size ready for mpn_sqrtrem.  It must be an\n   even power taken out, ie. an even number of limbs, so the square root\n   gives factor b^k and the radix point is still on a limb boundary.  So if\n   EXP(r) is even we'll get an even number of input limbs 2*prec, or if\n   EXP(r) is odd we get an odd number 2*prec-1.\n\n   Further limbs below the 2*prec or 2*prec-1 used don't affect the result\n   and are simply truncated.  
This can be seen by considering an integer x,\n   with s=floor(sqrt(x)).  s is the unique integer satisfying s^2 <= x <\n   (s+1)^2.  Notice that adding a fraction part to x (ie. some further bits)\n   doesn't change the inequality, s remains the unique solution.  Working\n   suitable factors of 2 into this argument lets it apply to an intended\n   precision at any position for any x, not just the integer binary point.\n\n   If the input is smaller than 2*prec or 2*prec-1, then we just pad with\n   zeros, that of course being our usual interpretation of short inputs.\n   The effect is to extend the root beyond the size of the input (for\n   instance into fractional limbs if u is an integer).  */\n\nvoid\nmpf_sqrt (mpf_ptr r, mpf_srcptr u)\n{\n  mp_size_t usize;\n  mp_ptr up, tp;\n  mp_size_t prec, tsize;\n  mp_exp_t uexp, expodd;\n  TMP_DECL;\n\n  usize = u->_mp_size;\n  if (usize <= 0)\n    {\n      if (usize < 0)\n        SQRT_OF_NEGATIVE;\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  TMP_MARK;\n\n  uexp = u->_mp_exp;\n  prec = r->_mp_prec;\n  up = u->_mp_d;\n\n  expodd = (uexp & 1);\n  tsize = 2 * prec - expodd;\n  r->_mp_size = prec;\n  r->_mp_exp = (uexp + expodd) / 2;    /* ceil(uexp/2) */\n\n  /* root size is ceil(tsize/2), this will be our desired \"prec\" limbs */\n  ASSERT ((tsize + 1) / 2 == prec);\n\n  tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);\n\n  if (usize > tsize)\n    {\n      up += usize - tsize;\n      usize = tsize;\n      MPN_COPY (tp, up, tsize);\n    }\n  else\n    {\n      MPN_ZERO (tp, tsize - usize);\n      MPN_COPY (tp + (tsize - usize), up, usize);\n    }\n\n  mpn_sqrtrem (r->_mp_d, NULL, tp, tsize);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/sqrt_ui.c",
    "content": "/* mpf_sqrt_ui -- Compute the square root of an unsigned integer.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* As usual the aim is to produce PREC(r) limbs of result with the high limb\n   non-zero.  That high limb will end up floor(sqrt(u)), and limbs below are\n   produced by padding the input with zeros, two for each desired result\n   limb, being 2*(prec-1) for a total 2*prec-1 limbs passed to mpn_sqrtrem.\n   The way mpn_sqrtrem calculates floor(sqrt(x)) ensures the root is correct\n   to the intended accuracy, ie. truncated to prec limbs.\n\n   With nails, u might be two limbs, in which case a total 2*prec limbs is\n   passed to mpn_sqrtrem (still giving a prec limb result).  If uhigh is\n   zero we adjust back to 2*prec-1, since mpn_sqrtrem requires the high\n   non-zero.  2*prec limbs are always allocated, even when uhigh is zero, so\n   the store of uhigh can be done without a conditional.\n\n   u==0 is a special case so the rest of the code can assume the result is\n   non-zero (ie. 
will have a non-zero high limb on the result).\n\n   Not done:\n\n   No attempt is made to identify perfect squares.  It's considered this can\n   be left to an application if it might occur with any frequency.  As it\n   stands, mpn_sqrtrem does its normal amount of work on a perfect square\n   followed by zero limbs, though of course only an mpn_sqrtrem1 would be\n   actually needed.  We also end up leaving our mpf result with lots of low\n   trailing zeros, slowing down subsequent operations.\n\n   We're not aware of any optimizations that can be made using the fact the\n   input has lots of trailing zeros (apart from the perfect square\n   case).  */\n\n\n/* 1 if we (might) need two limbs for u */\n#define U2   (GMP_NUMB_BITS < BITS_PER_UI)\n\nvoid\nmpf_sqrt_ui (mpf_ptr r, mpir_ui u)\n{\n  mp_size_t rsize, zeros;\n  mp_ptr tp;\n  mp_size_t prec;\n  TMP_DECL;\n\n  if (UNLIKELY (u == 0))\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  TMP_MARK;\n\n  prec = r->_mp_prec;\n  zeros = 2 * prec - 2;\n  rsize = zeros + 1 + U2;\n\n  tp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB);\n\n  MPN_ZERO (tp, zeros);\n  tp[zeros] = u & GMP_NUMB_MASK;\n\n#if U2\n  {\n    mp_limb_t uhigh = u >> GMP_NUMB_BITS;\n    tp[zeros + 1] = uhigh;\n    rsize -= (uhigh == 0);\n  }\n#endif\n\n  mpn_sqrtrem (r->_mp_d, NULL, tp, rsize);\n\n  r->_mp_size = prec;\n  r->_mp_exp = 1;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/sub.c",
    "content": "/* mpf_sub -- Subtract two floats.\n\nCopyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_sub (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_ptr rp, tp;\n  mp_size_t usize, vsize, rsize;\n  mp_size_t prec;\n  mp_exp_t exp;\n  mp_size_t ediff;\n  int negate;\n  TMP_DECL;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n\n  /* Handle special cases that don't work in generic code below.  */\n  if (usize == 0)\n    {\n      mpf_neg (r, v);\n      return;\n    }\n  if (vsize == 0)\n    {\n      if (r != u)\n        mpf_set (r, u);\n      return;\n    }\n\n  /* If signs of U and V are different, perform addition.  */\n  if ((usize ^ vsize) < 0)\n    {\n      __mpf_struct v_negated;\n      v_negated._mp_size = -vsize;\n      v_negated._mp_exp = v->_mp_exp;\n      v_negated._mp_d = v->_mp_d;\n      mpf_add (r, u, &v_negated);\n      return;\n    }\n\n  TMP_MARK;\n\n  /* Signs are now known to be the same.  */\n  negate = usize < 0;\n\n  /* Make U be the operand with the largest exponent.  
*/\n  if (u->_mp_exp < v->_mp_exp)\n    {\n      mpf_srcptr t;\n      t = u; u = v; v = t;\n      negate ^= 1;\n      usize = u->_mp_size;\n      vsize = v->_mp_size;\n    }\n\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  rp = r->_mp_d;\n  prec = r->_mp_prec + 1;\n  exp = u->_mp_exp;\n  ediff = u->_mp_exp - v->_mp_exp;\n\n  /* If ediff is 0 or 1, we might have a situation where the operands are\n     extremely close.  We need to scan the operands from the most significant\n     end ignore the initial parts that are equal.  */\n  if (ediff <= 1)\n    {\n      if (ediff == 0)\n\t{\n\t  /* Skip leading limbs in U and V that are equal.  */\n\t  if (up[usize - 1] == vp[vsize - 1])\n\t    {\n\t      /* This loop normally exits immediately.  Optimize for that.  */\n\t      do\n\t\t{\n\t\t  usize--;\n\t\t  vsize--;\n\t\t  exp--;\n\n\t\t  if (usize == 0)\n\t\t    {\n                      /* u cancels high limbs of v, result is rest of v */\n\t\t      negate ^= 1;\n                    cancellation:\n                      /* strip high zeros before truncating to prec */\n                      while (vsize != 0 && vp[vsize - 1] == 0)\n                        {\n                          vsize--;\n                          exp--;\n                        }\n\t\t      if (vsize > prec)\n\t\t\t{\n\t\t\t  vp += vsize - prec;\n\t\t\t  vsize = prec;\n\t\t\t}\n                      MPN_COPY_INCR (rp, vp, vsize);\n                      rsize = vsize;\n                      goto done;\n\t\t    }\n\t\t  if (vsize == 0)\n\t\t    {\n                      vp = up;\n                      vsize = usize;\n                      goto cancellation;\n\t\t    }\n\t\t}\n\t      while (up[usize - 1] == vp[vsize - 1]);\n\t    }\n\n\t  if (up[usize - 1] < vp[vsize - 1])\n\t    {\n\t      /* For simplicity, swap U and V.  
Note that since the loop above\n\t\t wouldn't have exited unless up[usize - 1] and vp[vsize - 1]\n\t\t were non-equal, this if-statement catches all cases where U\n\t\t is smaller than V.  */\n\t      MPN_SRCPTR_SWAP (up,usize, vp,vsize);\n\t      negate ^= 1;\n\t      /* negating ediff not necessary since it is 0.  */\n\t    }\n\n\t  /* Check for\n\t     x+1 00000000 ...\n\t      x  ffffffff ... */\n\t  if (up[usize - 1] != vp[vsize - 1] + 1)\n\t    goto general_case;\n\t  usize--;\n\t  vsize--;\n\t  exp--;\n\t}\n      else /* ediff == 1 */\n\t{\n\t  /* Check for\n\t     1 00000000 ...\n\t     0 ffffffff ... */\n\n\t  if (up[usize - 1] != 1 || vp[vsize - 1] != GMP_NUMB_MAX\n\t      || (usize >= 2 && up[usize - 2] != 0))\n\t    goto general_case;\n\n\t  usize--;\n\t  exp--;\n\t}\n\n      /* Skip sequences of 00000000/ffffffff */\n      while (vsize != 0 && usize != 0 && up[usize - 1] == 0\n\t     && vp[vsize - 1] == GMP_NUMB_MAX)\n\t{\n\t  usize--;\n\t  vsize--;\n\t  exp--;\n\t}\n\n      if (usize == 0)\n\t{\n\t  while (vsize != 0 && vp[vsize - 1] == GMP_NUMB_MAX)\n\t    {\n\t      vsize--;\n\t      exp--;\n\t    }\n\t}\n\n      if (usize > prec - 1)\n\t{\n\t  up += usize - (prec - 1);\n\t  usize = prec - 1;\n\t}\n      if (vsize > prec - 1)\n\t{\n\t  vp += vsize - (prec - 1);\n\t  vsize = prec - 1;\n\t}\n\n      tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);\n      {\n\tmp_limb_t cy_limb;\n\tif (vsize == 0)\n\t  {\n\t    mp_size_t size, i;\n\t    size = usize;\n\t    for (i = 0; i < size; i++)\n\t      tp[i] = up[i];\n\t    tp[size] = 1;\n\t    rsize = size + 1;\n\t    exp++;\n\t    goto normalize;\n\t  }\n\tif (usize == 0)\n\t  {\n\t    mp_size_t size, i;\n\t    size = vsize;\n\t    for (i = 0; i < size; i++)\n\t      tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t    cy_limb = 1 - mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);\n\t    rsize = vsize;\n\t    if (cy_limb == 0)\n\t      {\n\t\ttp[rsize] = 1;\n\t\trsize++;\n\t\texp++;\n\t      }\n\t    goto normalize;\n\t  
}\n\tif (usize >= vsize)\n\t  {\n\t    /* uuuu     */\n\t    /* vv       */\n\t    mp_size_t size;\n\t    size = usize - vsize;\n\t    MPN_COPY (tp, up, size);\n\t    cy_limb = mpn_sub_n (tp + size, up + size, vp, vsize);\n\t    rsize = usize;\n\t  }\n\telse /* (usize < vsize) */\n\t  {\n\t    /* uuuu     */\n\t    /* vvvvvvv  */\n\t    mp_size_t size, i;\n\t    size = vsize - usize;\n\t    for (i = 0; i < size; i++)\n\t      tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t    cy_limb = mpn_sub_n (tp + size, up, vp + size, usize);\n\t    cy_limb+= mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);\n\t    cy_limb-= mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);\n\t    rsize = vsize;\n\t  }\n\tif (cy_limb == 0)\n\t  {\n\t    tp[rsize] = 1;\n\t    rsize++;\n\t    exp++;\n\t  }\n\tgoto normalize;\n      }\n    }\n\ngeneral_case:\n  /* If U extends beyond PREC, ignore the part that does.  */\n  if (usize > prec)\n    {\n      up += usize - prec;\n      usize = prec;\n    }\n\n  /* If V extends beyond PREC, ignore the part that does.\n     Note that this may make vsize negative.  */\n  if (vsize + ediff > prec)\n    {\n      vp += vsize + ediff - prec;\n      vsize = prec - ediff;\n    }\n\n  /* Allocate temp space for the result.  Allocate\n     just vsize + ediff later???  */\n  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);\n\n  if (ediff >= prec)\n    {\n      /* V completely cancelled.  */\n      if (tp != up)\n\tMPN_COPY (rp, up, usize);\n      rsize = usize;\n    }\n  else\n    {\n      /* Locate the least significant non-zero limb in (the needed\n\t parts of) U and V, to simplify the code below.  
*/\n      for (;;)\n\t{\n\t  if (vsize == 0)\n\t    {\n\t      MPN_COPY (rp, up, usize);\n\t      rsize = usize;\n\t      goto done;\n\t    }\n\t  if (vp[0] != 0)\n\t    break;\n\t  vp++, vsize--;\n\t}\n      for (;;)\n\t{\n\t  if (usize == 0)\n\t    {\n\t      MPN_COPY (rp, vp, vsize);\n\t      rsize = vsize;\n\t      negate ^= 1;\n\t      goto done;\n\t    }\n\t  if (up[0] != 0)\n\t    break;\n\t  up++, usize--;\n\t}\n\n      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */\n      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */\n\n      if (usize > ediff)\n\t{\n\t  /* U and V partially overlaps.  */\n\t  if (ediff == 0)\n\t    {\n\t      /* Have to compare the leading limbs of u and v\n\t\t to determine whether to compute u - v or v - u.  */\n\t      if (usize >= vsize)\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /* vv       */\n\t\t  mp_size_t size;\n\t\t  size = usize - vsize;\n\t\t  MPN_COPY (tp, up, size);\n\t\t  mpn_sub_n (tp + size, up + size, vp, vsize);\n\t\t  rsize = usize;\n\t\t}\n\t      else /* (usize < vsize) */\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /* vvvvvvv  */\n\t\t  mp_size_t size, i;\n\t\t  size = vsize - usize;\n\t\t  tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t\t  for (i = 1; i < size; i++)\n\t\t    tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t\t  mpn_sub_n (tp + size, up, vp + size, usize);\n\t\t  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);\n\t\t  rsize = vsize;\n\t\t}\n\t    }\n\t  else\n\t    {\n\t      if (vsize + ediff <= usize)\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /*   v      */\n\t\t  mp_size_t size;\n\t\t  size = usize - ediff - vsize;\n\t\t  MPN_COPY (tp, up, size);\n\t\t  mpn_sub (tp + size, up + size, usize - size, vp, vsize);\n\t\t  rsize = usize;\n\t\t}\n\t      else\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /*   vvvvv  */\n\t\t  mp_size_t size, i;\n\t\t  size = vsize + ediff - usize;\n\t\t  tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t\t  for (i = 1; i < size; i++)\n\t\t    tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t\t  mpn_sub (tp + size, up, 
usize, vp + size, usize - ediff);\n\t\t  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);\n\t\t  rsize = vsize + ediff;\n\t\t}\n\t    }\n\t}\n      else\n\t{\n\t  /* uuuu     */\n\t  /*      vv  */\n\t  mp_size_t size, i;\n\t  size = vsize + ediff - usize;\n\t  tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t  for (i = 1; i < vsize; i++)\n\t    tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t  for (i = vsize; i < size; i++)\n\t    tp[i] = GMP_NUMB_MAX;\n\t  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);\n\t  rsize = size + usize;\n\t}\n\n    normalize:\n      /* Full normalize.  Optimize later.  */\n      while (rsize != 0 && tp[rsize - 1] == 0)\n\t{\n\t  rsize--;\n\t  exp--;\n\t}\n      MPN_COPY (rp, tp, rsize);\n    }\n\n done:\n  r->_mp_size = negate ? -rsize : rsize;\n  if (rsize == 0)\n    exp = 0;\n  r->_mp_exp = exp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/sub_ui.c",
    "content": "/* mpf_sub_ui -- Subtract an unsigned integer from a float.\n\nCopyright 1993, 1994, 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_sub_ui (mpf_ptr sum, mpf_srcptr u, mpir_ui v)\n{\n  __mpf_struct vv;\n  mp_limb_t vl;\n\n  if (v == 0)\n    {\n      mpf_set (sum, u);\n      return;\n    }\n\n  vl = v;\n  vv._mp_size = 1;\n  vv._mp_d = &vl;\n  vv._mp_exp = 1;\n  mpf_sub (sum, u, &vv);\n}\n"
  },
  {
    "path": "mpf/swap.c",
    "content": "/* mpf_swap (U, V) -- Swap U and V.\n\nCopyright 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_swap (mpf_ptr u, mpf_ptr v)\n{\n  mp_ptr up, vp;\n  mp_size_t usize, vsize;\n  mp_size_t uprec, vprec;\n  mp_exp_t  uexp, vexp;\n\n  uprec = u->_mp_prec;\n  vprec = v->_mp_prec;\n  v->_mp_prec = uprec;\n  u->_mp_prec = vprec;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n  v->_mp_size = usize;\n  u->_mp_size = vsize;\n\n  uexp = u->_mp_exp;\n  vexp = v->_mp_exp;\n  v->_mp_exp = uexp;\n  u->_mp_exp = vexp;\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  v->_mp_d = up;\n  u->_mp_d = vp;\n}\n"
  },
  {
    "path": "mpf/trunc.c",
    "content": "/* mpf_trunc -- truncate an mpf to an integer.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Notice the use of prec+1 ensures mpf_trunc is equivalent to mpf_set if u\n   is already an integer.  */\n\nvoid\nmpf_trunc (mpf_ptr r, mpf_srcptr u)\n{\n  mp_ptr     rp;\n  mp_srcptr  up;\n  mp_size_t  size, asize, prec;\n  mp_exp_t   exp;\n\n  exp = EXP(u);\n  size = SIZ(u);\n  if (size == 0 || exp <= 0)\n    {\n      /* u is only a fraction */\n      SIZ(r) = 0;\n      EXP(r) = 0;\n      return;\n    }\n\n  up = PTR(u);\n  EXP(r) = exp;\n  asize = ABS (size);\n  up += asize;\n\n  /* skip fraction part of u */\n  asize = MIN (asize, exp);\n\n  /* don't lose precision in the copy */\n  prec = PREC(r) + 1;\n\n  /* skip excess over target precision */\n  asize = MIN (asize, prec);\n\n  up -= asize;\n  rp = PTR(r);\n  SIZ(r) = (size >= 0 ? asize : -asize);\n  if (rp != up)\n    MPN_COPY_INCR (rp, up, asize);\n}\n"
  },
  {
    "path": "mpf/ui_div.c",
    "content": "/* mpf_ui_div -- Divide an unsigned integer with a float.\n\nCopyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nvoid\nmpf_ui_div (mpf_ptr r, mpir_ui u, mpf_srcptr v)\n{\n  mp_srcptr vp;\n  mp_ptr rp, tp, remp, new_vp;\n  mp_size_t vsize;\n  mp_size_t rsize, prospective_rsize, zeros, tsize, high_zero;\n  mp_size_t sign_quotient;\n  mp_size_t prec;\n  mp_exp_t rexp;\n  TMP_DECL;\n\n  vsize = v->_mp_size;\n  sign_quotient = vsize;\n  vsize = ABS (vsize);\n  prec = r->_mp_prec;\n\n  if (UNLIKELY (vsize == 0))\n    DIVIDE_BY_ZERO;\n\n  if (UNLIKELY (u == 0))\n    {\n      r->_mp_size = 0;\n      r->_mp_exp = 0;\n      return;\n    }\n\n  TMP_MARK;\n  rexp = 1 - v->_mp_exp + 1;\n\n  rp = r->_mp_d;\n  vp = v->_mp_d;\n\n  prospective_rsize = 1 - vsize + 1;    /* quot from using given u,v sizes */\n  rsize = prec + 1;                     /* desired quot size */\n\n  zeros = rsize - prospective_rsize;    /* padding u to give rsize */\n  tsize = 1 + zeros;                    /* u with zeros */\n\n  if 
(WANT_TMP_DEBUG)\n    {\n      /* separate alloc blocks, for malloc debugging */\n      remp = TMP_ALLOC_LIMBS (vsize);\n      tp = TMP_ALLOC_LIMBS (tsize);\n      new_vp = NULL;\n      if (rp == vp)\n        new_vp = TMP_ALLOC_LIMBS (vsize);\n    }\n  else\n    {\n      /* one alloc with calculated size, for efficiency */\n      mp_size_t size = vsize + tsize + (rp == vp ? vsize : 0);\n      remp = TMP_ALLOC_LIMBS (size);\n      tp = remp + vsize;\n      new_vp = tp + tsize;\n    }\n\n  /* ensure divisor doesn't overlap quotient */\n  if (rp == vp)\n    {\n      MPN_COPY (new_vp, vp, vsize);\n      vp = new_vp;\n    }\n\n  MPN_ZERO (tp, tsize-1);\n\n  tp[tsize - 1] = u & GMP_NUMB_MASK;\n#if BITS_PER_UI > GMP_NUMB_BITS\n  if (u > GMP_NUMB_MAX)\n    {\n      /* tsize-vsize+1 == rsize, so tsize >= rsize.  rsize == prec+1 >= 2,\n         so tsize >= 2, hence there's room for 2-limb u with nails */\n      ASSERT (tsize >= 2);\n      tp[tsize - 1] = u >> GMP_NUMB_BITS;\n      tp[tsize - 2] = u & GMP_NUMB_MASK;\n      rexp++;\n    }\n#endif\n\n  ASSERT (tsize-vsize+1 == rsize);\n  mpn_tdiv_qr (rp, remp, (mp_size_t) 0, tp, tsize, vp, vsize);\n\n  /* strip possible zero high limb */\n  high_zero = (rp[rsize-1] == 0);\n  rsize -= high_zero;\n  rexp -= high_zero;\n\n  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;\n  r->_mp_exp = rexp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/ui_sub.c",
    "content": "/* mpf_ui_sub -- Subtract a float from an unsigned long int.\n\nCopyright 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_ui_sub (mpf_ptr r, mpir_ui u, mpf_srcptr v)\n{\n  mp_srcptr up, vp;\n  mp_ptr rp, tp;\n  mp_size_t usize, vsize, rsize;\n  mp_size_t prec;\n  mp_exp_t uexp;\n  mp_size_t ediff;\n  int negate;\n  mp_limb_t ulimb;\n  TMP_DECL;\n\n  vsize = v->_mp_size;\n\n  /* Handle special cases that don't work in generic code below.  */\n  if (u == 0)\n    {\n      mpf_neg (r, v);\n      return;\n    }\n  if (vsize == 0)\n    {\n      mpf_set_ui (r, u);\n      return;\n    }\n\n  /* If signs of U and V are different, perform addition.  */\n  if (vsize < 0)\n    {\n      __mpf_struct v_negated;\n      v_negated._mp_size = -vsize;\n      v_negated._mp_exp = v->_mp_exp;\n      v_negated._mp_d = v->_mp_d;\n      mpf_add_ui (r, &v_negated, u);\n      return;\n    }\n\n  TMP_MARK;\n\n  /* Signs are now known to be the same.  */\n\n  ulimb = u;\n  /* Make U be the operand with the largest exponent.  
*/\n  if (1 < v->_mp_exp)\n    {\n      negate = 1;\n      usize = ABS (vsize);\n      vsize = 1;\n      up = v->_mp_d;\n      vp = &ulimb;\n      rp = r->_mp_d;\n      prec = r->_mp_prec + 1;\n      uexp = v->_mp_exp;\n      ediff = uexp - 1;\n    }\n  else\n    {\n      negate = 0;\n      usize = 1;\n      vsize = ABS (vsize);\n      up = &ulimb;\n      vp = v->_mp_d;\n      rp = r->_mp_d;\n      prec = r->_mp_prec;\n      uexp = 1;\n      ediff = 1 - v->_mp_exp;\n    }\n\n  /* Ignore leading limbs in U and V that are equal.  Doing\n     this helps increase the precision of the result.  */\n  if (ediff == 0)\n    {\n      /* This loop normally exits immediately.  Optimize for that.  */\n      for (;;)\n\t{\n\t  usize--;\n\t  vsize--;\n\t  if (up[usize] != vp[vsize])\n\t    break;\n\t  uexp--;\n\t  if (usize == 0)\n\t    goto Lu0;\n\t  if (vsize == 0)\n\t    goto Lv0;\n\t}\n      usize++;\n      vsize++;\n      /* Note that either operand (but not both operands) might now have\n\t leading zero limbs.  It matters only that U is unnormalized if\n\t vsize is now zero, and vice versa.  And it is only in that case\n\t that we have to adjust uexp.  */\n      if (vsize == 0)\n      Lv0:\n\twhile (usize != 0 && up[usize - 1] == 0)\n\t  usize--, uexp--;\n      if (usize == 0)\n      Lu0:\n\twhile (vsize != 0 && vp[vsize - 1] == 0)\n\t  vsize--, uexp--;\n    }\n\n  /* If U extends beyond PREC, ignore the part that does.  */\n  if (usize > prec)\n    {\n      up += usize - prec;\n      usize = prec;\n    }\n\n  /* If V extends beyond PREC, ignore the part that does.\n     Note that this may make vsize negative.  */\n  if (vsize + ediff > prec)\n    {\n      vp += vsize + ediff - prec;\n      vsize = prec - ediff;\n    }\n\n  /* Allocate temp space for the result.  Allocate\n     just vsize + ediff later???  */\n  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);\n\n  if (ediff >= prec)\n    {\n      /* V completely cancelled.  
*/\n      if (tp != up)\n\tMPN_COPY (rp, up, usize);\n      rsize = usize;\n    }\n  else\n    {\n      /* Locate the least significant non-zero limb in (the needed\n\t parts of) U and V, to simplify the code below.  */\n      for (;;)\n\t{\n\t  if (vsize == 0)\n\t    {\n\t      MPN_COPY (rp, up, usize);\n\t      rsize = usize;\n\t      goto done;\n\t    }\n\t  if (vp[0] != 0)\n\t    break;\n\t  vp++, vsize--;\n\t}\n      for (;;)\n\t{\n\t  if (usize == 0)\n\t    {\n\t      MPN_COPY (rp, vp, vsize);\n\t      rsize = vsize;\n\t      negate ^= 1;\n\t      goto done;\n\t    }\n\t  if (up[0] != 0)\n\t    break;\n\t  up++, usize--;\n\t}\n\n      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */\n      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */\n\n      if (usize > ediff)\n\t{\n\t  /* U and V partially overlaps.  */\n\t  if (ediff == 0)\n\t    {\n\t      /* Have to compare the leading limbs of u and v\n\t\t to determine whether to compute u - v or v - u.  */\n\t      if (usize > vsize)\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /* vv       */\n\t\t  int cmp;\n\t\t  cmp = mpn_cmp (up + usize - vsize, vp, vsize);\n\t\t  if (cmp >= 0)\n\t\t    {\n\t\t      mp_size_t size;\n\t\t      size = usize - vsize;\n\t\t      MPN_COPY (tp, up, size);\n\t\t      mpn_sub_n (tp + size, up + size, vp, vsize);\n\t\t      rsize = usize;\n\t\t    }\n\t\t  else\n\t\t    {\n\t\t      /* vv       */  /* Swap U and V. 
*/\n\t\t      /* uuuu     */\n\t\t      mp_size_t size, i;\n\t\t      size = usize - vsize;\n\t\t      tp[0] = -up[0] & GMP_NUMB_MASK;\n\t\t      for (i = 1; i < size; i++)\n\t\t\ttp[i] = ~up[i] & GMP_NUMB_MASK;\n\t\t      mpn_sub_n (tp + size, vp, up + size, vsize);\n\t\t      mpn_sub_1 (tp + size, tp + size, vsize, (mp_limb_t) 1);\n\t\t      negate ^= 1;\n\t\t      rsize = usize;\n\t\t    }\n\t\t}\n\t      else if (usize < vsize)\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /* vvvvvvv  */\n\t\t  int cmp;\n\t\t  cmp = mpn_cmp (up, vp + vsize - usize, usize);\n\t\t  if (cmp > 0)\n\t\t    {\n\t\t      mp_size_t size, i;\n\t\t      size = vsize - usize;\n\t\t      tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t\t      for (i = 1; i < size; i++)\n\t\t\ttp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t\t      mpn_sub_n (tp + size, up, vp + size, usize);\n\t\t      mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);\n\t\t      rsize = vsize;\n\t\t    }\n\t\t  else\n\t\t    {\n\t\t      /* vvvvvvv  */  /* Swap U and V. */\n\t\t      /* uuuu     */\n\t\t      /* This is the only place we can get 0.0.  
*/\n\t\t      mp_size_t size;\n\t\t      size = vsize - usize;\n\t\t      MPN_COPY (tp, vp, size);\n\t\t      mpn_sub_n (tp + size, vp + size, up, usize);\n\t\t      negate ^= 1;\n\t\t      rsize = vsize;\n\t\t    }\n\t\t}\n\t      else\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /* vvvv     */\n\t\t  int cmp;\n\t\t  cmp = mpn_cmp (up, vp + vsize - usize, usize);\n\t\t  if (cmp > 0)\n\t\t    {\n\t\t      mpn_sub_n (tp, up, vp, usize);\n\t\t      rsize = usize;\n\t\t    }\n\t\t  else\n\t\t    {\n\t\t      mpn_sub_n (tp, vp, up, usize);\n\t\t      negate ^= 1;\n\t\t      rsize = usize;\n\t\t      /* can give zero */\n\t\t    }\n\t\t}\n\t    }\n\t  else\n\t    {\n\t      if (vsize + ediff <= usize)\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /*   v      */\n\t\t  mp_size_t size;\n\t\t  size = usize - ediff - vsize;\n\t\t  MPN_COPY (tp, up, size);\n\t\t  mpn_sub (tp + size, up + size, usize - size, vp, vsize);\n\t\t  rsize = usize;\n\t\t}\n\t      else\n\t\t{\n\t\t  /* uuuu     */\n\t\t  /*   vvvvv  */\n\t\t  mp_size_t size, i;\n\t\t  size = vsize + ediff - usize;\n\t\t  tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t\t  for (i = 1; i < size; i++)\n\t\t    tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t\t  mpn_sub (tp + size, up, usize, vp + size, usize - ediff);\n\t\t  mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);\n\t\t  rsize = vsize + ediff;\n\t\t}\n\t    }\n\t}\n      else\n\t{\n\t  /* uuuu     */\n\t  /*      vv  */\n\t  mp_size_t size, i;\n\t  size = vsize + ediff - usize;\n\t  tp[0] = -vp[0] & GMP_NUMB_MASK;\n\t  for (i = 1; i < vsize; i++)\n\t    tp[i] = ~vp[i] & GMP_NUMB_MASK;\n\t  for (i = vsize; i < size; i++)\n\t    tp[i] = GMP_NUMB_MAX;\n\t  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);\n\t  rsize = size + usize;\n\t}\n\n      /* Full normalize.  Optimize later.  */\n      while (rsize != 0 && tp[rsize - 1] == 0)\n\t{\n\t  rsize--;\n\t  uexp--;\n\t}\n      MPN_COPY (rp, tp, rsize);\n    }\n\n done:\n  r->_mp_size = negate ? -rsize : rsize;\n  r->_mp_exp = uexp;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpf/urandomb.c",
    "content": "/* mpf_urandomb (rop, state, nbits) -- Generate a uniform pseudorandom\n   real number between 0 (inclusive) and 1 (exclusive) of size NBITS,\n   using STATE as the random state previously initialized by a call to\n   gmp_randinit().\n\nCopyright 1999, 2000, 2001, 2002  Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpf_urandomb (mpf_t rop, gmp_randstate_t rstate, mp_bitcnt_t nbits)\n{\n  mp_ptr rp;\n  mp_size_t nlimbs;\n  mp_exp_t exp;\n  mp_size_t prec;\n\n  rp = PTR (rop);\n  nlimbs = BITS_TO_LIMBS (nbits);\n  prec = PREC (rop);\n\n  if (nlimbs > prec + 1 || nlimbs == 0)\n    {\n      nlimbs = prec + 1;\n      nbits = nlimbs * GMP_NUMB_BITS;\n    }\n\n  _gmp_rand (rp, rstate, nbits);\n\n  /* If nbits isn't a multiple of GMP_NUMB_BITS, shift up.  */\n  if (nbits % GMP_NUMB_BITS != 0)\n    mpn_lshift (rp, rp, nlimbs, GMP_NUMB_BITS - nbits % GMP_NUMB_BITS);\n\n  exp = 0;\n  while (nlimbs != 0 && rp[nlimbs - 1] == 0)\n    {\n      nlimbs--;\n      exp--;\n    }\n  EXP (rop) = exp;\n  SIZ (rop) = nlimbs;\n}\n"
  },
  {
    "path": "mpir.net/build.vc/check_config.bat",
    "content": "@echo off\nrem %1 = platform\nrem %2 = configuration\nrem %3 - msvc version\n\nrem output_params.bat contains:\nrem (set ldir=architecture)   \nrem (set libr=lib) \nrem (set plat=x64) \nrem (set conf=Release) \n\ncall :clrerr\nif exist ..\\..\\build.vc\\output_params.bat (call ..\\..\\build.vc\\output_params.bat) else (call :seterr & echo ERROR: 'output_params.bat' not found & exit /b %errorlevel%)\n\nif /i \"%1\" EQU \"%plat%\" if /i \"%2\" EQU \"%conf%\" (exit /b 0)\n\ncall :seterr\necho ERROR The last MPIR build was for \\%plat%\\%conf%, not %1\\%2\necho Please set the correct platform and configuration to build MPIR.Net\nexit /b %errorlevel%\n\n:clrerr\nexit /b 0\n\n:seterr\nexit /b 1\n"
  },
  {
    "path": "mpir.net/build.vc11/mpir.net/mpir.net.vcxproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" ToolsVersion=\"12.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup Label=\"ProjectConfigurations\">\n    <ProjectConfiguration Include=\"Debug|Win32\">\n      <Configuration>Debug</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Debug|x64\">\n      <Configuration>Debug</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|Win32\">\n      <Configuration>Release</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|x64\">\n      <Configuration>Release</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n  </ItemGroup>\n  <PropertyGroup Label=\"Globals\">\n    <ProjectGuid>{1E6E20CA-9F97-45A0-B797-C43A9716B95B}</ProjectGuid>\n    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>\n    <Keyword>ManagedCProj</Keyword>\n    <RootNamespace>mpirnet</RootNamespace>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.Default.props\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v110</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v110</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup 
Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v110</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v110</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.props\" />\n  <ImportGroup Label=\"ExtensionSettings\">\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" 
Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <PropertyGroup Label=\"UserMacros\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 
11</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 11</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 11</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <ClCompile>\n      
<WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 11</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.Data\" />\n    <Reference Include=\"System.Xml\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader 
Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">Create</PrecompiledHeader>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\" />\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\" />\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\" />\n  </ItemGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.targets\" />\n  <ImportGroup Label=\"ExtensionTargets\">\n  </ImportGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc11/mpir.net/mpir.net.vcxproj.filters",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup>\n    <Filter Include=\"Source Files\">\n      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>\n      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>\n    </Filter>\n    <Filter Include=\"Header Files\">\n      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>\n      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>\n    </Filter>\n    <Filter Include=\"Resource Files\">\n      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>\n      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>\n    </Filter>\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\">\n      <Filter>Source 
Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\">\n      <Filter>Resource Files</Filter>\n    </ResourceCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\">\n      <Filter>Resource Files</Filter>\n    </Image>\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\">\n      <Filter>Header Files</Filter>\n    </None>\n  </ItemGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc11/mpir.net-tests/mpir.net-tests.csproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <PropertyGroup>\n    <Configuration Condition=\" '$(Configuration)' == '' \">Debug</Configuration>\n    <Platform Condition=\" '$(Platform)' == '' \">AnyCPU</Platform>\n    <ProjectGuid>{DD861777-F312-4C3C-9E7C-148EAABE281D}</ProjectGuid>\n    <OutputType>Library</OutputType>\n    <AppDesignerFolder>Properties</AppDesignerFolder>\n    <RootNamespace>MPIR.Tests</RootNamespace>\n    <AssemblyName>mpir.net-tests</AssemblyName>\n    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>\n    <FileAlignment>512</FileAlignment>\n    <ProjectTypeGuids>{3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>\n    <VisualStudioVersion Condition=\"'$(VisualStudioVersion)' == ''\">10.0</VisualStudioVersion>\n    <VSToolsPath Condition=\"'$(VSToolsPath)' == ''\">$(MSBuildExtensionsPath32)\\Microsoft\\VisualStudio\\v$(VisualStudioVersion)</VSToolsPath>\n    <ReferencePath>$(ProgramFiles)\\Common Files\\microsoft shared\\VSTT\\$(VisualStudioVersion)\\UITestExtensionPackages</ReferencePath>\n    <IsCodedUITest>False</IsCodedUITest>\n    <TestProjectType>UnitTest</TestProjectType>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' \">\n    <DebugSymbols>true</DebugSymbols>\n    <DebugType>full</DebugType>\n    <Optimize>false</Optimize>\n    <OutputPath>bin\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n    <PlatformTarget>AnyCPU</PlatformTarget>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' \">\n    <DebugType>pdbonly</DebugType>\n    <Optimize>true</Optimize>\n    <OutputPath>bin\\Release\\</OutputPath>\n    
<DefineConstants>TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x64'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x64\\Debug\\</OutputPath>\n    <DefineConstants>TRACE;DEBUG;WIN64</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x64'\">\n    <OutputPath>bin\\x64\\Release\\</OutputPath>\n    <DefineConstants>TRACE;WIN64</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x86'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x86\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x86'\">\n    <OutputPath>bin\\x86\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.XML\" />\n    <Reference Include=\"System.Xml.Serialization\" />\n  </ItemGroup>\n  <Choose>\n    <When Condition=\"('$(VisualStudioVersion)' == '10.0' or '$(VisualStudioVersion)' == '') and '$(TargetFrameworkVersion)' == 'v3.5'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\" />\n      </ItemGroup>\n    </When>\n    <Otherwise>\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework\" />\n      </ItemGroup>\n    </Otherwise>\n  </Choose>\n  <ItemGroup>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Assignment.cs\">\n      <Link>HugeFloatTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Arithmetic.cs\">\n      <Link>HugeFloatTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Comparisons.cs\">\n      <Link>HugeFloatTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeFloatTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Conversions.cs\">\n      <Link>HugeFloatTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ExpressionTests.cs\">\n      <Link>HugeFloatTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\IO.cs\">\n      <Link>HugeFloatTests\\IO.cs</Link>\n    </Compile>\n    <Compile 
Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Math.cs\">\n      <Link>HugeFloatTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Precision.cs\">\n      <Link>HugeFloatTests\\Precision.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Arithmetic.cs\">\n      <Link>HugeIntTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Assignment.cs\">\n      <Link>HugeIntTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Bitwise.cs\">\n      <Link>HugeIntTests\\Bitwise.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Conversions.cs\">\n      <Link>HugeIntTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Comparisons.cs\">\n      <Link>HugeIntTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ExpressionTests.cs\">\n      <Link>HugeIntTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\IO.cs\">\n      <Link>HugeIntTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Math.cs\">\n      <Link>HugeIntTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\NumberTheoretic.cs\">\n      <Link>HugeIntTests\\NumberTheoretic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Arithmetic.cs\">\n      <Link>HugeRationalTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Assignment.cs\">\n      <Link>HugeRationalTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Comparisons.cs\">\n      <Link>HugeRationalTests\\Comparisons.cs</Link>\n    
</Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeRationalTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Conversions.cs\">\n      <Link>HugeRationalTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ExpressionTests.cs\">\n      <Link>HugeRationalTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\IO.cs\">\n      <Link>HugeRationalTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\IntegrationTests\\XmlCommentsTests.cs\">\n      <Link>IntegrationTests\\XmlCommentsTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\MpirSettings.cs\">\n      <Link>OtherTests\\MpirSettings.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\Random.cs\">\n      <Link>OtherTests\\Random.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Accessors.cs\">\n      <Link>Utilities\\Accessors.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Platform.cs\">\n      <Link>Utilities\\Platform.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeIntTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Properties\\AssemblyInfo.cs\">\n      <Link>Properties\\AssemblyInfo.cs</Link>\n    </Compile>\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\mpir.net\\mpir.net.vcxproj\">\n      <Project>{1e6e20ca-9f97-45a0-b797-c43a9716b95b}</Project>\n      <Name>mpir.net</Name>\n    </ProjectReference>\n  </ItemGroup>\n  <ItemGroup />\n  <Choose>\n    <When Condition=\"'$(VisualStudioVersion)' == '10.0' And 
'$(IsCodedUITest)' == 'True'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.CodedUITestFramework, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Common, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Extension, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITesting, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n      </ItemGroup>\n    </When>\n  </Choose>\n  <Import Project=\"$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets\" Condition=\"Exists('$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets')\" />\n  <Import Project=\"$(MSBuildToolsPath)\\Microsoft.CSharp.targets\" />\n  <PropertyGroup>\n    <PreBuildEvent>taskkill /F /IM vstest.executionengine.exe /FI \"MEMUSAGE gt 1\"</PreBuildEvent>\n  </PropertyGroup>\n  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. \n       Other similar extension points exist, see Microsoft.Common.targets.\n  <Target Name=\"BeforeBuild\">\n  </Target>\n  <Target Name=\"AfterBuild\">\n  </Target>\n  -->\n</Project>"
  },
  {
    "path": "mpir.net/build.vc11/mpir.net.sln",
    "content": "﻿\nMicrosoft Visual Studio Solution File, Format Version 12.00\n# Visual Studio 2012\nProject(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"mpir.net\", \"mpir.net\\mpir.net.vcxproj\", \"{1E6E20CA-9F97-45A0-B797-C43A9716B95B}\"\nEndProject\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"mpir.net-tests\", \"mpir.net-tests\\mpir.net-tests.csproj\", \"{DD861777-F312-4C3C-9E7C-148EAABE281D}\"\nEndProject\nGlobal\n\tGlobalSection(SolutionConfigurationPlatforms) = preSolution\n\t\tDebug|Win32 = Debug|Win32\n\t\tDebug|x64 = Debug|x64\n\t\tRelease|Win32 = Release|Win32\n\t\tRelease|x64 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.ActiveCfg = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.Build.0 = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.Build.0 = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.ActiveCfg = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.Build.0 = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.ActiveCfg = Release|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.Build.0 = Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.ActiveCfg = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.Build.0 = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.Build.0 = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.ActiveCfg = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.Build.0 = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.ActiveCfg = Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.Build.0 = 
Release|x64\n\tEndGlobalSection\n\tGlobalSection(SolutionProperties) = preSolution\n\t\tHideSolutionNode = FALSE\n\tEndGlobalSection\nEndGlobal\n"
  },
  {
    "path": "mpir.net/build.vc12/mpir.net/mpir.net.vcxproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" ToolsVersion=\"12.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup Label=\"ProjectConfigurations\">\n    <ProjectConfiguration Include=\"Debug|Win32\">\n      <Configuration>Debug</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Debug|x64\">\n      <Configuration>Debug</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|Win32\">\n      <Configuration>Release</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|x64\">\n      <Configuration>Release</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n  </ItemGroup>\n  <PropertyGroup Label=\"Globals\">\n    <ProjectGuid>{1E6E20CA-9F97-45A0-B797-C43A9716B95B}</ProjectGuid>\n    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>\n    <Keyword>ManagedCProj</Keyword>\n    <RootNamespace>mpirnet</RootNamespace>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.Default.props\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v120</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v120</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup 
Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v120</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v120</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.props\" />\n  <ImportGroup Label=\"ExtensionSettings\">\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" 
Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <PropertyGroup Label=\"UserMacros\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 
12</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 12</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 12</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <ClCompile>\n      
<WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 12</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.Data\" />\n    <Reference Include=\"System.Xml\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader 
Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">Create</PrecompiledHeader>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\" />\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\" />\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\" />\n  </ItemGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.targets\" />\n  <ImportGroup Label=\"ExtensionTargets\">\n  </ImportGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc12/mpir.net/mpir.net.vcxproj.filters",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup>\n    <Filter Include=\"Source Files\">\n      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>\n      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>\n    </Filter>\n    <Filter Include=\"Header Files\">\n      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>\n      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>\n    </Filter>\n    <Filter Include=\"Resource Files\">\n      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>\n      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>\n    </Filter>\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\">\n      <Filter>Source 
Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\">\n      <Filter>Resource Files</Filter>\n    </ResourceCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\">\n      <Filter>Resource Files</Filter>\n    </Image>\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\">\n      <Filter>Header Files</Filter>\n    </None>\n  </ItemGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc12/mpir.net-tests/mpir.net-tests.csproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <PropertyGroup>\n    <Configuration Condition=\" '$(Configuration)' == '' \">Debug</Configuration>\n    <Platform Condition=\" '$(Platform)' == '' \">AnyCPU</Platform>\n    <ProjectGuid>{DD861777-F312-4C3C-9E7C-148EAABE281D}</ProjectGuid>\n    <OutputType>Library</OutputType>\n    <AppDesignerFolder>Properties</AppDesignerFolder>\n    <RootNamespace>MPIR.Tests</RootNamespace>\n    <AssemblyName>mpir.net-tests</AssemblyName>\n    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>\n    <FileAlignment>512</FileAlignment>\n    <ProjectTypeGuids>{3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>\n    <VisualStudioVersion Condition=\"'$(VisualStudioVersion)' == ''\">10.0</VisualStudioVersion>\n    <VSToolsPath Condition=\"'$(VSToolsPath)' == ''\">$(MSBuildExtensionsPath32)\\Microsoft\\VisualStudio\\v$(VisualStudioVersion)</VSToolsPath>\n    <ReferencePath>$(ProgramFiles)\\Common Files\\microsoft shared\\VSTT\\$(VisualStudioVersion)\\UITestExtensionPackages</ReferencePath>\n    <IsCodedUITest>False</IsCodedUITest>\n    <TestProjectType>UnitTest</TestProjectType>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' \">\n    <DebugSymbols>true</DebugSymbols>\n    <DebugType>full</DebugType>\n    <Optimize>false</Optimize>\n    <OutputPath>bin\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n    <PlatformTarget>AnyCPU</PlatformTarget>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' \">\n    <DebugType>pdbonly</DebugType>\n    <Optimize>true</Optimize>\n    <OutputPath>bin\\Release\\</OutputPath>\n    
<DefineConstants>TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x64'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x64\\Debug\\</OutputPath>\n    <DefineConstants>TRACE;DEBUG;WIN64</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x64'\">\n    <OutputPath>bin\\x64\\Release\\</OutputPath>\n    <DefineConstants>TRACE;WIN64</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x86'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x86\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x86'\">\n    <OutputPath>bin\\x86\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.XML\" />\n    <Reference Include=\"System.Xml.Serialization\" />\n  </ItemGroup>\n  <Choose>\n    <When Condition=\"('$(VisualStudioVersion)' == '10.0' or '$(VisualStudioVersion)' == '') and '$(TargetFrameworkVersion)' == 'v3.5'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\" />\n      </ItemGroup>\n    </When>\n    <Otherwise>\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework\" />\n      </ItemGroup>\n    </Otherwise>\n  </Choose>\n  <ItemGroup>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Assignment.cs\">\n      <Link>HugeFloatTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Arithmetic.cs\">\n      <Link>HugeFloatTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Comparisons.cs\">\n      <Link>HugeFloatTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeFloatTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Conversions.cs\">\n      <Link>HugeFloatTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ExpressionTests.cs\">\n      <Link>HugeFloatTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\IO.cs\">\n      <Link>HugeFloatTests\\IO.cs</Link>\n    </Compile>\n    <Compile 
Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Math.cs\">\n      <Link>HugeFloatTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Precision.cs\">\n      <Link>HugeFloatTests\\Precision.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Arithmetic.cs\">\n      <Link>HugeIntTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Assignment.cs\">\n      <Link>HugeIntTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Bitwise.cs\">\n      <Link>HugeIntTests\\Bitwise.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Conversions.cs\">\n      <Link>HugeIntTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Comparisons.cs\">\n      <Link>HugeIntTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ExpressionTests.cs\">\n      <Link>HugeIntTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\IO.cs\">\n      <Link>HugeIntTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Math.cs\">\n      <Link>HugeIntTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\NumberTheoretic.cs\">\n      <Link>HugeIntTests\\NumberTheoretic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Arithmetic.cs\">\n      <Link>HugeRationalTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Assignment.cs\">\n      <Link>HugeRationalTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Comparisons.cs\">\n      <Link>HugeRationalTests\\Comparisons.cs</Link>\n    
</Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeRationalTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Conversions.cs\">\n      <Link>HugeRationalTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ExpressionTests.cs\">\n      <Link>HugeRationalTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\IO.cs\">\n      <Link>HugeRationalTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\IntegrationTests\\XmlCommentsTests.cs\">\n      <Link>IntegrationTests\\XmlCommentsTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\MpirSettings.cs\">\n      <Link>OtherTests\\MpirSettings.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\Random.cs\">\n      <Link>OtherTests\\Random.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Accessors.cs\">\n      <Link>Utilities\\Accessors.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Platform.cs\">\n      <Link>Utilities\\Platform.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeIntTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Properties\\AssemblyInfo.cs\">\n      <Link>Properties\\AssemblyInfo.cs</Link>\n    </Compile>\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\mpir.net\\mpir.net.vcxproj\">\n      <Project>{1e6e20ca-9f97-45a0-b797-c43a9716b95b}</Project>\n      <Name>mpir.net</Name>\n    </ProjectReference>\n  </ItemGroup>\n  <ItemGroup />\n  <Choose>\n    <When Condition=\"'$(VisualStudioVersion)' == '10.0' And 
'$(IsCodedUITest)' == 'True'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.CodedUITestFramework, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Common, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Extension, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITesting, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n      </ItemGroup>\n    </When>\n  </Choose>\n  <Import Project=\"$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets\" Condition=\"Exists('$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets')\" />\n  <Import Project=\"$(MSBuildToolsPath)\\Microsoft.CSharp.targets\" />\n  <PropertyGroup>\n    <PreBuildEvent>taskkill /F /IM vstest.executionengine.exe /FI \"MEMUSAGE gt 1\"</PreBuildEvent>\n  </PropertyGroup>\n  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. \n       Other similar extension points exist, see Microsoft.Common.targets.\n  <Target Name=\"BeforeBuild\">\n  </Target>\n  <Target Name=\"AfterBuild\">\n  </Target>\n  -->\n</Project>"
  },
  {
    "path": "mpir.net/build.vc12/mpir.net.sln",
    "content": "﻿\nMicrosoft Visual Studio Solution File, Format Version 12.00\n# Visual Studio 2013\nVisualStudioVersion = 12.0.40629.0\nMinimumVisualStudioVersion = 10.0.40219.1\nProject(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"mpir.net\", \"mpir.net\\mpir.net.vcxproj\", \"{1E6E20CA-9F97-45A0-B797-C43A9716B95B}\"\nEndProject\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"mpir.net-tests\", \"mpir.net-tests\\mpir.net-tests.csproj\", \"{DD861777-F312-4C3C-9E7C-148EAABE281D}\"\nEndProject\nGlobal\n\tGlobalSection(SolutionConfigurationPlatforms) = preSolution\n\t\tDebug|Win32 = Debug|Win32\n\t\tDebug|x64 = Debug|x64\n\t\tRelease|Win32 = Release|Win32\n\t\tRelease|x64 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.ActiveCfg = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.Build.0 = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.Build.0 = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.ActiveCfg = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.Build.0 = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.ActiveCfg = Release|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.Build.0 = Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.ActiveCfg = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.Build.0 = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.Build.0 = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.ActiveCfg = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.Build.0 = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.ActiveCfg = 
Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.Build.0 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(SolutionProperties) = preSolution\n\t\tHideSolutionNode = FALSE\n\tEndGlobalSection\nEndGlobal\n"
  },
  {
    "path": "mpir.net/build.vc14/mpir.net/mpir.net.vcxproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" ToolsVersion=\"14.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup Label=\"ProjectConfigurations\">\n    <ProjectConfiguration Include=\"Debug|Win32\">\n      <Configuration>Debug</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Debug|x64\">\n      <Configuration>Debug</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|Win32\">\n      <Configuration>Release</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|x64\">\n      <Configuration>Release</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n  </ItemGroup>\n  <PropertyGroup Label=\"Globals\">\n    <ProjectGuid>{1E6E20CA-9F97-45A0-B797-C43A9716B95B}</ProjectGuid>\n    <TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>\n    <Keyword>ManagedCProj</Keyword>\n    <RootNamespace>mpirnet</RootNamespace>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.Default.props\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v140</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v140</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup 
Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v140</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v140</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.props\" />\n  <ImportGroup Label=\"ExtensionSettings\">\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" 
Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <PropertyGroup Label=\"UserMacros\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 
14</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 14</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 14</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <ClCompile>\n      
<WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 14</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.Data\" />\n    <Reference Include=\"System.Xml\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader 
Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">Create</PrecompiledHeader>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\" />\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\" />\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\" />\n  </ItemGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.targets\" />\n  <ImportGroup Label=\"ExtensionTargets\">\n  </ImportGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc14/mpir.net/mpir.net.vcxproj.filters",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"14.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup>\n    <Filter Include=\"Source Files\">\n      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>\n      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>\n    </Filter>\n    <Filter Include=\"Header Files\">\n      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>\n      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>\n    </Filter>\n    <Filter Include=\"Resource Files\">\n      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>\n      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>\n    </Filter>\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\">\n      <Filter>Source 
Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\">\n      <Filter>Resource Files</Filter>\n    </ResourceCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\">\n      <Filter>Resource Files</Filter>\n    </Image>\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\">\n      <Filter>Header Files</Filter>\n    </None>\n  </ItemGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc14/mpir.net-tests/mpir.net-tests.csproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"14.0\" DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <PropertyGroup>\n    <Configuration Condition=\" '$(Configuration)' == '' \">Debug</Configuration>\n    <Platform Condition=\" '$(Platform)' == '' \">AnyCPU</Platform>\n    <ProjectGuid>{DD861777-F312-4C3C-9E7C-148EAABE281D}</ProjectGuid>\n    <OutputType>Library</OutputType>\n    <AppDesignerFolder>Properties</AppDesignerFolder>\n    <RootNamespace>MPIR.Tests</RootNamespace>\n    <AssemblyName>mpir.net-tests</AssemblyName>\n    <TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>\n    <FileAlignment>512</FileAlignment>\n    <ProjectTypeGuids>{3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>\n    <VisualStudioVersion Condition=\"'$(VisualStudioVersion)' == ''\">10.0</VisualStudioVersion>\n    <VSToolsPath Condition=\"'$(VSToolsPath)' == ''\">$(MSBuildExtensionsPath32)\\Microsoft\\VisualStudio\\v$(VisualStudioVersion)</VSToolsPath>\n    <ReferencePath>$(ProgramFiles)\\Common Files\\microsoft shared\\VSTT\\$(VisualStudioVersion)\\UITestExtensionPackages</ReferencePath>\n    <IsCodedUITest>False</IsCodedUITest>\n    <TestProjectType>UnitTest</TestProjectType>\n    <TargetFrameworkProfile />\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' \">\n    <DebugSymbols>true</DebugSymbols>\n    <DebugType>full</DebugType>\n    <Optimize>false</Optimize>\n    <OutputPath>bin\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n    <PlatformTarget>AnyCPU</PlatformTarget>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' \">\n    <DebugType>pdbonly</DebugType>\n    <Optimize>true</Optimize>\n    
<OutputPath>bin\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x64'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x64\\Debug\\</OutputPath>\n    <DefineConstants>TRACE;DEBUG;WIN64</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x64'\">\n    <OutputPath>bin\\x64\\Release\\</OutputPath>\n    <DefineConstants>TRACE;WIN64</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x86'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x86\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x86'\">\n    <OutputPath>bin\\x86\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    
<ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.XML\" />\n    <Reference Include=\"System.Xml.Serialization\" />\n  </ItemGroup>\n  <Choose>\n    <When Condition=\"('$(VisualStudioVersion)' == '10.0' or '$(VisualStudioVersion)' == '') and '$(TargetFrameworkVersion)' == 'v3.5'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\" />\n      </ItemGroup>\n    </When>\n    <Otherwise>\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework\" />\n      </ItemGroup>\n    </Otherwise>\n  </Choose>\n  <ItemGroup>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Assignment.cs\">\n      <Link>HugeFloatTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Arithmetic.cs\">\n      <Link>HugeFloatTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Comparisons.cs\">\n      <Link>HugeFloatTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeFloatTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Conversions.cs\">\n      <Link>HugeFloatTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ExpressionTests.cs\">\n      <Link>HugeFloatTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\IO.cs\">\n      <Link>HugeFloatTests\\IO.cs</Link>\n    </Compile>\n    <Compile 
Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Math.cs\">\n      <Link>HugeFloatTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Precision.cs\">\n      <Link>HugeFloatTests\\Precision.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Arithmetic.cs\">\n      <Link>HugeIntTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Assignment.cs\">\n      <Link>HugeIntTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Bitwise.cs\">\n      <Link>HugeIntTests\\Bitwise.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Conversions.cs\">\n      <Link>HugeIntTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Comparisons.cs\">\n      <Link>HugeIntTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ExpressionTests.cs\">\n      <Link>HugeIntTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\IO.cs\">\n      <Link>HugeIntTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Math.cs\">\n      <Link>HugeIntTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\NumberTheoretic.cs\">\n      <Link>HugeIntTests\\NumberTheoretic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Arithmetic.cs\">\n      <Link>HugeRationalTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Assignment.cs\">\n      <Link>HugeRationalTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Comparisons.cs\">\n      <Link>HugeRationalTests\\Comparisons.cs</Link>\n    
</Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeRationalTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Conversions.cs\">\n      <Link>HugeRationalTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ExpressionTests.cs\">\n      <Link>HugeRationalTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\IO.cs\">\n      <Link>HugeRationalTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\IntegrationTests\\XmlCommentsTests.cs\">\n      <Link>IntegrationTests\\XmlCommentsTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\MpirSettings.cs\">\n      <Link>OtherTests\\MpirSettings.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\Random.cs\">\n      <Link>OtherTests\\Random.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Accessors.cs\">\n      <Link>Utilities\\Accessors.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Platform.cs\">\n      <Link>Utilities\\Platform.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeIntTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Properties\\AssemblyInfo.cs\">\n      <Link>Properties\\AssemblyInfo.cs</Link>\n    </Compile>\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\mpir.net\\mpir.net.vcxproj\">\n      <Project>{1e6e20ca-9f97-45a0-b797-c43a9716b95b}</Project>\n      <Name>mpir.net</Name>\n    </ProjectReference>\n  </ItemGroup>\n  <ItemGroup />\n  <Choose>\n    <When Condition=\"'$(VisualStudioVersion)' == '10.0' And 
'$(IsCodedUITest)' == 'True'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.CodedUITestFramework, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Common, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Extension, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITesting, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n      </ItemGroup>\n    </When>\n  </Choose>\n  <Import Project=\"$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets\" Condition=\"Exists('$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets')\" />\n  <Import Project=\"$(MSBuildToolsPath)\\Microsoft.CSharp.targets\" />\n  <PropertyGroup>\n    <PreBuildEvent>taskkill /F /IM vstest.executionengine.exe /FI \"MEMUSAGE gt 1\"</PreBuildEvent>\n  </PropertyGroup>\n  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. \n       Other similar extension points exist, see Microsoft.Common.targets.\n  <Target Name=\"BeforeBuild\">\n  </Target>\n  <Target Name=\"AfterBuild\">\n  </Target>\n  -->\n</Project>"
  },
  {
    "path": "mpir.net/build.vc14/mpir.net.sln",
    "content": "﻿\nMicrosoft Visual Studio Solution File, Format Version 12.00\n# Visual Studio 14\nVisualStudioVersion = 14.0.24720.0\nMinimumVisualStudioVersion = 10.0.40219.1\nProject(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"mpir.net\", \"mpir.net\\mpir.net.vcxproj\", \"{1E6E20CA-9F97-45A0-B797-C43A9716B95B}\"\nEndProject\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"mpir.net-tests\", \"mpir.net-tests\\mpir.net-tests.csproj\", \"{DD861777-F312-4C3C-9E7C-148EAABE281D}\"\nEndProject\nGlobal\n\tGlobalSection(SolutionConfigurationPlatforms) = preSolution\n\t\tDebug|Win32 = Debug|Win32\n\t\tDebug|x64 = Debug|x64\n\t\tRelease|Win32 = Release|Win32\n\t\tRelease|x64 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.ActiveCfg = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.Build.0 = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.Build.0 = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.ActiveCfg = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.Build.0 = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.ActiveCfg = Release|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.Build.0 = Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.ActiveCfg = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.Build.0 = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.Build.0 = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.ActiveCfg = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.Build.0 = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.ActiveCfg = 
Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.Build.0 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(SolutionProperties) = preSolution\n\t\tHideSolutionNode = FALSE\n\tEndGlobalSection\nEndGlobal\n"
  },
  {
    "path": "mpir.net/build.vc15/mpir.net/mpir.net.vcxproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" ToolsVersion=\"15.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup Label=\"ProjectConfigurations\">\n    <ProjectConfiguration Include=\"Debug|Win32\">\n      <Configuration>Debug</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Debug|x64\">\n      <Configuration>Debug</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|Win32\">\n      <Configuration>Release</Configuration>\n      <Platform>Win32</Platform>\n    </ProjectConfiguration>\n    <ProjectConfiguration Include=\"Release|x64\">\n      <Configuration>Release</Configuration>\n      <Platform>x64</Platform>\n    </ProjectConfiguration>\n  </ItemGroup>\n  <PropertyGroup Label=\"Globals\">\n    <ProjectGuid>{1E6E20CA-9F97-45A0-B797-C43A9716B95B}</ProjectGuid>\n    <TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>\n    <Keyword>ManagedCProj</Keyword>\n    <RootNamespace>mpirnet</RootNamespace>\n    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.Default.props\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v141</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>true</UseDebugLibraries>\n    <PlatformToolset>v141</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  
<PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v141</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"Configuration\">\n    <ConfigurationType>DynamicLibrary</ConfigurationType>\n    <UseDebugLibraries>false</UseDebugLibraries>\n    <PlatformToolset>v141</PlatformToolset>\n    <CLRSupport>true</CLRSupport>\n    <CharacterSet>Unicode</CharacterSet>\n  </PropertyGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.props\" />\n  <ImportGroup Label=\"ExtensionSettings\">\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Label=\"PropertySheets\" Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <ImportGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\" Label=\"PropertySheets\">\n    <Import Project=\"$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props\" 
Condition=\"exists('$(UserRootDir)\\Microsoft.Cpp.$(Platform).user.props')\" Label=\"LocalAppDataPlatform\" />\n  </ImportGroup>\n  <PropertyGroup Label=\"UserMacros\" />\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <LinkIncremental>true</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <LinkIncremental>false</LinkIncremental>\n    <IncludePath>..\\..\\..\\lib\\$(Platform)\\$(Configuration);$(IncludePath)</IncludePath>\n  </PropertyGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 
15</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <Optimization>Disabled</Optimization>\n      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 15</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">\n    <ClCompile>\n      <WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 15</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemDefinitionGroup Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">\n    <ClCompile>\n      
<WarningLevel>Level3</WarningLevel>\n      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>\n      <PrecompiledHeader>Use</PrecompiledHeader>\n      <GenerateXMLDocumentationFiles>true</GenerateXMLDocumentationFiles>\n    </ClCompile>\n    <Link>\n      <GenerateDebugInformation>true</GenerateDebugInformation>\n      <AdditionalDependencies>..\\..\\..\\lib\\$(Platform)\\$(Configuration)\\mpir.lib</AdditionalDependencies>\n      <IgnoreSpecificDefaultLibraries>LIBCMTD;LIBCMT;%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>\n    </Link>\n    <PreBuildEvent>\n      <Command>cd $(SolutionDir)..\\build.vc\ncheck_config $(Platform) $(Configuration) 15</Command>\n    </PreBuildEvent>\n  </ItemDefinitionGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.Data\" />\n    <Reference Include=\"System.Xml\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\" />\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\" />\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Debug|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader 
Condition=\"'$(Configuration)|$(Platform)'=='Debug|x64'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|Win32'\">Create</PrecompiledHeader>\n      <PrecompiledHeader Condition=\"'$(Configuration)|$(Platform)'=='Release|x64'\">Create</PrecompiledHeader>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\" />\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\" />\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\" />\n  </ItemGroup>\n  <Import Project=\"$(VCTargetsPath)\\Microsoft.Cpp.targets\" />\n  <ImportGroup Label=\"ExtensionTargets\">\n  </ImportGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc15/mpir.net/mpir.net.vcxproj.filters",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"14.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <ItemGroup>\n    <Filter Include=\"Source Files\">\n      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>\n      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>\n    </Filter>\n    <Filter Include=\"Header Files\">\n      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>\n      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>\n    </Filter>\n    <Filter Include=\"Resource Files\">\n      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>\n      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>\n    </Filter>\n  </ItemGroup>\n  <ItemGroup>\n    <ClInclude Include=\"..\\..\\mpir.net\\Stdafx.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\resource.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeInt.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Common.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\Random.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeRational.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n    <ClInclude Include=\"..\\..\\mpir.net\\HugeFloat.h\">\n      <Filter>Header Files</Filter>\n    </ClInclude>\n  </ItemGroup>\n  <ItemGroup>\n    <ClCompile Include=\"..\\..\\mpir.net\\AssemblyInfo.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Stdafx.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeInt.cpp\">\n      <Filter>Source 
Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\Random.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeRational.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\HugeFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n    <ClCompile Include=\"..\\..\\mpir.net\\RandomFloat.cpp\">\n      <Filter>Source Files</Filter>\n    </ClCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Text Include=\"..\\..\\mpir.net\\ReadMe.txt\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ResourceCompile Include=\"..\\..\\mpir.net\\app.rc\">\n      <Filter>Resource Files</Filter>\n    </ResourceCompile>\n  </ItemGroup>\n  <ItemGroup>\n    <Image Include=\"..\\..\\mpir.net\\app.ico\">\n      <Filter>Resource Files</Filter>\n    </Image>\n  </ItemGroup>\n  <ItemGroup>\n    <None Include=\"..\\..\\mpir.net\\ExpressionMacros.h\">\n      <Filter>Header Files</Filter>\n    </None>\n  </ItemGroup>\n</Project>"
  },
  {
    "path": "mpir.net/build.vc15/mpir.net-tests/mpir.net-tests.csproj",
    "content": "﻿<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"14.0\" DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <PropertyGroup>\n    <Configuration Condition=\" '$(Configuration)' == '' \">Debug</Configuration>\n    <Platform Condition=\" '$(Platform)' == '' \">AnyCPU</Platform>\n    <ProjectGuid>{DD861777-F312-4C3C-9E7C-148EAABE281D}</ProjectGuid>\n    <OutputType>Library</OutputType>\n    <AppDesignerFolder>Properties</AppDesignerFolder>\n    <RootNamespace>MPIR.Tests</RootNamespace>\n    <AssemblyName>mpir.net-tests</AssemblyName>\n    <TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>\n    <FileAlignment>512</FileAlignment>\n    <ProjectTypeGuids>{3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>\n    <VisualStudioVersion Condition=\"'$(VisualStudioVersion)' == ''\">10.0</VisualStudioVersion>\n    <VSToolsPath Condition=\"'$(VSToolsPath)' == ''\">$(MSBuildExtensionsPath32)\\Microsoft\\VisualStudio\\v$(VisualStudioVersion)</VSToolsPath>\n    <ReferencePath>$(ProgramFiles)\\Common Files\\microsoft shared\\VSTT\\$(VisualStudioVersion)\\UITestExtensionPackages</ReferencePath>\n    <IsCodedUITest>False</IsCodedUITest>\n    <TestProjectType>UnitTest</TestProjectType>\n    <TargetFrameworkProfile />\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' \">\n    <DebugSymbols>true</DebugSymbols>\n    <DebugType>full</DebugType>\n    <Optimize>false</Optimize>\n    <OutputPath>bin\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n    <PlatformTarget>AnyCPU</PlatformTarget>\n  </PropertyGroup>\n  <PropertyGroup Condition=\" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' \">\n    <DebugType>pdbonly</DebugType>\n    <Optimize>true</Optimize>\n    
<OutputPath>bin\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <ErrorReport>prompt</ErrorReport>\n    <WarningLevel>4</WarningLevel>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x64'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x64\\Debug\\</OutputPath>\n    <DefineConstants>TRACE;DEBUG;WIN64</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x64'\">\n    <OutputPath>bin\\x64\\Release\\</OutputPath>\n    <DefineConstants>TRACE;WIN64</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x64</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Debug|x86'\">\n    <DebugSymbols>true</DebugSymbols>\n    <OutputPath>bin\\x86\\Debug\\</OutputPath>\n    <DefineConstants>DEBUG;TRACE</DefineConstants>\n    <DebugType>full</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    <ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <PropertyGroup Condition=\"'$(Configuration)|$(Platform)' == 'Release|x86'\">\n    <OutputPath>bin\\x86\\Release\\</OutputPath>\n    <DefineConstants>TRACE</DefineConstants>\n    <Optimize>true</Optimize>\n    <DebugType>pdbonly</DebugType>\n    <PlatformTarget>x86</PlatformTarget>\n    
<ErrorReport>prompt</ErrorReport>\n    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\n  </PropertyGroup>\n  <ItemGroup>\n    <Reference Include=\"System\" />\n    <Reference Include=\"System.XML\" />\n    <Reference Include=\"System.Xml.Serialization\" />\n  </ItemGroup>\n  <Choose>\n    <When Condition=\"('$(VisualStudioVersion)' == '10.0' or '$(VisualStudioVersion)' == '') and '$(TargetFrameworkVersion)' == 'v3.5'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\" />\n      </ItemGroup>\n    </When>\n    <Otherwise>\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.UnitTestFramework\" />\n      </ItemGroup>\n    </Otherwise>\n  </Choose>\n  <ItemGroup>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Assignment.cs\">\n      <Link>HugeFloatTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Arithmetic.cs\">\n      <Link>HugeFloatTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Comparisons.cs\">\n      <Link>HugeFloatTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeFloatTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Conversions.cs\">\n      <Link>HugeFloatTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\ExpressionTests.cs\">\n      <Link>HugeFloatTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\IO.cs\">\n      <Link>HugeFloatTests\\IO.cs</Link>\n    </Compile>\n    <Compile 
Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Math.cs\">\n      <Link>HugeFloatTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeFloatTests\\Precision.cs\">\n      <Link>HugeFloatTests\\Precision.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Arithmetic.cs\">\n      <Link>HugeIntTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Assignment.cs\">\n      <Link>HugeIntTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Bitwise.cs\">\n      <Link>HugeIntTests\\Bitwise.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Conversions.cs\">\n      <Link>HugeIntTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Comparisons.cs\">\n      <Link>HugeIntTests\\Comparisons.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ExpressionTests.cs\">\n      <Link>HugeIntTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\IO.cs\">\n      <Link>HugeIntTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\Math.cs\">\n      <Link>HugeIntTests\\Math.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\NumberTheoretic.cs\">\n      <Link>HugeIntTests\\NumberTheoretic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Arithmetic.cs\">\n      <Link>HugeRationalTests\\Arithmetic.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Assignment.cs\">\n      <Link>HugeRationalTests\\Assignment.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Comparisons.cs\">\n      <Link>HugeRationalTests\\Comparisons.cs</Link>\n    
</Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeRationalTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\Conversions.cs\">\n      <Link>HugeRationalTests\\Conversions.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\ExpressionTests.cs\">\n      <Link>HugeRationalTests\\ExpressionTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeRationalTests\\IO.cs\">\n      <Link>HugeRationalTests\\IO.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\IntegrationTests\\XmlCommentsTests.cs\">\n      <Link>IntegrationTests\\XmlCommentsTests.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\MpirSettings.cs\">\n      <Link>OtherTests\\MpirSettings.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\OtherTests\\Random.cs\">\n      <Link>OtherTests\\Random.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Accessors.cs\">\n      <Link>Utilities\\Accessors.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Utilities\\Platform.cs\">\n      <Link>Utilities\\Platform.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\HugeIntTests\\ConstructionAndDisposal.cs\">\n      <Link>HugeIntTests\\ConstructionAndDisposal.cs</Link>\n    </Compile>\n    <Compile Include=\"..\\..\\mpir.net-tests\\Properties\\AssemblyInfo.cs\">\n      <Link>Properties\\AssemblyInfo.cs</Link>\n    </Compile>\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\mpir.net\\mpir.net.vcxproj\">\n      <Project>{1e6e20ca-9f97-45a0-b797-c43a9716b95b}</Project>\n      <Name>mpir.net</Name>\n    </ProjectReference>\n  </ItemGroup>\n  <ItemGroup />\n  <Choose>\n    <When Condition=\"'$(VisualStudioVersion)' == '10.0' And 
'$(IsCodedUITest)' == 'True'\">\n      <ItemGroup>\n        <Reference Include=\"Microsoft.VisualStudio.QualityTools.CodedUITestFramework, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Common, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITest.Extension, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n        <Reference Include=\"Microsoft.VisualStudio.TestTools.UITesting, Version=10.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL\">\n          <Private>False</Private>\n        </Reference>\n      </ItemGroup>\n    </When>\n  </Choose>\n  <Import Project=\"$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets\" Condition=\"Exists('$(VSToolsPath)\\TeamTest\\Microsoft.TestTools.targets')\" />\n  <Import Project=\"$(MSBuildToolsPath)\\Microsoft.CSharp.targets\" />\n  <PropertyGroup>\n    <PreBuildEvent>taskkill /F /IM vstest.executionengine.exe /FI \"MEMUSAGE gt 1\"</PreBuildEvent>\n  </PropertyGroup>\n  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. \n       Other similar extension points exist, see Microsoft.Common.targets.\n  <Target Name=\"BeforeBuild\">\n  </Target>\n  <Target Name=\"AfterBuild\">\n  </Target>\n  -->\n</Project>"
  },
  {
    "path": "mpir.net/build.vc15/mpir.net.sln",
    "content": "﻿\nMicrosoft Visual Studio Solution File, Format Version 12.00\n# Visual Studio 14\nVisualStudioVersion = 14.0.24720.0\nMinimumVisualStudioVersion = 10.0.40219.1\nProject(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"mpir.net\", \"mpir.net\\mpir.net.vcxproj\", \"{1E6E20CA-9F97-45A0-B797-C43A9716B95B}\"\nEndProject\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"mpir.net-tests\", \"mpir.net-tests\\mpir.net-tests.csproj\", \"{DD861777-F312-4C3C-9E7C-148EAABE281D}\"\nEndProject\nGlobal\n\tGlobalSection(SolutionConfigurationPlatforms) = preSolution\n\t\tDebug|Win32 = Debug|Win32\n\t\tDebug|x64 = Debug|x64\n\t\tRelease|Win32 = Release|Win32\n\t\tRelease|x64 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.ActiveCfg = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|Win32.Build.0 = Debug|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Debug|x64.Build.0 = Debug|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.ActiveCfg = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|Win32.Build.0 = Release|Win32\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.ActiveCfg = Release|x64\n\t\t{1E6E20CA-9F97-45A0-B797-C43A9716B95B}.Release|x64.Build.0 = Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.ActiveCfg = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|Win32.Build.0 = Debug|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.ActiveCfg = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Debug|x64.Build.0 = Debug|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.ActiveCfg = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|Win32.Build.0 = Release|x86\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.ActiveCfg = 
Release|x64\n\t\t{DD861777-F312-4C3C-9E7C-148EAABE281D}.Release|x64.Build.0 = Release|x64\n\tEndGlobalSection\n\tGlobalSection(SolutionProperties) = preSolution\n\t\tHideSolutionNode = FALSE\n\tEndGlobalSection\nEndGlobal\n"
  },
  {
    "path": "mpir.net/mpir.net/AssemblyInfo.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"stdafx.h\"\n\nusing namespace System;\nusing namespace System::Reflection;\nusing namespace System::Runtime::CompilerServices;\nusing namespace System::Runtime::InteropServices;\nusing namespace System::Security::Permissions;\n\n//\n// General Information about an assembly is controlled through the following\n// set of attributes. 
Change these attribute values to modify the information\n// associated with an assembly.\n//\n[assembly:AssemblyTitleAttribute(\"mpirnet\")];\n[assembly:AssemblyDescriptionAttribute(\"Managed wrapper for the MPIR library\")];\n[assembly:AssemblyConfigurationAttribute(\"\")];\n[assembly:AssemblyCompanyAttribute(\"\")];\n[assembly:AssemblyProductAttribute(\"mpirnet\")];\n[assembly:AssemblyCopyrightAttribute(\"Copyright  Alex Dyachenko 2014\")];\n[assembly:AssemblyTrademarkAttribute(\"\")];\n[assembly:AssemblyCultureAttribute(\"\")];\n\n//\n// Version information for an assembly consists of the following four values:\n//\n//      Major Version\n//      Minor Version\n//      Build Number\n//      Revision\n//\n// You can specify all the value or you can default the Revision and Build Numbers\n// by using the '*' as shown below:\n\n[assembly:AssemblyVersionAttribute(\"1.0.0.0\")];\n[assembly:ComVisible(false)];\n[assembly:CLSCompliantAttribute(true)];\n//obsolete in .Net 4.0 [assembly:SecurityPermission(SecurityAction::RequestMinimum, UnmanagedCode = true)];\n[assembly:InternalsVisibleTo(\"mpir.net-tests\")];\n"
  },
  {
    "path": "mpir.net/mpir.net/Common.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"Stdafx.h\"\n"
  },
  {
    "path": "mpir.net/mpir.net/Common.h",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#pragma once\n\n#pragma region misc macros\n\n#define LIT2(x) x\n#define LIT(x) LIT2(x)\n#define CONCAT2(x,y) x##y\n#define CONCAT(x,y) CONCAT2(x,y)\n#define IS_NULL(a) (Object::ReferenceEquals(a, nullptr))\n#define PIN(x) pin_ptr<T> pinptr##x = &x[0]; void* pinned_##x = pinptr##x;\n#define SGN(Z) ((Z) < 0 ? -1 : (Z) > 0)\n\n#define PEEK_NEXT_CHAR  \\\n    reader->Read();     \\\n    c = reader->Peek(); \\\n    nread++;\n\n#define WHEN_IS_DEST(i, a)                             \\\n    auto x##i = dynamic_cast<MPTYPE^>(a);              \\\n    if (!IS_NULL(x##i) && x##i->_value == destination)\n\n#define WHEN_IS(i, a, atype)              \\\n    auto x##i = dynamic_cast<atype^>(a);  \\\n    if (!IS_NULL(x##i))\n\n#define TYPE_FOR_ABBR_Int HugeInt^\n#define TYPE_FOR_ABBR_Rat HugeRational^\n#define TYPE_FOR_ABBR_Expr MPEXPR_NAME^\n#define TYPE_FOR_ABBR_Si mpir_si\n#define TYPE_FOR_ABBR_Ui mpir_ui\n#define TYPE_FOR_ABBR_Bits mp_bitcnt_t\n#define TYPE_FOR_ABBR_Rnd MpirRandom^\n#define TYPE_FOR_ABBR_IExpr IntegerExpression^\n\n#pragma endregion\n\n#pragma region enums\n\nnamespace MPIR\n{\n    /// <summary>\n    /// This enum defines the rounding modes MPIR supports.  
Division and modulo operations take an optional rounding mode parameter, or use the default, which is set in the static MpirSettings class.\n    /// </summary>\n    public enum class RoundingModes\n    {\n        /// <summary>Rounding mode is unspecified.  Use a higher level default if available, fall back to Truncate.</summary>\n        Default,\n        /// <summary>Truncate.  Quotient is rounded toward zero, and remainder has the same sign as the source number.</summary>\n        Truncate,\n        /// <summary>Round up.  Quotient is rounded toward +infinity, and remainder has the opposite sign to the divisor.</summary>\n        Ceiling,\n        /// <summary>Round down.  Quotient is rounded toward -infinity, and remainder has the sames sign as the divisor.</summary>\n        Floor,\n    };\n\n    /// <summary>\n    /// This enum defines the limb order used when importing or exporting a number.\n    /// </summary>\n    public enum class LimbOrder : __int8\n    {\n        /// <summary>Most significant limb comes first.</summary>\n        MostSignificantFirst = 1,\n        /// <summary>Least significant limb comes first.</summary>\n        LeastSignificantFirst = -1,\n    };\n\n    /// <summary>\n    /// This enum defines the byte order within each limb when importing or exporting a number.\n    /// </summary>\n    public enum class Endianness : __int8\n    {\n        /// <summary>The native byte order of the CPU is used.</summary>\n        Native = 0,\n        /// <summary>Most significant byte comes first in a limb.</summary>\n        BigEndian = 1,\n        /// <summary>Least significant byte comes first in a limb.</summary>\n        LittleEndian = -1,\n    };\n}\n\nenum EvaluationOptions : __int32\n{\n    None = 0x0,\n\n    IntInitialized = 0x1, \n    Temp1InitializedInt = IntInitialized,\n    Temp2InitializedInt = IntInitialized << 1,\n    Temp3InitializedInt = IntInitialized << 2,\n\n    RationalInitialized = 0x10, \n    Temp1InitializedRational = 
RationalInitialized,\n    Temp2InitializedRational = RationalInitialized << 1,\n    Temp3InitializedRational = RationalInitialized << 2,\n\n    FloatInitialized = 0x100, \n    Temp1InitializedFloat = FloatInitialized,\n    Temp2InitializedFloat = FloatInitialized << 1,\n    Temp3InitializedFloat = FloatInitialized << 2,\n};\n\n#pragma endregion\n\nstruct EvaluationContext\n{\n    public:\n        union\n        {\n            __mpz_struct Integer;\n            __mpq_struct Rational;\n            __mpf_struct Float;\n        } \n        Temp[3];\n        union\n        {\n            mpz_ptr IntArgs[3];\n            mpq_ptr RationalArgs[3];\n            mpf_ptr FloatArgs[3];\n        };\n        union\n        {\n            struct\n            {\n                EvaluationOptions Options;\n                unsigned __int8 Index;\n            };\n            __int64 Zero;\n        };\n        mp_bitcnt_t FloatPrecision;\n\n        void inline Initialized(EvaluationOptions flag)\n        {\n            Options = (EvaluationOptions) (Options | (flag << Index));\n        }\n\n        EvaluationContext()\n        {\n            Zero = 0;\n            FloatPrecision = 0;\n        }\n\n#define CTXT_ADD_RATIONAL_SI(numerator, denominator)  \\\n    auto ptr = &context.Temp[context.Index].Rational; \\\n    context.RationalArgs[context.Index++] = ptr;      \\\n                                                      \\\n    auto _n = (mpir_ui)ABS(numerator);                \\\n    ptr->_mp_num._mp_alloc = 1;                       \\\n    ptr->_mp_num._mp_size = (int)SGN(numerator);      \\\n    ptr->_mp_num._mp_d = &_n;                         \\\n                                                      \\\n    auto _d = (mpir_ui)denominator;                   \\\n    ptr->_mp_den._mp_alloc = 1;                       \\\n    ptr->_mp_den._mp_size = (int)SGN(denominator);    \\\n    ptr->_mp_den._mp_d = &_d;\n\n#define CTXT_ADD_RATIONAL_UI(numerator, denominator)  \\\n    auto ptr = 
&context.Temp[context.Index].Rational; \\\n    context.RationalArgs[context.Index++] = ptr;      \\\n                                                      \\\n    auto _n = (mpir_ui)numerator;                     \\\n    ptr->_mp_num._mp_alloc = 1;                       \\\n    ptr->_mp_num._mp_size = (int)SGN(numerator);      \\\n    ptr->_mp_num._mp_d = &_n;                         \\\n                                                      \\\n    auto _d = (mpir_ui)denominator;                   \\\n    ptr->_mp_den._mp_alloc = 1;                       \\\n    ptr->_mp_den._mp_size = (int)SGN(denominator);    \\\n    ptr->_mp_den._mp_d = &_d;\n\n#define CTXT_ADD_RATIONAL_DOUBLE(value)               \\\n    context.Initialized(RationalInitialized);         \\\n    auto ptr = &context.Temp[context.Index].Rational; \\\n    context.RationalArgs[context.Index++] = ptr;      \\\n    mpq_init(ptr);                                    \\\n    mpq_set_d(ptr, value);\n\n#define CTXT_ADD_RATIONAL_INT(value)         \\\n    value->AssignToRational(context);\n\n        ~EvaluationContext()\n        {\n            if(Options & EvaluationOptions::Temp1InitializedInt)\n                mpz_clear(IntArgs[0]);\n            if(Options & EvaluationOptions::Temp2InitializedInt)\n                mpz_clear(IntArgs[1]);\n            if(Options & EvaluationOptions::Temp3InitializedInt)\n                mpz_clear(IntArgs[2]);\n\n            if(Options & EvaluationOptions::Temp1InitializedRational)\n                mpq_clear(RationalArgs[0]);\n            if(Options & EvaluationOptions::Temp2InitializedRational)\n                mpq_clear(RationalArgs[1]);\n            if(Options & EvaluationOptions::Temp3InitializedRational)\n                mpq_clear(RationalArgs[2]);\n\n            if(Options & EvaluationOptions::Temp1InitializedFloat)\n                mpf_clear(FloatArgs[0]);\n            if(Options & EvaluationOptions::Temp2InitializedFloat)\n                mpf_clear(FloatArgs[1]);\n   
         if(Options & EvaluationOptions::Temp3InitializedFloat)\n                mpf_clear(FloatArgs[2]);\n        }\n};"
  },
  {
    "path": "mpir.net/mpir.net/ExpressionMacros.h",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#pragma region Expression macros\n\n#ifdef ADDITIONAL_MEMBERS \n#undef ADDITIONAL_MEMBERS\n#endif\n#define ADDITIONAL_MEMBERS\n\n#define IN_CONTEXT_1(a)                \\\n    a->ASSIGN_TO(context)\n\n#define IN_CONTEXT_2(a, b)             \\\n    a->ASSIGN_TO(context);             \\\n    b->ASSIGN_TO(context)\n\n#define IN_CONTEXT_3(a, b, c)          \\\n    a->ASSIGN_TO(context);             \\\n    b->ASSIGN_TO(context);             \\\n    c->ASSIGN_TO(context)\n\n#define COUNT_ARGS_IMPL2(_1, _2, _3, name, ...) name\n#define COUNT_ARGS_IMPL(args) COUNT_ARGS_IMPL2 args\n#define COUNT_ARGS(...) COUNT_ARGS_IMPL((__VA_ARGS__, 3, 2, 1))\n#define MACRO_CHOOSE2(prefix, number) prefix##number\n#define MACRO_CHOOSE1(prefix, number) MACRO_CHOOSE2(prefix, number)\n#define MACRO_CHOOSE(prefix, number) MACRO_CHOOSE1(prefix, number)\n#define MACRO_GLUE(x, y) x y\n\n#define IN_CONTEXT(...)                                                           
\\\n    EvaluationContext context;                                                    \\\n    SET_CONTEXT_PRECISION                                                         \\\n    MACRO_GLUE(MACRO_CHOOSE(IN_CONTEXT_, COUNT_ARGS(__VA_ARGS__)), (__VA_ARGS__))\n\n#define IN_DEFAULT_CONTEXT(...)                                                   \\\n    EvaluationContext context;                                                    \\\n    context.FloatPrecision = HugeFloat::DefaultPrecision;                         \\\n    MACRO_GLUE(MACRO_CHOOSE(IN_CONTEXT_, COUNT_ARGS(__VA_ARGS__)), (__VA_ARGS__))\n\n#define IN_SPECIFIC_CONTEXT(precision, ...)                                       \\\n    EvaluationContext context;                                                    \\\n    context.FloatPrecision = precision;                                           \\\n    MACRO_GLUE(MACRO_CHOOSE(IN_CONTEXT_, COUNT_ARGS(__VA_ARGS__)), (__VA_ARGS__))\n\n//defines a unary expression class\n#define DEFINE_UNARY_EXPRESSION(base, name, type)                \\\nprivate ref class MPEXPR(name) : base                            \\\n{                                                                \\\n    internal:                                                    \\\n        initonly type Operand;                                   \\\n        virtual void AssignTo(MP(ptr) destination) override;     \\\n        MPEXPR(name)(type operand)                               \\\n        {                                                        \\\n            Operand = operand;                                   \\\n        }                                                        \\\n};\n\n//defines a binary expression class\n#define DEFINE_BINARY_EXPRESSION(base, name, leftType, rightType)     \\\nprivate ref class MPEXPR(name) : base                                 \\\n{                                                                     \\\n    internal:                                                 
        \\\n        initonly leftType Left;                                       \\\n        initonly rightType Right;                                     \\\n        virtual void AssignTo(MP(ptr) destination) override;          \\\n        ADDITIONAL_MEMBERS                                            \\\n        MPEXPR(name)(leftType left, rightType right)                  \\\n        {                                                             \\\n            Left = left;                                              \\\n            Right = right;                                            \\\n        }                                                             \\\n};\n\n//defines a ternary expression class\n#define DEFINE_TERNARY_EXPRESSION(base, name, leftType, middleType, rightType)    \\\nprivate ref class MPEXPR(name) : base                                             \\\n{                                                                                 \\\n    internal:                                                                     \\\n        initonly leftType Left;                                                   \\\n        initonly middleType Middle;                                               \\\n        initonly rightType Right;                                                 \\\n        virtual void AssignTo(MP(ptr) destination) override;                      \\\n        MPEXPR(name)(leftType left, middleType middle, rightType right)           \\\n        {                                                                         \\\n            Left = left;                                                          \\\n            Middle = middle;                                                      \\\n            Right = right;                                                        \\\n        }                                                                         \\\n};\n\n//unary expressions\n#define DEFINE_UNARY_EXPRESSION_WITH_ONE(base, 
name, typeAbbr) \\\n    DEFINE_UNARY_EXPRESSION(base, name##typeAbbr, MPEXPR_NAME^)           \n\n#define DEFINE_UNARY_EXPRESSION_WITH_BUILT_INS_ONLY(base, name, typeAbbr)    \\\n    DEFINE_UNARY_EXPRESSION(base, name##typeAbbr, TYPE_FOR_ABBR_##typeAbbr)\n\n//binary expressions\n#define DEFINE_BINARY_EXPRESSION_WITH_BUILT_INS_ONLY(base, name, leftTypeAbbr, rightTypeAbbr)    \\\n    DEFINE_BINARY_EXPRESSION(base, name##leftTypeAbbr##rightTypeAbbr, TYPE_FOR_ABBR_##leftTypeAbbr, TYPE_FOR_ABBR_##rightTypeAbbr)\n\n#define DEFINE_BINARY_EXPRESSION_WITH_TWO(base, name, typeAbbr) \\\n    DEFINE_BINARY_EXPRESSION(base, name##typeAbbr##typeAbbr, MPEXPR_NAME^, MPEXPR_NAME^)\n\n#define DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT(base, name, leftTypeAbbr, rightTypeAbbr)    \\\n    DEFINE_BINARY_EXPRESSION(base, name##leftTypeAbbr##rightTypeAbbr, MPEXPR_NAME^, TYPE_FOR_ABBR_##rightTypeAbbr) \n\n#define DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT(base, name, leftTypeAbbr, rightTypeAbbr)     \\\n    DEFINE_BINARY_EXPRESSION(base, name##leftTypeAbbr##rightTypeAbbr, TYPE_FOR_ABBR_##leftTypeAbbr, MPEXPR_NAME^)\n\n//ternary expressions\n#define DEFINE_TERNARY_EXPRESSION_WITH_THREE(base, name, typeAbbr) \\\n    DEFINE_TERNARY_EXPRESSION(base, name##typeAbbr##typeAbbr##typeAbbr, MPEXPR_NAME^, MPEXPR_NAME^, MPEXPR_NAME^)\n\n#define DEFINE_TERNARY_EXPRESSION_WITH_BUILT_IN_MIDDLE(base, name, leftTypeAbbr, middleTypeAbbr, rightTypeAbbr)    \\\n    DEFINE_TERNARY_EXPRESSION(base, name##leftTypeAbbr##middleTypeAbbr##rightTypeAbbr, MPEXPR_NAME^, TYPE_FOR_ABBR_##middleTypeAbbr, MPEXPR_NAME^)\n\n#pragma endregion\n\n#pragma region Method macros\n\n//void functions\n#define MAKE_VOID_FUNCTION(base, action, op, type)  \\\n    MAKE_VOID_FUNCTION_##action(base, op, op##type)\n\n#define MAKE_VOID_FUNCTION_DECLARE(base, op, result)     \\\n    base^ op();\n\n#define MAKE_VOID_FUNCTION_DEFINE(base, op, result)      \\\n    base^ MPEXPR_NAME::op() { return gcnew MPEXPR(result)(this); }\n\n//one-arg 
functions\n#define MAKE_FUNCTION_WITH_ONE(base, action, op, argTypeAbbr)  \\\n    MAKE_FUNCTION_WITH_ONE_##action(base, op, Expr, CONCAT(op, LIT(Mpt))argTypeAbbr)\n\n#define MAKE_FUNCTION_WITH_LIMB(base, action, op, argTypeAbbr)  \\\n    MAKE_FUNCTION_WITH_ONE_##action(base, op, argTypeAbbr, CONCAT(op, LIT(Mpt))argTypeAbbr)\n\n#define MAKE_FUNCTION_WITH_ONE_DECLARE(base, op, argTypeAbbr, result)     \\\n    base^ op(TYPE_FOR_ABBR_##argTypeAbbr a);\n\n#define MAKE_FUNCTION_WITH_ONE_DEFINE(base, op, argTypeAbbr, result)      \\\n    base^ MPEXPR_NAME::op(TYPE_FOR_ABBR_##argTypeAbbr a) { return gcnew MPEXPR(result)(this, a); }\n\n//two-arg functions\n#define MAKE_FUNCTION_WITH_TWO(base, action, op, leftTypeAbbr, rightTypeAbbr)  \\\n    MAKE_FUNCTION_WITH_TWO_##action(base, op, Expr, Expr, CONCAT(op, LIT(Mpt))leftTypeAbbr##rightTypeAbbr)\n\n#define MAKE_FUNCTION_WITH_TWO_LLIMB(base, action, op, leftTypeAbbr, rightTypeAbbr)  \\\n    MAKE_FUNCTION_WITH_TWO_##action(base, op, leftTypeAbbr, Expr, CONCAT(op, LIT(Mpt))leftTypeAbbr##rightTypeAbbr)\n\n#define MAKE_FUNCTION_WITH_TWO_DECLARE(base, op, leftTypeAbbr, rightTypeAbbr, result)     \\\n    base^ op(TYPE_FOR_ABBR_##leftTypeAbbr a, TYPE_FOR_ABBR_##rightTypeAbbr b);\n\n#define MAKE_FUNCTION_WITH_TWO_DEFINE(base, op, leftTypeAbbr, rightTypeAbbr, result)      \\\n    base^ MPEXPR_NAME::op(TYPE_FOR_ABBR_##leftTypeAbbr a, TYPE_FOR_ABBR_##rightTypeAbbr b) { return gcnew MPEXPR(result)(this, a, b); }\n\n//functions with one argument and simple result\n//#define MAKE_SIMPLE_FUNCTION_WITH_ONE(base, action, op, resultType, argType) \\\n//    MAKE_SIMPLE_FUNCTION_WITH_ONE_##action(base, op, resultType, Expr)\n//\n//#define MAKE_SIMPLE_FUNCTION_WITH_LIMB(base, action, op, resultType, argType) \\\n//    MAKE_SIMPLE_FUNCTION_WITH_ONE_##action(base, op, resultType, argType)\n//\n//#define MAKE_SIMPLE_FUNCTION_WITH_ONE_DECLARE(base, op, resultTypeAbbr, argTypeAbbr)     \\\n//    TYPE_FOR_ABBR_##resultTypeAbbr 
op(TYPE_FOR_ABBR_##argTypeAbbr a);\n//\n//#define MAKE_SIMPLE_FUNCTION_WITH_ONE_DEFINE(base, op, resultTypeAbbr, argTypeAbbr)      \\\n//    TYPE_FOR_ABBR_##resultTypeAbbr HugeInt::op(TYPE_FOR_ABBR_##argTypeAbbr a) { return gcnew LIT(MPTYPE_NAME)result##Expression(this, a); }\n\n//unary operators\n#define MAKE_UNARY_OPERATOR(base, action, op, result, mpType) \\\n    MAKE_UNARY_OPERATOR_##action(base, op, result##mpType, Expr)\n\n#define MAKE_UNARY_OPERATOR_DECLARE(base, op, result, type)     \\\n    static base^ operator op(TYPE_FOR_ABBR_##type a);\n\n#define MAKE_UNARY_OPERATOR_DEFINE(base, op, result, type)      \\\n    base^ MPEXPR_NAME::operator op(TYPE_FOR_ABBR_##type a) { return gcnew MPEXPR(result)(a); }\n\n//binary operators\n#define MAKE_BINARY_OPERATOR_DECLARE(base, op, result, leftType, rightType, left, right)     \\\n    static base^ operator op(TYPE_FOR_ABBR_##leftType a, TYPE_FOR_ABBR_##rightType b);\n\n#define MAKE_BINARY_OPERATOR_DEFINE(base, op, result, leftType, rightType, left, right)      \\\n    base^ MPEXPR_NAME::operator op(TYPE_FOR_ABBR_##leftType a, TYPE_FOR_ABBR_##rightType b) { return gcnew MPEXPR(result)(left, right); }\n\n#define MAKE_BINARY_OPERATOR_STANDARD(base, action, op, result, leftType, rightType)        \\\n    MAKE_BINARY_OPERATOR_##action(base, op, result##leftType##rightType, Expr, Expr, a, b)      \n\n#define MAKE_BINARY_OPERATOR_RLIMB(base, action, op, result, mpType, limbType)  \\\n    MAKE_BINARY_OPERATOR_##action(base, op, result##mpType##limbType, Expr, limbType, a, b)          \n\n#define MAKE_BINARY_OPERATOR_LLIMB(base, action, op, result, mpType, limbType)  \\\n    MAKE_BINARY_OPERATOR_##action(base, op, result##limbType##mpType, limbType, Expr, a, b)           \n\n#define MAKE_BINARY_OPERATOR_LLIMB_R(base, action, op, result, mpType, limbType) \\\n    MAKE_BINARY_OPERATOR_##action(base, op, result##mpType##limbType, limbType, Expr, b, a)\n\n#pragma endregion\n\n#pragma region expression assignment macros\n\n#define 
DEFINE_ASSIGNMENT_PROLOG(name) void MPEXPR(name)::AssignTo(MP(ptr) destination)\n\n#define DEFINE_UNARY_ASSIGNMENT_REF(name, typeAbbr, operation)     \\\n    DEFINE_ASSIGNMENT_PROLOG(name##typeAbbr)                       \\\n    {                                                              \\\n        Operand->AssignTo(destination);                            \\\n        operation(destination, destination);                       \\\n    }\n\n#define DEFINE_UNARY_ASSIGNMENT_VAL(name, typeAbbr, operation)     \\\n    DEFINE_ASSIGNMENT_PROLOG(name##typeAbbr)                       \\\n    {                                                              \\\n        operation(destination, Operand);                           \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(name, leftTypeAbbr, rightTypeAbbr, operation) \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        IN_CONTEXT(Left, Right);                                                          \\\n        operation(destination, CTXT(0), CTXT(1));                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_REF(name, typeAbbr, operation)                       \\\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(name, typeAbbr, typeAbbr, operation)\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_VAL(name, leftTypeAbbr, rightTypeAbbr, operation)    \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        Left->AssignTo(destination);                                                      \\\n        operation(destination, destination, Right);                                       \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_RATUI(name, leftTypeAbbr, rightTypeAbbr, operation)  \\\n    
DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        IN_CONTEXT(Left);                                                                 \\\n        CTXT_ADD_RATIONAL_UI(Right, 1);                                                   \\\n        operation(destination, CTXT(0), CTXT(1));                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_RATSI(name, leftTypeAbbr, rightTypeAbbr, operation)  \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        IN_CONTEXT(Left);                                                                 \\\n        CTXT_ADD_RATIONAL_SI(Right, 1);                                                   \\\n        operation(destination, CTXT(0), CTXT(1));                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_VAL_REF(name, leftTypeAbbr, rightTypeAbbr, operation)    \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        Right->AssignTo(destination);                                                     \\\n        operation(destination, Left, destination);                                        \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_RATUI_REF(name, leftTypeAbbr, rightTypeAbbr, operation)  \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        IN_CONTEXT(Right);                                                                \\\n        CTXT_ADD_RATIONAL_UI(Left, 1);                                                    \\\n   
     operation(destination, CTXT(1), CTXT(0));                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_RATSI_REF(name, leftTypeAbbr, rightTypeAbbr, operation)  \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        IN_CONTEXT(Right);                                                                \\\n        CTXT_ADD_RATIONAL_SI(Left, 1);                                                    \\\n        operation(destination, CTXT(1), CTXT(0));                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_VAL_VAL(name, leftTypeAbbr, rightTypeAbbr, operation)    \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                           \\\n    {                                                                                     \\\n        operation(destination, Left, Right);                                              \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_SI(name, leftTypeAbbr, rightTypeAbbr, positiveOp, negativeOp)      \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                                         \\\n    {                                                                                                   \\\n        Left->AssignTo(destination);                                                                    \\\n        if (Right >= 0)                                                                                 \\\n            positiveOp(destination, destination, (mpir_ui)Right);                                       \\\n        else                                                                                            \\\n            negativeOp(destination, destination, (mpir_ui)-Right);                                      \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_REF_SI2(name, leftTypeAbbr, 
rightTypeAbbr, operation, negateOp)        \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                                         \\\n    {                                                                                                   \\\n        Left->AssignTo(destination);                                                                    \\\n        operation(destination, destination, (Right >= 0) ? (mpir_ui)Right : (mpir_ui)-Right);           \\\n        if (Right < 0)                                                                                  \\\n            negateOp(destination, destination);                                                         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_SI_REF(name, leftTypeAbbr, rightTypeAbbr, positiveOp, negativeOp1, negativeOp2)   \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                                                    \\\n    {                                                                                                              \\\n        Right->AssignTo(destination);                                                                              \\\n        if (Left >= 0)                                                                                             \\\n            positiveOp(destination, (mpir_ui)Left, destination);                                                   \\\n        else                                                                                                       \\\n        {                                                                                                          \\\n            negativeOp1(destination, destination, (mpir_ui)-Left);                                                 \\\n            negativeOp2(destination, destination);                                                                 \\\n        }                                                                                                 
         \\\n    }\n\n#define DEFINE_BINARY_ASSIGNMENT_SI_REF2(name, leftTypeAbbr, rightTypeAbbr, positiveOp, negativeOp1, negativeOp2)  \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftTypeAbbr##rightTypeAbbr)                                                    \\\n    {                                                                                                              \\\n        Right->AssignTo(destination);                                                                              \\\n        if (Left >= 0)                                                                                             \\\n            positiveOp(destination, (mpir_ui)Left, destination);                                                   \\\n        else                                                                                                       \\\n        {                                                                                                          \\\n            negativeOp1(destination, (mpir_ui)-Left, destination);                                                 \\\n            negativeOp2(destination, destination);                                                                 \\\n        }                                                                                                          \\\n    }\n\n#define DEFINE_TERNARY_ASSIGNMENT_REF_REF_REF(name, typeAbbr, operation)   \\\n    DEFINE_ASSIGNMENT_PROLOG(name##typeAbbr##typeAbbr##typeAbbr)           \\\n    {                                                                      \\\n        IN_CONTEXT(Left, Middle, Right);                                   \\\n        operation(destination, CTXT(0), CTXT(1), CTXT(2));                 \\\n    }\n\n#define DEFINE_TERNARY_ASSIGNMENT_REF_VAL_REF(name, leftT, middleT, rightT, operation) \\\n    DEFINE_ASSIGNMENT_PROLOG(name##leftT##middleT##rightT)                             \\\n    {                                                                     
             \\\n        IN_CONTEXT(Left, Right);                                                       \\\n        operation(destination, CTXT(0), Middle, CTXT(1));                              \\\n    }\n\n#pragma endregion\n"
  },
  {
    "path": "mpir.net/mpir.net/HugeFloat.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"Stdafx.h\"\n#include \"HugeInt.h\"\n#include \"HugeRational.h\"\n#include \"HugeFloat.h\"\n\nusing namespace System::Runtime::InteropServices;\nusing namespace System::Text;\n\nnamespace MPIR\n{\n    #pragma region construction\n\n    MPTYPE::MPTYPE()\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        _allocatedPrecision = MP(get_prec)(_value);\n    }\n\n    MPTYPE::MPTYPE(bool initialize)\n    {\n        AllocateStruct();\n        if(initialize)\n        {\n            MP(init)(_value);\n            _allocatedPrecision = MP(get_prec)(_value);\n        }\n    }\n\n    MPTYPE::MPTYPE(MPEXPR_NAME^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        _allocatedPrecision = MP(get_prec)(_value);\n        value->AssignTo(_value);\n    }\n\n    MPTYPE::MPTYPE(IntegerExpression^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        _allocatedPrecision = MP(get_prec)(_value);\n        SetTo(value);\n    }\n\n    MPTYPE::MPTYPE(RationalExpression^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        SetTo(value);\n    }\n    \n    MPTYPE^ MPTYPE::Allocate(mp_bitcnt_t precision)\n    {\n        auto result = gcnew MPTYPE(false);\n    
    MP(init2)(result->_value, precision);\n        result->_allocatedPrecision = MP(get_prec)(result->_value);\n        return result;\n    }\n\n    void MPTYPE::FromString(String^ value, int base, bool exponentInDecimal)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        _allocatedPrecision = MP(get_prec)(_value);\n\n        base = Math::Abs(base);\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(set_str)(_value, (char*)(void*)ptr, exponentInDecimal ? -base : base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n        {\n            DeallocateStruct();\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n        }\n    }\n\n    void MPTYPE::SetTo(String^ value, int base, bool exponentInDecimal)\n    {\n        base = Math::Abs(base);\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(set_str)(_value, (char*)(void*)ptr, exponentInDecimal ? -base : base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n    }\n\n    MPTYPE::MPTYPE(mpir_si value)\n    {\n        AllocateStruct();\n        MP(init_set_si)(_value, value);\n        _allocatedPrecision = MP(get_prec)(_value);\n    }\n\n    MPTYPE::MPTYPE(mpir_ui value)\n    {\n        AllocateStruct();\n        MP(init_set_ui)(_value, value);\n        _allocatedPrecision = MP(get_prec)(_value);\n    }\n\n    MPTYPE::MPTYPE(double value)\n    {\n        AllocateStruct();\n        MP(init_set_d)(_value, value);\n        _allocatedPrecision = MP(get_prec)(_value);\n    }\n\n    #pragma endregion\n\n    #pragma region object overrides\n\n    String^ MPTYPE::ToString(int base, bool lowercase, int maxDigits, bool exponentInDecimal)\n    {\n        if(base < 2 || base > 62)\n            throw gcnew ArgumentOutOfRangeException(\"base\", \"Invalid base\");\n\n        mp_exp_t exponent;\n        auto 
allocatedStr = MP(get_str)(NULL, &exponent, (!lowercase && base <= 36) ? -base : base, maxDigits, _value);\n        auto str = allocatedStr;\n\n        auto result = maxDigits > 0 ? gcnew StringBuilder(maxDigits + 70) : gcnew StringBuilder();\n        \n        size_t allocated = 1;\n        if (*str == '-')\n        {\n            result->Append((wchar_t)'-');\n            allocated++;\n            str++;\n        }\n        result->Append((wchar_t)'0');\n\n        if (*str != 0)\n        {\n            result->Append((wchar_t)'.');\n            while (*str != 0)\n            {\n                result->Append((wchar_t)*str);\n                allocated++;\n                str++;\n            }\n            result->Append((wchar_t)'@');\n\n            if(exponentInDecimal)\n            {\n                result->Append(exponent);\n            }\n            else\n            {\n                HugeInt exp((mpir_si) exponent);\n                result->Append(exp.ToString(base, lowercase));\n            }\n        }\n\n        (*__gmp_free_func)(allocatedStr, allocated);\n\n        return result->ToString();\n    }\n\n    int MPEXPR_NAME::GetHashCode()\n    {\n        IN_DEFAULT_CONTEXT(this);\n\n        mp_limb_t hash = CTXT(0)->_mp_exp;\n        mp_limb_t* ptr = CTXT(0)->_mp_d;\n        for(int i = abs(CTXT(0)->_mp_size); i > 0; i--)\n            hash ^= *ptr++;\n\n        if(CTXT(0)->_mp_size < 0)\n            hash = (mp_limb_t)-(mpir_si)hash;\n\n        return hash.GetHashCode();\n    }\n\n    #pragma endregion\n\n    #pragma region Interface implementations\n\n    int MPEXPR_NAME::CompareTo(Object^ a, bool& valid)\n    {\n        valid = true;\n\n        if (IS_NULL(a))\n            return 1;\n\n        WHEN_IS(1, a, MPEXPR_NAME)\n            return CompareTo(x1);\n\n        auto f = dynamic_cast<MPTYPE^>(this);\n        auto precision = IS_NULL(f) ? 
MPTYPE::DefaultPrecision : f->Precision;\n\n        if(a->GetType() == mpir_ui::typeid)\n        {\n            IN_SPECIFIC_CONTEXT(precision, this);\n            return MP(cmp_ui)(CTXT(0), (mpir_ui)a);\n        }\n\n        if(a->GetType() == mpir_si::typeid)\n        {\n            IN_SPECIFIC_CONTEXT(precision, this);\n            return MP(cmp_si)(CTXT(0), (mpir_si)a);\n        }\n\n        if(a->GetType() == double::typeid)\n        {\n            IN_SPECIFIC_CONTEXT(precision, this);\n            return MP(cmp_d)(CTXT(0), (double)a);\n        }\n\n        valid = false;\n        return 0;\n    }\n\n    int MPEXPR_NAME::CompareTo(Object^ a)\n    {\n        bool valid;\n        auto result = CompareTo(a, valid);\n\n        if (valid)\n            return result;\n\n        throw gcnew ArgumentException(\"Invalid argument type\", \"a\");\n    }\n\n    int MPEXPR_NAME::CompareTo(MPEXPR_NAME^ a)\n    {\n        if (IS_NULL(a))\n            return 1;\n\n        auto f = dynamic_cast<MPTYPE^>(this);\n        if (IS_NULL(f)) f = dynamic_cast<MPTYPE^>(a);\n        auto precision = IS_NULL(f) ? 
MPTYPE::DefaultPrecision : f->Precision;\n\n        IN_SPECIFIC_CONTEXT(precision, this, a);\n        return MP(cmp)(CTXT(0), CTXT(1));\n    }\n\n    bool MPEXPR_NAME::Equals(Object^ a)\n    {\n        bool valid;\n        auto result = CompareTo(a, valid);\n\n        return valid && result == 0;\n    }\n\n    bool MPEXPR_NAME::Equals(MPEXPR_NAME^ a)\n    {\n        return CompareTo(a) == 0;\n    }\n\n    #pragma endregion\n\n    #pragma region Arithmetic\n\n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, +, Add, Flt, Flt)           \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Flt, Ui)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Flt, Ui)            \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Flt, Si)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Flt, Si)            \n                                                                                                        \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, -, Subtract, Flt, Flt)      \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Flt, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Flt, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Flt, Si)\n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Flt, Si)\n                                                                                                        \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, *, Multiply, Flt, Flt)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, *, Multiply, Flt, Ui)\n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, *, Multiply, Flt, Ui)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, *, Multiply, Flt, Si)\n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,  
      DEFINE, *, Multiply, Flt, Si)\n                                                                                                        \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, <<, ShiftLeft, Flt, Bits)   \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, >>, ShiftRight, Flt, Bits)  \n\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, ^, Power, Flt, Ui)          \n                                                                                                           \n    MAKE_UNARY_OPERATOR            (MPEXPR_NAME,        DEFINE, -, Negate, Flt)             \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Abs, Flt)                   \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Floor, Flt)                   \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Ceiling, Flt)                   \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Truncate, Flt)                   \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, SquareRoot, Flt)                   \n    MAKE_FUNCTION_WITH_ONE         (MPEXPR_NAME,        DEFINE, RelativeDifferenceFrom, Flt)                   \n                                                                                                           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, /, Divide, Flt, Flt)        \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Flt, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Flt, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Flt, Si)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Flt, Si)       \n                                                                                                           \n    DEFINE_UNARY_ASSIGNMENT_REF(Negate, Flt, MP(neg))\n    
DEFINE_UNARY_ASSIGNMENT_REF(Abs, Flt, MP(abs))\n    DEFINE_UNARY_ASSIGNMENT_REF(Floor, Flt, MP(floor))\n    DEFINE_UNARY_ASSIGNMENT_REF(Ceiling, Flt, MP(ceil))\n    DEFINE_UNARY_ASSIGNMENT_REF(Truncate, Flt, MP(trunc))\n    DEFINE_UNARY_ASSIGNMENT_REF(SquareRoot, Flt, MP(sqrt))\n    DEFINE_UNARY_ASSIGNMENT_VAL(SquareRoot, Ui, MP(sqrt_ui))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Add, Flt, MP(add))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Add, Flt, Ui, MP(add_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI (Add, Flt, Si, MP(add_ui), MP(sub_ui))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Subtract, Flt, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Subtract, Flt, Ui, MP(sub_ui))\n    DEFINE_BINARY_ASSIGNMENT_VAL_REF(Subtract, Ui, Flt, MP(ui_sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI (Subtract, Flt, Si, MP(sub_ui), MP(add_ui))\n    DEFINE_BINARY_ASSIGNMENT_SI_REF (Subtract, Si, Flt, MP(ui_sub), MP(add_ui), MP(neg))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Multiply, Flt, MP(mul))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Multiply, Flt, Ui, MP(mul_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI2(Multiply, Flt, Si, MP(mul_ui), MP(neg))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Divide, Flt, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Divide, Flt, Ui, MP(div_ui))\n    DEFINE_BINARY_ASSIGNMENT_VAL_REF(Divide, Ui, Flt, MP(ui_div))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI2(Divide, Flt, Si, MP(div_ui), MP(neg))\n    DEFINE_BINARY_ASSIGNMENT_SI_REF2(Divide, Si, Flt, MP(ui_div), MP(ui_div), MP(neg))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftLeft, Flt, Bits, MP(mul_2exp))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftRight, Flt, Bits, MP(div_2exp))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Power, Flt, Ui, MP(pow_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(RelativeDifferenceFrom, Flt, MP(reldiff))\n\n    int MPEXPR_NAME::Sign()\n    {\n        IN_DEFAULT_CONTEXT(this); \n        return MP(sgn)(CTXT(0));\n    }\n\n    #pragma endregion\n\n    #pragma region IO\n\n    size_t MPTYPE::Write(TextWriter^ writer, int 
base, int maxDigits, bool lowercase, bool exponentInDecimal)\n    {\n        auto str = ToString(base, lowercase, maxDigits, exponentInDecimal);\n        writer->Write(str);\n        return str->Length;\n    }\n\n    size_t MPTYPE::Read(TextReader^ reader, int base, bool exponentInDecimal)\n    {\n        StringBuilder str;\n        \n        int c;\n        size_t nread = 0;\n        const unsigned char* digit_value = __gmp_digit_value_tab;\n        bool hasDecimal = false;\n        bool inExponent = false;\n\n        if (base < 2)\n            throw gcnew ArgumentException(\"Invalid base\", \"base\");\n\n        if (base > 36)\n        {\n            // For bases > 36, use the collating sequence\n            // 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n            digit_value += 224;\n            if (base > 62)\n                throw gcnew ArgumentException(\"Invalid base\", \"base\");\n        }\n\n        // Skip whitespace\n        while ((c = reader->Peek()) >= 0 && Char::IsWhiteSpace(c))\n        {\n            nread++;\n            reader->Read();\n        }\n\n        // possibly negative\n        if (c == '-')\n        {\n            str.Append((wchar_t)c);\n            PEEK_NEXT_CHAR;\n        }\n\n        while (c != EOF)\n        {\n            if (c == '.')\n            {\n                if (hasDecimal) break;\n                \n                hasDecimal = true;\n                str.Append((wchar_t)c);\n                PEEK_NEXT_CHAR;\n                continue;\n            }\n\n            if ((base <= 10 && Char::ToLower(c) == 'e') || c == '@')\n            {\n                if (inExponent) break;\n                \n                inExponent = true;\n                str.Append((wchar_t)c);\n                PEEK_NEXT_CHAR;\n\n                // possibly negative\n                if (c == '-')\n                {\n                    str.Append((wchar_t)c);\n                    PEEK_NEXT_CHAR;\n                }\n              
  continue;\n            }\n\n            if (inExponent && exponentInDecimal && !Char::IsDigit(c))\n                break;\n\n            int dig = digit_value[c];\n            if (dig >= base)\n                break;\n\n            str.Append((wchar_t)c);\n            PEEK_NEXT_CHAR;\n        }\n\n        SetTo(str.ToString(), base, exponentInDecimal);\n        return str.Length + nread;\n    }\n\n    #pragma endregion\n\n    #pragma region methods in other classes with float parameters\n\n    void HugeInt::SetTo(MPEXPR_NAME^ value)\n    {\n        IN_DEFAULT_CONTEXT(value);\n        mpz_set_f(_value, CTXT(0));\n    }\n\n    void HugeRational::SetTo(MPEXPR_NAME^ value)\n    {\n        IN_DEFAULT_CONTEXT(value);\n        mpq_set_f(_value, CTXT(0));\n    }\n\n    #pragma endregion\n};"
  },
  {
    "path": "mpir.net/mpir.net/HugeFloat.h",
    "content": "/*\r\nCopyright 2014 Alex Dyachenko\r\n\r\nThis file is part of the MPIR Library.\r\n\r\nThe MPIR Library is free software; you can redistribute it and/or modify\r\nit under the terms of the GNU Lesser General Public License as published\r\nby the Free Software Foundation; either version 3 of the License, or (at\r\nyour option) any later version.\r\n\r\nThe MPIR Library is distributed in the hope that it will be useful, but\r\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\r\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\r\nLicense for more details.\r\n\r\nYou should have received a copy of the GNU Lesser General Public License\r\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \r\n*/\r\n\r\n#pragma once\r\n\r\nusing namespace System;\r\nusing namespace System::IO;\r\nusing namespace System::Runtime::InteropServices;\r\n\r\n#ifdef SPECIALIZE_EXPRESSIONS\r\n#undef SPECIALIZE_EXPRESSIONS\r\n#undef MP\r\n#undef CUSTOM_MP\r\n#undef MPSTRUCT\r\n#undef MPTYPE\r\n#undef MPTYPE_NAME\r\n#undef MPEXPR_NAME\r\n#undef MPEXPR\r\n#undef CTXT\r\n#undef CTXTI\r\n#undef ASSIGN_TO\r\n#undef Mpt\r\n#undef SET_CONTEXT_PRECISION\r\n#endif\r\n#define SPECIALIZE_EXPRESSIONS\r\n#define Mpt Flt\r\n#define CUSTOM_MP(x) custom_mpf_##x\r\n#define MPSTRUCT __mpf_struct\r\n#define MP(x) mpf_##x\r\n#define MPTYPE HugeFloat\r\n#define MPTYPE_NAME Float\r\n#define MPEXPR_NAME LIT(MPTYPE_NAME)Expression\r\n#define MPEXPR(x) LIT(MPTYPE_NAME)##x##Expression\r\n#define CTXT(x) context.FloatArgs[x]\r\n#define CTXTI(x) context.IntArgs[x]\r\n#define CTXTR(x) context.RationalArgs[x]\r\n#define ASSIGN_TO CONCAT(AssignTo, LIT(MPTYPE_NAME))\r\n#define SET_CONTEXT_PRECISION context.FloatPrecision = mpf_get_prec(destination);\r\n#include \"ExpressionMacros.h\"\r\n\r\nnamespace MPIR\r\n{\r\n    ref class MpirRandom;\r\n    ref class MPTYPE;\r\n\r\n    #pragma region FloatExpression\r\n\r\n    /// <summary>\r\n    /// 
Base class for all float expressions resulting from many float operations on MPIR types.\r\n    /// <para>Expressions can be arbitrarily nested, and are lazily evaluated \r\n    /// when they are either assigned to the Value property of an MPIR object, or are consumed by a function or operator that returns a primitive type.\r\n    /// </para>Assignment to the Value property is necessary because .Net does not support overloading the assignment operator.\r\n    /// </summary>\r\n    public ref class MPEXPR_NAME abstract : public IComparable, IComparable<MPEXPR_NAME^>, IEquatable<MPEXPR_NAME^>\r\n    {\r\n        internal:\r\n            MPEXPR_NAME() { }\r\n            virtual void AssignTo(MP(ptr) destination) abstract;\r\n            virtual void ASSIGN_TO(EvaluationContext& context)\r\n            {\r\n                context.Initialized(FloatInitialized);\r\n                auto ptr = &context.Temp[context.Index].MPTYPE_NAME;\r\n                CTXT(context.Index++) = ptr;\r\n                MP(init2)(ptr, context.FloatPrecision);\r\n                AssignTo(ptr);\r\n            }\r\n\r\n        private:\r\n            int CompareTo(Object^ a, bool& valid);\r\n\r\n        public:\r\n            #pragma region Arithmetic\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not 
computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_ui b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (mpir_ui a, MPEXPR_NAME^ b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_si b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            
/// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                       
       \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (mpir_ui a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_si b);\r\n                                                                                              \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a 
primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                              \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n        
    /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator * (mpir_ui a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, mpir_si b);\r\n                                                                                              \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator * (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// 
<summary>Shifts the <paramref name=\"a\"/> source operand to the left by <paramref name=\"bits\"/>, i.e. multiplies <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. power of 2 to multiply <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator << (MPEXPR_NAME^ a, mp_bitcnt_t bits);\r\n                                                                                                          \r\n            /// <summary>Shifts the <paramref name=\"a\"/> source operand to the right by <paramref name=\"bits\"/>, i.e. divides <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. 
power of 2 to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static MPEXPR_NAME^ operator >> (MPEXPR_NAME^ a, mp_bitcnt_t bits);\r\n\r\n            /// <summary>Negates the source value.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to negate</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a 
method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static MPEXPR_NAME^ operator / (mpir_ui a, MPEXPR_NAME^ b);\r\n                                                                                              \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static 
MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, mpir_si b);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            static MPEXPR_NAME^ operator / (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Raises the source value to the specified power.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"power\">Power to raise <paramref name=\"a\"/> to</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator ^ (MPEXPR_NAME^ a, mpir_ui power);\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Math\r\n\r\n            /// <summary>Computes the absolute value of the source number.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n    
        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Abs();\r\n\r\n            /// <summary>Computes the square root of the source number.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ SquareRoot();\r\n\r\n            /// <summary>Rounds the source number down to the next integer.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Floor();\r\n\r\n            /// <summary>Rounds the source number up to the next integer.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Ceiling();\r\n\r\n            /// <summary>Rounds the source number to the next integer toward zero.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// 
<returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Truncate();\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Comparisons\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>The precision of the calculation is the precision of this instance if it is a computed number, otherwise the precision of <paramref name=\"a\"/> if that is a computed number,\r\n            /// otherwise the current default float precision.\r\n            /// <para>When the argument is a double, it may be an infinity, but results are undefined for a NaN.</para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            virtual int CompareTo(Object^ a) sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>The precision of the calculation is the precision of this instance if it is a computed number, otherwise the precision of <paramref name=\"a\"/> if that is a computed number,\r\n            /// otherwise the current default float precision if both are expressions.</summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            virtual int CompareTo(MPEXPR_NAME^ a) sealed;\r\n\r\n            /// <summary>Compares two 
numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\r\n            virtual bool Equals(MPEXPR_NAME^ a) sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>When the argument is a double, it may be an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Value to compare the source with.  This can be a multi-precision number, an expression, or a supported primitive type (long, ulong, or double).</param>\r\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\r\n            virtual bool Equals(Object^ a) override sealed;\r\n\r\n            /// <summary>Compares two numbers approximately, taking into account <paramref name=\"precision\"/> most significant bits of the mantissa.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable with the specified <paramref name=\"precision\"/> before the comparison is performed.\r\n            /// </para>In the future values like 1000 and 0111 may be considered the same to 3 bits (on the basis that their difference is that small).\r\n            /// </summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <param name=\"precision\">The number of most significant bits that must match for the two numbers to be considered equal</param>\r\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal to 
<paramref name=\"precision\"/>, false otherwise.</returns>\r\n            bool Equals(MPEXPR_NAME^ a, mp_bitcnt_t precision) { IN_SPECIFIC_CONTEXT(precision, this, a); return MP(eq)(CTXT(0), CTXT(1), precision) != 0; }\r\n\r\n            /// <summary>Computes the hash code of the source value.\r\n            /// <para>If called on an expression, it is evaluated into a temporary variable with the current default float precision before the calculation is performed.\r\n            /// </para>Multi-precision classes are mutable with value semantics.  The hash code is based on the value, and will change if the value changes.\r\n            /// For this reason, the value of an object must not be modified while the object is contained in a hash table.</summary>\r\n            /// <returns>a signed integer hash code for the value.</returns>\r\n            virtual int GetHashCode() override sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? 
!IS_NULL(b) : a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  IS_NULL(b) : a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  IS_NULL(b) : a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? 
!IS_NULL(b) : a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source 
value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// 
<summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source 
value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an 
expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the 
comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is 
performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (mpir_si b, 
MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is 
evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an 
infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to 
compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Can be called with an infinity, but results are undefined for a NaN.</summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A 
boolean result of the comparison.</returns>\r\n            static bool operator == (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Calculates the sign (+1, 0, or -1) of the source value.\r\n            /// <para>If the source is an expression, it is evaluated into a temporary variable with the current default float precision before the sign is computed.\r\n            /// </para></summary>\r\n            /// <returns>+1 if the source is positive, -1 if negative, and 0 if zero.</returns>\r\n            int Sign();\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>The result equals | source - a | / source.</summary>\r\n            /// <param name=\"a\">Source value to compare with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ RelativeDifferenceFrom(MPEXPR_NAME^ a);\r\n\r\n            #pragma endregion\r\n    };\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region concrete expressions\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Add, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Flt, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Flt, Si)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Subtract, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Flt, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, Ui, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Flt, Si)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    
(MPEXPR_NAME, Subtract, Si, Flt)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Multiply, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Multiply, Flt, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Multiply, Flt, Si)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Divide, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Divide, Flt, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Divide, Ui, Flt)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Divide, Flt, Si)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Divide, Si, Flt)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, ShiftLeft, Flt, Bits)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, ShiftRight, Flt, Bits)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Power, Flt, Ui)\r\n\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Negate, Flt)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Abs, Flt)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, SquareRoot, Flt)\r\n    DEFINE_UNARY_EXPRESSION_WITH_BUILT_INS_ONLY    (MPEXPR_NAME, SquareRoot, Ui)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, RelativeDifferenceFrom, Flt)\r\n\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Floor, Flt)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Ceiling, Flt)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Truncate, Flt)\r\n\r\n    DEFINE_UNARY_EXPRESSION                        
(MPEXPR_NAME, Random, MpirRandom^)\r\n    DEFINE_BINARY_EXPRESSION                       (MPEXPR_NAME, RandomBits, MpirRandom^, mp_bitcnt_t)\r\n    DEFINE_BINARY_EXPRESSION                       (MPEXPR_NAME, RandomChunky, MpirRandom^, mp_exp_t)\r\n    DEFINE_TERNARY_EXPRESSION                      (MPEXPR_NAME, RandomLimbsChunky, MpirRandom^, mp_size_t, mp_exp_t)\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region HugeFloat class\r\n\r\n    /// <summary>\r\n    /// Multi-precision Float class.\r\n    /// </summary>\r\n    public ref class MPTYPE : MPEXPR_NAME\r\n    {\r\n        internal:\r\n            //fields\r\n            MP(ptr) _value;\r\n            mp_bitcnt_t _allocatedPrecision;\r\n\r\n        private:\r\n            //construction\r\n            void AllocateStruct()\r\n            {\r\n                _value = (MP(ptr))((*__gmp_allocate_func)(sizeof(MPSTRUCT)));\r\n            }\r\n            void FromString(String^ value, int base, bool exponentInDecimal);\r\n            MPTYPE(bool initialize);\r\n            String^ ToString(int base, bool lowercase, int maxDigits, bool exponentInDecimal);\r\n\r\n        internal:\r\n            virtual void DeallocateStruct()\r\n            {\r\n                MP(set_prec_raw)(_value, _allocatedPrecision);\r\n                MP(clear)(_value);\r\n                (*__gmp_free_func)(_value, sizeof(MPSTRUCT));\r\n                _value = nullptr;\r\n            }\r\n\r\n            //assignment\r\n            virtual void AssignTo(MP(ptr) destination) override\r\n            {\r\n                if(destination != _value)\r\n                    MP(set)(destination, _value);\r\n            }\r\n            virtual void ASSIGN_TO(EvaluationContext& context) override\r\n            {\r\n                CTXT(context.Index++) = _value;\r\n            }\r\n\r\n        public:\r\n            #pragma region construction and disposal\r\n\r\n            static MPTYPE()\r\n            {\r\n                DefaultPrecision = 
sizeof(mpir_ui) * 8 * 2; //2 limbs\r\n            }\r\n\r\n            /// <summary>\r\n            /// Gets or sets the default precision of a float variable in bits.\r\n            /// <para>The actual precision may be slightly greater if the value is not a whole limb multiple.\r\n            /// </para>All subsequently constructed variables will use this precision, but previously constructed variables are unaffected.\r\n            /// <para>The initial default value is 2 limbs.\r\n            /// </para></summary>\r\n            static property mp_bitcnt_t DefaultPrecision\r\n            {\r\n                mp_bitcnt_t get() { return MP(get_default_prec)(); }\r\n                void set(mp_bitcnt_t value) { MP(set_default_prec)(value); }\r\n            }\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value to 0.\r\n            /// <para>The precision of the new variable's mantissa is set from the static DefaultPrecision property.\r\n            /// </para></summary>\r\n            MPTYPE();\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance, allocating enough memory to hold at least <paramref name=\"precision\"/> bits, and sets its value to 0.\r\n            /// <para>All float operations are performed to the precision of the destination.\r\n            /// </para></summary>\r\n            /// <param name=\"precision\">Minimum number of bits the initially allocated memory should hold for the mantissa</param>\r\n            /// <returns>the newly constructed instance</returns>\r\n            static MPTYPE^ Allocate(mp_bitcnt_t precision);\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value from the specified string.\r\n            /// <para>No leading base characters are allowed.\r\n            /// </para>The exponent is always in decimal.\r\n            /// </summary>\r\n            /// <param name=\"value\">string 
representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\r\n            MPTYPE(String^ value) { FromString(value, 0, true); }\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value from the specified string\r\n            /// </summary>\r\n            /// <param name=\"value\">string representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\r\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\r\n            /// <para>The base may vary from 2 to 62.  No leading base characters are allowed.  The exponent is always in decimal\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. \r\n            /// For bases 37 to 62, upper-case letter represent the usual 10..35 while lower-case letter represent 36..61.</param>\r\n            MPTYPE(String^ value, int base) { FromString(value, base, true); }\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value from the specified string\r\n            /// </summary>\r\n            /// <param name=\"value\">string representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\r\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\r\n            /// <para>The base may vary from 2 to 62.  No leading base characters are allowed.\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. 
\r\n            /// For bases 37 to 62, upper-case letter represent the usual 10..35 while lower-case letter represent 36..61.</param>\r\n            /// <param name=\"exponentInDecimal\">True if the exponent is in decimal, false to use the same base as the mantissa.</param>\r\n            MPTYPE(String^ value, int base, bool exponentInDecimal) { FromString(value, base, exponentInDecimal); }\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value to the result of computing the source expression.\r\n            /// </summary>\r\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\r\n            MPTYPE(MPEXPR_NAME^ value);\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value to the result of computing the source expression.\r\n            /// </summary>\r\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\r\n            MPTYPE(IntegerExpression^ value);\r\n\r\n            /// <summary>\r\n            /// Initializes a new float instance and sets its value to the result of computing the source expression.\r\n            /// </summary>\r\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\r\n            MPTYPE(RationalExpression^ value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new float instance with its value set to <paramref name=\"value\"/>.\r\n            /// </summary>\r\n            /// <param name=\"value\">value for the initial value for the new float instance</param>\r\n            MPTYPE(mpir_si value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new float instance with its value set to <paramref 
name=\"value\"/>.\r\n            /// </summary>\r\n            /// <param name=\"value\">value for the initial value for the new float instance</param>\r\n            MPTYPE(mpir_ui value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new float instance with its value set to the <paramref name=\"value\"/> parameter.\r\n            /// <para>There is no rounding, this conversion is exact.</para>\r\n            /// </summary>\r\n            /// <param name=\"value\">Initial value for the new float instance.  This is an exact conversion.</param>\r\n            MPTYPE(double value);\r\n\r\n            //disposal\r\n\r\n            //creating a destructor in C++ implements IDisposable.\r\n\r\n            /// <summary>\r\n            /// Frees all memory allocated by the instance.\r\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\r\n            /// </para></summary>\r\n            ~MPTYPE() { this->!MPTYPE(); }\r\n\r\n            /// <summary>\r\n            /// Frees all memory allocated by the instance.\r\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\r\n            /// </para></summary>\r\n            !MPTYPE() { if(_value != 0) DeallocateStruct(); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region conversions\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string.\r\n            /// <para>To avoid debugging performance problems, this method outputs at most the number of digits specified in MpirSettings.ToStringDigits.\r\n            /// If the number is larger, the least significant digits are shown with a leading ellipsis (i.e., [-]...NNNNN)\r\n            /// </para>Setting MpirSettings.ToStringDigits to 0 removes 
the upper limit.\r\n            /// </summary>\r\n            /// <returns>A string representation of the number in decimal, possibly cut off if the number has more digits than MpirSettings.ToStringDigits.</returns>\r\n            virtual String^ ToString() override { return ToString(10, false, MpirSettings::ToStringDigits, false); }\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string in the specified base.\r\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\r\n            /// </para></summary>\r\n            /// <param name=\"base\">The base to use for the output.  The base can be from 2 to 62; uppercase letters represent digits 10-35 and lowercase letters represent digits 36-61.</param>\r\n            /// <returns>A string representation of the number in the specified base.</returns>\r\n            String^ ToString(int base) { return ToString(base, false, 0, false); }\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string in the specified base.\r\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\r\n            /// </para></summary>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <returns>A 
string representation of the number in the specified base.</returns>\r\n            String^ ToString(int base, bool lowercase) { return ToString(base, lowercase, 0, false); }\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string in the specified base.\r\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\r\n            /// </para></summary>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <param name=\"exponentInDecimal\">True to always show the exponent in decimal, False to use the same base as the mantissa</param>\r\n            /// <returns>A string representation of the number in the specified base.</returns>\r\n            String^ ToString(int base, bool lowercase, bool exponentInDecimal) { return ToString(base, lowercase, 0, exponentInDecimal); }\r\n\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <summary>\r\n            /// Returns the absolute value of the number as a ulong, truncating any fractional part.\r\n            /// <para>If the number is too big, the result is undefined.  
Call FitsUlong() to check if the number will fit.\r\n            /// </para>The sign of the number is ignored, only the absolute value is used.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The absolute value as a ulong, with any fractional part truncated.</returns>\r\n            mpir_ui ToUlong() { return MP(get_ui)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a long.\r\n            /// <para>If the number is too big, the result is undefined.  Call FitsLong() to check if the number will fit.\r\n            /// </para>This method is supported only on 64-bit builds\r\n            /// </summary>\r\n            /// <returns>The value as a long, with any fractional part truncated.</returns>\r\n            mpir_si ToLong() { return MP(get_si)(_value); }\r\n#else\r\n            /// <summary>\r\n            /// Returns the absolute value of the number as a uint, truncating any fractional part.\r\n            /// <para>If the number is too big, the result is undefined.  Call FitsUint() to check if the number will fit.\r\n            /// </para>The sign of the number is ignored, only the absolute value is used.\r\n            /// <para>This method is supported only on 32-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The absolute value as a uint, with any fractional part truncated.</returns>\r\n            mpir_ui ToUint() { return MP(get_ui)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as an int.\r\n            /// <para>If the number is too big, the result is undefined.  
Call FitsInt() to check if the number will fit.\r\n            /// </para>This method is supported only on 32-bit builds\r\n            /// </summary>\r\n            /// <returns>The value as an int, with any fractional part truncated.</returns>\r\n            mpir_si ToInt() { return MP(get_si)(_value); }\r\n#endif\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a double, truncating if necessary (rounding towards zero).\r\n            /// <para>If the exponent from the conversion is too big, the result is system dependent. An infinity is returned where available. \r\n            /// A hardware overflow trap may or may not occur.\r\n            /// </para></summary>\r\n            /// <returns>The value as a double, possibly truncated.</returns>\r\n            double ToDouble() { return MP(get_d)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a double, truncating if necessary (rounding towards zero), and returning the exponent separately.\r\n            /// <para>The return is the mantissa, its absolute value will be in the range [0.5 - 1).\r\n           \r\n            /// </para>The exponent is binary, i.e. 
mantissa * 2^exp is the value of the source number.\r\n            /// <para>If the source value is zero, both mantissa and exponent are returned as 0.\r\n            /// </para></summary>\r\n            /// <param name=\"exp\">variable to store the exponent in.</param>\r\n            /// <returns>The mantissa of the value as a double, possibly truncated.</returns>\r\n            double ToDouble([Out] mp_exp_t% exp) \r\n            { \r\n                mp_exp_t x; \r\n                auto result = MP(get_d_2exp)(&x, _value); \r\n                exp = x; \r\n                return result; \r\n            }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region assignment\r\n\r\n            /// <summary>\r\n            /// When getting, returns this float.\r\n            /// <para>When setting, sets the value of the float object to the value resulting from computing the supplied expression.\r\n            /// </para>The getter is a no-op and never needs to be invoked directly, but makes compound operators such as +=, *=, etc. possible.\r\n            /// <para>Do not set the Value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <remarks>\r\n            /// MPIR types are implemented as reference types with value semantics.\r\n            /// Like Strings, the objects themselves are just lightweight pointers to data allocated elsewhere.\r\n            /// Unlike Strings, MPIR types are mutable.\r\n            /// <para>Value semantics requires you to be able to code, a = b + c.\r\n            /// However, .Net (outside of C++) does not allow overloading the assignment operator,\r\n            /// and assigning references would necessitate some unnecessary duplication and extra memory allocations.\r\n            /// </para>To solve this problem, MPIR.Net uses the property assignment.  
\r\n            /// The setter of the Value property does what an overloaded assignment operator would do in C++.\r\n            /// The syntax is a little different: a.Value = b + c, but it is fluent enough to become a quick habit,\r\n            /// and additionally reinforces the concept that an existing object can change its value while reusing internally allocated memory.\r\n            /// <para>To this end, all overloaded operators and most functions that operate on MPIR types,\r\n            /// instead of eagerly computing a result, produce and return an expression that is basically a formula for the computation.\r\n            /// Expressions can then be composed using additional operators to achieve expression trees of arbitrary complexity.\r\n            /// All computations are deferred until an expression is assigned to the Value property of an MPIR object,\r\n            /// consumed by a method or operator that returns a primitive type,\r\n            /// or supplied as an argument to an MPIR type constructor.\r\n            /// </para>The getter is a no-op defined to make possible constructs such as a.Value += 5, a.Value *= 10, etc.\r\n            /// <para>Direct assignments such as a = b + c, a *= 10 will not compile because there is no implicit conversion from an expression.\r\n            /// Even if an implicit conversion were defined, such code would incur an extra allocation plus garbage collection,\r\n            /// and would not perform as well as doing the same operations on a.Value.\r\n            /// </para>It would also not compile if the source were a \"using\" variable, as all method-local floats should be.\r\n            /// </remarks>\r\n            property MPEXPR_NAME^ Value\r\n            {\r\n                void set(MPEXPR_NAME^ expr) { expr->AssignTo(_value); }\r\n                MPEXPR_NAME^ get() { return this; }\r\n            }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n    
        /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(mpir_ui value) { MP(set_ui)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(mpir_si value) { MP(set_si)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.  This is an exact conversion, there is no rounding.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(double value) { MP(set_d)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object.\r\n            /// <para>May be a float or a pair of floats separated by a slash.\r\n            /// </para>The string's leading characters may indicate base:\r\n            /// 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise</param>\r\n            void SetTo(String^ value) { SetTo(value, 0); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the 
value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            /// <param name=\"base\">base the mantissa in the <paramref name=\"value\"/> string is in.  Exponent is always in decimal.\r\n            /// <para>The base may vary from 2 to 62.  No leading base characters are allowed.\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. \r\n            /// For bases 37 to 62, upper-case letters represent the usual 10..35 while lower-case letters represent 36..61.</param>\r\n            void SetTo(String^ value, int base) { SetTo(value, base, true); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\r\n            /// </summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\r\n            /// <para>The base may vary from 2 to 62.  No leading base characters are allowed.\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. 
\r\n            /// For bases 37 to 62, upper-case letters represent the usual 10..35 while lower-case letters represent 36..61.</param>\r\n            /// <param name=\"exponentInDecimal\">If true, the exponent is in decimal; otherwise it is in the same base as the mantissa</param>\r\n            void SetTo(String^ value, int base, bool exponentInDecimal);\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(IntegerExpression^ value)\r\n            {\r\n                EvaluationContext context;\r\n                value->AssignToInteger(context);\r\n                MP(set_z)(_value, CTXTI(0));\r\n            }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the float object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(RationalExpression^ value);\r\n\r\n            /// <summary>\r\n            /// Swaps the values of two floats.\r\n            /// <para>This operation is a pointer swap and doesn't affect allocated memory.\r\n            /// </para>Do not call this method while either object is contained in a hash table, because this would change their hash codes.\r\n            /// </summary>\r\n            /// <param name=\"a\">Source number to swap this instance's value with</param>\r\n            void Swap(MPTYPE^ a) \r\n            { \r\n                MP(swap)(_value, a->_value);\r\n\r\n                mp_bitcnt_t prec = a->_allocatedPrecision;\r\n                a->_allocatedPrecision = 
_allocatedPrecision;\r\n                _allocatedPrecision = prec;\r\n            }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Size checks\r\n\r\n            /// <summary>\r\n            /// Gets or sets the effective precision of the number without changing the memory allocated.\r\n            /// <para>The number of bits cannot exceed the precision with which the variable was initialized or last reallocated.\r\n            /// </para>The value of the number is unchanged, and in particular if it previously had a higher precision it will retain that higher precision.\r\n            /// <para>New values assigned to the Value property will use the new precision.\r\n            /// </para>This is an efficient way to use a float variable at different precisions during a calculation,\r\n            /// perhaps to gradually increase precision in an iteration, or just to use various different \r\n            /// precisions for different purposes during a calculation.\r\n            /// <para>The number can be safely disposed after modifying its precision (this would not be the case in unmanaged MPIR).\r\n            /// </para></summary>\r\n            property mp_bitcnt_t Precision\r\n            {\r\n                mp_bitcnt_t get() { return MP(get_prec)(_value); }\r\n                void set(mp_bitcnt_t value) \r\n                {\r\n                    if(value > _allocatedPrecision)\r\n                        throw gcnew ArgumentException(\"Cannot set precision higher than allocated\");\r\n\r\n                    MP(set_prec_raw)(_value, value);\r\n                }\r\n            }\r\n\r\n            /// <summary>\r\n            /// Gets the precision in bits that is currently allocated for internal storage of the mantissa.\r\n            /// <para>The precision actually in effect, used in calculations, is initially the same but may be reduced by setting the Precision property.\r\n            /// </para>However Precision cannot exceed 
AllocatedPrecision.\r\n            /// <para>To change AllocatedPrecision, call Reallocate().\r\n            /// </para>The value actually allocated may be slightly more than the number of bits requested by Allocate() or Reallocate().\r\n            /// </summary>\r\n            property mp_bitcnt_t AllocatedPrecision\r\n            {\r\n                mp_bitcnt_t get() { return _allocatedPrecision; }\r\n            }\r\n\r\n            /// <summary>\r\n            /// Set the precision of this instance to be at least prec bits, reallocating its limbs data.\r\n            /// <para>The value of this instance will be truncated to the new precision.\r\n            /// </para>This function requires a call to realloc, and so should not be used in a tight loop.\r\n            /// </summary>\r\n            /// <param name=\"precision\">Minimum number of bits the allocated memory should hold for the mantissa.</param>\r\n            void Reallocate(mp_bitcnt_t precision)\r\n            { \r\n                MP(set_prec_raw)(_value, _allocatedPrecision);\r\n                MP(set_prec)(_value, precision); \r\n                _allocatedPrecision = precision; \r\n            }\r\n\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the long range.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a long</returns>\r\n            bool FitsLong() { return MP(fits_si_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the ulong range.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a ulong</returns>\r\n            bool FitsUlong() { return 
MP(fits_ui_p)(_value) != 0; }\r\n#endif\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the int range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in an int</returns>\r\n            bool FitsInt() { return MP(fits_sint_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the uint range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a uint</returns>\r\n            bool FitsUint() { return MP(fits_uint_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the short range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a short</returns>\r\n            bool FitsShort() { return MP(fits_sshort_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the source number, if truncated to an integer, is in the ushort range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a ushort</returns>\r\n            bool FitsUshort() { return MP(fits_ushort_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the source number is a whole integer.\r\n            /// </summary>\r\n            /// <returns>true if the value is an integer</returns>\r\n            bool IsInteger() { return MP(integer_p)(_value) != 0; }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region IO\r\n\r\n            /// <summary>\r\n            /// Outputs the float to the <paramref name=\"writer\"/> as a string of digits in decimal.\r\n            /// <para>The mantissa is prefixed with an '0.', and is followed by '@' and an integer exponent.\r\n            /// </para>When writing 
multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not valid for this format.\r\n            /// <para>This is because the Read method stops reading when it encounters an invalid character.\r\n            /// </para></summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer) { return Write(writer, 10, 0, false, true); }\r\n\r\n            /// <summary>\r\n            /// Outputs the float to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\r\n            /// <para>The mantissa is prefixed with an '0.', and is followed by '@' and an integer exponent in the same base.\r\n            /// </para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not valid for this format.\r\n            /// <para>This is because the Read method stops reading when it encounters an invalid character.\r\n            /// </para></summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</para></param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer, int base) { return Write(writer, base, 0, false, true); }\r\n\r\n            /// <summary>\r\n            /// Outputs the float to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\r\n            /// <para>The mantissa is prefixed with an '0.', and is followed by '@' and an 
integer exponent.\r\n            /// </para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not valid for this format.\r\n            /// <para>This is because the Read method stops reading when it encounters an invalid character.\r\n            /// </para></summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer, int base, bool lowercase) { return Write(writer, base, 0, lowercase, true); }\r\n\r\n            /// <summary>\r\n            /// Outputs the float to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\r\n            /// <para>The mantissa is prefixed with an '0.', and is followed by '@' and an integer exponent.\r\n            /// </para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not valid for this format.\r\n            /// <para>This is because the Read method stops reading when it encounters an invalid character.\r\n            
/// </para></summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <param name=\"exponentInDecimal\">Whether the exponent should always be written in decimal, or use the same base as the mantissa.</param>\r\n            /// <param name=\"maxDigits\">Maximum number of digits for the mantissa.\r\n            /// <para>No more digits than are accurately representable by the number's precision will be printed.\r\n            /// </para>Use zero to select that accurate maximum.</param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer, int base, int maxDigits, bool lowercase, bool exponentInDecimal);\r\n\r\n            /// <summary>\r\n            /// Inputs the number as a possibly white-space preceded string.\r\n            /// <para>The string is in the form 'Mantissa@Exponent' or, if the base is 10 or less, alternatively 'MeN'.\r\n            /// </para>The mantissa and exponent are always in decimal, no leading base characters are allowed.\r\n            /// <para>Reading terminates at end-of-stream, or up to but not including a character that is not valid for this format.\r\n            /// </para>This method reads the output of a Write(TextWriter) 
when decimal base is used.\r\n            /// </summary>\r\n            /// <param name=\"reader\">Text reader to input the number from</param>\r\n            /// <returns>the number of characters read</returns>\r\n            size_t Read(TextReader^ reader) { return Read(reader, 0, true); }\r\n\r\n            /// <summary>\r\n            /// Inputs the number as a possibly white-space preceded string.\r\n            /// <para>The string is in the form 'Mantissa@Exponent' or, if the base is 10 or less, alternatively 'MeN'.\r\n            /// </para>The mantissa is always in the specified base, the exponent is always in decimal.  No leading base characters are allowed.\r\n            /// <para>Reading terminates at end-of-stream, or up to but not including a character that is not valid for this format.\r\n            /// </para>This method reads the output of a Write(TextWriter) when decimal base is used.\r\n            /// </summary>\r\n            /// <param name=\"reader\">Text reader to input the number from</param>\r\n            /// <param name=\"base\">The base to use for the mantissa.\r\n            /// <para>The base can be from 2 to 62; uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.\r\n            /// </para>For bases 36 and less, uppercase and lowercase letters are equivalent.</param>\r\n            /// <returns>the number of characters read</returns>\r\n            size_t Read(TextReader^ reader, int base) { return Read(reader, base, true); }\r\n\r\n            /// <summary>\r\n            /// <para>The string is in the form 'Mantissa@Exponent' or, if the base is 10 or less, alternatively 'MeN'.\r\n            /// </para>The mantissa is always in the specified base, the exponent is either in the same base or in decimal.  
No leading base characters are allowed.\r\n            /// <para>Reading terminates at end-of-stream, or up to but not including a character that is not valid for this format.\r\n            /// </para>This method reads the output of a Write(TextWriter, ...) method with the same base.\r\n            /// </summary>\r\n            /// <param name=\"reader\">Text reader to input the number from</param>\r\n            /// <param name=\"base\">The base to use for the input.\r\n            /// <para>The base can be from 2 to 62; For bases up to 36 case is ignored.\r\n            /// </para>For bases larger than 36, uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <param name=\"exponentInDecimal\">Whether the exponent should always be written in decimal, or use the same base as the mantissa.</param>\r\n            /// <returns>the number of characters read</returns>\r\n            size_t Read(TextReader^ reader, int base, bool exponentInDecimal);\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Arithmetic\r\n\r\n            /// <summary>Computes the square root of the source number.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">source number to take the square root of</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ SquareRoot(mpir_ui a) { return gcnew MPEXPR(SquareRootUi)(a); }\r\n\r\n            #pragma endregion\r\n    };\r\n\r\n    #pragma endregion\r\n};\r\n"
  },
  {
    "path": "mpir.net/mpir.net/HugeInt.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"Stdafx.h\"\n#include \"HugeInt.h\"\n#include \"Random.h\"\n\nusing namespace System::Runtime::InteropServices;\nusing namespace System::Text;\n\n__mpz_struct HugeIntConst1;\n\nnamespace MPIR\n{\n    #pragma region MpirSettings\n\n    void MpirSettings::ToStringDigits::set(int value)\n    {\n        _toStringDigits = value;\n        _toStringModulo = gcnew MPTYPE(MPTYPE::Power(10, value));\n    }\n\n    #pragma endregion\n\n    #pragma region construction\n\n    MPTYPE::MPTYPE()\n    {\n        AllocateStruct();\n        MP(init)(_value);\n    }\n\n    MPTYPE::MPTYPE(bool initialize)\n    {\n        AllocateStruct();\n        if(initialize)\n            MP(init)(_value);\n    }\n\n    MPTYPE::MPTYPE(MPEXPR_NAME^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        value->AssignTo(_value);\n    }\n\n    MPTYPE^ MPTYPE::Allocate(mp_bitcnt_t bits)\n    {\n        auto result = gcnew MPTYPE(false);\n        MP(init2)(result->_value, bits);\n        return result;\n    }\n\n    void MPTYPE::FromString(String^ value, int base)\n    {\n        AllocateStruct();\n\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(init_set_str)(_value, (char*)(void*)ptr, 
base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n        {\n            DeallocateStruct();\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n        }\n    }\n\n    void MPTYPE::SetTo(String^ value, int base)\n    {\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(set_str)(_value, (char*)(void*)ptr, base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n    }\n\n    MPTYPE::MPTYPE(mpir_si value)\n    {\n        AllocateStruct();\n        MP(init_set_si)(_value, value);\n    }\n\n    MPTYPE::MPTYPE(mpir_ui value)\n    {\n        AllocateStruct();\n        MP(init_set_ui)(_value, value);\n    }\n\n    MPTYPE::MPTYPE(double value)\n    {\n        AllocateStruct();\n        MP(init_set_d)(_value, value);\n    }\n\n    #pragma endregion\n\n    #pragma region object overrides\n\n    String^ MPTYPE::ToString(int base, bool lowercase, int maxDigits)\n    {\n        size_t allocated;\n        bool negative = false;\n        bool truncated = false;\n\n        EvaluationContext context;\n        if(maxDigits > 0 && CompareAbsTo(MpirSettings::_toStringModulo) >= 0)\n        {\n            (this->Abs() % MpirSettings::_toStringModulo)->Rounding(RoundingModes::Truncate)->ASSIGN_TO(context);\n            truncated = true;\n            negative = this->Sign() < 0;\n            allocated = maxDigits + 5;\n        }\n        else\n        {\n            ASSIGN_TO(context);\n            allocated = MP(sizeinbase)(_value, base == 0 ? 10 : base) + 2;\n        }\n\n        char* allocatedStr = (char*)(*__gmp_allocate_func)(allocated);\n        char* str = allocatedStr;\n\n        if(negative)\n            *str++ = '-';\n        if(truncated)\n        {\n            *str++ = '.';\n            *str++ = '.';\n            *str++ = '.';\n        }\n\n        MP(get_str)(str, (base <= 36 && !lowercase) ? 
-base : base, CTXT(0));\n        auto result = gcnew String(allocatedStr);\n        (*__gmp_free_func)(allocatedStr, allocated);\n\n        return result;\n    }\n\n    int MPEXPR_NAME::GetHashCode()\n    {\n        IN_CONTEXT(this);\n\n        mp_limb_t hash = 0;\n        mp_limb_t* ptr = CTXT(0)->_mp_d;\n        for(int i = abs(CTXT(0)->_mp_size); i > 0; i--)\n            hash ^= *ptr++;\n\n        if(CTXT(0)->_mp_size < 0)\n            hash = (mp_limb_t)-(mpir_si)hash;\n\n        return hash.GetHashCode();\n    }\n\n    #pragma endregion\n\n    #pragma region Interface implementations\n\n    //CompareTo has to be defined in HugeRational.cpp because it depends on HugeRational.h\n\n    int MPEXPR_NAME::CompareTo(Object^ a)\n    {\n        bool valid;\n        auto result = CompareTo(a, valid);\n\n        if (valid)\n            return result;\n\n        throw gcnew ArgumentException(\"Invalid argument type\", \"a\");\n    }\n\n    int MPEXPR_NAME::CompareTo(MPEXPR_NAME^ a)\n    {\n        if (IS_NULL(a))\n            return 1;\n\n        IN_CONTEXT(this, a);\n        return MP(cmp)(CTXT(0), CTXT(1));\n    }\n\n    bool MPEXPR_NAME::Equals(Object^ a)\n    {\n        bool valid;\n        auto result = CompareTo(a, valid);\n\n        return valid && result == 0;\n    }\n\n    bool MPEXPR_NAME::Equals(MPEXPR_NAME^ a)\n    {\n        return CompareTo(a) == 0;\n    }\n\n    #pragma endregion\n\n    #pragma region expression special cases\n\n    void MPEXPR(Divide)::CUSTOM_MP(div)(MP(ptr) q, MP(srcptr) n, MP(srcptr) d)\n    {\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                IS_NULL(_remainder)\n                    ? MP(fdiv_q)(q, n, d)\n                    : MP(fdiv_qr)(q, _remainder->_value, n, d);\n                break;\n\n            case RoundingModes::Ceiling:\n                IS_NULL(_remainder)\n                    ? 
MP(cdiv_q)(q, n, d)\n                    : MP(cdiv_qr)(q, _remainder->_value, n, d);\n                break;\n\n            default:\n                IS_NULL(_remainder)\n                    ? MP(tdiv_q)(q, n, d)\n                    : MP(tdiv_qr)(q, _remainder->_value, n, d);\n                break;\n        }\n    }\n\n    void MPEXPR(DivideUi)::CUSTOM_MP(div_ui)(MP(ptr) q, MP(srcptr) n, mpir_ui d)\n    {\n        mpir_ui limb;\n\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                limb = IS_NULL(_remainder)\n                    ? MP(fdiv_q_ui)(q, n, d)\n                    : MP(fdiv_qr_ui)(q, _remainder->_value, n, d);\n                break;\n\n            case RoundingModes::Ceiling:\n                limb = IS_NULL(_remainder)\n                    ? MP(cdiv_q_ui)(q, n, d)\n                    : MP(cdiv_qr_ui)(q, _remainder->_value, n, d);\n                break;\n\n            default:\n                limb = IS_NULL(_remainder)\n                    ? MP(tdiv_q_ui)(q, n, d)\n                    : MP(tdiv_qr_ui)(q, _remainder->_value, n, d);\n                break;\n        }\n\n        if(!IS_NULL(_limbRemainder))\n            _limbRemainder(limb);\n    }\n\n    void MPEXPR(ShiftRight)::CUSTOM_MP(div_2exp)(MP(ptr) q, MP(srcptr) n, mp_bitcnt_t d)\n    {\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                _remainder\n                    ? MP(fdiv_r_2exp)(q, n, d)\n                    : MP(fdiv_q_2exp)(q, n, d);\n                break;\n\n            case RoundingModes::Ceiling:\n                _remainder\n                    ? MP(cdiv_r_2exp)(q, n, d)\n                    : MP(cdiv_q_2exp)(q, n, d);\n                break;\n\n            default:\n                _remainder\n                    ? 
MP(tdiv_r_2exp)(q, n, d)\n                    : MP(tdiv_q_2exp)(q, n, d);\n                break;\n        }\n    }\n\n    void MPEXPR(Mod)::CUSTOM_MP(mod)(MP(ptr) r, MP(srcptr) n, MP(srcptr) d)\n    {\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                IS_NULL(_quotient)\n                    ? MP(fdiv_r)(r, n, d)\n                    : MP(fdiv_qr)(_quotient->_value, r, n, d);\n                break;\n\n            case RoundingModes::Ceiling:\n                IS_NULL(_quotient)\n                    ? MP(cdiv_r)(r, n, d)\n                    : MP(cdiv_qr)(_quotient->_value, r, n, d);\n                break;\n\n            default:\n                IS_NULL(_quotient)\n                    ? MP(tdiv_r)(r, n, d)\n                    : MP(tdiv_qr)(_quotient->_value, r, n, d);\n                break;\n        }\n    }\n\n    void MPEXPR(ModUi)::CUSTOM_MP(mod_ui)(MP(ptr) r, MP(srcptr) n, mpir_ui d)\n    {\n        mpir_ui limb;\n\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                limb = IS_NULL(_quotient)\n                    ? MP(fdiv_r_ui)(r, n, d)\n                    : MP(fdiv_qr_ui)(_quotient->_value, r, n, d);\n                break;\n\n            case RoundingModes::Ceiling:\n                limb = IS_NULL(_quotient)\n                    ? MP(cdiv_r_ui)(r, n, d)\n                    : MP(cdiv_qr_ui)(_quotient->_value, r, n, d);\n                break;\n\n            default:\n                limb = IS_NULL(_quotient)\n                    ? 
MP(tdiv_r_ui)(r, n, d)\n                    : MP(tdiv_qr_ui)(_quotient->_value, r, n, d);\n                break;\n        }\n\n        if(!IS_NULL(_limbRemainder))\n            _limbRemainder(limb);\n    }\n\n    void MPEXPR(Root)::CUSTOM_MP(root)(MP(ptr) dest, MP(srcptr) oper, mpir_ui power)\n    {\n        if(!IS_NULL(_remainder))\n            MP(rootrem)(dest, _remainder->_value, oper, power);\n        else if (IS_NULL(_exact))\n            MP(nthroot)(dest, oper, power);\n        else\n            _exact(MP(root)(dest, oper, power) != 0);\n    }\n\n    void MPEXPR(SquareRoot)::CUSTOM_MP(sqrt)(MP(ptr) dest, MP(srcptr) oper)\n    {\n        IS_NULL(_remainder)\n            ? MP(sqrt)(dest, oper)\n            : MP(sqrtrem)(dest, _remainder->_value, oper);\n    }\n\n    void MPEXPR(Gcd)::CUSTOM_MP(gcd)(MP(ptr) dest, MP(srcptr) a, MP(srcptr) b)\n    {\n        switch ((IS_NULL(_s) ? 0 : 1) + (IS_NULL(_t) ? 0 : 2))\n        {\n            case 0:\n               MP(gcd)(dest, a, b);\n               break;\n\n            case 1:\n               MP(gcdext)(dest, _s->_value, NULL, a, b);\n               break;\n\n            case 2:\n               MP(gcdext)(dest, _t->_value, NULL, b, a);\n               break;\n\n            case 3:\n               MP(gcdext)(dest, _s->_value, _t->_value, a, b);\n               break;\n        }\n    }\n\n    #pragma endregion\n\n    #pragma region Arithmetic\n\n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, +, Add, Int, Int)           \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Int, Ui)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Int, Ui)            \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Int, Si)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Int, Si)            \n                                                                                                        \n    
MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, -, Subtract, Int, Int)      \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Int, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Int, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Int, Si)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Int, Si)       \n\n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR(Multiply),   DEFINE, *, Multiply, Int, Int)      \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR(Multiply),   DEFINE, *, Multiply, Int, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR(Multiply),   DEFINE, *, Multiply, Int, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR(Multiply),   DEFINE, *, Multiply, Int, Si)       \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR(Multiply),   DEFINE, *, Multiply, Int, Si)       \n\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, <<, ShiftLeft, Int, Bits)   \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR(ShiftRight), DEFINE, >>, ShiftRight, Int, Bits)  \n                                                                                                           \n    MAKE_UNARY_OPERATOR            (MPEXPR_NAME,        DEFINE, -, Negate, Int)             \n                                                                                                        \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Abs, Int)                   \n                                                                                                           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR(Divide),     DEFINE, /, Divide, Int, Int)        \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR(DivideUi),   DEFINE, /, Divide, Int, Ui)         \n                                                                                                           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR(Mod),        
DEFINE, %, Mod, Int, Int)           \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR(ModUi),      DEFINE, %, Mod, Int, Ui)            \n                                                                                                           \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, ^, Power, Int, Ui)          \n                                                                                                        \n    MAKE_FUNCTION_WITH_ONE         (MPEXPR_NAME,        DEFINE, DivideExactly, Int)         \n    MAKE_FUNCTION_WITH_LIMB        (MPEXPR_NAME,        DEFINE, DivideExactly, Ui)          \n                                                                                                        \n    MAKE_FUNCTION_WITH_TWO         (MPEXPR_NAME,        DEFINE, PowerMod, Int, Int)         \n    MAKE_FUNCTION_WITH_TWO_LLIMB   (MPEXPR_NAME,        DEFINE, PowerMod, Ui, Int)          \n                                                                                                           \n    MAKE_FUNCTION_WITH_LIMB        (MPEXPR(Root),       DEFINE, Root, Ui)                   \n    MAKE_VOID_FUNCTION             (MPEXPR(SquareRoot), DEFINE, SquareRoot, Int)            \n                                                                                                           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, &, And, Int, Int)           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, |, Or, Int, Int)            \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, ^, Xor, Int, Int)           \n    MAKE_UNARY_OPERATOR            (MPEXPR_NAME,        DEFINE, ~, Complement, Int)         \n\n    DEFINE_UNARY_ASSIGNMENT_REF(Complement, Int, MP(com))\n    DEFINE_UNARY_ASSIGNMENT_REF(Negate, Int, MP(neg))\n    DEFINE_UNARY_ASSIGNMENT_REF(Abs, Int, MP(abs))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Add, Int, Ui, MP(add_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI (Add, Int, Si, 
MP(add_ui), MP(sub_ui))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Subtract, Int, Ui, MP(sub_ui))\n    DEFINE_BINARY_ASSIGNMENT_VAL_REF(Subtract, Ui, Int, MP(ui_sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_SI (Subtract, Int, Si, MP(sub_ui), MP(add_ui))\n    DEFINE_BINARY_ASSIGNMENT_SI_REF (Subtract, Si, Int, MP(ui_sub), MP(add_ui), MP(neg))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Multiply, Int, MP(mul))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Multiply, Int, Ui, MP(mul_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Multiply, Int, Si, MP(mul_si))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Divide, Int, CUSTOM_MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Divide, Int, Ui, CUSTOM_MP(div_ui))\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Mod, Int, CUSTOM_MP(mod))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Mod, Int, Ui, CUSTOM_MP(mod_ui))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftLeft, Int, Bits, MP(mul_2exp))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftRight, Int, Bits, CUSTOM_MP(div_2exp))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(DivideExactly, Int, MP(divexact))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(DivideExactly, Int, Ui, MP(divexact_ui))\n    \n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Power, Int, Ui, MP(pow_ui))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Root, Int, Ui, CUSTOM_MP(root))\n    DEFINE_UNARY_ASSIGNMENT_REF     (SquareRoot, Int, CUSTOM_MP(sqrt))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(And, Int, MP(and))\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Or, Int, MP(ior))\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Xor, Int, MP(xor))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Gcd, Int, CUSTOM_MP(gcd))\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Lcm, Int, MP(lcm))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Lcm, Int, Ui, MP(lcm_ui))\n\n    DEFINE_TERNARY_ASSIGNMENT_REF_REF_REF(PowerMod, Int, MP(powm));\n    DEFINE_TERNARY_ASSIGNMENT_REF_VAL_REF(PowerMod, Int, Ui, Int, MP(powm_ui))\n\n    void MPEXPR(MultiplyIntInt)::AddTo(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left, Right);\n        MP(addmul)(destination, CTXT(0), 
CTXT(1));\n    }\n\n    void MPEXPR(MultiplyIntUi)::AddTo(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left);\n        MP(addmul_ui)(destination, CTXT(0), Right);\n    }\n\n    void MPEXPR(MultiplyIntSi)::AddTo(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left);\n        if (Right < 0)\n            MP(submul_ui)(destination, CTXT(0), (mpir_ui)-Right);\n        else\n            MP(addmul_ui)(destination, CTXT(0), (mpir_ui)Right);\n        return;\n    }\n\n    void MPEXPR(MultiplyIntInt)::SubtractFrom(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left, Right);\n        MP(submul)(destination, CTXT(0), CTXT(1));\n    }\n\n    void MPEXPR(MultiplyIntUi)::SubtractFrom(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left);\n        MP(submul_ui)(destination, CTXT(0), Right);\n    }\n\n    void MPEXPR(MultiplyIntSi)::SubtractFrom(MP(ptr) destination)\n    {\n        IN_CONTEXT(Left);\n        if (Right < 0)\n            MP(addmul_ui)(destination, CTXT(0), (mpir_ui)-Right);\n        else\n            MP(submul_ui)(destination, CTXT(0), (mpir_ui)Right);\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(AddIntInt)\n    {\n        WHEN_IS_DEST(1, Left)\n        {\n            WHEN_IS(2, Right, MPEXPR(Multiply))\n            {\n                x2->AddTo(destination);\n                return;\n            }\n        }\n        WHEN_IS_DEST(3, Right)\n        {\n            WHEN_IS(4, Left, MPEXPR(Multiply))\n            {\n                x4->AddTo(destination);\n                return;\n            }\n        }\n\n        IN_CONTEXT(Left, Right);\n        MP(add)(destination, CTXT(0), CTXT(1));\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(SubtractIntInt)\n    {\n        WHEN_IS_DEST(1, Left)\n        {\n            WHEN_IS(2, Right, MPEXPR(Multiply))\n            {\n                x2->SubtractFrom(destination);\n                return;\n            }\n        }\n        WHEN_IS_DEST(3, Right)\n        {\n            WHEN_IS(4, Left, MPEXPR(Multiply))\n            {\n                
x4->SubtractFrom(destination);\n                MP(neg)(destination, destination);\n                return;\n            }\n        }\n\n        IN_CONTEXT(Left, Right);\n        MP(sub)(destination, CTXT(0), CTXT(1));\n    }\n\n    mpir_ui MPEXPR_NAME::Mod(mpir_ui d, RoundingModes rounding)\n    {\n        IN_CONTEXT(this);\n\n        switch((rounding == RoundingModes::Default) ? MpirSettings::RoundingMode : rounding)\n        {\n            case RoundingModes::Floor:\n                return MP(fdiv_ui)(CTXT(0), d);\n\n            case RoundingModes::Ceiling:\n                return MP(cdiv_ui)(CTXT(0), d);\n\n            default:\n                return MP(tdiv_ui)(CTXT(0), d);\n        }\n    }\n    \n    #pragma endregion\n\n    #pragma region IO\n\n    #define chunkSize 1024\n\n    size_t MPTYPE::Write(Stream^ stream)\n    {\n        mpir_out_struct out;\n        MP(out_raw_m)(out, _value);\n\n        auto buffer = gcnew array<unsigned char>(chunkSize);\n        auto ptr = out->written;\n        auto toWrite = (int)out->writtenSize;\n\n        while(toWrite > 0)\n        {\n            auto len = Math::Min(chunkSize, toWrite);\n            Marshal::Copy(IntPtr(ptr), buffer, 0, len);\n            stream->Write(buffer, 0, len);\n            ptr += len;\n            toWrite -= len;\n        }\n\n        (*__gmp_free_func)(out->allocated, out->allocatedSize);\n\n        return out->writtenSize;\n    }\n\n    size_t MPTYPE::Read(Stream^ stream)\n    {\n        unsigned char csize_bytes[4];\n        mpir_out_struct out;\n\n        // 4 bytes for size\n        for(int i = 0; i < 4; i++)\n        {\n            auto byte = stream->ReadByte();\n            if(byte < 0)\n                throw gcnew Exception(\"Unexpected end of stream\");\n\n            csize_bytes[i] = byte;\n        }\n\n        MP(inp_raw_p)(_value, csize_bytes, out);\n\n        if(out->writtenSize != 0)\n        {\n            auto buffer = gcnew array<unsigned char>(chunkSize);\n            auto 
ptr = out->written;\n            auto toRead = (int)out->writtenSize;\n\n            while(toRead > 0)\n            {\n                auto len = Math::Min(chunkSize, toRead);\n                if (len != stream->Read(buffer, 0, len))\n                    throw gcnew Exception(\"Unexpected end of stream\");\n\n                Marshal::Copy(buffer, 0, IntPtr(ptr), len);\n                ptr += len;\n                toRead -= len;\n            }\n\n            MP(inp_raw_m)(_value, out);\n        }\n\n        return 4 + out->writtenSize;\n    }\n\n    size_t MPTYPE::Write(TextWriter^ writer, int base, bool lowercase)\n    {\n        auto str = ToString(base, lowercase);\n        writer->Write(str);\n        return str->Length;\n    }\n\n    size_t MPTYPE::Read(TextReader^ reader, int base)\n    {\n        int c;\n        size_t nread = 0;\n\n        // Skip whitespace\n        while ((c = reader->Peek()) >= 0 && Char::IsWhiteSpace(c))\n        {\n            nread++;\n            reader->Read();\n        }\n\n        return ReadNoWhite(reader, base, nread);\n    }\n\n    // adapted from inp_str, which is shared by mpq_inp_str\n    size_t MPTYPE::ReadNoWhite(TextReader^ reader, int base, size_t nread)\n    {\n        char *str;\n        size_t alloc_size, str_size;\n        bool negative = false;\n        mp_size_t xsize;\n        const unsigned char* digit_value = __gmp_digit_value_tab;\n        int c = reader->Peek();\n\n        if (base > 36)\n        {\n            // For bases > 36, use the collating sequence\n            // 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n            digit_value += 224;\n            if (base > 62)\n                throw gcnew ArgumentException(\"Invalid base\", \"base\");\n        }\n\n        if (c == '-')\n        {\n            negative = true;\n            PEEK_NEXT_CHAR;\n        }\n\n        if (c == EOF || digit_value[c] >= (base == 0 ? 
10 : base))\n            throw gcnew Exception(\"No digits found\");\n\n        // If BASE is 0, try to find out the base by looking at the initial characters.\n        if (base == 0)\n        {\n            base = 10;\n            if (c == '0')\n            {\n                base = 8;\n                PEEK_NEXT_CHAR;\n\n                switch(c = reader->Peek())\n                {\n                    case 'x':\n                    case 'X':\n                        base = 16;\n                        PEEK_NEXT_CHAR;\n                        break;\n\n                    case 'b':\n                    case 'B':\n                        base = 2;\n                        PEEK_NEXT_CHAR;\n                }\n            }\n        }\n\n        // Skip leading zeros\n        while (c == '0')\n        {\n            PEEK_NEXT_CHAR;\n        }\n\n        alloc_size = 100;\n        str = (char *) (*__gmp_allocate_func) (alloc_size);\n        str_size = 0;\n\n        while (c != EOF)\n        {\n            int dig = digit_value[c];\n            if (dig >= base)\n                break;\n\n            if (str_size >= alloc_size)\n            {\n                size_t old_alloc_size = alloc_size;\n                alloc_size = alloc_size * 3 / 2;\n                str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);\n            }\n            str[str_size++] = dig;\n            reader->Read();\n            c = reader->Peek();\n        }\n        nread += str_size;\n\n        // Make sure the string is not empty, mpn_set_str would fail.\n        if (str_size == 0)\n        {\n            _value ->_mp_size = 0;\n        }\n        else\n        {\n            xsize = (((mp_size_t)\n                (str_size / __mp_bases[base].chars_per_bit_exactly))\n                / GMP_NUMB_BITS + 2);\n            MP(realloc) (_value, xsize);\n\n            // Convert the byte array in base BASE to our bignum format.\n            xsize = mpn_set_str (_value->_mp_d, 
(unsigned char *) str, str_size, base);\n            _value->_mp_size = (int)(negative ? -xsize : xsize);\n        }\n        (*__gmp_free_func) (str, alloc_size);\n        return nread;\n    }\n\n    void MPTYPE::ReadLimbs(array<mpir_ui>^ destination, int start, int length, int destinationIndex)\n    {\n        if (start < 0) throw gcnew ArgumentException(\"Invalid start\", \"start\");\n        if (length <= 0) throw gcnew ArgumentException(\"Invalid length\", \"length\");\n        if (_value->_mp_alloc < start + length) throw gcnew ArgumentException(\"Insufficient allocated limb data\", \"length\");\n        if (destinationIndex < 0) throw gcnew ArgumentException(\"Invalid destination index\", \"destinationIndex\");\n        if (destination->Length < destinationIndex + length) throw gcnew ArgumentException(\"Insufficient destination array size\", \"destinationIndex\");\n\n        auto src = &MP(limbs_read)(_value)[start];\n        for (int i = 0; i < length; i++)\n        {\n            destination[destinationIndex++] = *src++;\n        }\n    }\n\n    void MPTYPE::ModifyLimbs(array<mpir_ui>^ source, int start, int length, int sourceIndex, bool negative)\n    {\n        if (start < 0) throw gcnew ArgumentException(\"Invalid start\", \"start\");\n        if (length <= 0) throw gcnew ArgumentException(\"Invalid length\", \"length\");\n        if (sourceIndex < 0) throw gcnew ArgumentException(\"Invalid source index\", \"sourceIndex\");\n        if (source->Length < sourceIndex + length) throw gcnew ArgumentException(\"Insufficient source array size\", \"sourceIndex\");\n\n        auto oldAbsSize = ABS(_value->_mp_size);\n        auto newSize = MAX(_value->_mp_alloc, start + length);\n\n        auto ptr = MP(limbs_modify)(_value, newSize);\n        auto dest = &ptr[oldAbsSize];\n        for (int i = oldAbsSize; i < start; i++)\n            *dest++ = 0;\n\n        dest = &ptr[start];\n        for (int i = 0; i < length; i++)\n            *dest++ = 
source[sourceIndex++];\n\n        MP(limbs_finish)(_value, negative ? -newSize : newSize);\n    }\n\n    void MPTYPE::WriteLimbs(array<mpir_ui>^ source, int sourceIndex, mp_size_t newSize, bool negative)\n    {\n        auto length = newSize;\n        if (length == 0) throw gcnew ArgumentException(\"Invalid new size\", \"newSize\");\n        if (sourceIndex < 0) throw gcnew ArgumentException(\"Invalid source index\", \"sourceIndex\");\n        if (source->Length < sourceIndex + length) throw gcnew ArgumentException(\"Insufficient source array size\", \"sourceIndex\");\n\n        auto dest = MP(limbs_write)(_value, length);\n        for (mp_size_t i = 0; i < length; i++)\n            *dest++ = source[sourceIndex++];\n\n        MP(limbs_finish)(_value, negative ? -newSize : newSize);\n    }\n\n    #pragma endregion\n\n    #pragma region number-theoretic\n\n    bool MPTYPE::IsProbablePrime(MpirRandom^ random, int probability, mpir_ui pretested)\n    {\n        return MP(probable_prime_p)(_value, random->_value, probability, pretested) != 0;\n    }\n            \n    bool MPTYPE::IsLikelyPrime(MpirRandom^ random, mpir_ui pretested)\n    {\n        return MP(likely_prime_p)(_value, random->_value, pretested) != 0;\n    }\n\n    MAKE_FUNCTION_WITH_LIMB (MPEXPR_NAME, DEFINE, NextPrimeCandidate, Rnd)\n    DEFINE_ASSIGNMENT_PROLOG(NextPrimeCandidateIntRnd)\n    {\n        IN_CONTEXT(Left);\n        MP(next_prime_candidate)(destination, CTXT(0), Right->_value);\n    }\n\n    MAKE_FUNCTION_WITH_ONE (MPEXPR(Gcd), DEFINE, Gcd, Int)\n    MAKE_FUNCTION_WITH_ONE (MPEXPR_NAME, DEFINE, Lcm, Int)\n    MAKE_FUNCTION_WITH_LIMB (MPEXPR_NAME, DEFINE, Lcm, Ui)\n\n    MAKE_FUNCTION_WITH_ONE (MPEXPR_NAME, DEFINE, Invert, Int)\n    DEFINE_ASSIGNMENT_PROLOG(InvertIntInt)\n    {\n        IN_CONTEXT(Left, Right);\n        if (MP(invert)(destination, CTXT(0), CTXT(1)) == 0)\n            throw gcnew ArgumentException(\"Inverse does not exist\");\n    }\n\n    MAKE_FUNCTION_WITH_ONE 
(MPEXPR(RemoveFactors), DEFINE, RemoveFactors, Int)\n    DEFINE_ASSIGNMENT_PROLOG(RemoveFactorsIntInt)\n    {\n        IN_CONTEXT(Left, Right);\n        auto result = MP(remove)(destination, CTXT(0), CTXT(1));\n        if(!IS_NULL(_count))\n            _count(result);\n    }\n\n    DEFINE_BINARY_ASSIGNMENT_VAL_VAL(Power, Ui, Ui, MP(ui_pow_ui))\n\n    DEFINE_ASSIGNMENT_PROLOG(FactorialUiUi)\n    {\n        switch (Right)\n        {\n            case 1:\n                MP(fac_ui)(destination, Left);\n                break;\n\n            case 2:\n                MP(2fac_ui)(destination, Left);\n                break;\n\n            default:\n                MP(mfac_uiui)(destination, Left, Right);\n                break;\n        }\n    }\n\n    DEFINE_UNARY_ASSIGNMENT_VAL(Primorial, Ui, MP(primorial_ui))\n    DEFINE_BINARY_ASSIGNMENT_VAL_VAL(Binomial, Ui, Ui, MP(bin_uiui))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(Binomial, Int, Ui, MP(bin_ui))\n\n    DEFINE_ASSIGNMENT_PROLOG(FibonacciUi)\n    {\n        IS_NULL(_previous)\n            ? MP(fib_ui)(destination, Operand)\n            : MP(fib2_ui)(destination, _previous->_value, Operand);\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(LucasUi)\n    {\n        IS_NULL(_previous)\n            ? MP(lucnum_ui)(destination, Operand)\n            : MP(lucnum2_ui)(destination, _previous->_value, Operand);\n    }\n\n    #pragma endregion\n};"
  },
  {
    "path": "mpir.net/mpir.net/HugeInt.h",
    "content": "/*\r\nCopyright 2014 Alex Dyachenko\r\n\r\nThis file is part of the MPIR Library.\r\n\r\nThe MPIR Library is free software; you can redistribute it and/or modify\r\nit under the terms of the GNU Lesser General Public License as published\r\nby the Free Software Foundation; either version 3 of the License, or (at\r\nyour option) any later version.\r\n\r\nThe MPIR Library is distributed in the hope that it will be useful, but\r\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\r\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\r\nLicense for more details.\r\n\r\nYou should have received a copy of the GNU Lesser General Public License\r\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \r\n*/\r\n\r\n#pragma once\r\n\r\nusing namespace System;\r\nusing namespace System::IO;\r\nusing namespace System::Runtime::InteropServices;\r\n\r\n#ifdef SPECIALIZE_EXPRESSIONS\r\n#undef SPECIALIZE_EXPRESSIONS\r\n#undef MP\r\n#undef CUSTOM_MP\r\n#undef MPSTRUCT\r\n#undef MPTYPE\r\n#undef MPTYPE_NAME\r\n#undef MPEXPR_NAME\r\n#undef MPEXPR\r\n#undef CTXT\r\n#undef ASSIGN_TO\r\n#undef Mpt\r\n#undef SET_CONTEXT_PRECISION\r\n#endif\r\n#define SPECIALIZE_EXPRESSIONS\r\n#define Mpt Int\r\n#define CUSTOM_MP(x) custom_mpz_##x\r\n#define MPSTRUCT __mpz_struct\r\n#define MP(x) mpz_##x\r\n#define MPTYPE HugeInt\r\n#define MPTYPE_NAME Integer\r\n#define MPEXPR_NAME LIT(MPTYPE_NAME)Expression\r\n#define MPEXPR(x) LIT(MPTYPE_NAME)##x##Expression\r\n#define CTXT(x) context.IntArgs[x]\r\n#define ASSIGN_TO CONCAT(AssignTo, LIT(MPTYPE_NAME))\r\n#define SET_CONTEXT_PRECISION\r\n#include \"ExpressionMacros.h\"\r\n\r\nextern __mpz_struct HugeIntConst1;\r\n\r\nnamespace MPIR\r\n{\r\n    ref class MpirRandom;\r\n    ref class RationalExpression;\r\n    ref class FloatExpression;\r\n    ref class MPTYPE;\r\n    ref class MPEXPR(Multiply);\r\n    ref class MPEXPR(Divide);\r\n    ref class MPEXPR(DivideUi);\r\n    ref 
class MPEXPR(Mod);\r\n    ref class MPEXPR(DivMod);\r\n    ref class MPEXPR(ModUi);\r\n    ref class MPEXPR(ShiftRight);\r\n    ref class MPEXPR(Root);\r\n    ref class MPEXPR(SquareRoot);\r\n    ref class MPEXPR(Gcd);\r\n    ref class MPEXPR(RemoveFactors);\r\n    ref class MPEXPR(Sequence);\r\n    \r\n    #pragma region IntegerExpression\r\n\r\n    /// <summary>\r\n    /// Base class for all integer expressions resulting from many integer operations on MPIR types.\r\n    /// <para>Expressions can be arbitrarily nested, and are lazily evaluated \r\n    /// when they are either assigned to the Value property of an MPIR object, or are consumed by a function or operator that returns a primitive type.\r\n    /// </para>Assignment to the Value property is necessary because .Net does not support overloading the assignment operator.\r\n    /// </summary>\r\n    public ref class MPEXPR_NAME abstract : public IComparable, IComparable<MPEXPR_NAME^>, IEquatable<MPEXPR_NAME^>\r\n    {\r\n        internal:\r\n            MPEXPR_NAME() { }\r\n            virtual void AssignTo(MP(ptr) destination) abstract;\r\n            virtual void ASSIGN_TO(EvaluationContext& context)\r\n            {\r\n                context.Initialized(IntInitialized);\r\n                auto ptr = &context.Temp[context.Index].MPTYPE_NAME;\r\n                CTXT(context.Index++) = ptr;\r\n                MP(init)(ptr);\r\n                AssignTo(ptr); \r\n            }\r\n            virtual void AssignToRational(EvaluationContext& context)\r\n            {\r\n                context.Initialized(IntInitialized);\r\n                auto ptr = &context.Temp[context.Index].Rational;\r\n                context.RationalArgs[context.Index++] = ptr;\r\n                MP(init)(&ptr->_mp_num);\r\n                AssignTo(&ptr->_mp_num); \r\n                ptr->_mp_den = HugeIntConst1;\r\n            }\r\n            static MPEXPR_NAME()\r\n            {\r\n                MP(init_set_ui)(&HugeIntConst1, 
1);\r\n            }\r\n\r\n        private:\r\n            int CompareTo(Object^ a, bool& valid);\r\n\r\n        public:\r\n            #pragma region Arithmetic and Bitwise logic\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_ui b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested 
operation</returns>\r\n            static MPEXPR_NAME^ operator + (mpir_ui a, MPEXPR_NAME^ b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_si b);\r\n\r\n            /// <summary>Adds two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to add to</param>\r\n            /// <param name=\"b\">Source value to add</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator + (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, 
computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (mpir_ui a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n           
 /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_si b);\r\n                                                                                                          \r\n            /// <summary>Subtracts two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to subtract from</param>\r\n            /// <param name=\"b\">Source value to subtract</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Multiply)^ operator * (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                               
           \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Multiply)^ operator * (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                                          \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Multiply)^ operator * (mpir_ui a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value 
property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Multiply)^ operator * (MPEXPR_NAME^ a, mpir_si b);\r\n                                                                                                          \r\n            /// <summary>Multiplies two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"b\">Source value to multiply by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Multiply)^ operator * (mpir_si a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Shifts the <paramref name=\"a\"/> source operand to the left by <paramref name=\"bits\"/>, i.e. multiplies <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to multiply</param>\r\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. 
power of 2 to multiply <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator << (MPEXPR_NAME^ a, mp_bitcnt_t bits);\r\n                                                                                                          \r\n            /// <summary>Shifts the <paramref name=\"a\"/> source operand to the right by <paramref name=\"bits\"/>, i.e. divides <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>By default, the shifted value (i.e., the quotient of the division) is computed.  If instead the shifted bits (i.e., the remainder) is desired, you can call \r\n            /// the Remainder method on the resulting expression before you assign it.  You can also specify the rounding mode to use for the division by calling a method on the resulting expression.</summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. 
power of 2 to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// <para>The expression exposes methods you can call to select whether you need to compute the quotient or remainder of the division, and/or to set the rounding mode.\r\n            /// </para>By default, the shifted value (i.e., quotient) is computed and the rounding mode defaults to the static MpirSettings.DefaultRoundingMode.\r\n            /// </returns>\r\n            static MPEXPR(ShiftRight)^ operator >> (MPEXPR_NAME^ a, mp_bitcnt_t bits);\r\n\r\n            /// <summary>Negates the source value.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to negate</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the remainder or specify the rounding mode to use for the division by calling methods on the resulting expression, before assigning it.</summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned 
to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// <para>The expression exposes methods you can call optionally if you need to save the remainder of the division, and/or to set the rounding mode.\r\n            /// </para>By default, the remainder is not computed and the rounding mode defaults to the static MpirSettings.DefaultRoundingMode.\r\n            /// </returns>\r\n            static MPEXPR(Divide)^ operator / (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the remainder or specify the rounding mode to use for the division by calling methods on the resulting expression, before assigning it.</summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// <para>The expression exposes methods you can call optionally if you need to save the remainder of the division, and/or to set the rounding mode.\r\n            /// </para>By default, the remainder is not computed and the rounding mode defaults to the static MpirSettings.DefaultRoundingMode.\r\n            /// </returns>\r\n            static MPEXPR(DivideUi)^ operator / (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                                          \r\n            /// <summary>Calculates the remainder from the division of two numbers.\r\n          
  /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the quotient in addition to the remainder or specify the rounding mode to use for the division \r\n            /// by calling methods on the resulting expression, before assigning it.</summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// <para>The expression exposes methods you can call optionally if you need to save the quotient in addition to the remainder, and/or to set the rounding mode.\r\n            /// </para>By default, the quotient is not computed and the rounding mode defaults to the static MpirSettings.DefaultRoundingMode.\r\n            /// </returns>\r\n            static MPEXPR(Mod)^ operator % (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Calculates the remainder from the division of two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the quotient in addition to the remainder or specify the rounding mode to use for the division \r\n            /// by calling methods on the resulting expression, before assigning it.</summary>\r\n            /// <param name=\"a\">Source value to divide</param>\r\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\r\n            /// <returns>An expression object that, when assigned to the Value 
property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// <para>The expression exposes methods you can call optionally if you need to save the quotient in addition to the remainder, and/or to set the rounding mode.\r\n            /// </para>By default, the quotient is not computed and the rounding mode defaults to the static MpirSettings.DefaultRoundingMode.\r\n            /// </returns>\r\n            static MPEXPR(ModUi)^ operator % (MPEXPR_NAME^ a, mpir_ui b);\r\n                                                                                                          \r\n            /// <summary>Raises the source value to the specified power.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to raise to a power</param>\r\n            /// <param name=\"power\">Power to raise <paramref name=\"a\"/> to</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator ^ (MPEXPR_NAME^ a, mpir_ui power);\r\n                                                                                                          \r\n            /// <summary>Computes the bitwise AND of two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to AND</param>\r\n            /// <param name=\"b\">Source value to AND with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested 
operation</returns>\r\n            static MPEXPR_NAME^ operator & (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Computes the bitwise (inclusive) OR of two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to OR</param>\r\n            /// <param name=\"b\">Source value to OR with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator | (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Computes the bitwise XOR (exclusive or) of two numbers.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to XOR</param>\r\n            /// <param name=\"b\">Source value to XOR with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator ^ (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\r\n                                                                                                          \r\n            /// <summary>Computes the bitwise complement of a number.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a 
method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to complement</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ operator ~ (MPEXPR_NAME^ a);\r\n\r\n            /// <summary>If the source is &gt;= 0, returns the population count of op, which is the number of 1 bits in the binary representation.\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <para>If the source is &lt; 0, the number of 1s is infinite, and the return value is ulong.MaxValue, the largest possible bit count.\r\n#else\r\n            /// <para>If the source is &lt; 0, the number of 1s is infinite, and the return value is uint.MaxValue, the largest possible bit count.\r\n#endif\r\n            /// </para>Because the result is a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <returns>The population count for a non-negative number</returns>\r\n            mp_bitcnt_t PopCount() { IN_CONTEXT(this); return MP(popcount)(CTXT(0)); }\r\n\r\n            /// <summary>If this number and the operand are both &gt;= 0 or both &lt; 0, returns the hamming distance between them, which is the number of bit positions with different bit values.\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <para>If one operand is &gt;= 0 and the other &lt; 0 then the number of bits different is infinite, and the return value is ulong.MaxValue, the largest possible bit count.\r\n#else\r\n            /// <para>If one operand is &gt;= 0 and the other &lt; 0 then the number of bits different is infinite, and the return value is uint.MaxValue, the largest possible bit count.\r\n#endif\r\n            /// </para>Because the result is a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Source value to compute the hamming 
distance to</param>\r\n            /// <returns>The hamming distance between this number and <paramref name=\"a\"/></returns>\r\n            mp_bitcnt_t HammingDistance(MPEXPR_NAME^ a) { IN_CONTEXT(this, a); return MP(hamdist)(CTXT(0), CTXT(1)); }\r\n\r\n            /// <summary>Scans the source number, starting from the <paramref name=\"start\"/> bit, towards more significant bits, until the first 0 or 1 bit\r\n            /// (depending on the <paramref name=\"value\"/>) is found, and returns the index of the found bit.\r\n            /// <para>If the bit at the starting position is already what's sought, then <paramref name=\"start\"/> is returned.\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// </para>If there's no bit found, then ulong.MaxValue (the largest possible bit count) is returned. \r\n#else\r\n            /// </para>If there's no bit found, then uint.MaxValue (the largest possible bit count) is returned.\r\n#endif\r\n            /// This will happen with <paramref name=\"value\"/> = true past the end of a non-negative number, or with <paramref name=\"value\"/> = false past the end of a negative number.\r\n            /// <para>A false bit will always be found at the <paramref name=\"start\"/> position past the end of a non-negative number, and a true bit past the end of a negative number.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">Value of the bit to scan for, true for 1, false for 0</param>\r\n            /// <param name=\"start\">Starting bit position to search.  The least significant bit is zero.</param>\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <returns>The index of the found bit, or ulong.MaxValue if no bit found.</returns>\r\n#else\r\n            /// <returns>The index of the found bit, or uint.MaxValue if no bit found.</returns>\r\n#endif\r\n            mp_bitcnt_t FindBit(bool value, mp_bitcnt_t start) { IN_CONTEXT(this); return value ? 
MP(scan1)(CTXT(0), start) : MP(scan0)(CTXT(0), start); }\r\n\r\n            /// <summary>Computes the absolute value of the source number.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Abs();\r\n                                                                                                          \r\n            /// <summary>Divides two numbers where it is known in advance that the division is exact.  This method is faster than normal division,\r\n            /// but produces an undefined result when the division is not exact.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            MPEXPR_NAME^ DivideExactly(MPEXPR_NAME^ a);\r\n                                                                                                          \r\n            /// <summary>Divides two numbers where it is known in advance that the division is exact.  
This method is faster than normal division,\r\n            /// but produces an undefined result when the division is not exact.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to divide by</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\r\n            /// </returns>\r\n            MPEXPR_NAME^ DivideExactly(mpir_ui a);\r\n                                                                                                          \r\n            /// <summary>Raises the source value to the specified <paramref name=\"power\"/> modulo <paramref name=\"modulo\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"power\">Power to raise the source value to.\r\n            /// <para>Negative power values are supported if an inverse mod <paramref name=\"modulo\"/> exists, otherwise divide by zero is raised.</para></param>\r\n            /// <param name=\"modulo\">Modulo to perform the powering operation with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ PowerMod(MPEXPR_NAME^ power, MPEXPR_NAME^ modulo);\r\n                                                                                                          \r\n            /// <summary>Raises the source value to the specified <paramref name=\"power\"/> modulo <paramref name=\"modulo\"/>.\r\n            /// <para>As with all expressions, the result is not computed 
until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"power\">Power to raise the source value to</param>\r\n            /// <param name=\"modulo\">Modulo to perform the powering operation with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ PowerMod(mpir_ui power, MPEXPR_NAME^ modulo);\r\n\r\n            /// <summary>Computes the truncated integer part of the root of the specified <paramref name=\"power\"/> from the source value.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the remainder from the root operation, or a flag indicating whether the root was exact, \r\n            /// by calling a method on the resulting expression, before assigning it.</summary>\r\n            /// <param name=\"power\">Power of the root to compute.</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR(Root)^ Root(mpir_ui power);\r\n\r\n            /// <summary>Computes the truncated integer part of the square root of the source value.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>You can optionally save the remainder from the root operation\r\n            /// by calling a method on the resulting expression, before assigning it.</summary>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes 
the requested operation</returns>\r\n            MPEXPR(SquareRoot)^ SquareRoot();\r\n\r\n            /// <summary>Calculates the remainder from the division of two numbers, using the rounding mode set in MpirSettings.RoundingMode.\r\n            /// <para>Because the result is a primitive type, it is computed immediately.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">divisor to divide the source by</param>\r\n            /// <returns>The remainder of the division</returns>\r\n            mpir_ui Mod(mpir_ui a) { return Mod(a, RoundingModes::Default); }\r\n\r\n            /// <summary>Calculates the remainder from the division of two numbers, using the specified rounding mode.\r\n            /// <para>Because the result is a primitive type, it is computed immediately.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">divisor to divide the source by</param>\r\n            /// <param name=\"roundingMode\">rounding mode to use for the division</param>\r\n            /// <returns>The remainder of the division</returns>\r\n            mpir_ui Mod(mpir_ui a, RoundingModes roundingMode);\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Comparisons\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Both this method and Equals() allow the argument to be a RationalExpression, however we do not define mixed equality operators,\r\n            /// because otherwise testing for a null/non-null expression would require an awkward explicit cast on the null.\r\n            /// <para>Although this only applies to equality operators, while comparison operators could have possibly worked, we're leaving out all mixed operators for now.\r\n            /// </para>Since comparison via CompareTo() or Equals() is possible between ints and rationals, 
operators would just be another way to do the same thing.</summary>\r\n            /// <param name=\"a\">Value to compare the source with.  This can be an integer or rational multi-precision number or expression, or a supported primitive type (long, ulong, or double).</param>\r\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            virtual int CompareTo(Object^ a) sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            virtual int CompareTo(MPEXPR_NAME^ a) sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\r\n            virtual bool Equals(MPEXPR_NAME^ a) sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Both this method and CompareTo() allow the argument to be a RationalExpression, however we do not define mixed equality operators,\r\n            /// because otherwise testing for a null/non-null expression would require an awkward explicit cast on the null.\r\n            /// 
<para>Although this only applies to equality operators, while comparison operators could have possibly worked, we're leaving out all mixed operators for now.\r\n            /// </para>Since comparison via CompareTo() or Equals() is possible between ints and rationals, operators would just be another way to do the same thing.</summary>\r\n            /// <param name=\"a\">Value to compare the source with.  This can be an integer or rational multi-precision number or expression, or a supported primitive type (long, ulong, or double).</param>\r\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\r\n            virtual bool Equals(Object^ a) override sealed;\r\n\r\n            /// <summary>Computes the hash code of the source value.\r\n            /// <para>If called on an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para>Multi-precision classes are mutable with value semantics.  The hash code is based on the value, and will change if the value changes.\r\n            /// For this reason, the value of an object must not be modified while the object is contained in a hash table.</summary>\r\n            /// <returns>a signed integer hash code for the value.</returns>\r\n            virtual int GetHashCode() override sealed;\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? 
!IS_NULL(b) : a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  IS_NULL(b) : a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  IS_NULL(b) : a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? 
!IS_NULL(b) : a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source 
value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// 
<summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source 
value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an 
expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the 
comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is 
performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (mpir_si b, 
MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param 
name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            
/// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <  (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >= (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator >  (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source 
value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator <= (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator != (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) != 0; }\r\n\r\n            /// <summary>Compares two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compare</param>\r\n            /// <param name=\"b\">Source value to compare with</param>\r\n            /// <returns>A boolean result of the comparison.</returns>\r\n            static bool operator == (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) == 0; }\r\n\r\n            /// <summary>Compares the absolute values of two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the absolute value of the source is greater than the absolute value of <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            int CompareAbsTo(MPEXPR_NAME^ a) { IN_CONTEXT(this, a); return MP(cmpabs)(CTXT(0), CTXT(1)); }\r\n\r\n            /// 
<summary>Compares the absolute values of two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the absolute value of the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            int CompareAbsTo(mpir_ui a) { IN_CONTEXT(this); return MP(cmpabs_ui)(CTXT(0), a); }\r\n\r\n            /// <summary>Compares the absolute values of two numbers.\r\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Value to compare the source with</param>\r\n            /// <returns>A positive number if the absolute value of the source is greater than the absolute value of <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\r\n            int CompareAbsTo(double a) { IN_CONTEXT(this); return MP(cmpabs_d)(CTXT(0), a); }\r\n\r\n            /// <summary>Calculates the sign (+1, 0, or -1) of the source value.\r\n            /// <para>If the source is an expression, it is evaluated into a temporary variable before the sign is computed.\r\n            /// </para></summary>\r\n            /// <returns>+1 if the source is positive, -1 if negative, and 0 if zero.</returns>\r\n            int Sign() { IN_CONTEXT(this); return MP(sgn)(CTXT(0)); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Utility methods\r\n\r\n            /// <summary>\r\n            /// Checks if the source is evenly divisible by <paramref name=\"a\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param 
name=\"a\">Divisor to test with.  This can be zero; only zero is considered divisible by zero.</param>\r\n            /// <returns>True if the source is evenly divisible by <paramref name=\"a\"/></returns>\r\n            bool IsDivisibleBy(MPEXPR_NAME^ a) { IN_CONTEXT(this, a); return MP(divisible_p)(CTXT(0), CTXT(1)) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is evenly divisible by <paramref name=\"a\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Divisor to test with.  This can be zero; only zero is considered divisible by zero.</param>\r\n            /// <returns>True if the source is evenly divisible by <paramref name=\"a\"/></returns>\r\n            bool IsDivisibleBy(mpir_ui a) { IN_CONTEXT(this); return MP(divisible_ui_p)(CTXT(0), a) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is evenly divisible by 2^<paramref name=\"power\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"power\">Power of 2 to use for the divisor</param>\r\n            /// <returns>True if the source is evenly divisible by 2^<paramref name=\"power\"/></returns>\r\n            bool IsDivisibleByPowerOf2(mp_bitcnt_t power) { IN_CONTEXT(this); return MP(divisible_2exp_p)(CTXT(0), power) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is congruent to <paramref name=\"a\"/> modulo <paramref name=\"mod\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Divisor to test with.  
This can be zero; only zero is considered divisible by zero.</param>\r\n            /// <param name=\"mod\">Modulo with respect to which to test for congruency</param>\r\n            /// <returns>True if the source is congruent to <paramref name=\"a\"/> modulo <paramref name=\"mod\"/></returns>\r\n            bool IsCongruentTo(MPEXPR_NAME^ a, MPEXPR_NAME^ mod) { IN_CONTEXT(this, a, mod); return MP(congruent_p)(CTXT(0), CTXT(1), CTXT(2)) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is congruent to <paramref name=\"a\"/> modulo <paramref name=\"mod\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Divisor to test with.  This can be zero; only zero is considered divisible by zero.</param>\r\n            /// <param name=\"mod\">Modulo with respect to which to test for congruency</param>\r\n            /// <returns>True if the source is congruent to <paramref name=\"a\"/> modulo <paramref name=\"mod\"/></returns>\r\n            bool IsCongruentTo(mpir_ui a, mpir_ui mod) { IN_CONTEXT(this); return MP(congruent_ui_p)(CTXT(0), a, mod) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is congruent to <paramref name=\"a\"/> modulo 2^<paramref name=\"power\"/>.\r\n            /// Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Divisor to test with</param>\r\n            /// <param name=\"power\">Power of 2 to use for the modulo</param>\r\n            /// <returns>True if the source is congruent to <paramref name=\"a\"/> modulo 2^<paramref name=\"power\"/></returns>\r\n            bool IsCongruentToModPowerOf2(MPEXPR_NAME^ a, mp_bitcnt_t power) { IN_CONTEXT(this, a); return MP(congruent_2exp_p)(CTXT(0), CTXT(1), power) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is a 
perfect power.\r\n            /// <para>Because this method returns a primitive type, it is computed immediately.\r\n            /// </para>Both 0 and 1 are considered perfect powers.\r\n            /// <para>Negative values are accepted, but of course can only be odd powers.\r\n            /// </para></summary>\r\n            /// <returns>True if the source is a perfect power</returns>\r\n            bool IsPerfectPower() { IN_CONTEXT(this); return MP(perfect_power_p)(CTXT(0)) != 0; }\r\n\r\n            /// <summary>\r\n            /// Checks if the source is a perfect square.\r\n            /// <para>Because this method returns a primitive type, it is computed immediately.\r\n            /// </para>Both 0 and 1 are considered perfect squares.\r\n            /// </summary>\r\n            /// <returns>True if the source is a perfect square</returns>\r\n            bool IsPerfectSquare() { IN_CONTEXT(this); return MP(perfect_square_p)(CTXT(0)) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns the size of the absolute value of the number, measured in number of limbs.\r\n            /// <para>If op is zero, the returned value will be zero.\r\n            /// </para>For a HugeInt object, this method simply retrieves the value from a stored private field.\r\n            /// <para>However for an expression it can potentially be very expensive, since it requires evaluation of the expression result.\r\n            /// </para>Because this method returns a primitive type, it is computed immediately.\r\n            /// </summary>\r\n            /// <returns>The number of limbs used to represent the number</returns>\r\n            size_t Size() { IN_CONTEXT(this); return MP(size)(CTXT(0)); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Number-theoretic\r\n\r\n            /// <summary>\r\n            /// Looks for the next candidate prime greater than this number.\r\n            /// <para>Note that this function will occasionally 
return composites.\r\n            /// It is designed to give a quick method for generating numbers which do not have small prime factors (less than 1000)\r\n            /// and which pass a small number of rounds of Miller-Rabin (just two rounds).\r\n            /// </para>The test is designed for speed, assuming that a high quality followup test can then be run to ensure primality.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"random\">Random number generator to use for probabilistic primality tests</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, performs the requested operation</returns>\r\n            MPEXPR_NAME^ NextPrimeCandidate(MpirRandom^ random);\r\n\r\n            /// <summary>Computes the greatest common divisor of this number and <paramref name=\"a\"/>.\r\n            /// <para>The result is always positive even if one or both inputs are negative (or zero if both inputs are zero).\r\n            /// </para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </summary>\r\n            /// <param name=\"a\">Source value to compute the GCD with</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR(Gcd)^ Gcd(MPEXPR_NAME^ a);\r\n\r\n            /// <summary>\r\n            /// Computes the greatest common divisor of this number and <paramref name=\"a\"/>.\r\n            /// <para>The result is always positive even if the source number is negative.\r\n            /// </para>Because the result is a primitive type, it is computed 
immediately.\r\n            /// </summary>\r\n            /// <param name=\"a\">Source value to compute the GCD with.  If zero, zero is returned.</param>\r\n            /// <returns>The greatest common divisor of the absolute value of this number and <paramref name=\"a\"/>.</returns>\r\n            mpir_ui Gcd(mpir_ui a) { IN_CONTEXT(this); return MP(gcd_ui)(nullptr, CTXT(0), a); }\r\n                                                                                                          \r\n            /// <summary>Computes the least common multiple of this number and <paramref name=\"a\"/>.\r\n            /// <para>The result is always positive, irrespective of the signs of the source numbers.\r\n            /// </para>The result will be zero if either source number is zero.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compute the LCM with.</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Lcm(MPEXPR_NAME^ a);\r\n                                                                                                          \r\n            /// <summary>Computes the least common multiple of this number and <paramref name=\"a\"/>.\r\n            /// <para>The result is always positive, irrespective of the signs of the source numbers.\r\n            /// </para>The result will be zero if either source number is zero.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">Source value to compute the LCM with.</param>\r\n            /// 
<returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Lcm(mpir_ui a);\r\n                                                                                                          \r\n            /// <summary>Computes the inverse of this number modulo <paramref name=\"modulo\"/>.\r\n            /// <para>If the inverse exists, the result will satisfy 0 &lt;= result &lt; <paramref name=\"modulo\"/>.\r\n            /// </para>If an inverse doesn't exist an ArgumentException is thrown.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"modulo\">Modulo with respect to which to invert the number.</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR_NAME^ Invert(MPEXPR_NAME^ modulo);\r\n\r\n            /// <summary>Remove all occurrences of the <paramref name=\"factor\"/> from the source number.\r\n            /// <para>You can optionally save the number of such occurrences that were removed.\r\n            /// </para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </summary>\r\n            /// <param name=\"factor\">Factor to remove from the source number.</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            MPEXPR(RemoveFactors)^ RemoveFactors(MPEXPR_NAME^ factor);\r\n\r\n            #pragma endregion\r\n    };\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region 
MpirSettings\r\n\r\n    /// <summary>\r\n    /// Static class for MPIR settings such as rounding defaults\r\n    /// </summary>\r\n    public ref class MpirSettings abstract sealed\r\n    {\r\n        private:\r\n            static int _toStringDigits;\r\n\r\n        internal:\r\n            static MPTYPE^ _toStringModulo;\r\n\r\n        public:\r\n\r\n#undef GMP_VERSION\r\n\r\n            /// <summary>\r\n            /// Represents the total number of bits in a single MPIR limb, including data bits and nail bits\r\n            /// </summary>\r\n            literal int BITS_PER_LIMB = BITS_PER_UI;\r\n\r\n            /// <summary>\r\n            /// Represents the number of nail bits in a single MPIR limb.  Nail bits are used internally by MPIR\r\n            /// </summary>\r\n            literal int NAIL_BITS_PER_LIMB = GMP_NAIL_BITS;\r\n\r\n            /// <summary>\r\n            /// Represents the number of data bits in a single MPIR limb\r\n            /// </summary>\r\n            literal int USABLE_BITS_PER_LIMB = GMP_NUMB_BITS;\r\n\r\n            /// <summary>\r\n            /// Represents the version of GMP with which the underlying MPIR library is compatible\r\n            /// </summary>\r\n            static initonly const Version^ GMP_VERSION = gcnew Version(__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, __GNU_MP_VERSION_PATCHLEVEL);\r\n\r\n            /// <summary>\r\n            /// Represents the version of the underlying MPIR library\r\n            /// </summary>\r\n            static initonly const Version^ MPIR_VERSION = gcnew Version(_MSC_MPIR_VERSION);\r\n\r\n            /// <summary>\r\n            /// Gets or sets the default rounding mode used for MPIR division operations that don't explicitly specify a rounding mode.\r\n            /// <para>Defaults to Truncate.\r\n            /// </para></summary>\r\n            static property RoundingModes RoundingMode;\r\n\r\n            /// <summary>\r\n            /// Gets or sets the maximum number of 
digits the object.ToString() method override will output.  \r\n            /// <para>If a number is longer than this number of digits, it will be output as \"[-]...NNNNN\" with the least significant digits shown.\r\n            /// </para>Defaults to 256.\r\n            /// <para>Specifying 0 will cause all digits to be output.  This should be used with care, as for example, the debugger calls ToString()\r\n            /// on watched objects, and may have performance issues with large objects.\r\n            /// </para></summary>\r\n            static property int ToStringDigits\r\n            {\r\n                int get() { return _toStringDigits; }\r\n                void set(int value);\r\n            }\r\n\r\n            static MpirSettings()\r\n            {\r\n                RoundingMode = RoundingModes::Truncate;\r\n                ToStringDigits = 256;\r\n            }\r\n    };\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region mid-level abstract expression specializations\r\n\r\n    /// <summary>\r\n    /// Expression that results from a multiplication operator.  Used internally to optimize computations that can use addmul/submul MPIR calls.\r\n    /// </summary>\r\n    public ref class MPEXPR(Multiply) abstract : MPEXPR_NAME\r\n    {\r\n        internal:\r\n            MPEXPR(Multiply)() { }\r\n            virtual void AddTo(MP(ptr) destination) abstract;\r\n            virtual void SubtractFrom(MP(ptr) destination) abstract;\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a division or modulo operator.  Allows to set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(DivMod) abstract : MPEXPR_NAME \r\n    {\r\n        internal:\r\n            MPEXPR(DivMod)() { }\r\n            RoundingModes rounding;\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally sets the rounding mode for the division.  
If not set, the static MpirSettings.RoundingMode will be used.\r\n            /// </summary>\r\n            /// <param name=\"mode\">the mode to use.  If this is Default, the static MpirSettings.RoundingMode is used.</param>\r\n            /// <returns>An updated expression, with its internal state updated to use the specified rounding mode.</returns>\r\n            MPEXPR_NAME^ Rounding(RoundingModes mode)\r\n            {\r\n                rounding = mode;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a division operator.  Allows to save the remainder to a separate result object, and/or set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(Divide) abstract : MPEXPR(DivMod) \r\n    {\r\n        internal:\r\n            MPEXPR(Divide)() { }\r\n            MPTYPE^ _remainder;\r\n            void CUSTOM_MP(div)(MP(ptr) q, MP(srcptr) n, MP(srcptr) d);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally save the remainder of the division to a separate result.  This cannot be the same object the resulting division quotient is being assigned to.\r\n            /// </summary>\r\n            /// <param name=\"destination\">destination for the remainder.  This cannot be the same object the resulting division quotient is being assigned to.</param>\r\n            /// <returns>An updated expression, with its internal state updated to compute the remainder.</returns>\r\n            MPEXPR(DivMod)^ SavingRemainderTo(MPTYPE^ destination)\r\n            {\r\n                _remainder = destination;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a division operator.  
Allows to save the remainder, and/or set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(DivideUi) abstract : MPEXPR(Divide) \r\n    {\r\n        private:\r\n            Action<mpir_ui>^ _limbRemainder;\r\n\r\n        internal:\r\n            MPEXPR(DivideUi)() { }\r\n            void CUSTOM_MP(div_ui)(MP(ptr) q, MP(srcptr) n, mpir_ui d);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally saves the remainder of the division to a separate result.\r\n            /// </summary>\r\n            /// <param name=\"callback\">The delegate that will be called with the remainder of the division.  \r\n            /// The delegate is called when the division is evaluated, i.e. is assigned to the Value property or consumed by a method that returns a primitive type.</param>\r\n            /// <returns>An updated expression, with its internal state updated to compute the remainder.</returns>\r\n            MPEXPR(Divide)^ SettingRemainderTo(Action<mpir_ui>^ callback)\r\n            {\r\n                _limbRemainder = callback;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a modulo operator.  Allows to save the division result to a separate object, and/or set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(Mod) abstract : MPEXPR(DivMod) \r\n    {\r\n        internal:\r\n            MPEXPR(Mod)() { }\r\n            MPTYPE^ _quotient;\r\n            void CUSTOM_MP(mod)(MP(ptr) r, MP(srcptr) n, MP(srcptr) d);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally save the quotient of the division to a separate result.  This cannot be the same object the resulting division modulo is being assigned to.\r\n            /// </summary>\r\n            /// <param name=\"destination\">destination for the quotient.  
This cannot be the same object the resulting division modulo is being assigned to.</param>\r\n            /// <returns>An updated expression, with its internal state updated to compute the quotient.</returns>\r\n            MPEXPR(DivMod)^ SavingQuotientTo(MPTYPE^ destination)\r\n            {\r\n                _quotient = destination;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a modulo operator.  Allows to save the division result to a separate object, and/or set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(ModUi) abstract : MPEXPR(Mod) \r\n    {\r\n        private:\r\n            Action<mpir_ui>^ _limbRemainder;\r\n\r\n        internal:\r\n            MPEXPR(ModUi)() { }\r\n            void CUSTOM_MP(mod_ui)(MP(ptr) r, MP(srcptr) n, mpir_ui d);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally saves the remainder of the division to a separate result.\r\n            /// </summary>\r\n            /// <param name=\"callback\">The delegate that will be called with the remainder of the division.  \r\n            /// The delegate is called when the division is evaluated, i.e. is assigned to the Value property or consumed by a method that returns a primitive type.</param>\r\n            /// <returns>An updated expression, with its internal state updated to compute the remainder.</returns>\r\n            MPEXPR(Mod)^ SettingRemainderTo(Action<mpir_ui>^ callback)\r\n            {\r\n                _limbRemainder = callback;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a right shift operator.  
Allows to save the remainder to a separate result object, and/or set the rounding mode for the division.\r\n    /// </summary>\r\n    public ref class MPEXPR(ShiftRight) abstract : MPEXPR(DivMod)\r\n    {\r\n        private:\r\n            bool _remainder;\r\n\r\n        internal:\r\n            MPEXPR(ShiftRight)() { }\r\n            void CUSTOM_MP(div_2exp)(MP(ptr) q, MP(srcptr) n, mp_bitcnt_t bits);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Computes the remainder of the division, rather than the quotient, which is the default.\r\n            /// </summary>\r\n            /// <returns>An updated expression, with its internal state updated to compute the remainder, rather than the quotient.</returns>\r\n            MPEXPR(DivMod)^ Remainder()\r\n            {\r\n                _remainder = true;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a square root function.  Allows to save the remainder to a separate result object.\r\n    /// </summary>\r\n    public ref class MPEXPR(SquareRoot) abstract : MPEXPR_NAME \r\n    {\r\n        internal:\r\n            MPEXPR(SquareRoot)() { }\r\n            MPTYPE^ _remainder;\r\n            void CUSTOM_MP(sqrt)(MP(ptr) dest, MP(srcptr) oper);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally saves the remainder of the root operation to a separate result.\r\n            /// </summary>\r\n            /// <param name=\"destination\">destination for the remainder.  
This cannot be the same object the result of the root operation is being assigned to.</param>\r\n            /// <returns>An updated expression, with its internal state updated to save the remainder.</returns>\r\n            MPEXPR_NAME^ SavingRemainderTo(MPTYPE^ destination)\r\n            {\r\n                _remainder = destination;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a root function.  Allows to save a flag indicating whether the root was exact, or to save the remainder to a separate result object.\r\n    /// </summary>\r\n    public ref class MPEXPR(Root) abstract : MPEXPR(SquareRoot) \r\n    {\r\n        private:\r\n            Action<bool>^ _exact;\r\n\r\n        internal:\r\n            MPEXPR(Root)() { }\r\n            void CUSTOM_MP(root)(MP(ptr) dest, MP(srcptr) oper, mpir_ui power);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally gets a flag indicating whether the root operation was exact.\r\n            /// </summary>\r\n            /// <param name=\"callback\">Delegate that will be called with the exact flag.\r\n            /// The delegate is called when the root operation is evaluated, i.e. is assigned to the Value property or consumed by a method that returns a primitive type.</param>\r\n            /// <returns>An updated expression, with its internal state updated to compute the exact flag.</returns>\r\n            MPEXPR_NAME^ SettingExactTo(Action<bool>^ callback)\r\n            {\r\n                _exact = callback;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a Gcd method.  
Allows to additionally compute Diophantine equation multiplier(s).\r\n    /// </summary>\r\n    public ref class MPEXPR(Gcd) abstract : MPEXPR_NAME \r\n    {\r\n        internal:\r\n            MPEXPR(Gcd)() { }\r\n            MPTYPE^ _s;\r\n            MPTYPE^ _t;\r\n            void CUSTOM_MP(gcd)(MP(ptr) dest, MP(srcptr) a, MP(srcptr) b);\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally computes and saves the coefficients <paramref name=\"s\"/> and <paramref name=\"t\"/> such that a<paramref name=\"s\"/> + b<paramref name=\"t\"/> = g = gcd(a, b).\r\n            /// <para>If only one of the coefficients is needed, use null for the other.\r\n            /// </para>The values <paramref name=\"s\"/> and <paramref name=\"t\"/> are chosen such that normally, |<paramref name=\"s\"/>| &lt; |b|/(2g) and |<paramref name=\"t\"/>| &lt; |a|/(2g),\r\n            /// and these relations define <paramref name=\"s\"/> and <paramref name=\"t\"/> uniquely.\r\n            /// <para>There are a few exceptional cases:\r\n            /// </para>If |a| = |b|, then <paramref name=\"s\"/> = 0 and <paramref name=\"t\"/> = sgn(b).\r\n            /// <para>Otherwise, <paramref name=\"s\"/> = sgn(a) if b = 0 or |b| = 2g, and <paramref name=\"t\"/> = sgn(b) if a = 0 or |a| = 2g.\r\n            /// </para>In all cases, <paramref name=\"s\"/> = 0 if and only if g = |b|, i.e., if b divides a or a = b = 0.</summary>\r\n            /// <param name=\"s\">destination for the first coefficient. Can be null if not needed.</param>\r\n            /// <param name=\"t\">destination for the second coefficient. 
Can be null if not needed.</param>\r\n            /// <returns>An updated expression, with its internal state updated to save the coefficients.</returns>\r\n            MPEXPR_NAME^ SavingDiophantineMultipliersTo(MPTYPE^ s, MPTYPE^ t)\r\n            {\r\n                _s = s;\r\n                _t = t;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a RemoveFactors method.  Allows to additionally save the number of factors that were removed.\r\n    /// </summary>\r\n    public ref class MPEXPR(RemoveFactors) abstract : MPEXPR_NAME \r\n    {\r\n        internal:\r\n            MPEXPR(RemoveFactors)() { }\r\n            Action<mp_bitcnt_t>^ _count;\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally gets the number of factors removed.\r\n            /// </summary>\r\n            /// <param name=\"callback\">Delegate that will be called with the number of factors that were removed.\r\n            /// The delegate is called when the expression is evaluated, i.e. is assigned to the Value property or consumed by a method that returns a primitive type.</param>\r\n            /// <returns>An updated expression, with its internal state updated to save the number of factors.</returns>\r\n            MPEXPR_NAME^ SavingCountRemovedTo(Action<mp_bitcnt_t>^ callback)\r\n            {\r\n                _count = callback;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    /// <summary>\r\n    /// Expression that results from a method calculating a single number from a sequence, such as a fibonacci or lucas number.  
Allows to save the previous number in addition to the requested one, so that the sequence can be continued.\r\n    /// </summary>\r\n    public ref class MPEXPR(Sequence) abstract : MPEXPR_NAME \r\n    {\r\n        internal:\r\n            MPEXPR(Sequence)() { }\r\n            MPTYPE^ _previous;\r\n\r\n        public:\r\n            /// <summary>\r\n            /// Optionally save the previous number in the sequence to a separate result.  This cannot be the same object to which the expression is assigned.\r\n            /// </summary>\r\n            /// <param name=\"destination\">destination for the previous number.  This cannot be the same object to which the expression is assigned.</param>\r\n            /// <returns>An updated expression, with its internal state updated to additionally compute the previous number.</returns>\r\n            MPEXPR_NAME^ SavingPreviousTo(MPTYPE^ destination)\r\n            {\r\n                _previous = destination;\r\n                return this;\r\n            }\r\n    };\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region concrete expressions\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Add, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Int, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Int, Si)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Subtract, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Int, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, Ui, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Int, Si)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, Si, Int)\r\n\r\n#undef ADDITIONAL_MEMBERS\r\n#define ADDITIONAL_MEMBERS                                   \\\r\n    void virtual AddTo(MP(ptr) destination) override;        
\\\r\n    void virtual SubtractFrom(MP(ptr) destination) override;\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR(Multiply), Multiply, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(Multiply), Multiply, Int, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(Multiply), Multiply, Int, Si)\r\n\r\n#undef ADDITIONAL_MEMBERS\r\n#define ADDITIONAL_MEMBERS\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, ShiftLeft, Int, Bits)\r\n                                                   \r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Negate, Int)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Abs, Int)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR(Divide), Divide, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(DivideUi), Divide, Int, Ui)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(ShiftRight), ShiftRight, Int, Bits)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR(Mod), Mod, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(ModUi), Mod, Int, Ui)\r\n                                                   \r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, DivideExactly, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, DivideExactly, Int, Ui)\r\n\r\n    DEFINE_TERNARY_EXPRESSION_WITH_THREE           (MPEXPR_NAME, PowerMod, Int)\r\n    DEFINE_TERNARY_EXPRESSION_WITH_BUILT_IN_MIDDLE (MPEXPR_NAME, PowerMod, Int, Ui, Int)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Power, Int, Ui)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR(Root), Root, Int, Ui)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR(SquareRoot), SquareRoot, 
Int)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, And, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Or, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Xor, Int)\r\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Complement, Int)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Invert, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, NextPrimeCandidate, Int, Rnd)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR(Gcd), Gcd, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR(RemoveFactors), RemoveFactors, Int)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Lcm, Int)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Lcm, Int, Ui)\r\n\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_INS_ONLY   (MPEXPR_NAME, Power, Ui, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_INS_ONLY   (MPEXPR_NAME, Factorial, Ui, Ui)\r\n    DEFINE_UNARY_EXPRESSION_WITH_BUILT_INS_ONLY    (MPEXPR_NAME, Primorial, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_INS_ONLY   (MPEXPR_NAME, Binomial, Ui, Ui)\r\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Binomial, Int, Ui)\r\n    DEFINE_UNARY_EXPRESSION_WITH_BUILT_INS_ONLY    (MPEXPR(Sequence), Fibonacci, Ui)\r\n    DEFINE_UNARY_EXPRESSION_WITH_BUILT_INS_ONLY    (MPEXPR(Sequence), Lucas, Ui)\r\n\r\n    DEFINE_BINARY_EXPRESSION(MPEXPR_NAME, RandomBits, MpirRandom^, mp_bitcnt_t)\r\n    DEFINE_BINARY_EXPRESSION(MPEXPR_NAME, RandomBitsChunky, MpirRandom^, mp_bitcnt_t)\r\n    DEFINE_BINARY_EXPRESSION(MPEXPR_NAME, Random, MpirRandom^, MPEXPR_NAME^)\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region HugeInt class\r\n\r\n    /// <summary>\r\n    /// Multi-precision Integer class.\r\n    /// </summary>\r\n    public ref class MPTYPE : MPEXPR_NAME\r\n    {\r\n        internal:\r\n            //fields\r\n            MP(ptr) 
_value;\r\n\r\n        private:\r\n            //construction\r\n            void AllocateStruct()\r\n            {\r\n                _value = (MP(ptr))((*__gmp_allocate_func)(sizeof(MPSTRUCT)));\r\n            }\r\n            void FromString(String^ value, int base);\r\n            MPTYPE(bool initialize);\r\n\r\n        internal:\r\n            String^ ToString(int base, bool lowercase, int maxDigits);\r\n            virtual void DeallocateStruct()\r\n            {\r\n                MP(clear)(_value);\r\n                (*__gmp_free_func)(_value, sizeof(MPSTRUCT));\r\n                _value = nullptr;\r\n            }\r\n\r\n            //assignment\r\n            virtual void AssignTo(MP(ptr) destination) override\r\n            {\r\n                if(destination != _value)\r\n                    MP(set)(destination, _value);\r\n            }\r\n            virtual void ASSIGN_TO(EvaluationContext& context) override\r\n            {\r\n                CTXT(context.Index++) = _value;\r\n            }\r\n\r\n        public:\r\n            #pragma region construction and disposal\r\n\r\n            /// <summary>\r\n            /// Initializes a new integer instance and sets its value to 0\r\n            /// </summary>\r\n            MPTYPE();\r\n\r\n            /// <summary>\r\n            /// Initializes a new integer instance, allocating enough memory to hold at least <paramref name=\"bits\"/> bits, and sets its value to 0.\r\n            /// <para>This is only the initial space, integer will grow automatically in the normal way, if necessary, for subsequent values stored.\r\n            /// </para>This makes it possible to avoid repeated reallocations if a maximum size is known in advance.\r\n            /// </summary>\r\n            /// <param name=\"bits\">Minimum number of bits the initially allocated memory should hold</param>\r\n            /// <returns>the newly constructed instance</returns>\r\n            static MPTYPE^ Allocate(mp_bitcnt_t 
bits);\r\n\r\n            /// <summary>\r\n            /// Initializes a new integer instance and sets its value from the specified string, using leading characters to recognize the base:\r\n            /// 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\r\n            /// </summary>\r\n            /// <param name=\"value\">string representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\r\n            MPTYPE(String^ value) { FromString(value, 0); }\r\n\r\n            /// <summary>\r\n            /// Initializes a new integer instance and sets its value from the specified string\r\n            /// </summary>\r\n            /// <param name=\"value\">string representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\r\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\r\n            /// <para>The base may vary from 2 to 62, or if base is 0, then the leading characters are used: 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. 
\r\n            /// For bases 37 to 62, upper-case letter represent the usual 10..35 while lower-case letter represent 36..61.</param>\r\n            MPTYPE(String^ value, int base) { FromString(value, base); }\r\n\r\n            /// <summary>\r\n            /// Initializes a new integer instance and sets its value to the result of computing the source expression.\r\n            /// </summary>\r\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\r\n            MPTYPE(MPEXPR_NAME^ value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new integer instance with its value set to the <paramref name=\"value\"/> parameter.\r\n            /// </summary>\r\n            /// <param name=\"value\">Initial value for the new integer instance</param>\r\n            MPTYPE(mpir_si value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new integer instance with its value set to the <paramref name=\"value\"/> parameter.\r\n            /// </summary>\r\n            /// <param name=\"value\">Initial value for the new integer instance</param>\r\n            MPTYPE(mpir_ui value);\r\n\r\n            /// <summary>\r\n            /// Constructs and returns a new integer instance with its value set to the <paramref name=\"value\"/> parameter.\r\n            /// </summary>\r\n            /// <param name=\"value\">Initial value for the new integer instance.  Any fractional portion is truncated.</param>\r\n            MPTYPE(double value);\r\n\r\n            /// <summary>\r\n            /// Change the space allocated for integer to <paramref name=\"bits\"/> bits. 
The value in integer is preserved if it fits, or is set to 0 if not.\r\n            /// <para>This function can be used to increase the space for a variable in order to avoid repeated automatic reallocations, or to decrease it to give memory back to the heap.\r\n            /// </para></summary>\r\n            /// <param name=\"bits\">Minimum number of bits the allocated memory should hold</param>\r\n            void Reallocate(mp_bitcnt_t bits) { MP(realloc2)(_value, bits); }\r\n\r\n            //disposal\r\n\r\n            //creating a destructor in C++ implements IDisposable.\r\n\r\n            /// <summary>\r\n            /// Frees all memory allocated by the instance.\r\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\r\n            /// </para></summary>\r\n            ~MPTYPE() { this->!MPTYPE(); }\r\n\r\n            /// <summary>\r\n            /// Frees all memory allocated by the instance.\r\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\r\n            /// </para></summary>\r\n            !MPTYPE() { if(_value != 0) DeallocateStruct(); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region Properties\r\n\r\n            /// <summary>\r\n            /// Gets the number of limbs currently allocated. 
This value will never be less than Size().\r\n            /// <para>When a new value is assigned to the object, more space is automatically allocated if necessary.\r\n            /// </para>Reallocate() can also be used manually.\r\n            /// </summary>\r\n            property int AllocatedSize\r\n            {\r\n                int get() { return _value->_mp_alloc; }\r\n            }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region conversions\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string.\r\n            /// <para>To avoid debugging performance problems, this method outputs at most the number of digits specified in MpirSettings.ToStringDigits.\r\n            /// If the number is larger, the least significant digits are shown with a leading ellipsis (i.e., [-]...NNNNN)\r\n            /// </para>Setting MpirSettings.ToStringDigits to 0 removes the upper limit.\r\n            /// </summary>\r\n            /// <returns>A string representation of the number in decimal, possibly cut off if the number has more digits than MpirSettings.ToStringDigits.</returns>\r\n            virtual String^ ToString() override { return ToString(10, false, MpirSettings::ToStringDigits); }\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string in the specified base.\r\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\r\n            /// </para></summary>\r\n            /// <param name=\"base\">The base to use for the output.  
The base can be from 2 to 62; uppercase letters represent digits 10-35 and lowercase letters represent digits 36-61.</param>\r\n            /// <returns>A string representation of the number in the specified base.</returns>\r\n            String^ ToString(int base) { return ToString(base, false, 0); }\r\n\r\n            /// <summary>\r\n            /// Converts the number to a string in the specified base.\r\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\r\n            /// </para></summary>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <returns>A string representation of the number in the specified base.</returns>\r\n            String^ ToString(int base, bool lowercase) { return ToString(base, lowercase, 0); }\r\n\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <summary>\r\n            /// Returns the absolute value of the number as a ulong.\r\n            /// <para>If the number is too big, then just the least significant bits that do fit are returned.\r\n            /// </para>The sign of the number is ignored, only the absolute value is used.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The absolute value as a ulong, possibly 
truncated to the least significant bits only.</returns>\r\n            mpir_ui ToUlong() { return MP(get_ui)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a long.\r\n            /// <para>If the number is too big, then just the least significant bits that do fit are returned, with the same sign as the number.\r\n            /// </para>When truncation occurs, the result is probably not very useful.  Call FitsLong() to check if the number will fit.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The value as a long, possibly truncated to the least significant bits only.</returns>\r\n            mpir_si ToLong() { return MP(get_si)(_value); }\r\n#else\r\n            /// <summary>\r\n            /// Returns the absolute value of the number as a uint.\r\n            /// <para>If the number is too big, then just the least significant bits that do fit are returned.\r\n            /// </para>The sign of the number is ignored, only the absolute value is used.\r\n            /// <para>This method is supported only on 32-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The absolute value as a uint, possibly truncated to the least significant bits only.</returns>\r\n            mpir_ui ToUint() { return MP(get_ui)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as an int.\r\n            /// <para>If the number is too big, then just the least significant bits that do fit are returned, with the same sign as the number.\r\n            /// </para>When truncation occurs, the result is probably not very useful.  
Call FitsInt() to check if the number will fit.\r\n            /// <para>This method is supported only on 32-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>The value as an int, possibly truncated to the least significant bits only.</returns>\r\n            mpir_si ToInt() { return MP(get_si)(_value); }\r\n#endif\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a double, truncating if necessary (rounding towards zero).\r\n            /// <para>If the exponent from the conversion is too big, the result is system dependent. An infinity is returned where available. \r\n            /// A hardware overflow trap may or may not occur.\r\n            /// </para></summary>\r\n            /// <returns>The value as a double, possibly truncated.</returns>\r\n            double ToDouble() { return MP(get_d)(_value); }\r\n\r\n            /// <summary>\r\n            /// Returns the value of the number as a double, truncating if necessary (rounding towards zero), and returning the exponent separately.\r\n            /// <para>The return is the mantissa, its absolute value will be in the range [0.5 - 1).\r\n            /// </para>If the source value is zero, both mantissa and exponent are returned as 0.\r\n            /// </summary>\r\n            /// <param name=\"exp\">variable to store the exponent in.</param>\r\n            /// <returns>The mantissa of the value as a double, possibly truncated.</returns>\r\n            double ToDouble([Out] mp_exp_t% exp) \r\n            { \r\n                mp_exp_t x; \r\n                auto result = MP(get_d_2exp)(&x, _value); \r\n                exp = x; \r\n                return result; \r\n            }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region assignment\r\n\r\n            /// <summary>\r\n            /// When getting, returns this integer.\r\n            /// <para>When setting, sets the value of the integer object to the value resulting 
from computing the supplied expression.\r\n            /// </para>The getter is a no-op and never needs to be invoked directly, but makes compound operators such as +=, *=, etc. possible.\r\n            /// <para>Do not set the Value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <remarks>\r\n            /// MPIR types are implemented as reference types with value semantics.\r\n            /// Like Strings, the objects themselves are just lightweight pointers to data allocated elsewhere.\r\n            /// Unlike Strings, MPIR types are mutable.\r\n            /// <para>Value semantics requires you to be able to code, a = b + c.\r\n            /// However, .Net (outside of C++) does not allow overloading the assignment operator,\r\n            /// and assigning references would necessitate some unnecessary duplication and extra memory allocations.\r\n            /// </para>To solve this problem, MPIR.Net uses the property assignment.  
\r\n            /// The setter of the Value property does what an overloaded assignment operator would do in C++.\r\n            /// The syntax is a little different: a.Value = b + c, but it is fluent enough to become a quick habit,\r\n            /// and additionally reinforces the concept that an existing object can change its value while reusing internally allocated memory.\r\n            /// <para>To this end, all overloaded operators and most functions that operate on MPIR types,\r\n            /// instead of eagerly computing a result, produce and return an expression that is basically a formula for the computation.\r\n            /// Expressions can then be composed using additional operators to achieve expression trees of arbitrary complexity.\r\n            /// All computations are deferred until an expression is assigned to the Value property of an MPIR object,\r\n            /// consumed by a method or operator that returns a primitive type,\r\n            /// or supplied as an argument to an MPIR type constructor.\r\n            /// </para>The getter is a no-op defined to make possible constructs such as a.Value += 5, a.Value *= 10, etc.\r\n            /// <para>Direct assignments such as a = b + c, a *= 10 will not compile because there is no implicit conversion from an expression.\r\n            /// Even if an implicit conversion were defined, such code would incur an extra allocation plus garbage collection,\r\n            /// and would not perform as well as doing the same operations on a.Value.\r\n            /// </para>It would also not compile if the source were a \"using\" variable, as all method-local integers should be.\r\n            /// </remarks>\r\n            property MPEXPR_NAME^ Value\r\n            {\r\n                void set(MPEXPR_NAME^ expr) { expr->AssignTo(_value); }\r\n                MPEXPR_NAME^ get() { return this; }\r\n            }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer 
object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(mpir_ui value) { MP(set_ui)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(mpir_si value) { MP(set_si)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.  Any fractional portion is truncated.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(double value) { MP(set_d)(_value, value); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object.  
The string's leading characters may indicate base:\r\n            /// 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise</param>\r\n            void SetTo(String^ value) { SetTo(value, 0); }\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\r\n            /// <para>The base may vary from 2 to 62, or if base is 0, then the leading characters are used: 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\r\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. \r\n            /// For bases 37 to 62, upper-case letter represent the usual 10..35 while lower-case letter represent 36..61.</param>\r\n            void SetTo(String^ value, int base);\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.  Any fractional portion is truncated.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(RationalExpression^ value);\r\n\r\n            /// <summary>\r\n            /// Sets the value of the integer object.  
Any fractional portion is truncated.\r\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\r\n            /// </para>If the argument is an expression, it is evaluated with the current default float precision.</summary>\r\n            /// <param name=\"value\">new value for the object</param>\r\n            void SetTo(FloatExpression^ value);\r\n\r\n            /// <summary>\r\n            /// Swaps the values of two integers.\r\n            /// <para>This operation is a pointer swap and doesn't affect allocated memory.\r\n            /// </para>Do not call this method while either object is contained in a hash table, because this would change their hash codes.\r\n            /// </summary>\r\n            /// <param name=\"a\">Source number to swap this instance's value with</param>\r\n            void Swap(MPTYPE^ a) \r\n            { \r\n                MP(swap)(_value, a->_value);\r\n            }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region bit operations\r\n\r\n            /// <summary>\r\n            /// Sets a single bit at the specified position.\r\n            /// </summary>\r\n            /// <param name=\"value\">Value of the bit to set, true for 1, false for 0</param>\r\n            /// <param name=\"position\">Position of the bit to set.\r\n            /// <para>The least significant bit is zero.\r\n            /// </para>If position is beyond the current size of the number, the number is extended automatically.</param>\r\n            void SetBit(mp_bitcnt_t position, bool value) { value ? 
MP(setbit)(_value, position) : MP(clrbit)(_value, position); }\r\n\r\n            /// <summary>\r\n            /// Gets a single bit at the specified position.\r\n            /// </summary>\r\n            /// <param name=\"position\">Position of the bit to get.\r\n            /// <para>The least significant bit is zero.\r\n            /// </para>If position is beyond the current size of the number, returns true for negative number, false for non-negative; the number itself is not extended.</param>\r\n            /// <returns>true if the specified bit is 1, false if zero.\r\n            /// <para>If position is beyond the current size of the number, returns true for negative number, false for non-negative; the number itself is not extended.</para></returns>\r\n            bool GetBit(mp_bitcnt_t position) { return MP(tstbit)(_value, position) != 0; }\r\n\r\n            /// <summary>\r\n            /// Complements (inverts) a single bit at the specified position.\r\n            /// </summary>\r\n            /// <param name=\"position\">Position of the bit to flip.\r\n            /// <para>The least significant bit is zero.\r\n            /// </para>If position is beyond the current size of the number, the number is extended automatically.</param>\r\n            void ComplementBit(mp_bitcnt_t position) { MP(combit)(_value, position); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region size checks\r\n\r\n#if BITS_PER_MP_LIMB == 64\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the ulong range.\r\n            /// <para>This method is supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a ulong</returns>\r\n            bool FitsUlong() { return MP(fits_ui_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the long range.\r\n            /// <para>This method is 
supported only on 64-bit builds</para>\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a long</returns>\r\n            bool FitsLong() { return MP(fits_si_p)(_value) != 0; }\r\n#endif\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the uint range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a uint</returns>\r\n            bool FitsUint() { return MP(fits_uint_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the int range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in an int</returns>\r\n            bool FitsInt() { return MP(fits_sint_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the ushort range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a ushort</returns>\r\n            bool FitsUshort() { return MP(fits_ushort_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns true if the value of the integer is in the short range.\r\n            /// </summary>\r\n            /// <returns>true if the value will fit in a short</returns>\r\n            bool FitsShort() { return MP(fits_sshort_p)(_value) != 0; }\r\n\r\n            /// <summary>\r\n            /// Returns the number of digits the number would take if written in the specified base.\r\n            /// <para>The sign of the number is ignored, just the absolute value is used.\r\n            /// </para>The result will be either exact or 1 too big.\r\n            /// If <paramref name=\"base\"/> is a power of 2, the result will always be exact.\r\n            /// <para>If the number is 0, the result is always 1.\r\n            /// </para>This function can be used to estimate the space required when converting to a string.\r\n            
/// The right amount of allocation is normally two more than the value returned,\r\n            /// one extra for a minus sign and one for the null-terminator.\r\n            /// <para>It will be noted that base=2 can be used to locate the most significant 1 bit in op,\r\n            /// counting from 1 (unlike all bitwise functions, which start from 0).\r\n            /// </para></summary>\r\n            /// <param name=\"base\">Numeric base for the would-be string conversion, in the range from 2 to 62.</param>\r\n            /// <returns>The number of digits the number would take written in the specified base, possibly 1 too big, not counting a leading minus.</returns>\r\n            mp_size_t ApproximateSizeInBase(int base) { return MP(sizeinbase)(_value, base); }\r\n\r\n            #pragma endregion\r\n\r\n            #pragma region IO\r\n\r\n            /// <summary>\r\n            /// Outputs the integer to the <paramref name=\"stream\"/> in raw binary format.\r\n            /// <para>The number is written in a portable format, with 4 bytes of size information, and that many bytes of limbs.\r\n            /// Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).\r\n            /// </para>The output can be read with Read(Stream).\r\n            /// <para>The output cannot be read by MP(inp_raw) from GMP 1, because of changes necessary\r\n            /// for compatibility between 32-bit and 64-bit machines.\r\n            /// </para></summary>\r\n            /// <param name=\"stream\">Stream to output the number to</param>\r\n            /// <returns>the number of bytes written, or 0 if an error occurs.</returns>\r\n            size_t Write(Stream^ stream);\r\n\r\n            /// <summary>\r\n            /// Reads the integer value from the <paramref name=\"stream\"/> in raw binary format, as it would have been written by Write(Stream).\r\n            /// <para>The number is read in a portable format, with 4 bytes of 
size information, and that many bytes of limbs.\r\n            /// Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).\r\n            /// </para>This routine can read the output from MP(out_raw) also from GMP 1, in spite of changes\r\n            /// necessary for compatibility between 32-bit and 64-bit machines.\r\n            /// </summary>\r\n            /// <param name=\"stream\">Stream to input the number from</param>\r\n            /// <returns>the number of bytes read, or 0 if an error occurs.</returns>\r\n            size_t Read(Stream^ stream);\r\n\r\n            /// <summary>\r\n            /// Outputs the integer to the <paramref name=\"writer\"/> as a string of digits in decimal.\r\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not a valid decimal digit.\r\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\r\n            /// </summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer) { return Write(writer, 0, false); }\r\n\r\n            /// <summary>\r\n            /// Outputs the integer to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\r\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name=\"base\"/>.\r\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\r\n            /// <para>For hexadecimal, binary, or octal, no leading base 
indication is written.\r\n            /// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.\r\n            /// </summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</para></param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer, int base) { return Write(writer, base, false); }\r\n\r\n            /// <summary>\r\n            /// Outputs the integer to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\r\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\r\n            /// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name=\"base\"/>.\r\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\r\n            /// <para>For hexadecimal, binary, or octal, no leading base indication is written.\r\n            /// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.\r\n            /// </summary>\r\n            /// <param name=\"writer\">Text writer to output the number to</param>\r\n            /// <param name=\"base\">The base to use for the output.\r\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\r\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase 
letters represent digits 36-61.</param>\r\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\r\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\r\n            /// <returns>the number of characters written</returns>\r\n            size_t Write(TextWriter^ writer, int base, bool lowercase);\r\n\r\n            /// <summary>\r\n            /// Inputs the number as a possibly white-space preceeded string.\r\n            /// <para>The base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.\r\n            /// </para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.\r\n            /// <para>This method reads the output of a Write(TextWriter) when decimal base is used.\r\n            /// </para>For hexadecimal, binary, or octal, because Write(TextWriter) doesn't write leading base indication characters, \r\n            /// using this overload of Read will fail to recognize the correct base.</summary>\r\n            /// <param name=\"reader\">Text reader to input the number from</param>\r\n            /// <returns>the number of characters read</returns>\r\n            size_t Read(TextReader^ reader) { return Read(reader, 0); }\r\n\r\n            /// <summary>\r\n            /// Inputs the number as a possibly white-space preceeded string in base <paramref name=\"base\"/> from the <paramref name=\"reader\"/>.\r\n            /// <para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.\r\n            /// </para>This method reads the output of a Write(TextWriter) method.\r\n            /// </summary>\r\n            /// <param name=\"reader\">Text reader to input the number from</param>\r\n            /// <param name=\"base\">The base 
to use for the input.\r\n            /// <para>The base can be from 2 to 62; For bases up to 36 case is ignored.\r\n            /// </para>For bases larger than 36, uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.\r\n            /// <para>If 0, the base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.\r\n            /// </para>Note that the leading base characters are not written by the Write method.</param>\r\n            /// <returns>the number of characters read</returns>\r\n            size_t Read(TextReader^ reader, int base);\r\n\r\n            /// <summary>\r\n            /// Imports the number from arbitrary words of binary data.\r\n            /// <para>No sign information is taken from the data, the imported number will be positive or zero.</para>\r\n            /// </summary>\r\n            /// <typeparam name=\"T\">Type of element in the data array.  This must be a value type, but does not need to represent a single limb.  
Data is interpreted as a flat byte array.</typeparam>\r\n            /// <param name=\"data\">Array of binary \"limbs\" to import from.\r\n            /// <para>Elements don't necessarily need to be of the <paramref name=\"bytesPerLimb\"/> size; the data is interpreted as a flat byte array.</para></param>\r\n            /// <param name=\"limbCount\">Number of \"limbs\" to import</param>\r\n            /// <param name=\"bytesPerLimb\">Number of bytes per \"limb.\"</param>\r\n            /// <param name=\"limbOrder\">Specifies the order of the \"limbs.\"</param>\r\n            /// <param name=\"endianness\">Specifies the byte order within each \"limb.\"</param>\r\n            /// <param name=\"nails\">The number of most-significant bits to ignore in each \"limb.\"</param>\r\n            generic<typename T> where T : value class void Import(array<T>^ data, size_t limbCount, int bytesPerLimb, LimbOrder limbOrder, Endianness endianness, int nails)\r\n            {\r\n                if(limbCount == 0)\r\n                {\r\n                    MP(set_ui)(_value, 0);\r\n                    return;\r\n                }\r\n\r\n                PIN(data);\r\n                MP(import)(_value, limbCount, (int)limbOrder, bytesPerLimb, (int)endianness, nails, pinned_data);\r\n            }\r\n\r\n            /// <summary>\r\n            /// Exports the absolute value of the number to arbitrary words of binary data.\r\n            /// <para>The sign of op is ignored.\r\n            /// </para></summary>\r\n            /// <typeparam name=\"T\">Type of element in the data array.  This must be a value type, but does not need to represent a single limb.  
Data is interpreted as a flat byte array.</typeparam>\r\n            /// <param name=\"data\">Array of binary \"limbs\" to export to.\r\n            /// <para>Elements don't necessarily need to be of the <paramref name=\"bytesPerLimb\"/> size; the data is interpreted as a flat byte array.\r\n            /// </para>The total size of the array in bytes must be sufficient for the export.</param>\r\n            /// <param name=\"bytesPerLimb\">Number of bytes per \"limb.\"</param>\r\n            /// <param name=\"limbOrder\">Specifies the order of the \"limbs.\"</param>\r\n            /// <param name=\"endianness\">Specifies the byte order within each \"limb.\"</param>\r\n            /// <param name=\"nails\">The number of most-significant bits to reserve, and set to zero, in each \"limb.\"</param>\r\n            /// <returns>The number of limbs exported.\r\n            /// <para>If the number is non-zero, then the most significant word produced will be non-zero.\r\n            /// </para>If the number is zero, then the count returned will be zero and nothing written to the data.</returns>\r\n            generic<typename T> where T : value class size_t Export(array<T>^ data, int bytesPerLimb, LimbOrder limbOrder, Endianness endianness, int nails)\r\n            {\r\n                PIN(data);\r\n                size_t limbCount;\r\n                MP(export)(pinned_data, &limbCount, (int)limbOrder, bytesPerLimb, (int)endianness, nails, _value);\r\n                return limbCount;\r\n            }\r\n\r\n            /// <summary>\r\n            /// Exports the absolute value of the number to arbitrary words of binary data.  An array of type T is allocated for the export.\r\n            /// <para>The sign of op is ignored.\r\n            /// </para></summary>\r\n            /// <typeparam name=\"T\">Type of element in the data array.  This must be a value type, but does not need to represent a single limb.  
Data is interpreted as a flat byte array.</typeparam>\r\n            /// <param name=\"bytesPerLimb\">Number of bytes per \"limb.\"</param>\r\n            /// <param name=\"limbOrder\">Specifies the order of the \"limbs.\"</param>\r\n            /// <param name=\"endianness\">Specifies the byte order within each \"limb.\"</param>\r\n            /// <param name=\"nails\">The number of most-significant bits to reserve, and set to zero, in each \"limb.\"</param>\r\n            /// <returns>An array of type T containing the exported limb data.\r\n            /// <para>If the number is non-zero, then the most significant word produced will be non-zero.\r\n            /// </para>If the number is zero, then a zero-length array is returned.</returns>\r\n            generic<typename T> where T : value class array<T>^ Export(int bytesPerLimb, LimbOrder limbOrder, Endianness endianness, int nails)\r\n            {\r\n                if(this->Sign() == 0)\r\n                    return gcnew array<T>(0);\r\n\r\n                auto bitsPerLimb = 8 * bytesPerLimb - nails;\r\n                auto limbCount = (MP(sizeinbase)(_value, 2) - 1) / bitsPerLimb + 1;\r\n                auto arrayCount = (limbCount * bytesPerLimb - 1) / sizeof(T) + 1;\r\n                auto data = gcnew array<T>((int)arrayCount);\r\n\r\n                PIN(data);\r\n                MP(export)(pinned_data, &limbCount, (int)limbOrder, bytesPerLimb, (int)endianness, nails, _value);\r\n                return data;\r\n            }\r\n\r\n            /// <summary>\r\n            /// Reads limb data from the number into an array.\r\n            /// </summary>\r\n            /// <param name=\"destination\">The destination array into which limb data will be copied</param>\r\n            /// <param name=\"start\">The 0-based index of the first limb to copy</param>\r\n            /// <param name=\"length\">The number of limbs to copy</param>\r\n            /// <param name=\"destinationIndex\">The starting index 
within the <paramref name=\"destination\"/> array that will receive the first copied limb.</param>\r\n            void ReadLimbs(array<mpir_ui>^ destination, int start, int length, int destinationIndex);\r\n\r\n            /// <summary>\r\n            /// Modifies a portion or all of the limb data within the number by copying from an array.\r\n            /// <para>The number is reallocated if necessary to accommodate the limbs.\r\n            /// </para>The previous limb data is preserved, so that partial writes are supported.\r\n            /// <para>If a partial write begins beyond the currently allocated limb array, any gap is zeroed out.\r\n            /// </para></summary>\r\n            /// <param name=\"source\">The source array from which limb data will be copied.  The limb data need not be normalized</param>\r\n            /// <param name=\"start\">The 0-based index within the destination number's limb data that will receive the first copied limb</param>\r\n            /// <param name=\"length\">The number of limbs to copy</param>\r\n            /// <param name=\"sourceIndex\">The starting index within the <paramref name=\"source\"/> array from which the first limb is to be copied.</param>\r\n            /// <param name=\"negative\">Specifies the new sign of the number: true if negative, false if non-negative</param>\r\n            void ModifyLimbs(array<mpir_ui>^ source, int start, int length, int sourceIndex, bool negative);\r\n\r\n            /// <summary>\r\n            /// Writes limb data into the number from an array.\r\n            /// <para>The number is reallocated if necessary to accommodate the limbs.\r\n            /// </para>The previous limb data is not used; this method is suitable for full replacement of limb data only.\r\n            /// </summary>\r\n            /// <param name=\"source\">The source array from which limb data will be copied</param>\r\n            /// <param name=\"newSize\">The new size of the number.  
The absolute value is the number of limbs to copy, and will be the new number of limbs, and the sign is the new sign of the number.\r\n            /// <para>Copying always starts at the beginning of the number's (re-)allocated limb data.  The new limb data must be valid, but need not be normalized</para></param>\r\n            /// <param name=\"sourceIndex\">The starting index within the <paramref name=\"source\"/> array from which the first limb is to be copied.</param>\r\n            /// <param name=\"negative\">Specifies the new sign of the number: true if negative, false if non-negative</param>\r\n            void WriteLimbs(array<mpir_ui>^ source, int sourceIndex, mp_size_t newSize, bool negative);\r\n\r\n        internal:\r\n            size_t ReadNoWhite(TextReader^ reader, int base, size_t nread);\r\n\r\n        public:\r\n\r\n            /// <summary>\r\n            /// Returns the specified limb of the number.\r\n            /// <para>The least significant limb is zero.\r\n            /// </para>The sign of the number is ignored.\r\n            /// </summary>\r\n            /// <param name=\"index\">The index of the limb to return.\r\n            /// <para>The least significant limb is zero.\r\n            /// </para>If the index is outside the range 0 to Size()-1, zero is returned.</param>\r\n            /// <returns>The specified limb, or zero if <paramref name=\"index\"/> is outside of the valid range.</returns>\r\n            size_t GetLimb(mp_size_t index) { return MP(getlimbn)(_value, index); }\r\n\r\n            #pragma endregion\r\n    \r\n            #pragma region number-theoretic\r\n\r\n            /// <summary>\r\n            /// Determines whether the number is a probable prime with the chance of error being at most 1 in 2^<paramref name=\"probability\"/>.\r\n            /// <para>This function does some trial divisions to speed up the average case, then some probabilistic\r\n            /// primality tests to achieve the desired level of 
error.\r\n            /// </para>This function interface is preliminary and may change in the future.\r\n            /// </summary>\r\n            /// <param name=\"random\">Random number generator to use for probabilistic primality tests</param>\r\n            /// <param name=\"probability\">Defines the maximum allowed probability of a false positive.\r\n            /// <para>The odds of a composite number being reported as a probable prime are at most 1 in 2^probability</para></param>\r\n            /// <param name=\"pretested\">Used to inform the function that trial division up to div has already been performed,\r\n            /// and so the number is known to have NO divisors &lt;= pretested.\r\n            /// <para>Use 0 to inform the function that no trial division has been done.</para></param>\r\n            /// <returns>true if the number is probably prime, or false if it is definitely composite.</returns>\r\n            bool IsProbablePrime(MpirRandom^ random, int probability, mpir_ui pretested);\r\n\r\n            /// <summary>\r\n            /// Determines whether the number is likely a prime, i.e. 
you can consider it a prime for practical purposes.\r\n            /// <para>This function does some trial divisions to speed up the average case, then some probabilistic primality tests.\r\n            /// </para>The term \"likely\" refers to the fact that the number will not have small factors.\r\n            /// <para>This function interface is preliminary and may change in the future.\r\n            /// </para></summary>\r\n            /// <param name=\"random\">Random number generator to use for probabilistic primality tests</param>\r\n            /// <param name=\"pretested\">Used to inform the function that trial division up to div has already been performed,\r\n            /// and so the number is known to have NO divisors &lt;= pretested.\r\n            /// <para>Use 0 to inform the function that no trial division has been done.</para></param>\r\n            /// <returns>true if the number is likely prime, or false if it is definitely composite.</returns>\r\n            bool IsLikelyPrime(MpirRandom^ random, mpir_ui pretested);\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>).\r\n            /// <para>This is defined only for <paramref name=\"b\"/> odd.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Jacobi symbol</param>\r\n            /// <param name=\"b\">Second source value for the Jacobi symbol</param>\r\n            /// <returns>The Jacobi symbol (-1, 0, or 1).  
Return is undefined unless <paramref name=\"b\"/> is odd.</returns>\r\n            static int Jacobi(MPTYPE^ a, MPTYPE^ b) { return MP(jacobi)(a->_value, b->_value); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Legendre symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>).\r\n            /// <para>This is defined only when <paramref name=\"b\"/> is an odd prime.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Legendre symbol</param>\r\n            /// <param name=\"b\">Second source value for the Legendre symbol</param>\r\n            /// <returns>The Legendre symbol (-1, 0, or 1).  Return is undefined unless <paramref name=\"b\"/> is an odd prime.</returns>\r\n            static int Legendre(MPTYPE^ a, MPTYPE^ b) { return MP(legendre)(a->_value, b->_value); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>) with the Kronecker extension\r\n            /// (<paramref name=\"a\"/>/2) = (2/<paramref name=\"a\"/>) when a odd, or (<paramref name=\"a\"/>/2) = 0 when a even.\r\n            /// <para>When <paramref name=\"b\"/> is odd the Jacobi symbol and Kronecker symbol are identical, so the various Kronecker overloads \r\n            /// can be used for mixed precision Jacobi symbols too.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Kronecker symbol</param>\r\n            /// <param name=\"b\">Second source value for the Kronecker symbol</param>\r\n            /// <returns>The Kronecker symbol (-1, 0, or 1).</returns>\r\n            static int Kronecker(MPTYPE^ a, MPTYPE^ b) { return MP(kronecker)(a->_value, b->_value); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>) with the Kronecker extension\r\n            /// (<paramref name=\"a\"/>/2) = (2/<paramref name=\"a\"/>) when a 
odd, or (<paramref name=\"a\"/>/2) = 0 when a even.\r\n            /// <para>When <paramref name=\"b\"/> is odd the Jacobi symbol and Kronecker symbol are identical, so the various Kronecker overloads \r\n            /// can be used for mixed precision Jacobi symbols too.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Kronecker symbol</param>\r\n            /// <param name=\"b\">Second source value for the Kronecker symbol</param>\r\n            /// <returns>The Kronecker symbol (-1, 0, or 1).</returns>\r\n            static int Kronecker(MPTYPE^ a, mpir_ui b) { return MP(kronecker_ui)(a->_value, b); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>) with the Kronecker extension\r\n            /// (<paramref name=\"a\"/>/2) = (2/<paramref name=\"a\"/>) when a odd, or (<paramref name=\"a\"/>/2) = 0 when a even.\r\n            /// <para>When <paramref name=\"b\"/> is odd the Jacobi symbol and Kronecker symbol are identical, so the various Kronecker overloads \r\n            /// can be used for mixed precision Jacobi symbols too.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Kronecker symbol</param>\r\n            /// <param name=\"b\">Second source value for the Kronecker symbol</param>\r\n            /// <returns>The Kronecker symbol (-1, 0, or 1).</returns>\r\n            static int Kronecker(MPTYPE^ a, mpir_si b) { return MP(kronecker_si)(a->_value, b); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>) with the Kronecker extension\r\n            /// (<paramref name=\"a\"/>/2) = (2/<paramref name=\"a\"/>) when a odd, or (<paramref name=\"a\"/>/2) = 0 when a even.\r\n            /// <para>When <paramref name=\"b\"/> is odd the Jacobi symbol and Kronecker symbol are identical, so the various Kronecker 
overloads \r\n            /// can be used for mixed precision Jacobi symbols too.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Kronecker symbol</param>\r\n            /// <param name=\"b\">Second source value for the Kronecker symbol</param>\r\n            /// <returns>The Kronecker symbol (-1, 0, or 1).</returns>\r\n            static int Kronecker(mpir_ui a, MPTYPE^ b) { return MP(ui_kronecker)(a, b->_value); }\r\n\r\n            /// <summary>\r\n            /// Calculates the Jacobi symbol (<paramref name=\"a\"/>/<paramref name=\"b\"/>) with the Kronecker extension\r\n            /// (<paramref name=\"a\"/>/2) = (2/<paramref name=\"a\"/>) when a odd, or (<paramref name=\"a\"/>/2) = 0 when a even.\r\n            /// <para>When <paramref name=\"b\"/> is odd the Jacobi symbol and Kronecker symbol are identical, so the various Kronecker overloads \r\n            /// can be used for mixed precision Jacobi symbols too.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">First source value for the Kronecker symbol</param>\r\n            /// <param name=\"b\">Second source value for the Kronecker symbol</param>\r\n            /// <returns>The Kronecker symbol (-1, 0, or 1).</returns>\r\n            static int Kronecker(mpir_si a, MPTYPE^ b) { return MP(si_kronecker)(a, b->_value); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating <paramref name=\"value\"/> raised to the specified <paramref name=\"power\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"value\">Base for the initial value for the new integer instance</param>\r\n            /// <param name=\"power\">Power to raise the <paramref name=\"value\"/> to when calculating the initial value for the new 
instance</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Power(mpir_ui value, mpir_ui power) { return gcnew MPEXPR(PowerUiUi)(value, power); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the factorial of <paramref name=\"a\"/>.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">The source number to take the factorial of</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Factorial(mpir_ui a) { return gcnew MPEXPR(FactorialUiUi)(a, 1); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the multifactorial of <paramref name=\"a\"/> of the specified order.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">The source number to take the multifactorial of</param>\r\n            /// <param name=\"order\">The order of the multifactorial, i.e. 
2 for <paramref name=\"a\"/>!!, 3 for <paramref name=\"a\"/>!!!, etc.</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Factorial(mpir_ui a, mpir_ui order) { return gcnew MPEXPR(FactorialUiUi)(a, order); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the primorial of <paramref name=\"a\"/>, i.e. the product of all positive prime numbers &lt;= a.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"a\">The source number to take the primorial of</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Primorial(mpir_ui a) { return gcnew MPEXPR(PrimorialUi)(a); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the binomial coefficient (<paramref name=\"n\"/>, <paramref name=\"k\"/>), a.k.a. number of k-element combinations out of an n-element set.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para></summary>\r\n            /// <param name=\"n\">The first source value of the binomial coefficient, a.k.a. set size</param>\r\n            /// <param name=\"k\">The second source value of the binomial coefficient, a.k.a. 
subset size</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Binomial(mpir_ui n, mpir_ui k) { return gcnew MPEXPR(BinomialUiUi)(n, k); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the binomial coefficient (<paramref name=\"n\"/>, <paramref name=\"k\"/>), a.k.a. number of k-element combinations out of an n-element set.\r\n            /// <para>Negative values of <paramref name=\"n\"/> are supported, using the identity (-n, k) = (-1)^k * (n + k - 1, k).\r\n            /// </para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </summary>\r\n            /// <param name=\"n\">The first source value of the binomial coefficient, a.k.a. set size\r\n            /// <para>Negative values of <paramref name=\"n\"/> are supported, using the identity (-n, k) = (-1)^k * (n + k - 1, k).\r\n            /// </para></param>\r\n            /// <param name=\"k\">The second source value of the binomial coefficient, a.k.a. subset size</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR_NAME^ Binomial(MPEXPR_NAME^ n, mpir_ui k) { return gcnew MPEXPR(BinomialIntUi)(n, k); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the <paramref name=\"n\"/>th Fibonacci number.\r\n            /// <para>You can also optionally save the (<paramref name=\"n\"/>-1)th number by calling a method on the resulting expression.\r\n            /// </para>This method is designed for calculating isolated Fibonacci numbers. 
When a sequence of\r\n            /// values is wanted it's best to start with a pair of numbers (Fn and Fn-1) by calling SettingPreviousTo(),\r\n            /// and then iterate the defining Fn+1 = Fn + Fn-1.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>Fibonacci and Lucas numbers are closely related, and it's never necessary to calculate both Fn and Ln.\r\n            /// </summary>\r\n            /// <param name=\"n\">The index of the Fibonacci number to calculate</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Sequence)^ Fibonacci(mpir_ui n) { return gcnew MPEXPR(FibonacciUi)(n); }\r\n\r\n            /// <summary>\r\n            /// Returns an expression for calculating the <paramref name=\"n\"/>th Lucas number.\r\n            /// <para>You can also optionally save the (<paramref name=\"n\"/>-1)th number by calling a method on the resulting expression.\r\n            /// </para>This method is designed for calculating isolated Lucas numbers. 
When a sequence of\r\n            /// values is wanted it's best to start with a pair of numbers (Ln and Ln-1) by calling SettingPreviousTo(),\r\n            /// and then iterate the defining Ln+1 = Ln + Ln-1.\r\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\r\n            /// </para>Fibonacci and Lucas numbers are closely related, and it's never necessary to calculate both Fn and Ln.\r\n            /// </summary>\r\n            /// <param name=\"n\">The index of the Lucas number to calculate</param>\r\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\r\n            static MPEXPR(Sequence)^ Lucas(mpir_ui n) { return gcnew MPEXPR(LucasUi)(n); }\r\n\r\n            #pragma endregion\r\n    };\r\n\r\n    #pragma endregion\r\n\r\n    #pragma region HugeIntComponent\r\n\r\n    /// <summary>This internal class is used to provide access to the numerator and denominator of a Rational number.\r\n    /// <para>It is a thin override of HugeInt with the only changes being that it does not perform any allocation/cleanup, \r\n    /// it simply reuses an mpz_ptr from a Rational struct that is allocated and freed by HugeRational.\r\n    /// </para>It inherits the IDisposable implementation from HugeInt, but overrides the DeallocateStruct worker with a no-op.\r\n    /// </summary>\r\n    private ref class HugeIntComponent sealed : MPTYPE\r\n    {\r\n        internal:\r\n            virtual void DeallocateStruct() override { }\r\n            HugeIntComponent(MP(ptr) value) { _value = value; }\r\n    };\r\n\r\n    #pragma endregion\r\n};"
  },
  {
    "path": "mpir.net/mpir.net/HugeRational.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"Stdafx.h\"\n#include \"HugeInt.h\"\n#include \"HugeFloat.h\"\n#include \"HugeRational.h\"\n//#include \"Random.h\"\n\nusing namespace System::Runtime::InteropServices;\nusing namespace System::Text;\n\nnamespace MPIR\n{\n    #pragma region construction\n\n    MPTYPE::MPTYPE()\n    {\n        AllocateStruct();\n        MP(init)(_value);\n    }\n\n    MPTYPE::MPTYPE(bool initialize)\n    {\n        AllocateStruct();\n        if(initialize)\n            MP(init)(_value);\n    }\n\n    MPTYPE::MPTYPE(MPEXPR_NAME^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        value->AssignTo(_value);\n    }\n\n    MPTYPE::MPTYPE(IntegerExpression^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        SetTo(value);\n    }\n\n    MPTYPE::MPTYPE(FloatExpression^ value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        SetTo(value);\n    }\n\n    MPTYPE::MPTYPE(IntegerExpression^ numerator, IntegerExpression^ denominator)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        SetTo(numerator, denominator);\n    }\n    \n    MPTYPE^ MPTYPE::Allocate(mp_bitcnt_t numeratorBits, mp_bitcnt_t denominatorBits)\n    {\n        auto result = gcnew 
MPTYPE(false);\n        mpz_init2(&result->_value->_mp_num, numeratorBits);\n        mpz_init2(&result->_value->_mp_den, denominatorBits);\n        mpz_set_ui(&result->_value->_mp_den, 1);\n        return result;\n    }\n\n    void MPTYPE::FromString(String^ value, int base)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(set_str)(_value, (char*)(void*)ptr, base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n        {\n            DeallocateStruct();\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n        }\n    }\n\n    void MPTYPE::SetTo(String^ value, int base)\n    {\n        IntPtr ptr = Marshal::StringToHGlobalAnsi(value);\n        bool success = 0 == MP(set_str)(_value, (char*)(void*)ptr, base);\n        Marshal::FreeHGlobal(ptr);\n\n        if(!success)\n            throw gcnew ArgumentException(\"Invalid number\", \"value\");\n    }\n\n    MPTYPE::MPTYPE(mpir_si numerator, mpir_ui denominator)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        MP(set_si)(_value, numerator, denominator);\n    }\n\n    MPTYPE::MPTYPE(mpir_ui numerator, mpir_ui denominator)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        MP(set_ui)(_value, numerator, denominator);\n    }\n\n    MPTYPE::MPTYPE(double value)\n    {\n        AllocateStruct();\n        MP(init)(_value);\n        MP(set_d)(_value, value);\n    }\n\n    #pragma endregion\n\n    #pragma region object overrides\n\n    String^ MPTYPE::ToString(int base, bool lowercase, int maxDigits)\n    {\n        auto result = gcnew StringBuilder();\n        result->Append(this->Numerator->ToString(base, lowercase, maxDigits));\n        result->Append((wchar_t)'/');\n        result->Append(this->Denominator->ToString(base, lowercase, maxDigits));\n        return result->ToString();\n    }\n\n    int MPEXPR_NAME::GetHashCode()\n    {\n        
IN_CONTEXT(this);\n        auto n = gcnew HugeIntComponent(&CTXT(0)->_mp_num);\n        auto d = gcnew HugeIntComponent(&CTXT(0)->_mp_den);\n        return n->GetHashCode() ^ d->GetHashCode();\n    }\n\n    #pragma endregion\n\n    #pragma region Interface implementations\n\n    int MPEXPR_NAME::CompareTo(Object^ a, bool& valid)\n    {\n        valid = true;\n\n        if (IS_NULL(a))\n            return 1;\n\n        WHEN_IS(1, a, MPEXPR_NAME)\n            return CompareTo(x1);\n\n        EvaluationContext context;\n\n        WHEN_IS(2, a, IntegerExpression)\n        {\n            ASSIGN_TO(context);\n            x2->AssignToInteger(context);\n            return MP(cmp_z)(CTXT(0), CTXTI(1));\n        }\n\n        if(a->GetType() == mpir_ui::typeid)\n        {\n            ASSIGN_TO(context);\n            return MP(cmp_ui)(CTXT(0), (mpir_ui)a, 1);\n        }\n\n        if(a->GetType() == mpir_si::typeid)\n        {\n            ASSIGN_TO(context);\n            return MP(cmp_si)(CTXT(0), (mpir_si)a, 1);\n        }\n\n        if(a->GetType() == double::typeid)\n        {\n            ASSIGN_TO(context);\n            CTXT_ADD_RATIONAL_DOUBLE((double)a);\n            return MP(cmp)(CTXT(0), CTXT(1));\n        }\n\n        valid = false;\n        return 0;\n    }\n\n    int MPEXPR_NAME::CompareTo(mpir_si numerator, mpir_ui denominator)\n    {\n        IN_CONTEXT(this);\n        return MP(cmp_si)(CTXT(0), numerator, denominator);\n    }\n\n    int MPEXPR_NAME::CompareTo(mpir_ui numerator, mpir_ui denominator)\n    {\n        IN_CONTEXT(this);\n        return MP(cmp_ui)(CTXT(0), numerator, denominator);\n    }\n\n    int MPEXPR_NAME::CompareTo(Object^ a)\n    {\n        bool valid;\n        auto result = CompareTo(a, valid);\n\n        if (valid)\n            return result;\n\n        throw gcnew ArgumentException(\"Invalid argument type\", \"a\");\n    }\n\n    int MPEXPR_NAME::CompareTo(MPEXPR_NAME^ a)\n    {\n        if (IS_NULL(a))\n            return 1;\n\n        
IN_CONTEXT(this, a);\n        return MP(cmp)(CTXT(0), CTXT(1));\n    }\n\n    bool MPEXPR_NAME::Equals(Object^ a)\n    {\n        if (IS_NULL(a))\n            return false;\n\n        WHEN_IS(1, a, MPEXPR_NAME)\n            return Equals(x1);\n\n        EvaluationContext context;\n\n        WHEN_IS(2, a, IntegerExpression)\n        {\n            ASSIGN_TO(context);\n            x2->AssignToRational(context);\n            return MP(equal)(CTXT(0), CTXT(1)) != 0;\n        }\n\n        if(a->GetType() == mpir_ui::typeid)\n        {\n            ASSIGN_TO(context);\n            CTXT_ADD_RATIONAL_UI((mpir_ui)a, 1);\n            return MP(equal)(CTXT(0), CTXT(1)) != 0;\n        }\n\n        if(a->GetType() == mpir_si::typeid)\n        {\n            ASSIGN_TO(context);\n            CTXT_ADD_RATIONAL_SI((mpir_si)a, 1);\n            return MP(equal)(CTXT(0), CTXT(1)) != 0;\n        }\n\n        if(a->GetType() == double::typeid)\n        {\n            ASSIGN_TO(context);\n            CTXT_ADD_RATIONAL_DOUBLE((double)a);\n            return MP(equal)(CTXT(0), CTXT(1)) != 0;\n        }\n\n        return false;\n    }\n\n    bool MPEXPR_NAME::Equals(MPEXPR_NAME^ a)\n    {\n        if (IS_NULL(a))\n            return false;\n\n        IN_CONTEXT(this, a);\n        return MP(equal)(CTXT(0), CTXT(1)) != 0;\n    }\n\n    bool MPEXPR_NAME::Equals(mpir_si numerator, mpir_ui denominator)\n    {\n        IN_CONTEXT(this);\n        CTXT_ADD_RATIONAL_SI(numerator, denominator);\n        return MP(equal)(CTXT(0), CTXT(1)) != 0;\n    }\n\n    bool MPEXPR_NAME::Equals(mpir_ui numerator, mpir_ui denominator)\n    {\n        IN_CONTEXT(this);\n        CTXT_ADD_RATIONAL_UI(numerator, denominator);\n        return MP(equal)(CTXT(0), CTXT(1)) != 0;\n    }\n\n    #pragma endregion\n\n    #pragma region Arithmetic\n\n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, +, Add, Rat, Rat)           \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Rat, Ui)      
      \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Rat, Ui)            \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Rat, Si)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Rat, Si)            \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, +, Add, Rat, IExpr)            \n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, +, Add, Rat, IExpr)            \n                                                                                                        \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, Rat)      \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, Si)\n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, Si)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, IExpr)\n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, -, Subtract, Rat, IExpr)\n                                                                                                        \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, Rat)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, Ui)\n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, Ui)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, Si)\n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, Si)\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, IExpr)\n    MAKE_BINARY_OPERATOR_LLIMB_R   (MPEXPR_NAME,        DEFINE, *, Multiply, Rat, IExpr)\n                                                    
                                                    \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, <<, ShiftLeft, Rat, Bits)   \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, >>, ShiftRight, Rat, Bits)  \n\n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, ^, Power, Rat, Ui)          \n                                                                                                           \n    MAKE_UNARY_OPERATOR            (MPEXPR_NAME,        DEFINE, -, Negate, Rat)             \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Abs, Rat)                   \n    MAKE_VOID_FUNCTION             (MPEXPR_NAME,        DEFINE, Invert, Rat)\n                                                                                                           \n    MAKE_BINARY_OPERATOR_STANDARD  (MPEXPR_NAME,        DEFINE, /, Divide, Rat, Rat)        \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, Ui)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, Ui)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, Si)       \n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, Si)       \n    MAKE_BINARY_OPERATOR_RLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, IExpr)\n    MAKE_BINARY_OPERATOR_LLIMB     (MPEXPR_NAME,        DEFINE, /, Divide, Rat, IExpr)\n                                                                                                           \n    DEFINE_UNARY_ASSIGNMENT_REF(Negate, Rat, MP(neg))\n    DEFINE_UNARY_ASSIGNMENT_REF(Invert, Rat, MP(inv))\n    DEFINE_UNARY_ASSIGNMENT_REF(Abs, Rat, MP(abs))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Add, Rat, MP(add))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATUI(Add, Rat, Ui, MP(add))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATSI(Add, Rat, Si, MP(add))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Add, Rat, IExpr, MP(add))\n\n    
DEFINE_BINARY_ASSIGNMENT_REF_REF(Subtract, Rat, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATUI(Subtract, Rat, Ui, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_RATUI_REF(Subtract, Ui, Rat, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATSI(Subtract, Rat, Si, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_RATSI_REF(Subtract, Si, Rat, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Subtract, Rat, IExpr, MP(sub))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Subtract, IExpr, Rat, MP(sub))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Multiply, Rat, MP(mul))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATUI(Multiply, Rat, Ui, MP(mul))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATSI(Multiply, Rat, Si, MP(mul))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Multiply, Rat, IExpr, MP(mul))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_REF(Divide, Rat, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATUI(Divide, Rat, Ui, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_RATUI_REF(Divide, Ui, Rat, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_RATSI(Divide, Rat, Si, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_RATSI_REF(Divide, Si, Rat, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Divide, Rat, IExpr, MP(div))\n    DEFINE_BINARY_ASSIGNMENT_REF_INTVAL(Divide, IExpr, Rat, MP(div))\n\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftLeft, Rat, Bits, MP(mul_2exp))\n    DEFINE_BINARY_ASSIGNMENT_REF_VAL(ShiftRight, Rat, Bits, MP(div_2exp))\n\n    DEFINE_ASSIGNMENT_PROLOG(PowerRatUi)\n    { \n        IN_CONTEXT(Left);\n        mpz_pow_ui(&destination->_mp_num, &CTXT(0)->_mp_num, Right);\n        mpz_pow_ui(&destination->_mp_den, &CTXT(0)->_mp_den, Right);\n    }\n\n    #pragma endregion\n\n    #pragma region IO\n\n    #define chunkSize 1024\n\n    size_t MPTYPE::Write(Stream^ stream)\n    {\n        auto writtenNumerator = Numerator->Write(stream);\n        if(writtenNumerator == 0)\n            return 0;\n\n        auto writtenDenominator = Denominator->Write(stream);\n        if(writtenDenominator == 0)\n            return 0;\n\n        return writtenNumerator + 
writtenDenominator;\n    }\n\n    size_t MPTYPE::Read(Stream^ stream)\n    {\n        auto readNumerator = Numerator->Read(stream);\n        if(readNumerator == 0)\n            return 0;\n\n        auto readDenominator = Denominator->Read(stream);\n        if(readDenominator == 0)\n            return 0;\n\n        return readNumerator + readDenominator;\n    }\n\n    size_t MPTYPE::Write(TextWriter^ writer, int base, bool lowercase)\n    {\n        auto str = ToString(base, lowercase);\n        writer->Write(str);\n        return str->Length;\n    }\n\n    size_t MPTYPE::Read(TextReader^ reader, int base)\n    {\n        auto readNumerator = Numerator->Read(reader, base);\n        if(readNumerator == 0)\n            return 0;\n\n        size_t readDenominator = 0;\n        char c = reader->Peek();\n        if (c == '/')\n        {\n            reader->Read();\n            readDenominator = 1 + Denominator->Read(reader, base);\n            if(readDenominator == 1)\n                return 0;\n        }\n\n        return readNumerator + readDenominator;\n    }\n\n    #pragma endregion\n\n    #pragma region methods in other classes with rational parameters\n\n    void HugeInt::SetTo(MPEXPR_NAME^ value)\n    {\n        IN_CONTEXT(value);\n        mpz_set_q(_value, CTXT(0));\n    }\n\n    void HugeFloat::SetTo(MPEXPR_NAME^ value)\n    {\n        IN_CONTEXT(value);\n        mpf_set_q(_value, CTXT(0));\n    }\n\n    int IntegerExpression::CompareTo(Object^ a, bool& valid)\n    {\n        valid = true;\n\n        if (IS_NULL(a))\n            return 1;\n\n        WHEN_IS(1, a, IntegerExpression)\n            return CompareTo(x1);\n\n        EvaluationContext context;\n\n        WHEN_IS(2, a, MPEXPR_NAME)\n        {\n            x2->AssignToRational(context);\n            AssignToInteger(context);\n            return -MP(cmp_z)(CTXT(0), CTXTI(1));\n        }\n\n        if (a->GetType() == mpir_ui::typeid)\n        {\n            AssignToInteger(context);\n            return 
mpz_cmp_ui(CTXTI(0), (mpir_ui)a);\n        }\n\n        if (a->GetType() == mpir_si::typeid)\n        {\n            AssignToInteger(context);\n            return mpz_cmp_si(CTXTI(0), (mpir_si)a);\n        }\n\n        if (a->GetType() == double::typeid)\n        {\n            AssignToInteger(context);\n            return mpz_cmp_d(CTXTI(0), (double)a);\n        }\n\n        valid = false;\n        return 0;\n    }\n\n    #pragma endregion\n};"
  },
  {
    "path": "mpir.net/mpir.net/HugeRational.h",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#pragma once\n\nusing namespace System;\nusing namespace System::IO;\nusing namespace System::Runtime::InteropServices;\n\n#ifdef SPECIALIZE_EXPRESSIONS\n#undef SPECIALIZE_EXPRESSIONS\n#undef MP\n#undef CUSTOM_MP\n#undef MPSTRUCT\n#undef MPTYPE\n#undef MPTYPE_NAME\n#undef MPEXPR_NAME\n#undef MPEXPR\n#undef CTXT\n#undef CTXTI\n#undef ASSIGN_TO\n#undef Mpt\n#undef SET_CONTEXT_PRECISION\n#endif\n#define SPECIALIZE_EXPRESSIONS\n#define Mpt Rat\n#define CUSTOM_MP(x) custom_mpq_##x\n#define MPSTRUCT __mpq_struct\n#define MP(x) mpq_##x\n#define MPTYPE HugeRational\n#define MPTYPE_NAME Rational\n#define MPEXPR_NAME LIT(MPTYPE_NAME)Expression\n#define MPEXPR(x) LIT(MPTYPE_NAME)##x##Expression\n#define CTXT(x) context.RationalArgs[x]\n#define CTXTI(x) context.IntArgs[x]\n#define ASSIGN_TO CONCAT(AssignTo, LIT(MPTYPE_NAME))\n#define SET_CONTEXT_PRECISION\n#include \"ExpressionMacros.h\"\n\nnamespace MPIR\n{\n    ref class MpirRandom;\n    ref class MPTYPE;\n\n    #pragma region RationalExpression\n\n    /// <summary>\n    /// Base class for all rational expressions resulting from many rational operations on MPIR types.\n    /// <para>Expressions can be arbitrarily nested, and are lazily evaluated \n    /// when they are either 
assigned to the Value property of an MPIR object, or are consumed by a function or operator that returns a primitive type.\n    /// </para>Assignment to the Value property is necessary because .Net does not support overloading the assignment operator.\n    /// </summary>\n    public ref class MPEXPR_NAME abstract : public IComparable, IComparable<MPEXPR_NAME^>, IEquatable<MPEXPR_NAME^>\n    {\n        internal:\n            MPEXPR_NAME() { }\n            virtual void AssignTo(MP(ptr) destination) abstract;\n            virtual void ASSIGN_TO(EvaluationContext& context)\n            {\n                context.Initialized(RationalInitialized);\n                auto ptr = &context.Temp[context.Index].MPTYPE_NAME;\n                CTXT(context.Index++) = ptr;\n                MP(init)(ptr);\n                AssignTo(ptr); \n            }\n\n        private:\n            int CompareTo(Object^ a, bool& valid);\n\n        public:\n            #pragma region Arithmetic\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An 
expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_ui b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (mpir_ui a, MPEXPR_NAME^ b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, mpir_si b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n      
      static MPEXPR_NAME^ operator + (mpir_si a, MPEXPR_NAME^ b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (MPEXPR_NAME^ a, IntegerExpression^ b);\n\n            /// <summary>Adds two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to add to</param>\n            /// <param name=\"b\">Source value to add</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator + (IntegerExpression^ a, MPEXPR_NAME^ b);\n                                                                                                          \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            
static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_ui b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (mpir_ui a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n      
      /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, mpir_si b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (mpir_si a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a, IntegerExpression^ b);\n                                                                                              \n            /// <summary>Subtracts two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a 
method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to subtract from</param>\n            /// <param name=\"b\">Source value to subtract</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (IntegerExpression^ a, MPEXPR_NAME^ b);\n                                                                                                          \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, mpir_ui b);\n                                                                                       
       \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (mpir_ui a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, mpir_si b);\n                                                                                              \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested 
operation</returns>\n            static MPEXPR_NAME^ operator * (mpir_si a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (MPEXPR_NAME^ a, IntegerExpression^ b);\n                                                                                              \n            /// <summary>Multiplies two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"b\">Source value to multiply by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator * (IntegerExpression^ a, MPEXPR_NAME^ b);\n                                                                                                          \n            /// <summary>Shifts the <paramref name=\"a\"/> source operand to the left by <paramref name=\"bits\"/>, i.e. 
multiplies <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to multiply</param>\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. power of 2 to multiply <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator << (MPEXPR_NAME^ a, mp_bitcnt_t bits);\n                                                                                                          \n            /// <summary>Shifts the <paramref name=\"a\"/> source operand to the right by <paramref name=\"bits\"/>, i.e. divides <paramref name=\"a\"/> by 2^<paramref name=\"bits\"/>.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"bits\">Number of bits to shift <paramref name=\"a\"/> by, i.e. 
power of 2 to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator >> (MPEXPR_NAME^ a, mp_bitcnt_t bits);\n\n            /// <summary>Negates the source value.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to negate</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator - (MPEXPR_NAME^ a);\n                                                                                                          \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n        
    /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, mpir_ui b);\n                                                                                                          \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (mpir_ui a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, mpir_si b);\n                                                 
                                                         \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (mpir_si a, MPEXPR_NAME^ b);\n                                                                                              \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (MPEXPR_NAME^ a, IntegerExpression^ b);\n                                                                                                          \n            /// <summary>Divides two numbers.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to divide</param>\n            /// <param name=\"b\">Source value to divide <paramref name=\"a\"/> by</param>\n   
         /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation.\n            /// </returns>\n            static MPEXPR_NAME^ operator / (IntegerExpression^ a, MPEXPR_NAME^ b);\n                                                                                                          \n            /// <summary>Raises the source value to the specified power.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to raise to the specified power</param>\n            /// <param name=\"power\">Power to raise <paramref name=\"a\"/> to</param>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            static MPEXPR_NAME^ operator ^ (MPEXPR_NAME^ a, mpir_ui power);\n\n            /// <summary>Computes the absolute value of the source number.\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para></summary>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the requested operation</returns>\n            MPEXPR_NAME^ Abs();\n\n            /// <summary>Inverts the number (1/source).\n            /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n            /// </para>If the new denominator is zero, a division-by-zero exception is thrown.</summary>\n            /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, computes the 
requested operation</returns>\n            MPEXPR_NAME^ Invert();\n\n            #pragma endregion\n\n            #pragma region Comparisons\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para>Both this method and Equals() allow the argument to be an IntegerExpression, however we do not define mixed equality operators,\n            /// because otherwise testing for a null/non-null expression would require an awkward explicit cast on the null.\n            /// <para>Although this only applies to equality operators, while comparison operators could have possibly worked, we're leaving out all mixed operators for now.\n            /// </para>Since comparison via CompareTo() or Equals() is possible between ints and rationals, operators would just be another way to do the same thing.</summary>\n            /// <param name=\"a\">Value to compare the source with.  
This can be an integer or rational multi-precision number or expression, or a supported primitive type (long, ulong, or double).</param>\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\n            virtual int CompareTo(Object^ a) sealed;\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Value to compare the source with</param>\n            /// <returns>A positive number if the source is greater than <paramref name=\"a\"/>, negative if less, and zero if they are equal.</returns>\n            virtual int CompareTo(MPEXPR_NAME^ a) sealed;\n\n            /// <summary>Compares two numbers.\n            /// <para>If the source number is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"numerator\">Numerator of the number to compare the source with</param>\n            /// <param name=\"denominator\">Denominator of the number to compare the source with</param>\n            /// <returns>A positive number if the source is greater than <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/>, negative if less, and zero if they are equal.</returns>\n            int CompareTo(mpir_si numerator, mpir_ui denominator);\n\n            /// <summary>Compares two numbers.\n            /// <para>If the source number is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"numerator\">Numerator of the number to compare the source with</param>\n            /// <param name=\"denominator\">Denominator of the number to compare the source with</param>\n            
/// <returns>A positive number if the source is greater than <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/>, negative if less, and zero if they are equal.</returns>\n            int CompareTo(mpir_ui numerator, mpir_ui denominator);\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Value to compare the source with</param>\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\n            virtual bool Equals(MPEXPR_NAME^ a) sealed;\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para>Both this method and CompareTo() allow the argument to be an IntegerExpression, however we do not define mixed equality operators,\n            /// because otherwise testing for a null/non-null expression would require an awkward explicit cast on the null.\n            /// <para>Although this only applies to equality operators, while comparison operators could have possibly worked, we're leaving out all mixed operators for now.\n            /// </para>Since comparison via CompareTo() or Equals() is possible between ints and rationals, operators would just be another way to do the same thing.</summary>\n            /// <param name=\"a\">Value to compare the source with.  
This can be an integer or rational multi-precision number or expression, or a supported primitive type (long, ulong, or double).</param>\n            /// <returns>true if the values of the source and <paramref name=\"a\"/> are equal, false otherwise.</returns>\n            virtual bool Equals(Object^ a) override sealed;\n\n            /// <summary>Compares two numbers.\n            /// <para>If the source number is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"numerator\">Numerator of the number to compare the source with</param>\n            /// <param name=\"denominator\">Denominator of the number to compare the source with</param>\n            /// <returns>true if the values of the source and <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/> are equal, false otherwise.</returns>\n            bool Equals(mpir_si numerator, mpir_ui denominator);\n\n            /// <summary>Compares two numbers.\n            /// <para>If the source number is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"numerator\">Numerator of the number to compare the source with</param>\n            /// <param name=\"denominator\">Denominator of the number to compare the source with</param>\n            /// <returns>true if the values of the source and <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/> are equal, false otherwise.</returns>\n            bool Equals(mpir_ui numerator, mpir_ui denominator);\n\n            /// <summary>Computes the hash code of the source value.\n            /// <para>If called on an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para>Multi-precision classes are mutable with value semantics.  
The hash code is based on the value, and will change if the value changes.\n            /// For this reason, the value of an object must not be modified while the object is contained in a hash table.</summary>\n            /// <returns>a signed integer hash code for the value.</returns>\n            virtual int GetHashCode() override sealed;\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? !IS_NULL(b) : a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  IS_NULL(b) : a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ?  
IS_NULL(b) : a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) ? !IS_NULL(b) : !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (MPEXPR_NAME^ a, MPEXPR_NAME^ b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param 
name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a 
temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (MPEXPR_NAME^ a, mpir_ui b) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (MPEXPR_NAME^ a, mpir_ui b) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (mpir_ui b, MPEXPR_NAME^ a) { return  
IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (mpir_ui b, MPEXPR_NAME^ a) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to 
compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (mpir_ui b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison 
is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (MPEXPR_NAME^ a, mpir_si b) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (MPEXPR_NAME^ a, mpir_si b) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n        
    /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// 
<returns>A boolean result of the comparison.</returns>\n            static bool operator != (mpir_si b, MPEXPR_NAME^ a) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (mpir_si b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// 
</para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <= (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (MPEXPR_NAME^ a, double b) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (MPEXPR_NAME^ a, double b) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Compares two 
numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator <  (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) > 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >= (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) <= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator >  (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || a->CompareTo(b) < 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the 
comparison.</returns>\n            static bool operator <= (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->CompareTo(b) >= 0; }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator != (double b, MPEXPR_NAME^ a) { return  IS_NULL(a) || !a->Equals(b); }\n\n            /// <summary>Compares two numbers.\n            /// <para>If any argument is an expression, it is evaluated into a temporary variable before the comparison is performed.\n            /// </para></summary>\n            /// <param name=\"a\">Source value to compare</param>\n            /// <param name=\"b\">Source value to compare with</param>\n            /// <returns>A boolean result of the comparison.</returns>\n            static bool operator == (double b, MPEXPR_NAME^ a) { return !IS_NULL(a) && a->Equals(b); }\n\n            /// <summary>Calculates the sign (+1, 0, or -1) of the source value.\n            /// <para>If the source is an expression, it is evaluated into a temporary variable before the sign is computed.\n            /// </para></summary>\n            /// <returns>+1 if the source is positive, -1 if negative, and 0 if zero.</returns>\n            int Sign() { IN_CONTEXT(this); return MP(sgn)(CTXT(0)); }\n\n            #pragma endregion\n    };\n\n    #pragma endregion\n\n    #pragma region concrete expressions\n\n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Add, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Rat, Ui)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Rat, Si)\n    
DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Add, Rat, IExpr)\n                                                   \n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Subtract, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Rat, Ui)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, Ui, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Rat, Si)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, Si, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Subtract, Rat, IExpr)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Subtract, IExpr, Rat)\n                                                   \n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Multiply, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Multiply, Rat, Ui)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Multiply, Rat, Si)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Multiply, Rat, IExpr)\n                                                   \n    DEFINE_BINARY_EXPRESSION_WITH_TWO              (MPEXPR_NAME, Divide, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Divide, Rat, Ui)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Divide, Ui, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Divide, Rat, Si)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Divide, Si, Rat)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Divide, Rat, IExpr)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_LEFT    (MPEXPR_NAME, Divide, IExpr, Rat)\n                                                   \n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, ShiftLeft, Rat, Bits)\n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, ShiftRight, Rat, Bits)\n             
                                      \n    DEFINE_BINARY_EXPRESSION_WITH_BUILT_IN_RIGHT   (MPEXPR_NAME, Power, Rat, Ui)\n\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Negate, Rat)\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Abs, Rat)\n    DEFINE_UNARY_EXPRESSION_WITH_ONE               (MPEXPR_NAME, Invert, Rat)\n                                                   \n    #pragma endregion\n\n    #pragma region HugeRational class\n\n    /// <summary>\n    /// Multi-precision Rational class.\n    /// </summary>\n    public ref class MPTYPE : MPEXPR_NAME\n    {\n        internal:\n            //fields\n            MP(ptr) _value;\n\n        private:\n            //construction\n            void AllocateStruct()\n            {\n                _value = (MP(ptr))((*__gmp_allocate_func)(sizeof(MPSTRUCT)));\n            }\n            void FromString(String^ value, int base);\n            MPTYPE(bool initialize);\n            String^ ToString(int base, bool lowercase, int maxDigits);\n\n        internal:\n            virtual void DeallocateStruct()\n            {\n                MP(clear)(_value);\n                (*__gmp_free_func)(_value, sizeof(MPSTRUCT));\n                _value = nullptr;\n            }\n\n            //assignment\n            virtual void AssignTo(MP(ptr) destination) override\n            {\n                if(destination != _value)\n                    MP(set)(destination, _value);\n            }\n            virtual void ASSIGN_TO(EvaluationContext& context) override\n            {\n                CTXT(context.Index++) = _value;\n            }\n\n        public:\n            #pragma region construction and disposal\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value to 0/1\n            /// </summary>\n            MPTYPE();\n\n            /// <summary>\n            /// Initializes a new rational instance, allocating enough memory to hold at least <paramref 
name=\"numeratorBits\"/> + <paramref name=\"denominatorBits\"/> bits, and sets its value to 0/1.\n            /// <para>This is only the initial space, rational will grow automatically in the normal way, if necessary, for subsequent values stored.\n            /// </para>This makes it possible to avoid repeated reallocations if a maximum size is known in advance.\n            /// </summary>\n            /// <param name=\"numeratorBits\">Minimum number of bits the initially allocated memory should hold for the numerator</param>\n            /// <param name=\"denominatorBits\">Minimum number of bits the initially allocated memory should hold for the denominator</param>\n            /// <returns>the newly constructed instance</returns>\n            static MPTYPE^ Allocate(mp_bitcnt_t numeratorBits, mp_bitcnt_t denominatorBits);\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value from the specified string, using leading characters to recognize the base:\n            /// 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\n            /// </summary>\n            /// <param name=\"value\">string representing the initial value for the new instance.  Whitespace in the string is ignored.</param>\n            MPTYPE(String^ value) { FromString(value, 0); }\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value from the specified string\n            /// </summary>\n            /// <param name=\"value\">string representing the initial value for the new instance.  
Whitespace in the string is ignored.</param>\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\n            /// <para>The base may vary from 2 to 62, or if base is 0, then the leading characters are used: 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. \n            /// For bases 37 to 62, upper-case letter represent the usual 10..35 while lower-case letter represent 36..61.</param>\n            MPTYPE(String^ value, int base) { FromString(value, base); }\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value to the result of computing the source expression.\n            /// </summary>\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\n            MPTYPE(MPEXPR_NAME^ value);\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value to the result of computing the source expression.\n            /// </summary>\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\n            MPTYPE(IntegerExpression^ value);\n\n            /// <summary>\n            /// Initializes a new rational instance and sets its value to the result of computing the source expression.\n            /// </summary>\n            /// <param name=\"value\">the expression that will be computed, and the result set as the initial value of the new instance.</param>\n            MPTYPE(FloatExpression^ value);\n\n            /// <summary>\n            /// Constructs and returns a new rational instance with its value set to <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/>.\n            /// <para>If the fraction is not in canonical 
form, Canonicalize() must be called.</para>\n            /// </summary>\n            /// <param name=\"numerator\">Numerator for the initial value for the new rational instance</param>\n            /// <param name=\"denominator\">Denominator for the initial value for the new rational instance</param>\n            MPTYPE(IntegerExpression^ numerator, IntegerExpression^ denominator);\n\n            /// <summary>\n            /// Constructs and returns a new rational instance with its value set to <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/>.\n            /// <para>If the fraction is not in canonical form, Canonicalize() must be called.</para>\n            /// </summary>\n            /// <param name=\"numerator\">Numerator for the initial value for the new rational instance</param>\n            /// <param name=\"denominator\">Denominator for the initial value for the new rational instance</param>\n            MPTYPE(mpir_si numerator, mpir_ui denominator);\n\n            /// <summary>\n            /// Constructs and returns a new rational instance with its value set to <paramref name=\"numerator\"/> / <paramref name=\"denominator\"/>.\n            /// <para>If the fraction is not in canonical form, Canonicalize() must be called.</para>\n            /// </summary>\n            /// <param name=\"numerator\">Numerator for the initial value for the new rational instance</param>\n            /// <param name=\"denominator\">Denominator for the initial value for the new rational instance</param>\n            MPTYPE(mpir_ui numerator, mpir_ui denominator);\n\n            /// <summary>\n            /// Constructs and returns a new rational instance with its value set to the <paramref name=\"value\"/> parameter.\n            /// <para>There is no rounding, this conversion is exact.</para>\n            /// </summary>\n            /// <param name=\"value\">Initial value for the new rational instance.  
This is an exact conversion.</param>\n            MPTYPE(double value);\n\n            /// <summary>\n            /// Removes any factors that are common to the numerator and denominator, and makes the denominator positive.\n            /// <para>Because this operation is expensive for large numbers, it must be called manually only when needed.\n            /// </para>Constructors do not automatically canonicalize the new instance.\n            /// <para>Changing the numerator or denominator directly may, obviously, violate canonical form.\n            /// </para>Normal rational operations assume canonical form of all operands and guarantee it for the result.\n            /// </summary>\n            void Canonicalize() { MP(canonicalize)(_value); }\n\n            //disposal\n\n            //creating a destructor in C++ implements IDisposable.\n\n            /// <summary>\n            /// Frees all memory allocated by the instance.\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\n            /// </para></summary>\n            ~MPTYPE() { this->!MPTYPE(); }\n\n            /// <summary>\n            /// Frees all memory allocated by the instance.\n            /// <para>To minimize memory footprint, multi-precision objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\n            /// </para></summary>\n            !MPTYPE() { if(_value != 0) DeallocateStruct(); }\n\n            #pragma endregion\n\n            #pragma region conversions\n\n            /// <summary>\n            /// Converts the number to a string.\n            /// <para>To avoid debugging performance problems, this method outputs at most the number of digits specified in MpirSettings.ToStringDigits.\n            /// If the number is larger, the least significant digits are shown with a leading ellipsis 
(i.e., [-]...NNNNN)\n            /// </para>Setting MpirSettings.ToStringDigits to 0 removes the upper limit.\n            /// </summary>\n            /// <returns>A string representation of the number in decimal, possibly cut off if the number has more digits than MpirSettings.ToStringDigits.</returns>\n            virtual String^ ToString() override { return ToString(10, false, MpirSettings::ToStringDigits); }\n\n            /// <summary>\n            /// Converts the number to a string in the specified base.\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\n            /// </para></summary>\n            /// <param name=\"base\">The base to use for the output.  The base can be from 2 to 62; uppercase letters represent digits 10-35 and lowercase letters represent digits 36-61.</param>\n            /// <returns>A string representation of the number in the specified base.</returns>\n            String^ ToString(int base) { return ToString(base, false, 0); }\n\n            /// <summary>\n            /// Converts the number to a string in the specified base.\n            /// <para>This method always produces the complete output regardless of the MpirSettings.ToStringDigits setting.\n            /// </para></summary>\n            /// <param name=\"base\">The base to use for the output.\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are 
used.</para></param>\n            /// <returns>A string representation of the number in the specified base.</returns>\n            String^ ToString(int base, bool lowercase) { return ToString(base, lowercase, 0); }\n\n            /// <summary>\n            /// Returns the value of the number as a double, truncating if necessary (rounding towards zero).\n            /// <para>If the exponent from the conversion is too big, the result is system dependent. An infinity is returned where available. \n            /// A hardware overflow trap may or may not occur.\n            /// </para></summary>\n            /// <returns>The value as a double, possibly truncated.</returns>\n            double ToDouble() { return MP(get_d)(_value); }\n\n            /// <summary>\n            /// Gets the numerator of this rational.\n            /// <para>The numerator can be used as a regular integer for any read or write operations.  It does not need to be disposed of.\n            /// </para>If setting the Value of the numerator is not known to maintain canonical form for the rational, Canonicalize() must be called.\n            /// <para>Do not set the Value of the numerator while the rational object is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            property HugeInt^ Numerator\n            {\n                HugeInt^ get() { return gcnew HugeIntComponent(MP(numref)(_value)); }\n            };\n\n            /// <summary>\n            /// Gets the denominator of this rational.\n            /// <para>The denominator can be used as a regular integer for any read or write operations.  
It does not need to be disposed of.\n            /// </para>If setting the Value of the denominator is not known to maintain canonical form for the rational, Canonicalize() must be called.\n            /// <para>Do not set the Value of the denominator while the rational object is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            property HugeInt^ Denominator\n            {\n                HugeInt^ get() { return gcnew HugeIntComponent(MP(denref)(_value)); }\n            };\n\n            #pragma endregion\n\n            #pragma region assignment\n\n            /// <summary>\n            /// When getting, returns this rational.\n            /// <para>When setting, sets the value of the rational object to the value resulting from computing the supplied expression.\n            /// </para>The getter is a no-op and never needs to be invoked directly, but makes compound operators such as +=, *=, etc. possible.\n            /// <para>Do not set the Value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            /// <remarks>\n            /// MPIR types are implemented as reference types with value semantics.\n            /// Like Strings, the objects themselves are just lightweight pointers to data allocated elsewhere.\n            /// Unlike Strings, MPIR types are mutable.\n            /// <para>Value semantics requires you to be able to code, a = b + c.\n            /// However, .Net (outside of C++) does not allow overloading the assignment operator,\n            /// and assigning references would necessitate some unnecessary duplication and extra memory allocations.\n            /// </para>To solve this problem, MPIR.Net uses the property assignment.  
\n            /// The setter of the Value property does what an overloaded assignment operator would do in C++.\n            /// The syntax is a little different: a.Value = b + c, but it is fluent enough to become a quick habit,\n            /// and additionally reinforces the concept that an existing object can change its value while reusing internally allocated memory.\n            /// <para>To this end, all overloaded operators and most functions that operate on MPIR types,\n            /// instead of eagerly computing a result, produce and return an expression that is basically a formula for the computation.\n            /// Expressions can then be composed using additional operators to achieve expression trees of arbitrary complexity.\n            /// All computations are deferred until an expression is assigned to the Value property of an MPIR object,\n            /// consumed by a method or operator that returns a primitive type,\n            /// or supplied as an argument to an MPIR type constructor.\n            /// </para>The getter is a no-op defined to make possible constructs such as a.Value += 5, a.Value *= 10, etc.\n            /// <para>Direct assignments such as a = b + c, a *= 10 will not compile because there is no implicit conversion from an expression.\n            /// Even if an implicit conversion were defined, such code would incur an extra allocation plus garbage collection,\n            /// and would not perform as well as doing the same operations on a.Value.\n            /// </para>It would also not compile if the source were a \"using\" variable, as all method-local rationals should be.\n            /// </remarks>\n            property MPEXPR_NAME^ Value\n            {\n                void set(MPEXPR_NAME^ expr) { expr->AssignTo(_value); }\n                MPEXPR_NAME^ get() { return this; }\n            }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value 
of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\n            /// </summary>\n            /// <param name=\"numerator\">numerator for the new value for the object</param>\n            /// <param name=\"denominator\">denominator for the new value for the object</param>\n            void SetTo(mpir_ui numerator, mpir_ui denominator) { MP(set_ui)(_value, numerator, denominator); }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            /// <param name=\"value\">rational value for the new value for the object</param>\n            void SetTo(mpir_ui value) { SetTo(value, 1); }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\n            /// </summary>\n            /// <param name=\"numerator\">numerator for the new value for the object</param>\n            /// <param name=\"denominator\">denominator for the new value for the object</param>\n            void SetTo(mpir_si numerator, mpir_ui denominator) { MP(set_si)(_value, numerator, denominator); }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            /// <param name=\"value\">rational value for the new value for the object</param>\n            void SetTo(mpir_si value) { SetTo(value, 1); }\n\n           
 /// <summary>\n            /// Sets the value of the rational object.  This is an exact conversion, there is no rounding.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            /// <param name=\"value\">new value for the object</param>\n            void SetTo(double value) { MP(set_d)(_value, value); }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\n            /// </summary>\n            /// <param name=\"value\">new value for the object.\n            /// <para>May be an integer or a pair of integers separated by a slash.\n            /// </para>The string's leading characters may indicate base:\n            /// 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise</param>\n            void SetTo(String^ value) { SetTo(value, 0); }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\n            /// </summary>\n            /// <param name=\"value\">new value for the object</param>\n            /// <param name=\"base\">base the <paramref name=\"value\"/> string is in.\n            /// <para>The base may vary from 2 to 62, or if base is 0, then the leading characters are used: 0x and 0X for hexadecimal, 0b and 0B for binary, 0 for octal, or decimal otherwise.\n            /// </para>For bases up to 36, case is ignored; upper-case and lower-case letters have the same value. 
 \n            /// For bases 37 to 62, upper-case letters represent the usual 10..35 while lower-case letters represent 36..61.</param>\n            void SetTo(String^ value, int base);\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para></summary>\n            /// <param name=\"value\">new value for the object</param>\n            void SetTo(IntegerExpression^ value)\n            {\n                value->AssignTo(&_value->_mp_num);\n                mpz_set_ui(&_value->_mp_den, 1);\n            }\n\n            /// <summary>\n            /// Sets the value of the rational object.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the fraction is not in canonical form, Canonicalize() must be called.\n            /// </summary>\n            /// <param name=\"numerator\">Numerator for the new value for the object</param>\n            /// <param name=\"denominator\">Denominator for the new value for the object</param>\n            void SetTo(IntegerExpression^ numerator, IntegerExpression^ denominator)\n            {\n                //use context in case source expressions reference the previous numerator or denominator of the rational\n                EvaluationContext context;\n                numerator->AssignToInteger(context);\n                denominator->AssignToInteger(context);\n                mpz_set(&_value->_mp_num, CTXTI(0));\n                mpz_set(&_value->_mp_den, CTXTI(1));\n            }\n\n            /// <summary>\n            /// Sets the value of the rational object.  
There is no rounding, this conversion is exact.\n            /// <para>Do not change the value of an object while it is contained in a hash table, because that changes its hash code.\n            /// </para>If the argument is an expression, it is evaluated with the current default float precision.</summary>\n            /// <param name=\"value\">new value for the object</param>\n            void SetTo(FloatExpression^ value);\n\n            /// <summary>\n            /// Swaps the values of two rationals.\n            /// <para>This operation is a pointer swap and doesn't affect allocated memory.\n            /// </para>Do not call this method while either object is contained in a hash table, because this would change their hash codes.\n            /// </summary>\n            /// <param name=\"a\">Source number to swap this instance's value with</param>\n            void Swap(MPTYPE^ a) \n            { \n                MP(swap)(_value, a->_value);\n            }\n\n            #pragma endregion\n\n            #pragma region Size checks\n\n            /// <summary>\n            /// Returns the number of digits the number would take if written in the specified base.\n            /// <para>The sign of the number is ignored, just the absolute value is used.\n            /// </para>The result will be either exact or at most 2 characters too big.\n            /// If <paramref name=\"base\"/> is a power of 2, the result will always be exact.\n            /// <para>If the number is 0, the result is always 3.\n            /// </para>This function can be used to estimate the space required when converting to a string.\n            /// The right amount of allocation is normally two more than the value returned,\n            /// one extra for a minus sign and one for the null-terminator.\n            /// <para>A slash between numerator and denominator is accounted for.</para></summary>\n            /// <param name=\"base\">Numeric base for the would-be string conversion, in 
the range from 2 to 62.</param>\n            /// <returns>The number of digits the number would take written in the specified base, possibly 1 or 2 too big, not counting a leading minus.</returns>\n            mp_size_t ApproximateSizeInBase(int base) { return mpz_sizeinbase(&_value->_mp_num, base) + mpz_sizeinbase(&_value->_mp_den, base) + 1; }\n\n            #pragma endregion\n\n            #pragma region IO\n\n            /// <summary>\n            /// Outputs the rational to the <paramref name=\"stream\"/> in raw binary format.\n            /// <para>The number's numerator and denominator are written in sequence, each in a portable format,\n            /// with 4 bytes of size information, and that many bytes of limbs.\n            /// </para>Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).\n            /// <para>The output can be read with Read(Stream).\n            /// </para>The output cannot be read by MP(inp_raw) from GMP 1, because of changes necessary\n            /// for compatibility between 32-bit and 64-bit machines.\n            /// </summary>\n            /// <param name=\"stream\">Stream to output the number to</param>\n            /// <returns>the number of bytes written, or 0 if an error occurs.</returns>\n            size_t Write(Stream^ stream);\n\n            /// <summary>\n            /// Reads the rational value from the <paramref name=\"stream\"/> in raw binary format, as it would have been written by Write(Stream).\n            /// <para>The number's numerator and denominator are read in sequence, each in a portable format,\n            /// with 4 bytes of size information, and that many bytes of limbs.\n            /// </para>Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).\n            /// <para>This routine can read the output from MP(out_raw) also from GMP 1, in spite of changes\n            /// necessary for compatibility between 32-bit and 
64-bit machines.\n            /// </para></summary>\n            /// <param name=\"stream\">Stream to input the number from</param>\n            /// <returns>the number of bytes read, or 0 if an error occurs.</returns>\n            size_t Read(Stream^ stream);\n\n            /// <summary>\n            /// Outputs the rational to the <paramref name=\"writer\"/> as a string of digits in decimal.\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\n            /// it is useful to separate the numbers with a character that is not a valid decimal digit.\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\n            /// </summary>\n            /// <param name=\"writer\">Text writer to output the number to</param>\n            /// <returns>the number of characters written</returns>\n            size_t Write(TextWriter^ writer) { return Write(writer, 0, false); }\n\n            /// <summary>\n            /// Outputs the rational to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\n            /// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name=\"base\"/>.\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\n            /// <para>For hexadecimal, binary, or octal, no leading base indication is written.\n            /// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.\n            /// </summary>\n            /// <param name=\"writer\">Text writer to output the number to</param>\n            /// <param name=\"base\">The base to use for the output.\n            /// <para>The 
base can be from 2 to 62; uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</para></param>\n            /// <returns>the number of characters written</returns>\n            size_t Write(TextWriter^ writer, int base) { return Write(writer, base, false); }\n\n            /// <summary>\n            /// Outputs the rational to the <paramref name=\"writer\"/> as a string of digits in base <paramref name=\"base\"/>.\n            /// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,\n            /// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name=\"base\"/>.\n            /// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.\n            /// <para>For hexadecimal, binary, or octal, no leading base indication is written.\n            /// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.\n            /// </summary>\n            /// <param name=\"writer\">Text writer to output the number to</param>\n            /// <param name=\"base\">The base to use for the output.\n            /// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name=\"lowercase\"/> argument.\n            /// </para>For bases larger than 36, the <paramref name=\"lowercase\"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>\n            /// <param name=\"lowercase\">Indicates if lowercase or uppercase letters should be used for the output.\n            /// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>\n            /// <returns>the number of characters written</returns>\n            size_t Write(TextWriter^ writer, int 
 base, bool lowercase);\n\n            /// <summary>\n            /// Inputs the number as a possibly white-space preceded string.\n            /// <para>The base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.\n            /// </para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.\n            /// <para>This method reads the output of a Write(TextWriter) when decimal base is used.\n            /// </para>For hexadecimal, binary, or octal, because Write(TextWriter) doesn't write leading base indication characters, \n            /// using this overload of Read will fail to recognize the correct base.</summary>\n            /// <param name=\"reader\">Text reader to input the number from</param>\n            /// <returns>the number of characters read</returns>\n            size_t Read(TextReader^ reader) { return Read(reader, 0); }\n\n            /// <summary>\n            /// Inputs the number as a possibly white-space preceded string in base <paramref name=\"base\"/> from the <paramref name=\"reader\"/>.\n            /// <para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.\n            /// </para>This method reads the output of a Write(TextWriter) method.\n            /// </summary>\n            /// <param name=\"reader\">Text reader to input the number from</param>\n            /// <param name=\"base\">The base to use for the input.\n            /// <para>The base can be from 2 to 62; For bases up to 36 case is ignored.\n            /// </para>For bases larger than 36, uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.\n            /// <para>If 0, the base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.\n            /// </para>Note that the 
leading base characters are not written by the Write method.</param>\n            /// <returns>the number of characters read</returns>\n            size_t Read(TextReader^ reader, int base);\n\n            #pragma endregion\n    };\n\n    #pragma endregion\n};\n"
  },
  {
    "path": "mpir.net/mpir.net/Random.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"StdAfx.h\"\n#include \"HugeInt.h\"\n#include \"Random.h\"\n\nnamespace MPIR\n{\n    DEFINE_ASSIGNMENT_PROLOG(Random)\n    {\n        IN_CONTEXT(Right);\n        MP(urandomm)(destination, Left->_value, context.IntArgs[0]);\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(RandomBits)\n    {\n        MP(urandomb)(destination, Left->_value, Right);\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(RandomBitsChunky)\n    {\n        MP(rrandomb)(destination, Left->_value, Right);\n    }\n};\n"
  },
  {
    "path": "mpir.net/mpir.net/Random.h",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#pragma once\n\nusing namespace System;\n\nnamespace MPIR\n{\n    /// <summary>\n    /// This class encapsulates a random number generator algorithm and state\n    /// </summary>\n    public ref class MpirRandom\n    {\n        internal:\n            //fields\n            gmp_randstate_ptr _value;\n\n        #pragma region Construction and disposal\n\n        private:\n            MpirRandom()\n            {\n                _value = (gmp_randstate_ptr)((*__gmp_allocate_func)(sizeof(__gmp_randstate_struct)));\n            }\n\n        public:\n\n            //creating a destructor in C++ implements IDisposable.\n\n            /// <summary>\n            /// Frees all memory allocated by the instance.\n            /// <para>To minimize memory footprint, MPIR objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\n            /// </para></summary>\n            ~MpirRandom() { this->!MpirRandom(); }\n\n            /// <summary>\n            /// Frees all memory allocated by the instance.\n            /// <para>To minimize memory footprint, MPIR objects should be disposed of when no longer used, instead of relying on the garbage collector to free the memory.\n         
   /// </para></summary>\n            !MpirRandom()\n            {\n                if(_value != 0) \n                {\n                    gmp_randclear(_value);\n                    (*__gmp_free_func)(_value, sizeof(__gmp_randstate_struct));\n                    _value = nullptr;\n                }\n            }\n\n        #pragma endregion\n\n        #pragma region Initialization factory methods\n\n            /// <summary>\n            /// Creates a new random generator with a copy of the algorithm and state from this instance\n            /// </summary>\n            /// <returns>The newly initialized instance</returns>\n            MpirRandom^ Copy()\n            {\n                auto result = gcnew MpirRandom();\n                gmp_randinit_set(result->_value, _value);\n                return result;\n            }\n\n            /// <summary>\n            /// Creates a new random generator with the default algorithm.\n            /// <para>This will be a compromise between speed and randomness, and is recommended for applications with no special requirements.\n            /// </para>Currently Mersenne Twister is the default algorithm.\n            /// </summary>\n            /// <returns>The newly initialized instance</returns>\n            static MpirRandom^ Default()\n            {\n                auto result = gcnew MpirRandom();\n                gmp_randinit_default(result->_value);\n                return result;\n            }\n            \n            /// <summary>\n            /// Creates a new random generator with the Mersenne Twister algorithm. 
This algorithm is fast and has good randomness properties.\n            /// </summary>\n            /// <returns>The newly initialized instance</returns>\n            static MpirRandom^ MersenneTwister()\n            {\n                auto result = gcnew MpirRandom();\n                gmp_randinit_mt(result->_value);\n                return result;\n            }\n\n            /// <summary>\n            /// Creates a new random generator with a linear congruential algorithm X = (aX + c) mod 2^m.\n            /// <para>The low bits of X in this algorithm are not very random. The least significant bit will have\n            /// a period no more than 2, and the second bit no more than 4, etc. For this reason only the\n            /// high half of each X is actually used.\n            /// </para>When a random number of more than m/2 bits is to be generated, multiple iterations\n            /// of the recurrence are used and the results concatenated.\n            /// </summary>\n            /// <param name=\"a\">The multiplier in the algorithm's formula X = (aX + c) mod 2^m</param>\n            /// <param name=\"c\">The addend in the algorithm's formula X = (aX + c) mod 2^m</param>\n            /// <param name=\"m\">The power of 2 for the divisor in the algorithm's formula X = (aX + c) mod 2^m</param>\n            /// <returns>The newly initialized instance</returns>\n            static MpirRandom^ LinearCongruential(HugeInt^ a, mpir_ui c, mp_bitcnt_t m)\n            {\n                auto result = gcnew MpirRandom();\n                gmp_randinit_lc_2exp(result->_value, a->_value, c, m);\n                return result;\n            }\n\n            /// <summary>\n            /// Creates a new random generator with a linear congruential algorithm X = (aX + c) mod 2^m.\n            /// <para>a, c and m are selected from a table, chosen so that size bits (or more) of each X will be used, i.e. 
m/2 >= size.\n            /// </para>The maximum size currently supported is 128.\n            /// </summary>\n            /// <param name=\"size\">The number of high bits that should be used from each iteration of the algorithm.  The current maximum is 128.</param>\n            /// <returns>The newly initialized instance</returns>\n            static MpirRandom^ LinearCongruential(mp_bitcnt_t size)\n            {\n                auto result = gcnew MpirRandom();\n                if(gmp_randinit_lc_2exp_size(result->_value, size) == 0)\n                {\n                    result->!MpirRandom();\n                    throw gcnew ArgumentException(\"Invalid size\", \"size\");\n                }\n                return result;\n            }\n\n        #pragma endregion\n\n        #pragma region Seeding\n\n            /// <summary>\n            /// Sets an initial seed value into the random number generator.\n            /// </summary>\n            /// <param name=\"seed\">The seed value</param>\n            /// <remarks>\n            /// The size of a seed determines how many different sequences of random numbers it's\n            /// possible to generate. The \"quality\" of the seed is the randomness of a given seed compared\n            /// to the previous seed used, and this affects the randomness of separate number sequences. The\n            /// method for choosing a seed is critical if the generated numbers are to be used for important\n            /// applications, such as generating cryptographic keys.\n            /// <para>Traditionally the system time has been used to seed, but care needs to be taken with this.\n            /// If an application seeds often and the resolution of the system clock is low, then the same\n            /// sequence of numbers might be repeated. 
Also, the system time is quite easy to guess, so if\n            /// unpredictability is required then it should definitely not be the only source for the seed value.\n            /// </para>On some systems there's a special device '/dev/random' which provides random data better\n            /// suited for use as a seed.\n            /// </remarks>\n            void Seed(mpir_ui seed) { gmp_randseed_ui(_value, seed); }\n\n            /// <summary>\n            /// Sets an initial seed value into the random number generator.\n            /// </summary>\n            /// <param name=\"seed\">The seed value</param>\n            /// <remarks>\n            /// The size of a seed determines how many different sequences of random numbers it's\n            /// possible to generate. The \"quality\" of the seed is the randomness of a given seed compared\n            /// to the previous seed used, and this affects the randomness of separate number sequences. The\n            /// method for choosing a seed is critical if the generated numbers are to be used for important\n            /// applications, such as generating cryptographic keys.\n            /// <para>Traditionally the system time has been used to seed, but care needs to be taken with this.\n            /// If an application seeds often and the resolution of the system clock is low, then the same\n            /// sequence of numbers might be repeated. 
Also, the system time is quite easy to guess, so if\n            /// unpredictability is required then it should definitely not be the only source for the seed value.\n            /// </para>On some systems there's a special device '/dev/random' which provides random data better\n            /// suited for use as a seed.\n            /// </remarks>\n            void Seed(HugeInt^ seed) { gmp_randseed(_value, seed->_value); }\n\n        #pragma endregion\n\n        #pragma region Random Limb\n\n            /// <summary>\n            /// Generates a uniformly distributed random number of <paramref name=\"bitCount\"/> bits, i.e. in the range 0 to 2^<paramref name=\"bitCount\"/>-1 inclusive.\n            /// </summary>\n            /// <param name=\"bitCount\">The number of random bits to generate.  Must be less than or equal to the number of bits in a limb.</param>\n            /// <returns>The newly generated number</returns>\n            mpir_ui GetLimbBits(mpir_ui bitCount) { return gmp_urandomb_ui(_value, bitCount); }\n\n            /// <summary>\n            /// Generates a uniformly distributed random number in the range 0 to <paramref name=\"max\"/>-1 inclusive.\n            /// </summary>\n            /// <param name=\"max\">The exclusive upper bound for the random number</param>\n            /// <returns>The newly generated number</returns>\n            mpir_ui GetLimb(mpir_ui max) { return gmp_urandomm_ui(_value, max); }\n\n        #pragma endregion\n\n        #pragma region Random Int\n\n        /// <summary>\n        /// Generates a uniformly distributed random integer in the range 0 to 2^<paramref name=\"bitCount\"/> - 1, inclusive.\n        /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </para></summary>\n        /// <param name=\"bitCount\">number of bits to generate</param>\n        /// <returns>An expression object that, when assigned to the 
Value property or consumed by a primitive-returning method, generates the random number</returns>\n        IntegerExpression^ GetIntBits(mp_bitcnt_t bitCount) { return gcnew IntegerRandomBitsExpression(this, bitCount); }\n\n        /// <summary>\n        /// Generates a random integer with long strings of zeros and ones in the binary representation.\n        /// <para>Useful for testing functions and algorithms, since this kind of random numbers have proven\n        /// to be more likely to trigger corner-case bugs.\n        /// </para>The random number will be in the range 0 to 2^<paramref name=\"bitCount\"/> - 1, inclusive.\n        /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </para></summary>\n        /// <param name=\"bitCount\">number of bits to generate</param>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        IntegerExpression^ GetIntBitsChunky(mp_bitcnt_t bitCount) { return gcnew IntegerRandomBitsChunkyExpression(this, bitCount); }\n\n        /// <summary>\n        /// Generates a uniformly distributed random integer in the range 0 to <paramref name=\"max\"/> - 1, inclusive.\n        /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </para></summary>\n        /// <param name=\"max\">exclusive upper bound for the number to generate</param>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        IntegerExpression^ GetInt(IntegerExpression^ max) { return gcnew IntegerRandomExpression(this, max); }\n\n        #pragma endregion\n\n        #pragma region Random Float\n\n        /// <summary>\n        /// 
Generates a uniformly distributed random float in the range 0 &lt;= n &lt; 1, using the precision of the destination.\n        /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </para></summary>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        FloatExpression^ GetFloat();\n\n        /// <summary>\n        /// Generates a uniformly distributed random float in the range 0 &lt;= n &lt; 1 with the specified number of significant bits in the mantissa.\n        /// <para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </para></summary>\n        /// <param name=\"bitCount\">number of mantissa bits to generate</param>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        FloatExpression^ GetFloatBits(mp_bitcnt_t bitCount);\n\n        /// <summary>\n        /// Generates a random float with long strings of zeros and ones in the binary representation, using the precision of the destination.\n        /// <para>Useful for testing functions and algorithms, since this kind of random numbers have proven\n        /// to be more likely to trigger corner-case bugs.\n        /// </para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </summary>\n        /// <param name=\"maxExponent\">The maximum absolute value for the exponent of the generated number (expressed in limbs).  
Generated exponent may be positive or negative.</param>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        FloatExpression^ GetFloatChunky(mp_exp_t maxExponent);\n\n        /// <summary>\n        /// Generates a random float with long strings of zeros and ones in the binary representation, and the specified number of significant limbs in the mantissa.\n        /// <para>Useful for testing functions and algorithms, since this kind of random numbers have proven\n        /// to be more likely to trigger corner-case bugs.\n        /// </para>As with all expressions, the result is not computed until the expression is assigned to the Value property or consumed by a method.\n        /// </summary>\n        /// <param name=\"limbCount\">number of mantissa limbs to generate.\n        /// <para>The sign of this parameter determines the sign of the generated mantissa.</para></param>\n        /// <param name=\"maxExponent\">The maximum absolute value for the exponent of the generated number (expressed in limbs).  Generated exponent may be positive or negative.</param>\n        /// <returns>An expression object that, when assigned to the Value property or consumed by a primitive-returning method, generates the random number</returns>\n        FloatExpression^ GetFloatLimbsChunky(mp_size_t limbCount, mp_exp_t maxExponent);\n\n        #pragma endregion\n    };\n};\n"
  },
  {
    "path": "mpir.net/mpir.net/RandomFloat.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n#include \"StdAfx.h\"\n#include \"HugeInt.h\"\n#include \"Random.h\"\n#include \"HugeRational.h\"\n#include \"HugeFloat.h\"\n\nnamespace MPIR\n{\n    MPEXPR_NAME^ MpirRandom::GetFloat() { return gcnew FloatRandomExpression(this); }\n    MPEXPR_NAME^ MpirRandom::GetFloatBits(mp_bitcnt_t bitCount) { return gcnew FloatRandomBitsExpression(this, bitCount); }\n    MPEXPR_NAME^ MpirRandom::GetFloatChunky(mp_exp_t maxExponent) { return gcnew FloatRandomChunkyExpression(this, maxExponent); }\n    MPEXPR_NAME^ MpirRandom::GetFloatLimbsChunky(mp_size_t limbCount, mp_exp_t maxExponent) { return gcnew FloatRandomLimbsChunkyExpression(this, limbCount, maxExponent); }\n\n    DEFINE_ASSIGNMENT_PROLOG(Random)\n    {\n        MP(urandomb)(destination, Operand->_value, MP(get_prec)(destination));\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(RandomBits)\n    {\n        MP(urandomb)(destination, Left->_value, Right);\n    }\n\n    DEFINE_ASSIGNMENT_PROLOG(RandomChunky)\n    {\n        MP(rrandomb)(destination, Left->_value, BITS_TO_LIMBS(MP(get_prec)(destination)), Right);\n    }\n\n    //TODO investigate implementing raw IO for floats\n\n    DEFINE_ASSIGNMENT_PROLOG(RandomLimbsChunky)\n    {\n        MP(rrandomb)(destination, Left->_value, 
Middle, Right);\n    }\n};\n"
  },
  {
    "path": "mpir.net/mpir.net/ReadMe.txt",
    "content": "Copyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n\n========================================================================\nThis project is a .Net wrapper for the MPIR library.\nIt is a new implementation of the MPIR interface, \ninspired by the MPIR C++ interface but using C++/CLI,\nfor consumption by managed .Net languages.\nIt is a Visual Studio build only, targeting .Net on Windows x86 or x64.\nIt can be linked with a Win32 or x64 MPIR static (Lib) build for any \nMPIR-supported processor architecture.\n========================================================================\n"
  },
  {
    "path": "mpir.net/mpir.net/Stdafx.cpp",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n// stdafx.cpp : source file that includes just the standard includes\n// mpir.net.pch will be the pre-compiled header\n// stdafx.obj will contain the pre-compiled type information\n\n#include \"stdafx.h\"\n"
  },
  {
    "path": "mpir.net/mpir.net/Stdafx.h",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\n// stdafx.h : include file for standard system include files,\n// or project specific include files that are used frequently,\n// but are changed infrequently\n\n#pragma once\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"Common.h\"\n"
  },
  {
    "path": "mpir.net/mpir.net/resource.h",
    "content": "//{{NO_DEPENDENCIES}}\n// Microsoft Visual C++ generated include file.\n// Used by app.rc\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Arithmetic.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    public static class FloatAssert\n    {\n        public static void AreEqual(string expected, HugeFloat actual)\n        {\n            var actualStr = actual.ToString();\n            if(expected[0] == '-')\n            {\n                Assert.AreEqual(expected[0], actualStr[0]);\n                actualStr = actualStr.TrimStart('-');\n                expected = expected.TrimStart('-');\n            }\n\n            var exponent = expected.IndexOf('.');\n            if(exponent < 0) exponent = expected.Length;\n            expected = expected.Replace(\".\", \"\");\n\n            var exponentStr = \"@\" + exponent;\n\n            Assert.IsTrue(actualStr.StartsWith(\"0.\"));\n            actualStr = actualStr.Substring(2);\n\n            Assert.IsTrue(actualStr.EndsWith(exponentStr));\n            actualStr = actualStr.Substring(0, actualStr.Length - exponentStr.Length);\n\n            if (expected.Length > actualStr.Length)\n            {\n                var roundedUp = expected[actualStr.Length] >= '5';\n                expected = expected.Substring(0, actualStr.Length);\n                if(roundedUp)\n                {\n   
                 using (var a = new HugeInt(expected))\n                    {\n                        a.Value += 1;\n                        expected = a.ToString(10);\n                    }\n                }\n            }\n            Assert.AreEqual(expected, actualStr);\n        }\n    }\n\n    [TestClass]\n    public class Arithmetic\n    {\n        [ClassInitialize]\n        public static void Setup(TestContext context)\n        {\n            HugeFloat.DefaultPrecision = 128;\n        }\n\n        [ClassCleanup]\n        public static void Cleanup()\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        #region Add\n\n        [TestMethod]\n        public void FloatAddHugeFloat()\n        {\n            using (var a = new HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            using (var b = new HugeFloat(\"2229874359879827.30594288574029879874539\"))\n            using (var c = new HugeFloat(a + b))\n            {\n                FloatAssert.AreEqual(\"22250983252574904194382810.87997227031229879874539\", c);\n                c.Value = a + (b + 1);\n                FloatAssert.AreEqual(\"22250983252574904194382811.87997227031229879874539\", c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAddLimb()\n        {\n            using (var a = new HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            using (var c = new HugeFloat())\n            {\n                var b = Platform.Ui(4288574029879874539UL, 4288574029U);\n                var expected = Platform.Select(\"22250987538919059714377522.5740293845720\", \"22250983250345034123077012.5740293845720\");\n                c.Value = a + b;\n                FloatAssert.AreEqual(expected, c);\n                c.Value = b + a;\n                FloatAssert.AreEqual(expected, c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAddSignedLimb()\n        {\n            using (var a = new 
HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            using (var c = new HugeFloat())\n            {\n                var b = Platform.Si(-4288574029879874539, -1288574029);\n                var expected = Platform.Select(\"22250978961770999954628444.5740293845720\", \"22250983250345028545928954.5740293845720\");\n                c.Value = a + b;\n                FloatAssert.AreEqual(expected, c);\n                c.Value = b + a;\n                FloatAssert.AreEqual(expected, c);\n            }\n        }\n\n        #endregion\n\n        #region Subtract\n\n        [TestMethod]\n        public void FloatSubtractHugeFloat()\n        {\n            using (var a = new HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            using (var b = new HugeFloat(\"2229874359879827.30594288574029879874539\"))\n            using (var c = new HugeFloat(a - b))\n            {\n                FloatAssert.AreEqual(\"22250983248115155474623156.26808649883170120125461\", c);\n                c.Value = b - (a + 1);\n                FloatAssert.AreEqual(\"-22250983248115155474623157.26808649883170120125461\", c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatSubtractLimb()\n        {\n            using(var a = new HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            using(var c = new HugeFloat())\n            {\n                var b = Platform.Ui(4288574029879874539UL, 2885740298U);\n                var expected = Platform.Select(\"22250978961770999954628444.5740293845720\", \"22250983250345026948762685.5740293845720\");\n                c.Value = a - b;\n                FloatAssert.AreEqual(expected, c);\n                c.Value = b - a;\n                FloatAssert.AreEqual(\"-\" + expected, c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatSubtractSignedLimb()\n        {\n            using(var a = new HugeFloat(\"22250983250345029834502983.5740293845720\"))\n            
using(var c = new HugeFloat())\n            {\n                var b = Platform.Si(-4288574029879874539, -1885740298);\n                var expected = Platform.Select(\"22250987538919059714377522.5740293845720\", \"22250983250345031720243281.5740293845720\");\n                c.Value = a - b;\n                FloatAssert.AreEqual(expected, c);\n                c.Value = b - a;\n                FloatAssert.AreEqual(\"-\" + expected, c);\n            }\n        }\n\n        #endregion\n\n        #region Multiply\n\n        [TestMethod]\n        public void FloatMultiplyByHugeFloat()\n        {\n            using (var a = new HugeFloat(\"90234098723098475098479385.345098345\"))\n            using (var b = new HugeFloat(\"78594873598734.59872354987610987897\"))\n            using (var c = new HugeFloat(a * b))\n            {\n                FloatAssert.AreEqual(\"7091937583437663707014199538801620613535.95657143399816050772069730465\", c);\n                c.Value = b * (a + 1);\n                FloatAssert.AreEqual(\"7091937583437663707014199617396494212270.55529498387427038669069730465\", c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatMultiplyByLimb()\n        {\n            using (var a = new HugeFloat(\"9023409872309847509847.9385345098345\"))\n            using (var c = new HugeFloat())\n            {\n                var b = Platform.Ui(17390538260286101342, 1500450271);\n                var expected = Platform.Select(\"156921954622647727368660197878904460649174.746962647899\", \"13539177788251386092120014542896.6240276901495\");\n                c.Value = a * b;\n                FloatAssert.AreEqual(expected, c);\n                c.Value = b * -a;\n                FloatAssert.AreEqual(\"-\" + expected, c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatMultiplyBySignedLimb()\n        {\n            using (var a = new HugeFloat(\"9023409872309847509847.9385345098345\"))\n            using (var c = new 
HugeFloat())\n            {\n                var b = Platform.Si(-7390538260286101342, -1500450271);\n                var expected = Platform.Select(\"66687855899549252270180812533806115649174.746962647899\", \"13539177788251386092120014542896.6240276901495\");\n                c.Value = a * b;\n                FloatAssert.AreEqual(\"-\" + expected, c);\n                c.Value = b * -a;\n                FloatAssert.AreEqual(expected, c);\n            }\n        }\n\n        #endregion\n\n        #region Shift Left\n\n        [TestMethod]\n        public void FloatShiftLeft()\n        {\n            using (var a = new HugeFloat(\"-12345700987ABA245230948.17607EF\", 16))\n            using (var e = new HugeFloat(\"-12345700987ABA24523094817607.EF\", 16))\n            {\n                uint b = 20;\n                a.Value = a << b;\n                Assert.AreEqual(e, a);\n            }\n        }\n\n        #endregion\n\n        #region Shift Right\n\n        [TestMethod]\n        public void FloatShiftRight()\n        {\n            using (var a = new HugeFloat(\"-12345700987ABA24523094817607.EF\", 16))\n            using (var e = new HugeFloat(\"-12345700987ABA245230948.17607EF\", 16))\n            {\n                uint b = 20;\n                a.Value = a >> b;\n                Assert.AreEqual(e, a);\n            }\n        }\n\n        #endregion\n\n        #region Negate\n\n        [TestMethod]\n        public void FloatNegate()\n        {\n            using(var a = new HugeFloat(\"9023409872309847509847.9385345098345\"))\n            {\n                a.Value = -a;\n                FloatAssert.AreEqual(\"-9023409872309847509847.9385345098345\", a);\n                a.Value = -a;\n                FloatAssert.AreEqual(\"9023409872309847509847.9385345098345\", a);\n            }\n        }\n\n        #endregion\n\n        #region Abs\n\n        [TestMethod]\n        public void FloatMakeAbsolute()\n        {\n            using(var a = new 
HugeFloat(\"-9023409872309847509847.9385345098345\"))\n            {\n                a.Value = a.Abs();\n                FloatAssert.AreEqual(\"9023409872309847509847.9385345098345\", a);\n                a.Value = a.Abs();\n                FloatAssert.AreEqual(\"9023409872309847509847.9385345098345\", a);\n            }\n        }\n\n        #endregion\n\n        #region Power\n\n        [TestMethod]\n        public void FloatPower()\n        {\n            using(var a = new HugeFloat(\"-902340.945098345\"))\n            {\n                a.Value = a ^ 5;\n                FloatAssert.AreEqual(\"-598209523815275040074985233466.4619735146023546465747916785912044\", a);\n            }\n        }\n\n        #endregion\n\n        #region Sqrt\n\n        [TestMethod]\n        public void FloatSqrt()\n        {\n            using(var a = new HugeFloat(\"9023404035982394058230584.9434590783455098345\"))\n            {\n                a.Value = a.SquareRoot();\n                FloatAssert.AreEqual(\"3003898140081.0504277325488426221152130989784856425363142756\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatSqrtLimb()\n        {\n            using(var a = new HugeFloat())\n            {\n                var source = Platform.Ui(2540928740928357403U, 254092874U);\n                var expected = Platform.Select(\"1594029090.3645257511790832426801336140951314995369452070983\", \"15940.290900733273631771121246631364490515292845317661757570\");\n                a.Value = HugeFloat.SquareRoot(source);\n                FloatAssert.AreEqual(expected, a);\n            }\n        }\n\n        #endregion\n\n        #region Divide\n\n        #region Int\n\n        [TestMethod]\n        public void FloatDivideHugeFloat()\n        {\n            using (var a = new HugeFloat(\"1157569866683036578989624354347957.394580293847\"))\n            using (var b = new HugeFloat(\"593169091750307653294.549782395235784\"))\n            {\n                a.Value 
= a / b;\n                FloatAssert.AreEqual(\"1951500647593.2689953514865540344827449639493356367018584357\", a);\n            }\n        }\n\n        #endregion\n\n        #region Limb\n\n        [TestMethod]\n        public void FloatDivideLimb()\n        {\n            using (var a = new HugeFloat(\"1157569866683036578989624354347957.394580293847\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3931690917);\n                a.Value = a / b;\n                FloatAssert.AreEqual(Platform.Select(\"195150064759326.89956625512472902395197480398952074748799190\", \"294420362922703412240180.51527521877035185414779643015361677\"), a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatDivideSignedLimb()\n        {\n            using(var a = new HugeFloat(\"1157569866683036578989624354347957.394580293847\"))\n            {\n                var b = Platform.Si(-5931690917503076532, -1288574029);\n                a.Value = a / b;\n                FloatAssert.AreEqual(Platform.Select(\"-195150064759326.89956625512472902395197480398952074748799190\", \"-898334003814565921993779.6482998629445295873233838084750038\"), a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatDivideLimbBy()\n        {\n            using(var a = new HugeFloat(\"115756986.6683039458029384723405203984572\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3931690917);\n                a.Value = b / a;\n                FloatAssert.AreEqual(Platform.Select(\"51242616866.833708737257760720580856722540469109813901673959\", \"33.96504202606854669329208733080818384500235983519843605\"), a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatDivideSignedLimbBy()\n        {\n            using(var a = new HugeFloat(\"115756986.6683039458029384723405203984572\"))\n            {\n                var b = Platform.Si(-5931690917503076532, -1288574029);\n                a.Value = b / 
a;\n                FloatAssert.AreEqual(Platform.Select(\"-51242616866.833708737257760720580856722540469109813901673959\", \"-11.13171710915684632967551560098368670502676808644707224\"), a);\n            }\n        }\n\n        #endregion\n\n        #endregion\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Assignment.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class Assignment\n    {\n        [TestInitialize]\n        public void Setup()\n        {\n            HugeFloat.DefaultPrecision = 256;\n        }\n\n        [TestCleanup]\n        public void Cleanup()\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        [TestMethod]\n        public void FloatAssignCopy()\n        {\n            var s = \"-1.22250983250345029834502983574029384572\";\n            using (var a = new HugeFloat(s))\n            using (var b = new HugeFloat())\n            {\n                b.Value = a;\n                FloatAssert.AreEqual(s, b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatSwap()\n        {\n            using (var a = HugeFloat.Allocate(192))\n            using (var b = HugeFloat.Allocate(128))\n            {\n                a.SetTo(123.5);\n                b.SetTo(432.25);\n                var aValue = a._value();\n                var bValue = b._value();\n                var aPrec = a._allocatedPrecision;\n                var bPrec = b._allocatedPrecision;\n                a.Swap(b);\n          
      Assert.AreNotEqual(aValue, bValue);\n                Assert.AreEqual(aValue, a._value());\n                Assert.AreEqual(bValue, b._value());\n                Assert.IsTrue(432.25 == a);\n                Assert.IsTrue(123.5 == b);\n                Assert.AreNotEqual(aPrec, bPrec);\n                Assert.AreEqual(bPrec, a._allocatedPrecision);\n                Assert.AreEqual(aPrec, b._allocatedPrecision);\n            }\n        }\n\n        [TestMethod]\n        public void FloatCompoundOperators()\n        {\n            using (var a = new HugeFloat(\"938475092834705928347523452345.115756986668303657898962467957\"))\n            {\n                a.Value += 1;\n                a.Value *= 10;\n                a.Precision = 224;\n                FloatAssert.AreEqual(\"9384750928347059283475234523461.15756986668303657898962467957\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAssignInt()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var b = new HugeFloat())\n            {\n                b.SetTo(a);\n                FloatAssert.AreEqual(\"222509832503450298345029835740293845720.\", b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAssignDouble()\n        {\n            using(var a = new HugeFloat())\n            {\n                a.SetTo(22250983250345.125);\n                Assert.IsTrue(a == 22250983250345.125);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAssignRational()\n        {\n            using (var a = new HugeRational(1, 3))\n            using (var b = new HugeFloat())\n            {\n                b.SetTo(a);\n                FloatAssert.AreEqual(\".33333333333333333333333333333333333333333333333333333333333333333333333333333333333\", b);\n            }\n        }\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Comparisons.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class Comparisons\n    {\n        #region CompareTo\n\n        [TestMethod]\n        public void FloatCompareToHugeFloat()\n        {\n            using (var a = new HugeFloat(\"-22250983250345029834503.9835740293845721345345354\"))\n            using (var b = new HugeFloat(\"22250983250345029834502.9835740293845721345345354\"))\n            {\n                Assert.AreEqual(1, Math.Sign(b.CompareTo(a)));\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, Math.Sign((a + 1).CompareTo(-b)));\n                Assert.AreEqual(1, Math.Sign(a.CompareTo(null)));\n            }\n        }\n\n        [TestMethod]\n        public void FloatCompareToObject()\n        {\n            using (var a = new HugeFloat(\"-22250983250345029834503.9835740293845721345345354\"))\n            using (var b = new HugeFloat(\"22250983250345029834502.9835740293845721345345354\"))\n            {\n                Assert.AreEqual(1, Math.Sign(((IComparable)b).CompareTo((object)a)));\n                
Assert.AreEqual(-1,Math.Sign(((IComparable)a).CompareTo((object)b)));\n                Assert.AreEqual(1, Math.Sign(((IComparable)a).CompareTo(null)));\n                Assert.AreEqual(0, Math.Sign(((IComparable)(a + 1)).CompareTo((object)-b)));\n            }\n        }\n\n        [TestMethod]\n        public void FloatCompareToExpression()\n        {\n            using (var a = new HugeFloat(\"-22250983250345029834503.9835740293845721345345354\"))\n            using (var b = new HugeFloat(\"22250983250345029834502.9835740293845721345345354\"))\n            {\n                Assert.AreEqual(1, Math.Sign(((IComparable<FloatExpression>)b).CompareTo(a)));\n                Assert.AreEqual(-1,Math.Sign(((IComparable<FloatExpression>)a).CompareTo(b)));\n                Assert.AreEqual(1, Math.Sign(((IComparable<FloatExpression>)a).CompareTo(null)));\n                Assert.AreEqual(0, Math.Sign(((IComparable<FloatExpression>)(a + 1)).CompareTo(-b)));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatCompareToNonExpression()\n        {\n            using (var a = new HugeFloat(\"-222509832503450298345029835740293845721.57898962467957\"))\n            {\n                a.CompareTo(\"abc\");\n            }\n        }\n\n        [TestMethod]\n        public void FloatCompareToLimb()\n        {\n            using (var a = new HugeFloat(Platform.Select(\"-22250982876541\", \"-222509821\")))\n            {\n                var b = Platform.Ui(22250982876540, 222509820);\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, Math.Sign((-(a + 1)).CompareTo(b)));\n                Assert.AreEqual(0, Math.Sign((-a).CompareTo(b + 1)));\n                Assert.AreEqual(1, Math.Sign((-a).CompareTo(b)));\n            }\n        }\n\n        [TestMethod]\n        public void 
FloatCompareToSignedLimb()\n        {\n            using (var a = new HugeFloat(Platform.Select(\"-222509825551\", \"-222509821\")))\n            {\n                var b = Platform.Si(-222509825550, -222509820);\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, Math.Sign(a.CompareTo(b - 2)));\n            }\n        }\n\n        [TestMethod]\n        public void FloatCompareToSignedDouble()\n        {\n            using (var a = new HugeFloat(\"-222509821\"))\n            {\n                double b = -222509820;\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1,Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, Math.Sign(a.CompareTo(b - 1.1)));\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with expr\n\n        [TestMethod]\n        public void FloatOperatorLessThan()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983450.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            using (var c = new HugeFloat())\n            {\n                c.Value = a;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsFalse(a > c);\n                Assert.IsFalse(a < null);\n                Assert.IsTrue(null < a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorLessThanOrEqual()\n        {\n            using (var a = new 
HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            using (var c = new HugeFloat())\n            {\n                c.Value = a;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsTrue(a <= c);\n                Assert.IsFalse(a <= null);\n                Assert.IsTrue(null <= a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThan()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            using (var c = new HugeFloat())\n            {\n                c.Value = a;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsFalse(a > c);\n                Assert.IsTrue(a > null);\n                Assert.IsFalse(null > a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanOrEqual()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            using (var c = new HugeFloat())\n            {\n                c.Value = a;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsTrue(a >= null);\n                Assert.IsFalse(null >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with limb\n\n        [TestMethod]\n        public void FloatOperatorLessThanLimb()\n        {\n            using (var a = new HugeFloat(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                Assert.IsTrue(a < b);\n               
 Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorLessThanOrEqualLimb()\n        {\n            using (var a = new HugeFloat(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanLimb()\n        {\n            using (var a = new HugeFloat(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanOrEqualLimb()\n        {\n            using (var a = new HugeFloat(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with signed limb\n\n        [TestMethod]\n        public void FloatOperatorLessThanSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsTrue(a < b);\n                
Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorLessThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                long d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                long d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with double\n\n        [TestMethod]\n        public void FloatOperatorLessThanDouble()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsTrue(a < b);\n  
              Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorLessThanOrEqualDouble()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanDouble()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatOperatorGreaterThanOrEqualDouble()\n        {\n            using (var a = new HugeFloat(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n                Assert.IsFalse(d - 0.1 >= a);\n            }\n        }\n\n        #endregion\n\n        #region Equals\n\n        [TestMethod]\n        public void FloatEqualsHugeFloat()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new 
HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            {\n                Assert.IsFalse(b.Equals(a));\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue((a + 1).Equals(-b));\n                Assert.IsFalse(a.Equals(null));\n                Assert.IsTrue(Equals(a + 1, -b));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsExpression()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            {\n                Assert.IsFalse(((IEquatable<FloatExpression>)b).Equals(a));\n                Assert.IsFalse(((IEquatable<FloatExpression>)a).Equals(b));\n                Assert.IsFalse(((IEquatable<FloatExpression>)a).Equals(null));\n                Assert.IsTrue(((IEquatable<FloatExpression>)(a + 1)).Equals(-b));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsNonExpression()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983450.29835740293845721\"))\n            {\n                Assert.IsFalse(a.Equals(\"abc\"));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsLimb()\n        {\n            using (var a = new HugeFloat(Platform.Select(\"222509832503\", \"2225098323\")))\n            {\n                var b = Platform.Ui(222509832504, 2225098324);\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsSignedLimb()\n        {\n            using (var a = new HugeFloat(Platform.Select(\"-222509832505\", \"-222509835\")))\n            {\n                var b = Platform.Si(-222509832504, -222509834);\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b 
- 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsDouble()\n        {\n            using (var a = new HugeFloat(\"-222509832505\"))\n            {\n                double b = -222509832504;\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n                Assert.IsFalse((a + 1).Equals(b + 0.1));\n            }\n        }\n\n        [TestMethod]\n        public void FloatEqualsHugeFloatApproximately()\n        {\n            using (var a = HugeFloat.Allocate(128))\n            using (var b = HugeFloat.Allocate(128))\n            {\n                a.SetTo(\"ABCDEF12948576AB49587.ACD34EFB345\", 16);\n                b.SetTo(\"ABCDEF12948576AB49587.ACD34EFB245\", 16);\n\n                Assert.IsTrue(a.Equals(b, 119));\n                Assert.IsFalse(a.Equals(b, 120));\n                Assert.IsTrue(a.Equals(b - 1, 83));\n                Assert.IsFalse(a.Equals(b - 1, 84));\n                Assert.IsTrue((a + 512).Equals(b, 74));\n                Assert.IsFalse((a + 512).Equals(b, 75));\n\n                //same mantissa, different exponent should always return false\n                Assert.IsFalse(a.Equals(a >> 1, 119));\n                Assert.IsFalse(a.Equals(a << 1, 119));\n                Assert.IsFalse(a.Equals(a * 2, 119));\n                Assert.IsFalse(a.Equals(a / 2, 119));\n            }\n        }\n\n        [TestMethod]\n        public void FloatRelDiff()\n        {\n            using (var a = new HugeFloat(\"1234523549876.24935230589472305894245\"))\n            using (var b = new HugeFloat(\"1234523549541.45207354209357842979873\"))\n            {\n                Assert.AreEqual(a.RelativeDifferenceFrom(b), (a - b).Abs() / a);\n                Assert.AreNotEqual(a.RelativeDifferenceFrom(b), (a - b).Abs() / b);\n                
Assert.AreEqual(b.RelativeDifferenceFrom(a), (a - b).Abs() / b);\n                Assert.AreNotEqual(b.RelativeDifferenceFrom(a), (a - b).Abs() / a);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with expr\n\n        [TestMethod]\n        public void FloatEqualsOperatorHugeFloat()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            {\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == -b);\n                Assert.IsFalse(a == null);\n            }\n        }\n\n        [TestMethod]\n        public void FloatNotEqualOperatorHugeFloat()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983451.29835740293845721\"))\n            using (var b = new HugeFloat(\"2225098325034502983450.29835740293845721\"))\n            {\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != -b);\n                Assert.IsTrue(a != null);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Limb\n\n        [TestMethod]\n        public void FloatEqualsOperatorLimb()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(-(a + 1) == b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatNotEqualOperatorLimb()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(-(a + 1) != b);\n            
}\n        }\n\n        #endregion\n        \n        #region Equality operators with Signed Limb\n\n        [TestMethod]\n        public void FloatEqualsOperatorSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatNotEqualOperatorSignedLimb()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Double\n\n        [TestMethod]\n        public void FloatEqualsOperatorDouble()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n                Assert.IsFalse(a + 1 == b + 0.1);\n            }\n        }\n\n        [TestMethod]\n        public void FloatNotEqualOperatorDouble()\n        {\n            using (var a = new HugeFloat(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n                Assert.IsTrue(a + 1 != b + 0.1);\n            }\n        }\n\n        #endregion\n\n        #region GetHashCode\n\n        [TestMethod]\n        public void FloatGetHashCodeTest()\n        {\n            using (var a = new HugeFloat(\"-2225098325034502983450298357.40293845721\"))\n            {\n                
Assert.AreNotEqual(0, a.GetHashCode());\n                Assert.AreEqual(a.GetHashCode(), (a + 0).GetHashCode());\n                Assert.AreNotEqual(a.GetHashCode(), (-a).GetHashCode());\n            }\n        }\n\n        #endregion\n\n        #region Sign\n\n        [TestMethod]\n        public void FloatSign()\n        {\n            using (var a = new HugeFloat(\"-22250983250345029834.502983574029384572134354\"))\n            {\n                Assert.AreEqual(-1, a.Sign());\n                Assert.AreEqual(1, (-a).Sign());\n                Assert.AreEqual(0, (a-a).Sign());\n            }\n        }\n\n        #endregion\n\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/ConstructionAndDisposal.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class ConstructionAndDisposal\n    {\n        [TestInitialize]\n        public void Setup()\n        {\n            HugeFloat.DefaultPrecision = 128;\n        }\n\n        [TestCleanup]\n        public void Cleanup()\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        [TestMethod]\n        public void FloatDefaultConstructor()\n        {\n            using (var a = new HugeFloat())\n            {\n                Assert.AreEqual(0, a.NumberOfLimbsUsed());\n                Assert.AreEqual(128UL, a.Precision);\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreNotEqual(IntPtr.Zero, a.Limbs());\n                Assert.AreEqual(0, a.Exponent());\n                Assert.AreEqual(\"0\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void FloatDispose()\n        {\n            var a = new HugeFloat();\n            a.Dispose();\n\n            Assert.AreEqual(0, a.NumberOfLimbsUsed());\n            Assert.AreEqual(IntPtr.Zero, a.Limbs());\n        }\n\n        [TestMethod]\n        public void 
FloatConstructorFromLong()\n        {\n            var n = Platform.Si(0x3123456789123456L, 0x49384756);\n            using (var a = new HugeFloat(n))\n            {\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(128UL, a.Precision);\n                Assert.AreEqual(1, a.Exponent());\n                Assert.AreEqual(\"0.\" + n.ToString(\"X\") + \"@\" + MpirSettings.BITS_PER_LIMB / 4, a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromLongNegative()\n        {\n            var n = Platform.Si(0x3123456789123456L, 0x49384756);\n            using(var a = new HugeFloat(-n))\n            {\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(128UL, a.Precision);\n                Assert.AreEqual(1, a.Exponent());\n                Assert.AreEqual(\"-0.\" + n.ToString(\"X\") + \"@\" + MpirSettings.BITS_PER_LIMB / 4, a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromULong()\n        {\n            var n = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n            using(var a = new HugeFloat(n))\n            {\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(128UL, a.Precision);\n                Assert.AreEqual(1, a.Exponent());\n                Assert.AreEqual(\"0.\" + n.ToString(\"X\") + \"@\" + MpirSettings.BITS_PER_LIMB / 4, a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromDouble()\n        {\n            using (var a = new HugeFloat(123456789123456.75))\n            {\n                FloatAssert.AreEqual(\"123456789123456.75\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromDoubleNegative()\n        {\n            using (var a = new HugeFloat(-123456789123456.75))\n           
 {\n                FloatAssert.AreEqual(\"-123456789123456.75\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAllocate()\n        {\n            using (var a = HugeFloat.Allocate(193))\n            {\n                Assert.AreEqual(192U + MpirSettings.BITS_PER_LIMB, a.Precision);\n                Assert.AreEqual(192U + MpirSettings.BITS_PER_LIMB, a._allocatedPrecision);\n                Assert.AreEqual(\"0\", a.ToString());\n\n                var n = \"234095827340957823409582587\";\n                a.SetTo(n);\n                Assert.AreEqual(\"0.\" + n + \"@\" + n.Length, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void FloatDefaultConstructorHigherPrecision()\n        {\n            var oldPrecision = HugeFloat.DefaultPrecision;\n            HugeFloat.DefaultPrecision = 256;\n            using (var a = new HugeFloat())\n            {\n                Assert.AreEqual(256 / MpirSettings.BITS_PER_LIMB + 1, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(0, a.NumberOfLimbsUsed());\n                Assert.AreEqual(\"0\", a.ToString());\n\n                var n = \"2340958273409578234534534534523425343409582587\";\n                a.SetTo(n);\n                Assert.AreEqual(\"0.\" + n + \"@\" + n.Length, a.ToString());\n            }\n            HugeFloat.DefaultPrecision = oldPrecision;\n        }\n\n        [TestMethod]\n        public void FloatStringConstructor()\n        {\n            var n = \"5432109876543212345789.70515331128527330659\";\n            using (var a = new HugeFloat(n))\n            {\n                FloatAssert.AreEqual(n, a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatStringConstructorWithAlternativeExponentMarker()\n        {\n            var n = \"5432109876543212345789.70515331128527330659e3\";\n            using(var a = new HugeFloat(n))\n            {\n                
FloatAssert.AreEqual(\"5432109876543212345789705.15331128527330659\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatStringConstructorWithAlternativeExponentMarker2()\n        {\n            var n = \"5432109876543212345789.70515331128527330659E3\";\n            using(var a = new HugeFloat(n))\n            {\n                FloatAssert.AreEqual(\"5432109876543212345789705.15331128527330659\", a);\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatStringConstructorWithAlternativeExponentMarkerInvalid()\n        {\n            var n = \"5432109876543212345789.70515331128527330659e3\";\n            var a = new HugeFloat(n, 11);\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatStringConstructorInvalid()\n        {\n            var a = new HugeFloat(\"12345A\");\n        }\n\n        [TestMethod]\n        public void FloatStringConstructorHex()\n        {\n            var n = \"143210ABCDEF32123457ACDB324.59879\";\n            using (var a = new HugeFloat(n, 16))\n            {\n                Assert.AreEqual(\"0.143210ABCDEF32123457ACDB32459879@27\", a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatStringConstructorHexExponentDecimal()\n        {\n            var n = \"143210ABCDEF32123457ACDB324.59879@10\";\n            using(var a = new HugeFloat(n, 16))\n            {\n                Assert.AreEqual(\"0.143210ABCDEF32123457ACDB32459879@37\", a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatStringConstructorHexPrefix()\n        {\n            var n = \"0x143210ABCDEF32123457ACDB324598799\";\n            var a = new HugeFloat(n);\n        }\n\n        [TestMethod]\n        public void 
FloatStringConstructorHexExponentInBase()\n        {\n            var n = \"143210ABCDEF32123457ACDB324.59879@10\";\n            using(var a = new HugeFloat(n, 16, false))\n            {\n                Assert.AreEqual(\"0.143210ABCDEF32123457ACDB32459879@2B\", a.ToString(16, false, false));\n                Assert.AreEqual(\"0.143210ABCDEF32123457ACDB32459879@43\", a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromExpression()\n        {\n            using (var a = new HugeFloat(\"29340273582305894258424059.2345293574\"))\n            using (var b = new HugeFloat(a + 1))\n            {\n                Assert.AreEqual(a + 1, b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatConstructorFromIntExpression()\n        {\n            using (var a = new HugeInt(\"2340958273409578234095823045723490587\"))\n            using (var b = new HugeFloat(a + 1))\n            {\n                FloatAssert.AreEqual(\"2340958273409578234095823045723490588.\", b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatSetPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat())\n            {\n                a.Value = a/3;\n                b.Value = a;\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(128UL, a.Precision);\n                Assert.AreEqual(\"0.3333333333333333333333333333333333333333@0\", a.ToString());\n\n                a.Precision = 64;\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(64UL, a.Precision);\n                Assert.AreEqual(\"0.333333333333333333333@0\", a.ToString());\n                Assert.AreEqual(a, b);\n\n                a.Precision = 128;\n                Assert.AreEqual(\"0.3333333333333333333333333333333333333333@0\", a.ToString());\n\n                a.Precision = 64;\n    
            a.SetTo(1);\n                a.Value = a / 3;\n                Assert.AreNotEqual(a, b);\n                Assert.AreEqual(\"0.333333333333333333333@0\", a.ToString());\n\n                a.Precision = 128;\n                Assert.AreNotEqual(a, b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatAllocatedPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = HugeFloat.Allocate(256))\n            {\n                var bAllocated = b.AllocatedPrecision;\n                var aAllocated = a.AllocatedPrecision;\n                Assert.IsTrue(bAllocated > aAllocated);\n\n                a.Precision = 64;\n                b.Precision = 64;\n                Assert.AreEqual(bAllocated, b.AllocatedPrecision);\n                Assert.AreEqual(aAllocated, a.AllocatedPrecision);\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatSettingPrecisionOverAllocated()\n        {\n            using (var a = new HugeFloat())\n            {\n                a.Precision++;\n            }\n        }\n\n        [TestMethod]\n        public void FloatReallocate()\n        {\n            using (var a = HugeFloat.Allocate(128))\n            {\n                Assert.AreEqual(128UL, a._allocatedPrecision);\n                Assert.AreEqual(128UL, a.Precision);\n\n                a.Reallocate(256);\n                Assert.AreEqual(256UL, a._allocatedPrecision);\n                Assert.AreEqual(256UL, a.Precision);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Conversions.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class Conversions\n    {\n        private static HugeFloat AlmostOne;\n\n        [ClassInitialize]\n        public static void Init(TestContext context)\n        {\n            AlmostOne = new HugeFloat(0.99999);\n            HugeFloat.DefaultPrecision = 128;\n        }\n\n        [ClassCleanup]\n        public static void Cleanup()\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        [TestMethod]\n        public void FloatToStringDecimal()\n        {\n            var n = \"-234293847562.98357462983476598345623984756\";\n            using (var a = new HugeFloat(n))\n            {\n                FloatAssert.AreEqual(n, a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatToStringHex()\n        {\n            var s = \"-23429abcdef29835746298.3fedcba34562\";\n            using (var a = new HugeFloat(s, 16))\n            {\n                Assert.AreEqual(\"-0.23429abcdef298357462983fedcba34562@16\", a.ToString(16, true));\n                Assert.AreEqual(\"-0.23429ABCDEF298357462983FEDCBA34562@16\", 
a.ToString(16));\n\n                Assert.AreEqual(\"-0.23429abcdef298357462983fedcba34562@22\", a.ToString(16, true, true));\n                Assert.AreEqual(\"-0.23429ABCDEF298357462983FEDCBA34562@22\", a.ToString(16, false, true));\n            }\n        }\n\n        [TestMethod]\n        public void FloatFromUlong()\n        {\n            using (var a = new HugeFloat())\n            {\n                var b = Platform.Ui(0xF84739ABCDEF4876, 0xF84739AB);\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatFromLong()\n        {\n            using (var a = new HugeFloat())\n            {\n                var b = Platform.Si(-0x784739ABCDEF4876, -0x784739AB);\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n            }\n        }\n\n        [TestMethod]\n        public void FloatToAndFromDouble()\n        {\n            using (var a = new HugeFloat())\n            {\n                a.SetTo(-123.25);\n\n                double c = a.ToDouble();\n                Assert.IsTrue(c.Equals(-123.25));\n                Assert.IsTrue(a.Equals(-123.25));\n                Assert.AreEqual(\"-0.12325@3\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void FloatToAndFromDouble2()\n        {\n            using (var a = new HugeFloat())\n            {\n                a.SetTo(-123.25);\n                var exp = 0;\n                double c = a.ToDouble(out exp);\n                Assert.IsTrue(c.Equals(-0.962890625));\n                Assert.AreEqual(7L, exp);\n                Assert.IsTrue(a.Equals(-123.25));\n                Assert.AreEqual(\"-0.12325@3\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void FloatToAndFromFloat()\n        {\n            using (var a = new HugeFloat())\n            {\n                a.SetTo(-123.125f);\n\n                double c = 
a.ToDouble();\n                Assert.IsTrue(-123.125 == c);\n                Assert.IsTrue(-123.125 == a);\n                Assert.IsTrue(-123.125f == a);\n            }\n        }\n\n#if WIN64\n        [TestMethod]\n        public void FloatToAndFromUlong()\n        {\n            using (var a = new HugeFloat())\n            {\n                ulong b = 0xF84739ABCDEF4876;\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n\n                a.Value = -a;\n                ulong c = a.ToUlong();\n                Assert.AreEqual(b, c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatToAndFromLong()\n        {\n            using(var a = new HugeFloat())\n            {\n                long b = -0x784739ABCDEF4876;\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n\n                long c = a.ToLong();\n                Assert.AreEqual(b, c);\n            }\n        }\n#else\n        [TestMethod]\n        public void FloatToAndFromUint()\n        {\n            using (var a = new HugeFloat())\n            {\n                ulong b = 0xF84739AB;\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n\n                a.Value = -a;\n                ulong c = a.ToUint();\n                Assert.AreEqual(b, c);\n            }\n        }\n\n        [TestMethod]\n        public void FloatToAndFromInt()\n        {\n            using (var a = new HugeFloat())\n            {\n                long b = -0x784739AB;\n                a.SetTo(b);\n                FloatAssert.AreEqual(b + \".\", a);\n\n                long c = a.ToInt();\n                Assert.AreEqual(b, c);\n            }\n        }\n#endif\n\n        [TestMethod]\n        public void FloatToAndFromDoubleOutExp()\n        {\n            using(var a = new HugeFloat())\n            {\n                a.SetTo(-123.45e20);\n                var exp = 0;\n                var zillion = 
Platform.Si(10000000000, 1000000000);\n                a.Value = a + a;\n                double c = a.ToDouble(out exp);\n\n                Assert.AreEqual(75, exp);\n                c *= Math.Pow(2, exp);\n\n                Assert.IsTrue(a + zillion >= c);\n                Assert.IsTrue(a - zillion <= c);\n            }\n        }\n\n#if WIN64\n        [TestMethod]\n        public void FloatToLong2()\n        {\n            using (var a = new HugeFloat())\n            using (var small = new HugeFloat(0.0001))\n            {\n                long b = long.MaxValue;\n                a.SetTo(b);\n                a.Value = a + 1 - small;\n                var c = a.ToLong();\n\n                Assert.AreEqual(b, c);\n\n                b = long.MinValue;\n                a.SetTo(b);\n                a.Value -= 1 - small;\n                c = a.ToLong();\n                Assert.AreEqual(b, c);\n            }\n        }\n#else\n        [TestMethod]\n        public void FloatToInt2()\n        {\n            using (var a = new HugeFloat())\n            using (var small = new HugeFloat(0.0001))\n            {\n                int b = int.MaxValue;\n                a.SetTo(b);\n                a.Value = a + 1 - small;\n                var c = a.ToInt();\n\n                Assert.AreEqual(b, c);\n\n                b = int.MinValue;\n                a.SetTo(b);\n                a.Value -= 1 - small;\n                c = a.ToInt();\n                Assert.AreEqual(b, c);\n            }\n        }\n#endif\n\n        [TestMethod]\n        public void FloatFromString()\n        {\n            using (var a = HugeFloat.Allocate(256))\n            {\n                var n = \"98762934876529834765234123984761\";\n                a.SetTo(n);\n                Assert.AreEqual(\"0.\" + n + \"@\" + n.Length, a.ToString());\n\n                n = \"-98ABCDEF876529834765234123984761\";\n                a.SetTo(n, 16);\n                Assert.AreEqual(\"-0.\" + n.Substring(1) + \"@\" + 
(n.Length - 1).ToString(\"x\"), a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void FloatFromStringExpDecimal()\n        {\n            using(var a = new HugeFloat())\n            {\n                var n = \"0.12354523094527035843ABCDEF54@10\";\n                \n                a.SetTo(n, 16);\n                Assert.AreEqual(\"0.12354523094527035843ABCDEF54@10\", a.ToString(16, false, true));\n                Assert.AreEqual(\"0.12354523094527035843ABCDEF54@A\", a.ToString(16, false, false));\n\n                a.SetTo(n, 16, false);\n                Assert.AreEqual(\"0.12354523094527035843ABCDEF54@16\", a.ToString(16, false, true));\n                Assert.AreEqual(\"0.12354523094527035843ABCDEF54@10\", a.ToString(16, false, false));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void FloatFromInvalidString()\n        {\n            using (var a = new HugeFloat())\n            {\n                a.SetTo(\"12345A\");\n            }\n        }\n\n        [TestMethod]\n        public void FloatToStringTruncated()\n        {\n            var n = string.Concat(\"123456783\".Select(c => new string(c, 30)));\n            using (var a = HugeFloat.Allocate(2000))\n            {\n                a.SetTo(n);\n                Assert.AreEqual(\"0.\" + n + \"@\" + n.Length, a.ToString(10));\n                Assert.AreEqual(\"0.\" + n.Substring(0, 256) + \"@\" + n.Length, a.ToString());\n                a.Value = -a;\n                Assert.AreEqual(\"-0.\" + n.Substring(0, 256) + \"@\" + n.Length, a.ToString());\n            }\n        }\n\n#if WIN64\n        [TestMethod]\n        public void FloatFitsUlong()\n        {\n            using (var a = new HugeFloat(ulong.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                Assert.IsTrue(a.FitsUlong());\n                a.Value = a + 1;\n                
Assert.IsFalse(a.FitsUlong());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsUlong());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUlong());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsUlong());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsUlong());\n            }\n        }\n\n        [TestMethod]\n        public void FloatFitsLong()\n        {\n            using (var a = new HugeFloat(long.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                Assert.IsTrue(a.FitsLong());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsLong());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsLong());\n                a.SetTo(long.MinValue);\n                Assert.IsTrue(a.FitsLong());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsLong());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsLong());\n            }\n        }\n#endif\n\n        [TestMethod]\n        public void FloatFitsUint()\n        {\n            using (var a = new HugeFloat(uint.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                Assert.IsTrue(a.FitsUint());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsUint());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsUint());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUint());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsUint());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsUint());\n            }\n        }\n\n        [TestMethod]\n        public void FloatFitsInt()\n        {\n            using (var a = new HugeFloat(int.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                
Assert.IsTrue(a.FitsInt());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsInt());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsInt());\n                a.SetTo(int.MinValue);\n                Assert.IsTrue(a.FitsInt());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsInt());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsInt());\n            }\n        }\n\n        [TestMethod]\n        public void FloatFitsUshort()\n        {\n            using (var a = new HugeFloat(ushort.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                Assert.IsTrue(a.FitsUshort());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsUshort());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsUshort());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUshort());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsUshort());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsUshort());\n            }\n        }\n\n        [TestMethod]\n        public void FloatFitsShort()\n        {\n            using (var a = new HugeFloat(short.MaxValue))\n            using (var small = new HugeFloat(0.0001))\n            {\n                Assert.IsTrue(a.FitsShort());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsShort());\n                a.Value = a - small;\n                Assert.IsTrue(a.FitsShort());\n                a.SetTo(short.MinValue);\n                Assert.IsTrue(a.FitsShort());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsShort());\n                a.Value = a + small;\n                Assert.IsTrue(a.FitsShort());\n            }\n        }\n\n        [TestMethod]\n        public void FloatIsInteger()\n        {\n            using (var a = new 
HugeFloat(\"-233454059287409285742345.125\"))\n            {\n                Assert.IsFalse(a.IsInteger());\n                a.Value = a * 8;\n                Assert.IsTrue(a.IsInteger());\n            }\n        }\n\n        //private void AssertBetween(int min, int max, long actual)\n        //{\n        //    Assert.IsTrue(actual >= min && actual <= max, \"Expected {0} to {1}, actual {2}\", min, max, actual);\n        //}\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/ExpressionTests.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Reflection;\nusing System.Text;\nusing System.Threading.Tasks;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class ExpressionTests\n    {\n        [ClassInitialize]\n        public static void Setup(TestContext context)\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        [TestMethod]\n        public void FloatTestAllExpressions()\n        {\n            var baseExpr = typeof(FloatExpression);\n            var allExpressions =\n                baseExpr.Assembly.GetTypes()\n                .Where(x => baseExpr.IsAssignableFrom(x) && !x.IsAbstract)\n                .ToList();\n\n            var one = Platform.Ui(1UL, 1U);\n\n            using (var a = new HugeFloat(-9))\n            using (var b = new HugeFloat(4))\n            using (var c = new HugeInt(3))\n            using(var r = MpirRandom.Default())\n            {\n                var expr = a + (-a * 2) * 3 * (a.Abs() * -2 + -64 + a * a) + (one * 116U) + a;\n                VerifyPartialResult(r, expr, 44);\n                expr = expr + a * 5 + (a+b) * (b + 1) * (b + -3) * b + (b 
* -a) - (b * (one * 25U)) - a + (b << 3) - ((a*b) << 1);\n                VerifyPartialResult(r, expr, -52);\n                expr = expr - 2 - 3U + (b - (a << 1)) + (b * b - 15U) * (b - a) * (a - 11) * (b - (one * 3U)) - (-340 - a) + ((one * 20U) - b);\n                VerifyPartialResult(r, expr, 52);\n                expr = expr + (-7 - 2 * a) + (28U - 4 * b) + -(a + b * 2) + (3 * a).Abs();\n                VerifyPartialResult(r, expr, 103);\n                expr = 36 * (expr / a + expr / (3 * b) - a / b) - b / (a + 10) + 6;\n                VerifyPartialResult(r, expr, -20);\n                expr = expr + (b >> 1) + ((b / -7) + (a / (one * 7U))) * 7 + (7 / a) - ((one * 2U) / (b + 5));\n                VerifyPartialResult(r, expr, -32);\n                expr = expr - (b + 13 + 64) / a / -3;\n                VerifyPartialResult(r, expr, -35);\n                expr = expr + b.SquareRoot() + HugeFloat.SquareRoot(25) + ((b - 2) ^ 3) - (-b).RelativeDifferenceFrom(a + 1);\n                VerifyPartialResult(r, expr, -19);\n                expr = expr - (a / 4).Floor() + (b / 3).Ceiling() - (a / b).Truncate();\n                VerifyPartialResult(r, expr, -12);\n                expr = expr + (r.GetFloatBits(64) * 10).Ceiling();\n                VerifyPartialResult(r, expr, -10);\n                //float random generation seems to give different results in Win32 and Win64.  
Thus, we're having to adjust the results for Win32.\n                expr = expr + (r.GetFloatLimbsChunky(128 / MpirSettings.BITS_PER_LIMB, 256 / MpirSettings.BITS_PER_LIMB) << 233 >> Platform.Ui(0, 480)).Ceiling();\n                VerifyPartialResult(r, expr, -6);\n                expr = expr + (r.GetFloat() * 10).Floor() - Platform.Ui(0, 3);\n                VerifyPartialResult(r, expr, -2);\n                expr = expr + (r.GetFloatChunky(3) << 101 >> Platform.Ui(177, 23)).Truncate();\n                VerifyPartialResult(r, expr, 13);\n\n                MarkExpressionsUsed(allExpressions, expr);\n            }\n\n            Assert.AreEqual(0, allExpressions.Count, \"Expression types not exercised: \" + string.Join(\"\",\n                allExpressions.Select(x => Environment.NewLine + x.Name).OrderBy(x => x)));\n        }\n\n        private void VerifyPartialResult(MpirRandom rnd, FloatExpression expr, long expected)\n        {\n            rnd.Seed(123);\n\n            using (var r = new HugeFloat())\n            {\n                using (var exp = new HugeFloat(expected))\n                using (var epsilon = new HugeFloat(\"0.001\"))\n                {\n                    r.Value = expr;\n                    Assert.IsTrue(r - epsilon < exp && r + epsilon > exp, \"Expected {0}, Actual {1}\", exp, r);\n                }\n            }\n        }\n\n        private void MarkExpressionsUsed(List<Type> allExpressions, FloatExpression expr)\n        {\n            var type = expr.GetType();\n            allExpressions.Remove(type);\n            \n            var children = type.GetFields(BindingFlags.NonPublic | BindingFlags.Instance)\n                .Where(x => typeof(FloatExpression).IsAssignableFrom(x.FieldType))\n                .Select(x => (FloatExpression)x.GetValue(expr))\n                .Where(x => x != null)\n                .ToList();\n\n            foreach (var childExpr in children)\n                MarkExpressionsUsed(allExpressions, 
childExpr);\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/IO.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class IO\n    {\n        [TestMethod]\n        public void FloatInputOutputStr()\n        {\n            using (var a = new HugeFloat(\"10123456789ABCDEF012345.6789ABCDE\", 16))\n            using (var b = HugeFloat.Allocate(12800))\n            using (var ms = new MemoryStream())\n            {\n                a.Reallocate(12800);\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 10, 0, false, false);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 10, false);\n\n                Assert.AreEqual(a.ToString(10), b.ToString(10));\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(10), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void FloatInputOutputStrHex()\n        {\n            
using(var a = new HugeFloat(\"10123456789ABCDEF012345.6789ABCDE\", 16))\n            using (var b = HugeFloat.Allocate(12800))\n            using (var ms = new MemoryStream())\n            {\n                a.Reallocate(12800);\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 16, 0, false, false);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 16, false);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(16), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void FloatInputOutputStrHexLower()\n        {\n            using(var a = new HugeFloat(\"10123456789ABCDEF012345.6789ABCDE\", 16))\n            using(var b = HugeFloat.Allocate(12800))\n            using(var ms = new MemoryStream())\n            {\n                a.Reallocate(12800);\n                a.Value = a ^ 100;\n                using(var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 16, 0, true, false);\n\n                ms.Position = 0;\n\n                using(var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 16, false);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(16, true), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void FloatInputOutputStrHexExpDecimal()\n        {\n            using(var a = new HugeFloat(\"10123456789ABCDEF012345.6789ABCDE\", 16))\n            using(var b = HugeFloat.Allocate(12800))\n            using(var ms = new 
MemoryStream())\n            {\n                a.Reallocate(12800);\n                a.Value = a ^ 100;\n                using(var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 16, 0, false, true);\n\n                ms.Position = 0;\n\n                using(var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 16, true);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(16, false, true), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void FloatInputOutputStr62()\n        {\n            using(var a = new HugeFloat(\"10123456789ABCDEF012345.6789ABCDE\", 16))\n            using(var b = HugeFloat.Allocate(12800))\n            using(var ms = new MemoryStream())\n            {\n                a.Reallocate(12800);\n                a.Value = a ^ 100;\n                using(var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 62, 0, false, false);\n\n                ms.Position = 0;\n\n                using(var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 62, false);\n\n                Assert.AreEqual(a.ToString(62), b.ToString(62));\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(62), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Math.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    [TestClass]\n    public class MiscMath\n    {\n        [TestMethod]\n        public void FloatFloor()\n        {\n            using(var a = new HugeFloat(\"-9023409872309847509847.9385345098345\"))\n            using(var b = new HugeFloat())\n            {\n                b.Value = a.Floor();\n                FloatAssert.AreEqual(\"-9023409872309847509848\", b);\n                b.Value = (-a).Floor();\n                FloatAssert.AreEqual(\"9023409872309847509847\", b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatCeiling()\n        {\n            using(var a = new HugeFloat(\"-9023409872309847509847.9385345098345\"))\n            using(var b = new HugeFloat())\n            {\n                b.Value = a.Ceiling();\n                FloatAssert.AreEqual(\"-9023409872309847509847\", b);\n                b.Value = (-a).Ceiling();\n                FloatAssert.AreEqual(\"9023409872309847509848\", b);\n            }\n        }\n\n        [TestMethod]\n        public void FloatTruncate()\n        {\n            using(var a = new HugeFloat(\"-9023409872309847509847.9385345098345\"))\n            
using(var b = new HugeFloat())\n            {\n                b.Value = a.Truncate();\n                FloatAssert.AreEqual(\"-9023409872309847509847\", b);\n                b.Value = (-a).Truncate();\n                FloatAssert.AreEqual(\"9023409872309847509847\", b);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeFloatTests/Precision.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeFloatTests\n{\n    /// <summary>\n    /// tests in this class verify that the correct precision is used when calculating floating point numbers\n    /// </summary>\n    [TestClass]\n    public class Precision\n    {\n        [ClassInitialize]\n        public static void Setup(TestContext context)\n        {\n            HugeFloat.DefaultPrecision = 128;\n        }\n\n        [ClassCleanup]\n        public static void Cleanup()\n        {\n            HugeFloat.DefaultPrecision = 64;\n        }\n\n        #region Expression arithmetic\n\n        [TestMethod]\n        public void ExpressionsCalculatedToDestinationPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(3))\n            using (var c = new HugeFloat(5))\n            using (var d = new HugeFloat(a / b + c))\n            {\n                Assert.AreEqual(\"0.5333333333333333333333333333333333333332@1\", d.ToString());\n                d.Reallocate(256);\n                d.Value = a / b + c;\n                Assert.AreEqual(\"0.533333333333333333333333333333333333333333333333333333333333333333333333333333@1\", 
d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void ExpressionsCalculatedToSpecificPrecisionForEquals()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(3))\n            using (var c = HugeFloat.Allocate(256))\n            using (var d = HugeFloat.Allocate(256))\n            {\n                c.SetTo(\"12345234589234059823475029384572323452034958723049823408955\");\n                Assert.IsTrue(c.Equals(c + a / b, 128));\n                Assert.IsFalse(c.Equals(c + a / b, 256));\n\n                d.SetTo(\"12345234589234059823475029384572323452034958723049823408955.333333333333333333333333333333333\");\n                Assert.IsTrue(d.Equals(c + a / b, 256));\n                Assert.IsTrue(d.Equals(c + a / b, 128));\n            }\n        }\n\n        [TestMethod]\n        public void ExpressionHashCodeCalculatedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"12345234589234059823475029384572323\"))\n            {\n                ShiftLeftBy62(c);\n                var cHash = c.GetHashCode();\n                var expr = a / b + c;\n                Assert.AreEqual(cHash, expr.GetHashCode());\n                HugeFloat.DefaultPrecision = 256;\n                Assert.AreEqual(cHash, c.GetHashCode());\n                Assert.AreNotEqual(cHash, expr.GetHashCode());\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        private static void ShiftLeftBy62(HugeFloat c)\n        {\n#if WIN64\n            c.Value *= 0x4000000000000000L;\n#else\n            c.Value *= 0x80000000;\n            c.Value *= 0x80000000;\n#endif\n        }\n\n        [TestMethod]\n        public void CompareToCalculatedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(13))\n            using 
(var c = new HugeFloat(\"12345234589234059823475029384572323\"))\n            using (var d = HugeFloat.Allocate(256))\n            {\n                ShiftLeftBy62(c);\n                d.Value = c;\n                var expr = a / b + c;\n                Assert.AreEqual(0, c.CompareTo(expr)); //to precision of c\n                Assert.AreEqual(0, expr.CompareTo(c)); //to precision of c\n                Assert.IsFalse(expr > c);              //to precision of c\n                Assert.IsTrue(c == expr);              //to precision of c\n                Assert.AreEqual(0, (c + 0).CompareTo(expr)); //to default precision\n                Assert.AreEqual(0, expr.CompareTo(c + 0));   //to default precision\n                Assert.IsFalse(expr > c + 0);                //to default precision\n                Assert.IsTrue(c + 0 == expr);                //to default precision\n                HugeFloat.DefaultPrecision = 256;\n                Assert.AreEqual(0, c.CompareTo(expr)); //to precision of c\n                Assert.AreEqual(0, expr.CompareTo(c)); //to precision of c\n                Assert.IsTrue(c == expr);              //to precision of c\n                Assert.IsFalse(expr > c);              //to precision of c\n                Assert.AreEqual(-1, d.CompareTo(expr)); //to precision of d\n                Assert.AreEqual(1, expr.CompareTo(d));  //to precision of d\n                Assert.IsFalse(d == expr);              //to precision of d\n                Assert.IsTrue(expr > d);                //to precision of d\n                Assert.AreEqual(-1, (c * 1).CompareTo(expr)); //to default precision\n                Assert.AreEqual(1, expr.CompareTo(c + 0));    //to default precision\n                Assert.IsFalse(c + 0 == expr);                //to default precision\n                Assert.IsTrue(expr > c + 0);                  //to default precision\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        [TestMethod]\n        
public void CompareToPrimitiveCalculatedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"12345234589234059823475029384572323\"))\n            using (var d = HugeFloat.Allocate(256))\n            {\n                ShiftLeftBy62(c);\n                d.Value = c;\n                var expr = a / b + c - c;\n                Assert.AreEqual(0, Math.Sign(expr.CompareTo(Platform.Si(0, 0))));\n                Assert.AreEqual(0, Math.Sign(expr.CompareTo(Platform.Ui(0, 0))));\n                Assert.AreEqual(0, Math.Sign(expr.CompareTo(0.0)));\n                HugeFloat.DefaultPrecision = 256;\n                Assert.AreEqual(1, Math.Sign(expr.CompareTo(Platform.Si(0, 0))));\n                Assert.AreEqual(1, Math.Sign(expr.CompareTo(Platform.Ui(0, 0))));\n                Assert.AreEqual(1, Math.Sign(expr.CompareTo(0.0)));\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        [TestMethod]\n        public void EqualsToPrimitiveCalculatedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"12345234589234059823475029384572323\"))\n            using (var d = HugeFloat.Allocate(256))\n            {\n                ShiftLeftBy62(c);\n                d.Value = c;\n                var expr = a / b + c - c;\n                Assert.IsTrue(expr.Equals(Platform.Si(0, 0)));\n                Assert.IsTrue(expr.Equals(Platform.Ui(0, 0)));\n                Assert.IsTrue(expr.Equals(0.0));\n                HugeFloat.DefaultPrecision = 256;\n                Assert.IsFalse(expr.Equals(Platform.Si(0, 0)));\n                Assert.IsFalse(expr.Equals(Platform.Ui(0, 0)));\n                Assert.IsFalse(expr.Equals(0.0));\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        [TestMethod]\n        
public void SignCalculatedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(1))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"12345234589234059823475029384572323\"))\n            using (var d = HugeFloat.Allocate(256))\n            {\n                ShiftLeftBy62(c);\n                var expr = (a / b + c) - c;\n                d.Value = expr;\n                Assert.AreEqual(0, expr.Sign());\n                Assert.AreEqual(1, d.Sign());\n                HugeFloat.DefaultPrecision = 256;\n                Assert.AreEqual(1, expr.Sign());\n                Assert.AreEqual(1, d.Sign());\n                d.Precision = 128;\n                Assert.AreEqual(1, d.Sign());\n                d.Value = expr;\n                Assert.AreEqual(0, d.Sign());\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        [TestMethod]\n        public void HugeIntSetToPerformedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(14))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"1234523458923405982347445029384572323\"))\n            using (var d = new HugeInt())\n            {\n                ShiftLeftBy62(c);\n                ShiftLeftBy62(c);\n                var expr = a / b + c - c;\n                d.SetTo(expr);\n                Assert.IsTrue(d == 0);\n\n                HugeFloat.DefaultPrecision = 256;\n                d.SetTo(expr);\n                Assert.IsTrue(d == 1);\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        [TestMethod]\n        public void HugeRationalSetToPerformedToDefaultPrecision()\n        {\n            using (var a = new HugeFloat(14))\n            using (var b = new HugeFloat(13))\n            using (var c = new HugeFloat(\"1234523458923405982347445029384572323\"))\n            using (var d = new HugeRational())\n            {\n                
ShiftLeftBy62(c);\n                ShiftLeftBy62(c);\n                var expr = a / b + c - c;\n                d.SetTo(expr);\n                Assert.IsTrue(d == 0);\n\n                HugeFloat.DefaultPrecision = 256;\n                d.SetTo(expr);\n                Assert.IsTrue(d > 1);\n                Assert.IsTrue(d < 2);\n                HugeFloat.DefaultPrecision = 128;\n            }\n        }\n\n        #endregion\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Arithmetic.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Arithmetic\n    {\n        #region Add\n\n        [TestMethod]\n        public void IntAddHugeInt()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a + b;\n                Assert.AreEqual(\"445497268491433028939318409770173720259\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Ui(4288574029879874539UL, 4288574029U);\n                a.Value = a + b;\n                Assert.AreEqual(Platform.Select(\"222509832503450298349318409770173720259\", \"222509832503450298345029835744582419749\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddToLimb()\n        {\n            using (var a = new 
HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Ui(4288574029879874539UL, 4288574029U);\n                a.Value = b + a;\n                Assert.AreEqual(Platform.Select(\"222509832503450298349318409770173720259\", \"222509832503450298345029835744582419749\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Si(-4288574029879874539L, -1288574029);\n                a.Value = a + b;\n                Assert.AreEqual(Platform.Select(\"222509832503450298340741261710413971181\", \"222509832503450298345029835739005271691\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddToSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Si(-4288574029879874539L, -1288574029);\n                a.Value = b + a;\n                Assert.AreEqual(Platform.Select(\"222509832503450298340741261710413971181\", \"222509832503450298345029835739005271691\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddToMaxSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Si(long.MinValue, int.MinValue);\n                a.Value = b + a;\n                Assert.AreEqual(Platform.Select(\"222509832503450298335806463703439069912\", \"222509832503450298345029835738146362072\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddExpressionHugeInt()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var b = new 
HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = 1 + (a + b);\n                Assert.AreEqual(\"445497268491433028939318409770173720260\", c.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Subtract\n\n        [TestMethod]\n        public void IntSubtractHugeInt()\n        {\n            using (var a = new HugeInt(\"445497268491433028939318409770173720259\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            {\n                a.Value = a - b;\n                Assert.AreEqual(\"222509832503450298345029835740293845720\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298349318409770173720259\"))\n            {\n                var b = Platform.Ui(4288574029879874539UL, 2885740298U);\n                a.Value = a - b;\n                Assert.AreEqual(Platform.Select(\"222509832503450298345029835740293845720\", \"222509832503450298349318409767287979961\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractFromLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298349318409770173720259\"))\n            {\n                var b = Platform.Ui(4288574029879874539UL, 2885740298U);\n                a.Value = b - a;\n                Assert.AreEqual(Platform.Select(\"-222509832503450298345029835740293845720\", \"-222509832503450298349318409767287979961\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractPositiveSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298349318409770173720259\"))\n            {\n                var b = Platform.Si(4288574029879874539L, 1885740298);\n                a.Value = a - b;\n                
Assert.AreEqual(Platform.Select(\"222509832503450298345029835740293845720\", \"222509832503450298349318409768287979961\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractFromPositiveSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298349318409770173720259\"))\n            {\n                var b = Platform.Si(4288574029879874539L, 1885740298);\n                a.Value = b - a;\n                Assert.AreEqual(Platform.Select(\"-222509832503450298345029835740293845720\", \"-222509832503450298349318409768287979961\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Si(-4288574029879874539L, -1885740298);\n                a.Value = a - b;\n                Assert.AreEqual(Platform.Select(\"222509832503450298349318409770173720259\", \"222509832503450298345029835742179586018\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractFromSignedLimb()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                var b = Platform.Si(-4288574029879874539L, -1885740298);\n                a.Value = b - a;\n                Assert.AreEqual(Platform.Select(\"-222509832503450298349318409770173720259\", \"-222509832503450298345029835742179586018\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Multiply\n\n        [TestMethod]\n        public void IntMultiplyByHugeInt()\n        {\n            using (var a = new HugeInt(\"90234098723098475098479385345098345\"))\n            using (var b = new HugeInt(\"7859487359873459872354987610987897\"))\n            {\n                a.Value = a * b;\n                
Assert.AreEqual(\"709193758343766370701419953880162061353595657143399816050772069730465\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntMultiplyByLimb()\n        {\n            using (var a = new HugeInt(\"90234098723098475098479385345098345\"))\n            {\n                var b = Platform.Ui(17390538260286101342, 1500450271);\n                a.Value = a * b;\n                Assert.AreEqual(Platform.Select(\"1569219546226477273686601978789044606491747469626478990\", \"135391777882513860921200145428966240276901495\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntMultiplyLimbBy()\n        {\n            using (var a = new HugeInt(\"90234098723098475098479385345098345\"))\n            {\n                var b = Platform.Ui(17390538260286101342, 1500450271);\n                a.Value = b * a;\n                Assert.AreEqual(Platform.Select(\"1569219546226477273686601978789044606491747469626478990\", \"135391777882513860921200145428966240276901495\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntMultiplyBySignedLimb()\n        {\n            using (var a = new HugeInt(\"90234098723098475098479385345098345\"))\n            {\n                var b = Platform.Si(-7390538260286101342, -1500450271);\n                a.Value = a * b;\n                Assert.AreEqual(Platform.Select(\"-666878558995492522701808125338061156491747469626478990\", \"-135391777882513860921200145428966240276901495\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntMultiplySignedLimbBy()\n        {\n            using (var a = new HugeInt(\"90234098723098475098479385345098345\"))\n            {\n                var b = Platform.Si(-7390538260286101342, -1500450271);\n                a.Value = b * a;\n                Assert.AreEqual(Platform.Select(\"-666878558995492522701808125338061156491747469626478990\", 
\"-135391777882513860921200145428966240276901495\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Add Product\n\n        [TestMethod]\n        public void IntAddProductHugeInt()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var b = new HugeInt(\"-394580293847502987609283945873594873409587\"))\n            {\n                var expr = a + c*b;\n                a.Value = expr;\n                Assert.AreEqual(\"-9112666988874677841199955832262586145147830205230375090322356322089362221491205901\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductHugeInt2()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var b = new HugeInt(\"-394580293847502987609283945873594873409587\"))\n            {\n                var expr = c * b + a;\n                a.Value = expr;\n                Assert.AreEqual(\"-9112666988874677841199955832262586145147830205230375090322356322089362221491205901\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductLimb()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = a + c*b;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-92328754786193194014003719366476113668089432731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public 
void IntAddProductLimbTo()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = a + b*c;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-92328754786193194014003719366476113668089432731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductLimbTo2()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = b * c + a;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-92328754786193194014003719366476113668089432731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductLimbTo3()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            using (var d = new HugeInt())\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = b * c + a;\n                d.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-92328754786193194014003719366476113668089432731415\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductSignedLimb()\n        {\n            using (var a = new 
HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(-498734523097853458, -2017853458);\n                var expr = a + c*b;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductSignedLimbTo()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(498734523097853458, 2017853458);\n                var expr = a + b*c;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductSignedLimbTo2()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(498734523097853458, 2017853458);\n                var expr = b * c + a;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductSignedLimbTo3()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new 
HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var d = new HugeInt())\n            {\n                var b = Platform.Si(-498734523097853458, -2017853458);\n                var expr = b * c + a;\n                d.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAddProductSignedLimbTo4()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(-498734523097853458, -2017853458);\n                var expr = b * c + a;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Subtract Product\n\n        [TestMethod]\n        public void IntSubtractProductHugeInt()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                var expr = a - c*b;\n                a.Value = expr;\n                Assert.AreEqual(\"-9112666988874677841199955832262586145147830205230375090322356322089362221491205901\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductHugeIntFrom()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498359\"))\n            using (var c = new 
HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                var expr = c * b - a;\n                a.Value = expr;\n                Assert.AreEqual(\"9112666988874677841199955832262586145147830205230375090322356322089362221491205899\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductLimb()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = a - c*b;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-92328754786193194014003719366476113668089432731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductLimbFrom()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498359\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Ui(498734523097853458, 3997853458);\n                var expr = c * b - a;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"11518065386718058599763388064972875060082210203928832731413\", \"92328754786193194014003719366476113668089432731413\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimb()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(-498734523097853458, 
-2017853458);\n                var expr = a - b*c;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimbFrom()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498359\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(-498734523097853458, -2017853458);\n                var expr = b * c - a;\n                a.Value = expr;\n                Assert.AreEqual(Platform.Select(\"11518065386718058599763388064972875060082210203928832731413\", \"46601482240379908737297906081375735555240112731413\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimb2()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(-498734523097853458, -2017853458);\n                a.Value = a - c*b;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimb3()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            using (var d = new HugeInt())\n            {\n                var b = Platform.Si(498734523097853458, 2017853458);\n                d.Value = a - c*b;\n                
Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimbFrom3()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498359\"))\n            using (var c = new HugeInt(\"-23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(498734523097853458, 2017853458);\n                a.Value = c * b - a;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058797263975760014751974140979348115793728131\", \"-46601482437880496432339782995434504699427073728131\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSubtractProductSignedLimb4()\n        {\n            using (var a = new HugeInt(\"98750293847520938457029384572093480498357\"))\n            using (var c = new HugeInt(\"23094582093845093574093845093485039450934\"))\n            {\n                var b = Platform.Si(498734523097853458, 2017853458);\n                a.Value = a - c * b;\n                Assert.AreEqual(Platform.Select(\"-11518065386718058599763388064972875060082210203928832731415\", \"-46601482240379908737297906081375735555240112731415\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Shift Left\n\n        [TestMethod]\n        public void IntShiftLeft()\n        {\n            using (var a = new HugeInt(\"-12345700987ABCDEF2345CBDEFA245230948\", 16))\n            {\n                uint b = 40;\n                a.Value = a << b;\n                Assert.AreEqual(\"-12345700987ABCDEF2345CBDEFA2452309480000000000\", a.ToString(16));\n            }\n        }\n\n        #endregion\n\n        #region Shift Right\n\n        [TestMethod]\n        public void IntShiftRight()\n        {\n            using (var a = new 
HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = a >> b;\n                Assert.AreEqual(\"ABCDEF052834524092854092874502983745029345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightCeiling()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"ABCDEF052834524092854092874502983745029346\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-ABCDEF052834524092854092874502983745029345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightFloor()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"ABCDEF052834524092854092874502983745029345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"-ABCDEF052834524092854092874502983745029346\", 
a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightTruncate()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"ABCDEF052834524092854092874502983745029345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-ABCDEF052834524092854092874502983745029345\", a.ToString(16));\n            }\n        }\n\n        #endregion\n\n        #region Negate\n\n        [TestMethod]\n        public void IntNegate()\n        {\n            using (var a = new HugeInt(\"24092854092874502983745029345723098457209\"))\n            {\n                a.Value = -a;\n                Assert.AreEqual(\"-24092854092874502983745029345723098457209\", a.ToString());\n                a.Value = -a;\n                Assert.AreEqual(\"24092854092874502983745029345723098457209\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Abs\n\n        [TestMethod]\n        public void IntMakeAbsolute()\n        {\n            using (var a = new HugeInt(\"-24092854092874502983745029345723098457209\"))\n            {\n                a.Value = a.Abs();\n                Assert.AreEqual(\"24092854092874502983745029345723098457209\", a.ToString());\n                a.Value = a.Abs();\n                Assert.AreEqual(\"24092854092874502983745029345723098457209\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Divide\n\n        
#region Int\n\n        [TestMethod]\n        public void IntDivideHugeInt()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = a / b;\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntCeiling()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"593169091750307653295\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntFloor()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntNegativeFloor()\n        {\n            using (var a = new 
HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"-593169091750307653295\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntTruncate()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a / b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Int with mod\n\n        [TestMethod]\n        public void IntDivideHugeIntCeilingWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"593169091750307653295\", a.ToString());\n                Assert.AreEqual(\"-280211579611742400636461191697511341704820\", c.ToString());\n            }\n        }\n\n        
[TestMethod]\n        public void IntDivideHugeIntNegativeCeilingWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntFloorWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntNegativeFloorWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"-593169091750307653295\", a.ToString());\n                Assert.AreEqual(\"280211579611742400636461191697511341704820\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntTruncateWithMod()\n        {\n           
 using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideHugeIntNegativeTruncateWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n            \n        #endregion\n\n        #region Limb\n\n        [TestMethod]\n        public void IntDivideLimb()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = a / b;\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbCeiling()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 
3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbFloor()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbTruncate()\n      
  {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb with Mod\n\n        [TestMethod]\n        public void IntDivideLimbWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbCeilingWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new 
HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeCeilingWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbFloorWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void 
IntDivideLimbNegativeFloorWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbTruncateWithMod()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeTruncateWithMod()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a / b).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), 
a.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), c.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb with Mod x 2\n\n        [TestMethod]\n        public void IntDivideLimbWithMod2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n                Assert.AreEqual(d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbCeilingWithMod2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), c.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeCeilingWithMod2()\n        {\n           
 using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), c.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbFloorWithMod2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n                Assert.AreEqual(d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeFloorWithMod2()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 
0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), c.ToString());\n                Assert.AreEqual(d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbTruncateWithMod2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), c.ToString());\n                Assert.AreEqual(d.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeTruncateWithMod2()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).SavingRemainderTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", 
\"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), c.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), c.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb with Limb Mod\n\n        [TestMethod]\n        public void IntDivideLimbWithMod3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbCeilingWithMod3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeCeilingWithMod3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n               
 var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbFloorWithMod3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeFloorWithMod3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n    
    public void IntDivideLimbTruncateWithMod3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDivideLimbNegativeTruncateWithMod3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a / b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), a.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), d.ToString());\n            }\n        }\n\n        #endregion\n \n        #region Exact\n\n        [TestMethod]\n        public void IntDivideExactlyHugeInt()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a * b).DivideExactly(b);\n                Assert.AreEqual(a.ToString(), c.ToString());\n            }\n        }\n\n        
[TestMethod]\n        public void IntDivideExactlyLimb()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                c.Value = (a * b).DivideExactly(b);\n                Assert.AreEqual(a.ToString(), c.ToString());\n            }\n        }\n\n        #endregion\n\n        #region IsDivisible\n\n        [TestMethod]\n        public void IntIsDivisibleByHugeInt()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a * b);\n                Assert.IsTrue(a.IsDivisibleBy(b));\n                a.Value = a + 1;\n                Assert.IsFalse(a.IsDivisibleBy(b));\n            }\n        }\n\n        [TestMethod]\n        public void IntIsDivisibleByLimb()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593169091);\n                a.Value = (a * b);\n                Assert.IsTrue(a.IsDivisibleBy(b));\n                a.Value = a + 1;\n                Assert.IsFalse(a.IsDivisibleBy(b));\n            }\n        }\n\n        [TestMethod]\n        public void IntIsDivisibleByPowerOf2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                uint b = 40;\n                a.Value = (a << b);\n                Assert.IsTrue(a.IsDivisibleByPowerOf2(b));\n                Assert.IsFalse(a.IsDivisibleByPowerOf2(b + 1));\n            }\n        }\n\n        #endregion\n        \n        #region IsCongruent\n\n        [TestMethod]\n        
public void IntIsCongruentToHugeInt()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt(\"293847502987609283945873594873409587\"))\n            {\n                a.Value = (a * b) + c;\n                Assert.IsTrue(a.IsCongruentTo(c, b));\n                a.Value = a + 1;\n                Assert.IsFalse(a.IsCongruentTo(c, b));\n            }\n        }\n\n        [TestMethod]\n        public void IntIsCongruentToLimb()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 533076532);\n                var c = Platform.Ui(98764938475983745, 97983745);\n                a.Value = (a * b) + c;\n                Assert.IsTrue(a.IsCongruentTo(c, b));\n                a.Value = a + 1;\n                Assert.IsFalse(a.IsCongruentTo(c, b));\n            }\n        }\n\n        [TestMethod]\n        public void IntIsCongruentToPowerOf2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt(\"293847502987609283945873594873409587\"))\n            {\n                uint b = 120;\n                a.Value = (a << b) + c;\n                Assert.IsTrue(a.IsCongruentToModPowerOf2(c, b));\n                Assert.IsFalse(a.IsCongruentToModPowerOf2(c, b + 3));\n                c.Value = c + 1;\n                Assert.IsFalse(a.IsCongruentToModPowerOf2(c, b));\n            }\n        }\n\n        #endregion\n\n        #endregion\n\n        #region Mod\n\n        #region Int\n\n        [TestMethod]\n        public void IntModHugeInt()\n        {\n            using (var a = new 
HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = a % b;\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntCeiling()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-280211579611742400636461191697511341704820\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntFloor()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n         
   using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"280211579611742400636461191697511341704820\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntTruncate()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            {\n                a.Value = (a % b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Int with quotient\n\n        [TestMethod]\n        public void IntModHugeIntCeilingWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"593169091750307653295\", a.ToString());\n                Assert.AreEqual(\"-280211579611742400636461191697511341704820\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void 
IntModHugeIntNegativeCeilingWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntFloorWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntNegativeFloorWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"-593169091750307653295\", a.ToString());\n                Assert.AreEqual(\"280211579611742400636461191697511341704820\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntTruncateWithQuotient()\n        {\n            using (var a = new 
HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModHugeIntNegativeTruncateWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var b = new HugeInt(\"394580293847502987609283945873594873409587\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = (a % b).SavingQuotientTo(a).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-593169091750307653294\", a.ToString());\n                Assert.AreEqual(\"-114368714235760586972822754176083531704767\", c.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb\n\n        [TestMethod]\n        public void IntModLimb()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbCeiling()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Ceiling);\n                
Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFloor()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbTruncate()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n          
  }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb With Quotient\n\n        [TestMethod]\n        public void IntModLimbWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbCeilingWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        
public void IntModLimbNegativeCeilingWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFloorWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeFloorWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), 
c.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbTruncateWithQuotient()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeTruncateWithQuotient()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                a.Value = (a % b).SavingQuotientTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Limb With Quotient and Limb Mod\n\n        [TestMethod]\n        public void IntModLimbWithQuotient2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n 
               var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbCeilingWithQuotient2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522499\", \"65137878909366687748339296418588133550785040629984012\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeCeilingWithQuotient2()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", 
\"-65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFloorWithQuotient2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeFloorWithQuotient2()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522499\", \"-65137878909366687748339296418588133550785040629984012\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        
}\n\n        [TestMethod]\n        public void IntModLimbTruncateWithQuotient2()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"39458029384750298767200622330399462537522498\", \"65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeTruncateWithQuotient2()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            using (var c = new HugeInt())\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).SavingQuotientTo(c).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-39458029384750298767200622330399462537522498\", \"-65137878909366687748339296418588133550785040629984011\"), c.ToString());\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            } \n\n        }\n\n        #endregion\n\n        #region Limb With Limb Mod\n\n        [TestMethod]\n        public void IntModLimbWithQuotient3()\n        {\n            using (var a = new 
HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbCeilingWithQuotient3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-332273015685459123\", \"-640264599\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeCeilingWithQuotient3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFloorWithQuotient3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = 
Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeFloorWithQuotient3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Floor);\n                Assert.AreEqual(Platform.Select(\"332273015685459123\", \"640264599\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbTruncateWithQuotient3()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a % b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"5599417901817617409\", \"2952926313\"), a.ToString());\n                Assert.AreEqual(d.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbNegativeTruncateWithQuotient3()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3593190912);\n                var d = Platform.Ui(0, 0);\n                a.Value = (a 
% b).SettingRemainderTo(x => d = x).Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(Platform.Select(\"-5599417901817617409\", \"-2952926313\"), a.ToString());\n                Assert.AreEqual(\"-\" + d.ToString(), a.ToString());\n            }\n\n        }\n\n        #endregion\n\n        #region Mod Limb function\n\n        [TestMethod]\n        public void IntModLimbFunction()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"5599417901817617409\", \"119445933\");\n                var d = a.Mod(b);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionCeiling()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"332273015685459123\", \"473723159\");\n                var d = a.Mod(b, RoundingModes.Ceiling);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"5599417901817617409\", \"119445933\");\n                ulong d = a.Mod(b, RoundingModes.Ceiling);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionFloor()\n        {\n            using (var a = new 
HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"5599417901817617409\", \"119445933\");\n                var d = a.Mod(b, RoundingModes.Floor);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"332273015685459123\", \"473723159\");\n                var d = a.Mod(b, RoundingModes.Floor);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionTruncate()\n        {\n            using (var a = new HugeInt(\"234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"5599417901817617409\", \"119445933\");\n                var d = a.Mod(b, RoundingModes.Truncate);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntModLimbFunctionNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-234052834524092854092874502983745029345723098457209305983434345\"))\n            {\n                var b = Platform.Ui(5931690917503076532U, 593169092U);\n                var expected = Platform.Select(\"5599417901817617409\", \"119445933\");\n                var d = a.Mod(b, RoundingModes.Truncate);\n                Assert.AreEqual(expected, d.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Shift Right 
Remainder\n\n        [TestMethod]\n        public void IntShiftRightRemainder()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder();\n                Assert.AreEqual(\"723098457209305983434345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderCeiling()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder().Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-8DCF67BA8DF6CFA67CBCBCBB\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderNegativeCeiling()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder().Rounding(RoundingModes.Ceiling);\n                Assert.AreEqual(\"-723098457209305983434345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderFloor()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder().Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"723098457209305983434345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderNegativeFloor()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                
a.Value = (a >> b).Remainder().Rounding(RoundingModes.Floor);\n                Assert.AreEqual(\"8DCF67BA8DF6CFA67CBCBCBB\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderTruncate()\n        {\n            using (var a = new HugeInt(\"ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder().Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"723098457209305983434345\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntShiftRightRemainderNegativeTruncate()\n        {\n            using (var a = new HugeInt(\"-ABCDEF052834524092854092874502983745029345723098457209305983434345\", 16))\n            {\n                uint b = 96;\n                a.Value = (a >> b).Remainder().Rounding(RoundingModes.Truncate);\n                Assert.AreEqual(\"-723098457209305983434345\", a.ToString(16));\n            }\n        }\n\n        #endregion\n\n        #endregion\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Assignment.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Assignment\n    {\n        [TestMethod]\n        public void IntAssignCopy()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845720\"))\n            using (var b = new HugeInt())\n            {\n                b.Value = a;\n                Assert.AreEqual(\"-222509832503450298345029835740293845720\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAssignRational()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/7\"))\n            using (var b = new HugeInt())\n            {\n                b.SetTo(a);\n                Assert.AreEqual(\"31787118929064328335004262248613406531\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSwap()\n        {\n            var strA = \"-222509832503450298345029835740293845720\";\n            var strB = \"2039847290878794872059384789347534534\";\n            using (var a = new HugeInt(strA))\n            using (var b = new HugeInt(strB))\n            {\n                var aValue = 
a._value();\n                var bValue = b._value();\n                a.Swap(b);\n                Assert.AreEqual(aValue, a._value());\n                Assert.AreEqual(bValue, b._value());\n                Assert.AreEqual(strA, b.ToString());\n                Assert.AreEqual(strB, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntCompoundOperators()\n        {\n            using (var a = new HugeInt(\"938475092834705928347523452345\"))\n            {\n                a.Value += 1;\n                a.Value *= 10;\n                Assert.AreEqual(\"9384750928347059283475234523460\", a.ToString());\n            }\n        }\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Bitwise.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Bitwise\n    {\n        [TestMethod]\n        public void IntAndHugeInt()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt(\"0x100000000000000000123456789ABCDEFFFFFFFFFFFFFFFFF\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a & b;\n                Assert.AreEqual(\"100000000000000000123456789ABCDEF0123456789ABCDEF\", c.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntOrHugeInt()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt(\"0x100000000000000000123456789ABCDEFFFFFFFFFFFFFFFFF\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a | b;\n                Assert.AreEqual(\"10123456789ABCDEF0123456789ABCDEFFFFFFFFFFFFFFFFF\", c.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntXorHugeInt()\n        {\n            using (var a = new 
HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt(\"0x000000000000000000123456789ABCDEFFFFFFFFFFFFFFFFF\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a ^ b;\n                Assert.AreEqual(\"10123456789ABCDEF0000000000000000FEDCBA9876543210\", c.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntNot()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = ~a;\n                Assert.AreEqual(-a - 1, c);\n            }\n        }\n\n        [TestMethod]\n        public void IntPopCount()\n        {\n            using (var a = new HugeInt(\"0x1ABCDEF8984948281360922385394772450147012613851354303\"))\n            {\n                var max = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n                Assert.AreEqual(83UL, a.PopCount());\n                Assert.AreEqual(max, (-a).PopCount());\n            }\n        }\n\n        [TestMethod]\n        public void IntHammingDistance()\n        {\n            using (var a = new HugeInt(\"0x1ABCDE08984948281360922385394772450147012613851354F03\"))\n            using (var b = new HugeInt(\"0x1ABCDEF8984948281360922345394772450147012613851354303\"))\n            {\n                var max = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n                Assert.AreEqual(8U, a.HammingDistance(b));\n                Assert.AreEqual(8U, (-b).HammingDistance(-a));\n                Assert.AreEqual(max, (-a).HammingDistance(b));\n                Assert.AreEqual(max, b.HammingDistance(-a));\n            }\n        }\n\n        [TestMethod]\n        public void IntFindBit()\n        {\n            using (var a = new HugeInt(\"0xA0000000000000000000800000000001\"))\n            {\n                var max = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n                
Assert.AreEqual(0UL, a.FindBit(true, 0));\n                Assert.AreEqual(47UL, a.FindBit(true, 1));\n                Assert.AreEqual(47UL, a.FindBit(true, 47));\n                Assert.AreEqual(125UL, a.FindBit(true, 48));\n                Assert.AreEqual(127UL, a.FindBit(true, 126));\n                Assert.AreEqual(max, a.FindBit(true, 128));\n\n                Assert.AreEqual(1UL, a.FindBit(false, 0));\n                Assert.AreEqual(1UL, a.FindBit(false, 1));\n                Assert.AreEqual(9UL, a.FindBit(false, 9));\n                Assert.AreEqual(128UL, a.FindBit(false, 127));\n                Assert.AreEqual(227UL, a.FindBit(false, 227));\n\n                a.Value = ~a;\n\n                Assert.AreEqual(0UL, a.FindBit(false, 0));\n                Assert.AreEqual(47UL, a.FindBit(false, 1));\n                Assert.AreEqual(47UL, a.FindBit(false, 47));\n                Assert.AreEqual(125UL, a.FindBit(false, 48));\n                Assert.AreEqual(127UL, a.FindBit(false, 126));\n                Assert.AreEqual(max, a.FindBit(false, 128));\n\n                Assert.AreEqual(1UL, a.FindBit(true, 0));\n                Assert.AreEqual(1UL, a.FindBit(true, 1));\n                Assert.AreEqual(9UL, a.FindBit(true, 9));\n                Assert.AreEqual(128UL, a.FindBit(true, 127));\n                Assert.AreEqual(227UL, a.FindBit(true, 227));\n            }\n        }\n\n        [TestMethod]\n        public void IntSetBit()\n        {\n            using (var a = new HugeInt(\"0xA0000000000000000000200000000001\"))\n            {\n                a.SetBit(47, true);\n                Assert.AreEqual(\"A0000000000000000000A00000000001\", a.ToString(16));\n                a.SetBit(47, true);\n                Assert.AreEqual(\"A0000000000000000000A00000000001\", a.ToString(16));\n                a.SetBit(45, false);\n                Assert.AreEqual(\"A0000000000000000000800000000001\", a.ToString(16));\n                a.SetBit(45, false);\n                
Assert.AreEqual(\"A0000000000000000000800000000001\", a.ToString(16));\n                a.SetBit(131, false);\n                Assert.AreEqual(\"A0000000000000000000800000000001\", a.ToString(16));\n                a.SetBit(131, true);\n                Assert.AreEqual(\"8A0000000000000000000800000000001\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntGetBit()\n        {\n            using (var a = new HugeInt(\"0xA000000000000000000200000000001\"))\n            {\n                var limbs = 128 / MpirSettings.BITS_PER_LIMB;\n\n                Assert.IsTrue(a.GetBit(45));\n                Assert.IsFalse(a.GetBit(46));\n                Assert.AreEqual(limbs, a.NumberOfLimbsUsed());\n                Assert.AreEqual(limbs + 1, a.NumberOfLimbsAllocated());\n                Assert.IsFalse(a.GetBit(246));\n                Assert.AreEqual(limbs, a.NumberOfLimbsUsed());\n                Assert.AreEqual(limbs + 1, a.NumberOfLimbsAllocated());\n                a.Value = ~a;\n                Assert.AreEqual(-limbs, a.NumberOfLimbsUsed());\n                Assert.AreEqual(limbs + 1, a.NumberOfLimbsAllocated());\n                Assert.IsTrue(a.GetBit(246));\n                Assert.AreEqual(-limbs, a.NumberOfLimbsUsed());\n                Assert.AreEqual(limbs + 1, a.NumberOfLimbsAllocated());\n            }\n        }\n\n        [TestMethod]\n        public void IntComplementBit()\n        {\n            using (var a = new HugeInt(\"0xA0000000000000000000800000000001\"))\n            {\n                a.ComplementBit(46);\n                Assert.AreEqual(\"A0000000000000000000C00000000001\", a.ToString(16));\n                a.ComplementBit(47);\n                Assert.AreEqual(\"A0000000000000000000400000000001\", a.ToString(16));\n                a.ComplementBit(131);\n                Assert.AreEqual(\"8A0000000000000000000400000000001\", a.ToString(16));\n            }\n        }\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Comparisons.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Comparisons\n    {\n        #region CompareTo\n\n        [TestMethod]\n        public void IntCompareToHugeInt()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(b.CompareTo(a)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(-b)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(null)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareToObject()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(((IComparable)b).CompareTo((object)a)));\n                Assert.AreEqual(-1, System.Math.Sign(((IComparable)a).CompareTo((object)b)));\n        
        Assert.AreEqual(1, System.Math.Sign(((IComparable)a).CompareTo(null)));\n                Assert.AreEqual(0, System.Math.Sign(((IComparable)(a + 1)).CompareTo((object)-b)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareToExpression()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(((IComparable<IntegerExpression>)b).CompareTo(a)));\n                Assert.AreEqual(-1, System.Math.Sign(((IComparable<IntegerExpression>)a).CompareTo(b)));\n                Assert.AreEqual(1, System.Math.Sign(((IComparable<IntegerExpression>)a).CompareTo(null)));\n                Assert.AreEqual(0, System.Math.Sign(((IComparable<IntegerExpression>)(a + 1)).CompareTo(-b)));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void IntCompareToNonExpression()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            {\n                a.CompareTo(\"abc\");\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareToLimb()\n        {\n            using (var a = new HugeInt(\"-222509821\"))\n            {\n                var b = Platform.Ui(222509820, 222509820);\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((-(a + 1)).CompareTo(b)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareTo(b)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareToSignedLimb()\n        {\n            using (var a = new HugeInt(\"-222509821\"))\n            {\n                var b = Platform.Si(-222509820, -222509820);\n         
       Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(b - 2)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareToSignedDouble()\n        {\n            using (var a = new HugeInt(\"-222509821\"))\n            {\n                double b = -222509820;\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(b - 1.1)));\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with expr\n\n        [TestMethod]\n        public void IntOperatorLessThan()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsFalse(a < null);\n                Assert.IsTrue(null < a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorLessThanOrEqual()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a;\n                
Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsTrue(a <= c);\n                Assert.IsFalse(a <= null);\n                Assert.IsTrue(null <= a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThan()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsFalse(a > c);\n                Assert.IsTrue(a > null);\n                Assert.IsFalse(null > a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanOrEqual()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var c = new HugeInt())\n            {\n                c.Value = a;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsTrue(a >= null);\n                Assert.IsFalse(null >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with limb\n\n        [TestMethod]\n        public void IntOperatorLessThanLimb()\n        {\n            using (var a = new HugeInt(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorLessThanOrEqualLimb()\n        {\n            using (var a = new HugeInt(\"3845721\"))\n     
       {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanLimb()\n        {\n            using (var a = new HugeInt(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanOrEqualLimb()\n        {\n            using (var a = new HugeInt(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with signed limb\n\n        [TestMethod]\n        public void IntOperatorLessThanSignedLimb()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorLessThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n       
         long c = -543254325432;\n                long b = -9587;\n                long d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanSignedLimb()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                long d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with double\n\n        [TestMethod]\n        public void IntOperatorLessThanDouble()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorLessThanOrEqualDouble()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n             
   double c = -543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanDouble()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void IntOperatorGreaterThanOrEqualDouble()\n        {\n            using (var a = new HugeInt(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n                Assert.IsFalse(d - 0.1 >= a);\n            }\n        }\n\n        #endregion\n\n        #region Equals\n\n        [TestMethod]\n        public void IntEqualsHugeInt()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.IsFalse(b.Equals(a));\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue((a + 1).Equals(-b));\n                Assert.IsFalse(a.Equals(null));\n                Assert.IsTrue(Equals(a + 1, -b));\n            }\n        }\n\n        [TestMethod]\n      
  public void IntEqualsExpression()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.IsFalse(((IEquatable<IntegerExpression>)b).Equals(a));\n                Assert.IsFalse(((IEquatable<IntegerExpression>)a).Equals(b));\n                Assert.IsFalse(((IEquatable<IntegerExpression>)a).Equals(null));\n                Assert.IsTrue(((IEquatable<IntegerExpression>)(a + 1)).Equals(-b));\n            }\n        }\n\n        [TestMethod]\n        public void IntEqualsNonExpression()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            {\n                Assert.IsFalse(a.Equals(\"abc\"));\n            }\n        }\n\n        [TestMethod]\n        public void IntEqualsLimb()\n        {\n            var source = Platform.Select(\"222509832503\", \"2225032503\");\n            var b = Platform.Ui(222509832504, 2225032504);\n\n            using (var a = new HugeInt(source))\n            {\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void IntEqualsSignedLimb()\n        {\n            var source = Platform.Select(\"-222509832505\", \"-225032505\");\n            var b = Platform.Si(-222509832504, -225032504);\n\n            using (var a = new HugeInt(source))\n            {\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void IntEqualsDouble()\n        {\n            using (var a = new HugeInt(\"-222509832505\"))\n            {\n                double b = -222509832504;\n                Assert.IsFalse(a.Equals(b + 
1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n                Assert.IsFalse((a + 1).Equals(b + 0.1));\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with expr\n\n        [TestMethod]\n        public void IntEqualsOperatorHugeInt()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == -b);\n                Assert.IsFalse(a == null);\n            }\n        }\n\n        [TestMethod]\n        public void IntNotEqualOperatorHugeInt()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != -b);\n                Assert.IsTrue(a != null);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Limb\n\n        [TestMethod]\n        public void IntEqualsOperatorLimb()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(-(a + 1) == b);\n            }\n        }\n\n        [TestMethod]\n        public void IntNotEqualOperatorLimb()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(-(a + 1) != b);\n            }\n        }\n\n 
       #endregion\n        \n        #region Equality operators with Signed Limb\n\n        [TestMethod]\n        public void IntEqualsOperatorSignedLimb()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n            }\n        }\n\n        [TestMethod]\n        public void IntNotEqualOperatorSignedLimb()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Double\n\n        [TestMethod]\n        public void IntEqualsOperatorDouble()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n                Assert.IsFalse(a + 1 == b + 0.1);\n            }\n        }\n\n        [TestMethod]\n        public void IntNotEqualOperatorDouble()\n        {\n            using (var a = new HugeInt(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n                Assert.IsTrue(a + 1 != b + 0.1);\n            }\n        }\n\n        #endregion\n\n        #region GetHashCode\n\n        [TestMethod]\n        public void IntGetHashCodeTest()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            {\n                Assert.AreNotEqual(0, a.GetHashCode());\n  
              Assert.AreEqual(a.GetHashCode(), (a + 0).GetHashCode());\n                Assert.AreNotEqual(a.GetHashCode(), (-a).GetHashCode());\n            }\n        }\n\n        #endregion\n\n        #region CompareAbsTo\n\n        [TestMethod]\n        public void IntCompareAbsToHugeInt()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            using (var b = new HugeInt(\"222509832503450298345029835740293845720\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(a.CompareAbsTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(b.CompareAbsTo(a)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareAbsTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareAbsTo(b)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareAbsTo(-b)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareAbsToLimb()\n        {\n            using (var a = new HugeInt(\"-222509821\"))\n            {\n                ulong b = 222509820;\n                Assert.AreEqual(1, System.Math.Sign(a.CompareAbsTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareAbsTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((-a).CompareAbsTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareAbsTo(b)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareAbsTo(b)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareAbsTo(b)));\n            }\n        }\n\n        [TestMethod]\n        public void IntCompareAbsToDouble()\n        {\n            using (var a = new HugeInt(\"-222509821\"))\n            {\n                double b = 222509820;\n                Assert.AreEqual(1, System.Math.Sign(a.CompareAbsTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareAbsTo(b + 1)));\n                Assert.AreEqual(0, 
System.Math.Sign((-a).CompareAbsTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareAbsTo(b)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareAbsTo(-b)));\n                Assert.AreEqual(1, System.Math.Sign((a + 1).CompareAbsTo(b - 0.1)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareAbsTo(b)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareAbsTo(b)));\n            }\n        }\n\n        #endregion\n\n        #region Sign\n\n        [TestMethod]\n        public void IntSign()\n        {\n            using (var a = new HugeInt(\"-222509832503450298345029835740293845721\"))\n            {\n                Assert.AreEqual(-1, a.Sign());\n                Assert.AreEqual(1, (-a).Sign());\n                Assert.AreEqual(0, (a-a).Sign());\n            }\n        }\n\n        #endregion\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/ConstructionAndDisposal.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class ConstructionAndDisposal\n    {\n        [TestMethod]\n        public void IntDefaultConstructor()\n        {\n            using (var a = new HugeInt())\n            {\n                Assert.AreNotEqual(0, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(0, a.NumberOfLimbsUsed());\n                Assert.AreNotEqual(IntPtr.Zero, a.Limbs());\n                Assert.AreEqual(\"0\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntDispose()\n        {\n            var a = new HugeInt();\n            a.Dispose();\n            Assert.AreEqual(0, a.NumberOfLimbsAllocated());\n            Assert.AreEqual(0, a.NumberOfLimbsUsed());\n            Assert.AreEqual(IntPtr.Zero, a.Limbs());\n        }\n\n        [TestMethod]\n        public void IntConstructorFromLong()\n        {\n            var n = \"123456789123456\";\n            using (var a = new HugeInt(long.Parse(n)))\n            {\n                Assert.AreEqual(64 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(64 / 
MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsUsed());\n                Assert.AreEqual(n, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromLongNegative()\n        {\n            var n = \"-123456789123456\";\n            using (var a = new HugeInt(long.Parse(n)))\n            {\n                Assert.AreEqual(64 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(-64 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsUsed());\n                Assert.AreEqual(n, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromULong()\n        {\n            var max = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n            using (var a = new HugeInt(max))\n            {\n                Assert.AreEqual(1, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.NumberOfLimbsUsed());\n                Assert.AreEqual(max.ToString(), a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromDouble()\n        {\n            using (var a = new HugeInt(123456789123456.9))\n            {\n                Assert.AreEqual(\"123456789123456\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromPower()\n        {\n            using (var a = new HugeInt(HugeInt.Power(10, 41)))\n            {\n                Assert.AreEqual(\"100000000000000000000000000000000000000000\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromDoubleNegative()\n        {\n            using (var a = new HugeInt(-123456789123456.9))\n            {\n                Assert.AreEqual(\"-123456789123456\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntAllocate()\n        {\n            using (var a = HugeInt.Allocate(129))\n            {\n                Assert.AreEqual(1 + 128 / 
MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(0, a.NumberOfLimbsUsed());\n                Assert.AreEqual(\"0\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntReallocate()\n        {\n            using (var a = new HugeInt(\"543209879487374938579837\"))\n            {\n                Assert.AreEqual(2 + 64 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"543209879487374938579837\", a.ToString());\n\n                a.Reallocate(257);\n                Assert.AreEqual(1 + 256 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"543209879487374938579837\", a.ToString());\n\n                a.Reallocate(129);\n                Assert.AreEqual(1 + 128 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"543209879487374938579837\", a.ToString());\n\n                a.Reallocate(64);\n                Assert.AreEqual(64 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"0\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntStringConstructor()\n        {\n            var n = \"5432109876543212345789023245987\";\n            using (var a = new HugeInt(n))\n            {\n                Assert.AreEqual(128 / MpirSettings.BITS_PER_LIMB, a.NumberOfLimbsUsed());\n                Assert.AreEqual(n, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void IntStringConstructorInvalid()\n        {\n            var a = new HugeInt(\"12345A\");\n        }\n\n        [TestMethod]\n        public void IntStringConstructorHex()\n        {\n            using (var a = new HugeInt(\"143210ABCDEF32123457ACDB324598799\", 16))\n            {\n                Assert.AreEqual(1 + 128 / MpirSettings.BITS_PER_LIMB, 
a.NumberOfLimbsUsed());\n            }\n        }\n\n        [TestMethod]\n        public void IntStringConstructorHexPrefix()\n        {\n            var n = \"143210ABCDEF32123457ACDB324598799\";\n            using (var a = new HugeInt(\"0x\" + n))\n            {\n                Assert.AreEqual(n, a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntStringAssignmentHexPrefix()\n        {\n            var n = \"143210ABCDEF32123457ACDB324598799\";\n            using (var a = new HugeInt(\"0x\" + n))\n            {\n                Assert.AreEqual(n, a.ToString(16));\n                Assert.AreEqual(n, a.ToString(16, false));\n                Assert.AreEqual(n.ToLower(), a.ToString(16, true));\n                a.SetTo(\"-0x\" + n);\n                Assert.AreEqual(\"-\" + n, a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void IntConstructorFromExpression()\n        {\n            using (var a = new HugeInt(\"2340958273409578234095823045723490587\"))\n            using (var b = new HugeInt(a + 1))\n            {\n                Assert.AreEqual(a + 1, b);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Conversions.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Conversions\n    {\n        [TestMethod]\n        public void IntToStringDecimal()\n        {\n            var n = \"-23429384756298357462983476598345623984756\";\n            using (var a = new HugeInt(n))\n            {\n                Assert.AreEqual(n, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntToStringHex()\n        {\n            var n = \"-23429abcdef298357462983fedcba345623984756\";\n            using (var a = new HugeInt(n, 16))\n            {\n                Assert.AreEqual(n, a.ToString(16, true));\n                Assert.AreEqual(n.ToUpper(), a.ToString(16));\n            }\n        }\n\n#if WIN64\n        [TestMethod]\n        public void IntToAndFromUlong()\n        {\n            using (var a = new HugeInt())\n            {\n                ulong b = 0xF84739ABCDEF4876;\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString(), a.ToString());\n\n                a.Value = -a;\n                ulong c = a.ToUlong();\n                Assert.AreEqual(b.ToString(), c.ToString());\n         
   }\n        }\n\n        [TestMethod]\n        public void IntToAndFromLong()\n        {\n            using (var a = new HugeInt())\n            {\n                long b = -0x784739ABCDEF4876;\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString(), a.ToString());\n\n                long c = a.ToLong();\n                Assert.AreEqual(b.ToString(), c.ToString());\n            }\n        }\n#else\n        [TestMethod]\n        public void IntToAndFromUint()\n        {\n            using (var a = new HugeInt())\n            {\n                uint b = 0xF84739AB;\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString(), a.ToString());\n\n                a.Value = -a;\n                uint c = a.ToUint();\n                Assert.AreEqual(b.ToString(), c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntToAndFromInt()\n        {\n            using (var a = new HugeInt())\n            {\n                int b = -0x784739AB;\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString(), a.ToString());\n\n                long c = a.ToInt();\n                Assert.AreEqual(b.ToString(), c.ToString());\n            }\n        }\n#endif\n\n        [TestMethod]\n        public void IntToAndFromDouble()\n        {\n            using (var a = new HugeInt())\n            using (var lo = new HugeInt())\n            using (var hi = new HugeInt())\n            {\n                var source = Platform.Select(-123.45e20, -123.45e19);\n                var zillion = Platform.Ui(10000000000U, 1000000000U);\n                var factor = Platform.Ui(1, 10);\n                var exp = 0;\n\n                a.SetTo(source);\n                lo.Value = (a/zillion).Rounding(RoundingModes.Floor);\n                hi.Value = (a/zillion).Rounding(RoundingModes.Ceiling);\n\n                Assert.IsTrue(lo.ToString() == \"-1234500000000\" || hi.ToString() == \"-1234500000000\");\n\n                double c 
= a.ToDouble();\n                Assert.AreEqual(source, c);\n\n                a.Value = (a + a) * factor;\n                c = a.ToDouble(out exp);\n\n                Assert.AreEqual(75, exp);\n                c *= System.Math.Pow(2, exp);\n\n                Assert.IsTrue(a + zillion >= c);\n                Assert.IsTrue(a - zillion <= c);\n            }\n        }\n\n        [TestMethod]\n        public void IntFromString()\n        {\n            using (var a = new HugeInt())\n            {\n                var n = \"98762934876529834765234123984761\";\n                a.SetTo(n);\n                Assert.AreEqual(n, a.ToString());\n\n                n = \"-98ABCDEF876529834765234123984761\";\n                a.SetTo(n, 16);\n                Assert.AreEqual(n, a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void IntFromInvalidString()\n        {\n            using (var a = new HugeInt())\n            {\n                a.SetTo(\"12345A\");\n            }\n        }\n\n        [TestMethod]\n        public void IntToStringTruncated()\n        {\n            var n = string.Concat(\"123456789\".Select(c => new string(c, 30)));\n            using (var a = new HugeInt(n))\n            {\n                Assert.AreEqual(n, a.ToString(10));\n                Assert.AreEqual(\"...\" + n.Substring(n.Length - 256), a.ToString());\n                a.Value = -a;\n                Assert.AreEqual(\"-...\" + n.Substring(n.Length - 256), a.ToString());\n            }\n        }\n\n#if WIN64\n        [TestMethod]\n        public void IntFitsUlong()\n        {\n            using (var a = new HugeInt(ulong.MaxValue))\n            {\n                Assert.IsTrue(a.FitsUlong());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsUlong());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUlong());\n                a.Value = a - 1;\n                
Assert.IsFalse(a.FitsUlong());\n            }\n        }\n\n        [TestMethod]\n        public void IntFitsLong()\n        {\n            using (var a = new HugeInt(long.MaxValue))\n            {\n                Assert.IsTrue(a.FitsLong());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsLong());\n                a.SetTo(long.MinValue);\n                Assert.IsTrue(a.FitsLong());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsLong());\n            }\n        }\n#endif\n\n        [TestMethod]\n        public void IntFitsUint()\n        {\n            using (var a = new HugeInt(uint.MaxValue))\n            {\n                Assert.IsTrue(a.FitsUint());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsUint());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUint());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsUint());\n            }\n        }\n\n        [TestMethod]\n        public void IntFitsInt()\n        {\n            using (var a = new HugeInt(int.MaxValue))\n            {\n                Assert.IsTrue(a.FitsInt());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsInt());\n                a.SetTo(int.MinValue);\n                Assert.IsTrue(a.FitsInt());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsInt());\n            }\n        }\n\n        [TestMethod]\n        public void IntFitsUshort()\n        {\n            using (var a = new HugeInt(ushort.MaxValue))\n            {\n                Assert.IsTrue(a.FitsUshort());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsUshort());\n                a.SetTo(0);\n                Assert.IsTrue(a.FitsUshort());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsUshort());\n            }\n        }\n\n        [TestMethod]\n        public void IntFitsShort()\n        {\n            using (var a = new 
HugeInt(short.MaxValue))\n            {\n                Assert.IsTrue(a.FitsShort());\n                a.Value = a + 1;\n                Assert.IsFalse(a.FitsShort());\n                a.SetTo(short.MinValue);\n                Assert.IsTrue(a.FitsShort());\n                a.Value = a - 1;\n                Assert.IsFalse(a.FitsShort());\n            }\n        }\n\n        [TestMethod]\n        public void IntApproximateSizeInBase()\n        {\n            using (var a = new HugeInt(\"2983475029834750293429834750298347502934298347502983475029342983475029834750293429834750298347502934\"))\n            {\n                AssertEither(100, 101, a.ApproximateSizeInBase(10));\n                AssertEither(331, 331, a.ApproximateSizeInBase(2));\n                AssertEither(83, 83, a.ApproximateSizeInBase(16));\n                AssertEither(64, 65, a.ApproximateSizeInBase(36));\n                AssertEither(56, 57, a.ApproximateSizeInBase(62));\n            }\n        }\n\n        private void AssertEither(int expected1, int expected2, long actual)\n        {\n            Assert.IsTrue(actual == expected1 || actual == expected2, \"Expected {0} or {1}, actual {2}\", expected1, expected2, actual);\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/ExpressionTests.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Reflection;\nusing System.Text;\nusing System.Threading.Tasks;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class ExpressionTests\n    {\n        [TestMethod]\n        public void IntTestAllExpressions()\n        {\n            var baseExpr = typeof(IntegerExpression);\n            var allExpressions =\n                baseExpr.Assembly.GetTypes()\n                .Where(x => baseExpr.IsAssignableFrom(x) && !x.IsAbstract)\n                .ToList();\n\n            var one = Platform.Ui(1, 1);\n\n            using (var a = new HugeInt(-9L))\n            using (var b = new HugeInt(4L))\n            using (var c = new HugeRational(6, 7))\n            using (var r = MpirRandom.Default())\n            {\n                var expr = a + (-a * 2) * 3 * (a.Abs() * -2 + -64 + a * a) + (one * 116U) + a;\n                VerifyPartialResult(r, expr, 44);\n                expr = expr + a * 5 + (a+b) * (b + 1) * (b + -3) * b + (b * -a) - (b * (one * 25U)) - a + (b << 3) - ((a*b) << 1);\n                VerifyPartialResult(r, expr, -52);\n                expr = expr - 2 - 3U + 
(b - (a << 1)) + (b * b - (one * 15U)) * (b - a) * (a - 11) * (b - 3U) - (-340 - a) + ((one * 20U) - b);\n                VerifyPartialResult(r, expr, 52);\n                expr = expr + (-7 - 2 * a) + (28U - 4 * b) + -(a + b * 2) + (3 * a).Abs();\n                VerifyPartialResult(r, expr, 103);\n                expr = expr / a + expr / (3 * b) - a / b - b / (a + 10) + a % b - (3 * b) % a + a % (2 * b) - (12 * b) % (-5 * a) + (a * 4 / 8).Rounding(RoundingModes.Floor) + (b * 3 % 7).Rounding(RoundingModes.Ceiling);\n                VerifyPartialResult(r, expr, -20);\n                expr = expr - (a * 5).DivideExactly(a) + (b * 7 * 5432198).DivideExactly(5432198) + (b >> 1);\n                VerifyPartialResult(r, expr, 5);\n                expr = expr + (b ^ 3) + a.PowerMod(2, b) + (a + 6).PowerMod(b - 1, b * 5) + (a * a * a).Root(3) + (b * b).SquareRoot();\n                VerifyPartialResult(r, expr, 78);\n                expr = expr + ((b + 1) & -a) + (b | -a) - (b ^ a) + ~b;\n                VerifyPartialResult(r, expr, 100);\n                expr = expr + r.GetInt(b + 1) + r.GetIntBits(3) + r.GetIntBitsChunky(3) + (b * 2).NextPrimeCandidate(r) - b.Gcd(a - 1);\n                VerifyPartialResult(r, expr, 124);\n                expr = expr - a.Lcm(b * 3) - (b + 1).Lcm(2) - (-a).Invert(b + 7) - (1-a).RemoveFactors(b / 2) - HugeInt.Power(2, 3) - HugeInt.Factorial(4);\n                VerifyPartialResult(r, expr, 36);\n                expr = expr - HugeInt.Primorial(6) + HugeInt.Binomial(4, 2) + HugeInt.Binomial(b, 3) + HugeInt.Fibonacci(6) + HugeInt.Lucas(7);\n                VerifyPartialResult(r, expr, 53);\n                expr = expr + c.Numerator + c.Denominator;\n                VerifyPartialResult(r, expr, 66);\n\n                MarkExpressionsUsed(allExpressions, expr);\n            }\n\n            Assert.AreEqual(0, allExpressions.Count, \"Expression types not exercised: \" + string.Join(\"\",\n                allExpressions.Select(x => 
Environment.NewLine + x.Name).OrderBy(x => x)));\n        }\n\n        private void VerifyPartialResult(MpirRandom rnd, IntegerExpression expr, long expected)\n        {\n            rnd.Seed(123);\n\n            using (var r = new HugeInt())\n            {\n                r.Value = expr;\n                Assert.AreEqual(expected.ToString(), r.ToString());\n            }\n        }\n\n        private void MarkExpressionsUsed(List<Type> allExpressions, IntegerExpression expr)\n        {\n            var type = expr.GetType();\n            allExpressions.Remove(type);\n            \n            var children = type.GetFields(BindingFlags.NonPublic | BindingFlags.Instance)\n                .Where(x => typeof(IntegerExpression).IsAssignableFrom(x.FieldType))\n                .Select(x => (IntegerExpression)x.GetValue(expr))\n                .Where(x => x != null)\n                .ToList();\n\n            foreach (var childExpr in children)\n                MarkExpressionsUsed(allExpressions, childExpr);\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/IO.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class IO\n    {\n        [TestMethod]\n        public void IntInputOutputRaw()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                a.Write(ms);\n                ms.Position = 0;\n                b.Read(ms);\n                Assert.AreEqual(a, b);\n                Assert.IsTrue(b > 0);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputRawNegative()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = -(a ^ 100);\n                a.Write(ms);\n                ms.Position = 0;\n                b.Read(ms);\n                Assert.AreEqual(a, 
b);\n                Assert.IsTrue(b < 0);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStr()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStrHex()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                {\n                    writer.Write(\"0x\");\n                    a.Write(writer, 16);\n                }\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + \"0x\" + a.ToString(16), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStrHexLower()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using 
(var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                {\n                    writer.Write(\"0x\");\n                    a.Write(writer, 16, true);\n                }\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + \"0x\" + a.ToString(16, true), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStrOctal()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                {\n                    writer.Write('0');\n                    a.Write(writer, 8);\n                }\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + \"0\" + a.ToString(8), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStrBinary()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a 
^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                {\n                    writer.Write(\"0b\");\n                    a.Write(writer, 2);\n                }\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + \"0b\" + a.ToString(2), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void IntInputOutputStr62()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 62);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 62);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(62), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void IntImportExport()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            {\n                var bytes = new byte[1000];\n\n                foreach (var order in Enum.GetValues(typeof(LimbOrder)).Cast<LimbOrder>())\n                foreach (var endianness in Enum.GetValues(typeof(Endianness)).Cast<Endianness>())\n                foreach (var nails in new[] { 0, 5, 10, 16 
})\n                foreach (var size in new[] { 8, 11, 16 })\n                {\n                    var words = a.Export(bytes, size, order, endianness, nails);\n                    var expected = (ulong)System.Math.Ceiling(193m / (size * 8 - nails));\n                    Assert.AreEqual(expected, words);\n\n                    b.SetTo(0);\n                    b.Import(bytes, words, size, order, endianness, nails);\n                    Assert.AreEqual(a, b);\n                }\n            }\n        }\n\n        [TestMethod]\n        public void IntImportExportAllocating()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            {\n                foreach (var order in Enum.GetValues(typeof(LimbOrder)).Cast<LimbOrder>())\n                    foreach (var endianness in Enum.GetValues(typeof(Endianness)).Cast<Endianness>())\n                        foreach (var nails in new[] { 0, 5, 10, 16 })\n                            foreach (var size in new[] { 8, 11, 16 })\n                            {\n                                var bytes = a.Export<byte>(size, order, endianness, nails);\n                                var expected = (int)System.Math.Ceiling(193m / (size * 8 - nails));\n                                Assert.AreEqual(expected, bytes.Length / size);\n\n                                b.SetTo(0);\n                                b.Import(bytes, (uint)(bytes.Length / size), size, order, endianness, nails);\n                                Assert.AreEqual(a, b);\n                            }\n            }\n        }\n\n        [TestMethod]\n        public void IntImportExportAllocatingShort()\n        {\n            using (var a = new HugeInt(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            using (var b = new HugeInt())\n            {\n                foreach (var order in 
Enum.GetValues(typeof(LimbOrder)).Cast<LimbOrder>())\n                    foreach (var endianness in Enum.GetValues(typeof(Endianness)).Cast<Endianness>())\n                        foreach (var nails in new[] { 0, 5, 10, 16 })\n                            foreach (var size in new[] { 8, 11, 16 })\n                            {\n                                var bytes = a.Export<short>(size, order, endianness, nails);\n                                var expected = (int)System.Math.Ceiling(193m / (size * 8 - nails));\n                                Assert.AreEqual(expected, bytes.Length * 2 / size);\n\n                                b.SetTo(0);\n                                b.Import(bytes, (uint)(bytes.Length * 2 / size), size, order, endianness, nails);\n                                Assert.AreEqual(a, b);\n                            }\n            }\n        }\n\n        [TestMethod]\n        public void IntImportExportAllocatingZero()\n        {\n            using (var a = new HugeInt())\n            using (var b = new HugeInt())\n            {\n                var order = LimbOrder.LeastSignificantFirst;\n                var endianness = Endianness.Native;\n                var nails = 5;\n                var size = 4;\n\n                var bytes = a.Export<byte>(size, order, endianness, nails);\n                Assert.AreEqual(0, bytes.Length);\n\n                b.SetTo(1);\n                b.Import(bytes, 0, size, order, endianness, nails);\n                Assert.AreEqual(a, b);\n            }\n        }\n\n        [TestMethod]\n        public void IntSize()\n        {\n            using (var a = new HugeInt(\"-0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            {\n                var limbs = 192U / MpirSettings.BITS_PER_LIMB + 1;\n                Assert.AreEqual(limbs, a.Size());\n                Assert.AreEqual(limbs, (-a).Size());\n            }\n        }\n\n        [TestMethod]\n        public void IntAllocatedSize()\n        
{\n            using (var a = new HugeInt(\"-0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\"))\n            {\n                var allocated = a.AllocatedSize;\n                Assert.IsTrue(allocated >= (int)a.Size());\n                a.Value = -a;\n                Assert.AreEqual(allocated, a.AllocatedSize);\n                Assert.AreEqual(192U / MpirSettings.BITS_PER_LIMB + 1, a.Size());\n\n                a.Value >>= 64;\n                Assert.AreEqual(128U / MpirSettings.BITS_PER_LIMB + 1, a.Size());\n                Assert.AreEqual(allocated, a.AllocatedSize);\n            }\n        }\n\n        [TestMethod]\n        public void IntGetLimb()\n        {\n            using (var a = new HugeInt(\"-0x10123456789ABCDEFA123456789ABCDEF0123456789ABCDEF\"))\n            {\n                Assert.AreEqual(Platform.Ui(0x0123456789ABCDEFUL, 0x89ABCDEFU), a.GetLimb(0));\n                Assert.AreEqual(Platform.Ui(0xA123456789ABCDEFUL, 0x01234567U), a.GetLimb(1));\n            }\n        }\n\n        [TestMethod]\n        public void IntReadLimbs()\n        {\n            var dest = Enumerable.Repeat(Platform.Ui(0, 0), 8).ToArray();\n\n            using (var a = new HugeInt(\"-0x55533123456789ABCDEF02468ACEFDB9753171122334455667788\"))\n            {\n                a.ReadLimbs(dest, 1, 2, 3);\n                a.ReadLimbs(dest, 2, 2, 6);\n                Assert.AreEqual(Platform.Ui(0x2468ACEFDB975317UL, 0x11223344U), dest[3]);\n                Assert.AreEqual(Platform.Ui(0x123456789ABCDEF0UL, 0xDB975317U), dest[4]);\n                Assert.AreEqual(Platform.Ui(0x123456789ABCDEF0UL, 0xDB975317U), dest[6]);\n                Assert.AreEqual(Platform.Ui(0x0000000000055533UL, 0x2468ACEFU), dest[7]);\n            }\n        }\n\n        [TestMethod]\n        public void IntModifyLimbs()\n        {\n            var src = new[]\n                {\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n   
                 Platform.Ui(0x2468ACEFDB975317UL, 0x11223344U),\n                    Platform.Ui(0x3456789ABCDEF044UL, 0x09872458U),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0x123456789ABCDEF0UL, 0xDB975317U),\n                    Platform.Ui(0x0000000000055533UL, 0x2468ACEFU),\n                };\n\n            using (var a = new HugeInt(Platform.Select(\"0x1122334455667788\", \"0x55667788\")))\n            using (var expected1 = new HugeInt(Platform.Select(      \"0x3456789ABCDEF0442468ACEFDB9753171122334455667788\",          \"0x098724581122334455667788\")))\n            using (var expected2 = new HugeInt(Platform.Select(\"-0x55533123456789ABCDEF02468ACEFDB9753171122334455667788\", \"-0x2468ACEFDB9753171122334455667788\")))\n            {\n                a.ModifyLimbs(src, 1, 2, 3, false);\n                Assert.AreEqual(expected1, a);\n\n                a.ModifyLimbs(src, 2, 2, 6, true);\n                Assert.AreEqual(expected2, a);\n            }\n        }\n\n        [TestMethod]\n        public void IntModifyLimbsWithGap()\n        {\n            var src = new[]\n                {\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0x2468ACEFDB975317UL, 0x11223344U),\n                    Platform.Ui(0x3456789ABCDEF044UL, 0x09872458U),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0x123456789ABCDEF0UL, 0xDB975317U),\n                    Platform.Ui(0x0000000000055533UL, 0x2468ACEFU),\n                };\n\n            using (var a = new HugeInt(Platform.Select(\"0x1122334455667788\", \"0x55667788\")))\n            using (var expected = new HugeInt(Platform.Select(\"0x3456789ABCDEF0442468ACEFDB97531700000000000000001122334455667788\", \"0x09872458112233440000000055667788\")))\n            {\n                a.ModifyLimbs(src, 2, 2, 3, false);\n                Assert.AreEqual(expected, a);\n         
   }\n        }\n\n        [TestMethod]\n        public void IntWriteLimbs()\n        {\n            var src = new[]\n                {\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0x2468ACEFDB975317UL, 0x11223344U),\n                    Platform.Ui(0x3456789ABCDEF044UL, 0x09872458U),\n                    Platform.Ui(0, 0),\n                    Platform.Ui(0x123456789ABCDEF0UL, 0xDB975317U),\n                    Platform.Ui(0x0000000000055533UL, 0x2468ACEFU),\n                };\n\n            using (var a = new HugeInt(Platform.Select(\"0x1122334455667788\", \"0x55667788\")))\n            using (var expected = new HugeInt(Platform.Select(\"-0x123456789ABCDEF000000000000000003456789ABCDEF0442468ACEFDB975317\", \"-0xDB975317000000000987245811223344\")))\n            {\n                a.WriteLimbs(src, 3, 4, false);\n                Assert.IsTrue(-expected == a);\n\n                a.WriteLimbs(src, 3, 4, true);\n                Assert.AreEqual(expected, a);\n            }\n        }\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/Math.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class Math\n    {\n        #region Exponentiation\n\n        [TestMethod]\n        public void IntPowerModHugeInt()\n        {\n            using (var a = new HugeInt(\"2835698356928736487698769283645293409781234\"))\n            using (var b = new HugeInt(\"3\"))\n            using (var c = new HugeInt(\"9786459872639458729387304958673243509870923452745892673402935742456\"))\n            {\n                a.Value = a.PowerMod(b, c);\n                Assert.AreEqual(\"5346078446724436806099093819990997994355321605000165187447171753448\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntPowerModLimb()\n        {\n            using (var a = new HugeInt(\"2835698356928736487698769283645293409781234\"))\n            using (var c = new HugeInt(\"9786459872639458729387304958673243509870923452745892673402935742456\"))\n            {\n                a.Value = a.PowerMod(3, c);\n                Assert.AreEqual(\"5346078446724436806099093819990997994355321605000165187447171753448\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        
public void IntPowerLimb()\n        {\n            using (var a = new HugeInt(\"9785412309485720938412983404349\"))\n            {\n                a.Value = a ^ 4;\n                Assert.AreEqual(\"9168884832199547717402442404668238841010784738902226284286664833331445628675177089723224507720724521226586825967635414667601\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Roots\n\n        [TestMethod]\n        public void IntRoot()\n        {\n            var n = \"8984948281360922385394772450147012613851354303\";\n            using (var a = new HugeInt(n))\n            using (var b = new HugeInt())\n            using (var c = new HugeInt())\n            {\n                a.Value = a * a * a;\n\n                b.Value = a.Root(3);\n                Assert.AreEqual(n, b.ToString());\n\n                bool exact = false;\n                b.Value = a.Root(3).SettingExactTo(x => exact = x);\n                Assert.AreEqual(n, b.ToString());\n                Assert.IsTrue(exact);\n                \n                b.Value = (a + 123).Root(3).SettingExactTo(x => exact = x);\n                Assert.AreEqual(n, b.ToString());\n                Assert.IsFalse(exact);\n\n                b.Value = (a + 123).Root(3).SavingRemainderTo(c);\n                Assert.AreEqual(n, b.ToString());\n                Assert.AreEqual(\"123\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntSquareRoot()\n        {\n            var n = \"8984948281360922385394772450147012613851354303\";\n            using (var a = new HugeInt(n))\n            using (var b = new HugeInt())\n            using (var c = new HugeInt())\n            {\n                a.Value = a * a;\n\n                b.Value = a.SquareRoot();\n                Assert.AreEqual(n, b.ToString());\n\n                b.SetTo(0);\n                b.Value = (a + 123).SquareRoot().SavingRemainderTo(c);\n                Assert.AreEqual(n, b.ToString());\n         
       Assert.AreEqual(\"123\", c.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntIsPerfectPower()\n        {\n            using (var a = new HugeInt(\"8984948281360922385394772450147012613851354303\"))\n            using (var b = new HugeInt())\n            {\n                b.Value = a * a * a;\n                Assert.IsTrue(b.IsPerfectPower());\n                b.Value = a * a;\n                Assert.IsTrue(b.IsPerfectPower());\n                b.Value = a * a * a + 1;\n                Assert.IsFalse(b.IsPerfectPower());\n                b.Value = a * a + 1;\n                Assert.IsFalse(b.IsPerfectPower());\n            }\n        }\n\n        [TestMethod]\n        public void IntIsPerfectSquare()\n        {\n            using (var a = new HugeInt(\"8984948281360922385394772450147012613851354303\"))\n            using (var b = new HugeInt())\n            {\n                b.Value = a * a * a;\n                Assert.IsFalse(b.IsPerfectSquare());\n                b.Value = a * a;\n                Assert.IsTrue(b.IsPerfectSquare());\n                b.Value = a * a * a + 1;\n                Assert.IsFalse(b.IsPerfectSquare());\n                b.Value = a * a + 1;\n                Assert.IsFalse(b.IsPerfectSquare());\n            }\n        }\n\n        #endregion\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeIntTests/NumberTheoretic.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class NumberTheoretic\n    {\n        [TestMethod]\n        public void IntIsProbablePrime()\n        {\n            using (var a = new HugeInt(\"622288097498926496141095869268883999563096063592498055290461\"))\n            using (var random = MpirRandom.Default())\n            {\n                Assert.IsTrue(a.IsProbablePrime(random, 10, 0));\n                a.Value = a * 2;\n                Assert.IsFalse(a.IsProbablePrime(random, 10, 0));\n            }\n        }\n\n        [TestMethod]\n        public void IntIsLikelyPrime()\n        {\n            using (var a = new HugeInt(\"622288097498926496141095869268883999563096063592498055290461\"))\n            using (var random = MpirRandom.Default())\n            {\n                Assert.IsTrue(a.IsLikelyPrime(random, 0));\n                a.Value = a * 2;\n                Assert.IsFalse(a.IsLikelyPrime(random, 0));\n            }\n        }\n\n        [TestMethod]\n        public void IntNextPrimeCandidate()\n        {\n            using (var a = new 
HugeInt(\"622288097498926496141095869268883999563096063592498055290460\"))\n            using (var random = MpirRandom.Default())\n            {\n                Assert.AreEqual(a + 1, a.NextPrimeCandidate(random));\n            }\n        }\n\n        [TestMethod]\n        public void IntGcd()\n        {\n            using (var a = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            using (var b = new HugeInt())\n            using (var c = new HugeInt())\n            {\n                b.Value = a * 39;\n                c.Value = a * 41;\n                Assert.AreEqual(a, b.Gcd(c));\n            }\n        }\n\n        [TestMethod]\n        public void IntGcdExt()\n        {\n            using (var a = new HugeInt(12))\n            using (var b = new HugeInt(21))\n            using (var g = new HugeInt())\n            using (var s = new HugeInt())\n            using (var t = new HugeInt())\n            {\n                g.Value = a.Gcd(b).SavingDiophantineMultipliersTo(s, t);\n                Assert.AreEqual(\"3\", g.ToString());\n                Assert.AreEqual(\"2\", s.ToString());\n                Assert.AreEqual(\"-1\", t.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntGcdExt1()\n        {\n            using (var a = new HugeInt(12))\n            using (var b = new HugeInt(21))\n            using (var g = new HugeInt())\n            using (var s = new HugeInt())\n            {\n                g.Value = a.Gcd(b).SavingDiophantineMultipliersTo(s, null);\n                Assert.AreEqual(\"3\", g.ToString());\n                Assert.AreEqual(\"2\", s.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntGcdExt2()\n        {\n            using (var a = new HugeInt(12))\n            using (var b = new HugeInt(21))\n            using (var g = new HugeInt())\n            using (var t = new HugeInt())\n            {\n                g.Value = 
a.Gcd(b).SavingDiophantineMultipliersTo(null, t);\n                Assert.AreEqual(\"3\", g.ToString());\n                Assert.AreEqual(\"-1\", t.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void IntGcdLimb()\n        {\n            using (var a = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                a.Value = a * 127;\n                Assert.AreEqual(127UL, a.Gcd(254));\n            }\n        }\n\n        [TestMethod]\n        public void IntLcm()\n        {\n            using (var a = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                var b = a * 39;\n                var c = a * 41;\n                Assert.AreEqual(a * 39 * 41, b.Lcm(c));\n            }\n        }\n\n        [TestMethod]\n        public void IntLcmLimb()\n        {\n            using (var a = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                var b = a * 41;\n                Assert.AreEqual(a * 39 * 41, b.Lcm(39));\n            }\n        }\n\n        [TestMethod]\n        public void IntInvert()\n        {\n            using (var a = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            using (var m = new HugeInt(\"622288097498926496141095869268883999563096063592498055290461\"))\n            using (var r = new HugeInt())\n            {\n                r.Value = a.Invert(m);\n                Assert.IsTrue(r > 0);\n                Assert.IsTrue(1 == (r * a) % m);\n            }\n        }\n\n        [TestMethod]\n        public void IntJacobi()\n        {\n            using (var a = new HugeInt(\"9288562863495827364985273645298367452\"))\n            using (var b = new HugeInt(\"876428957629387610928574612341\"))\n            {\n                Assert.AreEqual(-1, HugeInt.Jacobi(a, b));\n            }\n        }\n\n        [TestMethod]\n        public void IntLegendre()\n        {\n       
     using (var a = new HugeInt(\"9288562863495827364985273645298367452\"))\n            using (var p = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                Assert.AreEqual(-1, HugeInt.Legendre(a, p));\n            }\n        }\n\n        [TestMethod]\n        public void IntKronecker()\n        {\n            using (var a = new HugeInt(\"9288562863495827364985273645298367452\"))\n            using (var b = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                Assert.AreEqual(-1, HugeInt.Kronecker(a, b));\n                Assert.AreEqual(0, HugeInt.Kronecker(a, 2));\n                Assert.AreEqual(-1, HugeInt.Kronecker(b, 2));\n                Assert.AreEqual(-1, HugeInt.Kronecker(2, b));\n                Assert.AreEqual(-1, HugeInt.Kronecker(b, 2U));\n                Assert.AreEqual(-1, HugeInt.Kronecker(2U, b));\n            }\n        }\n\n        [TestMethod]\n        public void IntRemoveFactors()\n        {\n            using (var a = new HugeInt(\"9288562863495827364985273645298367452\"))\n            using (var b = new HugeInt(\"29927402397991286489627837734179186385188296382227\"))\n            {\n                Assert.AreEqual(a, (a * b * b).RemoveFactors(b));\n                ulong count = 0;\n                Assert.AreEqual(a, (a * b * b).RemoveFactors(b).SavingCountRemovedTo(x => count = x));\n                Assert.AreEqual(2UL, count);\n            }\n        }\n\n        [TestMethod]\n        public void IntFactorial()\n        {\n            using (var a = new HugeInt(HugeInt.Factorial(30)))\n            using (var b = new HugeInt(\"50\"))\n            using (var c = new HugeInt(\"70\"))\n            using (var x = new HugeInt())\n            {\n                for (x.Value = b - 2; x > 0; x.Value -= 2)\n                    b.Value *= x;\n\n                for (x.Value = c - 3; x > 0; x.Value -= 3)\n                    c.Value *= x;\n\n                
Assert.AreEqual(\"265252859812191058636308480000000\", a.ToString());\n                Assert.AreEqual(b, HugeInt.Factorial(50, 2));\n                Assert.AreEqual(c, HugeInt.Factorial(70, 3));\n            }\n        }\n\n        [TestMethod]\n        public void IntPrimorial()\n        {\n            var N = 256U;\n            var sieve = new bool[N];\n            for (var x = 2; x < 16; x++)\n                if (!sieve[x])\n                    for (var n = x * 2; n < N; n += x)\n                        sieve[n] = true;\n\n            using (var a = new HugeInt(1))\n            {\n                for (var n = 2; n < N; n++)\n                    if (!sieve[n])\n                        a.Value *= n;\n\n                Assert.AreEqual(a, HugeInt.Primorial(N));\n            }\n        }\n\n        [TestMethod]\n        public void IntBinomial()\n        {\n            using (var a = new HugeInt(HugeInt.Binomial(40, 15)))\n            using (var b = new HugeInt(\"41\"))\n            using (var c = new HugeInt(HugeInt.Binomial(b, 15)))\n            {\n                Assert.AreEqual(HugeInt.Factorial(40).DivideExactly(HugeInt.Factorial(15) * HugeInt.Factorial(25)), a);\n                Assert.AreEqual(a * 41 / 26, c);\n            }\n        }\n\n        [TestMethod]\n        public void IntFibonacci()\n        {\n            using (var a = new HugeInt(HugeInt.Fibonacci(300)))\n            using (var b = new HugeInt())\n            using (var c = new HugeInt(HugeInt.Fibonacci(300).SavingPreviousTo(b)))\n            using (var even = new HugeInt(\"1\"))\n            using (var odd = new HugeInt(\"1\"))\n            {\n                for(var i = 3; i <= 299; i += 2)\n                {\n                    odd.Value += even;\n                    even.Value += odd;\n                }\n                Assert.AreEqual(even, a);\n                Assert.AreEqual(odd, b);\n                Assert.AreEqual(even, c);\n            }\n        }\n\n        [TestMethod]\n        
public void IntLucas()\n        {\n            using (var f299 = new HugeInt())\n            using (var f300 = new HugeInt(HugeInt.Fibonacci(300).SavingPreviousTo(f299)))\n            using (var a = new HugeInt())\n            {\n                var f298 = f300 - f299;\n                Assert.AreEqual(f299 * 2 + f300, HugeInt.Lucas(300));\n                Assert.AreEqual(f299 * 2 + f300, HugeInt.Lucas(300).SavingPreviousTo(a));\n                Assert.AreEqual(f298 * 2 + f299, a);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/Arithmetic.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class Arithmetic\n    {\n        #region Add\n\n        [TestMethod]\n        public void RationalAddHugeRational()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222987435987982730594288574029879874539/590872612825179551336102196593\"))\n            using (var c = new HugeRational(a + b))\n            {\n                Assert.AreEqual(a.Numerator * b.Denominator + b.Numerator * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddHugeInt()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeRational(a + b))\n            {\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                
Assert.AreEqual(a.Denominator, c.Denominator);\n                c.Value = (b + 1) + a;\n                Assert.AreEqual(a.Numerator + (b + 1) * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddLimb()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                var b = Platform.Ui(4288574029879874539UL, 4288574029U);\n                c.Value = a + b;\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddToLimb()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                var b = Platform.Ui(4288574029879874539UL, 4279874539U);\n                c.Value = b + a;\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddSignedLimb()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                var b = Platform.Si(-4288574029879874539L, -1279874539);\n                c.Value = a + b;\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddToSignedLimb()\n        {\n            using (var a = new 
HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                var b = Platform.Si(-4288574029879874539L, -1279874539);\n                c.Value = b + a;\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddToMaxSignedLimb()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                var b = Platform.Si(long.MinValue, int.MinValue);\n                c.Value = b + a;\n                Assert.AreEqual(a.Numerator + b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAddExpressionHugeRational()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222987435987982730594288574029879874539/590872612825179551336102196593\"))\n            using (var c = new HugeRational())\n            {\n                c.Value = 1 + (a + b);\n                Assert.AreEqual(a.Numerator * b.Denominator + b.Numerator * a.Denominator + a.Denominator * b.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b.Denominator, c.Denominator);\n            }\n        }\n\n        #endregion\n\n        #region Subtract\n\n        [TestMethod]\n        public void RationalSubtractHugeRational()\n        {\n            using (var a = new HugeRational(\"445497268491433028939318409770173720259/115756986668303657898962467957\"))\n            using (var b = new 
HugeRational(\"222987435987982730594288574029879874539/590872612825179551336102196593\"))\n            using (var c = new HugeRational(a - b))\n            {\n                Assert.AreEqual(a.Numerator * b.Denominator - b.Numerator * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractHugeInt()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeRational(a - b))\n            {\n                Assert.AreEqual(a.Numerator - b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n                c.Value = (b + 1) - a;\n                Assert.AreEqual((b + 1) * a.Denominator - a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractLimb()\n        {\n                var b = Platform.Ui(4288574029879874539UL, 2885740298U);\n            using (var a = new HugeRational(\"222509832503450298349318409770173720259/115756986668303657898962467957\"))\n            using (var c = new HugeRational(a - b))\n            {\n                Assert.AreEqual(a.Numerator - b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractFromLimb()\n        {\n                var b = Platform.Ui(4288574029879874539UL, 2885740298U);\n            using (var a = new HugeRational(\"222509832503450298349318409770173720259/115756986668303657898962467957\"))\n            using (var c = new HugeRational(b - a))\n            {\n                Assert.AreEqual(b * a.Denominator 
- a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractPositiveSignedLimb()\n        {\n                var b = Platform.Si(4288574029879874539L, 1885740298);\n            using (var a = new HugeRational(\"222509832503450298349318409770173720259/115756986668303657898962467957\"))\n            using (var c = new HugeRational(a - b))\n            {\n                Assert.AreEqual(a.Numerator - b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractFromPositiveSignedLimb()\n        {\n                var b = Platform.Si(4288574029879874539L, 1885740298);\n            using (var a = new HugeRational(\"222509832503450298349318409770173720259/115756986668303657898962467957\"))\n            using (var c = new HugeRational(b - a))\n            {\n                Assert.AreEqual(b * a.Denominator - a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractSignedLimb()\n        {\n                var b = Platform.Si(-4288574029879874539L, -1885740298);\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(a - b))\n            {\n                Assert.AreEqual(a.Numerator - b * a.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalSubtractFromSignedLimb()\n        {\n                var b = Platform.Si(-4288574029879874539L, -1885740298);\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using 
(var c = new HugeRational(b - a))\n            {\n                Assert.AreEqual(b * a.Denominator - a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        #endregion\n\n        #region Multiply\n\n        [TestMethod]\n        public void RationalMultiplyByHugeRational()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"7859487359873459872354987610987897/590872612825179551336102196593\"))\n            using (var c = new HugeRational(a * b))\n            {\n                Assert.AreEqual(a.Numerator * b.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalMultiplytHugeInt()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeRational(a * b))\n            {\n                Assert.AreEqual(a.Numerator * b, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n                c.Value = (b + 1) * a;\n                Assert.AreEqual((b + 1) * a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalMultiplyByLimb()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            {\n                var b = Platform.Ui(17390538260286101342, 1500450271);\n                a.Value = a * b;\n                Assert.AreEqual(Platform.Select(\n                    
\"1569219546226477273686601978789044606491747469626478990/115756986668303657898962467957\",\n                    \"135391777882513860921200145428966240276901495/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalMultiplyLimbBy()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            {\n                var b = Platform.Ui(17390538260286101342, 1500450271);\n                a.Value = b * a;\n                Assert.AreEqual(Platform.Select(\n                    \"1569219546226477273686601978789044606491747469626478990/115756986668303657898962467957\",\n                    \"135391777882513860921200145428966240276901495/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalMultiplyBySignedLimb()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            {\n                var b = Platform.Si(-7390538260286101342, -1987450271);\n                a.Value = a * b;\n                Assert.AreEqual(Platform.Select(\n                    \"-666878558995492522701808125338061156491747469626478990/115756986668303657898962467957\",\n                    \"-179335783960662818294159606092029134291901495/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalMultiplySignedLimbBy()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            {\n                var b = Platform.Si(-7390538260286101342, -1987450271);\n                a.Value = b * a;\n                Assert.AreEqual(Platform.Select(\n                    
\"-666878558995492522701808125338061156491747469626478990/115756986668303657898962467957\",\n                    \"-179335783960662818294159606092029134291901495/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Shift Left\n\n        [TestMethod]\n        public void RationalShiftLeft()\n        {\n            using (var a = new HugeRational(\"-12345700987ABCDEF2345CBDEFA245230948/17607EF654EB9A13FFA163C75\", 16))\n            {\n                uint b = 40;\n                a.Value = a << b;\n                Assert.AreEqual(\"-12345700987ABCDEF2345CBDEFA2452309480000000000/17607EF654EB9A13FFA163C75\", a.ToString(16));\n            }\n        }\n\n        #endregion\n\n        #region Shift Right\n\n        [TestMethod]\n        public void RationalShiftRight()\n        {\n            using (var a = new HugeRational(\"ABCDEF052834524092854092874502983745029345723098457209305983434345/17607EF654EB9A13FFA163C75\", 16))\n            {\n                uint b = 96;\n                a.Value = a >> b;\n                Assert.AreEqual(\"ABCDEF052834524092854092874502983745029345723098457209305983434345/17607EF654EB9A13FFA163C75000000000000000000000000\", a.ToString(16));\n            }\n        }\n\n        #endregion\n\n        #region Negate\n\n        [TestMethod]\n        public void RationalNegate()\n        {\n            using (var a = new HugeRational(\"24092854092874502983745029345723098457209/115756986668303657898962467957\"))\n            {\n                a.Value = -a;\n                Assert.AreEqual(\"-24092854092874502983745029345723098457209/115756986668303657898962467957\", a.ToString());\n                a.Value = -a;\n                Assert.AreEqual(\"24092854092874502983745029345723098457209/115756986668303657898962467957\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Abs\n\n        [TestMethod]\n        public void 
RationalMakeAbsolute()\n        {\n            using (var a = new HugeRational(\"-24092854092874502983745029345723098457209/115756986668303657898962467957\"))\n            {\n                a.Value = a.Abs();\n                Assert.AreEqual(\"24092854092874502983745029345723098457209/115756986668303657898962467957\", a.ToString());\n                a.Value = a.Abs();\n                Assert.AreEqual(\"24092854092874502983745029345723098457209/115756986668303657898962467957\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Invert\n\n        [TestMethod]\n        public void RationalInvert()\n        {\n            using(var a = new HugeRational(\"-24092854092874502983745029345723098457209/115756986668303657898962467957\"))\n            {\n                a.Value = a.Invert();\n                Assert.AreEqual(\"-115756986668303657898962467957/24092854092874502983745029345723098457209\", a.ToString());\n                a.Value = a.Invert();\n                Assert.AreEqual(\"-24092854092874502983745029345723098457209/115756986668303657898962467957\", a.ToString());\n            }\n        }\n\n        #endregion\n\n        #region Power\n\n        [TestMethod]\n        public void RationalPower()\n        {\n            using(var n = new HugeInt(\"-24092854092874502983745029345723098457209\"))\n            using(var d = new HugeInt(\"115756986668303657898962467957\"))\n            using(var a = new HugeRational(n, d))\n            {\n                a.Value = a ^ 5;\n                Assert.AreEqual(n ^ 5, a.Numerator);\n                Assert.AreEqual(d ^ 5, a.Denominator);\n            }\n        }\n\n        #endregion\n\n        #region Divide\n\n        #region Int\n\n        [TestMethod]\n        public void RationalDivideHugeRational()\n        {\n            using (var a = new HugeRational(\"115756986668303657898962467957/394580293847502987609283945873594873409587\"))\n            using (var b = new 
HugeRational(\"593169091750307653294\"))\n            {\n                a.Value = a / b;\n                Assert.AreEqual(\"115756986668303657898962467957/234052834524092854092760134269509268758750275703033222451729578\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDivideHugeRational2()\n        {\n            using (var a = new HugeRational(\"90234098723098475098479385345098345/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"6847944682037444681162770672798288913849/590872612825179551336102196593\"))\n            using (var c = new HugeRational(a / b))\n            {\n                Assert.AreEqual(a.Numerator * b.Denominator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b.Numerator, c.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalDivideHugeInt()\n        {\n            using (var a = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeInt(\"222987435987982730594288574029879874539\"))\n            using (var c = new HugeRational(a / b))\n            using (var d = new HugeRational())\n            {\n                Assert.AreEqual(a.Numerator, c.Numerator);\n                Assert.AreEqual(a.Denominator * b, c.Denominator);\n                c.Value = (b + 1) / a;\n                d.Numerator.Value = (b + 1) * a.Denominator;\n                d.Denominator.Value = a.Numerator;\n                d.Canonicalize();\n                Assert.AreEqual(d, c);\n            }\n        }\n\n        #endregion\n\n        #region Limb\n\n        [TestMethod]\n        public void RationalDivideLimb()\n        {\n            using (var a = new HugeRational(\"115756986668303657898962467957/39458029384750298767200622330399462537522498\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3367900313);\n                a.Value = a / b;\n   
             Assert.AreEqual(Platform.Select(\n                    \"115756986668303657898962467957/234052834524092854092874502983745029345723092857791404165816936\",\n                    \"115756986668303657898962467957/132890709515263728644898490080347139295153795258741874\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDivideSignedLimb()\n        {\n            using(var a = new HugeRational(\"115756986668303657898962467957/39458029384750298767200622330399462537522498\"))\n            {\n                var b = Platform.Si(-5931690917503076532, -1500450271);\n                a.Value = a / b;\n                Assert.AreEqual(Platform.Select(\n                    \"-115756986668303657898962467957/234052834524092854092874502983745029345723092857791404165816936\",\n                    \"-115756986668303657898962467957/59204810883474549052577139687016525102679979792696958\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDivideLimbBy()\n        {\n            using(var a = new HugeRational(\"115756986668303657898962467957/39458029384750298767200622330399462537522498\"))\n            {\n                var b = Platform.Ui(5931690917503076532, 3367900313);\n                a.Value = b / a;\n                Assert.AreEqual(Platform.Select(\n                    \"234052834524092854092874502983745029345723092857791404165816936/115756986668303657898962467957\",\n                    \"132890709515263728644898490080347139295153795258741874/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDivideSignedLimbBy()\n        {\n            using(var a = new HugeRational(\"115756986668303657898962467957/39458029384750298767200622330399462537522498\"))\n            {\n                var b = Platform.Si(-5931690917503076532, -1500450271);\n         
       a.Value = b / a;\n                Assert.AreEqual(Platform.Select(\n                    \"-234052834524092854092874502983745029345723092857791404165816936/115756986668303657898962467957\",\n                    \"-59204810883474549052577139687016525102679979792696958/115756986668303657898962467957\"),\n                    a.ToString());\n            }\n        }\n\n        #endregion\n\n        #endregion\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/Assignment.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class Assignment\n    {\n        [TestMethod]\n        public void RationalAssignCopy()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var b = new HugeRational())\n            {\n                b.Value = a;\n                Assert.AreEqual(\"-222509832503450298345029835740293845720/115756986668303657898962467957\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalSwap()\n        {\n            var strA = \"-222509832503450298345029835740293845720/115756986668303657898962467957\";\n            var strB = \"2039847290878794872059384789347534534/590872612825179551336102196593\";\n            using (var a = new HugeRational(strA))\n            using (var b = new HugeRational(strB))\n            {\n                var aValue = a._value();\n                var bValue = b._value();\n\n                var an = a.Numerator;\n                var ad = a.Denominator;\n                var bn = b.Numerator;\n                var bd = 
b.Denominator;\n\n                a.Swap(b);\n                Assert.AreEqual(aValue, a._value());\n                Assert.AreEqual(bValue, b._value());\n                Assert.AreEqual(strB, a.ToString());\n                Assert.AreEqual(strA, b.ToString());\n                Assert.AreEqual(strB, string.Format(\"{0}/{1}\", an, ad));\n                Assert.AreEqual(strA, string.Format(\"{0}/{1}\", bn, bd));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompoundOperators()\n        {\n            using (var a = new HugeRational(\"938475092834705928347523452345/115756986668303657898962467957\"))\n            {\n                a.Value += 1;\n                a.Value *= 10;\n                Assert.AreEqual(\"10542320795030095862464859203020/115756986668303657898962467957\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalAssignFloat()\n        {\n            using (var a = new HugeFloat(\"9363833093638312937.25\"))\n            using (var b = new HugeRational())\n            {\n                b.SetTo(a);\n                Assert.AreEqual(\"37455332374553251749/4\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalAssignInt()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845720\"))\n            using (var b = new HugeRational(\"1/3\"))\n            {\n                b.SetTo(a);\n                Assert.AreEqual(\"222509832503450298345029835740293845720/1\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalAssignInt2()\n        {\n            using (var a = new HugeInt(\"222509832503450298345029835740293845719\"))\n            using (var d = new HugeInt(\"115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"1/3\"))\n            {\n                b.SetTo(a, d);\n                
Assert.AreEqual(\"222509832503450298345029835740293845719/115756986668303657898962467957\", b.ToString());\n                b.SetTo(b.Numerator - b.Denominator, b.Denominator * 5);\n                Assert.AreEqual(a - d, b.Numerator);\n                Assert.AreEqual(d * 5, b.Denominator);\n            }\n        }\n\n        [TestMethod]\n        public void RationalAssignFloat2()\n        {\n            using(var a = new HugeFloat(\"222509832503450298342455029.125\"))\n            using(var b = new HugeRational(\"1/3\"))\n            {\n                b.SetTo(a);\n                Assert.AreEqual(\"1780078660027602386739640233/8\", b.ToString());\n            }\n        }\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/Comparisons.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class Comparisons\n    {\n        #region CompareTo\n\n        [TestMethod]\n        public void RationalCompareToHugeRational()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(b.CompareTo(a)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + c).CompareTo(-b)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(null)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToHugeInt()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeInt(\"115756986668303657898962467957\"))\n        
    using (var c = new HugeRational(\"115756986668303657898962467957/1\"))\n            using (var d = new HugeInt(1922215141))\n            {\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(d)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareTo(d)));\n                Assert.AreEqual(-1, System.Math.Sign((-a).CompareTo(d + 1)));\n                Assert.AreEqual(1, System.Math.Sign(d.CompareTo(a)));\n                Assert.AreEqual(-1, System.Math.Sign(d.CompareTo(-a)));\n                Assert.AreEqual(1, System.Math.Sign((d + 1).CompareTo(-a)));\n\n                Assert.AreEqual(0, System.Math.Sign(b.CompareTo(c)));\n                Assert.AreEqual(0, System.Math.Sign(c.CompareTo(b)));\n                Assert.AreEqual(0, System.Math.Sign((-b).CompareTo(-c)));\n                Assert.AreEqual(0, System.Math.Sign((-c).CompareTo(-b)));\n\n                Assert.AreEqual(1, System.Math.Sign(b.CompareTo(c - 1)));\n                Assert.AreEqual(1, System.Math.Sign(c.CompareTo(b - 1)));\n                Assert.AreEqual(-1, System.Math.Sign((-b).CompareTo(1 - c)));\n                Assert.AreEqual(-1, System.Math.Sign((-c).CompareTo(1 - b)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToObject()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(((IComparable)b).CompareTo((object)a)));\n                Assert.AreEqual(-1, System.Math.Sign(((IComparable)a).CompareTo((object)b)));\n                Assert.AreEqual(1, System.Math.Sign(((IComparable)a).CompareTo(null)));\n                Assert.AreEqual(0, System.Math.Sign(((IComparable)(a + 
c)).CompareTo((object)-b)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToExpression()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.AreEqual(1, System.Math.Sign(((IComparable<RationalExpression>)b).CompareTo(a)));\n                Assert.AreEqual(-1, System.Math.Sign(((IComparable<RationalExpression>)a).CompareTo(b)));\n                Assert.AreEqual(1, System.Math.Sign(((IComparable<RationalExpression>)a).CompareTo(null)));\n                Assert.AreEqual(0, System.Math.Sign(((IComparable<RationalExpression>)(a + c)).CompareTo(-b)));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void RationalCompareToNonExpression()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            {\n                a.CompareTo(\"abc\");\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToLimb()\n        {\n            using (var a = new HugeRational(\"-222509821\"))\n            {\n                var b = Platform.Ui(222509820, 222509820);\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((-(a + 1)).CompareTo(b)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareTo(b)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToLimb2()\n        {\n            using(var a = new HugeRational(\"-222509947/127\"))\n            
{\n                uint b = 222509820;\n                uint d = 127;\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b, d)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1, d)));\n                Assert.AreEqual(0, System.Math.Sign((-(a + 1)).CompareTo(b, d)));\n                Assert.AreEqual(0, System.Math.Sign((-a).CompareTo(b + d, d)));\n                Assert.AreEqual(1, System.Math.Sign((-a).CompareTo(b, d)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToSignedLimb()\n        {\n            using (var a = new HugeRational(\"-222509821\"))\n            {\n                var b = Platform.Si(-222509820, -222509820);\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(b - 2)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToSignedLimb2()\n        {\n            using(var a = new HugeRational(\"-222509947/127\"))\n            {\n                int b = -222509820;\n                int d = 127;\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b, (uint)d)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1, (uint) d)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(b, (uint) d)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b - d + 1, (uint) d)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareTo(b - d, (uint) d)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(b - d - 1, (uint) d)));\n            }\n        }\n\n        [TestMethod]\n        public void RationalCompareToSignedDouble()\n        {\n            
using (var a = new HugeRational(\"-222509821\"))\n            {\n                double b = -222509820;\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b)));\n                Assert.AreEqual(-1, System.Math.Sign(a.CompareTo(b + 1)));\n                Assert.AreEqual(0, System.Math.Sign((a + 1).CompareTo(b)));\n                Assert.AreEqual(0, System.Math.Sign(a.CompareTo(b - 1)));\n                Assert.AreEqual(1, System.Math.Sign(a.CompareTo(b - 1.1)));\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with expr\n\n        [TestMethod]\n        public void RationalOperatorLessThan()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                c.Value = a;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsFalse(a < null);\n                Assert.IsTrue(null < a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorLessThanOrEqual()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                c.Value = a;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsTrue(a <= c);\n                Assert.IsFalse(a <= null);\n                Assert.IsTrue(null <= a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThan()\n        {\n            using 
(var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                c.Value = a;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsFalse(a > c);\n                Assert.IsTrue(a > null);\n                Assert.IsFalse(null > a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanOrEqual()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational())\n            {\n                c.Value = a;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsTrue(a >= null);\n                Assert.IsFalse(null >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with limb\n\n        [TestMethod]\n        public void RationalOperatorLessThanLimb()\n        {\n            using (var a = new HugeRational(\"3845721\"))\n            {\n                uint c = 5432;\n                var b = Platform.Ui(5432349587, 543234958);\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorLessThanOrEqualLimb()\n        {\n            using (var a = new HugeRational(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 
3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanLimb()\n        {\n            using (var a = new HugeRational(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanOrEqualLimb()\n        {\n            using (var a = new HugeRational(\"3845721\"))\n            {\n                ulong c = 5432;\n                ulong b = 5432349587;\n                ulong d = 3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with signed limb\n\n        [TestMethod]\n        public void RationalOperatorLessThanSignedLimb()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorLessThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n 
               long d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanSignedLimb()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanOrEqualSignedLimb()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                long c = -543254325432;\n                long b = -9587;\n                long d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n            }\n        }\n\n        #endregion\n\n        #region comparison operators with double\n\n        [TestMethod]\n        public void RationalOperatorLessThanDouble()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsTrue(a < b);\n                Assert.IsFalse(b < a);\n                Assert.IsFalse(a < c);\n                Assert.IsTrue(c < a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorLessThanOrEqualDouble()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                double c = 
-543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsTrue(a <= b);\n                Assert.IsFalse(b <= a);\n                Assert.IsFalse(a <= c);\n                Assert.IsTrue(c <= a);\n                Assert.IsTrue(a <= d);\n                Assert.IsTrue(d <= a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanDouble()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                Assert.IsFalse(a > b);\n                Assert.IsTrue(b > a);\n                Assert.IsTrue(a > c);\n                Assert.IsFalse(c > a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalOperatorGreaterThanOrEqualDouble()\n        {\n            using (var a = new HugeRational(\"-3845721\"))\n            {\n                double c = -543254325432;\n                double b = -9587;\n                double d = -3845721;\n                Assert.IsFalse(a >= b);\n                Assert.IsTrue(b >= a);\n                Assert.IsTrue(a >= c);\n                Assert.IsFalse(c >= a);\n                Assert.IsTrue(a >= d);\n                Assert.IsTrue(d >= a);\n                Assert.IsFalse(d - 0.1 >= a);\n            }\n        }\n\n        #endregion\n\n        #region Equals\n\n        [TestMethod]\n        public void RationalEqualsHugeRational()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.IsFalse(b.Equals(a));\n                Assert.IsFalse(a.Equals(b + c));\n                Assert.IsTrue((a + 
c).Equals(-b));\n                Assert.IsFalse(a.Equals(null));\n                Assert.IsTrue(Equals(a + c, -b));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsHugeInt()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeInt(1922215142))\n            using (var c = new HugeRational(\"115756986668303657898962467957/1\"))\n            using (var d = new HugeInt(\"-115756986668303657898962467957\"))\n            {\n                Assert.IsFalse(a.Equals(b));\n                Assert.IsFalse(b.Equals(a));\n                Assert.IsFalse(a.Equals(-b));\n                Assert.IsFalse(b.Equals(-a));\n                Assert.IsFalse(c.Equals(d));\n                Assert.IsTrue(c.Equals(-d));\n                Assert.IsTrue((-c).Equals(d));\n                Assert.IsTrue(d.Equals(-c));\n                Assert.IsTrue((-d).Equals(c));\n                Assert.IsTrue(Equals(c, -d));\n                Assert.IsTrue(Equals(-c, d));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsExpression()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.IsFalse(((IEquatable<RationalExpression>)b).Equals(a));\n                Assert.IsFalse(((IEquatable<RationalExpression>)a).Equals(b));\n                Assert.IsFalse(((IEquatable<RationalExpression>)a).Equals(null));\n                Assert.IsTrue(((IEquatable<RationalExpression>)(a + c)).Equals(-b));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsNonExpression()\n        {\n            
using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            {\n                Assert.IsFalse(a.Equals(\"abc\"));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsLimb()\n        {\n            using (var a = new HugeRational(Platform.Select(\"222509832503\", \"2225098323\")))\n            {\n                var b = Platform.Ui(222509832504, 2225098324);\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsSignedLimb()\n        {\n            using (var a = new HugeRational(Platform.Select(\"-222509832505\", \"-222509835\")))\n            {\n                var b = Platform.Si(-222509832504, -222509834);\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsLimb2()\n        {\n            using (var a = new HugeRational(Platform.Select(\"222509832377/127\", \"2509832377/127\")))\n            {\n                var b = Platform.Ui(222509832504, 2509832504);\n                uint d = 127;\n                Assert.IsFalse(a.Equals(b + 1, d));\n                Assert.IsTrue(a.Equals(b - d, d));\n                Assert.IsTrue((a + 1).Equals(b, d));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsSignedLimb2()\n        {\n            using (var a = new HugeRational(Platform.Select(\"-222509832377/127\", \"-1509832377/127\")))\n            {\n                var b = Platform.Si(-222509832504, -1509832504);\n                uint d = 127;\n                Assert.IsFalse(a.Equals(b + 1, d));\n                Assert.IsTrue(a.Equals(b + (int)d, d));\n                Assert.IsTrue((a - 
1).Equals(b, d));\n            }\n        }\n\n        [TestMethod]\n        public void RationalEqualsDouble()\n        {\n            using (var a = new HugeRational(\"-222509832505\"))\n            {\n                double b = -222509832504;\n                Assert.IsFalse(a.Equals(b + 1));\n                Assert.IsTrue(a.Equals(b - 1));\n                Assert.IsTrue((a + 1).Equals(b));\n                Assert.IsFalse((a + 1).Equals(b + 0.1));\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with expr\n\n        [TestMethod]\n        public void RationalEqualsOperatorHugeRational()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + c);\n                Assert.IsTrue(a + c == -b);\n                Assert.IsFalse(a == null);\n            }\n        }\n\n        [TestMethod]\n        public void RationalNotEqualOperatorHugeRational()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            using (var b = new HugeRational(\"222509832503450298345029835740293845720/115756986668303657898962467957\"))\n            using (var c = new HugeRational(\"1/115756986668303657898962467957\"))\n            {\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + c);\n                Assert.IsFalse(a + c != -b);\n                Assert.IsTrue(a != null);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Limb\n\n        [TestMethod]\n        public void RationalEqualsOperatorLimb()\n        {\n            using 
(var a = new HugeRational(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(-(a + 1) == b);\n            }\n        }\n\n        [TestMethod]\n        public void RationalNotEqualOperatorLimb()\n        {\n            using (var a = new HugeRational(\"-835740293845721\"))\n            {\n                ulong b = 835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(-(a + 1) != b);\n            }\n        }\n\n        #endregion\n        \n        #region Equality operators with Signed Limb\n\n        [TestMethod]\n        public void RationalEqualsOperatorSignedLimb()\n        {\n            using (var a = new HugeRational(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n            }\n        }\n\n        [TestMethod]\n        public void RationalNotEqualOperatorSignedLimb()\n        {\n            using (var a = new HugeRational(\"-835740293845721\"))\n            {\n                long b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n            }\n        }\n\n        #endregion\n\n        #region Equality operators with Double\n\n        [TestMethod]\n        public void RationalEqualsOperatorDouble()\n        {\n            using (var a = new HugeRational(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsFalse(b == a);\n                Assert.IsFalse(a == b + 1);\n                Assert.IsTrue(a + 1 == b);\n                Assert.IsFalse(a + 1 == b + 0.1);\n            }\n        }\n\n        [TestMethod]\n       
 public void RationalNotEqualOperatorDouble()\n        {\n            using (var a = new HugeRational(\"-835740293845721\"))\n            {\n                double b = -835740293845720;\n                Assert.IsTrue(b != a);\n                Assert.IsTrue(a != b + 1);\n                Assert.IsFalse(a + 1 != b);\n                Assert.IsTrue(a + 1 != b + 0.1);\n            }\n        }\n\n        #endregion\n\n        #region GetHashCode\n\n        [TestMethod]\n        public void RationalGetHashCodeTest()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            {\n                Assert.AreNotEqual(0, a.GetHashCode());\n                Assert.AreEqual(a.GetHashCode(), (a + 0).GetHashCode());\n                Assert.AreNotEqual(a.GetHashCode(), (-a).GetHashCode());\n            }\n        }\n\n        #endregion\n\n        #region Sign\n\n        [TestMethod]\n        public void RationalSign()\n        {\n            using (var a = new HugeRational(\"-222509832503450298345029835740293845721/115756986668303657898962467957\"))\n            {\n                Assert.AreEqual(-1, a.Sign());\n                Assert.AreEqual(1, (-a).Sign());\n                Assert.AreEqual(0, (a-a).Sign());\n            }\n        }\n\n        #endregion\n\n        //more tests coming here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/ConstructionAndDisposal.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class ConstructionAndDisposal\n    {\n        [TestMethod]\n        public void RationalDefaultConstructor()\n        {\n            using (var a = new HugeRational())\n            {\n                Assert.AreNotEqual(0, a.NumeratorNumberOfLimbsAllocated());\n                Assert.AreEqual(0, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreNotEqual(IntPtr.Zero, a.NumeratorLimbs());\n                Assert.AreEqual(\"0\", a.Numerator.ToString());\n\n                Assert.AreNotEqual(0, a.DenominatorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsUsed());\n                Assert.AreNotEqual(IntPtr.Zero, a.DenominatorLimbs());\n                Assert.AreEqual(\"1\", a.Denominator.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalNumerator()\n        {\n            using (var a = new HugeRational())\n            {\n                a.Numerator.Dispose();\n                Assert.AreNotEqual(0, a.Numerator.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"0\", 
a.Numerator.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDenominator()\n        {\n            using (var a = new HugeRational())\n            {\n                a.Denominator.Dispose();\n                Assert.AreNotEqual(0, a.Denominator.NumberOfLimbsAllocated());\n                Assert.AreEqual(\"1\", a.Denominator.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalDispose()\n        {\n            var a = new HugeRational();\n            a.Dispose();\n            \n            Assert.AreEqual(0, a.NumeratorNumberOfLimbsAllocated());\n            Assert.AreEqual(0, a.NumeratorNumberOfLimbsUsed());\n            Assert.AreEqual(IntPtr.Zero, a.NumeratorLimbs());\n\n            Assert.AreEqual(0, a.DenominatorNumberOfLimbsAllocated());\n            Assert.AreEqual(0, a.DenominatorNumberOfLimbsUsed());\n            Assert.AreEqual(IntPtr.Zero, a.DenominatorLimbs());\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromLong()\n        {\n            var n = Platform.Si(123456789123456, 1234567891);\n            var d = Platform.Ui(12764787846358441471U, 2860486313U);\n\n            using (var a = new HugeRational(n, d))\n            {\n                Assert.AreEqual(1, a.NumeratorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsUsed());\n                Assert.AreEqual(n + \"/\" + d, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromLongNegative()\n        {\n            var n = Platform.Si(-123456789123456, -1234567891);\n            var d = Platform.Ui(12764787846358441471U, 2860486313U);\n\n            using (var a = new HugeRational(n, d))\n            {\n                Assert.AreEqual(1, 
a.NumeratorNumberOfLimbsAllocated());\n                Assert.AreEqual(-1, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsUsed());\n                Assert.AreEqual(n + \"/\" + d, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromULong()\n        {\n            var n = Platform.Ui(ulong.MaxValue, uint.MaxValue);\n            var d = Platform.Ui(12764787846358441471U, 2860486313U);\n\n            using (var a = new HugeRational(n, d))\n            {\n                Assert.AreEqual(1, a.NumeratorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsUsed());\n                Assert.AreEqual(n.ToString(), a.Numerator.ToString());\n                Assert.AreEqual(n.ToString() + \"/\" + d, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromDouble()\n        {\n            using (var a = new HugeRational(123456789123456.75))\n            {\n                Assert.AreEqual(\"493827156493827/4\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromDoubleNegative()\n        {\n            using (var a = new HugeRational(-123456789123456.75))\n            {\n                Assert.AreEqual(\"-493827156493827/4\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalAllocate()\n        {\n            using (var a = HugeRational.Allocate(129, 193))\n            {\n                Assert.AreEqual(1 + 128 / MpirSettings.BITS_PER_LIMB, a.NumeratorNumberOfLimbsAllocated());\n                Assert.AreEqual(0, a.NumeratorNumberOfLimbsUsed());\n                
Assert.AreEqual(1 + 192 / MpirSettings.BITS_PER_LIMB, a.DenominatorNumberOfLimbsAllocated());\n                Assert.AreEqual(1, a.DenominatorNumberOfLimbsUsed());\n                Assert.AreEqual(\"0/1\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalCanonicalize()\n        {\n            using (var a = new HugeRational(198, 15))\n            {\n                a.Denominator.Value = -a.Denominator;\n                Assert.AreEqual(\"198/-15\", a.ToString());\n                a.Canonicalize();\n                Assert.AreEqual(\"-66/5\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalStringConstructor()\n        {\n            var n = \"5432109876543212345789023245987/362736035870515331128527330659\";\n            using (var a = new HugeRational(n))\n            {\n                Assert.AreEqual(128 / MpirSettings.BITS_PER_LIMB, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreEqual(n, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalStringConstructorNumeratorOnly()\n        {\n            var n = \"5432109876543212345789023245987\";\n            using(var a = new HugeRational(n))\n            {\n                Assert.AreEqual(128 / MpirSettings.BITS_PER_LIMB, a.NumeratorNumberOfLimbsUsed());\n                Assert.AreEqual(n + \"/1\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void RationalStringConstructorInvalid()\n        {\n            var a = new HugeRational(\"12345A\");\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void RationalStringConstructorInvalid2()\n        {\n            var a = new HugeRational(\"12345/54321A\");\n        }\n\n        [TestMethod]\n        public void RationalStringConstructorHex()\n        {\n            using (var i = new 
HugeInt(\"362736035870515331128527330659\"))\n            {\n                var d = i.ToString(16);\n                var n = \"143210ABCDEF32123457ACDB324598799\";\n                using (var a = new HugeRational(n + \"/\" + d, 16))\n                {\n                    Assert.AreEqual(n, a.Numerator.ToString(16));\n                    Assert.AreEqual(1 + 128 / MpirSettings.BITS_PER_LIMB, a.NumeratorNumberOfLimbsUsed());\n                    Assert.AreEqual(i, a.Denominator);\n                }\n            }\n        }\n\n        [TestMethod]\n        public void RationalStringConstructorHexPrefix()\n        {\n            using (var i = new HugeInt(\"362736035870515331128527330659\"))\n            {\n                var d = i.ToString(16);\n                var n = \"143210ABCDEF32123457ACDB324598799\";\n                using (var a = new HugeRational(\"0x\" + n + \"/0x\" + d))\n                {\n                    Assert.AreEqual(n + \"/\" + d, a.ToString(16));\n                    Assert.AreEqual(i, a.Denominator);\n                }\n            }\n        }\n\n        [TestMethod]\n        public void RationalStringAssignmentHexPrefix()\n        {\n            using (var i = new HugeInt(\"362736035870515331128527330659\"))\n            {\n                var d = i.ToString(16);\n                var n = \"143210ABCDEF32123457ACDB324598799\";\n                using (var a = new HugeRational(\"0x\" + n + \"/0x\" + d))\n                {\n                    Assert.AreEqual(n + \"/\" + d, a.ToString(16));\n                    Assert.AreEqual(n + \"/\" + d, a.ToString(16, false));\n                    Assert.AreEqual((n + \"/\" + d).ToLower(), a.ToString(16, true));\n                    a.SetTo(\"-0x\" + n + \"/0x17\");\n                    Assert.AreEqual(\"-\" + n + \"/17\", a.ToString(16));\n                }\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromExpression()\n        {\n            using (var a = new 
HugeRational(\"2340958273409578234095823045723490587/362736035870515331128527330659\"))\n            using (var b = new HugeRational(a + 1))\n            {\n                Assert.AreEqual(a + 1, b);\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromFloatExpression()\n        {\n            using (var a = new HugeFloat(\"2340958273409578234.25\"))\n            using (var b = new HugeRational(a + 1))\n            {\n                Assert.AreEqual(\"9363833093638312941/4\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromIntExpression()\n        {\n            using (var a = new HugeInt(\"2340958273409578234095823045723490587\"))\n            using (var b = new HugeRational(a + 1))\n            {\n                Assert.AreEqual(\"2340958273409578234095823045723490588/1\", b.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalConstructorFromIntExpression2()\n        {\n            using (var a = new HugeInt(\"2340958273409578234095823045723490587\"))\n            using (var d = new HugeInt(\"362736035870515331128527330659\"))\n            using (var b = new HugeRational(a + 2, d * 2))\n            {\n                Assert.AreEqual(a + 2, b.Numerator);\n                Assert.AreEqual(d * 2, b.Denominator);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/Conversions.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class Conversions\n    {\n        [TestMethod]\n        public void RationalToStringDecimal()\n        {\n            var n = \"-23429384756298357462983476598345623984756\";\n            var d = \"115756986668303657898962467957\";\n            var s = n + \"/\" + d;\n            using (var a = new HugeRational(s))\n            {\n                Assert.AreEqual(s, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalToStringHex()\n        {\n            var n = \"-23429abcdef298357462983fedcba345623984756\";\n            var d = \"17607ef654eb9a13ffa163c75\";\n            var s = n + \"/\" + d;\n            using (var a = new HugeRational(s, 16))\n            {\n                Assert.AreEqual(s, a.ToString(16, true));\n                Assert.AreEqual(s.ToUpper(), a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RationalFromUlong()\n        {\n            using (var a = new HugeRational())\n            {\n                var b = Platform.Ui(0xF84739ABCDEF4876, 0xF84776);\n              
  var d = Platform.Ui(12764787846358441471, 3628273133);\n                a.SetTo(b, d);\n                Assert.AreEqual(b.ToString() + \"/\" + d.ToString(), a.ToString());\n\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString() + \"/1\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalFromLong()\n        {\n            using (var a = new HugeRational())\n            {\n                var b = Platform.Si(-0x784739ABCDEF4876, -0xF84776);\n                var d = Platform.Ui(12764787846358441471, 3628273133);\n                a.SetTo(b, d);\n                Assert.AreEqual(b.ToString() + \"/\" + d.ToString(), a.ToString());\n\n                a.SetTo(b);\n                Assert.AreEqual(b.ToString() + \"/1\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RationalToAndFromDouble()\n        {\n            using (var a = new HugeRational())\n            {\n                a.SetTo(-123.25);\n\n                double c = a.ToDouble();\n                Assert.IsTrue(c.Equals(-123.25));\n                Assert.IsTrue(a.Equals(-123.25));\n            }\n        }\n\n        [TestMethod]\n        public void RationalToAndFromFloat()\n        {\n            using (var a = new HugeRational())\n            {\n                a.SetTo(-123.125f);\n\n                double c = a.ToDouble();\n                Assert.IsTrue(-123.125 == c);\n                Assert.IsTrue(-123.125 == a);\n                Assert.IsTrue(-123.125f == a);\n            }\n        }\n\n        [TestMethod]\n        public void RationalFromString()\n        {\n            using (var a = new HugeRational())\n            {\n                var n = \"98762934876529834765234123984761/115756986668303657898962467957\";\n                a.SetTo(n);\n                Assert.AreEqual(n, a.ToString());\n\n                n = \"-98ABCDEF876529834765234123984761/17607EF654EB9A13FFA163C75\";\n                
a.SetTo(n, 16);\n                Assert.AreEqual(n, a.ToString(16));\n\n                n = \"-98ABCDEF876529834765234123984761\";\n                a.SetTo(n, 16);\n                Assert.AreEqual(n + \"/1\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void RationalFromInvalidString()\n        {\n            using (var a = new HugeRational())\n            {\n                a.SetTo(\"12345A\");\n            }\n        }\n\n        [TestMethod]\n        [ExpectedException(typeof(ArgumentException))]\n        public void RationalFromInvalidString2()\n        {\n            using (var a = new HugeRational())\n            {\n                a.SetTo(\"12345/13345A\");\n            }\n        }\n\n        [TestMethod]\n        public void RationalToStringTruncated()\n        {\n            using (var d = new HugeInt(\"115756986668303657898962467957\"))\n            using (var e = new HugeInt(d ^ 10))\n            {\n                var s = e.ToString(10);\n                var n = string.Concat(\"123456789\".Select(c => new string(c, 30)));\n                using (var a = new HugeRational(n + \"/\" + s))\n                {\n                    Assert.AreEqual(n + \"/\" + s, a.ToString(10));\n                    Assert.AreEqual(\"...\" + n.Substring(n.Length - 256) + \"/...\" + s.Substring(s.Length - 256), a.ToString());\n                    a.Value = -a;\n                    Assert.AreEqual(\"-...\" + n.Substring(n.Length - 256) + \"/...\" + s.Substring(s.Length - 256), a.ToString());\n                }\n            }\n        }\n\n        [TestMethod]\n        public void RationalApproximateSizeInBase()\n        {\n            using (var a = new HugeRational(\"2983475029834750293429834750298347502934298347502983475029342983475029834750293429834750298347502934/115756986668303657898962467957\"))\n            {\n                AssertBetween(131, 133, 
a.ApproximateSizeInBase(10));\n                AssertBetween(429, 429, a.ApproximateSizeInBase(2));\n                AssertBetween(109, 109, a.ApproximateSizeInBase(16));\n                AssertBetween(84, 86, a.ApproximateSizeInBase(36));\n                AssertBetween(74, 76, a.ApproximateSizeInBase(62));\n            }\n        }\n\n        private void AssertBetween(int min, int max, long actual)\n        {\n            Assert.IsTrue(actual >= min && actual <= max, \"Expected {0} to {1}, actual {2}\", min, max, actual);\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/ExpressionTests.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Reflection;\nusing System.Text;\nusing System.Threading.Tasks;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class ExpressionTests\n    {\n        [TestMethod]\n        public void RationalTestAllExpressions()\n        {\n            var baseExpr = typeof(RationalExpression);\n            var allExpressions =\n                baseExpr.Assembly.GetTypes()\n                .Where(x => baseExpr.IsAssignableFrom(x) && !x.IsAbstract)\n                .ToList();\n\n            var one = Platform.Ui(1, 1);\n\n            using (var a = new HugeRational(-9, 1))\n            using (var b = new HugeRational(4, 1))\n            using (var c = new HugeInt(3))\n            using (var r = MpirRandom.Default())\n            {\n                var expr = a + (-a * 2) * 3 * (a.Abs() * -2 + -64 + a * a) + (one * 116U) + a;\n                VerifyPartialResult(r, expr, 44);\n                expr = expr + a * 5 + (a+b) * (b + 1) * (b + -3) * b + (b * -a) - (b * (one * 25U)) - a + (b << 3) - ((a*b) << 1);\n                VerifyPartialResult(r, expr, -52);\n                expr = 
expr - 2 - 3U + (b - (a << 1)) + (b * b - (one * 15U)) * (b - a) * (a - 11) * (b - 3U) - (-340 - a) + ((one * 20U) - b);\n                VerifyPartialResult(r, expr, 52);\n                expr = expr + (-7 - 2 * a) + (28U - 4 * b) + -(a + b * 2) + (3 * a).Abs();\n                VerifyPartialResult(r, expr, 103);\n                expr = 36 * (expr / a + expr / (3 * b) - a / b) - b / (a + 10) + 6;\n                VerifyPartialResult(r, expr, -20);\n                expr = expr + (b >> 1) + ((b / -7) + (a / (one * 7U))) * 7 + (7 / a) - ((one * 2U) / (b + 5));\n                VerifyPartialResult(r, expr, -32);\n                expr = expr + (((a / b).Invert() * 3) ^ 3) - (b + 13) / a / -3;\n                VerifyPartialResult(r, expr, -35);\n                expr = expr + c + (b - 2 * c) + (-4 * c - a) - (c - 1) * (b - 1) - (a / c) + (c * 2) / (b - 1);\n                VerifyPartialResult(r, expr, -38);\n\n                MarkExpressionsUsed(allExpressions, expr);\n            }\n\n            Assert.AreEqual(0, allExpressions.Count, \"Expression types not exercised: \" + string.Join(\"\",\n                allExpressions.Select(x => Environment.NewLine + x.Name).OrderBy(x => x)));\n        }\n\n        private void VerifyPartialResult(MpirRandom rnd, RationalExpression expr, long expected)\n        {\n            rnd.Seed(123);\n\n            using (var r = new HugeRational())\n            {\n                r.Value = expr;\n                Assert.AreEqual(expected.ToString() + \"/1\", r.ToString());\n            }\n        }\n\n        private void MarkExpressionsUsed(List<Type> allExpressions, RationalExpression expr)\n        {\n            var type = expr.GetType();\n            allExpressions.Remove(type);\n            \n            var children = type.GetFields(BindingFlags.NonPublic | BindingFlags.Instance)\n                .Where(x => typeof(RationalExpression).IsAssignableFrom(x.FieldType))\n                .Select(x => 
(RationalExpression)x.GetValue(expr))\n                .Where(x => x != null)\n                .ToList();\n\n            foreach (var childExpr in children)\n                MarkExpressionsUsed(allExpressions, childExpr);\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/HugeRationalTests/IO.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeRationalTests\n{\n    [TestClass]\n    public class IO\n    {\n        [TestMethod]\n        public void RationalInputOutputRaw()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                a.Write(ms);\n                ms.Position = 0;\n                b.Read(ms);\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStr()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using 
(var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStrHex()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 16);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 16);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(16), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStrHexPrefix()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                {\n                    writer.Write(\"0x\");\n                    a.Numerator.Write(writer, 16);\n                    writer.Write(\"/0\");\n                    
a.Denominator.Write(writer, 8);\n                }\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStrHexLower()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 16, true);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 16);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStrOctal()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 8);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 8);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                
Assert.AreEqual((char)0xFEFF + a.ToString(8), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStrBinary()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 2);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 2);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(2), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        [TestMethod]\n        public void RationalInputOutputStr62()\n        {\n            using (var a = new HugeRational(\"0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF/361720912810755408215708460645842859722715865206816237944587\"))\n            using (var b = new HugeRational())\n            using (var ms = new MemoryStream())\n            {\n                a.Value = a ^ 100;\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n                    a.Write(writer, 62);\n\n                ms.Position = 0;\n\n                using (var reader = new StreamReader(ms, Encoding.UTF8, false, 1024, true))\n                    b.Read(reader, 62);\n\n                Assert.AreEqual(a, b);\n                Assert.AreEqual(ms.Length, ms.Position);\n                Assert.AreEqual((char)0xFEFF + a.ToString(62), Encoding.UTF8.GetString(ms.ToArray()));\n            }\n        }\n\n        //more tests coming 
here\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/IntegrationTests/XmlCommentsTests.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Reflection;\nusing System.Text;\nusing System.Text.RegularExpressions;\nusing System.Threading.Tasks;\nusing System.Xml;\nusing System.Xml.Serialization;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.HugeIntTests\n{\n    [TestClass]\n    public class XmlCommentsTests\n    {\n        private static readonly string[] ValidMemberTypePrefixes = { \"M:\", \"P:\", \"F:\", \"C:\" };\n\n        [TestMethod]\n        public void TestComments()\n        {\n            var assembly = typeof(HugeInt).Assembly;\n            var xmlPath = Path.GetFileNameWithoutExtension(assembly.Location) + \".xml\";\n            var xml = XmlCommentsDoc.Deserialize(xmlPath);\n\n            var publicTypes = assembly.GetTypes().Where(x => x.IsPublic).ToArray();\n            var definedClassComments = xml.Members.Where(x => x.Name.StartsWith(\"T:\") && (\"\" + x.Summary).Trim().Length > 0);\n            var missingPublicTypes = publicTypes.Select(x => x.FullName).Except(definedClassComments.Select(x => x.MemberName)).ToArray();\n            Assert.AreEqual(0, missingPublicTypes.Length, \"XML Comments are not defined on 
class(es): \" + string.Join(Environment.NewLine, missingPublicTypes));\n\n            var publicMembers = publicTypes.SelectMany(x => x.GetMembers())\n                .Where(XmlCommentsMember.IsIncluded)\n                .ToArray();\n            var definedMemberComments = xml.Members.Where(x => ValidMemberTypePrefixes.Contains(x.Name.Substring(0, 2)) && (\"\" + x.Summary).Trim().Length > 0).ToArray();\n            var invalidComments = publicMembers.Join(definedMemberComments, XmlCommentsMember.Signature, x => x.MemberName, (Member, Comment) => new { Member, Comment })\n                .SelectMany(x => x.Comment.Validate(x.Member)).ToArray();\n            Assert.AreEqual(0, invalidComments.Length, string.Join(Environment.NewLine, invalidComments));\n\n            var missingComments = publicMembers.Select(XmlCommentsMember.Signature).Except(definedMemberComments.Select(x => x.MemberName)).ToArray();\n            Assert.AreEqual(0, missingComments.Length, \"XML Comments are not defined on public member(s): \" + string.Join(Environment.NewLine, missingComments));\n        }\n    }\n\n    [Serializable]\n    [XmlRoot(\"doc\")]\n    public class XmlCommentsDoc\n    {\n        [XmlElement(\"assembly\")]\n        public string Assembly { get; set; }\n        [XmlArray(\"members\"), XmlArrayItem(\"member\")]\n        public List<XmlCommentsMember> Members { get; set; }\n\n        public static XmlCommentsDoc Deserialize(string path)\n        {\n            var contents = File.ReadAllText(path);\n            contents = Regex.Replace(contents, \"<paramref[^>]*>\", \"paramref\");\n            contents = Regex.Replace(contents, \"</?para>\", \"\");\n            contents = contents.Replace(\"!System.Runtime.CompilerServices.IsLong\", \"\");\n\n            var serializer = new XmlSerializer(typeof(XmlCommentsDoc));\n            using (var reader = new StringReader(contents))\n                return (XmlCommentsDoc)serializer.Deserialize(reader);\n        }\n    }\n\n    
[Serializable]\n    public class XmlCommentsMember\n    {\n        [XmlAttribute(\"name\")]\n        public string Name { get; set; }\n        [XmlElement(\"summary\")]\n        public string Summary { get; set; }\n        [XmlElement(\"typeparam\")]\n        public List<XmlCommentsParam> TypeParams { get; set; }\n        [XmlElement(\"param\")]\n        public List<XmlCommentsParam> Params { get; set; }\n        [XmlElement(\"returns\")]\n        public string Returns { get; set; }\n\n        [XmlIgnore]\n        public string MemberName { get { return Name.Substring(2); } }\n\n        public IEnumerable<string> Validate(MemberInfo member)\n        {\n            if ((\"\" + Summary).Trim().Length == 0)\n                yield return \"Missing Summary on \" + MemberName;\n\n            var method = member as MethodBase;\n            if (method != null)\n            {\n                var missingParams = method.GetParameters().Select(x => x.Name).Except(Params.Where(x => (\"\" + x.Text).Trim().Length > 0).Select(x => x.Name)).ToArray();\n                if (missingParams.Any())\n                    yield return string.Format(\"Missing param {0} on {1}\", string.Join(\", \", missingParams), MemberName);\n\n                var invalidParams = Params.Select(x => x.Name).Except(method.GetParameters().Select(x => x.Name)).ToArray();\n                if (invalidParams.Any())\n                    yield return string.Format(\"Invalid param {0} on {1}\", string.Join(\", \", invalidParams), MemberName);\n\n                if (method.ContainsGenericParameters)\n                {\n                    var missingGenericParms = method.GetGenericArguments().Select(x => x.Name).Except(TypeParams.Where(x => (\"\" + x.Text).Trim().Length > 0).Select(x => x.Name)).ToArray();\n                    if (missingGenericParms.Any())\n                        yield return string.Format(\"Missing generic param {0} on {1}\", string.Join(\", \", missingGenericParms), MemberName);\n\n              
      var invalidGenericParams = TypeParams.Select(x => x.Name).Except(method.GetGenericArguments().Select(x => x.Name)).ToArray();\n                    if (invalidGenericParams.Any())\n                        yield return string.Format(\"Invalid generic param {0} on {1}\", string.Join(\", \", invalidGenericParams), MemberName);\n                }\n            }\n\n            var method2 = member as MethodInfo;\n            if (method2 != null)\n            {\n                if (method2.ReturnType != typeof(void) && (\"\" + Returns).Trim().Length == 0)\n                    yield return \"Missing Returns on \" + MemberName;\n            }\n        }\n\n        public static bool IsIncluded(MemberInfo member)\n        {\n            var method = member as MethodInfo;\n            if (method != null)\n            {\n                if (method.Name.StartsWith(\"get_\") || method.Name.StartsWith(\"set_\"))\n                    return false;\n\n                if (method.ReflectedType != method.DeclaringType)\n                    return false;\n            }\n\n            if (member.Name == \"value__\" && member.DeclaringType.IsEnum)\n                return false;\n\n            return true;\n        }\n\n        public static string Signature(MemberInfo member)\n        {\n            var sig = new StringBuilder();\n            sig.Append(member.DeclaringType.FullName).Append('.');\n\n            var method = member as MethodInfo;\n            var ctor = member as ConstructorInfo;\n            var methodBase = member as MethodBase;\n\n            if (ctor != null)\n                sig.Append(\"#ctor\");\n            else\n                sig.Append(member.Name);\n\n            if(methodBase != null)\n            {\n                if (methodBase.IsGenericMethodDefinition)\n                {\n                    sig.Append(\"``\").Append(methodBase.GetGenericArguments().Length);\n                }\n\n                var parameters = methodBase.GetParameters();\n       
         if(parameters.Length > 0)\n                {\n                    sig.Append('(')\n                       .Append(string.Join(\",\", parameters.Select(FormatParameterType)))\n                       .Append(')');\n                }\n            }\n\n            return sig.ToString();\n        }\n\n        public static string FormatParameterType(ParameterInfo p)\n        {\n            if (p.ParameterType.IsGenericType)\n                return p.ParameterType.Namespace + \".\" + p.ParameterType.Name + \"{\" + string.Join(\",\", p.ParameterType.GetGenericArguments().Select(x => x.FullName)) + \"}\";\n\n            if (p.ParameterType.FullName == null)\n                return \"``\" + p.ParameterType.Name.Replace((p.Member as MethodInfo).GetGenericArguments()[p.Position].Name, p.Position.ToString());\n\n            if (p.IsOut)\n                return p.ParameterType.FullName.Replace('&', '@');\n\n            return p.ParameterType.FullName;\n        }\n\n        public override string ToString()\n        {\n            return MemberName;\n        }\n    }\n\n    [Serializable]\n    public class XmlCommentsParam\n    {\n        [XmlAttribute(\"name\")]\n        public string Name { get; set; }\n        [XmlText]\n        public string Text { get; set; }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/OtherTests/MpirSettings.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.RandomTests\n{\n    [TestClass]\n    public class MpirSettingsTests\n    {\n        [TestMethod]\n        public void BitsPerLimb()\n        {\n            unsafe\n            {\n                Assert.AreEqual(8 * sizeof(IntPtr), MpirSettings.BITS_PER_LIMB);\n                Assert.AreEqual(8 * sizeof(IntPtr), MpirSettings.USABLE_BITS_PER_LIMB);\n                Assert.AreEqual(0, MpirSettings.NAIL_BITS_PER_LIMB);\n            }\n        }\n\n        [TestMethod]\n        public void Version()\n        {\n            Assert.AreEqual(\"6.0.0\", MpirSettings.GMP_VERSION.ToString());\n            Assert.AreEqual(\"3.0.0\", MpirSettings.MPIR_VERSION.ToString());\n        }\n\n        [TestMethod]\n        public void ToStringDigits()\n        {\n            const string strA = \"9520398475029834502983475028934705982734095827304958723409758230498573034928750938475987498473958743\";\n            using (var a = new HugeInt(strA))\n            {\n                Assert.AreEqual(256, MpirSettings.ToStringDigits);\n                Assert.AreEqual(strA, a.ToString());\n\n               
 MpirSettings.ToStringDigits = 32;\n                Assert.AreEqual(\"...\" + strA.Substring(strA.Length - 32), a.ToString());\n\n                MpirSettings.ToStringDigits = 256;\n                Assert.AreEqual(strA, a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RoundingMode()\n        {\n            const string strA = \"9520398475029834502983475028934705982734095827304958723409758230498573034928750938475987498473958743\";\n            var down = strA.Substring(0, strA.Length - 1);\n            var up = strA.Substring(0, strA.Length - 2) + (char) (strA[strA.Length - 2] + 1);\n\n            using (var a = new HugeInt(strA))\n            using (var b = new HugeInt())\n            {\n                Assert.AreEqual(RoundingModes.Truncate, MpirSettings.RoundingMode);\n                b.Value = a / 10;\n                Assert.AreEqual(down, b.ToString());\n                b.Value = -a / 10;\n                Assert.AreEqual(\"-\" + down, b.ToString());\n\n                MpirSettings.RoundingMode = RoundingModes.Default;\n                b.Value = a / 10;\n                Assert.AreEqual(down, b.ToString());\n                b.Value = -a / 10;\n                Assert.AreEqual(\"-\" + down, b.ToString());\n\n                MpirSettings.RoundingMode = RoundingModes.Floor;\n                b.Value = a / 10;\n                Assert.AreEqual(down, b.ToString());\n                b.Value = -a / 10;\n                Assert.AreEqual(\"-\" + up, b.ToString());\n\n                MpirSettings.RoundingMode = RoundingModes.Ceiling;\n                b.Value = a / 10;\n                Assert.AreEqual(up, b.ToString());\n                b.Value = -a / 10;\n                Assert.AreEqual(\"-\" + down, b.ToString());\n\n                MpirSettings.RoundingMode = RoundingModes.Truncate;\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/OtherTests/Random.cs",
    "content": "/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.IO;\nusing System.Text;\nusing System.Linq;\nusing Microsoft.VisualStudio.TestTools.UnitTesting;\n\nnamespace MPIR.Tests.RandomTests\n{\n    [TestClass]\n    public class RandomTests\n    {\n#if WIN64\n        private void TestRandom(MpirRandom r, ulong seed, Action<MpirRandom> assert)\n#else\n        private void TestRandom(MpirRandom r, uint seed, Action<MpirRandom> assert)\n#endif\n        {\n            MpirRandom copy = null;\n\n            for (var i = 0; i < 6; i++)\n            {\n                switch (i)\n                {\n                    case 0:\n                    case 1:\n                    case 5:\n                        r.Seed(seed);\n                        break;\n\n                    case 2:\n                        using (var s = new HugeInt(seed))\n                            r.Seed(s);\n                        break;\n\n                    case 3:\n                        using (var s = new HugeInt(seed))\n                            r.Seed(s);\n                        copy = r.Copy();\n                        break;\n\n                    case 4:\n                        r.Seed(seed); //todo r = copy; temporarily disabled copy tests due to MPIR bug\n              
          break;\n                }\n\n                assert(r);\n            }\n\n            copy.Dispose();\n        }\n\n        [TestMethod]\n        public void RandomSeedingMersenneTwister()\n        {\n            using (var r = MpirRandom.MersenneTwister())\n            {\n                uint seed = 12345789;\n                uint max = 10000000;\n                uint bits = 62;\n                uint expected1 = 3801341;\n                uint expected2 = 747743;\n                var expected3 = Platform.Ui(3637762780660169521, 2925722417);\n\n                TestRandom(r, seed, x =>\n                {\n                    Assert.AreEqual(expected1, x.GetLimb(max));\n                    Assert.AreEqual(expected2, x.GetLimb(max));\n                    Assert.AreEqual(expected3, x.GetLimbBits(bits));\n                });\n            }\n        }\n\n        [TestMethod]\n        public void RandomSeedingDefault()\n        {\n            using (var r = MpirRandom.Default())\n            {\n                uint seed = 12345789;\n                uint max = 10000000;\n                uint bits = 62;\n                uint expected1 = 3801341;\n                uint expected2 = 747743;\n                var expected3 = Platform.Ui(3637762780660169521, 2925722417);\n\n                TestRandom(r, seed, x =>\n                {\n                    Assert.AreEqual(expected1, x.GetLimb(max));\n                    Assert.AreEqual(expected2, x.GetLimb(max));\n                    Assert.AreEqual(expected3, x.GetLimbBits(bits));\n                });\n            }\n        }\n\n        [TestMethod]\n        public void RandomSeedingLCSize()\n        {\n            using (var r = MpirRandom.LinearCongruential(128))\n            {\n                uint seed = 12345789;\n                uint max = 10000000;\n                uint bits = 62;\n                uint expected1 = 8017343;\n                uint expected2 = 2122346;\n                var expected3 = 
Platform.Ui(1653945017297503111, 4060840839);\n\n                TestRandom(r, seed, x =>\n                {\n                    Assert.AreEqual(expected1, x.GetLimb(max));\n                    Assert.AreEqual(expected2, x.GetLimb(max));\n                    Assert.AreEqual(expected3, x.GetLimbBits(bits));\n                });\n            }\n        }\n\n        [TestMethod]\n        public void RandomSeedingLC()\n        {\n            uint seed = 12345789;\n            uint max = 10000000;\n            uint bits = 62;\n            var expected1 = Platform.Ui(6524662, 5635868);\n            var expected2 = Platform.Ui(5428780, 5488683);\n            var expected3 = Platform.Ui(4189233241027086562, 278391078);\n            var c = Platform.Ui(98570948725939831, 985709487);\n\n            using (var a = new HugeInt(\"5209384572093847098342590872309452304529345409827509283745078\"))\n            using (var r = MpirRandom.LinearCongruential(a, c, 256))\n            {\n\n                TestRandom(r, seed, x =>\n                {\n                    Assert.AreEqual(expected1, x.GetLimb(max));\n                    Assert.AreEqual(expected2, x.GetLimb(max));\n                    Assert.AreEqual(expected3, x.GetLimbBits(bits));\n                });\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeIntBits()\n        {\n            using (var r = MpirRandom.Default())\n            using (var a = new HugeInt())\n            {\n                r.Seed(12345789);\n                a.Value = r.GetIntBits(256);\n                Assert.AreEqual(\"9E056474F27BEDF9AE62FB31A30B68DFA0B96F29D0C8767A88F8937D6F3A00FD\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeInt1()\n        {\n            using (var r = MpirRandom.Default())\n            using (var a = new HugeInt())\n            {\n                r.Seed(12345789);\n                a.Value = r.GetInt((a + 2345234534) ^ 10);\n                
Assert.AreEqual(\"1157052590454954734126533665801738434624905444540864190630086248362027895885687865247828410621\", a.ToString());\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeIntBitsChunky()\n        {\n            using (var r = MpirRandom.Default())\n            using (var a = new HugeInt())\n            {\n                r.Seed(12345789);\n                a.Value = r.GetIntBitsChunky(256);\n                Assert.AreEqual(\"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC0000000000000000000000000000007F\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeFloat()\n        {\n            using(var r = MpirRandom.Default())\n            using(var a = HugeFloat.Allocate(256))\n            {\n                r.Seed(12345789);\n                a.Value = r.GetFloat();\n                Assert.AreEqual(\"0.9E056474F27BEDF9AE62FB31A30B68DFA0B96F29D0C8767A88F8937D6F3A00FD@0\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeFloatBits()\n        {\n            using(var r = MpirRandom.Default())\n            using(var a = HugeFloat.Allocate(256))\n            {\n                r.Seed(12345789);\n                a.Value = r.GetFloatBits(128);\n                Assert.AreEqual(\"0.A0B96F29D0C8767A88F8937D6F3A00FD@0\", a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeFloatChunky()\n        {\n            using(var r = MpirRandom.Default())\n            using(var a = HugeFloat.Allocate(256))\n            {\n                r.Seed(12345789);\n                a.Value = r.GetFloatChunky(100);\n                Assert.AreEqual(Platform.Select(\"0.7FFFFFFF0180000000000000000007FFFFFFFFFFFFFFFFFFF@-2EF\", \"0.7FFFFFFFFFFFF00000000001FFE000000000000000000000007FFFFFF@29\"), a.ToString(16));\n            }\n        }\n\n        [TestMethod]\n        public void RandomHugeFloatLimbsChunky()\n        {\n            
using(var r = MpirRandom.Default())\n            using(var a = HugeFloat.Allocate(256))\n            {\n                r.Seed(12345789);\n                a.Value = r.GetFloatLimbsChunky(128 / MpirSettings.BITS_PER_LIMB, 100);\n                Assert.AreEqual(Platform.Select(\"0.7FFFFFF8000007FFF@2C1\", \"0.7FFFFFFFFFC000000003FFFFF@2A1\"), a.ToString(16));\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/Properties/AssemblyInfo.cs",
    "content": "﻿using System.Reflection;\nusing System.Runtime.CompilerServices;\nusing System.Runtime.InteropServices;\n\n// General Information about an assembly is controlled through the following \n// set of attributes. Change these attribute values to modify the information\n// associated with an assembly.\n[assembly: AssemblyTitle(\"mpir.net-tests\")]\n[assembly: AssemblyDescription(\"Unit Tests for the mpir.net project\")]\n[assembly: AssemblyConfiguration(\"\")]\n[assembly: AssemblyCompany(\"\")]\n[assembly: AssemblyProduct(\"mpir.net-tests\")]\n[assembly: AssemblyCopyright(\"Copyright © Alex Dyachenko 2014\")]\n[assembly: AssemblyTrademark(\"\")]\n[assembly: AssemblyCulture(\"\")]\n\n// Setting ComVisible to false makes the types in this assembly not visible \n// to COM components.  If you need to access a type in this assembly from \n// COM, set the ComVisible attribute to true on that type.\n[assembly: ComVisible(false)]\n\n// The following GUID is for the ID of the typelib if this project is exposed to COM\n[assembly: Guid(\"b2d6c60d-1243-4284-aa0c-2944a83fb2d7\")]\n\n// Version information for an assembly consists of the following four values:\n//\n//      Major Version\n//      Minor Version \n//      Build Number\n//      Revision\n//\n// You can specify all the values or you can default the Build and Revision Numbers \n// by using the '*' as shown below:\n// [assembly: AssemblyVersion(\"1.0.*\")]\n[assembly: AssemblyVersion(\"1.0.0.0\")]\n[assembly: AssemblyFileVersion(\"1.0.0.0\")]\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/Utilities/Accessors.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\nusing System.Reflection;\n\nnamespace MPIR.Tests\n{\n    internal static class Accessors<T>\n    {\n        private static readonly ConstructorInfo _intPtrConstructor;\n        private static readonly FieldInfo _getValue;\n\n        static Accessors()\n        {\n            _getValue = GetAccessor(\"_value\");\n\n            _intPtrConstructor = typeof(IntPtr).GetConstructor(new[] { Type.GetType(\"System.Void*\") });\n        }\n\n        internal static FieldInfo GetAccessor(string name)\n        {\n            return typeof(T).GetField(name, BindingFlags.NonPublic | BindingFlags.Instance);\n        }\n\n        internal static IntPtr _value(T x)\n        {\n            return (IntPtr)_intPtrConstructor.Invoke(new object[] { _getValue.GetValue(x) });\n        }\n    }\n\n    internal static class IntAccessors\n    {\n        internal static IntPtr _value(this HugeInt x)\n        {\n            return Accessors<HugeInt>._value(x);\n        }\n\n        internal static int NumberOfLimbsAllocated(this HugeInt x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n         
   return x.AllocatedSize;\n        }\n\n        internal static int NumberOfLimbsUsed(this HugeInt x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[1];\n            }\n        }\n\n        internal static IntPtr Limbs(this HugeInt x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return IntPtr.Zero;\n\n            unsafe\n            {\n                return new IntPtr(*(void**)(((int*)_value(x).ToPointer()) + 2));\n            }\n        }\n    }\n\n    internal static class RationalAccessors\n    {\n        internal static IntPtr _value(this HugeRational x)\n        {\n            return Accessors<HugeRational>._value(x);\n        }\n\n        internal static int NumeratorNumberOfLimbsAllocated(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[0];\n            }\n        }\n\n        internal static int NumeratorNumberOfLimbsUsed(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[1];\n            }\n        }\n\n        internal static IntPtr NumeratorLimbs(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return IntPtr.Zero;\n\n            unsafe\n            {\n                return new IntPtr(*(void**)(((int*) _value(x).ToPointer()) + 2));\n            }\n        }\n\n        internal static int DenominatorNumberOfLimbsAllocated(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[2 + sizeof(IntPtr) / 4];\n            }\n        }\n\n        internal 
static int DenominatorNumberOfLimbsUsed(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[3 + sizeof (IntPtr) / 4];\n            }\n        }\n\n        internal static IntPtr DenominatorLimbs(this HugeRational x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return IntPtr.Zero;\n\n            unsafe\n            {\n                return new IntPtr(*(void**)(((int*)_value(x).ToPointer()) + 4 + sizeof(IntPtr) / 4));\n            }\n        }\n    }\n\n    internal static class FloatAccessors\n    {\n        internal static IntPtr _value(this HugeFloat x)\n        {\n            return Accessors<HugeFloat>._value(x);\n        }\n\n        internal static int NumberOfLimbsAllocated(this HugeFloat x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[0];\n            }\n        }\n\n        internal static int NumberOfLimbsUsed(this HugeFloat x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[1];\n            }\n        }\n\n        internal static int Exponent(this HugeFloat x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return 0;\n\n            unsafe\n            {\n                return ((int*)_value(x).ToPointer())[2];\n            }\n        }\n\n        internal static IntPtr Limbs(this HugeFloat x)\n        {\n            if (_value(x) == IntPtr.Zero)\n                return IntPtr.Zero;\n\n            unsafe\n            {\n                return new IntPtr(*(void**)(((int*)_value(x).ToPointer()) + 2 + sizeof(IntPtr) / 4));\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "mpir.net/mpir.net-tests/Utilities/Platform.cs",
    "content": "﻿/*\nCopyright 2014 Alex Dyachenko\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 3 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  \n*/\n\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\nusing System.Reflection;\n\nnamespace MPIR.Tests\n{\n    /// <summary>\n    /// This class provides a syntactic shortcut for selecting one of two (or several) values based on the selected solution platform (CPU architecture)\n    /// </summary>\n    internal static class Platform\n    {\n#if WIN64\n        public static ulong Ui(ulong win64, uint win32) { return win64; }\n        public static long Si(long win64, int win32) { return win64; }\n        public static string Select(string win64, string win32) { return win64; }\n        public static double Select(double win64, double win32) { return win64; }\n#else\n        public static uint Ui(ulong win64, uint win32) { return win32; }\n        public static int Si(long win64, int win32) { return win32; }\n        public static string Select(string win64, string win32) { return win32; }\n        public static double Select(double win64, double win32) { return win32; }\n#endif\n    }\n}\n"
  },
  {
    "path": "mpirxx.h",
    "content": "/* gmpxx.h -- C++ class wrapper for GMP types.  -*- C++ -*-\n\nCopyright 2001, 2002, 2003, 2006, 2008, 2011, 2012 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n/* the C++ compiler must implement the following features:\n   - member templates\n   - partial specialization of templates\n   - namespace support\n   for g++, this means version 2.91 or higher\n   for other compilers, I don't know */\n#ifdef __GNUC__\n#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)\n#error mpirxx.h requires g++ version 2.91 (egcs 1.1.2) or higher\n#endif\n#endif\n#ifndef __GMP_PLUSPLUS__\n#define __GMP_PLUSPLUS__\n\n#include <cstddef>     /* for size_t */\n\n#include <iosfwd>\n\n#include <cstring>  /* for strlen */\n#include <limits>  /* numeric_limits */\n#include <utility>\n#include <string>\n#include <stdexcept>\n#include <cfloat>\n#include <algorithm>  /* swap */\n#include <mpir.h>\n\n#if defined( _MSC_VER ) && _MSC_VER >= 1700\n#  define MSC_CXX_11 1\n#elif defined( __INTEL_COMPILER ) && __INTEL_COMPILER > 1310\n#  define MSC_CXX_11 1\n#elif defined( __ICL ) && __ICL > 1310\n#  define MSC_CXX_11 1\n#endif\n\n#if defined(LLONG_MAX) && defined(LONG_MAX)\n#if LLONG_MAX != LONG_MAX\n#define MPIRXX_HAVE_LLONG 1\n#endif\n#endif\n\n/* check availability of stdint.h -- note we do 
not include this ourselves */\n#if defined(INTMAX_MAX)\n#  if defined(LONG_MAX) && defined(INTMAX_MAX) && INTMAX_MAX != LONG_MAX && (INTMAX_MAX != LLONG_MAX || !defined(MPIRXX_HAVE_LLONG))\n#    define MPIRXX_INTMAX_T 1\n#  endif\n#  if defined(ULONG_MAX) && defined(UINTMAX_MAX) && UINTMAX_MAX != ULONG_MAX && (UINTMAX_MAX != ULLONG_MAX || !defined(MPIRXX_HAVE_LLONG))\n#    define MPIRXX_UINTMAX_T 1\n#  endif\n#endif\n\n// wrapper for gcc's __builtin_constant_p\n// __builtin_constant_p has been in gcc since forever,\n// but g++-3.4 miscompiles it.\n#if __GMP_GNUC_PREREQ(4, 2)\n#define __GMPXX_CONSTANT(X) __builtin_constant_p(X)\n#else\n#define __GMPXX_CONSTANT(X) false\n#endif\n\n// Use C++11 features\n#ifndef __GMPXX_USE_CXX11\n#if __cplusplus >= 201103L\n#define __GMPXX_USE_CXX11 1\n#else\n#define __GMPXX_USE_CXX11 0\n#endif\n#endif\n\n#if __GMPXX_USE_CXX11\n#define __GMPXX_NOEXCEPT noexcept\n#include <type_traits> // for common_type\n#else\n#define __GMPXX_NOEXCEPT\n#endif\n\n// Max allocations for plain types when converted to mpz_t\n#define __GMPZ_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)\n\n#if GMP_NAIL_BITS != 0 && ! 
defined _LONG_LONG_LIMB\n#define __GMPZ_ULI_LIMBS 2\n#else\n#define __GMPZ_ULI_LIMBS 1\n#endif\n\ninline void __mpz_set_ui_safe(mpz_ptr p, mpir_ui l)\n{\n  p->_mp_size = (l != 0);\n  p->_mp_d[0] = l & GMP_NUMB_MASK;\n#if __GMPZ_ULI_LIMBS > 1\n  l >>= GMP_NUMB_BITS;\n  p->_mp_d[1] = l;\n  p->_mp_size += (l != 0);\n#endif\n}\n\ninline void __mpz_set_si_safe(mpz_ptr p, mpir_si l)\n{\n  if(l < 0)\n  {\n    __mpz_set_ui_safe(p, static_cast<mpir_ui>(-l));\n    mpz_neg(p, p);\n  }\n  else\n    __mpz_set_ui_safe(p, l);\n    // Note: we know the high bit of l is 0 so we could do slightly better\n}\n\n// Fake temporary variables\n#define __GMPXX_TMPZ_UI\t\t\t\t\t\t\t\\\n  mpz_t temp;\t\t\t\t\t\t\t\t\\\n  mp_limb_t limbs[__GMPZ_ULI_LIMBS];\t\t\t\t\t\\\n  temp->_mp_d = limbs;\t\t\t\t\t\t\t\\\n  __mpz_set_ui_safe (temp, l)\n#define __GMPXX_TMPZ_SI\t\t\t\t\t\t\t\\\n  mpz_t temp;\t\t\t\t\t\t\t\t\\\n  mp_limb_t limbs[__GMPZ_ULI_LIMBS];\t\t\t\t\t\\\n  temp->_mp_d = limbs;\t\t\t\t\t\t\t\\\n  __mpz_set_si_safe (temp, l)\n#define __GMPXX_TMPZ_D\t\t\t\t\t\t\t\\\n  mpz_t temp;\t\t\t\t\t\t\t\t\\\n  mp_limb_t limbs[__GMPZ_DBL_LIMBS];\t\t\t\t\t\\\n  temp->_mp_d = limbs;\t\t\t\t\t\t\t\\\n  temp->_mp_alloc = __GMPZ_DBL_LIMBS;\t\t\t\t\t\\\n  mpz_set_d (temp, d)\n\n#define __GMPXX_TMPQ_UI\t\t\t\t\t\t\t\\\n  mpq_t temp;\t\t\t\t\t\t\t\t\\\n  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];\t\t\t\t\t\\\n  mpq_numref(temp)->_mp_d = limbs;\t\t\t\t\t\\\n  __mpz_set_ui_safe (mpq_numref(temp), l);\t\t\t\t\\\n  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;\t\t\t\\\n  mpq_denref(temp)->_mp_size = 1;\t\t\t\t\t\\\n  mpq_denref(temp)->_mp_d[0] = 1\n#define __GMPXX_TMPQ_SI\t\t\t\t\t\t\t\\\n  mpq_t temp;\t\t\t\t\t\t\t\t\\\n  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];\t\t\t\t\t\\\n  mpq_numref(temp)->_mp_d = limbs;\t\t\t\t\t\\\n  __mpz_set_si_safe (mpq_numref(temp), l);\t\t\t\t\\\n  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;\t\t\t\\\n  mpq_denref(temp)->_mp_size = 1;\t\t\t\t\t\\\n  mpq_denref(temp)->_mp_d[0] = 
1\n\ninline mpir_ui __gmpxx_abs_ui (mpir_si l)\n{\n  return l >= 0 ? static_cast<mpir_ui>(l)\n\t  : static_cast<mpir_ui>(-l);\n}\n\n/**************** Function objects ****************/\n/* Any evaluation of a __gmp_expr ends up calling one of these functions\n   all intermediate functions being inline, the evaluation should optimize\n   to a direct call to the relevant function, thus yielding no overhead\n   over the C interface. */\n\nstruct __gmp_unary_plus\n{\n  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_set(z, w); }\n  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_set(q, r); }\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_set(f, g); }\n};\n\nstruct __gmp_unary_minus\n{\n  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_neg(z, w); }\n  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_neg(q, r); }\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_neg(f, g); }\n};\n\nstruct __gmp_unary_com\n{\n  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_com(z, w); }\n};\n\nstruct __gmp_binary_plus\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_add(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {\n    // Ideally, those checks should happen earlier so that the tree\n    // generated for a+0+b would just be sum(a,b).\n    if (__GMPXX_CONSTANT(l) && l == 0)\n    {\n      if (z != w) mpz_set(z, w);\n    }\n    else\n      mpz_add_ui(z, w, l);\n  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  { eval(z, w, l); }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {\n    if (l >= 0)\n      eval(z, w, static_cast<mpir_ui>(l));\n    else\n      mpz_sub_ui(z, w, static_cast<mpir_ui>(-l));\n  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  { eval(z, w, l); }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_add (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  { eval(z, w, d); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr 
s)\n  { mpq_add(q, r, s); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l)\n  {\n    if (__GMPXX_CONSTANT(l) && l == 0)\n    {\n      if (q != r) mpq_set(q, r);\n    }\n    else\n    {\n      if (q == r)\n        mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);\n      else\n      {\n        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);\n        mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));\n        mpz_set(mpq_denref(q), mpq_denref(r));\n      }\n    }\n  }\n  static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r)\n  { eval(q, r, l); }\n  static inline void eval(mpq_ptr q, mpq_srcptr r, mpir_si l);\n  // defined after __gmp_binary_minus\n  static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r)\n  { eval(q, r, l); }\n  static void eval(mpq_ptr q, mpq_srcptr r, double d)\n  {\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_add(q, r, temp);\n    mpq_clear(temp);\n  }\n  static void eval(mpq_ptr q, double d, mpq_srcptr r)\n  { eval(q, r, d); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)\n  {\n    if (q == r)\n      mpz_addmul(mpq_numref(q), mpq_denref(q), z);\n    else\n    {\n      mpz_mul(mpq_numref(q), mpq_denref(r), z);\n      mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));\n      mpz_set(mpq_denref(q), mpq_denref(r));\n    }\n  }\n  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)\n  { eval(q, r, z); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)\n  { mpf_add(f, g, h); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l)\n  { mpf_add_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g)\n  { mpf_add_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l)\n  {\n    if (l >= 0)\n      mpf_add_ui(f, g, l);\n    else\n      mpf_sub_ui(f, g, static_cast<mpir_ui>(-l));\n  }\n  static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g)\n  { eval(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, double d)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 
8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_add(f, g, temp);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, double d, mpf_srcptr g)\n  { eval(f, g, d); }\n};\n\nstruct __gmp_binary_minus\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_sub(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {\n    if (__GMPXX_CONSTANT(l) && l == 0)\n    {\n      if (z != w) mpz_set(z, w);\n    }\n    else\n      mpz_sub_ui(z, w, l);\n  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  {\n    if (__GMPXX_CONSTANT(l) && l == 0)\n    {\n      mpz_neg(z, w);\n    }\n    else\n      mpz_ui_sub(z, l, w);\n  }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {\n    if (l >= 0)\n      eval(z, w, static_cast<mpir_ui>(l));\n    else\n      mpz_add_ui(z, w, static_cast<mpir_ui>(-l));\n  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  {\n    if (l >= 0)\n      eval(z, static_cast<mpir_ui>(l), w);\n    else\n      {\n        mpz_add_ui(z, w, static_cast<mpir_ui>(-l));\n        mpz_neg(z, z);\n      }\n  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_sub (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  {  __GMPXX_TMPZ_D;    mpz_sub (z, temp, w); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)\n  { mpq_sub(q, r, s); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l)\n  {\n    if (__GMPXX_CONSTANT(l) && l == 0)\n    {\n      if (q != r) mpq_set(q, r);\n    }\n    else\n    {\n      if (q == r)\n        mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);\n      else\n      {\n        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);\n        mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));\n        mpz_set(mpq_denref(q), mpq_denref(r));\n      }\n    }\n  }\n  static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r)\n  { eval(q, r, l); mpq_neg(q, q); }\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l)\n  {\n    if (l 
>= 0)\n      eval(q, r, static_cast<mpir_ui>(l));\n    else\n      __gmp_binary_plus::eval(q, r, static_cast<mpir_ui>(-l));\n  }\n  static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r)\n  { eval(q, r, l); mpq_neg(q, q); }\n  static void eval(mpq_ptr q, mpq_srcptr r, double d)\n  {\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_sub(q, r, temp);\n    mpq_clear(temp);\n  }\n  static void eval(mpq_ptr q, double d, mpq_srcptr r)\n  {\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_sub(q, temp, r);\n    mpq_clear(temp);\n  }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)\n  {\n    if (q == r)\n      mpz_submul(mpq_numref(q), mpq_denref(q), z);\n    else\n    {\n      mpz_mul(mpq_numref(q), mpq_denref(r), z);\n      mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));\n      mpz_set(mpq_denref(q), mpq_denref(r));\n    }\n  }\n  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)\n  { eval(q, r, z); mpq_neg(q, q); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)\n  { mpf_sub(f, g, h); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l)\n  { mpf_sub_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g)\n  { mpf_ui_sub(f, l, g); }\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l)\n  {\n    if (l >= 0)\n      mpf_sub_ui(f, g, l);\n    else\n      mpf_add_ui(f, g, static_cast<mpir_ui>(-l));\n  }\n  static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g)\n  {\n    if (l >= 0)\n      mpf_sub_ui(f, g, l);\n    else\n      mpf_add_ui(f, g, static_cast<mpir_ui>(-l));\n    mpf_neg(f, f);\n  }\n  static void eval(mpf_ptr f, mpf_srcptr g, double d)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_sub(f, g, temp);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, double d, mpf_srcptr g)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_sub(f, temp, g);\n    mpf_clear(temp);\n  
}\n};\n\n// defined here so it can reference __gmp_binary_minus\ninline void\n__gmp_binary_plus::eval(mpq_ptr q, mpq_srcptr r, mpir_si l)\n{\n  if (l >= 0)\n    eval(q, r, static_cast<mpir_ui>(l));\n  else\n    __gmp_binary_minus::eval(q, r, static_cast<mpir_ui>(-l));\n}\n\nstruct __gmp_binary_lshift\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)\n  {\n    if (__GMPXX_CONSTANT(l) && (l == 0))\n    {\n      if (z != w) mpz_set(z, w);\n    }\n    else\n      mpz_mul_2exp(z, w, l);\n  }\n  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)\n  {\n    if (__GMPXX_CONSTANT(l) && (l == 0))\n    {\n      if (q != r) mpq_set(q, r);\n    }\n    else\n      mpq_mul_2exp(q, r, l);\n  }\n  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)\n  { mpf_mul_2exp(f, g, l); }\n};\n\nstruct __gmp_binary_rshift\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)\n  {\n    if (__GMPXX_CONSTANT(l) && (l == 0))\n    {\n      if (z != w) mpz_set(z, w);\n    }\n    else\n      mpz_fdiv_q_2exp(z, w, l);\n  }\n  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)\n  {\n    if (__GMPXX_CONSTANT(l) && (l == 0))\n    {\n      if (q != r) mpq_set(q, r);\n    }\n    else\n      mpq_div_2exp(q, r, l);\n  }\n  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)\n  { mpf_div_2exp(f, g, l); }\n};\n\nstruct __gmp_binary_multiplies\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_mul(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {\n// gcc-3.3 doesn't have __builtin_ctzl. 
Don't bother optimizing for old gcc.\n#if __GMP_GNUC_PREREQ(3, 4)\n    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)\n    {\n      if (l == 0)\n      {\n        z->_mp_size = 0;\n      }\n      else\n      {\n        __gmp_binary_lshift::eval(z, w, __builtin_ctzl(l));\n      }\n    }\n    else\n#endif\n      mpz_mul_ui(z, w, l);\n  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  { eval(z, w, l); }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {\n    if (__GMPXX_CONSTANT(l))\n    {\n      if (l >= 0)\n        eval(z, w, static_cast<mpir_ui>(l));\n      else\n      {\n        eval(z, w, static_cast<mpir_ui>(-l));\n\tmpz_neg(z, z);\n      }\n    }\n    else\n      mpz_mul_si (z, w, l);\n  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  { eval(z, w, l); }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_mul (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  { eval(z, w, d); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)\n  { mpq_mul(q, r, s); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l)\n  {\n#if __GMP_GNUC_PREREQ(3, 4)\n    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)\n    {\n      if (l == 0)\n      {\n\tmpq_set_ui(q, 0, 1);\n      }\n      else\n      {\n        __gmp_binary_lshift::eval(q, r, __builtin_ctzl(l));\n      }\n    }\n    else\n#endif\n    {\n      __GMPXX_TMPQ_UI;\n      mpq_mul (q, r, temp);\n    }\n  }\n  static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r)\n  { eval(q, r, l); }\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l)\n  {\n    if (__GMPXX_CONSTANT(l))\n    {\n      if (l >= 0)\n        eval(q, r, static_cast<mpir_ui>(l));\n      else\n      {\n        eval(q, r, static_cast<mpir_ui>(-l));\n\tmpq_neg(q, q);\n      }\n    }\n    else\n    {\n      __GMPXX_TMPQ_SI;\n      mpq_mul (q, r, temp);\n    }\n  }\n  static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r)\n  { eval(q, r, l); }\n  static void eval(mpq_ptr q, 
mpq_srcptr r, double d)\n  {\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_mul(q, r, temp);\n    mpq_clear(temp);\n  }\n  static void eval(mpq_ptr q, double d, mpq_srcptr r)\n  { eval(q, r, d); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)\n  { mpf_mul(f, g, h); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l)\n  { mpf_mul_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g)\n  { mpf_mul_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l)\n  {\n    if (l >= 0)\n      mpf_mul_ui(f, g, l);\n    else\n      {\n\tmpf_mul_ui(f, g, static_cast<mpir_ui>(-l));\n\tmpf_neg(f, f);\n      }\n  }\n  static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g)\n  { eval(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, double d)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_mul(f, g, temp);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, double d, mpf_srcptr g)\n  { eval(f, g, d); }\n};\n\nstruct __gmp_binary_divides\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_tdiv_q(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {\n#if __GMP_GNUC_PREREQ(3, 4)\n    // Don't optimize division by 0...\n    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)\n    {\n      if (l == 1)\n      {\n        if (z != w) mpz_set(z, w);\n      }\n      else\n        mpz_tdiv_q_2exp(z, w, __builtin_ctzl(l));\n        // warning: do not use rshift (fdiv)\n    }\n    else\n#endif\n      mpz_tdiv_q_ui(z, w, l);\n  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  {\n    if (mpz_sgn(w) >= 0)\n      {\n\tif (mpz_fits_ui_p(w))\n\t  mpz_set_ui(z, l / mpz_get_ui(w));\n\telse\n\t  mpz_set_ui(z, 0);\n      }\n    else\n      {\n\tmpz_neg(z, w);\n\tif (mpz_fits_ui_p(z))\n\t  {\n\t    mpz_set_ui(z, l / mpz_get_ui(z));\n\t    mpz_neg(z, z);\n\t  }\n\telse\n\t  mpz_set_ui(z, 0);\n      }\n  }\n  static void 
eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {\n    if (l >= 0)\n      eval(z, w, static_cast<mpir_ui>(l));\n    else\n      {\n\teval(z, w, static_cast<mpir_ui>(-l));\n\tmpz_neg(z, z);\n      }\n  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  {\n    if (mpz_fits_si_p(w))\n      mpz_set_si(z, l / mpz_get_si(w));\n    else\n      {\n        /* if w is bigger than a long then the quotient must be zero, unless\n           l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */\n        mpz_set_si (z, (mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? -1 : 0));\n      }\n  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, temp, w); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)\n  { mpq_div(q, r, s); }\n\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_ui l)\n  {\n#if __GMP_GNUC_PREREQ(3, 4)\n    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)\n      __gmp_binary_rshift::eval(q, r, __builtin_ctzl(l));\n    else\n#endif\n    {\n      __GMPXX_TMPQ_UI;\n      mpq_div (q, r, temp);\n    }\n  }\n  static void eval(mpq_ptr q, mpir_ui l, mpq_srcptr r)\n  {  __GMPXX_TMPQ_UI;   mpq_div (q, temp, r); }\n  static void eval(mpq_ptr q, mpq_srcptr r, mpir_si l)\n  {\n    if (__GMPXX_CONSTANT(l))\n    {\n      if (l >= 0)\n        eval(q, r, static_cast<mpir_ui>(l));\n      else\n      {\n        eval(q, r, static_cast<mpir_ui>(-l));\n\tmpq_neg(q, q);\n      }\n    }\n    else\n    {\n      __GMPXX_TMPQ_SI;\n      mpq_div (q, r, temp);\n    }\n  }\n  static void eval(mpq_ptr q, mpir_si l, mpq_srcptr r)\n  {  __GMPXX_TMPQ_SI;   mpq_div (q, temp, r); }\n  static void eval(mpq_ptr q, mpq_srcptr r, double d)\n  {\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_div(q, r, temp);\n    mpq_clear(temp);\n  }\n  static void eval(mpq_ptr q, double d, mpq_srcptr r)\n  {\n    mpq_t 
temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    mpq_div(q, temp, r);\n    mpq_clear(temp);\n  }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)\n  { mpf_div(f, g, h); }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_ui l)\n  { mpf_div_ui(f, g, l); }\n  static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g)\n  { mpf_ui_div(f, l, g); }\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l)\n  {\n    if (l >= 0)\n      mpf_div_ui(f, g, l);\n    else\n      {\n\tmpf_div_ui(f, g, static_cast<mpir_ui>(-l));\n\tmpf_neg(f, f);\n      }\n  }\n  static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g)\n  {\n    if (l >= 0)\n      mpf_ui_div(f, l, g);\n    else\n      {\n\tmpf_ui_div(f, static_cast<mpir_ui>(-l), g);\n\tmpf_neg(f, f);\n      }\n  }\n  static void eval(mpf_ptr f, mpf_srcptr g, double d)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_div(f, g, temp);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, double d, mpf_srcptr g)\n  {\n    mpf_t temp;\n    mpf_init2(temp, 8*sizeof(double));\n    mpf_set_d(temp, d);\n    mpf_div(f, temp, g);\n    mpf_clear(temp);\n  }\n};\n\nstruct __gmp_binary_modulus\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_tdiv_r(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  { mpz_tdiv_r_ui(z, w, l); }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  {\n    if (mpz_sgn(w) >= 0)\n      {\n\tif (mpz_fits_ui_p(w))\n\t  mpz_set_ui(z, l % mpz_get_ui(w));\n\telse\n\t  mpz_set_ui(z, l);\n      }\n    else\n      {\n\tmpz_neg(z, w);\n\tif (mpz_fits_ui_p(z))\n\t  mpz_set_ui(z, l % mpz_get_ui(z));\n\telse\n\t  mpz_set_ui(z, l);\n      }\n  }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {\n    mpz_tdiv_r_ui (z, w, (l >= 0 ? 
l : -l));\n  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  {\n    if (mpz_fits_si_p(w))\n      mpz_set_si(z, l % mpz_get_si(w));\n    else\n      {\n        /* if w is bigger than a long then the remainder is l unchanged,\n           unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */\n        mpz_set_si (z, mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? 0 : l);\n      }\n  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, temp, w); }\n};\n\nstruct __gmp_binary_and\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_and(z, w, v); }\n\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {  __GMPXX_TMPZ_UI;   mpz_and (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {  __GMPXX_TMPZ_SI;   mpz_and (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_and (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  { eval(z, w, d);  }\n};\n\nstruct __gmp_binary_ior\n{\n  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_ior(z, w, v); }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {  __GMPXX_TMPZ_UI;   mpz_ior (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {  __GMPXX_TMPZ_SI;   mpz_ior (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_ior (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  { eval(z, w, d);  }\n};\n\nstruct __gmp_binary_xor\n{\n  static 
void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n  { mpz_xor(z, w, v); }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n  {  __GMPXX_TMPZ_UI;   mpz_xor (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n  {  __GMPXX_TMPZ_SI;   mpz_xor (z, w, temp);  }\n  static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n  { eval(z, w, l);  }\n  static void eval(mpz_ptr z, mpz_srcptr w, double d)\n  {  __GMPXX_TMPZ_D;    mpz_xor (z, w, temp); }\n  static void eval(mpz_ptr z, double d, mpz_srcptr w)\n  { eval(z, w, d);  }\n};\n\nstruct __gmp_binary_equal\n{\n  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) == 0; }\n\n  static bool eval(mpz_srcptr z, mpir_ui l)\n  { return mpz_cmp_ui(z, l) == 0; }\n  static bool eval(mpir_ui l, mpz_srcptr z)\n  { return mpz_cmp_ui(z, l) == 0; }\n  static bool eval(mpz_srcptr z, mpir_si l)\n  { return mpz_cmp_si(z, l) == 0; }\n  static bool eval(mpir_si l, mpz_srcptr z)\n  { return mpz_cmp_si(z, l) == 0; }\n  static bool eval(mpz_srcptr z, double d)\n  { return mpz_cmp_d(z, d) == 0; }\n  static bool eval(double d, mpz_srcptr z)\n  { return mpz_cmp_d(z, d) == 0; }\n\n  static bool eval(mpq_srcptr q, mpq_srcptr r)\n  { return mpq_equal(q, r) != 0; }\n\n  static bool eval(mpq_srcptr q, mpir_ui l)\n  { return mpq_cmp_ui(q, l, 1) == 0; }\n  static bool eval(mpir_ui l, mpq_srcptr q)\n  { return mpq_cmp_ui(q, l, 1) == 0; }\n  static bool eval(mpq_srcptr q, mpir_si l)\n  { return mpq_cmp_si(q, l, 1) == 0; }\n  static bool eval(mpir_si l, mpq_srcptr q)\n  { return mpq_cmp_si(q, l, 1) == 0; }\n  static bool eval(mpq_srcptr q, double d)\n  {\n    bool b;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    b = (mpq_equal(q, temp) != 0);\n    mpq_clear(temp);\n    return b;\n  }\n  static bool eval(double d, mpq_srcptr q)\n  {\n    return eval(q, d);\n  }\n\n  static bool eval(mpf_srcptr f, mpf_srcptr g) { return 
mpf_cmp(f, g) == 0; }\n\n  static bool eval(mpf_srcptr f, mpir_ui l)\n  { return mpf_cmp_ui(f, l) == 0; }\n  static bool eval(mpir_ui l, mpf_srcptr f)\n  { return mpf_cmp_ui(f, l) == 0; }\n  static bool eval(mpf_srcptr f, mpir_si l)\n  { return mpf_cmp_si(f, l) == 0; }\n  static bool eval(mpir_si l, mpf_srcptr f)\n  { return mpf_cmp_si(f, l) == 0; }\n  static bool eval(mpf_srcptr f, double d)\n  { return mpf_cmp_d(f, d) == 0; }\n  static bool eval(double d, mpf_srcptr f)\n  { return mpf_cmp_d(f, d) == 0; }\n};\n\nstruct __gmp_binary_less\n{\n  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }\n\n  static bool eval(mpz_srcptr z, mpir_ui l)\n  { return mpz_cmp_ui(z, l) < 0; }\n  static bool eval(mpir_ui l, mpz_srcptr z)\n  { return mpz_cmp_ui(z, l) > 0; }\n  static bool eval(mpz_srcptr z, mpir_si l)\n  { return mpz_cmp_si(z, l) < 0; }\n  static bool eval(mpir_si l, mpz_srcptr z)\n  { return mpz_cmp_si(z, l) > 0; }\n  static bool eval(mpz_srcptr z, double d)\n  { return mpz_cmp_d(z, d) < 0; }\n  static bool eval(double d, mpz_srcptr z)\n  { return mpz_cmp_d(z, d) > 0; }\n\n  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }\n\n  static bool eval(mpq_srcptr q, mpir_ui l)\n  { return mpq_cmp_ui(q, l, 1) < 0; }\n  static bool eval(mpir_ui l, mpq_srcptr q)\n  { return mpq_cmp_ui(q, l, 1) > 0; }\n  static bool eval(mpq_srcptr q, mpir_si l)\n  { return mpq_cmp_si(q, l, 1) < 0; }\n  static bool eval(mpir_si l, mpq_srcptr q)\n  { return mpq_cmp_si(q, l, 1) > 0; }\n  static bool eval(mpq_srcptr q, double d)\n  {\n    bool b;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    b = (mpq_cmp(q, temp) < 0);\n    mpq_clear(temp);\n    return b;\n  }\n  static bool eval(double d, mpq_srcptr q)\n  {\n    bool b;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    b = (mpq_cmp(temp, q) < 0);\n    mpq_clear(temp);\n    return b;\n  }\n\n  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) 
< 0; }\n\n  static bool eval(mpf_srcptr f, mpir_ui l)\n  { return mpf_cmp_ui(f, l) < 0; }\n  static bool eval(mpir_ui l, mpf_srcptr f)\n  { return mpf_cmp_ui(f, l) > 0; }\n  static bool eval(mpf_srcptr f, mpir_si l)\n  { return mpf_cmp_si(f, l) < 0; }\n  static bool eval(mpir_si l, mpf_srcptr f)\n  { return mpf_cmp_si(f, l) > 0; }\n  static bool eval(mpf_srcptr f, double d)\n  { return mpf_cmp_d(f, d) < 0; }\n  static bool eval(double d, mpf_srcptr f)\n  { return mpf_cmp_d(f, d) > 0; }\n};\n\nstruct __gmp_binary_greater\n{\n  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) > 0; }\n\n  static bool eval(mpz_srcptr z, mpir_ui l)\n  { return mpz_cmp_ui(z, l) > 0; }\n  static bool eval(mpir_ui l, mpz_srcptr z)\n  { return mpz_cmp_ui(z, l) < 0; }\n  static bool eval(mpz_srcptr z, mpir_si l)\n  { return mpz_cmp_si(z, l) > 0; }\n  static bool eval(mpir_si l, mpz_srcptr z)\n  { return mpz_cmp_si(z, l) < 0; }\n  static bool eval(mpz_srcptr z, double d)\n  { return mpz_cmp_d(z, d) > 0; }\n  static bool eval(double d, mpz_srcptr z)\n  { return mpz_cmp_d(z, d) < 0; }\n\n  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) > 0; }\n\n  static bool eval(mpq_srcptr q, mpir_ui l)\n  { return mpq_cmp_ui(q, l, 1) > 0; }\n  static bool eval(mpir_ui l, mpq_srcptr q)\n  { return mpq_cmp_ui(q, l, 1) < 0; }\n  static bool eval(mpq_srcptr q, mpir_si l)\n  { return mpq_cmp_si(q, l, 1) > 0; }\n  static bool eval(mpir_si l, mpq_srcptr q)\n  { return mpq_cmp_si(q, l, 1) < 0; }\n  static bool eval(mpq_srcptr q, double d)\n  {\n    bool b;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    b = (mpq_cmp(q, temp) > 0);\n    mpq_clear(temp);\n    return b;\n  }\n  static bool eval(double d, mpq_srcptr q)\n  {\n    bool b;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    b = (mpq_cmp(temp, q) > 0);\n    mpq_clear(temp);\n    return b;\n  }\n\n  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) > 0; }\n\n  
static bool eval(mpf_srcptr f, mpir_ui l)\n  { return mpf_cmp_ui(f, l) > 0; }\n  static bool eval(mpir_ui l, mpf_srcptr f)\n  { return mpf_cmp_ui(f, l) < 0; }\n  static bool eval(mpf_srcptr f, mpir_si l)\n  { return mpf_cmp_si(f, l) > 0; }\n  static bool eval(mpir_si l, mpf_srcptr f)\n  { return mpf_cmp_si(f, l) < 0; }\n  static bool eval(mpf_srcptr f, double d)\n  { return mpf_cmp_d(f, d) > 0; }\n  static bool eval(double d, mpf_srcptr f)\n  { return mpf_cmp_d(f, d) < 0; }\n};\n\nstruct __gmp_unary_increment\n{\n  static void eval(mpz_ptr z) { mpz_add_ui(z, z, 1); }\n  static void eval(mpq_ptr q)\n  { mpz_add(mpq_numref(q), mpq_numref(q), mpq_denref(q)); }\n  static void eval(mpf_ptr f) { mpf_add_ui(f, f, 1); }\n};\n\nstruct __gmp_unary_decrement\n{\n  static void eval(mpz_ptr z) { mpz_sub_ui(z, z, 1); }\n  static void eval(mpq_ptr q)\n  { mpz_sub(mpq_numref(q), mpq_numref(q), mpq_denref(q)); }\n  static void eval(mpf_ptr f) { mpf_sub_ui(f, f, 1); }\n};\n\nstruct __gmp_abs_function\n{\n  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_abs(z, w); }\n  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_abs(q, r); }\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_abs(f, g); }\n};\n\nstruct __gmp_trunc_function\n{\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_trunc(f, g); }\n};\n\nstruct __gmp_floor_function\n{\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_floor(f, g); }\n};\n\nstruct __gmp_ceil_function\n{\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_ceil(f, g); }\n};\n\nstruct __gmp_sqrt_function\n{\n  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_sqrt(z, w); }\n  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_sqrt(f, g); }\n};\n\nstruct __gmp_hypot_function\n{\n  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)\n  {\n    mpf_t temp;\n    mpf_init2(temp, mpf_get_prec(f));\n    mpf_mul(temp, g, g);\n    mpf_mul(f, h, h);\n    mpf_add(f, f, temp);\n    mpf_sqrt(f, f);\n    mpf_clear(temp);\n  }\n\n  static void eval(mpf_ptr f, mpf_srcptr g, 
mpir_ui l)\n  {\n    mpf_t temp;\n    mpf_init2(temp, mpf_get_prec(f));\n    mpf_mul(temp, g, g);\n    mpf_set_ui(f, l);\n    mpf_mul(f, f, f);\n    mpf_add(f, f, temp);\n    mpf_sqrt(f, f);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, mpir_ui l, mpf_srcptr g)\n  { eval(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, mpir_si l)\n  {\n    mpf_t temp;\n    mpf_init2(temp, mpf_get_prec(f));\n    mpf_mul(temp, g, g);\n    mpf_set_si(f, l);\n    mpf_mul(f, f, f);\n    mpf_add(f, f, temp);\n    mpf_sqrt(f, f);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, mpir_si l, mpf_srcptr g)\n  { eval(f, g, l); }\n  static void eval(mpf_ptr f, mpf_srcptr g, double d)\n  {\n    mpf_t temp;\n    mpf_init2(temp, mpf_get_prec(f));\n    mpf_mul(temp, g, g);\n    mpf_set_d(f, d);\n    mpf_mul(f, f, f);\n    mpf_add(f, f, temp);\n    mpf_sqrt(f, f);\n    mpf_clear(temp);\n  }\n  static void eval(mpf_ptr f, double d, mpf_srcptr g)\n  { eval(f, g, d); }\n};\n\nstruct __gmp_sgn_function\n{\n  static int eval(mpz_srcptr z) { return mpz_sgn(z); }\n  static int eval(mpq_srcptr q) { return mpq_sgn(q); }\n  static int eval(mpf_srcptr f) { return mpf_sgn(f); }\n};\n\nstruct __gmp_gcd_function\n{\n    static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n    {\n        mpz_gcd(z, w, v);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n    {\n        mpz_gcd_ui(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n    {\n        eval(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n    {\n        eval(z, w, __gmpxx_abs_ui(l));\n    }\n    static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n    {\n        eval(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, double d)\n    {\n        __GMPXX_TMPZ_D;    mpz_gcd(z, w, temp);\n    }\n    static void eval(mpz_ptr z, double d, mpz_srcptr w)\n    {\n        eval(z, w, d);\n    }\n};\n\nstruct __gmp_lcm_function\n{\n    static void 
eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)\n    {\n        mpz_lcm(z, w, v);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, mpir_ui l)\n    {\n        mpz_lcm_ui(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpir_ui l, mpz_srcptr w)\n    {\n        eval(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, mpir_si l)\n    {\n        eval(z, w, __gmpxx_abs_ui(l));\n    }\n    static void eval(mpz_ptr z, mpir_si l, mpz_srcptr w)\n    {\n        eval(z, w, l);\n    }\n    static void eval(mpz_ptr z, mpz_srcptr w, double d)\n    {\n        __GMPXX_TMPZ_D;    mpz_lcm(z, w, temp);\n    }\n    static void eval(mpz_ptr z, double d, mpz_srcptr w)\n    {\n        eval(z, w, d);\n    }\n};\n\nstruct __gmp_cmp_function\n{\n  static int eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w); }\n\n  static int eval(mpz_srcptr z, mpir_ui l)\n  { return mpz_cmp_ui(z, l); }\n  static int eval(mpir_ui l, mpz_srcptr z)\n  { return -mpz_cmp_ui(z, l); }\n  static int eval(mpz_srcptr z, mpir_si l)\n  { return mpz_cmp_si(z, l); }\n  static int eval(mpir_si l, mpz_srcptr z)\n  { return -mpz_cmp_si(z, l); }\n  static int eval(mpz_srcptr z, double d)\n  { return mpz_cmp_d(z, d); }\n  static int eval(double d, mpz_srcptr z)\n  { return -mpz_cmp_d(z, d); }\n\n  static int eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r); }\n\n  static int eval(mpq_srcptr q, mpir_ui l)\n  { return mpq_cmp_ui(q, l, 1); }\n  static int eval(mpir_ui l, mpq_srcptr q)\n  { return -mpq_cmp_ui(q, l, 1); }\n  static int eval(mpq_srcptr q, mpir_si l)\n  { return mpq_cmp_si(q, l, 1); }\n  static int eval(mpir_si l, mpq_srcptr q)\n  { return -mpq_cmp_si(q, l, 1); }\n  static int eval(mpq_srcptr q, double d)\n  {\n    int i;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    i = mpq_cmp(q, temp);\n    mpq_clear(temp);\n    return i;\n  }\n  static int eval(double d, mpq_srcptr q)\n  {\n    int i;\n    mpq_t temp;\n    mpq_init(temp);\n    mpq_set_d(temp, d);\n    i = 
mpq_cmp(temp, q);\n    mpq_clear(temp);\n    return i;\n  }\n\n  static int eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g); }\n\n  static int eval(mpf_srcptr f, mpir_ui l)\n  { return mpf_cmp_ui(f, l); }\n  static int eval(mpir_ui l, mpf_srcptr f)\n  { return -mpf_cmp_ui(f, l); }\n  static int eval(mpf_srcptr f, mpir_si l)\n  { return mpf_cmp_si(f, l); }\n  static int eval(mpir_si l, mpf_srcptr f)\n  { return -mpf_cmp_si(f, l); }\n  static int eval(mpf_srcptr f, double d)\n  { return mpf_cmp_d(f, d); }\n  static int eval(double d, mpf_srcptr f)\n  { return -mpf_cmp_d(f, d); }\n};\n\nstruct __gmp_rand_function\n{\n  static void eval(mpz_ptr z, gmp_randstate_t s, mp_bitcnt_t l)\n  { mpz_urandomb(z, s, l); }\n  static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w)\n  { mpz_urandomm(z, s, w); }\n  static void eval(mpf_ptr f, gmp_randstate_t s, mp_bitcnt_t prec)\n  { mpf_urandomb(f, s, prec); }\n};\n\n\n/**************** Auxiliary classes ****************/\n\n/* this is much the same as gmp_allocated_string in gmp-impl.h\n   since gmp-impl.h is not publicly available, I redefine it here\n   I use a different name to avoid possible clashes */\n\nextern \"C\" {\n  typedef void (*__gmp_freefunc_t) (void *, size_t);\n}\nstruct __gmp_alloc_cstring\n{\n  char *str;\n  __gmp_alloc_cstring(char *s) { str = s; }\n  ~__gmp_alloc_cstring()\n  {\n    __gmp_freefunc_t freefunc;\n    mp_get_memory_functions (NULL, NULL, &freefunc);\n    (*freefunc) (str, std::strlen(str)+1);\n  }\n};\n\n\n// general expression template class\ntemplate <class T, class U>\nclass __gmp_expr;\n\n\n// templates for resolving expression types\ntemplate <class T>\nstruct __gmp_resolve_ref\n{\n  typedef T ref_type;\n};\n\ntemplate <class T, class U>\nstruct __gmp_resolve_ref<__gmp_expr<T, U> >\n{\n  typedef const __gmp_expr<T, U> & ref_type;\n};\n\n\ntemplate <class T, class U = T>\nstruct __gmp_resolve_expr;\n\ntemplate <>\nstruct __gmp_resolve_expr<mpz_t>\n{\n  typedef mpz_t value_type;\n  
typedef mpz_ptr ptr_type;\n  typedef mpz_srcptr srcptr_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpq_t>\n{\n  typedef mpq_t value_type;\n  typedef mpq_ptr ptr_type;\n  typedef mpq_srcptr srcptr_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpf_t>\n{\n  typedef mpf_t value_type;\n  typedef mpf_ptr ptr_type;\n  typedef mpf_srcptr srcptr_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpz_t, mpq_t>\n{\n  typedef mpq_t value_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpq_t, mpz_t>\n{\n  typedef mpq_t value_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpz_t, mpf_t>\n{\n  typedef mpf_t value_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpf_t, mpz_t>\n{\n  typedef mpf_t value_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpq_t, mpf_t>\n{\n  typedef mpf_t value_type;\n};\n\ntemplate <>\nstruct __gmp_resolve_expr<mpf_t, mpq_t>\n{\n  typedef mpf_t value_type;\n};\n\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\nnamespace std {\n  template <class T, class U, class V, class W>\n  struct common_type <__gmp_expr<T, U>, __gmp_expr<V, W> >\n  {\n  private:\n    typedef typename __gmp_resolve_expr<T, V>::value_type X;\n  public:\n    typedef __gmp_expr<X, X> type;\n  };\n\n  template <class T, class U>\n  struct common_type <__gmp_expr<T, U>, __gmp_expr<T, U> >\n  {\n    typedef __gmp_expr<T, U> type;\n  };\n\n#define __GMPXX_DECLARE_COMMON_TYPE(typ)\t\\\n  template <class T, class U>\t\t\t\\\n  struct common_type <__gmp_expr<T, U>, typ >\t\\\n  {\t\t\t\t\t\t\\\n    typedef __gmp_expr<T, T> type;\t\t\\\n  };\t\t\t\t\t\t\\\n\t\t\t\t\t\t\\\n  template <class T, class U>\t\t\t\\\n  struct common_type <typ, __gmp_expr<T, U> >\t\\\n  {\t\t\t\t\t\t\\\n    typedef __gmp_expr<T, T> type;\t\t\\\n  }\n\n  __GMPXX_DECLARE_COMMON_TYPE(signed char);\n  __GMPXX_DECLARE_COMMON_TYPE(unsigned char);\n  __GMPXX_DECLARE_COMMON_TYPE(signed int);\n  __GMPXX_DECLARE_COMMON_TYPE(unsigned int);\n  __GMPXX_DECLARE_COMMON_TYPE(signed short int);\n  
__GMPXX_DECLARE_COMMON_TYPE(unsigned short int);\n  __GMPXX_DECLARE_COMMON_TYPE(signed long int);\n  __GMPXX_DECLARE_COMMON_TYPE(unsigned long int);\n  __GMPXX_DECLARE_COMMON_TYPE(float);\n  __GMPXX_DECLARE_COMMON_TYPE(double);\n#undef __GMPXX_DECLARE_COMMON_TYPE\n}\n#endif\n\n// classes for evaluating unary and binary expressions\ntemplate <class T, class Op>\nstruct __gmp_unary_expr\n{\n  const T &val;\n\n  __gmp_unary_expr(const T &v) : val(v) { }\nprivate:\n  __gmp_unary_expr();\n};\n\ntemplate <class T, class U, class Op>\nstruct __gmp_binary_expr\n{\n  typename __gmp_resolve_ref<T>::ref_type val1;\n  typename __gmp_resolve_ref<U>::ref_type val2;\n\n  __gmp_binary_expr(const T &v1, const U &v2) : val1(v1), val2(v2) { }\nprivate:\n  __gmp_binary_expr();\n};\n\n\n\n/**************** Macros for in-class declarations ****************/\n/* This is just repetitive code that is easier to maintain if it's written\n   only once */\n\n#define __GMPP_DECLARE_COMPOUND_OPERATOR(fun)                         \\\n  template <class T, class U>                                         \\\n  __gmp_expr<value_type, value_type> & fun(const __gmp_expr<T, U> &);\n#ifdef MPIRXX_HAVE_LLONG \n#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \\\n  __gmp_expr & fun(signed char);              \\\n  __gmp_expr & fun(unsigned char);            \\\n  __gmp_expr & fun(signed int);               \\\n  __gmp_expr & fun(unsigned int);             \\\n  __gmp_expr & fun(signed short int);         \\\n  __gmp_expr & fun(unsigned short int);       \\\n  __gmp_expr & fun(signed long int);          \\\n  __gmp_expr & fun(unsigned long int);        \\\n  __gmp_expr & fun(signed long long int);     \\\n  __gmp_expr & fun(unsigned long long int);   \\\n  __gmp_expr & fun(float);                    \\\n  __gmp_expr & fun(double);                   \\\n  __gmp_expr & fun(long double);\n#else\n#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \\\n  __gmp_expr & fun(signed char);              \\\n  __gmp_expr & 
fun(unsigned char);            \\\n  __gmp_expr & fun(signed int);               \\\n  __gmp_expr & fun(unsigned int);             \\\n  __gmp_expr & fun(signed short int);         \\\n  __gmp_expr & fun(unsigned short int);       \\\n  __gmp_expr & fun(signed long int);          \\\n  __gmp_expr & fun(unsigned long int);        \\\n  __gmp_expr & fun(float);                    \\\n  __gmp_expr & fun(double);                   \\\n  __gmp_expr & fun(long double);\n#endif\n\n#define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \\\n__GMPP_DECLARE_COMPOUND_OPERATOR(fun)        \\\n__GMPN_DECLARE_COMPOUND_OPERATOR(fun)\n\n#define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \\\n  __gmp_expr & fun(mp_bitcnt_t);\n\n#define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \\\n  inline __gmp_expr & fun();                  \\\n  inline __gmp_expr fun(int);\n\n\n/**************** mpz_class -- wrapper for mpz_t ****************/\n\ntemplate <>\nclass __gmp_expr<mpz_t, mpz_t>\n{\nprivate:\n  typedef mpz_t value_type;\n  value_type mp;\npublic:\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }\n\n  // constructors and destructor\n  __gmp_expr() { mpz_init(mp); }\n\n  __gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr(__gmp_expr &&z)\n  { *mp = *z.mp; mpz_init(z.mp); }\n#endif\n  template <class T>\n  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)\n  { mpz_init(mp); __gmp_set_expr(mp, expr); }\n  template <class T, class U>\n  explicit __gmp_expr(const __gmp_expr<T, U> &expr)\n  { mpz_init(mp); __gmp_set_expr(mp, expr); }\n\n  __gmp_expr(signed char c) { mpz_init_set_si(mp, c); }\n  __gmp_expr(unsigned char c) { mpz_init_set_ui(mp, c); }\n\n  __gmp_expr(signed int i) { mpz_init_set_si(mp, i); }\n  __gmp_expr(unsigned int i) { mpz_init_set_ui(mp, i); }\n\n  __gmp_expr(signed short int s) { mpz_init_set_si(mp, s); }\n  __gmp_expr(unsigned short int s) { mpz_init_set_ui(mp, s); }\n\n  __gmp_expr(signed long int l) { 
mpz_init_set_si(mp, l); }\n  __gmp_expr(unsigned long int l) { mpz_init_set_ui(mp, l); }\n\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr(signed long long int l) { mpz_init_set_si(mp, l); }\n  __gmp_expr(unsigned long long int  l) { mpz_init_set_ui(mp, l); }\n#endif\n\n#ifdef MPIRXX_INTMAX_T\n  __gmp_expr(intmax_t l) { mpz_init_set_sx(mp, l); }\n#endif\n\n#ifdef MPIRXX_UINTMAX_T\n  __gmp_expr(uintmax_t l) { mpz_init_set_ux(mp, l); }\n#endif\n\n  __gmp_expr(float f) { mpz_init_set_d(mp, f); }\n  __gmp_expr(double d) { mpz_init_set_d(mp, d); }\n  // __gmp_expr(long double ld) { mpz_init_set_d(mp, ld); }\n\n  explicit __gmp_expr(const char *s, int base = 0)\n  {\n    if (mpz_init_set_str (mp, s, base) != 0)\n      {\n        mpz_clear (mp);\n        throw std::invalid_argument (\"mpz_set_str\");\n      }\n  }\n  explicit __gmp_expr(const std::string &s, int base = 0)\n  {\n    if (mpz_init_set_str(mp, s.c_str(), base) != 0)\n      {\n        mpz_clear (mp);\n        throw std::invalid_argument (\"mpz_set_str\");\n      }\n  }\n\n  explicit __gmp_expr(mpz_srcptr z) { mpz_init_set(mp, z); }\n\n  ~__gmp_expr() { mpz_clear(mp); }\n\n  void swap(__gmp_expr& z) __GMPXX_NOEXCEPT { std::swap(*mp, *z.mp); }\n\n  // assignment operators\n  __gmp_expr & operator=(const __gmp_expr &z)\n  { mpz_set(mp, z.mp); return *this; }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr & operator=(__gmp_expr &&z) __GMPXX_NOEXCEPT\n  { swap(z); return *this; }\n#endif\n  template <class T, class U>\n  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)\n  { __gmp_set_expr(mp, expr); return *this; }\n\n__gmp_expr & operator=(signed char c) { mpz_set_si(mp, c); return *this; }\n__gmp_expr & operator=(unsigned char c) { mpz_set_ui(mp, c); return *this; }\n\n__gmp_expr & operator=(signed int i) { mpz_set_si(mp, i); return *this; }\n__gmp_expr & operator=(unsigned int i) { mpz_set_ui(mp, i); return *this; }\n\n  __gmp_expr & operator=(signed short int s)\n  { 
mpz_set_si(mp, s); return *this; }\n  __gmp_expr & operator=(unsigned short int s)\n  { mpz_set_ui(mp, s); return *this; }\n\n  __gmp_expr & operator=(signed long int l)\n  { mpz_set_si(mp, l); return *this; }\n  __gmp_expr & operator=(unsigned long int l)\n  { mpz_set_ui(mp, l); return *this; }\n\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr & operator=(signed long long int i) { mpz_set_si(mp, i); return *this; }\n  __gmp_expr & operator=(unsigned long long int i) { mpz_set_ui(mp, i); return *this; }\n#endif\n\n#ifdef MPIRXX_INTMAX_T\n  __gmp_expr & operator=(intmax_t i) { mpz_set_sx(mp, i); return *this; }\n#endif\n\n#ifdef MPIRXX_UINTMAX_T\n  __gmp_expr & operator=(uintmax_t i) { mpz_set_ux(mp, i); return *this; }\n#endif\n\n  __gmp_expr & operator=(float f) { mpz_set_d(mp, f); return *this; }\n  __gmp_expr & operator=(double d) { mpz_set_d(mp, d); return *this; }\n  // __gmp_expr & operator=(long double ld)\n  // { mpz_set_ld(mp, ld); return *this; }\n\n  __gmp_expr & operator=(const char *s)\n  {\n    if (mpz_set_str (mp, s, 0) != 0)\n      throw std::invalid_argument (\"mpz_set_str\");\n    return *this;\n  }\n  __gmp_expr & operator=(const std::string &s)\n  {\n    if (mpz_set_str(mp, s.c_str(), 0) != 0)\n      throw std::invalid_argument (\"mpz_set_str\");\n    return *this;\n  }\n\n  // string input/output functions\n  int set_str(const char *s, int base)\n  { return mpz_set_str(mp, s, base); }\n  int set_str(const std::string &s, int base)\n  { return mpz_set_str(mp, s.c_str(), base); }\n  std::string get_str(int base = 10) const\n  {\n    __gmp_alloc_cstring temp(mpz_get_str(0, base, mp));\n    return std::string(temp.str);\n  }\n\n  // conversion functions\n  mpz_srcptr __get_mp() const { return mp; }\n  mpz_ptr __get_mp() { return mp; }\n  mpz_srcptr get_mpz_t() const { return mp; }\n  mpz_ptr get_mpz_t() { return mp; }\n\n  mpir_si get_si() const { return mpz_get_si(mp); }\n  mpir_ui get_ui() const { return mpz_get_ui(mp); }\n\n#ifdef MPIRXX_INTMAX_T\n  
intmax_t get_sx() const { return mpz_get_sx(mp); }\n#endif\n#ifdef MPIRXX_UINTMAX_T\n  uintmax_t get_ux() const { return mpz_get_ux(mp); }\n#endif\n\n  double get_d() const { return mpz_get_d(mp); }\n\n  // bool fits_schar_p() const { return mpz_fits_schar_p(mp); }\n  // bool fits_uchar_p() const { return mpz_fits_uchar_p(mp); }\n  bool fits_sint_p() const { return mpz_fits_sint_p(mp) != 0; }\n  bool fits_uint_p() const { return mpz_fits_uint_p(mp) != 0; }\n  bool fits_si_p() const { return mpz_fits_si_p(mp) != 0; }\n  bool fits_ui_p() const { return mpz_fits_ui_p(mp) != 0; }\n  bool fits_sshort_p() const { return mpz_fits_sshort_p(mp) != 0; }\n  bool fits_ushort_p() const { return mpz_fits_ushort_p(mp) != 0; }\n  bool fits_slong_p() const { return mpz_fits_slong_p(mp) != 0; }\n  bool fits_ulong_p() const { return mpz_fits_ulong_p(mp) != 0; }\n  // bool fits_float_p() const { return mpz_fits_float_p(mp) != 0; }\n  // bool fits_double_p() const { return mpz_fits_double_p(mp) != 0; }\n  // bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp) != 0; }\n\n#if __GMPXX_USE_CXX11\n  explicit operator bool() const { return mp->_mp_size != 0; }\n#endif\n\n  // member operators\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator%=)\n\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator&=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator|=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator^=)\n\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)\n\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)\n};\n\ntypedef __gmp_expr<mpz_t, mpz_t> mpz_class;\n\n\n/**************** mpq_class -- wrapper for mpq_t ****************/\n\ntemplate <>\nclass __gmp_expr<mpq_t, mpq_t>\n{\nprivate:\n  typedef mpq_t value_type;\n  value_type 
mp;\npublic:\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }\n  void canonicalize() { mpq_canonicalize(mp); }\n\n  // constructors and destructor\n  __gmp_expr() { mpq_init(mp); }\n\n  __gmp_expr(const __gmp_expr &q)\n  {\n    mpz_init_set(mpq_numref(mp), mpq_numref(q.mp));\n    mpz_init_set(mpq_denref(mp), mpq_denref(q.mp));\n  }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr(__gmp_expr &&q)\n  { *mp = *q.mp; mpq_init(q.mp); }\n#endif\n  template <class T>\n  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)\n  { mpq_init(mp); __gmp_set_expr(mp, expr); }\n  template <class T>\n  __gmp_expr(const __gmp_expr<mpq_t, T> &expr)\n  { mpq_init(mp); __gmp_set_expr(mp, expr); }\n  template <class T, class U>\n  explicit __gmp_expr(const __gmp_expr<T, U> &expr)\n  { mpq_init(mp); __gmp_set_expr(mp, expr); }\n\n  __gmp_expr(signed char c) { mpq_init(mp); mpq_set_si(mp, c, 1); }\n  __gmp_expr(unsigned char c) { mpq_init(mp); mpq_set_ui(mp, c, 1); }\n\n  __gmp_expr(signed int i) { mpq_init(mp); mpq_set_si(mp, i, 1); }\n  __gmp_expr(unsigned int i) { mpq_init(mp); mpq_set_ui(mp, i, 1); }\n\n  __gmp_expr(signed short int s) { mpq_init(mp); mpq_set_si(mp, s, 1); }\n  __gmp_expr(unsigned short int s) { mpq_init(mp); mpq_set_ui(mp, s, 1); }\n\n  __gmp_expr(signed long int l) { mpq_init(mp); mpq_set_si(mp, l, 1); }\n  __gmp_expr(unsigned long int l) { mpq_init(mp); mpq_set_ui(mp, l, 1); }\n\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr(signed long long int l) { mpq_init(mp); mpq_set_si(mp, l, 1); }\n  __gmp_expr(unsigned long long int l) { mpq_init(mp); mpq_set_ui(mp, l, 1); }\n#endif\n\n  __gmp_expr(float f) { mpq_init(mp); mpq_set_d(mp, f); }\n  __gmp_expr(double d) { mpq_init(mp); mpq_set_d(mp, d); }\n  // __gmp_expr(long double ld) { mpq_init(mp); mpq_set_ld(mp, ld); }\n\n  explicit __gmp_expr(const char *s, int base = 0)\n  {\n    mpq_init (mp);\n    // If s is the literal 0, we meant to call another constructor.\n    // If s just happens to evaluate to 
0, we would crash, so whatever.\n    if (s == 0)\n      {\n\t// Don't turn mpq_class(0,0) into 0\n\tmpz_set_si(mpq_denref(mp), base);\n      }\n    else if (mpq_set_str(mp, s, base) != 0)\n      {\n        mpq_clear (mp);\n        throw std::invalid_argument (\"mpq_set_str\");\n      }\n  }\n  explicit __gmp_expr(const std::string &s, int base = 0)\n  {\n    mpq_init(mp);\n    if (mpq_set_str (mp, s.c_str(), base) != 0)\n      {\n        mpq_clear (mp);\n        throw std::invalid_argument (\"mpq_set_str\");\n      }\n  }\n  explicit __gmp_expr(mpq_srcptr q)\n  {\n    mpz_init_set(mpq_numref(mp), mpq_numref(q));\n    mpz_init_set(mpq_denref(mp), mpq_denref(q));\n  }\n\n  __gmp_expr(const mpz_class &num, const mpz_class &den)\n  {\n    mpz_init_set(mpq_numref(mp), num.get_mpz_t());\n    mpz_init_set(mpq_denref(mp), den.get_mpz_t());\n  }\n\n  ~__gmp_expr() { mpq_clear(mp); }\n\n  void swap(__gmp_expr& q) __GMPXX_NOEXCEPT { std::swap(*mp, *q.mp); }\n\n  // assignment operators\n  __gmp_expr & operator=(const __gmp_expr &q)\n  { mpq_set(mp, q.mp); return *this; }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr & operator=(__gmp_expr &&q) __GMPXX_NOEXCEPT\n  { swap(q); return *this; }\n  __gmp_expr & operator=(mpz_class &&z)__GMPXX_NOEXCEPT\n  { get_num() = std::move(z); get_den() = 1u; return *this; }\n#endif\n  template <class T, class U>\n  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)\n  { __gmp_set_expr(mp, expr); return *this; }\n\n  __gmp_expr & operator=(signed char c)\n  { mpq_set_si(mp, c, 1); return *this; }\n  __gmp_expr & operator=(unsigned char c)\n  { mpq_set_ui(mp, c, 1); return *this; }\n\n  __gmp_expr & operator=(signed int i) { mpq_set_si(mp, i, 1); return *this; }\n  __gmp_expr & operator=(unsigned int i)\n  { mpq_set_ui(mp, i, 1); return *this; }\n\n  __gmp_expr & operator=(signed short int s)\n  { mpq_set_si(mp, s, 1); return *this; }\n  __gmp_expr & operator=(unsigned short int s)\n  { mpq_set_ui(mp, 
s, 1); return *this; }\n\n  __gmp_expr & operator=(signed long int l)\n  { mpq_set_si(mp, l, 1); return *this; }\n  __gmp_expr & operator=(unsigned long int l)\n  { mpq_set_ui(mp, l, 1); return *this; }\n\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr & operator=(signed long long int l)\n  { mpq_set_si(mp, l, 1); return *this; }\n  __gmp_expr & operator=(unsigned long long int l)\n  { mpq_set_ui(mp, l, 1); return *this; }\n#endif\n\n  __gmp_expr & operator=(float f) { mpq_set_d(mp, f); return *this; }\n  __gmp_expr & operator=(double d) { mpq_set_d(mp, d); return *this; }\n  // __gmp_expr & operator=(long double ld)\n  // { mpq_set_ld(mp, ld); return *this; }\n\n  __gmp_expr & operator=(const char *s)\n  {\n    if (mpq_set_str (mp, s, 0) != 0)\n      throw std::invalid_argument (\"mpq_set_str\");\n    return *this;\n  }\n  __gmp_expr & operator=(const std::string &s)\n  {\n    if (mpq_set_str(mp, s.c_str(), 0) != 0)\n      throw std::invalid_argument (\"mpq_set_str\");\n    return *this;\n  }\n\n  // string input/output functions\n  int set_str(const char *s, int base)\n  { return mpq_set_str(mp, s, base); }\n  int set_str(const std::string &s, int base)\n  { return mpq_set_str(mp, s.c_str(), base); }\n  std::string get_str(int base = 10) const\n  {\n    __gmp_alloc_cstring temp(mpq_get_str(0, base, mp));\n    return std::string(temp.str);\n  }\n\n  // conversion functions\n\n  // casting a reference to an mpz_t to mpz_class & is a dirty hack,\n  // but works because the internal representation of mpz_class is\n  // exactly an mpz_t\n  const mpz_class & get_num() const\n  { return reinterpret_cast<const mpz_class &>(*mpq_numref(mp)); }\n  mpz_class & get_num()\n  { return reinterpret_cast<mpz_class &>(*mpq_numref(mp)); }\n  const mpz_class & get_den() const\n  { return reinterpret_cast<const mpz_class &>(*mpq_denref(mp)); }\n  mpz_class & get_den()\n  { return reinterpret_cast<mpz_class &>(*mpq_denref(mp)); }\n\n  mpq_srcptr __get_mp() const { return mp; }\n  mpq_ptr 
__get_mp() { return mp; }\n  mpq_srcptr get_mpq_t() const { return mp; }\n  mpq_ptr get_mpq_t() { return mp; }\n\n  mpz_srcptr get_num_mpz_t() const { return mpq_numref(mp); }\n  mpz_ptr get_num_mpz_t() { return mpq_numref(mp); }\n  mpz_srcptr get_den_mpz_t() const { return mpq_denref(mp); }\n  mpz_ptr get_den_mpz_t() { return mpq_denref(mp); }\n\n  double get_d() const { return mpq_get_d(mp); }\n\n#if __GMPXX_USE_CXX11\n  explicit operator bool() const { return mpq_numref(mp)->_mp_size != 0; }\n#endif\n\n  // compound assignments\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)\n\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)\n\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)\n};\n\ntypedef __gmp_expr<mpq_t, mpq_t> mpq_class;\n\n\n/**************** mpf_class -- wrapper for mpf_t ****************/\n\ntemplate <>\nclass __gmp_expr<mpf_t, mpf_t>\n{\nprivate:\n  typedef mpf_t value_type;\n  value_type mp;\npublic:\n  mp_bitcnt_t get_prec() const { return mpf_get_prec(mp); }\n\n  void set_prec(mp_bitcnt_t prec) { mpf_set_prec(mp, prec); }\n  void set_prec_raw(mp_bitcnt_t prec) { mpf_set_prec_raw(mp, prec); }\n\n  // constructors and destructor\n  __gmp_expr() { mpf_init(mp); }\n\n  __gmp_expr(const __gmp_expr &f)\n  { mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr(__gmp_expr &&f)\n  { *mp = *f.mp; mpf_init2(f.mp, get_prec()); }\n#endif\n  __gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set(mp, f.mp); }\n  template <class T, class U>\n  __gmp_expr(const __gmp_expr<T, U> &expr)\n  { mpf_init2(mp, expr.get_prec()); __gmp_set_expr(mp, expr); }\n  template <class T, class U>\n  __gmp_expr(const __gmp_expr<T, U> &expr, mp_bitcnt_t prec)\n  { 
mpf_init2(mp, prec); __gmp_set_expr(mp, expr); }\n\n  __gmp_expr(signed char c) { mpf_init_set_si(mp, c); }\n  __gmp_expr(signed char c, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_si(mp, c); }\n  __gmp_expr(unsigned char c) { mpf_init_set_ui(mp, c); }\n  __gmp_expr(unsigned char c, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_ui(mp, c); }\n\n  __gmp_expr(signed int i) { mpf_init_set_si(mp, i); }\n  __gmp_expr(signed int i, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_si(mp, i); }\n  __gmp_expr(unsigned int i) { mpf_init_set_ui(mp, i); }\n  __gmp_expr(unsigned int i, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_ui(mp, i); }\n\n  __gmp_expr(signed short int s) { mpf_init_set_si(mp, s); }\n  __gmp_expr(signed short int s, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_si(mp, s); }\n  __gmp_expr(unsigned short int s) { mpf_init_set_ui(mp, s); }\n  __gmp_expr(unsigned short int s, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_ui(mp, s); }\n\n  __gmp_expr(signed long int l) { mpf_init_set_si(mp, l); }\n  __gmp_expr(signed long int l, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_si(mp, l); }\n  __gmp_expr(unsigned long int l) { mpf_init_set_ui(mp, l); }\n  __gmp_expr(unsigned long int l, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_ui(mp, l); }\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr(signed long long int s) { mpf_init_set_si(mp, s); }\n  __gmp_expr(signed long long int s, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_si(mp, s); }\n  __gmp_expr(unsigned long long int s) { mpf_init_set_ui(mp, s); }\n  __gmp_expr(unsigned long long int s, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_ui(mp, s); }\n#endif\n\n  __gmp_expr(float f) { mpf_init_set_d(mp, f); }\n  __gmp_expr(float f, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_d(mp, f); }\n  __gmp_expr(double d) { mpf_init_set_d(mp, d); }\n  __gmp_expr(double d, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set_d(mp, d); }\n  // __gmp_expr(long double ld) 
{ mpf_init_set_d(mp, ld); }\n  // __gmp_expr(long double ld, mp_bitcnt_t prec)\n  // { mpf_init2(mp, prec); mpf_set_d(mp, ld); }\n\n  explicit __gmp_expr(const char *s)\n  {\n    if (mpf_init_set_str (mp, s, 0) != 0)\n      {\n        mpf_clear (mp);\n        throw std::invalid_argument (\"mpf_set_str\");\n      }\n  }\n  __gmp_expr(const char *s, mp_bitcnt_t prec, int base = 0)\n  {\n    mpf_init2(mp, prec);\n    if (mpf_set_str(mp, s, base) != 0)\n      {\n        mpf_clear (mp);\n        throw std::invalid_argument (\"mpf_set_str\");\n      }\n  }\n  explicit __gmp_expr(const std::string &s)\n  {\n    if (mpf_init_set_str(mp, s.c_str(), 0) != 0)\n      {\n        mpf_clear (mp);\n        throw std::invalid_argument (\"mpf_set_str\");\n      }\n  }\n  __gmp_expr(const std::string &s, mp_bitcnt_t prec, int base = 0)\n  {\n    mpf_init2(mp, prec);\n    if (mpf_set_str(mp, s.c_str(), base) != 0)\n      {\n        mpf_clear (mp);\n        throw std::invalid_argument (\"mpf_set_str\");\n      }\n  }\n\n  explicit __gmp_expr(mpf_srcptr f)\n  { mpf_init2(mp, mpf_get_prec(f)); mpf_set(mp, f); }\n  __gmp_expr(mpf_srcptr f, mp_bitcnt_t prec)\n  { mpf_init2(mp, prec); mpf_set(mp, f); }\n\n  ~__gmp_expr() { mpf_clear(mp); }\n\n  void swap(__gmp_expr& f) __GMPXX_NOEXCEPT { std::swap(*mp, *f.mp); }\n\n  // assignment operators\n  __gmp_expr & operator=(const __gmp_expr &f)\n  { mpf_set(mp, f.mp); return *this; }\n#if __GMPXX_USE_CXX11 || defined( MSC_CXX_11 )\n  __gmp_expr & operator=(__gmp_expr &&f) __GMPXX_NOEXCEPT\n  { swap(f); return *this; }\n#endif\n  template <class T, class U>\n  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)\n  { __gmp_set_expr(mp, expr); return *this; }\n\n  __gmp_expr & operator=(signed char c) { mpf_set_si(mp, c); return *this; }\n  __gmp_expr & operator=(unsigned char c) { mpf_set_ui(mp, c); return *this; }\n\n  __gmp_expr & operator=(signed int i) { mpf_set_si(mp, i); return *this; }\n  __gmp_expr & 
operator=(unsigned int i) { mpf_set_ui(mp, i); return *this; }\n\n  __gmp_expr & operator=(signed short int s)\n  { mpf_set_si(mp, s); return *this; }\n  __gmp_expr & operator=(unsigned short int s)\n  { mpf_set_ui(mp, s); return *this; }\n\n  __gmp_expr & operator=(signed long int l)\n  { mpf_set_si(mp, l); return *this; }\n  __gmp_expr & operator=(unsigned long int l)\n  { mpf_set_ui(mp, l); return *this; }\n\n#ifdef MPIRXX_HAVE_LLONG\n  __gmp_expr & operator=(signed long long int l)\n  { mpf_set_si(mp, l); return *this; }\n  __gmp_expr & operator=(unsigned long long int l)\n  { mpf_set_ui(mp, l); return *this; }\n#endif\n\n  __gmp_expr & operator=(float f) { mpf_set_d(mp, f); return *this; }\n  __gmp_expr & operator=(double d) { mpf_set_d(mp, d); return *this; }\n  // __gmp_expr & operator=(long double ld)\n  // { mpf_set_ld(mp, ld); return *this; }\n\n  __gmp_expr & operator=(const char *s)\n  {\n    if (mpf_set_str (mp, s, 0) != 0)\n      throw std::invalid_argument (\"mpf_set_str\");\n    return *this;\n  }\n  __gmp_expr & operator=(const std::string &s)\n  {\n    if (mpf_set_str(mp, s.c_str(), 0) != 0)\n      throw std::invalid_argument (\"mpf_set_str\");\n    return *this;\n  }\n\n  // string input/output functions\n  int set_str(const char *s, int base)\n  { return mpf_set_str(mp, s, base); }\n  int set_str(const std::string &s, int base)\n  { return mpf_set_str(mp, s.c_str(), base); }\n  std::string get_str(mp_exp_t &expo, int base = 10, size_t size = 0) const\n  {\n    __gmp_alloc_cstring temp(mpf_get_str(0, &expo, base, size, mp));\n    return std::string(temp.str);\n  }\n\n  // conversion functions\n  mpf_srcptr __get_mp() const { return mp; }\n  mpf_ptr __get_mp() { return mp; }\n  mpf_srcptr get_mpf_t() const { return mp; }\n  mpf_ptr get_mpf_t() { return mp; }\n\n  mpir_si get_si() const { return mpf_get_si(mp); }\n  mpir_ui get_ui() const { return mpf_get_ui(mp); }\n  double get_d() const { return mpf_get_d(mp); }\n\n  // bool fits_schar_p() const 
{ return mpf_fits_schar_p(mp)!= 0; }\n  // bool fits_uchar_p() const { return mpf_fits_uchar_p(mp)!= 0; }\n  bool fits_sint_p() const { return mpf_fits_sint_p(mp) != 0; }\n  bool fits_uint_p() const { return mpf_fits_uint_p(mp) != 0; }\n  bool fits_si_p() const { return mpf_fits_si_p(mp) != 0; }\n  bool fits_ui_p() const { return mpf_fits_ui_p(mp) != 0; }\n  bool fits_sshort_p() const { return mpf_fits_sshort_p(mp) != 0; }\n  bool fits_ushort_p() const { return mpf_fits_ushort_p(mp) != 0; }\n  bool fits_slong_p() const { return mpf_fits_slong_p(mp) != 0; }\n  bool fits_ulong_p() const { return mpf_fits_ulong_p(mp) != 0; }\n  // bool fits_float_p() const { return mpf_fits_float_p(mp)!= 0; }\n  // bool fits_double_p() const { return mpf_fits_double_p(mp)!= 0; }\n  // bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp)!= 0; }\n\n#if __GMPXX_USE_CXX11\n  explicit operator bool() const { return mp->_mp_size != 0; }\n#endif\n\n  // compound assignments\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)\n  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)\n\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)\n  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)\n\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)\n  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)\n};\n\ntypedef __gmp_expr<mpf_t, mpf_t> mpf_class;\n\n\n\n/**************** User-defined literals ****************/\n\n#if __GMPXX_USE_CXX11\ninline mpz_class operator\"\" _mpz(const char* s)\n{\n  return mpz_class(s);\n}\n\ninline mpq_class operator\"\" _mpq(const char* s)\n{\n  mpq_class q;\n  q.get_num() = s;\n  return q;\n}\n\ninline mpf_class operator\"\" _mpf(const char* s)\n{\n  return mpf_class(s);\n}\n#endif\n\n/**************** I/O operators ****************/\n\n// these should (and will) be provided separately\n\ntemplate <class T, class U>\ninline std::ostream & operator<<\n(std::ostream &o, const __gmp_expr<T, U> 
&expr)\n{\n  __gmp_expr<T, T> const& temp(expr);\n  return o << temp.__get_mp();\n}\n\ntemplate <class T>\ninline std::istream & operator>>(std::istream &i, __gmp_expr<T, T> &expr)\n{\n  return i >> expr.__get_mp();\n}\n\n/*\n// you might want to uncomment this\ninline std::istream & operator>>(std::istream &i, mpq_class &q)\n{\n  i >> q.get_mpq_t();\n  q.canonicalize();\n  return i;\n}\n*/\n\n\n/**************** Functions for type conversion ****************/\n\ninline void __gmp_set_expr(mpz_ptr z, const mpz_class &w)\n{\n  mpz_set(z, w.get_mpz_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpz_t, T> &expr)\n{\n  expr.eval(z);\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpq_t, T> &expr)\n{\n  mpq_class const& temp(expr);\n  mpz_set_q(z, temp.get_mpq_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpf_t, T> &expr)\n{\n  mpf_class const& temp(expr);\n  mpz_set_f(z, temp.get_mpf_t());\n}\n\ninline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)\n{\n  mpq_set_z(q, z.get_mpz_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpz_t, T> &expr)\n{\n  __gmp_set_expr(mpq_numref(q), expr);\n  mpz_set_ui(mpq_denref(q), 1);\n}\n\ninline void __gmp_set_expr(mpq_ptr q, const mpq_class &r)\n{\n  mpq_set(q, r.get_mpq_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpq_t, T> &expr)\n{\n  expr.eval(q);\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpf_t, T> &expr)\n{\n  mpf_class const& temp(expr);\n  mpq_set_f(q, temp.get_mpf_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpz_t, T> &expr)\n{\n  mpz_class const& temp(expr);\n  mpf_set_z(f, temp.get_mpz_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpq_t, T> &expr)\n{\n  mpq_class const& temp(expr);\n  mpf_set_q(f, 
temp.get_mpq_t());\n}\n\ninline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)\n{\n  mpf_set(f, g.get_mpf_t());\n}\n\ntemplate <class T>\ninline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpf_t, T> &expr)\n{\n  expr.eval(f);\n}\n\n\n/* Temporary objects */\n\ntemplate <class T>\nclass __gmp_temp\n{\n  __gmp_expr<T, T> val;\n  public:\n  template<class U, class V>\n  __gmp_temp(U const& u, V) : val (u) {}\n  typename __gmp_resolve_expr<T>::srcptr_type\n  __get_mp() const { return val.__get_mp(); }\n};\n\ntemplate <>\nclass __gmp_temp <mpf_t>\n{\n  mpf_class val;\n  public:\n  template<class U>\n  __gmp_temp(U const& u, mpf_ptr res) : val (u, mpf_get_prec(res)) {}\n  mpf_srcptr __get_mp() const { return val.__get_mp(); }\n};\n\n/**************** Specializations of __gmp_expr ****************/\n/* The eval() method of __gmp_expr<T, U> evaluates the corresponding\n   expression and assigns the result to its argument, which is either an\n   mpz_t, mpq_t, or mpf_t as specified by the T argument.\n   Compound expressions are evaluated recursively (temporaries are created\n   to hold intermediate values), while for simple expressions the eval()\n   method of the appropriate function object (available as the Op argument\n   of either __gmp_unary_expr<T, Op> or __gmp_binary_expr<T, U, Op>) is\n   called. 
*/\n\n\n/**************** Unary expressions ****************/\n/* cases:\n   - simple:   argument is mp*_class, that is, __gmp_expr<T, T>\n   - compound: argument is __gmp_expr<T, U> (with U not equal to T) */\n\n\n// simple expressions\n\ntemplate <class T, class Op>\nclass __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, T> val_type;\n\n  __gmp_unary_expr<val_type, Op> expr;\npublic:\n  explicit __gmp_expr(const val_type &val) : expr(val) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  { Op::eval(p, expr.val.__get_mp()); }\n  const val_type & get_val() const { return expr.val; }\n  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }\n};\n\n\n// compound expressions\n\ntemplate <class T, class U, class Op>\nclass __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, U> val_type;\n\n  __gmp_unary_expr<val_type, Op> expr;\npublic:\n  explicit __gmp_expr(const val_type &val) : expr(val) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  { expr.val.eval(p); Op::eval(p, p); }\n  const val_type & get_val() const { return expr.val; }\n  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }\n};\n\n\n/**************** Binary expressions ****************/\n/* simple:\n   - arguments are both mp*_class\n   - one argument is mp*_class, one is a built-in type\n   compound:\n   - one is mp*_class, one is __gmp_expr<T, U>\n   - one is __gmp_expr<T, U>, one is built-in\n   - both arguments are __gmp_expr<...> */\n\n\n// simple expressions\n\ntemplate <class T, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, T> val1_type;\n  typedef __gmp_expr<T, T> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type 
p) const\n  { Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? prec1 : prec2;\n  }\n};\n\n\n// simple expressions, T is a built-in numerical type\n\ntemplate <class T, class U, class Op>\nclass __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, T> val1_type;\n  typedef U val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  { Op::eval(p, expr.val1.__get_mp(), expr.val2); }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }\n};\n\ntemplate <class T, class U, class Op>\nclass __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >\n{\nprivate:\n  typedef U val1_type;\n  typedef __gmp_expr<T, T> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  { Op::eval(p, expr.val1, expr.val2.__get_mp()); }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }\n};\n\n\n// compound expressions, one argument is a subexpression\n\ntemplate <class T, class U, class V, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, T> val1_type;\n  typedef __gmp_expr<U, V> val2_type;\n\n  
__gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    if(p != expr.val1.__get_mp())\n    {\n      __gmp_set_expr(p, expr.val2);\n      Op::eval(p, expr.val1.__get_mp(), p);\n    }\n    else\n    {\n      __gmp_temp<T> temp(expr.val2, p);\n      Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());\n    }\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? prec1 : prec2;\n  }\n};\n\ntemplate <class T, class U, class V, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >\n{\nprivate:\n  typedef __gmp_expr<U, V> val1_type;\n  typedef __gmp_expr<T, T> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    if(p != expr.val2.__get_mp())\n    {\n      __gmp_set_expr(p, expr.val1);\n      Op::eval(p, p, expr.val2.__get_mp());\n    }\n    else\n    {\n      __gmp_temp<T> temp(expr.val1, p);\n      Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());\n    }\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? 
prec1 : prec2;\n  }\n};\n\ntemplate <class T, class U, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, T> val1_type;\n  typedef __gmp_expr<T, U> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    if(p != expr.val1.__get_mp())\n    {\n      __gmp_set_expr(p, expr.val2);\n      Op::eval(p, expr.val1.__get_mp(), p);\n    }\n    else\n    {\n      __gmp_temp<T> temp(expr.val2, p);\n      Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());\n    }\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? prec1 : prec2;\n  }\n};\n\ntemplate <class T, class U, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, U> val1_type;\n  typedef __gmp_expr<T, T> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    if(p != expr.val2.__get_mp())\n    {\n      __gmp_set_expr(p, expr.val1);\n      Op::eval(p, p, expr.val2.__get_mp());\n    }\n    else\n    {\n      __gmp_temp<T> temp(expr.val1, p);\n      Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());\n    }\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? 
prec1 : prec2;\n  }\n};\n\n\n// one argument is a subexpression, one is a built-in\n\ntemplate <class T, class U, class V, class Op>\nclass __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, U> val1_type;\n  typedef V val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    expr.val1.eval(p);\n    Op::eval(p, p, expr.val2);\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }\n};\n\ntemplate <class T, class U, class V, class Op>\nclass __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >\n{\nprivate:\n  typedef U val1_type;\n  typedef __gmp_expr<T, V> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    expr.val2.eval(p);\n    Op::eval(p, expr.val1, p);\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }\n};\n\n\n// both arguments are subexpressions\n\ntemplate <class T, class U, class V, class W, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, U> val1_type;\n  typedef __gmp_expr<V, W> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    __gmp_temp<T> temp2(expr.val2, p);\n    expr.val1.eval(p);\n    Op::eval(p, p, 
temp2.__get_mp());\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? prec1 : prec2;\n  }\n};\n\ntemplate <class T, class U, class V, class W, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >\n{\nprivate:\n  typedef __gmp_expr<U, V> val1_type;\n  typedef __gmp_expr<T, W> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    __gmp_temp<T> temp1(expr.val1, p);\n    expr.val2.eval(p);\n    Op::eval(p, temp1.__get_mp(), p);\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? prec1 : prec2;\n  }\n};\n\ntemplate <class T, class U, class V, class Op>\nclass __gmp_expr\n<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >\n{\nprivate:\n  typedef __gmp_expr<T, U> val1_type;\n  typedef __gmp_expr<T, V> val2_type;\n\n  __gmp_binary_expr<val1_type, val2_type, Op> expr;\npublic:\n  __gmp_expr(const val1_type &val1, const val2_type &val2)\n    : expr(val1, val2) { }\n  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const\n  {\n    __gmp_temp<T> temp2(expr.val2, p);\n    expr.val1.eval(p);\n    Op::eval(p, p, temp2.__get_mp());\n  }\n  const val1_type & get_val1() const { return expr.val1; }\n  const val2_type & get_val2() const { return expr.val2; }\n  mp_bitcnt_t get_prec() const\n  {\n    mp_bitcnt_t prec1 = expr.val1.get_prec(),\n      prec2 = expr.val2.get_prec();\n    return (prec1 > prec2) ? 
prec1 : prec2;\n  }\n};\n\n\n/**************** Special cases ****************/\n\n/* Some operations (i.e., add and subtract) with mixed mpz/mpq arguments\n   can be done directly without first converting the mpz to mpq.\n   Appropriate specializations of __gmp_expr are required. */\n\n\n#define __GMPZQ_DEFINE_EXPR(eval_fun)                                       \\\n                                                                            \\\ntemplate <>                                                                 \\\nclass __gmp_expr<mpq_t, __gmp_binary_expr<mpz_class, mpq_class, eval_fun> > \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef mpz_class val1_type;                                              \\\n  typedef mpq_class val2_type;                                              \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  { eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); }      \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <>                                                                 \\\nclass __gmp_expr<mpq_t, 
__gmp_binary_expr<mpq_class, mpz_class, eval_fun> > \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef mpq_class val1_type;                                              \\\n  typedef mpz_class val2_type;                                              \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  { eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); }      \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T>                                                          \\\nclass __gmp_expr                                                            \\\n<mpq_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpq_t, T>, eval_fun> >      \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef mpz_class val1_type;                                              \\\n  typedef __gmp_expr<mpq_t, T> val2_type;                                   \\\n                                                                            \\\n  
__gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n    mpq_class temp(expr.val2);                                              \\\n    eval_fun::eval(q, expr.val1.get_mpz_t(), temp.get_mpq_t());             \\\n  }                                                                         \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T>                                                          \\\nclass __gmp_expr                                                            \\\n<mpq_t, __gmp_binary_expr<mpq_class, __gmp_expr<mpz_t, T>, eval_fun> >      \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef mpq_class val1_type;                                              \\\n  typedef __gmp_expr<mpz_t, T> val2_type;                                   \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n  
  : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n    mpz_class temp(expr.val2);                                              \\\n    eval_fun::eval(q, expr.val1.get_mpq_t(), temp.get_mpz_t());             \\\n  }                                                                         \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T>                                                          \\\nclass __gmp_expr                                                            \\\n<mpq_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, mpq_class, eval_fun> >      \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef __gmp_expr<mpz_t, T> val1_type;                                   \\\n  typedef mpq_class val2_type;                                              \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n  
  mpz_class temp(expr.val1);                                              \\\n    eval_fun::eval(q, temp.get_mpz_t(), expr.val2.get_mpq_t());             \\\n  }                                                                         \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T>                                                          \\\nclass __gmp_expr                                                            \\\n<mpq_t, __gmp_binary_expr<__gmp_expr<mpq_t, T>, mpz_class, eval_fun> >      \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef __gmp_expr<mpq_t, T> val1_type;                                   \\\n  typedef mpz_class val2_type;                                              \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n    mpq_class temp(expr.val1);                                              \\\n    eval_fun::eval(q, temp.get_mpq_t(), expr.val2.get_mpz_t());             \\\n  }                                                                         \\\n  
const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T, class U>                                                 \\\nclass __gmp_expr<mpq_t, __gmp_binary_expr                                   \\\n<__gmp_expr<mpz_t, T>, __gmp_expr<mpq_t, U>, eval_fun> >                    \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef __gmp_expr<mpz_t, T> val1_type;                                   \\\n  typedef __gmp_expr<mpq_t, U> val2_type;                                   \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n    mpz_class temp1(expr.val1);                                             \\\n    expr.val2.eval(q);                                                      \\\n    eval_fun::eval(q, temp1.get_mpz_t(), q);                                \\\n  }                                                                         \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  
mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \\\n};                                                                          \\\n                                                                            \\\ntemplate <class T, class U>                                                 \\\nclass __gmp_expr<mpq_t, __gmp_binary_expr                                   \\\n<__gmp_expr<mpq_t, T>, __gmp_expr<mpz_t, U>, eval_fun> >                    \\\n{                                                                           \\\nprivate:                                                                    \\\n  typedef __gmp_expr<mpq_t, T> val1_type;                                   \\\n  typedef __gmp_expr<mpz_t, U> val2_type;                                   \\\n                                                                            \\\n  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \\\npublic:                                                                     \\\n  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \\\n    : expr(val1, val2) { }                                                  \\\n  void eval(mpq_ptr q) const                                                \\\n  {                                                                         \\\n    mpz_class temp2(expr.val2);                                             \\\n    expr.val1.eval(q);                                             \\\n    eval_fun::eval(q, q, temp2.get_mpz_t());                \\\n  }                                                                         \\\n  const val1_type & get_val1() const { return expr.val1; }                  \\\n  const val2_type & get_val2() const { return expr.val2; }                  \\\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           
\\\n};\n\n\n__GMPZQ_DEFINE_EXPR(__gmp_binary_plus)\n__GMPZQ_DEFINE_EXPR(__gmp_binary_minus)\n\n\n\n/**************** Macros for defining functions ****************/\n/* Results of operators and functions are instances of __gmp_expr<T, U>.\n   T determines the numerical type of the expression: it can be either\n   mpz_t, mpq_t, or mpf_t.  When the arguments of a binary\n   expression have different numerical types, __gmp_resolve_expr is used\n   to determine the \"larger\" type.\n   U is either __gmp_unary_expr<V, Op> or __gmp_binary_expr<V, W, Op>,\n   where V and W are the arguments' types -- they can in turn be\n   expressions, thus allowing to build compound expressions to any\n   degree of complexity.\n   Op is a function object that must have an eval() method accepting\n   appropriate arguments.\n   Actual evaluation of a __gmp_expr<T, U> object is done when it gets\n   assigned to an mp*_class (\"lazy\" evaluation): this is done by calling\n   its eval() method. */\n\n\n// non-member unary operators and functions\n\n#define __GMP_DEFINE_UNARY_FUNCTION(fun, eval_fun)                           \\\n                                                                             \\\ntemplate <class T, class U>                                                  \\\ninline __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \\\nfun(const __gmp_expr<T, U> &expr)                                            \\\n{                                                                            \\\n  return __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >(expr); \\\n}\n\n#define __GMP_DEFINE_UNARY_TYPE_FUNCTION(type, fun, eval_fun) \\\n                                                              \\\ntemplate <class T, class U>                                   \\\ninline type fun(const __gmp_expr<T, U> &expr)                 \\\n{                                                             \\\n  __gmp_expr<T, T> const& temp(expr); \\\n  return 
eval_fun::eval(temp.__get_mp());                     \\\n}\n\n\n// non-member binary operators and functions\n\n#define __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)                   \\\n                                                                       \\\ntemplate <class T, class U, class V, class W>                          \\\ninline __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,       \\\n__gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> >      \\\nfun(const __gmp_expr<T, U> &expr1, const __gmp_expr<V, W> &expr2)      \\\n{                                                                      \\\n  return __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,     \\\n     __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> > \\\n    (expr1, expr2);                                                    \\\n}\n\n#define __GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, bigtype)       \\\n                                                                           \\\ntemplate <class T, class U>                                                \\\ninline __gmp_expr                                                          \\\n<T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >               \\\nfun(const __gmp_expr<T, U> &expr, type t)                                  \\\n{                                                                          \\\n  return __gmp_expr                                                        \\\n    <T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >(expr, t); \\\n}                                                                          \\\n                                                                           \\\ntemplate <class T, class U>                                                \\\ninline __gmp_expr                                                          \\\n<T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >               \\\nfun(type t, const 
__gmp_expr<T, U> &expr)                                  \\\n{                                                                          \\\n  return __gmp_expr                                                        \\\n    <T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >(t, expr); \\\n}\n\n#define __GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)          \\\n__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, mpir_si)\n\n#define __GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)            \\\n__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, mpir_ui)\n\n#define __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \\\n__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, double)\n\n#define __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)     \\\n__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, long double)\n\n#ifdef MPIRXX_HAVE_LLONG\n#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)              \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char)        \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int)  \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long long int)    \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long long int)  \\\n__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float)              \\\n__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double)             \\\n__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double)\n#else\n#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)              \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char)        
\\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int)  \\\n__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float)              \\\n__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double)             \\\n__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double)\n#endif\n\n#define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \\\n__GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)        \\\n__GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)\n\n\n#define __GMP_DEFINE_BINARY_FUNCTION_UI(fun, eval_fun)                 \\\n                                                                       \\\ntemplate <class T, class U>                                            \\\ninline __gmp_expr                                                      \\\n<T, __gmp_binary_expr<__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> > \\\nfun(const __gmp_expr<T, U> &expr, mp_bitcnt_t l)                 \\\n{                                                                      \\\n  return __gmp_expr<T, __gmp_binary_expr                               \\\n    <__gmp_expr<T, U>, mpir_ui, eval_fun> >(expr, l);                   \\\n}\n\n\n#define __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)         \\\n                                                                        \\\ntemplate <class T, class U, class V, class W>                           \\\ninline type fun(const __gmp_expr<T, U> &expr1,                          \\\n\t\tconst __gmp_expr<V, W> &expr2)                          \\\n{                                                                       \\\n  typedef 
typename __gmp_resolve_expr<T, V>::value_type eval_type;      \\\n  __gmp_expr<eval_type, eval_type> const& temp1(expr1); \\\n  __gmp_expr<eval_type, eval_type> const& temp2(expr2); \\\n  return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp());            \\\n}\n\n#define __GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,   \\\n\t\t\t\t\t    type2, bigtype)        \\\n                                                                   \\\ntemplate <class T, class U>                                        \\\ninline type fun(const __gmp_expr<T, U> &expr, type2 t)             \\\n{                                                                  \\\n  __gmp_expr<T, T> const& temp(expr);      \\\n  return eval_fun::eval(temp.__get_mp(), static_cast<bigtype>(t)); \\\n}                                                                  \\\n                                                                   \\\ntemplate <class T, class U>                                        \\\ninline type fun(type2 t, const __gmp_expr<T, U> &expr)             \\\n{                                                                  \\\n  __gmp_expr<T, T> const& temp(expr);      \\\n  return eval_fun::eval(static_cast<bigtype>(t), temp.__get_mp()); \\\n}\n\n#define __GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \\\n\t\t\t\t    type2, mpir_si)\n\n#define __GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \\\n\t\t\t\t    type2, mpir_ui)\n\n#define __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, double)\n\n#define __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2)     \\\n__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, long double)\n\n#ifdef MPIRXX_HAVE_LLONG\n#define 
__GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)              \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char)        \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int)  \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long long int)    \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long long int)  \\\n__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float)              \\\n__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double)             \\\n__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double)\n#else\n#define __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)              \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char)        \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int)  \\\n__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float)              \\\n__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, 
eval_fun, double)             \\\n__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double)\n#endif\n\n#define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \\\n__GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)        \\\n__GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)\n\n\n// member operators\n\n#define __GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)                 \\\n                                                                             \\\ntemplate <class T, class U>                                                  \\\ninline type##_class & type##_class::fun(const __gmp_expr<T, U> &expr)        \\\n{                                                                            \\\n  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr                  \\\n\t\t <type##_class, __gmp_expr<T, U>, eval_fun> >(*this, expr)); \\\n  return *this;                                                              \\\n}\n\n#define __GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,    \\\n\t\t\t\t\t type2, bigtype)         \\\n                                                                 \\\ninline type##_class & type##_class::fun(type2 t)                 \\\n{                                                                \\\n  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr      \\\n\t\t <type##_class, bigtype, eval_fun> >(*this, t)); \\\n  return *this;                                                  \\\n}\n\n#define __GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \\\n\t\t\t\t type2, mpir_si)\n\n#define __GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \\\n\t\t\t\t type2, mpir_ui)\n\n#define __GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \\\n__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, double)\n\n#define 
__GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2)     \\\n__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, long double)\n\n#ifdef MPIRXX_HAVE_LLONG\n#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)              \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char)        \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int)  \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long long int)    \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long long int)  \\\n__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float)              \\\n__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double)             \\\n/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */\n#else\n#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)              \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char)        \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char)      \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int)         \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int)       \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int)   \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \\\n__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int)    \\\n__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int)  
\\\n__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float)              \\\n__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double)             \\\n/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */\n#endif\n\n#define __GMP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \\\n__GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)        \\\n__GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)\n\n#define __GMPZ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR(mpz, fun, eval_fun)\n\n#define __GMPQ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR(mpq, fun, eval_fun)\n\n#define __GMPF_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR(mpf, fun, eval_fun)\n\n\n\n#define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun)  \\\n                                                                \\\ninline type##_class & type##_class::fun(mpir_ui l)               \\\n{                                                               \\\n  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr     \\\n    <type##_class, mpir_ui, eval_fun> >(*this, l));              \\\n  return *this;                                                 \\\n}\n\n#define __GMPZ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpz, fun, eval_fun)\n\n#define __GMPQ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpq, fun, eval_fun)\n\n#define __GMPF_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \\\n__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpf, fun, eval_fun)\n\n\n\n#define __GMP_DEFINE_INCREMENT_OPERATOR(type, fun, eval_fun) \\\n                                                             \\\ninline type##_class & type##_class::fun()                    \\\n{                                                            \\\n  eval_fun::eval(mp);                                        \\\n  return *this;                                            
  \\\n}                                                            \\\n                                                             \\\ninline type##_class type##_class::fun(int)                   \\\n{                                                            \\\n  type##_class temp(*this);                                  \\\n  eval_fun::eval(mp);                                        \\\n  return temp;                                               \\\n}\n\n#define __GMPZ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_INCREMENT_OPERATOR(mpz, fun, eval_fun)\n\n#define __GMPQ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_INCREMENT_OPERATOR(mpq, fun, eval_fun)\n\n#define __GMPF_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \\\n__GMP_DEFINE_INCREMENT_OPERATOR(mpf, fun, eval_fun)\n\n\n\n/**************** Arithmetic operators and functions ****************/\n\n// non-member operators and functions\n\n__GMP_DEFINE_UNARY_FUNCTION(operator+, __gmp_unary_plus)\n__GMP_DEFINE_UNARY_FUNCTION(operator-, __gmp_unary_minus)\n__GMP_DEFINE_UNARY_FUNCTION(operator~, __gmp_unary_com)\n\n__GMP_DEFINE_BINARY_FUNCTION(operator+, __gmp_binary_plus)\n__GMP_DEFINE_BINARY_FUNCTION(operator-, __gmp_binary_minus)\n__GMP_DEFINE_BINARY_FUNCTION(operator*, __gmp_binary_multiplies)\n__GMP_DEFINE_BINARY_FUNCTION(operator/, __gmp_binary_divides)\n__GMP_DEFINE_BINARY_FUNCTION(operator%, __gmp_binary_modulus)\n__GMP_DEFINE_BINARY_FUNCTION(operator&, __gmp_binary_and)\n__GMP_DEFINE_BINARY_FUNCTION(operator|, __gmp_binary_ior)\n__GMP_DEFINE_BINARY_FUNCTION(operator^, __gmp_binary_xor)\n\n__GMP_DEFINE_BINARY_FUNCTION_UI(operator<<, __gmp_binary_lshift)\n__GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift)\n\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, ! 
__gmp_binary_equal)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, ! __gmp_binary_greater)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, ! __gmp_binary_less)\n\n__GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function)\n__GMP_DEFINE_UNARY_FUNCTION(trunc, __gmp_trunc_function)\n__GMP_DEFINE_UNARY_FUNCTION(floor, __gmp_floor_function)\n__GMP_DEFINE_UNARY_FUNCTION(ceil, __gmp_ceil_function)\n__GMP_DEFINE_UNARY_FUNCTION(sqrt, __gmp_sqrt_function)\n__GMP_DEFINE_BINARY_FUNCTION(hypot, __gmp_hypot_function)\n__GMP_DEFINE_BINARY_FUNCTION(gcd, __gmp_gcd_function)\n__GMP_DEFINE_BINARY_FUNCTION(lcm, __gmp_lcm_function)\n\n__GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function)\n__GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function)\n\ntemplate <class T>\nvoid swap(__gmp_expr<T, T>& x, __gmp_expr<T, T>& y) __GMPXX_NOEXCEPT\n{ x.swap(y); }\n\n// member operators for mpz_class\n\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator%=, __gmp_binary_modulus)\n\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator&=, __gmp_binary_and)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator|=, __gmp_binary_ior)\n__GMPZ_DEFINE_COMPOUND_OPERATOR(operator^=, __gmp_binary_xor)\n\n__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)\n__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)\n\n__GMPZ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)\n__GMPZ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)\n\n// member operators for mpq_class\n\n__GMPQ_DEFINE_COMPOUND_OPERATOR(operator+=, 
__gmp_binary_plus)\n__GMPQ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)\n__GMPQ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)\n__GMPQ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)\n\n__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)\n__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)\n\n__GMPQ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)\n__GMPQ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)\n\n// member operators for mpf_class\n\n__GMPF_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)\n__GMPF_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)\n__GMPF_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)\n__GMPF_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)\n\n__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)\n__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)\n\n__GMPF_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)\n__GMPF_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)\n\n\n\n/**************** Class wrapper for gmp_randstate_t ****************/\n\nclass __gmp_urandomb_value { };\nclass __gmp_urandomm_value { };\n\ntemplate <>\nclass __gmp_expr<mpz_t, __gmp_urandomb_value>\n{\nprivate:\n  __gmp_randstate_struct *state;\n  mp_bitcnt_t bits;\npublic:\n  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }\n  void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); }\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }\n};\n\ntemplate <>\nclass __gmp_expr<mpz_t, __gmp_urandomm_value>\n{\nprivate:\n  __gmp_randstate_struct *state;\n  mpz_class range;\npublic:\n  __gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { }\n  void eval(mpz_ptr z) const\n  { __gmp_rand_function::eval(z, state, range.get_mpz_t()); }\n  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }\n};\n\ntemplate <>\nclass 
__gmp_expr<mpf_t, __gmp_urandomb_value>\n{\nprivate:\n  __gmp_randstate_struct *state;\n  mp_bitcnt_t bits;\npublic:\n  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }\n  void eval(mpf_ptr f) const\n  {\n    __gmp_rand_function::eval(f, state,\n\t(bits>0) ? bits : mpf_get_prec(f));\n  }\n  mp_bitcnt_t get_prec() const\n  {\n    if (bits == 0)\n      return mpf_get_default_prec();\n    else\n      return bits;\n  }\n};\n\nextern \"C\" {\n  typedef void __gmp_randinit_default_t (gmp_randstate_t);\n  typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, mpir_ui, mp_bitcnt_t);\n  typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, mp_bitcnt_t);\n}\n\nclass gmp_randclass\n{\nprivate:\n  gmp_randstate_t state;\n\n  // copy construction and assignment not allowed\n  gmp_randclass(const gmp_randclass &);\n  void operator=(const gmp_randclass &);\npublic:\n  // constructors and destructor\n  gmp_randclass(gmp_randalg_t alg, mp_bitcnt_t size)\n  {\n    switch (alg)\n      {\n      case GMP_RAND_ALG_LC: // no other cases for now\n      default:\n\tgmp_randinit(state, alg, size);\n\tbreak;\n      }\n  }\n\n  // gmp_randinit_default\n  gmp_randclass(__gmp_randinit_default_t* f) { f(state); }\n\n  // gmp_randinit_lc_2exp\n  gmp_randclass(__gmp_randinit_lc_2exp_t* f,\n\t\tmpz_class z, mpir_ui l1, mp_bitcnt_t l2)\n  { f(state, z.get_mpz_t(), l1, l2); }\n\n  // gmp_randinit_lc_2exp_size\n  gmp_randclass(__gmp_randinit_lc_2exp_size_t* f,\n\t\tmp_bitcnt_t size)\n  {\n    if (f (state, size) == 0)\n      throw std::length_error (\"gmp_randinit_lc_2exp_size\");\n  }\n\n  ~gmp_randclass() { gmp_randclear(state); }\n\n  // initialize\n  void seed(); // choose a random seed some way (?)\n  void seed(mpir_ui s) { gmp_randseed_ui(state, s); }\n  void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); }\n\n  //get randstate_t for compatibility with non-OO API functions\n  randstate_srcptr get_randstate_t() const { return this->state; }\n 
 randstate_ptr get_randstate_t() { return this->state; }\n\n  // get random number\n  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(mp_bitcnt_t l)\n  { return __gmp_expr<mpz_t, __gmp_urandomb_value>(state, l); }\n  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(const mpz_class &z)\n  { return get_z_bits(z.get_ui()); }\n  // FIXME: z.get_bitcnt_t() ?\n\n  __gmp_expr<mpz_t, __gmp_urandomm_value> get_z_range(const mpz_class &z)\n  { return __gmp_expr<mpz_t, __gmp_urandomm_value>(state, z); }\n\n  __gmp_expr<mpf_t, __gmp_urandomb_value> get_f(mp_bitcnt_t prec = 0)\n  { return __gmp_expr<mpf_t, __gmp_urandomb_value>(state, prec); }\n};\n\n\n/**************** Specialize std::numeric_limits ****************/\n\nnamespace std {\n  template <> class numeric_limits<mpz_class>\n  {\n  public:\n    static const bool is_specialized = true;\n    static mpz_class min() { return mpz_class(); }\n    static mpz_class max() { return mpz_class(); }\n    static mpz_class lowest() { return mpz_class(); }\n    static const int digits = 0;\n    static const int digits10 = 0;\n    static const int max_digits10 = 0;\n    static const bool is_signed = true;\n    static const bool is_integer = true;\n    static const bool is_exact = true;\n    static const int radix = 2;\n    static mpz_class epsilon() { return mpz_class(); }\n    static mpz_class round_error() { return mpz_class(); }\n    static const int min_exponent = 0;\n    static const int min_exponent10 = 0;\n    static const int max_exponent = 0;\n    static const int max_exponent10 = 0;\n    static const bool has_infinity = false;\n    static const bool has_quiet_NaN = false;\n    static const bool has_signaling_NaN = false;\n    static const float_denorm_style has_denorm = denorm_absent;\n    static const bool has_denorm_loss = false;\n    static mpz_class infinity() { return mpz_class(); }\n    static mpz_class quiet_NaN() { return mpz_class(); }\n    static mpz_class signaling_NaN() { return mpz_class(); }\n    static 
mpz_class denorm_min() { return mpz_class(); }\n    static const bool is_iec559 = false;\n    static const bool is_bounded = false;\n    static const bool is_modulo = false;\n    static const bool traps = false;\n    static const bool tinyness_before = false;\n    static const float_round_style round_style = round_toward_zero;\n  };\n\n  template <> class numeric_limits<mpq_class>\n  {\n  public:\n    static const bool is_specialized = true;\n    static mpq_class min() { return mpq_class(); }\n    static mpq_class max() { return mpq_class(); }\n    static mpq_class lowest() { return mpq_class(); }\n    static const int digits = 0;\n    static const int digits10 = 0;\n    static const int max_digits10 = 0;\n    static const bool is_signed = true;\n    static const bool is_integer = false;\n    static const bool is_exact = true;\n    static const int radix = 2;\n    static mpq_class epsilon() { return mpq_class(); }\n    static mpq_class round_error() { return mpq_class(); }\n    static const int min_exponent = 0;\n    static const int min_exponent10 = 0;\n    static const int max_exponent = 0;\n    static const int max_exponent10 = 0;\n    static const bool has_infinity = false;\n    static const bool has_quiet_NaN = false;\n    static const bool has_signaling_NaN = false;\n    static const float_denorm_style has_denorm = denorm_absent;\n    static const bool has_denorm_loss = false;\n    static mpq_class infinity() { return mpq_class(); }\n    static mpq_class quiet_NaN() { return mpq_class(); }\n    static mpq_class signaling_NaN() { return mpq_class(); }\n    static mpq_class denorm_min() { return mpq_class(); }\n    static const bool is_iec559 = false;\n    static const bool is_bounded = false;\n    static const bool is_modulo = false;\n    static const bool traps = false;\n    static const bool tinyness_before = false;\n    static const float_round_style round_style = round_toward_zero;\n  };\n\n  template <> class numeric_limits<mpf_class>\n  {\n  public:\n    
static const bool is_specialized = true;\n    static mpf_class min() { return mpf_class(); }\n    static mpf_class max() { return mpf_class(); }\n    static mpf_class lowest() { return mpf_class(); }\n    static const int digits = 0;\n    static const int digits10 = 0;\n    static const int max_digits10 = 0;\n    static const bool is_signed = true;\n    static const bool is_integer = false;\n    static const bool is_exact = false;\n    static const int radix = 2;\n    static mpf_class epsilon() { return mpf_class(); }\n    static mpf_class round_error() { return mpf_class(); }\n    static const int min_exponent = 0;\n    static const int min_exponent10 = 0;\n    static const int max_exponent = 0;\n    static const int max_exponent10 = 0;\n    static const bool has_infinity = false;\n    static const bool has_quiet_NaN = false;\n    static const bool has_signaling_NaN = false;\n    static const float_denorm_style has_denorm = denorm_absent;\n    static const bool has_denorm_loss = false;\n    static mpf_class infinity() { return mpf_class(); }\n    static mpf_class quiet_NaN() { return mpf_class(); }\n    static mpf_class signaling_NaN() { return mpf_class(); }\n    static mpf_class denorm_min() { return mpf_class(); }\n    static const bool is_iec559 = false;\n    static const bool is_bounded = false;\n    static const bool is_modulo = false;\n    static const bool traps = false;\n    static const bool tinyness_before = false;\n    static const float_round_style round_style = round_indeterminate;\n  };\n}\n\n\n/**************** #undef all private macros ****************/\n\n#undef __GMPP_DECLARE_COMPOUND_OPERATOR\n#undef __GMPN_DECLARE_COMPOUND_OPERATOR\n#undef __GMP_DECLARE_COMPOUND_OPERATOR\n#undef __GMP_DECLARE_COMPOUND_OPERATOR_UI\n#undef __GMP_DECLARE_INCREMENT_OPERATOR\n\n#undef __GMPZQ_DEFINE_EXPR\n\n#undef __GMP_DEFINE_UNARY_FUNCTION\n#undef __GMP_DEFINE_UNARY_TYPE_FUNCTION\n\n#undef __GMPP_DEFINE_BINARY_FUNCTION\n#undef 
__GMPNN_DEFINE_BINARY_FUNCTION\n#undef __GMPNS_DEFINE_BINARY_FUNCTION\n#undef __GMPNU_DEFINE_BINARY_FUNCTION\n#undef __GMPND_DEFINE_BINARY_FUNCTION\n#undef __GMPNLD_DEFINE_BINARY_FUNCTION\n#undef __GMPN_DEFINE_BINARY_FUNCTION\n#undef __GMP_DEFINE_BINARY_FUNCTION\n\n#undef __GMP_DEFINE_BINARY_FUNCTION_UI\n\n#undef __GMPP_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPNN_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPNS_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPNU_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPND_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMPN_DEFINE_BINARY_TYPE_FUNCTION\n#undef __GMP_DEFINE_BINARY_TYPE_FUNCTION\n\n#undef __GMPZ_DEFINE_COMPOUND_OPERATOR\n\n#undef __GMPP_DEFINE_COMPOUND_OPERATOR\n#undef __GMPNN_DEFINE_COMPOUND_OPERATOR\n#undef __GMPNS_DEFINE_COMPOUND_OPERATOR\n#undef __GMPNU_DEFINE_COMPOUND_OPERATOR\n#undef __GMPND_DEFINE_COMPOUND_OPERATOR\n#undef __GMPNLD_DEFINE_COMPOUND_OPERATOR\n#undef __GMPN_DEFINE_COMPOUND_OPERATOR\n#undef __GMP_DEFINE_COMPOUND_OPERATOR\n\n#undef __GMPQ_DEFINE_COMPOUND_OPERATOR\n#undef __GMPF_DEFINE_COMPOUND_OPERATOR\n\n#undef __GMP_DEFINE_COMPOUND_OPERATOR_UI\n#undef __GMPZ_DEFINE_COMPOUND_OPERATOR_UI\n#undef __GMPQ_DEFINE_COMPOUND_OPERATOR_UI\n#undef __GMPF_DEFINE_COMPOUND_OPERATOR_UI\n\n#undef __GMP_DEFINE_INCREMENT_OPERATOR\n#undef __GMPZ_DEFINE_INCREMENT_OPERATOR\n#undef __GMPQ_DEFINE_INCREMENT_OPERATOR\n#undef __GMPF_DEFINE_INCREMENT_OPERATOR\n\n#undef __GMPXX_CONSTANT\n\n#endif /* __GMP_PLUSPLUS__ */\n"
  },
  {
    "path": "mpn/Makeasm.am",
    "content": "## Automake asm file rules.\n  \n# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,\n# Inc.\n#\n# Copyright 2008 William Hart\n#\n# This file is part of the MPIR Library.\n#\n# The MPIR Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The MPIR Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the MPIR Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# COMPILE minus CC.\n#\nCOMPILE_FLAGS = $(DEFS) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(INCLUDES) $(DEFAULT_INCLUDES) \n\n# Flags used for preprocessing (in ansi2knr rules).\n#\nPREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \\\n\t$(CPPFLAGS)\n\n\n# Recent versions of automake (1.5 and up for instance) append automake\n# generated suffixes to this $(SUFFIXES) list.  This is essential for us,\n# since .c must come after .s, .S and .asm.  If .c is before .s, for\n# instance, then in the mpn directory \"make\" will see add_n.c mentioned in\n# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,\n# even if add_n.c doesn't exist but add_n.s does.  See GNU make\n# documentation \"(make)Implicit Rule Search\", part 5c.\n#\n# On IRIX 6 native make this doesn't work properly though.  Somehow .c\n# remains ahead of .s, perhaps because .c.s is a builtin rule.  
.asm works\n# fine though, and mpn/mips3 uses this.\n#\nSUFFIXES = .s .S .as .asm\n\n\n# .s assembler, no preprocessing.\n#\n.s.o:\n\t$(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<\n.s.obj:\n\t$(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`\n.s.lo:\n\t$(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<\n\n# can be overridden during development, eg. \"make RM_TMP=: mul_1.lo\"\nRM_TMP = rm -f\n\n\n# .S assembler, preprocessed with cpp.\n#\n# It's necessary to run $(CPP) separately, since it seems not all compilers\n# recognise .S files, in particular \"cc\" on HP-UX 10 and 11 doesn't (and\n# will silently do nothing if given a .S).\n#\n# For .lo we need a helper script, as described below for .asm.lo.\n#\n.S.o:\n\t$(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s\n\t$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@\n\t$(RM_TMP) tmp-$*.s\n.S.obj:\n\t$(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s\n\t$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@\n\t$(RM_TMP) tmp-$*.s\n.S.lo:\n\t$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp=\"$(CPP) $(PREPROCESS_FLAGS)\" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<\n\n# .asm assembler, preprocessed with m4 unless we are in MINGW64\n#\n# .o and .obj are non-PIC and just need m4 followed by a compile.\n#\n# .lo is a bit tricky.  Libtool (as of version 1.5) has foo.lo as a little\n# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,\n# respectively.  
It'd be asking for lots of trouble to try to create foo.lo\n# ourselves, so instead arrange to invoke libtool like a --mode=compile, but\n# with a special m4-ccas script which first m4 preprocesses, then compiles.\n# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.\n#\n# Libtool adds -DPIC when building a shared object and the .asm files look\n# for that.  But it should be noted that the other PIC flags are on occasion\n# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before\n# it accepts PIC constructs like @GOT, and gcc adds that flag only under\n# -fPIC.  (Later versions of gas are happy to accept PIC stuff any time.)\n#\n\n.asm.o:\n\t$(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s\n\t$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@\n\t$(RM_TMP) tmp-$*.s\n.asm.obj:\n\t$(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s\n\t$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@\n\t$(RM_TMP) tmp-$*.s\n\n#if BUILD_YASM\n#ASSEMBLER = $(top_srcdir)/strip_fPIC.sh $(MPIR_AS) -p gas -I $(top_builddir) $(OBJECT_FORMAT) $(GSYM_FLAG) -o $@ \n#else\nASSEMBLER = $(CCAS) $(COMPILE_FLAGS)\n#endif\n\n.asm.lo:\n\t$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4=\"$(M4)\" $(ASSEMBLER) `test -f '$<' || echo '$(srcdir)/'`$<\n\n#  .as assembler, assembled with Yasm\n\n.as.lo:\n\t$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/strip_fPIC.sh $(MPIR_AS) -I $(top_builddir) $(OBJECT_FORMAT) $(GSYM_FLAG) -o $@ `test -f '$<' || echo '$(srcdir)/'`$<\n"
  },
  {
    "path": "mpn/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2005 Free Software Foundation,\n# Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir) \\\n  -DOPERATION_`echo $* | sed 's/_$$//'`\n\nOFILES = @mpn_objects@\n\nnoinst_LTLIBRARIES = libmpn.la\nnodist_libmpn_la_SOURCES = dummy1.c\nlibmpn_la_LIBADD = $(OFILES)\nlibmpn_la_DEPENDENCIES = $(OFILES)\n\nTARG_DIST = alpha arm generic ia64 mips32 mips64 powerpc32 powerpc64 sparc32 sparc64 x86 x86_64\n\nEXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)\n\n# These are BUILT_SOURCES at the top-level, so normally they're built before\n# recursing into this directory.\n#\ndummy1.c:\n\tcd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/dummy1.c\n\ninclude Makeasm.am\n"
  },
  {
    "path": "mpn/README",
    "content": "Copyright 1996, 1999 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\nThis directory contains all code for the mpn layer of GMP.\n\nMost subdirectories contain machine-dependent code, written in assembly or C.\nThe `generic' subdirectory contains default code, used when there is no\nmachine-dependent replacement for a particular machine.\n\nThere is one subdirectory for each ISA family.  Note that e.g., 32-bit SPARC\nand 64-bit SPARC are very different ISA's, and thus cannot share any code.\n\nA particular compile will only use code from one subdirectory, and the\n`generic' subdirectory.  The ISA-specific subdirectories contain hierachies of\ndirectories for various architecture variants and implementations; the\ntop-most level contains code that runs correctly on all variants.\n"
  },
  {
    "path": "mpn/alpha/README",
    "content": "Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify it\nunder the terms of the GNU Lesser General Public License as published by the\nFree Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\nFITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\nfor more details.\n\nYou should have received a copy of the GNU Lesser General Public License along\nwith the GNU MP Library; see the file COPYING.LIB.  If not, write to the Free\nSoftware Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,\nUSA.\n\n\n\n\n\nThis directory contains mpn functions optimized for DEC Alpha processors.\n\nALPHA ASSEMBLY RULES AND REGULATIONS\n\nThe `.prologue N' pseudo op marks the end of instruction that needs special\nhandling by unwinding.  It also says whether $27 is really needed for computing\nthe gp.  The `.mask M' pseudo op says which registers are saved on the stack,\nand at what offset in the frame.\n\nCray T3 code is very very different...\n\n\"$6\" / \"$f6\" etc is the usual syntax for registers, but on Unicos instead \"r6\"\n/ \"f6\" is required.  We use the \"r6\" / \"f6\" forms, and have m4 defines expand\nthem to \"$6\" or \"$f6\" where necessary.\n\n\"0x\" introduces a hex constant in gas and DEC as, but on Unicos \"^X\" is\nrequired.  The X() macro accomodates this difference.\n\n\"cvttqc\" is required by DEC as, \"cvttq/c\" is required by Unicos, and gas will\naccept either.  We use cvttqc and have an m4 define expand to cvttq/c where\nnecessary.\n\n\"not\" as an alias for \"ornot r31, ...\" is available in gas and DEC as, but not\nthe Unicos assembler.  
The full \"ornot\" must be used.\n\n\"unop\" is not available in Unicos.  We make an m4 define to the usual \"ldq_u\nr31,0(r30)\", and in fact use that define on all systems since it comes out the\nsame.\n\n\"!literal!123\" etc explicit relocations as per Tru64 4.0 are apparently not\navailable in older alpha assemblers (including gas prior to 2.12), according to\nthe GCC manual, so the assembler macro forms must be used (eg. ldgp).\n\n\n\nRELEVANT OPTIMIZATION ISSUES\n\nEV4\n\n1. This chip has very limited store bandwidth.  The on-chip L1 cache is write-\n   through, and a cache line is transfered from the store buffer to the off-\n   chip L2 in as much 15 cycles on most systems.  This delay hurts mpn_add_n,\n   mpn_sub_n, mpn_lshift, and mpn_rshift.\n\n2. Pairing is possible between memory instructions and integer arithmetic\n   instructions.\n\n3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of these\n   cycles are pipelined.  Thus, multiply instructions can be issued at a rate\n   of one each 21st cycle.\n\nEV5\n\n1. The memory bandwidth of this chip is good, both for loads and stores.  The\n   L1 cache can handle two loads or one store per cycle, but two cycles after a\n   store, no ld can issue.\n\n2. mulq has a latency of 12 cycles and an issue rate of 1 each 8th cycle.\n   umulh has a latency of 14 cycles and an issue rate of 1 each 10th cycle.\n   (Note that published documentation gets these numbers slightly wrong.)\n\n3. mpn_add_n.  With 4-fold unrolling, we need 37 instructions, whereof 12\n   are memory operations.  
This will take at least\n\tceil(37/2) [dual issue] + 1 [taken branch] = 19 cycles\n   We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data\n   cache cycles, which should be completely hidden in the 19 issue cycles.\n   The computation is inherently serial, with these dependencies:\n\n\t       ldq  ldq\n\t\t \\  /\\\n\t  (or)   addq |\n\t   |\\   /   \\ |\n\t   | addq  cmpult\n\t    \\  |     |\n\t     cmpult  |\n\t\t \\  /\n\t\t  or\n\n   I.e., 3 operations are needed between carry-in and carry-out, making 12\n   cycles the absolute minimum for the 4 limbs.  We could replace the `or' with\n   a cmoveq/cmovne, which could issue one cycle earlier that the `or', but that\n   might waste a cycle on EV4.  The total depth remain unaffected, since cmov\n   has a latency of 2 cycles.\n\n     addq\n     /   \\\n   addq  cmpult\n     |      \\\n   cmpult -> cmovne\n\n  Montgomery has a slightly different way of computing carry that requires one\n  less instruction, but has depth 4 (instead of the current 3).  Since the code\n  is currently instruction issue bound, Montgomery's idea should save us 1/2\n  cycle per limb, or bring us down to a total of 17 cycles or 4.25 cycles/limb.\n  Unfortunately, this method will not be good for the EV6.\n\n4. addmul_1 and friends: We previously had a scheme for splitting the single-\n   limb operand in 21-bits chunks and the multi-limb operand in 32-bit chunks,\n   and then use FP operations for every 2nd multiply, and integer operations\n   for every 2nd multiply.\n\n   But it seems much better to split the single-limb operand in 16-bit chunks,\n   since we save many integer shifts and adds that way.  See powerpc64/README\n   for some more details.\n\nEV6\n\nHere we have a really parallel pipeline, capable of issuing up to 4 integer\ninstructions per cycle.  In actual practice, it is never possible to sustain\nmore than 3.5 integer insns/cycle due to rename register shortage.  
One integer\nmultiply instruction can issue each cycle.  To get optimal speed, we need to\npretend we are vectorizing the code, i.e., minimize the depth of recurrences.\n\nThere are two dependencies to watch out for.  1) Address arithmetic\ndependencies, and 2) carry propagation dependencies.\n\nWe can avoid serializing due to address arithmetic by unrolling loops, so that\naddresses don't depend heavily on an index variable.  Avoiding serializing\nbecause of carry propagation is trickier; the ultimate performance of the code\nwill be determined of the number of latency cycles it takes from accepting\ncarry-in to a vector point until we can generate carry-out.\n\nMost integer instructions can execute in either the L0, U0, L1, or U1\npipelines.  Shifts only execute in U0 and U1, and multiply only in U1.\n\nCMOV instructions split into two internal instructions, CMOV1 and CMOV2.  CMOV\nsplit the mapping process (see pg 2-26 in cmpwrgd.pdf), suggesting the CMOV\nshould always be placed as the last instruction of an aligned 4 instruction\nblock, or perhaps simply avoided.\n\nPerhaps the most important issue is the latency between the L0/U0 and L1/U1\nclusters; a result obtained on either cluster has an extra cycle of latency for\nconsumers in the opposite cluster.  
Because of the dynamic nature of the\nimplementation, it is hard to predict where an instruction will execute.\n\n\n\nREFERENCES\n\n\"Alpha Architecture Handbook\", version 4, Compaq, October 1998, order number\nEC-QD2KC-TE.\n\n\"Alpha 21164 Microprocessor Hardware Reference Manual\", Compaq, December 1998,\norder number EC-QP99C-TE.\n\n\"Alpha 21264/EV67 Microprocessor Hardware Reference Manual\", revision 1.4,\nCompaq, September 2000, order number DS-0028B-TE.\n\n\"Compiler Writer's Guide for the Alpha 21264\", Compaq, June 1999, order number\nEC-RJ66A-TE.\n\nAll of the above are available online from\n\n  http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html\n  ftp://ftp.compaq.com/pub/products/alphaCPUdocs\n\n\"Tru64 Unix Assembly Language Programmer's Guide\", Compaq, March 1996, part\nnumber AA-PS31D-TE.\n\n\"Digital UNIX Calling Standard for Alpha Systems\", Digital Equipment Corp,\nMarch 1996, part number AA-PY8AC-TE.\n\nThe above are available online,\n\n  http://h30097.www3.hp.com/docs/pub_page/V40F_DOCS.HTM\n\n(Dunno what h30097 means in this URL, but if it moves try searching for \"tru64\nonline documentation\" from the main www.hp.com page.)\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 79\nEnd:\n"
  },
  {
    "path": "mpn/alpha/add_n.asm",
    "content": "dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and\ndnl  store sum in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     7.75\nC EV5:     5.75\nC EV6:     4\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  vp\tr18\nC  n\tr19\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tldq\tr3,0(r17)\n\tldq\tr4,0(r18)\n\n\tsubq\tr19,1,r19\n\tand\tr19,4-1,r2\tC number of limbs in first loop\n\tbis\tr31,r31,r0\n\tbeq\tr2,$L0\t\tC if multiple of 4 limbs, skip first 
loop\n\n\tsubq\tr19,r2,r19\n\n$Loop0:\tsubq\tr2,1,r2\n\tldq\tr5,8(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,8(r18)\n\tcmpult\tr4,r0,r1\n\taddq\tr3,r4,r4\n\tcmpult\tr4,r3,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\n\taddq\tr17,8,r17\n\taddq\tr18,8,r18\n\tbis\tr5,r5,r3\n\tbis\tr6,r6,r4\n\taddq\tr16,8,r16\n\tbne\tr2,$Loop0\n\n$L0:\tbeq\tr19,$Lend\n\n\tALIGN(8)\n$Loop:\tsubq\tr19,4,r19\n\n\tldq\tr5,8(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,8(r18)\n\tcmpult\tr4,r0,r1\n\taddq\tr3,r4,r4\n\tcmpult\tr4,r3,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr3,16(r17)\n\taddq\tr6,r0,r6\n\tldq\tr4,16(r18)\n\tcmpult\tr6,r0,r1\n\taddq\tr5,r6,r6\n\tcmpult\tr6,r5,r0\n\tstq\tr6,8(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr5,24(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,24(r18)\n\tcmpult\tr4,r0,r1\n\taddq\tr3,r4,r4\n\tcmpult\tr4,r3,r0\n\tstq\tr4,16(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr3,32(r17)\n\taddq\tr6,r0,r6\n\tldq\tr4,32(r18)\n\tcmpult\tr6,r0,r1\n\taddq\tr5,r6,r6\n\tcmpult\tr6,r5,r0\n\tstq\tr6,24(r16)\n\tbis\tr0,r1,r0\n\n\taddq\tr17,32,r17\n\taddq\tr18,32,r18\n\taddq\tr16,32,r16\n\tbne\tr19,$Loop\n\n$Lend:\taddq\tr4,r0,r4\n\tcmpult\tr4,r0,r1\n\taddq\tr3,r4,r4\n\tcmpult\tr4,r3,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\tret\tr31,(r26),1\nEPILOGUE(mpn_add_n)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/addlsh1_n.asm",
    "content": "dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:    12.5\nC EV5:     6.25\nC EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)\n\nC TODO\nC  * Write special version for ev6, as this is a slowdown for 100 < n < 2200\nC    compared to separate mpn_lshift and mpn_add_n.\nC  * Use addq instead of sll for left shift, and similarly cmplt instead of srl\nC    for right shift.\n\ndnl  INPUT PARAMETERS\ndefine(`rp',`r16')\ndefine(`up',`r17')\ndefine(`vp',`r18')\ndefine(`n', `r19')\n\ndefine(`u0', `r8')\ndefine(`u1', `r1')\ndefine(`u2', `r2')\ndefine(`u3', `r3')\ndefine(`v0', `r4')\ndefine(`v1', `r5')\ndefine(`v2', `r6')\ndefine(`v3', `r7')\n\ndefine(`cy0', `r0')\ndefine(`cy1', `r20')\ndefine(`cy', `r22')\ndefine(`rr', `r24')\ndefine(`ps', `r25')\ndefine(`sl', `r28')\n\ndefine(`OPERATION_addlsh1_n',1)\n\nifdef(`OPERATION_addlsh1_n',`\n  define(ADDSUB,       addq)\n  define(CARRY,       `cmpult $1,$2,$3')\n  define(func, 
mpn_addlsh1_n)\n')\nifdef(`OPERATION_sublsh1_n',`\n  define(ADDSUB,       subq)\n  define(CARRY,       `cmpult $2,$1,$3')\n  define(func, mpn_sublsh1_n)\n')\n\nASM_START()\nPROLOGUE(func)\n\tlda\tn, -4(n)\n\tbis\tr31, r31, cy1\n\tand\tn, 3, r1\n\tbeq\tr1, $Lb00\n\tcmpeq\tr1, 1, r2\n\tbne\tr2, $Lb01\n\tcmpeq\tr1, 2, r2\n\tbne\tr2, $Lb10\n$Lb11:\tC n = 3, 7, 11, ...\n\tldq\tv0, 0(vp)\n\tldq\tu0, 0(up)\n\tldq\tv1, 8(vp)\n\tldq\tu1, 8(up)\n\tldq\tv2, 16(vp)\n\tldq\tu2, 16(up)\n\tlda\tvp, 24(vp)\n\tlda\tup, 24(up)\n\tbge\tn, $Loop\n\tbr\tr31, $Lcj3\n$Lb10:\tC n = 2, 6, 10, ...\n\tbis\tr31, r31, cy0\n\tldq\tv1, 0(vp)\n\tldq\tu1, 0(up)\n\tldq\tv2, 8(vp)\n\tldq\tu2, 8(up)\n\tlda\trp, -8(rp)\n\tblt\tn, $Lcj2\n\tldq\tv3, 16(vp)\n\tldq\tu3, 16(up)\n\tlda\tvp, 48(vp)\n\tlda\tup, 16(up)\n\tbr\tr31, $LL10\n$Lb01:\tC n = 1, 5, 9, ...\n\tldq\tv2, 0(vp)\n\tldq\tu2, 0(up)\n\tlda\trp, -16(rp)\n\tblt\tn, $Lcj1\n\tldq\tv3, 8(vp)\n\tldq\tu3, 8(up)\n\tldq\tv0, 16(vp)\n\tldq\tu0, 16(up)\n\tlda\tvp, 40(vp)\n\tlda\tup, 8(up)\n\tlda\trp, 32(rp)\n\tbr\tr31, $LL01\n$Lb00:\tC n = 4, 8, 12, ...\n\tbis\tr31, r31, cy0\n\tldq\tv3, 0(vp)\n\tldq\tu3, 0(up)\n\tldq\tv0, 8(vp)\n\tldq\tu0, 8(up)\n\tldq\tv1, 16(vp)\n\tldq\tu1, 16(up)\n\tlda\tvp, 32(vp)\n\tlda\trp, 8(rp)\n\tbr\tr31, $LL00x\n\tALIGN(16)\nC 0\n$Loop:\tsll\tv0, 1, sl\tC left shift vlimb\n\tldq\tv3, 0(vp)\nC 1\n\tADDSUB\tu0, sl, ps\tC ulimb + (vlimb << 1)\n\tldq\tu3, 0(up)\nC 2\n\tADDSUB\tps, cy1, rr\tC consume carry from previous operation\n\tsrl\tv0, 63, cy0\tC carry out #1\nC 3\n\tCARRY(\tps, u0, cy)\tC carry out #2\n\tstq\trr, 0(rp)\nC 4\n\taddq\tcy, cy0, cy0\tC combine carry out #1 and #2\n\tCARRY(\trr, ps, cy)\tC carry out #3\nC 5\n\taddq\tcy, cy0, cy0\tC final carry out\n\tlda\tvp, 32(vp)\tC bookkeeping\nC 6\n$LL10:\tsll\tv1, 1, sl\n\tldq\tv0, -24(vp)\nC 7\n\tADDSUB\tu1, sl, ps\n\tldq\tu0, 8(up)\nC 8\n\tADDSUB\tps, cy0, rr\n\tsrl\tv1, 63, cy1\nC 9\n\tCARRY(\tps, u1, cy)\n\tstq\trr, 8(rp)\nC 10\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, 
cy)\nC 11\n\taddq\tcy, cy1, cy1\n\tlda\trp, 32(rp)\tC bookkeeping\nC 12\n$LL01:\tsll\tv2, 1, sl\n\tldq\tv1, -16(vp)\nC 13\n\tADDSUB\tu2, sl, ps\n\tldq\tu1, 16(up)\nC 14\n\tADDSUB\tps, cy1, rr\n\tsrl\tv2, 63, cy0\nC 15\n\tCARRY(\tps, u2, cy)\n\tstq\trr, -16(rp)\nC 16\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\nC 17\n\taddq\tcy, cy0, cy0\n$LL00x:\tlda\tup, 32(up)\tC bookkeeping\nC 18\n\tsll\tv3, 1, sl\n\tldq\tv2, -8(vp)\nC 19\n\tADDSUB\tu3, sl, ps\n\tldq\tu2, -8(up)\nC 20\n\tADDSUB\tps, cy0, rr\n\tsrl\tv3, 63, cy1\nC 21\n\tCARRY(\tps, u3, cy)\n\tstq\trr, -8(rp)\nC 22\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, cy)\nC 23\n\taddq\tcy, cy1, cy1\n\tlda\tn, -4(n)\tC bookkeeping\nC 24\n\tbge\tn, $Loop\n\n$Lcj3:\tsll\tv0, 1, sl\n\tADDSUB\tu0, sl, ps\n\tADDSUB\tps, cy1, rr\n\tsrl\tv0, 63, cy0\n\tCARRY(\tps, u0, cy)\n\tstq\trr, 0(rp)\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy0, cy0\n\n$Lcj2:\tsll\tv1, 1, sl\n\tADDSUB\tu1, sl, ps\n\tADDSUB\tps, cy0, rr\n\tsrl\tv1, 63, cy1\n\tCARRY(\tps, u1, cy)\n\tstq\trr, 8(rp)\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy1, cy1\n\n$Lcj1:\tsll\tv2, 1, sl\n\tADDSUB\tu2, sl, ps\n\tADDSUB\tps, cy1, rr\n\tsrl\tv2, 63, cy0\n\tCARRY(\tps, u2, cy)\n\tstq\trr, 16(rp)\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy0, cy0\n\n\tret\tr31,(r26),1\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/addmul_1.asm",
    "content": "dnl Alpha mpn_addmul_1 -- Multiply a limb vector with a limb and add the\ndnl result to a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     42\nC EV5:     18\nC EV6:      7\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  vl\tr19\n\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\tsubq\tr18,1,r18\tC size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\tumulh\tr2,r19,r0\tC r0 = prod_high\n\tbeq\tr18,$Lend1\tC jump if size was == 1\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\tsubq\tr18,1,r18\tC size--\n\taddq\tr5,r3,r3\n\tcmpult\tr3,r5,r4\n\tstq\tr3,0(r16)\n\taddq\tr16,8,r16\tC res_ptr++\n\tbeq\tr18,$Lend2\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tsubq\tr18,1,r18\tC size--\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\tldq\tr2,0(r17)\tC r2 = 
s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\taddq\tr5,r3,r3\n\tcmpult\tr3,r5,r5\n\tstq\tr3,0(r16)\n\taddq\tr16,8,r16\tC res_ptr++\n\taddq\tr5,r0,r0\tC combine carries\n\tbne\tr18,$Loop\n\n$Lend2:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\taddq\tr5,r3,r3\n\tcmpult\tr3,r5,r5\n\tstq\tr3,0(r16)\n\taddq\tr5,r0,r0\tC combine carries\n\taddq\tr4,r0,r0\tC cy_limb = prod_high + cy\n\tret\tr31,(r26),1\n$Lend1:\taddq\tr5,r3,r3\n\tcmpult\tr3,r5,r5\n\tstq\tr3,0(r16)\n\taddq\tr0,r5,r0\n\tret\tr31,(r26),1\nEPILOGUE(mpn_addmul_1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/alpha-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for Alpha assembler.\n\ndnl  Copyright 2003, 2004 Free Software Foundation, Inc.\ndnl \ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  Usage: ASSERT([reg] [,code])\ndnl\ndnl  Require that the given reg is non-zero after executing the test code.\ndnl  For example,\ndnl\ndnl         ASSERT(r8,\ndnl         `       cmpult r16, r17, r8')\ndnl\ndnl  If the register argument is empty then nothing is tested, the code is\ndnl  just executed.  This can be used for setups required by later ASSERTs.\ndnl  If the code argument is omitted then the register is just tested, with\ndnl  no special setup code.\n\ndefine(ASSERT,\nm4_assert_numargs_range(1,2)\nm4_assert_defined(`WANT_ASSERT')\n`ifelse(WANT_ASSERT,1,\n`ifelse(`$2',,,`$2')\nifelse(`$1',,,\n`\tbne\t$1, L(ASSERTok`'ASSERT_label_counter)\n\t.long\t0\tC halt\nL(ASSERTok`'ASSERT_label_counter):\ndefine(`ASSERT_label_counter',eval(ASSERT_label_counter+1))\n')\n')')\ndefine(`ASSERT_label_counter',1)\n\n\ndnl  Usage: bigend(`code')\ndnl\ndnl  Emit the given code only for a big-endian system, like Unicos.  
This\ndnl  can be used for instance for extra stuff needed by extwl.\n\ndefine(bigend,\nm4_assert_numargs(1)\n`ifdef(`HAVE_LIMB_BIG_ENDIAN',`$1',\n`ifdef(`HAVE_LIMB_LITTLE_ENDIAN',`',\n`m4_error(`Cannot assemble, unknown limb endianness')')')')\n\ndnl  Usage: unop\ndnl\ndnl  The Cray Unicos assembler lacks unop, so give the equivalent ldq_u\ndnl  explicitly.\n\ndefine(unop,\nm4_assert_numargs(-1)\n`ldq_u\tr31, 0(r30)')\n\n\ndivert\n"
  },
  {
    "path": "mpn/alpha/cntlz.asm",
    "content": "dnl  Alpha auxiliary for longlong.h's count_leading_zeros\n\ndnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nASM_START()\nEXTERN(__clz_tab)\nPROLOGUE(mpn_count_leading_zeros,gp)\n\tcmpbge\tr31,  r16, r1\n\tLEA(r3,__clz_tab)\n\tsra\tr1,   1,   r1\n\txor\tr1,   127, r1\n\tsrl\tr16,  1,   r16\n\taddq\tr1,   r3,  r1\n\tldq_u\tr0,   0(r1)\n\tlda\tr2,   64\n\textbl\tr0,   r1,   r0\n\ts8subl\tr0,   8,    r0\n\tsrl\tr16,  r0,   r16\n\taddq\tr16,  r3,   r16\n\tldq_u\tr1,   0(r16)\n\textbl\tr1,   r16,  r1\n\tsubq\tr2,   r1,   r2\n\tsubq\tr2,   r0,   r0\n\tret\tr31,  (r26),1\nEPILOGUE(mpn_count_leading_zeros)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/copyd.asm",
    "content": "dnl  Alpha mpn_copyd -- copy, decrementing.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     4\nC EV5:     1.75\nC EV6:     1\n\nC INPUT PARAMETERS\nC rp\tr16\nC up\tr17\nC n\tr18\n\n\nASM_START()\nPROLOGUE(mpn_copyd)\n\ts8addq\tr18,r16,r16\t\tC E0\n\ts8addq\tr18,r17,r17\t\tC E1\n\tlda\tr18,-8(r18)\t\tC E0\n\tblt\tr18,$Lend\t\tC E1\n$Loop:\tldq\tr0,-8(r17)\t\tC E0\n\tldq\tr1,-16(r17)\t\tC E1\n\tldq\tr2,-24(r17)\t\tC E0\n\tldq\tr3,-32(r17)\t\tC E1\n\tldq\tr4,-40(r17)\t\tC E0\n\tldq\tr5,-48(r17)\t\tC E1\n\tldq\tr6,-56(r17)\t\tC E0\n\tldq\tr7,-64(r17)\t\tC E1\n\tstq\tr0,-8(r16)\t\tC E0\n\tlda\tr17,-64(r17)\t\tC E1\n\tstq\tr1,-16(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr2,-24(r16)\t\tC E0\n\tlda\tr18,-8(r18)\t\tC E1\n\tstq\tr3,-32(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr4,-40(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr5,-48(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr6,-56(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr7,-64(r16)\t\tC E0\n\tlda\tr16,-64(r16)\t\tC 
E1\n\tbge\tr18,$Loop\t\tC E1\n$Lend:\tlda\tr18,7(r18)\t\tC E0\n\tblt\tr18,$Lret\t\tC E1\n\tldq\tr0,-8(r17)\t\tC E0\n\tbeq\tr18,$Lend0\t\tC E1\n$Loop0:\tstq\tr0,-8(r16)\t\tC E0\n\tlda\tr16,-8(r16)\t\tC E1\n\tldq\tr0,-16(r17)\t\tC E0\n\tlda\tr18,-1(r18)\t\tC E1\n\tlda\tr17,-8(r17)\t\tC E0\n\tbgt\tr18,$Loop0\t\tC E1\n$Lend0:\tstq\tr0,-8(r16)\t\tC E0\n$Lret:\tret\tr31,(r26),1\t\tC E1\nEPILOGUE(mpn_copyd)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/copyi.asm",
    "content": "dnl  Alpha mpn_copyi -- copy, incrementing.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     4\nC EV5:     1.75\nC EV6:     1\n\nC INPUT PARAMETERS\nC rp\tr16\nC up\tr17\nC n\tr18\n\n\nASM_START()\nPROLOGUE(mpn_copyi)\n\tlda\tr18,-8(r18)\t\tC E0\n\tblt\tr18,$Lend\t\tC E1\n$Loop:\tldq\tr0,0(r17)\t\tC E0\n\tldq\tr1,8(r17)\t\tC E1\n\tldq\tr2,16(r17)\t\tC E0\n\tldq\tr3,24(r17)\t\tC E1\n\tldq\tr4,32(r17)\t\tC E0\n\tldq\tr5,40(r17)\t\tC E1\n\tldq\tr6,48(r17)\t\tC E0\n\tldq\tr7,56(r17)\t\tC E1\n\tstq\tr0,0(r16)\t\tC E0\n\tlda\tr17,64(r17)\t\tC E1\n\tstq\tr1,8(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr2,16(r16)\t\tC E0\n\tlda\tr18,-8(r18)\t\tC E1\n\tstq\tr3,24(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr4,32(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr5,40(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr6,48(r16)\t\tC E0\n\tbis\tr31, r31, r31\t\tC E1\n\tstq\tr7,56(r16)\t\tC E0\n\tlda\tr16,64(r16)\t\tC E1\n\tbge\tr18,$Loop\t\tC E1\n$Lend:\tlda\tr18,7(r18)\t\tC E0\n\tblt\tr18,$Lret\t\tC 
E1\n\tldq\tr0,0(r17)\t\tC E0\n\tbeq\tr18,$Lend0\t\tC E1\n$Loop0:\tstq\tr0,0(r16)\t\tC E0\n\tlda\tr16,8(r16)\t\tC E1\n\tldq\tr0,8(r17)\t\tC E0\n\tlda\tr18,-1(r18)\t\tC E1\n\tlda\tr17,8(r17)\t\tC E0\n\tbgt\tr18,$Loop0\t\tC E1\n$Lend0:\tstq\tr0,0(r16)\t\tC E0\n$Lret:\tret\tr31,(r26),1\t\tC E1\nEPILOGUE(mpn_copyi)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/default.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for alpha assembler (everywhere except unicos).\n\n\ndnl  Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  Usage: ASM_START()\ndefine(`ASM_START',\nm4_assert_numargs(0)\n`\t.set noreorder\n\t.set noat')\n\ndnl  Usage: X(value)\ndefine(`X',\nm4_assert_numargs(1)\n`0x$1')\n\ndnl  Usage: FLOAT64(label,value)\ndefine(`FLOAT64',\nm4_assert_numargs(2)\n`\t.align\t3\n$1:\t.t_floating $2')\n\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,gp|noalign])\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs_range(1,2)\n`ifelse(`$2',gp,,\n`ifelse(`$2',noalign,,\n`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter\n')')')')dnl\n\t.text\nifelse(`$2',noalign,,`\tALIGN(16)')\n\t.globl\t$1\n\t.ent\t$1\n$1:\nifelse(`$2',gp,`\tldgp\tr29,0(r27)')\n\t.frame r30,0,r26\n\t.prologue ifelse(`$2',gp,1,0)')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.end\t$1')\n\n\ndnl  Usage: LDGP(dst,src)\ndnl\ndnl  Emit an \"ldgp dst,src\", but only if the system uses a 
GOT.\n\ndefine(LDGP,\nm4_assert_numargs(2)\n`ldgp\t`$1', `$2'')\n\n\ndnl  Usage: EXTERN(variable_name)\ndefine(`EXTERN',\nm4_assert_numargs(1)\n)\n\ndnl  Usage: r0 ... r31\ndnl         f0 ... f31\ndnl\ndnl  Map register names r0 to $0, and f0 to $f0, etc.\ndnl  This is needed on all systems but Unicos\ndnl\ndnl  defreg() is used to protect the $ in $0 (otherwise it would represent a\ndnl  macro argument).  Double quoting is used to protect the f0 in $f0\ndnl  (otherwise it would be an infinite recursion).\n\nforloop(i,0,31,`defreg(`r'i,$i)')\nforloop(i,0,31,`deflit(`f'i,``$f''i)')\n\n\ndnl  Usage: DATASTART(name)\ndnl         DATAEND()\n\ndefine(`DATASTART',\nm4_assert_numargs(1)\n`\tDATA\n$1:')\ndefine(`DATAEND',\nm4_assert_numargs(0)\n)\n\ndnl  Load a symbolic address into a register\ndefine(`LEA',\nm4_assert_numargs(2)\n`lda   $1,  $2')\n\ndnl  Usage: ASM_END()\ndefine(`ASM_END',\nm4_assert_numargs(0)\n)\n\ndivert\n"
  },
  {
    "path": "mpn/alpha/divexact_1.c",
    "content": "/* Alpha mpn_divexact_1 -- mpn by limb exact division.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/*      cycles/limb\n   EV4:    47.0\n   EV5:    30.0\n   EV6:    15.0\n*/\n\n\n/* The dependent chain is as follows (the same as modexact), and this is\n   what the code runs as.\n\n       ev4    ev5   ev6\n        1      1     1    sub    y = x - h\n       23     13     7    mulq   q = y * inverse\n       23     15     7    umulh  h = high (q * d)\n       --     --    --\n       47     30    15\n\n   The time to load src[i+1] and establish x hides under the umulh latency.  
*/\n\nvoid\nmpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)\n{\n  mp_limb_t  inverse, lshift_mask, s, sr, s_next, c, h, x, y, q, dummy;\n  unsigned   rshift, lshift;\n\n  ASSERT (size >= 1);\n  ASSERT (divisor != 0);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));\n  ASSERT_MPN (src, size);\n  ASSERT_LIMB (divisor);\n\n  s_next = *src++;   /* src[0] */\n\n  rshift = 0;\n  lshift_mask = 0;\n  if ((divisor & 1) == 0)\n    {\n      count_trailing_zeros (rshift, divisor);\n      lshift_mask = MP_LIMB_T_MAX;\n      divisor >>= rshift;\n    }\n\n  modlimb_invert (inverse, divisor);\n  lshift = 64 - rshift;\n\n  c = 0;\n  h = 0;\n  sr = s_next >> rshift;\n\n  size--;\n  if (LIKELY (size != 0))\n    {\n      do\n        {\n          s_next = *src++;      /* src[i+1] */\n          s = sr | ((s_next << lshift) & lshift_mask);\n          x = s - c;\n          c = s < c;\n          sr = s_next >> rshift;\n\n          y = x - h;\n          c += (x < h);\n          q = y * inverse;\n          *dst++ = q;\n          umul_ppmm (h, dummy, q, divisor);\n\n          size--;\n        }\n      while (size != 0);\n    }\n\n  x = sr - c;\n  y = x - h;\n  q = y * inverse;\n  *dst = q;         /* dst[size-1] */\n}\n"
  },
  {
    "path": "mpn/alpha/divexact_by3c.asm",
    "content": "dnl  Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.\n\ndnl  Copyright 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:    22\nC EV5:    11.5\nC EV6:     6.3\n\nC TODO\nC  * Trim this to 6.0 c/l for ev6.\nC  * Write special ev5 version, should reach 9 c/l, and could be smaller.\nC  * Try prefetch for destination, using lds.\nC  * Improve feed-in code, by moving initial mulq earlier; make initial load\nC    to u0/u0 to save some copying.\nC  * Combine u0 and u2, u1 and u3.\n\nC INPUT 
PARAMETERS\ndefine(`rp',\t`r16')\ndefine(`up',\t`r17')\ndefine(`n',\t`r18')\ndefine(`cy',\t`r19')\n\nASM_START()\n\nDATASTART(L(LC))\n\t.quad\t0xAAAAAAAAAAAAAAAB\n\t.quad\t0x5555555555555555\n\t.quad\t0xAAAAAAAAAAAAAAAA\nDATAEND()\n\ndefine(`xAAAAAAAAAAAAAAAB',\t`r20')\ndefine(`x5555555555555555',\t`r21')\ndefine(`xAAAAAAAAAAAAAAAA',\t`r22')\ndefine(`u0',\t`r0')\tdefine(`u1',\t`r1')\ndefine(`u2',\t`r2')\tdefine(`u3',\t`r3')\ndefine(`l0',\t`r25')\tdefine(`x',\t`r8')\ndefine(`q0',\t`r4')\tdefine(`q1',\t`r5')\ndefine(`p6',\t`r6')\tdefine(`p7',\t`r7')\ndefine(`t0',\t`r23')\tdefine(`t1',\t`r24')\ndefine(`cymask',`r28')\n\n\nPROLOGUE(mpn_divexact_by3c,gp)\n\n\tldq\tr28, 0(up)\t\t\tC load first limb early\n\nC Put magic constants in registers\n\tlda\tr0, L(LC)\n\tldq\txAAAAAAAAAAAAAAAB, 0(r0)\n\tldq\tx5555555555555555, 8(r0)\n\tldq\txAAAAAAAAAAAAAAAA, 16(r0)\n\nC Compute initial l0 value\n\tcmpeq\tcy, 1, p6\n\tcmpeq\tcy, 2, p7\n\tnegq\tp6, p6\n\tand\tp6, x5555555555555555, l0\n\tcmovne\tp7, xAAAAAAAAAAAAAAAA, l0\n\nC Feed-in depending on (n mod 4)\n\tand\tn, 3, r8\n\tlda\tn, -3(n)\n\tcmpeq\tr8, 1, r4\n\tcmpeq\tr8, 2, r5\n\tbne\tr4, $Lb01\n\tbne\tr5, $Lb10\n\tbeq\tr8, $Lb00\n\n$Lb11:\tldq\tu3, 8(up)\n\tlda\tup, -24(up)\n\tlda\trp, -24(rp)\n\tmulq\tr28, xAAAAAAAAAAAAAAAB, q0\n\tmov\tr28, u2\n\tbr\tr31, $L11\n\n$Lb00:\tldq\tu2, 8(up)\n\tlda\tup, -16(up)\n\tlda\trp, -16(rp)\n\tmulq\tr28, xAAAAAAAAAAAAAAAB, q1\n\tmov\tr28, u1\n\tbr\tr31, $L00\n\n$Lb01:\tlda\trp, -8(rp)\n\tmulq\tr28, xAAAAAAAAAAAAAAAB, q0\n\tmov\tr28, u0\n\tblt\tn, $Lcj1\n\tldq\tu1, 8(up)\n\tlda\tup, -8(up)\n\tbr\tr31, $L01\n\n$Lb10:\tldq\tu0, 8(up)\n\tmulq\tr28, xAAAAAAAAAAAAAAAB, q1\n\tmov\tr28, u3\n\tblt\tn, $Lend\n\n\tALIGN(16)\n$Ltop:\nC 0\n\tcmpult\tu3, cy, cy\t\t\tC L0\n\tmulq\tu0, xAAAAAAAAAAAAAAAB, q0\tC U1\n\tldq\tu1, 16(up)\t\t\tC L1\n\taddq\tq1, l0, x\t\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\tC L0\n\tunop\t\t\t\t\tC U1\n\tunop\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\tC U0\nC 
2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\tunop\n\tunop\n\tnegq\tp6, t0\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, 0(rp)\t\t\tC L1\n\tunop\n$L01:\nC 0\n\tcmpult\tu0, cy, cy\t\t\tC L0\n\tmulq\tu1, xAAAAAAAAAAAAAAAB, q1\tC U1\n\tldq\tu2, 24(up)\t\t\tC L1\n\taddq\tq0, l0, x\t\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\tC L0\n\tunop\t\t\t\t\tC U1\n\tunop\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\tC U0\nC 2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\tunop\n\tunop\n\tnegq\tp6, t0\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, 8(rp)\t\t\tC L1\n\tunop\n$L00:\nC 0\n\tcmpult\tu1, cy, cy\t\t\tC L0\n\tmulq\tu2, xAAAAAAAAAAAAAAAB, q0\tC U1\n\tldq\tu3, 32(up)\t\t\tC L1\n\taddq\tq1, l0, x\t\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\tC L0\n\tunop\t\t\t\t\tC U1\n\tunop\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\tC U0\nC 2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\tunop\n\tunop\n\tnegq\tp6, t0\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, 16(rp)\t\t\tC L1\n\tunop\n$L11:\nC 0\n\tcmpult\tu2, cy, cy\t\t\tC L0\n\tmulq\tu3, xAAAAAAAAAAAAAAAB, q1\tC U1\n\tldq\tu0, 40(up)\t\t\tC L1\n\taddq\tq0, l0, x\t\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\tC L0\n\tunop\t\t\t\t\tC U1\n\tunop\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\tC U0\nC 2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\tlda\tn, -4(n)\t\t\tC L1 
bookkeeping\n\tunop\n\tnegq\tp6, t0\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, 24(rp)\t\t\tC L1\n\tlda\tup, 32(up)\nC\n\tldl\tr31, 256(up)\t\t\tC prefetch\n\tunop\n\tlda\trp, 32(rp)\n\tbge\tn, $Ltop\t\t\tC U1\nC *** MAIN LOOP END ***\n$Lend:\n\n\tcmpult\tu3, cy, cy\t\t\tC L0\n\tmulq\tu0, xAAAAAAAAAAAAAAAB, q0\tC U1\n\tunop\n\taddq\tq1, l0, x\t\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\tC L0\n\tunop\t\t\t\t\tC U1\n\tunop\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\tC U0\nC 2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\tunop\n\tunop\n\tnegq\tp6, t0\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, 0(rp)\t\t\tC L1\n\tunop\n$Lcj1:\n\tcmpult\tu0, cy, cy\t\t\tC L0\n\taddq\tq0, l0, x\t\t\tC U0\n\tcmpult\tx5555555555555555, x, p6\tC U0\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\tC U1\n\taddq\tp6, cy, cy\n\taddq\tp7, cy, r0\n\tstq\tx, 8(rp)\t\t\tC L1\n\n\tret\tr31,(r26),1\nEPILOGUE()\nASM_END()\n\nC This is useful for playing with various schedules.\nC Expand as: one(0)one(1)one(2)one(3)\ndefine(`one',`\nC 0\n\tcmpult\t`$'eval(($1+3)%4), cy, cy\t\tC L0\n\tmulq\t`$'$1, xAAAAAAAAAAAAAAAB, `$'eval(4+$1%2) C U1\n\tldq\t`$'eval(($1+1)%4), eval($1*8+16)(up)\tC L1\n\taddq\t`$'eval(4+($1+1)%2), l0, x\t\tC U0\nC 1\n\tnegq\tcy, cymask\t\t\t\tC L0\n\tunop\t\t\t\t\t\tC U1\n\tunop\t\t\t\t\t\tC L1\n\tcmpult\tx5555555555555555, x, p6\t\tC U0\nC 2\n\tcmpult\txAAAAAAAAAAAAAAAA, x, p7\t\tC U1\n\tunop\n\tunop\n\tnegq\tp6, t0\t\t\t\t\tC L0\nC 3\n\tnegq\tp7, t1\t\t\t\t\tC L0\n\tand\tcymask, x5555555555555555, l0\t\tC U1\n\taddq\tp6, cy, cy\n\tand\tt0, 
x5555555555555555, t0\nC 4\n\tand\tt1, x5555555555555555, t1\n\taddq\tp7, cy, cy\n\tunop\n\taddq\tt0, l0, l0\nC 5\n\taddq\tt1, l0, l0\n\tunop\n\tstq\tx, eval($1*8)(rp)\t\t\tC L1\n\tunop\n')\n"
  },
  {
    "path": "mpn/alpha/ev5/add_n.asm",
    "content": "dnl  Alpha EV5 mpn_add_n -- Add two limb vectors of the same length > 0 and\ndnl  store sum in a third limb vector.\n\ndnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     4.75\nC EV6:     3\n\ndnl  INPUT PARAMETERS\ndnl  res_ptr\tr16\ndnl  s1_ptr\tr17\ndnl  s2_ptr\tr18\ndnl  size\tr19\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tbis\tr31,r31,r25\t\tC clear cy\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tblt\tr19,$Lend2\t\tC if less than 4 limbs, goto 2nd loop\nC Start software pipeline for 1st loop\n\tldq\tr0,0(r18)\n\tldq\tr4,0(r17)\n\tldq\tr1,8(r18)\n\tldq\tr5,8(r17)\n\taddq\tr17,32,r17\t\tC update s1_ptr\n\tldq\tr2,16(r18)\n\taddq\tr0,r4,r20\t\tC 1st main add\n\tldq\tr3,24(r18)\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tldq\tr6,-16(r17)\n\tcmpult\tr20,r0,r25\t\tC compute cy from last add\n\tldq\tr7,-8(r17)\n\taddq\tr1,r5,r28\t\tC 2nd main add\n\taddq\tr18,32,r18\t\tC update s2_ptr\n\taddq\tr28,r25,r21\t\tC 2nd carry add\n\tcmpult\tr28,r5,r8\t\tC compute cy from last add\n\tblt\tr19,$Lend1\t\tC if less than 4 
limbs remain, jump\nC 1st loop handles groups of 4 limbs in a software pipeline\n\tALIGN(16)\n$Loop:\tcmpult\tr21,r28,r25\t\tC compute cy from last add\n\tldq\tr0,0(r18)\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\tldq\tr1,8(r18)\n\taddq\tr2,r6,r28\t\tC 3rd main add\n\tldq\tr4,0(r17)\n\taddq\tr28,r25,r22\t\tC 3rd carry add\n\tldq\tr5,8(r17)\n\tcmpult\tr28,r6,r8\t\tC compute cy from last add\n\tcmpult\tr22,r28,r25\t\tC compute cy from last add\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\tstq\tr21,8(r16)\n\taddq\tr3,r7,r28\t\tC 4th main add\n\taddq\tr28,r25,r23\t\tC 4th carry add\n\tcmpult\tr28,r7,r8\t\tC compute cy from last add\n\tcmpult\tr23,r28,r25\t\tC compute cy from last add\n\t\taddq\tr17,32,r17\t\tC update s1_ptr\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\t\taddq\tr16,32,r16\t\tC update res_ptr\n\taddq\tr0,r4,r28\t\tC 1st main add\n\tldq\tr2,16(r18)\n\taddq\tr25,r28,r20\t\tC 1st carry add\n\tldq\tr3,24(r18)\n\tcmpult\tr28,r4,r8\t\tC compute cy from last add\n\tldq\tr6,-16(r17)\n\tcmpult\tr20,r28,r25\t\tC compute cy from last add\n\tldq\tr7,-8(r17)\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tstq\tr22,-16(r16)\n\taddq\tr1,r5,r28\t\tC 2nd main add\n\tstq\tr23,-8(r16)\n\taddq\tr25,r28,r21\t\tC 2nd carry add\n\t\taddq\tr18,32,r18\t\tC update s2_ptr\n\tcmpult\tr28,r5,r8\t\tC compute cy from last add\n\tbge\tr19,$Loop\nC Finish software pipeline for 1st loop\n$Lend1:\tcmpult\tr21,r28,r25\t\tC compute cy from last add\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\taddq\tr2,r6,r28\t\tC 3rd main add\n\taddq\tr28,r25,r22\t\tC 3rd carry add\n\tcmpult\tr28,r6,r8\t\tC compute cy from last add\n\tcmpult\tr22,r28,r25\t\tC compute cy from last add\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\tstq\tr21,8(r16)\n\taddq\tr3,r7,r28\t\tC 4th main add\n\taddq\tr28,r25,r23\t\tC 4th carry add\n\tcmpult\tr28,r7,r8\t\tC compute cy from last 
add\n\tcmpult\tr23,r28,r25\t\tC compute cy from last add\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\taddq\tr16,32,r16\t\tC update res_ptr\n\tstq\tr22,-16(r16)\n\tstq\tr23,-8(r16)\n$Lend2:\taddq\tr19,4,r19\t\tC restore loop cnt\n\tbeq\tr19,$Lret\nC Start software pipeline for 2nd loop\n\tldq\tr0,0(r18)\n\tldq\tr4,0(r17)\n\tsubq\tr19,1,r19\n\tbeq\tr19,$Lend0\nC 2nd loop handles remaining 1-3 limbs\n\tALIGN(16)\n$Loop0:\taddq\tr0,r4,r28\t\tC main add\n\tldq\tr0,8(r18)\n\tcmpult\tr28,r4,r8\t\tC compute cy from last add\n\tldq\tr4,8(r17)\n\taddq\tr28,r25,r20\t\tC carry add\n\taddq\tr18,8,r18\n\taddq\tr17,8,r17\n\tstq\tr20,0(r16)\n\tcmpult\tr20,r28,r25\t\tC compute cy from last add\n\tsubq\tr19,1,r19\t\tC decr loop cnt\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\taddq\tr16,8,r16\n\tbne\tr19,$Loop0\n$Lend0:\taddq\tr0,r4,r28\t\tC main add\n\taddq\tr28,r25,r20\t\tC carry add\n\tcmpult\tr28,r4,r8\t\tC compute cy from last add\n\tcmpult\tr20,r28,r25\t\tC compute cy from last add\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from the two adds\n\n$Lret:\tbis\tr25,r31,r0\t\tC return cy\n\tret\tr31,(r26),1\nEPILOGUE(mpn_add_n)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev5/com_n.asm",
    "content": "dnl  Alpha EV5 mpn_com_n -- mpn one's complement.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC      cycles/limb\nC EV4:    4.75\nC EV5:    2.0\nC EV6:    1.5\n\n\nC mp_limb_t mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);\nC\nC For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total\nC 2.0 c/l.  In general, a pattern like this unrolled to N limbs per loop\nC will be 1.5+2/N c/l.\nC\nC 2 cycles of loop control are unavoidable, for pointer updates and the\nC taken branch bubble, but also since ldq cannot issue two cycles after stq\nC (and with a run of stqs that means neither of two cycles at the end of the\nC loop.\nC\nC The fbeq is forced into the second cycle of the loop using unops, since\nC the first time through it must wait for the cvtqt result.  
Once that\nC result is ready (a 1 cycle stall) then both the branch and following loads\nC can issue together.\nC\nC The main loop handles an odd count of limbs, being two limbs loaded before\nC each size test, plus one pipelined around from the previous iteration (or\nC setup in the entry sequence).\nC\nC An even number of limbs is handled by an explicit dst[0]=~src[0] in the\nC entry sequence, and an increment of the pointers.  For an odd size there's\nC no increment and the first store in the loop (r24) is a repeat of dst[0].\nC\nC Note that the load for r24 after the possible pointer increment is done\nC before the explicit store to dst[0], in case src==dst.\n\n\nASM_START()\n\nFLOAT64(L(dat), 2.0)\n\n\tALIGN(16)\n\nPROLOGUE(mpn_com_n,gp)\n\n\tC r16\tdst\n\tC r17\tsrc\n\tC r18\tsize\n\n\tlda\tr30, -16(r30)\t\tC temporary stack space\n\tlda\tr7, -3(r18)\t\tC size - 3\n\n\tldq\tr20, 0(r17)\t\tC src[0]\n\tsrl\tr7, 1, r6\t\tC (size-3)/2\n\n\tstq\tr6, 8(r30)\t\tC (size-3)/2\n\tand\tr7, 1, r5\t\tC 1 if size even\n\n\tLEA(\tr8, L(dat))\n\ts8addq\tr5, r17, r17\t\tC skip src[0] if even\n\n\tornot\tr31, r20, r20\t\tC ~src[0]\n\tunop\n\n\tldt\tf0, 8(r30)\t\tC (size-3)/2\n\tldq\tr24, 0(r17)\t\tC src[0 or 1]\n\n\tstq\tr20, 0(r16)\t\tC dst[0]\n\ts8addq\tr5, r16, r19\t\tC skip dst[0] if even\n\n\tldt\tf1, 0(r8)\t\tC data 2.0\n\tlda\tr30, 16(r30)\t\tC restore stack\n\tunop\n\tcvtqt\tf0, f0\t\t\tC (size-3)/2 as float\n\n\tornot\tr31, r24, r24\n\tblt\tr7, L(done_1)\t\tC if size<=2\n\tunop\n\tunop\n\n\n\tC 16-byte alignment here\nL(top):\n\tC r17\tsrc, incrementing\n\tC r19\tdst, incrementing\n\tC r24\tdst[i] result, ready to store\n\tC f0\t(size-3)/2, decrementing\n\tC f1\t2.0\n\n\tldq\tr20, 8(r17)\t\tC src[i+1]\n\tldq\tr21, 16(r17)\t\tC src[i+2]\n\tunop\n\tunop\n\n\tfbeq\tf0, L(done_2)\n\tunop\n\tldq\tr22, 24(r17)\t\tC src[i+3]\n\tldq\tr23, 32(r17)\t\tC src[i+4]\n\n\tstq\tr24, 0(r19)\t\tC dst[i]\n\tornot\tr31, r20, r20\n\tsubt\tf0, f1, f0\t\tC count -= 2\n\tunop\n\n\tstq\tr20, 
8(r19)\t\tC dst[i+1]\n\tornot\tr31, r21, r21\n\tunop\n\tunop\n\n\tstq\tr21, 16(r19)\t\tC dst[i+2]\n\tornot\tr31, r22, r22\n\n\tstq\tr22, 24(r19)\t\tC dst[i+3]\n\tornot\tr31, r23, r24\n\n\tlda\tr17, 32(r17)\t\tC src += 4\n\tlda\tr19, 32(r19)\t\tC dst += 4\n\tunop\n\tfbge\tf0, L(top)\n\n\nL(done_1):\n\tC r19\t&dst[size-1]\n\tC r24\tresult for dst[size-1]\n\n\tstq\tr24, 0(r19)\t\tC dst[size-1]\n\tret\tr31, (r26), 1\n\n\nL(done_2):\n\tC r19\t&dst[size-3]\n\tC r20\tsrc[size-2]\n\tC r21\tsrc[size-1]\n\tC r24\tresult for dst[size-3]\n\n\tstq\tr24, 0(r19)\t\tC dst[size-3]\n\tornot\tr31, r20, r20\n\n\tstq\tr20, 8(r19)\t\tC dst[size-2]\n\tornot\tr31, r21, r21\n\n\tstq\tr21, 16(r19)\t\tC dst[size-1]\n\tret\tr31, (r26), 1\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev5/lshift.asm",
    "content": "dnl  Alpha EV5 mpn_lshift -- Shift a number left.\n\ndnl  Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     3.25\nC EV6:     1.75\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  cnt\tr19\n\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\ts8addq\tr18,r17,r17\tC make r17 point at end of s1\n\tldq\tr4,-8(r17)\tC load first limb\n\tsubq\tr31,r19,r20\n\ts8addq\tr18,r16,r16\tC make r16 point at end of RES\n\tsubq\tr18,1,r18\n\tand\tr18,4-1,r28\tC number of limbs in first loop\n\tsrl\tr4,r20,r0\tC compute function result\n\n\tbeq\tr28,$L0\n\tsubq\tr18,r28,r18\n\n\tALIGN(8)\n$Loop0:\tldq\tr3,-16(r17)\n\tsubq\tr16,8,r16\n\tsll\tr4,r19,r5\n\tsubq\tr17,8,r17\n\tsubq\tr28,1,r28\n\tsrl\tr3,r20,r6\n\tbis\tr3,r3,r4\n\tbis\tr5,r6,r8\n\tstq\tr8,0(r16)\n\tbne\tr28,$Loop0\n\n$L0:\tsll\tr4,r19,r24\n\tbeq\tr18,$Lend\nC warm up phase 1\n\tldq\tr1,-16(r17)\n\tsubq\tr18,4,r18\n\tldq\tr2,-24(r17)\n\tldq\tr3,-32(r17)\n\tldq\tr4,-40(r17)\n\tbeq\tr18,$Lend1\nC warm up phase 
2\n\tsrl\tr1,r20,r7\n\tsll\tr1,r19,r21\n\tsrl\tr2,r20,r8\n\tldq\tr1,-48(r17)\n\tsll\tr2,r19,r22\n\tldq\tr2,-56(r17)\n\tsrl\tr3,r20,r5\n\tbis\tr7,r24,r7\n\tsll\tr3,r19,r23\n\tbis\tr8,r21,r8\n\tsrl\tr4,r20,r6\n\tldq\tr3,-64(r17)\n\tsll\tr4,r19,r24\n\tldq\tr4,-72(r17)\n\tsubq\tr18,4,r18\n\tbeq\tr18,$Lend2\n\tALIGN(16)\nC main loop\n$Loop:\tstq\tr7,-8(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,-16(r16)\n\tbis\tr6,r23,r6\n\n\tsrl\tr1,r20,r7\n\tsubq\tr18,4,r18\n\tsll\tr1,r19,r21\n\tunop\tC ldq\tr31,-96(r17)\n\n\tsrl\tr2,r20,r8\n\tldq\tr1,-80(r17)\n\tsll\tr2,r19,r22\n\tldq\tr2,-88(r17)\n\n\tstq\tr5,-24(r16)\n\tbis\tr7,r24,r7\n\tstq\tr6,-32(r16)\n\tbis\tr8,r21,r8\n\n\tsrl\tr3,r20,r5\n\tunop\tC ldq\tr31,-96(r17)\n\tsll\tr3,r19,r23\n\tsubq\tr16,32,r16\n\n\tsrl\tr4,r20,r6\n\tldq\tr3,-96(r17)\n\tsll\tr4,r19,r24\n\tldq\tr4,-104(r17)\n\n\tsubq\tr17,32,r17\n\tbne\tr18,$Loop\nC cool down phase 2/1\n$Lend2:\tstq\tr7,-8(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,-16(r16)\n\tbis\tr6,r23,r6\n\tsrl\tr1,r20,r7\n\tsll\tr1,r19,r21\n\tsrl\tr2,r20,r8\n\tsll\tr2,r19,r22\n\tstq\tr5,-24(r16)\n\tbis\tr7,r24,r7\n\tstq\tr6,-32(r16)\n\tbis\tr8,r21,r8\n\tsrl\tr3,r20,r5\n\tsll\tr3,r19,r23\n\tsrl\tr4,r20,r6\n\tsll\tr4,r19,r24\nC cool down phase 2/2\n\tstq\tr7,-40(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,-48(r16)\n\tbis\tr6,r23,r6\n\tstq\tr5,-56(r16)\n\tstq\tr6,-64(r16)\nC cool down phase 2/3\n\tstq\tr24,-72(r16)\n\tret\tr31,(r26),1\n\nC cool down phase 1/1\n$Lend1:\tsrl\tr1,r20,r7\n\tsll\tr1,r19,r21\n\tsrl\tr2,r20,r8\n\tsll\tr2,r19,r22\n\tsrl\tr3,r20,r5\n\tbis\tr7,r24,r7\n\tsll\tr3,r19,r23\n\tbis\tr8,r21,r8\n\tsrl\tr4,r20,r6\n\tsll\tr4,r19,r24\nC cool down phase 1/2\n\tstq\tr7,-8(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,-16(r16)\n\tbis\tr6,r23,r6\n\tstq\tr5,-24(r16)\n\tstq\tr6,-32(r16)\n\tstq\tr24,-40(r16)\n\tret\tr31,(r26),1\n\n$Lend:\tstq\tr24,-8(r16)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_lshift)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev5/rshift.asm",
    "content": "dnl  Alpha EV5 mpn_rshift -- Shift a number right.\n\ndnl  Copyright 1994, 1995, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     3.25\nC EV6:     1.75\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  cnt\tr19\n\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tldq\tr4,0(r17)\tC load first limb\n\tsubq\tr31,r19,r20\n\tsubq\tr18,1,r18\n\tand\tr18,4-1,r28\tC number of limbs in first loop\n\tsll\tr4,r20,r0\tC compute function result\n\n\tbeq\tr28,$L0\n\tsubq\tr18,r28,r18\n\n\tALIGN(8)\n$Loop0:\tldq\tr3,8(r17)\n\taddq\tr16,8,r16\n\tsrl\tr4,r19,r5\n\taddq\tr17,8,r17\n\tsubq\tr28,1,r28\n\tsll\tr3,r20,r6\n\tbis\tr3,r3,r4\n\tbis\tr5,r6,r8\n\tstq\tr8,-8(r16)\n\tbne\tr28,$Loop0\n\n$L0:\tsrl\tr4,r19,r24\n\tbeq\tr18,$Lend\nC warm up phase 1\n\tldq\tr1,8(r17)\n\tsubq\tr18,4,r18\n\tldq\tr2,16(r17)\n\tldq\tr3,24(r17)\n\tldq\tr4,32(r17)\n\tbeq\tr18,$Lend1\nC warm up phase 
2\n\tsll\tr1,r20,r7\n\tsrl\tr1,r19,r21\n\tsll\tr2,r20,r8\n\tldq\tr1,40(r17)\n\tsrl\tr2,r19,r22\n\tldq\tr2,48(r17)\n\tsll\tr3,r20,r5\n\tbis\tr7,r24,r7\n\tsrl\tr3,r19,r23\n\tbis\tr8,r21,r8\n\tsll\tr4,r20,r6\n\tldq\tr3,56(r17)\n\tsrl\tr4,r19,r24\n\tldq\tr4,64(r17)\n\tsubq\tr18,4,r18\n\tbeq\tr18,$Lend2\n\tALIGN(16)\nC main loop\n$Loop:\tstq\tr7,0(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,8(r16)\n\tbis\tr6,r23,r6\n\n\tsll\tr1,r20,r7\n\tsubq\tr18,4,r18\n\tsrl\tr1,r19,r21\n\tunop\tC ldq\tr31,-96(r17)\n\n\tsll\tr2,r20,r8\n\tldq\tr1,72(r17)\n\tsrl\tr2,r19,r22\n\tldq\tr2,80(r17)\n\n\tstq\tr5,16(r16)\n\tbis\tr7,r24,r7\n\tstq\tr6,24(r16)\n\tbis\tr8,r21,r8\n\n\tsll\tr3,r20,r5\n\tunop\tC ldq\tr31,-96(r17)\n\tsrl\tr3,r19,r23\n\taddq\tr16,32,r16\n\n\tsll\tr4,r20,r6\n\tldq\tr3,88(r17)\n\tsrl\tr4,r19,r24\n\tldq\tr4,96(r17)\n\n\taddq\tr17,32,r17\n\tbne\tr18,$Loop\nC cool down phase 2/1\n$Lend2:\tstq\tr7,0(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,8(r16)\n\tbis\tr6,r23,r6\n\tsll\tr1,r20,r7\n\tsrl\tr1,r19,r21\n\tsll\tr2,r20,r8\n\tsrl\tr2,r19,r22\n\tstq\tr5,16(r16)\n\tbis\tr7,r24,r7\n\tstq\tr6,24(r16)\n\tbis\tr8,r21,r8\n\tsll\tr3,r20,r5\n\tsrl\tr3,r19,r23\n\tsll\tr4,r20,r6\n\tsrl\tr4,r19,r24\nC cool down phase 2/2\n\tstq\tr7,32(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,40(r16)\n\tbis\tr6,r23,r6\n\tstq\tr5,48(r16)\n\tstq\tr6,56(r16)\nC cool down phase 2/3\n\tstq\tr24,64(r16)\n\tret\tr31,(r26),1\n\nC cool down phase 1/1\n$Lend1:\tsll\tr1,r20,r7\n\tsrl\tr1,r19,r21\n\tsll\tr2,r20,r8\n\tsrl\tr2,r19,r22\n\tsll\tr3,r20,r5\n\tbis\tr7,r24,r7\n\tsrl\tr3,r19,r23\n\tbis\tr8,r21,r8\n\tsll\tr4,r20,r6\n\tsrl\tr4,r19,r24\nC cool down phase 1/2\n\tstq\tr7,0(r16)\n\tbis\tr5,r22,r5\n\tstq\tr8,8(r16)\n\tbis\tr6,r23,r6\n\tstq\tr5,16(r16)\n\tstq\tr6,24(r16)\n\tstq\tr24,32(r16)\n\tret\tr31,(r26),1\n\n$Lend:\tstq\tr24,0(r16)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_rshift)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev5/sub_n.asm",
    "content": "dnl  Alpha EV5 mpn_sub_n -- Subtract two limb vectors of the same length > 0\ndnl  and store difference in a third limb vector.\n\ndnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     4.75\nC EV6:     3\n\ndnl  INPUT PARAMETERS\ndnl  res_ptr\tr16\ndnl  s1_ptr\tr17\ndnl  s2_ptr\tr18\ndnl  size\tr19\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tbis\tr31,r31,r25\t\tC clear cy\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tblt\tr19,$Lend2\t\tC if less than 4 limbs, goto 2nd loop\nC Start software pipeline for 1st loop\n\tldq\tr0,0(r18)\n\tldq\tr4,0(r17)\n\tldq\tr1,8(r18)\n\tldq\tr5,8(r17)\n\taddq\tr17,32,r17\t\tC update s1_ptr\n\tldq\tr2,16(r18)\n\tsubq\tr4,r0,r20\t\tC 1st main subtract\n\tldq\tr3,24(r18)\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tldq\tr6,-16(r17)\n\tcmpult\tr4,r0,r25\t\tC compute cy from last subtract\n\tldq\tr7,-8(r17)\n\tsubq\tr5,r1,r28\t\tC 2nd main subtract\n\taddq\tr18,32,r18\t\tC update s2_ptr\n\tsubq\tr28,r25,r21\t\tC 2nd carry subtract\n\tcmpult\tr5,r1,r8\t\tC compute cy from last 
subtract\n\tblt\tr19,$Lend1\t\tC if less than 4 limbs remain, jump\nC 1st loop handles groups of 4 limbs in a software pipeline\n\tALIGN(16)\n$Loop:\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tldq\tr0,0(r18)\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\tldq\tr1,8(r18)\n\tsubq\tr6,r2,r28\t\tC 3rd main subtract\n\tldq\tr4,0(r17)\n\tsubq\tr28,r25,r22\t\tC 3rd carry subtract\n\tldq\tr5,8(r17)\n\tcmpult\tr6,r2,r8\t\tC compute cy from last subtract\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\tstq\tr21,8(r16)\n\tsubq\tr7,r3,r28\t\tC 4th main subtract\n\tsubq\tr28,r25,r23\t\tC 4th carry subtract\n\tcmpult\tr7,r3,r8\t\tC compute cy from last subtract\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\t\taddq\tr17,32,r17\t\tC update s1_ptr\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\t\taddq\tr16,32,r16\t\tC update res_ptr\n\tsubq\tr4,r0,r28\t\tC 1st main subtract\n\tldq\tr2,16(r18)\n\tsubq\tr28,r25,r20\t\tC 1st carry subtract\n\tldq\tr3,24(r18)\n\tcmpult\tr4,r0,r8\t\tC compute cy from last subtract\n\tldq\tr6,-16(r17)\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tldq\tr7,-8(r17)\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\tsubq\tr19,4,r19\t\tC decr loop cnt\n\tstq\tr22,-16(r16)\n\tsubq\tr5,r1,r28\t\tC 2nd main subtract\n\tstq\tr23,-8(r16)\n\tsubq\tr28,r25,r21\t\tC 2nd carry subtract\n\t\taddq\tr18,32,r18\t\tC update s2_ptr\n\tcmpult\tr5,r1,r8\t\tC compute cy from last subtract\n\tbge\tr19,$Loop\nC Finish software pipeline for 1st loop\n$Lend1:\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\tsubq\tr6,r2,r28\t\tC 3rd main subtract\n\tsubq\tr28,r25,r22\t\tC 3rd carry subtract\n\tcmpult\tr6,r2,r8\t\tC compute cy from last subtract\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from 
the two subtracts\n\tstq\tr21,8(r16)\n\tsubq\tr7,r3,r28\t\tC 4th main subtract\n\tsubq\tr28,r25,r23\t\tC 4th carry subtract\n\tcmpult\tr7,r3,r8\t\tC compute cy from last subtract\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\taddq\tr16,32,r16\t\tC update res_ptr\n\tstq\tr22,-16(r16)\n\tstq\tr23,-8(r16)\n$Lend2:\taddq\tr19,4,r19\t\tC restore loop cnt\n\tbeq\tr19,$Lret\nC Start software pipeline for 2nd loop\n\tldq\tr0,0(r18)\n\tldq\tr4,0(r17)\n\tsubq\tr19,1,r19\n\tbeq\tr19,$Lend0\nC 2nd loop handles remaining 1-3 limbs\n\tALIGN(16)\n$Loop0:\tsubq\tr4,r0,r28\t\tC main subtract\n\tcmpult\tr4,r0,r8\t\tC compute cy from last subtract\n\tldq\tr0,8(r18)\n\tldq\tr4,8(r17)\n\tsubq\tr28,r25,r20\t\tC carry subtract\n\taddq\tr18,8,r18\n\taddq\tr17,8,r17\n\tstq\tr20,0(r16)\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tsubq\tr19,1,r19\t\tC decr loop cnt\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\taddq\tr16,8,r16\n\tbne\tr19,$Loop0\n$Lend0:\tsubq\tr4,r0,r28\t\tC main subtract\n\tsubq\tr28,r25,r20\t\tC carry subtract\n\tcmpult\tr4,r0,r8\t\tC compute cy from last subtract\n\tcmpult\tr28,r25,r25\t\tC compute cy from last subtract\n\tstq\tr20,0(r16)\n\tbis\tr8,r25,r25\t\tC combine cy from the two subtracts\n\n$Lret:\tbis\tr25,r31,r0\t\tC return cy\n\tret\tr31,(r26),1\nEPILOGUE(mpn_sub_n)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/add_n.asm",
    "content": "dnl  Alpha ev6 mpn_add_n -- Add two limb vectors of the same length > 0 and\ndnl  store sum in a third limb vector.\n\ndnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     5.4\nC EV6:     2.125\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  vp\tr18\nC  n\tr19\nC  cy\tr20   (for mpn_add_nc)\n\nC TODO\nC   Finish cleaning up cy registers r22, r23 (make them use cy0/cy1)\nC   Use multi-pronged feed-in.\nC   Perform additional micro-tuning\n\nC  This code was written in cooperation with ev6 pipeline expert Steve Root.\n\nC  Pair loads and stores where possible\nC  Store pairs oct-aligned where possible (didn't need it here)\nC  Stores are delayed every third cycle\nC  Loads and stores are delayed by fills\nC  U stays still, put code there where possible (note alternation of U1 and U0)\nC  L moves because of loads and stores\nC  Note dampers in L to limit damage\n\nC  This odd-looking optimization expects that were having random bits in our\nC  data, so that a pure zero result is unlikely. 
so we penalize the unlikely\nC  case to help the common case.\n\ndefine(`u0', `r0')  define(`u1', `r3')\ndefine(`v0', `r1')  define(`v1', `r4')\n\ndefine(`cy0', `r20')  define(`cy1', `r21')\n\nMULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)\n\nASM_START()\nPROLOGUE(mpn_add_nc)\n\tbr\tr31,\t$entry\nEPILOGUE()\nPROLOGUE(mpn_add_n)\n\tbis\tr31,\tr31,\tcy0\tC clear carry in\n$entry:\tcmpult\tr19,\t5,\tr22\tC L1 move counter\n\tldq\tu1,\t0(r17)\t\tC L0 get next ones\n\tldq\tv1,\t0(r18)\t\tC L1\n\tbne\tr22,\t$Lsmall\n\n\tldq\tu0,\t8(r17)\t\tC L0 get next ones\n\tldq\tv0,\t8(r18)\t\tC L1\n\taddq\tu1,\tv1,\tr5\tC U0 add two data\n\n\tcmpult\tr5,\tv1,\tr23\tC U0 did it carry\n\tldq\tu1,\t16(r17)\t\tC L0 get next ones\n\tldq\tv1,\t16(r18)\t\tC L1\n\n\taddq\tu0,\tv0,\tr8\tC U1 add two data\n\taddq\tr5,\tcy0,\tr5\tC U0 carry in\n\n\tcmpult\tr8,\tv0,\tr22\tC U1 did it carry\n\tbeq\tr5,\t$fix5f\t\tC U0 fix exact zero\n$ret5f:\tldq\tu0,\t24(r17)\t\tC L0 get next ones\n\tldq\tv0,\t24(r18)\t\tC L1\n\n\taddq\tr8,\tr23,\tr8\tC U1 carry from last\n\taddq\tu1,\tv1,\tr7\tC U0 add two data\n\n\tbeq\tr8,\t$fix6f\t\tC U1 fix exact zero\n$ret6f:\tcmpult\tr7,\tv1,\tr23\tC U0 did it carry\n\tldq\tu1,\t32(r17)\t\tC L0 get next ones\n\tldq\tv1,\t32(r18)\t\tC L1\n\n\tlda\tr17,\t40(r17)\t\tC L0 move pointer\n\tlda\tr18,\t40(r18)\t\tC L1 move pointer\n\n\tlda\tr16,\t-8(r16)\n\tlda\tr19,\t-13(r19)\tC L1 move counter\n\tblt\tr19,\t$Lend\t\tC U1 loop control\n\n\nC Main loop.  
8-way unrolled.\n\tALIGN(16)\n$Loop:\taddq\tu0,\tv0,\tr2\tC U1 add two data\n\taddq\tr7,\tr22,\tr7\tC U0 add in carry\n\tstq\tr5,\t8(r16)\t\tC L0 put an answer\n\tstq\tr8,\t16(r16)\t\tC L1 pair\n\n\tcmpult\tr2,\tv0,\tcy1\tC U1 did it carry\n\tbeq\tr7,\t$fix7\t\tC U0 fix exact 0\n$ret7:\tldq\tu0,\t0(r17)\t\tC L0 get next ones\n\tldq\tv0,\t0(r18)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L  damp out\n\taddq\tr2,\tr23,\tr2\tC U1 carry from last\n\tbis\tr31,\tr31,\tr31\tC L  moves in L !\n\taddq\tu1,\tv1,\tr5\tC U0 add two data\n\n\tbeq\tr2,\t$fix0\t\tC U1 fix exact zero\n$ret0:\tcmpult\tr5,\tv1,\tcy0\tC U0 did it carry\n\tldq\tu1,\t8(r17)\t\tC L0 get next ones\n\tldq\tv1,\t8(r18)\t\tC L1\n\n\taddq\tu0,\tv0,\tr8\tC U1 add two data\n\taddq\tr5,\tcy1,\tr5\tC U0 carry from last\n\tstq\tr7,\t24(r16)\t\tC L0 store pair\n\tstq\tr2,\t32(r16)\t\tC L1\n\n\tcmpult\tr8,\tv0,\tr22\tC U1 did it carry\n\tbeq\tr5,\t$fix1\t\tC U0 fix exact zero\n$ret1:\tldq\tu0,\t16(r17)\t\tC L0 get next ones\n\tldq\tv0,\t16(r18)\t\tC L1\n\n\tlda\tr16,\t64(r16)\t\tC L0 move pointer\n\taddq\tr8,\tcy0,\tr8\tC U1 carry from last\n\tlda\tr19,\t-8(r19)\t\tC L1 move counter\n\taddq\tu1,\tv1,\tr7\tC U0 add two data\n\n\tbeq\tr8,\t$fix2\t\tC U1 fix exact zero\n$ret2:\tcmpult\tr7,\tv1,\tr23\tC U0 did it carry\n\tldq\tu1,\t24(r17)\t\tC L0 get next ones\n\tldq\tv1,\t24(r18)\t\tC L1\n\n\taddq\tu0,\tv0,\tr2\tC U1 add two data\n\taddq\tr7,\tr22,\tr7\tC U0 add in carry\n\tstq\tr5,\t-24(r16)\tC L0 put an answer\n\tstq\tr8,\t-16(r16)\tC L1 pair\n\n\tcmpult\tr2,\tv0,\tcy1\tC U1 did it carry\n\tbeq\tr7,\t$fix3\t\tC U0 fix exact 0\n$ret3:\tldq\tu0,\t32(r17)\t\tC L0 get next ones\n\tldq\tv0,\t32(r18)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L  damp out\n\taddq\tr2,\tr23,\tr2\tC U1 carry from last\n\tbis\tr31,\tr31,\tr31\tC L  moves in L !\n\taddq\tu1,\tv1,\tr5\tC U0 add two data\n\n\tbeq\tr2,\t$fix4\t\tC U1 fix exact zero\n$ret4:\tcmpult\tr5,\tv1,\tcy0\tC U0 did it carry\n\tldq\tu1,\t40(r17)\t\tC L0 get next 
ones\n\tldq\tv1,\t40(r18)\t\tC L1\n\n\taddq\tu0,\tv0,\tr8\tC U1 add two data\n\taddq\tr5,\tcy1,\tr5\tC U0 carry from last\n\tstq\tr7,\t-8(r16)\t\tC L0 store pair\n\tstq\tr2,\t0(r16)\t\tC L1\n\n\tcmpult\tr8,\tv0,\tr22\tC U1 did it carry\n\tbeq\tr5,\t$fix5\t\tC U0 fix exact zero\n$ret5:\tldq\tu0,\t48(r17)\t\tC L0 get next ones\n\tldq\tv0,\t48(r18)\t\tC L1\n\n\tldl\tr31, 256(r17)\t\tC L0 prefetch\n\taddq\tr8,\tcy0,\tr8\tC U1 carry from last\n\tldl\tr31, 256(r18)\t\tC L1 prefetch\n\taddq\tu1,\tv1,\tr7\tC U0 add two data\n\n\tbeq\tr8,\t$fix6\t\tC U1 fix exact zero\n$ret6:\tcmpult\tr7,\tv1,\tr23\tC U0 did it carry\n\tldq\tu1,\t56(r17)\t\tC L0 get next ones\n\tldq\tv1,\t56(r18)\t\tC L1\n\n\tlda\tr17,\t64(r17)\t\tC L0 move pointer\n\tbis\tr31,\tr31,\tr31\tC U\n\tlda\tr18,\t64(r18)\t\tC L1 move pointer\n\tbge\tr19,\t$Loop\t\tC U1 loop control\nC ==== main loop end\n\n$Lend:\taddq\tu0,\tv0,\tr2\tC U1 add two data\n\taddq\tr7,\tr22,\tr7\tC U0 add in carry\n\tstq\tr5,\t8(r16)\t\tC L0 put an answer\n\tstq\tr8,\t16(r16)\t\tC L1 pair\n\tcmpult\tr2,\tv0,\tcy1\tC U1 did it carry\n\tbeq\tr7,\t$fix7c\t\tC U0 fix exact 0\n$ret7c:\taddq\tr2,\tr23,\tr2\tC U1 carry from last\n\taddq\tu1,\tv1,\tr5\tC U0 add two data\n\tbeq\tr2,\t$fix0c\t\tC U1 fix exact zero\n$ret0c:\tcmpult\tr5,\tv1,\tcy0\tC U0 did it carry\n\taddq\tr5,\tcy1,\tr5\tC U0 carry from last\n\tstq\tr7,\t24(r16)\t\tC L0 store pair\n\tstq\tr2,\t32(r16)\t\tC L1\n\tbeq\tr5,\t$fix1c\t\tC U0 fix exact zero\n$ret1c:\tstq\tr5,\t40(r16)\t\tC L0 put an answer\n\tlda\tr16,\t48(r16)\t\tC L0 move pointer\n\n\tlda\tr19,\t8(r19)\n\tbeq\tr19,\t$Lret\n\n\tldq\tu1,\t0(r17)\n\tldq\tv1,\t0(r18)\n$Lsmall:\n\tlda\tr19,\t-1(r19)\n\tbeq\tr19,\t$Lend0\n\n\tALIGN(8)\n$Loop0:\taddq\tu1,\tv1,\tr2\tC main add\n\tcmpult\tr2,\tv1,\tr8\tC compute cy from last add\n\tldq\tu1,\t8(r17)\n\tldq\tv1,\t8(r18)\n\taddq\tr2,\tcy0,\tr5\tC carry add\n\tlda\tr17,\t8(r17)\n\tlda\tr18,\t8(r18)\n\tstq\tr5,\t0(r16)\n\tcmpult\tr5,\tr2,\tcy0\tC compute cy from last 
add\n\tlda\tr19,\t-1(r19)\t\tC decr loop cnt\n\tbis\tr8,\tcy0,\tcy0\tC combine cy from the two adds\n\tlda\tr16,\t8(r16)\n\tbne\tr19,\t$Loop0\n$Lend0:\taddq\tu1,\tv1,\tr2\tC main add\n\taddq\tr2,\tcy0,\tr5\tC carry add\n\tcmpult\tr2,\tv1,\tr8\tC compute cy from last add\n\tcmpult\tr5,\tr2,\tcy0\tC compute cy from last add\n\tstq\tr5,\t0(r16)\n\tbis\tr8,\tcy0,\tr0\tC combine cy from the two adds\n\tret\tr31,(r26),1\n\n\tALIGN(8)\n$Lret:\tlda\tr0,\t0(cy0)\t\tC copy carry into return register\n\tret\tr31,(r26),1\n\n$fix5f:\tbis\tr23,\tcy0,\tr23\tC bring forward carry\n\tbr\tr31,\t$ret5f\n$fix6f:\tbis\tr22,\tr23,\tr22\tC bring forward carry\n\tbr\tr31,\t$ret6f\n$fix0:\tbis\tcy1,\tr23,\tcy1\tC bring forward carry\n\tbr\tr31,\t$ret0\n$fix1:\tbis\tcy0,\tcy1,\tcy0\tC bring forward carry\n\tbr\tr31,\t$ret1\n$fix2:\tbis\tr22,\tcy0,\tr22\tC bring forward carry\n\tbr\tr31,\t$ret2\n$fix3:\tbis\tr23,\tr22,\tr23\tC bring forward carry\n\tbr\tr31,\t$ret3\n$fix4:\tbis\tcy1,\tr23,\tcy1\tC bring forward carry\n\tbr\tr31,\t$ret4\n$fix5:\tbis\tcy1,\tcy0,\tcy0\tC bring forward carry\n\tbr\tr31,\t$ret5\n$fix6:\tbis\tr22,\tcy0,\tr22\tC bring forward carry\n\tbr\tr31,\t$ret6\n$fix7:\tbis\tr23,\tr22,\tr23\tC bring forward carry\n\tbr\tr31,\t$ret7\n$fix0c:\tbis\tcy1,\tr23,\tcy1\tC bring forward carry\n\tbr\tr31,\t$ret0c\n$fix1c:\tbis\tcy0,\tcy1,\tcy0\tC bring forward carry\n\tbr\tr31,\t$ret1c\n$fix7c:\tbis\tr23,\tr22,\tr23\tC bring forward carry\n\tbr\tr31,\t$ret7c\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/addmul_1.asm",
    "content": "dnl  Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add the\ndnl  result to a second limb vector.\n\ndnl  Copyright 2000, 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:    42\nC EV5:    18\nC EV6:     3.5\n\nC  INPUT PARAMETERS\nC  rp\t  r16\nC  up\t  r17\nC  n\t  r18\nC  vlimb  r19\n\ndnl  This code was written in cooperation with ev6 pipeline expert Steve Root.\n\ndnl  The stores can issue a cycle late so we have paired no-op's to 'catch'\ndnl  them, so that further disturbance to the schedule is damped.\n\ndnl  We couldn't pair the loads, because the entangled schedule of the carry's\ndnl  has to happen on one side {0} of the machine.\n\ndnl  This is a great schedule for the d_cache, a poor schedule for the b_cache.\ndnl  The lockup on U0 means that any stall can't be recovered from.  Consider a\ndnl  ldq in L1, say that load gets stalled because it collides with a fill from\ndnl  the b_cache.  On the next cycle, this load gets priority.  If first looks\ndnl  at L0, and goes there.  
The instruction we intended for L0 gets to look at\ndnl  L1, which is NOT where we want it.  It either stalls 1, because it can't\ndnl  go in L0, or goes there, and causes a further instruction to stall.\n\ndnl  So for b_cache, we're likely going to want to put one or more cycles back\ndnl  into the code! And, of course, put in lds prefetch for the rp[] operand.\ndnl  At a place where we have an mt followed by a bookkeeping, put the\ndnl  bookkeeping in upper, and the prefetch into lower.\n\ndnl  Note, the ldq's and stq's are at the end of the quadpacks.  Note, we'd\ndnl  like not to have an ldq or an stq to preceded a conditional branch in a\ndnl  quadpack.  The conditional branch moves the retire pointer one cycle\ndnl  later.\n\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tldq\tr3,\t0(r17)\t\tC\n\tand\tr18,\t7,\tr20\tC\n\tlda\tr18,\t-9(r18)\t\tC\n\tcmpeq\tr20,\t1,\tr21\tC\n\tbeq\tr21,\t$L1\t\tC\n\n$1mod8:\tldq\tr5,\t0(r16)\t\tC\n\tmulq\tr19,\tr3,\tr7\tC\n\tumulh\tr19,\tr3,\tr8\tC\n\taddq\tr5,\tr7,\tr23\tC\n\tcmpult\tr23,\tr7,\tr20\tC\n\taddq\tr8,\tr20,\tr0\tC\n\tstq\tr23,\t0(r16)\t\tC\n\tbge\tr18,\t$ent1\t\tC\n\tret\tr31,\t(r26),\t1\tC\n\n$L1:\tlda\tr8,\t0(r31)\t\tC zero carry reg\n\tlda\tr24,\t0(r31)\t\tC zero carry 
reg\n\tcmpeq\tr20,\t2,\tr21\tC\n\tbne\tr21,\t$2mod8\t\tC\n\tcmpeq\tr20,\t3,\tr21\tC\n\tbne\tr21,\t$3mod8\t\tC\n\tcmpeq\tr20,\t4,\tr21\tC\n\tbne\tr21,\t$4mod8\t\tC\n\tcmpeq\tr20,\t5,\tr21\tC\n\tbne\tr21,\t$5mod8\t\tC\n\tcmpeq\tr20,\t6,\tr21\tC\n\tbne\tr21,\t$6mod8\t\tC\n\tcmpeq\tr20,\t7,\tr21\tC\n\tbeq\tr21,\t$0mod8\t\tC\n\n$7mod8:\tldq\tr5,\t0(r16)\t\tC\n\tlda\tr17,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr7\tC\n\tumulh\tr19,\tr3,\tr24\tC\n\taddq\tr5,\tr7,\tr23\tC\n\tcmpult\tr23,\tr7,\tr20\tC\n\taddq\tr24,\tr20,\tr24\tC\n\tstq\tr23,\t0(r16)\t\tC\n\tlda\tr16,\t8(r16)\t\tC\n\tldq\tr3,\t0(r17)\t\tC\n$6mod8:\tldq\tr1,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr25\tC\n\tumulh\tr19,\tr3,\tr3\tC\n\tmulq\tr19,\tr1,\tr28\tC\n\tldq\tr0,\t16(r17)\t\tC\n\tldq\tr4,\t0(r16)\t\tC\n\tumulh\tr19,\tr1,\tr8\tC\n\tldq\tr1,\t24(r17)\t\tC\n\tlda\tr17,\t48(r17)\t\tC L1 bookkeeping\n\tmulq\tr19,\tr0,\tr2\tC\n\tldq\tr5,\t8(r16)\t\tC\n\tlda\tr16,\t-32(r16)\tC L1 bookkeeping\n\tumulh\tr19,\tr0,\tr6\tC\n\taddq\tr4,\tr25,\tr4\tC lo + acc\n\tmulq\tr19,\tr1,\tr7\tC\n\tbr\tr31,\t$ent6\t\tC\n\n$ent1:\tlda\tr17,\t8(r17)\t\tC\n\tlda\tr16,\t8(r16)\t\tC\n\tlda\tr8,\t0(r0)\t\tC\n\tldq\tr3,\t0(r17)\t\tC\n$0mod8:\tldq\tr1,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr2\tC\n\tumulh\tr19,\tr3,\tr6\tC\n\tmulq\tr19,\tr1,\tr7\tC\n\tldq\tr0,\t16(r17)\t\tC\n\tldq\tr4,\t0(r16)\t\tC\n\tumulh\tr19,\tr1,\tr24\tC\n\tldq\tr1,\t24(r17)\t\tC\n\tmulq\tr19,\tr0,\tr25\tC\n\tldq\tr5,\t8(r16)\t\tC\n\tumulh\tr19,\tr0,\tr3\tC\n\taddq\tr4,\tr2,\tr4\tC lo + 
acc\n\tmulq\tr19,\tr1,\tr28\tC\n\tlda\tr16,\t-16(r16)\tC\n\tbr\tr31,\t$ent0\t\tC\n\n$3mod8:\tldq\tr5,\t0(r16)\t\tC\n\tlda\tr17,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr7\tC\n\tumulh\tr19,\tr3,\tr8\tC\n\taddq\tr5,\tr7,\tr23\tC\n\tcmpult\tr23,\tr7,\tr20\tC\n\taddq\tr8,\tr20,\tr24\tC\n\tstq\tr23,\t0(r16)\t\tC\n\tlda\tr16,\t8(r16)\t\tC\n\tldq\tr3,\t0(r17)\t\tC\n$2mod8:\tldq\tr1,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr25\tC\n\tumulh\tr19,\tr3,\tr3\tC\n\tmulq\tr19,\tr1,\tr28\tC\n\tble\tr18,\t$n23\t\tC\n\tldq\tr0,\t16(r17)\t\tC\n\tldq\tr4,\t0(r16)\t\tC\n\tumulh\tr19,\tr1,\tr8\tC\n\tldq\tr1,\t24(r17)\t\tC\n\tlda\tr17,\t16(r17)\t\tC L1 bookkeeping\n\tmulq\tr19,\tr0,\tr2\tC\n\tldq\tr5,\t8(r16)\t\tC\n\tlda\tr16,\t0(r16)\t\tC L1 bookkeeping\n\tumulh\tr19,\tr0,\tr6\tC\n\taddq\tr4,\tr25,\tr4\tC lo + acc\n\tmulq\tr19,\tr1,\tr7\tC\n\tbr\tr31,\t$ent2\t\tC\n\n$5mod8:\tldq\tr5,\t0(r16)\t\tC\n\tlda\tr17,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr7\tC\n\tumulh\tr19,\tr3,\tr24\tC\n\taddq\tr5,\tr7,\tr23\tC\n\tcmpult\tr23,\tr7,\tr20\tC\n\taddq\tr24,\tr20,\tr8\tC\n\tstq\tr23,\t0(r16)\t\tC\n\tlda\tr16,\t8(r16)\t\tC\n\tldq\tr3,\t0(r17)\t\tC\n$4mod8:\tldq\tr1,\t8(r17)\t\tC\n\tmulq\tr19,\tr3,\tr2\tC\n\tumulh\tr19,\tr3,\tr6\tC\n\tmulq\tr19,\tr1,\tr7\tC\n\tldq\tr0,\t16(r17)\t\tC\n\tldq\tr4,\t0(r16)\t\tC\n\tumulh\tr19,\tr1,\tr24\tC\n\tldq\tr1,\t24(r17)\t\tC\n\tlda\tr17,\t32(r17)\t\tC L1 bookkeeping\n\tmulq\tr19,\tr0,\tr25\tC\n\tldq\tr5,\t8(r16)\t\tC\n\tlda\tr16,\t16(r16)\t\tC L1 bookkeeping\n\tumulh\tr19,\tr0,\tr3\tC\n\taddq\tr4,\tr2,\tr4\tC lo + acc\n\tmulq\tr19,\tr1,\tr28\tC\n\tcmpult\tr4,\tr2,\tr20\tC L0 lo add => carry\n\taddq\tr4,\tr8,\tr22\tC U0 hi add => answer\n\tble\tr18,\t$Lend\t\tC\nALIGN(16)\n$Loop:\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr22,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr6,\tr20,\tr6\tC U0 hi mul + carry\n\tldq\tr0,\t0(r17)\t\tC\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\taddq\tr5,\tr7,\tr23\tC L0 lo + acc\n\taddq\tr6,\tr21,\tr6\tC U0 hi mul + carry\n\tldq\tr4,\t0(r16)\t\tC 
L1\n\n\tumulh\tr19,\tr1,\tr8\tC U1\n\tcmpult\tr23,\tr7,\tr20\tC L0 lo add => carry\n\taddq\tr23,\tr6,\tr23\tC U0 hi add => answer\n\tldq\tr1,\t8(r17)\t\tC L1\n\n\tmulq\tr19,\tr0,\tr2\tC U1\n\tcmpult\tr23,\tr6,\tr21\tC L0 hi add => carry\n\taddq\tr24,\tr20,\tr24\tC U0 hi mul + carry\n\tldq\tr5,\t8(r16)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr6\tC U1\n\taddq\tr4,\tr25,\tr4\tC U0 lo + acc\n\tstq\tr22,\t-16(r16)\tC L0\n\tstq\tr23,\t-8(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr7\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr24,\tr21,\tr24\tC U0 hi mul + carry\n$ent2:\n\tcmpult\tr4,\tr25,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr18,\t-8(r18)\t\tC L1 bookkeeping\n\taddq\tr4,\tr24,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr22,\tr24,\tr21\tC L0 hi add => carry\n\taddq\tr3,\tr20,\tr3\tC U0 hi mul + carry\n\tldq\tr0,\t16(r17)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\taddq\tr5,\tr28,\tr23\tC L0 lo + acc\n\taddq\tr3,\tr21,\tr3\tC U0 hi mul + carry\n\tldq\tr4,\t16(r16)\t\tC L1\n\n\tumulh\tr19,\tr1,\tr24\tC U1\n\tcmpult\tr23,\tr28,\tr20\tC L0 lo add => carry\n\taddq\tr23,\tr3,\tr23\tC U0 hi add => answer\n\tldq\tr1,\t24(r17)\t\tC L1\n\n\tmulq\tr19,\tr0,\tr25\tC U1\n\tcmpult\tr23,\tr3,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tldq\tr5,\t24(r16)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr3\tC U1\n\taddq\tr4,\tr2,\tr4\tC U0 lo + acc\n\tstq\tr22,\t0(r16)\t\tC L0\n\tstq\tr23,\t8(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr28\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr8,\tr21,\tr8\tC L0 hi mul + carry\n$ent0:\n\tcmpult\tr4,\tr2,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\taddq\tr4,\tr8,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr22,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr6,\tr20,\tr6\tC U0 hi mul + carry\n\tldq\tr0,\t-32(r17)\tC 
L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\taddq\tr5,\tr7,\tr23\tC L0 lo + acc\n\taddq\tr6,\tr21,\tr6\tC U0 hi mul + carry\n\tldq\tr4,\t32(r16)\t\tC L1\n\n\tumulh\tr19,\tr1,\tr8\tC U1\n\tcmpult\tr23,\tr7,\tr20\tC L0 lo add => carry\n\taddq\tr23,\tr6,\tr23\tC U0 hi add => answer\n\tldq\tr1,\t-24(r17)\tC L1\n\n\tmulq\tr19,\tr0,\tr2\tC U1\n\tcmpult\tr23,\tr6,\tr21\tC L0 hi add => carry\n\taddq\tr24,\tr20,\tr24\tC U0 hi mul + carry\n\tldq\tr5,\t40(r16)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr6\tC U1\n\taddq\tr4,\tr25,\tr4\tC U0 lo + acc\n\tstq\tr22,\t16(r16)\t\tC L0\n\tstq\tr23,\t24(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr7\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr24,\tr21,\tr24\tC U0 hi mul + carry\n$ent6:\n\tcmpult\tr4,\tr25,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr16,\t64(r16)\t\tC L1 bookkeeping\n\taddq\tr4,\tr24,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr22,\tr24,\tr21\tC L0 hi add => carry\n\taddq\tr3,\tr20,\tr3\tC U0 hi mul + carry\n\tldq\tr0,\t-16(r17)\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\taddq\tr5,\tr28,\tr23\tC L0 lo + acc\n\taddq\tr3,\tr21,\tr3\tC U0 hi mul + carry\n\tldq\tr4,\t-16(r16)\tC L1\n\n\tumulh\tr19,\tr1,\tr24\tC U1\n\tcmpult\tr23,\tr28,\tr20\tC L0 lo add => carry\n\taddq\tr23,\tr3,\tr23\tC U0 hi add => answer\n\tldq\tr1,\t-8(r17)\t\tC L1\n\n\tmulq\tr19,\tr0,\tr25\tC U1\n\tcmpult\tr23,\tr3,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tldq\tr5,\t-8(r16)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr3\tC U1\n\taddq\tr4,\tr2,\tr4\tC L0 lo + acc\n\tstq\tr22,\t-32(r16)\tC L0\n\tstq\tr23,\t-24(r16)\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr28\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr8,\tr21,\tr8\tC U0 hi mul + carry\n\n\tcmpult\tr4,\tr2,\tr20\tC L0 lo add => carry\n\taddq\tr4,\tr8,\tr22\tC U0 hi add => answer\n\tldl\tr31,\t256(r17)\tC prefetch up[]\n\tbgt\tr18,\t$Loop\t\tC U1 
bookkeeping\n\n$Lend:\tcmpult\tr22,\tr8,\tr21\tC\n\taddq\tr6,\tr20,\tr6\tC\n\taddq\tr5,\tr7,\tr23\tC\n\taddq\tr6,\tr21,\tr6\tC\n\tldq\tr4,\t0(r16)\t\tC\n\tumulh\tr19,\tr1,\tr8\tC\n\tcmpult\tr23,\tr7,\tr20\tC\n\taddq\tr23,\tr6,\tr23\tC\n\tcmpult\tr23,\tr6,\tr21\tC\n\taddq\tr24,\tr20,\tr24\tC\n\tldq\tr5,\t8(r16)\t\tC\n\taddq\tr4,\tr25,\tr4\tC\n\tstq\tr22,\t-16(r16)\tC\n\tstq\tr23,\t-8(r16)\t\tC\n\taddq\tr24,\tr21,\tr24\tC\n\tcmpult\tr4,\tr25,\tr20\tC\n\taddq\tr4,\tr24,\tr22\tC\n\tcmpult\tr22,\tr24,\tr21\tC\n\taddq\tr3,\tr20,\tr3\tC\n\taddq\tr5,\tr28,\tr23\tC\n\taddq\tr3,\tr21,\tr3\tC\n\tcmpult\tr23,\tr28,\tr20\tC\n\taddq\tr23,\tr3,\tr23\tC\n\tcmpult\tr23,\tr3,\tr21\tC\n\taddq\tr8,\tr20,\tr8\tC\n\tstq\tr22,\t0(r16)\t\tC\n\tstq\tr23,\t8(r16)\t\tC\n\taddq\tr8,\tr21,\tr0\tC\n\tret\tr31,\t(r26),\t1\tC\n\n$n23:\tldq\tr4,\t0(r16)\t\tC\n\tldq\tr5,\t8(r16)\t\tC\n\tumulh\tr19,\tr1,\tr8\tC\n\taddq\tr4,\tr25,\tr4\tC\n\tcmpult\tr4,\tr25,\tr20\tC\n\taddq\tr4,\tr24,\tr22\tC\n\tcmpult\tr22,\tr24,\tr21\tC\n\taddq\tr3,\tr20,\tr3\tC\n\taddq\tr5,\tr28,\tr23\tC\n\taddq\tr3,\tr21,\tr3\tC\n\tcmpult\tr23,\tr28,\tr20\tC\n\taddq\tr23,\tr3,\tr23\tC\n\tcmpult\tr23,\tr3,\tr21\tC\n\taddq\tr8,\tr20,\tr8\tC\n\tstq\tr22,\t0(r16)\t\tC\n\tstq\tr23,\t8(r16)\t\tC\n\taddq\tr8,\tr21,\tr0\tC\n\tret\tr31,\t(r26),\t1\tC\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/mul_1.asm",
    "content": "dnl  Alpha ev6 mpn_mul_1 -- Multiply a limb vector with a limb and store the\ndnl  result in a second limb vector.\n\ndnl  Copyright 2000, 2001, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\tr16\nC s1_ptr\tr17\nC size\t\tr18\nC s2_limb\tr19\n\nC This code runs at 2.25 cycles/limb on EV6.\n\nC This code was written in close cooperation with ev6 pipeline expert\nC Steve Root.  Any errors are tege's fault, though.\n\nC Code structure:\n\nC  code for n < 8\nC  code for n > 8\tcode for (n mod 8)\nC\t\t\tcode for (n div 8)\tfeed-in code\nC\t\t\t\t\t\t8-way unrolled loop\nC\t\t\t\t\t\twind-down code\n\nC Some notes about unrolled loop:\nC\nC   r1-r8     multiplies and workup\nC   r21-r28   multiplies and workup\nC   r9-r12    loads\nC   r0       -1\nC   r20,r29,r13-r15  scramble\nC\nC   We're doing 7 of the 8 carry propagations with a br fixup code and 1 with a\nC   put-the-carry-into-hi.  
The idea is that these branches are very rarely\nC   taken, and since a non-taken branch consumes no resources, that is better\nC   than an addq.\nC\nC   Software pipeline: a load in cycle #09, feeds a mul in cycle #16, feeds an\nC   add NEXT cycle #09 which feeds a store in NEXT cycle #02\n\nC The code could use some further work:\nC   1. Speed up really small multiplies.  The default alpha/mul_1.asm code is\nC      faster than this for size < 3.\nC   2. Improve feed-in code, perhaps with the equivalent of switch(n%8) unless\nC      that is too costly.\nC   3. Consider using 4-way unrolling, even if that runs slower.\nC   4. Reduce register usage.  In particular, try to avoid using r29.\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tcmpult\tr18,\t8,\tr1\n\tbeq\tr1,\t$Large\n$Lsmall:\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tbic\tr31,r31,r4\tC clear cy_limb\n\tumulh\tr2,r19,r0\tC r0 = prod_high\n\tbeq\tr18,$Le1a\tC jump if size was == 1\n\tldq\tr2,8(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC size--\n\tstq\tr3,0(r16)\n\tbeq\tr18,$Le2a\tC jump if size was == 2\n\tALIGN(8)\n$Lopa:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tlda\tr18,-1(r18)\tC size--\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\tldq\tr2,16(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tstq\tr3,8(r16)\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tlda\tr16,8(r16)\tC res_ptr++\n\tbne\tr18,$Lopa\n\n$Le2a:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tstq\tr3,8(r16)\n\taddq\tr4,r0,r0\tC cy_limb = prod_high + 
cy\n\tret\tr31,(r26),1\n$Le1a:\tstq\tr3,0(r16)\n\tret\tr31,(r26),1\n\n$Large:\n\tlda\tr30,\t-224(r30)\n\tstq\tr26,\t0(r30)\n\tstq\tr9,\t8(r30)\n\tstq\tr10,\t16(r30)\n\tstq\tr11,\t24(r30)\n\tstq\tr12,\t32(r30)\n\tstq\tr13,\t40(r30)\n\tstq\tr14,\t48(r30)\n\tstq\tr15,\t56(r30)\n\tstq\tr29,\t64(r30)\n\n\tand\tr18,\t7,\tr20\tC count for the first loop, 0-7\n\tsrl\tr18,\t3,\tr18\tC count for unrolled loop\n\tbis\tr31,\tr31,\tr21\n\tbeq\tr20,\t$L_8_or_more\tC skip first loop\n\n$L_9_or_more:\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\tlda\tr20,-1(r20)\tC size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tumulh\tr2,r19,r21\tC r21 = prod_high\n\tbeq\tr20,$Le1b\tC jump if size was == 1\n\tbis\tr31, r31, r0\tC FIXME: shouldn't need this\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\tlda\tr20,-1(r20)\tC size--\n\tstq\tr3,0(r16)\n\tlda\tr16,8(r16)\tC res_ptr++\n\tbeq\tr20,$Le2b\tC jump if size was == 2\n\tALIGN(8)\n$Lopb:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr21,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tlda\tr20,-1(r20)\tC size--\n\tumulh\tr2,r19,r21\tC r21 = prod_high\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tstq\tr3,0(r16)\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tlda\tr16,8(r16)\tC res_ptr++\n\tbne\tr20,$Lopb\n\n$Le2b:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr21,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,r19,r21\tC r21 = prod_high\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tstq\tr3,0(r16)\n\tlda\tr16,8(r16)\tC res_ptr++\n\taddq\tr21,r0,r21\tC cy_limb = prod_high + cy\n\tbr\tr31,\t$L_8_or_more\n$Le1b:\tstq\tr3,0(r16)\n\tlda\tr16,8(r16)\tC res_ptr++\n\n$L_8_or_more:\n\tlda\tr0,\t-1(r31)\t\tC put -1 in r0, for tricky loop control\n\tlda\tr17,\t-32(r17)\tC L1 bookkeeping\n\tlda\tr18,\t-1(r18)\t\tC decrement count\n\n\tldq\tr9,\t32(r17)\t\tC L1\n\tldq\tr10,\t40(r17)\t\tC 
L1\n\tmulq\tr9,\tr19,\tr22\tC U1 #07\n\tldq\tr11,\t48(r17)\t\tC L1\n\tumulh\tr9,\tr19,\tr23\tC U1 #08\n\tldq\tr12,\t56(r17)\t\tC L1\n\tmulq\tr10,\tr19,\tr24\tC U1 #09\n\tldq\tr9,\t64(r17)\t\tC L1\n\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\n\tumulh\tr10,\tr19,\tr25\tC U1 #11\n\tmulq\tr11,\tr19,\tr26\tC U1 #12\n\tumulh\tr11,\tr19,\tr27\tC U1 #13\n\tmulq\tr12,\tr19,\tr28\tC U1 #14\n\tldq\tr10,\t8(r17)\t\tC L1\n\tumulh\tr12,\tr19,\tr1\tC U1 #15\n\tldq\tr11,\t16(r17)\t\tC L1\n\tmulq\tr9,\tr19,\tr2\tC U1 #16\n\tldq\tr12,\t24(r17)\t\tC L1\n\tumulh\tr9,\tr19,\tr3\tC U1 #17\n\taddq\tr21,\tr22,\tr13\tC L1 mov\n\tmulq\tr10,\tr19,\tr4\tC U1 #18\n\taddq\tr23,\tr24,\tr22\tC L0 sum 2 mul's\n\tcmpult\tr13,\tr21,\tr14\tC L1 carry from sum\n\tbgt\tr18,\t$L_16_or_more\n\n\tcmpult\tr22,\tr24,\tr24\tC U0 carry from sum\n\tumulh\tr10,\tr19,\tr5\tC U1 #02\n\taddq\tr25,\tr26,\tr23\tC U0 sum 2 mul's\n\tmulq\tr11,\tr19,\tr6\tC U1 #03\n\tcmpult\tr23,\tr26,\tr25\tC U0 carry from sum\n\tumulh\tr11,\tr19,\tr7\tC U1 #04\n\taddq\tr27,\tr28,\tr28\tC U0 sum 2 mul's\n\tmulq\tr12,\tr19,\tr8\tC U1 #05\n\tcmpult\tr28,\tr27,\tr15\tC L0 carry from sum\n\tlda\tr16,\t32(r16)\t\tC L1 bookkeeping\n\taddq\tr13,\tr31,\tr13\tC U0 start carry cascade\n\tumulh\tr12,\tr19,\tr21\tC U1 #06\n\tbr\tr31,\t$ret0c\n\n$L_16_or_more:\nC ---------------------------------------------------------------\n\tsubq\tr18,1,r18\n\tcmpult\tr22,\tr24,\tr24\tC U0 carry from sum\n\tldq\tr9,\t32(r17)\t\tC L1\n\n\tumulh\tr10,\tr19,\tr5\tC U1 #02\n\taddq\tr25,\tr26,\tr23\tC U0 sum 2 mul's\n\tmulq\tr11,\tr19,\tr6\tC U1 #03\n\tcmpult\tr23,\tr26,\tr25\tC U0 carry from sum\n\tumulh\tr11,\tr19,\tr7\tC U1 #04\n\taddq\tr27,\tr28,\tr28\tC U0 sum 2 mul's\n\tmulq\tr12,\tr19,\tr8\tC U1 #05\n\tcmpult\tr28,\tr27,\tr15\tC L0 carry from sum\n\tlda\tr16,\t32(r16)\t\tC L1 bookkeeping\n\taddq\tr13,\tr31,\tr13\tC U0 start carry cascade\n\n\tumulh\tr12,\tr19,\tr21\tC U1 #06\nC\tbeq\tr13,\t$fix0w\t\tC U0\n$ret0w:\taddq\tr22,\tr14,\tr26\tC 
L0\n\tldq\tr10,\t40(r17)\t\tC L1\n\n\tmulq\tr9,\tr19,\tr22\tC U1 #07\n\tbeq\tr26,\t$fix1w\t\tC U0\n$ret1w:\taddq\tr23,\tr24,\tr27\tC L0\n\tldq\tr11,\t48(r17)\t\tC L1\n\n\tumulh\tr9,\tr19,\tr23\tC U1 #08\n\tbeq\tr27,\t$fix2w\t\tC U0\n$ret2w:\taddq\tr28,\tr25,\tr28\tC L0\n\tldq\tr12,\t56(r17)\t\tC L1\n\n\tmulq\tr10,\tr19,\tr24\tC U1 #09\n\tbeq\tr28,\t$fix3w\t\tC U0\n$ret3w:\taddq\tr1,\tr2,\tr20\tC L0 sum 2 mul's\n\tldq\tr9,\t64(r17)\t\tC L1\n\n\taddq\tr3,\tr4,\tr2\tC L0 #10 2 mul's\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\tcmpult\tr20,\tr1,\tr29\tC U0 carry from sum\n\n\tumulh\tr10,\tr19,\tr25\tC U1 #11\n\tcmpult\tr2,\tr4,\tr4\tC U0 carry from sum\n\tstq\tr13,\t-32(r16)\tC L0\n\tstq\tr26,\t-24(r16)\tC L1\n\n\tmulq\tr11,\tr19,\tr26\tC U1 #12\n\taddq\tr5,\tr6,\tr14\tC U0 sum 2 mul's\n\tstq\tr27,\t-16(r16)\tC L0\n\tstq\tr28,\t-8(r16)\t\tC L1\n\n\tumulh\tr11,\tr19,\tr27\tC U1 #13\n\tcmpult\tr14,\tr6,\tr3\tC U0 carry from sum\nC could do cross-jumping here:\nC\tbra\t$L_middle_of_unrolled_loop\n\tmulq\tr12,\tr19,\tr28\tC U1 #14\n\taddq\tr7,\tr3,\tr5\tC L0 eat carry\n\taddq\tr20,\tr15,\tr20\tC U0 carry cascade\n\tldq\tr10,\t8(r17)\t\tC L1\n\n\tumulh\tr12,\tr19,\tr1\tC U1 #15\n\tbeq\tr20,\t$fix4\t\tC U0\n$ret4w:\taddq\tr2,\tr29,\tr6\tC L0\n\tldq\tr11,\t16(r17)\t\tC L1\n\n\tmulq\tr9,\tr19,\tr2\tC U1 #16\n\tbeq\tr6,\t$fix5\t\tC U0\n$ret5w:\taddq\tr14,\tr4,\tr7\tC L0\n\tldq\tr12,\t24(r17)\t\tC L1\n\n\tumulh\tr9,\tr19,\tr3\tC U1 #17\n\tbeq\tr7,\t$fix6\t\tC U0\n$ret6w:\taddq\tr5,\tr8,\tr8\tC L0 sum 2\n\taddq\tr21,\tr22,\tr13\tC L1 sum 2 mul's\n\n\tmulq\tr10,\tr19,\tr4\tC U1 #18\n\taddq\tr23,\tr24,\tr22\tC L0 sum 2 mul's\n\tcmpult\tr13,\tr21,\tr14\tC L1 carry from sum\n\tble\tr18,\t$Lend\t\tC U0\nC ---------------------------------------------------------------\n\tALIGN(16)\n$Loop:\n\tumulh\tr0,\tr18,\tr18\tC U1 #01 decrement r18!\n\tcmpult\tr8,\tr5,\tr29\tC L0 carry from last bunch\n\tcmpult\tr22,\tr24,\tr24\tC U0 carry from sum\n\tldq\tr9,\t32(r17)\t\tC 
L1\n\n\tumulh\tr10,\tr19,\tr5\tC U1 #02\n\taddq\tr25,\tr26,\tr23\tC U0 sum 2 mul's\n\tstq\tr20,\t0(r16)\t\tC L0\n\tstq\tr6,\t8(r16)\t\tC L1\n\n\tmulq\tr11,\tr19,\tr6\tC U1 #03\n\tcmpult\tr23,\tr26,\tr25\tC U0 carry from sum\n\tstq\tr7,\t16(r16)\t\tC L0\n\tstq\tr8,\t24(r16)\t\tC L1\n\n\tumulh\tr11,\tr19,\tr7\tC U1 #04\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr27,\tr28,\tr28\tC U0 sum 2 mul's\n\n\tmulq\tr12,\tr19,\tr8\tC U1 #05\n\tcmpult\tr28,\tr27,\tr15\tC L0 carry from sum\n\tlda\tr16,\t64(r16)\t\tC L1 bookkeeping\n\taddq\tr13,\tr29,\tr13\tC U0 start carry cascade\n\n\tumulh\tr12,\tr19,\tr21\tC U1 #06\n\tbeq\tr13,\t$fix0\t\tC U0\n$ret0:\taddq\tr22,\tr14,\tr26\tC L0\n\tldq\tr10,\t40(r17)\t\tC L1\n\n\tmulq\tr9,\tr19,\tr22\tC U1 #07\n\tbeq\tr26,\t$fix1\t\tC U0\n$ret1:\taddq\tr23,\tr24,\tr27\tC L0\n\tldq\tr11,\t48(r17)\t\tC L1\n\n\tumulh\tr9,\tr19,\tr23\tC U1 #08\n\tbeq\tr27,\t$fix2\t\tC U0\n$ret2:\taddq\tr28,\tr25,\tr28\tC L0\n\tldq\tr12,\t56(r17)\t\tC L1\n\n\tmulq\tr10,\tr19,\tr24\tC U1 #09\n\tbeq\tr28,\t$fix3\t\tC U0\n$ret3:\taddq\tr1,\tr2,\tr20\tC L0 sum 2 mul's\n\tldq\tr9,\t64(r17)\t\tC L1\n\n\taddq\tr3,\tr4,\tr2\tC L0 #10 2 mul's\n\tbis\tr31,\tr31,\tr31\tC U1 mul hole\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\tcmpult\tr20,\tr1,\tr29\tC U0 carry from sum\n\n\tumulh\tr10,\tr19,\tr25\tC U1 #11\n\tcmpult\tr2,\tr4,\tr4\tC U0 carry from sum\n\tstq\tr13,\t-32(r16)\tC L0\n\tstq\tr26,\t-24(r16)\tC L1\n\n\tmulq\tr11,\tr19,\tr26\tC U1 #12\n\taddq\tr5,\tr6,\tr14\tC U0 sum 2 mul's\n\tstq\tr27,\t-16(r16)\tC L0\n\tstq\tr28,\t-8(r16)\t\tC L1\n\n\tumulh\tr11,\tr19,\tr27\tC U1 #13\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\tcmpult\tr14,\tr6,\tr3\tC U0 carry from sum\n$L_middle_of_unrolled_loop:\n\tmulq\tr12,\tr19,\tr28\tC U1 #14\n\taddq\tr7,\tr3,\tr5\tC L0 eat carry\n\taddq\tr20,\tr15,\tr20\tC U0 carry cascade\n\tldq\tr10,\t8(r17)\t\tC L1\n\n\tumulh\tr12,\tr19,\tr1\tC U1 
#15\n\tbeq\tr20,\t$fix4\t\tC U0\n$ret4:\taddq\tr2,\tr29,\tr6\tC L0\n\tldq\tr11,\t16(r17)\t\tC L1\n\n\tmulq\tr9,\tr19,\tr2\tC U1 #16\n\tbeq\tr6,\t$fix5\t\tC U0\n$ret5:\taddq\tr14,\tr4,\tr7\tC L0\n\tldq\tr12,\t24(r17)\t\tC L1\n\n\tumulh\tr9,\tr19,\tr3\tC U1 #17\n\tbeq\tr7,\t$fix6\t\tC U0\n$ret6:\taddq\tr5,\tr8,\tr8\tC L0 sum 2\n\taddq\tr21,\tr22,\tr13\tC L1 sum 2 mul's\n\n\tmulq\tr10,\tr19,\tr4\tC U1 #18\n\taddq\tr23,\tr24,\tr22\tC L0 sum 2 mul's\n\tcmpult\tr13,\tr21,\tr14\tC L1 carry from sum\n\tbgt\tr18,\t$Loop\t\tC U0\nC ---------------------------------------------------------------\n$Lend:\n\tcmpult\tr8,\tr5,\tr29\tC L0 carry from last bunch\n\tcmpult\tr22,\tr24,\tr24\tC U0 carry from sum\n\n\tumulh\tr10,\tr19,\tr5\tC U1 #02\n\taddq\tr25,\tr26,\tr23\tC U0 sum 2 mul's\n\tstq\tr20,\t0(r16)\t\tC L0\n\tstq\tr6,\t8(r16)\t\tC L1\n\n\tmulq\tr11,\tr19,\tr6\tC U1 #03\n\tcmpult\tr23,\tr26,\tr25\tC U0 carry from sum\n\tstq\tr7,\t16(r16)\t\tC L0\n\tstq\tr8,\t24(r16)\t\tC L1\n\n\tumulh\tr11,\tr19,\tr7\tC U1 #04\n\taddq\tr27,\tr28,\tr28\tC U0 sum 2 mul's\n\n\tmulq\tr12,\tr19,\tr8\tC U1 #05\n\tcmpult\tr28,\tr27,\tr15\tC L0 carry from sum\n\tlda\tr16,\t64(r16)\t\tC L1 bookkeeping\n\taddq\tr13,\tr29,\tr13\tC U0 start carry cascade\n\n\tumulh\tr12,\tr19,\tr21\tC U1 #06\n\tbeq\tr13,\t$fix0c\t\tC U0\n$ret0c:\taddq\tr22,\tr14,\tr26\tC L0\n\tbeq\tr26,\t$fix1c\t\tC U0\n$ret1c:\taddq\tr23,\tr24,\tr27\tC L0\n\tbeq\tr27,\t$fix2c\t\tC U0\n$ret2c:\taddq\tr28,\tr25,\tr28\tC L0\n\tbeq\tr28,\t$fix3c\t\tC U0\n$ret3c:\taddq\tr1,\tr2,\tr20\tC L0 sum 2 mul's\n\taddq\tr3,\tr4,\tr2\tC L0 #10 2 mul's\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\tcmpult\tr20,\tr1,\tr29\tC U0 carry from sum\n\tcmpult\tr2,\tr4,\tr4\tC U0 carry from sum\n\tstq\tr13,\t-32(r16)\tC L0\n\tstq\tr26,\t-24(r16)\tC L1\n\taddq\tr5,\tr6,\tr14\tC U0 sum 2 mul's\n\tstq\tr27,\t-16(r16)\tC L0\n\tstq\tr28,\t-8(r16)\t\tC L1\n\tcmpult\tr14,\tr6,\tr3\tC U0 carry from sum\n\taddq\tr7,\tr3,\tr5\tC L0 eat carry\n\taddq\tr20,\tr15,\tr20\tC U0 
carry cascade\n\tbeq\tr20,\t$fix4c\t\tC U0\n$ret4c:\taddq\tr2,\tr29,\tr6\tC L0\n\tbeq\tr6,\t$fix5c\t\tC U0\n$ret5c:\taddq\tr14,\tr4,\tr7\tC L0\n\tbeq\tr7,\t$fix6c\t\tC U0\n$ret6c:\taddq\tr5,\tr8,\tr8\tC L0 sum 2\n\tcmpult\tr8,\tr5,\tr29\tC L0 carry from last bunch\n\tstq\tr20,\t0(r16)\t\tC L0\n\tstq\tr6,\t8(r16)\t\tC L1\n\tstq\tr7,\t16(r16)\t\tC L0\n\tstq\tr8,\t24(r16)\t\tC L1\n\taddq\tr29,\tr21,\tr0\n\n\tldq\tr26,\t0(r30)\n\tldq\tr9,\t8(r30)\n\tldq\tr10,\t16(r30)\n\tldq\tr11,\t24(r30)\n\tldq\tr12,\t32(r30)\n\tldq\tr13,\t40(r30)\n\tldq\tr14,\t48(r30)\n\tldq\tr15,\t56(r30)\n\tldq\tr29,\t64(r30)\n\tlda\tr30,\t224(r30)\n\tret\tr31,\t(r26),\t1\n\nC $fix0w:\tbis\tr14,\tr29,\tr14\tC join carries\nC\tbr\tr31,\t$ret0w\n$fix1w:\tbis\tr24,\tr14,\tr24\tC join carries\n\tbr\tr31,\t$ret1w\n$fix2w:\tbis\tr25,\tr24,\tr25\tC join carries\n\tbr\tr31,\t$ret2w\n$fix3w:\tbis\tr15,\tr25,\tr15\tC join carries\n\tbr\tr31,\t$ret3w\n$fix0:\tbis\tr14,\tr29,\tr14\tC join carries\n\tbr\tr31,\t$ret0\n$fix1:\tbis\tr24,\tr14,\tr24\tC join carries\n\tbr\tr31,\t$ret1\n$fix2:\tbis\tr25,\tr24,\tr25\tC join carries\n\tbr\tr31,\t$ret2\n$fix3:\tbis\tr15,\tr25,\tr15\tC join carries\n\tbr\tr31,\t$ret3\n$fix4:\tbis\tr29,\tr15,\tr29\tC join carries\n\tbr\tr31,\t$ret4\n$fix5:\tbis\tr4,\tr29,\tr4\tC join carries\n\tbr\tr31,\t$ret5\n$fix6:\taddq\tr5,\tr4,\tr5\tC can't carry twice!\n\tbr\tr31,\t$ret6\n$fix0c:\tbis\tr14,\tr29,\tr14\tC join carries\n\tbr\tr31,\t$ret0c\n$fix1c:\tbis\tr24,\tr14,\tr24\tC join carries\n\tbr\tr31,\t$ret1c\n$fix2c:\tbis\tr25,\tr24,\tr25\tC join carries\n\tbr\tr31,\t$ret2c\n$fix3c:\tbis\tr15,\tr25,\tr15\tC join carries\n\tbr\tr31,\t$ret3c\n$fix4c:\tbis\tr29,\tr15,\tr29\tC join carries\n\tbr\tr31,\t$ret4c\n$fix5c:\tbis\tr4,\tr29,\tr4\tC join carries\n\tbr\tr31,\t$ret5c\n$fix6c:\taddq\tr5,\tr4,\tr5\tC can't carry twice!\n\tbr\tr31,\t$ret6c\n\nEPILOGUE(mpn_mul_1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/slot.pl",
    "content": "#!/usr/bin/perl -w\n\n# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published\n# by the Free Software Foundation; either version 2.1 of the License, or (at\n# your option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage: slot.pl [filename.o]...\n#\n# Run \"objdump\" to produce a disassembly of the given object file(s) and\n# annotate the output with \"U\" or \"L\" slotting which Alpha EV6 will use.\n#\n# When an instruction is E (ie. either U or L), an \"eU\" or \"eL\" is shown, as\n# a reminder that it wasn't a fixed requirement that gave the U or L, but\n# the octaword slotting rules.\n#\n# If an instruction is not recognised, that octaword does not get any U/L\n# shown, only lower-case \"u\", \"l\" or \"e\" for the instructions which are\n# known.  
Add any unknown instructions to %optable below.\n\n\nuse strict;\n\n# The U or L which various instructions demand, or E if either.\n#\nmy %optable = \n  (\n   'addq'   => 'E',\n   'and'    => 'E',\n   'beq'    => 'U',\n   'bge'    => 'U',\n   'bgt'    => 'U',\n   'blt'    => 'U',\n   'bne'    => 'U',\n   'br'     => 'L',\n   'clr'    => 'E',\n   'cmpule' => 'E',\n   'cmpult' => 'E',\n   'cmpeq'  => 'E',\n   'cmoveq' => 'E',\n   'cmovne' => 'E',\n   'ctpop'  => 'U',\n   'ctlz'   => 'U',\n   'cttz'   => 'U',\n   'extbl'  => 'U',\n   'extlh'  => 'U',\n   'extll'  => 'U',\n   'extqh'  => 'U',\n   'extql'  => 'U',\n   'extwh'  => 'U',\n   'extwl'  => 'U',\n   'jsr'    => 'L',\n   'lda'    => 'E',\n   'ldah'   => 'E',\n   'ldbu'   => 'L',\n   'ldl'    => 'L',\n   'ldq'    => 'L',\n   'ldt'    => 'L',\n   'ret'    => 'L',\n   'mov'    => 'E',\n   'mulq'   => 'U',\n   'negq'   => 'E',\n   'nop'    => 'E',\n   'not'    => 'E',\n   's8addq' => 'E',\n   's8subq' => 'E',\n   # 'sextb'  => ?\n   # 'sextl'  => ?\n   'sll'    => 'U',\n   'srl'    => 'U',\n   'stq'    => 'L',\n   'subq'   => 'E',\n   'umulh'  => 'U',\n   'unop'   => 'E',\n   'xor'    => 'E',\n  );\n\n# Slottings used for a given pattern of U/L/E in an octaword.  
This is as\n# per the \"Ebox Slotting\" section of the EV6 hardware reference manual.\n#\nmy %slottable = \n  (\n   'EEEE' => 'ULUL',\n   'EEEL' => 'ULUL',\n   'EEEU' => 'ULLU',\n   'EELE' => 'ULLU',\n   'EELL' => 'UULL',\n   'EELU' => 'ULLU',\n   'EEUE' => 'ULUL',\n   'EEUL' => 'ULUL',\n   'EEUU' => 'LLUU',\n   'ELEE' => 'ULUL',\n   'ELEL' => 'ULUL',\n   'ELEU' => 'ULLU',\n   'ELLE' => 'ULLU',\n   'ELLL' => 'ULLL',\n   'ELLU' => 'ULLU',\n   'ELUE' => 'ULUL',\n   'ELUL' => 'ULUL',\n\n   'LLLL' => 'LLLL',\n   'LLLU' => 'LLLU',\n   'LLUE' => 'LLUU',\n   'LLUL' => 'LLUL',\n   'LLUU' => 'LLUU',\n   'LUEE' => 'LULU',\n   'LUEL' => 'LUUL',\n   'LUEU' => 'LULU',\n   'LULE' => 'LULU',\n   'LULL' => 'LULL',\n   'LULU' => 'LULU',\n   'LUUE' => 'LUUL',\n   'LUUL' => 'LUUL',\n   'LUUU' => 'LUUU',\n   'UEEE' => 'ULUL',\n   'UEEL' => 'ULUL',\n   'UEEU' => 'ULLU',\n\n   'ELUU' => 'LLUU',\n   'EUEE' => 'LULU',\n   'EUEL' => 'LUUL',\n   'EUEU' => 'LULU',\n   'EULE' => 'LULU',\n   'EULL' => 'UULL',\n   'EULU' => 'LULU',\n   'EUUE' => 'LUUL',\n   'EUUL' => 'LUUL',\n   'EUUU' => 'LUUU',\n   'LEEE' => 'LULU',\n   'LEEL' => 'LUUL',\n   'LEEU' => 'LULU',\n   'LELE' => 'LULU',\n   'LELL' => 'LULL',\n   'LELU' => 'LULU',\n   'LEUE' => 'LUUL',\n   'LEUL' => 'LUUL',\n   'LEUU' => 'LLUU',\n   'LLEE' => 'LLUU',\n   'LLEL' => 'LLUL',\n   'LLEU' => 'LLUU',\n   'LLLE' => 'LLLU',\n\n   'UELE' => 'ULLU',\n   'UELL' => 'UULL',\n   'UELU' => 'ULLU',\n   'UEUE' => 'ULUL',\n   'UEUL' => 'ULUL',\n   'UEUU' => 'ULUU',\n   'ULEE' => 'ULUL',\n   'ULEL' => 'ULUL',\n   'ULEU' => 'ULLU',\n   'ULLE' => 'ULLU',\n   'ULLL' => 'ULLL',\n   'ULLU' => 'ULLU',\n   'ULUE' => 'ULUL',\n   'ULUL' => 'ULUL',\n   'ULUU' => 'ULUU',\n   'UUEE' => 'UULL',\n   'UUEL' => 'UULL',\n   'UUEU' => 'UULU',\n   'UULE' => 'UULL',\n   'UULL' => 'UULL',\n   'UULU' => 'UULU',\n   'UUUE' => 'UUUL',\n   'UUUL' => 'UUUL',\n   'UUUU' => 'UUUU',\n  );\n\n# Check all combinations of U/L/E are present in %slottable.\nsub coverage {\n  foreach my 
$a ('U', 'L', 'E') {\n    foreach my $b ('U', 'L', 'E') {\n      foreach my $c ('U', 'L', 'E') {\n        foreach my $d ('U', 'L', 'E') {\n          my $x = $a . $b . $c . $d;\n          if (! defined $slottable{$x}) {\n            print \"slottable missing: $x\\n\"\n          }\n        }\n      }\n    }\n  }\n}\n\n# Certain consistency checks for %slottable.\nsub check {\n  foreach my $x (keys %slottable) {\n    my $a = substr($x,0,1);\n    my $b = substr($x,1,1);\n    my $c = substr($x,2,1);\n    my $d = substr($x,3,1);\n    my $es = ($a eq 'E') + ($b eq 'E') + ($c eq 'E') + ($d eq 'E');\n    my $ls = ($a eq 'L') + ($b eq 'L') + ($c eq 'L') + ($d eq 'L');\n    my $us = ($a eq 'U') + ($b eq 'U') + ($c eq 'U') + ($d eq 'U');\n\n    my $got = $slottable{$x};\n    my $want = $x;\n\n    if ($es == 0) {\n\n    } elsif ($es == 1) {\n      # when only one E, it's mapped to whichever of U or L is otherwise\n      # used the least\n      if ($ls > $us) {\n        $want =~ s/E/U/;\n      } else {\n        $want =~ s/E/L/;\n      }\n    } elsif ($es == 2) {\n      # when two E's and two U, then the E's map to L; vice versa for two E\n      # and two L\n      if ($ls == 2) {\n        $want =~ s/E/U/g;\n      } elsif ($us == 2) {\n        $want =~ s/E/L/g;\n      } else {\n        next;\n      }\n    } elsif ($es == 3) {\n      next;\n\n    } else { # $es == 4\n      next;\n    }\n\n    if ($want ne $got) {\n      print \"slottable $x want $want got $got\\n\";\n    }\n  }\n}\n\nsub disassemble {\n  my ($file) = @_;\n\n  open (IN, \"objdump -Srfh $file |\") || die \"Cannot open pipe from objdump\\n\";\n\n  my (%pre, %post, %type);\n  while (<IN>) {\n    my $line = $_ . \"\";\n\n    if ($line =~ /(^[ \\t]*[0-9a-f]*([0-9a-f]):[ \\t]*[0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] )\\t(([a-z0-9]+).*)/) {\n      my ($this_pre, $addr, $this_post, $opcode) = ($1, $2, $3, $4);\n\n      my $this_type = $optable{$opcode};\n      if (! 
defined ($this_type)) { $this_type = ' '; }\n\n      $pre{$addr} = $this_pre;\n      $post{$addr} = $this_post;\n      $type{$addr} = $this_type;\n\n      if ($addr eq 'c') {\n        my %slot = ('0'=>' ', '4'=>' ', '8'=>' ', 'c'=>' ');\n\n        my $str = $type{'c'} . $type{'8'} . $type{'4'} . $type{'0'};\n        $str = $slottable{$str};\n        if (defined $str) {\n          $slot{'c'} = substr($str,0,1);\n          $slot{'8'} = substr($str,1,1);\n          $slot{'4'} = substr($str,2,1);\n          $slot{'0'} = substr($str,3,1);\n        }\n\n        foreach my $i ('0', '4', '8', 'c') {\n          if ($slot{$i} eq $type{$i}) { $type{$i} = ' '; }\n          print $pre{$i}, ' ', lc($type{$i}),$slot{$i}, '  ', $post{$i}, \"\\n\";\n        }\n\n        %pre = ();\n        %type = ();\n        %post = ();\n      }\n    }\n  }\n  \n  close IN || die \"Error from objdump (or objdump not available)\\n\";\n}\n\ncoverage();\ncheck();\n\nmy @files;\nif ($#ARGV >= 0) {\n  @files = @ARGV;\n} else {\n  die\n}\n\nforeach (@files)  {\n    disassemble($_);\n}\n"
  },
  {
    "path": "mpn/alpha/ev6/sqr_diagonal.asm",
    "content": "dnl  Alpha mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002, 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:      ?\nC EV5:      ?\nC EV6:      2.3\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\n\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\n\tlda\tr18, -2(r18)\tC n -= 2\n\tldq\tr0,   0(r17)\n\tmulq\tr0, r0, r4\n\tumulh\tr0, r0, r20\n\tblt\tr18, L(ex1)\n\tldq\tr1,   8(r17)\n\tmulq\tr1, r1, r5\n\tumulh\tr1, r1, r21\n\tbeq\tr18, L(ex2)\n\tlda\tr18, -2(r18)\tC n -= 2\n\tldq\tr0,  16(r17)\n\tblt\tr18, L(ex3)\n\tldq\tr1,  24(r17)\n\tbeq\tr18, L(ex4)\n\n\tALIGN(16)\nL(top):\tlda\tr18, -2(r18)\tC n -= 2\n\tstq\tr4,   0(r16)\n\tmulq\tr0, r0, r4\n\tstq\tr20,  8(r16)\n\tumulh\tr0, r0, r20\n\tldq\tr0,  32(r17)\n\tblt\tr18, L(x)\n\tstq\tr5,  16(r16)\n\tmulq\tr1, r1, r5\n\tstq\tr21, 24(r16)\n\tumulh\tr1, r1, r21\n\tldq\tr1,  40(r17)\n\tlda\tr16, 32(r16)\tC rp += 4\n\tlda\tr17, 16(r17)\tC up += 2\n\tbne\tr18, L(top)\n\n\tALIGN(16)\nL(ex4):\tstq\tr4,   0(r16)\n\tmulq\tr0, r0, r4\n\tstq\tr20,  8(r16)\n\tumulh\tr0, r0, r20\n\tstq\tr5,  16(r16)\n\tmulq\tr1, 
r1, r5\n\tstq\tr21, 24(r16)\n\tumulh\tr1, r1, r21\n\tstq\tr4,  32(r16)\n\tstq\tr20, 40(r16)\n\tstq\tr5,  48(r16)\n\tstq\tr21, 56(r16)\n\tret\tr31, (r26), 1\n\tALIGN(16)\nL(x):\tstq\tr5,  16(r16)\n\tmulq\tr1, r1, r5\n\tstq\tr21, 24(r16)\n\tumulh\tr1, r1, r21\n\tstq\tr4,  32(r16)\n\tmulq\tr0, r0, r4\n\tstq\tr20, 40(r16)\n\tumulh\tr0, r0, r20\n\tstq\tr5,  48(r16)\n\tstq\tr21, 56(r16)\n\tstq\tr4,  64(r16)\n\tstq\tr20, 72(r16)\n\tret\tr31, (r26), 1\nL(ex1):\tstq\tr4,   0(r16)\n\tstq\tr20,  8(r16)\n\tret\tr31, (r26), 1\n\tALIGN(16)\nL(ex2):\tstq\tr4,   0(r16)\n\tstq\tr20,  8(r16)\n\tstq\tr5,  16(r16)\n\tstq\tr21, 24(r16)\n\tret\tr31, (r26), 1\n\tALIGN(16)\nL(ex3):\tstq\tr4,   0(r16)\n\tmulq\tr0, r0, r4\n\tstq\tr20,  8(r16)\n\tumulh\tr0, r0, r20\n\tstq\tr5,  16(r16)\n\tstq\tr21, 24(r16)\n\tstq\tr4,  32(r16)\n\tstq\tr20, 40(r16)\n\tret\tr31, (r26), 1\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/sub_n.asm",
    "content": "dnl  Alpha ev6 mpn_sub_n -- Subtract two limb vectors of the same length > 0\ndnl  and store difference in a third limb vector.\n\ndnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     ?\nC EV5:     5.4\nC EV6:     2.125\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  vp\tr18\nC  n\tr19\nC  cy\tr20   (for mpn_add_nc)\n\nC TODO\nC   Finish cleaning up cy registers r22, r23 (make them use cy0/cy1)\nC   Use multi-pronged feed-in.\nC   Perform additional micro-tuning\n\nC  This code was written in cooperation with ev6 pipeline expert Steve Root.\n\nC  Pair loads and stores where possible\nC  Store pairs oct-aligned where possible (didn't need it here)\nC  Stores are delayed every third cycle\nC  Loads and stores are delayed by fills\nC  U stays still, put code there where possible (note alternation of U1 and U0)\nC  L moves because of loads and stores\nC  Note dampers in L to limit damage\n\nC  This odd-looking optimization expects that were having random bits in our\nC  data, so that a pure zero result is unlikely. 
so we penalize the unlikely\nC  case to help the common case.\n\ndefine(`u0', `r0')  define(`u1', `r3')\ndefine(`v0', `r1')  define(`v1', `r4')\n\ndefine(`cy0', `r20')  define(`cy1', `r21')\n\nMULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)\n\nASM_START()\nPROLOGUE(mpn_sub_nc)\n\tbr\tr31,\t$entry\nEPILOGUE()\nPROLOGUE(mpn_sub_n)\n\tbis\tr31,\tr31,\tcy0\tC clear carry in\n$entry:\tcmpult\tr19,\t5,\tr22\tC L1 move counter\n\tldq\tu1,\t0(r17)\t\tC L0 get next ones\n\tldq\tv1,\t0(r18)\t\tC L1\n\tbne\tr22,\t$Lsmall\n\n\tldq\tu0,\t8(r17)\t\tC L0 get next ones\n\tldq\tv0,\t8(r18)\t\tC L1\n\tsubq\tu1,\tv1,\tr5\tC U0 sub two data\n\n\tcmpult\tu1,\tv1,\tr23\tC U0 did it borrow\n\tldq\tu1,\t16(r17)\t\tC L0 get next ones\n\tldq\tv1,\t16(r18)\t\tC L1\n\n\tsubq\tu0,\tv0,\tr8\tC U1 sub two data\n\tsubq\tr5,\tcy0,\tr24\tC U0 borrow in\n\n\tcmpult\tu0,\tv0,\tr22\tC U1 did it borrow\n\tbeq\tr5,\t$fix5f\t\tC U0 fix exact zero\n$ret5f:\tldq\tu0,\t24(r17)\t\tC L0 get next ones\n\tldq\tv0,\t24(r18)\t\tC L1\n\n\tsubq\tr8,\tr23,\tr25\tC U1 borrow from last\n\tsubq\tu1,\tv1,\tr7\tC U0 sub two data\n\n\tbeq\tr8,\t$fix6f\t\tC U1 fix exact zero\n$ret6f:\tcmpult\tu1,\tv1,\tr23\tC U0 did it borrow\n\tldq\tu1,\t32(r17)\t\tC L0 get next ones\n\tldq\tv1,\t32(r18)\t\tC L1\n\n\tlda\tr17,\t40(r17)\t\tC L0 move pointer\n\tlda\tr18,\t40(r18)\t\tC L1 move pointer\n\n\tlda\tr16,\t-8(r16)\n\tlda\tr19,\t-13(r19)\tC L1 move counter\n\tblt\tr19,\t$Lend\t\tC U1 loop control\n\n\nC Main loop.  
8-way unrolled.\n\tALIGN(16)\n$Loop:\tsubq\tu0,\tv0,\tr2\tC U1 sub two data\n\tstq\tr24,\t8(r16)\t\tC L0 put an answer\n\tsubq\tr7,\tr22,\tr24\tC U0 borrow from last\n\tstq\tr25,\t16(r16)\t\tC L1 pair\n\n\tcmpult\tu0,\tv0,\tcy1\tC U1 did it borrow\n\tbeq\tr7,\t$fix7\t\tC U0 fix exact 0\n$ret7:\tldq\tu0,\t0(r17)\t\tC L0 get next ones\n\tldq\tv0,\t0(r18)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L  damp out\n\tsubq\tr2,\tr23,\tr25\tC U1 borrow from last\n\tbis\tr31,\tr31,\tr31\tC L  moves in L !\n\tsubq\tu1,\tv1,\tr5\tC U0 sub two data\n\n\tbeq\tr2,\t$fix0\t\tC U1 fix exact zero\n$ret0:\tcmpult\tu1,\tv1,\tcy0\tC U0 did it borrow\n\tldq\tu1,\t8(r17)\t\tC L0 get next ones\n\tldq\tv1,\t8(r18)\t\tC L1\n\n\tsubq\tu0,\tv0,\tr8\tC U1 sub two data\n\tstq\tr24,\t24(r16)\t\tC L0 store pair\n\tsubq\tr5,\tcy1,\tr24\tC U0 borrow from last\n\tstq\tr25,\t32(r16)\t\tC L1\n\n\tcmpult\tu0,\tv0,\tr22\tC U1 did it borrow\n\tbeq\tr5,\t$fix1\t\tC U0 fix exact zero\n$ret1:\tldq\tu0,\t16(r17)\t\tC L0 get next ones\n\tldq\tv0,\t16(r18)\t\tC L1\n\n\tlda\tr16,\t64(r16)\t\tC L0 move pointer\n\tsubq\tr8,\tcy0,\tr25\tC U1 borrow from last\n\tlda\tr19,\t-8(r19)\t\tC L1 move counter\n\tsubq\tu1,\tv1,\tr7\tC U0 sub two data\n\n\tbeq\tr8,\t$fix2\t\tC U1 fix exact zero\n$ret2:\tcmpult\tu1,\tv1,\tr23\tC U0 did it borrow\n\tldq\tu1,\t24(r17)\t\tC L0 get next ones\n\tldq\tv1,\t24(r18)\t\tC L1\n\n\tsubq\tu0,\tv0,\tr2\tC U1 sub two data\n\tstq\tr24,\t-24(r16)\tC L0 put an answer\n\tsubq\tr7,\tr22,\tr24\tC U0 borrow from last\n\tstq\tr25,\t-16(r16)\tC L1 pair\n\n\tcmpult\tu0,\tv0,\tcy1\tC U1 did it borrow\n\tbeq\tr7,\t$fix3\t\tC U0 fix exact 0\n$ret3:\tldq\tu0,\t32(r17)\t\tC L0 get next ones\n\tldq\tv0,\t32(r18)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L  damp out\n\tsubq\tr2,\tr23,\tr25\tC U1 borrow from last\n\tbis\tr31,\tr31,\tr31\tC L  moves in L !\n\tsubq\tu1,\tv1,\tr5\tC U0 sub two data\n\n\tbeq\tr2,\t$fix4\t\tC U1 fix exact zero\n$ret4:\tcmpult\tu1,\tv1,\tcy0\tC U0 did it borrow\n\tldq\tu1,\t40(r17)\t\tC L0 
get next ones\n\tldq\tv1,\t40(r18)\t\tC L1\n\n\tsubq\tu0,\tv0,\tr8\tC U1 sub two data\n\tstq\tr24,\t-8(r16)\t\tC L0 store pair\n\tsubq\tr5,\tcy1,\tr24\tC U0 borrow from last\n\tstq\tr25,\t0(r16)\t\tC L1\n\n\tcmpult\tu0,\tv0,\tr22\tC U1 did it borrow\n\tbeq\tr5,\t$fix5\t\tC U0 fix exact zero\n$ret5:\tldq\tu0,\t48(r17)\t\tC L0 get next ones\n\tldq\tv0,\t48(r18)\t\tC L1\n\n\tldl\tr31, 256(r17)\t\tC L0 prefetch\n\tsubq\tr8,\tcy0,\tr25\tC U1 borrow from last\n\tldl\tr31, 256(r18)\t\tC L1 prefetch\n\tsubq\tu1,\tv1,\tr7\tC U0 sub two data\n\n\tbeq\tr8,\t$fix6\t\tC U1 fix exact zero\n$ret6:\tcmpult\tu1,\tv1,\tr23\tC U0 did it borrow\n\tldq\tu1,\t56(r17)\t\tC L0 get next ones\n\tldq\tv1,\t56(r18)\t\tC L1\n\n\tlda\tr17,\t64(r17)\t\tC L0 move pointer\n\tbis\tr31,\tr31,\tr31\tC U\n\tlda\tr18,\t64(r18)\t\tC L1 move pointer\n\tbge\tr19,\t$Loop\t\tC U1 loop control\nC ==== main loop end\n\n$Lend:\tsubq\tu0,\tv0,\tr2\tC U1 sub two data\n\tstq\tr24,\t8(r16)\t\tC L0 put an answer\n\tsubq\tr7,\tr22,\tr24\tC U0 borrow from last\n\tstq\tr25,\t16(r16)\t\tC L1 pair\n\tcmpult\tu0,\tv0,\tcy1\tC U1 did it borrow\n\tbeq\tr7,\t$fix7c\t\tC U0 fix exact 0\n$ret7c:\tsubq\tr2,\tr23,\tr25\tC U1 borrow from last\n\tsubq\tu1,\tv1,\tr5\tC U0 sub two data\n\tbeq\tr2,\t$fix0c\t\tC U1 fix exact zero\n$ret0c:\tcmpult\tu1,\tv1,\tcy0\tC U0 did it borrow\n\tstq\tr24,\t24(r16)\t\tC L0 store pair\n\tsubq\tr5,\tcy1,\tr24\tC U0 borrow from last\n\tstq\tr25,\t32(r16)\t\tC L1\n\tbeq\tr5,\t$fix1c\t\tC U0 fix exact zero\n$ret1c:\tstq\tr24,\t40(r16)\t\tC L0 put an answer\n\tlda\tr16,\t48(r16)\t\tC L0 move pointer\n\n\tlda\tr19,\t8(r19)\n\tbeq\tr19,\t$Lret\n\n\tldq\tu1,\t0(r17)\n\tldq\tv1,\t0(r18)\n$Lsmall:\n\tlda\tr19,\t-1(r19)\n\tbeq\tr19,\t$Lend0\n\n\tALIGN(8)\n$Loop0:\tsubq\tu1,\tv1,\tr2\tC main sub\n\tcmpult\tu1,\tv1,\tr8\tC compute bw from last sub\n\tldq\tu1,\t8(r17)\n\tldq\tv1,\t8(r18)\n\tsubq\tr2,\tcy0,\tr5\tC borrow 
sub\n\tlda\tr17,\t8(r17)\n\tlda\tr18,\t8(r18)\n\tstq\tr5,\t0(r16)\n\tcmpult\tr2,\tcy0,\tcy0\tC compute bw from last sub\n\tlda\tr19,\t-1(r19)\t\tC decr loop cnt\n\tbis\tr8,\tcy0,\tcy0\tC combine bw from the two subs\n\tlda\tr16,\t8(r16)\n\tbne\tr19,\t$Loop0\n$Lend0:\tsubq\tu1,\tv1,\tr2\tC main sub\n\tsubq\tr2,\tcy0,\tr5\tC borrow sub\n\tcmpult\tu1,\tv1,\tr8\tC compute bw from last sub\n\tcmpult\tr2,\tcy0,\tcy0\tC compute bw from last sub\n\tstq\tr5,\t0(r16)\n\tbis\tr8,\tcy0,\tr0\tC combine bw from the two subs\n\tret\tr31,(r26),1\n\n\tALIGN(8)\n$Lret:\tlda\tr0,\t0(cy0)\t\tC copy borrow into return register\n\tret\tr31,(r26),1\n\n$fix5f:\tbis\tr23,\tcy0,\tr23\tC bring forward borrow\n\tbr\tr31,\t$ret5f\n$fix6f:\tbis\tr22,\tr23,\tr22\tC bring forward borrow\n\tbr\tr31,\t$ret6f\n$fix0:\tbis\tcy1,\tr23,\tcy1\tC bring forward borrow\n\tbr\tr31,\t$ret0\n$fix1:\tbis\tcy0,\tcy1,\tcy0\tC bring forward borrow\n\tbr\tr31,\t$ret1\n$fix2:\tbis\tr22,\tcy0,\tr22\tC bring forward borrow\n\tbr\tr31,\t$ret2\n$fix3:\tbis\tr23,\tr22,\tr23\tC bring forward borrow\n\tbr\tr31,\t$ret3\n$fix4:\tbis\tcy1,\tr23,\tcy1\tC bring forward borrow\n\tbr\tr31,\t$ret4\n$fix5:\tbis\tcy1,\tcy0,\tcy0\tC bring forward borrow\n\tbr\tr31,\t$ret5\n$fix6:\tbis\tr22,\tcy0,\tr22\tC bring forward borrow\n\tbr\tr31,\t$ret6\n$fix7:\tbis\tr23,\tr22,\tr23\tC bring forward borrow\n\tbr\tr31,\t$ret7\n$fix0c:\tbis\tcy1,\tr23,\tcy1\tC bring forward borrow\n\tbr\tr31,\t$ret0c\n$fix1c:\tbis\tcy0,\tcy1,\tcy0\tC bring forward borrow\n\tbr\tr31,\t$ret1c\n$fix7c:\tbis\tr23,\tr22,\tr23\tC bring forward borrow\n\tbr\tr31,\t$ret7c\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev6/submul_1.asm",
    "content": "dnl Alpha ev6 mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl the result from a second limb vector.\n\ndnl  Copyright 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndnl  INPUT PARAMETERS\ndnl  res_ptr\tr16\ndnl  s1_ptr\tr17\ndnl  size\tr18\ndnl  s2_limb\tr19\n\ndnl  This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and\ndnl  exactly 3.5 cycles/limb on EV6...\n\ndnl This code was written in close cooperation with ev6 pipeline expert\ndnl Steve Root.  Any errors are tege's fault, though.\ndnl\ndnl   Register usages for unrolled loop:\ndnl\t  0-3     mul's\ndnl\t  4-7     acc's\ndnl\t  8-15    mul results\ndnl\t  20,21   carry's\ndnl\t  22,23   save for stores\n\ndnl   Sustains 8 mul-adds in 28 cycles in the unrolled inner loop.\n\ndnl   The stores can issue a cycle late so we have paired no-op's to 'catch'\ndnl   them, so that further disturbance to the schedule is damped.\n\ndnl   We couldn't pair the loads, because the entangled schedule of the\ndnl   carry's has to happen on one side {0} of the machine. 
Note, the total\ndnl   use of U0, and the total use of L0 (after attending to the stores).\ndnl   which is part of the reason why....\n\ndnl   This is a great schedule for the d_cache, a poor schedule for the\ndnl   b_cache. The lockup on U0 means that any stall can't be recovered\ndnl   from. Consider a ldq in L1.  say that load gets stalled because it\ndnl   collides with a fill from the b_Cache. On the next cycle, this load\ndnl   gets priority. If first looks at L0, and goes there. The instruction\ndnl   we intended for L0 gets to look at L1, which is NOT where we want\ndnl   it. It either stalls 1, because it can't go in L0, or goes there, and\ndnl   causes a further instruction to stall.\n\ndnl   So for b_cache, we're likely going to want to put one or more cycles\ndnl   back into the code! And, of course, put in prefetches. For the\ndnl   accumulator, lds, intent to modify.  For the multiplier, you might\ndnl   want ldq, evict next, if you're not wanting to use it again soon. Use\ndnl   256 ahead of present pointer value. At a place where we have an mt\ndnl   followed by a bookkeeping, put the bookkeeping in upper, and the\ndnl   prefetch into lower.\n\ndnl   Note, the usage of physical registers per cycle is smoothed off, as\ndnl   much as possible.\n\ndnl   Note, the ldq's and stq's are at the end of the quadpacks.  note, we'd\ndnl   like not to have a ldq or stq to preceded a conditional branch in a\ndnl   quadpack. 
The conditional branch moves the retire pointer one cycle\ndnl   later.\n\ndnl   Optimization notes:\ndnl   Callee-saves regs: r9 r10 r11 r12 r13 r14 r15 r26 ?r27?\ndnl   Reserved regs:\t r29 r30 r31\ndnl   Free caller-saves regs in unrolled code: r24 r25 r28\ndnl   We should swap some of the callee-saves regs for some of the free\ndnl   caller-saves regs, saving some overhead cycles.\ndnl   Most importantly, we should write fast code for the 0-7 case.\ndnl   The code we use there are for the 21164, and runs at 7 cycles/limb\ndnl   on the 21264.  Should not be hard, if we write specialized code for\ndnl   1-7 limbs (the one for 0 limbs should be straightforward).  We then just\ndnl   need a jump table indexed by the low 3 bits of the count argument.\n\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tcmpult\tr18,\t8,\tr1\n\tbeq\tr1,\t$Large\n\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\tsubq\tr18,\t1,\tr18\tC size--\n\tmulq\tr2,\tr19,\tr3\tC r3 = prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\tumulh\tr2,\tr19,\tr0\tC r0 = prod_high\n\tbeq\tr18,\t$Lend0b\t\tC jump if size was == 1\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\tsubq\tr18,\t1,\tr18\tC size--\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr4\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\tbeq\tr18,\t$Lend0a\t\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop0:\tmulq\tr2,\tr19,\tr3\tC r3 = prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\taddq\tr4,\tr0,\tr0\tC cy_limb = cy_limb + 'cy'\n\tsubq\tr18,\t1,\tr18\tC size--\n\tumulh\tr2,\tr19,\tr4\tC r4 = cy_limb\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\taddq\tr3,\tr0,\tr3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,\tr0,\tr0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\taddq\tr5,\tr0,\tr0\tC combine carries\n\tbne\tr18,\t$Loop0\n$Lend0a:\n\tmulq\tr2,\tr19,\tr3\tC r3 = 
prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\taddq\tr4,\tr0,\tr0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,\tr19,\tr4\tC r4 = cy_limb\n\taddq\tr3,\tr0,\tr3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,\tr0,\tr0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr5,\tr0,\tr0\tC combine carries\n\taddq\tr4,\tr0,\tr0\tC cy_limb = prod_high + cy\n\tret\tr31,\t(r26),\t1\n$Lend0b:\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr0,\tr5,\tr0\n\tret\tr31,\t(r26),\t1\n\n$Large:\n\tlda\t$30,\t-240($30)\n\tstq\t$9,\t8($30)\n\tstq\t$10,\t16($30)\n\tstq\t$11,\t24($30)\n\tstq\t$12,\t32($30)\n\tstq\t$13,\t40($30)\n\tstq\t$14,\t48($30)\n\tstq\t$15,\t56($30)\n\n\tand\tr18,\t7,\tr20\tC count for the first loop, 0-7\n\tsrl\tr18,\t3,\tr18\tC count for unrolled loop\n\tbis\tr31,\tr31,\tr0\n\tbeq\tr20,\t$Lunroll\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\tsubq\tr20,\t1,\tr20\tC size--\n\tmulq\tr2,\tr19,\tr3\tC r3 = prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\tumulh\tr2,\tr19,\tr0\tC r0 = prod_high\n\tbeq\tr20,\t$Lend1b\t\tC jump if size was == 1\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\tsubq\tr20,\t1,\tr20\tC size--\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr4\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\tbeq\tr20,\t$Lend1a\t\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop1:\tmulq\tr2,\tr19,\tr3\tC r3 = prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\taddq\tr4,\tr0,\tr0\tC cy_limb = cy_limb + 'cy'\n\tsubq\tr20,\t1,\tr20\tC size--\n\tumulh\tr2,\tr19,\tr4\tC r4 = cy_limb\n\tldq\tr2,\t0(r17)\t\tC r2 = s1_limb\n\taddq\tr17,\t8,\tr17\tC s1_ptr++\n\taddq\tr3,\tr0,\tr3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,\tr0,\tr0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\taddq\tr5,\tr0,\tr0\tC combine 
carries\n\tbne\tr20,\t$Loop1\n\n$Lend1a:\n\tmulq\tr2,\tr19,\tr3\tC r3 = prod_low\n\tldq\tr5,\t0(r16)\t\tC r5 = *res_ptr\n\taddq\tr4,\tr0,\tr0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,\tr19,\tr4\tC r4 = cy_limb\n\taddq\tr3,\tr0,\tr3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,\tr0,\tr0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\taddq\tr5,\tr0,\tr0\tC combine carries\n\taddq\tr4,\tr0,\tr0\tC cy_limb = prod_high + cy\n\tbr\tr31,\t$Lunroll\n$Lend1b:\n\tsubq\tr5,\tr3,\tr3\n\tcmpult\tr5,\tr3,\tr5\n\tstq\tr3,\t0(r16)\n\taddq\tr16,\t8,\tr16\tC res_ptr++\n\taddq\tr0,\tr5,\tr0\n\n$Lunroll:\n\tlda\tr17,\t-16(r17)\tC L1 bookkeeping\n\tlda\tr16,\t-16(r16)\tC L1 bookkeeping\n\tbis\tr0,\tr31,\tr12\n\nC ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____\n\n\tldq\tr2,\t16(r17)\t\tC L1\n\tldq\tr3,\t24(r17)\t\tC L1\n\tlda\tr18,\t-1(r18)\t\tC L1 bookkeeping\n\tldq\tr6,\t16(r16)\t\tC L1\n\tldq\tr7,\t24(r16)\t\tC L1\n\tldq\tr0,\t32(r17)\t\tC L1\n\tmulq\tr19,\tr2,\tr13\tC U1\n\tldq\tr1,\t40(r17)\t\tC L1\n\tumulh\tr19,\tr2,\tr14\tC U1\n\tmulq\tr19,\tr3,\tr15\tC U1\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\tldq\tr4,\t32(r16)\t\tC L1\n\tldq\tr5,\t40(r16)\t\tC L1\n\tumulh\tr19,\tr3,\tr8\tC U1\n\tldq\tr2,\t-16(r17)\tC L1\n\tmulq\tr19,\tr0,\tr9\tC U1\n\tldq\tr3,\t-8(r17)\t\tC L1\n\tumulh\tr19,\tr0,\tr10\tC U1\n\tsubq\tr6,\tr13,\tr13\tC L0 lo + acc\n\tmulq\tr19,\tr1,\tr11\tC U1\n\tcmpult\tr6,\tr13,\tr20\tC L0 lo add => carry\n\tlda\tr16,\t64(r16)\t\tC L1 bookkeeping\n\tsubq\tr13,\tr12,\tr22\tC U0 hi add => answer\n\tcmpult\tr13,\tr12,\tr21\tC L0 hi add => carry\n\taddq\tr14,\tr20,\tr14\tC U0 hi mul + carry\n\tldq\tr6,\t-16(r16)\tC L1\n\tsubq\tr7,\tr15,\tr28\tC L0 lo + acc\n\taddq\tr14,\tr21,\tr14\tC U0 hi mul + carry\n\tcmpult\tr7,\tr15,\tr20\tC L0 lo add => carry\n\tldq\tr7,\t-8(r16)\t\tC L1\n\tumulh\tr19,\tr1,\tr12\tC U1\n\tsubq\tr28,\tr14,\tr23\tC U0 hi add => answer\n\tldq\tr0,\t0(r17)\t\tC 
L1\n\tmulq\tr19,\tr2,\tr13\tC U1\n\tcmpult\tr28,\tr14,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tldq\tr1,\t8(r17)\t\tC L1\n\tumulh\tr19,\tr2,\tr14\tC U1\n\tsubq\tr4,\tr9,\tr9\tC L0 lo + acc\n\tstq\tr22,\t-48(r16)\tC L0\n\tstq\tr23,\t-40(r16)\tC L1\n\tmulq\tr19,\tr3,\tr15\tC U1\n\taddq\tr8,\tr21,\tr8\tC U0 hi mul + carry\n\tcmpult\tr4,\tr9,\tr20\tC L0 lo add => carry\n\tsubq\tr9,\tr8,\tr22\tC U0 hi add => answer\n\tble\tr18,\t$Lend\t\tC U1 bookkeeping\n\nC ____ MAIN UNROLLED LOOP ____\n\tALIGN(16)\n$Loop:\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr9,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr10,\tr20,\tr10\tC U0 hi mul + carry\n\tldq\tr4,\t0(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tsubq\tr5,\tr11,\tr23\tC L0 lo + acc\n\taddq\tr10,\tr21,\tr10\tC L0 hi mul + carry\n\tldq\tr2,\t16(r17)\t\tC L1\n\n\tumulh\tr19,\tr3,\tr8\tC U1\n\tcmpult\tr5,\tr11,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr10,\tr28\tC U0 hi add => answer\n\tldq\tr5,\t8(r16)\t\tC L1\n\n\tmulq\tr19,\tr0,\tr9\tC U1\n\tcmpult\tr23,\tr10,\tr21\tC L0 hi add => carry\n\taddq\tr12,\tr20,\tr12\tC U0 hi mul + carry\n\tldq\tr3,\t24(r17)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr10\tC U1\n\tsubq\tr6,\tr13,\tr13\tC U0 lo + acc\n\tstq\tr22,\t-32(r16)\tC L0\n\tstq\tr28,\t-24(r16)\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr11\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr12,\tr21,\tr12\tC U0 hi mul + carry\n\n\tcmpult\tr6,\tr13,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr18,\t-1(r18)\t\tC L1 bookkeeping\n\tsubq\tr13,\tr12,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr13,\tr12,\tr21\tC L0 hi add => carry\n\taddq\tr14,\tr20,\tr14\tC U0 hi mul + carry\n\tldq\tr6,\t16(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tsubq\tr7,\tr15,\tr23\tC L0 lo + acc\n\taddq\tr14,\tr21,\tr14\tC U0 hi mul + carry\n\tldq\tr0,\t32(r17)\t\tC L1\n\n\tumulh\tr19,\tr1,\tr12\tC U1\n\tcmpult\tr7,\tr15,\tr20\tC L0 lo add => 
carry\n\tsubq\tr23,\tr14,\tr28\tC U0 hi add => answer\n\tldq\tr7,\t24(r16)\t\tC L1\n\n\tmulq\tr19,\tr2,\tr13\tC U1\n\tcmpult\tr23,\tr14,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tldq\tr1,\t40(r17)\t\tC L1\n\n\tumulh\tr19,\tr2,\tr14\tC U1\n\tsubq\tr4,\tr9,\tr9\tC U0 lo + acc\n\tstq\tr22,\t-16(r16)\tC L0\n\tstq\tr28,\t-8(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr3,\tr15\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr8,\tr21,\tr8\tC L0 hi mul + carry\n\n\tcmpult\tr4,\tr9,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr17,\t64(r17)\t\tC L1 bookkeeping\n\tsubq\tr9,\tr8,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr9,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr10,\tr20,\tr10\tC U0 hi mul + carry\n\tldq\tr4,\t32(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tsubq\tr5,\tr11,\tr23\tC L0 lo + acc\n\taddq\tr10,\tr21,\tr10\tC L0 hi mul + carry\n\tldq\tr2,\t-16(r17)\tC L1\n\n\tumulh\tr19,\tr3,\tr8\tC U1\n\tcmpult\tr5,\tr11,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr10,\tr28\tC U0 hi add => answer\n\tldq\tr5,\t40(r16)\t\tC L1\n\n\tmulq\tr19,\tr0,\tr9\tC U1\n\tcmpult\tr23,\tr10,\tr21\tC L0 hi add => carry\n\taddq\tr12,\tr20,\tr12\tC U0 hi mul + carry\n\tldq\tr3,\t-8(r17)\t\tC L1\n\n\tumulh\tr19,\tr0,\tr10\tC U1\n\tsubq\tr6,\tr13,\tr13\tC U0 lo + acc\n\tstq\tr22,\t0(r16)\t\tC L0\n\tstq\tr28,\t8(r16)\t\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr1,\tr11\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr12,\tr21,\tr12\tC U0 hi mul + carry\n\n\tcmpult\tr6,\tr13,\tr20\tC L0 lo add => carry\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tlda\tr16,\t64(r16)\t\tC L1 bookkeeping\n\tsubq\tr13,\tr12,\tr22\tC U0 hi add => answer\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tcmpult\tr13,\tr12,\tr21\tC L0 hi add => carry\n\taddq\tr14,\tr20,\tr14\tC U0 hi mul + carry\n\tldq\tr6,\t-16(r16)\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC U1 mt\n\tsubq\tr7,\tr15,\tr23\tC L0 lo + 
acc\n\taddq\tr14,\tr21,\tr14\tC U0 hi mul + carry\n\tldq\tr0,\t0(r17)\t\tC L1\n\n\tumulh\tr19,\tr1,\tr12\tC U1\n\tcmpult\tr7,\tr15,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr14,\tr28\tC U0 hi add => answer\n\tldq\tr7,\t-8(r16)\t\tC L1\n\n\tmulq\tr19,\tr2,\tr13\tC U1\n\tcmpult\tr23,\tr14,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tldq\tr1,\t8(r17)\t\tC L1\n\n\tumulh\tr19,\tr2,\tr14\tC U1\n\tsubq\tr4,\tr9,\tr9\tC U0 lo + acc\n\tstq\tr22,\t-48(r16)\tC L0\n\tstq\tr28,\t-40(r16)\tC L1\n\n\tbis\tr31,\tr31,\tr31\tC L0 st slosh\n\tmulq\tr19,\tr3,\tr15\tC U1\n\tbis\tr31,\tr31,\tr31\tC L1 st slosh\n\taddq\tr8,\tr21,\tr8\tC U0 hi mul + carry\n\n\tcmpult\tr4,\tr9,\tr20\tC L0 lo add => carry\n\tsubq\tr9,\tr8,\tr22\tC U0 hi add => answer\n\tbis\tr31,\tr31,\tr31\tC L1 mt\n\tbgt\tr18,\t$Loop\t\tC U1 bookkeeping\n\nC ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____\n$Lend:\n\tcmpult\tr9,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr10,\tr20,\tr10\tC U0 hi mul + carry\n\tldq\tr4,\t0(r16)\t\tC L1\n\tsubq\tr5,\tr11,\tr23\tC L0 lo + acc\n\taddq\tr10,\tr21,\tr10\tC L0 hi mul + carry\n\tumulh\tr19,\tr3,\tr8\tC U1\n\tcmpult\tr5,\tr11,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr10,\tr28\tC U0 hi add => answer\n\tldq\tr5,\t8(r16)\t\tC L1\n\tmulq\tr19,\tr0,\tr9\tC U1\n\tcmpult\tr23,\tr10,\tr21\tC L0 hi add => carry\n\taddq\tr12,\tr20,\tr12\tC U0 hi mul + carry\n\tumulh\tr19,\tr0,\tr10\tC U1\n\tsubq\tr6,\tr13,\tr13\tC L0 lo + acc\n\tstq\tr22,\t-32(r16)\tC L0\n\tstq\tr28,\t-24(r16)\tC L1\n\tmulq\tr19,\tr1,\tr11\tC U1\n\taddq\tr12,\tr21,\tr12\tC U0 hi mul + carry\n\tcmpult\tr6,\tr13,\tr20\tC L0 lo add => carry\n\tsubq\tr13,\tr12,\tr22\tC U0 hi add => answer\n\tcmpult\tr13,\tr12,\tr21\tC L0 hi add => carry\n\taddq\tr14,\tr20,\tr14\tC U0 hi mul + carry\n\tsubq\tr7,\tr15,\tr23\tC L0 lo + acc\n\taddq\tr14,\tr21,\tr14\tC U0 hi mul + carry\n\tumulh\tr19,\tr1,\tr12\tC U1\n\tcmpult\tr7,\tr15,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr14,\tr28\tC U0 hi add => 
answer\n\tcmpult\tr23,\tr14,\tr21\tC L0 hi add => carry\n\taddq\tr8,\tr20,\tr8\tC U0 hi mul + carry\n\tsubq\tr4,\tr9,\tr9\tC U0 lo + acc\n\tstq\tr22,\t-16(r16)\tC L0\n\tstq\tr28,\t-8(r16)\t\tC L1\n\taddq\tr8,\tr21,\tr8\tC L0 hi mul + carry\n\tcmpult\tr4,\tr9,\tr20\tC L0 lo add => carry\n\tsubq\tr9,\tr8,\tr22\tC U0 hi add => answer\n\tcmpult\tr9,\tr8,\tr21\tC L0 hi add => carry\n\taddq\tr10,\tr20,\tr10\tC U0 hi mul + carry\n\tsubq\tr5,\tr11,\tr23\tC L0 lo + acc\n\taddq\tr10,\tr21,\tr10\tC L0 hi mul + carry\n\tcmpult\tr5,\tr11,\tr20\tC L0 lo add => carry\n\tsubq\tr23,\tr10,\tr28\tC U0 hi add => answer\n\tcmpult\tr23,\tr10,\tr21\tC L0 hi add => carry\n\taddq\tr12,\tr20,\tr12\tC U0 hi mul + carry\n\tstq\tr22,\t0(r16)\t\tC L0\n\tstq\tr28,\t8(r16)\t\tC L1\n\taddq\tr12,\tr21,\tr0\tC U0 hi mul + carry\n\n\tldq\t$9,\t8($30)\n\tldq\t$10,\t16($30)\n\tldq\t$11,\t24($30)\n\tldq\t$12,\t32($30)\n\tldq\t$13,\t40($30)\n\tldq\t$14,\t48($30)\n\tldq\t$15,\t56($30)\n\tlda\t$30,\t240($30)\n\tret\tr31,\t(r26),\t1\nEPILOGUE(mpn_submul_1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev67/gcd_1.asm",
    "content": "dnl  Alpha ev67 mpn_gcd_1 -- Nx1 greatest common divisor.\n\ndnl  Copyright 2003, 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC ev67: 3.4 cycles/bitpair for 1x1 part\n\n\nC mp_limb_t mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);\nC\nC In the 1x1 part, the algorithm is to change x,y to abs(x-y),min(x,y) and\nC strip trailing zeros from abs(x-y) to maintain x and y both odd.\nC\nC The trailing zeros are calculated from just x-y, since in twos-complement\nC there's the same number of trailing zeros on d or -d.  This means the cttz\nC runs in parallel with abs(x-y).\nC\nC The loop takes 5 cycles, and at 0.68 iterations per bit for two N-bit\nC operands with this algorithm gives the measured 3.4 c/l.\nC\nC The slottings shown are for SVR4 style systems, Unicos differs in the\nC initial gp setup and the LEA.\nC\nC Enhancement:\nC\nC On the jsr, !lituse_jsr! (when available) would allow the linker to relax\nC it to a bsr, but probably only in a static binary.  
Plain \"jsr foo\" gives\nC the right object code for relaxation, and ought to be available\nC everywhere, but we prefer to schedule the GOT ldq (LEA) back earlier, for\nC the usual case of running in a shared library.\nC\nC bsr could perhaps be used explicitly anyway.  We should be able to assume\nC modexact is in the same module as us (ie. shared library or mainline).\nC Would there be any worries about the size of the displacement?  Could\nC always put modexact and gcd_1 in the same .o to be certain.\n\nASM_START()\nPROLOGUE(mpn_gcd_1, gp)\n\n\tC r16\txp\n\tC r17\tsize\n\tC r18\ty\n\n\tC ldah\t\t\t\tC l\n\tC lda\t\t\t\tC u\n\n\tldq\tr0, 0(r16)\t\tC L   x = xp[0]\n\tlda\tr30, -32(r30)\t\tC u   alloc stack\n\n\tLEA(  r27, mpn_modexact_1c_odd)\tC L   modexact addr, ldq (gp)\n\tstq\tr10, 16(r30)\t\tC L   save r10\n\tcttz\tr18, r10\t\tC U0  y twos\n\tcmpeq\tr17, 1, r5\t\tC u   test size==1\n\n\tstq\tr9, 8(r30)\t\tC L   save r9\n\tclr\tr19\t\t\tC u   zero c for modexact\n\tunop\n\tunop\n\n\tcttz\tr0, r6\t\t\tC U0  x twos\n\tstq\tr26, 0(r30)\t\tC L   save ra\n\n\tsrl\tr18, r10, r18\t\tC U   y odd\n\n\tmov\tr18, r9\t\t\tC l   hold y across call\n\n\tcmpult\tr6, r10, r2\t\tC u   test x_twos < y_twos\n\n\tcmovne\tr2, r6, r10\t\tC l   common_twos = min(x_twos,y_twos)\n\tbne\tr5, L(one)\t\tC U   no modexact if size==1\n\tjsr\tr26, (r27), mpn_modexact_1c_odd   C L0\n\n\tLDGP(\tr29, 0(r26))\t\tC u,l ldah,lda\n\tcttz\tr0, r6\t\t\tC U0  new x twos\n\tldq\tr26, 0(r30)\t\tC L   restore ra\n\nL(one):\n\tmov\tr9, r1\t\t\tC u   y\n\tldq\tr9, 8(r30)\t\tC L   restore r9\n\tmov\tr10, r2\t\t\tC u   common twos\n\tldq\tr10, 16(r30)\t\tC L   restore r10\n\n\tlda\tr30, 32(r30)\t\tC l   free stack\n\tbeq\tr0, L(done)\t\tC U   return y if x%y==0\n\n\tsrl\tr0, r6, r0\t\tC U   x odd\n\tunop\n\n\tALIGN(16)\nL(top):\n\tC r0\tx\n\tC r1\ty\n\tC r2\tcommon twos, for use at end\n\n\tsubq\tr0, r1, r7\t\tC l0  d = x - y\n\tcmpult\tr0, r1, r16\t\tC u0  test x >= y\n\n\tsubq\tr1, r0, r4\t\tC l0  new_x = 
y - x\n\tcttz\tr7, r8\t\t\tC U0  d twos\n\n\tcmoveq\tr16, r7, r4\t\tC l0  new_x = d if x>=y\n\tcmovne\tr16, r0, r1\t\tC u0  y = x if x<y\n\tunop\t\t\t\tC l   \\ force cmoveq into l0\n\tunop\t\t\t\tC u   /\n\n\tC\t\t\t\tC cmoveq2 L0, cmovne2 U0\n\n\tsrl\tr4, r8, r0\t\tC U0  x = new_x >> twos\n\tbne\tr7, L(top)\t\tC U1  stop when d==0\n\n\nL(done):\n\tsll\tr1, r2, r0\t\tC U0  return y << common_twos\n\tret\tr31, (r26), 1\t\tC L0\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev67/hamdist.asm",
    "content": "dnl  Alpha ev67 mpn_hamdist -- mpn hamming distance.\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC ev67: 2.5 cycles/limb\n\n\nC unsigned long mpn_hamdist (mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nC\nC The hope was for 2.0 c/l here, but that isn't achieved.  We're limited by\nC renaming register shortage.  Since we need 5 instructions per limb, further\nC unrolling could approach 1.5 c/l.\nC\nC The main loop processes two limbs from each operand on each iteration.  An\nC odd size is handled by processing xp[0]^yp[0] at the start.  
If the size\nC is even that result is discarded, and is repeated by the main loop.\nC\n\nASM_START()\nPROLOGUE(mpn_hamdist)\n\n\tC r16\txp\n\tC r17\typ\n\tC r18\tsize\n\n\tldq\tr1, 0(r16)\t\tC L0  xp[0]\n\tldq\tr2, 0(r17)\t\tC L1  yp[0]\n\tand\tr18, 1, r8\t\tC U1  1 if size odd\n\tsrl\tr18, 1, r18\t\tC U0  size, limb pairs\n\n\tclr\tr0\t\t\tC L0  initial total\n\ts8addq\tr8, r17, r17\t\tC U1  yp++ if size odd\n\ts8addq\tr8, r16, r16\t\tC L1  xp++ if size odd\n\tclr\tr6\t\t\tC U0  dummy initial xor 1\n\n\txor\tr1, r2, r5\t\tC L   initial xor 0\n\tbeq\tr18, L(one)\t\tC U   if size==1\n\n\tcmoveq\tr8, r31, r5\t\tC L   discard first limb if size even\n\tunop\t\t\t\tC U\n\n\n\tALIGN(16)\nL(top):\n\tC r0\ttotal accumulating\n\tC r7\txor 0\n\tC r8\txor 1\n\tC r16\txp, incrementing\n\tC r17\typ, incrementing\n\tC r18\tsize, limb pairs, decrementing\n\n\tldq\tr1, 0(r16)\t\tC L\n\tldq\tr2, 0(r17)\t\tC L\n\tctpop\tr5, r7\t\t\tC U0\n\tlda\tr16, 16(r16)\t\tC U\n\n\tldq\tr3, -8(r16)\t\tC L\n\tldq\tr4, 8(r17)\t\tC L\n\tctpop\tr6, r8\t\t\tC U0\n\tlda\tr17, 16(r17)\t\tC U\n\n\tldl\tr31, 256(r16)\t\tC L\tprefetch\n\tldl\tr31, 256(r17)\t\tC L\tprefetch\n\txor\tr1, r2, r5\t\tC U\n\tlda\tr18, -1(r18)\t\tC U\n\n\txor\tr3, r4, r6\t\tC U\n\taddq\tr0, r7, r0\t\tC L\n\taddq\tr0, r8, r0\t\tC L\n\tbne\tr18, L(top)\t\tC U\n\n\n\tctpop\tr6, r8\t\t\tC U0\n\taddq\tr0, r8, r0\t\tC L\nL(one):\n\tctpop\tr5, r7\t\t\tC U0\n\taddq\tr0, r7, r0\t\tC L\n\n\tret\tr31, (r26), 1\t\tC L0\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/ev67/popcount.asm",
    "content": "dnl  Alpha ev67 mpn_popcount -- mpn bit population count.\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC ev67: 1.5 cycles/limb\n\n\nC unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);\nC\nC This schedule seems necessary for the full 1.5 c/l, the IQ can't quite hide\nC all latencies, the addq's must be deferred to the next iteration.\nC\nC Since we need just 3 instructions per limb, further unrolling could approach\nC 1.0 c/l.\nC\nC The main loop processes two limbs at a time.  An odd size is handled by\nC processing src[0] at the start.  
If the size is even that result is\nC discarded, and src[0] is repeated by the main loop.\nC\n\nASM_START()\nPROLOGUE(mpn_popcount)\n\n\tC r16\tsrc\n\tC r17\tsize\n\n\tldq\tr0, 0(r16)\t\tC L0  src[0]\n\tand\tr17, 1, r8\t\tC U1  1 if size odd\n\tsrl\tr17, 1, r17\t\tC U0  size, limb pairs\n\n\ts8addq\tr8, r16, r16\t\tC L1  src++ if size odd\n\tctpop\tr0, r0\t\t\tC U0\n\tbeq\tr17, L(one)\t\tC U1  if size==1\n\n\tcmoveq\tr8, r31, r0\t\tC L   discard first limb if size even\n\tclr\tr3\t\t\tC L\n\n\tclr\tr4\t\t\tC L\n\tunop\t\t\t\tC U\n\tunop\t\t\t\tC L\n\tunop\t\t\t\tC U\n\n\n\tALIGN(16)\nL(top):\n\tC r0\ttotal accumulating\n\tC r3\tpop 0\n\tC r4\tpop 1\n\tC r16\tsrc, incrementing\n\tC r17\tsize, decrementing\n\n\tldq\tr1, 0(r16)\t\tC L\n\tldq\tr2, 8(r16)\t\tC L\n\tlda\tr16, 16(r16)\t\tC U\n\tlda\tr17, -1(r17)\t\tC U\n\n\taddq\tr0, r3, r0\t\tC L\n\taddq\tr0, r4, r0\t\tC L\n\tctpop\tr1, r3\t\t\tC U0\n\tctpop\tr2, r4\t\t\tC U0\n\n\tldl\tr31, 512(r16)\t\tC L\tprefetch\n\tbne\tr17, L(top)\t\tC U\n\n\n\taddq\tr0, r3, r0\t\tC L\n\taddq\tr0, r4, r0\t\tC U\nL(one):\n\tret\tr31, (r26), 1\t\tC L0\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/invert_limb.asm",
    "content": "dnl  Alpha mpn_invert_limb -- Invert a normalized limb.\n\ndnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:    175\nC EV5:    125\nC EV6:     67\n\nC  This is based on ideas of Peter L. 
Montgomery.\n\nASM_START()\n\nFLOAT64($C36,9223372036854775808.0)\t\tC 2^63\n\nPROLOGUE(mpn_invert_limb,gp)\n\tlda\tr30,-16(r30)\n\taddq\tr16,r16,r1\n\tbne\tr1,$73\n\tlda\tr0,-1\n\tbr\tr31,$Lend\n$73:\n\tsrl\tr16,1,r1\n\tstq\tr1,0(r30)\n\tldt\tf11,0(r30)\n\tcvtqt\tf11,f1\n\tLEA(r1,$C36)\n\tldt\tf10,0(r1)\t\tC f10 = 2^63\n\tdivt\tf10,f1,f10\t\tC f10 = 2^63 / (u / 2)\n\tLEA(r2,$invtab-4096)\n\tsrl\tr16,52,r1\t\tC extract high 8 bits\n\taddq\tr1,r1,r1\t\tC align ...0000bbbbbbbb0\n\taddq\tr1,r2,r1\t\tC compute array offset\n\tldq_u\tr2,0(r1)\t\tC load quadword containing our 16 bits\nbigend(`addq\tr1,1,r1')\n\textwl\tr2,r1,r2\t\tC extract desired 16 bits\n\tsll\tr2,48,r0\n\tumulh\tr16,r0,r1\n\taddq\tr16,r1,r3\n\tstq\tr3,0(r30)\n\tldt\tf11,0(r30)\n\tcvtqt\tf11,f1\n\tmult\tf1,f10,f1\n\tcvttqc\tf1,f1\n\tstt\tf1,0(r30)\n\tldq\tr4,0(r30)\n\tsubq\tr0,r4,r0\n\tumulh\tr16,r0,r1\n\tmulq\tr16,r0,r2\n\taddq\tr16,r1,r3\n\tbge\tr3,$Loop2\n$Loop1:\taddq\tr2,r16,r2\n\tcmpult\tr2,r16,r1\n\taddq\tr3,r1,r3\n\taddq\tr0,1,r0\n\tblt\tr3,$Loop1\n$Loop2:\tcmpult\tr2,r16,r1\n\tsubq\tr0,1,r0\n\tsubq\tr3,r1,r3\n\tsubq\tr2,r16,r2\n\tbge\tr3,$Loop2\n$Lend:\n\tlda\tr30,16(r30)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_invert_limb)\nDATASTART($invtab)\n\t.word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41\n\t.word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46\n\t.word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50\n\t.word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d\n\t.word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e\n\t.word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483\n\t.word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c\n\t.word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8\n\t.word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8\n\t.word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb\n\t.word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22\n\t.word 
0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d\n\t.word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b\n\t.word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad\n\t.word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2\n\t.word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a\n\t.word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056\n\t.word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95\n\t.word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7\n\t.word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d\n\t.word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965\n\t.word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1\n\t.word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600\n\t.word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452\n\t.word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7\n\t.word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100\n\t.word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b\n\t.word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9\n\t.word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a\n\t.word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e\n\t.word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5\n\t.word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f\n\t.word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb\n\t.word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a\n\t.word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c\n\t.word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111\n\t.word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89\n\t.word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03\n\t.word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80\n\t.word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff\n\t.word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981\n\t.word 
0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806\n\t.word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d\n\t.word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516\n\t.word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2\n\t.word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231\n\t.word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2\n\t.word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55\n\t.word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb\n\t.word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83\n\t.word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e\n\t.word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb\n\t.word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a\n\t.word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb\n\t.word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f\n\t.word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445\n\t.word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed\n\t.word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197\n\t.word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044\n\t.word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2\n\t.word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3\n\t.word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56\n\t.word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b\n\t.word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2\n\t.word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b\n\t.word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736\n\t.word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3\n\t.word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3\n\t.word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374\n\t.word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237\n\t.word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc\n\t.word 
0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3\n\t.word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b\n\t.word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56\n\t.word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23\n\t.word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1\n\t.word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1\n\t.word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893\n\t.word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767\n\t.word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d\n\t.word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514\n\t.word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee\n\t.word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9\n\t.word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5\n\t.word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084\n\t.word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64\n\t.word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45\n\t.word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29\n\t.word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e\n\t.word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5\n\t.word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd\n\t.word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7\n\t.word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2\n\t.word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0\n\t.word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e\n\t.word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e\n\t.word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370\n\t.word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264\n\t.word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158\n\t.word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f\n\t.word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46\n\t.word 
0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40\n\t.word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a\n\t.word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37\n\t.word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34\n\t.word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33\n\t.word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934\n\t.word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836\n\t.word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739\n\t.word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e\n\t.word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544\n\t.word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b\n\t.word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354\n\t.word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e\n\t.word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169\n\t.word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076\n\t.word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84\n\t.word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93\n\t.word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4\n\t.word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6\n\t.word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9\n\t.word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add\n\t.word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3\n\t.word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a\n\t.word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822\n\t.word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b\n\t.word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656\n\t.word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571\n\t.word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e\n\t.word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac\n\t.word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb\n\t.word 
0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec\n\t.word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d\n\t.word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030\n\t.word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54\n\t.word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79\n\t.word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f\n\t.word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6\n\t.word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee\n\t.word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18\n\t.word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42\n\t.word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e\n\t.word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a\n\t.word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8\n\t.word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6\n\t.word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626\n\t.word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557\n\t.word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489\n\t.word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc\n\t.word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef\n\t.word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224\n\t.word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a\n\t.word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091\n\t.word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9\n\t.word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01\n\t.word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b\n\t.word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76\n\t.word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1\n\t.word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee\n\t.word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b\n\t.word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a\n\t.word 
0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9\n\t.word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea\n\t.word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b\n\t.word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d\n\t.word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0\n\t.word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4\n\t.word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539\n\t.word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e\n\t.word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5\n\t.word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c\n\t.word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255\n\t.word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e\n\t.word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8\n\t.word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033\n\t.word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e\n\t.word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb\n\t.word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18\n\t.word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66\n\t.word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5\n\t.word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05\n\t.word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56\n\t.word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7\n\t.word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9\n\t.word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c\n\t.word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0\n\t.word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5\n\t.word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a\n\t.word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0\n\t.word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7\n\t.word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e\n\t.word 
0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7\n\t.word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400\n\t.word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a\n\t.word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4\n\t.word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210\n\t.word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c\n\t.word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8\n\t.word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026\n\t.word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84\n\t.word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3\n\t.word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42\n\t.word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3\n\t.word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04\n\t.word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65\n\t.word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8\n\t.word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b\n\t.word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f\n\t.word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3\n\t.word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958\n\t.word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be\n\t.word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824\n\t.word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b\n\t.word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3\n\t.word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b\n\t.word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4\n\t.word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e\n\t.word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498\n\t.word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403\n\t.word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f\n\t.word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db\n\t.word 
0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247\n\t.word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5\n\t.word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123\n\t.word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091\n\t.word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001\n\t.word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70\n\t.word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1\n\t.word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52\n\t.word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3\n\t.word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35\n\t.word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8\n\t.word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c\n\t.word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f\n\t.word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04\n\t.word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79\n\t.word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee\n\t.word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965\n\t.word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db\n\t.word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853\n\t.word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca\n\t.word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743\n\t.word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc\n\t.word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635\n\t.word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af\n\t.word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a\n\t.word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5\n\t.word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420\n\t.word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c\n\t.word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319\n\t.word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296\n\t.word 
0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214\n\t.word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192\n\t.word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111\n\t.word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090\n\t.word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010\nDATAEND()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* Most alpha-based machines, except Cray systems. */\n#if defined (__GNUC__)\n#define umul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UDItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    __asm__ (\"umulh %r1,%2,%0\"\t\t\t\t\t\t\\\n\t     : \"=r\" (ph)\t\t\t\t\t\t\\\n\t     : \"%rJ\" (m0), \"rI\" (m1));\t\t\t\t\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n#else /* ! 
__GNUC__ */\n#include <machine/builtins.h>\n#define umul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UDItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    (ph) = __UMULH (m0, m1);\t\t\t\t\t\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n#ifndef LONGLONG_STANDALONE\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do { UWtype __di;\t\t\t\t\t\t\t\\\n    __di = __MPN(invert_limb) (d);\t\t\t\t\t\\\n    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);\t\t\t\t\\\n  } while (0)\n#define UDIV_PREINV_ALWAYS  1\n#define UDIV_NEEDS_NORMALIZATION 1\n#endif /* LONGLONG_STANDALONE */\n\n/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm\n   always goes into libmpir.so, even when not actually used.  */\n#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n\n#if ! defined (count_leading_zeros)                             \\\n  && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)\n/* ALPHA_CMPBGE_0 gives \"cmpbge $31,src,dst\", ie. test src bytes == 0.\n   \"$31\" is written explicitly in the asm, since an \"r\" constraint won't\n   select reg 31.  There seems no need to worry about \"r31\" syntax for cray,\n   since gcc itself (pre-release 3.4) emits just $31 in various places.  */\n#define ALPHA_CMPBGE_0(dst, src)                                        \\\n  do { asm (\"cmpbge $31, %1, %0\" : \"=r\" (dst) : \"r\" (src)); } while (0)\n/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts\n   them, locating the highest non-zero byte.  A second __clz_tab lookup\n   counts the leading zero bits in that byte, giving the result.  
*/\n#define count_leading_zeros(count, x)                                   \\\n  do {                                                                  \\\n    UWtype  __clz__b, __clz__c, __clz__x = (x);                         \\\n    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);           /* zero bytes */    \\\n    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */   \\\n    __clz__b = __clz__b * 8 - 7;                    /* 57 to 1 shift */ \\\n    __clz__x >>= __clz__b;                                              \\\n    __clz__c = __clz_tab [__clz__x];                /* 8 to 1 bit */    \\\n    __clz__b = 65 - __clz__b;                                           \\\n    (count) = __clz__b - __clz__c;                                      \\\n  } while (0)\n#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#endif /* clz using cmpbge */\n\n#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)\n#if HAVE_ATTRIBUTE_CONST\nlong __MPN(count_leading_zeros)(UDItype) __attribute__ ((const));\n#else\nlong __MPN(count_leading_zeros)(UDItype);\n#endif\n#define count_leading_zeros(count, x) \\\n  ((count) = __MPN(count_leading_zeros) (x))\n#endif /* clz using mpn */\n\n"
  },
  {
    "path": "mpn/alpha/lshift.asm",
    "content": "dnl  Alpha mpn_lshift -- Shift a number left.\n\ndnl  Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     4.75\nC EV5:     4\nC EV6:     2\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  cnt\tr19\n\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\ts8addq\tr18,r17,r17\tC make r17 point at end of s1\n\tldq\tr4,-8(r17)\tC load first limb\n\tsubq\tr17,8,r17\n\tsubq\tr31,r19,r7\n\ts8addq\tr18,r16,r16\tC make r16 point at end of RES\n\tsubq\tr18,1,r18\n\tand\tr18,4-1,r20\tC number of limbs in first loop\n\tsrl\tr4,r7,r0\tC compute function 
result\n\n\tbeq\tr20,$L0\n\tsubq\tr18,r20,r18\n\n\tALIGN(8)\n$Loop0:\tldq\tr3,-8(r17)\n\tsubq\tr16,8,r16\n\tsubq\tr17,8,r17\n\tsubq\tr20,1,r20\n\tsll\tr4,r19,r5\n\tsrl\tr3,r7,r6\n\tbis\tr3,r3,r4\n\tbis\tr5,r6,r8\n\tstq\tr8,0(r16)\n\tbne\tr20,$Loop0\n\n$L0:\tbeq\tr18,$Lend\n\n\tALIGN(8)\n$Loop:\tldq\tr3,-8(r17)\n\tsubq\tr16,32,r16\n\tsubq\tr18,4,r18\n\tsll\tr4,r19,r5\n\tsrl\tr3,r7,r6\n\n\tldq\tr4,-16(r17)\n\tsll\tr3,r19,r1\n\tbis\tr5,r6,r8\n\tstq\tr8,24(r16)\n\tsrl\tr4,r7,r2\n\n\tldq\tr3,-24(r17)\n\tsll\tr4,r19,r5\n\tbis\tr1,r2,r8\n\tstq\tr8,16(r16)\n\tsrl\tr3,r7,r6\n\n\tldq\tr4,-32(r17)\n\tsll\tr3,r19,r1\n\tbis\tr5,r6,r8\n\tstq\tr8,8(r16)\n\tsrl\tr4,r7,r2\n\n\tsubq\tr17,32,r17\n\tbis\tr1,r2,r8\n\tstq\tr8,0(r16)\n\n\tbgt\tr18,$Loop\n\n$Lend:\tsll\tr4,r19,r8\n\tstq\tr8,-8(r16)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_lshift)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/mod_34lsub1.asm",
    "content": "dnl Alpha mpn_mod_34lsub1.\n\ndnl  Copyright 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     4 (?)\nC EV5:     2.67\nC EV6:     1.67\n\n\ndnl  INPUT PARAMETERS\ndnl  up\t\tr16\ndnl  n\t\tr17\n\ndefine(`l0',`r18')\ndefine(`l1',`r19')\ndefine(`l2',`r20')\ndefine(`a0',`r21')\ndefine(`a1',`r22')\ndefine(`a2',`r23')\ndefine(`c0',`r24')\ndefine(`c1',`r5')\ndefine(`c2',`r6')\n\nASM_START()\nPROLOGUE(mpn_mod_34lsub1)\n\tbis\tr31, r31, c0\n\tbis\tr31, r31, c1\n\tbis\tr31, r31, c2\n\n\tlda\tr17, -3(r17)\n\tbge\tr17, $L_3_or_more\n\tbis\tr31, r31, a0\n\tbis\tr31, r31, a1\n\tbis\tr31, r31, a2\n\tbr\tr31, $L_012\n\n$L_3_or_more:\n\tldq\ta0, 0(r16)\n\tldq\ta1, 8(r16)\n\tldq\ta2, 16(r16)\n\tlda\tr16, 24(r16)\n\tlda\tr17, -3(r17)\n\tblt\tr17, $L_012\n\n$L_6_or_more:\n\tldq\tl0, 0(r16)\n\tldq\tl1, 8(r16)\n\tldq\tl2, 16(r16)\n\taddq\tl0, a0, a0\n\n\tlda\tr16, 24(r16)\n\tlda\tr17, -3(r17)\n\tblt\tr17, $L_end\n\n\tALIGN(16)\nC Main loop\n$L_9_or_more:\n$Loop:\tcmpult\ta0, l0, r0\n\tldq\tl0, 0(r16)\n\taddq\tr0, c0, c0\n\taddq\tl1, a1, a1\n\tcmpult\ta1, l1, 
r0\n\tldq\tl1, 8(r16)\n\taddq\tr0, c1, c1\n\taddq\tl2, a2, a2\n\tcmpult\ta2, l2, r0\n\tldq\tl2, 16(r16)\n\taddq\tr0, c2, c2\n\taddq\tl0, a0, a0\n\tlda\tr16, 24(r16)\n\tlda\tr17, -3(r17)\n\tbge\tr17, $Loop\n\n$L_end:\tcmpult\ta0, l0, r0\n\taddq\tr0, c0, c0\n\taddq\tl1, a1, a1\n\tcmpult\ta1, l1, r0\n\taddq\tr0, c1, c1\n\taddq\tl2, a2, a2\n\tcmpult\ta2, l2, r0\n\taddq\tr0, c2, c2\n\nC Handle the last (n mod 3) limbs\n$L_012:\tlda\tr17, 2(r17)\n\tblt\tr17, $L_0\n\tldq\tl0, 0(r16)\n\taddq\tl0, a0, a0\n\tcmpult\ta0, l0, r0\n\taddq\tr0, c0, c0\n\tbeq\tr17, $L_0\n\tldq\tl1, 8(r16)\n\taddq\tl1, a1, a1\n\tcmpult\ta1, l1, r0\n\taddq\tr0, c1, c1\n\nC Align and sum our 3 main accumulators and 3 carry accumulators\n$L_0:\tsrl\ta0, 48, r2\n\tsrl\ta1, 32, r4\nifdef(`HAVE_LIMB_LITTLE_ENDIAN',\n`\tinsll\ta1, 2, r1',\t\tC (a1 & 0xffffffff) << 16\n`\tzapnot\ta1, 15, r25\n\tsll\tr25, 16, r1')\n\tzapnot\ta0, 63, r0\t\tC a0 & 0xffffffffffff\n\tsrl\ta2, 16, a1\nifdef(`HAVE_LIMB_LITTLE_ENDIAN',\n`\tinswl\ta2, 4, r3',\t\tC (a2 & 0xffff) << 32\n`\tzapnot\ta2, 3, r25\n\tsll\tr25, 32, r3')\n\taddq\tr1, r4, r1\n\taddq\tr0, r2, r0\n\tsrl\tc0, 32, a2\nifdef(`HAVE_LIMB_LITTLE_ENDIAN',\n`\tinsll\tc0, 2, r4',\t\tC (c0 & 0xffffffff) << 16\n`\tzapnot\tc0, 15, r25\n\tsll\tr25, 16, r4')\n\taddq\tr0, r1, r0\n\taddq\tr3, a1, r3\n\taddq\tr0, r3, r0\n\tsrl\tc1, 16, c0\nifdef(`HAVE_LIMB_LITTLE_ENDIAN',\n`\tinswl\tc1, 4, r2',\t\tC (c1 & 0xffff) << 32\n`\tzapnot\tc1, 3, r25\n\tsll\tr25, 32, r2')\n\taddq\tr4, a2, r4\nC\tsrl\tc2, 48, r3\t\tC This will be 0 in practise\n\tzapnot\tc2, 63, r1\t\tC r1 = c2 & 0xffffffffffff\n\taddq\tr0, r4, r0\n\taddq\tr2, c0, r2\n\taddq\tr0, r2, r0\nC\taddq\tr1, r3, r1\n\taddq\tr0, r1, r0\n\n\tret\tr31, (r26), 1\nEPILOGUE(mpn_mod_34lsub1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/modexact_1c_odd.asm",
    "content": "dnl  Alpha mpn_modexact_1c_odd -- mpn exact remainder\n\ndnl  Copyright 2003, 2004 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC      cycles/limb\nC EV4:    47\nC EV5:    30\nC EV6:    15\n\n\nC mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,\nC                                mp_limb_t c)\nC\nC This code follows the \"alternate\" code in mpn/generic/mode1o.c,\nC eliminating cbit+climb from the dependent chain.  This leaves,\nC\nC        ev4   ev5   ev6\nC         1     3     1    subq   y = x - h\nC        23    13     7    mulq   q = y * inverse\nC        23    14     7    umulh  h = high (q * d)\nC        --    --    --\nC        47    30    15\nC\nC In each case, the load latency, loop control, and extra carry bit handling\nC hide under the multiply latencies.  
Those latencies are long enough that\nC we don't need to worry about alignment or pairing to squeeze out\nC performance.\nC\nC For the first limb, some of the loop code is broken out and scheduled back\nC since it can be done earlier.\nC\nC   - The first ldq src[0] is near the start of the routine, for maximum\nC     time from memory.\nC\nC   - The subq y=x-climb can be done without waiting for the inverse.\nC\nC   - The mulq y*inverse is replicated after the final subq for the inverse,\nC     instead of branching to the mulq in the main loop.  On ev4 a branch\nC     there would cost cycles, but we can hide them under the mulq latency.\nC\nC For the last limb, high<divisor is tested and if that's true a subtract\nC and addback is done, as per the main mpn/generic/mode1o.c code.  This is a\nC data-dependent branch, but we're waiting for umulh so any penalty should\nC hide there.  The multiplies saved would be worth the cost anyway.\nC\nC Enhancements:\nC\nC For size==1, a plain division (done bitwise say) might be faster than\nC calculating an inverse, the latter taking about 130 cycles on ev4 or 70 on\nC ev5.  
A call to gcc __remqu might be a possibility.\n\nASM_START()\nPROLOGUE(mpn_modexact_1c_odd,gp)\n\n\tC r16\tsrc\n\tC r17\tsize\n\tC r18\td\n\tC r19\tc\n\n\tLEA(r0, modlimb_invert_table)\n\tsrl\tr18, 1, r20\t\tC d >> 1\n\n\tand\tr20, 127, r20\t\tC idx = d>>1 & 0x7F\n\n\taddq\tr0, r20, r21\t\tC table + idx\n\n\tldq_u\tr20, 0(r21)\t\tC table[idx] qword\n\textbl\tr20, r21, r20\t\tC table[idx], inverse 8 bits\n\n\tmull\tr20, r20, r7\t\tC i*i\n\taddq\tr20, r20, r20\t\tC 2*i\n\n\tldq\tr2, 0(r16)\t\tC x = s = src[0]\n\tlda\tr17, -1(r17)\t\tC size--\n\tclr\tr0\t\t\tC initial cbit=0\n\n\tmull\tr7, r18, r7\t\tC i*i*d\n\n\tsubq\tr20, r7, r20\t\tC 2*i-i*i*d, inverse 16 bits\n\n\tmull\tr20, r20, r7\t\tC i*i\n\taddq\tr20, r20, r20\t\tC 2*i\n\n\tmull\tr7, r18, r7\t\tC i*i*d\n\n\tsubq\tr20, r7, r20\t\tC 2*i-i*i*d, inverse 32 bits\n\n\tmulq\tr20, r20, r7\t\tC i*i\n\taddq\tr20, r20, r20\t\tC 2*i\n\n\tmulq\tr7, r18, r7\t\tC i*i*d\n\tsubq\tr2, r19, r3\t\tC y = x - climb\n\n\tsubq\tr20, r7, r20\t\tC inv = 2*i-i*i*d, inverse 64 bits\n\nASSERT(r7, C should have d*inv==1 mod 2^64\n`\tmulq\tr18, r20, r7\n\tcmpeq\tr7, 1, r7')\n\n\tmulq\tr3, r20, r4\t\tC first q = y * inv\n\n\tbeq\tr17, L(one)\t\tC if size==1\n\tbr\tL(entry)\n\n\nL(top):\n\tC r0\tcbit\n\tC r16\tsrc, incrementing\n\tC r17\tsize, decrementing\n\tC r18\td\n\tC r19\tclimb\n\tC r20\tinv\n\n\tldq\tr1, 0(r16)\t\tC s = src[i]\n\tsubq\tr1, r0, r2\t\tC x = s - cbit\n\tcmpult\tr1, r0, r0\t\tC new cbit = s < cbit\n\n\tsubq\tr2, r19, r3\t\tC y = x - climb\n\n\tmulq\tr3, r20, r4\t\tC q = y * inv\nL(entry):\n\tcmpult\tr2, r19, r5\t\tC cbit2 = x < climb\n\taddq\tr5, r0, r0\t\tC cbit += cbit2\n\tlda\tr16, 8(r16)\t\tC src++\n\tlda\tr17, -1(r17)\t\tC size--\n\n\tumulh\tr4, r18, r19\t\tC climb = q * d\n\tbne\tr17, L(top)\t\tC while 2 or more limbs left\n\n\n\n\tC r0\tcbit\n\tC r18\td\n\tC r19\tclimb\n\tC r20\tinv\n\n\tldq\tr1, 0(r16)\t\tC s = src[size-1] high limb\n\n\tcmpult\tr1, r18, r2\t\tC test high<divisor\n\tbne\tr2, L(skip)\t\tC skip if 
so\n\n\tC can't skip a division, repeat loop code\n\n\tsubq\tr1, r0, r2\t\tC x = s - cbit\n\tcmpult\tr1, r0, r0\t\tC new cbit = s < cbit\n\n\tsubq\tr2, r19, r3\t\tC y = x - climb\n\n\tmulq\tr3, r20, r4\t\tC q = y * inv\nL(one):\n\tcmpult\tr2, r19, r5\t\tC cbit2 = x < climb\n\taddq\tr5, r0, r0\t\tC cbit += cbit2\n\n\tumulh\tr4, r18, r19\t\tC climb = q * d\n\n\taddq\tr19, r0, r0\t\tC return climb + cbit\n\tret\tr31, (r26), 1\n\n\n\tALIGN(8)\nL(skip):\n\tC with high<divisor, the final step can be just (cbit+climb)-s and\n\tC an addback of d if that underflows\n\n\taddq\tr19, r0, r19\t\tC c = climb + cbit\n\n\tsubq\tr19, r1, r2\t\tC c - s\n\tcmpult\tr19, r1, r3\t\tC c < s\n\n\taddq\tr2, r18, r0\t\tC return c-s + divisor\n\n\tcmoveq\tr3, r2, r0\t\tC return c-s if no underflow\n\tret\tr31, (r26), 1\n\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/mul_1.asm",
    "content": "dnl  Alpha mpn_mul_1 -- Multiply a limb vector with a limb and store\ndnl  the result in a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     42\nC EV5:     18\nC EV6:      7\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  vl\tr19\nC  cl\tr20\n\n\nASM_START()\nPROLOGUE(mpn_mul_1c)\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tumulh\tr2,r19,r4\tC r4 = prod_high\n\tbeq\tr18,$Le1c\tC jump if size was == 1\n\tldq\tr2,8(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC size--\n\taddq\tr3,r20,r3\tC r3 = cy_limb + cl\n\tstq\tr3,0(r16)\n\tcmpult\tr3,r20,r0\tC r0 = carry from (cy_limb + cl)\n\tbne\tr18,$Loop\tC jump if size was == 2\n\tbr\tr31,$Le2\n$Le1c:\taddq\tr3,r20,r3\tC r3 = cy_limb + cl\n\tcmpult\tr3,r20,r0\tC r0 = carry from (cy_limb + cl)\n$Le1:\tstq\tr3,0(r16)\n\taddq\tr4,r0,r0\n\tret\tr31,(r26),1\nEPILOGUE(mpn_mul_1c)\n\nPROLOGUE(mpn_mul_1)\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC 
size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tbic\tr31,r31,r0\tC clear cy_limb\n\tumulh\tr2,r19,r4\tC r4 = prod_high\n\tbeq\tr18,$Le1\tC jump if size was == 1\n\tldq\tr2,8(r17)\tC r2 = s1_limb\n\tlda\tr18,-1(r18)\tC size--\n\tstq\tr3,0(r16)\n\tbeq\tr18,$Le2\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tlda\tr18,-1(r18)\tC size--\n\tumulh\tr2,r19,r4\tC r4 = prod_high\n\tldq\tr2,16(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tstq\tr3,8(r16)\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tlda\tr16,8(r16)\tC res_ptr++\n\tbne\tr18,$Loop\n\n$Le2:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,r19,r4\tC r4 = prod_high\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tstq\tr3,8(r16)\n\taddq\tr4,r0,r0\tC cy_limb = prod_high + cy\n\tret\tr31,(r26),1\nEPILOGUE(mpn_mul_1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/rshift.asm",
    "content": "dnl  Alpha mpn_rshift -- Shift a number right.\n\ndnl  Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     4.75\nC EV5:     3.75\nC EV6:     2\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  cnt\tr19\n\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tldq\tr4,0(r17)\tC load first limb\n\taddq\tr17,8,r17\n\tsubq\tr31,r19,r7\n\tsubq\tr18,1,r18\n\tand\tr18,4-1,r20\tC number of limbs in first loop\n\tsll\tr4,r7,r0\tC compute function 
result\n\n\tbeq\tr20,$L0\n\tsubq\tr18,r20,r18\n\n\tALIGN(8)\n$Loop0:\tldq\tr3,0(r17)\n\taddq\tr16,8,r16\n\taddq\tr17,8,r17\n\tsubq\tr20,1,r20\n\tsrl\tr4,r19,r5\n\tsll\tr3,r7,r6\n\tbis\tr3,r3,r4\n\tbis\tr5,r6,r8\n\tstq\tr8,-8(r16)\n\tbne\tr20,$Loop0\n\n$L0:\tbeq\tr18,$Lend\n\n\tALIGN(8)\n$Loop:\tldq\tr3,0(r17)\n\taddq\tr16,32,r16\n\tsubq\tr18,4,r18\n\tsrl\tr4,r19,r5\n\tsll\tr3,r7,r6\n\n\tldq\tr4,8(r17)\n\tsrl\tr3,r19,r1\n\tbis\tr5,r6,r8\n\tstq\tr8,-32(r16)\n\tsll\tr4,r7,r2\n\n\tldq\tr3,16(r17)\n\tsrl\tr4,r19,r5\n\tbis\tr1,r2,r8\n\tstq\tr8,-24(r16)\n\tsll\tr3,r7,r6\n\n\tldq\tr4,24(r17)\n\tsrl\tr3,r19,r1\n\tbis\tr5,r6,r8\n\tstq\tr8,-16(r16)\n\tsll\tr4,r7,r2\n\n\taddq\tr17,32,r17\n\tbis\tr1,r2,r8\n\tstq\tr8,-8(r16)\n\n\tbgt\tr18,$Loop\n\n$Lend:\tsrl\tr4,r19,r8\n\tstq\tr8,0(r16)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_rshift)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/sqr_diagonal.asm",
    "content": "dnl  Alpha mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     42\nC EV5:     18\nC EV6:      4\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\n\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\tlda\tr18,-2(r18)\tC size -= 2\n\tmulq\tr2,r2,r3\tC r3 = prod_low\n\tumulh\tr2,r2,r4\tC r4 = prod_high\n\tblt\tr18,$Lend1\tC jump if size was == 1\n\tldq\tr2,8(r17)\tC r2 = s1_limb\n\tbeq\tr18,$Lend2\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop:\tstq\tr3,0(r16)\n\tmulq\tr2,r2,r3\tC r3 = prod_low\n\tlda\tr18,-1(r18)\tC size--\n\tstq\tr4,8(r16)\n\tumulh\tr2,r2,r4\tC r4 = cy_limb\n\tldq\tr2,16(r17)\tC r2 = s1_limb\n\tlda\tr17,8(r17)\tC s1_ptr++\n\tlda\tr16,16(r16)\tC res_ptr++\n\tbne\tr18,$Loop\n\n$Lend2:\tstq\tr3,0(r16)\n\tmulq\tr2,r2,r3\tC r3 = prod_low\n\tstq\tr4,8(r16)\n\tumulh\tr2,r2,r4\tC r4 = cy_limb\n\tstq\tr3,16(r16)\n\tstq\tr4,24(r16)\n\tret\tr31,(r26),1\n$Lend1:\tstq\tr3,0(r16)\n\tstq\tr4,8(r16)\n\tret\tr31,(r26),1\nEPILOGUE(mpn_sqr_diagonal)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/sub_n.asm",
    "content": "dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     7.75\nC EV5:     5.75\nC EV6:     4\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  vp\tr18\nC  n\tr19\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tldq\tr3,0(r17)\n\tldq\tr4,0(r18)\n\n\tsubq\tr19,1,r19\n\tand\tr19,4-1,r2\tC number of limbs in first loop\n\tbis\tr31,r31,r0\n\tbeq\tr2,$L0\t\tC if multiple of 4 limbs, skip first 
loop\n\n\tsubq\tr19,r2,r19\n\n$Loop0:\tsubq\tr2,1,r2\n\tldq\tr5,8(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,8(r18)\n\tcmpult\tr4,r0,r1\n\tsubq\tr3,r4,r4\n\tcmpult\tr3,r4,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\n\taddq\tr17,8,r17\n\taddq\tr18,8,r18\n\tbis\tr5,r5,r3\n\tbis\tr6,r6,r4\n\taddq\tr16,8,r16\n\tbne\tr2,$Loop0\n\n$L0:\tbeq\tr19,$Lend\n\n\tALIGN(8)\n$Loop:\tsubq\tr19,4,r19\n\n\tldq\tr5,8(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,8(r18)\n\tcmpult\tr4,r0,r1\n\tsubq\tr3,r4,r4\n\tcmpult\tr3,r4,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr3,16(r17)\n\taddq\tr6,r0,r6\n\tldq\tr4,16(r18)\n\tcmpult\tr6,r0,r1\n\tsubq\tr5,r6,r6\n\tcmpult\tr5,r6,r0\n\tstq\tr6,8(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr5,24(r17)\n\taddq\tr4,r0,r4\n\tldq\tr6,24(r18)\n\tcmpult\tr4,r0,r1\n\tsubq\tr3,r4,r4\n\tcmpult\tr3,r4,r0\n\tstq\tr4,16(r16)\n\tbis\tr0,r1,r0\n\n\tldq\tr3,32(r17)\n\taddq\tr6,r0,r6\n\tldq\tr4,32(r18)\n\tcmpult\tr6,r0,r1\n\tsubq\tr5,r6,r6\n\tcmpult\tr5,r6,r0\n\tstq\tr6,24(r16)\n\tbis\tr0,r1,r0\n\n\taddq\tr17,32,r17\n\taddq\tr18,32,r18\n\taddq\tr16,32,r16\n\tbne\tr19,$Loop\n\n$Lend:\taddq\tr4,r0,r4\n\tcmpult\tr4,r0,r1\n\tsubq\tr3,r4,r4\n\tcmpult\tr3,r4,r0\n\tstq\tr4,0(r16)\n\tbis\tr0,r1,r0\n\tret\tr31,(r26),1\nEPILOGUE(mpn_sub_n)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/sublsh1_n.asm",
    "content": "dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:    12.5\nC EV5:     6.25\nC EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)\n\nC TODO\nC  * Write special version for ev6, as this is a slowdown for 100 < n < 2200\nC    compared to separate mpn_lshift and mpn_add_n.\nC  * Use addq instead of sll for left shift, and similarly cmplt instead of srl\nC    for right shift.\n\ndnl  INPUT PARAMETERS\ndefine(`rp',`r16')\ndefine(`up',`r17')\ndefine(`vp',`r18')\ndefine(`n', `r19')\n\ndefine(`u0', `r8')\ndefine(`u1', `r1')\ndefine(`u2', `r2')\ndefine(`u3', `r3')\ndefine(`v0', `r4')\ndefine(`v1', `r5')\ndefine(`v2', `r6')\ndefine(`v3', `r7')\n\ndefine(`cy0', `r0')\ndefine(`cy1', `r20')\ndefine(`cy', `r22')\ndefine(`rr', `r24')\ndefine(`ps', `r25')\ndefine(`sl', `r28')\n\ndefine(`OPERATION_sublsh1_n',1)\n\nifdef(`OPERATION_addlsh1_n',`\n  define(ADDSUB,       addq)\n  define(CARRY,       `cmpult $1,$2,$3')\n  define(func, 
mpn_addlsh1_n)\n')\nifdef(`OPERATION_sublsh1_n',`\n  define(ADDSUB,       subq)\n  define(CARRY,       `cmpult $2,$1,$3')\n  define(func, mpn_sublsh1_n)\n')\n\nASM_START()\nPROLOGUE(func)\n\tlda\tn, -4(n)\n\tbis\tr31, r31, cy1\n\tand\tn, 3, r1\n\tbeq\tr1, $Lb00\n\tcmpeq\tr1, 1, r2\n\tbne\tr2, $Lb01\n\tcmpeq\tr1, 2, r2\n\tbne\tr2, $Lb10\n$Lb11:\tC n = 3, 7, 11, ...\n\tldq\tv0, 0(vp)\n\tldq\tu0, 0(up)\n\tldq\tv1, 8(vp)\n\tldq\tu1, 8(up)\n\tldq\tv2, 16(vp)\n\tldq\tu2, 16(up)\n\tlda\tvp, 24(vp)\n\tlda\tup, 24(up)\n\tbge\tn, $Loop\n\tbr\tr31, $Lcj3\n$Lb10:\tC n = 2, 6, 10, ...\n\tbis\tr31, r31, cy0\n\tldq\tv1, 0(vp)\n\tldq\tu1, 0(up)\n\tldq\tv2, 8(vp)\n\tldq\tu2, 8(up)\n\tlda\trp, -8(rp)\n\tblt\tn, $Lcj2\n\tldq\tv3, 16(vp)\n\tldq\tu3, 16(up)\n\tlda\tvp, 48(vp)\n\tlda\tup, 16(up)\n\tbr\tr31, $LL10\n$Lb01:\tC n = 1, 5, 9, ...\n\tldq\tv2, 0(vp)\n\tldq\tu2, 0(up)\n\tlda\trp, -16(rp)\n\tblt\tn, $Lcj1\n\tldq\tv3, 8(vp)\n\tldq\tu3, 8(up)\n\tldq\tv0, 16(vp)\n\tldq\tu0, 16(up)\n\tlda\tvp, 40(vp)\n\tlda\tup, 8(up)\n\tlda\trp, 32(rp)\n\tbr\tr31, $LL01\n$Lb00:\tC n = 4, 8, 12, ...\n\tbis\tr31, r31, cy0\n\tldq\tv3, 0(vp)\n\tldq\tu3, 0(up)\n\tldq\tv0, 8(vp)\n\tldq\tu0, 8(up)\n\tldq\tv1, 16(vp)\n\tldq\tu1, 16(up)\n\tlda\tvp, 32(vp)\n\tlda\trp, 8(rp)\n\tbr\tr31, $LL00x\n\tALIGN(16)\nC 0\n$Loop:\tsll\tv0, 1, sl\tC left shift vlimb\n\tldq\tv3, 0(vp)\nC 1\n\tADDSUB\tu0, sl, ps\tC ulimb + (vlimb << 1)\n\tldq\tu3, 0(up)\nC 2\n\tADDSUB\tps, cy1, rr\tC consume carry from previous operation\n\tsrl\tv0, 63, cy0\tC carry out #1\nC 3\n\tCARRY(\tps, u0, cy)\tC carry out #2\n\tstq\trr, 0(rp)\nC 4\n\taddq\tcy, cy0, cy0\tC combine carry out #1 and #2\n\tCARRY(\trr, ps, cy)\tC carry out #3\nC 5\n\taddq\tcy, cy0, cy0\tC final carry out\n\tlda\tvp, 32(vp)\tC bookkeeping\nC 6\n$LL10:\tsll\tv1, 1, sl\n\tldq\tv0, -24(vp)\nC 7\n\tADDSUB\tu1, sl, ps\n\tldq\tu0, 8(up)\nC 8\n\tADDSUB\tps, cy0, rr\n\tsrl\tv1, 63, cy1\nC 9\n\tCARRY(\tps, u1, cy)\n\tstq\trr, 8(rp)\nC 10\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, 
cy)\nC 11\n\taddq\tcy, cy1, cy1\n\tlda\trp, 32(rp)\tC bookkeeping\nC 12\n$LL01:\tsll\tv2, 1, sl\n\tldq\tv1, -16(vp)\nC 13\n\tADDSUB\tu2, sl, ps\n\tldq\tu1, 16(up)\nC 14\n\tADDSUB\tps, cy1, rr\n\tsrl\tv2, 63, cy0\nC 15\n\tCARRY(\tps, u2, cy)\n\tstq\trr, -16(rp)\nC 16\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\nC 17\n\taddq\tcy, cy0, cy0\n$LL00x:\tlda\tup, 32(up)\tC bookkeeping\nC 18\n\tsll\tv3, 1, sl\n\tldq\tv2, -8(vp)\nC 19\n\tADDSUB\tu3, sl, ps\n\tldq\tu2, -8(up)\nC 20\n\tADDSUB\tps, cy0, rr\n\tsrl\tv3, 63, cy1\nC 21\n\tCARRY(\tps, u3, cy)\n\tstq\trr, -8(rp)\nC 22\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, cy)\nC 23\n\taddq\tcy, cy1, cy1\n\tlda\tn, -4(n)\tC bookkeeping\nC 24\n\tbge\tn, $Loop\n\n$Lcj3:\tsll\tv0, 1, sl\n\tADDSUB\tu0, sl, ps\n\tADDSUB\tps, cy1, rr\n\tsrl\tv0, 63, cy0\n\tCARRY(\tps, u0, cy)\n\tstq\trr, 0(rp)\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy0, cy0\n\n$Lcj2:\tsll\tv1, 1, sl\n\tADDSUB\tu1, sl, ps\n\tADDSUB\tps, cy0, rr\n\tsrl\tv1, 63, cy1\n\tCARRY(\tps, u1, cy)\n\tstq\trr, 8(rp)\n\taddq\tcy, cy1, cy1\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy1, cy1\n\n$Lcj1:\tsll\tv2, 1, sl\n\tADDSUB\tu2, sl, ps\n\tADDSUB\tps, cy1, rr\n\tsrl\tv2, 63, cy0\n\tCARRY(\tps, u2, cy)\n\tstq\trr, 16(rp)\n\taddq\tcy, cy0, cy0\n\tCARRY(\trr, ps, cy)\n\taddq\tcy, cy0, cy0\n\n\tret\tr31,(r26),1\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/submul_1.asm",
    "content": "dnl  Alpha mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl  the result from a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC      cycles/limb\nC EV4:     42\nC EV5:     18\nC EV6:      7\n\nC  INPUT PARAMETERS\nC  rp\tr16\nC  up\tr17\nC  n\tr18\nC  limb\tr19\n\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\tsubq\tr18,1,r18\tC size--\n\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\tumulh\tr2,r19,r0\tC r0 = prod_high\n\tbeq\tr18,$Lend1\tC jump if size was == 1\n\tldq\tr2,0(r17)\tC r2 = s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\tsubq\tr18,1,r18\tC size--\n\tsubq\tr5,r3,r3\n\tcmpult\tr5,r3,r4\n\tstq\tr3,0(r16)\n\taddq\tr16,8,r16\tC res_ptr++\n\tbeq\tr18,$Lend2\tC jump if size was == 2\n\n\tALIGN(8)\n$Loop:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tsubq\tr18,1,r18\tC size--\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\tldq\tr2,0(r17)\tC r2 = 
s1_limb\n\taddq\tr17,8,r17\tC s1_ptr++\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,r3,r3\n\tcmpult\tr5,r3,r5\n\tstq\tr3,0(r16)\n\taddq\tr16,8,r16\tC res_ptr++\n\taddq\tr5,r0,r0\tC combine carries\n\tbne\tr18,$Loop\n\n$Lend2:\tmulq\tr2,r19,r3\tC r3 = prod_low\n\tldq\tr5,0(r16)\tC r5 = *res_ptr\n\taddq\tr4,r0,r0\tC cy_limb = cy_limb + 'cy'\n\tumulh\tr2,r19,r4\tC r4 = cy_limb\n\taddq\tr3,r0,r3\tC r3 = cy_limb + prod_low\n\tcmpult\tr3,r0,r0\tC r0 = carry from (cy_limb + prod_low)\n\tsubq\tr5,r3,r3\n\tcmpult\tr5,r3,r5\n\tstq\tr3,0(r16)\n\taddq\tr5,r0,r0\tC combine carries\n\taddq\tr4,r0,r0\tC cy_limb = prod_high + cy\n\tret\tr31,(r26),1\n$Lend1:\tsubq\tr5,r3,r3\n\tcmpult\tr5,r3,r5\n\tstq\tr3,0(r16)\n\taddq\tr0,r5,r0\n\tret\tr31,(r26),1\nEPILOGUE(mpn_submul_1)\nASM_END()\n"
  },
  {
    "path": "mpn/alpha/umul.asm",
    "content": "dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication\n\ndnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);\nC\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\tmulq\tr17, r18, r1\n\tumulh\tr17, r18, r0\n\tstq\tr1, 0(r16)\n\tret\tr31, (r26), 1\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/arm/add_n.asm",
    "content": "dnl  ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum\ndnl  in a third limb vector.\ndnl  Contributed by Robert Harley.\n\ndnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC This code runs at 5 cycles/limb.\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`vp',`r2')\ndefine(`n',`r3')\n\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tstmfd\tsp!, { r8, r9, lr }\n\tmovs\tn, n, lsr #1\n\tbcc\tL(skip1)\n\tldr\tr12, [up], #4\n\tldr\tlr, [vp], #4\n\tadds\tr12, r12, lr\n\tstr\tr12, [rp], #4\nL(skip1):\n\ttst\tn, #1\n\tbeq\tL(skip2)\n\tldmia\tup!, { r8, r9 }\n\tldmia\tvp!, { r12, lr }\n\tadcs\tr8, r8, r12\n\tadcs\tr9, r9, lr\n\tstmia\trp!, { r8, r9 }\nL(skip2):\n\tbics\tn, n, #1\n\tbeq\tL(return)\n\tstmfd\tsp!, { r4, r5, r6, r7 }\nL(add_n_loop):\n\tldmia\tup!, { r4, r5, r6, r7 }\n\tldmia\tvp!, { r8, r9, r12, lr }\n\tadcs\tr4, r4, r8\n\tldr\tr8, [rp, #12]\t\t\tC cache allocate\n\tadcs\tr5, r5, r9\n\tadcs\tr6, r6, r12\n\tadcs\tr7, r7, lr\n\tstmia\trp!, { r4, r5, r6, r7 }\n\tsub\tn, n, #2\n\tteq\tn, #0\n\tbne\tL(add_n_loop)\n\tldmfd\tsp!, { r4, 
r5, r6, r7 }\nL(return):\n\tadc\tr0, n, #0\n\tldmfd\tsp!, { r8, r9, pc }\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/arm/addmul_1.asm",
    "content": "dnl  ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result\ndnl  to a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC            cycles/limb\nC StrongARM:  7.75-9.75  (dependent on vl value)\nC XScale:        8-9     (dependent on vl value, estimated)\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`n',`r2')\ndefine(`vl',`r3')\ndefine(`rl',`r12')\ndefine(`ul',`r6')\ndefine(`r',`lr')\n\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tstmfd\tsp!, { r4-r6, lr }\n\tmov\tr4, #0\t\t\tC clear r4\n\tadds\tr0, r0, #0\t\tC clear cy\n\ttst\tn, #1\n\tbeq\tL(skip1)\n\tldr\tul, [up], #4\n\tldr\trl, [rp, #0]\n\tumull\tr5, r4, ul, vl\n\tadds\tr, rl, r5\n\tstr\tr, [rp], #4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tldr\tul, [up], #4\n\tldr\trl, [rp, #0]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tldr\tul, [up], #4\n\tadcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tadcs\tr, rl, r5\n\tstr\tr, [rp], #4\nL(skip2):\n\tbics\tr, n, #3\n\tbeq\tL(return)\n\n\tldr\tul, [up], 
#4\n\tldr\trl, [rp, #0]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tb\tL(in)\n\nL(loop):\n\tldr\tul, [up], #4\n\tadcs\tr, rl, r5\n\tldr\trl, [rp, #4]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tstr\tr, [rp], #4\nL(in):\tldr\tul, [up], #4\n\tadcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tldr\tul, [up], #4\n\tadcs\tr, rl, r5\n\tldr\trl, [rp, #4]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tstr\tr, [rp], #4\n\tldr\tul, [up], #4\n\tadcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tsub\tn, n, #4\n\tbics\tr, n, #3\n\tbne\tL(loop)\n\n\tadcs\tr, rl, r5\n\tstr\tr, [rp], #4\nL(return):\n\tadc\tr0, r4, #0\n\tldmfd\tsp!, { r4-r6, pc }\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/arm/arm-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for ARM assembler.\n\ndnl  Copyright 2001 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  Standard commenting is with @, the default m4 # is for constants and we\ndnl  don't want to disable macro expansions in or after them.\n\nchangecom(@)\n\n\ndnl  APCS register names.\n\ndeflit(a1,r0)\ndeflit(a2,r1)\ndeflit(a3,r2)\ndeflit(a4,r3)\ndeflit(v1,r4)\ndeflit(v2,r5)\ndeflit(v3,r6)\ndeflit(v4,r7)\ndeflit(v5,r8)\ndeflit(v6,r9)\ndeflit(sb,r9)\ndeflit(v7,r10)\ndeflit(sl,r10)\ndeflit(fp,r11)\ndeflit(ip,r12)\ndeflit(sp,r13)\ndeflit(lr,r14)\ndeflit(pc,r15)\n\ndivert\n"
  },
  {
    "path": "mpn/arm/copyd.asm",
    "content": "dnl  ARM mpn_copyd.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC This runs at 3 cycles/limb in the StrongARM.\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`n',`r2')\n\n\nASM_START()\nPROLOGUE(mpn_copyd)\n\tmov\tr12, n, lsl #2\n\tsub\tr12, r12, #4\n\tadd\trp, rp, r12\t\t\tC make rp point at last limb\n\tadd\tup, up, r12\t\t\tC make up point at last limb\n\n\ttst\tn, #1\n\tbeq\tL(skip1)\n\tldr\tr3, [up], #-4\n\tstr\tr3, [rp], #-4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tldmda\tup!, { r3, r12 }\t\tC load 2 limbs\n\tstmda\trp!, { r3, r12 }\t\tC store 2 limbs\nL(skip2):\n\tbics\tn, n, #3\n\tbeq\tL(return)\n\tstmfd\tsp!, { r7, r8, r9 }\t\tC save regs on stack\nL(loop):\n\tldmda\tup!, { r3, r8, r9, r12 }\tC load 4 limbs\n\tldr\tr7, [rp, #-12]\t\t\tC cache allocate\n\tsubs\tn, n, #4\n\tstmda\trp!, { r3, r8, r9, r12 }\tC store 4 limbs\n\tbne\tL(loop)\n\tldmfd\tsp!, { r7, r8, r9 }\t\tC restore regs from stack\nL(return):\n\tmov\tpc, lr\nEPILOGUE(mpn_copyd)\n"
  },
  {
    "path": "mpn/arm/copyi.asm",
    "content": "dnl  ARM mpn_copyi.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC This runs at 3 cycles/limb in the StrongARM.\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`n',`r2')\n\n\nASM_START()\nPROLOGUE(mpn_copyi)\n\ttst\tn, #1\n\tbeq\tL(skip1)\n\tldr\tr3, [up], #4\n\tstr\tr3, [rp], #4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tldmia\tup!, { r3, r12 }\t\tC load 2 limbs\n\tstmia\trp!, { r3, r12 }\t\tC store 2 limbs\nL(skip2):\n\tbics\tn, n, #3\n\tbeq\tL(return)\n\tstmfd\tsp!, { r7, r8, r9 }\t\tC save regs on stack\nL(loop):\n\tldmia\tup!, { r3, r8, r9, r12 }\tC load 4 limbs\n\tldr\tr7, [rp, #12]\t\t\tC cache allocate\n\tsubs\tn, n, #4\n\tstmia\trp!, { r3, r8, r9, r12 }\tC store 4 limbs\n\tbne\tL(loop)\n\tldmfd\tsp!, { r7, r8, r9 }\t\tC restore regs from stack\nL(return):\n\tmov\tpc, lr\nEPILOGUE(mpn_copyi)\n"
  },
  {
    "path": "mpn/arm/invert_limb.asm",
    "content": "dnl  ARM mpn_invert_limb -- Invert a normalized limb.\n\ndnl  Copyright 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(`d',`r0')\tC number to be inverted\n\n\nPROLOGUE(mpn_invert_limb)\n\tstmfd\tsp!, {r4, lr}\n\tmov\tr3, d, lsr #23\n\tsub\tr3, r3, #256\n\tadd\tr2, pc, #invtab-.-8\n\tmov\tr3, r3, lsl #1\n\tldrh\tr1, [r2, r3]\t\tC get initial approximation from table\n\tmov\tr2, r1, lsl #6\t\tC start iteration 1\n\tmul\tip, r2, r2\n\tumull\tlr, r4, ip, d\n\tmov\tr2, r4, lsl #1\n\trsb\tr2, r2, r1, lsl #23\tC iteration 1 complete\n\tumull\tip, r3, r2, r2\t\tC start iteration 2\n\tumull\tlr, r4, r3, d\n\tumull\tr3, r1, ip, d\n\tadds\tlr, lr, r1\n\taddcs\tr4, r4, #1\n\tmov\tr3, lr, lsr #30\n\torr\tr4, r3, r4, lsl #2\n\tmov\tlr, lr, lsl #2\n\tcmn\tlr, #1\n\trsc\tr2, r4, r2, lsl #2\tC iteration 2 complete\n\tumull\tip, r1, d, r2\t\tC start adjustment step\n\tadd\tr1, r1, d\n\tcmn\tr1, #1\n\tbeq\tL(1)\n\tadds\tip, ip, d\n\tadc\tr1, r1, #0\n\tadd\tr2, r2, #1\nL(1):\n\tadds\tr3, ip, d\n\tadcs\tr1, r1, #0\n\tmoveq\tr0, r2\n\taddne\tr0, r2, #1\n\tldmfd\tsp!, 
{r4, pc}\n\ninvtab:\n\t.short\t1023,1020,1016,1012,1008,1004,1000,996\n\t.short\t992,989,985,981,978,974,970,967\n\t.short\t963,960,956,953,949,946,942,939\n\t.short\t936,932,929,926,923,919,916,913\n\t.short\t910,907,903,900,897,894,891,888\n\t.short\t885,882,879,876,873,870,868,865\n\t.short\t862,859,856,853,851,848,845,842\n\t.short\t840,837,834,832,829,826,824,821\n\t.short\t819,816,814,811,809,806,804,801\n\t.short\t799,796,794,791,789,787,784,782\n\t.short\t780,777,775,773,771,768,766,764\n\t.short\t762,759,757,755,753,751,748,746\n\t.short\t744,742,740,738,736,734,732,730\n\t.short\t728,726,724,722,720,718,716,714\n\t.short\t712,710,708,706,704,702,700,699\n\t.short\t697,695,693,691,689,688,686,684\n\t.short\t682,680,679,677,675,673,672,670\n\t.short\t668,667,665,663,661,660,658,657\n\t.short\t655,653,652,650,648,647,645,644\n\t.short\t642,640,639,637,636,634,633,631\n\t.short\t630,628,627,625,624,622,621,619\n\t.short\t618,616,615,613,612,611,609,608\n\t.short\t606,605,604,602,601,599,598,597\n\t.short\t595,594,593,591,590,589,587,586\n\t.short\t585,583,582,581,579,578,577,576\n\t.short\t574,573,572,571,569,568,567,566\n\t.short\t564,563,562,561,560,558,557,556\n\t.short\t555,554,553,551,550,549,548,547\n\t.short\t546,544,543,542,541,540,539,538\n\t.short\t537,536,534,533,532,531,530,529\n\t.short\t528,527,526,525,524,523,522,521\n\t.short\t520,519,518,517,516,515,514,513\nEPILOGUE(mpn_invert_limb)\n"
  },
  {
    "path": "mpn/arm/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#define __CLOBBER_CC : \"cc\"\n#define __AND_CLOBBER_CC , \"cc\"\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"adds\\t%1, %4, %5\\n\\tadc\\t%0, %2, %3\"\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"r\" (ah), \"rI\" (bh), \"%r\" (al), \"rI\" (bl) __CLOBBER_CC)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (__builtin_constant_p (al))\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tif (__builtin_constant_p (ah))\t\t\t\t\t\\\n\t  __asm__ (\"rsbs\\t%1, %5, %4\\n\\trsc\\t%0, %3, %2\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"rI\" (ah), \"r\" (bh), \"rI\" (al), \"r\" (bl) __CLOBBER_CC); \\\n\telse\t\t\t\t\t\t\t\t\\\n\t  __asm__ (\"rsbs\\t%1, %5, %4\\n\\tsbc\\t%0, %2, %3\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"r\" (ah), \"rI\" (bh), \"rI\" (al), \"r\" (bl) __CLOBBER_CC); \\\n      }\t\t\t\t\t\t\t\t\t\\\n    else if (__builtin_constant_p (ah))\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tif 
(__builtin_constant_p (bl))\t\t\t\t\t\\\n\t  __asm__ (\"subs\\t%1, %4, %5\\n\\trsc\\t%0, %3, %2\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"rI\" (ah), \"r\" (bh), \"r\" (al), \"rI\" (bl) __CLOBBER_CC); \\\n\telse\t\t\t\t\t\t\t\t\\\n\t  __asm__ (\"rsbs\\t%1, %5, %4\\n\\trsc\\t%0, %3, %2\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"rI\" (ah), \"r\" (bh), \"rI\" (al), \"r\" (bl) __CLOBBER_CC); \\\n      }\t\t\t\t\t\t\t\t\t\\\n    else if (__builtin_constant_p (bl))\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tif (__builtin_constant_p (bh))\t\t\t\t\t\\\n\t  __asm__ (\"subs\\t%1, %4, %5\\n\\tsbc\\t%0, %2, %3\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"r\" (ah), \"rI\" (bh), \"r\" (al), \"rI\" (bl) __CLOBBER_CC); \\\n\telse\t\t\t\t\t\t\t\t\\\n\t  __asm__ (\"subs\\t%1, %4, %5\\n\\trsc\\t%0, %3, %2\"\t\t\\\n\t\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\\\n\t\t   : \"rI\" (ah), \"r\" (bh), \"r\" (al), \"rI\" (bl) __CLOBBER_CC); \\\n      }\t\t\t\t\t\t\t\t\t\\\n    else /* only bh might be a constant */\t\t\t\t\\\n      __asm__ (\"subs\\t%1, %4, %5\\n\\tsbc\\t%0, %2, %3\"\t\t\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t       : \"r\" (ah), \"rI\" (bh), \"r\" (al), \"rI\" (bl) __CLOBBER_CC);\\\n    } while (0)\n#if 1 || defined (__arm_m__)\t/* `M' series has widening multiply support */\n#define umul_ppmm(xh, xl, a, b) \\\n  __asm__ (\"umull %0,%1,%2,%3\" : \"=&r\" (xl), \"=&r\" (xh) : \"r\" (a), \"r\" (b))\n#define smul_ppmm(xh, xl, a, b) \\\n  __asm__ (\"smull %0,%1,%2,%3\" : \"=&r\" (xl), \"=&r\" (xh) : \"r\" (a), \"r\" (b))\n#ifndef LONGLONG_STANDALONE\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do { UWtype __di;\t\t\t\t\t\t\t\\\n    __di = __MPN(invert_limb) (d);\t\t\t\t\t\\\n    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);\t\t\t\t\\\n  } while (0)\n#define UDIV_PREINV_ALWAYS  1\n#define UDIV_NEEDS_NORMALIZATION 1\n#endif /* LONGLONG_STANDALONE */\n#else\n#define umul_ppmm(xh, xl, a, b) \\\n  __asm__ 
(\"%@ Inlined umul_ppmm\\n\"\t\t\t\t\t\\\n\"\tmov\t%|r0, %2, lsr #16\\n\"\t\t\t\t\t\\\n\"\tmov\t%|r2, %3, lsr #16\\n\"\t\t\t\t\t\\\n\"\tbic\t%|r1, %2, %|r0, lsl #16\\n\"\t\t\t\t\\\n\"\tbic\t%|r2, %3, %|r2, lsl #16\\n\"\t\t\t\t\\\n\"\tmul\t%1, %|r1, %|r2\\n\"\t\t\t\t\t\\\n\"\tmul\t%|r2, %|r0, %|r2\\n\"\t\t\t\t\t\\\n\"\tmul\t%|r1, %0, %|r1\\n\"\t\t\t\t\t\\\n\"\tmul\t%0, %|r0, %0\\n\"\t\t\t\t\t\t\\\n\"\tadds\t%|r1, %|r2, %|r1\\n\"\t\t\t\t\t\\\n\"\taddcs\t%0, %0, #65536\\n\"\t\t\t\t\t\\\n\"\tadds\t%1, %1, %|r1, lsl #16\\n\"\t\t\t\t\\\n\"\tadc\t%0, %0, %|r1, lsr #16\"\t\t\t\t\t\\\n\t   : \"=&r\" (xh), \"=r\" (xl)\t\t\t\t\t\\\n\t   : \"r\" (a), \"r\" (b)\t\t\t\t\t\t\\\n\t   : \"r0\", \"r1\", \"r2\")\n#ifndef LONGLONG_STANDALONE\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do { UWtype __r;\t\t\t\t\t\t\t\\\n    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));\t\t\t\\\n    (r) = __r;\t\t\t\t\t\t\t\t\\\n  } while (0)\nextern UWtype __MPN(udiv_qrnnd)(UWtype *, UWtype, UWtype, UWtype);\n#endif /* LONGLONG_STANDALONE */\n#endif\n\n#endif\n"
  },
  {
    "path": "mpn/arm/mul_1.asm",
    "content": "dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result\ndnl  in a second limb vector.\ndnl  Contributed by Robert Harley.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC            cycles/limb\nC StrongARM:     6-8  (dependent on vl value)\nC XScale:        ?-?\n\nC We should rewrite this along the lines of addmul_1.asm.  
That should save a\nC cycle on StrongARM, and several cycles on XScale.\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`n',`r2')\ndefine(`vl',`r3')\n\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tstmfd\tsp!, { r8, r9, lr }\n\tands\tr12, n, #1\n\tbeq\tL(skip1)\n\tldr\tlr, [up], #4\n\tumull\tr9, r12, lr, vl\n\tstr\tr9, [rp], #4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tmov\tr8, r12\n\tldmia\tup!, { r12, lr }\n\tmov\tr9, #0\n\tumlal\tr8, r9, r12, vl\n\tmov\tr12, #0\n\tumlal\tr9, r12, lr, vl\n\tstmia\trp!, { r8, r9 }\nL(skip2):\n\tbics\tn, n, #3\n\tbeq\tL(return)\n\tstmfd\tsp!, { r6, r7 }\nL(loop):\n\tmov\tr6, r12\n\tldmia\tup!, { r8, r9, r12, lr }\n\tldr\tr7, [rp, #12]\t\t\tC cache allocate\n\tmov\tr7, #0\n\tumlal\tr6, r7, r8, vl\n\tmov\tr8, #0\n\tumlal\tr7, r8, r9, vl\n\tmov\tr9, #0\n\tumlal\tr8, r9, r12, vl\n\tmov\tr12, #0\n\tumlal\tr9, r12, lr, vl\n\tsubs\tn, n, #4\n\tstmia\trp!, { r6, r7, r8, r9 }\n\tbne\tL(loop)\n\tldmfd\tsp!, { r6, r7 }\nL(return):\n\tmov\tr0, r12\n\tldmfd\tsp!, { r8, r9, pc }\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/arm/sub_n.asm",
    "content": "dnl  ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\ndnl  Contributed by Robert Harley.\n\ndnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC This code runs at 5 cycles/limb.\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`vp',`r2')\ndefine(`n',`r3')\n\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tstmfd\tsp!, { r8, r9, lr }\n\tsubs\tr12, r12, r12\n\ttst\tn, #1\n\tbeq\tL(skip1)\n\tldr\tr12, [up], #4\n\tldr\tlr, [vp], #4\n\tsubs\tr12, r12, lr\n\tstr\tr12, [rp], #4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tldmia\tup!, { r8, r9 }\n\tldmia\tvp!, { r12, lr }\n\tsbcs\tr8, r8, r12\n\tsbcs\tr9, r9, lr\n\tstmia\trp!, { r8, r9 }\nL(skip2):\n\tbics\tn, n, #3\n\tbeq\tL(return)\n\tstmfd\tsp!, { r4, r5, r6, r7 }\nL(sub_n_loop):\n\tldmia\tup!, { r4, r5, r6, r7 }\n\tldmia\tvp!, { r8, r9, r12, lr }\n\tsbcs\tr4, r4, r8\n\tldr\tr8, [rp, #12]\t\t\tC cache allocate\n\tsbcs\tr5, r5, r9\n\tsbcs\tr6, r6, r12\n\tsbcs\tr7, r7, lr\n\tstmia\trp!, { r4, r5, r6, r7 }\n\tsub\tn, n, #4\n\tteq\tn, 
#0\n\tbne\tL(sub_n_loop)\n\tldmfd\tsp!, { r4, r5, r6, r7 }\nL(return):\n\tsbc\tr0, r0, r0\n\tand\tr0, r0, #1\n\tldmfd\tsp!, { r8, r9, pc }\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/arm/submul_1.asm",
    "content": "dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the\ndnl  result from a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC            cycles/limb\nC StrongARM:  7.75-9.75  (dependent on vl value)\nC XScale:        8-9     (dependent on vl value, estimated)\n\ndefine(`rp',`r0')\ndefine(`up',`r1')\ndefine(`n',`r2')\ndefine(`vl',`r3')\ndefine(`rl',`r12')\ndefine(`ul',`r6')\ndefine(`r',`lr')\n\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tstmfd\tsp!, { r4-r6, lr }\n\tsubs\tr4, r0, r0\t\tC clear r4, set cy\n\ttst\tn, #1\n\tbeq\tL(skip1)\n\tldr\tul, [up], #4\n\tldr\trl, [rp, #0]\n\tumull\tr5, r4, ul, vl\n\tsubs\tr, rl, r5\n\tstr\tr, [rp], #4\nL(skip1):\n\ttst\tn, #2\n\tbeq\tL(skip2)\n\tldr\tul, [up], #4\n\tldr\trl, [rp, #0]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tldr\tul, [up], #4\n\tsbcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tsbcs\tr, rl, r5\n\tstr\tr, [rp], #4\nL(skip2):\n\tbics\tr, n, #3\n\tbeq\tL(return)\n\n\tldr\tul, [up], #4\n\tldr\trl, [rp, 
#0]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tb\tL(in)\n\nL(loop):\n\tldr\tul, [up], #4\n\tsbcs\tr, rl, r5\n\tldr\trl, [rp, #4]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tstr\tr, [rp], #4\nL(in):\tldr\tul, [up], #4\n\tsbcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tldr\tul, [up], #4\n\tsbcs\tr, rl, r5\n\tldr\trl, [rp, #4]\n\tmov\tr5, #0\n\tumlal\tr4, r5, ul, vl\n\tstr\tr, [rp], #4\n\tldr\tul, [up], #4\n\tsbcs\tr, rl, r4\n\tldr\trl, [rp, #4]\n\tmov\tr4, #0\n\tumlal\tr5, r4, ul, vl\n\tstr\tr, [rp], #4\n\tsub\tn, n, #4\n\tbics\tr, n, #3\n\tbne\tL(loop)\n\n\tsbcs\tr, rl, r5\n\tstr\tr, [rp], #4\nL(return):\n\tsbc\tr0, r0, r0\n\tsub\tr0, r4, r0\n\tldmfd\tsp!, { r4-r6, pc }\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/arm/udiv.asm",
    "content": "dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.\ndnl  Return quotient and store remainder through a supplied pointer.\n\ndnl  Copyright 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(`rem_ptr',`r0')\ndefine(`n1',`r1')\ndefine(`n0',`r2')\ndefine(`d',`r3')\n\nC divstep -- develop one quotient bit.  
Dividend in $1$2, divisor in $3.\nC Quotient bit is shifted into $2.\ndefine(`divstep',\n       `adcs\t$2, $2, $2\n\tadc\t$1, $1, $1\n\tcmp\t$1, $3\n\tsubcs\t$1, $1, $3')\n\nASM_START()\nPROLOGUE(mpn_udiv_qrnnd)\n\tmov\tr12, #8\t\t\tC loop counter for both loops below\n\tcmp\td, #0x80000000\t\tC check divisor msb and clear carry\n\tbcs\tL(_large_divisor)\n\nL(oop):\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tsub\tr12, r12, #1\n\tteq\tr12, #0\n\tbne\tL(oop)\n\n\tstr\tn1, [ rem_ptr ]\t\tC store remainder\n\tadc\tr0, n0, n0\t\tC quotient: add last carry from divstep\n\tmov\tpc, lr\n\nL(_large_divisor):\n\tstmfd\tsp!, { r8, lr }\n\n\tand\tr8, n0, #1\t\tC save lsb of dividend\n\tmov\tlr, n1, lsl #31\n\torrs\tn0, lr, n0, lsr #1\tC n0 = lo(n1n0 >> 1)\n\tmov\tn1, n1, lsr #1\t\tC n1 = hi(n1n0 >> 1)\n\n\tand\tlr, d, #1\t\tC save lsb of divisor\n\tmovs\td, d, lsr #1\t\tC d = floor(orig_d / 2)\n\tadc\td, d, #0\t\tC d = ceil(orig_d / 2)\n\nL(oop2):\n\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tdivstep(n1,n0,d)\n\tsub\tr12, r12, #1\n\tteq\tr12, #0\n\tbne\tL(oop2)\n\n\tadc\tn0, n0, n0\t\tC shift and add last carry from divstep\n\tadd\tn1, r8, n1, lsl #1\tC shift in omitted dividend lsb\n\ttst\tlr, lr\t\t\tC test saved divisor lsb\n\tbeq\tL(_even_divisor)\n\n\trsb\td, lr, d, lsl #1\tC restore orig d value\n\tadds\tn1, n1, n0\t\tC fix remainder for omitted divisor lsb\n\taddcs\tn0, n0, #1\t\tC adjust quotient if rem. fix carried\n\tsubcs\tn1, n1, d\t\tC adjust remainder accordingly\n\tcmp\tn1, d\t\t\tC remainder >= divisor?\n\tsubcs\tn1, n1, d\t\tC adjust remainder\n\taddcs\tn0, n0, #1\t\tC adjust quotient\n\nL(_even_divisor):\n\tstr\tn1, [ rem_ptr ]\t\tC store remainder\n\tmov\tr0, n0\t\t\tC quotient\n\tldmfd\tsp!, { r8, pc }\nEPILOGUE(mpn_udiv_qrnnd)\n"
  },
  {
    "path": "mpn/asm-defs.m4",
    "content": "divert(-1)\ndnl\ndnl  m4 macros for gmp assembly code, shared by all CPUs.\n\ndnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software\ndnl  Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  These macros are designed for use with any m4 and have been used on\ndnl  GNU, FreeBSD, NetBSD, OpenBSD and SysV.\ndnl\ndnl  GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error\ndnl  messages.\ndnl\ndnl\ndnl  Macros:\ndnl\ndnl  Most new m4 specific macros have an \"m4_\" prefix to emphasise they're\ndnl  m4 expansions.  But new defining things like deflit() and defreg() are\ndnl  named like the builtin define(), and forloop() is named following the\ndnl  GNU m4 example on which it's based.\ndnl\ndnl  GNU m4 with the -P option uses \"m4_\" as a prefix for builtins, but that\ndnl  option isn't going to be used, so there's no conflict or confusion.\ndnl\ndnl\ndnl  Comments in output:\ndnl\ndnl  The m4 comment delimiters are left at # and \\n, the normal assembler\ndnl  commenting for most CPUs.  
m4 passes comment text through without\ndnl  expanding macros in it, which is generally a good thing since it stops\ndnl  unexpected expansions and possible resultant errors.\ndnl\ndnl  But note that when a quoted string is being read, a # isn't special, so\ndnl  apostrophes in comments in quoted strings must be avoided or they'll be\ndnl  interpreted as a closing quote mark.  But when the quoted text is\ndnl  re-read # will still act like a normal comment, supressing macro\ndnl  expansion.\ndnl\ndnl  For example,\ndnl\ndnl          # apostrophes in comments that're outside quotes are ok\ndnl          # and using macro names like PROLOGUE is ok too\ndnl          ...\ndnl          ifdef(`PIC',`\ndnl                  # but apostrophes aren't ok inside quotes\ndnl                  #                     ^--wrong\ndnl                  ...\ndnl                  # though macro names like PROLOGUE are still ok\ndnl                  ...\ndnl          ')\ndnl\ndnl  If macro expansion in a comment is wanted, use `#' in the .asm (ie. a\ndnl  quoted hash symbol), which will turn into # in the .s but get\ndnl  expansions done on that line.  This can make the .s more readable to\ndnl  humans, but it won't make a blind bit of difference to the assembler.\ndnl\ndnl  All the above applies, mutatis mutandis, when changecom() is used to\ndnl  select @ ! ; or whatever other commenting.\ndnl\ndnl\ndnl  Variations in m4 affecting gmp:\ndnl\ndnl  $# - When a macro is called as \"foo\" with no brackets, BSD m4 sets $#\ndnl       to 1, whereas GNU or SysV m4 set it to 0.  In all cases though\ndnl       \"foo()\" sets $# to 1.  
This is worked around in various places.\ndnl\ndnl  len() - When \"len()\" is given an empty argument, BSD m4 evaluates to\ndnl       nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0.\ndnl       See m4_length() below which works around this.\ndnl\ndnl  translit() - GNU m4 accepts character ranges like A-Z, and the new\ndnl       OpenBSD m4 does under option -g, but basic BSD and SysV don't.\ndnl\ndnl  popdef() - in BSD and SysV m4 popdef() takes multiple arguments and\ndnl       pops each, but GNU m4 only takes one argument.\ndnl\ndnl  push back - BSD m4 has some limits on the amount of text that can be\ndnl       pushed back.  The limit is reasonably big and so long as macros\ndnl       don't gratuitously duplicate big arguments it isn't a problem.\ndnl       Normally an error message is given, but sometimes it just hangs.\ndnl\ndnl  eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available,\ndnl       but BSD m4 doesn't (contrary to what the man page suggests) and\ndnl       instead ^ is exponentiation.\ndnl\ndnl  eval() ?: - The C ternary operator \"?:\" is available in BSD m4, but not\ndnl       in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5).\ndnl\ndnl  eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31\ndnl       (ie. -2147483648) gives \"-(\".  Using -2147483648 within an\ndnl       expression is ok, it just can't be a final result.  \"-(\" will of\ndnl       course upset parsing, with all sorts of strange effects.\ndnl\ndnl  eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on\ndnl       Solaris 7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't).  See\ndnl       m4_lshift() and m4_rshift() below for workarounds.\ndnl\ndnl  ifdef() - OSF 4.0 m4 considers a macro defined to a zero value `0' or\ndnl       `00' etc as not defined.  
See m4_ifdef below for a workaround.\ndnl\ndnl  m4wrap() sequence - in BSD m4, m4wrap() replaces any previous m4wrap()\ndnl       string, in SysV m4 it appends to it, and in GNU m4 it prepends.\ndnl       See m4wrap_prepend() below which brings uniformity to this.\ndnl\ndnl  m4wrap() 0xFF - old versions of BSD m4 store EOF in a C \"char\" under an\ndnl       m4wrap() and on systems where char is unsigned by default a\ndnl       spurious 0xFF is output.  This has been observed on recent Cray\ndnl       Unicos Alpha, Apple MacOS X, and HPUX 11 systems.  An autoconf\ndnl       test is used to check for this, see the m4wrap handling below.  It\ndnl       might work to end the m4wrap string with a dnl to consume the\ndnl       0xFF, but that probably induces the offending m4's to read from an\ndnl       already closed \"FILE *\", which could be bad on a glibc style\ndnl       stdio.\ndnl\ndnl  __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and\ndnl       they're used here to make error messages more informative.  GNU m4\ndnl       gives an unhelpful \"NONE 0\" in an m4wrap(), but that's worked\ndnl       around.\ndnl\ndnl  __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the\ndnl       filename in __file__, so care should be taken that no macro has\ndnl       the same name as a file, or an unwanted expansion will occur when\ndnl       printing an error or warning.\ndnl\ndnl  changecom() - BSD m4 changecom doesn't quite work like the man page\ndnl       suggests, in particular \"changecom\" or \"changecom()\" doesn't\ndnl       disable the comment feature, and multi-character comment sequences\ndnl       don't seem to work.  If the default `#' and newline aren't\ndnl       suitable it's necessary to change it to something else,\ndnl       eg. changecom(;).\ndnl\ndnl  OpenBSD 2.6 m4 - in this m4, eval() rejects decimal constants containing\ndnl       an 8 or 9, making it pretty much unusable.  
The bug is confined to\ndnl       version 2.6 (it's not in 2.5, and was fixed in 2.7).\ndnl\ndnl  SunOS /usr/bin/m4 - this m4 lacks a number of desired features,\ndnl       including $# and $@, defn(), m4exit(), m4wrap(), pushdef(),\ndnl       popdef().  /usr/5bin/m4 is a SysV style m4 which should always be\ndnl       available, and \"configure\" will reject /usr/bin/m4 in favour of\ndnl       /usr/5bin/m4 (if necessary).\ndnl\ndnl       The sparc code actually has modest m4 requirements currently and\ndnl       could manage with /usr/bin/m4, but there's no reason to put our\ndnl       macros through contortions when /usr/5bin/m4 is available or GNU\ndnl       m4 can be installed.\n\n\nifdef(`__ASM_DEFS_M4_INCLUDED__',\n`m4_error(`asm-defs.m4 already included, dont include it twice\n')m4exit(1)')\ndefine(`__ASM_DEFS_M4_INCLUDED__')\n\n\ndnl  Detect and give a message about the unsuitable OpenBSD 2.6 m4.\n\nifelse(eval(89),89,,\n`errprint(\n`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable.\nThis is probably OpenBSD 2.6 m4 (September 1999).  Upgrade to OpenBSD 2.7,\nor get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4.  Dont forget\nto configure with M4=/wherever/m4 if you install one of these in a directory\nnot in $PATH.\n')m4exit(1)')\n\n\ndnl  Detect and give a message about the unsuitable SunOS /usr/bin/m4.\ndnl\ndnl  Unfortunately this test doesn't work when m4 is run in the normal way\ndnl  from mpn/Makefile with \"m4 -DOPERATION_foo foo.asm\", since the bad m4\ndnl  takes \"-\" in \"-D...\" to mean read stdin, so it will look like it just\ndnl  hangs.  But running \"m4 asm-defs.m4\" to try it out will work.\ndnl\ndnl  We'd like to abort immediately on finding a problem, but unfortunately\ndnl  the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill\ndnl  it.  
Unexpanded $#'s in some m4_assert_numargs() later on will comment\ndnl  out some closing parentheses and kill it with \"m4: arg stack overflow\".\n\ndefine(m4_dollarhash_works_test,``$#'')\nifelse(m4_dollarhash_works_test(x),1,,\n`errprint(\n`This m4 doesnt support $# and cant be used for GMP asm processing.\nIf this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that\nor can get it, otherwise install GNU m4.  Dont forget to configure with\nM4=/wherever/m4 if you install in a directory not in $PATH.\n')')\nundefine(`m4_dollarhash_works_test')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Basic error handling things.\n\n\ndnl  Usage: m4_dollarhash_1_if_noparen_p\ndnl\ndnl  Expand to 1 if a call \"foo\" gives $# set to 1 (as opposed to 0 like GNU\ndnl  and SysV m4 give).\n\ndefine(m4_dollarhash_1_if_noparen_test,`$#')\ndefine(m4_dollarhash_1_if_noparen_p,\neval(m4_dollarhash_1_if_noparen_test==1))\nundefine(`m4_dollarhash_1_if_noparen_test')\n\n\ndnl  Usage: m4wrap_prepend(string)\ndnl\ndnl  Prepend the given string to what will be exapanded under m4wrap at the\ndnl  end of input.\ndnl\ndnl  This macro exists to work around variations in m4wrap() behaviour in\ndnl  the various m4s (notes at the start of this file).  Don't use m4wrap()\ndnl  directly since it will interfere with this scheme.\n\ndefine(m4wrap_prepend,\nm4_assert_numargs(1)\n`define(`m4wrap_string',`$1'defn(`m4wrap_string'))')\n\ndefine(m4wrap_string,`')\n\ndefine(m4wrap_works_p,\n`ifelse(M4WRAP_SPURIOUS,yes,0,1)')\n\nifelse(m4wrap_works_p,1,\n`m4wrap(`m4wrap_string')')\n\n\ndnl  Usage: m4_file_and_line\ndnl\ndnl  Expand to the current file and line number, if the GNU m4 extensions\ndnl  __file__ and __line__ are available.\ndnl\ndnl  In GNU m4 1.4 at the end of input when m4wrap text is expanded,\ndnl  __file__ is NONE and __line__ is 0, which is not a helpful thing to\ndnl  print.  
If m4_file_seen() has been called to note the last file seen,\ndnl  then that file at a big line number is used, otherwise \"end of input\"\ndnl  is used (although \"end of input\" won't parse as an error message).\n\ndefine(m4_file_and_line,\n`ifdef(`__file__',\n`ifelse(__file__`'__line__,`NONE0',\n`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')',\n`__file__: __line__: ')')')\n\n\ndnl  Usage: m4_errprint_commas(arg,...)\ndnl\ndnl  The same as errprint(), but commas are printed between arguments\ndnl  instead of spaces.\n\ndefine(m4_errprint_commas,\n`errprint(`$1')dnl\nifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')')\n\n\ndnl  Usage: m4_error(args...)\ndnl         m4_warning(args...)\ndnl\ndnl  Print an error message, using m4_errprint_commas, prefixed with the\ndnl  current filename and line number (if available).  m4_error sets up to\ndnl  give an error exit at the end of processing, m4_warning just prints.\ndnl  These macros are the recommended way to print errors.\ndnl\ndnl  The arguments here should be quoted in the usual way to prevent them\ndnl  being expanded when the macro call is read.  
(m4_error takes care not\ndnl  to do any further expansion.)\ndnl\ndnl  For example,\ndnl\ndnl         m4_error(`some error message\ndnl         ')\ndnl\ndnl  which prints\ndnl\ndnl         foo.asm:123: some error message\ndnl\ndnl  or if __file__ and __line__ aren't available\ndnl\ndnl         some error message\ndnl\ndnl  The \"file:line:\" format is a basic style, used by gcc and GNU m4, so\ndnl  emacs and other editors will recognise it in their normal error message\ndnl  parsing.\n\ndefine(m4_warning,\n`m4_errprint_commas(m4_file_and_line`'$@)')\n\ndefine(m4_error,\n`define(`m4_error_occurred',1)m4_warning($@)dnl\nifelse(m4wrap_works_p,0,`m4exit(1)')')\n\ndefine(`m4_error_occurred',0)\n\ndnl  This m4wrap_prepend() is first, so it'll be executed last.\nm4wrap_prepend(\n`ifelse(m4_error_occurred,1,\n`m4_error(`Errors occurred during m4 processing\n')m4exit(1)')')\n\n\ndnl  Usage: m4_assert_numargs(num)\ndnl\ndnl  Put this unquoted on a line on its own at the start of a macro\ndnl  definition to add some code to check that num many arguments get passed\ndnl  to the macro.  For example,\ndnl\ndnl         define(foo,\ndnl         m4_assert_numargs(2)\ndnl         `something `$1' and `$2' blah blah')\ndnl\ndnl  Then a call like foo(one,two,three) will provoke an error like\ndnl\ndnl         file:10: foo expected 2 arguments, got 3 arguments\ndnl\ndnl  Here are some calls and how many arguments they're interpreted as passing.\ndnl\ndnl         foo(abc,def)  2\ndnl         foo(xyz)      1\ndnl         foo()         0\ndnl         foo          -1\ndnl\ndnl  The -1 for no parentheses at all means a macro that's meant to be used\ndnl  that way can be checked with m4_assert_numargs(-1).  
For example,\ndnl\ndnl         define(SPECIAL_SUFFIX,\ndnl         m4_assert_numargs(-1)\ndnl         `ifdef(`FOO',`_foo',`_bar')')\ndnl\ndnl  But as an alternative see also deflit() below where parenthesized\ndnl  expressions following a macro are passed through to the output.\ndnl\ndnl  Note that in BSD m4 there's no way to differentiate calls \"foo\" and\ndnl  \"foo()\", so in BSD m4 the distinction between the two isn't enforced.\ndnl  (In GNU and SysV m4 it can be checked, and is.)\n\n\ndnl  m4_assert_numargs is able to check its own arguments by calling\ndnl  assert_numargs_internal directly.\ndnl\ndnl  m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand\ndnl  to `$`'0' and do the wrong thing, and likewise for $1.  The same is\ndnl  done in other assert macros.\ndnl\ndnl  $`#' leaves $# in the new macro being defined, and stops # being\ndnl  interpreted as a comment character.\ndnl\ndnl  `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is\ndnl  used.  The space means that if there is a dnl it'll still work.\n\ndnl  Usage: m4_doublequote(x) expands to ``x''\ndefine(m4_doublequote,\n`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''')\n\ndefine(m4_assert_numargs,\n`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl\n`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '')\n\ndnl  Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1'))\ndefine(m4_assert_numargs_internal,\n`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))')\n\ndnl  Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs)\ndnl\ndnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it\ndnl  should be -1.  
If wantargs is -1 but gotargs is 0 and the two can't be\ndnl  distinguished then it's allowed to pass.\ndnl\ndefine(m4_assert_numargs_internal_check,\n`ifelse(eval($2 == $3\n             || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0,\n`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3')\n)')')\n\ndnl  Called: m4_numargs_count($#,len(`$1'))\ndnl  If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise\ndnl  $# args.\ndefine(m4_numargs_count,\n`ifelse($1,0, -1,\n`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')')\n\ndnl  Usage: m4_Narguments(N)\ndnl  \"$1 argument\" or \"$1 arguments\" with the plural according to $1.\ndefine(m4_Narguments,\n`$1 argument`'ifelse(`$1',1,,s)')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Additional error checking things.\n\n\ndnl  Usage: m4_file_seen()\ndnl\ndnl  Record __file__ for the benefit of m4_file_and_line in m4wrap text.\ndnl\ndnl  The basic __file__ macro comes out quoted in GNU m4, like `foo.asm',\ndnl  and m4_file_seen_last is defined like that too.\ndnl\ndnl  This is used by PROLOGUE, since that's normally in the main .asm file,\ndnl  and in particular it sets up m4wrap error checks for missing EPILOGUE.\n\ndefine(m4_file_seen,\nm4_assert_numargs(0)\n`ifelse(__file__,`NONE',,\n`define(`m4_file_seen_last',m4_doublequote(__file__))')')\n\n\ndnl  Usage: m4_assert_onearg()\ndnl\ndnl  Put this, unquoted, at the start of a macro definition to add some code\ndnl  to check that one argument is passed to the macro, but with that\ndnl  argument allowed to be empty.  For example,\ndnl\ndnl          define(foo,\ndnl          m4_assert_onearg()\ndnl          `blah blah $1 blah blah')\ndnl\ndnl  Calls \"foo(xyz)\" or \"foo()\" are accepted.  
A call \"foo(xyz,abc)\" fails.\ndnl  A call \"foo\" fails too, but BSD m4 can't detect this case (GNU and SysV\ndnl  m4 can).\n\ndefine(m4_assert_onearg,\nm4_assert_numargs(0)\n`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ')\n\ndnl  Called: m4_assert_onearg(`macroname',$#)\ndefine(m4_assert_onearg_internal,\n`ifelse($2,1,,\n`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2')\n)')')\n\n\ndnl  Usage: m4_assert_numargs_range(low,high)\ndnl\ndnl  Put this, unquoted, at the start of a macro definition to add some code\ndnl  to check that between low and high many arguments get passed to the\ndnl  macro.  For example,\ndnl\ndnl         define(foo,\ndnl         m4_assert_numargs_range(3,5)\ndnl         `mandatory $1 $2 $3 optional $4 $5 end')\ndnl\ndnl  See m4_assert_numargs() for more info.\n\ndefine(m4_assert_numargs_range,\nm4_assert_numargs(2)\n``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '')\n\ndnl  Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1'))\ndefine(m4_assert_numargs_range_internal,\nm4_assert_numargs(5)\n`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))')\n\ndnl  Called: m4_assert_numargs_range_check(`name',low,high,gotargs)\ndnl\ndnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it\ndnl  should be -1.  To ensure a `high' of -1 works, a fudge is applied to\ndnl  gotargs if it's 0 and the 0 and -1 cases can't be distinguished.\ndnl\ndefine(m4_assert_numargs_range_check,\nm4_assert_numargs(4)\n`ifelse(eval($2 <= $4 &&\n             ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0,\n`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4')\n)')')\n\n\ndnl  Usage: m4_assert_defined(symbol)\ndnl\ndnl  Put this unquoted on a line of its own at the start of a macro\ndnl  definition to add some code to check that the given symbol is defined\ndnl  when the macro is used.  
For example,\ndnl\ndnl          define(foo,\ndnl          m4_assert_defined(`FOO_PREFIX')\ndnl          `FOO_PREFIX whatever')\ndnl\ndnl  This is a convenient way to check that the user or ./configure or\ndnl  whatever has defined the things needed by a macro, as opposed to\ndnl  silently generating garbage.\n\ndefine(m4_assert_defined,\nm4_assert_numargs(1)\n``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '')\n\ndnl  Called: m4_assert_defined_internal(`macroname',`define_required')\ndefine(m4_assert_defined_internal,\nm4_assert_numargs(2)\n`m4_ifdef(`$2',,\n`m4_error(`$1 needs $2 defined\n')')')\n\n\ndnl  Usage: m4_not_for_expansion(`SYMBOL')\ndnl         define_not_for_expansion(`SYMBOL')\ndnl\ndnl  m4_not_for_expansion turns SYMBOL, if defined, into something which\ndnl  will give an error if expanded.  For example,\ndnl\ndnl         m4_not_for_expansion(`PIC')\ndnl\ndnl  define_not_for_expansion is the same, but always makes a definition.\ndnl\ndnl  These are for symbols that should be tested with ifdef(`FOO',...)\ndnl  rather than be expanded as such.  They guard against accidentally\ndnl  omitting the quotes, as in ifdef(FOO,...).  Note though that they only\ndnl  catches this when FOO is defined, so be sure to test code both with and\ndnl  without each definition.\n\ndefine(m4_not_for_expansion,\nm4_assert_numargs(1)\n`ifdef(`$1',`define_not_for_expansion(`$1')')')\n\ndefine(define_not_for_expansion,\nm4_assert_numargs(1)\n`ifelse(defn(`$1'),,,\n`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion()\n')')dnl\ndefine(`$1',`m4_not_for_expansion_internal(`$1')')')\n\ndefine(m4_not_for_expansion_internal,\n`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)'\n')')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Various generic m4 things.\n\n\ndnl  Usage: m4_unquote(macro)\ndnl\ndnl  Allow the argument text to be re-evaluated.  
This is useful for \"token\ndnl  pasting\" like m4_unquote(foo`'bar).\n\ndefine(m4_unquote,\nm4_assert_onearg()\n`$1')\n\n\ndnl  Usage: m4_ifdef(name,yes[,no])\ndnl\ndnl  Expand to the yes argument if name is defined, or to the no argument if\ndnl  not.\ndnl\ndnl  This is the same as the builtin \"ifdef\", but avoids an OSF 4.0 m4 bug\ndnl  in which a macro with a zero value `0' or `00' etc is considered not\ndnl  defined.\ndnl\ndnl  There's no particular need to use this everywhere, only if there might\ndnl  be a zero value.\n\ndefine(m4_ifdef,\nm4_assert_numargs_range(2,3)\n`ifelse(eval(ifdef(`$1',1,0)+m4_length(defn(`$1'))),0,\n`$3',`$2')')\n\n\ndnl  Usage: m4_ifdef_anyof_p(`symbol',...)\ndnl\ndnl  Expand to 1 if any of the symbols in the argument list are defined, or\ndnl  to 0 if not.\n\ndefine(m4_ifdef_anyof_p,\n`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0,\n`ifdef(`$1', 1,\n`m4_ifdef_anyof_p(shift($@))')')')\n\n\ndnl  Usage: m4_length(string)\ndnl\ndnl  Determine the length of a string.  
This is the same as len(), but\ndnl  always expands to a number, working around the BSD len() which\ndnl  evaluates to nothing given an empty argument.\n\ndefine(m4_length,\nm4_assert_onearg()\n`eval(len(`$1')-0)')\n\n\ndnl  Usage: m4_stringequal_p(x,y)\ndnl\ndnl  Expand to 1 or 0 according as strings x and y are equal or not.\n\ndefine(m4_stringequal_p,\n`ifelse(`$1',`$2',1,0)')\n\n\ndnl  Usage: m4_incr_or_decr(n,last)\ndnl\ndnl  Do an incr(n) or decr(n), whichever is in the direction of \"last\".\ndnl  Both n and last must be numbers of course.\n\ndefine(m4_incr_or_decr,\nm4_assert_numargs(2)\n`ifelse(eval($1<$2),1,incr($1),decr($1))')\n\n\ndnl  Usage: forloop(i, first, last, statement)\ndnl\ndnl  Based on GNU m4 examples/forloop.m4, but extended.\ndnl\ndnl  statement is expanded repeatedly, with i successively defined as\ndnl\ndnl         first, first+1, ..., last-1, last\ndnl\ndnl  Or if first > last, then it's\ndnl\ndnl         first, first-1, ..., last+1, last\ndnl\ndnl  If first == last, then one expansion is done.\ndnl\ndnl  A pushdef/popdef of i is done to preserve any previous definition (or\ndnl  lack of definition).  first and last are eval()ed and so can be\ndnl  expressions.\ndnl\ndnl  forloop_first is defined to 1 on the first iteration, 0 on the rest.\ndnl  forloop_last is defined to 1 on the last iteration, 0 on the others.\ndnl  Nested forloops are allowed, in which case forloop_first and\ndnl  forloop_last apply to the innermost loop that's open.\ndnl\ndnl  A simple example,\ndnl\ndnl         forloop(i, 1, 2*2+1, `dnl\ndnl         iteration number i ... 
ifelse(forloop_first,1,FIRST)\ndnl         ')\n\n\ndnl  \"i\" and \"statement\" are carefully quoted, but \"first\" and \"last\" are\ndnl  just plain numbers once eval()ed.\n\ndefine(`forloop',\nm4_assert_numargs(4)\n`pushdef(`$1',eval(`$2'))dnl\npushdef(`forloop_first',1)dnl\npushdef(`forloop_last',0)dnl\nforloop_internal(`$1',eval(`$3'),`$4')`'dnl\npopdef(`forloop_first')dnl\npopdef(`forloop_last')dnl\npopdef(`$1')')\n\ndnl  Called: forloop_internal(`var',last,statement)\ndefine(`forloop_internal',\nm4_assert_numargs(3)\n`ifelse($1,$2,\n`define(`forloop_last',1)$3',\n`$3`'dnl\ndefine(`forloop_first',0)dnl\ndefine(`$1',m4_incr_or_decr($1,$2))dnl\nforloop_internal(`$1',$2,`$3')')')\n\n\ndnl  Usage: foreach(var,body, item1,item2,...,itemN)\ndnl\ndnl  For each \"item\" argument, define \"var\" to that value and expand \"body\".\ndnl  For example,\ndnl\ndnl         foreach(i, `something i\ndnl         ', one, two)\ndnl  gives\ndnl         something one\ndnl         something two\ndnl\ndnl  Any previous definition of \"var\", or lack thereof, is saved and\ndnl  restored.  Empty \"item\"s are not allowed.\n\ndefine(foreach,\nm4_assert_numargs_range(2,1000)\n`ifelse(`$3',,,\n`pushdef(`$1',`$3')$2`'popdef(`$1')dnl\nforeach(`$1',`$2',shift(shift(shift($@))))')')\n\n\ndnl  Usage: m4_toupper(x)\ndnl         m4_tolower(x)\ndnl\ndnl  Convert the argument string to upper or lower case, respectively.\ndnl  Only one argument accepted.\ndnl\ndnl  BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet\ndnl  is written out.\n\ndefine(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz')\ndefine(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ')\n\ndefine(m4_toupper,\nm4_assert_onearg()\n`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)')\n\ndefine(m4_tolower,\nm4_assert_onearg()\n`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)')\n\n\ndnl  Usage: m4_empty_if_zero(x)\ndnl\ndnl  Evaluate to x, or to nothing if x is 0.  
x is eval()ed and so can be an\ndnl  expression.\ndnl\ndnl  This is useful for x86 addressing mode displacements since forms like\ndnl  (%ebx) are one byte shorter than 0(%ebx).  A macro `foo' for use as\ndnl  foo(%ebx) could be defined with the following so it'll be empty if the\ndnl  expression comes out zero.\ndnl\ndnl\t   deflit(`foo', `m4_empty_if_zero(a+b*4-c)')\ndnl\ndnl  Naturally this shouldn't be done if, say, a computed jump depends on\ndnl  the code being a particular size.\n\ndefine(m4_empty_if_zero,\nm4_assert_onearg()\n`ifelse(eval($1),0,,eval($1))')\n\n\ndnl  Usage: m4_log2(x)\ndnl\ndnl  Calculate a logarithm to base 2.\ndnl  x must be an integral power of 2, between 2**0 and 2**30.\ndnl  x is eval()ed, so it can be an expression.\ndnl  An error results if x is invalid.\ndnl\ndnl  2**31 isn't supported, because an unsigned 2147483648 is out of range\ndnl  of a 32-bit signed int.  Also, the bug in BSD m4 where an eval()\ndnl  resulting in 2147483648 (or -2147483648 as the case may be) gives `-('\ndnl  means tests like eval(1<<31==(x)) would be necessary, but that then\ndnl  gives an unattractive explosion of eval() error messages if x isn't\ndnl  numeric.\n\ndefine(m4_log2,\nm4_assert_numargs(1)\n`m4_log2_internal(0,1,eval(`$1'))')\n\ndnl  Called: m4_log2_internal(n,2**n,target)\ndefine(m4_log2_internal,\nm4_assert_numargs(3)\n`ifelse($2,$3,$1,\n`ifelse($1,30,\n`m4_error(`m4_log2() argument too big or not a power of two: $3\n')',\n`m4_log2_internal(incr($1),eval(2*$2),$3)')')')\n\n\ndnl  Usage:  m4_div2_towards_zero\ndnl\ndnl  m4 division is probably whatever a C signed division is, and C doesn't\ndnl  specify what rounding gets used on negatives, so this expression forces\ndnl  a rounding towards zero.\n\ndefine(m4_div2_towards_zero,\nm4_assert_numargs(1)\n`eval((($1) + ((($1)<0) & ($1))) / 2)')\n\n\ndnl  Usage: m4_lshift(n,count)\ndnl         m4_rshift(n,count)\ndnl\ndnl  Calculate n shifted left or right by count many bits.  
Both n and count\ndnl  are eval()ed and so can be expressions.\ndnl\ndnl  Negative counts are allowed and mean a shift in the opposite direction.\ndnl  Negative n is allowed and right shifts will be arithmetic (meaning\ndnl  divide by 2**count, rounding towards zero, also meaning the sign bit is\ndnl  duplicated).\ndnl\ndnl  Use these macros instead of << and >> in eval() since the basic ccs\ndnl  SysV m4 doesn't have those operators.\n\ndefine(m4_rshift,\nm4_assert_numargs(2)\n`m4_lshift(`$1',-(`$2'))')\n\ndefine(m4_lshift,\nm4_assert_numargs(2)\n`m4_lshift_internal(eval(`$1'),eval(`$2'))')\n\ndefine(m4_lshift_internal,\nm4_assert_numargs(2)\n`ifelse(eval($2-0==0),1,$1,\n`ifelse(eval($2>0),1,\n`m4_lshift_internal(eval($1*2),decr($2))',\n`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')')\n\n\ndnl  Usage: m4_popcount(n)\ndnl\ndnl  Expand to the number 1 bits in n.\n\ndefine(m4_popcount,\nm4_assert_numargs(1)\n`m4_popcount_internal(0,eval(`$1'))')\n\ndnl  Called: m4_popcount_internal(count,rem)\ndefine(m4_popcount_internal,\nm4_assert_numargs(2)\n`ifelse($2,0,$1,\n`m4_popcount_internal(eval($1+($2%2)),eval($2/2))')')\n\n\ndnl  Usage: m4_count_trailing_zeros(N)\ndnl\ndnl  Determine the number of trailing zero bits on N.  N is eval()ed and so\ndnl  can be an expression.  If N is zero an error is generated.\n\ndefine(m4_count_trailing_zeros,\nm4_assert_numargs(1)\n`m4_count_trailing_zeros_internal(eval(`$1'),0)')\n\ndnl  Called: m4_count_trailing_zeros_internal(val,count)\ndefine(m4_count_trailing_zeros_internal,\nm4_assert_numargs(2)\n`ifelse($1,0,\n`m4_error(`m4_count_trailing_zeros() given a zero value')',\n`ifelse(eval(($1)%2),1,`$2',\n`m4_count_trailing_zeros_internal(eval($1/2),incr($2))')')')\n\n\ndnl  Usage: deflit(name,value)\ndnl\ndnl  Like define(), but \"name\" expands like a literal, rather than taking\ndnl  arguments.  
For example \"name(%eax)\" expands to \"value(%eax)\".\ndnl\ndnl  Limitations:\ndnl\ndnl  $ characters in the value part must have quotes to stop them looking\ndnl  like macro parameters.  For example, deflit(reg,`123+$`'4+567').  See\ndnl  defreg() below for handling simple register definitions like $7 etc.\ndnl\ndnl  \"name()\" is turned into \"name\", unfortunately.  In GNU and SysV m4 an\ndnl  error is generated when this happens, but in BSD m4 it will happen\ndnl  silently.  The problem is that in BSD m4 $# is 1 in both \"name\" or\ndnl  \"name()\", so there's no way to differentiate them.  Because we want\ndnl  plain \"name\" to turn into plain \"value\", we end up with \"name()\"\ndnl  turning into plain \"value\" too.\ndnl\ndnl  \"name(foo)\" will lose any whitespace after commas in \"foo\", for example\ndnl  \"disp(%eax, %ecx)\" would become \"128(%eax,%ecx)\".\ndnl\ndnl  These parentheses oddities shouldn't matter in assembler text, but if\ndnl  they do the suggested workaround is to write \"name ()\" or \"name (foo)\"\ndnl  to stop the parentheses looking like a macro argument list.  If a space\ndnl  isn't acceptable in the output, then write \"name`'()\" or \"name`'(foo)\".\ndnl  The `' is stripped when read, but again stops the parentheses looking\ndnl  like parameters.\n\ndnl  Quoting for deflit_emptyargcheck is similar to m4_assert_numargs.  
The\ndnl  stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro\ndnl  created, not in deflit.\ndefine(deflit,\nm4_assert_numargs(2)\n`define(`$1',\n`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl\n$2`'dnl\nifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')\n\ndnl  Called: deflit_emptyargcheck(macroname,$#,`$1')\ndefine(deflit_emptyargcheck,\n`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,\n`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)\n')')')\n\n\ndnl  Usage: m4_assert(`expr')\ndnl\ndnl  Test a compile-time requirement with an m4 expression.  The expression\ndnl  should be quoted, and will be eval()ed and expected to be non-zero.\ndnl  For example,\ndnl\ndnl         m4_assert(`FOO*2+6 < 14')\n\ndefine(m4_assert,\nm4_assert_numargs(1)\n`ifelse(eval($1),1,,\n`m4_error(`assertion failed: $1\n')')')\n\n\ndnl  Usage: m4_repeat(count,text)\ndnl\ndnl  Expand to the given repetitions of the given text.  A zero count is\ndnl  allowed, and expands to nothing.\n\ndefine(m4_repeat,\nm4_assert_numargs(2)\n`m4_repeat_internal(eval($1),`$2')')\n\ndefine(m4_repeat_internal,\nm4_assert_numargs(2)\n`ifelse(`$1',0,,\n`forloop(m4_repeat_internal_counter,1,$1,``$2'')')')\n\n\ndnl  Usage: m4_hex_lowmask(bits)\ndnl\ndnl  Generate a hex constant which is a low mask of the given number of\ndnl  bits.  
For example m4_hex_lowmask(10) would give 0x3ff.\n\ndefine(m4_hex_lowmask,\nm4_assert_numargs(1)\n`m4_cpu_hex_constant(m4_hex_lowmask_internal1(eval(`$1')))')\n\ndnl  Called: m4_hex_lowmask_internal1(bits)\ndefine(m4_hex_lowmask_internal1,\nm4_assert_numargs(1)\n`ifelse($1,0,`0',\n`m4_hex_lowmask_internal2(eval(($1)%4),eval(($1)/4))')')\n\ndnl  Called: m4_hex_lowmask_internal(remainder,digits)\ndefine(m4_hex_lowmask_internal2,\nm4_assert_numargs(2)\n`ifelse($1,1,`1',\n`ifelse($1,2,`3',\n`ifelse($1,3,`7')')')dnl\nm4_repeat($2,`f')')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  The following m4_list functions take a list as multiple arguments.\ndnl  Arguments are evaluated multiple times, there's no attempt at strict\ndnl  quoting.  Empty list elements are not allowed, since an empty final\ndnl  argument is ignored.  These restrictions don't affect the current uses,\ndnl  and make the implementation easier.\n\n\ndnl  Usage: m4_list_quote(list,...)\ndnl\ndnl  Produce a list with quoted commas, so it can be a single argument\ndnl  string.  
For instance m4_list_quote(a,b,c) gives\ndnl\ndnl         a`,'b`,'c`,'\ndnl\ndnl  This can be used to put a list in a define,\ndnl\ndnl         define(foolist, m4_list_quote(a,b,c))\ndnl\ndnl  Which can then be used for instance as\ndnl\ndnl         m4_list_find(target, foolist)\n\ndefine(m4_list_quote,\n`ifelse(`$1',,,\n`$1`,'m4_list_quote(shift($@))')')\n\n\ndnl  Usage: m4_list_find(key,list,...)\ndnl\ndnl  Evaluate to 1 or 0 according to whether key is in the list elements.\n\ndefine(m4_list_find,\nm4_assert_numargs_range(1,1000)\n`ifelse(`$2',,0,\n`ifelse(`$1',`$2',1,\n`m4_list_find(`$1',shift(shift($@)))')')')\n\n\ndnl  Usage: m4_list_remove(key,list,...)\ndnl\ndnl  Evaluate to the given list with `key' removed (if present).\n\ndefine(m4_list_remove,\nm4_assert_numargs_range(1,1000)\n`ifelse(`$2',,,\n`ifelse(`$1',`$2',,`$2,')dnl\nm4_list_remove(`$1',shift(shift($@)))')')\n\n\ndnl  Usage: m4_list_first(list,...)\ndnl\ndnl  Evaluate to the first element of the list (if any).\n\ndefine(m4_list_first,`$1')\n\n\ndnl  Usage: m4_list_count(list,...)\ndnl\ndnl  Evaluate to the number of elements in the list.  This can't just use $#\ndnl  because the last element might be empty.\n\ndefine(m4_list_count,\n`m4_list_count_internal(0,$@)')\n\ndnl  Called: m4_list_internal(count,list,...)\ndefine(m4_list_count_internal,\nm4_assert_numargs_range(1,1000)\n`ifelse(`$2',,$1,\n`m4_list_count_internal(eval($1+1),shift(shift($@)))')')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Various assembler things, not specific to any particular CPU.\ndnl\n\n\ndnl  Usage: include_mpn(`filename')\ndnl\ndnl  Like include(), but adds a path to the mpn source directory.  
For\ndnl  example,\ndnl\ndnl         include_mpn(`sparc64/addmul_1h.asm')\n\ndefine(include_mpn,\nm4_assert_numargs(1)\nm4_assert_defined(`CONFIG_TOP_SRCDIR')\n`include(CONFIG_TOP_SRCDIR`/mpn/$1')')\n\n\ndnl  Usage: C comment ...\ndnl\ndnl  This works like a FORTRAN-style comment character.  It can be used for\ndnl  comments to the right of assembly instructions, where just dnl would\ndnl  remove the newline and concatenate adjacent lines.\ndnl\ndnl  C and/or dnl are useful when an assembler doesn't support comments, or\ndnl  where different assemblers for a particular CPU need different styles.\ndnl  The intermediate \".s\" files will end up with no comments, just code.\ndnl\ndnl  Using C is not intended to cause offence to anyone who doesn't like\ndnl  FORTRAN; but if that happens it's an unexpected bonus.\ndnl\ndnl  During development, if comments are wanted in the .s files to help see\ndnl  what's expanding where, C can be redefined with something like\ndnl\ndnl         define(`C',`#')\n\ndefine(C, `\ndnl')\n\n\ndnl  Normally PIC is defined (or not) by libtool, but it doesn't set it on\ndnl  systems which are always PIC.  PIC_ALWAYS established in config.m4\ndnl  identifies these for us.\n\nifelse(`PIC_ALWAYS',`yes',`define(`PIC')')\n\n\ndnl  Various possible defines passed from the Makefile that are to be tested\ndnl  with ifdef() rather than be expanded.\n\nm4_not_for_expansion(`PIC')\nm4_not_for_expansion(`DLL_EXPORT')\n\ndnl  Usage: m4_config_gmp_mparam(`symbol')\ndnl\ndnl  Check that `symbol' is defined.  If it isn't, issue an error and\ndnl  terminate immediately.  
The error message explains that the symbol\ndnl  should be in config.m4, copied from gmp-mparam.h.\ndnl\ndnl  Termination is immediate since missing say SQR_KARATSUBA_THRESHOLD can\ndnl  lead to infinite loops and endless error messages.\n\ndefine(m4_config_gmp_mparam,\nm4_assert_numargs(1)\n`ifdef(`$1',,\n`m4_error(`$1 is not defined.\n\t\"configure\" should have extracted this from gmp-mparam.h and put it\n\tin config.m4 (or in <cpu>_<file>.asm for a fat binary), but somehow\n        this has failed.\n')m4exit(1)')')\n\n\ndnl  Usage: defreg(name,reg)\ndnl\ndnl  Give a name to a $ style register.  For example,\ndnl\ndnl         defreg(foo,$12)\ndnl\ndnl  defreg() inserts an extra pair of quotes after the $ so that it's not\ndnl  interpreted as an m4 macro parameter, ie. foo is actually $`'12.  m4\ndnl  strips those quotes when foo is expanded.\ndnl\ndnl  deflit() is used to make the new definition, so it will expand\ndnl  literally even if followed by parentheses ie. foo(99) will become\ndnl  $12(99).  (But there's nowhere that would be used is there?)\ndnl\ndnl  When making further definitions from existing defreg() macros, remember\ndnl  to use defreg() again to protect the $ in the new definitions too.  For\ndnl  example,\ndnl\ndnl         defreg(a0,$4)\ndnl         defreg(a1,$5)\ndnl         ...\ndnl\ndnl         defreg(PARAM_DST,a0)\ndnl\ndnl  This is only because a0 is expanding at the time the PARAM_DST\ndnl  definition is made, leaving a literal $4 that must be re-quoted.  
On\ndnl  the other hand in something like the following ra is only expanded when\ndnl  ret is used and its $`'31 protection will have its desired effect at\ndnl  that time.\ndnl\ndnl         defreg(ra,$31)\ndnl         ...\ndnl         define(ret,`j ra')\ndnl\ndnl  Note that only $n forms are meant to be used here, and something like\ndnl  128($30) doesn't get protected and will come out wrong.\n\ndefine(defreg,\nm4_assert_numargs(2)\n`deflit(`$1',\nsubstr(`$2',0,1)``''substr(`$2',1))')\n\n\ndnl  Usage: m4_instruction_wrapper()\ndnl\ndnl  Put this, unquoted, on a line on its own, at the start of a macro\ndnl  that's a wrapper around an assembler instruction.  It adds code to give\ndnl  a descriptive error message if the macro is invoked without arguments.\ndnl\ndnl  For example, suppose jmp needs to be wrapped,\ndnl\ndnl         define(jmp,\ndnl         m4_instruction_wrapper()\ndnl         m4_assert_numargs(1)\ndnl                 `.byte 0x42\ndnl                 .long  $1\ndnl                 nop')\ndnl\ndnl  The point of m4_instruction_wrapper is to get a better error message\ndnl  than m4_assert_numargs would give if jmp is accidentally used as plain\ndnl  \"jmp foo\" instead of the intended \"jmp( foo)\".  
\"jmp()\" with no\ndnl  argument also provokes the error message.\ndnl\ndnl  m4_instruction_wrapper should only be used with wrapped instructions\ndnl  that take arguments, since obviously something meant to be used as say\ndnl  plain \"ret\" doesn't want to give an error when used that way.\n\ndefine(m4_instruction_wrapper,\nm4_assert_numargs(0)\n``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl\nifdef(`__file__',`m4_doublequote(__file__)',``the m4 sources''),dnl\n$`#',m4_doublequote($`'1))`dnl'')\n\ndnl  Called: m4_instruction_wrapper_internal($0,`filename',$#,$1)\ndefine(m4_instruction_wrapper_internal,\n`ifelse(eval($3<=1 && m4_length(`$4')==0),1,\n`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details\n')')')\n\n\ndnl  Usage: m4_cpu_hex_constant(string)\ndnl\ndnl  Expand to the string prefixed by a suitable `0x' hex marker.  This\ndnl  should be redefined as necessary for CPUs with different conventions.\n\ndefine(m4_cpu_hex_constant,\nm4_assert_numargs(1)\n`0x`$1'')\n\n\ndnl  Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES\ndnl         CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES\ndnl\ndnl  When code supports a variable amount of loop unrolling, the convention\ndnl  is to define UNROLL_COUNT to the number of limbs processed per loop.\ndnl  When testing code this can be varied to see how much the loop overhead\ndnl  is costing.  For example,\ndnl\ndnl         deflit(UNROLL_COUNT, 32)\ndnl\ndnl  If the forloop() generating the unrolled loop has a pattern processing\ndnl  more than one limb, the convention is to express this with CHUNK_COUNT.\ndnl  For example,\ndnl\ndnl         deflit(CHUNK_COUNT, 2)\ndnl\ndnl  The LOG2, MASK and BYTES definitions below are derived from these COUNT\ndnl  definitions.  If COUNT is redefined, the LOG2, MASK and BYTES follow\ndnl  the new definition automatically.\ndnl\ndnl  LOG2 is the log base 2 of COUNT.  MASK is COUNT-1, which can be used as\ndnl  a bit mask.  
BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes\ndnl  processed in each unrolled loop.\ndnl\ndnl  BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file.  It\ndnl  exists only so the BYTES definitions here can be common to all CPUs.\ndnl  In the actual code for a given CPU, an explicit 4 or 8 may as well be\ndnl  used because the code is only for a particular CPU, it doesn't need to\ndnl  be general.\ndnl\ndnl  Note that none of these macros do anything except give conventional\ndnl  names to commonly used things.  You still have to write your own\ndnl  expressions for a forloop() and the resulting address displacements.\ndnl  Something like the following would be typical for 4 bytes per limb.\ndnl\ndnl         forloop(`i',0,UNROLL_COUNT-1,`\ndnl                 deflit(`disp',eval(i*4))\ndnl                 ...\ndnl         ')\ndnl\ndnl  Or when using CHUNK_COUNT,\ndnl\ndnl         forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,`\ndnl                 deflit(`disp0',eval(i*CHUNK_COUNT*4))\ndnl                 deflit(`disp1',eval(disp0+4))\ndnl                 ...\ndnl         ')\ndnl\ndnl  Clearly `i' can be run starting from 1, or from high to low or whatever\ndnl  best suits.\n\ndeflit(UNROLL_LOG2,\nm4_assert_defined(`UNROLL_COUNT')\n`m4_log2(UNROLL_COUNT)')\n\ndeflit(UNROLL_MASK,\nm4_assert_defined(`UNROLL_COUNT')\n`eval(UNROLL_COUNT-1)')\n\ndeflit(UNROLL_BYTES,\nm4_assert_defined(`UNROLL_COUNT')\nm4_assert_defined(`BYTES_PER_MP_LIMB')\n`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)')\n\ndeflit(CHUNK_LOG2,\nm4_assert_defined(`CHUNK_COUNT')\n`m4_log2(CHUNK_COUNT)')\n\ndeflit(CHUNK_MASK,\nm4_assert_defined(`CHUNK_COUNT')\n`eval(CHUNK_COUNT-1)')\n\ndeflit(CHUNK_BYTES,\nm4_assert_defined(`CHUNK_COUNT')\nm4_assert_defined(`BYTES_PER_MP_LIMB')\n`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)')\n\n\ndnl  Usage: MPN(name)\ndnl\ndnl  Add MPN_PREFIX to a name.\ndnl  MPN_PREFIX defaults to \"__gmpn_\" if not defined.\ndnl\ndnl  m4_unquote is used in MPN so that when it expands to 
say __gmpn_foo,\ndnl  that identifier will be subject to further macro expansion.  This is\ndnl  used by some of the fat binary support for renaming symbols.\n\nifdef(`MPN_PREFIX',,\n`define(`MPN_PREFIX',`__gmpn_')')\n\ndefine(MPN,\nm4_assert_numargs(1)\n`m4_unquote(MPN_PREFIX`'$1)')\n\n\ndnl  Usage: mpn_add_n, etc\ndnl\ndnl  Convenience definitions using MPN(), like the #defines in mpir.h.  Each\ndnl  function that might be implemented in assembler is here.\n\ndefine(define_mpn,\nm4_assert_numargs(1)\n`define(`mpn_$1',`MPN(`$1')')')\n\ndefine_mpn(add)\ndefine_mpn(add_1)\ndefine_mpn(add_err1_n)\ndefine_mpn(add_err2_n)\ndefine_mpn(add_n)\ndefine_mpn(add_nc)\ndefine_mpn(addadd_n)\ndefine_mpn(addlsh_n)\ndefine_mpn(addlsh_nc)\ndefine_mpn(addlsh1_n)\ndefine_mpn(addmul_1)\ndefine_mpn(addmul_1c)\ndefine_mpn(addmul_2)\ndefine_mpn(addmul_3)\ndefine_mpn(addmul_4)\ndefine_mpn(addsub_n)\ndefine_mpn(sumdiff_n)\ndefine_mpn(sumdiff_nc)\ndefine_mpn(and_n)\ndefine_mpn(andn_n)\ndefine_mpn(bdivmod)\ndefine_mpn(cmp)\ndefine_mpn(com_n)\ndefine_mpn(not)\ndefine_mpn(copyd)\ndefine_mpn(copyi)\ndefine_mpn(count_leading_zeros)\ndefine_mpn(count_trailing_zeros)\ndefine_mpn(declsh_n)\ndefine_mpn(divexact_1)\ndefine_mpn(divexact_by3c)\ndefine_mpn(divexact_byff)\ndefine_mpn(divexact_byfobm1)\ndefine_mpn(divrem_1)\ndefine_mpn(divrem_euclidean_qr_1)\ndefine_mpn(divrem_euclidean_qr_2)\ndefine_mpn(divrem_euclidean_r_1)\ndefine_mpn(divrem_hensel_r_1)\ndefine_mpn(divrem_hensel_qr_1)\ndefine_mpn(divrem_hensel_qr_1_1)\ndefine_mpn(divrem_hensel_qr_1_2)\ndefine_mpn(divrem_hensel_rsh_qr_1)\ndefine_mpn(rsh_divrem_hensel_qr_1)\ndefine_mpn(rsh_divrem_hensel_qr_1_1)\ndefine_mpn(rsh_divrem_hensel_qr_1_2)\ndefine_mpn(divrem_1c)\ndefine_mpn(divrem_2)\ndefine_mpn(divrem_classic)\ndefine_mpn(divrem_newton)\ndefine_mpn(double)\ndefine_mpn(dump)\ndefine_mpn(gcd)\ndefine_mpn(gcd_1)\ndefine_mpn(gcdext)\ndefine_mpn(get_str)\ndefine_mpn(half)\ndefine_mpn(hamdist)\ndefine_mpn(inclsh_n)\ndefine_mpn(invert_limb)\ndefine_mpn
(ior_n)\ndefine_mpn(iorn_n)\ndefine_mpn(kara_mul_n)\ndefine_mpn(kara_sqr_n)\ndefine_mpn(karaadd)\ndefine_mpn(karasub)\ndefine_mpn(lshift)\ndefine_mpn(lshift1)\ndefine_mpn(lshift2)\ndefine_mpn(lshiftc)\ndefine_mpn(mod_1)\ndefine_mpn(mod_1_1)\ndefine_mpn(mod_1_2)\ndefine_mpn(mod_1_3)\ndefine_mpn(mod_1c)\ndefine_mpn(mod_34lsub1)\ndefine_mpn(modexact_1_odd)\ndefine_mpn(modexact_1c_odd)\ndefine_mpn(mul)\ndefine_mpn(mul_1)\ndefine_mpn(mul_1c)\ndefine_mpn(mul_2)\ndefine_mpn(mul_basecase)\ndefine_mpn(mul_n)\ndefine_mpn(mullow_n_basecase)\ndefine_mpn(mullow_basecase)\ndefine_mpn(mulmid_basecase)\ndefine_mpn(mulshort_n_basecase)\ndefine_mpn(neg_n)\ndefine_mpn(perfect_square_p)\ndefine_mpn(popcount)\ndefine_mpn(preinv_divrem_1)\ndefine_mpn(preinv_mod_1)\ndefine_mpn(nand_n)\ndefine_mpn(nior_n)\ndefine_mpn(powm)\ndefine_mpn(powlo)\ndefine_mpn(random)\ndefine_mpn(random2)\ndefine_mpn(redc_1)\ndefine_mpn(redc_2)\ndefine_mpn(rsh1add_n)\ndefine_mpn(rsh1sub_n)\ndefine_mpn(rshift)\ndefine_mpn(rshift1)\ndefine_mpn(rshift2)\ndefine_mpn(rshiftc)\ndefine_mpn(scan0)\ndefine_mpn(scan1)\ndefine_mpn(set_str)\ndefine_mpn(sqr_basecase)\ndefine_mpn(sqr_diagonal)\ndefine_mpn(sub_err1_n)\ndefine_mpn(sub_err2_n)\ndefine_mpn(sub_n)\ndefine_mpn(sublsh_n)\ndefine_mpn(sublsh_nc)\ndefine_mpn(sublsh1_n)\ndefine_mpn(sqrtrem)\ndefine_mpn(store)\ndefine_mpn(sub)\ndefine_mpn(sub_1)\ndefine_mpn(sub_n)\ndefine_mpn(sub_nc)\ndefine_mpn(subadd_n)\ndefine_mpn(submul_1)\ndefine_mpn(submul_1c)\ndefine_mpn(toom3_mul_n)\ndefine_mpn(toom3_sqr_n)\ndefine_mpn(umul_ppmm)\ndefine_mpn(umul_ppmm_r)\ndefine_mpn(udiv_qrnnd)\ndefine_mpn(udiv_qrnnd_r)\ndefine_mpn(xnor_n)\ndefine_mpn(xor_n)\n\n\ndnl  Defines for C global arrays and variables, with names matching what's\ndnl  used in the C code.\ndnl\ndnl  Notice that GSYM_PREFIX is included, unlike with the function defines\ndnl  above.  
Also, \"deflit\" is used so that something like __clz_tab(%ebx)\ndnl  comes out as __gmpn_clz_tab(%ebx), for the benefit of CPUs with that\ndnl  style assembler syntax.\n\ndeflit(__clz_tab,\nm4_assert_defined(`GSYM_PREFIX')\n`GSYM_PREFIX`'MPN(`clz_tab')')\n\ndeflit(modlimb_invert_table,\nm4_assert_defined(`GSYM_PREFIX')\n`GSYM_PREFIX`'__gmp_modlimb_invert_table')\n\n\ndnl  Usage: ASM_START()\ndnl\ndnl  Emit any directives needed once at the start of an assembler file, like\ndnl  \".set noreorder\" or whatever.  The default for this is nothing, but\ndnl  it's redefined by CPU specific m4 files.\n\ndefine(ASM_START)\n\n\ndnl  Usage: ASM_END()\ndnl\ndnl  Emit any directives needed once at the end of an assembler file.  The\ndnl  default for this is nothing, but it's redefined by CPU specific m4 files.\n\ndefine(ASM_END)\n\n\ndnl  Usage: PROLOGUE(foo[,param])\ndnl         EPILOGUE(foo)\ndnl\ndnl  Emit directives to start or end a function.  GSYM_PREFIX is added by\ndnl  these macros if necessary, so the given \"foo\" is what the function will\ndnl  be called in C.\ndnl\ndnl  The second parameter to PROLOGUE is used only for some CPUs and should\ndnl  be omitted if not required.\ndnl\ndnl  Nested or overlapping PROLOGUE/EPILOGUE pairs are allowed, if that\ndnl  makes sense for the system.  The name given to EPILOGUE must be a\ndnl  currently open PROLOGUE.\ndnl\ndnl  If only one PROLOGUE is open then the name can be omitted from\ndnl  EPILOGUE.  This is encouraged, since it means the name only has to\ndnl  appear in one place, not two.\ndnl\ndnl  The given name \"foo\" is not fully quoted here, it will be macro\ndnl  expanded more than once.  
This is the way the m4_list macros work, and\ndnl  it also helps the tune/many.pl program do a renaming like\ndnl  -D__gmpn_add_n=mpn_add_n_foo when GSYM_PREFIX is not empty.\n\ndefine(PROLOGUE,\nm4_assert_numargs_range(1,2)\n`m4_file_seen()dnl\ndefine(`PROLOGUE_list',m4_list_quote($1,PROLOGUE_list))dnl\nifelse(`$2',,\n`PROLOGUE_cpu(GSYM_PREFIX`'$1)',\n`PROLOGUE_cpu(GSYM_PREFIX`'$1,`$2')')')\n\ndefine(EPILOGUE,\nm4_assert_numargs_range(0,1)\n`ifelse(`$1',,\n`ifelse(m4_list_count(PROLOGUE_list),0,\n`m4_error(`no open functions for EPILOGUE\n')',\n`ifelse(m4_list_count(PROLOGUE_list),1,\n`EPILOGUE_internal(PROLOGUE_current_function)',\n`m4_error(`more than one open function for EPILOGUE\n')')')',\n`EPILOGUE_internal(`$1')')')\n\ndefine(EPILOGUE_internal,\nm4_assert_numargs(1)\nm4_assert_defined(`EPILOGUE_cpu')\n`ifelse(m4_list_find($1,PROLOGUE_list),0,\n`m4_error(`EPILOGUE without PROLOGUE: $1\n')')dnl\ndefine(`PROLOGUE_list',m4_list_quote(m4_list_remove($1,PROLOGUE_list)))dnl\nEPILOGUE_cpu(GSYM_PREFIX`$1')')\n\ndnl  Currently open PROLOGUEs, as a comma-separated list.\ndefine(PROLOGUE_list)\n\n\ndnl  Called: PROLOGUE_check(list,...)\ndnl  Check there's no remaining open PROLOGUEs at the end of input.\ndefine(PROLOGUE_check,\n`ifelse($1,,,\n`m4_error(`no EPILOGUE for: $1\n')dnl\nPROLOGUE_check(shift($@))')')\n\nm4wrap_prepend(`PROLOGUE_check(PROLOGUE_list)')\n\n\ndnl  Usage: PROLOGUE_current_function\ndnl\ndnl  This macro expands to the current PROLOGUE/EPILOGUE function, or the\ndnl  most recent PROLOGUE if such pairs are nested or overlapped.\n\ndefine(PROLOGUE_current_function,\nm4_assert_numargs(-1)\n`m4_list_first(PROLOGUE_list)')\n\n\ndnl  Usage: PROLOGUE_cpu(GSYM_PREFIX`'foo[,param])\ndnl         EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  These macros hold the CPU-specific parts of PROLOGUE and EPILOGUE.\ndnl  Both are called with the function name, with GSYM_PREFIX already\ndnl  prepended.\ndnl\ndnl  The definitions here are something typical and sensible, but 
CPU or\ndnl  system specific m4 files should redefine them as necessary.  The\ndnl  optional extra parameter to PROLOGUE_cpu is not expected and not\ndnl  accepted here.\n\ndefine(PROLOGUE_cpu,\nm4_assert_numargs(1)\n`\tTEXT\n\tALIGN(8)\n\tGLOBL\t`$1' GLOBL_ATTR\n\tTYPE(`$1',`function')\n`$1'LABEL_SUFFIX')\n\ndefine(EPILOGUE_cpu,\n`\tSIZE(`$1',.-`$1')')\n\n\ndnl  Usage: L(name)\ndnl\ndnl  Generate a local label with the given name.  This is simply a\ndnl  convenient way to add LSYM_PREFIX.\ndnl\ndnl  LSYM_PREFIX might be L$, so defn() must be used to quote it or the L\ndnl  will expand again as the L macro, making an infinite recursion.\n\ndefine(`L',\nm4_assert_numargs(1)\n`defn(`LSYM_PREFIX')$1')\n\n\ndnl  Usage: LDEF(name)\ndnl\ndnl  Generate a directive to define a local label.\ndnl\ndnl  On systems with a fixed syntax for defining labels there's no need to\ndnl  use this macro, it's only meant for systems where the syntax varies,\ndnl  like hppa which is \"L(foo):\" with gas, but just \"L(foo)\" in column 0\ndnl  with the system `as'.\ndnl\ndnl  The extra `' after LABEL_SUFFIX avoids any chance of a following\ndnl  \"(...)\"  being interpreted as an argument list.  Not that it'd be\ndnl  sensible to write anything like that after an LDEF(), but just in case.\n\ndefine(LDEF,\nm4_assert_numargs(1)\nm4_assert_defined(`LABEL_SUFFIX')\n`L(`$1')`'LABEL_SUFFIX`'')\n\n\ndnl  Usage: INT32(label,value)\ndnl         INT64(label,first,second)\n\ndefine(`INT32',\nm4_assert_defined(`W32')\n`\tALIGN(4)\nLDEF(`$1')\n\tW32\t$2')\n\ndefine(`INT64',\nm4_assert_defined(`W32')\n`\tALIGN(8)\nLDEF(`$1')\n\tW32\t$2\n\tW32\t$3')\n\n\ndnl  Usage: ALIGN(bytes)\ndnl\ndnl  Emit a \".align\" directive.  The alignment is specified in bytes, and\ndnl  will normally need to be a power of 2.  
The actual \".align\" generated\ndnl  is either bytes or logarithmic according to what ./configure finds the\ndnl  assembler needs.\ndnl\ndnl  If ALIGN_FILL_0x90 is defined and equal to \"yes\", then \", 0x90\" is\ndnl  appended.  This is for x86, see mpn/x86/README.\n\ndefine(ALIGN,\nm4_assert_numargs(1)\nm4_assert_defined(`ALIGN_LOGARITHMIC')\n`.align\tifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl\nifelse(ALIGN_FILL_0x90,yes,`, 0x90')')\n\n\ndnl  Usage: MULFUNC_PROLOGUE(function function...)\ndnl\ndnl  A dummy macro which is grepped for by ./configure to know what\ndnl  functions a multi-function file is providing.  Use this if there aren't\ndnl  explicit PROLOGUE()s for each possible function.\ndnl\ndnl  Multiple MULFUNC_PROLOGUEs can be used, or just one with the function\ndnl  names separated by spaces.\n\ndefine(`MULFUNC_PROLOGUE',\nm4_assert_numargs(1)\n)\n\n\ndnl  Usage: NAILS_SUPPORT(spec spec ...)\ndnl\ndnl  A dummy macro which is grepped for by ./configure to know what nails\ndnl  are supported in an asm file.\ndnl\ndnl  Ranges can be given, or just individual values.  Multiple values or\ndnl  ranges can be given, separated by spaces.  Multiple NAILS_SUPPORT\ndnl  declarations work too.  Some examples,\ndnl\ndnl         NAILS_SUPPORT(1-20)\ndnl         NAILS_SUPPORT(1 6 9-12)\ndnl         NAILS_SUPPORT(1-10 16-20)\n\ndefine(NAILS_SUPPORT,\nm4_assert_numargs(1)\n)\n\n\ndnl  Usage: GMP_NUMB_MASK\ndnl\ndnl  A bit mask for the number part of a limb.  Eg. with 6 bit nails in a\ndnl  32 bit limb, GMP_NUMB_MASK would be 0x3ffffff.\n\ndefine(GMP_NUMB_MASK,\nm4_assert_numargs(-1)\nm4_assert_defined(`GMP_NUMB_BITS')\n`m4_hex_lowmask(GMP_NUMB_BITS)')\n\n\ndivert`'dnl\n"
  },
  {
    "path": "mpn/cpp-ccas",
    "content": "#!/bin/sh\n#\n# A helper script for Makeasm.am .S.lo rule.\n\n# Copyright 2001 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage: cpp-cc --cpp=CPP CC ... file.S ...\n#\n# Process file.S with the given CPP command plus any -D options in the\n# rest of the arguments, then assemble with the given CC plus all\n# arguments.\n#\n# The CPP command must be in a single --cpp= argument, and will be\n# split on whitespace.  
It should include -I options required.\n#\n# When CC is invoked, file.S is replaced with a temporary .s file\n# which is the CPP output.\n#\n# Any lines starting with \"#\" are removed from the CPP output, usually\n# these will be #line and #file markers from CPP, but they might also\n# be comments from the .S.\n#\n# To allow parallel builds, the temp file name is based on the .S file\n# name, which will be the output object filename for all uses we put\n# this script to.\n\nCPP=\nCPPDEFS=\nCC=\nS=\nSEEN_O=no\n\nfor i in \"$@\"; do\n  case $i in\n    --cpp=*)\n      CPP=`echo \"$i\" | sed 's/^--cpp=//'`\n      ;;\n    -D*)\n      CPPDEFS=\"$CPPDEFS $i\"\n      CC=\"$CC $i\"\n      ;;\n    *.S)\n      if test -n \"$S\"; then\n        echo \"Only one .S file permitted\"\n        exit 1\n      fi\n      BASENAME=`echo \"$i\" | sed -e 's/\\.S$//' -e 's/^.*[\\\\/:]//'`\n      S=$i\n      TMP_I=tmp-$BASENAME.i\n      TMP_S=tmp-$BASENAME.s\n      CC=\"$CC $TMP_S\"\n      ;;\n    -o)\n      SEEN_O=yes\n      CC=\"$CC $i\"\n      ;;\n    *)\n      CC=\"$CC $i\"\n      ;;\n  esac\ndone\n\nif test -z \"$CPP\"; then\n  echo \"No --cpp specified\"\n  exit 1\nfi\n\nif test -z \"$S\"; then\n  echo \"No .S specified\"\n  exit 1\nfi\n\n# Libtool adds its own -o when sending output to .libs/foo.o, but not\n# when just wanting foo.o in the current directory.  We need an\n# explicit -o in both cases since we're assembling tmp-foo.s.\n#\nif test $SEEN_O = no; then\n  CC=\"$CC -o $BASENAME.o\"\nfi\n\necho \"$CPP $CPPDEFS $S >$TMP_I\"\n$CPP $CPPDEFS $S >$TMP_I || exit\n\necho \"grep -v '^#' $TMP_I >$TMP_S\"\ngrep -v '^#' $TMP_I >$TMP_S\n\necho \"$CC\"\n$CC || exit\n\n# Comment this out to preserve the .i and .s intermediates\nrm -f $TMP_I $TMP_S\n"
  },
  {
    "path": "mpn/generic/add.c",
    "content": "/* mpn_add - add mpn to mpn.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpn_add 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/add_1.c",
    "content": "/* mpn_add_1 - add limb to mpn.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#define __GMP_FORCE_mpn_add_1 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/add_err1_n.c",
    "content": "/* mpn_add_err1_n -- add_n with single error term\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/*\n  Computes:\n\n  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,\n  return value is carry out.\n\n  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).\n  Computes c[1]*yp[n-1] + ... 
+ c[n]*yp[0], stores two-limb result at ep.\n\n  Assumes n >= 1.\n */\nmp_limb_t\nmpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,\n                mp_size_t n, mp_limb_t cy)\n{\n  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp, n));\n\n  /* FIXME: first addition into eh:el is redundant */\n\n  yp += n - 1;\n  el = eh = 0;\n\n  do\n    {\n      yl = *yp--;\n      ul = *up++;\n      vl = *vp++;\n\n      /* ordinary add_n */\n      ADDC_LIMB (cy1, sl, ul, vl);\n      ADDC_LIMB (cy2, rl, sl, cy);\n      cy = cy1 | cy2;\n      *rp++ = rl;\n\n      /* update (eh:el) */\n      zl = cy ? yl : 0;\n      /* FIXME: consider alternative:\n            zl = (-cy) & yl;\n         Might be better on some machines?\n         Ditto for sub_err1_n, add_err2_n etc. */\n      el += zl;\n      eh += el < zl;\n    }\n  while (--n != 0);\n\n#if GMP_NAIL_BITS != 0\n  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);\n  el &= GMP_NUMB_MASK;\n#endif\n\n  ep[0] = el;\n  ep[1] = eh;\n\n  return cy;\n}\n"
  },
  {
    "path": "mpn/generic/add_err2_n.c",
    "content": "/* mpn_add_err2_n -- add_n with two error terms\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n  Same as mpn_add_err1_n, but computes both\n\n          c[1]*yp1[n-1] + ... + c[n]*yp1[0]\n    and   c[1]*yp2[n-1] + ... 
+ c[n]*yp2[0],\n\n  storing results at {ep,2}, {ep+2,2} respectively.\n */\nmp_limb_t\nmpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,\n                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,\n                mp_size_t n, mp_limb_t cy)\n{\n  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp1, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp2, n));\n\n  /* FIXME: first addition into eh:el is redundant */\n\n  yp1 += n - 1;\n  yp2 += n - 1;\n  el1 = eh1 = 0;\n  el2 = eh2 = 0;\n\n  do\n    {\n      yl1 = *yp1--;\n      yl2 = *yp2--;\n      ul = *up++;\n      vl = *vp++;\n\n      /* ordinary add_n */\n      ADDC_LIMB (cy1, sl, ul, vl);\n      ADDC_LIMB (cy2, rl, sl, cy);\n      cy = cy1 | cy2;\n      *rp++ = rl;\n\n      /* update (eh1:el1) */\n      zl1 = cy ? yl1 : 0;\n      el1 += zl1;\n      eh1 += el1 < zl1;\n\n      /* update (eh2:el2) */\n      zl2 = cy ? yl2 : 0;\n      el2 += zl2;\n      eh2 += el2 < zl2;\n    }\n  while (--n != 0);\n\n#if GMP_NAIL_BITS != 0\n  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);\n  el1 &= GMP_NUMB_MASK;\n  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);\n  el2 &= GMP_NUMB_MASK;\n#endif\n\n  ep[0] = el1;\n  ep[1] = eh1;\n  ep[2] = el2;\n  ep[3] = eh2;\n\n  return cy;\n}\n"
  },
  {
    "path": "mpn/generic/add_n.c",
    "content": "/* mpn_add_n -- Add equal length limb vectors.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#if GMP_NAIL_BITS == 0\n\nmp_limb_t\nmpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n\n  cy = 0;\n  do\n    {\n      ul = *up++;\n      vl = *vp++;\n      sl = ul + vl;\n      cy1 = sl < ul;\n      rl = sl + cy;\n      cy2 = rl < sl;\n      cy = cy1 | cy2;\n      *rp++ = rl;\n    }\n  while (--n != 0);\n\n  return cy;\n}\n\n#endif\n\n#if GMP_NAIL_BITS >= 1\n\nmp_limb_t\nmpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t ul, vl, rl, cy;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n\n  cy = 0;\n  do\n    {\n      ul = *up++;\n      vl = *vp++;\n      rl = ul + vl + cy;\n      cy = rl >> GMP_NUMB_BITS;\n      *rp++ = rl & GMP_NUMB_MASK;\n    }\n  while (--n != 0);\n\n  return cy;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/addadd_n.c",
    "content": "/*   Copyright 2012 The Code cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_limb_t mpn_addadd_n(mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)\n{\n   mp_limb_t ret;\n   mp_srcptr a = x, b = y, c = z;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(x, n);\n   ASSERT_MPN(y, n);\n   ASSERT_MPN(z, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, y, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, z, n));\n\n   if(t == x)\n   {\n      if (t == y)\n      {\n         if (t == z)\n         {\n#ifdef HAVE_NATIVE_mpn_addlsh1_n\n            return mpn_addlsh1_n(t, x, y, n);\n#else\n            return mpn_mul_1(t, x, n, 3);\n#endif  \n         }\n      }\n      else\n      {\n         MP_SRCPTR_SWAP(b, c);\n      }\n   }\n   else\n   {\n      MP_SRCPTR_SWAP(a, c);\n      \n      if (t == y)\n         MP_SRCPTR_SWAP(a, b);\n   }\n\n   ret = mpn_add_n(t, a, b, n);\n   \n   return ret + mpn_add_n(t, t, c, n);\n}\n"
  },
  {
    "path": "mpn/generic/addmul_1.c",
    "content": "/* mpn_addmul_1 -- multiply the N long limb vector pointed to by UP by VL,\n   add the N least significant limbs of the product to the limb vector\n   pointed to by RP.  Return the most significant limb of the product,\n   adjusted for carry-out from the addition.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if GMP_NAIL_BITS == 0\n\nmp_limb_t\nmpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t ul, cl, hpl, lpl, rl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n\n  cl = 0;\n  do\n    {\n      ul = *up++;\n      umul_ppmm (hpl, lpl, ul, vl);\n\n      lpl += cl;\n      cl = (lpl < cl) + hpl;\n\n      rl = *rp;\n      lpl = rl + lpl;\n      cl += lpl < rl;\n      *rp++ = lpl;\n    }\n  while (--n != 0);\n\n  return cl;\n}\n\n#endif\n\n#if GMP_NAIL_BITS == 1\n\nmp_limb_t\nmpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, cl, xl, c1, c2, c3;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT_MPN (rp, n);\n  ASSERT_MPN (up, n);\n  ASSERT_LIMB (vl);\n\n  shifted_vl = vl << GMP_NAIL_BITS;\n  cl = 0;\n  prev_hpl = 0;\n  do\n    {\n      ul = *up++;\n      rl = *rp;\n      umul_ppmm (hpl, lpl, ul, shifted_vl);\n      lpl >>= GMP_NAIL_BITS;\n      ADDC_LIMB (c1, xl, prev_hpl, lpl);\n      ADDC_LIMB (c2, xl, xl, rl);\n      ADDC_LIMB (c3, xl, xl, cl);\n      cl = c1 + c2 + c3;\n      *rp++ = xl;\n      prev_hpl = hpl;\n    }\n  while (--n != 0);\n\n  return prev_hpl + cl;\n}\n\n#endif\n\n#if GMP_NAIL_BITS >= 2\n\nmp_limb_t\nmpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, xw, cl, xl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT_MPN (rp, n);\n  ASSERT_MPN (up, n);\n  ASSERT_LIMB (vl);\n\n  shifted_vl = vl << GMP_NAIL_BITS;\n  cl = 0;\n  prev_hpl = 0;\n  do\n    {\n      ul = *up++;\n      rl = *rp;\n      umul_ppmm (hpl, lpl, ul, shifted_vl);\n      lpl >>= GMP_NAIL_BITS;\n      xw = prev_hpl + lpl + rl + cl;\n      cl = xw >> GMP_NUMB_BITS;\n      xl = xw & GMP_NUMB_MASK;\n      *rp++ = xl;\n      prev_hpl = hpl;\n    }\n  while (--n != 
0);\n\n  return prev_hpl + cl;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/addsub_n.c",
    "content": "/*   Copyright 2012 The Code cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* t = x + y - z */\nint mpn_addsub_n(mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)\n{\n   mp_limb_t ret;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(x, n);\n   ASSERT_MPN(y, n);\n   ASSERT_MPN(z, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, y, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, z, n));\n\n   if (t == x && t == y && t == z)\n      return 0;\n\n   if (t == x && t == y)\n   {\n      ret = mpn_add_n(t, x, y, n);\n      ret -= mpn_sub_n(t, t, z, n);\n   \n      return ret;\n   }\n\n   if (t == x && t == z)\n   {\n      ret = -mpn_sub_n(t, x, z, n);\n      ret += mpn_add_n(t, t, y, n);\n   \n      return ret;\n   }\n\n   if (t == y && t == z)\n   {\n      ret = -mpn_sub_n(t, y, z, n);\n      ret += mpn_add_n(t, t, x, n);\n   \n      return ret;\n   }\n\n   if (t == x)\n   {\n      ret = mpn_add_n(t, x, y, n);\n      ret -= mpn_sub_n(t, t, z, n);\n   \n      return ret;\n   }\n\n   if (t == y)\n   {\n      ret = mpn_add_n(t, y, x, n);\n      ret -= mpn_sub_n(t, t, z, n);\n    \n      return ret;\n   }\n\n   if (t == z)\n   
{\n      ret = -mpn_sub_n(t, x, z, n);\n      ret += mpn_add_n(t, t, y, n);\n   \n      return ret;\n   }\n\n   ret = mpn_add_n(t, x, y, n);\n   ret -= mpn_sub_n(t, t, z, n);\n\n   return ret;\n}\n"
  },
  {
    "path": "mpn/generic/and_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_and_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(and)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/andn_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_andn_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(andn)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/bdivmod.c",
    "content": "/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).\n\n   Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and\n   returns the high d%BITS_PER_MP_LIMB bits of Q as the result.\n\n   Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up.  Since the\n   low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows\n   the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.\n\n   Preconditions:\n   1.  V is odd.\n   2.  usize * BITS_PER_MP_LIMB >= d.\n   3.  If Q and U overlap, qp <= up.\n\n   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)\n\n   Funding for this work has been partially provided by Conselho Nacional\n   de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant\n   301314194-2, and was done while I was a visiting reseacher in the Instituto\n   de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).\n\n   References:\n       T. 
Jebelean, An algorithm for exact division, Journal of Symbolic\n       Computation, v. 15, 1993, pp. 169-180.\n\n       K. Weber, The accelerated integer GCD algorithm, ACM Transactions on\n       Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nmp_limb_t\nmpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,\n\t     mp_srcptr vp, mp_size_t vsize, mpir_ui d)\n{\n  mp_limb_t v_inv;\n\n  ASSERT (usize >= 1);\n  ASSERT (vsize >= 1);\n  ASSERT (usize * GMP_NUMB_BITS >= d);\n  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));\n  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));\n  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));\n  ASSERT_MPN (up, usize);\n  ASSERT_MPN (vp, vsize);\n\n  /* 1/V mod 2^GMP_NUMB_BITS. */\n  modlimb_invert (v_inv, vp[0]);\n\n  /* Fast code for two cases previously used by the accel part of mpn_gcd.\n     (Could probably remove this now it's inlined there.) */\n  if (usize == 2 && vsize == 2 &&\n      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))\n    {\n      mp_limb_t hi, lo;\n      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;\n      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);\n      up[0] = 0;\n      up[1] -= hi + q*vp[1];\n      qp[0] = q;\n      if (d == 2*GMP_NUMB_BITS)\n        {\n          q = (up[1] * v_inv) & GMP_NUMB_MASK;\n          up[1] = 0;\n          qp[1] = q;\n        }\n      return 0;\n    }\n\n  /* Main loop.  
*/\n  while (d >= GMP_NUMB_BITS)\n    {\n      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;\n      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);\n      if (usize > vsize)\n\tmpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);\n      d -= GMP_NUMB_BITS;\n      up += 1, usize -= 1;\n      *qp++ = q;\n    }\n\n  if (d)\n    {\n      mp_limb_t b;\n      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);\n      if (q <= 1)\n\t{\n\t  if (q == 0)\n\t    return 0;\n\t  else\n\t    b = mpn_sub_n (up, up, vp, MIN (usize, vsize));\n\t}\n      else\n\tb = mpn_submul_1 (up, vp, MIN (usize, vsize), q);\n\n      if (usize > vsize)\n\tmpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);\n      return q;\n    }\n\n  return 0;\n}\n"
  },
  {
    "path": "mpn/generic/binvert.c",
    "content": "/* Compute {up,n}^(-1) mod B^n.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright (C) 2004-2007, 2009, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/*\n  r[k+1] = r[k] - r[k] * (u*r[k] - 1)\n  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])\n*/\n\n#if TUNE_PROGRAM_BUILD\n#define NPOWS \\\n ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))\n#else\n#define NPOWS \\\n ((sizeof(mp_size_t) > 6 ? 
48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))\n#endif\n\nmp_size_t\nmpn_binvert_itch (mp_size_t n)\n{\n  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);\n  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);\n  return itch_local + itch_out;\n}\n\nvoid\nmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)\n{\n  mp_ptr xp;\n  mp_size_t rn, newrn;\n  mp_size_t sizes[NPOWS], *sizp;\n  mp_limb_t di;\n\n  /* Compute the computation precisions from highest to lowest, leaving the\n     base case size in 'rn'.  */\n  sizp = sizes;\n  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)\n    *sizp++ = rn;\n\n  xp = scratch;\n\n  /* Compute a base value of rn limbs.  */\n  MPN_ZERO (xp, rn);\n  xp[0] = 1;\n  /* JPF: GMP goes the other way around and has renamed to binvert_limb */\n  modlimb_invert (di, up[0]);\n  /* JPF: GMP vs MPIR diff; MPIR returns a two limbs overflow and use sub rather than add */\n  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))\n    mpn_sb_bdiv_q (rp, xp+rn, xp, rn, up, rn, di);\n  else\n    mpn_dc_bdiv_q (rp, xp, rn, up, rn, di);\n\n  /* Use Newton iterations to get the desired precision.  */\n  if (rn == n)\n    return;\n  newrn = *--sizp;\n  for (; newrn < n;)\n    {\n      mp_size_t m;\n\n      /* X <- UR. */\n      m = mpn_mulmod_bnm1_next_size (newrn);\n      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);\n      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);\n\n      /* R = R(X/B^rn) */\n      mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);\n      mpn_neg (rp + rn, rp + rn, newrn - rn);\n\n      rn = newrn;\n      newrn = *--sizp;\n    }\n  /* Last iteration would overflow in the mullow call */\n    {\n      mp_size_t m;\n\n      /* X <- UR. 
*/\n      m = mpn_mulmod_bnm1_next_size (newrn);\n      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);\n      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);\n\n      /* R = R(X/B^rn) */\n      mpn_mullow_n (xp + newrn, rp, xp + rn, newrn - rn); /* JPF: would overflow */\n      /* At most we need 2*(newrn - rn) limbs at xp + newrn, so need 3*newrn - 2*rn */\n      /* As 2rn > newrn, and n == newrn, this gives at max 2*n for xp */\n      /* which we already ensure */\n      mpn_neg (rp + rn, xp + newrn, newrn - rn);\n    }\n}\n"
  },
  {
    "path": "mpn/generic/cmp.c",
    "content": "/* mpn_cmp -- Compare two low-level natural-number integers.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpn_cmp 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/com_n.c",
    "content": "/* mpn_com - complement an mpn.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef mpn_com_n\n#define mpn_com_n __MPN(com_n)\n\nvoid\nmpn_com_n (mp_ptr rp, mp_srcptr up, mp_size_t n)\n{\n  mp_limb_t ul;\n  do {\n      ul = *up++;\n      *rp++ = ~ul & GMP_NUMB_MASK;\n  } while (--n != 0);\n}\n"
  },
  {
    "path": "mpn/generic/comb_tables.c",
    "content": "/* Const tables shared among combinatoric functions.\n\n   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO\n   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.\n\nCopyright 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_LIMB_BITS == 64\n\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x3),CNST_LIMB(0x3),CNST_LIMB(0xf),CNST_LIMB(0x2d),CNST_LIMB(0x13b),CNST_LIMB(0x13b),CNST_LIMB(0xb13),CNST_LIMB(0x375f),CNST_LIMB(0x26115),CNST_LIMB(0x7233f),CNST_LIMB(0x5cca33),CNST_LIMB(0x2898765),CNST_LIMB(0x260eeeeb),CNST_LIMB(0x260eeeeb),CNST_LIMB(0x286fddd9b),CNST_LIMB(0x16beecca73),CNST_LIMB(0x1b02b930689),CNST_LIMB(0x870d9df20ad),CNST_LIMB(0xb141df4dae31),CNST_LIMB(0x79dd498567c1b),CNST_LIMB(0xaf2e19afc5266d),CNST_LIMB(0x20d8a4d0f4f7347),CNST_LIMB(0x335281867ec241ef)\n\n/* Previous table, continued, values modulo 2^GMP_NUMB_BITS */\n#define ONE_LIMB_ODD_FACTORIAL_EXTTABLE 
CNST_LIMB(0x9b3093d46fdd5923),CNST_LIMB(0x5e1f9767cc5866b1),CNST_LIMB(0x92dd23d6966aced7),CNST_LIMB(0xa30d0f4f0a196e5b),CNST_LIMB(0x8dc3e5a1977d7755),CNST_LIMB(0x2ab8ce915831734b),CNST_LIMB(0x2ab8ce915831734b),CNST_LIMB(0x81d2a0bc5e5fdcab),CNST_LIMB(0x9efcac82445da75b),CNST_LIMB(0xbc8b95cf58cde171),CNST_LIMB(0xa0e8444a1f3cecf9),CNST_LIMB(0x4191deb683ce3ffd),CNST_LIMB(0xddd3878bc84ebfc7),CNST_LIMB(0xcb39a64b83ff3751),CNST_LIMB(0xf8203f7993fc1495),CNST_LIMB(0xbd2a2a78b35f4bdd),CNST_LIMB(0x84757be6b6d13921),CNST_LIMB(0x3fbbcfc0b524988b),CNST_LIMB(0xbd11ed47c8928df9),CNST_LIMB(0x3c26b59e41c2f4c5),CNST_LIMB(0x677a5137e883fdb3),CNST_LIMB(0xff74e943b03b93dd),CNST_LIMB(0xfe5ebbcb10b2bb97),CNST_LIMB(0xb021f1de3235e7e7),CNST_LIMB(0x33509eb2e743a58f),CNST_LIMB(0x390f9da41279fb7d),CNST_LIMB(0xe5cb0154f031c559),CNST_LIMB(0x93074695ba4ddb6d),CNST_LIMB(0x81c471caa636247f),CNST_LIMB(0xe1347289b5a1d749),CNST_LIMB(0x286f21c3f76ce2ff),CNST_LIMB(0xbe84a2173e8ac7),CNST_LIMB(0x1595065ca215b88b),CNST_LIMB(0xf95877595b018809),CNST_LIMB(0x9c2efe3c5516f887),CNST_LIMB(0x373294604679382b),CNST_LIMB(0xaf1ff7a888adcd35),CNST_LIMB(0x18ddf279a2c5800b),CNST_LIMB(0x18ddf279a2c5800b),CNST_LIMB(0x505a90e2542582cb),CNST_LIMB(0x5bacad2cd8d5dc2b),CNST_LIMB(0xfe3152bcbff89f41)\n\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! 
has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x3),CNST_LIMB(0xf),CNST_LIMB(0x69),CNST_LIMB(0x3b1),CNST_LIMB(0x289b),CNST_LIMB(0x20fdf),CNST_LIMB(0x1eee11),CNST_LIMB(0x20dcf21),CNST_LIMB(0x27065f73),CNST_LIMB(0x33385d46f),CNST_LIMB(0x49a10615f9),CNST_LIMB(0x730b9982551),CNST_LIMB(0xc223930bef8b),CNST_LIMB(0x15fe07a85a22bf),CNST_LIMB(0x2a9c2ed62ea3521),CNST_LIMB(0x57e22099c030d941)\n\n/* This table contains 2n-popc(2n) for small n */\n\n/* It begins with 2-1=1 (n=1) */\n#define TABLE_2N_MINUS_POPC_2N 1,3,4,7,8,10,11,15,16,18,19,22,23,25,26,31,32,34,35,38,39,41,42,46,47,49,50,53,54,56,57,63,64,66,67,70,71,73,74,78\n\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n#define NTH_ROOT_NUMB_MASK_TABLE (GMP_NUMB_MASK),CNST_LIMB(0xffffffff),CNST_LIMB(0x285145),CNST_LIMB(0xffff),CNST_LIMB(0x1bdb),CNST_LIMB(0x659),CNST_LIMB(0x235),CNST_LIMB(0xff)\n\n#else\n\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x3),CNST_LIMB(0x3),CNST_LIMB(0xf),CNST_LIMB(0x2d),CNST_LIMB(0x13b),CNST_LIMB(0x13b),CNST_LIMB(0xb13),CNST_LIMB(0x375f),CNST_LIMB(0x26115),CNST_LIMB(0x7233f),CNST_LIMB(0x5cca33),CNST_LIMB(0x2898765),CNST_LIMB(0x260eeeeb),CNST_LIMB(0x260eeeeb)\n\n/* Previous table, continued, values modulo 2^GMP_NUMB_BITS */\n#define ONE_LIMB_ODD_FACTORIAL_EXTTABLE CNST_LIMB(0x86fddd9b),CNST_LIMB(0xbeecca73),CNST_LIMB(0x2b930689),CNST_LIMB(0xd9df20ad),CNST_LIMB(0xdf4dae31),CNST_LIMB(0x98567c1b),CNST_LIMB(0xafc5266d),CNST_LIMB(0xf4f7347),CNST_LIMB(0x7ec241ef),CNST_LIMB(0x6fdd5923),CNST_LIMB(0xcc5866b1),CNST_LIMB(0x966aced7),CNST_LIMB(0xa196e5b),CNST_LIMB(0x977d7755),CNST_LIMB(0x5831734b),CNST_LIMB(0x5831734b),CNST_LIMB(0x5e5fdcab),CNST_LIMB(0x445da75b)\n\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! 
has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x3),CNST_LIMB(0xf),CNST_LIMB(0x69),CNST_LIMB(0x3b1),CNST_LIMB(0x289b),CNST_LIMB(0x20fdf),CNST_LIMB(0x1eee11),CNST_LIMB(0x20dcf21),CNST_LIMB(0x27065f73)\n\n/* This table contains 2n-popc(2n) for small n */\n\n/* It begins with 2-1=1 (n=1) */\n#define TABLE_2N_MINUS_POPC_2N 1,3,4,7,8,10,11,15,16,18,19,22,23,25,26,31,32,34,35,38,39,41,42,46\n\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n#define NTH_ROOT_NUMB_MASK_TABLE (GMP_NUMB_MASK),CNST_LIMB(0xffff),CNST_LIMB(0x659),CNST_LIMB(0xff),CNST_LIMB(0x54),CNST_LIMB(0x28),CNST_LIMB(0x17),CNST_LIMB(0xf)\n\n#endif\n\n/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis\n   is an odd integer. */\nconst mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };\n\n/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis\n   is an odd integer. */\nconst mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };\n\n/* Entry i contains 2i-popc(2i). */\nconst unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };\n\nconst mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };\n"
  },
  {
    "path": "mpn/generic/copyd.c",
    "content": "/* Fat binary fallback mpn_copyd\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpn_copyd(mp_ptr rp, mp_srcptr sp, mp_size_t n)\n{\n\tMPN_COPY_DECR(rp, sp, n);\n}\n"
  },
  {
    "path": "mpn/generic/copyi.c",
    "content": "/* Fat binary fallback mpn_copyi\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpn_copyi(mp_ptr rp, mp_srcptr sp, mp_size_t n)\n{\n\tMPN_COPY_INCR(rp, sp, n);\n}\n"
  },
  {
    "path": "mpn/generic/dc_bdiv_q.c",
    "content": "/* mpn_dc_bdiv_q -- divide-and-conquer Hensel division with precomputed\n   inverse, returning quotient.\n\n   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (minor modifications)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Computes Q = N / D mod B^nn, destroys N.\n\n   N = {np,nn}\n   D = {dp,dn}\n*/\n\nvoid\nmpn_dc_bdiv_q (mp_ptr qp,\n\t\t  mp_ptr np, mp_size_t nn,\n\t\t  mp_srcptr dp, mp_size_t dn,\n\t\t  mp_limb_t dinv)\n{\n  mp_size_t qn;\n  mp_limb_t cy, wp[2];\n  mp_ptr tp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\n  ASSERT (nn - dn >= 0);\n  ASSERT (dp[0] & 1);\n\n  tp = TMP_ALLOC_LIMBS (MAX(dn, DC_BDIV_Q_N_ITCH(dn)));\n\n  qn = nn;\n\n  if (qn > dn)\n    {\n      /* Reduce qn mod dn in a super-efficient manner.  */\n      do\n\tqn -= dn;\n      while (qn > dn);\n\n      /* Perform the typically smaller block first.  
*/\n      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))\n\tcy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);\n      else\n\tcy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);\n\n      if (qn != dn)\n\t{\n\t  if (qn > dn - qn)\n\t    mpn_mul (tp, qp, qn, dp + qn, dn - qn);\n\t  else\n\t    mpn_mul (tp, dp + qn, dn - qn, qp, qn);\n\t  mpn_incr_u (tp + qn, cy);\n\n\t  mpn_sub (np + qn, np + qn, nn - qn, tp, dn);\n\t  cy = 0;\n\t}\n\n      np += qn;\n      qp += qn;\n\n      qn = nn - qn;\n      while (qn > dn)\n\t{\n\t  mpn_sub_1 (np + dn, np + dn, qn, cy);\n\t  cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);\n\t  qp += dn;\n\t  np += dn;\n\t  qn -= dn;\n\t}\n      mpn_dc_bdiv_q_n (qp, wp, np, dp, dn, dinv, tp);\n    }\n  else\n    {\n      if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))\n\tmpn_sb_bdiv_q (qp, wp, np, qn, dp, qn, dinv);\n      else\n\tmpn_dc_bdiv_q_n (qp, wp, np, dp, qn, dinv, tp);\n    }\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/dc_bdiv_q_n.c",
    "content": "/* mpn_dc_bdiv_q_n -- binary division, producing quotient and overflow,\n                        divide-and-conquer algorithm\n\nCopyright (C) 2009, David Harvey\nCopyright (C) 2010, William Hart.\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n  Computes {np, n} / {dp, n} mod B^n, using divide-and-conquer\n  algorithm, switching to classical for n <= BDIV_Q_DC_THRESHOLD.\n\n  Also computes a 2 limb \"overflow\". 
See sb_bdiv_q.c for a definition.\n\n  scratch is workspace.\n*/\nvoid\nmpn_dc_bdiv_q_n (mp_ptr qp, mp_ptr wp, mp_ptr np, mp_srcptr dp,\n\t\t   mp_size_t n, mp_limb_t dinv, mp_ptr scratch)\n{\n  mp_size_t s, t;\n  mp_limb_t cy;\n\n  ASSERT (n >= 6);\n  ASSERT (! MPN_OVERLAP_P (qp, n, np, n));\n  ASSERT (! MPN_OVERLAP_P (qp, n, dp, n));\n  ASSERT (! MPN_OVERLAP_P (wp, 2, np, n));\n  ASSERT (! MPN_OVERLAP_P (wp, 2, dp, n));\n  ASSERT (! MPN_OVERLAP_P (np, n, dp, n));\n\n  /*\n    Example with s = 4, t = 3, n = 7:\n\n         C\n         C C\n         C C C\n  qp  .  A B B B\n      .  A A B B B\n      1  A A A B B B\n      0  A A A A B B B\n         0 1 ...\n           dp\n  */\n\n  t = n / 2;    /*  t = floor(n/2)  */\n  s = n - t;    /*  s = ceil(n/2)   */\n\n  /*  recurse into low half of quotient (region A)  */\n  if (s <= DC_BDIV_Q_THRESHOLD)\n    mpn_sb_bdiv_q (qp, wp, np, s, dp, s, dinv);\n  else\n    mpn_dc_bdiv_q_n (qp, wp, np, dp, s, dinv, scratch);\n\n  /*  remove region B and overflow from A from N\n      (if n odd, do first row of B separately --- we could have used\n      mpn_mulmid, but this saves some logic) */\n  mpn_mulmid_n (scratch, dp + 1, qp + (n & 1), t);\n  if (n & 1)\n    {\n      cy = mpn_addmul_1 (scratch, dp + s, t, qp[0]);\n      MPN_INCR_U (scratch + t, 2, cy);\n    }\n  ADDC_LIMB (cy, scratch[0], scratch[0], wp[0]);      /* overflow from A */\n  MPN_INCR_U (scratch + 1, t + 1, wp[1] + cy);\n  cy = mpn_sub_n (np + s, np + s, scratch, t);\n  MPN_INCR_U (scratch + t, 2, cy);\n\n  /*  recurse into top half of quotient (region C)\n      (this does not overwrite {scratch + t, 2}, because n >= 6 implies\n      t >= 3 implies floor(t/2) + 2 <= t) */\n  if (t <= DC_BDIV_Q_THRESHOLD)\n    mpn_sb_bdiv_q (qp + s, wp, np + s, t, dp, t, dinv);\n  else\n    mpn_dc_bdiv_q_n (qp + s, wp, np + s, dp, t, dinv, scratch);\n\n  /*  combine overflows from B and C  */\n  ADDC_LIMB (cy, wp[0], wp[0], scratch[t]);\n  wp[1] += scratch[t + 1] + cy;\n}\n"
  },
  {
    "path": "mpn/generic/dc_bdiv_qr.c",
    "content": "/* mpn_dc_bdiv_qr -- divide-and-conquer Hensel division with precomputed\n   inverse, returning quotient and remainder.\n\n   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (minor modifications)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Computes Hensel binary division of {np, 2*n} by {dp, n}.\n\n   Output:\n\n      q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},\n\n      r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}\n\n   Stores q at qp. Stores the n least significant limbs of r at the high half\n   of np, and returns the borrow from the subtraction n - q*d.\n\n   d must be odd. dinv is d^-1 mod 2^GMP_NUMB_BITS. 
\n*/\nmp_limb_t\nmpn_dc_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,\n\t\t   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)\n{\n  mp_size_t qn;\n  mp_limb_t rr, cy;\n  mp_ptr tp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 2);\t\t/* to adhere to mpn_sbpi1_div_qr's limits */\n  ASSERT (nn - dn >= 1);\t/* to adhere to mpn_sbpi1_div_qr's limits */\n  ASSERT (dp[0] & 1);\n\n  tp = TMP_ALLOC_LIMBS (dn);\n\n  qn = nn - dn;\n\n  if (qn > dn)\n    {\n      /* Reduce qn mod dn without division, optimizing small operations.  */\n      do\n\tqn -= dn;\n      while (qn > dn);\n\n      /* Perform the typically smaller block first.  */\n      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))\n\tcy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);\n      else\n\tcy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);\n\n      rr = 0;\n      if (qn != dn)\n\t{\n\t  if (qn > dn - qn)\n\t    mpn_mul (tp, qp, qn, dp + qn, dn - qn);\n\t  else\n\t    mpn_mul (tp, dp + qn, dn - qn, qp, qn);\n\t  mpn_incr_u (tp + qn, cy);\n\n\t  rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);\n\t  cy = 0;\n\t}\n\n      np += qn;\n      qp += qn;\n\n      qn = nn - dn - qn;\n      do\n\t{\n\t  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);\n\t  cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);\n\t  qp += dn;\n\t  np += dn;\n\t  qn -= dn;\n\t}\n      while (qn > 0);\n      TMP_FREE;\n      return rr + cy;\n    }\n\n  if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))\n    cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);\n  else\n    cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);\n\n  rr = 0;\n  if (qn != dn)\n    {\n      if (qn > dn - qn)\n\tmpn_mul (tp, qp, qn, dp + qn, dn - qn);\n      else\n\tmpn_mul (tp, dp + qn, dn - qn, qp, qn);\n      mpn_incr_u (tp + qn, cy);\n\n      rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);\n      cy = 0;\n    }\n\n  TMP_FREE;\n  return rr + cy;\n}\n"
  },
  {
    "path": "mpn/generic/dc_bdiv_qr_n.c",
    "content": "/* mpn_dc_bdiv_qr -- divide-and-conquer Hensel division with precomputed\n   inverse, returning quotient and remainder.\n\n   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (minor modifications)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Computes Hensel binary division of {np, 2*n} by {dp, n}.\n\n   Output:\n\n      q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},\n\n      r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}\n\n   Stores q at qp. Stores the n least significant limbs of r at the high half\n   of np, and returns the borrow from the subtraction n - q*d.\n\n   d must be odd. dinv is d^-1 mod 2^GMP_NUMB_BITS. 
\n*/\nmp_limb_t\nmpn_dc_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,\n\t\t     mp_limb_t dinv, mp_ptr tp)\n{\n  mp_size_t lo, hi;\n  mp_limb_t cy;\n  mp_limb_t rh;\n\n  lo = n >> 1;\t/* floor(n/2) */\n  hi = n - lo;\t/* ceil(n/2) */\n\n  if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))\n    cy = mpn_sb_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);\n  else\n    cy = mpn_dc_bdiv_qr_n (qp, np, dp, lo, dinv, tp);\n\n  mpn_mul (tp, dp + lo, hi, qp, lo);\n\n  mpn_incr_u (tp + lo, cy);\n  rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);\n\n  if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))\n    cy = mpn_sb_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);\n  else\n    cy = mpn_dc_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);\n\n  mpn_mul (tp, qp + lo, hi, dp + hi, lo);\n\n  mpn_incr_u (tp + hi, cy);\n  rh += mpn_sub_n (np + n, np + n, tp, n);\n\n  return rh;\n}\n\n"
  },
  {
    "path": "mpn/generic/dc_div_q.c",
    "content": "/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient\n   only.\n\n   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nmp_limb_t\nmpn_dc_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,\n\t\t mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)\n{\n  mp_ptr tp, wp;\n  mp_limb_t qh;\n  mp_size_t qn;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\n  ASSERT (nn - dn >= 3);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  tp = TMP_ALLOC_LIMBS (nn + 1);\n  MPN_COPY (tp + 1, np, nn);\n  tp[0] = 0;\n\n  qn = nn - dn;\n  wp = TMP_ALLOC_LIMBS (qn + 1);\n\n  qh = mpn_dc_divappr_q (wp, tp, nn + 1, dp, dn, dinv);\n\n  if (wp[0] == 0)\n    {\n      mp_limb_t cy;\n\n      if (qn > dn)\n\tmpn_mul (tp, wp + 1, qn, dp, dn);\n      else\n\tmpn_mul (tp, dp, dn, wp + 1, qn);\n\n      cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;\n\n      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */\n\tqh -= mpn_sub_1 (qp, wp + 1, qn, 1);\n      else /* Same as below */\n\tMPN_COPY (qp, wp + 1, qn);\n    }\n  else\n    MPN_COPY (qp, wp + 1, qn);\n\n  TMP_FREE;\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/dc_div_qr.c",
    "content": "/* mpn_dc_div_qr_n -- recursive divide-and-conquer division for arbitrary\n   size operands.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nmp_limb_t\nmpn_dc_div_qr (mp_ptr qp,\n\t\t  mp_ptr np, mp_size_t nn,\n\t\t  mp_srcptr dp, mp_size_t dn,\n\t\t  mp_limb_t dinv)\n{\n  mp_size_t qn;\n  mp_limb_t qh, cy;\n  mp_ptr tp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\t\t/* to adhere to mpn_sb_div_qr's limits */\n  ASSERT (nn - dn >= 3);\t/* to adhere to mpn_sb_div_qr's limits */\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));\n\n  qn = nn - dn;\n  qp += qn;\n  np += nn;\n  dp += dn;\n\n  if (qn > dn)\n    {\n      /* Reduce qn mod dn without division, optimizing small operations.  */\n      do\n\tqn -= dn;\n      while (qn > dn);\n\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      /* Perform the typically smaller block first.  */\n      if (qn == 1)\n\t{\n\t  mp_limb_t q, n2, n1, n0, d1, d0;\n\n\t  /* Handle qh up front, for simplicity. */\n\t  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;\n\t  if (qh)\n\t    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));\n\n\t  /* A single iteration of schoolbook: One 3/2 division,\n\t     followed by the bignum update and adjustment. */\n\t  n2 = np[0];\n\t  n1 = np[-1];\n\t  n0 = np[-2];\n\t  d1 = dp[-1];\n\t  d0 = dp[-2];\n     \n\t  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));\n\n\t  if (UNLIKELY (n2 == d1) && n1 == d0)\n\t    {\n\t      q = GMP_NUMB_MASK;\n\t      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);\n\t      ASSERT (cy == n2);\n\t    }\n\t  else\n\t    {\n\t      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv);\n\n\t      if (dn > 2)\n\t\t{\n\t\t  mp_limb_t cy, cy1;\n\t\t  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);\n\n\t\t  cy1 = n0 < cy;\n\t\t  n0 = (n0 - cy) & GMP_NUMB_MASK;\n\t\t  cy = n1 < cy1;\n\t\t  n1 = (n1 - cy1) & GMP_NUMB_MASK;\n\t\t  np[-2] = n0;\n\n\t\t  if (UNLIKELY (cy != 0))\n\t\t    {\n\t\t      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);\n\t\t      qh -= (q == 0);\n\t\t      q = (q - 1) & GMP_NUMB_MASK;\n\t\t    }\n\t\t}\n\t      else\n\t\tnp[-2] = n0;\n\n\t      np[-1] = n1;\n\t    }\n\t  qp[0] = q;\n\t}\n      else\n\t{\n      /* Do a 2qn / qn division */\n\t  if (qn == 2)\n\t    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */\n\t  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n\t    qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);\n\t  else\n\t    qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);\n\n\t  if (qn != dn)\n\t    {\n\t      if (qn > dn - qn)\n\t\tmpn_mul (tp, qp, qn, dp - dn, dn - qn);\n\t      else\n\t\tmpn_mul (tp, dp - dn, dn - qn, qp, qn);\n\n\t      cy = mpn_sub_n (np - dn, np - dn, tp, dn);\n\t      if (qh != 0)\n\t\tcy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);\n\n\t      while (cy != 0)\n\t\t{\n\t\t  qh -= mpn_sub_1 (qp, qp, qn, 1);\n\t\t  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);\n\t\t}\n\t    }\n\t}\n\n      qn = nn - dn - qn;\n      do\n\t{\n\t  qp -= dn;\n\t  np -= dn;\n\t  ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp));\n\t  qn -= dn;\n\t}\n      while (qn > 0);\n    }\n  else\n    {\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n\tqh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv);\n      else\n\tqh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);\n\n      if (qn != dn)\n\t{\n\t  if (qn > dn - qn)\n\t    mpn_mul (tp, qp, qn, dp - dn, dn - qn);\n\t  else\n\t    mpn_mul (tp, dp - dn, dn - qn, qp, qn);\n\n\t  cy = mpn_sub_n (np - dn, np - dn, tp, dn);\n\t  if (qh != 0)\n\t    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);\n\n\t  while (cy != 0)\n\t    {\n\t      qh -= mpn_sub_1 (qp, qp, qn, 1);\n\t      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);\n\t    }\n\t}\n    }\n\n  TMP_FREE;\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/dc_div_qr_n.c",
    "content": "/* mpn_dc_div_qr_n -- recursive divide-and-conquer division for arbitrary\n   size operands.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (minor modifications)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_dc_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,\n\t\t    mp_limb_t dinv, mp_ptr tp)\n{\n  mp_size_t lo, hi;\n  mp_limb_t cy, qh, ql;\n\n  lo = n >> 1; /* floor(n/2) */\n  hi = n - lo;\t/* ceil(n/2) */\n\n  \n  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))\n    qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv);\n  else\n    qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);\n\n  mpn_mul (tp, qp + lo, hi, dp, lo);\n\n  cy = mpn_sub_n (np + lo, np + lo, tp, n);\n  if (qh != 0)\n    cy += mpn_sub_n (np + n, np + n, dp, lo);\n\n  while (cy != 0)\n    {\n      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);\n      cy -= mpn_add_n (np + lo, np + lo, dp, n);\n    }\n\n  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))\n    ql = mpn_sb_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv);\n  else\n    ql = mpn_dc_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);\n\n  mpn_mul (tp, dp, hi, qp, lo);\n\n  cy = mpn_sub_n (np, np, tp, n);\n  if (ql != 0)\n    cy += mpn_sub_n (np + lo, np + lo, dp, hi);\n\n  while (cy != 0)\n    {\n      mpn_sub_1 (qp, qp, lo, 1);\n      cy -= mpn_add_n (np, np, dp, n);\n    }\n\n  return qh;\n}\n\n"
  },
  {
    "path": "mpn/generic/dc_divappr_q.c",
    "content": "/* mpn_dc_divappr_q -- divide-and-conquer division, returning approximate\n   quotient.  The quotient returned is either correct, or one too large.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010, 2013 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#define SB_DIVAPPR_Q_CUTOFF 43 /* must be at least 3 */\n\nmp_limb_t\nmpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,\n\t\t     mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)\n{\n  mp_size_t q_orig, qn, sh, sl, i;\n  mp_limb_t qh, cy, cy2;\n  mp_ptr tp;\n  TMP_DECL;\n\n  ASSERT (dn >= 6);\n  ASSERT (nn >= dn + 3);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  qn = nn - dn;\n  if (qn + 1 < dn)\n    {\n      dp += dn - (qn + 1);\n      dn = qn + 1;\n    }\n  q_orig = qn;\n\n  qh = mpn_cmp(np + nn - dn, dp, dn) >= 0;\n  if (qh != 0)\n    mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);\n\n  np += nn - dn - qn;\n  nn = dn + qn;\n\n  /* Reduce until dn - 1 >= qn */\n  while (dn - 1 < qn)\n  {\n     sh = MIN(dn, qn - dn + 1);\n     if (sh <= DC_DIV_QR_THRESHOLD) cy2 = mpn_sb_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);\n     else cy2 = mpn_dc_div_qr(qp + qn - sh, np + nn - dn - sh, dn + sh, dp, dn, dinv);\n     qn -= sh; nn -= sh; \n  }\n\n  cy = np[nn - 1];\n\n  /* split into two parts */\n  sh = qn/2; sl = qn - sh;\n\n  /* Rare case where truncation ruins normalisation */\n  if (cy > dp[dn - 1] || (cy == dp[dn - 1] \n     && mpn_cmp(np + nn - qn, dp + dn - qn, qn - 1) >= 0))\n     {\n        __divappr_helper(qp, np + nn - qn - 2, dp + dn - qn - 1, qn);\n        return qh;\n     }\n\n  if (mpn_cmp(np + sl + dn - 1, dp + dn - sh - 1, sh + 1) >= 0)\n     __divappr_helper(qp + sl, np + dn + sl - 2, dp + dn - sh - 1, sh);\n  else\n  {\n     if (sh < SB_DIVAPPR_Q_CUTOFF)\n        mpn_sb_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);\n     else\n        mpn_dc_divappr_q(qp + sl, np + sl, dn + sh, dp, dn, dinv);\n  }\n\n  cy = np[nn - sh];\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS(sl + 2);\n\n  mpn_mulmid(tp, dp + dn - qn - 1, qn - 1, qp + sl, sh);\n  cy -= mpn_sub_n(np + nn - qn - 2, np + nn - qn - 2, tp, sl + 2);\n\n  TMP_FREE;\n\n  if ((mp_limb_signed_t) cy < 0)\n  {\n      \n     qh -= mpn_sub_1(qp + sl, qp + sl, q_orig - sl, 1); /* ensure quotient is not too big */\n     \n     /*\n        correct remainder, noting that \"digits\" of quotient aren't base B\n        but in base varying with truncation, thus correction needs fixup\n     */\n     cy += mpn_add_n(np + nn - qn - 2, np + nn - qn - 2, dp + dn - sl - 2, sl + 2); \n\n     for (i = 0; i < sh - 1 && qp[sl + i] == ~CNST_LIMB(0); i++)\n        cy += mpn_add_1(np + nn - qn - 2, np + nn - qn - 2, sl + 2, dp[dn - sl - 3 - i]);\n  }\n   \n  if (cy != 0) /* special case: unable to canonicalise */\n     __divappr_helper(qp, np + nn - qn - 2, dp + dn - sl - 1, sl);\n  else\n  {\n     if (mpn_cmp(np + dn - 1, dp + dn - sl - 1, sl + 1) >= 0)\n        __divappr_helper(qp, np + dn - 2, dp + dn - sl - 1, sl);\n     else\n     {\n        if (sl < SB_DIVAPPR_Q_CUTOFF)\n           mpn_sb_divappr_q(qp, np, dn + sl, dp, dn, dinv);\n        else\n           mpn_dc_divappr_q(qp, np, dn + sl, dp, dn, dinv);\n     }\n\n  }\n\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/divexact.c",
    "content": "/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing\n   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed\n   between Q and N; all other overlap disallowed.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009 Free Software Foundation, Inc.\nCopyright 2010, William Hart (modified for MPIR)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* FIXME: the INV_DIV_Q_THRESHOLD should be replaced with a tuned cutoff specifically for this function */\n\nvoid\nmpn_divexact (mp_ptr qp,\n\t      mp_srcptr np, mp_size_t nn,\n\t      mp_srcptr dp, mp_size_t dn)\n{\n  unsigned shift, shift2;\n  int q_even;\n  mp_size_t qn;\n  mp_ptr tp, n2p, inv;\n  mp_limb_t dinv, wp[2], cy;\n  int extend = 0;\n\n  TMP_DECL;\n\n  ASSERT (dn > 0);\n  ASSERT (nn >= dn);\n  ASSERT (dp[dn-1] > 0);\n\n  while (dp[0] == 0)\n    {\n      ASSERT (np[0] == 0);\n      dp++;\n      np++;\n      dn--;\n      nn--;\n    }\n\n  if (dn == 1)\n    {\n      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);\n      return;\n    }\n\n  TMP_MARK;\n\n  qn = nn + 1 - dn;\n  count_trailing_zeros (shift, dp[0]);\n\n  if ((BELOW_THRESHOLD (qn, INV_DIV_QR_THRESHOLD) && BELOW_THRESHOLD(dn, INV_DIV_QR_THRESHOLD)) || (dn <= 6))\n  {\n    if (shift > 0)\n      {\n        mp_size_t ss = (dn > qn) ? qn + 1 : dn;\n\n        tp = TMP_ALLOC_LIMBS (ss);\n        mpn_rshift (tp, dp, ss, shift);\n        dp = tp;\n\n        /* Since we have excluded dn == 1, we have nn > qn, and we need\n\t   to shift one limb beyond qn. */\n        n2p = TMP_ALLOC_LIMBS (qn + 1);\n        mpn_rshift (n2p, np, qn + 1, shift);\n      }\n    else\n      {\n        n2p = TMP_ALLOC_LIMBS (qn);\n        MPN_COPY (n2p, np, qn);\n      }\n\n    if (dn > qn)\n      dn = qn;\n\n    modlimb_invert(dinv, dp[0]);\n\n    if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))\n      mpn_sb_bdiv_q (qp, wp, n2p, qn, dp, dn, dinv);\n    else \n      mpn_dc_bdiv_q (qp, n2p, qn, dp, dn, dinv);\n  } else\n  {\n    /* determine if the quotient is even */\n    count_trailing_zeros (shift2, np[0]);\n    q_even = ((np[0] == 0) || (shift2 > shift)) ? 1 : 0; \n\n    if (dp[dn - 1] & GMP_LIMB_HIGHBIT)\n    {\n       n2p = TMP_ALLOC_LIMBS(nn);\n       MPN_COPY(n2p, np, nn);\n    } else\n    { \n       count_leading_zeros (shift, dp[dn - 1]);\n       n2p = TMP_ALLOC_LIMBS(nn + 1);\n       n2p[nn] = mpn_lshift(n2p, np, nn, shift);\n       extend = (n2p[nn] != 0);\n       nn += extend;\n\n       tp = TMP_ALLOC_LIMBS(dn);\n       mpn_lshift(tp, dp, dn, shift);   \n       dp = tp;\n    }\n\n    qn = nn - dn;\n if(qn)\n   {       \n    inv = TMP_ALLOC_LIMBS(dn);\n    mpn_invert(inv, dp, dn);\n    cy = mpn_inv_divappr_q(qp, n2p, nn, dp, dn, inv);\n    if (!extend) qp[qn] = cy;\n\n    if ((qp[0] & 1) + q_even != 1) /* quotient is out by 1 */\n       mpn_sub_1(qp, qp, qn + 1, 1);\n   } else\n     qp[0] = 1; /* as dp is normalised, exact division means qp = 1 */\n  }\n\n  TMP_FREE;\n}\n\n"
  },
  {
    "path": "mpn/generic/divexact_1.c",
    "content": "/* mpn_divexact_1 -- mpn by limb exact division.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n\n/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.\n   q will only be correct if d divides a exactly.\n\n   A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't\n   give zero on all CPUs (for instance it doesn't on the x86s).  This\n   separate loop might run faster too, helping odd divisors.\n\n   Possibilities:\n\n   mpn_divexact_1c could be created, accepting and returning c.  This would\n   let a long calculation be done piece by piece.  Currently there's no\n   particular need for that, and not returning c means that a final umul can\n   be skipped.\n\n   Another use for returning c would be letting the caller know whether the\n   division was in fact exact.  It would work just to return the carry bit\n   \"c=(l>s)\" and let the caller do a final umul if interested.\n\n   When the divisor is even, the factors of two could be handled with a\n   separate mpn_rshift, instead of shifting on the fly.  That might be\n   faster on some CPUs and would mean just the shift==0 style loop would be\n   needed.\n\n   If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate\n   shift==0 loop is unnecessary, and could be eliminated if there's no great\n   speed difference.\n\n   It's not clear whether \"/\" is the best way to handle size==1.  Alpha gcc\n   2.95 for instance has a poor \"/\" and might prefer the modular method.\n   Perhaps a tuned parameter should control this.\n\n   If src[size-1] < divisor then dst[size-1] will be zero, and one divide\n   step could be skipped.  A test at last step for s<divisor (or ls in the\n   even case) might be a good way to do that.  But if this code is often\n   used with small divisors then it might not be worth bothering  */\n\nvoid\nmpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)\n{\n  mp_size_t  i;\n  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;\n  unsigned   shift;\n\n  ASSERT (size >= 1);\n  ASSERT (divisor != 0);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));\n  ASSERT_MPN (src, size);\n  ASSERT_LIMB (divisor);\n\n  s = src[0];\n\n  if (size == 1)\n    {\n      dst[0] = s / divisor;\n      return;\n    }\n\n  if ((divisor & 1) == 0)\n    {\n      count_trailing_zeros (shift, divisor);\n      divisor >>= shift;\n    }\n  else\n    shift = 0;\n\n  modlimb_invert (inverse, divisor);\n  divisor <<= GMP_NAIL_BITS;\n\n  if (shift != 0)\n    {\n      c = 0;\n      i = 0;\n      size--;\n\n      do\n\t{\n\t  s_next = src[i+1];\n\t  ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;\n\t  s = s_next;\n\n\t  SUBC_LIMB (c, l, ls, c);\n\n\t  l = (l * inverse) & GMP_NUMB_MASK;\n\t  dst[i] = l;\n\n\t  umul_ppmm (h, dummy, l, divisor);\n\t  c += h;\n\n\t  i++;\n\t}\n      while (i < size);\n\n      ls = s >> shift;\n      l = ls - c;\n      l = (l * inverse) & GMP_NUMB_MASK;\n      dst[i] = l;\n    }\n  else\n    {\n      l = (s * inverse) & GMP_NUMB_MASK;\n      dst[0] = l;\n      i = 1;\n      c = 0;\n\n      do\n\t{\n\t  umul_ppmm (h, dummy, l, divisor);\n\t  c += h;\n\n\t  s = src[i];\n\t  SUBC_LIMB (c, l, s, c);\n\n\t  l = (l * inverse) & GMP_NUMB_MASK;\n\t  dst[i] = l;\n\t  i++;\n\t}\n      while (i < size);\n    }\n}\n"
  },
  {
    "path": "mpn/generic/divexact_by3c.c",
    "content": "/* mpn_divexact_by3c exact division by 3.\n\n  Copyright 2009 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* (xp, n) = (qp, n)*3 - ret*B^n and 0 <= ret < 3 */\nmp_limb_t mpn_divexact_by3c(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t ci)\n{\n   mp_size_t j;\n   mp_limb_t c, m, acc, ax, dx;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n\n   m = 0;\n   m = ~m;\n   m = m/3; /* m = (B - 1)/3 */\n\n   acc = ci*m;\n\n   for (j = 0; j <= n - 1; j++)\n   {\n      umul_ppmm(dx, ax, xp[j], m);\n    \n      SUBC_LIMB(c, acc, acc, ax);\n    \n      qp[j] = acc;\n      acc -= dx + c;\n   }\n\n   /* return next quotient*(-3) */    \n\n   return acc*(-3);\n}   \n\n\n\n\n#if 0\n#define ADD ADDC_LIMB\n#define SUB SUBC_LIMB\n\n#define ADC(co,w,x,y,ci) do{mp_limb_t c1,c2,t;ADD(c1,t,x,y);ADD(c2,w,t,ci);co=c1|c2;}while(0)\n#define SBB(co,w,x,y,ci) do{mp_limb_t c1,c2,t;SUB(c1,t,x,y);SUB(c2,w,t,ci);co=c1|c2;}while(0)\n\n// basic divexact\nmp_limb_t\tdivexact_basic(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d)\n{int c,h,q,dummy,h1,t,m;\n\nASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));\nASSERT(d%2==1);modlimb_invert(m,d);\nc=0;h=0;t=0;\nfor(j=0;j<=n-1;j++)\n   {h1=xp[j];\n    t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow\n    q=h1*m;\n    qp[j]=q;\n    umul_ppmm(h,dummy,q,d);\n    ASSERT(dummy==h1);}\n// ie returns next quotient*-d\nreturn h+c;}   // so  (xp,n) = (qp,n)*d -ret*B^n    and 0 <= ret < d\n\n/*\n  A divexact by 3 can be obtained via   X * ((B-1)/3)  / (B-1)\n  The advantage of this is that the multiplications are taken out of the dependant chain.\n  The exact division by B-1 can be done like mpn_divexact_byff and can be thought of as a \n  multi-limb subtraction but with an accumulator instead of a destination.\n  Combining the multiplication and exact division together we get the function below which\n  is the usual mpn_submul_1 but with an accumulator instead of a destination.\n*/\n\n\nmp_limb_t\tdivexact_submul(mp_ptr qp,mp_ptr xp,mp_size_t n)\n{int j;mp_limb_t c,m,t1,t2,t3,acc,ax,dx,t;\n\nASSERT(n>0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));\nm=0;m=~m;m=m/3;// m=(B-1)/3\nc=0;t1=t2=t3=acc=0;\n    umul_ppmm(dx,ax,xp[0],m);\n    SUB(c,acc,0,t1);\n    ADC(c,t2,0,ax,c);\n    ADC(c,t3,0,dx,c);\n    ASSERT(c==0);\n    t1=t2;t2=t3;\nfor(j=1;j<=n-1;j++)\n   {\n    t3=0;\n    umul_ppmm(dx,ax,xp[j],m);\n    SUB(c,acc,acc,t1);\n    qp[j-1]=acc;\n    ADC(c,t2,t2,ax,c);\n    ADC(c,t3,t3,dx,c);\n    ASSERT(c==0);\n    t1=t2;t2=t3;\n   }\n    SUB(c,acc,acc,t1);\n    qp[n-1]=acc;\n    ADC(c,t2,t2,0,c);\n    t=(t2-acc)*3;\n// return next quotient*-3    \nreturn t;}   // so  (xp,n) = (qp,n)*3 -ret*B^n    and 0 <= ret < 3\n\n\n/*\nLooking at the above submul type function we can see that for each quotient limb we \nsubtract the high part of one mul and the low part of the next mul from the accumulator.\n*/\n\nmp_limb_t\tdivexact3_direct(mp_ptr qp,mp_ptr xp,mp_size_t n)\n{int j;mp_limb_t c,m,acc,ax,dx;\n\nASSERT(n>0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));\nm=0;m=~m;m=m/3;// m=(B-1)/3\nc=0;t1=t2=t3=acc=0;\nfor(j=0;j<=n-1;j++)\n   {\n    umul_ppmm(dx,ax,xp[j],m);\n    SBB(c,acc,acc,ax,c);\n    qp[j]=acc;\n    SBB(c,acc,acc,dx,c);\n   }\n    SBB(c,acc,acc,0,c);\n// return next quotient*-3    \nreturn acc*-3;}   // so  (xp,n) = (qp,n)*3 -ret*B^n    and 0 <= ret < 3\n\n/*\n   Using supernatural powers we discover that the second carry is always zero \n   , ie like below , leading to the used algorithm\n\n*/\n\nmp_limb_t\tdivexact3_byluck(mp_ptr qp,mp_ptr xp,mp_size_t n)\n{int j;mp_limb_t c,m,acc,ax,dx;\n\nASSERT(n>0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));\nm=0;m=~m;m=m/3;// m=(B-1)/3\nc=0;acc=0;\nfor(j=0;j<=n-1;j++)\n   {   \n    umul_ppmm(dx,ax,xp[j],m);\t// line 1\n    SUB(c,acc,acc,ax);\t\t// line 2\n    qp[j]=acc;\t\t\t// line 3\n    SBB(c,acc,acc,dx,c);\t// line 4\n    if(c!=0){printf(\"c not zero\\n\");abort();}\n   }\n// return next quotient*-3    \nreturn acc*-3;}   // so  (xp,n) = (qp,n)*3 -ret*B^n    and 0 <= ret < 3\n\n/*\n  Not wanting to rely on the above comments , a proof below\n\n*/\n#endif\n\n/*\n\nConsider one iteration\n\nlet m=(B-1)/3\n\nat line 1  acc=m*some_ret \n\nwrite  xp[j]=3*p+d    with  0 < d <= 3      ie division but with remainder+1  and p<(B-1)/3=m\n\ndx:ax = xp[j]*m = (3p+d)*(B-1)/3 = p(B-1)+d*(B-1)/3 = pB+(d(B-1)/3-p) so dx=p and ax=d(B-1)/3-p    (*)\n\nat line 2 we have two cases if (1) ax>acc  or (2) ax<=acc\nand we want to show that the carry at line 4 is zero , ie dx+c<=acc \nso we get \ncase (1)   show that dx+1<=acc-ax+B  ie  ax+dx <= B-1+acc\ncase (2)   show that dx+0<=acc-ax    ie  ax+dx <=     acc\n\nfrom (*) ax+dx=d(B-1)/3 = d*m <= 3(B-1)/3=B-1  \n\nso case (1) is OK\n\ncase (2) says ax<=acc=m*r for some r : 0 <= r < 3\n             as ax+dx=m*d   so r<=d \n             and as dx=p<m so d<r+1   therefore d=r so ax+dx=mr=acc<=acc\n             so case is OK\n*/\n"
  },
  {
    "path": "mpn/generic/divexact_byff.c",
    "content": "/*  mpn_divexact_byff\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n\tWhere (xp,n) = (qp, n)*(B - 1) - ret*B^n and 0 <= ret < B - 1, B = 2^GMP_NUMB_BITS\n\t\n\tThis is the standard divexact algorithm with simplifications for the divisior B-1\n*/\nmp_limb_t\nmpn_divexact_byff (mp_ptr qp, mp_srcptr xp, mp_size_t n)\n{\n  mp_size_t j;\n  mp_limb_t t, a, b;\n\n  ASSERT (n > 0);\n  ASSERT_MPN (xp, n);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));\n\n  a = 0;\n  for (j = 0; j <= n - 1; j++)\n    {\n      t = xp[j];\n      if (t > a)\n\t{\n\t  b = 1;\n\t}\n      else\n\t{\n\t  b = 0;\n\t}\n      a = a - t;\n      qp[j] = a;\n      a = a - b;\t\t/* no borrow from this sub */\n    }\n  return a;\n}\n"
  },
  {
    "path": "mpn/generic/divexact_byfobm1.c",
    "content": "/* mpn_divexact_by exact division by (B-1)/f\n\n  Copyright 2009 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/*\n   (xp, n) = (qp, n)*f - ret*B^n and 0 <= ret < f\n\n   Note the divexact_by3 code is just a special case of this\n*/\nmp_limb_t mpn_divexact_byfobm1(mp_ptr qp, mp_srcptr xp, mp_size_t n,\n                                             mp_limb_t f, mp_limb_t Bm1of)\n{\n   mp_size_t j;\n   mp_limb_t c, acc, ax, dx;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(Bm1of*f + 1 == 0);\n\n   acc = 0*Bm1of; /* carry in is 0 */\n\n   for (j = 0; j <= n - 1; j++)\n   {\n      umul_ppmm(dx, ax, xp[j], Bm1of);\n    \n      SUBC_LIMB(c, acc, acc, ax);\n    \n      qp[j] = acc;\n      acc -= dx + c;\n   }\n\n   /* return next quotient*(-f) */ \n\n   return acc*(-f);\n}   \n"
  },
  {
    "path": "mpn/generic/divisible_p.c",
    "content": "/* mpn_divisible_p -- mpn by mpn divisibility test\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Determine whether {ap,asize} is divisible by {dp,dsize}.  
Must have both\n   operands normalized, meaning high limbs non-zero, except that asize==0 is\n   allowed.\n\n   There usually won't be many low zero bits on d, but the checks for this\n   are fast and might pick up a few operand combinations, in particular they\n   might reduce d to fit the single-limb mod_1/modexact_1 code.\n\n   Future:\n\n   This is currently not much faster than the user doing an mpz_tdiv_r\n   and testing for a zero remainder, but hopefully it can be improved.\n\n   mpn_bdivmod is one possibility, but it only trades udiv_qrnnd's for\n   multiplies, it won't save crossproducts the way it can in mpz_divexact.\n   Definitely worthwhile on small operands for most processors, but a\n   sub-quadratic version will be wanted before it can be used on all sizes.\n\n   Getting the remainder limb by limb would make an early exit possible on\n   finding a non-zero.  This would probably have to be bdivmod style so\n   there's no addback, but it would need a multi-precision inverse and so\n   might be slower than the plain method (on small sizes at least).\n\n   When d must be normalized (shifted to high bit set), it's possible to\n   just append a low zero limb to \"a\" rather than bit-shifting as\n   mpn_tdiv_qr does internally, so long as it's already been checked that a\n   has at least as many trailing zeros bits as d.  Or equivalently, pass\n   qxn==1 to mpn_tdiv_qr, if/when it accepts that.\n\n   When called from mpz_congruent_p, {ap,asize} is a temporary which can be\n   destroyed.  Maybe it'd be possible to get into mpn_tdiv_qr at a lower\n   level to save copying it, or maybe that function could accept rp==ap.\n\n   Could use __attribute__ ((regparm (2))) on i386, so the parameters\n   wouldn't need extra stack when called from mpz_divisible_p, but a\n   pre-release gcc 3 didn't generate particularly good register juggling in\n   that case, so this isn't done for now.  
*/\n\nint\nmpn_divisible_p (mp_srcptr ap, mp_size_t asize,\n\t\t mp_srcptr dp, mp_size_t dsize)\n{\n  mp_limb_t  alow, dlow, dmask;\n  mp_ptr     qp, rp;\n  mp_size_t  i;\n  TMP_DECL;\n\n  ASSERT (asize >= 0);\n  ASSERT (asize == 0 || ap[asize-1] != 0);\n  ASSERT (dsize >= 1);\n  ASSERT (dp[dsize-1] != 0);\n  ASSERT_MPN (ap, asize);\n  ASSERT_MPN (dp, dsize);\n\n  /* When a<d only a==0 is divisible.\n     Notice this test covers all cases of asize==0. */\n  if (asize < dsize)\n    return (asize == 0);\n\n  /* Strip low zero limbs from d, requiring a==0 on those. */\n  for (;;)\n    {\n      alow = *ap;\n      dlow = *dp;\n\n      if (dlow != 0)\n\tbreak;\n\n      if (alow != 0)\n\treturn 0;  /* a has fewer low zero limbs than d, so not divisible */\n\n      /* a!=0 and d!=0 so won't get to size==0 */\n      asize--; ASSERT (asize >= 1);\n      dsize--; ASSERT (dsize >= 1);\n      ap++;\n      dp++;\n    }\n\n  /* a must have at least as many low zero bits as d */\n  dmask = LOW_ZEROS_MASK (dlow);\n  if ((alow & dmask) != 0)\n    return 0;\n\n  if (dsize == 1)\n    {\n      if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))\n\treturn mpn_mod_1 (ap, asize, dlow) == 0;\n\n      if ((dlow & 1) == 0)\n\t{\n\t  unsigned  twos;\n\t  count_trailing_zeros (twos, dlow);\n\t  dlow >>= twos;\n\t}\n      return mpn_modexact_1_odd (ap, asize, dlow) == 0;\n    }\n\n  if (dsize == 2)\n    {\n      mp_limb_t  dsecond = dp[1];\n      if (dsecond <= dmask)\n\t{\n\t  unsigned  twos;\n\t  count_trailing_zeros (twos, dlow);\n\t  dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));\n          ASSERT_LIMB (dlow);\n\t  return MPN_MOD_OR_MODEXACT_1_ODD (ap, asize, dlow) == 0;\n\t}\n    }\n\n  TMP_MARK;\n\n  rp = TMP_ALLOC_LIMBS (asize+1);\n  qp = rp + dsize;\n\n  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, dp, dsize);\n\n  /* test for {rp,dsize} zero or non-zero */\n  i = 0;\n  do\n    {\n      if (rp[i] != 0)\n\t{\n\t  TMP_FREE;\n\t  return 0;\n\t}\n    }\n  while (++i < 
dsize);\n\n  TMP_FREE;\n  return 1;\n}\n"
  },
  {
    "path": "mpn/generic/divrem.c",
    "content": "/* mpn_divrem -- Divide natural numbers, producing both remainder and\n   quotient.  This is now just a middle layer for calling the new\n   internal mpn_tdiv_qr.\n\nCopyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_divrem (mp_ptr qp, mp_size_t qxn,\n\t    mp_ptr np, mp_size_t nn,\n\t    mp_srcptr dp, mp_size_t dn)\n{\n  ASSERT (qxn >= 0);\n  ASSERT (nn >= dn);\n  ASSERT (dn >= 1);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));\n  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);\n  ASSERT (! 
MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));\n  ASSERT_MPN (np, nn);\n  ASSERT_MPN (dp, dn);\n\n  if (dn == 1)\n    {\n      mp_limb_t ret;\n      mp_ptr q2p;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\n      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);\n      qn = nn + qxn - 1;\n      MPN_COPY (qp, q2p, qn);\n      ret = q2p[qn];\n\n      TMP_FREE;\n      return ret;\n    }\n  else if (dn == 2)\n    {\n      return mpn_divrem_2 (qp, qxn, np, nn, dp);\n    }\n  else\n    {\n      mp_ptr rp, q2p;\n      mp_limb_t qhl;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      if (UNLIKELY (qxn != 0))\n\t{\n\t  mp_ptr n2p;\n\t  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\t  MPN_ZERO (n2p, qxn);\n\t  MPN_COPY (n2p + qxn, np, nn);\n\t  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\n\t  qn = nn - dn + qxn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      else\n\t{\n\t  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\t/* overwrite np area with remainder */\n\t  qn = nn - dn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      TMP_FREE;\n      return qhl;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/divrem_1.c",
    "content": "/* mpn_divrem_1 -- mpn by limb division.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,\n   meaning the quotient size where that should happen, the quotient size\n   being how many udiv divisions will be done.\n\n   The default is to use preinv always, CPUs where this doesn't suit have\n   tuned thresholds.  Note in particular that preinv should certainly be\n   used if that's the only division available (USE_PREINV_ALWAYS).  */\n\n#ifndef DIVREM_1_NORM_THRESHOLD\n#define DIVREM_1_NORM_THRESHOLD  0\n#endif\n#ifndef DIVREM_1_UNNORM_THRESHOLD\n#define DIVREM_1_UNNORM_THRESHOLD  0\n#endif\n\n\n\n/* If the cpu only has multiply-by-inverse division (eg. 
alpha), then NORM\n   and UNNORM thresholds are 0 and only the inversion code is included.\n\n   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds\n   will be MP_SIZE_T_MAX and only the plain division code is included.\n\n   Otherwise mul-by-inverse is better than plain division above some\n   threshold, and best results are obtained by having code for both present.\n\n   The main reason for separating the norm and unnorm cases is that not all\n   CPUs give zero for \"n0 >> BITS_PER_MP_LIMB\" which would arise in the\n   unnorm code used on an already normalized divisor.\n\n   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same\n   non-shifting code for both the norm and unnorm cases, though with\n   different criteria for skipping a division, and with different thresholds\n   of course.  And in fact if inversion is never viable, then that simple\n   non-shifting division would be all that's left.\n\n   The NORM and UNNORM thresholds might not differ much, but if there's\n   going to be separate code for norm and unnorm then it makes sense to have\n   separate thresholds.  One thing that's possible is that the\n   mul-by-inverse might be better only for normalized divisors, due to that\n   case not needing variable bit shifts.\n\n   Notice that the thresholds are tested after the decision to possibly skip\n   one divide step, so they're based on the actual number of divisions done.\n\n   For the unnorm case, it would be possible to call mpn_lshift to adjust\n   the dividend all in one go (into the quotient space say), rather than\n   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster\n   than what the compiler can generate for EXTRACT.  But this is left to CPU\n   specific implementations to consider, especially since EXTRACT isn't on\n   the dependent chain.  
*/\n\nmp_limb_t\nmpn_divrem_1 (mp_ptr qp, mp_size_t qxn,\n\t      mp_srcptr up, mp_size_t un, mp_limb_t d)\n{\n  mp_size_t  n;\n  mp_size_t  i;\n  mp_limb_t  n1, n0;\n  mp_limb_t  r = 0;\n\n  ASSERT (qxn >= 0);\n  ASSERT (un >= 0);\n  ASSERT (d != 0);\n  /* FIXME: What's the correct overlap rule when qxn!=0? */\n  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));\n\n  n = un + qxn;\n  if (n == 0)\n    return 0;\n\n  d <<= GMP_NAIL_BITS;\n\n  if(qxn==0)\n    {\n     if(d<=GMP_LIMB_HIGHBIT/2+1 && ABOVE_THRESHOLD(un,DIVREM_EUCLID_HENSEL_THRESHOLD))\n       {r=mpn_divrem_euclidean_r_1(up,un,d);\n        count_trailing_zeros(i,d);\n        mpn_rsh_divrem_hensel_qr_1(qp,up,un,d>>i,i,r);\n        return r;}\n  #if HAVE_NATIVE_mpn_divrem_euclidean_qr_1\n     return mpn_divrem_euclidean_qr_1(qp,0,up,un,d);\n  #endif\n    }\n  qp += (n - 1);   /* Make qp point at most significant quotient limb */\n\n  if ((d & GMP_LIMB_HIGHBIT) != 0)\n    {\n      if (un != 0)\n\t{\n\t  /* High quotient limb is 0 or 1, skip a divide step. */\n\t  mp_limb_t q;\n\t  r = up[un - 1] << GMP_NAIL_BITS;\n\t  q = (r >= d);\n\t  *qp-- = q;\n\t  r -= (d & -q);\n\t  r >>= GMP_NAIL_BITS;\n\t  n--;\n\t  un--;\n\t}\n\n      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))\n\t{\n\tplain:\n\t  for (i = un - 1; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd (*qp, r, r, n0, d);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  for (i = qxn - 1; i >= 0; i--)\n\t    {\n\t      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  return r;\n\t}\n      else\n\t{\n\t  /* Multiply-by-inverse, divisor already normalized. 
*/\n\t  mp_limb_t dinv;\n\t  invert_limb (dinv, d);\n\n\t  for (i = un - 1; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  for (i = qxn - 1; i >= 0; i--)\n\t    {\n\t      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  return r;\n\t}\n    }\n  else\n    {\n      /* Most significant bit of divisor == 0.  */\n      int norm;\n\n      /* Skip a division if high < divisor (high quotient 0).  Testing here\n\t before normalizing will still skip as often as possible.  */\n      if (un != 0)\n\t{\n\t  n1 = up[un - 1] << GMP_NAIL_BITS;\n\t  if (n1 < d)\n\t    {\n\t      r = n1 >> GMP_NAIL_BITS;\n\t      *qp-- = 0;\n\t      n--;\n\t      if (n == 0)\n\t\treturn r;\n\t      un--;\n\t    }\n\t}\n\n      if (! UDIV_NEEDS_NORMALIZATION\n\t  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))\n\tgoto plain;\n\n      count_leading_zeros (norm, d);\n      d <<= norm;\n      r <<= norm;\n\n      if (UDIV_NEEDS_NORMALIZATION\n\t  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))\n\t{\n\t  if (un != 0)\n\t    {\n\t      n1 = up[un - 1] << GMP_NAIL_BITS;\n\t      r |= (n1 >> (GMP_LIMB_BITS - norm));\n\t      for (i = un - 2; i >= 0; i--)\n\t\t{\n\t\t  n0 = up[i] << GMP_NAIL_BITS;\n\t\t  udiv_qrnnd (*qp, r, r,\n\t\t\t      (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),\n\t\t\t      d);\n\t\t  r >>= GMP_NAIL_BITS;\n\t\t  qp--;\n\t\t  n1 = n0;\n\t\t}\n\t      udiv_qrnnd (*qp, r, r, n1 << norm, d);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  for (i = qxn - 1; i >= 0; i--)\n\t    {\n\t      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  return r >> norm;\n\t}\n      else\n\t{\n\t  mp_limb_t  dinv;\n\t  invert_limb (dinv, d);\n\t  if (un != 0)\n\t    {\n\t      n1 = up[un - 1] << GMP_NAIL_BITS;\n\t      r |= (n1 >> (GMP_LIMB_BITS - 
norm));\n\t      for (i = un - 2; i >= 0; i--)\n\t\t{\n\t\t  n0 = up[i] << GMP_NAIL_BITS;\n\t\t  udiv_qrnnd_preinv (*qp, r, r, \n\t\t\t\t     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),\n\t\t\t\t     d, dinv);\n\t\t  r >>= GMP_NAIL_BITS;\n\t\t  qp--;\n\t\t  n1 = n0;\n\t\t}\n\t      udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  for (i = qxn - 1; i >= 0; i--)\n\t    {\n\t      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);\n\t      r >>= GMP_NAIL_BITS;\n\t      qp--;\n\t    }\n\t  return r >> norm;\n\t}\n    }\n}\n"
  },
  {
    "path": "mpn/generic/divrem_2.c",
    "content": "/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and\n   quotient.  The divisor is two limbs.\n\n   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS\n   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS\n   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP\n   RELEASE.\n\n\nCopyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,\n   meaning the quotient size where that should happen, the quotient size\n   being how many udiv divisions will be done.\n\n   The default is to use preinv always, CPUs where this doesn't suit have\n   tuned thresholds.  Note in particular that preinv should certainly be\n   used if that's the only division available (USE_PREINV_ALWAYS).  */\n\n#ifndef DIVREM_2_THRESHOLD\n#define DIVREM_2_THRESHOLD  0\n#endif\n\n\n/* Divide num (NP/NSIZE) by den (DP/2) and write\n   the NSIZE-2 least significant quotient limbs at QP\n   and the 2 long remainder at NP.  
If QEXTRA_LIMBS is\n   non-zero, generate that many fraction bits and append them after the\n   other quotient limbs.\n   Return the most significant limb of the quotient, this is always 0 or 1.\n\n   Preconditions:\n   0. NSIZE >= 2.\n   1. The most significant bit of the divisor must be set.\n   2. QP must either not overlap with the input operands at all, or\n      QP + 2 >= NP must hold true.  (This means that it's\n      possible to put the quotient in the high part of NUM, right after the\n      remainder in NUM.\n   3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero.  */\n\nmp_limb_t\nmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,\n\t      mp_ptr np, mp_size_t nn,\n\t      mp_srcptr dp)\n{\n  mp_limb_t most_significant_q_limb = 0;\n  mp_size_t i;\n  mp_limb_t n1, n0, n2;\n  mp_limb_t d1, d0;\n  mp_limb_t d1inv;\n  int use_preinv;\n\n  ASSERT (nn >= 2);\n  ASSERT (qxn >= 0);\n  ASSERT (dp[1] & GMP_NUMB_HIGHBIT);\n  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);\n  ASSERT_MPN (np, nn);\n  ASSERT_MPN (dp, 2);\n\n#if HAVE_NATIVE_mpn_divrem_euclidean_qr_2\nif (qxn==0) return mpn_divrem_euclidean_qr_2(qp,np,nn,dp);\n#endif\n\n  np += nn - 2;\n  d1 = dp[1];\n  d0 = dp[0];\n  n1 = np[1];\n  n0 = np[0];\n\n  if (n1 >= d1 && (n1 > d1 || n0 >= d0))\n    {\n#if GMP_NAIL_BITS == 0\n      sub_ddmmss (n1, n0, n1, n0, d1, d0);\n#else\n      n0 = n0 - d0;\n      n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);\n      n0 &= GMP_NUMB_MASK;\n#endif\n      most_significant_q_limb = 1;\n    }\n\n  use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);\n  if (use_preinv)\n    invert_limb (d1inv, d1);\n\n  for (i = qxn + nn - 2 - 1; i >= 0; i--)\n    {\n      mp_limb_t q;\n      mp_limb_t r;\n\n      if (i >= qxn)\n\tnp--;\n      else\n\tnp[0] = 0;\n\n      if (n1 == d1)\n\t{\n\t  /* Q should be either 111..111 or 111..110.  Need special handling\n\t     of this rare case as normal division would give overflow.  
*/\n\t  q = GMP_NUMB_MASK;\n\n\t  r = (n0 + d1) & GMP_NUMB_MASK;\n\t  if (r < d1)\t/* Carry in the addition? */\n\t    {\n#if GMP_NAIL_BITS == 0\n\t      add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);\n#else\n\t      n0 = np[0] + d0;\n\t      n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;\n\t      n0 &= GMP_NUMB_MASK;\n#endif\n\t      qp[i] = q;\n\t      continue;\n\t    }\n\t  n1 = d0 - (d0 != 0);\n\t  n0 = -d0 & GMP_NUMB_MASK;\n\t}\n      else\n\t{\n\t  if (use_preinv)\n\t    udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);\n\t  else\n\t    udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);\n\t  r >>= GMP_NAIL_BITS;\n\t  umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);\n\t  n0 >>= GMP_NAIL_BITS;\n\t}\n\n      n2 = np[0];\n\n    q_test:\n      if (n1 > r || (n1 == r && n0 > n2))\n\t{\n\t  /* The estimated Q was too large.  */\n\t  q--;\n\n#if GMP_NAIL_BITS == 0\n\t  sub_ddmmss (n1, n0, n1, n0, 0, d0);\n#else\n\t  n0 = n0 - d0;\n\t  n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);\n\t  n0 &= GMP_NUMB_MASK;\n#endif\n\t  r += d1;\n\t  if (r >= d1)\t/* If not carry, test Q again.  */\n\t    goto q_test;\n\t}\n\n      qp[i] = q;\n#if GMP_NAIL_BITS == 0\n      sub_ddmmss (n1, n0, r, n2, n1, n0);\n#else\n      n0 = n2 - n0;\n      n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);\n      n0 &= GMP_NUMB_MASK;\n#endif\n    }\n  np[1] = n1;\n  np[0] = n0;\n\n  return most_significant_q_limb;\n}\n"
  },
  {
    "path": "mpn/generic/divrem_euclidean_qr_1.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* d is mormalized */\n#define udiv_inverse(i,d) \\\n   do { \\\n      mp_limb_t __X; \\\n      udiv_qrnnd(i, __X, ~(d), GMP_LIMB_MAX, d); \\\n   } while(0)\n\n/* \n   note double length inverse can easily be calculated from single length inverse\n   d is normalised\n*/\n#define udiv_double_inverse(ih, il, d) \\\n  do { \\\n     mp_limb_t __X; \\\n     udiv_qrnnd(ih, __X, ~(d), GMP_LIMB_MAX, d); \\\n     udiv_qrnnd(il, __X, __X, GMP_LIMB_MAX, d); \\\n  }while(0)\n\n/* set to 1 = store or 0 = not store */\n#define STORE_QUOTIENT 1\n/* set to 0 = udiv  1 = gmp-preinv   2 = barrett */\n#define UDIV_METHOD 1\n\n#if UDIV_NEEDS_NORMALIZATION == 1 || UDIV_METHOD == 1\n#define NORMALIZE 1\n#else\n#define NORMALIZE 0\n#endif\n\n#if UDIV_METHOD == 0\n#define UDIV(q, r, h, l, d, i) udiv_qrnnd(q, r, h, l, d)\n#endif\n\n#if UDIV_METHOD == 1\n#define UDIV udiv_qrnnd_preinv\n#endif\n\n#if UDIV_METHOD == 2\n#define UDIV udiv_qrnnd_barrett\n#endif\n\n/*\n   (xp, n) = (qp, n)*d + r and 0 <= r < 
d\n\n   In hensel-div we use shiftout which means we can use mmx shifting \n   and don't need to always use it\n   \n   In euclid-div shiftout needs a final div for the remainder\n*/\n#if STORE_QUOTIENT\nmp_limb_t mpn_divrem_euclidean_qr_1(mp_ptr qp, mp_size_t qxn, \n                                   mp_srcptr xp, mp_size_t n, mp_limb_t d)\n#else\nmp_limb_t mpn_divrem_euclidean_r_1(mp_srcptr xp, mp_size_t n, mp_limb_t d)\n#endif\n{\n   mp_size_t j;\n   mp_limb_t r = 0, s = 0, h, l, q, i;\n\n#if STORE_QUOTIENT\n   ASSERT_ALWAYS(qxn == 0);\n#endif\n\n   ASSERT(n > 0);\n   ASSERT(d != 0);\n   ASSERT_MPN(xp, n);\n#if STORE_QUOTIENT\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n#endif\n\n   /* for n = 1 or n = 2 probably faster to do a special case */\n#if NORMALIZE == 1\n   count_leading_zeros(s, d);\n   \n   d = d<<s;\n\n   invert_limb(i, d);\n#endif\n\n   for (j = n - 1; j >= 0; j--)\n   {\n      l = xp[j]; /* out dlimb is (h = r,l) */\n      h = (l>>((GMP_LIMB_BITS - 1 - s))>>1);\n      l = l<<s; /* shift dlimb left by s */\n      h = h + r; /* carry in */\n     \n      UDIV(q, r, h, l, d, i);\t/* carry out, carry-out to carry-in is the critical latency bottleneck */\n   \n#if STORE_QUOTIENT\n      qp[j] = q;\n#endif\n   }\n\n   ASSERT((r<<(GMP_LIMB_BITS - 1 - s)<<1) ==0 ); /* i.e. bottom s bits of r are zero */\n\n   r>>=s;\n\n   return r;\n}    \n"
  },
  {
    "path": "mpn/generic/divrem_euclidean_qr_2.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t mpn_divrem_euclidean_qr_2(mp_ptr qp, mp_ptr xp, mp_size_t xn, mp_srcptr dp)\n{\n   mp_size_t qn;\n   mp_limb_t qf, t[2], t1[2], q, h, l, d1, d2, i;\n   int c1, c3, c4;\n\n   ASSERT(xn >= 2);\n   ASSERT_MPN(dp, 2);\n   ASSERT_MPN(xp, xn);\n   ASSERT(dp[1] != 0);\n\n   qn = xn - 1;\n\n   /* ASSERT(!MPN_OVERLAP_P(qp, qn, xp, xn)); */ /* FIXME: correct this overlap requirement */\n   ASSERT((dp[1]>>(GMP_NUMB_BITS - 1)) != 0);\n\n   h = 0;\n   d1 = dp[1];\n   d2 = dp[0];\n   \n   invert_limb(i, d1);\n   \n   l = xp[xn - 1];\n   qn = xn - 2;\n   t[0] = xp[qn];\n\n   if (l < d1)\n   { \n      h = t[1] = l;\n      l = t[0] = xp[qn];\n      qf = 0;\n   }\n   else\n   {\n      qf = 1;\n      t[1] = l - d1;\n      t1[1] = 0;\n      t1[0] = d2;\n   \n      if (mpn_sub_n(t, t, t1, 2))\n      {\n         qf--;\n         mpn_add_n(t, t, dp, 2);\n      }\n   \n      h = t[1];\n      l = t[0];\n   }\n\n   for (qn = xn - 3; qn >= 0; qn--)\n   {\n      t[0] = xp[qn];\n    \n      if (h < 
d1)\n      {\n         udiv_qrnnd_preinv(q, t[1], h, l, d1, i);\n         umul_ppmm(t1[1], t1[0], q, d2);\n         if (mpn_sub_n(t, t, t1, 2))\n         {\n            q--;\n            if (mpn_add_n(t, t, dp, 2) == 0)\n            {\n               q--;\n               \n               ASSERT_CARRY(mpn_add_n(t, t, dp, 2));\n            }\n         }\n      }\n      else\n      {\n         ASSERT(h == d1);\n         q = -1;\n         t[1] = l;\n         c3 = mpn_add_n(t, t, dp, 2);\n         c1 = mpn_sub_1(t + 1, t + 1, 1, d2);\n         c4 = c3 - c1;\n       \n         if (l >= d1)\n         {\n            ASSERT(c3 != 0);\n            ASSERT(c4 == 0);\n         } /* our guess is B + 1, so q = B - 1 is correct */\n         else\n         {\n            ASSERT(c4 <= 0); /* our guess is B so q = B - 1 or B - 2 */\n            if (c4 != 0)\n            {\n               q--;\n               mpn_add_n(t, t, dp, 2);\n            }\n         }       \n      }\n    \n      h = t[1];\n      l = t[0];\n      qp[qn] = q;\n   }\n\n   xp[1] = t[1];\n   xp[0] = t[0];\n\n   return qf;\n}\n"
  },
  {
    "path": "mpn/generic/divrem_euclidean_r_1.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* in each round we remove one limb from the body, i.e. k = 1 */\nmp_limb_t mpn_mod_1_1_wrap(mp_srcptr xp, mp_size_t xn, mp_limb_t d)\n{\n   mp_limb_t dummy, h, l, sh, sl, ret, i, c, ds, db[2], rem[2];\n   mp_size_t j;\n \n   ASSERT_MPN(xp, xn);\n\n   if (xn == 0)\n      return 0;\n\n   if (xn == 1)\n      return xp[0]%d;\n\n   ASSERT(d - 1 <= GMP_LIMB_HIGHBIT);\n\n   count_leading_zeros(c, d);\n   ds = d<<c;\n\n   invert_limb(i, ds);\n\n   udiv_qrnnd_preinv(dummy, db[0], ((mp_limb_t) 1)<<c, 0, ds, i);  /* this is B%ds */\n   udiv_qrnnd_preinv(dummy, db[1], db[0], 0, ds, i); /* this is B^2%ds, could calc. indep. of db[0] */\n\n   db[0] >>= c;\n   db[1] >>= c; /* these are now B^i %d */\n\n   mpn_mod_1_1(rem, xp, xn, db);\n\n   sh = rem[1];\n   sl = rem[0];\n\n   ASSERT(sh < d);\n\n   udiv_qrnnd_preinv(dummy, ret, (sh<<c) | ((sl>>(GMP_LIMB_BITS - 1 - c))>>1), sl<<c, ds, i);\n\n   return ret>>c;\n}\n\n/* in each round we remove two limbs from the body, i.e. 
k = 2 */\nmp_limb_t mpn_mod_1_2_wrap(mp_srcptr xp, mp_size_t xn, mp_limb_t d)\n{\n   mp_limb_t dummy, h, l, sh, sl, th, tl, i, ret, ds, c, db[3], rem[2];\n   mp_size_t j;\n \n   ASSERT_MPN(xp, xn);\n\n   if (xn == 0)\n      return 0;\n\n   if (xn == 1)\n      return xp[0]%d;\n\n   umul_ppmm(h, l, d - 1, 3);\n   \n   ASSERT(h == 0 || (h == 1 && l == 0)); /* i.e. (k + 1)(d - 1) <= B */\n\n   count_leading_zeros(c, d);\n   \n   ds = d<<c;\n\n   invert_limb(i, ds);\n\n   udiv_qrnnd_preinv(dummy, db[0], ((mp_limb_t) 1)<<c, 0, ds, i);\n   udiv_qrnnd_preinv(dummy, db[1], db[0], 0, ds, i);\n   db[0]>>=c;\n   udiv_qrnnd_preinv(dummy, db[2], db[1], 0, ds, i);\n   db[1]>>=c;\n   db[2]>>=c;\n\n   mpn_mod_1_2(rem, xp, xn, db);\n   h = rem[1];\n   l = rem[0];\n\n   ASSERT(h < d);\n\n   udiv_qrnnd_preinv(dummy, ret, (h<<c) | ((l>>(GMP_LIMB_BITS - 1 - c))>>1), l<<c, ds, i);\n\n   return ret>>c;\n}\n\n/* in each round we remove 3 limbs from the body */\nmp_limb_t mpn_mod_1_3_wrap(mp_srcptr xp, mp_size_t xn, mp_limb_t d)\n{\n   mp_limb_t dummy, h, l, sh, sl, th, tl, i, ret, ds, c, db[4], rem[2];\n   mp_size_t j, jj;\n \n   ASSERT_MPN(xp, xn);\n\n   if (xn == 0)\n      return 0;\n\n   if (xn == 1)\n      return xp[0]%d;\n\n   umul_ppmm(h, l, d - 1, 4);\n   \n   ASSERT(h == 0 || (h == 1 && l == 0)); /* i.e. 
(k + 1)(d - 1) <= B */\n\n   count_leading_zeros(c, d);\n   ds = d<<c;\n\n   invert_limb(i, ds);\n\n   udiv_qrnnd_preinv(dummy, db[0], ((mp_limb_t) 1)<<c, 0, ds, i);\n   udiv_qrnnd_preinv(dummy, db[1], db[0], 0, ds, i);\n   \n   db[0]>>=c;\n   udiv_qrnnd_preinv(dummy, db[2], db[1], 0, ds, i);\n   db[1]>>=c;\n   udiv_qrnnd_preinv(dummy, db[3], db[2], 0, ds, i);\n   db[2]>>=c;\n   db[3]>>=c;\n\n   mpn_mod_1_3(rem, xp, xn, db);\n   h = rem[1];\n   l = rem[0];\n\n   ASSERT(h < d);\n\n   udiv_qrnnd_preinv(dummy, ret, (h<<c) | ((l>>(GMP_LIMB_BITS - 1 - c))>>1), l<<c, ds, i);\n\n   return ret>>c;\n}\n\n/* \n   This is a generic version for k >= 2 \n   In each round we remove k limbs from the body\n*/\nmp_limb_t mpn_mod_1_k(mp_srcptr xp, mp_size_t xn, mp_limb_t d, mp_size_t k)\n{\n   mp_limb_t dummy, h, l, sh, sl, th, tl, i, ret, ds, c, db[30]; /* need k + 1 entries in array */\n   mp_size_t j,jj;\n \n   ASSERT_MPN(xp, xn);\n\n   if (xn == 0)\n      return 0;\n\n   if (xn == 1)\n      return xp[0]%d;\n\n   ASSERT(k >= 2);\n\n   umul_ppmm(h, l, d - 1, k + 1);\n   \n   ASSERT(h == 0 || (h == 1 && l == 0)); /* i.e. 
(k + 1)(d - 1) <= B */\n\n   count_leading_zeros(c, d);\n   \n   ds = d<<c;\n\n   invert_limb(i, ds);\n\n   udiv_qrnnd_preinv(dummy, db[0], ((mp_limb_t) 1)<<c, 0, ds, i);\n\n   ASSERT_ALWAYS(k + 1 <= numberof(db));\n\n   for (j = 1; j <= k; j++)\n   {\n      udiv_qrnnd_preinv(dummy, db[j], db[j - 1], 0, ds, i);\n      db[j - 1]>>=c;\n   }\n   \n   /* now db[j] = B^j % d */\n\n   db[k]>>=c;\n   tl = xp[xn - 2];\n   th = xp[xn - 1];\n\n   for (j = xn - k - 2; j >= 0; j -= k)\n   {\n      umul_ppmm(sh, sl, xp[j + 1], db[0]);\n      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);\n     \n      for (jj = 2; jj <= k - 1; jj++)\n      {\n         umul_ppmm(h, l, xp[j + jj], db[jj - 1]);\n         add_ssaaaa(sh, sl, sh, sl, h, l);\n      }\n    \n      umul_ppmm(h, l, tl, db[k - 1]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(th, tl, th, db[k]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n\n   if (j + k > 0) /* we have at least three limbs to do i.e. xp[0], ..., tl, th */\n   {\n      sh = 0;\n      sl = xp[0];\n\n      for (jj = 1; jj < j + k; jj++)\n      {\n         umul_ppmm(h, l, xp[jj], db[jj - 1]);\n         add_ssaaaa(sh, sl, sh, sl, h, l);\n      }\n   \n      umul_ppmm(h, l, tl, db[jj - 1]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(th, tl, th, db[jj]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n   \n   umul_ppmm(h, l, th, db[0]);\n   add_ssaaaa(h, l, h, l, 0, tl);\n\n   ASSERT(h < d);\n\n   udiv_qrnnd_preinv(dummy, ret, (h<<c) | (l>>(GMP_LIMB_BITS - c)), l<<c, ds, i);\n\n   return ret>>c;\n}\n\n/* d is mormalized */\n#define udiv_inverse(i,d) \\\n   do { \\\n      mp_limb_t __X; \\\n      udiv_qrnnd(i, __X, ~(d), GMP_LIMB_MAX, d); \\\n   } while(0)\n\n/* \n   note double length inverse can easily be calculated from single length inverse\n   d is normalised\n*/\n#define udiv_double_inverse(ih,il,d) \\\n  do { \\\n     mp_limb_t __X; \\\n     udiv_qrnnd(ih, __X, ~(d), GMP_LIMB_MAX, d); \\\n     udiv_qrnnd(il, __X, __X, 
GMP_LIMB_MAX, d); \\\n  } while(0)\n\n/* set to 1 = store or 0 = not store */\n#define STORE_QUOTIENT 0\n/* set to 0 = udiv, 1 = gmp-preinv, 2 = barrett */\n#define UDIV_METHOD 1\n\n#if UDIV_NEEDS_NORMALIZATION == 1 || UDIV_METHOD == 1\n#define NORMALIZE 1\n#else\n#define NORMALIZE 0\n#endif\n\n#if UDIV_METHOD == 0\n#define UDIV(q,r,h,l,d,i) udiv_qrnnd(q,r,h,l,d)\n#endif\n\n#if UDIV_METHOD == 1\n#define UDIV udiv_qrnnd_preinv\n#endif\n\n#if UDIV_METHOD == 2\n#define UDIV udiv_qrnnd_barrett\n#endif\n\n/*\n   (xp, n) = (qp, n)*d + r and 0 <= r < d\n\n   In hensel-div we use shiftout which means we can use mmx shifting and \n   dont need to always use it\n   \n   In euclid-div shiftout needs a final div for the remainder\n*/\n#if STORE_QUOTIENT\nmp_limb_t mpn_divrem_euclidean_qr_1(mp_ptr qp, mp_size_t qxn, \n                                   mp_srcptr xp, mp_size_t n, mp_limb_t d)\n#else\nmp_limb_t mpn_divrem_euclidean_r_1(mp_srcptr xp, mp_size_t n, mp_limb_t d)\n#endif\n{\n   mp_size_t j;\n   mp_limb_t r = 0, s = 0, h, l, q, i;\n\n#if STORE_QUOTIENT\n   ASSET_ALWAYS(qxn == 0);\n#endif\n\n   ASSERT(n > 0);\n   ASSERT(d != 0);\n   ASSERT_MPN(xp, n);\n\n#if STORE_QUOTIENT\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n#endif\n\n   if (d <= GMP_LIMB_HIGHBIT/2 + 1 && ABOVE_THRESHOLD(n, MOD_1_3_THRESHOLD))\n      return mpn_mod_1_3_wrap(xp, n, d);\n\n   if (d <= MP_LIMB_T_MAX/3 + 1 && ABOVE_THRESHOLD(n, MOD_1_2_THRESHOLD))\n      return mpn_mod_1_2_wrap(xp, n, d);\n\n   if (d <= GMP_LIMB_HIGHBIT + 1 && ABOVE_THRESHOLD(n, MOD_1_1_THRESHOLD))\n      return mpn_mod_1_1_wrap(xp, n, d);\n\n   /* for n = 1 or n = 2 probably faster to do a special case */\n\n#if NORMALIZE == 1\n   count_leading_zeros(s, d);\n   \n   d = d<<s;\n \n   invert_limb(i, d);\n#endif\n\n   for (j = n - 1; j >= 0; j--)\n   {\n      l = xp[j]; /* out dlimb is (h = r, l) */\n      h = (l>>((GMP_LIMB_BITS - 1 - s))>>1);\n      l = l<<s; /* shift dlimb left by s */\n      h = h + r; /* carry in */\n    \n   
   UDIV(q, r, h, l, d, i);\t/* carry out, carry-out to carry-in is the critical latency bottleneck */\n    \n#if STORE_QUOTIENT\n      qp[j] = q;\n#endif\n   }\n\n   ASSERT((r<<(GMP_LIMB_BITS - 1 - s)<<1) == 0); /* ie bottom s bits of r are zero */\n\n   r>>=s;\n\n   return r;\n}    \n"
  },
  {
    "path": "mpn/generic/divrem_hensel_qr_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d */\nmp_limb_t mpn_divrem_hensel_qr_1(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)\n{\n   ASSERT(n > 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(d%2 == 1);\n\n   if (BELOW_THRESHOLD(n, DIVREM_HENSEL_QR_1_THRESHOLD))\n      return mpn_divrem_hensel_qr_1_1(qp, xp, n, d);\n\n   return mpn_divrem_hensel_qr_1_2(qp, xp, n, d);\n}\n"
  },
  {
    "path": "mpn/generic/divrem_hensel_qr_1_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d */\nmp_limb_t mpn_divrem_hensel_qr_1_1(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t, m;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(d%2 == 1);\n   \n   modlimb_invert(m, d);\n\n   c = 0;\n   h = 0;\n   t = 0;\n\n   for (j = 0; j <= n - 1; j++)\n   {\n      h1 = xp[j];\n      t = h + c;\n      \n      if (t > h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n    \n      q = h1*m;\n      qp[j] = q;\n      umul_ppmm(h, dummy, q, d);\n    \n      ASSERT(dummy == h1);\n   }\n\n   return h + c;\n}   \n"
  },
  {
    "path": "mpn/generic/divrem_hensel_qr_1_2.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* \n   using a two limb inverse of a one limb divisor \n   (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d\n*/\nmp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t, ml, mh, xl, xh, ql, qh;\n\n   ASSERT(n >= 2);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(d%2 == 1);\n\n   modlimb_invert(ml, d);\n\n   umul_ppmm(h, dummy, d, ml);\n   \n   ASSERT(dummy == 1);\n\n   h = -h;\n   mh = ml*h; /* (mh, ml) is our two limb inverse */\n   c = 0;\n   h = 0;\n\n   for (j = 0; j + 1 <= n - 1; j += 2)\n   {\n      xl = xp[j];\n      xh = xp[j + 1];\n      t = h + c;\n      \n      if (xh == 0 && t > xl) c = 1;\n      else c = 0;\n    \n      sub_ddmmss(xh, xl, xh, xl, 0, t);\n      umul_ppmm(qh, ql, xl, ml);\n      qh = qh + xh*ml + xl*mh;\n      qp[j] = ql;\n      qp[j + 1] = qh;\n      umul_ppmm(h, h1, qh, d);\n    \n      if (h1 > xh)\n         h++;\n   }\n\n   if (j <= n - 1)\n   {\n      h1 = xp[j];\n      t = h + c;\n      \n      if (t > 
h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n   \n      q = h1*ml;\n      qp[j] = q;\n      umul_ppmm(h, dummy, q, d);\n   \n      ASSERT(dummy == h1);\n   }\n \n   return h + c;\n}   \n"
  },
  {
    "path": "mpn/generic/divrem_hensel_r_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* \n   (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d  \n   same as qr version but with q not stored\n*/\nmp_limb_t mpn_divrem_hensel_r_1(mp_srcptr xp, mp_size_t n, mp_limb_t d)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t, m;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(d%2 == 1);\n\n   modlimb_invert(m, d);\n\n   c = 0;\n   h = 0;\n   t = 0;\n\n   for (j = 0; j <= n - 1; j++)\n   {\n      h1 = xp[j];\n      t = h + c;\n      \n      if (t > h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n     \n      q = h1*m;\n      umul_ppmm(h, dummy, q, d);\n    \n      ASSERT(dummy == h1);\n   }\n\n   return h + c;\n}   \n"
  },
  {
    "path": "mpn/generic/divrem_hensel_rsh_qr_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t mpn_divrem_hensel_rsh_qr_1_preinv(mp_ptr qp, mp_srcptr xp, \n                                     mp_size_t n, mp_limb_t d, mp_limb_t m, int s)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t;\n\n   ASSERT(n > 0);\n   ASSERT(d%2 == 1);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(s >= 0);\n   \n   c = 0;\n   h = 0;\n   t = 0;\n\n   for (j = 0; j <= n - 2; j++)\n   {\n      h1 = (xp[j]>>s) | (xp[j + 1]<<(GMP_LIMB_BITS - 1 - s)<<1);\n      t = h + c;\n      \n      if (t > h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n    \n      q = h1*m;\n      qp[j] = q;\n      umul_ppmm(h, dummy, q, d);\n      ASSERT(dummy == h1);\n   }\n\n   h1 = xp[n - 1]>>s;\n   t = h + c;\n   \n   if (t > h1)\n   {\n      h1 = h1 - t;\n      c = 1;\n   }\n   else\n   {\n      h1 = h1 - t;\n      c = 0;\n   }\n\n   q = h1*m;\n   qp[n - 1] = q;\n   umul_ppmm(h, dummy, q, d);\n\n   ASSERT(dummy == h1);\n\n   return h + c;\n}   \n\nmp_limb_t 
mpn_divrem_hensel_rsh_qr_1(mp_ptr qp, mp_srcptr xp, \n                                     mp_size_t n, mp_limb_t d, int s)\n{\n   mp_limb_t m;\n\n   modlimb_invert(m, d); /* should we allow s=0 ?? */\n\n   return mpn_divrem_hensel_rsh_qr_1_preinv(qp, xp, n, d, m, s);\n}\n"
  },
  {
    "path": "mpn/generic/dump.c",
    "content": "/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO\n   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS\n   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  
If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_NUMB_BITS % 4 == 0\nvoid\nmpn_dump (mp_srcptr ptr, mp_size_t n)\n{\n  MPN_NORMALIZE (ptr, n);\n\n  if (n == 0)\n    printf (\"0\\n\");\n  else\n    {\n      n--;\n#if _LONG_LONG_LIMB\n      if ((ptr[n] >> GMP_LIMB_BITS / 2) != 0)\n\t{\n\t  printf (\"%lX\", (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));\n\t  printf (\"%0*lX\", (GMP_LIMB_BITS / 2 / 4), (unsigned long) ptr[n]);\n\t}\n      else\n#endif\n\tprintf (\"%lX\", (unsigned long) ptr[n]);\n\n      while (n)\n\t{\n\t  n--;\n#if _LONG_LONG_LIMB\n\t  printf (\"%0*lX\", (GMP_NUMB_BITS - GMP_LIMB_BITS / 2) / 4,\n\t\t  (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));\n\t  printf (\"%0*lX\", GMP_LIMB_BITS / 2 / 4, (unsigned long) ptr[n]);\n#else\n\t  printf (\"%0*lX\", GMP_NUMB_BITS / 4, (unsigned long) ptr[n]);\n#endif\n\t}\n      printf (\"\\n\");\n    }\n}\n\n#else\n\nstatic void\nmpn_recdump (mp_ptr p, mp_size_t n)\n{\n  mp_limb_t lo;\n  if (n != 0)\n    {\n      lo = p[0] & 0xf;\n      mpn_rshift (p, p, n, 4);\n      mpn_recdump (p, n);\n      printf (\"%lX\", lo);\n    }\n}\n\nvoid\nmpn_dump (mp_srcptr p, mp_size_t n)\n{\n  mp_ptr tp;\n  TMP_DECL;\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (n);\n  MPN_COPY (tp, p, n);\n  TMP_FREE;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/fib2_ui.c",
    "content": "/* mpn_fib2_ui -- calculate Fibonacci numbers.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* change this to \"#define TRACE(x) x\" for diagnostics */\n#define TRACE(x)\n\n\n/* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for\n   MPN_FIB2_SIZE(n) limbs.\n\n   The return value is the actual number of limbs stored, this will be at\n   least 1.  fp[size-1] will be non-zero, except when n==0, in which case\n   fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]\n   (for n>0).\n\n   Notes:\n\n   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the\n   low limb.\n\n   In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -\n   F[k-1]^2.  
This F[2k+1] is an F[4m+3] and such numbers are congruent to\n   1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since\n   that would leave 6 or 7 mod 8).\n\n   This property of F[4m+3] can be verified by induction on F[4m+3] =\n   7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence\n   identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.\n\n   Enhancements:\n\n   If there was an mpn_addlshift, it'd be possible to eliminate the yp\n   temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,\n   fp+=2*(-1)^n, etc.  */\n\nmp_size_t\nmpn_fib2_ui (mp_ptr fp, mp_ptr f1p, mpir_ui n)\n{\n  mp_ptr         xp, yp;\n  mp_size_t      size;\n  mpir_ui  nfirst, mask;\n  TMP_DECL;\n\n  TRACE (printf (\"mpn_fib2_ui n=%lu\\n\", n));\n\n  ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));\n\n  /* Take a starting pair from the table. */\n  mask = 1;\n  for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)\n    mask <<= 1;\n  TRACE (printf (\"nfirst=%lu mask=0x%lX\\n\", nfirst, mask));\n\n  f1p[0] = FIB_TABLE ((int) nfirst - 1);\n  fp[0]  = FIB_TABLE (nfirst);\n  size = 1;\n\n  /* Skip to the end if the table lookup gives the final answer. */\n  if (mask != 1)\n    {\n      mp_size_t  alloc;\n\n      TMP_MARK;\n      alloc = MPN_FIB2_SIZE (n);\n      TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);\n\n      do\n\t{\n\t  mp_limb_t  c;\n\n\t  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from\n\t     n&mask upwards.\n\n\t     The next bit of n is n&(mask>>1) and we'll double to the pair\n\t     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as\n\t     that bit is 0 or 1 respectively.  
*/\n\n\t  TRACE (printf (\"k=%lu mask=0x%lX size=%ld alloc=%ld\\n\",\n\t\t\t n >> refmpn_count_trailing_zeros(mask),\n\t\t\t mask, size, alloc);\n\t\t mpn_trace (\"fp \", fp, size);\n\t\t mpn_trace (\"f1p\", f1p, size));\n\n\t  /* fp normalized, f1p at most one high zero */\n\t  ASSERT (fp[size-1] != 0);\n\t  ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);\n\n\t  /* f1p[size-1] might be zero, but this occurs rarely, so it's not\n\t     worth bothering checking for it */\n\t  ASSERT (alloc >= 2*size);\n\t  mpn_sqr (xp, fp,  size);\n\t  mpn_sqr (yp, f1p, size);\n\t  size *= 2;\n\n\t  /* Shrink if possible.  Since fp was normalized there'll be at\n\t     most one high zero on xp (and if there is then there's one on\n\t     yp too).  */\n\t  ASSERT (xp[size-1] != 0 || yp[size-1] == 0);\n\t  size -= (xp[size-1] == 0);\n\t  ASSERT (xp[size-1] != 0);  /* only one xp high zero */\n\n\t  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.\n\t     n&mask is the low bit of our implied k.  */\n\t  c = mpn_lshift2 (fp, xp, size);\n\t  fp[0] |= (n & mask ? 0 : 2);\t /* possible +2 */\n\t  c -= mpn_sub_n (fp, fp, yp, size);\n\t  ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);\n\t  fp[0] -= (n & mask ? 2 : 0);\t /* possible -2 */\n\t  ASSERT (alloc >= size+1);\n\t  xp[size] = 0;\n\t  yp[size] = 0;\n\t  fp[size] = c;\n\t  size += (c != 0);\n\n\t  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.\n\t     F[2k-1]<F[2k+1] so no carry out of \"size\" limbs. */\n\t  ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));\n\n\t  /* now n&mask is the new bit of n being considered */\n\t  mask >>= 1;\n\n\t  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of\n\t     F[2k+1] and F[2k-1].  */\n\t  ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));\n\n\t  /* Can have a high zero after replacing F[2k+1] with F[2k].\n\t     f1p will have a high zero if fp does. 
*/\n\t  ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);\n\t  size -= (fp[size-1] == 0);\n\t}\n      while (mask != 1);\n\n      TMP_FREE;\n    }\n\n  TRACE (printf (\"done size=%ld\\n\", size);\n\t mpn_trace (\"fp \", fp, size);\n\t mpn_trace (\"f1p\", f1p, size));\n\n  return size;\n}\n"
  },
  {
    "path": "mpn/generic/fib_table.c",
    "content": "\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_NUMB_BITS == 32\n\nconst mp_limb_t\n__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n  CNST_LIMB (0x1),  /* -1 */\n  CNST_LIMB (0x0),  /* 0 */\n  CNST_LIMB (0x1),  /* 1 */\n  CNST_LIMB (0x1),  /* 2 */\n  CNST_LIMB (0x2),  /* 3 */\n  CNST_LIMB (0x3),  /* 4 */\n  CNST_LIMB (0x5),  /* 5 */\n  CNST_LIMB (0x8),  /* 6 */\n  CNST_LIMB (0xd),  /* 7 */\n  CNST_LIMB (0x15),  /* 8 */\n  CNST_LIMB (0x22),  /* 9 */\n  CNST_LIMB (0x37),  /* 10 */\n  CNST_LIMB (0x59),  /* 11 */\n  CNST_LIMB (0x90),  /* 12 */\n  CNST_LIMB (0xe9),  /* 13 */\n  CNST_LIMB (0x179),  /* 14 */\n  CNST_LIMB (0x262),  /* 15 */\n  CNST_LIMB (0x3db),  /* 16 */\n  CNST_LIMB (0x63d),  /* 17 */\n  CNST_LIMB (0xa18),  /* 18 */\n  CNST_LIMB (0x1055),  /* 19 */\n  CNST_LIMB (0x1a6d),  /* 20 */\n  CNST_LIMB (0x2ac2),  /* 21 */\n  CNST_LIMB (0x452f),  /* 22 */\n  CNST_LIMB (0x6ff1),  /* 23 */\n  CNST_LIMB (0xb520),  /* 24 */\n  CNST_LIMB (0x12511),  /* 25 */\n  CNST_LIMB (0x1da31),  /* 26 */\n  CNST_LIMB (0x2ff42),  /* 27 */\n  CNST_LIMB (0x4d973),  /* 28 */\n  CNST_LIMB (0x7d8b5),  /* 29 */\n  CNST_LIMB (0xcb228),  /* 30 */\n  CNST_LIMB (0x148add),  /* 31 */\n  CNST_LIMB (0x213d05),  /* 32 */\n  CNST_LIMB (0x35c7e2),  /* 33 */\n  CNST_LIMB (0x5704e7),  /* 34 */\n  CNST_LIMB (0x8cccc9),  /* 35 */\n  CNST_LIMB (0xe3d1b0),  /* 36 */\n  CNST_LIMB (0x1709e79),  /* 37 */\n  CNST_LIMB (0x2547029),  /* 38 */\n  CNST_LIMB (0x3c50ea2),  /* 39 */\n  CNST_LIMB (0x6197ecb),  /* 40 */\n  CNST_LIMB (0x9de8d6d),  /* 41 */\n  CNST_LIMB (0xff80c38),  /* 42 */\n  CNST_LIMB (0x19d699a5),  /* 43 */\n  CNST_LIMB (0x29cea5dd),  /* 44 */\n  CNST_LIMB (0x43a53f82),  /* 45 */\n  CNST_LIMB (0x6d73e55f),  /* 46 */\n  CNST_LIMB (0xb11924e1),  /* 47 */\n};\n\n#elif GMP_NUMB_BITS == 64\n\nconst mp_limb_t\n__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n  CNST_LIMB (0x1),  /* -1 */\n  CNST_LIMB (0x0),  /* 0 */\n  CNST_LIMB (0x1),  /* 1 */\n  CNST_LIMB (0x1),  /* 2 */\n  CNST_LIMB (0x2),  
/* 3 */\n  CNST_LIMB (0x3),  /* 4 */\n  CNST_LIMB (0x5),  /* 5 */\n  CNST_LIMB (0x8),  /* 6 */\n  CNST_LIMB (0xd),  /* 7 */\n  CNST_LIMB (0x15),  /* 8 */\n  CNST_LIMB (0x22),  /* 9 */\n  CNST_LIMB (0x37),  /* 10 */\n  CNST_LIMB (0x59),  /* 11 */\n  CNST_LIMB (0x90),  /* 12 */\n  CNST_LIMB (0xe9),  /* 13 */\n  CNST_LIMB (0x179),  /* 14 */\n  CNST_LIMB (0x262),  /* 15 */\n  CNST_LIMB (0x3db),  /* 16 */\n  CNST_LIMB (0x63d),  /* 17 */\n  CNST_LIMB (0xa18),  /* 18 */\n  CNST_LIMB (0x1055),  /* 19 */\n  CNST_LIMB (0x1a6d),  /* 20 */\n  CNST_LIMB (0x2ac2),  /* 21 */\n  CNST_LIMB (0x452f),  /* 22 */\n  CNST_LIMB (0x6ff1),  /* 23 */\n  CNST_LIMB (0xb520),  /* 24 */\n  CNST_LIMB (0x12511),  /* 25 */\n  CNST_LIMB (0x1da31),  /* 26 */\n  CNST_LIMB (0x2ff42),  /* 27 */\n  CNST_LIMB (0x4d973),  /* 28 */\n  CNST_LIMB (0x7d8b5),  /* 29 */\n  CNST_LIMB (0xcb228),  /* 30 */\n  CNST_LIMB (0x148add),  /* 31 */\n  CNST_LIMB (0x213d05),  /* 32 */\n  CNST_LIMB (0x35c7e2),  /* 33 */\n  CNST_LIMB (0x5704e7),  /* 34 */\n  CNST_LIMB (0x8cccc9),  /* 35 */\n  CNST_LIMB (0xe3d1b0),  /* 36 */\n  CNST_LIMB (0x1709e79),  /* 37 */\n  CNST_LIMB (0x2547029),  /* 38 */\n  CNST_LIMB (0x3c50ea2),  /* 39 */\n  CNST_LIMB (0x6197ecb),  /* 40 */\n  CNST_LIMB (0x9de8d6d),  /* 41 */\n  CNST_LIMB (0xff80c38),  /* 42 */\n  CNST_LIMB (0x19d699a5),  /* 43 */\n  CNST_LIMB (0x29cea5dd),  /* 44 */\n  CNST_LIMB (0x43a53f82),  /* 45 */\n  CNST_LIMB (0x6d73e55f),  /* 46 */\n  CNST_LIMB (0xb11924e1),  /* 47 */\n  CNST_LIMB (0x11e8d0a40),  /* 48 */\n  CNST_LIMB (0x1cfa62f21),  /* 49 */\n  CNST_LIMB (0x2ee333961),  /* 50 */\n  CNST_LIMB (0x4bdd96882),  /* 51 */\n  CNST_LIMB (0x7ac0ca1e3),  /* 52 */\n  CNST_LIMB (0xc69e60a65),  /* 53 */\n  CNST_LIMB (0x1415f2ac48),  /* 54 */\n  CNST_LIMB (0x207fd8b6ad),  /* 55 */\n  CNST_LIMB (0x3495cb62f5),  /* 56 */\n  CNST_LIMB (0x5515a419a2),  /* 57 */\n  CNST_LIMB (0x89ab6f7c97),  /* 58 */\n  CNST_LIMB (0xdec1139639),  /* 59 */\n  CNST_LIMB (0x1686c8312d0),  /* 60 */\n  CNST_LIMB 
(0x2472d96a909),  /* 61 */\n  CNST_LIMB (0x3af9a19bbd9),  /* 62 */\n  CNST_LIMB (0x5f6c7b064e2),  /* 63 */\n  CNST_LIMB (0x9a661ca20bb),  /* 64 */\n  CNST_LIMB (0xf9d297a859d),  /* 65 */\n  CNST_LIMB (0x19438b44a658),  /* 66 */\n  CNST_LIMB (0x28e0b4bf2bf5),  /* 67 */\n  CNST_LIMB (0x42244003d24d),  /* 68 */\n  CNST_LIMB (0x6b04f4c2fe42),  /* 69 */\n  CNST_LIMB (0xad2934c6d08f),  /* 70 */\n  CNST_LIMB (0x1182e2989ced1),  /* 71 */\n  CNST_LIMB (0x1c5575e509f60),  /* 72 */\n  CNST_LIMB (0x2dd8587da6e31),  /* 73 */\n  CNST_LIMB (0x4a2dce62b0d91),  /* 74 */\n  CNST_LIMB (0x780626e057bc2),  /* 75 */\n  CNST_LIMB (0xc233f54308953),  /* 76 */\n  CNST_LIMB (0x13a3a1c2360515),  /* 77 */\n  CNST_LIMB (0x1fc6e116668e68),  /* 78 */\n  CNST_LIMB (0x336a82d89c937d),  /* 79 */\n  CNST_LIMB (0x533163ef0321e5),  /* 80 */\n  CNST_LIMB (0x869be6c79fb562),  /* 81 */\n  CNST_LIMB (0xd9cd4ab6a2d747),  /* 82 */\n  CNST_LIMB (0x16069317e428ca9),  /* 83 */\n  CNST_LIMB (0x23a367c34e563f0),  /* 84 */\n  CNST_LIMB (0x39a9fadb327f099),  /* 85 */\n  CNST_LIMB (0x5d4d629e80d5489),  /* 86 */\n  CNST_LIMB (0x96f75d79b354522),  /* 87 */\n  CNST_LIMB (0xf444c01834299ab),  /* 88 */\n  CNST_LIMB (0x18b3c1d91e77decd),  /* 89 */\n  CNST_LIMB (0x27f80ddaa1ba7878),  /* 90 */\n  CNST_LIMB (0x40abcfb3c0325745),  /* 91 */\n  CNST_LIMB (0x68a3dd8e61eccfbd),  /* 92 */\n  CNST_LIMB (0xa94fad42221f2702),  /* 93 */\n};\n\n#else\n#error no data available for this limb size in fib_table.c\n#endif\n"
  },
  {
    "path": "mpn/generic/gcd.c",
    "content": "/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,\n2004, 2005, 2008, 2010, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Uses the HGCD operation described in\n\n     N. Möller, On Schönhage's algorithm and subquadratic integer gcd\n     computation, Math. Comp. 77 (2008), 589-607.\n\n  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and\n  then uses Lehmer's algorithm.\n*/\n\n/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +\n * 2)/3, which gives a balanced multiplication in\n * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better\n * performance. The matrix-vector multiplication is then\n * 4:1-unbalanced, with matrix elements of size n/6, and vector\n * elements of size p = 2n/3. */\n\n/* From analysis of the theoretical running time, it appears that when\n * multiplication takes time O(n^alpha), p should be chosen so that\n * the ratio of the time for the mpn_hgcd call, and the time for the\n * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -\n * 1). 
*/\n#ifdef TUNE_GCD_P\n#define P_TABLE_SIZE 10000\nmp_size_t p_table[P_TABLE_SIZE];\n#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)\n#else\n#define CHOOSE_P(n) (2*(n) / 3)\n#endif\n\nstruct gcd_ctx\n{\n  mp_ptr gp;\n  mp_size_t gn;\n};\n\nstatic void\ngcd_hook (void *p, mp_srcptr gp, mp_size_t gn,\n\t  mp_srcptr qp, mp_size_t qn, int d)\n{\n  struct gcd_ctx *ctx = (struct gcd_ctx *) p;\n  MPN_COPY (ctx->gp, gp, gn);\n  ctx->gn = gn;\n}\n\n#if GMP_NAIL_BITS > 0\n/* Nail supports should be easy, replacing the sub_ddmmss with nails\n * logic. */\n#error Nails not supported.\n#endif\n\n/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.\n   Both U and V must be odd. */\nstatic inline mp_size_t\ngcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)\n{\n  mp_limb_t u0, u1, v0, v1;\n  mp_size_t gn;\n\n  u0 = up[0];\n  u1 = up[1];\n  v0 = vp[0];\n  v1 = vp[1];\n\n  ASSERT (u0 & 1);\n  ASSERT (v0 & 1);\n\n  /* Check for u0 != v0 needed to ensure that argument to\n   * count_trailing_zeros is non-zero. */\n  while (u1 != v1 && u0 != v0)\n    {\n      unsigned long int r;\n      if (u1 > v1)\n\t{\n\t  sub_ddmmss (u1, u0, u1, u0, v1, v0);\n\t  count_trailing_zeros (r, u0);\n\t  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);\n\t  u1 >>= r;\n\t}\n      else  /* u1 < v1.  */\n\t{\n\t  sub_ddmmss (v1, v0, v1, v0, u1, u0);\n\t  count_trailing_zeros (r, v0);\n\t  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);\n\t  v1 >>= r;\n\t}\n    }\n\n  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);\n\n  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */\n  if (u1 == v1 && u0 == v0)\n    return gn;\n\n  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? 
u0-v0 : v0-u0);\n  gp[0] = mpn_gcd_1 (gp, gn, v0);\n\n  return 1;\n}\n\nmp_size_t\nmpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)\n{\n  mp_size_t talloc;\n  mp_size_t scratch;\n  mp_size_t matrix_scratch;\n\n  struct gcd_ctx ctx;\n  mp_ptr tp;\n  TMP_DECL;\n\n  ASSERT (usize >= n);\n  ASSERT (n > 0);\n  ASSERT (vp[n-1] > 0);\n\n  /* FIXME: Check for small sizes first, before setting up temporary\n     storage etc. */\n  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);\n\n  /* For initial division */\n  scratch = usize - n + 1;\n  if (scratch > talloc)\n    talloc = scratch;\n\n#if TUNE_GCD_P\n  if (CHOOSE_P (n) > 0)\n#else\n  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))\n#endif\n    {\n      mp_size_t hgcd_scratch;\n      mp_size_t update_scratch;\n      mp_size_t p = CHOOSE_P (n);\n      mp_size_t scratch;\n#if TUNE_GCD_P\n      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)\n\t is increasing */\n      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);\n      hgcd_scratch = mpn_hgcd_itch (n);\n      update_scratch = 2*(n - 1);\n#else\n      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);\n      hgcd_scratch = mpn_hgcd_itch (n - p);\n      update_scratch = p + n - 1;\n#endif\n      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);\n      if (scratch > talloc)\n\ttalloc = scratch;\n    }\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS(talloc);\n\n  if (usize > n)\n    {\n      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);\n\n      if (mpn_zero_p (up, n))\n\t{\n\t  MPN_COPY (gp, vp, n);\n\t  ctx.gn = n;\n\t  goto done;\n\t}\n    }\n\n  ctx.gp = gp;\n\n#if TUNE_GCD_P\n  while (CHOOSE_P (n) > 0)\n#else\n  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))\n#endif\n    {\n      struct hgcd_matrix M;\n      mp_size_t p = CHOOSE_P (n);\n      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);\n      mp_size_t nn;\n      mpn_hgcd_matrix_init (&M, n - p, tp);\n      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);\n      if 
(nn > 0)\n\t{\n\t  ASSERT (M.n <= (n - p - 1)/2);\n\t  ASSERT (M.n + p <= (p + n - 1) / 2);\n\t  /* Temporary storage 2 (p + M->n) <= p + n - 1. */\n\t  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);\n\t}\n      else\n\t{\n\t  /* Temporary storage n */\n\t  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);\n\t  if (n == 0)\n\t    goto done;\n\t}\n    }\n\n  while (n > 2)\n    {\n      struct hgcd_matrix1 M;\n      mp_limb_t uh, ul, vh, vl;\n      mp_limb_t mask;\n\n      mask = up[n-1] | vp[n-1];\n      ASSERT (mask > 0);\n\n      if (mask & GMP_NUMB_HIGHBIT)\n\t{\n\t  uh = up[n-1]; ul = up[n-2];\n\t  vh = vp[n-1]; vl = vp[n-2];\n\t}\n      else\n\t{\n\t  int shift;\n\n\t  count_leading_zeros (shift, mask);\n\t  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);\n\t  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);\n\t  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);\n\t  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);\n\t}\n\n      /* Try an mpn_hgcd2 step */\n      if (mpn_hgcd2 (uh, ul, vh, vl, &M))\n\t{\n\t  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);\n\t  MP_PTR_SWAP (up, tp);\n\t}\n      else\n\t{\n\t  /* mpn_hgcd2 has failed. Then either one of a or b is very\n\t     small, or the difference is very small. Perform one\n\t     subtraction followed by one division. */\n\n\t  /* Temporary storage n */\n\t  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);\n\t  if (n == 0)\n\t    goto done;\n\t}\n    }\n\n  ASSERT(up[n-1] | vp[n-1]);\n\n  if (n == 1)\n    {\n      *gp = mpn_gcd_1(up, 1, vp[0]);\n      ctx.gn = 1;\n      goto done;\n    }\n\n  /* Due to the calling convention for mpn_gcd, at most one can be\n     even. */\n\n  if (! (up[0] & 1))\n    MP_PTR_SWAP (up, vp);\n\n  ASSERT (up[0] & 1);\n\n  if (vp[0] == 0)\n    {\n      *gp = mpn_gcd_1 (up, 2, vp[1]);\n      ctx.gn = 1;\n      goto done;\n    }\n  else if (! 
(vp[0] & 1))\n    {\n      int r;\n      count_trailing_zeros (r, vp[0]);\n      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);\n      vp[1] >>= r;\n    }\n\n  ctx.gn = gcd_2(gp, up, vp);\n\ndone:\n  TMP_FREE;\n  return ctx.gn;\n}\n"
  },
  {
    "path": "mpn/generic/gcd_1.c",
    "content": "/* mpn_gcd_1 -- mpn and limb greatest common divisor.\n\nCopyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef GCD_1_METHOD\n#define GCD_1_METHOD 2\n#endif\n\n#define USE_ZEROTAB 0\n\n#if USE_ZEROTAB\n#define MAXSHIFT 4\n#define MASK ((1 << MAXSHIFT) - 1)\nstatic const unsigned char zerotab[1 << MAXSHIFT] =\n{\n#if MAXSHIFT > 4\n  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,\n#endif\n  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0\n};\n#endif\n\n/* Does not work for U == 0 or V == 0.  It would be tough to make it work for\n   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.\n\n   The threshold for doing u%v when size==1 will vary by CPU according to\n   the speed of a division and the code generated for the main loop.  Any\n   tuning for this is left to a CPU specific implementation.  */\n\nmp_limb_t\nmpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)\n{\n  mp_limb_t      ulimb;\n  unsigned long  zero_bits, u_low_zero_bits;\n\n  ASSERT (size >= 1);\n  ASSERT (vlimb != 0);\n  ASSERT_MPN_NONZERO_P (up, size);\n\n  ulimb = up[0];\n\n  /* Need vlimb odd for modexact, want it odd to get common zeros. 
*/\n  count_trailing_zeros (zero_bits, vlimb);\n  vlimb >>= zero_bits;\n\n  if (size > 1)\n    {\n      /* Must get common zeros before the mod reduction.  If ulimb==0 then\n\t vlimb already gives the common zeros.  */\n      if (ulimb != 0)\n\t{\n\t  count_trailing_zeros (u_low_zero_bits, ulimb);\n\t  zero_bits = MIN (zero_bits, u_low_zero_bits);\n\t}\n\n      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);\n      if (ulimb == 0)\n\tgoto done;\n\n      goto strip_u_maybe;\n    }\n\n  /* size==1, so up[0]!=0 */\n  count_trailing_zeros (u_low_zero_bits, ulimb);\n  ulimb >>= u_low_zero_bits;\n  zero_bits = MIN (zero_bits, u_low_zero_bits);\n\n  /* make u bigger */\n  if (vlimb > ulimb)\n    MP_LIMB_T_SWAP (ulimb, vlimb);\n\n  /* if u is much bigger than v, reduce using a division rather than\n     chipping away at it bit-by-bit */\n  if ((ulimb >> 16) > vlimb)\n    {\n      ulimb %= vlimb;\n      if (ulimb == 0)\n\tgoto done;\n      goto strip_u_maybe;\n    }\n\n  ASSERT (ulimb & 1);\n  ASSERT (vlimb & 1);\n\n#if GCD_1_METHOD == 1\n  while (ulimb != vlimb)\n    {\n      ASSERT (ulimb & 1);\n      ASSERT (vlimb & 1);\n\n      if (ulimb > vlimb)\n\t{\n\t  ulimb -= vlimb;\n\t  do\n\t    {\n\t      ulimb >>= 1;\n\t      ASSERT (ulimb != 0);\n\t    strip_u_maybe:\n\t      ;\n\t    }\n\t  while ((ulimb & 1) == 0);\n\t}\n      else /*  vlimb > ulimb.  
*/\n\t{\n\t  vlimb -= ulimb;\n\t  do\n\t    {\n\t      vlimb >>= 1;\n\t      ASSERT (vlimb != 0);\n\t    }\n\t  while ((vlimb & 1) == 0);\n\t}\n    }\n#else\n# if GCD_1_METHOD  == 2\n\n  ulimb >>= 1;\n  vlimb >>= 1;\n\n  while (ulimb != vlimb)\n    {\n      int c;\n      mp_limb_t t;\n      mp_limb_t vgtu;\n\n      t = ulimb - vlimb;\n      vgtu = LIMB_HIGHBIT_TO_MASK (t);\n\n      /* v <-- min (u, v) */\n      vlimb += (vgtu & t);\n\n      /* u <-- |u - v| */\n      ulimb = (t ^ vgtu) - vgtu;\n\n#if USE_ZEROTAB\n      /* Number of trailing zeros is the same no matter if we look at\n       * t or ulimb, but using t gives more parallelism. */\n      c = zerotab[t & MASK];\n\n      while (UNLIKELY (c == MAXSHIFT))\n\t{\n\t  ulimb >>= MAXSHIFT;\n\t  if (0)\n\t  strip_u_maybe:\n\t    vlimb >>= 1;\n\n\t  c = zerotab[ulimb & MASK];\n\t}\n#else\n      if (0)\n\t{\n\tstrip_u_maybe:\n\t  vlimb >>= 1;\n\t  t = ulimb;\n\t}\n      count_trailing_zeros (c, t);\n#endif\n      ulimb >>= (c + 1);\n    }\n\n  vlimb = (vlimb << 1) | 1;\n# else\n#  error Unknown GCD_1_METHOD\n# endif\n#endif\n\n done:\n  return vlimb << zero_bits;\n}\n"
  },
  {
    "path": "mpn/generic/gcd_subdiv_step.c",
    "content": "/* gcd_subdiv_step.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2010, 2011 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdlib.h>\t\t/* for NULL */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or\n   b is small, or the difference is small. Perform one subtraction\n   followed by one division. The normal case is to compute the reduced\n   a and b, and return the new size.\n\n   If s == 0 (used for gcd and gcdext), returns zero if the gcd is\n   found.\n\n   If s > 0, don't reduce to size <= s, and return zero if no\n   reduction is possible (if either a, b or |a-b| is of size <= s). 
*/\n\n/* The hook function is called as\n\n     hook(ctx, gp, gn, qp, qn, d)\n\n   in the following cases:\n\n   + If A = B at the start, G is the gcd, Q is NULL, d = -1.\n\n   + If one input is zero at the start, G is the gcd, Q is NULL,\n     d = 0 if A = G and d = 1 if B = G.\n\n   Otherwise, if d = 0 we have just subtracted a multiple of A from B,\n   and if d = 1 we have subtracted a multiple of B from A.\n\n   + If A = B after subtraction, G is the gcd, Q is NULL.\n\n   + If we get a zero remainder after division, G is the gcd, Q is the\n     quotient.\n\n   + Otherwise, G is NULL, Q is the quotient (often 1).\n\n */\n\nmp_size_t\nmpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,\n\t\t     gcd_subdiv_step_hook *hook, void *ctx,\n\t\t     mp_ptr tp)\n{\n  static const mp_limb_t one = CNST_LIMB(1);\n  mp_size_t an, bn, qn;\n\n  int swapped;\n\n  ASSERT (n > 0);\n  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);\n\n  an = bn = n;\n  MPN_NORMALIZE (ap, an);\n  MPN_NORMALIZE (bp, bn);\n\n  swapped = 0;\n\n  /* Arrange so that a < b, subtract b -= a, and maintain\n     normalization. */\n  if (an == bn)\n    {\n      int c;\n      MPN_CMP (c, ap, bp, an);\n      if (UNLIKELY (c == 0))\n\t{\n\t  /* For gcdext, return the smallest of the two cofactors, so\n\t     pass d = -1. */\n\t  if (s == 0)\n\t    hook (ctx, ap, an, NULL, 0, -1);\n\t  return 0;\n\t}\n      else if (c > 0)\n\t{\n\t  MP_PTR_SWAP (ap, bp);\n\t  swapped ^= 1;\n\t}\n    }\n  else\n    {\n      if (an > bn)\n\t{\n\t  MPN_PTR_SWAP (ap, an, bp, bn);\n\t  swapped ^= 1;\n\t}\n    }\n  if (an <= s)\n    {\n      if (s == 0)\n\thook (ctx, bp, bn, NULL, 0, swapped ^ 1);\n      return 0;\n    }\n\n  ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));\n  MPN_NORMALIZE (bp, bn);\n  ASSERT (bn > 0);\n\n  if (bn <= s)\n    {\n      /* Undo subtraction. 
*/\n      mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);\n      if (cy > 0)\n\tbp[an] = cy;\n      return 0;\n    }\n\n  /* Arrange so that a < b */\n  if (an == bn)\n    {\n      int c;\n      MPN_CMP (c, ap, bp, an);\n      if (UNLIKELY (c == 0))\n\t{\n\t  if (s > 0)\n\t    /* Just record subtraction and return */\n\t    hook (ctx, NULL, 0, &one, 1, swapped);\n\t  else\n\t    /* Found gcd. */\n\t    hook (ctx, bp, bn, NULL, 0, swapped);\n\t  return 0;\n\t}\n\n      hook (ctx, NULL, 0, &one, 1, swapped);\n\n      if (c > 0)\n\t{\n\t  MP_PTR_SWAP (ap, bp);\n\t  swapped ^= 1;\n\t}\n    }\n  else\n    {\n      hook (ctx, NULL, 0, &one, 1, swapped);\n\n      if (an > bn)\n\t{\n\t  MPN_PTR_SWAP (ap, an, bp, bn);\n\t  swapped ^= 1;\n\t}\n    }\n\n  mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);\n  qn = bn - an + 1;\n  bn = an;\n  MPN_NORMALIZE (bp, bn);\n\n  if (UNLIKELY (bn <= s))\n    {\n      if (s == 0)\n\t{\n\t  hook (ctx, ap, an, tp, qn, swapped);\n\t  return 0;\n\t}\n\n      /* Quotient is one too large, so decrement it and add back A. */\n      if (bn > 0)\n\t{\n\t  mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);\n\t  if (cy)\n\t    bp[an++] = cy;\n\t}\n      else\n\tMPN_COPY (bp, ap, an);\n\n      MPN_DECR_U (tp, qn, 1);\n    }\n\n  hook (ctx, NULL, 0, tp, qn, swapped);\n  return an;\n}\n"
  },
  {
    "path": "mpn/generic/gcdext.c",
    "content": "/* mpn_gcdext -- Extended Greatest Common Divisor.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and\n   the size is returned (if inputs are non-normalized, result may be\n   non-normalized too). 
Temporary space needed is M->n + n.\n */\nstatic size_t\nhgcd_mul_matrix_vector (struct hgcd_matrix *M,\n\t\t\tmp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)\n{\n  mp_limb_t ah, bh;\n\n  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as\n\n     t  = u00 * a\n     r  = u10 * b\n     r += t;\n\n     t  = u11 * b\n     b  = u01 * a\n     b += t;\n  */\n\n  if (M->n >= n)\n    {\n      mpn_mul (tp, M->p[0][0], M->n, ap, n);\n      mpn_mul (rp, M->p[1][0], M->n, bp, n);\n    }\n  else\n    {\n      mpn_mul (tp, ap, n, M->p[0][0], M->n);\n      mpn_mul (rp, bp, n, M->p[1][0], M->n);\n    }\n\n  ah = mpn_add_n (rp, rp, tp, n + M->n);\n\n  if (M->n >= n)\n    {\n      mpn_mul (tp, M->p[1][1], M->n, bp, n);\n      mpn_mul (bp, M->p[0][1], M->n, ap, n);\n    }\n  else\n    {\n      mpn_mul (tp, bp, n, M->p[1][1], M->n);\n      mpn_mul (bp, ap, n, M->p[0][1], M->n);\n    }\n  bh = mpn_add_n (bp, bp, tp, n + M->n);\n\n  n += M->n;\n  if ( (ah | bh) > 0)\n    {\n      rp[n] = ah;\n      bp[n] = bh;\n      n++;\n    }\n  else\n    {\n      /* Normalize */\n      while ( (rp[n-1] | bp[n-1]) == 0)\n\tn--;\n    }\n\n  return n;\n}\n\n#define COMPUTE_V_ITCH(n) (2*(n))\n\n/* Computes |v| = |(g - u a)| / b, where u may be positive or\n   negative, and v is of the opposite sign. max(a, b) is of size n, u and\n   v at most size n, and v must have space for n+1 limbs. 
*/\nstatic mp_size_t\ncompute_v (mp_ptr vp,\n\t   mp_srcptr ap, mp_srcptr bp, mp_size_t n,\n\t   mp_srcptr gp, mp_size_t gn,\n\t   mp_srcptr up, mp_size_t usize,\n\t   mp_ptr tp)\n{\n  mp_size_t size;\n  mp_size_t an;\n  mp_size_t bn;\n  mp_size_t vn;\n\n  ASSERT (n > 0);\n  ASSERT (gn > 0);\n  ASSERT (usize != 0);\n\n  size = ABS (usize);\n  ASSERT (size <= n);\n  ASSERT (up[size-1] > 0);\n\n  an = n;\n  MPN_NORMALIZE (ap, an);\n  ASSERT (gn <= an);\n\n  if (an >= size)\n    mpn_mul (tp, ap, an, up, size);\n  else\n    mpn_mul (tp, up, size, ap, an);\n\n  size += an;\n\n  if (usize > 0)\n    {\n      /* |v| = -v = (u a - g) / b */\n\n      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));\n      MPN_NORMALIZE (tp, size);\n      if (size == 0)\n\treturn 0;\n    }\n  else\n    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,\n\t (g + |u| a) always fits in (|usize| + an) limbs. */\n\n      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));\n      size -= (tp[size - 1] == 0);\n    }\n\n  /* Now divide t / b. There must be no remainder */\n  bn = n;\n  MPN_NORMALIZE (bp, bn);\n  ASSERT (size >= bn);\n\n  vn = size + 1 - bn;\n  ASSERT (vn <= n + 1);\n\n  mpn_divexact (vp, tp, size, bp, bn);\n  vn -= (vp[vn-1] == 0);\n\n  return vn;\n}\n\n/* Temporary storage:\n\n   Initial division: Quotient of at most an - n + 1 <= an limbs.\n\n   Storage for u0 and u1: 2(n+1).\n\n   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)\n\n   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.\n\n   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.\n\n   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.\n\n   For the lehmer call after the loop, Let T denote\n   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for\n   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T\n   for u, T+1 for v and 2T scratch space. 
In all, 7T + 3 is\n   sufficient for both operations.\n\n*/\n\n/* Optimal choice of p seems difficult. In each iteration the division\n * of work between hgcd and the updates of u0 and u1 depends on the\n * current size of the u. It may be desirable to use a different\n * choice of p in each iteration. Also the input size seems to matter;\n * choosing p = n / 3 in the first iteration seems to improve\n * performance slightly for input size just above the threshold, but\n * degrade performance for larger inputs. */\n#define CHOOSE_P_1(n) ((n) / 2)\n#define CHOOSE_P_2(n) ((n) / 3)\n\nmp_size_t\nmpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,\n\t    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)\n{\n  mp_size_t talloc;\n  mp_size_t scratch;\n  mp_size_t matrix_scratch;\n  mp_size_t ualloc = n + 1;\n\n  struct gcdext_ctx ctx;\n  mp_size_t un;\n  mp_ptr u0;\n  mp_ptr u1;\n\n  mp_ptr tp;\n\n  TMP_DECL;\n\n  ASSERT (an >= n);\n  ASSERT (n > 0);\n  ASSERT (bp[n-1] > 0);\n\n  TMP_MARK;\n\n  /* FIXME: Check for small sizes first, before setting up temporary\n     storage etc. */\n  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);\n\n  /* For initial division */\n  scratch = an - n + 1;\n  if (scratch > talloc)\n    talloc = scratch;\n\n  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))\n    {\n      /* For hgcd loop. */\n      mp_size_t hgcd_scratch;\n      mp_size_t update_scratch;\n      mp_size_t p1 = CHOOSE_P_1 (n);\n      mp_size_t p2 = CHOOSE_P_2 (n);\n      mp_size_t min_p = MIN(p1, p2);\n      mp_size_t max_p = MAX(p1, p2);\n      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);\n      hgcd_scratch = mpn_hgcd_itch (n - min_p);\n      update_scratch = max_p + n - 1;\n\n      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);\n      if (scratch > talloc)\n\ttalloc = scratch;\n\n      /* Final mpn_gcdext_lehmer_n call. Need space for u and for\n\t copies of a and b. 
*/\n      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)\n\t+ 3*GCDEXT_DC_THRESHOLD;\n\n      if (scratch > talloc)\n\ttalloc = scratch;\n\n      /* Cofactors u0 and u1 */\n      talloc += 2*(n+1);\n    }\n\n  tp = TMP_ALLOC_LIMBS(talloc);\n\n  if (an > n)\n    {\n      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);\n\n      if (mpn_zero_p (ap, n))\n\t{\n\t  MPN_COPY (gp, bp, n);\n\t  *usizep = 0;\n\t  TMP_FREE;\n\t  return n;\n\t}\n    }\n\n  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))\n    {\n      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);\n\n      TMP_FREE;\n      return gn;\n    }\n\n  MPN_ZERO (tp, 2*ualloc);\n  u0 = tp; tp += ualloc;\n  u1 = tp; tp += ualloc;\n\n  ctx.gp = gp;\n  ctx.up = up;\n  ctx.usize = usizep;\n\n  {\n    /* For the first hgcd call, there are no u updates, and it makes\n       some sense to use a different choice for p. */\n\n    /* FIXME: We could trim use of temporary storage, since u0 and u1\n       are not used yet. For the hgcd call, we could swap in the u0\n       and u1 pointers for the relevant matrix elements. */\n\n    struct hgcd_matrix M;\n    mp_size_t p = CHOOSE_P_1 (n);\n    mp_size_t nn;\n\n    mpn_hgcd_matrix_init (&M, n - p, tp);\n    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);\n    if (nn > 0)\n      {\n\tASSERT (M.n <= (n - p - 1)/2);\n\tASSERT (M.n + p <= (p + n - 1) / 2);\n\n\t/* Temporary storage 2 (p + M->n) <= p + n - 1 */\n\tn = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);\n\n\tMPN_COPY (u0, M.p[1][0], M.n);\n\tMPN_COPY (u1, M.p[1][1], M.n);\n\tun = M.n;\n\twhile ( (u0[un-1] | u1[un-1] ) == 0)\n\t  un--;\n      }\n    else\n      {\n\t/* mpn_hgcd has failed. Then either one of a or b is very\n\t   small, or the difference is very small. Perform one\n\t   subtraction followed by one division. 
*/\n\tu1[0] = 1;\n\n\tctx.u0 = u0;\n\tctx.u1 = u1;\n\tctx.tp = tp + n; /* ualloc */\n\tctx.un = 1;\n\n\t/* Temporary storage n */\n\tn = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);\n\tif (n == 0)\n\t  {\n\t    TMP_FREE;\n\t    return ctx.gn;\n\t  }\n\n\tun = ctx.un;\n\tASSERT (un < ualloc);\n      }\n  }\n\n  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))\n    {\n      struct hgcd_matrix M;\n      mp_size_t p = CHOOSE_P_2 (n);\n      mp_size_t nn;\n\n      mpn_hgcd_matrix_init (&M, n - p, tp);\n      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);\n      if (nn > 0)\n\t{\n\t  mp_ptr t0;\n\n\t  t0 = tp + matrix_scratch;\n\t  ASSERT (M.n <= (n - p - 1)/2);\n\t  ASSERT (M.n + p <= (p + n - 1) / 2);\n\n\t  /* Temporary storage 2 (p + M->n) <= p + n - 1 */\n\t  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);\n\n\t  /* By the same analysis as for mpn_hgcd_matrix_mul */\n\t  ASSERT (M.n + un <= ualloc);\n\n\t  /* FIXME: This copying could be avoided by some swapping of\n\t   * pointers. May need more temporary storage, though. */\n\t  MPN_COPY (t0, u0, un);\n\n\t  /* Temporary storage ualloc */\n\t  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);\n\n\t  ASSERT (un < ualloc);\n\t  ASSERT ( (u0[un-1] | u1[un-1]) > 0);\n\t}\n      else\n\t{\n\t  /* mpn_hgcd has failed. Then either one of a or b is very\n\t     small, or the difference is very small. Perform one\n\t     subtraction followed by one division. */\n\t  ctx.u0 = u0;\n\t  ctx.u1 = u1;\n\t  ctx.tp = tp + n; /* ualloc */\n\t  ctx.un = un;\n\n\t  /* Temporary storage n */\n\t  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);\n\t  if (n == 0)\n\t    {\n\t      TMP_FREE;\n\t      return ctx.gn;\n\t    }\n\n\t  un = ctx.un;\n\t  ASSERT (un < ualloc);\n\t}\n    }\n  /* We have A = ... a + ... b\n\t     B =  u0 a +  u1 b\n\n\t     a = u1  A + ... B\n\t     b = -u0 A + ... 
B\n\n     with bounds\n\n       |u0|, |u1| <= B / min(a, b)\n\n     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,\n     in which case the only reduction done so far is a = A - k B for\n     some k.\n\n     Compute g = u a + v b = (u u1 - v u0) A + (...) B\n     Here, u, v are bounded by\n\n       |u| <= b,\n       |v| <= a\n  */\n\n  ASSERT ( (ap[n-1] | bp[n-1]) > 0);\n\n  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))\n    {\n      /* Must return the smallest cofactor, +u1 or -u0 */\n      int c;\n\n      MPN_COPY (gp, ap, n);\n\n      MPN_CMP (c, u0, u1, un);\n      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in\n\t this case we choose the cofactor + 1, corresponding to G = A\n\t - k B, rather than -1, corresponding to G = - A + (k+1) B. */\n      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));\n      if (c < 0)\n\t{\n\t  MPN_NORMALIZE (u0, un);\n\t  MPN_COPY (up, u0, un);\n\t  *usizep = -un;\n\t}\n      else\n\t{\n\t  MPN_NORMALIZE_NOT_ZERO (u1, un);\n\t  MPN_COPY (up, u1, un);\n\t  *usizep = un;\n\t}\n\n      TMP_FREE;\n      return n;\n    }\n  else if (UNLIKELY (u0[0] == 0) && un == 1)\n    {\n      mp_size_t gn;\n      ASSERT (u1[0] == 1);\n\n      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */\n      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);\n\n      TMP_FREE;\n      return gn;\n    }\n  else\n    {\n      mp_size_t u0n;\n      mp_size_t u1n;\n      mp_size_t lehmer_un;\n      mp_size_t lehmer_vn;\n      mp_size_t gn;\n\n      mp_ptr lehmer_up;\n      mp_ptr lehmer_vp;\n      int negate;\n\n      lehmer_up = tp; tp += n;\n\n      /* Call mpn_gcdext_lehmer_n with copies of a and b. 
*/\n      MPN_COPY (tp, ap, n);\n      MPN_COPY (tp + n, bp, n);\n      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);\n\n      u0n = un;\n      MPN_NORMALIZE (u0, u0n);\n      ASSERT (u0n > 0);\n\n      if (lehmer_un == 0)\n\t{\n\t  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */\n\t  MPN_COPY (up, u0, u0n);\n\t  *usizep = -u0n;\n\n\t  TMP_FREE;\n\t  return gn;\n\t}\n\n      lehmer_vp = tp;\n      /* Compute v = (g - u a) / b */\n      lehmer_vn = compute_v (lehmer_vp,\n\t\t\t     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);\n\n      if (lehmer_un > 0)\n\tnegate = 0;\n      else\n\t{\n\t  lehmer_un = -lehmer_un;\n\t  negate = 1;\n\t}\n\n      u1n = un;\n      MPN_NORMALIZE (u1, u1n);\n      ASSERT (u1n > 0);\n\n      ASSERT (lehmer_un + u1n <= ualloc);\n      ASSERT (lehmer_vn + u0n <= ualloc);\n\n      /* We may still have v == 0 */\n\n      /* Compute u u0 */\n      if (lehmer_un <= u1n)\n\t/* Should be the common case */\n\tmpn_mul (up, u1, u1n, lehmer_up, lehmer_un);\n      else\n\tmpn_mul (up, lehmer_up, lehmer_un, u1, u1n);\n\n      un = u1n + lehmer_un;\n      un -= (up[un - 1] == 0);\n\n      if (lehmer_vn > 0)\n\t{\n\t  mp_limb_t cy;\n\n\t  /* Overwrites old u1 value */\n\t  if (lehmer_vn <= u0n)\n\t    /* Should be the common case */\n\t    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);\n\t  else\n\t    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);\n\n\t  u1n = u0n + lehmer_vn;\n\t  u1n -= (u1[u1n - 1] == 0);\n\n\t  if (u1n <= un)\n\t    {\n\t      cy = mpn_add (up, up, un, u1, u1n);\n\t    }\n\t  else\n\t    {\n\t      cy = mpn_add (up, u1, u1n, up, un);\n\t      un = u1n;\n\t    }\n\t  up[un] = cy;\n\t  un += (cy != 0);\n\n\t  ASSERT (un < ualloc);\n\t}\n      *usizep = negate ? -un : un;\n\n      TMP_FREE;\n      return gn;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/gcdext_1.c",
    "content": "/* mpn_gcdext -- Extended Greatest Common Divisor.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef GCDEXT_1_USE_BINARY\n#define GCDEXT_1_USE_BINARY 0\n#endif\n\n#ifndef GCDEXT_1_BINARY_METHOD\n#define GCDEXT_1_BINARY_METHOD 2\n#endif\n\n#ifndef USE_ZEROTAB\n#define USE_ZEROTAB 1\n#endif\n\n#if GCDEXT_1_USE_BINARY\n\n#if USE_ZEROTAB\nstatic unsigned char zerotab[0x40] = {\n  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,\n  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,\n  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,\n  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0\n};\n#endif\n\nmp_limb_t\nmpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,\n\t      mp_limb_t u, mp_limb_t v)\n{\n  /* Maintain\n\n     U = t1 u + t0 v\n     V = s1 u + s0 v\n\n     where U, V are the inputs (without any shared power of two),\n     and the matris has determinant ± 2^{shift}.\n  */\n  mp_limb_t s0 = 1;\n  mp_limb_t t0 = 0;\n  mp_limb_t s1 = 0;\n  mp_limb_t t1 = 1;\n  mp_limb_t ug;\n  mp_limb_t vg;\n  mp_limb_t ugh;\n  mp_limb_t vgh;\n  unsigned zero_bits;\n  unsigned shift;\n  unsigned i;\n#if GCDEXT_1_BINARY_METHOD == 2\n  
mp_limb_t det_sign;\n#endif\n\n  ASSERT (u > 0);\n  ASSERT (v > 0);\n\n  count_trailing_zeros (zero_bits, u | v);\n  u >>= zero_bits;\n  v >>= zero_bits;\n\n  if ((u & 1) == 0)\n    {\n      count_trailing_zeros (shift, u);\n      u >>= shift;\n      t1 <<= shift;\n    }\n  else if ((v & 1) == 0)\n    {\n      count_trailing_zeros (shift, v);\n      v >>= shift;\n      s0 <<= shift;\n    }\n  else\n    shift = 0;\n\n#if GCDEXT_1_BINARY_METHOD == 1\n  while (u != v)\n    {\n      unsigned count;\n      if (u > v)\n\t{\n\t  u -= v;\n#if USE_ZEROTAB\n\t  count = zerotab [u & 0x3f];\n\t  u >>= count;\n\t  if (UNLIKELY (count == 6))\n\t    {\n\t      unsigned c;\n\t      do\n\t\t{\n\t\t  c = zerotab[u & 0x3f];\n\t\t  u >>= c;\n\t\t  count += c;\n\t\t}\n\t      while (c == 6);\n\t    }\n#else\n\t  count_trailing_zeros (count, u);\n\t  u >>= count;\n#endif\n\t  t0 += t1; t1 <<= count;\n\t  s0 += s1; s1 <<= count;\n\t}\n      else\n\t{\n\t  v -= u;\n#if USE_ZEROTAB\n\t  count = zerotab [v & 0x3f];\n\t  v >>= count;\n\t  if (UNLIKELY (count == 6))\n\t    {\n\t      unsigned c;\n\t      do\n\t\t{\n\t\t  c = zerotab[v & 0x3f];\n\t\t  v >>= c;\n\t\t  count += c;\n\t\t}\n\t      while (c == 6);\n\t    }\n#else\n\t  count_trailing_zeros (count, v);\n\t  v >>= count;\n#endif\n\t  t1 += t0; t0 <<= count;\n\t  s1 += s0; s0 <<= count;\n\t}\n      shift += count;\n    }\n#else\n# if GCDEXT_1_BINARY_METHOD == 2\n  u >>= 1;\n  v >>= 1;\n\n  det_sign = 0;\n\n  while (u != v)\n    {\n      unsigned count;\n      mp_limb_t d =  u - v;\n      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);\n      mp_limb_t sx;\n      mp_limb_t tx;\n\n      /* When v <= u (vgtu == 0), the updates are:\n\n\t   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. 
M factor)\n\t   (t1, t0) <-- (t1 << count, t0 + t1)\n\n\t and when v > 0, the updates are\n\n\t   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))\n\t   (t1, t0) <-- (t0 << count, t0 + t1)\n\n\t and similarly for s1, s0\n      */\n\n      /* v <-- min (u, v) */\n      v += (vgtu & d);\n\n      /* u <-- |u - v| */\n      u = (d ^ vgtu) - vgtu;\n\n      /* Number of trailing zeros is the same no matter if we look at\n       * d or u, but using d gives more parallelism. */\n#if USE_ZEROTAB\n      count = zerotab[d & 0x3f];\n      if (UNLIKELY (count == 6))\n\t{\n\t  unsigned c = 6;\n\t  do\n\t    {\n\t      d >>= c;\n\t      c = zerotab[d & 0x3f];\n\t      count += c;\n\t    }\n\t  while (c == 6);\n\t}\n#else\n      count_trailing_zeros (count, d);\n#endif\n      det_sign ^= vgtu;\n\n      tx = vgtu & (t0 - t1);\n      sx = vgtu & (s0 - s1);\n      t0 += t1;\n      s0 += s1;\n      t1 += tx;\n      s1 += sx;\n\n      count++;\n      u >>= count;\n      t1 <<= count;\n      s1 <<= count;\n      shift += count;\n    }\n  u = (u << 1) + 1;\n# else /* GCDEXT_1_BINARY_METHOD == 2 */\n#  error Unknown GCDEXT_1_BINARY_METHOD\n# endif\n#endif\n\n  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */\n  ug = t0 + t1;\n  vg = s0 + s1;\n\n  ugh = ug/2 + (ug & 1);\n  vgh = vg/2 + (vg & 1);\n\n  /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using\n     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */\n  for (i = 0; i < shift; i++)\n    {\n      mp_limb_t mask = - ( (s0 | t0) & 1);\n\n      s0 /= 2;\n      t0 /= 2;\n      s0 += mask & vgh;\n      t0 += mask & ugh;\n    }\n  /* FIXME: Try simplifying this condition. */\n  if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )\n    {\n      s0 -= vg;\n      t0 -= ug;\n    }\n#if GCDEXT_1_BINARY_METHOD == 2\n  /* Conditional negation. 
*/\n  s0 = (s0 ^ det_sign) - det_sign;\n  t0 = (t0 ^ det_sign) - det_sign;\n#endif\n  *sp = s0;\n  *tp = -t0;\n\n  return u << zero_bits;\n}\n\n#else /* !GCDEXT_1_USE_BINARY */\n\n\n/* FIXME: Takes two single-word limbs. It could be extended to a\n * function that accepts a bignum for the first input, and only\n * returns the first co-factor. */\n\nmp_limb_t\nmpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,\n\t      mp_limb_t a, mp_limb_t b)\n{\n  /* Maintain\n\n     a =  u0 A + v0 B\n     b =  u1 A + v1 B\n\n     where A, B are the original inputs.\n  */\n  mp_limb_signed_t u0 = 1;\n  mp_limb_signed_t v0 = 0;\n  mp_limb_signed_t u1 = 0;\n  mp_limb_signed_t v1 = 1;\n\n  ASSERT (a > 0);\n  ASSERT (b > 0);\n\n  if (a < b)\n    goto divide_by_b;\n\n  for (;;)\n    {\n      mp_limb_t q;\n\n      q = a / b;\n      a -= q * b;\n\n      if (a == 0)\n\t{\n\t  *up = u1;\n\t  *vp = v1;\n\t  return b;\n\t}\n      u0 -= q * u1;\n      v0 -= q * v1;\n\n    divide_by_b:\n      q = b / a;\n      b -= q * a;\n\n      if (b == 0)\n\t{\n\t  *up = u0;\n\t  *vp = v0;\n\t  return a;\n\t}\n      u1 -= q * u0;\n      v1 -= q * v0;\n    }\n}\n#endif /* !GCDEXT_1_USE_BINARY */\n"
  },
  {
    "path": "mpn/generic/gcdext_lehmer.c",
    "content": "/* mpn_gcdext -- Extended Greatest Common Divisor.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Here, d is the index of the cofactor to update. FIXME: Could use qn\n   = 0 for the common case q = 1. */\nvoid\nmpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,\n\t\t mp_srcptr qp, mp_size_t qn, int d)\n{\n  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;\n  mp_size_t un = ctx->un;\n\n  if (gp)\n    {\n      mp_srcptr up;\n\n      ASSERT (gn > 0);\n      ASSERT (gp[gn-1] > 0);\n\n      MPN_COPY (ctx->gp, gp, gn);\n      ctx->gn = gn;\n\n      if (d < 0)\n\t{\n\t  int c;\n\n\t  /* Must return the smallest cofactor, +u1 or -u0 */\n\t  MPN_CMP (c, ctx->u0, ctx->u1, un);\n\t  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));\n\n\t  d = c < 0;\n\t}\n\n      up = d ? ctx->u0 : ctx->u1;\n\n      MPN_NORMALIZE (up, un);\n      MPN_COPY (ctx->up, up, un);\n\n      *ctx->usize = d ? 
-un : un;\n    }\n  else\n    {\n      mp_limb_t cy;\n      mp_ptr u0 = ctx->u0;\n      mp_ptr u1 = ctx->u1;\n\n      ASSERT (d >= 0);\n\n      if (d)\n\tMP_PTR_SWAP (u0, u1);\n\n      qn -= (qp[qn-1] == 0);\n\n      /* Update u0 += q  * u1 */\n      if (qn == 1)\n\t{\n\t  mp_limb_t q = qp[0];\n\n\t  if (q == 1)\n\t    /* A common case. */\n\t    cy = mpn_add_n (u0, u0, u1, un);\n\t  else\n\t    cy = mpn_addmul_1 (u0, u1, un, q);\n\t}\n      else\n\t{\n\t  mp_size_t u1n;\n\t  mp_ptr tp;\n\n\t  u1n = un;\n\t  MPN_NORMALIZE (u1, u1n);\n\n\t  if (u1n == 0)\n\t    return;\n\n\t  /* Should always have u1n == un here, and u1 >= u0. The\n\t     reason is that we alternate adding u0 to u1 and u1 to u0\n\t     (corresponding to subtractions a - b and b - a), and we\n\t     can get a large quotient only just after a switch, which\n\t     means that we'll add (a multiple of) the larger u to the\n\t     smaller. */\n\n\t  tp = ctx->tp;\n\n\t  if (qn > u1n)\n\t    mpn_mul (tp, qp, qn, u1, u1n);\n\t  else\n\t    mpn_mul (tp, u1, u1n, qp, qn);\n\n\t  u1n += qn;\n\t  u1n -= tp[u1n-1] == 0;\n\n\t  if (u1n >= un)\n\t    {\n\t      cy = mpn_add (u0, tp, u1n, u0, un);\n\t      un = u1n;\n\t    }\n\t  else\n\t    /* Note: Unlikely case, maybe never happens? */\n\t    cy = mpn_add (u0, u0, un, tp, u1n);\n\n\t}\n      u0[un] = cy;\n      ctx->un = un + (cy > 0);\n    }\n}\n\n/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for\n   the matrix-vector multiplication adjusting a, b. If hgcd fails, we\n   need at most n for the quotient and n+1 for the u update (reusing\n   the extra u). In all, 4n + 3. 
*/\n\nmp_size_t\nmpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,\n\t\t     mp_ptr ap, mp_ptr bp, mp_size_t n,\n\t\t     mp_ptr tp)\n{\n  mp_size_t ualloc = n + 1;\n\n  /* Keeps track of the second row of the reduction matrix\n   *\n   *   M = (v0, v1 ; u0, u1)\n   *\n   * which correspond to the first column of the inverse\n   *\n   *   M^{-1} = (u1, -v1; -u0, v0)\n   *\n   * This implies that\n   *\n   *   a =  u1 A (mod B)\n   *   b = -u0 A (mod B)\n   *\n   * where A, B denotes the input values.\n   */\n\n  struct gcdext_ctx ctx;\n  mp_size_t un;\n  mp_ptr u0;\n  mp_ptr u1;\n  mp_ptr u2;\n\n  MPN_ZERO (tp, 3*ualloc);\n  u0 = tp; tp += ualloc;\n  u1 = tp; tp += ualloc;\n  u2 = tp; tp += ualloc;\n\n  u1[0] = 1; un = 1;\n\n  ctx.gp = gp;\n  ctx.up = up;\n  ctx.usize = usize;\n\n  /* FIXME: Handle n == 2 differently, after the loop? */\n  while (n >= 2)\n    {\n      struct hgcd_matrix1 M;\n      mp_limb_t ah, al, bh, bl;\n      mp_limb_t mask;\n\n      mask = ap[n-1] | bp[n-1];\n      ASSERT (mask > 0);\n\n      if (mask & GMP_NUMB_HIGHBIT)\n\t{\n\t  ah = ap[n-1]; al = ap[n-2];\n\t  bh = bp[n-1]; bl = bp[n-2];\n\t}\n      else if (n == 2)\n\t{\n\t  /* We use the full inputs without truncation, so we can\n\t     safely shift left. 
*/\n\t  int shift;\n\n\t  count_leading_zeros (shift, mask);\n\t  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);\n\t  al = ap[0] << shift;\n\t  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);\n\t  bl = bp[0] << shift;\n\t}\n      else\n\t{\n\t  int shift;\n\n\t  count_leading_zeros (shift, mask);\n\t  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);\n\t  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);\n\t  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);\n\t  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);\n\t}\n\n      /* Try an mpn_nhgcd2 step */\n      if (mpn_hgcd2 (ah, al, bh, bl, &M))\n\t{\n\t  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);\n\t  MP_PTR_SWAP (ap, tp);\n\t  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);\n\t  MP_PTR_SWAP (u0, u2);\n\t}\n      else\n\t{\n\t  /* mpn_hgcd2 has failed. Then either one of a or b is very\n\t     small, or the difference is very small. Perform one\n\t     subtraction followed by one division. */\n\t  ctx.u0 = u0;\n\t  ctx.u1 = u1;\n\t  ctx.tp = u2;\n\t  ctx.un = un;\n\n\t  /* Temporary storage n for the quotient and ualloc for the\n\t     new cofactor. */\n\t  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);\n\t  if (n == 0)\n\t    return ctx.gn;\n\n\t  un = ctx.un;\n\t}\n    }\n  ASSERT_ALWAYS (ap[0] > 0);\n  ASSERT_ALWAYS (bp[0] > 0);\n\n  if (ap[0] == bp[0])\n    {\n      int c;\n\n      /* Which cofactor to return now? Candidates are +u1 and -u0,\n\t depending on which of a and b was most recently reduced,\n\t which we don't keep track of. So compare and get the smallest\n\t one. 
*/\n\n      gp[0] = ap[0];\n\n      MPN_CMP (c, u0, u1, un);\n      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));\n      if (c < 0)\n\t{\n\t  MPN_NORMALIZE (u0, un);\n\t  MPN_COPY (up, u0, un);\n\t  *usize = -un;\n\t}\n      else\n\t{\n\t  MPN_NORMALIZE_NOT_ZERO (u1, un);\n\t  MPN_COPY (up, u1, un);\n\t  *usize = un;\n\t}\n      return 1;\n    }\n  else\n    {\n      mp_limb_t uh, vh;\n      mp_limb_signed_t u;\n      mp_limb_signed_t v;\n      int negate;\n\n      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);\n\n      /* Set up = u u1 - v u0. Keep track of size, un grows by one or\n\t two limbs. */\n\n      if (u == 0)\n\t{\n\t  ASSERT (v == 1);\n\t  MPN_NORMALIZE (u0, un);\n\t  MPN_COPY (up, u0, un);\n\t  *usize = -un;\n\t  return 1;\n\t}\n      else if (v == 0)\n\t{\n\t  ASSERT (u == 1);\n\t  MPN_NORMALIZE (u1, un);\n\t  MPN_COPY (up, u1, un);\n\t  *usize = un;\n\t  return 1;\n\t}\n      else if (u > 0)\n\t{\n\t  negate = 0;\n\t  ASSERT (v < 0);\n\t  v = -v;\n\t}\n      else\n\t{\n\t  negate = 1;\n\t  ASSERT (v > 0);\n\t  u = -u;\n\t}\n\n      uh = mpn_mul_1 (up, u1, un, u);\n      vh = mpn_addmul_1 (up, u0, un, v);\n\n      if ( (uh | vh) > 0)\n\t{\n\t  uh += vh;\n\t  up[un++] = uh;\n\t  if (uh < vh)\n\t    up[un++] = 1;\n\t}\n\n      MPN_NORMALIZE_NOT_ZERO (up, un);\n\n      *usize = negate ? -un : un;\n      return 1;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/get_d.c",
    "content": "/* mpn_get_d -- limbs to double conversion.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#define CONST_1024\t      (1024)\n#define CONST_NEG_1023\t      (-1023)\n#define CONST_NEG_1022_SUB_53 (-1022 - 53)\n\n/* Return the value {ptr,size}*2^exp, and negative if sign<0.\n   Must have size>=1, and a non-zero high limb ptr[size-1].\n\n   {ptr,size} is truncated towards zero.  This is consistent with other gmp\n   conversions, like mpz_set_f or mpz_set_q, and is easy to implement and\n   test.\n\n   In the past conversions had attempted (imperfectly) to let the hardware\n   float rounding mode take effect, but that gets tricky since multiple\n   roundings need to be avoided, or taken into account, and denorms mean the\n   effective precision of the mantissa is not constant.\t (For reference,\n   mpz_get_d on IEEE systems was ok, except it operated on the absolute\n   value.  
mpf_get_d and mpq_get_d suffered from multiple roundings and from\n   not always using enough bits to get the rounding right.)\n\n   It's felt that GMP is not primarily concerned with hardware floats, and\n   really isn't enhanced by getting involved with hardware rounding modes\n   (which could even be some weird unknown style), so something unambiguous\n   and straightforward is best.\n\n\n   The IEEE code below is the usual case, it knows either a 32-bit or 64-bit\n   limb and is done with shifts and masks.  The 64-bit case in particular\n   should come out nice and compact.\n\n   The generic code works one bit at a time, which will be quite slow, but\n   should support any binary-based \"double\" and be safe against any rounding\n   mode.  Note in particular it works on IEEE systems too.\n\n\n   Traps:\n\n   Hardware traps for overflow to infinity, underflow to zero, or\n   unsupported denorms may or may not be taken.\t The IEEE code works bitwise\n   and so probably won't trigger them, the generic code works by float\n   operations and so probably will.  This difference might be thought less\n   than ideal, but again its felt straightforward code is better than trying\n   to get intimate with hardware exceptions (of perhaps unknown nature).\n\n\n   Not done:\n\n   mpz_get_d in the past handled size==1 with a cast limb->double.  This\n   might still be worthwhile there (for up to the mantissa many bits), but\n   for mpn_get_d here, the cost of applying \"exp\" to the resulting exponent\n   would probably use up any benefit a cast may have over bit twiddling.\n   Also, if the exponent is pushed into denorm range then bit twiddling is\n   the only option, to ensure the desired truncation is obtained.\n\n\n   Other:\n\n   For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL\n   to the kernel for values >= 2^63.  This makes it slow, and worse the\n   Linux kernel (what versions?) 
apparently uses untested code in its trap\n   handling routines, and gets the sign wrong.  We don't use such a limb to\n   double cast, neither in the IEEE or generic code.  */\n\n\ndouble\nmpn_get_d (mp_srcptr ptr, mp_size_t size, mp_size_t sign, long exp)\n{\n  ASSERT (size >= 0);\n  ASSERT_MPN (ptr, size);\n  ASSERT (size == 0 || ptr[size-1] != 0);\n\n  if (size == 0)\n    return 0.0;\n\n  /* Adjust exp to a radix point just above {ptr,size}, guarding against\n     overflow.\tAfter this exp can of course be reduced to anywhere within\n     the {ptr,size} region without underflow.  */\n  if (UNLIKELY ((mpir_ui) (GMP_NUMB_BITS * size)\n\t\t> (mpir_ui) (LONG_MAX - exp)))\n    {\n      goto ieee_infinity;\n\n      /* generic */\n      exp = LONG_MAX;\n    }\n  else\n    {\n      exp += GMP_NUMB_BITS * size;\n    }\n\n#define ONE_LIMB    (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)\n#define TWO_LIMBS   (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)\n\n  if (ONE_LIMB || TWO_LIMBS)\n    {\n      union ieee_double_extract\t u;\n      mp_limb_t\t m0, m1, m2, rmask;\n      int\t lshift, rshift;\n\n      m0 = ptr[size-1];\t\t\t    /* high limb */\n      m1 = (size >= 2 ? ptr[size-2] : 0);   /* second highest limb */\n      count_leading_zeros (lshift, m0);\n\n      /* relative to just under high non-zero bit */\n      exp -= (lshift - GMP_NAIL_BITS) + 1;\n\n      if (ONE_LIMB)\n\t{\n\t  /* lshift to have high of m0 non-zero, and collapse nails */\n\t  rshift = GMP_LIMB_BITS - lshift;\n\t  m1 <<= GMP_NAIL_BITS;\n\t  rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;\n\t  m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);\n\n\t  /* rshift back to have bit 53 of m0 the high non-zero */\n\t  m0 >>= 11;\n\t}\n      else /* TWO_LIMBS */\n\t{\n\t  m2 = (size >= 3 ? 
ptr[size-3] : 0);  /* third highest limb */\n\n\t  /* collapse nails from m1 and m2 */\n#if GMP_NAIL_BITS != 0\n\t  m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));\n\t  m2 <<= 2*GMP_NAIL_BITS;\n#endif\n\n\t  /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */\n\t  rshift = GMP_LIMB_BITS - lshift;\n\t  rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);\n\t  m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);\n\t  m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);\n\n\t  /* rshift back to have bit 53 of m0:m1 the high non-zero */\n\t  m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));\n\t  m0 >>= 11;\n\t}\n\n      if (UNLIKELY (exp >= CONST_1024))\n\t{\n\t  /* overflow, return infinity */\n\tieee_infinity:\n\t  m0 = 0;\n\t  m1 = 0;\n\t  exp = 1024;\n\t}\n      else if (UNLIKELY (exp <= CONST_NEG_1023))\n\t{\n\t  if (LIKELY (exp <= CONST_NEG_1022_SUB_53))\n\t    return 0.0;\t /* denorm underflows to zero */\n\n\t  rshift = -1022 - exp;\n\t  ASSERT (rshift > 0 && rshift < 53);\n\t  if (ONE_LIMB)\n\t    {\n\t      m0 >>= rshift;\n\t    }\n\t  else /* TWO_LIMBS */\n\t    {\n\t      if (rshift >= 32)\n\t\t{\n\t\t  m1 = m0;\n\t\t  m0 = 0;\n\t\t  rshift -= 32;\n\t\t}\n\t      lshift = GMP_LIMB_BITS - rshift;\n\t      m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);\n\t      m0 >>= rshift;\n\t    }\n\t  exp = -1023;\n\t}\n\n      if (ONE_LIMB)\n\t{\n#if GMP_LIMB_BITS > 32\t/* avoid compiler warning about big shift */\n\t  u.s.manh = m0 >> 32;\n#endif\n\t  u.s.manl = m0;\n\t}\n      else /* TWO_LIMBS */\n\t{\n\t  u.s.manh = m0;\n\t  u.s.manl = m1;\n\t}\n\n      u.s.exp = exp + 1023;\n      u.s.sig = (sign < 0);\n      return u.d;\n    }\n  else\n    {\n      /* Non-IEEE or strange limb size, do something generic. 
*/\n\n      mp_size_t\t     i;\n      mp_limb_t\t     limb, bit;\n      int\t     shift;\n      double\t     base, factor, prev_factor, d, new_d, diff;\n\n      /* \"limb\" is \"ptr[i]\" the limb being examined, \"bit\" is a mask for the\n\t bit being examined, initially the highest non-zero bit.  */\n      i = size-1;\n      limb = ptr[i];\n      count_leading_zeros (shift, limb);\n      bit = GMP_LIMB_HIGHBIT >> shift;\n\n      /* relative to just under high non-zero bit */\n      exp -= (shift - GMP_NAIL_BITS) + 1;\n\n      /* Power up \"factor\" to 2^exp, being the value of the \"bit\" in \"limb\"\n\t being examined.  */\n      base = (exp >= 0 ? 2.0 : 0.5);\n      exp = ABS (exp);\n      factor = 1.0;\n      for (;;)\n\t{\n\t  if (exp & 1)\n\t    {\n\t      prev_factor = factor;\n\t      factor *= base;\n\t      if (factor == 0.0)\n\t\treturn 0.0;\t/* underflow */\n\t      if (factor == prev_factor)\n\t\t{\n\t\t  d = factor;\t  /* overflow, apparent infinity */\n\t\t  goto generic_done;\n\t\t}\n\t    }\n\t  exp >>= 1;\n\t  if (exp == 0)\n\t    break;\n\t  base *= base;\n\t}\n\n      /* Add a \"factor\" for each non-zero bit, working from high to low.\n\t Stop if any rounding occurs, hence implementing a truncation.\n\n\t Note no attention is paid to DBL_MANT_DIG, since the effective\n\t number of bits in the mantissa isn't constant when in denorm range.\n\t We also encountered an ARM system with apparently somewhat doubtful\n\t software floats where DBL_MANT_DIG claimed 53 bits but only 32\n\t actually worked.  
*/\n\n      d = factor;  /* high bit */\n      for (;;)\n\t{\n\t  factor *= 0.5;  /* next bit */\n\t  bit >>= 1;\n\t  if (bit == 0)\n\t    {\n\t      /* next limb, if any */\n\t      i--;\n\t      if (i < 0)\n\t\tbreak;\n\t      limb = ptr[i];\n\t      bit = GMP_NUMB_HIGHBIT;\n\t    }\n\n\t  if (bit & limb)\n\t    {\n\t      new_d = d + factor;\n\t      diff = new_d - d;\n\t      if (diff != factor)\n\t\tbreak;\t /* rounding occured, stop now */\n\t      d = new_d;\n\t    }\n\t}\n\n    generic_done:\n      return (sign >= 0 ? d : -d);\n    }\n}\n"
  },
  {
    "path": "mpn/generic/get_str.c",
    "content": "/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE\n   INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN\n   FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE\n   GNU MP RELEASE.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,\n2008 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Conversion of U {up,un} to a string in base b.  Internally, we convert to\n   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:\n\n  A) Divide U repeatedly by B, generating a quotient and remainder, until the\n     quotient becomes zero.  The remainders hold the converted digits.  Digits\n     come out from right to left.  (Used in mpn_sb_get_str.)\n\n  B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.\n     Then develop digits by multiplying the fraction repeatedly by b.  Digits\n     come out from left to right.  
(Currently not used herein, except for in\n     code for converting single limbs to individual digits.)\n\n  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above\n     sqrt(U).  Then divide U by B^s, generating quotient and remainder.\n     Recursively convert the quotient, then the remainder, using the\n     precomputed powers.  Digits come out from left to right.  (Used in\n     mpn_dc_get_str.)\n\n  When using algorithm C, algorithm B might be suitable for basecase code,\n  since the required b^g power will be readily accessible.\n\n  Optimization ideas:\n  1. The recursive function of (C) could use less temporary memory.  The powtab\n     allocation could be trimmed with some computation, and the tmp area could\n     be reduced, or perhaps eliminated if up is reused for both quotient and\n     remainder (it is currently used just for remainder).\n  2. Store the powers of (C) in normalized form, with the normalization count.\n     Quotients will usually need to be left-shifted before each divide, and\n     remainders will either need to be left-shifted of right-shifted.\n  3. In the code for developing digits from a single limb, we could avoid using\n     a full umul_ppmm except for the first (or first few) digits, provided base\n     is even.  Subsequent digits can be developed using plain multiplication.\n     (This saves on register-starved machines (read x86) and on all machines\n     that generate the upper product half using a separate instruction (alpha,\n     powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).\n  4. Separate mpn_dc_get_str basecase code from code for small conversions. The\n     former code will have the exact right power readily available in the\n     powtab parameter for dividing the current number into a fraction.  Convert\n     that using algorithm B.\n  5. Completely avoid division.  Compute the inverses of the powers now in\n     powtab instead of the actual powers.\n  6. 
Decrease powtab allocation for even bases.  E.g. for base 10 we could save\n     about 30% (1-log(5)/log(10)).\n\n  Basic structure of (C):\n    mpn_get_str:\n      if POW2_P (n)\n\t...\n      else\n\tif (un < GET_STR_PRECOMPUTE_THRESHOLD)\n\t  mpn_sb_get_str (str, base, up, un);\n\telse\n\t  precompute_power_tables\n\t  mpn_dc_get_str\n\n    mpn_dc_get_str:\n\tmpn_tdiv_qr\n\tif (qn < GET_STR_DC_THRESHOLD)\n\t  mpn_sb_get_str\n\telse\n\t  mpn_dc_get_str\n\tif (rn < GET_STR_DC_THRESHOLD)\n\t  mpn_sb_get_str\n\telse\n\t  mpn_dc_get_str\n\n\n  The reason for the two threshold values is the cost of\n  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably\n  larger than GET_STR_PRECOMPUTE_THRESHOLD.  */\n\n\n#define udiv_qrnd_unnorm(q,r,n,d)       \\\n  do {                                  \\\n    mp_limb_t  __q = (n) / (d);         \\\n    mp_limb_t  __r = (n) - __q*(d);     \\\n    (q) = __q;                          \\\n    (r) = __r;                          \\\n  } while (0)\n  \n\n/* Convert {up,un} to a string in base base, and put the result in str.\n   Generate len characters, possibly padding with zeros to the left.  If len is\n   zero, generate as many characters as required.  Return a pointer immediately\n   after the last digit of the result string.  Complexity is O(un^2); intended\n   for small conversions.  */\nstatic unsigned char *\nmpn_sb_get_str (unsigned char *str, size_t len,\n\t\tmp_ptr up, mp_size_t un, int base)\n{\n  mp_limb_t rl, ul;\n  unsigned char *s;\n  size_t l;\n  /* Allocate memory for largest possible string, given that we only get here\n     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest\n     base is 3.  7/11 is an approximation to 1/log2(3).  
*/\n#if TUNE_PROGRAM_BUILD\n#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)\n#else\n#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)\n#endif\n  unsigned char buf[BUF_ALLOC];\n#if TUNE_PROGRAM_BUILD\n  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];\n#else\n  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];\n#endif\n\n  if (base == 10)\n    {\n      /* Special case code for base==10 so that the compiler has a chance to\n\t optimize things.  */\n\n      MPN_COPY (rp + 1, up, un);\n\n      s = buf + BUF_ALLOC;\n      while (un > 1)\n\t{\n\t  int i;\n\t  mp_limb_t frac, digit;\n\t  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,\n\t\t\t\t\t MP_BASES_BIG_BASE_10,\n\t\t\t\t\t MP_BASES_BIG_BASE_INVERTED_10,\n\t\t\t\t\t MP_BASES_NORMALIZATION_STEPS_10);\n\t  un -= rp[un] == 0;\n\t  frac = (rp[0] + 1) << GMP_NAIL_BITS;\n\t  s -= MP_BASES_CHARS_PER_LIMB_10;\n\t  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.\n\t     After a few umul_ppmm, we will have accumulated enough low zeros\n\t     to use a plain multiply.  */\n\t  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)\n\t    {\n\t      umul_ppmm (digit, frac, frac, 10);\n\t      *s++ = digit;\n\t    }\n\t  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)\n\t    {\n\t      umul_ppmm (digit, frac, frac, 10);\n\t      *s++ = digit;\n\t    }\n\t  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)\n\t    {\n\t      umul_ppmm (digit, frac, frac, 10);\n\t      *s++ = digit;\n\t    }\n\t  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)\n\t    {\n\t      umul_ppmm (digit, frac, frac, 10);\n\t      *s++ = digit;\n\t    }\n\t  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)\n\t\t\t\t\t     ? 
(4-MP_BASES_NORMALIZATION_STEPS_10)\n\t\t\t\t\t     : 0));\n\t  frac = (frac + 0xf) >> 4;\n\t  do\n\t    {\n\t      frac *= 10;\n\t      digit = frac >> (GMP_LIMB_BITS - 4);\n\t      *s++ = digit;\n\t      frac &= (~(mp_limb_t) 0) >> 4;\n\t    }\n\t  while (--i);\n\t  s -= MP_BASES_CHARS_PER_LIMB_10;\n\t}\n\n      ul = rp[1];\n      while (ul != 0)\n\t{\n\t  udiv_qrnd_unnorm (ul, rl, ul, 10);\n\t  *--s = rl;\n\t}\n    }\n  else /* not base 10 */\n    {\n      unsigned chars_per_limb;\n      mp_limb_t big_base, big_base_inverted;\n      unsigned normalization_steps;\n\n      chars_per_limb = mp_bases[base].chars_per_limb;\n      big_base = mp_bases[base].big_base;\n      big_base_inverted = mp_bases[base].big_base_inverted;\n      count_leading_zeros (normalization_steps, big_base);\n\n      MPN_COPY (rp + 1, up, un);\n\n      s = buf + BUF_ALLOC;\n      while (un > 1)\n\t{\n\t  int i;\n\t  mp_limb_t frac;\n\t  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,\n\t\t\t\t\t big_base, big_base_inverted,\n\t\t\t\t\t normalization_steps);\n\t  un -= rp[un] == 0;\n\t  frac = (rp[0] + 1) << GMP_NAIL_BITS;\n\t  s -= chars_per_limb;\n\t  i = chars_per_limb;\n\t  do\n\t    {\n\t      mp_limb_t digit;\n\t      umul_ppmm (digit, frac, frac, base);\n\t      *s++ = digit;\n\t    }\n\t  while (--i);\n\t  s -= chars_per_limb;\n\t}\n\n      ul = rp[1];\n      while (ul != 0)\n\t{\n\t  udiv_qrnd_unnorm (ul, rl, ul, base);\n\t  *--s = rl;\n\t}\n    }\n\n  l = buf + BUF_ALLOC - s;\n  while (l < len)\n    {\n      *str++ = 0;\n      len--;\n    }\n  while (l != 0)\n    {\n      *str++ = *s++;\n      l--;\n    }\n  return str;\n}\n\n\f\n/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put\n   the string in STR.  Generate LEN characters, possibly padding with zeros to\n   the left.  
If LEN is zero, generate as many characters as required.\n   Return a pointer immediately after the last digit of the result string.\n   This uses divide-and-conquer and is intended for large conversions.  */\nstatic unsigned char *\nmpn_dc_get_str (unsigned char *str, size_t len,\n\t\tmp_ptr up, mp_size_t un,\n\t\tconst powers_t *powtab, mp_ptr tmp)\n{\n  if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))\n    {\n      if (un != 0)\n\tstr = mpn_sb_get_str (str, len, up, un, powtab->base);\n      else\n\t{\n\t  while (len != 0)\n\t    {\n\t      *str++ = 0;\n\t      len--;\n\t    }\n\t}\n    }\n  else\n    {\n      mp_ptr pwp, qp, rp;\n      mp_size_t pwn, qn;\n      mp_size_t sn;\n\n      pwp = powtab->p;\n      pwn = powtab->n;\n      sn = powtab->shift;\n\n      if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))\n\t{\n\t  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);\n\t}\n      else\n\t{\n\t  qp = tmp;\t\t/* (un - pwn + 1) limbs for qp */\n\t  rp = up;\t\t/* pwn limbs for rp; overwrite up area */\n\n\t  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);\n\t  qn = un - sn - pwn; qn += qp[qn] != 0;\t\t/* quotient size */\n\n\t  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));\n\n\t  if (len != 0)\n\t    len = len - powtab->digits_in_base;\n\n\t  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);\n\t  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);\n\t}\n    }\n  return str;\n}\n\n\f\n/* There are no leading zeros on the digits generated at str, but that's not\n   currently a documented feature.  
*/\n\nsize_t\nmpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)\n{\n  mp_ptr powtab_mem, powtab_mem_ptr;\n  mp_limb_t big_base;\n  size_t digits_in_base;\n  powers_t powtab[GMP_LIMB_BITS];\n  int pi;\n  mp_size_t n;\n  mp_ptr p, t;\n  size_t out_len;\n  mp_ptr tmp;\n  TMP_DECL;\n\n  /* Special case zero, as the code below doesn't handle it.  */\n  if (un == 0)\n    {\n      str[0] = 0;\n      return 1;\n    }\n\n  if (POW2_P (base))\n    {\n      /* The base is a power of 2.  Convert from most significant end.  */\n      mp_limb_t n1, n0;\n      int bits_per_digit = mp_bases[base].big_base;\n      int cnt;\n      int bit_pos;\n      mp_size_t i;\n      unsigned char *s = str;\n      mp_bitcnt_t bits;\n\n      n1 = up[un - 1];\n      count_leading_zeros (cnt, n1);\n\n      /* BIT_POS should be R when input ends in least significant nibble,\n\t R + bits_per_digit * n when input ends in nth least significant\n\t nibble. */\n\n      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;\n      cnt = bits % bits_per_digit;\n      if (cnt != 0)\n\tbits += bits_per_digit - cnt;\n      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;\n\n      /* Fast loop for bit output.  */\n      i = un - 1;\n      for (;;)\n\t{\n\t  bit_pos -= bits_per_digit;\n\t  while (bit_pos >= 0)\n\t    {\n\t      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);\n\t      bit_pos -= bits_per_digit;\n\t    }\n\t  i--;\n\t  if (i < 0)\n\t    break;\n\t  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);\n\t  n1 = up[i];\n\t  bit_pos += GMP_NUMB_BITS;\n\t  *s++ = n0 | (n1 >> bit_pos);\n\t}\n\n      return s - str;\n    }\n\n  /* General case.  The base is not a power of 2.  */\n\n  if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))\n    return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;\n\n  TMP_MARK;\n\n  /* Allocate one large block for the powers of big_base.  
*/\n  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));\n  powtab_mem_ptr = powtab_mem;\n\n  /* Compute a table of powers, were the largest power is >= sqrt(U).  */\n\n  big_base = mp_bases[base].big_base;\n  digits_in_base = mp_bases[base].chars_per_limb;\n\n  {\n    mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;\n    mp_limb_t cy;\n    mp_size_t shift;\n\n    n_pows = 0;\n    xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;\n    for (pn = xn; pn != 1; pn = (pn + 1) >> 1)\n      {\n\texptab[n_pows] = pn;\n\tn_pows++;\n      }\n    exptab[n_pows] = 1;\n\n    powtab[0].p = &big_base;\n    powtab[0].n = 1;\n    powtab[0].digits_in_base = digits_in_base;\n    powtab[0].base = base;\n    powtab[0].shift = 0;\n\n    powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;\n    powtab[1].p[0] = big_base;\n    powtab[1].n = 1;\n    powtab[1].digits_in_base = digits_in_base;\n    powtab[1].base = base;\n    powtab[1].shift = 0;\n\n    n = 1;\n    p = &big_base;\n    bexp = 1;\n    shift = 0;\n    for (pi = 2; pi < n_pows; pi++)\n      {\n\tt = powtab_mem_ptr;\n\tpowtab_mem_ptr += 2 * n + 2;\n\n\tASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));\n\n\tmpn_sqr (t, p, n);\n\n\tdigits_in_base *= 2;\n\tn *= 2;  n -= t[n - 1] == 0;\n\tbexp *= 2;\n\n\tif (bexp + 1 < exptab[n_pows - pi])\n\t  {\n\t    digits_in_base += mp_bases[base].chars_per_limb;\n\t    cy = mpn_mul_1 (t, t, n, big_base);\n\t    t[n] = cy;\n\t    n += cy != 0;\n\t    bexp += 1;\n\t  }\n\tshift *= 2;\n\t/* Strip low zero limbs.  
*/\n\twhile (t[0] == 0)\n\t  {\n\t    t++;\n\t    n--;\n\t    shift++;\n\t  }\n\tp = t;\n\tpowtab[pi].p = p;\n\tpowtab[pi].n = n;\n\tpowtab[pi].digits_in_base = digits_in_base;\n\tpowtab[pi].base = base;\n\tpowtab[pi].shift = shift;\n      }\n\n    for (pi = 1; pi < n_pows; pi++)\n      {\n\tt = powtab[pi].p;\n\tn = powtab[pi].n;\n\tcy = mpn_mul_1 (t, t, n, big_base);\n\tt[n] = cy;\n\tn += cy != 0;\n\tif (t[0] == 0)\n\t  {\n\t    powtab[pi].p = t + 1;\n\t    n--;\n\t    powtab[pi].shift++;\n\t  }\n\tpowtab[pi].n = n;\n\tpowtab[pi].digits_in_base += mp_bases[base].chars_per_limb;\n      }\n\n#if 0\n    { int i;\n      printf (\"Computed table values for base=%d, un=%d, xn=%d:\\n\", base, un, xn);\n      for (i = 0; i < n_pows; i++)\n\tprintf (\"%2d: %10ld %10ld %11ld %ld\\n\", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);\n    }\n#endif\n  }\n\n  /* Using our precomputed powers, now in powtab[], convert our number.  */\n  tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));\n  out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;\n  TMP_FREE;\n\n  return out_len;\n}\n"
  },
  {
    "path": "mpn/generic/hamdist.c",
    "content": "/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.\n\nCopyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_hamdist\t1\n\n#if OPERATION_popcount\n#define FNAME mpn_popcount\n#define POPHAM(u,v) u\n#endif\n\n#if OPERATION_hamdist\n#define FNAME mpn_hamdist\n#define POPHAM(u,v) u ^ v\n#endif\n\nmp_bitcnt_t\nFNAME (mp_srcptr up,\n#if OPERATION_hamdist\n       mp_srcptr vp,\n#endif\n       mp_size_t n)\n{\n  mp_bitcnt_t result = 0;\n  mp_limb_t p0, p1, p2, p3, x, p01, p23;\n  mp_size_t i;\n\n  ASSERT (n >= 1);\t\t/* Actually, this code handles any n, but some\n\t\t\t\t   assembly implementations does not.  
*/\n\n  for (i = n >> 2; i != 0; i--)\n    {\n      p0 = POPHAM (up[0], vp[0]);\n      p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p1 = POPHAM (up[1], vp[1]);\n      p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p01 = p0 + p1;\t\t\t\t\t\t\t/* 8 0-8 */\n      p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);\t/* 8 0-16 */\n\n      p2 = POPHAM (up[2], vp[2]);\n      p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p3 = POPHAM (up[3], vp[3]);\n      p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p23 = p2 + p3;\t\t\t\t\t\t\t/* 8 0-8 */\n      p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);\t/* 8 0-16 */\n\n      x = p01 + p23;\t\t\t\t\t\t\t/* 8 0-32 */\n      x = (x >> 8) + x;\t\t\t\t\t\t\t/* 8 0-64 */\n      x = (x >> 16) + x;\t\t\t\t\t\t/* 8 0-128 */\n#if GMP_LIMB_BITS > 32\n      x = ((x >> 32) & 0xff) + (x & 0xff);\t\t\t\t/* 8 0-256 */\n      result += x;\n#else\n      result += x & 0xff;\n#endif\n      up += 4;\n#if OPERATION_hamdist\n      vp += 4;\n#endif\n    }\n\n  n &= 3;\n  if (n != 0)\n    {\n      x = 0;\n      do\n\t{\n\t  p0 = POPHAM (up[0], vp[0]);\n\t  p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n\t  p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\t  p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;\t\t\t/* 8 0-8 */\n\n\t  x += p0;\n\t  up += 1;\n#if OPERATION_hamdist\n\t  vp += 1;\n#endif\n\t}\n      while (--n);\n\n      x = (x >> 8) + x;\n      x = (x >> 16) + x;\n#if GMP_LIMB_BITS > 32\n      x = (x >> 32) + x;\n#endif\n      result += x & 0xff;\n    }\n\n  return result;\n}\n"
  },
  {
    "path": "mpn/generic/hgcd.c",
    "content": "/* hgcd.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Size analysis for hgcd:\n\n   For the recursive calls, we have n1 <= ceil(n / 2). Then the\n   storage need is determined by the storage for the recursive call\n   computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1\n   (after this, the storage needed for M1 can be recycled).\n\n   Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)\n   = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,\n   and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,\n   4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.\n\n   For the recursive call, we need S(n1) = S(ceil(n/2)).\n\n   S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))\n\t<= 10*(ceil(n/4) + ... 
+ ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))\n\t<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))\n\t<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))\n*/\n\nmp_size_t\nmpn_hgcd_itch (mp_size_t n)\n{\n  unsigned k;\n  int count;\n  mp_size_t nscaled;\n\n  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))\n    return n;\n\n  /* Get the recursion depth. */\n  nscaled = (n - 1) / (HGCD_THRESHOLD - 1);\n  count_leading_zeros (count, nscaled);\n  k = GMP_LIMB_BITS - count;\n\n  return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;\n}\n\n/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M\n   with elements of size at most (n+1)/2 - 1. Returns new size of a,\n   b, or zero if no reduction is possible. */\n\nmp_size_t\nmpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,\n\t  struct hgcd_matrix *M, mp_ptr tp)\n{\n  mp_size_t s = n/2 + 1;\n\n  mp_size_t nn;\n  int success = 0;\n\n  if (n <= s)\n    /* Happens when n <= 2, a fairly uninteresting case but exercised\n       by the random inputs of the testsuite. */\n    return 0;\n\n  ASSERT ((ap[n-1] | bp[n-1]) > 0);\n\n  ASSERT ((n+1)/2 - 1 < M->alloc);\n\n  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))\n    {\n      mp_size_t n2 = (3*n)/4 + 1;\n      mp_size_t p = n/2;\n\n      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);\n      if (nn)\n\t{\n\t  n = nn;\n\t  success = 1;\n\t}\n\n      /* NOTE: It apppears this loop never runs more than once (at\n\t least when not recursing to hgcd_appr). */\n      while (n > n2)\n\t{\n\t  /* Needs n + 1 storage */\n\t  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n\t  if (!nn)\n\t    return success ? n : 0;\n\n\t  n = nn;\n\t  success = 1;\n\t}\n\n      if (n > s + 2)\n\t{\n\t  struct hgcd_matrix M1;\n\t  mp_size_t scratch;\n\n\t  p = 2*s - n + 1;\n\t  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);\n\n\t  mpn_hgcd_matrix_init(&M1, n - p, tp);\n\n\t  /* FIXME: Should use hgcd_reduce, but that may require more\n\t     scratch space, which requires review. 
*/\n\n\t  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);\n\t  if (nn > 0)\n\t    {\n\t      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */\n\t      ASSERT (M->n + 2 >= M1.n);\n\n\t      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),\n\t\t then either q or q + 1 is a correct quotient, and M1 will\n\t\t start with either (1, 0; 1, 1) or (2, 1; 1, 1). This\n\t\t rules out the case that the size of M * M1 is much\n\t\t smaller than the expected M->n + M1->n. */\n\n\t      ASSERT (M->n + M1.n < M->alloc);\n\n\t      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)\n\t\t = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */\n\t      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);\n\n\t      /* We need a bound for of M->n + M1.n. Let n be the original\n\t\t input size. Then\n\n\t\t ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2\n\n\t\t and it follows that\n\n\t\t M.n + M1.n <= ceil(n/2) + 1\n\n\t\t Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the\n\t\t amount of needed scratch space. */\n\t      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);\n\t      success = 1;\n\t    }\n\t}\n    }\n\n  for (;;)\n    {\n      /* Needs s+3 < n */\n      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n      if (!nn)\n\treturn success ? n : 0;\n\n      n = nn;\n      success = 1;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/hgcd2.c",
    "content": "/* hgcd2.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2012 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if GMP_NAIL_BITS == 0\n\n/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return\n   the remainder. */\n\n/* Single-limb division optimized for small quotients. 
*/\nstatic inline mp_limb_t\ndiv1 (mp_ptr rp,\n      mp_limb_t n0,\n      mp_limb_t d0)\n{\n  mp_limb_t q = 0;\n\n  if ((mp_limb_signed_t) n0 < 0)\n    {\n      int cnt;\n      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)\n\t{\n\t  d0 = d0 << 1;\n\t}\n\n      q = 0;\n      while (cnt)\n\t{\n\t  q <<= 1;\n\t  if (n0 >= d0)\n\t    {\n\t      n0 = n0 - d0;\n\t      q |= 1;\n\t    }\n\t  d0 = d0 >> 1;\n\t  cnt--;\n\t}\n    }\n  else\n    {\n      int cnt;\n      for (cnt = 0; n0 >= d0; cnt++)\n\t{\n\t  d0 = d0 << 1;\n\t}\n\n      q = 0;\n      while (cnt)\n\t{\n\t  d0 = d0 >> 1;\n\t  q <<= 1;\n\t  if (n0 >= d0)\n\t    {\n\t      n0 = n0 - d0;\n\t      q |= 1;\n\t    }\n\t  cnt--;\n\t}\n    }\n  *rp = n0;\n  return q;\n}\n\n/* Two-limb division optimized for small quotients.  */\nstatic inline mp_limb_t\ndiv2 (mp_ptr rp,\n      mp_limb_t nh, mp_limb_t nl,\n      mp_limb_t dh, mp_limb_t dl)\n{\n  mp_limb_t q = 0;\n\n  if ((mp_limb_signed_t) nh < 0)\n    {\n      int cnt;\n      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)\n\t{\n\t  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));\n\t  dl = dl << 1;\n\t}\n\n      while (cnt)\n\t{\n\t  q <<= 1;\n\t  if (nh > dh || (nh == dh && nl >= dl))\n\t    {\n\t      sub_ddmmss (nh, nl, nh, nl, dh, dl);\n\t      q |= 1;\n\t    }\n\t  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);\n\t  dh = dh >> 1;\n\t  cnt--;\n\t}\n    }\n  else\n    {\n      int cnt;\n      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)\n\t{\n\t  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));\n\t  dl = dl << 1;\n\t}\n\n      while (cnt)\n\t{\n\t  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);\n\t  dh = dh >> 1;\n\t  q <<= 1;\n\t  if (nh > dh || (nh == dh && nl >= dl))\n\t    {\n\t      sub_ddmmss (nh, nl, nh, nl, dh, dl);\n\t      q |= 1;\n\t    }\n\t  cnt--;\n\t}\n    }\n\n  rp[0] = nl;\n  rp[1] = nh;\n\n  return q;\n}\n\n#if 0\n/* This div2 uses less branches, but it seems to nevertheless be\n   slightly slower than the above code. 
*/\nstatic inline mp_limb_t\ndiv2 (mp_ptr rp,\n      mp_limb_t nh, mp_limb_t nl,\n      mp_limb_t dh, mp_limb_t dl)\n{\n  mp_limb_t q = 0;\n  int ncnt;\n  int dcnt;\n\n  count_leading_zeros (ncnt, nh);\n  count_leading_zeros (dcnt, dh);\n  dcnt -= ncnt;\n\n  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));\n  dl <<= dcnt;\n\n  do\n    {\n      mp_limb_t bit;\n      q <<= 1;\n      if (UNLIKELY (nh == dh))\n\tbit = (nl >= dl);\n      else\n\tbit = (nh > dh);\n\n      q |= bit;\n\n      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);\n\n      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);\n      dh = dh >> 1;\n    }\n  while (dcnt--);\n\n  rp[0] = nl;\n  rp[1] = nh;\n\n  return q;\n}\n#endif\n\n#else /* GMP_NAIL_BITS != 0 */\n/* Check all functions for nail support. */\n/* hgcd2 should be defined to take inputs including nail bits, and\n   produce a matrix with elements also including nail bits. This is\n   necessary, for the matrix elements to be useful with mpn_mul_1,\n   mpn_addmul_1 and friends. */\n#error Not implemented\n#endif /* GMP_NAIL_BITS != 0 */\n\n/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs\n   matrix M. Returns 1 if we make progress, i.e. can perform at least\n   one subtraction. Otherwise returns zero. */\n\n/* FIXME: Possible optimizations:\n\n   The div2 function starts with checking the most significant bit of\n   the numerator. 
We can maintained normalized operands here, call\n   hgcd with normalized operands only, which should make the code\n   simpler and possibly faster.\n\n   Experiment with table lookups on the most significant bits.\n\n   This function is also a candidate for assembler implementation.\n*/\nint\nmpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,\n\t   struct hgcd_matrix1 *M)\n{\n  mp_limb_t u00, u01, u10, u11;\n\n  if (ah < 2 || bh < 2)\n    return 0;\n\n  if (ah > bh || (ah == bh && al > bl))\n    {\n      sub_ddmmss (ah, al, ah, al, bh, bl);\n      if (ah < 2)\n\treturn 0;\n\n      u00 = u01 = u11 = 1;\n      u10 = 0;\n    }\n  else\n    {\n      sub_ddmmss (bh, bl, bh, bl, ah, al);\n      if (bh < 2)\n\treturn 0;\n\n      u00 = u10 = u11 = 1;\n      u01 = 0;\n    }\n\n  if (ah < bh)\n    goto subtract_a;\n\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n      if (ah == bh)\n\tgoto done;\n\n      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));\n\t  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));\n\n\t  break;\n\t}\n\n      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0\n\t 1), affecting the second column of M. */\n      ASSERT (ah > bh);\n      sub_ddmmss (ah, al, ah, al, bh, bl);\n\n      if (ah < 2)\n\tgoto done;\n\n      if (ah <= bh)\n\t{\n\t  /* Use q = 1 */\n\t  u01 += u00;\n\t  u11 += u10;\n\t}\n      else\n\t{\n\t  mp_limb_t r[2];\n\t  mp_limb_t q = div2 (r, ah, al, bh, bl);\n\t  al = r[0]; ah = r[1];\n\t  if (ah < 2)\n\t    {\n\t      /* A is too small, but q is correct. 
*/\n\t      u01 += q * u00;\n\t      u11 += q * u10;\n\t      goto done;\n\t    }\n\t  q++;\n\t  u01 += q * u00;\n\t  u11 += q * u10;\n\t}\n    subtract_a:\n      ASSERT (bh >= ah);\n      if (ah == bh)\n\tgoto done;\n\n      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));\n\t  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));\n\n\t  goto subtract_a1;\n\t}\n\n      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q\n\t 1), affecting the first column of M. */\n      sub_ddmmss (bh, bl, bh, bl, ah, al);\n\n      if (bh < 2)\n\tgoto done;\n\n      if (bh <= ah)\n\t{\n\t  /* Use q = 1 */\n\t  u00 += u01;\n\t  u10 += u11;\n\t}\n      else\n\t{\n\t  mp_limb_t r[2];\n\t  mp_limb_t q = div2 (r, bh, bl, ah, al);\n\t  bl = r[0]; bh = r[1];\n\t  if (bh < 2)\n\t    {\n\t      /* B is too small, but q is correct. */\n\t      u00 += q * u01;\n\t      u10 += q * u11;\n\t      goto done;\n\t    }\n\t  q++;\n\t  u00 += q * u01;\n\t  u10 += q * u11;\n\t}\n    }\n\n  /* NOTE: Since we discard the least significant half limb, we don't\n     get a truly maximal M (corresponding to |a - b| <\n     2^{GMP_LIMB_BITS +1}). */\n  /* Single precision loop */\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n\n      ah -= bh;\n      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))\n\tbreak;\n\n      if (ah <= bh)\n\t{\n\t  /* Use q = 1 */\n\t  u01 += u00;\n\t  u11 += u10;\n\t}\n      else\n\t{\n\t  mp_limb_t r;\n\t  mp_limb_t q = div1 (&r, ah, bh);\n\t  ah = r;\n\t  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n\t    {\n\t      /* A is too small, but q is correct. 
*/\n\t      u01 += q * u00;\n\t      u11 += q * u10;\n\t      break;\n\t    }\n\t  q++;\n\t  u01 += q * u00;\n\t  u11 += q * u10;\n\t}\n    subtract_a1:\n      ASSERT (bh >= ah);\n\n      bh -= ah;\n      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))\n\tbreak;\n\n      if (bh <= ah)\n\t{\n\t  /* Use q = 1 */\n\t  u00 += u01;\n\t  u10 += u11;\n\t}\n      else\n\t{\n\t  mp_limb_t r;\n\t  mp_limb_t q = div1 (&r, bh, ah);\n\t  bh = r;\n\t  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n\t    {\n\t      /* B is too small, but q is correct. */\n\t      u00 += q * u01;\n\t      u10 += q * u11;\n\t      break;\n\t    }\n\t  q++;\n\t  u00 += q * u01;\n\t  u10 += q * u11;\n\t}\n    }\n\n done:\n  M->u[0][0] = u00; M->u[0][1] = u01;\n  M->u[1][0] = u10; M->u[1][1] = u11;\n\n  return 1;\n}\n"
  },
  {
    "path": "mpn/generic/hgcd2_jacobi.c",
    "content": "/* hgcd2_jacobi.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2011 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if GMP_NAIL_BITS > 0\n#error Nails not supported.\n#endif\n\n/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and\n   possibly be renamed. 
*/\nstatic inline mp_limb_t\ndiv1 (mp_ptr rp,\n      mp_limb_t n0,\n      mp_limb_t d0)\n{\n  mp_limb_t q = 0;\n\n  if ((mp_limb_signed_t) n0 < 0)\n    {\n      int cnt;\n      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)\n\t{\n\t  d0 = d0 << 1;\n\t}\n\n      q = 0;\n      while (cnt)\n\t{\n\t  q <<= 1;\n\t  if (n0 >= d0)\n\t    {\n\t      n0 = n0 - d0;\n\t      q |= 1;\n\t    }\n\t  d0 = d0 >> 1;\n\t  cnt--;\n\t}\n    }\n  else\n    {\n      int cnt;\n      for (cnt = 0; n0 >= d0; cnt++)\n\t{\n\t  d0 = d0 << 1;\n\t}\n\n      q = 0;\n      while (cnt)\n\t{\n\t  d0 = d0 >> 1;\n\t  q <<= 1;\n\t  if (n0 >= d0)\n\t    {\n\t      n0 = n0 - d0;\n\t      q |= 1;\n\t    }\n\t  cnt--;\n\t}\n    }\n  *rp = n0;\n  return q;\n}\n\n/* Two-limb division optimized for small quotients.  */\nstatic inline mp_limb_t\ndiv2 (mp_ptr rp,\n      mp_limb_t nh, mp_limb_t nl,\n      mp_limb_t dh, mp_limb_t dl)\n{\n  mp_limb_t q = 0;\n\n  if ((mp_limb_signed_t) nh < 0)\n    {\n      int cnt;\n      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)\n\t{\n\t  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));\n\t  dl = dl << 1;\n\t}\n\n      while (cnt)\n\t{\n\t  q <<= 1;\n\t  if (nh > dh || (nh == dh && nl >= dl))\n\t    {\n\t      sub_ddmmss (nh, nl, nh, nl, dh, dl);\n\t      q |= 1;\n\t    }\n\t  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);\n\t  dh = dh >> 1;\n\t  cnt--;\n\t}\n    }\n  else\n    {\n      int cnt;\n      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)\n\t{\n\t  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));\n\t  dl = dl << 1;\n\t}\n\n      while (cnt)\n\t{\n\t  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);\n\t  dh = dh >> 1;\n\t  q <<= 1;\n\t  if (nh > dh || (nh == dh && nl >= dl))\n\t    {\n\t      sub_ddmmss (nh, nl, nh, nl, dh, dl);\n\t      q |= 1;\n\t    }\n\t  cnt--;\n\t}\n    }\n\n  rp[0] = nl;\n  rp[1] = nh;\n\n  return q;\n}\n\nint\nmpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,\n\t\t  struct hgcd_matrix1 *M, unsigned *bitsp)\n{\n  
mp_limb_t u00, u01, u10, u11;\n  unsigned bits = *bitsp;\n\n  if (ah < 2 || bh < 2)\n    return 0;\n\n  if (ah > bh || (ah == bh && al > bl))\n    {\n      sub_ddmmss (ah, al, ah, al, bh, bl);\n      if (ah < 2)\n\treturn 0;\n\n      u00 = u01 = u11 = 1;\n      u10 = 0;\n      bits = mpn_jacobi_update (bits, 1, 1);\n    }\n  else\n    {\n      sub_ddmmss (bh, bl, bh, bl, ah, al);\n      if (bh < 2)\n\treturn 0;\n\n      u00 = u10 = u11 = 1;\n      u01 = 0;\n      bits = mpn_jacobi_update (bits, 0, 1);\n    }\n\n  if (ah < bh)\n    goto subtract_a;\n\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n      if (ah == bh)\n\tgoto done;\n\n      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));\n\t  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));\n\n\t  break;\n\t}\n\n      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0\n\t 1), affecting the second column of M. */\n      ASSERT (ah > bh);\n      sub_ddmmss (ah, al, ah, al, bh, bl);\n\n      if (ah < 2)\n\tgoto done;\n\n      if (ah <= bh)\n\t{\n\t  /* Use q = 1 */\n\t  u01 += u00;\n\t  u11 += u10;\n\t  bits = mpn_jacobi_update (bits, 1, 1);\n\t}\n      else\n\t{\n\t  mp_limb_t r[2];\n\t  mp_limb_t q = div2 (r, ah, al, bh, bl);\n\t  al = r[0]; ah = r[1];\n\t  if (ah < 2)\n\t    {\n\t      /* A is too small, but q is correct. 
*/\n\t      u01 += q * u00;\n\t      u11 += q * u10;\n\t      bits = mpn_jacobi_update (bits, 1, q & 3);\n\t      goto done;\n\t    }\n\t  q++;\n\t  u01 += q * u00;\n\t  u11 += q * u10;\n\t  bits = mpn_jacobi_update (bits, 1, q & 3);\n\t}\n    subtract_a:\n      ASSERT (bh >= ah);\n      if (ah == bh)\n\tgoto done;\n\n      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));\n\t  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));\n\n\t  goto subtract_a1;\n\t}\n\n      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q\n\t 1), affecting the first column of M. */\n      sub_ddmmss (bh, bl, bh, bl, ah, al);\n\n      if (bh < 2)\n\tgoto done;\n\n      if (bh <= ah)\n\t{\n\t  /* Use q = 1 */\n\t  u00 += u01;\n\t  u10 += u11;\n\t  bits = mpn_jacobi_update (bits, 0, 1);\n\t}\n      else\n\t{\n\t  mp_limb_t r[2];\n\t  mp_limb_t q = div2 (r, bh, bl, ah, al);\n\t  bl = r[0]; bh = r[1];\n\t  if (bh < 2)\n\t    {\n\t      /* B is too small, but q is correct. */\n\t      u00 += q * u01;\n\t      u10 += q * u11;\n\t      bits = mpn_jacobi_update (bits, 0, q & 3);\n\t      goto done;\n\t    }\n\t  q++;\n\t  u00 += q * u01;\n\t  u10 += q * u11;\n\t  bits = mpn_jacobi_update (bits, 0, q & 3);\n\t}\n    }\n\n  /* NOTE: Since we discard the least significant half limb, we don't\n     get a truly maximal M (corresponding to |a - b| <\n     2^{GMP_LIMB_BITS +1}). */\n  /* Single precision loop */\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n      if (ah == bh)\n\tbreak;\n\n      ah -= bh;\n      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))\n\tbreak;\n\n      if (ah <= bh)\n\t{\n\t  /* Use q = 1 */\n\t  u01 += u00;\n\t  u11 += u10;\n\t  bits = mpn_jacobi_update (bits, 1, 1);\n\t}\n      else\n\t{\n\t  mp_limb_t r;\n\t  mp_limb_t q = div1 (&r, ah, bh);\n\t  ah = r;\n\t  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n\t    {\n\t      /* A is too small, but q is correct. 
*/\n\t      u01 += q * u00;\n\t      u11 += q * u10;\n\t      bits = mpn_jacobi_update (bits, 1, q & 3);\n\t      break;\n\t    }\n\t  q++;\n\t  u01 += q * u00;\n\t  u11 += q * u10;\n\t  bits = mpn_jacobi_update (bits, 1, q & 3);\n\t}\n    subtract_a1:\n      ASSERT (bh >= ah);\n      if (ah == bh)\n\tbreak;\n\n      bh -= ah;\n      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))\n\tbreak;\n\n      if (bh <= ah)\n\t{\n\t  /* Use q = 1 */\n\t  u00 += u01;\n\t  u10 += u11;\n\t  bits = mpn_jacobi_update (bits, 0, 1);\n\t}\n      else\n\t{\n\t  mp_limb_t r;\n\t  mp_limb_t q = div1 (&r, bh, ah);\n\t  bh = r;\n\t  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n\t    {\n\t      /* B is too small, but q is correct. */\n\t      u00 += q * u01;\n\t      u10 += q * u11;\n\t      bits = mpn_jacobi_update (bits, 0, q & 3);\n\t      break;\n\t    }\n\t  q++;\n\t  u00 += q * u01;\n\t  u10 += q * u11;\n\t  bits = mpn_jacobi_update (bits, 0, q & 3);\n\t}\n    }\n\n done:\n  M->u[0][0] = u00; M->u[0][1] = u01;\n  M->u[1][0] = u10; M->u[1][1] = u11;\n  *bitsp = bits;\n\n  return 1;\n}\n"
  },
  {
    "path": "mpn/generic/hgcd_appr.c",
    "content": "/* hgcd_appr.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add\n   HGCD_THRESHOLD at the end? */\nmp_size_t\nmpn_hgcd_appr_itch (mp_size_t n)\n{\n  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))\n    return n;\n  else\n    {\n      unsigned k;\n      int count;\n      mp_size_t nscaled;\n\n      /* Get the recursion depth. */\n      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);\n      count_leading_zeros (count, nscaled);\n      k = GMP_LIMB_BITS - count;\n\n      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;\n    }\n}\n\n/* Destroys inputs. */\nint\nmpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,\n\t       struct hgcd_matrix *M, mp_ptr tp)\n{\n  mp_size_t s;\n  int success = 0;\n\n  ASSERT (n > 0);\n\n  ASSERT ((ap[n-1] | bp[n-1]) != 0);\n\n  if (n <= 2)\n    /* Implies s = n. A fairly uninteresting case but exercised by the\n       random inputs of the testsuite. 
*/\n    return 0;\n\n  ASSERT ((n+1)/2 - 1 < M->alloc);\n\n  /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time\n     we discard some of the least significant limbs, we must keep one\n     additional bit to account for the truncation error. We maintain\n     the GMP_NUMB_BITS * s - extra_bits as the current target size. */\n\n  s = n/2 + 1;\n  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))\n    {\n      unsigned extra_bits = 0;\n\n      while (n > 2)\n\t{\n\t  mp_size_t nn;\n\n\t  ASSERT (n > s);\n\t  ASSERT (n <= 2*s);\n\n\t  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n\t  if (!nn)\n\t    break;\n\n\t  n = nn;\n\t  success = 1;\n\n\t  /* We can truncate and discard the lower p bits whenever nbits <=\n\t     2*sbits - p. To account for the truncation error, we must\n\t     adjust\n\n\t     sbits <-- sbits + 1 - p,\n\n\t     rather than just sbits <-- sbits - p. This adjustment makes\n\t     the produced matrix sligthly smaller than it could be. */\n\n\t  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)\n\t    {\n\t      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;\n\n\t      if (extra_bits == 0)\n\t\t{\n\t\t  /* We cross a limb boundary and bump s. We can't do that\n\t\t     if the result is that it makes makes min(U, V)\n\t\t     smaller than 2^{GMP_NUMB_BITS} s. */\n\t\t  if (s + 1 == n\n\t\t      || mpn_zero_p (ap + s + 1, n - s - 1)\n\t\t      || mpn_zero_p (bp + s + 1, n - s - 1))\n\t\t    continue;\n\n\t\t  extra_bits = GMP_NUMB_BITS - 1;\n\t\t  s++;\n\t\t}\n\t      else\n\t\t{\n\t\t  extra_bits--;\n\t\t}\n\n\t      /* Drop the p least significant limbs */\n\t      ap += p; bp += p; n -= p; s -= p;\n\t    }\n\t}\n\n      ASSERT (s > 0);\n\n      if (extra_bits > 0)\n\t{\n\t  /* We can get here only of we have dropped at least one of the\n\t     least significant bits, so we can decrement ap and bp. We can\n\t     then shift left extra bits using mpn_shiftr. 
*/\n\t  /* NOTE: In the unlikely case that n is large, it would be\n\t     preferable to do an initial subdiv step to reduce the size\n\t     before shifting, but that would mean daplicating\n\t     mpn_gcd_subdiv_step with a bit count rather than a limb\n\t     count. */\n\t  ap--; bp--;\n\t  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);\n\t  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);\n\t  n += (ap[n] | bp[n]) > 0;\n\n\t  ASSERT (success);\n\n\t  while (n > 2)\n\t    {\n\t      mp_size_t nn;\n\n\t      ASSERT (n > s);\n\t      ASSERT (n <= 2*s);\n\n\t      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n\n\t      if (!nn)\n\t\treturn 1;\n\n\t      n = nn;\n\t    }\n\t}\n\n      if (n == 2)\n\t{\n\t  struct hgcd_matrix1 M1;\n\t  ASSERT (s == 1);\n\n\t  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))\n\t    {\n\t      /* Multiply M <- M * M1 */\n\t      mpn_hgcd_matrix_mul_1 (M, &M1, tp);\n\t      success = 1;\n\t    }\n\t}\n      return success;\n    }\n  else\n    {\n      mp_size_t n2 = (3*n)/4 + 1;\n      mp_size_t p = n/2;\n      mp_size_t nn;\n\n      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);\n      if (nn)\n\t{\n\t  n = nn;\n\t  /* FIXME: Discard some of the low limbs immediately? 
*/\n\t  success = 1;\n\t}\n\n      while (n > n2)\n\t{\n\t  mp_size_t nn;\n\n\t  /* Needs n + 1 storage */\n\t  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n\t  if (!nn)\n\t    return success;\n\n\t  n = nn;\n\t  success = 1;\n\t}\n      if (n > s + 2)\n\t{\n\t  struct hgcd_matrix M1;\n\t  mp_size_t scratch;\n\n\t  p = 2*s - n + 1;\n\t  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);\n\n\t  mpn_hgcd_matrix_init(&M1, n - p, tp);\n\t  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))\n\t    {\n\t      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */\n\t      ASSERT (M->n + 2 >= M1.n);\n\n\t      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),\n\t\t then either q or q + 1 is a correct quotient, and M1 will\n\t\t start with either (1, 0; 1, 1) or (2, 1; 1, 1). This\n\t\t rules out the case that the size of M * M1 is much\n\t\t smaller than the expected M->n + M1->n. */\n\n\t      ASSERT (M->n + M1.n < M->alloc);\n\n\t      /* We need a bound for of M->n + M1.n. Let n be the original\n\t\t input size. Then\n\n\t\t ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2\n\n\t\t and it follows that\n\n\t\t M.n + M1.n <= ceil(n/2) + 1\n\n\t\t Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the\n\t\t amount of needed scratch space. */\n\t      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);\n\t      return 1;\n\t    }\n\t}\n\n      for(;;)\n\t{\n\t  mp_size_t nn;\n\n\t  ASSERT (n > s);\n\t  ASSERT (n <= 2*s);\n\n\t  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);\n\n\t  if (!nn)\n\t    return success;\n\n\t  n = nn;\n\t  success = 1;\n\t}\n    }\n}\n"
  },
  {
    "path": "mpn/generic/hgcd_jacobi.c",
    "content": "/* hgcd_jacobi.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* This file is almost a copy of hgcd.c, with some added calls to\n   mpn_jacobi_update */\n\nstruct hgcd_jacobi_ctx\n{\n  struct hgcd_matrix *M;\n  unsigned *bitsp;\n};\n\nstatic void\nhgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,\n\t\t  mp_srcptr qp, mp_size_t qn, int d)\n{\n  ASSERT (!gp);\n  ASSERT (d >= 0);\n\n  MPN_NORMALIZE (qp, qn);\n  if (qn > 0)\n    {\n      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;\n      /* NOTES: This is a bit ugly. A tp area is passed to\n\t gcd_subdiv_step, which stores q at the start of that area. We\n\t now use the rest. */\n      mp_ptr tp = (mp_ptr) qp + qn;\n\n      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);\n      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);\n    }\n}\n\n/* Perform a few steps, using some of mpn_hgcd2, subtraction and\n   division. 
Reduces the size by almost one limb or more, but never\n   below the given size s. Return new size for a and b, or 0 if no\n   more steps are possible.\n\n   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n\n   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2\n   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and\n   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=\n   resulting size of M.\n\n   If N is the input size to the calling hgcd, then s = floor(N/2) +\n   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1\n   < N, so N is sufficient.\n*/\n\nstatic mp_size_t\nhgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,\n\t\t  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)\n{\n  struct hgcd_matrix1 M1;\n  mp_limb_t mask;\n  mp_limb_t ah, al, bh, bl;\n\n  ASSERT (n > s);\n\n  mask = ap[n-1] | bp[n-1];\n  ASSERT (mask > 0);\n\n  if (n == s + 1)\n    {\n      if (mask < 4)\n\tgoto subtract;\n\n      ah = ap[n-1]; al = ap[n-2];\n      bh = bp[n-1]; bl = bp[n-2];\n    }\n  else if (mask & GMP_NUMB_HIGHBIT)\n    {\n      ah = ap[n-1]; al = ap[n-2];\n      bh = bp[n-1]; bl = bp[n-2];\n    }\n  else\n    {\n      int shift;\n\n      count_leading_zeros (shift, mask);\n      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);\n      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);\n      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);\n      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);\n    }\n\n  /* Try an mpn_hgcd2 step */\n  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))\n    {\n      /* Multiply M <- M * M1 */\n      mpn_hgcd_matrix_mul_1 (M, &M1, tp);\n\n      /* Can't swap inputs, so we need to copy. 
*/\n      MPN_COPY (tp, ap, n);\n      /* Multiply M1^{-1} (a;b) */\n      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);\n    }\n\n subtract:\n  {\n    struct hgcd_jacobi_ctx ctx;\n    ctx.M = M;\n    ctx.bitsp = bitsp;\n\n    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);\n  }\n}\n\n/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M\n   with elements of size at most (n+1)/2 - 1. Returns new size of a,\n   b, or zero if no reduction is possible. */\n\n/* Same scratch requirements as for mpn_hgcd. */\nmp_size_t\nmpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,\n\t\t struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)\n{\n  mp_size_t s = n/2 + 1;\n\n  mp_size_t nn;\n  int success = 0;\n\n  if (n <= s)\n    /* Happens when n <= 2, a fairly uninteresting case but exercised\n       by the random inputs of the testsuite. */\n    return 0;\n\n  ASSERT ((ap[n-1] | bp[n-1]) > 0);\n\n  ASSERT ((n+1)/2 - 1 < M->alloc);\n\n  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))\n    {\n      mp_size_t n2 = (3*n)/4 + 1;\n      mp_size_t p = n/2;\n\n      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);\n      if (nn > 0)\n\t{\n\t  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)\n\t     = 2 (n - 1) */\n\t  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);\n\t  success = 1;\n\t}\n      while (n > n2)\n\t{\n\t  /* Needs n + 1 storage */\n\t  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);\n\t  if (!nn)\n\t    return success ? 
n : 0;\n\t  n = nn;\n\t  success = 1;\n\t}\n\n      if (n > s + 2)\n\t{\n\t  struct hgcd_matrix M1;\n\t  mp_size_t scratch;\n\n\t  p = 2*s - n + 1;\n\t  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);\n\n\t  mpn_hgcd_matrix_init(&M1, n - p, tp);\n\t  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);\n\t  if (nn > 0)\n\t    {\n\t      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */\n\t      ASSERT (M->n + 2 >= M1.n);\n\n\t      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),\n\t\t then either q or q + 1 is a correct quotient, and M1 will\n\t\t start with either (1, 0; 1, 1) or (2, 1; 1, 1). This\n\t\t rules out the case that the size of M * M1 is much\n\t\t smaller than the expected M->n + M1->n. */\n\n\t      ASSERT (M->n + M1.n < M->alloc);\n\n\t      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)\n\t\t = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */\n\t      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);\n\n\t      /* We need a bound for of M->n + M1.n. Let n be the original\n\t\t input size. Then\n\n\t\t ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2\n\n\t\t and it follows that\n\n\t\t M.n + M1.n <= ceil(n/2) + 1\n\n\t\t Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the\n\t\t amount of needed scratch space. */\n\t      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);\n\t      success = 1;\n\t    }\n\t}\n    }\n\n  for (;;)\n    {\n      /* Needs s+3 < n */\n      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);\n      if (!nn)\n\treturn success ? n : 0;\n\n      n = nn;\n      success = 1;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/hgcd_matrix.c",
    "content": "/* hgcd_matrix.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* For input of size n, matrix elements are of size at most ceil(n/2)\n   - 1, but we need two limbs extra. */\nvoid\nmpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)\n{\n  mp_size_t s = (n+1)/2 + 1;\n  M->alloc = s;\n  M->n = 1;\n  MPN_ZERO (p, 4 * s);\n  M->p[0][0] = p;\n  M->p[0][1] = p + s;\n  M->p[1][0] = p + 2 * s;\n  M->p[1][1] = p + 3 * s;\n\n  M->p[0][0][0] = M->p[1][1][0] = 1;\n}\n\n/* Update column COL, adding in Q * column (1-COL). Temporary storage:\n * qn + n <= M->alloc, where n is the size of the largest element in\n * column 1 - COL. 
*/\nvoid\nmpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,\n\t\t\t  unsigned col, mp_ptr tp)\n{\n  ASSERT (col < 2);\n\n  if (qn == 1)\n    {\n      mp_limb_t q = qp[0];\n      mp_limb_t c0, c1;\n\n      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);\n      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);\n\n      M->p[0][col][M->n] = c0;\n      M->p[1][col][M->n] = c1;\n\n      M->n += (c0 | c1) != 0;\n    }\n  else\n    {\n      unsigned row;\n\n      /* Carries for the unlikely case that we get both high words\n\t from the multiplication and carries from the addition. */\n      mp_limb_t c[2];\n      mp_size_t n;\n\n      /* The matrix will not necessarily grow in size by qn, so we\n\t need normalization in order not to overflow M. */\n\n      for (n = M->n; n + qn > M->n; n--)\n\t{\n\t  ASSERT (n > 0);\n\t  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)\n\t    break;\n\t}\n\n      ASSERT (qn + n <= M->alloc);\n\n      for (row = 0; row < 2; row++)\n\t{\n\t  if (qn <= n)\n\t    mpn_mul (tp, M->p[row][1-col], n, qp, qn);\n\t  else\n\t    mpn_mul (tp, qp, qn, M->p[row][1-col], n);\n\n\t  ASSERT (n + qn >= M->n);\n\t  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);\n\t}\n\n      n += qn;\n\n      if (c[0] | c[1])\n\t{\n\t  M->p[0][col][n] = c[0];\n\t  M->p[1][col][n] = c[1];\n\t  n++;\n\t}\n      else\n\t{\n\t  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;\n\t  ASSERT (n >= M->n);\n\t}\n      M->n = n;\n    }\n\n  ASSERT (M->n < M->alloc);\n}\n\n/* Multiply M by M1 from the right. Since the M1 elements fit in\n   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs\n   temporary space M->n */\nvoid\nmpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,\n\t\t       mp_ptr tp)\n{\n  mp_size_t n0, n1;\n\n  /* Could avoid copy by some swapping of pointers. 
*/\n  MPN_COPY (tp, M->p[0][0], M->n);\n  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);\n  MPN_COPY (tp, M->p[1][0], M->n);\n  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);\n\n  /* Depends on zero initialization */\n  M->n = MAX(n0, n1);\n  ASSERT (M->n < M->alloc);\n}\n\n/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs\n   of temporary storage (see mpn_matrix22_mul_itch). */\nvoid\nmpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,\n\t\t     mp_ptr tp)\n{\n  mp_size_t n;\n\n  /* About the new size of M:s elements. Since M1's diagonal elements\n     are > 0, no element can decrease. The new elements are of size\n     M->n + M1->n, one limb more or less. The computation of the\n     matrix product produces elements of size M->n + M1->n + 1. But\n     the true size, after normalization, may be three limbs smaller.\n\n     The reason that the product has normalized size >= M->n + M1->n -\n     2 is subtle. It depends on the fact that M and M1 can be factored\n     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have\n     M ending with a large power and M1 starting with a large power of\n     the same matrix. */\n\n  /* FIXME: Strassen multiplication gives only a small speedup. In FFT\n     multiplication range, this function could be sped up quite a lot\n     using invariance. */\n  ASSERT (M->n + M1->n < M->alloc);\n\n  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]\n\t   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);\n\n  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]\n\t   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);\n\n  mpn_matrix22_mul (M->p[0][0], M->p[0][1],\n\t\t    M->p[1][0], M->p[1][1], M->n,\n\t\t    M1->p[0][0], M1->p[0][1],\n\t\t    M1->p[1][0], M1->p[1][1], M1->n, tp);\n\n  /* Index of last potentially non-zero limb, size is one greater. 
*/\n  n = M->n + M1->n;\n\n  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);\n  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);\n  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);\n\n  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);\n\n  M->n = n + 1;\n}\n\n/* Multiplies the least significant p limbs of (a;b) by M^-1.\n   Temporary space needed: 2 * (p + M->n)*/\nmp_size_t\nmpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,\n\t\t\tmp_size_t n, mp_ptr ap, mp_ptr bp,\n\t\t\tmp_size_t p, mp_ptr tp)\n{\n  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)\n     = (r11 a - r01 b; - r10 a + r00 b */\n\n  mp_ptr t0 = tp;\n  mp_ptr t1 = tp + p + M->n;\n  mp_limb_t ah, bh;\n  mp_limb_t cy;\n\n  ASSERT (p + M->n  < n);\n\n  /* First compute the two values depending on a, before overwriting a */\n\n  if (M->n >= p)\n    {\n      mpn_mul (t0, M->p[1][1], M->n, ap, p);\n      mpn_mul (t1, M->p[1][0], M->n, ap, p);\n    }\n  else\n    {\n      mpn_mul (t0, ap, p, M->p[1][1], M->n);\n      mpn_mul (t1, ap, p, M->p[1][0], M->n);\n    }\n\n  /* Update a */\n  MPN_COPY (ap, t0, p);\n  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);\n\n  if (M->n >= p)\n    mpn_mul (t0, M->p[0][1], M->n, bp, p);\n  else\n    mpn_mul (t0, bp, p, M->p[0][1], M->n);\n\n  cy = mpn_sub (ap, ap, n, t0, p + M->n);\n  ASSERT (cy <= ah);\n  ah -= cy;\n\n  /* Update b */\n  if (M->n >= p)\n    mpn_mul (t0, M->p[0][0], M->n, bp, p);\n  else\n    mpn_mul (t0, bp, p, M->p[0][0], M->n);\n\n  MPN_COPY (bp, t0, p);\n  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);\n  cy = mpn_sub (bp, bp, n, t1, p + M->n);\n  ASSERT (cy <= bh);\n  bh -= cy;\n\n  if (ah > 0 || bh > 0)\n    {\n      ap[n] = ah;\n      bp[n] = bh;\n      n++;\n    }\n  else\n    {\n      /* The subtraction can reduce the size by at most one limb. 
*/\n      if (ap[n-1] == 0 && bp[n-1] == 0)\n\tn--;\n    }\n  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);\n  return n;\n}\n\n/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must\n * have space for n + 1 limbs. Uses three buffers to avoid a copy*/\nmp_size_t\nmpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,\n\t\t\t     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)\n{\n  mp_limb_t ah, bh;\n\n  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as\n\n     r  = u00 * a\n     r += u10 * b\n     b *= u11\n     b += u01 * a\n  */\n\n#if HAVE_NATIVE_mpn_addaddmul_1msb0\n  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);\n  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);\n#else\n  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);\n  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);\n\n  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);\n  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);\n#endif\n  rp[n] = ah;\n  bp[n] = bh;\n\n  n += (ah | bh) > 0;\n  return n;\n}\n"
  },
  {
    "path": "mpn/generic/hgcd_reduce.c",
    "content": "/* hgcd_reduce.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Computes R -= A * B. Result must be non-negative. Normalized down\n   to size an, and resulting size is returned. 
*/\nstatic mp_size_t\nsubmul (mp_ptr rp, mp_size_t rn,\n\tmp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)\n{\n  mp_ptr tp;\n  TMP_DECL;\n\n  ASSERT (bn > 0);\n  ASSERT (an >= bn);\n  ASSERT (rn >= an);\n  ASSERT (an + bn <= rn + 1);\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (an + bn);\n\n  mpn_mul (tp, ap, an, bp, bn);\n  if (an + bn > rn)\n    {\n      ASSERT (tp[rn] == 0);\n      bn--;\n    }\n  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));\n  TMP_FREE;\n\n  while (rn > an && (rp[rn-1] == 0))\n    rn--;\n\n  return rn;\n}\n\n/* Computes (a, b)  <--  M^{-1} (a; b) */\n/* FIXME:\n    x Take scratch parameter, and figure out scratch need.\n\n    x Use some fallback for small M->n?\n*/\nstatic mp_size_t\nhgcd_matrix_apply (const struct hgcd_matrix *M,\n\t\t   mp_ptr ap, mp_ptr bp,\n\t\t   mp_size_t n)\n{\n  mp_size_t an, bn, un, vn, nn;\n  mp_size_t mn[2][2];\n  mp_size_t modn;\n  mp_ptr tp, sp, scratch;\n  mp_limb_t cy;\n  unsigned i, j;\n\n  TMP_DECL;\n\n  ASSERT ( (ap[n-1] | bp[n-1]) > 0);\n\n  an = n;\n  MPN_NORMALIZE (ap, an);\n  bn = n;\n  MPN_NORMALIZE (bp, bn);\n\n  for (i = 0; i < 2; i++)\n    for (j = 0; j < 2; j++)\n      {\n\tmp_size_t k;\n\tk = M->n;\n\tMPN_NORMALIZE (M->p[i][j], k);\n\tmn[i][j] = k;\n      }\n\n  ASSERT (mn[0][0] > 0);\n  ASSERT (mn[1][1] > 0);\n  ASSERT ( (mn[0][1] | mn[1][0]) > 0);\n\n  TMP_MARK;\n\n  if (mn[0][1] == 0)\n    {\n      /* A unchanged, M = (1, 0; q, 1) */\n      ASSERT (mn[0][0] == 1);\n      ASSERT (M->p[0][0][0] == 1);\n      ASSERT (mn[1][1] == 1);\n      ASSERT (M->p[1][1][0] == 1);\n\n      /* Put B <-- B - q A */\n      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);\n    }\n  else if (mn[1][0] == 0)\n    {\n      /* B unchanged, M = (1, q; 0, 1) */\n      ASSERT (mn[0][0] == 1);\n      ASSERT (M->p[0][0][0] == 1);\n      ASSERT (mn[1][1] == 1);\n      ASSERT (M->p[1][1][0] == 1);\n\n      /* Put A  <-- A - q * B */\n      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);\n    }\n  else\n    {\n    
  /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.\n\t B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */\n      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;\n      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;\n\n      nn = MAX (un, vn);\n      /* In the range of interest, mulmod_bnm1 should always beat mullo. */\n      modn = mpn_mulmod_bnm1_next_size (nn + 1);\n\n      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));\n      tp = TMP_ALLOC_LIMBS (modn);\n      sp = TMP_ALLOC_LIMBS (modn);\n\n      ASSERT (n <= 2*modn);\n\n      if (n > modn)\n\t{\n\t  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);\n\t  MPN_INCR_U (ap, modn, cy);\n\n\t  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);\n\t  MPN_INCR_U (bp, modn, cy);\n\n\t  n = modn;\n\t}\n\n      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);\n      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);\n\n      /* FIXME: Handle the small n case in some better way. */\n      if (n + mn[1][1] < modn)\n\tMPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);\n      if (n + mn[0][1] < modn)\n\tMPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);\n\n      cy = mpn_sub_n (tp, tp, sp, modn);\n      MPN_DECR_U (tp, modn, cy);\n\n      ASSERT (mpn_zero_p (tp + nn, modn - nn));\n\n      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);\n      MPN_COPY (ap, tp, nn);\n      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);\n\n      if (n + mn[1][0] < modn)\n\tMPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);\n      if (n + mn[0][0] < modn)\n\tMPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);\n\n      cy = mpn_sub_n (tp, tp, sp, modn);\n      MPN_DECR_U (tp, modn, cy);\n\n      ASSERT (mpn_zero_p (tp + nn, modn - nn));\n      MPN_COPY (bp, tp, nn);\n\n      while ( (ap[nn-1] | bp[nn-1]) == 0)\n\t{\n\t  nn--;\n\t  ASSERT (nn > 0);\n\t}\n    }\n  TMP_FREE;\n\n  return nn;\n}\n\nmp_size_t\nmpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)\n{\n 
 mp_size_t itch;\n  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))\n    {\n      itch = mpn_hgcd_itch (n-p);\n\n      /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *\n\t (p + ceil((n-p)/2) - 1 <= n + p - 1 */\n      if (itch < n + p - 1)\n\titch = n + p - 1;\n    }\n  else\n    {\n      itch = 2*(n-p) + mpn_hgcd_itch (n-p);\n      /* Currently, hgcd_matrix_apply allocates its own storage. */\n    }\n  return itch;\n}\n\n/* FIXME: Document storage need. */\nmp_size_t\nmpn_hgcd_reduce (struct hgcd_matrix *M,\n\t\t mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,\n\t\t mp_ptr tp)\n{\n  mp_size_t nn;\n  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))\n    {\n      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);\n      if (nn > 0)\n\t/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)\n\t   = 2 (n - 1) */\n\treturn mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);\n    }\n  else\n    {\n      MPN_COPY (tp, ap + p, n - p);\n      MPN_COPY (tp + n - p, bp + p, n - p);\n      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))\n\treturn hgcd_matrix_apply (M, ap, bp, n);\n    }\n  return 0;\n}\n"
  },
  {
    "path": "mpn/generic/hgcd_step.c",
    "content": "/* hgcd_step.c.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nstatic void\nhgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,\n\t   mp_srcptr qp, mp_size_t qn, int d)\n{\n  ASSERT (!gp);\n  ASSERT (d >= 0);\n  ASSERT (d <= 1);\n\n  MPN_NORMALIZE (qp, qn);\n  if (qn > 0)\n    {\n      struct hgcd_matrix *M = (struct hgcd_matrix *) p;\n      /* NOTES: This is a bit ugly. A tp area is passed to\n\t gcd_subdiv_step, which stores q at the start of that area. We\n\t now use the rest. */\n      mp_ptr tp = (mp_ptr) qp + qn;\n      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);\n    }\n}\n\n/* Perform a few steps, using some of mpn_hgcd2, subtraction and\n   division. Reduces the size by almost one limb or more, but never\n   below the given size s. 
Return new size for a and b, or 0 if no\n   more steps are possible.\n\n   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n\n   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2\n   fails, needs space for the quotient, qn <= n - s limbs, for and\n   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=\n   (resulting size of M) + 1.\n\n   If N is the input size to the calling hgcd, then s = floor(N/2) +\n   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1\n   <= N.\n*/\n\nmp_size_t\nmpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,\n\t       struct hgcd_matrix *M, mp_ptr tp)\n{\n  struct hgcd_matrix1 M1;\n  mp_limb_t mask;\n  mp_limb_t ah, al, bh, bl;\n\n  ASSERT (n > s);\n\n  mask = ap[n-1] | bp[n-1];\n  ASSERT (mask > 0);\n\n  if (n == s + 1)\n    {\n      if (mask < 4)\n\tgoto subtract;\n\n      ah = ap[n-1]; al = ap[n-2];\n      bh = bp[n-1]; bl = bp[n-2];\n    }\n  else if (mask & GMP_NUMB_HIGHBIT)\n    {\n      ah = ap[n-1]; al = ap[n-2];\n      bh = bp[n-1]; bl = bp[n-2];\n    }\n  else\n    {\n      int shift;\n\n      count_leading_zeros (shift, mask);\n      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);\n      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);\n      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);\n      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);\n    }\n\n  /* Try an mpn_hgcd2 step */\n  if (mpn_hgcd2 (ah, al, bh, bl, &M1))\n    {\n      /* Multiply M <- M * M1 */\n      mpn_hgcd_matrix_mul_1 (M, &M1, tp);\n\n      /* Can't swap inputs, so we need to copy. */\n      MPN_COPY (tp, ap, n);\n      /* Multiply M1^{-1} (a;b) */\n      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);\n    }\n\n subtract:\n\n  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);\n}\n"
  },
  {
    "path": "mpn/generic/inv_div_q.c",
    "content": "/* mpn_inv_div_q -- divide-and-conquer division, returning exact quotient\n   only.\n\n   Derived from a file originally contributed to the GMP project by Torbjorn \n   Granlund and Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nmp_limb_t\nmpn_inv_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,\n\t\t mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)\n{\n  mp_ptr tp, wp;\n  mp_limb_t qh;\n  mp_size_t qn;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\n  ASSERT (nn - dn >= 3);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  tp = TMP_ALLOC_LIMBS (nn + 1);\n  MPN_COPY (tp + 1, np, nn);\n  tp[0] = 0;\n\n  qn = nn - dn;\n  wp = TMP_ALLOC_LIMBS (qn + 1);\n\n  qh = mpn_inv_divappr_q (wp, tp, nn + 1, dp, dn, dinv);\n\n  if (wp[0] == 0)\n    {\n      mp_limb_t cy;\n\n      if (qn > dn)\n\tmpn_mul (tp, wp + 1, qn, dp, dn);\n      else\n\tmpn_mul (tp, dp, dn, wp + 1, qn);\n\n      cy = (qh != 0) ? 
mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;\n\n      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */\n\tqh -= mpn_sub_1 (qp, wp + 1, qn, 1);\n      else /* Same as below */\n\tMPN_COPY (qp, wp + 1, qn);\n    }\n  else\n    MPN_COPY (qp, wp + 1, qn);\n\n  TMP_FREE;\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/inv_div_qr.c",
    "content": "/* mpn_inv_div_qr -- division with remainder for arbitrary\n   size operands using a precomputed inverse.\n\n   Derived from a file originally contributed to the GMP project by Torbjorn \n   Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_inv_div_qr (mp_ptr qp,\n\t\t  mp_ptr np, mp_size_t nn,\n\t\t  mp_srcptr dp, mp_size_t dn,\n\t\t  mp_srcptr dinv)\n{\n  mp_size_t qn;\n  mp_limb_t qh, cy, dinv2;\n  mp_ptr tp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\t\t/* to adhere to mpn_sb_div_qr's limits */\n  ASSERT (nn - dn >= 3);\t/* to adhere to mpn_sb_div_qr's limits */\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  mpir_invert_pi1(dinv2, dp[dn - 1], dp[dn - 2]);\n\n  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));\n\n  qn = nn - dn;\n  qp += qn;\n  np += nn;\n  dp += dn;\n\n  if (qn > dn)\n    {\n      /* Reduce qn mod dn without division, optimizing small operations.  
*/\n      do\n\tqn -= dn;\n      while (qn > dn);\n\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      /* Perform the typically smaller block first.  */\n      if (qn == 1)\n\t{\n\t  mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;\n\n\t  /* Handle qh up front, for simplicity. */\n\t  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;\n\t  if (qh)\n\t    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));\n\n\t  /* A single iteration of schoolbook: One 3/2 division,\n\t     followed by the bignum update and adjustment. */\n\t  n2 = np[0];\n\t  n1 = np[-1];\n\t  n0 = np[-2];\n\t  d1 = dp[-1];\n\t  d0 = dp[-2];\n     d01 = d0 + 1;\n     d11 = d1 + (d01 < d0);\n\n\t  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));\n\n\t  if (UNLIKELY (n2 == d1) && n1 == d0)\n\t    {\n\t      q = GMP_NUMB_MASK;\n\t      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);\n\t      ASSERT (cy == n2);\n\t    }\n\t  else\n\t    {\n\t      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv2);\n\n\t      if (dn > 2)\n\t\t{\n\t\t  mp_limb_t cy, cy1;\n\t\t  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);\n\n\t\t  cy1 = n0 < cy;\n\t\t  n0 = (n0 - cy) & GMP_NUMB_MASK;\n\t\t  cy = n1 < cy1;\n\t\t  n1 = (n1 - cy1) & GMP_NUMB_MASK;\n\t\t  np[-2] = n0;\n\n\t\t  if (UNLIKELY (cy != 0))\n\t\t    {\n\t\t      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);\n\t\t      qh -= (q == 0);\n\t\t      q = (q - 1) & GMP_NUMB_MASK;\n\t\t    }\n\t\t}\n\t      else\n\t\tnp[-2] = n0;\n\n\t      np[-1] = n1;\n\t    }\n\t  qp[0] = q;\n\t}\n      else\n\t{\n      /* Do a 2qn / qn division */\n\t  if (qn == 2)\n\t    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? 
*/\n\t  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n\t    qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv2);\n\t  else if (BELOW_THRESHOLD (qn, INV_DIV_QR_THRESHOLD))\n\t    qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv2, tp);\n\t  else\n         {\n   \t    mpn_invert_trunc(tp, qn, dinv, dn, dp - dn);\n           qh = mpn_inv_div_qr_n (qp, np - qn, dp - qn, qn, tp);\n         }\n\n\t  if (qn != dn)\n\t    {\n\t      if (qn > dn - qn)\n\t\tmpn_mul (tp, qp, qn, dp - dn, dn - qn);\n\t      else\n\t\tmpn_mul (tp, dp - dn, dn - qn, qp, qn);\n\n\t      cy = mpn_sub_n (np - dn, np - dn, tp, dn);\n\t      if (qh != 0)\n\t\tcy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);\n\n\t      while (cy != 0)\n\t\t{\n\t\t  qh -= mpn_sub_1 (qp, qp, qn, 1);\n\t\t  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);\n\t\t}\n\t    }\n\t}\n\n      qn = nn - dn - qn;\n      do\n\t{\n\t  qp -= dn;\n\t  np -= dn;\n\t  ASSERT_NOCARRY(mpn_inv_div_qr_n (qp, np - dn, dp - dn, dn, dinv));\n\t  qn -= dn;\n\t}\n      while (qn > 0);\n    }\n  else\n    {\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n\tqh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv2);\n      else if (BELOW_THRESHOLD (qn, INV_DIV_QR_THRESHOLD))\n\tqh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv2, tp);\n      else\n\t{\n   \t    mpn_invert_trunc(tp, qn, dinv, dn, dp - dn);\n           qh = mpn_inv_div_qr_n (qp, np - qn, dp - qn, qn, tp);\n       }\n\n      if (qn != dn)\n\t{\n\t  if (qn > dn - qn)\n\t    mpn_mul (tp, qp, qn, dp - dn, dn - qn);\n\t  else\n\t    mpn_mul (tp, dp - dn, dn - qn, qp, qn);\n\n\t  cy = mpn_sub_n (np - dn, np - dn, tp, dn);\n\t  if (qh != 0)\n\t    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);\n\n\t  while (cy != 0)\n\t    {\n\t      qh -= mpn_sub_1 (qp, qp, qn, 1);\n\t      cy -= mpn_add_n 
(np - dn, np - dn, dp - dn, dn);\n\t    }\n\t}\n    }\n\n  TMP_FREE;\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/inv_div_qr_n.c",
    "content": "/* inv_div_qr_n - quotient and remainder using a precomputed inverse \n\nCopyright 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* \n   Computes the quotient and remainder of { np, 2*dn } by { dp, dn }.\n   We require dp to be normalised and inv to be a precomputed inverse \n   of { dp, dn } given by mpn_invert.\n*/\nmp_limb_t \nmpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, \n                           mp_srcptr dp, mp_size_t dn, mp_srcptr inv)\n{\n   mp_limb_t cy, lo, ret = 0, ret2 = 0;\n   mp_size_t m, i;\n   mp_ptr tp;\n   TMP_DECL;\n\n   TMP_MARK;\n\n   ASSERT(mpn_is_invert(inv, dp, dn));\n\n   if (mpn_cmp(np + dn, dp, dn) >= 0)\n   {\n      ret2 = 1;\n      mpn_sub_n(np + dn, np + dn, dp, dn);\n   }\n\n   tp = TMP_ALLOC_LIMBS(2*dn + 1);\n   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);\n   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);\n   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);\n   ret += mpn_add_1(qp, qp, dn, cy);\n\n   /* \n      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then\n      DX < B^{2*dn} <= D(X+1), thus\n      Let N' = { np + n - 1, n + 1 }\n\t   N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} 
+ N'/B^{dn+1} < N'X/B^{dn+1} + 1\n      N'X/B^{dn+1} < N/D <= N'X/B^{dn+1} + 1 + 2/B\n      There is either one integer in this range, or two. However, in the latter case\n\t  the left hand bound is either an integer or < 2/B below one.\n   */\n     \n   if (UNLIKELY(ret == 1))\n   {\n      ret -= mpn_sub_1(qp, qp, dn, 1);\n      ASSERT(ret == 0);\n   }\n\n   ret -= mpn_sub_1(qp, qp, dn, 1); \n   if (UNLIKELY(ret == ~CNST_LIMB(0))) \n      ret += mpn_add_1(qp, qp, dn, 1);\n   /* ret is now guaranteed to be 0 or 1*/\n   ASSERT(ret == 0);\n\n   m = dn + 1;\n   if ((dn <= MPN_FFT_MUL_N_MINSIZE) || (ret))\n   {\n      mpn_mul_n(tp, qp, dp, dn);\n   } else\n   {\n      mp_limb_t cy, cy2;\n      \n      if (m >= FFT_MULMOD_2EXPP1_CUTOFF)\n         m = mpir_fft_adjust_limbs (m);\n      cy = mpn_mulmod_Bexpp1_fft (tp, m, qp, dn, dp, dn);\n      \n      /* cy, {tp, m} = qp * dp mod (B^m+1) */ \n      cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);\n      mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);\n          \n      /* Make correction */    \n      mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);\n   }\n   \n   mpn_sub_n(np, np, tp, m);\n   MPN_ZERO(np + m, 2*dn - m);\n   while (np[dn] || mpn_cmp(np, dp, dn) >= 0)\n   {\n\t   ret += mpn_add_1(qp, qp, dn, 1);\n\t   np[dn] -= mpn_sub_n(np, np, dp, dn);\n   }\n  \n   /* Not possible for ret == 2 as we have qp*dp <= np */\n   ASSERT(ret + ret2 < 2);\n\n   TMP_FREE;\n\n   return ret + ret2;\n}\n\n"
  },
  {
    "path": "mpn/generic/inv_divappr_q.c",
    "content": "/* mpn_inv_divappr_q -- divide-and-conquer division, returning approximate\n   quotient.  The quotient returned is either correct, or one too large.\n\n   Derived from a file orignally contributed to the GMP project by Torbjorn \n   Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart.\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_inv_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,\n\t\t     mp_srcptr dp, mp_size_t dn, mp_srcptr dinv)\n{\n  mp_size_t qn;\n  mp_limb_t qh, cy, qsave, dinv2;\n  mp_ptr tp, temp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (dn >= 6);\n  ASSERT (nn > dn);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n\n  qn = nn - dn;\n  qp += qn;\n  np += nn;\n  dp += dn;\n\n  if (qn >= dn)\n    {\n      qn++;\t\t\t/* pretend we'll need an extra limb */\n      /* Reduce qn mod dn without division, optimizing small operations.  
*/\n      do\n\tqn -= dn;\n      while (qn > dn);\n\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn));\n\n      /* Perform the typically smaller block first.  */\n      if (qn == 1)\n\t{\n\t  mp_limb_t q, n2, n1, n0, d1, d0, d11, d01;\n\n\t  /* Handle qh up front, for simplicity. */\n\t  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;\n\t  if (qh)\n\t    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));\n\n\t  /* A single iteration of schoolbook: One 3/2 division,\n\t     followed by the bignum update and adjustment. */\n\n\t  n2 = np[0];\n\t  n1 = np[-1];\n\t  n0 = np[-2];\n\t  d1 = dp[-1];\n\t  d0 = dp[-2];\n     d01 = d0 + 1;\n     d11 = d1 + (d01 < d0);\n\n\t  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));\n\n\t  if (UNLIKELY (n2 == d1) && n1 == d0)\n\t    {\n\t      q = GMP_NUMB_MASK;\n\t      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);\n\t      ASSERT (cy == n2);\n\t    }\n\t  else\n\t    {\n\t      mpir_invert_pi1(dinv2, d1, d0);\n         udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv2);\n\n\t      if (dn > 2)\n\t\t{\n\t\t  mp_limb_t cy, cy1;\n\t\t  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);\n\n\t\t  cy1 = n0 < cy;\n\t\t  n0 = (n0 - cy) & GMP_NUMB_MASK;\n\t\t  cy = n1 < cy1;\n\t\t  n1 = (n1 - cy1) & GMP_NUMB_MASK;\n\t\t  np[-2] = n0;\n\n\t\t  if (UNLIKELY (cy != 0))\n\t\t    {\n\t\t      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);\n\t\t      qh -= (q == 0);\n\t\t      q = (q - 1) & GMP_NUMB_MASK;\n\t\t    }\n\t\t}\n\t      else\n\t\tnp[-2] = n0;\n\n\t      np[-1] = n1;\n\t    }\n\t  qp[0] = q;\n\t}\n      else\n\t{\n\t  mpir_invert_pi1(dinv2, dp[-1], dp[-2]);\n     if (qn == 2)\n\t    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);\n\t  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n        qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv2);\n     else if 
(BELOW_THRESHOLD (qn, INV_DIV_QR_THRESHOLD))\n        qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv2, tp);\n     else\n        {\n   \t    mpn_invert_trunc(tp, qn, dinv, dn, dp - dn);\n           qh = mpn_inv_div_qr_n (qp, np - qn, dp - qn, qn, tp);\n        }\n\n\t  \n      if (qn != dn)\n\t    {\n\t      if (qn > dn - qn)\n\t\tmpn_mul (tp, qp, qn, dp - dn, dn - qn);\n\t      else\n\t\tmpn_mul (tp, dp - dn, dn - qn, qp, qn);\n\n\t      cy = mpn_sub_n (np - dn, np - dn, tp, dn);\n\t      if (qh != 0)\n\t\tcy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);\n\n\t      while (cy != 0)\n\t\t{\n\t\t  qh -= mpn_sub_1 (qp, qp, qn, 1);\n\t\t  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);\n\t\t}\n\t    }\n\t}\n      qn = nn - dn - qn + 1;\n      while (qn > dn)\n\t{\n\t  qp -= dn;\n\t  np -= dn;\n      ASSERT_NOCARRY(mpn_inv_div_qr_n (qp, np - dn, dp - dn, dn, dinv));\n\t  qn -= dn;\n\t}\n\n      /* Since we pretended we'd need an extra quotient limb before, we now\n\t have made sure the code above left just dn-1=qn quotient limbs to\n\t develop.  Develop that plus a guard limb. 
*/\n      qn--;\n      qp -= qn;\n      np -= dn;\n      qsave = qp[qn];\n      cy = mpn_inv_divappr_q_n (qp, np - dn, dp - dn, dn, dinv);\n      if (UNLIKELY(cy)) mpn_sub_1(qp, qp, dn, 1);\n      MPN_COPY_INCR (qp, qp + 1, qn);\n      qp[qn] = qsave;\n    }\n  else    /* (qn < dn) */\n    {\n      mp_ptr q2p;\n\n      qp -= qn;\t\t\t/* point at low limb of next quotient block */\n      np -= qn;\t\t\t/* point in the middle of partial remainder */\n\n      q2p = TMP_ALLOC_LIMBS (qn + 1);\n      \n      mpir_invert_pi1(dinv2, dp[-1], dp[-2]);\n      \n       if (qn == 1)\n        {\n          qh = mpn_divrem_2(q2p, 0, np - 3, 4, dp - 2);\n        }\n      else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))\n\t{\n\t  qh = mpn_sb_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),\n\t\t\t\t    dp - (qn + 1), qn + 1, dinv2);\n\t}\n      else if (BELOW_THRESHOLD (qn, INV_DIVAPPR_Q_N_THRESHOLD))\n\t{\n\t  /* It is tempting to use qp for recursive scratch and put quotient in\n\t     tp, but the recursive scratch needs one limb too many.  */\n\t  tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(qn + 1));\n\t  qh = mpn_dc_divappr_q (q2p, np - qn - 2, 2 * (qn + 1), dp - (qn + 1), qn + 1, dinv2);\n       } \n      else \n\t{\n   \t    tp = TMP_ALLOC_LIMBS (qn + 1);\n\t    mpn_invert_trunc(tp, qn + 1, dinv, dn, dp - dn);\n           qh = mpn_inv_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, tp);\n       }\n\n\n      MPN_COPY (qp, q2p + 1, qn);\n    }\n\n  TMP_FREE;\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/inv_divappr_q_n.c",
    "content": "/* inv_divappr_q_n - approximate quotient using a precomputed inverse \n\nCopyright 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* \n   Computes an approximate quotient of { np, 2*dn } by { dp, dn } which is\n   either correct or one too large. 
We require dp to be normalised and inv\n   to be a precomputed inverse given by mpn_invert.\n*/\nmp_limb_t \nmpn_inv_divappr_q_n(mp_ptr qp, mp_ptr np, \n                              mp_srcptr dp, mp_size_t dn, mp_srcptr inv)\n{\n   mp_limb_t cy, lo, ret = 0, ret2 = 0;\n   mp_ptr tp;\n   TMP_DECL;\n\n   TMP_MARK;\n\n   ASSERT(dp[dn-1] & GMP_LIMB_HIGHBIT);\n   ASSERT(mpn_is_invert(inv, dp, dn));\n\n   if (mpn_cmp(np + dn, dp, dn) >= 0)\n   {\n      ret2 = 1;\n      mpn_sub_n(np + dn, np + dn, dp, dn);\n   }\n   \n   tp = TMP_ALLOC_LIMBS(2*dn + 1);\n   mpn_mul(tp, np + dn - 1, dn + 1, inv, dn);\n   add_ssaaaa(cy, lo, 0, np[dn - 1], 0, tp[dn]);\n   ret += mpn_add_n(qp, tp + dn + 1, np + dn, dn);\n   ret += mpn_add_1(qp, qp, dn, cy + 1);\n\n   /* \n      Let X = B^dn + inv, D = { dp, dn }, N = { np, 2*dn }, then\n      DX < B^{2*dn} <= D(X+1), thus\n      Let N' = { np + n - 1, n + 1 }\n\t   N'X/B^{dn+1} < B^{dn-1}N'/D <= N'X/B^{dn+1} + N'/B^{dn+1} < N'X/B^{dn+1} + 1\n      N'X/B^{dn+1} < N/D <=  N'X/B^{dn+1} + 1 + 2/B\n      There is either one integer in this range, or two. 
However, in the latter case\n\t  the left hand bound is either an integer or < 2/B below one.\n   */\n    \n   if (UNLIKELY(ret == 1))\n   {\n      ret -= mpn_sub_1(qp, qp, dn, 1);\n      ASSERT(ret == 0);\n   }\n  \n   if (UNLIKELY((lo == ~CNST_LIMB(0)) || (lo == ~CNST_LIMB(1)))) \n   {\n\t   /* Special case, multiply out to get accurate quotient */\n\t   ret -= mpn_sub_1(qp, qp, dn, 1);\n      if (UNLIKELY(ret == ~CNST_LIMB(0)))\n         ret += mpn_add_1(qp, qp, dn, 1);\n      \n      /* ret is now guaranteed to be 0 */\n      ASSERT(ret == 0);\n       \n      mpn_mul_n(tp, qp, dp, dn);\n      mpn_sub_n(tp, np, tp, dn+1);\n      while (tp[dn] || mpn_cmp(tp, dp, dn) >= 0)\n\t   {\n\t\t   ret += mpn_add_1(qp, qp, dn, 1);\n\t\t   tp[dn] -= mpn_sub_n(tp, tp, dp, dn);\n\t   }\n       \n      /* Not possible for ret == 2 as we have qp*dp <= np */\n      ASSERT(ret + ret2 < 2);\n   }\n\n   TMP_FREE;\n\n   return ret + ret2;\n}\n\n"
  },
  {
    "path": "mpn/generic/invert.c",
    "content": "/* floating-point Newton, with inversion in 3M(n) */\n\n/* mpn_invert\n\nCopyright 2009, 2015 Paul Zimmermann\nCopyright 2009, 2015 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <assert.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#define ZERO (mp_limb_t) 0\n#define ONE  (mp_limb_t) 1\n#define WRAP_AROUND_BOUND 1500\n\nint\nmpn_is_invert (mp_srcptr xp, mp_srcptr ap, mp_size_t n)\n{\n  int res = 1;\n  mp_size_t i;\n  mp_ptr tp, up;\n  mp_limb_t cy;\n  TMP_DECL;\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (2 * n);\n  up = TMP_ALLOC_LIMBS (2 * n);\n\n  /* first check X*A < B^(2*n) */\n  mpn_mul_n (tp, xp, ap, n);\n  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */\n  if (cy != 0)\n    return 0;\n\n  /* now check B^(2n) - X*A <= A */\n  mpn_not (tp, 2 * n);\n  mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */\n  MPN_ZERO (up, 2 * n);\n  MPN_COPY (up, ap, n);\n  res = mpn_cmp (tp, up, 2 * n) <= 0;\n  TMP_FREE;\n  return res;\n}\n\n/* Input: A = {ap, n} with most significant bit set.\n   Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.\n\n   X is a lower approximation of B^(2n)/A with implicit msb.\n   More 
precisely, one has:\n\n              A*X < B^(2n) <= A*(X+1)\n\n   or X = ceil(B^(2n)/A) - 1.\n*/\nvoid\nmpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)\n{\n  if (n == 1)\n    {\n      /* invert_limb returns min(B-1, floor(B^2/ap[0])-B),\n\t which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.\n\t For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where\n\t the equality holds only when A=B/2.\n\n\t We thus have A*X < B^2 <= A*(X+1).\n      */\n      invert_limb (xp[0], ap[0]);\n    }\n  else if (n == 2)\n    {\n      mp_limb_t tp[4], up[2], sp[2], cy;\n\n      tp[0] = ZERO;\n      invert_limb (xp[1], ap[1]);\n      tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);\n      cy = mpn_add_n (tp + 2, tp + 2, ap, 2);\n      while (cy) /* Xh is too large */\n\t{\n\t  xp[1] --;\n\t  cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);\n\t}\n      /* tp[3] should be 111...111 */\n\n      mpn_com_n (sp, tp + 1, 2);\n      cy = mpn_add_1 (sp, sp, 2, ONE);\n      /* cy should be 0 */\n\n      up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);\n      cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);\n      /* cy should be 0 */\n      xp[0] = up[1];\n\n      /* update tp */\n      cy = mpn_addmul_1 (tp, ap, 2, xp[0]);\n      cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);\n      do\n\t{\n\t  cy = mpn_add (tp, tp, 4, ap, 2);\n\t  if (cy == ZERO)\n\t    mpn_add_1 (xp, xp, 2, ONE);\n\t}\n      while (cy == ZERO);\n\n      /* now A*X < B^4 <= A*(X+1) */\n    }\n  else\n    {\n      mp_size_t l, h;\n      mp_ptr tp, up;\n      mp_limb_t cy, th;\n      TMP_DECL;\n\n      l = (n - 1) / 2;\n      h = n - l;\n\n      mpn_invert (xp + l, ap + l, h);\n\n      TMP_MARK;\n      tp = TMP_ALLOC_LIMBS (n + h);\n      up = TMP_ALLOC_LIMBS (2 * h);\n\n      if (n <= WRAP_AROUND_BOUND)\n\t{\n          mpn_mul (tp, ap, n, xp + l, h);\n          cy = mpn_add_n (tp + h, tp + h, ap, n);\n        }\n      else\n\t{\n          mp_size_t m = n + 1;\n          mpir_ui k;\n          int cc;\n\n          if (m >= FFT_MULMOD_2EXPP1_CUTOFF)\n             m = 
mpir_fft_adjust_limbs (m);\n          /* we have m >= n + 1 by construction, thus m > h */\n          ASSERT(m < n + h);\n          cy = mpn_mulmod_Bexpp1_fft (tp, m, ap, n, xp + l, h);\n          /* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */\n          cy += mpn_add_n (tp + h, tp + h, ap, m - h);\n          cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);\n          cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);\n          if (cc > cy) /* can only occur if cc=1 and cy=0 */\n            cy = mpn_add_1 (tp, tp, m, ONE);\n          else\n            cy -= cc;\n          /* cy, {tp, m} = A * Xh */\n\n          /* add B^(n+h) + B^(n+h-m) */\n          MPN_ZERO (tp + m, n + h - m);\n          tp[m] = cy;\n          /* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,\n             the mpn_incr_u() below cannot produce a carry */\n          mpn_incr_u (tp + n + h - m, ONE);\n          cy = 1;\n          do /* check if T >= B^(n+h) + 2*B^n */\n            {\n              mp_size_t i;\n\n              if (cy == ZERO)\n                break; /* surely T < B^(n+h) */\n              if (cy == ONE)\n                {\n                  for (i = n + h - 1; tp[i] == ZERO && i > n; i--);\n                  if (i == n && tp[i] < (mp_limb_t) 2)\n                    break;\n                }\n              /* subtract B^m+1 */\n              cy -= mpn_sub_1 (tp, tp, n + h, ONE);\n              cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);\n            }\n          while (1);\n        }\n\n      while (cy)\n\t{\n\t  mpn_sub_1 (xp + l, xp + l, h, ONE);\n\t  cy -= mpn_sub (tp, tp, n + h, ap, n);\n\t}\n\n\t  /* \n\t     Note that we work with the inequality AX < B^2n < A(X+1)\n\t     as per the revised version of the paper found here:\n\t\t http://www.loria.fr/~zimmerma/papers/invert.pdf\n\t  */\n      mpn_not (tp, n);\n      mpn_add_1 (tp, tp, n, ONE);\n      mpn_mul_n (up, tp + l, xp + l, h);\n      cy = mpn_add_n (up + h, up + h, tp + l, h - l);\n     
 mpn_add_n (xp, up + 2*h - l, tp + h, l);\n      mpn_add_1 (xp, xp, l, cy);\n      if (up[2*h-l-1] + 3 <= CNST_LIMB(2) && !mpn_is_invert(xp, ap, n))\n         mpn_add_1 (xp, xp, n, 1); \n      TMP_FREE;\n    }\n}\n\nvoid mpn_invert_trunc(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap)\n{\n  mp_ptr tp;\n  mp_limb_t cy;\n  TMP_DECL;\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (2 * m);\n  \n  MPN_COPY(x_new, xp + n - m, m);\n  ap += (n - m);\n\n  mpn_mul_n (tp, x_new, ap, m);\n  mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */\n  \n  /* now check B^(2n) - X*A <= A */\n  mpn_not (tp, 2 * m);\n  mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */\n  \n  while (tp[m] || mpn_cmp (tp, ap, m) > 0)\n  {\n     mpn_add_1(x_new, x_new, m, 1);\n     tp[m] -= mpn_sub_n(tp, tp, ap, m);\n  }\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/ior_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_ior_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(ior)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/iorn_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_iorn_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(iorn)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/jacobi.c",
    "content": "/* jacobi.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010, 2011 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef JACOBI_DC_THRESHOLD\n#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD\n#endif\n\n/* Schönhage's rules:\n *\n * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3\n *\n * If r1 is odd, then\n *\n *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)\n *\n * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].\n *\n * If r1 is even, r2 must be odd. 
We have\n *\n *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)\n *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)\n *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)\n *\n * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating\n * q1 times gives\n *\n *   (r1 | r0) = (r1 | r2) = (r3 | r2)\n *\n * On the other hand, if r1 = 2 (mod 4), the sign factor is\n * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent\n *\n *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2\n *   = q1 (r0-1)/2 + q1 (q1-1)/2\n *\n * and we can summarize the even case as\n *\n *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)\n *\n * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}\n *\n * What about termination? The remainder sequence ends with (0|1) = 1\n * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is\n * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and\n * hence non-zero. We may have r3 = r1 - q2 r2 = 0.\n *\n * Examples: (11|15) = - (15|11) = - (4|11)\n *            (4|11) =    (4| 3) =   (1| 3)\n *            (1| 3) = (3|1) = (0|1) = 1\n *\n *             (2|7) = (2|1) = (0|1) = 1\n *\n * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)\n *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)\n *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)\n *\n */\n\n/* In principle, the state consists of four variables: e (one bit), a,\n   b (two bits each), d (one bit). Collected factors are (-1)^e. a and\n   b are the least significant bits of the current remainders. d\n   (denominator) is 0 if we're currently subtracting multiplies of a\n   from b, and 1 if we're subtracting b from a.\n\n   e is stored in the least significant bit, while a, b and d are\n   coded as only 13 distinct values in bits 1-4, according to the\n   following table. For rows not mentioning d, the value is either\n   implied, or it doesn't matter. 
*/\n\n#if WANT_ASSERT\nstatic const struct\n{\n  unsigned char a;\n  unsigned char b;\n} decode_table[13] = {\n  /*  0 */ { 0, 1 },\n  /*  1 */ { 0, 3 },\n  /*  2 */ { 1, 1 },\n  /*  3 */ { 1, 3 },\n  /*  4 */ { 2, 1 },\n  /*  5 */ { 2, 3 },\n  /*  6 */ { 3, 1 },\n  /*  7 */ { 3, 3 }, /* d = 1 */\n  /*  8 */ { 1, 0 },\n  /*  9 */ { 1, 2 },\n  /* 10 */ { 3, 0 },\n  /* 11 */ { 3, 2 },\n  /* 12 */ { 3, 3 }, /* d = 0 */\n};\n#define JACOBI_A(bits) (decode_table[(bits)>>1].a)\n#define JACOBI_B(bits) (decode_table[(bits)>>1].b)\n#endif /* WANT_ASSERT */\n\nconst unsigned char jacobi_table[208] = {\n 0, 0, 0, 0, 0,12, 8, 4, 1, 1, 1, 1, 1,13, 9, 5,\n 2, 2, 2, 2, 2, 6,10,14, 3, 3, 3, 3, 3, 7,11,15,\n 4,16, 6,18, 4, 0,12, 8, 5,17, 7,19, 5, 1,13, 9,\n 6,18, 4,16, 6,10,14, 2, 7,19, 5,17, 7,11,15, 3,\n 8,10, 9,11, 8, 4, 0,12, 9,11, 8,10, 9, 5, 1,13,\n10, 9,11, 8,10,14, 2, 6,11, 8,10, 9,11,15, 3, 7,\n12,22,24,20,12, 8, 4, 0,13,23,25,21,13, 9, 5, 1,\n25,21,13,23,14, 2, 6,10,24,20,12,22,15, 3, 7,11,\n16, 6,18, 4,16,16,16,16,17, 7,19, 5,17,17,17,17,\n18, 4,16, 6,18,22,19,23,19, 5,17, 7,19,23,18,22,\n20,12,22,24,20,20,20,20,21,13,23,25,21,21,21,21,\n22,24,20,12,22,19,23,18,23,25,21,13,23,18,22,19,\n24,20,12,22,15, 3, 7,11,25,21,13,23,14, 2, 6,10,\n};\n\n#define BITS_FAIL 31\n\nstatic void\njacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,\n\t     mp_srcptr qp, mp_size_t qn, int d)\n{\n  unsigned *bitsp = (unsigned *) p;\n\n  if (gp)\n    {\n      ASSERT (gn > 0);\n      if (gn != 1 || gp[0] != 1)\n\t{\n\t  *bitsp = BITS_FAIL;\n\t  return;\n\t}\n    }\n\n  if (qp)\n    {\n      ASSERT (qn > 0);\n      ASSERT (d >= 0);\n      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);\n    }\n}\n\n#define CHOOSE_P(n) (2*(n) / 3)\n\nint\nmpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)\n{\n  mp_size_t scratch;\n  mp_size_t matrix_scratch;\n  mp_ptr tp;\n\n  TMP_DECL;\n\n  ASSERT (n > 0);\n  ASSERT ( (ap[n-1] | bp[n-1]) > 0);\n  ASSERT ( (bp[0] | ap[0]) & 1);\n\n  /* FIXME: Check 
for small sizes first, before setting up temporary\n     storage etc. */\n  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);\n\n  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))\n    {\n      mp_size_t hgcd_scratch;\n      mp_size_t update_scratch;\n      mp_size_t p = CHOOSE_P (n);\n      mp_size_t dc_scratch;\n\n      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);\n      hgcd_scratch = mpn_hgcd_itch (n - p);\n      update_scratch = p + n - 1;\n\n      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);\n      if (dc_scratch > scratch)\n\tscratch = dc_scratch;\n    }\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS(scratch);\n\n  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))\n    {\n      struct hgcd_matrix M;\n      mp_size_t p = 2*n/3;\n      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);\n      mp_size_t nn;\n      mpn_hgcd_matrix_init (&M, n - p, tp);\n\n      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,\n\t\t\t    tp + matrix_scratch);\n      if (nn > 0)\n\t{\n\t  ASSERT (M.n <= (n - p - 1)/2);\n\t  ASSERT (M.n + p <= (p + n - 1) / 2);\n\t  /* Temporary storage 2 (p + M->n) <= p + n - 1. */\n\t  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);\n\t}\n      else\n\t{\n\t  /* Temporary storage n */\n\t  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);\n\t  if (!n)\n\t    {\n\t      TMP_FREE;\n\t      return bits == BITS_FAIL ? 
0 : mpn_jacobi_finish (bits);\n\t    }\n\t}\n    }\n\n  while (n > 2)\n    {\n      struct hgcd_matrix1 M;\n      mp_limb_t ah, al, bh, bl;\n      mp_limb_t mask;\n\n      mask = ap[n-1] | bp[n-1];\n      ASSERT (mask > 0);\n\n      if (mask & GMP_NUMB_HIGHBIT)\n\t{\n\t  ah = ap[n-1]; al = ap[n-2];\n\t  bh = bp[n-1]; bl = bp[n-2];\n\t}\n      else\n\t{\n\t  int shift;\n\n\t  count_leading_zeros (shift, mask);\n\t  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);\n\t  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);\n\t  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);\n\t  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);\n\t}\n\n      /* Try an mpn_nhgcd2 step */\n      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))\n\t{\n\t  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);\n\t  MP_PTR_SWAP (ap, tp);\n\t}\n      else\n\t{\n\t  /* mpn_hgcd2 has failed. Then either one of a or b is very\n\t     small, or the difference is very small. Perform one\n\t     subtraction followed by one division. */\n\t  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);\n\t  if (!n)\n\t    {\n\t      TMP_FREE;\n\t      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);\n\t    }\n\t}\n    }\n\n  if (bits >= 16)\n    MP_PTR_SWAP (ap, bp);\n\n  ASSERT (bp[0] & 1);\n\n  if (n == 1)\n    {\n      mp_limb_t al, bl;\n      al = ap[0];\n      bl = bp[0];\n\n      TMP_FREE;\n      if (bl == 1)\n\treturn 1 - 2*(bits & 1);\n      else\n\treturn mpn_jacobi_base (al, bl, bits << 1);\n    }\n\n  else\n    {\n      int res = mpn_jacobi_2 (ap, bp, bits & 1);\n      TMP_FREE;\n      return res;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/jacobi_2.c",
    "content": "/* jacobi_2.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef JACOBI_2_METHOD\n#define JACOBI_2_METHOD 2\n#endif\n\n/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary\n   two-limb numbers. 
*/\n#if JACOBI_2_METHOD == 1\nint\nmpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)\n{\n  mp_limb_t ah, al, bh, bl;\n  int c;\n\n  al = ap[0];\n  ah = ap[1];\n  bl = bp[0];\n  bh = bp[1];\n\n  ASSERT (bl & 1);\n\n  bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);\n  bh >>= 1;\n\n  if ( (bh | bl) == 0)\n    return 1 - 2*(bit & 1);\n\n  if ( (ah | al) == 0)\n    return 0;\n\n  if (al == 0)\n    {\n      al = ah;\n      ah = 0;\n      bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));\n    }\n  count_trailing_zeros (c, al);\n  bit ^= c & (bl ^ (bl >> 1));\n\n  c++;\n  if (UNLIKELY (c == GMP_NUMB_BITS))\n    {\n      al = ah;\n      ah = 0;\n    }\n  else\n    {\n      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);\n      ah >>= c;\n    }\n  while ( (ah | bh) > 0)\n    {\n      mp_limb_t th, tl;\n      mp_limb_t bgta;\n\n      sub_ddmmss (th, tl, ah, al, bh, bl);\n      if ( (tl | th) == 0)\n\treturn 0;\n\n      bgta = LIMB_HIGHBIT_TO_MASK (th);\n\n      /* If b > a, invoke reciprocity */\n      bit ^= (bgta & al & bl);\n\n      /* b <-- min (a, b) */\n      add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);\n\n      if ( (bh | bl) == 0)\n\treturn 1 - 2*(bit & 1);\n\n      /* a <-- |a - b| */\n      al = (bgta ^ tl) - bgta;\n      ah = (bgta ^ th);\n\n      if (UNLIKELY (al == 0))\n\t{\n\t  /* If b > a, al == 0 implies that we have a carry to\n\t     propagate. */\n\t  al = ah - bgta;\n\t  ah = 0;\n\t  bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));\n\t}\n      count_trailing_zeros (c, al);\n      c++;\n      bit ^= c & (bl ^ (bl >> 1));\n\n      if (UNLIKELY (c == GMP_NUMB_BITS))\n\t{\n\t  al = ah;\n\t  ah = 0;\n\t}\n      else\n\t{\n\t  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);\n\t  ah >>= c;\n\t}\n    }\n\n  ASSERT (bl > 0);\n\n  while ( (al | bl) & GMP_LIMB_HIGHBIT)\n    {\n      /* Need an extra comparison to get the mask. 
*/\n      mp_limb_t t = al - bl;\n      mp_limb_t bgta = - (bl > al);\n\n      if (t == 0)\n\treturn 0;\n\n      /* If b > a, invoke reciprocity */\n      bit ^= (bgta & al & bl);\n\n      /* b <-- min (a, b) */\n      bl += (bgta & t);\n\n      /* a <-- |a - b| */\n      al = (t ^ bgta) - bgta;\n\n      /* Number of trailing zeros is the same no matter if we look at\n       * t or a, but using t gives more parallelism. */\n      count_trailing_zeros (c, t);\n      c ++;\n      /* (2/b) = -1 if b = 3 or 5 mod 8 */\n      bit ^= c & (bl ^ (bl >> 1));\n\n      if (UNLIKELY (c == GMP_NUMB_BITS))\n\treturn 1 - 2*(bit & 1);\n\n      al >>= c;\n    }\n\n  /* Here we have a little impedance mismatch. Better to inline it? */\n  return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);\n}\n#elif JACOBI_2_METHOD == 2\nint\nmpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)\n{\n  mp_limb_t ah, al, bh, bl;\n  int c;\n\n  al = ap[0];\n  ah = ap[1];\n  bl = bp[0];\n  bh = bp[1];\n\n  ASSERT (bl & 1);\n\n  /* Use bit 1. 
*/\n  bit <<= 1;\n\n  if (bh == 0 && bl == 1)\n    /* (a|1) = 1 */\n    return 1 - (bit & 2);\n\n  if (al == 0)\n    {\n      if (ah == 0)\n\t/* (0|b) = 0, b > 1 */\n\treturn 0;\n\n      count_trailing_zeros (c, ah);\n      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));\n\n      al = bl;\n      bl = ah >> c;\n\n      if (bl == 1)\n\t/* (1|b) = 1 */\n\treturn 1 - (bit & 2);\n\n      ah = bh;\n\n      bit ^= al & bl;\n\n      goto b_reduced;\n    }\n  if ( (al & 1) == 0)\n    {\n      count_trailing_zeros (c, al);\n\n      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);\n      ah >>= c;\n      bit ^= (c << 1) & (bl ^ (bl >> 1));\n    }\n  if (ah == 0)\n    {\n      if (bh > 0)\n\t{\n\t  bit ^= al & bl;\n\t  MP_LIMB_T_SWAP (al, bl);\n\t  ah = bh;\n\t  goto b_reduced;\n\t}\n      goto ab_reduced;\n    }\n\n  while (bh > 0)\n    {\n      /* Compute (a|b) */\n      while (ah > bh)\n\t{\n\t  sub_ddmmss (ah, al, ah, al, bh, bl);\n\t  if (al == 0)\n\t    {\n\t      count_trailing_zeros (c, ah);\n\t      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));\n\n\t      al = bl;\n\t      bl = ah >> c;\n\t      ah = bh;\n\n\t      bit ^= al & bl;\n\t      goto b_reduced;\n\t    }\n\t  count_trailing_zeros (c, al);\n\t  bit ^= (c << 1) & (bl ^ (bl >> 1));\n\t  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);\n\t  ah >>= c;\n\t}\n      if (ah == bh)\n\tgoto cancel_hi;\n\n      if (ah == 0)\n\t{\n\t  bit ^= al & bl;\n\t  MP_LIMB_T_SWAP (al, bl);\n\t  ah = bh;\n\t  break;\n\t}\n\n      bit ^= al & bl;\n\n      /* Compute (b|a) */\n      while (bh > ah)\n\t{\n\t  sub_ddmmss (bh, bl, bh, bl, ah, al);\n\t  if (bl == 0)\n\t    {\n\t      count_trailing_zeros (c, bh);\n\t      bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));\n\n\t      bl = bh >> c;\n\t      bit ^= al & bl;\n\t      goto b_reduced;\n\t    }\n\t  count_trailing_zeros (c, bl);\n\t  bit ^= (c << 1) & (al ^ (al >> 1));\n\t  bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl 
>> c);\n\t  bh >>= c;\n\t}\n      bit ^= al & bl;\n\n      /* Compute (a|b) */\n      if (ah == bh)\n\t{\n\tcancel_hi:\n\t  if (al < bl)\n\t    {\n\t      MP_LIMB_T_SWAP (al, bl);\n\t      bit ^= al & bl;\n\t    }\n\t  al -= bl;\n\t  if (al == 0)\n\t    return 0;\n\n\t  count_trailing_zeros (c, al);\n\t  bit ^= (c << 1) & (bl ^ (bl >> 1));\n\t  al >>= c;\n\n\t  if (al == 1)\n\t    return 1 - (bit & 2);\n\n\t  MP_LIMB_T_SWAP (al, bl);\n\t  bit ^= al & bl;\n\t  break;\n\t}\n    }\n\n b_reduced:\n  /* Compute (a|b), with b a single limb. */\n  ASSERT (bl & 1);\n\n  if (bl == 1)\n    /* (a|1) = 1 */\n    return 1 - (bit & 2);\n\n  while (ah > 0)\n    {\n      ah -= (al < bl);\n      al -= bl;\n      if (al == 0)\n\t{\n\t  if (ah == 0)\n\t    return 0;\n\t  count_trailing_zeros (c, ah);\n\t  bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));\n\t  al = ah >> c;\n\t  goto ab_reduced;\n\t}\n      count_trailing_zeros (c, al);\n\n      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);\n      ah >>= c;\n      bit ^= (c << 1) & (bl ^ (bl >> 1));\n    }\n ab_reduced:\n  ASSERT (bl & 1);\n  ASSERT (bl > 1);\n\n  return mpn_jacobi_base (al, bl, bit);\n}\n#else\n#error Unsupported value for JACOBI_2_METHOD\n#endif\n"
  },
  {
    "path": "mpn/generic/jacobi_base.c",
    "content": "/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.\n\n   THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO\n   INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.\n\nCopyright 1999, 2000, 2001, 2002, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Use the simple loop by default.  The generic count_trailing_zeros is not\n   very fast, and the extra trickery of method 3 has proven to be less use\n   than might have been though.  */\n#ifndef JACOBI_BASE_METHOD\n#define JACOBI_BASE_METHOD  2\n#endif\n\n\n/* Use count_trailing_zeros.  */\n#if JACOBI_BASE_METHOD == 1\n#define PROCESS_TWOS_ANY                                \\\n  {                                                     \\\n    mp_limb_t  twos;                                    \\\n    count_trailing_zeros (twos, a);                     \\\n    result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \\\n    a >>= twos;                                         \\\n  }\n#define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY\n#endif\n\n/* Use a simple loop.  A disadvantage of this is that there's a branch on a\n   50/50 chance of a 0 or 1 low bit.  
*/\n#if JACOBI_BASE_METHOD == 2\n#define PROCESS_TWOS_EVEN               \\\n  {                                     \\\n    int  two;                           \\\n    two = JACOBI_TWO_U_BIT1 (b);        \\\n    do                                  \\\n      {                                 \\\n\ta >>= 1;                        \\\n\tresult_bit1 ^= two;             \\\n\tASSERT (a != 0);                \\\n      }                                 \\\n    while ((a & 1) == 0);               \\\n  }\n#define PROCESS_TWOS_ANY        \\\n  if ((a & 1) == 0)             \\\n    PROCESS_TWOS_EVEN;\n#endif\n\n/* Process one bit arithmetically, then a simple loop.  This cuts the loop\n   condition down to a 25/75 chance, which should branch predict better.\n   The CPU will need a reasonable variable left shift.  */\n#if JACOBI_BASE_METHOD == 3\n#define PROCESS_TWOS_EVEN               \\\n  {                                     \\\n    int  two, mask, shift;              \\\n\t\t\t\t\t\\\n    two = JACOBI_TWO_U_BIT1 (b);        \\\n    mask = (~a & 2);                    \\\n    a >>= 1;                            \\\n\t\t\t\t\t\\\n    shift = (~a & 1);                   \\\n    a >>= shift;                        \\\n    result_bit1 ^= two ^ (two & mask);  \\\n\t\t\t\t\t\\\n    while ((a & 1) == 0)                \\\n      {                                 \\\n\ta >>= 1;                        \\\n\tresult_bit1 ^= two;             \\\n\tASSERT (a != 0);                \\\n      }                                 \\\n  }\n#define PROCESS_TWOS_ANY                \\\n  {                                     \\\n    int  two, mask, shift;              \\\n\t\t\t\t\t\\\n    two = JACOBI_TWO_U_BIT1 (b);        \\\n    shift = (~a & 1);                   \\\n    a >>= shift;                        \\\n\t\t\t\t\t\\\n    mask = shift << 1;                  \\\n    result_bit1 ^= (two & mask);        \\\n\t\t\t\t\t\\\n    while ((a & 1) == 0)                \\\n      {                 
                \\\n\ta >>= 1;                        \\\n\tresult_bit1 ^= two;             \\\n\tASSERT (a != 0);                \\\n      }                                 \\\n  }\n#endif\n\n#if JACOBI_BASE_METHOD < 4\n/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but\n   with a restricted range of inputs accepted, namely b>1, b odd.\n\n   The initial result_bit1 is taken as a parameter for the convenience of\n   mpz_kronecker_ui() et al.  The sign changes both here and in those\n   routines accumulate nicely in bit 1, see the JACOBI macros.\n\n   The return value here is the normal +1, 0, or -1.  Note that +1 and -1\n   have bit 1 in the \"BIT1\" sense, which could be useful if the caller is\n   accumulating it into some extended calculation.\n\n   Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be\n   possible, but a couple of tests suggest it's not a significant speedup,\n   and may even be a slowdown, so what's here is good enough for now. */\n\nint\nmpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)\n{\n  ASSERT (b & 1);  /* b odd */\n  ASSERT (b != 1);\n\n  if (a == 0)\n    return 0;\n\n  PROCESS_TWOS_ANY;\n  if (a == 1)\n    goto done;\n\n  if (a >= b)\n    goto a_gt_b;\n\n  for (;;)\n    {\n      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);\n      MP_LIMB_T_SWAP (a, b);\n\n    a_gt_b:\n      do\n\t{\n\t  /* working on (a/b), a,b odd, a>=b */\n\t  ASSERT (a & 1);\n\t  ASSERT (b & 1);\n\t  ASSERT (a >= b);\n\n\t  if ((a -= b) == 0)\n\t    return 0;\n\n\t  PROCESS_TWOS_EVEN;\n\t  if (a == 1)\n\t    goto done;\n\t}\n      while (a >= b);\n    }\n\n done:\n  return JACOBI_BIT1_TO_PN (result_bit1);\n}\n#endif\n\n#if JACOBI_BASE_METHOD == 4\n/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a\n * parameter. We have no need for the convention that the sign is in\n * bit 1, internally we use bit 0. */\n\n/* FIXME: Could try table-based count_trailing_zeros. 
*/\nint\nmpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)\n{\n  int c;\n\n  ASSERT (b & 1);\n  ASSERT (b > 1);\n\n  if (a == 0)\n    /* This is the only line which depends on b > 1 */\n    return 0;\n\n  bit >>= 1;\n\n  /* Below, we represent a and b shifted right so that the least\n     significant one bit is implicit. */\n\n  b >>= 1;\n\n  count_trailing_zeros (c, a);\n  bit ^= c & (b ^ (b >> 1));\n\n  /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */\n  a >>= c;\n  a >>= 1;\n\n  do\n    {\n      mp_limb_t t = a - b;\n      mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);\n\n      if (t == 0)\n\treturn 0;\n\n      /* If b > a, invoke reciprocity */\n      bit ^= (bgta & a & b);\n\n      /* b <-- min (a, b) */\n      b += (bgta & t);\n\n      /* a <-- |a - b| */\n      a = (t ^ bgta) - bgta;\n\n      /* Number of trailing zeros is the same no matter if we look at\n       * t or a, but using t gives more parallelism. */\n      count_trailing_zeros (c, t);\n      c ++;\n      /* (2/b) = -1 if b = 3 or 5 mod 8 */\n      bit ^= c & (b ^ (b >> 1));\n      a >>= c;\n    }\n  while (b > 0);\n\n  return 1-2*(bit & 1);\n}\n#endif /* JACOBI_BASE_METHOD == 4 */\n"
  },
  {
    "path": "mpn/generic/longlong_inc.h",
    "content": "/* An empty file, although later we put the generic case in here */\n"
  },
  {
    "path": "mpn/generic/lshift.c",
    "content": "/* mpn_lshift -- Shift left low level.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Shift U (pointed to by up and n limbs long) cnt bits to the left\n   and store the n least significant limbs of the result at rp.\n   Return the bits shifted out from the most significant limb.\n\n   Argument constraints:\n   1. 0 < cnt < GMP_NUMB_BITS.\n   2. 
If the result is to be written over the input, rp must be >= up.\n*/\n\nmp_limb_t\nmpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)\n{\n  mp_limb_t high_limb, low_limb;\n  unsigned int tnc;\n  mp_size_t i;\n  mp_limb_t retval;\n\n  ASSERT (n >= 1);\n  ASSERT (cnt >= 1);\n  ASSERT (cnt < GMP_NUMB_BITS);\n  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));\n\n  up += n;\n  rp += n;\n\n  tnc = GMP_NUMB_BITS - cnt;\n  low_limb = *--up;\n  retval = low_limb >> tnc;\n  high_limb = (low_limb << cnt) & GMP_NUMB_MASK;\n\n  for (i = n - 1; i != 0; i--)\n    {\n      low_limb = *--up;\n      *--rp = high_limb | (low_limb >> tnc);\n      high_limb = (low_limb << cnt) & GMP_NUMB_MASK;\n    }\n  *--rp = high_limb;\n\n  return retval;\n}\n"
  },
  {
    "path": "mpn/generic/matrix22_mul.c",
    "content": "/* matrix22_mul.c.\n\n   Contributed by Niels Möller and Marco Bodrato.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#define MUL(rp, ap, an, bp, bn) do {\t\t\\\n  if (an >= bn)\t\t\t\t\t\\\n    mpn_mul (rp, ap, an, bp, bn);\t\t\\\n  else\t\t\t\t\t\t\\\n    mpn_mul (rp, bp, bn, ap, an);\t\t\\\n} while (0)\n\n/* Inputs are unsigned. 
*/\nstatic int\nabs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)\n{\n  int c;\n  MPN_CMP (c, ap, bp, n);\n  if (c >= 0)\n    {\n      mpn_sub_n (rp, ap, bp, n);\n      return 0;\n    }\n  else\n    {\n      mpn_sub_n (rp, bp, ap, n);\n      return 1;\n    }\n}\n\nstatic int\nadd_signed_n (mp_ptr rp,\n\t      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)\n{\n  if (as != bs)\n    return as ^ abs_sub_n (rp, ap, bp, n);\n  else\n    {\n      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));\n      return as;\n    }\n}\n\nmp_size_t\nmpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)\n{\n  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)\n      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))\n    return 3*rn + 2*mn;\n  else\n    return 3*(rn + mn) + 5;\n}\n\n/* Algorithm:\n\n    / s0 \\   /  1  0  0  0 \\ / r0 \\\n    | s1 |   |  0  1  0  1 | | r1 |\n    | s2 |   |  0  0 -1  1 | | r2 |\n    | s3 | = |  0  1 -1  1 | \\ r3 /\n    | s4 |   | -1  1 -1  1 |\n    | s5 |   |  0  1  0  0 |\n    \\ s6 /   \\  0  0  1  0 /\n\n    / t0 \\   /  1  0  0  0 \\ / m0 \\\n    | t1 |   |  0  1  0  1 | | m1 |\n    | t2 |   |  0  0 -1  1 | | m2 |\n    | t3 | = |  0  1 -1  1 | \\ m3 /\n    | t4 |   | -1  1 -1  1 |\n    | t5 |   |  0  1  0  0 |\n    \\ t6 /   \\  0  0  1  0 /\n\n  Note: the two matrices above are the same, but s_i and t_i are used\n  in the same product, only for i<4, see \"A Strassen-like Matrix\n  Multiplication suited for squaring and higher power computation\" by\n  M. 
Bodrato, in Proceedings of ISSAC 2010.\n\n    / r0 \\   / 1 0  0  0  0  1  0 \\ / s0*t0 \\\n    | r1 | = | 0 0 -1  1 -1  1  0 | | s1*t1 |\n    | r2 |   | 0 1  0 -1  0 -1 -1 | | s2*t2 |\n    \\ r3 /   \\ 0 1  1 -1  0 -1  0 / | s3*t3 |\n\t\t\t\t    | s4*t5 |\n\t\t\t\t    | s5*t6 |\n\t\t\t\t    \\ s6*t4 /\n\n  The scheduling uses two temporaries U0 and U1 to store products, and\n  two, S0 and T0, to store combinations of entries of the two\n  operands.\n*/\n\n/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).\n *\n * Resulting elements are of size up to rn + mn + 1.\n *\n * Temporary storage: 3 rn + 3 mn + 5. */\nvoid\nmpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,\n\t\t\t   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,\n\t\t\t   mp_ptr tp)\n{\n  mp_ptr s0, t0, u0, u1;\n  int r1s, r3s, s0s, t0s, u1s;\n  s0 = tp; tp += rn + 1;\n  t0 = tp; tp += mn + 1;\n  u0 = tp; tp += rn + mn + 1;\n  u1 = tp; /* rn + mn + 2 */\n\n  MUL (u0, r1, rn, m2, mn);\t\t/* u5 = s5 * t6 */\n  r3s = abs_sub_n (r3, r3, r2, rn);\t/* r3 - r2 */\n  if (r3s)\n    {\n      r1s = abs_sub_n (r1, r1, r3, rn);\n      r1[rn] = 0;\n    }\n  else\n    {\n      r1[rn] = mpn_add_n (r1, r1, r3, rn);\n      r1s = 0;\t\t\t\t/* r1 - r2 + r3  */\n    }\n  if (r1s)\n    {\n      s0[rn] = mpn_add_n (s0, r1, r0, rn);\n      s0s = 0;\n    }\n  else if (r1[rn] != 0)\n    {\n      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);\n      s0s = 1;\t\t\t\t/* s4 = -r0 + r1 - r2 + r3 */\n\t\t\t\t\t/* Reverse sign! */\n    }\n  else\n    {\n      s0s = abs_sub_n (s0, r0, r1, rn);\n      s0[rn] = 0;\n    }\n  MUL (u1, r0, rn, m0, mn);\t\t/* u0 = s0 * t0 */\n  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);\n  ASSERT (r0[rn+mn] < 2);\t\t/* u0 + u5 */\n\n  t0s = abs_sub_n (t0, m3, m2, mn);\n  u1s = r3s^t0s^1;\t\t\t/* Reverse sign! 
*/\n  MUL (u1, r3, rn, t0, mn);\t\t/* u2 = s2 * t2 */\n  u1[rn+mn] = 0;\n  if (t0s)\n    {\n      t0s = abs_sub_n (t0, m1, t0, mn);\n      t0[mn] = 0;\n    }\n  else\n    {\n      t0[mn] = mpn_add_n (t0, t0, m1, mn);\n    }\n\n  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs\n     at r3. I'd expect that for matrices of random size, the high\n     words t0[mn] and r1[rn] are non-zero with a pretty small\n     probability. If that can be confirmed this should be done as an\n     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))\n     add_n. */\n  if (t0[mn] != 0)\n    {\n      MUL (r3, r1, rn, t0, mn + 1);\t/* u3 = s3 * t3 */\n      ASSERT (r1[rn] < 2);\n      if (r1[rn] != 0)\n\tmpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);\n    }\n  else\n    {\n      MUL (r3, r1, rn + 1, t0, mn);\n    }\n\n  ASSERT (r3[rn+mn] < 4);\n\n  u0[rn+mn] = 0;\n  if (r1s^t0s)\n    {\n      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);\n    }\n  else\n    {\n      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));\n      r3s = 0;\t\t\t\t/* u3 + u5 */\n    }\n\n  if (t0s)\n    {\n      t0[mn] = mpn_add_n (t0, t0, m0, mn);\n    }\n  else if (t0[mn] != 0)\n    {\n      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);\n    }\n  else\n    {\n      t0s = abs_sub_n (t0, t0, m0, mn);\n    }\n  MUL (u0, r2, rn, t0, mn + 1);\t\t/* u6 = s6 * t4 */\n  ASSERT (u0[rn+mn] < 2);\n  if (r1s)\n    {\n      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));\n    }\n  else\n    {\n      r1[rn] += mpn_add_n (r1, r1, r2, rn);\n    }\n  rn++;\n  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);\n\t\t\t\t\t/* u3 + u5 + u6 */\n  ASSERT (r2[rn+mn-1] < 4);\n  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);\n\t\t\t\t\t/* -u2 + u3 + u5  */\n  ASSERT (r3[rn+mn-1] < 3);\n  MUL (u0, s0, rn, m1, mn);\t\t/* u4 = s4 * t5 */\n  ASSERT (u0[rn+mn-1] < 2);\n  t0[mn] = mpn_add_n (t0, m3, m1, mn);\n  MUL (u1, r1, rn, t0, mn + 1);\t\t/* u1 = s1 * t1 */\n  mn += rn;\n  ASSERT (u1[mn-1] < 4);\n  ASSERT (u1[mn] == 
0);\n  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));\n\t\t\t\t\t/* -u2 + u3 - u4 + u5  */\n  ASSERT (r1[mn-1] < 2);\n  if (r3s)\n    {\n      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));\n    }\n  else\n    {\n      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));\n\t\t\t\t\t/* u1 + u2 - u3 - u5  */\n    }\n  ASSERT (r3[mn-1] < 2);\n  if (t0s)\n    {\n      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));\n    }\n  else\n    {\n      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));\n\t\t\t\t\t/* u1 - u3 - u5 - u6  */\n    }\n  ASSERT (r2[mn-1] < 2);\n}\n\nvoid\nmpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,\n\t\t  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,\n\t\t  mp_ptr tp)\n{\n  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)\n      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))\n    {\n      mp_ptr p0, p1;\n      unsigned i;\n\n      /* Temporary storage: 3 rn + 2 mn */\n      p0 = tp + rn;\n      p1 = p0 + rn + mn;\n\n      for (i = 0; i < 2; i++)\n\t{\n\t  MPN_COPY (tp, r0, rn);\n\n\t  if (rn >= mn)\n\t    {\n\t      mpn_mul (p0, r0, rn, m0, mn);\n\t      mpn_mul (p1, r1, rn, m3, mn);\n\t      mpn_mul (r0, r1, rn, m2, mn);\n\t      mpn_mul (r1, tp, rn, m1, mn);\n\t    }\n\t  else\n\t    {\n\t      mpn_mul (p0, m0, mn, r0, rn);\n\t      mpn_mul (p1, m3, mn, r1, rn);\n\t      mpn_mul (r0, m2, mn, r1, rn);\n\t      mpn_mul (r1, m1, mn, tp, rn);\n\t    }\n\t  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);\n\t  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);\n\n\t  r0 = r2; r1 = r3;\n\t}\n    }\n  else\n    mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,\n\t\t\t       m0, m1, m2, m3, mn, tp);\n}\n"
  },
  {
    "path": "mpn/generic/matrix22_mul1_inverse_vector.c",
    "content": "/* matrix22_mul1_inverse_vector.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2008, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from\n   the left. Uses three buffers, to avoid a copy. */\nmp_size_t\nmpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,\n\t\t\t\t  mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)\n{\n  mp_limb_t h0, h1;\n\n  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as\n\n     r  = u11 * a\n     r -= u01 * b\n     b *= u00\n     b -= u10 * a\n  */\n\n  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);\n  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);\n  ASSERT (h0 == h1);\n\n  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);\n  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);\n  ASSERT (h0 == h1);\n\n  n -= (rp[n-1] | bp[n-1]) == 0;\n  return n;\n}\n"
  },
  {
    "path": "mpn/generic/mod_1.c",
    "content": "/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --\n   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.\n   Return the single-limb remainder.\n   There are no constraints on the value of the divisor.\n\nCopyright 1991, 1993, 1994, 1999, 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,\n   meaning the quotient size where that should happen, the quotient size\n   being how many udiv divisions will be done.\n\n   The default is to use preinv always, CPUs where this doesn't suit have\n   tuned thresholds.  Note in particular that preinv should certainly be\n   used if that's the only division available (USE_PREINV_ALWAYS).  
*/\n\n#ifndef MOD_1_NORM_THRESHOLD\n#define MOD_1_NORM_THRESHOLD  0\n#endif\n#ifndef MOD_1_UNNORM_THRESHOLD\n#define MOD_1_UNNORM_THRESHOLD  0\n#endif\n\n\n/* The comments in mpn/generic/divrem_1.c apply here too.\n\n   As noted in the algorithms section of the manual, the shifts in the loop\n   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed\n   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can\n   skip a division where (a*2^n)%(d*2^n) can't then there's the same number\n   of divide steps, though how often that happens depends on the assumed\n   distributions of dividend and divisor.  In any case this idea is left to\n   CPU specific implementations to consider.  */\n\nmp_limb_t\nmpn_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d)\n{\n  mp_size_t  i;\n  mp_limb_t  n1, n0, r;\n  mp_limb_t  dummy;\n\n  ASSERT (un >= 0);\n  ASSERT (d != 0);\n\n  /* Botch: Should this be handled at all?  Rely on callers?\n     But note un==0 is currently required by mpz/fdiv_r_ui.c and possibly\n     other places.  */\n  if (un == 0)\n    return 0;\n\n  #if HAVE_NATIVE_mpn_divrem_euclidean_r_1\n  return mpn_divrem_euclidean_r_1(up,un,d);\n  #endif\n\n  d <<= GMP_NAIL_BITS;\n\n  if ((d & GMP_LIMB_HIGHBIT) != 0)\n    {\n      /* High limb is initial remainder, possibly with one subtract of\n\t d to get r<d.  
*/\n      r = up[un - 1] << GMP_NAIL_BITS;\n      if (r >= d)\n\tr -= d;\n      r >>= GMP_NAIL_BITS;\n      un--;\n      if (un == 0)\n\treturn r;\n\n      if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))\n\t{\n\tplain:\n\t  for (i = un - 1; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd (dummy, r, r, n0, d);\n\t      r >>= GMP_NAIL_BITS;\n\t    }\n\t  return r;\n\t}\n      else\n\t{\n\t  mp_limb_t  inv;\n\t  invert_limb (inv, d);\n\t  for (i = un - 1; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);\n\t      r >>= GMP_NAIL_BITS;\n\t    }\n\t  return r;\n\t}\n    }\n  else\n    {\n      int norm;\n\n      /* Skip a division if high < divisor.  Having the test here before\n\t normalizing will still skip as often as possible.  */\n      r = up[un - 1] << GMP_NAIL_BITS;\n      if (r < d)\n\t{\n\t  r >>= GMP_NAIL_BITS;\n\t  un--;\n\t  if (un == 0)\n\t    return r;\n\t}\n      else\n\tr = 0;\n\n      /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple\n\t code above. */\n      if (! 
UDIV_NEEDS_NORMALIZATION\n\t  && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))\n\tgoto plain;\n\n      count_leading_zeros (norm, d);\n      d <<= norm;\n\n      n1 = up[un - 1] << GMP_NAIL_BITS;\n      r = (r << norm) | (n1 >> (GMP_LIMB_BITS - norm));\n\n      if (UDIV_NEEDS_NORMALIZATION\n\t  && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))\n\t{\n\t  for (i = un - 2; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd (dummy, r, r,\n\t\t\t  (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),\n\t\t\t  d);\n\t      r >>= GMP_NAIL_BITS;\n\t      n1 = n0;\n\t    }\n\t  udiv_qrnnd (dummy, r, r, n1 << norm, d);\n\t  r >>= GMP_NAIL_BITS;\n\t  return r >> norm;\n\t}\n      else\n\t{\n\t  mp_limb_t inv;\n\t  invert_limb (inv, d);\n\n\t  for (i = un - 2; i >= 0; i--)\n\t    {\n\t      n0 = up[i] << GMP_NAIL_BITS;\n\t      udiv_qrnnd_preinv (dummy, r, r,\n\t\t\t\t (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),\n\t\t\t\t d, inv);\n\t      r >>= GMP_NAIL_BITS;\n\t      n1 = n0;\n\t    }\n\t  udiv_qrnnd_preinv (dummy, r, r, n1 << norm, d, inv);\n\t  r >>= GMP_NAIL_BITS;\n\t  return r >> norm;\n\t}\n    }\n}\n"
  },
  {
    "path": "mpn/generic/mod_1_1.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* in each round we remove one limb from the body, i.e. k = 1 */\nvoid mpn_mod_1_1(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)\n{\n   mp_limb_t h, l, sh, sl;\n   mp_size_t j;\n \n   ASSERT(xn >= 3);\n   ASSERT_MPN(xp, xn);\n   ASSERT_LIMB(db[0]);\n   ASSERT_LIMB(db[1]);\n\n   h = xp[xn - 1];\n   l = xp[xn - 2];\n\n   for (j = xn - 3; j >= 0; j--)\n   {\n      umul_ppmm(sh, sl, l, db[0]);\n      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);\n      umul_ppmm(h, l, h, db[1]);\n      add_ssaaaa(h, l, h, l, sh, sl);\n   }\n\n   umul_ppmm(sh, sl, h, db[0]);\n   add_ssaaaa(sh, sl, sh, sl, 0, l);\n\n   rem[0] = sl;\n   rem[1] = sh;\n}\n"
  },
  {
    "path": "mpn/generic/mod_1_2.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* in each round we remove one limb from the body, i.e. k = 1 */\nvoid mpn_mod_1_2(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)\n{\n   mp_limb_t h, l, sh, sl, th, tl;\n   mp_size_t j;\n \n   ASSERT(xn >= 4);\n   ASSERT_MPN(xp, xn);\n   ASSERT_LIMB(db[0]);\n   ASSERT_LIMB(db[1]);\n   ASSERT_LIMB(db[2]);\n\n   tl = xp[xn - 2];\n   th = xp[xn - 1];\n\n   for (j = xn - 4; j >= 0; j -= 2)\n   {\n      umul_ppmm(sh, sl, xp[j + 1], db[0]);\n      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);\n      umul_ppmm(h, l, tl, db[1]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(th, tl, th, db[2]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n\n   if (j > -2) /* we have at least three limbs to do i.e. 
xp[0], ..., tl, th */\n   {\n      umul_ppmm(sh, sl, tl, db[0]);\n      add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);\n      umul_ppmm(th, tl, th, db[1]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n\n   umul_ppmm(h, l, th, db[0]);\n   add_ssaaaa(h, l, h, l, 0, tl);\n\n   rem[0] = l;\n   rem[1] = h;\n}\n"
  },
  {
    "path": "mpn/generic/mod_1_3.c",
    "content": "/*   \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* in each round we remove one limb from the body, i.e. k = 1 */\nvoid mpn_mod_1_3(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)\n{\n   mp_limb_t h, l, sh, sl, th, tl;\n   mp_size_t j, jj;\n \n   ASSERT(xn >= 5);\n   ASSERT_MPN(xp, xn);\n   ASSERT_LIMB(db[0]);\n   ASSERT_LIMB(db[1]);\n   ASSERT_LIMB(db[2]);\n   ASSERT_LIMB(db[3]);\n\n   tl = xp[xn - 2];\n   th = xp[xn - 1];\n\n   for (j = xn - 5; j >= 0; j -= 3)\n   {\n      umul_ppmm(sh, sl, xp[j + 1], db[0]);\n      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);\n      umul_ppmm(h, l, xp[j + 2], db[1]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(h, l, tl, db[2]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(th, tl, th, db[3]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n\n   if (j > -3) /* we have at least three limbs to do, i.e. 
xp[0], ..., tl, th */\n   {\n      sh = 0;\n      sl = xp[0];\n      jj = 1;\n\n      if (j == -1)\n      {\n         umul_ppmm(sh, sl, xp[1], db[0]);\n         add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);\n         jj = 2;\n      }\n\n      umul_ppmm(h, l, tl, db[jj - 1]);\n      add_ssaaaa(sh, sl, sh, sl, h, l);\n      umul_ppmm(th, tl, th, db[jj]);\n      add_ssaaaa(th, tl, th, tl, sh, sl);\n   }\n\n   umul_ppmm(h, l, th, db[0]);\n   add_ssaaaa(h, l, h, l, 0, tl);\n\n   rem[0] = l;\n   rem[1] = h;\n}\n"
  },
  {
    "path": "mpn/generic/mod_34lsub1.c",
    "content": "/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.\n   The remainder is not fully reduced, it's any limb value congruent to\n   {p,n} modulo that divisor.\n\n   This implementation is only correct when GMP_NUMB_BITS is a multiple of\n   4.\n\n   FIXME: If GMP_NAIL_BITS is some silly big value during development then\n   it's possible the carry accumulators c0,c1,c2 could overflow.\n\n   General notes:\n\n   The basic idea is to use a set of N accumulators (N=3 in this case) to\n   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end\n   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a\n   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  
N and M are chosen to give a good\n   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.\n\n   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving\n   a few more primes than a single accumulator N=1 does, and for no extra\n   cost (assuming the processor has a decent number of registers).\n\n   For strange nailified values of GMP_NUMB_BITS the idea would be to look\n   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2\n   the choices for M may be opened up a bit.  But such things are probably\n   best done in separate code, not grafted on here.  */\n\n#if GMP_NUMB_BITS % 4 == 0\n\n#define B1  (GMP_NUMB_BITS / 4)\n#define B2  (B1 * 2)\n#define B3  (B1 * 3)\n\n#define M1  ((CNST_LIMB(1) << B1) - 1)\n#define M2  ((CNST_LIMB(1) << B2) - 1)\n#define M3  ((CNST_LIMB(1) << B3) - 1)\n\n#define LOW0(n)      ((n) & M3)\n#define HIGH0(n)     ((n) >> B3)\n\n#define LOW1(n)      (((n) & M2) << B1)\n#define HIGH1(n)     ((n) >> B2)\n\n#define LOW2(n)      (((n) & M1) << B2)\n#define HIGH2(n)     ((n) >> B1)\n\n#define PARTS0(n)    (LOW0(n) + HIGH0(n))\n#define PARTS1(n)    (LOW1(n) + HIGH1(n))\n#define PARTS2(n)    (LOW2(n) + HIGH2(n))\n\n#define ADD(c,a,val)                    \\\n  do {                                  \\\n    mp_limb_t  new_c;                   \\\n    ADDC_LIMB (new_c, a, a, val);       \\\n    (c) += new_c;                       \\\n  } while (0)\n\nmp_limb_t\nmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)\n{\n  mp_limb_t  c0 = 0;\n  mp_limb_t  c1 = 0;\n  mp_limb_t  c2 = 0;\n  mp_limb_t  a0, a1, a2;\n\n  ASSERT (n >= 1);\n  ASSERT (n/3 < GMP_NUMB_MAX);\n\n  a0 = a1 = a2 = 0;\n  c0 = c1 = c2 = 0;\n\n  while ((n -= 3) >= 0)\n    {\n      ADD (c0, a0, p[0]);\n      ADD (c1, a1, p[1]);\n      ADD (c2, a2, p[2]);\n      p += 3;\n    }\n\n  if (n != -3)\n    {\n      ADD (c0, a0, p[0]);\n      if (n != -2)\n\tADD (c1, a1, p[1]);\n    }\n\n  return\n    PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)\n    + PARTS1 (c0) + PARTS2 
(c1) + PARTS0 (c2);\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/modexact_1c_odd.c",
    "content": "/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Calculate an r satisfying\n\n           r*b^k + a - c == q*d\n\n   where b=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1\n   (the caller won't know which), and q is the quotient (discarded).  d must\n   be odd, c can be any limb value.\n\n   If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.\n\n   This slightly strange function suits the initial Nx1 reduction for GCDs\n   or Jacobi symbols since the factors of 2 in b^k can be ignored, leaving\n   -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be\n   ignored, or for the Jacobi symbol it can be accounted for.  
The function\n   also suits divisibility and congruence testing since if r=0 (or r=d) is\n   obtained then a==c mod d.\n\n\n   r is a bit like the remainder returned by mpn_divexact_by3c, and is the\n   sort of remainder mpn_divexact_1 might return.  Like mpn_divexact_by3c, r\n   represents a borrow, since effectively quotient limbs are chosen so that\n   subtracting that multiple of d from src at each step will produce a zero\n   limb.\n\n   A long calculation can be done piece by piece from low to high by passing\n   the return value from one part as the carry parameter to the next part.\n   The effective final k becomes anything between size and size-n, if n\n   pieces are used.\n\n\n   A similar sort of routine could be constructed based on adding multiples\n   of d at each limb, much like redc in mpz_powm does.  Subtracting however\n   has a small advantage that when subtracting to cancel out l there's never\n   a borrow into h, whereas using an addition would put a carry into h\n   depending whether l==0 or l!=0.\n\n\n   In terms of efficiency, this function is similar to a mul-by-inverse\n   mpn_mod_1.  Both are essentially two multiplies and are best suited to\n   CPUs with low latency multipliers (in comparison to a divide instruction\n   at least.)  But modexact has a few less supplementary operations, only\n   needs low part and high part multiplies, and has fewer working quantities\n   (helping CPUs with few registers).\n\n\n   In the main loop it will be noted that the new carry (call it r) is the\n   sum of the high product h and any borrow from l=s-c.  If c<d then we will\n   have r<d too, for the following reasons.  Let q=l*inverse be the quotient\n   limb, so that q*d = b*h + l, where b=2^GMP_NUMB_BITS.  
Now if h=d-1 then\n\n       l = q*d - b*(d-1) <= (b-1)*d - b*(d-1) = b-d\n\n   But if l=s-c produces a borrow when c<d, then l>=b-d+1 and hence will\n   never have h=d-1 and so r=h+borrow <= d-1.\n\n   When c>=d, on the other hand, h=d-1 can certainly occur together with a\n   borrow, thereby giving only r<=d, as per the function definition above.\n\n   As a design decision it's left to the caller to check for r=d if it might\n   be passing c>=d.  Several applications have c<d initially so the extra\n   test is often unnecessary, for example the GCDs or a plain divisibility\n   d|a test will pass c=0.\n\n\n   The special case for size==1 is so that it can be assumed c<=d in the\n   high<=divisor test at the end.  c<=d is only guaranteed after at least\n   one iteration of the main loop.  There's also a decent chance one % is\n   faster than a modlimb_invert, though that will depend on the processor.\n\n   A CPU specific implementation might want to omit the size==1 code or the\n   high<divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither\n   useful.  
*/\n\n\nmp_limb_t\nmpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,\n                     mp_limb_t orig_c)\n{\n  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;\n  mp_limb_t  c = orig_c;\n  mp_size_t  i;\n\n  ASSERT (size >= 1);\n  ASSERT (d & 1);\n  ASSERT_MPN (src, size);\n  ASSERT_LIMB (d);\n  ASSERT_LIMB (c);\n\n  if (size == 1)\n    {\n      s = src[0];\n      if (s > c)\n\t{\n\t  l = s-c;\n\t  h = l % d;\n\t  if (h != 0)\n\t    h = d - h;\n\t}\n      else\n\t{\n\t  l = c-s;\n\t  h = l % d;\n\t}\n      return h;\n    }\n\n\n  modlimb_invert (inverse, d);\n  dmul = d << GMP_NAIL_BITS;\n\n  i = 0;\n  do\n    {\n      s = src[i];\n      SUBC_LIMB (c, l, s, c);\n      l = (l * inverse) & GMP_NUMB_MASK;\n      umul_ppmm (h, dummy, l, dmul);\n      c += h;\n    }\n  while (++i < size-1);\n\n\n  s = src[i];\n  if (s <= d)\n    {\n      /* With high<=d the final step can be a subtract and addback.  If c==0\n\t then the addback will restore to l>=0.  If c==d then will get l==d\n\t if s==0, but that's ok per the function definition.  */\n\n      l = c - s;\n      if (c < s)\n\tl += d;\n\n      ret = l;\n    }\n  else\n    {\n      /* Can't skip a divide, just do the loop code once more. */\n\n      SUBC_LIMB (c, l, s, c);\n      l = (l * inverse) & GMP_NUMB_MASK;\n      umul_ppmm (h, dummy, l, dmul);\n      c += h;\n      ret = c;\n    }\n\n  ASSERT (orig_c < d ? ret < d : ret <= d);\n  return ret;\n}\n\n\n\n#if 0\n\n/* The following is an alternate form that might shave one cycle on a\n   superscalar processor since it takes c+=h off the dependent chain,\n   leaving just a low product, high product, and a subtract.\n\n   This is for CPU specific implementations to consider.  A special case for\n   high<divisor and/or size==1 can be added if desired.\n\n   Notice that c is only ever 0 or 1, since if s-c produces a borrow then\n   x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become\n   c=(x==0xFF..FF) too, if that helped.  
*/\n\nmp_limb_t\nmpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)\n{\n  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;\n  mp_limb_t  c = 0;\n  mp_size_t  i;\n\n  ASSERT (size >= 1);\n  ASSERT (d & 1);\n\n  modlimb_invert (inverse, d);\n  dmul = d << GMP_NAIL_BITS;\n\n  for (i = 0; i < size; i++)\n    {\n      ASSERT (c==0 || c==1);\n\n      s = src[i];\n      SUBC_LIMB (c1, x, s, c);\n\n      SUBC_LIMB (c2, y, x, h);\n      c = c1 + c2;\n\n      y = (y * inverse) & GMP_NUMB_MASK;\n      umul_ppmm (h, dummy, y, dmul);\n    }\n\n  h += c;\n  return h;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/mp_bases.c",
    "content": "\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_NUMB_BITS == 32\n\nconst struct bases mp_bases[257] =\n{\n  /*   0 */ { 0, 0.0, 0 },\n  /*   1 */ { 0, 1e37, 0 },\n  /*   2 */ { 32, 1.0000000000000000, 0x1 },\n  /*   3 */ { 20, 0.6309297535714574, CNST_LIMB(0xcfd41b91), CNST_LIMB(0x3b563c24) },\n  /*   4 */ { 16, 0.5000000000000000, 0x2 },\n  /*   5 */ { 13, 0.4306765580733931, CNST_LIMB(0x48c27395), CNST_LIMB(0xc25c2684) },\n  /*   6 */ { 12, 0.3868528072345416, CNST_LIMB(0x81bf1000), CNST_LIMB(0xf91bd1b6) },\n  /*   7 */ { 11, 0.3562071871080222, CNST_LIMB(0x75db9c97), CNST_LIMB(0x1607a2cb) },\n  /*   8 */ { 10, 0.3333333333333334, 0x3 },\n  /*   9 */ { 10, 0.3154648767857287, CNST_LIMB(0xcfd41b91), CNST_LIMB(0x3b563c24) },\n  /*  10 */ { 9, 0.3010299956639811, CNST_LIMB(0x3b9aca00), CNST_LIMB(0x12e0be82) },\n  /*  11 */ { 9, 0.2890648263178878, CNST_LIMB(0x8c8b6d2b), CNST_LIMB(0xd24cde04) },\n  /*  12 */ { 8, 0.2789429456511298, CNST_LIMB(0x19a10000), CNST_LIMB(0x3fa39ab5) },\n  /*  13 */ { 8, 0.2702381544273197, CNST_LIMB(0x309f1021), CNST_LIMB(0x50f8ac5f) },\n  /*  14 */ { 8, 0.2626495350371936, CNST_LIMB(0x57f6c100), CNST_LIMB(0x74843b1e) },\n  /*  15 */ { 8, 0.2559580248098155, CNST_LIMB(0x98c29b81), CNST_LIMB(0xad0326c2) },\n  /*  16 */ { 8, 0.2500000000000000, 0x4 },\n  /*  17 */ { 7, 0.2446505421182260, CNST_LIMB(0x18754571), CNST_LIMB(0x4ef0b6bd) },\n  /*  18 */ { 7, 0.2398124665681315, CNST_LIMB(0x247dbc80), CNST_LIMB(0xc0fc48a1) },\n  /*  19 */ { 7, 0.2354089133666382, CNST_LIMB(0x3547667b), CNST_LIMB(0x33838942) },\n  /*  20 */ { 7, 0.2313782131597592, CNST_LIMB(0x4c4b4000), CNST_LIMB(0xad7f29ab) },\n  /*  21 */ { 7, 0.2276702486969530, CNST_LIMB(0x6b5a6e1d), CNST_LIMB(0x313c3d15) },\n  /*  22 */ { 7, 0.2242438242175754, CNST_LIMB(0x94ace180), CNST_LIMB(0xb8cca9e0) },\n  /*  23 */ { 7, 0.2210647294575037, CNST_LIMB(0xcaf18367), CNST_LIMB(0x42ed6de9) },\n  /*  24 */ { 6, 0.2181042919855316, CNST_LIMB(0xb640000), 
CNST_LIMB(0x67980e0b) },\n  /*  25 */ { 6, 0.2153382790366965, CNST_LIMB(0xe8d4a51), CNST_LIMB(0x19799812) },\n  /*  26 */ { 6, 0.2127460535533632, CNST_LIMB(0x1269ae40), CNST_LIMB(0xbce85396) },\n  /*  27 */ { 6, 0.2103099178571525, CNST_LIMB(0x17179149), CNST_LIMB(0x62c103a9) },\n  /*  28 */ { 6, 0.2080145976765095, CNST_LIMB(0x1cb91000), CNST_LIMB(0x1d353d43) },\n  /*  29 */ { 6, 0.2058468324604345, CNST_LIMB(0x23744899), CNST_LIMB(0xce1decea) },\n  /*  30 */ { 6, 0.2037950470905062, CNST_LIMB(0x2b73a840), CNST_LIMB(0x790fc511) },\n  /*  31 */ { 6, 0.2018490865820999, CNST_LIMB(0x34e63b41), CNST_LIMB(0x35b865a0) },\n  /*  32 */ { 6, 0.2000000000000000, 0x5 },\n  /*  33 */ { 6, 0.1982398631705605, CNST_LIMB(0x4cfa3cc1), CNST_LIMB(0xa9aed1b3) },\n  /*  34 */ { 6, 0.1965616322328226, CNST_LIMB(0x5c13d840), CNST_LIMB(0x63dfc229) },\n  /*  35 */ { 6, 0.1949590218937863, CNST_LIMB(0x6d91b519), CNST_LIMB(0x2b0fee30) },\n  /*  36 */ { 6, 0.1934264036172708, CNST_LIMB(0x81bf1000), CNST_LIMB(0xf91bd1b6) },\n  /*  37 */ { 6, 0.1919587200065601, CNST_LIMB(0x98ede0c9), CNST_LIMB(0xac89c3a9) },\n  /*  38 */ { 6, 0.1905514124267734, CNST_LIMB(0xb3773e40), CNST_LIMB(0x6d2c32fe) },\n  /*  39 */ { 6, 0.1892003595168700, CNST_LIMB(0xd1bbc4d1), CNST_LIMB(0x387907c9) },\n  /*  40 */ { 6, 0.1879018247091076, CNST_LIMB(0xf4240000), CNST_LIMB(0xc6f7a0b) },\n  /*  41 */ { 5, 0.1866524112389434, CNST_LIMB(0x6e7d349), CNST_LIMB(0x28928154) },\n  /*  42 */ { 5, 0.1854490234153689, CNST_LIMB(0x7ca30a0), CNST_LIMB(0x6e8629d) },\n  /*  43 */ { 5, 0.1842888331487062, CNST_LIMB(0x8c32bbb), CNST_LIMB(0xd373dca0) },\n  /*  44 */ { 5, 0.1831692509136336, CNST_LIMB(0x9d46c00), CNST_LIMB(0xa0b17895) },\n  /*  45 */ { 5, 0.1820879004699383, CNST_LIMB(0xaffacfd), CNST_LIMB(0x746811a5) },\n  /*  46 */ { 5, 0.1810425967800402, CNST_LIMB(0xc46bee0), CNST_LIMB(0x4da6500f) },\n  /*  47 */ { 5, 0.1800313266566926, CNST_LIMB(0xdab86ef), CNST_LIMB(0x2ba23582) },\n  /*  48 */ { 5, 0.1790522317510414, 
CNST_LIMB(0xf300000), CNST_LIMB(0xdb20a88) },\n  /*  49 */ { 5, 0.1781035935540111, CNST_LIMB(0x10d63af1), CNST_LIMB(0xe68d5ce4) },\n  /*  50 */ { 5, 0.1771838201355579, CNST_LIMB(0x12a05f20), CNST_LIMB(0xb7cdfd9d) },\n  /*  51 */ { 5, 0.1762914343888821, CNST_LIMB(0x1490aae3), CNST_LIMB(0x8e583933) },\n  /*  52 */ { 5, 0.1754250635819545, CNST_LIMB(0x16a97400), CNST_LIMB(0x697cc3ea) },\n  /*  53 */ { 5, 0.1745834300480449, CNST_LIMB(0x18ed2825), CNST_LIMB(0x48a5ca6c) },\n  /*  54 */ { 5, 0.1737653428714400, CNST_LIMB(0x1b5e4d60), CNST_LIMB(0x2b52db16) },\n  /*  55 */ { 5, 0.1729696904450771, CNST_LIMB(0x1dff8297), CNST_LIMB(0x111586a6) },\n  /*  56 */ { 5, 0.1721954337940981, CNST_LIMB(0x20d38000), CNST_LIMB(0xf31d2b36) },\n  /*  57 */ { 5, 0.1714416005739134, CNST_LIMB(0x23dd1799), CNST_LIMB(0xc8d76d19) },\n  /*  58 */ { 5, 0.1707072796637201, CNST_LIMB(0x271f35a0), CNST_LIMB(0xa2cb1eb4) },\n  /*  59 */ { 5, 0.1699916162869140, CNST_LIMB(0x2a9ce10b), CNST_LIMB(0x807c3ec3) },\n  /*  60 */ { 5, 0.1692938075987814, CNST_LIMB(0x2e593c00), CNST_LIMB(0x617ec8bf) },\n  /*  61 */ { 5, 0.1686130986895011, CNST_LIMB(0x3257844d), CNST_LIMB(0x45746cbe) },\n  /*  62 */ { 5, 0.1679487789570419, CNST_LIMB(0x369b13e0), CNST_LIMB(0x2c0aa273) },\n  /*  63 */ { 5, 0.1673001788101741, CNST_LIMB(0x3b27613f), CNST_LIMB(0x14f90805) },\n  /*  64 */ { 5, 0.1666666666666667, 0x6 },\n  /*  65 */ { 5, 0.1660476462159378, CNST_LIMB(0x4528a141), CNST_LIMB(0xd9cf0829) },\n  /*  66 */ { 5, 0.1654425539190583, CNST_LIMB(0x4aa51420), CNST_LIMB(0xb6fc4841) },\n  /*  67 */ { 5, 0.1648508567221604, CNST_LIMB(0x50794633), CNST_LIMB(0x973054cb) },\n  /*  68 */ { 5, 0.1642720499620502, CNST_LIMB(0x56a94400), CNST_LIMB(0x7a1dbe4b) },\n  /*  69 */ { 5, 0.1637056554452156, CNST_LIMB(0x5d393975), CNST_LIMB(0x5f7fcd7f) },\n  /*  70 */ { 5, 0.1631512196835108, CNST_LIMB(0x642d7260), CNST_LIMB(0x47196c84) },\n  /*  71 */ { 5, 0.1626083122716341, CNST_LIMB(0x6b8a5ae7), CNST_LIMB(0x30b43635) },\n  /*  72 */ { 
5, 0.1620765243931223, CNST_LIMB(0x73548000), CNST_LIMB(0x1c1fa5f6) },\n  /*  73 */ { 5, 0.1615554674429964, CNST_LIMB(0x7b908fe9), CNST_LIMB(0x930634a) },\n  /*  74 */ { 5, 0.1610447717564444, CNST_LIMB(0x84435aa0), CNST_LIMB(0xef7f4a3c) },\n  /*  75 */ { 5, 0.1605440854340214, CNST_LIMB(0x8d71d25b), CNST_LIMB(0xcf5552d2) },\n  /*  76 */ { 5, 0.1600530732548214, CNST_LIMB(0x97210c00), CNST_LIMB(0xb1a47c8e) },\n  /*  77 */ { 5, 0.1595714156699382, CNST_LIMB(0xa1563f9d), CNST_LIMB(0x9634b43e) },\n  /*  78 */ { 5, 0.1590988078692942, CNST_LIMB(0xac16c8e0), CNST_LIMB(0x7cd3817d) },\n  /*  79 */ { 5, 0.1586349589155960, CNST_LIMB(0xb768278f), CNST_LIMB(0x65536761) },\n  /*  80 */ { 5, 0.1581795909397823, CNST_LIMB(0xc3500000), CNST_LIMB(0x4f8b588e) },\n  /*  81 */ { 5, 0.1577324383928644, CNST_LIMB(0xcfd41b91), CNST_LIMB(0x3b563c24) },\n  /*  82 */ { 5, 0.1572932473495469, CNST_LIMB(0xdcfa6920), CNST_LIMB(0x28928154) },\n  /*  83 */ { 5, 0.1568617748594410, CNST_LIMB(0xeac8fd83), CNST_LIMB(0x1721bfb0) },\n  /*  84 */ { 5, 0.1564377883420716, CNST_LIMB(0xf9461400), CNST_LIMB(0x6e8629d) },\n  /*  85 */ { 4, 0.1560210650222250, CNST_LIMB(0x31c84b1), CNST_LIMB(0x491cc17c) },\n  /*  86 */ { 4, 0.1556113914024940, CNST_LIMB(0x342ab10), CNST_LIMB(0x3a11d83b) },\n  /*  87 */ { 4, 0.1552085627701551, CNST_LIMB(0x36a2c21), CNST_LIMB(0x2be074cd) },\n  /*  88 */ { 4, 0.1548123827357682, CNST_LIMB(0x3931000), CNST_LIMB(0x1e7a02e7) },\n  /*  89 */ { 4, 0.1544226628011101, CNST_LIMB(0x3bd5ee1), CNST_LIMB(0x11d10edd) },\n  /*  90 */ { 4, 0.1540392219542636, CNST_LIMB(0x3e92110), CNST_LIMB(0x5d92c68) },\n  /*  91 */ { 4, 0.1536618862898642, CNST_LIMB(0x4165ef1), CNST_LIMB(0xf50dbfb2) },\n  /*  92 */ { 4, 0.1532904886526781, CNST_LIMB(0x4452100), CNST_LIMB(0xdf9f1316) },\n  /*  93 */ { 4, 0.1529248683028321, CNST_LIMB(0x4756fd1), CNST_LIMB(0xcb52a684) },\n  /*  94 */ { 4, 0.1525648706011593, CNST_LIMB(0x4a75410), CNST_LIMB(0xb8163e97) },\n  /*  95 */ { 4, 0.1522103467132434, 
CNST_LIMB(0x4dad681), CNST_LIMB(0xa5d8f269) },\n  /*  96 */ { 4, 0.1518611533308632, CNST_LIMB(0x5100000), CNST_LIMB(0x948b0fcd) },\n  /*  97 */ { 4, 0.1515171524096389, CNST_LIMB(0x546d981), CNST_LIMB(0x841e0215) },\n  /*  98 */ { 4, 0.1511782109217764, CNST_LIMB(0x57f6c10), CNST_LIMB(0x74843b1e) },\n  /*  99 */ { 4, 0.1508442006228941, CNST_LIMB(0x5b9c0d1), CNST_LIMB(0x65b11e6e) },\n  /* 100 */ { 4, 0.1505149978319906, CNST_LIMB(0x5f5e100), CNST_LIMB(0x5798ee23) },\n  /* 101 */ { 4, 0.1501904832236879, CNST_LIMB(0x633d5f1), CNST_LIMB(0x4a30b99b) },\n  /* 102 */ { 4, 0.1498705416319474, CNST_LIMB(0x673a910), CNST_LIMB(0x3d6e4d94) },\n  /* 103 */ { 4, 0.1495550618645152, CNST_LIMB(0x6b563e1), CNST_LIMB(0x314825b0) },\n  /* 104 */ { 4, 0.1492439365274121, CNST_LIMB(0x6f91000), CNST_LIMB(0x25b55f2e) },\n  /* 105 */ { 4, 0.1489370618588283, CNST_LIMB(0x73eb721), CNST_LIMB(0x1aadaccb) },\n  /* 106 */ { 4, 0.1486343375718350, CNST_LIMB(0x7866310), CNST_LIMB(0x10294ba2) },\n  /* 107 */ { 4, 0.1483356667053618, CNST_LIMB(0x7d01db1), CNST_LIMB(0x620f8f6) },\n  /* 108 */ { 4, 0.1480409554829326, CNST_LIMB(0x81bf100), CNST_LIMB(0xf91bd1b6) },\n  /* 109 */ { 4, 0.1477501131786861, CNST_LIMB(0x869e711), CNST_LIMB(0xe6d37b2a) },\n  /* 110 */ { 4, 0.1474630519902391, CNST_LIMB(0x8ba0a10), CNST_LIMB(0xd55cff6e) },\n  /* 111 */ { 4, 0.1471796869179853, CNST_LIMB(0x90c6441), CNST_LIMB(0xc4ad2db2) },\n  /* 112 */ { 4, 0.1468999356504447, CNST_LIMB(0x9610000), CNST_LIMB(0xb4b985cf) },\n  /* 113 */ { 4, 0.1466237184553111, CNST_LIMB(0x9b7e7c1), CNST_LIMB(0xa5782bef) },\n  /* 114 */ { 4, 0.1463509580758620, CNST_LIMB(0xa112610), CNST_LIMB(0x96dfdd2a) },\n  /* 115 */ { 4, 0.1460815796324244, CNST_LIMB(0xa6cc591), CNST_LIMB(0x88e7e509) },\n  /* 116 */ { 4, 0.1458155105286054, CNST_LIMB(0xacad100), CNST_LIMB(0x7b8813d3) },\n  /* 117 */ { 4, 0.1455526803620167, CNST_LIMB(0xb2b5331), CNST_LIMB(0x6eb8b595) },\n  /* 118 */ { 4, 0.1452930208392428, CNST_LIMB(0xb8e5710), CNST_LIMB(0x627289db) 
},\n  /* 119 */ { 4, 0.1450364656948130, CNST_LIMB(0xbf3e7a1), CNST_LIMB(0x56aebc07) },\n  /* 120 */ { 4, 0.1447829506139581, CNST_LIMB(0xc5c1000), CNST_LIMB(0x4b66dc33) },\n  /* 121 */ { 4, 0.1445324131589439, CNST_LIMB(0xcc6db61), CNST_LIMB(0x4094d8a3) },\n  /* 122 */ { 4, 0.1442847926987864, CNST_LIMB(0xd345510), CNST_LIMB(0x3632f7a5) },\n  /* 123 */ { 4, 0.1440400303421672, CNST_LIMB(0xda48871), CNST_LIMB(0x2c3bd1f0) },\n  /* 124 */ { 4, 0.1437980688733775, CNST_LIMB(0xe178100), CNST_LIMB(0x22aa4d5f) },\n  /* 125 */ { 4, 0.1435588526911310, CNST_LIMB(0xe8d4a51), CNST_LIMB(0x19799812) },\n  /* 126 */ { 4, 0.1433223277500932, CNST_LIMB(0xf05f010), CNST_LIMB(0x10a523e5) },\n  /* 127 */ { 4, 0.1430884415049874, CNST_LIMB(0xf817e01), CNST_LIMB(0x828a237) },\n  /* 128 */ { 4, 0.1428571428571429, 0x7 },\n  /* 129 */ { 4, 0.1426283821033600, CNST_LIMB(0x10818201), CNST_LIMB(0xf04ec452) },\n  /* 130 */ { 4, 0.1424021108869747, CNST_LIMB(0x11061010), CNST_LIMB(0xe136444a) },\n  /* 131 */ { 4, 0.1421782821510107, CNST_LIMB(0x118db651), CNST_LIMB(0xd2af9589) },\n  /* 132 */ { 4, 0.1419568500933153, CNST_LIMB(0x12188100), CNST_LIMB(0xc4b42a83) },\n  /* 133 */ { 4, 0.1417377701235801, CNST_LIMB(0x12a67c71), CNST_LIMB(0xb73dccf5) },\n  /* 134 */ { 4, 0.1415209988221527, CNST_LIMB(0x1337b510), CNST_LIMB(0xaa4698c5) },\n  /* 135 */ { 4, 0.1413064939005528, CNST_LIMB(0x13cc3761), CNST_LIMB(0x9dc8f729) },\n  /* 136 */ { 4, 0.1410942141636095, CNST_LIMB(0x14641000), CNST_LIMB(0x91bf9a30) },\n  /* 137 */ { 4, 0.1408841194731412, CNST_LIMB(0x14ff4ba1), CNST_LIMB(0x86257887) },\n  /* 138 */ { 4, 0.1406761707131039, CNST_LIMB(0x159df710), CNST_LIMB(0x7af5c98c) },\n  /* 139 */ { 4, 0.1404703297561400, CNST_LIMB(0x16401f31), CNST_LIMB(0x702c01a0) },\n  /* 140 */ { 4, 0.1402665594314587, CNST_LIMB(0x16e5d100), CNST_LIMB(0x65c3ceb1) },\n  /* 141 */ { 4, 0.1400648234939879, CNST_LIMB(0x178f1991), CNST_LIMB(0x5bb91502) },\n  /* 142 */ { 4, 0.1398650865947379, CNST_LIMB(0x183c0610), 
CNST_LIMB(0x5207ec23) },\n  /* 143 */ { 4, 0.1396673142523192, CNST_LIMB(0x18eca3c1), CNST_LIMB(0x48ac9c19) },\n  /* 144 */ { 4, 0.1394714728255649, CNST_LIMB(0x19a10000), CNST_LIMB(0x3fa39ab5) },\n  /* 145 */ { 4, 0.1392775294872041, CNST_LIMB(0x1a592841), CNST_LIMB(0x36e98912) },\n  /* 146 */ { 4, 0.1390854521985406, CNST_LIMB(0x1b152a10), CNST_LIMB(0x2e7b3140) },\n  /* 147 */ { 4, 0.1388952096850913, CNST_LIMB(0x1bd51311), CNST_LIMB(0x2655840b) },\n  /* 148 */ { 4, 0.1387067714131417, CNST_LIMB(0x1c98f100), CNST_LIMB(0x1e7596ea) },\n  /* 149 */ { 4, 0.1385201075671775, CNST_LIMB(0x1d60d1b1), CNST_LIMB(0x16d8a20d) },\n  /* 150 */ { 4, 0.1383351890281540, CNST_LIMB(0x1e2cc310), CNST_LIMB(0xf7bfe87) },\n  /* 151 */ { 4, 0.1381519873525671, CNST_LIMB(0x1efcd321), CNST_LIMB(0x85d2492) },\n  /* 152 */ { 4, 0.1379704747522905, CNST_LIMB(0x1fd11000), CNST_LIMB(0x179a9f4) },\n  /* 153 */ { 4, 0.1377906240751463, CNST_LIMB(0x20a987e1), CNST_LIMB(0xf59e80eb) },\n  /* 154 */ { 4, 0.1376124087861776, CNST_LIMB(0x21864910), CNST_LIMB(0xe8b768db) },\n  /* 155 */ { 4, 0.1374358029495937, CNST_LIMB(0x226761f1), CNST_LIMB(0xdc39d6d5) },\n  /* 156 */ { 4, 0.1372607812113589, CNST_LIMB(0x234ce100), CNST_LIMB(0xd021c5d1) },\n  /* 157 */ { 4, 0.1370873187823978, CNST_LIMB(0x2436d4d1), CNST_LIMB(0xc46b5e37) },\n  /* 158 */ { 4, 0.1369153914223921, CNST_LIMB(0x25254c10), CNST_LIMB(0xb912f39c) },\n  /* 159 */ { 4, 0.1367449754241439, CNST_LIMB(0x26185581), CNST_LIMB(0xae150294) },\n  /* 160 */ { 4, 0.1365760475984821, CNST_LIMB(0x27100000), CNST_LIMB(0xa36e2eb1) },\n  /* 161 */ { 4, 0.1364085852596902, CNST_LIMB(0x280c5a81), CNST_LIMB(0x991b4094) },\n  /* 162 */ { 4, 0.1362425662114337, CNST_LIMB(0x290d7410), CNST_LIMB(0x8f19241e) },\n  /* 163 */ { 4, 0.1360779687331669, CNST_LIMB(0x2a135bd1), CNST_LIMB(0x8564e6b7) },\n  /* 164 */ { 4, 0.1359147715670014, CNST_LIMB(0x2b1e2100), CNST_LIMB(0x7bfbb5b4) },\n  /* 165 */ { 4, 0.1357529539050150, CNST_LIMB(0x2c2dd2f1), CNST_LIMB(0x72dadcc8) 
},\n  /* 166 */ { 4, 0.1355924953769863, CNST_LIMB(0x2d428110), CNST_LIMB(0x69ffc498) },\n  /* 167 */ { 4, 0.1354333760385373, CNST_LIMB(0x2e5c3ae1), CNST_LIMB(0x6167f154) },\n  /* 168 */ { 4, 0.1352755763596663, CNST_LIMB(0x2f7b1000), CNST_LIMB(0x5911016e) },\n  /* 169 */ { 4, 0.1351190772136599, CNST_LIMB(0x309f1021), CNST_LIMB(0x50f8ac5f) },\n  /* 170 */ { 4, 0.1349638598663645, CNST_LIMB(0x31c84b10), CNST_LIMB(0x491cc17c) },\n  /* 171 */ { 4, 0.1348099059658079, CNST_LIMB(0x32f6d0b1), CNST_LIMB(0x417b26d8) },\n  /* 172 */ { 4, 0.1346571975321549, CNST_LIMB(0x342ab100), CNST_LIMB(0x3a11d83b) },\n  /* 173 */ { 4, 0.1345057169479844, CNST_LIMB(0x3563fc11), CNST_LIMB(0x32dee622) },\n  /* 174 */ { 4, 0.1343554469488779, CNST_LIMB(0x36a2c210), CNST_LIMB(0x2be074cd) },\n  /* 175 */ { 4, 0.1342063706143054, CNST_LIMB(0x37e71341), CNST_LIMB(0x2514bb58) },\n  /* 176 */ { 4, 0.1340584713587980, CNST_LIMB(0x39310000), CNST_LIMB(0x1e7a02e7) },\n  /* 177 */ { 4, 0.1339117329233981, CNST_LIMB(0x3a8098c1), CNST_LIMB(0x180ea5d0) },\n  /* 178 */ { 4, 0.1337661393673756, CNST_LIMB(0x3bd5ee10), CNST_LIMB(0x11d10edd) },\n  /* 179 */ { 4, 0.1336216750601997, CNST_LIMB(0x3d311091), CNST_LIMB(0xbbfb88e) },\n  /* 180 */ { 4, 0.1334783246737591, CNST_LIMB(0x3e921100), CNST_LIMB(0x5d92c68) },\n  /* 181 */ { 4, 0.1333360731748201, CNST_LIMB(0x3ff90031), CNST_LIMB(0x1c024c) },\n  /* 182 */ { 4, 0.1331949058177136, CNST_LIMB(0x4165ef10), CNST_LIMB(0xf50dbfb2) },\n  /* 183 */ { 4, 0.1330548081372441, CNST_LIMB(0x42d8eea1), CNST_LIMB(0xea30efa3) },\n  /* 184 */ { 4, 0.1329157659418126, CNST_LIMB(0x44521000), CNST_LIMB(0xdf9f1316) },\n  /* 185 */ { 4, 0.1327777653067443, CNST_LIMB(0x45d16461), CNST_LIMB(0xd555c0c9) },\n  /* 186 */ { 4, 0.1326407925678156, CNST_LIMB(0x4756fd10), CNST_LIMB(0xcb52a684) },\n  /* 187 */ { 4, 0.1325048343149731, CNST_LIMB(0x48e2eb71), CNST_LIMB(0xc193881f) },\n  /* 188 */ { 4, 0.1323698773862368, CNST_LIMB(0x4a754100), CNST_LIMB(0xb8163e97) },\n  /* 189 */ { 4, 
0.1322359088617821, CNST_LIMB(0x4c0e0f51), CNST_LIMB(0xaed8b724) },\n  /* 190 */ { 4, 0.1321029160581950, CNST_LIMB(0x4dad6810), CNST_LIMB(0xa5d8f269) },\n  /* 191 */ { 4, 0.1319708865228925, CNST_LIMB(0x4f535d01), CNST_LIMB(0x9d15039d) },\n  /* 192 */ { 4, 0.1318398080287045, CNST_LIMB(0x51000000), CNST_LIMB(0x948b0fcd) },\n  /* 193 */ { 4, 0.1317096685686114, CNST_LIMB(0x52b36301), CNST_LIMB(0x8c394d1d) },\n  /* 194 */ { 4, 0.1315804563506306, CNST_LIMB(0x546d9810), CNST_LIMB(0x841e0215) },\n  /* 195 */ { 4, 0.1314521597928493, CNST_LIMB(0x562eb151), CNST_LIMB(0x7c3784f8) },\n  /* 196 */ { 4, 0.1313247675185968, CNST_LIMB(0x57f6c100), CNST_LIMB(0x74843b1e) },\n  /* 197 */ { 4, 0.1311982683517524, CNST_LIMB(0x59c5d971), CNST_LIMB(0x6d02985d) },\n  /* 198 */ { 4, 0.1310726513121844, CNST_LIMB(0x5b9c0d10), CNST_LIMB(0x65b11e6e) },\n  /* 199 */ { 4, 0.1309479056113158, CNST_LIMB(0x5d796e61), CNST_LIMB(0x5e8e5c64) },\n  /* 200 */ { 4, 0.1308240206478128, CNST_LIMB(0x5f5e1000), CNST_LIMB(0x5798ee23) },\n  /* 201 */ { 4, 0.1307009860033912, CNST_LIMB(0x614a04a1), CNST_LIMB(0x50cf7bde) },\n  /* 202 */ { 4, 0.1305787914387386, CNST_LIMB(0x633d5f10), CNST_LIMB(0x4a30b99b) },\n  /* 203 */ { 4, 0.1304574268895465, CNST_LIMB(0x65383231), CNST_LIMB(0x43bb66bd) },\n  /* 204 */ { 4, 0.1303368824626505, CNST_LIMB(0x673a9100), CNST_LIMB(0x3d6e4d94) },\n  /* 205 */ { 4, 0.1302171484322746, CNST_LIMB(0x69448e91), CNST_LIMB(0x374842ee) },\n  /* 206 */ { 4, 0.1300982152363760, CNST_LIMB(0x6b563e10), CNST_LIMB(0x314825b0) },\n  /* 207 */ { 4, 0.1299800734730872, CNST_LIMB(0x6d6fb2c1), CNST_LIMB(0x2b6cde75) },\n  /* 208 */ { 4, 0.1298627138972530, CNST_LIMB(0x6f910000), CNST_LIMB(0x25b55f2e) },\n  /* 209 */ { 4, 0.1297461274170591, CNST_LIMB(0x71ba3941), CNST_LIMB(0x2020a2c5) },\n  /* 210 */ { 4, 0.1296303050907487, CNST_LIMB(0x73eb7210), CNST_LIMB(0x1aadaccb) },\n  /* 211 */ { 4, 0.1295152381234257, CNST_LIMB(0x7624be11), CNST_LIMB(0x155b891f) },\n  /* 212 */ { 4, 0.1294009178639407, 
CNST_LIMB(0x78663100), CNST_LIMB(0x10294ba2) },\n  /* 213 */ { 4, 0.1292873358018581, CNST_LIMB(0x7aafdeb1), CNST_LIMB(0xb160fe9) },\n  /* 214 */ { 4, 0.1291744835645007, CNST_LIMB(0x7d01db10), CNST_LIMB(0x620f8f6) },\n  /* 215 */ { 4, 0.1290623529140715, CNST_LIMB(0x7f5c3a21), CNST_LIMB(0x14930ef) },\n  /* 216 */ { 4, 0.1289509357448472, CNST_LIMB(0x81bf1000), CNST_LIMB(0xf91bd1b6) },\n  /* 217 */ { 4, 0.1288402240804449, CNST_LIMB(0x842a70e1), CNST_LIMB(0xefdcb0c7) },\n  /* 218 */ { 4, 0.1287302100711567, CNST_LIMB(0x869e7110), CNST_LIMB(0xe6d37b2a) },\n  /* 219 */ { 4, 0.1286208859913518, CNST_LIMB(0x891b24f1), CNST_LIMB(0xddfeb94a) },\n  /* 220 */ { 4, 0.1285122442369443, CNST_LIMB(0x8ba0a100), CNST_LIMB(0xd55cff6e) },\n  /* 221 */ { 4, 0.1284042773229231, CNST_LIMB(0x8e2ef9d1), CNST_LIMB(0xcceced50) },\n  /* 222 */ { 4, 0.1282969778809442, CNST_LIMB(0x90c64410), CNST_LIMB(0xc4ad2db2) },\n  /* 223 */ { 4, 0.1281903386569819, CNST_LIMB(0x93669481), CNST_LIMB(0xbc9c75f9) },\n  /* 224 */ { 4, 0.1280843525090381, CNST_LIMB(0x96100000), CNST_LIMB(0xb4b985cf) },\n  /* 225 */ { 4, 0.1279790124049078, CNST_LIMB(0x98c29b81), CNST_LIMB(0xad0326c2) },\n  /* 226 */ { 4, 0.1278743114199984, CNST_LIMB(0x9b7e7c10), CNST_LIMB(0xa5782bef) },\n  /* 227 */ { 4, 0.1277702427352035, CNST_LIMB(0x9e43b6d1), CNST_LIMB(0x9e1771a9) },\n  /* 228 */ { 4, 0.1276667996348261, CNST_LIMB(0xa1126100), CNST_LIMB(0x96dfdd2a) },\n  /* 229 */ { 4, 0.1275639755045533, CNST_LIMB(0xa3ea8ff1), CNST_LIMB(0x8fd05c41) },\n  /* 230 */ { 4, 0.1274617638294791, CNST_LIMB(0xa6cc5910), CNST_LIMB(0x88e7e509) },\n  /* 231 */ { 4, 0.1273601581921741, CNST_LIMB(0xa9b7d1e1), CNST_LIMB(0x8225759d) },\n  /* 232 */ { 4, 0.1272591522708010, CNST_LIMB(0xacad1000), CNST_LIMB(0x7b8813d3) },\n  /* 233 */ { 4, 0.1271587398372755, CNST_LIMB(0xafac2921), CNST_LIMB(0x750eccf9) },\n  /* 234 */ { 4, 0.1270589147554692, CNST_LIMB(0xb2b53310), CNST_LIMB(0x6eb8b595) },\n  /* 235 */ { 4, 0.1269596709794558, CNST_LIMB(0xb5c843b1), 
CNST_LIMB(0x6884e923) },\n  /* 236 */ { 4, 0.1268610025517973, CNST_LIMB(0xb8e57100), CNST_LIMB(0x627289db) },\n  /* 237 */ { 4, 0.1267629036018709, CNST_LIMB(0xbc0cd111), CNST_LIMB(0x5c80c07b) },\n  /* 238 */ { 4, 0.1266653683442337, CNST_LIMB(0xbf3e7a10), CNST_LIMB(0x56aebc07) },\n  /* 239 */ { 4, 0.1265683910770258, CNST_LIMB(0xc27a8241), CNST_LIMB(0x50fbb19b) },\n  /* 240 */ { 4, 0.1264719661804097, CNST_LIMB(0xc5c10000), CNST_LIMB(0x4b66dc33) },\n  /* 241 */ { 4, 0.1263760881150453, CNST_LIMB(0xc91209c1), CNST_LIMB(0x45ef7c7c) },\n  /* 242 */ { 4, 0.1262807514205999, CNST_LIMB(0xcc6db610), CNST_LIMB(0x4094d8a3) },\n  /* 243 */ { 4, 0.1261859507142915, CNST_LIMB(0xcfd41b91), CNST_LIMB(0x3b563c24) },\n  /* 244 */ { 4, 0.1260916806894653, CNST_LIMB(0xd3455100), CNST_LIMB(0x3632f7a5) },\n  /* 245 */ { 4, 0.1259979361142023, CNST_LIMB(0xd6c16d31), CNST_LIMB(0x312a60c3) },\n  /* 246 */ { 4, 0.1259047118299582, CNST_LIMB(0xda488710), CNST_LIMB(0x2c3bd1f0) },\n  /* 247 */ { 4, 0.1258120027502338, CNST_LIMB(0xdddab5a1), CNST_LIMB(0x2766aa45) },\n  /* 248 */ { 4, 0.1257198038592741, CNST_LIMB(0xe1781000), CNST_LIMB(0x22aa4d5f) },\n  /* 249 */ { 4, 0.1256281102107963, CNST_LIMB(0xe520ad61), CNST_LIMB(0x1e06233c) },\n  /* 250 */ { 4, 0.1255369169267456, CNST_LIMB(0xe8d4a510), CNST_LIMB(0x19799812) },\n  /* 251 */ { 4, 0.1254462191960791, CNST_LIMB(0xec940e71), CNST_LIMB(0x15041c33) },\n  /* 252 */ { 4, 0.1253560122735751, CNST_LIMB(0xf05f0100), CNST_LIMB(0x10a523e5) },\n  /* 253 */ { 4, 0.1252662914786691, CNST_LIMB(0xf4359451), CNST_LIMB(0xc5c2749) },\n  /* 254 */ { 4, 0.1251770521943144, CNST_LIMB(0xf817e010), CNST_LIMB(0x828a237) },\n  /* 255 */ { 4, 0.1250882898658682, CNST_LIMB(0xfc05fc01), CNST_LIMB(0x40a1423) },\n  /* 256 */ { 4, 0.1250000000000000, 0x8 },\n};\n\n#elif GMP_NUMB_BITS == 64\n\nconst struct bases mp_bases[257] =\n{\n  /*   0 */ { 0, 0.0, 0 },\n  /*   1 */ { 0, 1e37, 0 },\n  /*   2 */ { 64, 1.0000000000000000, 0x1 },\n  /*   3 */ { 40, 
0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },\n  /*   4 */ { 32, 0.5000000000000000, 0x2 },\n  /*   5 */ { 27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90) },\n  /*   6 */ { 24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },\n  /*   7 */ { 22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b) },\n  /*   8 */ { 21, 0.3333333333333334, 0x3 },\n  /*   9 */ { 20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },\n  /*  10 */ { 19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a) },\n  /*  11 */ { 18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b) },\n  /*  12 */ { 17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f) },\n  /*  13 */ { 17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3) },\n  /*  14 */ { 16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86) },\n  /*  15 */ { 16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48) },\n  /*  16 */ { 16, 0.2500000000000000, 0x4 },\n  /*  17 */ { 15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323) },\n  /*  18 */ { 15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c) },\n  /*  19 */ { 15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec) },\n  /*  20 */ { 14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e) },\n  /*  21 */ { 14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239) },\n  /*  22 */ { 14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08) },\n  /*  23 */ { 14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38) },\n  /*  24 */ { 13, 
0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1) },\n  /*  25 */ { 13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5) },\n  /*  26 */ { 13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9) },\n  /*  27 */ { 13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae) },\n  /*  28 */ { 13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636) },\n  /*  29 */ { 13, 0.2058468324604345, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d) },\n  /*  30 */ { 13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6) },\n  /*  31 */ { 12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5) },\n  /*  32 */ { 12, 0.2000000000000000, 0x5 },\n  /*  33 */ { 12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f) },\n  /*  34 */ { 12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec) },\n  /*  35 */ { 12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509) },\n  /*  36 */ { 12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },\n  /*  37 */ { 12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f) },\n  /*  38 */ { 12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544) },\n  /*  39 */ { 12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c) },\n  /*  40 */ { 12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197) },\n  /*  41 */ { 11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae) },\n  /*  42 */ { 11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7) },\n  /*  43 */ { 11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94) },\n  /*  44 */ { 11, 
0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a) },\n  /*  45 */ { 11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46) },\n  /*  46 */ { 11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083) },\n  /*  47 */ { 11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988) },\n  /*  48 */ { 11, 0.1790522317510414, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167) },\n  /*  49 */ { 11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b) },\n  /*  50 */ { 11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962) },\n  /*  51 */ { 11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e) },\n  /*  52 */ { 11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d) },\n  /*  53 */ { 11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25) },\n  /*  54 */ { 11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a) },\n  /*  55 */ { 11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d) },\n  /*  56 */ { 11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1) },\n  /*  57 */ { 10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1) },\n  /*  58 */ { 10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c) },\n  /*  59 */ { 10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492) },\n  /*  60 */ { 10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5) },\n  /*  61 */ { 10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416) },\n  /*  62 */ { 10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5) },\n  /*  63 */ { 10, 0.1673001788101741, 
CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641) },\n  /*  64 */ { 10, 0.1666666666666667, 0x6 },\n  /*  65 */ { 10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1) },\n  /*  66 */ { 10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33) },\n  /*  67 */ { 10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0) },\n  /*  68 */ { 10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409) },\n  /*  69 */ { 10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f) },\n  /*  70 */ { 10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62) },\n  /*  71 */ { 10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2) },\n  /*  72 */ { 10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2) },\n  /*  73 */ { 10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b) },\n  /*  74 */ { 10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5) },\n  /*  75 */ { 10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e) },\n  /*  76 */ { 10, 0.1600530732548214, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd) },\n  /*  77 */ { 10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec) },\n  /*  78 */ { 10, 0.1590988078692942, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505) },\n  /*  79 */ { 10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac) },\n  /*  80 */ { 10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d) },\n  /*  81 */ { 10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },\n  /*  82 */ { 10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f) },\n  /*  83 */ { 10, 
0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47) },\n  /*  84 */ { 10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a) },\n  /*  85 */ { 9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4) },\n  /*  86 */ { 9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e) },\n  /*  87 */ { 9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89) },\n  /*  88 */ { 9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa) },\n  /*  89 */ { 9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383) },\n  /*  90 */ { 9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10) },\n  /*  91 */ { 9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43) },\n  /*  92 */ { 9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447) },\n  /*  93 */ { 9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0) },\n  /*  94 */ { 9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4) },\n  /*  95 */ { 9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f) },\n  /*  96 */ { 9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794) },\n  /*  97 */ { 9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc) },\n  /*  98 */ { 9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3) },\n  /*  99 */ { 9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518) },\n  /* 100 */ { 9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e) },\n  /* 101 */ { 9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f) },\n  /* 102 */ { 9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1) 
},\n  /* 103 */ { 9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328) },\n  /* 104 */ { 9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c) },\n  /* 105 */ { 9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179) },\n  /* 106 */ { 9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf) },\n  /* 107 */ { 9, 0.1483356667053618, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32) },\n  /* 108 */ { 9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188) },\n  /* 109 */ { 9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c) },\n  /* 110 */ { 9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f) },\n  /* 111 */ { 9, 0.1471796869179853, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0) },\n  /* 112 */ { 9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8) },\n  /* 113 */ { 9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82) },\n  /* 114 */ { 9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a) },\n  /* 115 */ { 9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5) },\n  /* 116 */ { 9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610) },\n  /* 117 */ { 9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c) },\n  /* 118 */ { 9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977) },\n  /* 119 */ { 9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e) },\n  /* 120 */ { 9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5) },\n  /* 121 */ { 9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b) },\n  /* 122 */ { 9, 0.1442847926987864, 
CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5) },\n  /* 123 */ { 9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b) },\n  /* 124 */ { 9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10) },\n  /* 125 */ { 9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90) },\n  /* 126 */ { 9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8) },\n  /* 127 */ { 9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1) },\n  /* 128 */ { 9, 0.1428571428571429, 0x7 },\n  /* 129 */ { 9, 0.1426283821033600, CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d) },\n  /* 130 */ { 9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0) },\n  /* 131 */ { 9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085) },\n  /* 132 */ { 9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d) },\n  /* 133 */ { 9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25) },\n  /* 134 */ { 9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d) },\n  /* 135 */ { 9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914) },\n  /* 136 */ { 9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849) },\n  /* 137 */ { 9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9) },\n  /* 138 */ { 9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0) },\n  /* 139 */ { 8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf) },\n  /* 140 */ { 8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05) },\n  /* 141 */ { 8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19) },\n  /* 142 */ { 8, 0.1398650865947379, 
CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe) },\n  /* 143 */ { 8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7) },\n  /* 144 */ { 8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757) },\n  /* 145 */ { 8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae) },\n  /* 146 */ { 8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40) },\n  /* 147 */ { 8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f) },\n  /* 148 */ { 8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d) },\n  /* 149 */ { 8, 0.1385201075671775, CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1) },\n  /* 150 */ { 8, 0.1383351890281540, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe) },\n  /* 151 */ { 8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e) },\n  /* 152 */ { 8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc) },\n  /* 153 */ { 8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910) },\n  /* 154 */ { 8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614) },\n  /* 155 */ { 8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2) },\n  /* 156 */ { 8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d) },\n  /* 157 */ { 8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782) },\n  /* 158 */ { 8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b) },\n  /* 159 */ { 8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd) },\n  /* 160 */ { 8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9) },\n  /* 161 */ { 8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753) },\n  /* 162 */ { 8, 
0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7) },\n  /* 163 */ { 8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb) },\n  /* 164 */ { 8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac) },\n  /* 165 */ { 8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e) },\n  /* 166 */ { 8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004) },\n  /* 167 */ { 8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8) },\n  /* 168 */ { 8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5) },\n  /* 169 */ { 8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe) },\n  /* 170 */ { 8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532) },\n  /* 171 */ { 8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02) },\n  /* 172 */ { 8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317) },\n  /* 173 */ { 8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c) },\n  /* 174 */ { 8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e) },\n  /* 175 */ { 8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286) },\n  /* 176 */ { 8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb) },\n  /* 177 */ { 8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b) },\n  /* 178 */ { 8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1) },\n  /* 179 */ { 8, 0.1336216750601997, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef) },\n  /* 180 */ { 8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3) },\n  /* 181 */ { 8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb) },\n 
 /* 182 */ { 8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8) },\n  /* 183 */ { 8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005) },\n  /* 184 */ { 8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926) },\n  /* 185 */ { 8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f) },\n  /* 186 */ { 8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6) },\n  /* 187 */ { 8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497) },\n  /* 188 */ { 8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67) },\n  /* 189 */ { 8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b) },\n  /* 190 */ { 8, 0.1321029160581950, CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56) },\n  /* 191 */ { 8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0) },\n  /* 192 */ { 8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf) },\n  /* 193 */ { 8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a) },\n  /* 194 */ { 8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962) },\n  /* 195 */ { 8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e) },\n  /* 196 */ { 8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86) },\n  /* 197 */ { 8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca) },\n  /* 198 */ { 8, 0.1310726513121844, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2) },\n  /* 199 */ { 8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9) },\n  /* 200 */ { 8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6) },\n  /* 201 */ { 8, 0.1307009860033912, 
CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22) },\n  /* 202 */ { 8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20) },\n  /* 203 */ { 8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf) },\n  /* 204 */ { 8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9) },\n  /* 205 */ { 8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff) },\n  /* 206 */ { 8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a) },\n  /* 207 */ { 8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd) },\n  /* 208 */ { 8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985) },\n  /* 209 */ { 8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7) },\n  /* 210 */ { 8, 0.1296303050907487, CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175) },\n  /* 211 */ { 8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e) },\n  /* 212 */ { 8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef) },\n  /* 213 */ { 8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9) },\n  /* 214 */ { 8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc) },\n  /* 215 */ { 8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b) },\n  /* 216 */ { 8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },\n  /* 217 */ { 8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2) },\n  /* 218 */ { 8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049) },\n  /* 219 */ { 8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada) },\n  /* 220 */ { 8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933) 
},\n  /* 221 */ { 8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d) },\n  /* 222 */ { 8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b) },\n  /* 223 */ { 8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4) },\n  /* 224 */ { 8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053) },\n  /* 225 */ { 8, 0.1279790124049078, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48) },\n  /* 226 */ { 8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9) },\n  /* 227 */ { 8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e) },\n  /* 228 */ { 8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b) },\n  /* 229 */ { 8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1) },\n  /* 230 */ { 8, 0.1274617638294791, CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762) },\n  /* 231 */ { 8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555) },\n  /* 232 */ { 8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe) },\n  /* 233 */ { 8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f) },\n  /* 234 */ { 8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3) },\n  /* 235 */ { 8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c) },\n  /* 236 */ { 8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773) },\n  /* 237 */ { 8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5) },\n  /* 238 */ { 8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f) },\n  /* 239 */ { 8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c) },\n  /* 240 */ { 8, 0.1264719661804097, 
CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529) },\n  /* 241 */ { 8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1) },\n  /* 242 */ { 8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2) },\n  /* 243 */ { 8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },\n  /* 244 */ { 8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6) },\n  /* 245 */ { 8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2) },\n  /* 246 */ { 8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47) },\n  /* 247 */ { 8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016) },\n  /* 248 */ { 8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18) },\n  /* 249 */ { 8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d) },\n  /* 250 */ { 8, 0.1255369169267456, CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5) },\n  /* 251 */ { 8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3) },\n  /* 252 */ { 8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d) },\n  /* 253 */ { 8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac) },\n  /* 254 */ { 8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93) },\n  /* 255 */ { 8, 0.1250882898658682, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f) },\n  /* 256 */ { 8, 0.1250000000000000, 0x8 },\n};\n\n#else\n#error no data available for this limb size in mp_bases.c\n#endif\n"
  },
  {
    "path": "mpn/generic/mul.c",
    "content": "/* mpn_mul -- Multiply two natural numbers.\n\n   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul)\n   ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH\n   THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED\n   THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005\nFree Software Foundation, Inc.\n\nCopyright William Hart 2009\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#ifndef MUL_BASECASE_MAX_UN\n#define MUL_BASECASE_MAX_UN 500\n#endif\n\n/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v\n   (pointed to by VP, with VN limbs), and store the result at PRODP.  The\n   result is UN + VN limbs.  Return the most significant limb of the result.\n\n   NOTE: The space pointed to by PRODP is overwritten before finished with U\n   and V, so overlap is an error.\n\n   Argument constraints:\n   1. UN >= VN.\n   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from\n      the multiplier and the multiplicand.  
*/\n\nmp_limb_t\nmpn_mul (mp_ptr prodp,\n\t mp_srcptr up, mp_size_t un,\n\t mp_srcptr vp, mp_size_t vn)\n{\n  mp_size_t l, k;\n  mp_limb_t c;\n\n  ASSERT (un >= vn);\n  ASSERT (vn >= 1);\n  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));\n  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));\n\n  if (un == vn)\n   {\n    if (up == vp)\n    {\n      mpn_sqr (prodp, up, un);\n      return prodp[2 * un - 1];\n    }\n    else\n    {\n      mpn_mul_n (prodp, up, vp, un);\n      return prodp[2 * un - 1];\n    }\n   }\n\n  if (vn < MUL_KARATSUBA_THRESHOLD)\n    { /* plain schoolbook multiplication */\n      if (un <= MUL_BASECASE_MAX_UN)\n\tmpn_mul_basecase (prodp, up, un, vp, vn);\n      else\n\t{\n\t  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory\n\t     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply\n\t     these pieces with the vp[] operand.  After each such partial\n\t     multiplication (but the last) we copy the most significant vn\n\t     limbs into a temporary buffer since that part would otherwise be\n\t     overwritten by the next multiplication.  After the next\n\t     multiplication, we add it back.  
This illustrates the situation:\n\n                                                    -->vn<--\n                                                      |  |<------- un ------->|\n                                                         _____________________|\n                                                        X                    /|\n                                                      /XX__________________/  |\n                                    _____________________                     |\n                                   X                    /                     |\n                                 /XX__________________/                       |\n               _____________________                                          |\n              /                    /                                          |\n            /____________________/                                            |\n\t    ==================================================================\n\n\t    The parts marked with X are the parts whose sums are copied into\n\t    the temporary buffer.  */\n\n\t  mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT];\n\t  mp_limb_t cy;\n          ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT);\n\n\t  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);\n\t  prodp += MUL_BASECASE_MAX_UN;\n\t  MPN_COPY (tp, prodp, vn);\t\t/* preserve high triangle */\n\t  up += MUL_BASECASE_MAX_UN;\n\t  un -= MUL_BASECASE_MAX_UN;\n\t  while (un > MUL_BASECASE_MAX_UN)\n\t    {\n\t      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);\n\t      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */\n\t      mpn_incr_u (prodp + vn, cy);\t\t/* safe? 
*/\n\t      prodp += MUL_BASECASE_MAX_UN;\n\t      MPN_COPY (tp, prodp, vn);\t\t/* preserve high triangle */\n\t      up += MUL_BASECASE_MAX_UN;\n\t      un -= MUL_BASECASE_MAX_UN;\n\t    }\n\t  if (un > vn)\n\t    {\n\t      mpn_mul_basecase (prodp, up, un, vp, vn);\n\t    }\n\t  else\n\t    {\n\t      ASSERT_ALWAYS (un > 0);\n\t      mpn_mul_basecase (prodp, vp, vn, up, un);\n\t    }\n\t  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */\n\t  mpn_incr_u (prodp + vn, cy);\t\t/* safe? */\n\t}\n      return prodp[un + vn - 1];\n  }\n\n  if (ABOVE_THRESHOLD (un + vn, 2*MUL_FFT_FULL_THRESHOLD)\n      && ABOVE_THRESHOLD (3*vn, MUL_FFT_FULL_THRESHOLD))\n    {\n      mpn_mul_fft_main (prodp, up, un, vp, vn);\n      return prodp[un + vn - 1];\n    }\n\n  k = (un + 3)/4; // ceil(un/4)\n\n#if GMP_NUMB_BITS == 32\n  if ((ABOVE_THRESHOLD (un + vn, 2*MUL_TOOM8H_THRESHOLD)) && (vn>=86) && (5*un <= 11*vn))\n#else\n  if ((ABOVE_THRESHOLD (un + vn, 2*MUL_TOOM8H_THRESHOLD)) && (vn>=86) && (4*un <= 13*vn))\n#endif\n  {\n      mpn_toom8h_mul(prodp, up, un, vp, vn);\n      return prodp[un + vn - 1];\n  }\n  \n  if (ABOVE_THRESHOLD (un + vn, 2*MUL_TOOM4_THRESHOLD))\n  {\n          if (vn > 3*k)\n          {\n             mpn_toom4_mul(prodp, up, un, vp, vn);\n             return prodp[un + vn - 1];\n          } else\n          {\n             l = (un + 4)/5; // ceil(un/5)\n             if ((((vn > 9*k/4) && (un+vn <= 6*MUL_TOOM4_THRESHOLD)) \n                 || ((vn > 2*l) && (un+vn > 6*MUL_TOOM4_THRESHOLD)))\n                 && (vn <= 3*l))\n             {\n                mpn_toom53_mul(prodp, up, un, vp, vn);\n                return prodp[un + vn - 1];\n             }\n          }\n  } \n  \n  if (ABOVE_THRESHOLD (un + vn, 2*MUL_TOOM3_THRESHOLD) && (vn > k))\n  {\n          mp_ptr ws;\n          TMP_DECL;\n          TMP_MARK;\n\n          if (vn < 2*k) // un/2 >= vn > un/4\n          {\n                  ws = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_TSIZE(un));\n      
            mpn_toom42_mul(prodp, up, un, vp, vn, ws);\n                  TMP_FREE;\n                  return prodp[un + vn - 1];\n          }\n\n          l = (un+2)/3; //ceil(u/3)\n          if (vn > 2*l) // un >= vn > 2un/3\n          {\n                  ws = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_TSIZE(un));\n                  mpn_toom3_mul(prodp, up, un, vp, vn, ws);\n                  TMP_FREE;\n                  return prodp[un + vn - 1];\n          } else // 2un/3 >= vn > un/3\n          {\n                  ws = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_TSIZE(un));\n                  mpn_toom32_mul(prodp, up, un, vp, vn, ws);\n                  TMP_FREE;\n                  return prodp[un + vn - 1];\n          }\n  }\n\n  mpn_mul_n (prodp, up, vp, vn);\n\n  if (un != vn)\n    { mp_limb_t t;\n      mp_ptr ws;\n      TMP_DECL;\n      TMP_MARK;\n\n      prodp += vn;\n      l = vn;\n      up += vn;\n      un -= vn;\n\n      if (un < vn)\n\t{\n\t  /* Swap u's and v's. */\n\t  MPN_SRCPTR_SWAP (up,un, vp,vn);\n\t}\n\n      ws = TMP_ALLOC_LIMBS ((vn >= MUL_KARATSUBA_THRESHOLD ? vn : un) + vn);\n\n      t = 0;\n      while (vn >= MUL_KARATSUBA_THRESHOLD)\n\t{\n\t  mpn_mul_n (ws, up, vp, vn);\n\t  if (l <= 2*vn)\n\t    {\n\t      t += mpn_add_n (prodp, prodp, ws, l);\n\t      if (l != 2*vn)\n\t\t{\n\t\t  t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);\n\t\t  l = 2*vn;\n\t\t}\n\t    }\n\t  else\n\t    {\n\t      c = mpn_add_n (prodp, prodp, ws, 2*vn);\n\t      t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);\n\t    }\n\t  prodp += vn;\n\t  l -= vn;\n\t  up += vn;\n\t  un -= vn;\n\t  if (un < vn)\n\t    {\n\t      /* Swap u's and v's. 
*/\n\t      MPN_SRCPTR_SWAP (up,un, vp,vn);\n\t    }\n\t\t}\n\n      if (vn != 0)\n\t{\n\t  mpn_mul_basecase (ws, up, un, vp, vn);\n\t  if (l <= un + vn)\n\t    {\n\t      t += mpn_add_n (prodp, prodp, ws, l);\n\t      if (l != un + vn)\n\t\tt = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);\n\t    }\n\t  else\n\t    {\n\t      c = mpn_add_n (prodp, prodp, ws, un + vn);\n\t      t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);\n\t    }\n\t}\n\n      TMP_FREE;\n  }\n\n  return prodp[un + vn - 1];\n}\n"
  },
  {
    "path": "mpn/generic/mul_1.c",
    "content": "/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the\n   product in a second limb vector.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if GMP_NAIL_BITS == 0\n\nmp_limb_t\nmpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t ul, cl, hpl, lpl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));\n\n  cl = 0;\n  do\n    {\n      ul = *up++;\n      umul_ppmm (hpl, lpl, ul, vl);\n\n      lpl += cl;\n      cl = (lpl < cl) + hpl;\n\n      *rp++ = lpl;\n    }\n  while (--n != 0);\n\n  return cl;\n}\n\n#endif\n\n#if GMP_NAIL_BITS >= 1\n\nmp_limb_t\nmpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t shifted_vl, ul, lpl, hpl, prev_hpl, xw, cl, xl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));\n  ASSERT_MPN (up, n);\n  ASSERT_LIMB (vl);\n\n  shifted_vl = vl << GMP_NAIL_BITS;\n  cl = 0;\n  prev_hpl = 0;\n  do\n    {\n      ul = *up++;\n\n      umul_ppmm (hpl, lpl, ul, shifted_vl);\n      lpl >>= GMP_NAIL_BITS;\n      xw = prev_hpl + lpl + cl;\n      cl = xw >> GMP_NUMB_BITS;\n      xl = xw & GMP_NUMB_MASK;\n      *rp++ = xl;\n      prev_hpl = hpl;\n    }\n  while (--n != 0);\n\n  return prev_hpl + cl;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/mul_basecase.c",
    "content": "/* mpn_mul_basecase -- Internal routine to multiply two natural numbers\n   of length m and n.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Multiply {up,usize} by {vp,vsize} and write the result to\n   {prodp,usize+vsize}.  Must have usize>=vsize.\n\n   Note that prodp gets usize+vsize limbs stored, even if the actual result\n   only needs usize+vsize-1.\n\n   There's no good reason to call here with vsize>=MUL_KARATSUBA_THRESHOLD.\n   Currently this is allowed, but it might not be in the future.\n\n   This is the most critical code for multiplication.  All multiplies rely\n   on this, both small and huge.  Small ones arrive here immediately, huge\n   ones arrive here as this is the base case for Karatsuba's recursive\n   algorithm.  */\n\nvoid\nmpn_mul_basecase (mp_ptr rp,\n\t\t  mp_srcptr up, mp_size_t un,\n\t\t  mp_srcptr vp, mp_size_t vn)\n{\n  ASSERT (un >= vn);\n  ASSERT (vn >= 1);\n  ASSERT (! 
MPN_OVERLAP_P (rp, un+vn, up, un));\n  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));\n\n  /* We first multiply by the low order limb (or depending on optional function\n     availability, limbs).  This result can be stored, not added, to rp.  We\n     also avoid a loop for zeroing this way.  */\n\n#if HAVE_NATIVE_mpn_mul_2\n  if (vn >= 2)\n    {\n      rp[un + 1] = mpn_mul_2 (rp, up, un, vp);\n      rp += 2, vp += 2, vn -= 2;\n    }\n  else\n    {\n      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);\n      return;\n    }\n#else\n  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);\n  rp += 1, vp += 1, vn -= 1;\n#endif\n\n  /* Now accumulate the product of up[] and the next higher limb (or depending\n     on optional function availability, limbs) from vp[].  */\n\n#define MAX_LEFT MP_SIZE_T_MAX\t/* Used to simplify loops into if statements */\n\n\n#if HAVE_NATIVE_mpn_addmul_6\n  while (vn >= 6)\n    {\n      rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);\n      if (MAX_LEFT == 6)\n\treturn;\n      rp += 6, vp += 6, vn -= 6;\n      if (MAX_LEFT < 2 * 6)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (6 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_5\n  while (vn >= 5)\n    {\n      rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);\n      if (MAX_LEFT == 5)\n\treturn;\n      rp += 5, vp += 5, vn -= 5;\n      if (MAX_LEFT < 2 * 5)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (5 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_4\n  while (vn >= 4)\n    {\n      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);\n      if (MAX_LEFT == 4)\n\treturn;\n      rp += 4, vp += 4, vn -= 4;\n      if (MAX_LEFT < 2 * 4)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (4 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_3\n  while (vn >= 3)\n    {\n      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);\n      if (MAX_LEFT == 3)\n\treturn;\n      rp += 3, vp += 3, vn -= 3;\n      if (MAX_LEFT < 2 * 3)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (3 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2\n  
while (vn >= 2)\n    {\n      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);\n      if (MAX_LEFT == 2)\n\treturn;\n      rp += 2, vp += 2, vn -= 2;\n      if (MAX_LEFT < 2 * 2)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (2 - 1)\n#endif\n\n  while (vn >= 1)\n    {\n      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);\n      if (MAX_LEFT == 1)\n\treturn;\n      rp += 1, vp += 1, vn -= 1;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/mul_fft.c",
    "content": "\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint \nmpn_mul_fft(mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an,\n    mp_srcptr bp, mp_size_t bn, int k)\n{\n    mp_ptr rpp, app, bpp, tpp;\n    mp_size_t t = rn + 1;\n    TMP_DECL;\n    TMP_MARK;\n\n    rpp = (mp_ptr)TMP_ALLOC_LIMBS(t);\n    tpp = (mp_ptr)TMP_ALLOC_LIMBS(t);\n    app = (mp_ptr)TMP_ALLOC_LIMBS(t);\n    bpp = (mp_ptr)TMP_ALLOC_LIMBS(t);\n\n    mpn_copyi(app, ap, an); mpn_zero(app + an, t - an);\n    mpn_copyi(bpp, bp, bn); mpn_zero(bpp + bn, t - bn);\n\n    mpn_mulmod_Bexpp1(rpp, app, bpp, rn, tpp);\n    mpn_copyi(rp, rpp, rn);\n    t = rpp[rn];\n\n    TMP_FREE;\n    return t;\n}\n"
  },
  {
    "path": "mpn/generic/mul_n.c",
    "content": "/* mpn_mul_n and helper function -- Multiply/square natural numbers.\n\n   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)\n   ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH\n   THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED\n   THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,\n2005, Free Software Foundation, Inc.\n\nCopyright 2009 Jason Moxham\nCopyright 2009 William Hart\nCopyright 2011 The Code Cavern\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if ! 
HAVE_NATIVE_mpn_karasub && HAVE_NATIVE_mpn_addsub_n\n\nstatic void\tmpn_karasub(mp_ptr rp, mp_ptr tp, mp_size_t n)\n{\n   mp_size_t n2, n3;\n   mp_limb_t c1 = 0, c2, c3, top[2];\n\n   n2 = n>>1;\n   n3 = n - n2;\n\n   c2 = mpn_addsub_n(tp, rp, rp + 2*n2, tp, 2*n2);\n   c3 = mpn_add_n(rp + n2, rp + n2, tp, 2*n2);\n\n   top[1] = rp[2*n2 + 2*n3 - 1];\n   top[0] = rp[2*n2 + 2*n3 - 2];\n\n   mpn_incr_u(rp + 3*n2, c3);\n\n   if (c2 == 1) mpn_incr_u(rp + 3*n2, 1);\n   if (c2 == -1) mpn_decr_u(rp + 3*n2, 1);\n\n   if (n2 == n3) \n      return;\n\n   c1=mpn_sub_n(rp + 3*n2, rp + 3*n2, tp + 2*n2, 2);\n   c2=mpn_add_n(rp + 3*n2, rp + 3*n2, top, 2);\n\n   if(c2 == 1 && c1 == 0) mpn_incr_u(rp + 3*n2 + 2, 1);\n   if(c2 == 0 && c1 == 1) mpn_decr_u(rp + 3*n2 + 2, 1);\n}\n#endif\n\n#if ! HAVE_NATIVE_mpn_karaadd && HAVE_NATIVE_mpn_addadd_n\n\nstatic void\tmpn_karaadd(mp_ptr rp, mp_ptr tp, mp_size_t n)\n{\n   mp_size_t n2, n3;\n   mp_limb_t c1 = 0, c2, c3;\n\n   n2 = n>>1;\n   n3 = n - n2;\n\n   c2 = mpn_addadd_n(tp, rp, rp + 2*n2, tp, 2*n2);\n\n   if (n3 != n2) c1 = mpn_add_n(tp + 2*n2, rp + 4*n2, tp + 2*n2, 2);\n\n   c3 = mpn_add_n(rp + n2, rp + n2, tp, 2*n3);\n\n   mpn_incr_u(rp + n2 + 2*n3, c3 + c1);\n   mpn_incr_u(rp + n2 + 2*n2, c2);\n}\n#endif\n\n#if ! HAVE_NATIVE_mpn_karasub && ! 
HAVE_NATIVE_mpn_addsub_n\n\nstatic void\tmpn_karasub(mp_ptr rp, mp_ptr tp, mp_size_t n)\n{\n   mp_size_t n2, n3;\n   mp_limb_t c1, c2, c3, top[2];\n\n   n2 = n>>1;\n   n3 = n - n2;\n\n   c1 = mpn_sub_n(tp, rp + 2*n2, tp, 2*n2);\n   c2 = mpn_add_n(tp, tp, rp, 2*n2);\n   c3 = mpn_add_n(rp + n2, rp + n2, tp, 2*n2);\n\n   top[1] = rp[2*n2 + 2*n3 - 1];\n   top[0] = rp[2*n2 + 2*n3 - 2];\n\n   mpn_incr_u(rp + 3*n2, c3);\n   mpn_incr_u(rp + 3*n2, c2);\n   mpn_decr_u(rp + 3*n2, c1);\n\n   if(n2 == n3)\n      return;\n\n   c1 = mpn_sub_n(rp + 3*n2, rp + 3*n2, tp + 2*n2, 2);\n   c2 = mpn_add_n(rp + 3*n2, rp + 3*n2, top, 2);\n\n   if(c2 == 1 && c1 == 0) mpn_incr_u(rp + 3*n2 + 2, 1);\n   if(c2 == 0 && c1 == 1) mpn_decr_u(rp + 3*n2 + 2, 1);\n}\n#endif\n\n#if ! HAVE_NATIVE_mpn_karaadd && ! HAVE_NATIVE_mpn_addadd_n\n\nstatic void\tmpn_karaadd(mp_ptr rp, mp_ptr tp, mp_size_t n)\n{\n   mp_size_t n2, n3;\n   mp_limb_t c1, c2, c3;\n\n   n2 = n>>1;\n   n3 = n - n2;\n\n   c1 = mpn_add_n(tp, rp + 2*n2, tp, 2*n3);\n   c2 = mpn_add_n(tp, tp, rp, 2*n2);\n   c3 = mpn_add_n(rp + n2, rp + n2, tp, 2*n3);\n   \n   mpn_incr_u(rp + n2 + 2*n3, c3 + c1);\n   mpn_incr_u(rp + n2 + 2*n2, c2);\n}\n#endif\n\n/* (rp, 2n) = (xp, n)*(yp, n) with temp space (tp, 2*n + C) */\nvoid\tmpn_kara_mul_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)\n{\n   mp_size_t n2, n3;\n   mp_srcptr xl, yl, xh, yh;\n   mp_ptr dx, dy;\n   int suboradd;\n   mp_limb_t c;\n\n   n2 = n>>1;\n   suboradd = -1;\n   xl = xp;\n   xh = xp + n2;\n   yl = yp;\n   yh = yp + n2;\n   n3 = n - n2;\n   dx = rp + 2*n2;\n   dy = dx + n3;\n\n   if ((n&1) == 0)\n   {\n      if (mpn_cmp(xh, xl, n2) >= 0)\n         mpn_sub_n(dx, xh, xl, n2);\n      else\n      {\n         mpn_sub_n(dx, xl, xh, n2);\n         suboradd = -suboradd;\n      }\n      \n      if (mpn_cmp(yh, yl, n2) >= 0)\n         mpn_sub_n(dy, yh, yl, n2);\n      else\n      {\n         mpn_sub_n(dy, yl, yh, n2);\n         suboradd = -suboradd;\n      }\n   }\n   else\n   
{\n      if (xh[n2] !=0 || mpn_cmp(xh, xl, n2) >= 0)\n      {\n         c = mpn_sub_n(dx, xh, xl, n2);\n         dx[n2] = xh[n2] - c;\n      }\n      else\n      {\n         mpn_sub_n(dx, xl, xh, n2);\n         dx[n2] = 0;\n         suboradd = -suboradd;\n      }\n      \n      if (yh[n2] != 0 || mpn_cmp(yh, yl, n2) >= 0)\n      {\n         c = mpn_sub_n(dy, yh, yl, n2);\n         dy[n2] = yh[n2] - c;\n      }\n      else\n      {\n         mpn_sub_n(dy, yl, yh, n2);\n         dy[n2] = 0;\n         suboradd = -suboradd;\n      }\n   }\n\n   if (BELOW_THRESHOLD(n3, MUL_KARATSUBA_THRESHOLD))\n   {\n      mpn_mul_basecase(rp, xl, n2, yl, n2);\n      mpn_mul_basecase(tp, dx, n3, dy, n3);\n      mpn_mul_basecase(rp + 2*n2, xh, n3, yh, n3);\n   }\n   else\n   {\n      mpn_kara_mul_n(rp, xl, yl, n2, tp + 2*n3);\n      mpn_kara_mul_n(tp, dx, dy, n3, tp + 2*n3);   \n      mpn_kara_mul_n(rp + 2*n2, xh, yh, n3, tp + 2*n3);\n   }\n\n   if (suboradd == -1)\n      mpn_karasub(rp, tp, n);\n   else\n      mpn_karaadd(rp, tp, n);\n}\n\n/* (rp, 2n) = (xp, n)^2 with temp space (tp, 2*n + C) */\nvoid mpn_kara_sqr_n(mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_ptr tp)\n{\n   mp_size_t n2, n3;\n   mp_srcptr xl, xh;\n   mp_ptr dx;\n   mp_limb_t c;\n\n   n2 = n>>1;\n   xl = xp;\n   xh = xp + n2;\n   n3 = n - n2;\n   dx = rp + 2*n2;\n\n   if ((n&1) == 0)\n   {\n      if (mpn_cmp(xh, xl, n2) >=0)\n         mpn_sub_n(dx, xh, xl, n2);\n      else\n         mpn_sub_n(dx, xl, xh, n2);\n   }\n   else\n   {\n      if (xh[n2] != 0 || mpn_cmp(xh, xl, n2) >= 0)\n      {\n         c = mpn_sub_n(dx, xh, xl, n2);\n         dx[n2] = xh[n2] - c;\n      }\n      else\n      {\n         mpn_sub_n(dx, xl, xh, n2);\n         dx[n2] = 0;\n      }\n   }\n\n   if (BELOW_THRESHOLD(n3, SQR_BASECASE_THRESHOLD))\n   {\n      mpn_mul_basecase(rp, xl, n2, xl, n2);\n      mpn_mul_basecase(tp, dx, n3, dx, n3);\n      mpn_mul_basecase(rp + 2*n2, xh, n3, xh, n3);\n   }\n   else if (BELOW_THRESHOLD(n3, 
SQR_KARATSUBA_THRESHOLD))\n   {\n      mpn_sqr_basecase(rp, xl, n2);\n      mpn_sqr_basecase(tp, dx, n3);\n      mpn_sqr_basecase(rp + 2*n2, xh, n3);\n   }\n   else\n   {\n      mpn_kara_sqr_n(rp, xl, n2, tp + 2*n3);\n      mpn_kara_sqr_n(tp, dx, n3, tp + 2*n3);   \n      mpn_kara_sqr_n(rp + 2*n2, xh, n3, tp + 2*n3);\n   }\n\n   mpn_karasub(rp, tp, n);\n}\n\nvoid\nmpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)\n{\n  ASSERT (n >= 1);\n  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));\n  ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));\n\n  if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\n    {\n      mpn_mul_basecase (p, a, n, b, n);\n    }\n  else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))\n    {\n      /* Allocate workspace of fixed size on stack: fast! */\n      mp_limb_t ws[MPN_KARA_MUL_N_TSIZE (MUL_TOOM3_THRESHOLD_LIMIT-1)];\n      ASSERT (MUL_TOOM3_THRESHOLD <= MUL_TOOM3_THRESHOLD_LIMIT);\n      mpn_kara_mul_n (p, a, b, n, ws);\n    }\n  else if (BELOW_THRESHOLD (n, MUL_TOOM4_THRESHOLD))\n    {\n      mp_ptr ws;\n      TMP_SDECL;\n      TMP_SMARK;\n      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (n));\n      mpn_toom3_mul_n (p, a, b, n, ws);\n      TMP_SFREE;\n    }\n  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))\n    {\n       mpn_toom4_mul_n (p, a, b, n);\n    }\n#if WANT_FFT || TUNE_PROGRAM_BUILD\n  else if (BELOW_THRESHOLD (n, MUL_FFT_FULL_THRESHOLD))\n    {\n       mpn_toom8h_mul (p, a, n, b, n);\n    }\n#endif\n  else\n#if WANT_FFT || TUNE_PROGRAM_BUILD\n    {\n       mpn_mul_fft_main(p, a, n, b, n); \n    }\n#else\n    {\n      /* Toom8 for large operands. */\n      mpn_toom8h_mul (p, a, n, b, n);\n    }\n#endif\n}\n\nvoid\nmpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)\n{\n  ASSERT (n >= 1);\n  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));\n\n#if 0\n  /* FIXME: Can this be removed? 
*/\n  if (n == 0)\n    return;\n#endif\n\n  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n    { \n      /* mul_basecase is faster than sqr_basecase on small sizes sometimes */\n      mpn_mul_basecase (p, a, n, a, n);\n    }\n  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))\n    {\n      mpn_sqr_basecase (p, a, n);\n    }\n  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))\n    {\n      /* Allocate workspace of fixed size on stack: fast! */\n      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];\n      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);\n      mpn_kara_sqr_n (p, a, n, ws);\n    }\n  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))\n    {\n      mp_ptr ws;\n      TMP_SDECL;\n      TMP_SMARK;\n      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));\n      mpn_toom3_sqr_n (p, a, n, ws);\n      TMP_SFREE;\n    }\n  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))\n    {\n       mpn_toom4_sqr_n (p, a, n);\n    }\n#if WANT_FFT || TUNE_PROGRAM_BUILD\n  else if (BELOW_THRESHOLD (n, SQR_FFT_FULL_THRESHOLD))\n#else\n  else \n#endif\n    {\n       mpn_toom8_sqr_n (p, a, n);\n    }\n#if WANT_FFT || TUNE_PROGRAM_BUILD\n  else\n    {\n       mpn_mul_fft_main(p, a, n, a, n); \n    }\n#endif\n}\n"
  },
  {
    "path": "mpn/generic/mulhigh_n.c",
    "content": "/* mpn_mulhigh_n\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/*\n   Let X = sum over 0 <= i < n of x[i]B^i\n   Let Y = sum over 0 <= i < n of y[i]B^i\n\n   Define the usual multiplication as \n\n   XY = sum over 0 <= i < n, 0 <= j < n, x[i]y[j]B^(i + j)\n\n   Define short product as\n\n   XY_k = sum over i + j >= k, x[i]y[j]B^(i + j)\n\n   and approx short product as a superset of short product and subset of usual product\n\n   Now consider the usual product XY \n\n   XY = sum over {0 <= i < n, 0 <= j < n}  x[i]y[j]B^(i+j)     \n   \n   from now we just show the sum bounds with these implicit limits on i and j\n\n   = {0 <= i < n, 0 <= j < n}\n\n   split into four pieces (requires 0 <= m <= n)\n\n   = {i < n - m, j < n - m} {i >= n-m, j >= n - m} \n     {i < n - m, j >= n - m} {i >= n - m, j < n - m}\n\n   split last two pieces again (requires n - m <= m - 1)\n\n   = {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, n - m <= j < m} \n     {i < n - m, m <= j} {n - m <= i < m, j < n - m} {m <= i, j < n - m}\n\n   rearrange\n\n   = {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, 
m <= j} \n     {m <= i, j < n - m} {i < n - m, n - m <= j < m} { n - m <= i < m, j < n - m}\n\n  split last two again (requires n - m <= m - 2)\n\n  = {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, m <= j}\n    {m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2} {i < n - m, m - 2 < j < m}    \n    {n - m <= i <= m-2, j < n - m} {m - 2 < i < m, j < n - m}\n\n  rearrange\n\n  = {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, m <= j} \n    {m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2} {n - m <= i <= m - 2, j < n - m}             {i<n-m,j=m-1}                    {i=m-1,j<n-m}\n\n  split last two again\n\n  = {i < n - m, j < n - m} {i >= n - m, j >= n - m}\n    {i < n - m, m <= j} {m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2} \n    {n - m <= i <= m - 2, j < n - m} {i < n - m - 1, j = m - 1} \n    {i = n - m - 1, j = m - 1} {i = m - 1, j < n - m - 1} {i = m - 1, j = n - m - 1}\n\n  Now choose any m such that n + 2 <= 2m, m <= n \n  so n - m <= m - 2 so our requirements above are satisfied \n\n  Now consider the short product with k = n - 2, so we discard those \n  with i + j < k = n - 2 \n  \n  = {i < n - m, j < n - m}, i + j <= 2(n - m) - 2 \n  \n  as n + 2 <= 2m, so n < 2m so 2n < 2m + n so 2n - 2m < n so i + j < n - 2 = k     \n  so empty\n \n  {i >= n - m, j >= n - m}, i + j >= 2(n - m) keep most\n  {i < n - m, m <= j}, keep some\n  {m <= i, j < n - m}, keep some\n  {i < n - m, n - m <= j <= m - 2}, i + j <= n - m - 1 + m - 2 = n - 3 < n - 2 = k, empty\n  {n - m <= i <= m - 2, j < n - m}, i + j <= n - m - 1 + m - 2 = n - 3 < n - 2 = k, empty\n  {i < n - m - 1, j = m - 1}, i + j <= n - m - 2 + m - 1 = n - 3 < n - 2 = k, empty\n  {i = n - m - 1, j = m - 1},\ti + j = n - m - 1 + m - 1 = n - 2 = k, keep all\n  {i = m - 1, j < n - m - 1}, i + j <= m - 1 + n - m - 2 = n - 3 < n - 2 = k, empty\n  {i = m - 1, j = n - m - 1},\ti + j = m - 1 + n - m - 1 = n - 2 = k, keep all\n \n  so the approx short product XY_k is \n  \n  {i >= n - m, 
j >= n - m} {i < n - m, m <= j} \n  {m <= i, j < n - m} {i = n - m - 1, j = m - 1} {i = m - 1, j = n - m - 1}\n\n  Now for {i < n - m, m <= j} with i + j > = k = n - 2, let u = i, v = j - m  \n  so we have {0 <= u < n - m, 0 <= v < n - m} with u + v >= n - m - 2\n  which is the same short product \n\n  Summary\n  -----------\n \n  Given n digit xp and yp, \n  define mulshort_n(xp,yp,n) to be sum \n  \n  {i + j >= n - 2, and perhaps some i + j < n - 2} xp[i]yp[j]B^(i+j)\n  choose m such that n+2 <= 2m and m < n then from above  \n  \n  mulshort_n(xp, yp, n) = mul(xp + n - m, yp + n - m, m)B^(2n - 2m)\n                   + mulshort_n(xp + m,yp, n - m)B^m\n                   + mulshort_n(xp, yp + m, n - m)B^m\n                   + xp[n - m - 1]yp[m - 1]B^(n - 2)\n                   + xp[m - 1]yp[n - m - 1]B^(n - 2)\n\n  and clearly when summing the above we can ignore any products from i + j < n - 2\n\n  Theorem\n\n  Let (zp, 2n) = mulshort_n(xp, yp, n) \n  if zp[n - 1] + n - 2 < B then mulhigh_n(xp, yp, n) = (zp, 2n) \n*/\n\n/* (rp, 2n) = (xp, n)*(yp, n) / B^n */ \ninline static void\nmpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)\n{\n  mp_size_t i, k;\n\n  ASSERT(n >= 3);  /* this restriction doesn't make a lot of sense in general */\n  ASSERT_MPN(xp, n);\n  ASSERT_MPN(yp, n);\n  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));\n  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));\n\n  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */\n  i = 0;\n\n  /* Multiply w limbs from y + i to (2 + i + w - 1) limbs from x + (n - 2 - i - w + 1)\n     and put it into r + (n - 2 - w + 1), \"overflow\" (i.e. last) limb into\n     r + (n + w - 1) for i between 0 and n - 2.\n     i == n - w needs special treatment. */\n\n  /* We first multiply by the low order limb (or depending on optional function\n     availability, limbs).  This result can be stored, not added, to rp.  We\n     also avoid a loop for zeroing this way.  
*/\n\n#if HAVE_NATIVE_mpn_mul_2\n  rp[n + 1] = mpn_mul_2 (rp + k - 1, xp + k - 1, 2 + 1, yp);\n  i += 2;\n#else\n  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);\n  i += 1;\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_6\n  while (i < n - 6)\n    {\n      rp[n + i + 6 - 1] = mpn_addmul_6 (rp + k - 6 + 1, xp + k - i - 6 + 1, 2 + i + 6 - 1, yp + i);\n      i += 6;\n    }\n  if (i == n - 6)\n    {\n      rp[n + n - 1] = mpn_addmul_6 (rp + i, xp, n, yp + i);\n      return;\n    }\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_5\n  while (i < n - 5)\n    {\n      rp[n + i + 5 - 1] = mpn_addmul_5 (rp + k - 5 + 1, xp + k - i - 5 + 1, 2 + i + 5 - 1, yp + i)\n      i += 5;\n    }\n  if (i == n - 5)\n    {\n      rp[n + n - 1] = mpn_addmul_5 (rp + i, xp, n, yp + i);\n      return;\n    }\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_4\n  while (i < n - 4)\n    {\n      rp[n + i + 4 - 1] = mpn_addmul_4 (rp + k - 4 + 1, xp + k - i - 4 + 1, 2 + i + 4 - 1, yp + i);\n      i += 4;\n    }\n  if (i == n - 4)\n    {\n      rp[n + n - 1] = mpn_addmul_4 (rp + i, xp, n, yp + i);\n      return;\n    }\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_3\n  while (i < n - 3)\n    {\n      rp[n + i + 3 - 1] = mpn_addmul_3 (rp + k - 3 + 1, xp + k - i - 3 + 1, 2 + i + 3 - 1, yp + i);\n      i += 3;\n    }\n  if (i == n - 3)\n    {\n      rp[n + n - 1] = mpn_addmul_3 (rp + i, xp, n, yp + i);\n      return;\n    }\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2\n  while (i < n - 2)\n    {\n      rp[n + i + 2 - 1] = mpn_addmul_2 (rp + k - 2 + 1, xp + k - i - 2 + 1, 2 + i + 2 - 1, yp + i);\n      i += 2;\n    }\n  if (i == n - 2)\n    {\n      rp[n + n - 1] = mpn_addmul_2 (rp + i, xp, n, yp + i);\n      return;\n    }\n#endif\n\n  while (i < n - 1)\n    {\n      rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);\n      i += 1;\n    }\n  rp[n + n - 1] = mpn_addmul_1 (rp + i, xp, n, yp[i]);\n  return;\n}\n\n/* (rp, 2n) = (xp, n)*(yp, n) */\nstatic void\nmpn_mulshort_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)\n{\n  
mp_size_t m;\n  mp_limb_t t;\n  mp_ptr rpn2;\n\n  ASSERT(n >= 1);\n  ASSERT_MPN(xp, n);\n  ASSERT_MPN(yp, n);\n  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));\n  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));\n  \n  if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))\n    {\n      mpn_mul_basecase(rp, xp, n, yp, n);\n      \n      return;\n    }\n\n  if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))\n    {\n      mpn_mulshort_n_basecase(rp, xp, yp, n);\n      \n      return;\n    }\n\n  /* choose optimal m s.t. n + 2 <= 2m,  m < n */\n  ASSERT (n >= 4);\n\n  m = 87 * n / 128;\n  \n  if (2 * m < n + 2)\n    m = (n + 1) / 2 + 1;\n  \n  if (m >= n)\n    m = n - 1;\n  \n  ASSERT (n + 2 <= 2 * m);\n  ASSERT (m < n);\n  \n  rpn2 = rp + n - 2;\n  \n  mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);\n  mpn_mulshort_n (rp, xp, yp + m, n - m);\n\n  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));\n  \n  mpn_mulshort_n (rp, xp + m, yp, n - m);\n  \n  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));\n  \n  umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);\n  rp[0] = t >> GMP_NAIL_BITS;\n  \n  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));\n  \n  umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);\n  rp[0] = t >> GMP_NAIL_BITS;\n  \n  ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));\n  \n  return;\n}\n\n/* (rp, 2n) = (xp, n)*(yp, n) */\nvoid\nmpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)\n{\n  mp_limb_t t;\n\n  ASSERT(n > 0);\n  ASSERT_MPN(xp, n);\n  ASSERT_MPN(yp, n);\n  ASSERT(!MPN_OVERLAP_P(rp, 2 * n, xp, n));\n  ASSERT(!MPN_OVERLAP_P(rp, 2 * n, yp, n));\n  \n  if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))\n    {\n      mpn_mul_basecase(rp, xp, n, yp, n);\n      \n      return;\n    }\n  \n  if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD))\n    {\n      mpn_mul_n(rp, xp, yp, n);\n      \n      return;\n    }\n\n  mpn_mulshort_n(rp, xp, yp, n);\n  t = rp[n - 1] + n - 2;\n  \n  
if (UNLIKELY(t < n - 2))\n    mpn_mul_n(rp, xp, yp, n);\n  \n  return;\n}\n"
  },
  {
    "path": "mpn/generic/mullow_basecase.c",
    "content": "/* \nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n  FIXME: Should use mpn_addmul_2 (and higher).\n*/\n\n/* (rp, xn + yn) = (xp, xn)*(yp, yn) mod B^n */\nvoid\nmpn_mullow_basecase (mp_ptr rp, mp_srcptr xp, mp_size_t xn, mp_srcptr yp,\n\t\t     mp_size_t yn, mp_size_t n)\n{\n  mp_limb_t c;\n  mp_size_t i;\n\n  /* \n     want some sort of threshold to call mpn_mul() etc \n     as for the mpn_mullow_n cases\n  */\n  ASSERT(0 < yn);\n  ASSERT(yn <= xn);\n  ASSERT(xn <= n);\n  ASSERT(n <= xn + yn);\n  ASSERT_MPN(xp, xn);\n  ASSERT_MPN(yp, yn);\n  ASSERT(!MPN_OVERLAP_P(rp, xn + yn, xp, xn));\n  ASSERT(!MPN_OVERLAP_P(rp, xn + yn, yp, yn));\n  \n  rp[xn] = mpn_mul_1(rp, xp, xn, yp[0]);\n  \n  for (i = 1; i <= n - xn && i < yn; i++)\n    rp[xn + i] = mpn_addmul_1(rp + i, xp, xn, yp[i]);\n  \n  for ( ; i > n - xn && i < yn; i++)\n    mpn_addmul_1(rp + i, xp, n - i, yp[i]);\n\n  return;\n}\n"
  },
  {
    "path": "mpn/generic/mullow_n.c",
    "content": "/* mpn_mullow_n -- multiply two n-limb nunbers and return the low n limbs\n   of their products.\n            \n Copyright 2004, 2005 Free Software Foundation, Inc.\n Copyright 2009 Jason Moxham\n           \n This file is part of the GNU MP Library.\n            \n The GNU MP Library is free software; you can redistribute it and/or modify\n it under the terms of the GNU Lesser General Public License as published by\n the Free Software Foundation; either version 2.1 of the License, or (at your\n option) any later version.\n            \n The GNU MP Library is distributed in the hope that it will be useful, but\n WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n License for more details.\n           \n You should have received a copy of the GNU Lesser General Public License\n along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n MA 02110-1301, USA. 
*/\n\n /*  Copyright 2008 Complete rewrite  Only the name is the same\n    Note: sets 2n limbs \n\n  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)\n{\n  mp_size_t m;\n\n  ASSERT (n > 0);\n  ASSERT_MPN (xp, n);\n  ASSERT_MPN (yp, n);\n  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));\n  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));\n\n  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))\n    {\n      mpn_mul_basecase (rp, xp, n, yp, n);\n      return;\n    }\n\n  if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))\n    {\n      mpn_mullow_n_basecase (rp, xp, yp, n);\n      return;\n    }\n  \n  if (ABOVE_THRESHOLD (n, MULLOW_MUL_THRESHOLD))\n    {\n      mpn_mul_n (rp, xp, yp, n);\n      return;\n    }\n\n  /* choose optimal m st n/2 <= m <= n, choosing m == n is same as above */\n  m = n * 87 / 128;\n\n  if (2 * m < n)\n    m = n - n / 2;\n  \n  if (m > n)\n    m = n;\n  \n  ASSERT (n / 2 <= m);\n  ASSERT (m <= n);\n  \n  mpn_mul_n(rp, xp, yp, m);\n  mpn_mullow_n(rp + 2 * m, xp, yp + m, n - m);\n  mpn_add_n(rp + m, rp + m, rp + 2 * m, n - m);\n  mpn_mullow_n(rp + 2 * m, xp + m, yp, n - m);\n  mpn_add_n(rp + m, rp + m, rp + 2 * m, n - m);\n  \n  return;\n}\n"
  },
  {
    "path": "mpn/generic/mullow_n_basecase.c",
    "content": "/* mpn_mullow_n_basecase -- Internal routine to multiply two natural\n   numbers of length n and return the low part.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n\n\nCopyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n  FIXME: Should this function also output the two limbs overflow?\n*/\n\nvoid\nmpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_size_t i;\n\n  ASSERT (n >= 1);\n  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));\n  ASSERT (! MPN_OVERLAP_P (rp, 2*n, vp, n));\n\n  /* We first multiply by the low order limb (or depending on optional function\n     availability, limbs).  This result can be stored, not added, to rp.  We\n     also avoid a loop for zeroing this way.  
*/\n\n#if HAVE_NATIVE_mpn_mul_2\n  if (n >= 2)\n    {\n      mpn_mul_2 (rp, up, n, vp);\n      rp += 2, vp += 2, n -= 2;\n    }\n  else\n    {\n      mpn_mul_1 (rp, up, n, vp[0]);\n      return;\n    }\n#else\n  mpn_mul_1 (rp, up, n, vp[0]);\n  rp += 1, vp += 1, n -= 1;\n#endif\n\n  /* Now accumulate the product of up[] and the next higher limb (or depending\n     on optional function availability, limbs) from vp[].  */\n\n#define MAX_LEFT MP_SIZE_T_MAX\t/* Used to simplify loops into if statements */\n\n\n#if HAVE_NATIVE_mpn_addmul_6\n  while (n >= 6)\n    {\n      mpn_addmul_6 (rp, up, n, vp);\n      if (MAX_LEFT == 6)\n\treturn;\n      rp += 6, vp += 6, n -= 6;\n      if (MAX_LEFT < 2 * 6)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (6 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_5\n  while (n >= 5)\n    {\n      mpn_addmul_5 (rp, up, n, vp);\n      if (MAX_LEFT == 5)\n\treturn;\n      rp += 5, vp += 5, n -= 5;\n      if (MAX_LEFT < 2 * 5)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (5 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_4\n  while (n >= 4)\n    {\n      mpn_addmul_4 (rp, up, n, vp);\n      if (MAX_LEFT == 4)\n\treturn;\n      rp += 4, vp += 4, n -= 4;\n      if (MAX_LEFT < 2 * 4)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (4 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_3\n  while (n >= 3)\n    {\n      mpn_addmul_3 (rp, up, n, vp);\n      if (MAX_LEFT == 3)\n\treturn;\n      rp += 3, vp += 3, n -= 3;\n      if (MAX_LEFT < 2 * 3)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (3 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2\n  while (n >= 2)\n    {\n      mpn_addmul_2 (rp, up, n, vp);\n      if (MAX_LEFT == 2)\n\treturn;\n      rp += 2, vp += 2, n -= 2;\n      if (MAX_LEFT < 2 * 2)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (2 - 1)\n#endif\n\n  while (n >= 1)\n    {\n      mpn_addmul_1 (rp, up, n, vp[0]);\n      if (MAX_LEFT == 1)\n\treturn;\n      rp += 1, vp += 1, n -= 1;\n    }\n\n}\n"
  },
  {
    "path": "mpn/generic/mulmid.c",
    "content": "/* mpn_mulmid -- middle product\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)\n\n/* Let a = sum_0^{m-1} a_i B^i and b = sum_0^{n-1} b_j B^j\n\n   then MP(a, m, b, n) = sum_{0<=i<m, 0<=j<n, n-1<=i+j<=m-1} a_ib_j B^{i+j-n+1}\n\n   Note there are m-n+1 different values of i+j and each product a_ib_j will be two limbs. 
Thus when added together, the sum must take up n-m+3 limbs of space.\n\n   This function computes MP(ap,an,bp,bn), placing the result in {rp, an-bn+3}.\n   \n   It is required that bn << GMP_NUMBMAX.\n*/\n\nvoid\nmpn_mulmid (mp_ptr rp,\n            mp_srcptr ap, mp_size_t an,\n            mp_srcptr bp, mp_size_t bn)\n{\n  mp_size_t rn, k;\n  mp_ptr scratch, temp;\n\n  ASSERT (an >= bn);\n  ASSERT (bn >= 1);\n  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));\n  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));\n\n  if (bn < MULMID_TOOM42_THRESHOLD)\n    {\n      /* region not tall enough to make toom42 worthwhile for any portion */\n\n      if (an < CHUNK)\n\t{\n\t  /* region not too wide either, just call basecase directly */\n\t  mpn_mulmid_basecase (rp, ap, an, bp, bn);\n\t  return;\n\t}\n\n      /* Region quite wide. For better locality, use basecase on chunks:\n\n\t AAABBBCC..\n\t .AAABBBCC.\n\t ..AAABBBCC\n      */\n\n      k = CHUNK - bn + 1;    /* number of diagonals per chunk */\n\n      /* first chunk (marked A in the above diagram) */\n      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);\n\n      /* remaining chunks (B, C, etc) */\n      an -= k;\n\n      while (an >= CHUNK)\n\t{\n\t  mp_limb_t t0, t1, cy;\n\t  ap += k, rp += k;\n\t  t0 = rp[0], t1 = rp[1];\n\t  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);\n\t  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */\n\t  MPN_INCR_U (rp + 1, k + 1, t1 + cy);\n\t  an -= k;\n\t}\n\n      if (an >= bn)\n\t{\n\t  /* last remaining chunk */\n\t  mp_limb_t t0, t1, cy;\n\t  ap += k, rp += k;\n\t  t0 = rp[0], t1 = rp[1];\n\t  mpn_mulmid_basecase (rp, ap, an, bp, bn);\n\t  ADDC_LIMB (cy, rp[0], rp[0], t0);\n\t  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);\n\t}\n\n      return;\n    }\n\n  /* region is tall enough for toom42 */\n\n  rn = an - bn + 1;\n\n  if (rn < MULMID_TOOM42_THRESHOLD)\n    {\n      /* region not wide enough to make toom42 worthwhile for any portion */\n\n      if (bn < CHUNK)\n\t{\n\t  
/* region not too tall either, just call basecase directly */\n\t  mpn_mulmid_basecase (rp, ap, an, bp, bn);\n\t  return;\n\t}\n\n      /* Region quite tall. For better locality, use basecase on chunks:\n\n\t AAAAA....\n\t .AAAAA...\n\t ..BBBBB..\n\t ...BBBBB.\n\t ....CCCCC\n      */\n      {\n          TMP_DECL;\n          TMP_MARK;\n\n          temp = TMP_ALLOC_LIMBS (rn + 2);\n\n          /* first chunk (marked A in the above diagram) */\n          bp += bn - CHUNK, an -= bn - CHUNK;\n          mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);\n\n          /* remaining chunks (B, C, etc) */\n          bn -= CHUNK;\n\n          while (bn >= CHUNK)\n\t    {\n\t      ap += CHUNK, bp -= CHUNK;\n\t      mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);\n\t      mpn_add_n (rp, rp, temp, rn + 2);\n\t      bn -= CHUNK;\n\t    }\n\n          if (bn)\n\t    {\n\t      /* last remaining chunk */\n\t      ap += CHUNK, bp -= bn;\n\t      mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);\n\t      mpn_add_n (rp, rp, temp, rn + 2);\n\t    }\n\n          TMP_FREE;\n      }\n      return;\n    }\n\n  /* we're definitely going to use toom42 somewhere */\n\n  if (bn > rn)\n    {\n      /* slice region into chunks, use toom42 on all chunks except possibly\n\t the last:\n\n         AA....\n         .AA...\n         ..BB..\n         ...BB.\n         ....CC\n      */\n\n      TMP_DECL;\n      TMP_MARK;\n\n      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));\n      scratch = temp + rn + 2;\n\n      /* first chunk (marked A in the above diagram) */\n      bp += bn - rn;\n      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);\n\n      /* remaining chunks (B, C, etc) */\n      bn -= rn;\n\n      while (bn >= rn)\n        {\n          ap += rn, bp -= rn;\n\t  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);\n          mpn_add_n (rp, rp, temp, rn + 2);\n          bn -= rn;\n        }\n\n      if (bn)\n        {\n          /* last remaining chunk */\n          ap += rn, bp -= bn;\n\t  
mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);\n          mpn_add_n (rp, rp, temp, rn + 2);\n        }\n\n      TMP_FREE;\n    }\n  else\n    {\n      /* slice region into chunks, use toom42 on all chunks except possibly\n\t the last:\n\n         AAABBBCC..\n         .AAABBBCC.\n         ..AAABBBCC\n      */\n\n      TMP_DECL;\n      TMP_MARK;\n\n      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));\n\n      /* first chunk (marked A in the above diagram) */\n      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);\n\n      /* remaining chunks (B, C, etc) */\n      rn -= bn;\n\n      while (rn >= bn)\n        {\n\t  mp_limb_t t0, t1, cy;\n          ap += bn, rp += bn;\n          t0 = rp[0], t1 = rp[1];\n          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);\n\t  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */\n\t  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);\n\t  rn -= bn;\n        }\n\n      TMP_FREE;\n\n      if (rn)\n        {\n          /* last remaining chunk */\n\t  mp_limb_t t0, t1, cy;\n          ap += bn, rp += bn;\n          t0 = rp[0], t1 = rp[1];\n          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);\n\t  ADDC_LIMB (cy, rp[0], rp[0], t0);\n\t  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);\n        }\n    }\n}\n"
  },
  {
    "path": "mpn/generic/mulmid_basecase.c",
    "content": "/* mpn_mulmid_basecase -- classical middle product algorithm\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Let a = sum_0^{m-1} a_i B^i and b = sum_0^{n-1} b_j B^j\n\n   then MP(a, m, b, n) = sum_{0<=i<m, 0<=j<n, n-1<=i+j<=m-1} a_ib_j B^{i+j-n+1}\n\n   Note there are m-n+1 different values of i+j and each product a_ib_j will be two limbs. Thus when added together, the sum must take up m-n+3 limbs of space.\n\n   This function computes MP(up,un,vp,vn), writing the result to {rp,un-vn+3}.\n   Must have un >= vn >= 1.\n\n   Neither input buffer may overlap with the output buffer. 
\n\n   It is required that vn << GMP_NUMBMAX. \n*/\n\nvoid\nmpn_mulmid_basecase (mp_ptr rp,\n                     mp_srcptr up, mp_size_t un,\n                     mp_srcptr vp, mp_size_t vn)\n{\n  mp_limb_t lo, hi;  /* last two limbs of output */\n  mp_limb_t temp;\n\n  ASSERT (un >= vn);\n  ASSERT (vn >= 1);\n  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));\n  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));\n\n  up += vn - 1;\n  un -= vn - 1;\n\n  /* We first multiply by the first limb (or depending on optional function\n     availability, limbs).  This result can be stored, not added, to rp.  We\n     also avoid a loop for zeroing this way.  */\n\n  hi = 0;\n#if HAVE_NATIVE_mpn_mul_2\n  if (vn >= 2)\n    {\n      lo = mpn_mul_2 (rp, up, un, vp);\n      up -= 2, vp += 2, vn -= 2;\n    }\n  else\n    {\n      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);\n      return;\n    }\n#else\n  lo = mpn_mul_1 (rp, up, un, vp[0]);\n  up -= 1, vp += 1, vn -= 1;\n#endif\n\n  /* Now accumulate the product of up[] and the next higher limb (or depending\n     on optional function availability, limbs) from vp[].  
*/\n\n#define MAX_LEFT MP_SIZE_T_MAX\t/* Used to simplify loops into if statements */\n\n\n#if HAVE_NATIVE_mpn_addmul_6\n  while (vn >= 6)\n    {\n      temp = mpn_addmul_6 (rp, up, un, vp);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 6)\n\tgoto overflow;\n      up -= 6, vp += 6, vn -= 6;\n      if (MAX_LEFT < 2 * 6)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (6 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_5\n  while (vn >= 5)\n    {\n      temp = mpn_addmul_5 (rp, up, un, vp);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 5)\n\tgoto overflow;\n      up -= 5, vp += 5, vn -= 5;\n      if (MAX_LEFT < 2 * 5)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (5 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_4\n  while (vn >= 4)\n    {\n      temp = mpn_addmul_4 (rp, up, un, vp);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 4)\n\tgoto overflow;\n      up -= 4, vp += 4, vn -= 4;\n      if (MAX_LEFT < 2 * 4)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (4 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_3\n  while (vn >= 3)\n    {\n      temp = mpn_addmul_3 (rp, up, un, vp);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 3)\n\tgoto overflow;\n      up -= 3, vp += 3, vn -= 3;\n      if (MAX_LEFT < 2 * 3)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (3 - 1)\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2\n  while (vn >= 2)\n    {\n      temp = mpn_addmul_2 (rp, up, un, vp);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 2)\n\tgoto overflow;\n      up -= 2, vp += 2, vn -= 2;\n      if (MAX_LEFT < 2 * 2)\n\tbreak;\n    }\n#undef MAX_LEFT\n#define MAX_LEFT (2 - 1)\n#endif\n\n  while (vn >= 1)\n    {\n      temp = mpn_addmul_1 (rp, up, un, vp[0]);\n      add_ssaaaa (hi, lo, hi, lo, 0, temp);\n      if (MAX_LEFT == 1)\n\tgoto overflow;\n      up -= 1, vp += 1, vn -= 1;\n    }\n\noverflow:\n  /* store final limbs */\n#if GMP_NAIL_BITS != 0\n  hi = (hi << 
GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);\n  lo &= GMP_NUMB_MASK;\n#endif\n\n  rp[un] = lo;\n  rp[un + 1] = hi;\n}\n"
  },
  {
    "path": "mpn/generic/mulmid_n.c",
    "content": "/* mpn_mulmid_n -- balanced middle product\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n   Let a = sum_0^{m-1} a_i B^i and b = sum_0^{n-1} b_j B^j\n\n   then MP(a, m, b, n) = sum_{0<=i<m, 0<=j<n, n-1<=i+j<=m-1} a_ib_j B^{i+j-n+1}\n\n   Note there are m-n+1 different values of i+j and each product a_ib_j will be two limbs. Thus when added together, the sum must take up n-m+3 limbs of space.\n   \n   This function computes MP(ap,2n-1,bp,n)\n\n   It is required that n << GMP_NUMBMAX. \n*/\n\nvoid\nmpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)\n{\n  ASSERT (n >= 1);\n  ASSERT (! 
MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));\n  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));\n\n  if (n < MULMID_TOOM42_THRESHOLD)\n    {\n      mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);\n    }\n  else\n    {\n      mp_size_t k;\n      mp_ptr scratch;\n\t   TMP_DECL;\n\t  \n      k = mpn_toom42_mulmid_itch (n);\n\n      if (k <= 1000) k = 1000;\n\t\n\t   TMP_MARK;\n\t   scratch = TMP_ALLOC_LIMBS (k);\n\t   mpn_toom42_mulmid (rp, ap, bp, n, scratch);\n\t   TMP_FREE;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/mulmod_2expm1.c",
    "content": "/* mpn_mulmod_2expm1 \n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* \n   (xp, n) = (yp, n)*(zp, n) % 2^b - 1  \n\n   needs (tp,2n) temp space, everything reduced mod 2^b \n   inputs, outputs not fully reduced\n\n   N.B: 2n is not the same as 2b rounded up to nearest limb!\n\n   NOTE: not reduced fully means the representation is redundant, although\n   only 0 has two representations i.e. 
0 and 2^b - 1\n*/\ninline static void\nmpn_mulmod_2expm1_basecase (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,\n\t\t\t    mpir_ui b, mp_ptr tp)\n{\n  mp_size_t n, k;\n  mp_limb_t c;\n\n  n = BITS_TO_LIMBS(b);\n  k = GMP_NUMB_BITS * n - b;\n\n  ASSERT(n > 0);\n  ASSERT_MPN(yp, n);\n  ASSERT_MPN(zp, n);\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));\n  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n  \n  /* \n     as n is small, could use mpn_mul_basecase and save a fn call, \n     but this code is also used for large n when b is odd \n  */\n  mpn_mul_n (tp, yp, zp, n);\t\n\n  if (k == 0)\n    {\n      c = mpn_add_n (xp, tp, tp + n, n);\n      MPN_INCR_U (xp, n, c);\n  \n      return;\n    }\n\n  c = tp[n - 1];\n  tp[n - 1] &= GMP_NUMB_MASK >> k;\n\n#if HAVE_NATIVE_mpn_addlsh_nc\n  ASSERT_NOCARRY(mpn_addlsh_nc (xp, tp, tp + n, n, k, c));\n#else\n  {\n    mp_limb_t c1;\n\n    c1 = mpn_lshift (tp + n, tp + n, n, k);\n    tp[n] |= c >> (GMP_NUMB_BITS - k);\n    c = mpn_add_n (xp, tp, tp + n, n) + c1;\n    \n    ASSERT (c == 0);\n  }\n#endif\n\n  c = xp[n - 1] >> (GMP_NUMB_BITS - k);\n  xp[n - 1] &= GMP_NUMB_MASK >> k;\n  MPN_INCR_U (xp, n, c);\n\n  return;\n}\n\n/* \n   Improvements:\n   use 2^3n - 1 , 2^4n + 2 + 1 factorizations or others\n   shift by byte values, i.e. copy unaligned\n   addsublshift or a rshift1_lshift dual_rshift i.e. for both the mods\n   separate out the k = 0 case \n   unroll the recursion\n   if yp = zp i.e. 
a square then could do better\n   different thresholds depending on how many twos divide b\n\n   tp requires 5(n + lg(b)) space\n*/\nvoid\nmpn_mulmod_2expm1(mp_ptr xp, mp_ptr yp, mp_ptr zp, mpir_ui b,\n\t\t   mp_ptr tp)\n{\n  mp_size_t h, n, m, k;\n  int bor, c, c1, c2;\n  mp_ptr S, D, typm, tzpm, typp, tzpp, temp;\n  mp_limb_t car, Dm, car1;\n\n  /* \n     want y*z %(2^n - 1)         \n     want y*z%(2^(2m) - 1) = y*z%((2^m - 1)(2^m + 1)) -- use CRT \n     \n     note (2^m - 1)*2^(m - 1) == 1 mod 2^m+1        \n     note (2^m + 1)*2^(m - 1) == 1 mod 2^m-1\n     \n     let A = y*z%(2^m - 1)  B = y*z%(2^m + 1)        \n     then A(2^m + 1)2^(m - 1) + B(2^m - 1)2^(m-1) = ((A - B) + (A + B)2^m)2^(m-1)\n  */\n  \n  ASSERT (b > 0);\n  \n  n = BITS_TO_LIMBS (b);\n  \n  if (b % 2 == 1 || BELOW_THRESHOLD (n, MULMOD_2EXPM1_THRESHOLD))\n    {\n      mpn_mulmod_2expm1_basecase (xp, yp, zp, b, tp);\n      return;\n    }\n  \n  h = b / 2;\n  m = BITS_TO_LIMBS (h);\n  k = GMP_NUMB_BITS * m - h;\t// if k==0 then n=2*m\n  \n  ASSERT_MPN(yp, n);\n  ASSERT_MPN(zp, n);\n  ASSERT(GMP_NUMB_BITS * n - b == 0\n\t  || yp[n - 1] >> (GMP_NUMB_BITS - (GMP_NUMB_BITS * n - b)) == 0);\n  ASSERT(GMP_NUMB_BITS * n - b == 0\n\t  || zp[n - 1] >> (GMP_NUMB_BITS - (GMP_NUMB_BITS * n - b)) == 0);\n  ASSERT(!MPN_OVERLAP_P (yp, n, tp, 5 * n));\n  ASSERT(!MPN_OVERLAP_P (zp, n, tp, 5 * n));\n  ASSERT(!MPN_OVERLAP_P (xp, n, tp, 5 * n));\n  ASSERT(!MPN_OVERLAP_P (xp, n, yp, n));\n  ASSERT(!MPN_OVERLAP_P (xp, n, zp, n));\n\n  /* S, D, typm, tzpm, typp, tzpp all require m limbs */\n  S = xp;\n  D = tp;\n  typm = tp + m;\n  typp = typm + m;\n  tzpm = typp + m;\n  tzpp = tzpm + m;\n  temp = tzpp + m;\n\n  if (k == 0)\n    D = xp + m;\n\n  if (k == 0)\n    {\n      c = mpn_sumdiff_n (typm, typp, yp, yp + m, m);\n      MPN_INCR_U (typm, m, c >> 1);\n      c1 = mpn_add_1 (typp, typp, m, c & 1);\n    }\n  else\n    {\n      mpn_rshift (typp, yp + m - 1, m, GMP_NUMB_BITS - k);\n      if (n == 2 * m)\n\t{\n\t  typp[m 
- 1] |= yp[2 * m - 1] << k;\n\t  ASSERT (yp[2 * m - 1] >> (GMP_NUMB_BITS - k) == 0);\n\t}\n      ASSERT (typp[m - 1] >> (GMP_NUMB_BITS - k) == 0);\t/* have h bits */\n\n      car = yp[m - 1];\n      yp[m - 1] &= GMP_NUMB_MASK >> k;\n      \n      ASSERT (yp[m - 1] >> (GMP_NUMB_BITS - k) == 0);\t/* have h bits */  \n      \n      c1 = mpn_sumdiff_n (typm, typp, yp, typp, m);\n      c = typm[m - 1] >> (GMP_NUMB_BITS - k);\n      yp[m - 1] = car;\n      MPN_INCR_U (typm, m, c);\n      c1 = mpn_add_1 (typp, typp, m, c1);\n      typm[m - 1] &= GMP_NUMB_MASK >> k;\n      typp[m - 1] &= GMP_NUMB_MASK >> k;\n    }\n\n  if (k == 0)\n    {\n      c = mpn_sumdiff_n (tzpm, tzpp, zp, zp + m, m);\n      MPN_INCR_U (tzpm, m, c >> 1);\n      c2 = mpn_add_1 (tzpp, tzpp, m, c & 1);\n    }\n  else\n    {\n      mpn_rshift (tzpp, zp + m - 1, m, GMP_NUMB_BITS - k);\n      if (n == 2 * m)\n\t{\n\t  tzpp[m - 1] |= zp[2 * m - 1] << k;\n\t  ASSERT (zp[2 * m - 1] >> (GMP_NUMB_BITS - k) == 0);\n\t}\n      ASSERT (tzpp[m - 1] >> (GMP_NUMB_BITS - k) == 0);\t/* have h bits */\n      \n      car = zp[m - 1];\n      zp[m - 1] &= GMP_NUMB_MASK >> k;\n      \n      ASSERT (zp[m - 1] >> (GMP_NUMB_BITS - k) == 0);\t/* have h bits */\n      \n      c2 = mpn_sumdiff_n (tzpm, tzpp, zp, tzpp, m);\n      c = tzpm[m - 1] >> (GMP_NUMB_BITS - k);\n      zp[m - 1] = car;\n      MPN_INCR_U (tzpm, m, c);\n      c2 = mpn_add_1 (tzpp, tzpp, m, c2);\n      tzpm[m - 1] &= GMP_NUMB_MASK >> k;\n      tzpp[m - 1] &= GMP_NUMB_MASK >> k;\n    }\n  \n  mpn_mulmod_2expm1(S, typm, tzpm, h, temp);\t/* unroll this recursion, S = A rename */\n  c = mpn_mulmod_2expp1_basecase (D, typp, tzpp, c1 * 2 + c2, h, temp);\t/* D = B rename */\n  \n  if (LIKELY (c == 0))\n    {\n      c1 = mpn_sumdiff_n (S, D, S, D, m);\n      bor = c1 & 1;\n      c = c1 >> 1;\n      D[m - 1] &= GMP_NUMB_MASK >> k;\n      \n      if (k != 0 && S[m - 1] >> (GMP_NUMB_BITS - k) != 0)\n\tc = 1;\n      \n      S[m - 1] &= GMP_NUMB_MASK >> k;\n    }\n  else\n 
   {\n      c = 1;\n      bor = 1;\n      MPN_COPY (D, S, m);\n    }\n  \n  bor = mpn_sub_1 (S, S, m, bor);\n  S[m - 1] &= GMP_NUMB_MASK >> k;\n  \n  if (bor == 0)\n    {\n      c = mpn_add_1 (D, D, m, c);\n      \n      if (k != 0 && D[m - 1] >> (GMP_NUMB_BITS - k) != 0)\n\tc = 1;\n      \n      D[m - 1] &= GMP_NUMB_MASK >> k;\n      \n      if (c != 0)\n\tS[0] |= 1;\n    }\n  \n  if (k == 0)\n    {\n      car = mpn_half (xp, n);\n      xp[n - 1] |= car;\n    }\t\t\t\t/* C sequence point rule */\n  else\n    {\n      car = mpn_half (xp, m);\n      car1 = xp[m - 1];\n      \n      if (GMP_NUMB_BITS - k - 1 != 0)\n\t{\n\t  Dm = mpn_lshift (xp + m - 1, D, m, GMP_NUMB_BITS - k - 1);\n\t}\n      else\n\t{\n\t  Dm = 0;\n\t  MPN_COPY (xp + m - 1, D, m);\n\t}\n      \n      xp[m - 1] |= car1;\n      \n      if (2 * m == n)\n\txp[n - 1] = Dm;\n      \n      xp[n - 1] |= car >> (GMP_NUMB_BITS * n - b);\n    }\n\n  return;\n}\n\n/*\n   {rp, min(rn, an + bn)} = {ap, an} * {bp, bn} mod(B^rn - 1)\n*/\nvoid\nmpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, \n                                          mp_srcptr bp, mp_size_t bn, mp_ptr scratch)\n{\n  mp_ptr tp, rp2;\n  TMP_DECL;\n\n  ASSERT (0 < bn);\n  ASSERT (bn <= an);\n  ASSERT (an <= rn);\n\n  TMP_MARK;\n\n  if (an < rn)\n  {\n     tp = TMP_ALLOC_LIMBS(rn);\n     MPN_COPY(tp, ap, an);\n     MPN_ZERO(tp + an, rn - an);\n     ap = tp;\n  }\n\n  if (bn < rn)\n  {\n     tp = TMP_ALLOC_LIMBS(rn);\n     MPN_COPY(tp, bp, bn);\n     MPN_ZERO(tp + bn, rn - bn);\n     bp = tp;\n  }\n\n  if (an + bn < rn)\n  {\n     tp = TMP_ALLOC_LIMBS(rn);\n     mpn_mulmod_2expm1(tp, (mp_ptr) ap, (mp_ptr) bp, rn*GMP_LIMB_BITS, scratch);\n     MPN_COPY(rp, tp, an + bn);\n  } else\n     mpn_mulmod_2expm1(rp, (mp_ptr) ap, (mp_ptr) bp, rn*GMP_LIMB_BITS, scratch);\n     \n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/mulmod_2expp1_basecase.c",
    "content": "/* mpn_mulmod_2expp1_basecase \n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_size_t mulmod_2expp1_table_n[FFT_N_NUM] = MULMOD_TAB;\n\n/*\n   ret + (xp, n) = (yp, n)*(zp, n) % 2^b + 1  \n   needs (tp, 2n) temp space, everything reduced mod 2^b \n   inputs, outputs are fully reduced\n  \n   N.B: 2n is not the same as 2b rounded up to nearest limb!\n*/\ninline static int\nmpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,\n\t\t\t                                         mpir_ui b, mp_ptr tp)\n{\n  mp_size_t n, k;\n  mp_limb_t c;\n\n  TMP_DECL;\n\n  n = BITS_TO_LIMBS (b);\n  k = GMP_NUMB_BITS * n - b;\n\n  ASSERT(b > 0);\n  ASSERT(n > 0);\n  ASSERT_MPN(yp, n);\n  ASSERT_MPN(zp, n);\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));\n  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n\n#ifndef TUNE_PROGRAM_BUILD\n  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == 
mpir_fft_adjust_limbs(n))\n  {\n      mp_bitcnt_t depth1, depth = 1;\n      mp_size_t w1, off;\n      mp_ptr tx, ty, tz;\n      mp_limb_t ret;\n\n      TMP_MARK;\n\n      tx = TMP_BALLOC_LIMBS(3*n + 3);\n      ty = tx + n + 1;\n      tz = ty + n + 1;\n\n      MPN_COPY(ty, yp, n);\n      MPN_COPY(tz, zp, n);\n      ty[n] = 0;\n      tz[n] = 0;\n\n      while ((((mp_limb_t)1)<<depth) < b) depth++;\n   \n      if (depth < 12) off = mulmod_2expp1_table_n[0];\n      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];\n      depth1 = depth/2 - off;\n   \n      w1 = b/(((mp_limb_t)1)<<(2*depth1));\n\n      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);\n\n      MPN_COPY(xp, tx, n);\n      ret = tx[n];\n      \n      TMP_FREE;\n\n\t   return ret;\n  }\n#endif\n\n  if (yp == zp)\n     mpn_sqr(tp, yp, n);\n  else\n     mpn_mul_n (tp, yp, zp, n);\n\n  if (k == 0)\n    {\n      c = mpn_sub_n (xp, tp, tp + n, n);\n\n      return mpn_add_1 (xp, xp, n, c);\n    }\n\n  c = tp[n - 1];\n  tp[n - 1] &= GMP_NUMB_MASK >> k;\n\n#if HAVE_NATIVE_mpn_sublsh_nc\n  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);\n#else\n  {\n    mp_limb_t c1;\n    c1 = mpn_lshift (tp + n, tp + n, n, k);\n    tp[n] |= c >> (GMP_NUMB_BITS - k);\n    c = mpn_sub_n (xp, tp, tp + n, n) + c1;\n  }\n#endif\n\n  c = mpn_add_1 (xp, xp, n, c);\n  xp[n - 1] &= GMP_NUMB_MASK >> k;\n\n  return c;\n}\n\n/* \n   c is the top bits of the inputs, (fully reduced)\n   c & 2 is the top bit of y\n   c & 1 is the top bit of z\n*/\nint\nmpn_mulmod_2expp1_basecase (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, \n                                           int c, mpir_ui b, mp_ptr tp)\n{\n  int cy, cz;\n  mp_size_t n, k;\n\n  cy = c & 2;\n  cz = c & 1;\n\n  n = BITS_TO_LIMBS (b);\n  k = GMP_NUMB_BITS * n - b;\n\n  ASSERT(b > 0);\n  ASSERT(n > 0);\n  ASSERT_MPN(yp, n);\n  ASSERT_MPN(zp, n);\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));\n  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, 
n));\n  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));\n  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);\n\n#if WANT_ASSERT\n  {\n     mp_size_t t = n;\n\n     MPN_NORMALIZE(yp, t);\n     ASSERT(cy == 0 || t == 0);\n     \n     t = n; \n     MPN_NORMALIZE(zp, t);\n     ASSERT(cz == 0 || t == 0);\n  }\n#endif\n\n  if (LIKELY (cy == 0))\n    {\n      if (LIKELY (cz == 0))\n\t{\n\t  c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp);\n\t}\n      else\n\t{\n\t  c = mpn_neg_n (xp, yp, n);\n\t  c = mpn_add_1 (xp, xp, n, c);\n\t  xp[n - 1] &= GMP_NUMB_MASK >> k;\n\t}\n    }\n  else\n    {\n      if (LIKELY (cz == 0))\n\t{\n\t  c = mpn_neg_n (xp, zp, n);\n\t  c = mpn_add_1 (xp, xp, n, c);\n\t  xp[n - 1] &= GMP_NUMB_MASK >> k;\n\t}\n      else\n\t{\n\t  c = 0;\n\t  xp[0] = 1;\n\t  MPN_ZERO (xp + 1, n - 1);\n\t}\n    }\n  return c;\n}\n"
  },
  {
    "path": "mpn/generic/mulmod_bexpp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_size_t mulmod_2expp1_table_n[FFT_N_NUM] = MULMOD_TAB;\n\nint mpn_mulmod_Bexpp1(mp_ptr r, mp_srcptr i1, mp_srcptr i2, mp_size_t limbs, mp_ptr tt)\n{\n   mp_size_t bits = limbs * GMP_LIMB_BITS;\n   mp_bitcnt_t depth1, depth = 1;\n   mp_size_t w1, off;\n\n   mp_limb_t c = 2 * i1[limbs] + i2[limbs];\n   \n   if (c & 1)\n   {\n      mpn_neg_n(r, i1, limbs + 1);\n      
mpn_normmod_2expp1(r, limbs);\n      return 0;\n   } else if (c & 2)\n   {\n      mpn_neg_n(r, i2, limbs + 1);\n      mpn_normmod_2expp1(r, limbs);\n      return 0;\n   }\n\n   if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF) \n   {\n       if(bits)\n          r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt);\n       else\n          r[limbs] = 0;\n       return r[limbs];\n   }\n   while ((((mp_limb_t)1)<<depth) < bits) depth++;\n   \n   if (depth < 12) off = mulmod_2expp1_table_n[0];\n   else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];\n   depth1 = depth/2 - off;\n   \n   w1 = bits/(((mp_limb_t)1)<<(2*depth1));\n\n   mpir_fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);\n\n   return r[limbs];\n}\n"
  },
  {
    "path": "mpn/generic/nand_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_nand_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(nand)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/neg_n.c",
    "content": "/* mpn_neg_n\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n Boston, MA 02110-1301, USA.\n\n\n*/\n\n#define __GMP_FORCE_mpn_neg_n 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/nior_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_nior_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(nior)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/nsumdiff_n.c",
    "content": "/*   Copyright 2012 The Code cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_limb_t mpn_nsumdiff_n(mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)\n{\n   mp_limb_t ret;\n   mp_ptr t;\n\n   ASSERT_MPN(x, n);\n   ASSERT_MPN(y, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(s, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(s, y, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(d, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(d, y, n));\n   ASSERT(!MPN_OVERLAP_P(s, n, d, n));\n\n   if (n == 0)\n\t   return 0;\n\n   if ((s == x && d == y) || (s == y && d == x))\n   {\n      t = __GMP_ALLOCATE_FUNC_LIMBS(n);\n\n      ret = mpn_sub_n(t, x, y, n);\n      ret += 2*mpn_add_n(s, x, y, n);\n      ret += 2*mpn_neg_n(s, s, n);\n      MPN_COPY(d, t, n);\n\n      __GMP_FREE_FUNC_LIMBS(t, n);\n\n      return ret;\n   }\n\n   if (s == x || s == y)\n   {\n      ret = mpn_sub_n(d, x, y, n);\n      ret += 2*mpn_add_n(s, x, y, n);\n      ret += 2*mpn_neg_n(s, s, n);\n      \n      return ret;\n   }\n\n   ret = 2*mpn_add_n(s, x, y, n);\n   ret = 2*mpn_neg_n(s, s, n);\n   ret += mpn_sub_n(d, x, y, n);\n\n   return ret;\n}\n"
  },
  {
    "path": "mpn/generic/perfect_square_p.c",
    "content": "/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,\n   zero otherwise.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* change this to \"#define TRACE(x) x\" for diagnostics */\n#define TRACE(x) \n\n\n\n/* PERFSQR_MOD_* detects non-squares using residue tests.\n\n   A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h.  It takes\n   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or\n   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,\n   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP\n   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this\n   case too.\n\n   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or\n   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table\n   data indicating residues and non-residues modulo those divisors.  
The\n   table data is in 1 or 2 limbs worth of bits respectively, per the size of\n   each d.\n\n   A \"modexact\" style remainder is taken to reduce r modulo d.\n   PERFSQR_MOD_IDX implements this, producing an index \"idx\" for use with\n   the table data.  Notice there's just one multiplication by a constant\n   \"inv\", for each d.\n\n   The modexact doesn't produce a true r%d remainder, instead idx satisfies\n   \"-(idx<<PERFSQR_MOD_BITS) == r mod d\".  Because d is odd, this factor\n   -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is\n   accounted for by having the table data suitably permuted.\n\n   The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.\n   In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit\n   each divisor d meaning the modexact multiply can take place entirely\n   within one limb, giving the compiler the chance to optimize it, in a way\n   that say umul_ppmm would not give.\n\n   There's no need for the divisors d to be prime, in fact gen-psqr.c makes\n   a deliberate effort to combine factors so as to reduce the number of\n   separate tests done on r.  But such combining is limited to d <=\n   2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.\n\n   Alternatives:\n\n   It'd be possible to use bigger divisors d, and more than 2 limbs of table\n   data, but this doesn't look like it would be of much help to the prime\n   factors in the usual moduli 2^24-1 or 2^48-1.\n\n   The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're\n   just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime\n   factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have\n   fewer prime factors.\n\n   The nails case usually ends up using mpn_mod_1, which is a lot slower\n   than mpn_mod_34lsub1.  Perhaps other such special moduli could be found\n   for the nails case.  Two-term things like 2^30-2^15-1 might be\n   candidates.  
Or at worst some on-the-fly de-nailing would allow the plain\n   2^24-1 to be used.  Currently nails are too preliminary to be worried\n   about.\n\n*/\n\n#define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)\n\n#define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)\n#define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1)\n\n#define PERFSQR_MOD_34(r, up, usize)                    \\\n  do {                                                  \\\n    (r) = mpn_mod_34lsub1 (up, usize);                  \\\n    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);     \\\n  } while (0)\n\n/* FIXME: The %= here isn't good, and might destroy any savings from keeping\n   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).\n   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor\n   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our\n   normal case, so lets not worry too much about mod_1.  */\n#define PERFSQR_MOD_PP(r, up, usize)                            \\\n  do {                                                          \\\n    if (USE_PREINV_MOD_1)                                       \\\n      {                                                         \\\n        (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,     \\\n                                PERFSQR_PP_INVERTED);           \\\n        (r) %= PERFSQR_PP;                                      \\\n      }                                                         \\\n    else                                                        \\\n      {                                                         \\\n        (r) = mpn_mod_1 (up, usize, PERFSQR_PP);                \\\n      }                                                         \\\n  } while (0)\n\n#define PERFSQR_MOD_IDX(idx, r, d, inv)                 \\\n  do {                                                  \\\n    mp_limb_t  q;                                       \\\n    ASSERT ((r) <= 
PERFSQR_MOD_MASK);                   \\\n    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);   \\\n    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);   \\\n                                                        \\\n    q = ((r) * (inv)) & PERFSQR_MOD_MASK;               \\\n    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));       \\\n    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;              \\\n  } while (0)\n\n#define PERFSQR_MOD_1(r, d, inv, mask)                          \\\n  do {                                                          \\\n    unsigned   idx;                                             \\\n    ASSERT ((d) <= GMP_LIMB_BITS);                              \\\n    PERFSQR_MOD_IDX(idx, r, d, inv);                            \\\n    TRACE (printf (\"  PERFSQR_MOD_1 d=%u r=%lu idx=%u\\n\",       \\\n                   d, r%d, idx));                               \\\n    if ((((mask) >> idx) & 1) == 0)                             \\\n      {                                                         \\\n        TRACE (printf (\"  non-square\\n\"));                      \\\n        return 0;                                               \\\n      }                                                         \\\n  } while (0)\n\n/* The expression \"(int) idx - GMP_LIMB_BITS < 0\" lets the compiler use the\n   sign bit from \"idx-GMP_LIMB_BITS\", which might help avoid a branch.  
*/\n#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)                      \\\n  do {                                                          \\\n    mp_limb_t  m;                                               \\\n    unsigned   idx;                                             \\\n    ASSERT ((d) <= 2*GMP_LIMB_BITS);                            \\\n                                                                \\\n    PERFSQR_MOD_IDX (idx, r, d, inv);                           \\\n    TRACE (printf (\"  PERFSQR_MOD_2 d=%u r=%lu idx=%u\\n\",       \\\n                   d, r%d, idx));                               \\\n    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));        \\\n    idx %= GMP_LIMB_BITS;                                       \\\n    if (((m >> idx) & 1) == 0)                                  \\\n      {                                                         \\\n        TRACE (printf (\"  non-square\\n\"));                      \\\n        return 0;                                               \\\n      }                                                         \\\n  } while (0)\n\n\nint\nmpn_perfect_square_p (mp_srcptr up, mp_size_t usize)\n{\n  ASSERT (usize >= 1);\n\n  TRACE (gmp_printf (\"mpn_perfect_square_p %Nd\\n\", up, usize));\n\n  /* The first test excludes 212/256 (82.8%) of the perfect square candidates\n     in O(1) time.  */\n  {\n    unsigned  idx = up[0] % 0x100;\n    if (((sq_res_0x100[idx / GMP_LIMB_BITS]\n          >> (idx % GMP_LIMB_BITS)) & 1) == 0)\n      return 0;\n  }\n\n#if 0\n  /* Check that we have even multiplicity of 2, and then check that the rest is\n     a possible perfect square.  Leave disabled until we can determine this\n     really is an improvement.  It it is, it could completely replace the\n     simple probe above, since this should through out more non-squares, but at\n     the expense of somewhat more cycles.  
*/\n  {\n    mp_limb_t lo;\n    int cnt;\n    lo = up[0];\n    while (lo == 0)\n      up++, lo = up[0], usize--;\n    count_trailing_zeros (cnt, lo);\n    if ((cnt & 1) != 0)\n      return 0;\t\t\t/* return of not even multiplicity of 2 */\n    lo >>= cnt;\t\t\t/* shift down to align lowest non-zero bit */\n    lo >>= 1;\t\t\t/* shift away lowest non-zero bit */\n    if ((lo & 3) != 0)\n      return 0;\n  }\n#endif\n\n\n  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares\n     according to their residues modulo small primes (or powers of\n     primes).  See perfsqr.h.  */\n  PERFSQR_MOD_TEST (up, usize);\n\n\n  /* For the third and last test, we finally compute the square root,\n     to make sure we've really got a perfect square.  */\n  {\n    mp_ptr root_ptr;\n    int res;\n    TMP_DECL;\n\n    TMP_MARK;\n    root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);\n\n    /* Iff mpn_sqrtrem returns zero, the square is perfect.  */\n    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);\n    TMP_FREE;\n\n    return res;\n  }\n}\n"
  },
  {
    "path": "mpn/generic/popcount.c",
    "content": "/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.\n\nCopyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_popcount\t1\n\n#if OPERATION_popcount\n#define FNAME mpn_popcount\n#define POPHAM(u,v) u\n#endif\n\n#if OPERATION_hamdist\n#define FNAME mpn_hamdist\n#define POPHAM(u,v) u ^ v\n#endif\n\nmp_bitcnt_t\nFNAME (mp_srcptr up,\n#if OPERATION_hamdist\n       mp_srcptr vp,\n#endif\n       mp_size_t n)\n{\n  mp_bitcnt_t result = 0;\n  mp_limb_t p0, p1, p2, p3, x, p01, p23;\n  mp_size_t i;\n\n  ASSERT (n >= 1);\t\t/* Actually, this code handles any n, but some\n\t\t\t\t   assembly implementations does not.  
*/\n\n  for (i = n >> 2; i != 0; i--)\n    {\n      p0 = POPHAM (up[0], vp[0]);\n      p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p1 = POPHAM (up[1], vp[1]);\n      p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p01 = p0 + p1;\t\t\t\t\t\t\t/* 8 0-8 */\n      p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);\t/* 8 0-16 */\n\n      p2 = POPHAM (up[2], vp[2]);\n      p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p3 = POPHAM (up[3], vp[3]);\n      p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n      p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\n      p23 = p2 + p3;\t\t\t\t\t\t\t/* 8 0-8 */\n      p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);\t/* 8 0-16 */\n\n      x = p01 + p23;\t\t\t\t\t\t\t/* 8 0-32 */\n      x = (x >> 8) + x;\t\t\t\t\t\t\t/* 8 0-64 */\n      x = (x >> 16) + x;\t\t\t\t\t\t/* 8 0-128 */\n#if GMP_LIMB_BITS > 32\n      x = ((x >> 32) & 0xff) + (x & 0xff);\t\t\t\t/* 8 0-256 */\n      result += x;\n#else\n      result += x & 0xff;\n#endif\n      up += 4;\n#if OPERATION_hamdist\n      vp += 4;\n#endif\n    }\n\n  n &= 3;\n  if (n != 0)\n    {\n      x = 0;\n      do\n\t{\n\t  p0 = POPHAM (up[0], vp[0]);\n\t  p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;\t\t\t\t/* 2 0-2 */\n\t  p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);\t/* 4 0-4 */\n\t  p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;\t\t\t/* 8 0-8 */\n\n\t  x += p0;\n\t  up += 1;\n#if OPERATION_hamdist\n\t  vp += 1;\n#endif\n\t}\n      while (--n);\n\n      x = (x >> 8) + x;\n      x = (x >> 16) + x;\n#if GMP_LIMB_BITS > 32\n      x = (x >> 32) + x;\n#endif\n      result += x & 0xff;\n    }\n\n  return result;\n}\n"
  },
  {
    "path": "mpn/generic/pow_1.c",
    "content": "/* mpn_pow_1 -- Compute powers R = U^exp.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify it\nunder the terms of the GNU Lesser General Public License as published by the\nFree Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\nFITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\nfor more details.\n\nYou should have received a copy of the GNU Lesser General Public License along\nwith the GNU MP Library; see the file COPYING.LIB.  If not, write to the Free\nSoftware Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,\nUSA.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_size_t\nmpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)\n{\n  mp_limb_t x;\n  int cnt, i;\n  mp_size_t rn;\n  int par;\n\n  if (exp <= 1)\n    {\n      if (exp == 0)\n\t{\n\t  rp[0] = 1;\n\t  return 1;\n\t}\n      else\n\t{\n\t  MPN_COPY (rp, bp, bn);\n\t  return bn;\n\t}\n    }\n\n  /* Count number of bits in exp, and compute where to put initial square in\n     order to magically get results in the entry rp.  Use simple code,\n     optimized for small exp.  For large exp, the bignum operations will take\n     so much time that the slowness of this code will be negligible.  
*/\n  par = 0;\n  cnt = GMP_LIMB_BITS;\n  for (x = exp; x != 0; x >>= 1)\n    {\n      par ^= x & 1;\n      cnt--;\n    }\n  exp <<= cnt;\n\n  if (bn == 1)\n    {\n      mp_limb_t bl = bp[0];\n\n      if ((cnt & 1) != 0)\n\tMP_PTR_SWAP (rp, tp);\n\n      mpn_sqr (rp, bp, bn);\n      rn = 2 * bn; rn -= rp[rn - 1] == 0;\n\n      for (i = GMP_LIMB_BITS - cnt - 1;;)\n\t{\n\t  exp <<= 1;\n\t  if ((exp & GMP_LIMB_HIGHBIT) != 0)\n\t    {\n\t      rp[rn] = mpn_mul_1 (rp, rp, rn, bl);\n\t      rn += rp[rn] != 0;\n\t    }\n\n\t  if (--i == 0)\n\t    break;\n\n\t  mpn_sqr (tp, rp, rn);\n\t  rn = 2 * rn; rn -= tp[rn - 1] == 0;\n\t  MP_PTR_SWAP (rp, tp);\n\t}\n    }\n  else\n    {\n      if (((par ^ cnt) & 1) == 0)\n\tMP_PTR_SWAP (rp, tp);\n\n      mpn_sqr (rp, bp, bn);\n      rn = 2 * bn; rn -= rp[rn - 1] == 0;\n\n      for (i = GMP_LIMB_BITS - cnt - 1;;)\n\t{\n\t  exp <<= 1;\n\t  if ((exp & GMP_LIMB_HIGHBIT) != 0)\n\t    {\n\t      rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);\n\t      MP_PTR_SWAP (rp, tp);\n\t    }\n\n\t  if (--i == 0)\n\t    break;\n\n\t  mpn_sqr (tp, rp, rn);\n\t  rn = 2 * rn; rn -= tp[rn - 1] == 0;\n\t  MP_PTR_SWAP (rp, tp);\n\t}\n    }\n\n  return rn;\n}\n"
  },
  {
    "path": "mpn/generic/powlo.c",
    "content": "/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.\n\nCopyright 2007-2009, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#define getbit(p,bi) \\\n  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)\n\nstatic inline mp_limb_t\ngetbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)\n{\n  int nbits_in_r;\n  mp_limb_t r;\n  mp_size_t i;\n\n  if (bi < nbits)\n    {\n      return p[0] & (((mp_limb_t) 1 << bi) - 1);\n    }\n  else\n    {\n      bi -= nbits;\t\t\t/* bit index of low bit to extract */\n      i = bi / GMP_NUMB_BITS;\t\t/* word index of low bit to extract */\n      bi %= GMP_NUMB_BITS;\t\t/* bit index in low word */\n      r = p[i] >> bi;\t\t\t/* extract (low) bits */\n      nbits_in_r = GMP_NUMB_BITS - bi;\t/* number of bits now in r */\n      if (nbits_in_r < nbits)\t\t/* did we get enough bits? 
*/\n\tr += p[i + 1] << nbits_in_r;\t/* prepend bits from higher word */\n      return r & (((mp_limb_t ) 1 << nbits) - 1);\n    }\n}\n\nstatic inline int\nwin_size (mp_bitcnt_t eb)\n{\n  int k;\n  static mp_bitcnt_t x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};\n  for (k = 0; eb > x[k]; k++)\n    ;\n  return k;\n}\n\n/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.\n   Requires that ep[en-1] is non-zero.\n   Uses scratch space tp[3n-1..0], i.e., 3n words.  */\nvoid\nmpn_powlo (mp_ptr rp, mp_srcptr bp,\n\t   mp_srcptr ep, mp_size_t en,\n\t   mp_size_t n, mp_ptr tp)\n{\n  int cnt;\n  mp_bitcnt_t ebi;\n  int windowsize, this_windowsize;\n  mp_limb_t expbits;\n  mp_limb_t *pp, *this_pp, *last_pp;\n  mp_limb_t *b2p;\n  long i;\n  TMP_DECL;\n\n  ASSERT (en > 1 || (en == 1 && ep[0] > 1));\n\n  TMP_MARK;\n\n  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);\n\n  windowsize = win_size (ebi);\n\n  /* JPF: Not sure what ign is right now */\n  pp = TMP_ALLOC_LIMBS (((n) << (windowsize - 1)) + n); /* + n is for mullow ign part */\n\n  this_pp = pp;\n\n  MPN_COPY (this_pp, bp, n); /* JPF: this only needs n and not 2*n */\n\n  b2p = tp + 2*n;\n\n  /* Store b^2 in b2.  */\n  mpn_sqr (tp, bp, n);\t/* FIXME: Use \"mpn_sqrlo\" */\n  MPN_COPY (b2p, tp, n);\n\n  /* Precompute odd powers of b and put them in the temporary area at pp.  
*/\n  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)\n    {\n      last_pp = this_pp;\n      this_pp += n;\n      /* Note that last mullow overflows but we have n spare limbs for the \"ign part\" */\n      mpn_mullow_n (this_pp, last_pp, b2p, n);\n    }\n\n  expbits = getbits (ep, ebi, windowsize);\n  if (ebi < windowsize)\n    ebi = 0;\n  else\n    ebi -= windowsize;\n\n  count_trailing_zeros (cnt, expbits);\n  ebi += cnt;\n  expbits >>= cnt;\n\n  MPN_COPY (rp, pp + n * (expbits >> 1), n); /* JPF */\n\n  while (ebi != 0)\n    {\n      while (getbit (ep, ebi) == 0)\n\t{\n\t  mpn_sqr (tp, rp, n);\t/* FIXME: Use \"mpn_sqrlo\" */\n\t  MPN_COPY (rp, tp, n);\n\t  ebi--;\n\t  if (ebi == 0)\n\t    goto done;\n\t}\n\n      /* The next bit of the exponent is 1.  Now extract the largest block of\n\t bits <= windowsize, and such that the least significant bit is 1.  */\n\n      expbits = getbits (ep, ebi, windowsize);\n      this_windowsize = windowsize;\n      if (ebi < windowsize)\n\t{\n\t  this_windowsize -= windowsize - ebi;\n\t  ebi = 0;\n\t}\n      else\n\tebi -= windowsize;\n\n      count_trailing_zeros (cnt, expbits);\n      this_windowsize -= cnt;\n      ebi += cnt;\n      expbits >>= cnt;\n\n      do\n\t{\n\t  mpn_sqr (tp, rp, n);\n\t  MPN_COPY (rp, tp, n);\n\t  this_windowsize--;\n\t}\n      while (this_windowsize != 0);\n\n      mpn_mullow_n (tp, rp, pp + n * (expbits >> 1), n); /* JPF */\n      MPN_COPY (rp, tp, n);\n    }\n\n done:\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/powm.c",
    "content": "/* mpn_powm -- Compute R = U^E mod M.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2007-2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n\n/*\n  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.\n\n  1. W <- U\n\n  2. T <- (B^n * U) mod M                Convert to REDC form\n\n  3. Compute table U^1, U^3, U^5... of E-dependent size\n\n  4. While there are more bits in E\n       W <- power left-to-right base-k\n\n\n  TODO:\n\n   * Make getbits a macro, thereby allowing it to update the index operand.\n     That will simplify the code using getbits.  (Perhaps make getbits' sibling\n     getbit then have similar form, for symmetry.)\n\n   * Write an itch function.  
Or perhaps get rid of tp parameter since the huge\n     pp area is allocated locally anyway?\n\n   * Choose window size without looping.  (Superoptimize or think(tm).)\n\n   * Handle small bases with initial, reduction-free exponentiation.\n\n   * Call new division functions, not mpn_tdiv_qr.\n\n   * Consider special code for one-limb M.\n\n   * How should we handle the redc1/redc2/redc_n choice?\n     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))\n     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))\n     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))\n     This disregards the addmul_N constant term, but we could think of\n     that as part of the respective mullo.\n\n   * When U (the base) is small, we should start the exponentiation with plain\n     operations, then convert that partial result to REDC form.\n\n   * When U is just one limb, should it be handled without the k-ary tricks?\n     We could keep a factor of B^n in W, but use U' = BU as base.  
After\n     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B\n     mod M.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#undef MPN_REDC_1\n#define MPN_REDC_1(rp, up, mp, n, invm)\t\t\t\t\t\\\n    (mpn_redc_1 (rp, up, mp, n, invm))\n\n#undef MPN_REDC_2\n#define MPN_REDC_2(rp, up, mp, n, mip)\t\t\t\t\t\\\n    (mpn_redc_2 (rp, up, mp, n, mip))\n\n#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2\n#define WANT_REDC_2 1\n#endif\n\n#define getbit(p,bi) \\\n  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)\n\nstatic inline mp_limb_t\ngetbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)\n{\n  int nbits_in_r;\n  mp_limb_t r;\n  mp_size_t i;\n\n  if (bi < nbits)\n    {\n      return p[0] & (((mp_limb_t) 1 << bi) - 1);\n    }\n  else\n    {\n      bi -= nbits;\t\t\t/* bit index of low bit to extract */\n      i = bi / GMP_NUMB_BITS;\t\t/* word index of low bit to extract */\n      bi %= GMP_NUMB_BITS;\t\t/* bit index in low word */\n      r = p[i] >> bi;\t\t\t/* extract (low) bits */\n      nbits_in_r = GMP_NUMB_BITS - bi;\t/* number of bits now in r */\n      if (nbits_in_r < nbits)\t\t/* did we get enough bits? */\n\tr += p[i + 1] << nbits_in_r;\t/* prepend bits from higher word */\n      return r & (((mp_limb_t ) 1 << nbits) - 1);\n    }\n}\n\nstatic inline int\nwin_size (mp_bitcnt_t eb)\n{\n  int k;\n  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};\n  for (k = 1; eb > x[k]; k++)\n    ;\n  return k;\n}\n\n/* Convert U to REDC form, U_r = B^n * U mod M */\nstatic void\nredcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)\n{\n  mp_ptr tp, qp;\n  TMP_DECL;\n  TMP_MARK;\n\n  tp = TMP_ALLOC_LIMBS (un + n);\n  qp = TMP_ALLOC_LIMBS (un + 1);\t/* FIXME: Put at tp+? 
*/\n\n  MPN_ZERO (tp, n);\n  MPN_COPY (tp + n, up, un);\n  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);\n  TMP_FREE;\n}\n\n/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]\n   Requires that mp[n-1..0] is odd.\n   Requires that ep[en-1..0] is > 1.\n   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */\nvoid\nmpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,\n\t  mp_srcptr ep, mp_size_t en,\n\t  mp_srcptr mp, mp_size_t n, mp_ptr tp)\n{\n  mp_limb_t ip[2], *mip;\n  int cnt;\n  mp_bitcnt_t ebi;\n  int windowsize, this_windowsize;\n  mp_limb_t expbits;\n  mp_ptr pp, this_pp;\n  long i;\n  TMP_DECL;\n\n  ASSERT (en > 1 || (en == 1 && ep[0] > 1));\n  ASSERT (n >= 1 && ((mp[0] & 1) != 0));\n\n  TMP_MARK;\n\n  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);\n\n#if 0\n  if (bn < n)\n    {\n      /* Do the first few exponent bits without mod reductions,\n\t until the result is greater than the mod argument.  */\n      for (;;)\n\t{\n\t  mpn_sqr (tp, this_pp, tn);\n\t  tn = tn * 2 - 1,  tn += tp[tn] != 0;\n\t  if (getbit (ep, ebi) != 0)\n\t    mpn_mul (..., tp, tn, bp, bn);\n\t  ebi--;\n\t}\n    }\n#endif\n\n  windowsize = win_size (ebi);\n\n#if WANT_REDC_2\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))\n    {\n      mip = ip;\n      modlimb_invert (mip[0], mp[0]);\n      mip[0] = -mip[0];\n    }\n  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n    {\n      mip = ip;\n      mpn_binvert (mip, mp, 2, tp);\n      mip[0] = -mip[0]; mip[1] = ~mip[1];\n    }\n#else\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n    {\n      mip = ip;\n      modlimb_invert (mip[0], mp[0]);\n      mip[0] = -mip[0];\n    }\n#endif\n  else\n    {\n      mip = TMP_ALLOC_LIMBS (n);\n      mpn_binvert (mip, mp, n, tp);\n    }\n\n  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));\n\n  this_pp = pp;\n  redcify (this_pp, bp, bn, mp, n);\n\n  /* Store b^2 at rp.  
*/\n  mpn_sqr (tp, this_pp, n);\n#if WANT_REDC_2\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))\n    MPN_REDC_1 (rp, tp, mp, n, mip[0]);\n  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n    MPN_REDC_2 (rp, tp, mp, n, mip);\n#else\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n    MPN_REDC_1 (rp, tp, mp, n, mip[0]);\n#endif\n  else\n    mpn_redc_n (rp, tp, mp, n, mip);\n\n  /* Precompute odd powers of b and put them in the temporary area at pp.  */\n  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)\n    {\n      mpn_mul_n (tp, this_pp, rp, n);\n      this_pp += n;\n#if WANT_REDC_2\n      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))\n\tMPN_REDC_1 (this_pp, tp, mp, n, mip[0]);\n      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n\tMPN_REDC_2 (this_pp, tp, mp, n, mip);\n#else\n      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n\tMPN_REDC_1 (this_pp, tp, mp, n, mip[0]);\n#endif\n      else\n\tmpn_redc_n (this_pp, tp, mp, n, mip);\n    }\n\n  expbits = getbits (ep, ebi, windowsize);\n  if (ebi < windowsize)\n    ebi = 0;\n  else\n    ebi -= windowsize;\n\n  count_trailing_zeros (cnt, expbits);\n  ebi += cnt;\n  expbits >>= cnt;\n\n  MPN_COPY (rp, pp + n * (expbits >> 1), n);\n\n#define INNERLOOP\t\t\t\t\t\t\t\\\n  while (ebi != 0)\t\t\t\t\t\t\t\\\n    {\t\t\t\t\t\t\t\t\t\\\n      while (getbit (ep, ebi) == 0)\t\t\t\t\t\\\n\t{\t\t\t\t\t\t\t\t\\\n\t  MPN_SQR (tp, rp, n);\t\t\t\t\t\t\\\n\t  MPN_REDUCE (rp, tp, mp, n, mip);\t\t\t\t\\\n\t  ebi--;\t\t\t\t\t\t\t\\\n\t  if (ebi == 0)\t\t\t\t\t\t\t\\\n\t    goto done;\t\t\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n      /* The next bit of the exponent is 1.  Now extract the largest\t\\\n\t block of bits <= windowsize, and such that the least\t\t\\\n\t significant bit is 1.  
*/\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n      expbits = getbits (ep, ebi, windowsize);\t\t\t\t\\\n      this_windowsize = windowsize;\t\t\t\t\t\\\n      if (ebi < windowsize)\t\t\t\t\t\t\\\n\t{\t\t\t\t\t\t\t\t\\\n\t  this_windowsize -= windowsize - ebi;\t\t\t\t\\\n\t  ebi = 0;\t\t\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\t\\\n      else\t\t\t\t\t\t\t\t\\\n        ebi -= windowsize;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n      count_trailing_zeros (cnt, expbits);\t\t\t\t\\\n      this_windowsize -= cnt;\t\t\t\t\t\t\\\n      ebi += cnt;\t\t\t\t\t\t\t\\\n      expbits >>= cnt;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n      do\t\t\t\t\t\t\t\t\\\n\t{\t\t\t\t\t\t\t\t\\\n\t  MPN_SQR (tp, rp, n);\t\t\t\t\t\t\\\n\t  MPN_REDUCE (rp, tp, mp, n, mip);\t\t\t\t\\\n\t  this_windowsize--;\t\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\t\\\n      while (this_windowsize != 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);\t\t\t\\\n      MPN_REDUCE (rp, tp, mp, n, mip);\t\t\t\t\t\\\n    }\n\n\n#if WANT_REDC_2\n  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_KARATSUBA_THRESHOLD)\n    {\n      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))\n\t{\n\t  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t}\n      else if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\n\t{\n\t  if (MUL_KARATSUBA_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, 
SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_2 (rp, tp, mp, n, mip)\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_2 (rp, tp, mp, n, mip)\n\t      INNERLOOP;\n\t    }\n\t}\n      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_2 (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n      else\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n    }\n  else\n    {\n      if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\n\t{\n\t  if (MUL_KARATSUBA_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t}\n      else if (BELOW_THRESHOLD (n, 
REDC_1_TO_REDC_2_THRESHOLD))\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t  INNERLOOP;\n\t}\n      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_2 (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n      else\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n    }\n\n#else  /* WANT_REDC_2 */\n\n  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_KARATSUBA_THRESHOLD)\n    {\n      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n\t{\n\t  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t}\n      else if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\n\t{\n\t  if (MUL_KARATSUBA_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase 
(r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t      INNERLOOP;\n\t    }\n\t}\n      else\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n    }\n  else\n    {\n      if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\n\t{\n\t  if (MUL_KARATSUBA_THRESHOLD < SQR_BASECASE_THRESHOLD\n\t      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_mul_basecase (r,a,n,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t  else\n\t    {\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_basecase (r,a,n,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr_basecase (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t      INNERLOOP;\n\t    }\n\t}\n      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define MPN_REDUCE(rp,tp,mp,n,mip)\tMPN_REDC_1 (rp, tp, mp, n, mip[0])\n\t  INNERLOOP;\n\t}\n      else\n\t{\n#undef MPN_MUL_N\n#undef MPN_SQR\n#undef MPN_REDUCE\n#define MPN_MUL_N(r,a,b,n)\t\tmpn_mul_n (r,a,b,n)\n#define MPN_SQR(r,a,n)\t\t\tmpn_sqr (r,a,n)\n#define 
MPN_REDUCE(rp,tp,mp,n,mip)\tmpn_redc_n (rp, tp, mp, n, mip)\n\t  INNERLOOP;\n\t}\n    }\n#endif  /* WANT_REDC_2 */\n\n done:\n\n  MPN_COPY (tp, rp, n);\n  MPN_ZERO (tp + n, n);\n\n#if WANT_REDC_2\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))\n    MPN_REDC_1 (rp, tp, mp, n, mip[0]);\n  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))\n    MPN_REDC_2 (rp, tp, mp, n, mip);\n#else\n  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))\n    MPN_REDC_1 (rp, tp, mp, n, mip[0]);\n#endif\n  else\n    mpn_redc_n (rp, tp, mp, n, mip);\n\n  if (mpn_cmp (rp, mp, n) >= 0)\n    mpn_sub_n (rp, rp, mp, n);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/preinv_divrem_1.c",
    "content": "/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if !HAVE_NATIVE_mpn_preinv_divrem_1 \n\n/* Don't bloat a shared library with unused code. */\n#if USE_PREINV_DIVREM_1\n\n/* Same test here for skipping one divide step as in mpn_divrem_1.\n\n   The main reason for a separate shift==0 case is that not all CPUs give\n   zero for \"n0 >> BITS_PER_MP_LIMB\" which would arise in the general case\n   code used on shift==0.  shift==0 is also reasonably common in __mp_bases\n   big_base, for instance base==10 on a 64-bit limb.\n\n   Under shift!=0 it would be possible to call mpn_lshift to adjust the\n   dividend all in one go (into the quotient space say), rather than\n   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster\n   than what the compiler can generate for EXTRACT.  
But this is left to CPU\n   specific implementations to consider, especially since EXTRACT isn't on\n   the dependent chain.\n\n   If size==0 then the result is simply xsize limbs of zeros, but nothing\n   special is done for that, since it wouldn't be a usual call, and\n   certainly never arises from mpn_get_str which is our main caller.  */\n\nmp_limb_t\nmpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,\n\t\t     mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,\n\t\t     mp_limb_t dinv, int shift)\n{\n  mp_limb_t  ahigh, qhigh, r;\n  mp_size_t  i;\n  mp_limb_t  n1, n0;\n  mp_limb_t  d;\n\n  ASSERT (xsize >= 0);\n  ASSERT (size >= 1);\n  ASSERT (d_unnorm != 0);\n#if WANT_ASSERT\n  {\n    int        want_shift;\n    mp_limb_t  want_dinv;\n    count_leading_zeros (want_shift, d_unnorm);\n    ASSERT (shift == want_shift);\n    invert_limb (want_dinv, d_unnorm << shift);\n    ASSERT (dinv == want_dinv);\n  }\n#endif\n  /* FIXME: What's the correct overlap rule when xsize!=0? */\n  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));\n\n  ahigh = ap[size-1];\n  d = d_unnorm << shift;\n  qp += (size + xsize - 1);   /* dest high limb */\n\n  if (shift == 0)\n    {\n      /* High quotient limb is 0 or 1, and skip a divide step. */\n      r = ahigh;\n      qhigh = (r >= d);\n      r = (qhigh ? 
r-d : r);\n      *qp-- = qhigh;\n      size--;\n\n      for (i = size-1; i >= 0; i--)\n\t{\n\t  n0 = ap[i];\n\t  udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);\n\t  qp--;\n\t}\n    }\n  else\n    {\n      r = 0;\n      if (ahigh < d_unnorm)\n\t{\n\t  r = ahigh << shift;\n\t  *qp-- = 0;\n\t  size--;\n\t  if (size == 0)\n\t    goto done_integer;\n\t}\n\n      n1 = ap[size-1];\n      r |= n1 >> (BITS_PER_MP_LIMB - shift);\n\n      for (i = size-2; i >= 0; i--)\n\t{\n\t  ASSERT (r < d);\n\t  n0 = ap[i];\n\t  udiv_qrnnd_preinv (*qp, r, r,\n\t\t\t     ((n1 << shift) | (n0 >> (BITS_PER_MP_LIMB - shift))),\n\t\t\t     d, dinv);\n\t  qp--;\n\t  n1 = n0;\n\t}\n      udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);\n      qp--;\n    }\n\n done_integer:\n  for (i = 0; i < xsize; i++)\n    {\n      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);\n      qp--;\n    }\n\n  return r >> shift;\n}\n\n#endif /* USE_PREINV_DIVREM_1 */\n#endif\n"
  },
  {
    "path": "mpn/generic/preinv_mod_1.c",
    "content": "/* mpn_preinv_mod_1 (up, un, d, dinv) -- Divide (UP,,UN) by the normalized D.\n   DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.\n   Return the single-limb remainder.\n\nCopyright 1991, 1993, 1994, 2000, 2001, 2002, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify it\nunder the terms of the GNU Lesser General Public License as published by the\nFree Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\nFITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\nfor more details.\n\nYou should have received a copy of the GNU Lesser General Public License along\nwith the GNU MP Library; see the file COPYING.LIB.  If not, write to the Free\nSoftware Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,\nUSA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if !HAVE_NATIVE_mpn_preinv_mod_1\n\n/* This function used to be documented, but is now considered obsolete.  It\n   continues to exist for binary compatibility, even when not required\n   internally.  */\n\nmp_limb_t\nmpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)\n{\n  mp_size_t i;\n  mp_limb_t n0, r;\n  mp_limb_t dummy;\n\n  ASSERT (un >= 1);\n  ASSERT (d & GMP_LIMB_HIGHBIT);\n\n  r = up[un - 1];\n  if (r >= d)\n    r -= d;\n\n  for (i = un - 2; i >= 0; i--)\n    {\n      n0 = up[i];\n      udiv_qrnnd_preinv (dummy, r, r, n0, d, dinv);\n    }\n  return r;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/random.c",
    "content": "/* mpn_random -- Generate random numbers.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* This function is Obsolete 13/09/2009 */\nvoid\nmpn_random (mp_ptr ptr, mp_size_t size)\n{\n  gmp_randstate_ptr  rands;\n\n  /* FIXME: Is size==0 supposed to be allowed? */\n  ASSERT (size >= 0);\n\n  if (size == 0)\n    return;\n\n  rands = RANDS;\n  _gmp_rand (ptr, rands, size * GMP_NUMB_BITS);\n\n  /* Make sure the most significant limb is non-zero.  */\n  while (ptr[size-1] == 0)\n    _gmp_rand (&ptr[size-1], rands, GMP_NUMB_BITS);\n}\n"
  },
  {
    "path": "mpn/generic/random2.c",
    "content": "/* mpn_random2 -- Generate random numbers with relatively long strings\n   of ones and zeroes.  Suitable for border testing.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nstatic void gmp_rrandomb(mp_ptr, gmp_randstate_t, mp_bitcnt_t);\n\n/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.\n   Thus, we get the same random number sequence in the common cases.\n   FIXME: We should always generate the same random number sequence!  */\n#if GMP_NUMB_BITS < 32\n#define BITS_PER_RANDCALL GMP_NUMB_BITS\n#else\n#define BITS_PER_RANDCALL 32\n#endif\n\n/* This function is obsolete 13/09/2009 */\nvoid\nmpn_random2(mp_ptr rp, mp_size_t n)\n{\n  gmp_randstate_ptr rstate = RANDS;\n  int bit_pos;\t\t\t/* bit number of least significant bit where\n\t\t\t\t   next bit field to be inserted */\n  mp_limb_t ran, ranm;\t\t/* buffer for random bits */\n\n  /* FIXME: Is n==0 supposed to be allowed? */\n  ASSERT (n >= 0);\n\n  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n  ran = ranm;\n\n  /* Start off at a random bit position in the most significant limb.  
*/\n  bit_pos = ran % GMP_NUMB_BITS;\n\n  gmp_rrandomb (rp, rstate, n * GMP_NUMB_BITS - bit_pos);\n}\n\nstatic void\ngmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)\n{\n  mp_bitcnt_t bi;\n  mp_limb_t ranm;\t\t/* buffer for random bits */\n  unsigned cap_chunksize, chunksize;\n  mp_size_t i;\n\n  /* Set entire result to 111..1  */\n  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;\n  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;\n  for (i = i - 1; i >= 0; i--)\n    rp[i] = GMP_NUMB_MAX;\n\n  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n  cap_chunksize = nbits / (ranm % 4 + 1);\n  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */\n\n  bi = nbits;\n\n  for (;;)\n    {\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      if (bi == 0)\n\tbreak;\t\t\t/* low chunk is ...1 */\n\n      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;\n\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);\n\n      if (bi == 0)\n\tbreak;\t\t\t/* low chunk is ...0 */\n    }\n}\n"
  },
  {
    "path": "mpn/generic/randomb.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpn_randomb(mp_ptr rp, gmp_randstate_t rnd, mp_size_t n)\n{\n   ASSERT(n > 0);\n\n   _gmp_rand(rp, rnd, n*GMP_NUMB_BITS);\n\n   while (rp[n - 1] == 0)\n      _gmp_rand(rp + n - 1, rnd, GMP_NUMB_BITS);\n\n   return;\n}\n"
  },
  {
    "path": "mpn/generic/redc_1.c",
    "content": "/* mpn_redc_1\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software\nFoundation, Inc.  Contributed by Paul Zimmermann.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Set cp[] <- tp[]/R^n mod mp[].  Clobber tp[].\n   mp[] is n limbs; tp[] is 2n limbs.  */\nvoid mpn_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim)\n{\n  mp_limb_t cy;\n  mp_limb_t q;\n  mp_size_t j;\n\n  ASSERT_MPN (tp, 2*n);\n\n  for (j = 0; j < n; j++)\n    {\n      q = (tp[0] * Nprim) & GMP_NUMB_MASK;\n      tp[0] = mpn_addmul_1 (tp, mp, n, q);\n      tp++;\n    }\n\n  cy = mpn_add_n (cp, tp, tp - n, n);\n  \n  if (cy != 0)\n    mpn_sub_n (cp, cp, mp, n);\n}\n\n"
  },
  {
    "path": "mpn/generic/redc_2.c",
    "content": "/* mpn_redc_2.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].\n   mp[] is n limbs; up[] is 2n limbs.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n\nCopyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if GMP_NAIL_BITS != 0\n#error not supported\n#endif\n\n/* For testing purposes, define our own mpn_addmul_2 if there is none already\n   available.  
*/\n#ifndef HAVE_NATIVE_mpn_addmul_2\nmp_limb_t\nmpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)\n{\n  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);\n  return mpn_addmul_1 (rp + 1, up, n, vp[1]);\n}\n#endif\n\n#if defined (__GNUC__) && defined (__ia64) && W_TYPE_SIZE == 64\n#define umul2low(ph, pl, uh, ul, vh, vl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _ph, _pl;\t\t\t\t\t\t\t\\\n    __asm__ (\"xma.hu %0 = %3, %5, f0\\n\\t\"\t\t\t\t\\\n\t     \"xma.l %1 = %3, %5, f0\\n\\t\"\t\t\t\t\\\n\t     \";;\\n\\t\"\t\t\t\t\t\t\t\\\n\t     \"xma.l %0 = %3, %4, %0\\n\\t\"\t\t\t\t\\\n\t     \";;\\n\\t\"\t\t\t\t\t\t\t\\\n\t     \"xma.l %0 = %2, %5, %0\"\t\t\t\t\t\\\n\t     : \"=&f\" (ph), \"=&f\" (pl)\t\t\t\t\t\\\n\t     : \"f\" (uh), \"f\" (ul), \"f\" (vh), \"f\" (vl));\t\t\t\\\n  } while (0)\n#endif\n\n#ifndef umul2low\n#define umul2low(ph, pl, uh, ul, vh, vl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t _ph, _pl;\t\t\t\t\t\t\t\\\n    umul_ppmm (_ph, _pl, ul, vl);\t\t\t\t\t\\\n    (ph) = _ph + (ul) * (vh) + (uh) * (vl);\t\t\t\t\\\n    (pl) = _pl;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\nvoid\nmpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)\n{\n  mp_limb_t q[2];\n  mp_size_t j;\n  mp_limb_t upn;\n  mp_limb_t cy;\n\n  ASSERT (n > 0);\n  ASSERT_MPN (up, 2*n);\n\n  if ((n & 1) != 0)\n    {\n      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);\n      up++;\n    }\n\n  for (j = n - 2; j >= 0; j -= 2)\n    {\n      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);\n      upn = up[n];\t\t/* mpn_addmul_2 overwrites this */\n      up[1] = mpn_addmul_2 (up, mp, n, q);\n      up[0] = up[n];\n      up[n] = upn;\n      up += 2;\n    }\n  cy = mpn_add_n (rp, up, up - n, n);\n  if (cy != 0)\n    mpn_sub_n (rp, rp, mp, n);\n}\n"
  },
  {
    "path": "mpn/generic/redc_n.c",
    "content": "/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].\n   mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n\nCopyright 2009, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n  TODO\n\n  * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a\n    future mpn_mulhi) for the range we will be called.  
Follow up that\n    assumption.\n\n  * Decrease scratch usage.\n\n  * Consider removing the residue canonicalisation.\n*/\n\nvoid\nmpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)\n{\n  mp_ptr xp, yp, scratch;\n  mp_limb_t cy;\n  mp_size_t rn;\n  TMP_DECL;\n  TMP_MARK;\n\n  ASSERT (n > 8);\n\n  rn = mpn_mulmod_bnm1_next_size (n);\n\n  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));\n\n  xp = scratch;\n  mpn_mullow_n (xp, up, ip, n); /* JPF: assuming now it does not overflow */\n\n  yp = scratch + n;\n  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);\n\n  ASSERT_ALWAYS (2 * n > rn);\t\t\t\t/* could handle this */\n\n  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn);\t\t/* undo wrap around */\n  MPN_DECR_U (yp + 2*n - rn, rn, cy);\n\n  cy = mpn_sub_n (rp, up + n, yp + n, n);\n  if (cy != 0)\n    mpn_add_n (rp, rp, mp, n);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/rootrem.c",
    "content": "/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and\n   store the truncated integer part at rootp and the remainder at remp.\n\n   Contributed by Paul Zimmermann (algorithm) and\n   Paul Zimmermann and Torbjorn Granlund (implementation).\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S\n   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n/* FIXME:\n     This implementation is not optimal when remp == NULL, since the complexity\n     is M(n), whereas it should be M(n/k) on average.\n*/\n\n#include <stdio.h>\t\t/* for NULL */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,\n\t\t\t\t       mp_limb_t, int);\n\n#define MPN_RSHIFT(cy,rp,up,un,cnt) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if ((cnt) != 0)\t\t\t\t\t\t\t\\\n      cy = mpn_rshift (rp, up, un, cnt);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY_INCR (rp, up, un);\t\t\t\t\t\\\n\tcy = 0;\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n#define MPN_LSHIFT(cy,rp,up,un,cnt) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if ((cnt) != 0)\t\t\t\t\t\t\t\\\n      cy = mpn_lshift (rp, up, un, cnt);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY_DECR (rp, up, un);\t\t\t\t\t\\\n\tcy = 0;\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n\n/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.\n   If remp <> NULL, put in {remp, un} the remainder.\n   Return the size (in limbs) of the remainder if remp <> NULL,\n\t  or a non-zero value iff the remainder is non-zero when remp = NULL.\n   Assumes:\n   (a) up[un-1] is not zero\n   (b) rootp has at least space for ceil(un/k) limbs\n   (c) remp has at least space for un limbs (in case remp <> NULL)\n   (d) the operands do not overlap.\n\n   The auxiliary memory usage is 3*un+2 if remp = NULL,\n   and 2*un+2 if remp <> NULL.  
FIXME: This is an incorrect comment.\n*/\nmp_size_t\nmpn_rootrem (mp_ptr rootp, mp_ptr remp,\n\t     mp_srcptr up, mp_size_t un, mp_limb_t k)\n{\n  ASSERT (un > 0);\n  ASSERT (up[un - 1] != 0);\n  ASSERT (k > 1);\n\n  if(BELOW_THRESHOLD(un,ROOTREM_THRESHOLD))\n  {\n\t  if (remp == NULL)\n\t  {\n\t\t  mp_ptr temp;\n\t\t  mp_size_t ret;\n\t\t  TMP_DECL;\n\t\t  TMP_MARK;\n\t\t  temp = TMP_ALLOC_LIMBS(un);\n\t\t  ret = mpn_rootrem_basecase(rootp,temp,up,un,k);\n\t\t  TMP_FREE;\n          return ret;\n\t  } else \n\t\t  return mpn_rootrem_basecase(rootp,remp,up,un,k);\n  }\n\n\n  if ((remp == NULL) && (un / k > 2))\n    /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,\n       which will produce an approximate root with one more limb,\n       so that in most cases we can conclude. */\n    {\n      mp_ptr sp, wp;\n      mp_size_t rn, sn, wn;\n      TMP_DECL;\n      TMP_MARK;\n      wn = un + k;\n      wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */\n      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */\n      sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */\n      MPN_COPY (wp + k, up, un);\n      MPN_ZERO (wp, k);\n      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);\n      /* the approximate root S = {sp,sn} is either the correct root of\n\t {sp,sn}, or one too large. Thus unless the least significant limb\n\t of S is 0 or 1, we can deduce the root of {up,un} is S truncated by\n\t one limb. (In case sp[0]=1, we can deduce the root, but not decide\n\t whether it is exact or not.) 
*/\n      MPN_COPY (rootp, sp + 1, sn - 1);\n      TMP_FREE;\n      return rn;\n    }\n  else /* remp <> NULL */\n    {\n      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);\n    }\n}\n\n/* if approx is non-zero, does not compute the final remainder */\nstatic mp_size_t\nmpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,\n\t\t      mp_limb_t k, int approx)\n{\n  mp_ptr qp, rp, sp, wp, scratch;\n  mp_size_t qn, rn, sn, wn, nl, bn;\n  mp_limb_t save, save2, cy;\n  unsigned long int unb; /* number of significant bits of {up,un} */\n  unsigned long int xnb; /* number of significant bits of the result */\n  unsigned int cnt;\n  unsigned long b, kk;\n  unsigned long sizes[GMP_NUMB_BITS + 1];\n  int ni, i;\n  int c;\n  int logk;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* qp and wp need enough space to store S'^k where S' is an approximate\n     root. Since S' can be as large as S+2, the worst case is when S=2 and\n     S'=4. But then since we know the number of bits of S in advance, S'\n     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.\n     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k\n     fits in un limbs, the number of extra limbs needed is bounded by\n     ceil(k*log2(3/2)/GMP_NUMB_BITS). 
*/\n#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)\n  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder\n\t\t\t\t\tof R/(k*S^(k-1)), and S^k */\n  if (remp == NULL)\n    {\n      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */\n      scratch = rp;\t\t\t /* used by mpn_div_q */\n    }\n  else\n    {\n      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */\n      rp = remp;\n    }\n  sp = rootp;\n  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),\n\t\t\t\t\tand temporary for mpn_pow_1 */\n  count_leading_zeros (cnt, up[un - 1]);\n  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;\n  /* unb is the number of bits of the input U */\n\n  xnb = (unb - 1) / k + 1;\t/* ceil (unb / k) */\n  /* xnb is the number of bits of the root R */\n\n  if (xnb == 1) /* root is 1 */\n    {\n      if (remp == NULL)\n\tremp = rp;\n      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);\n      MPN_NORMALIZE (remp, un);\t/* There should be at most one zero limb,\n\t\t\t\t   if we demand u to be normalized  */\n      rootp[0] = 1;\n      TMP_FREE;\n      return un;\n    }\n\n  /* We initialize the algorithm with a 1-bit approximation to zero: since we\n     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that\n     r0^k = 2^(k*(xnb-1)), that we subtract to the input. 
*/\n  kk = k * (xnb - 1);\t\t/* number of truncated bits in the input */\n  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */\n  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);\n  mpn_sub_1 (rp, rp, rn, 1);\t/* subtract the initial approximation: since\n\t\t\t\t   the non-truncated part is less than 2^k, it\n\t\t\t\t   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */\n  sp[0] = 1;\t\t\t/* initial approximation */\n  sn = 1;\t\t\t/* it has one limb */\n\n  for (logk = 1; ((k - 1) >> logk) != 0; logk++)\n    ;\n  /* logk = ceil(log(k)/log(2)) */\n\n  b = xnb - 1; /* number of remaining bits to determine in the kth root */\n  ni = 0;\n  while (b != 0)\n    {\n      /* invariant: here we want b+1 total bits for the kth root */\n      sizes[ni] = b;\n      /* if c is the new value of b, this means that we'll go from a root\n\t of c+1 bits (say s') to a root of b+1 bits.\n\t It is proved in the book \"Modern Computer Arithmetic\" from Brent\n\t and Zimmermann, Chapter 1, that\n\t if s' >= k*beta, then at most one correction is necessary.\n\t Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that\n\t c >= ceil((b + log2(k))/2). */\n      b = (b + logk + 1) / 2;\n      if (b >= sizes[ni])\n\tb = sizes[ni] - 1;\t/* add just one bit at a time */\n      ni++;\n    }\n  sizes[ni] = 0;\n  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);\n  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with\n     sizes[i] <= 2 * sizes[i+1].\n     Newton iteration will first compute sizes[ni-1] extra bits,\n     then sizes[ni-2], ..., then sizes[0] = b. 
*/\n\n  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */\n  wn = 1;\n  for (i = ni; i != 0; i--)\n    {\n      /* 1: loop invariant:\n\t {sp, sn} is the current approximation of the root, which has\n\t\t  exactly 1 + sizes[ni] bits.\n\t {rp, rn} is the current remainder\n\t {wp, wn} = {sp, sn}^(k-1)\n\t kk = number of truncated bits of the input\n      */\n      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that\n\t\t\t\t      iteration */\n\n      /* Reinsert a low zero limb if we normalized away the entire remainder */\n      if (rn == 0)\n\t{\n\t  rp[0] = 0;\n\t  rn = 1;\n\t}\n\n      /* first multiply the remainder by 2^b */\n      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);\n      rn = rn + b / GMP_NUMB_BITS;\n      if (cy != 0)\n\t{\n\t  rp[rn] = cy;\n\t  rn++;\n\t}\n\n      kk = kk - b;\n\n      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */\n\n      /* Now insert bits [kk,kk+b-1] from the input U */\n      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */\n      save = rp[bn];\n      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */\n      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);\n      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)\n\t\t - floor(kk / GMP_NUMB_BITS)\n\t     <= 1 + (kk + b - 1) / GMP_NUMB_BITS\n\t\t  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS\n\t     = 2 + (b - 2) / GMP_NUMB_BITS\n\t thus since nl is an integer:\n\t nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */\n      /* we have to save rp[bn] up to rp[nl-1], i.e. 
1 or 2 limbs */\n      if (nl - 1 > bn)\n\tsave2 = rp[bn + 1];\n      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);\n      /* set to zero high bits of rp[bn] */\n      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;\n      /* restore corresponding bits */\n      rp[bn] |= save;\n      if (nl - 1 > bn)\n\trp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since\n\t\t\t       they start by bit 0 in rp[0], so they use\n\t\t\t       at most ceil(b/GMP_NUMB_BITS) limbs */\n\n      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */\n\n      /* compute {wp, wn} = k * {sp, sn}^(k-1) */\n      cy = mpn_mul_1 (wp, wp, wn, k);\n      wp[wn] = cy;\n      wn += cy != 0;\n\n      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */\n\n      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */\n      if (rn < wn)\n\t{\n\t  qn = 0;\n\t}\n      else\n\t{\n\t  qn = rn - wn; /* expected quotient size */\n\t  mpn_tdiv_q (qp, rp, rn, wp, wn);\n\t  qn += qp[qn] != 0;\n\t}\n\n      /* 5: current buffers: {sp,sn}, {qp,qn}.\n\t Note: {rp,rn} is not needed any more since we'll compute it from\n\t scratch at the end of the loop.\n       */\n\n      /* Number of limbs used by b bits, when least significant bit is\n\t aligned to least limb */\n      bn = (b - 1) / GMP_NUMB_BITS + 1;\n\n      /* the quotient should be smaller than 2^b, since the previous\n\t approximation was correctly rounded toward zero */\n      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&\n\t\t      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))\n\t{\n\t  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */\n\t  MPN_ZERO (qp, qn);\n\t  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);\n\t  MPN_DECR_U (qp, qn, 1);\n\t  qn -= qp[qn - 1] == 0;\n\t}\n\n      /* 6: current buffers: {sp,sn}, {qp,qn} */\n\n      /* multiply the root approximation by 2^b */\n      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);\n      sn = sn + b / 
GMP_NUMB_BITS;\n      if (cy != 0)\n\t{\n\t  sp[sn] = cy;\n\t  sn++;\n\t}\n\n      /* 7: current buffers: {sp,sn}, {qp,qn} */\n\n      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn\n\t\t\t\t   above, q is set to 2^b-1, which has\n\t\t\t\t   exactly bn limbs */\n\n      /* Combine sB and q to form sB + q.  */\n      save = sp[b / GMP_NUMB_BITS];\n      MPN_COPY (sp, qp, qn);\n      MPN_ZERO (sp + qn, bn - qn);\n      sp[b / GMP_NUMB_BITS] |= save;\n\n      /* 8: current buffer: {sp,sn} */\n\n      /* Since each iteration treats b bits from the root and thus k*b bits\n\t from the input, and we already considered b bits from the input,\n\t we now have to take another (k-1)*b bits from the input. */\n      kk -= (k - 1) * b; /* remaining input bits */\n      /* {rp, rn} = floor({up, un} / 2^kk) */\n      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);\n      rn = un - kk / GMP_NUMB_BITS;\n      rn -= rp[rn - 1] == 0;\n\n      /* 9: current buffers: {sp,sn}, {rp,rn} */\n\n     for (c = 0;; c++)\n\t{\n\t  /* Compute S^k in {qp,qn}. */\n\t  if (i == 1)\n\t    {\n\t      /* Last iteration: we don't need W anymore. */\n\t      /* mpn_pow_1 requires that both qp and wp have enough space to\n\t\t store the result {sp,sn}^k + 1 limb */\n\t      approx = approx && (sp[0] > 1);\n\t      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;\n\t    }\n\t  else\n\t    {\n\t      /* W <- S^(k-1) for the next iteration,\n\t\t and S^k = W * S. 
*/\n\t      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);\n\t      mpn_mul (qp, wp, wn, sp, sn);\n\t      qn = wn + sn;\n\t      qn -= qp[qn - 1] == 0;\n\t    }\n\n\t  /* if S^k > floor(U/2^kk), the root approximation was too large */\n\t  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))\n\t    MPN_DECR_U (sp, sn, 1);\n\t  else\n\t    break;\n\t}\n\n      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */\n\n      ASSERT_ALWAYS (c <= 1);\n      ASSERT_ALWAYS (rn >= qn);\n\n      /* R = R - Q = floor(U/2^kk) - S^k */\n      if ((i > 1) || (approx == 0))\n\t{\n\t  mpn_sub (rp, rp, rn, qp, qn);\n\t  MPN_NORMALIZE (rp, rn);\n\t}\n      /* otherwise we have rn > 0, thus the return value is ok */\n\n      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */\n    }\n\n  TMP_FREE;\n  return rn;\n}\n"
  },
  {
    "path": "mpn/generic/rootrem_basecase.c",
    "content": "/*  mpn_rootrem  \n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n\n/*\n   HERE IS A COPY OF THE OLD GMP ROOTREM WHICH WE RENAMED MPN-ROOTREM_BASECASE\n   WE USE THIS FOR SMALL SIZES\n   AND OF THE COURSE THE OLD GMP BOILERPLATE\n*/\n\n/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and\n   store the truncated integer part at rootp and the remainder at remp.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE\n   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.\n   IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A\n   FUTURE GNU MP RELEASE.\n\n\nCopyright 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA. */\n\n/*\n  We use Newton's method to compute the root of a:\n\n           n\n  f(x) := x  - a\n\n\n            n - 1\n  f'(x) := x      n\n\n\n                                       n-1            n-1           n-1\n                                x - a/x            a/x   - x     a/x   + (n-1)x\n  new x = x - f(x)/f'(x) =  x - ----------  =  x + ---------  =  --------------\n                                     n                 n                n\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_size_t\nmpn_rootrem_basecase (mp_ptr rootp, mp_ptr remp,mp_srcptr up, mp_size_t un, mp_limb_t nth)\n{\n  mp_ptr pp, qp, xp;\n  mp_size_t pn, xn, qn;\n  unsigned long int unb, xnb, bit;\n  unsigned int cnt;\n  mp_size_t i;\n  unsigned long int n_valid_bits, adj;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* The extra factor 1.585 = log(3)/log(2) here is for the worst case\n     overestimate of the root, i.e., when the code rounds a root that is\n     2+epsilon to 3, and then powers this to a potentially huge power.  We\n     could generalize the code for detecting root=1 a few lines below to deal\n     with xnb <= k, for some small k.  For example, when xnb <= 2, meaning\n     the root should be 1, 2, or 3, we could replace this factor by the much\n     smaller log(5)/log(4).  
*/\n\n#define PP_ALLOC (2 + (mp_size_t) (un*1.585))\n  pp = TMP_ALLOC_LIMBS (PP_ALLOC);\n\n  count_leading_zeros (cnt, up[un - 1]);\n  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;\n\n  xnb = (unb - 1) / nth + 1;\n  if (xnb == 1)\n    {\n      if (remp == 0)\n\tremp = pp;\n      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);\n      MPN_NORMALIZE (remp, un);\n      rootp[0] = 1;\n      TMP_FREE;\n      return un;\n    }\n\n  xn = (xnb + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;\n\n  qp = TMP_ALLOC_LIMBS (PP_ALLOC);\n  xp = TMP_ALLOC_LIMBS (xn + 1);\n\n  /* Set initial root to only ones.  This is an overestimate of the actual root\n     by less than a factor of 2.  */\n  for (i = 0; i < xn; i++)\n    xp[i] = GMP_NUMB_MAX;\n  xp[xnb / GMP_NUMB_BITS] = ((mp_limb_t) 1 << (xnb % GMP_NUMB_BITS)) - 1;\n\n  /* Improve the initial approximation, one bit at a time.  Keep the\n     approximations >= root(U,nth).  */\n  bit = xnb - 2;\n  n_valid_bits = 0;\n  for (i = 0; (nth >> i) != 0; i++)\n    {\n      mp_limb_t xl = xp[bit / GMP_NUMB_BITS];\n      xp[bit / GMP_NUMB_BITS] = xl ^ (mp_limb_t) 1 << bit % GMP_NUMB_BITS;\n      pn = mpn_pow_1 (pp, xp, xn, nth, qp);\n      ASSERT_ALWAYS (pn < PP_ALLOC);\n      /* If the new root approximation is too small, restore old value.  */\n      if (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)))\n\txp[bit / GMP_NUMB_BITS] = xl;\t\t/* restore old value */\n      n_valid_bits += 1;\n      if (bit == 0)\n\tgoto done;\n      bit--;\n    }\n\n  adj = n_valid_bits - 1;\n\n  /* Newton loop.  Converges downwards towards root(U,nth).  Currently we use\n     full precision from iteration 1.  Clearly, we should use just n_valid_bits\n     of precision in each step, and thus save most of the computations.  
*/\n  while (n_valid_bits <= xnb)\n    {\n      mp_limb_t cy;\n\n      pn = mpn_pow_1 (pp, xp, xn, nth - 1, qp);\n      ASSERT_ALWAYS (pn < PP_ALLOC);\n      qp[xn - 1] = 0;\t\t/* pad quotient to make it always xn limbs */\n      mpn_tdiv_qr (qp, pp, (mp_size_t) 0, up, un, pp, pn); /* junk remainder */\n      cy = mpn_addmul_1 (qp, xp, xn, nth - 1);\n      if (un - pn == xn)\n\t{\n\t  cy += qp[xn];\n\t  if (cy == nth)\n\t    {\n\t      for (i = xn - 1; i >= 0; i--)\n\t\tqp[i] = GMP_NUMB_MAX;\n\t      cy = nth - 1;\n\t    }\n\t}\n\n      qp[xn] = cy;\n      qn = xn + (cy != 0);\n\n      mpn_divrem_1 (xp, 0, qp, qn, nth);\n      n_valid_bits = n_valid_bits * 2 - adj;\n    }\n\n  /* The computed result might be one unit too large.  Adjust as necessary.  */\n done:\n  pn = mpn_pow_1 (pp, xp, xn, nth, qp);\n  ASSERT_ALWAYS (pn < PP_ALLOC);\n  if (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0))\n    {\n      mpn_decr_u (xp, 1);\n      pn = mpn_pow_1 (pp, xp, xn, nth, qp);\n      ASSERT_ALWAYS (pn < PP_ALLOC);\n\n      ASSERT_ALWAYS (! 
(un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)));\n    }\n\n  if (remp == 0)\n    remp = pp;\n  mpn_sub (remp, up, un, pp, pn);\n  MPN_NORMALIZE (remp, un);\n  MPN_COPY (rootp, xp, xn);\n  TMP_FREE;\n  return un;\n}\n\n\n#if 0 /* this code needs more work to be faster than that in rootrem.c */\n\n/* HERE IS THE NEW CODE */\n\n/*\n   TODO\n\n   For large k we can calulate x^k faster as a float ie exp(k*ln(x)) or \n   x^(1/k) = exp(ln(x)/k).\n\n   rather than doing it bitwise , round up all the truncation to the next limb,\n   this should save quite a lot of shifts, don't know how much this will save (if \n   any) in practice.\n        \n   The powering is now a base2 left to right binary expansion , we could the usual \n   sliding base 2^k expansion, although the most common roots are small so this is \n   not likely to give us much in the common case.\n        \n   As most roots are for small k , we can do the powering via an optimized addition \n   chain, ie some sort of table lookup.\n        \n   Merge this reciprocal with our reciprocal used in our barratt (and/or newton \n   division).\n        \n   Currently we calc x^(1/k) as\t(x^(-1/k))^(-1/1)\n   or (x^(-1/1))^(-1/k)  \n   could also try x(x^(-1/k)^(k-1))\t(*)\n   or (x^(-1/a))^(-1/b)  where k=ab    \n   this last one is SLOWER as hi.gh k is fast as so make out computation as small as \n   possible as fast as possible\n        \n   So (*) is the only alternative, which I guess is only faster for small k ???\n        \n   Rewrite in term of mpf (or similar) like it was when I started, but I lost it, \n   will make the code below much clearer and smaller.\n        \n   multrunc can use high half mul.\n\n   if k < 496 (32 bit cpus) then nroot_vsmall can be further reduced for a nroot_vvsmall.\n        \n   change signed long etc to mp_size_t ?  
mainly for MSVC.\n        \n   At the moment we have just one threshold, need a separate one for each k, and some \n   sort of rule for large k.\n*/\n\n\n/* Algortihms from \"Detecting Perfect Powers in Essentially Linear Time\" ,\n   Daniel J Bernstein  http://cr.yp.to/papers.html   */\n\n/* define this to 1 to test the nroot_small code */\n#define TESTSMALL 0\n\n/* if k<=floor((2^(GMP_LIMB_BITS-1)-33)/66) &&  k<=2^(GMP_LIMB_BITS-4) then call vsmall\n         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ is always the smallest */\n#define NROOT_VSMALL_MIN (((((mp_limb_t)1)<<(GMP_LIMB_BITS - 1)) - 33)/66)\n\n/* shiftrights requires an extra gmp_numb_bits */\n#define shiftright(x,xn,c)\t\t\t\t\t\t\t\\\n   do { \\\n      (xn)=(xn)-(c)/GMP_NUMB_BITS;\t\t\t\t\t\t\t\\\n      if((c)%GMP_NUMB_BITS != 0)\t{ \\\n         mpn_rshift((x),(x) + (c)/GMP_NUMB_BITS, (xn), (c)%GMP_NUMB_BITS);\t\t\\\n         if((x)[(xn) - 1] == 0) (xn)--; \\\n      }\t\t\t\t\t\t\\\n      else \\\n      { \\\n      if ((c)/GMP_NUMB_BITS != 0) \\\n         MPN_COPY_INCR((x),(x) + (c)/GMP_NUMB_BITS,(xn)); \\\n      } \\\n   } while(0)\n\n// shiftrights requires an extra gmp_numb_bits\n#define shiftrights(x, xn, y, yn, c)\t\t\t\t\t\\\n   do { \\\n      (xn) = (yn) - (c)/GMP_NUMB_BITS;\t\t\t\t\t\t\\\n      if ((c)%GMP_NUMB_BITS != 0) { \\\n         mpn_rshift((x), (y) + (c)/GMP_NUMB_BITS, (xn), (c)%GMP_NUMB_BITS);\t\\\n         if((x)[(xn) - 1] == 0) (xn)--; \\\n      }\t\t\t\t\t\\\n      else  \\\n      { \\\n         MPN_COPY_INCR((x), (y) + (c)/GMP_NUMB_BITS, (xn)); \\\n      }\t\t\t\\\n   } while(0)\n\n#define shiftleft(x, xn, c)\t\t\t\t\t\t\\\n   do { \\\n      mp_limb_t __t;\t\t\t\t\t\t\t\\\n      if ((c)%GMP_NUMB_BITS != 0) { \\\n         __t = mpn_lshift((x) + (c)/GMP_NUMB_BITS, (x), (xn), (c)%GMP_NUMB_BITS);\t\\\n         (xn) = (xn) + (c)/GMP_NUMB_BITS;\t\t\t\t\t\\\n         if (__t != 0) \\\n            (x)[(xn)] = __t;(xn)++; \\\n      } \\\n      else\t\t\t\t\t\t\t\t\t\\\n      { \n         if 
((c)/GMP_NUMB_BITS != 0) {\n            MPN_COPY_DECR((x) + (c)/GMP_NUMB_BITS, (x), (xn));\t\t\t\\\n            (xn) = (xn) + (c)/GMP_NUMB_BITS; \\\n         } \\\n      }\t\t\t\t\t\\\n      if ((c)/GMP_NUMB_BITS != 0) \\\n         MPN_ZERO((x), (c)/GMP_NUMB_BITS);\t\t\\\n   } while(0)\n\n#define shiftlefts(x, xn, y, yn, c)\t\t\t\t\t\t\\\n   do { \\\n      mp_limb_t __t;\t\t\t\t\t\t\t\\\n      if ((c)%GMP_NUMB_BITS != 0) { \\\n         __t = mpn_lshift((x) + (c)/GMP_NUMB_BITS, (y), (yn), (c)%GMP_NUMB_BITS);\t\\\n         (xn) = (yn) + (c)/GMP_NUMB_BITS;\t\t\t\t\t\\\n         if (__t != 0) \\\n           (x)[(xn)]=__t;(xn)++; \\\n      }\t\t\t\t\\\n      else \\\n      {     \\\n         MPN_COPY_DECR((x) + (c)/GMP_NUMB_BITS, (y), (yn));\t\t\t\\\n        (xn) = (yn) + (c)/GMP_NUMB_BITS; \\\n      }\t\t\t\t\t\\\n      if ((c)/GMP_NUMB_BITS != 0) \\\n         MPN_ZERO((x), (c)/GMP_NUMB_BITS);\t\t\\\n   } while(0)\n\n#define mul_ui(x, xn, k) \\\n   do { \\\n      mp_limb_t __t = mpn_mul_1((x), (x), (xn), (k)); \\\n      if (__t != 0) { \\\n         (x)[(xn)] = __t; \\\n         (xn)++; \\\n      } \\\n   }while(0)\n\n/* tdiv_q_ui requires an extra gmp_numb_bits */\n#define tdiv_q_ui(x, xn, k) \\\n   do { \\\n      mpn_divrem_1((x), 0, (x), (xn), (k)); \\\n      if ((x)[(xn)-1] == 0) \\\n      (xn)--; \\\n   } while(0)\n\n/* bigmultrunc requires an extra gmp_numb_bits */\n#define bigmultrunc(xv, xn, xp, yv, yn, yp, B)\t\t\\\n   do { \\\n      signed long __f; \\\n      mp_limb_t __t;\t\t\t\\\n      (xp) += (yp);\t\t\t\t\t\t\\\n      if ((xn) >= (yn))  \\\n         __t = mpn_mul(t1, (xv), (xn), (yv), (yn));\t\\\n      else \\\n         __t = mpn_mul(t1, (yv), (yn), (xv), (xn));\t\t\\\n      t1n = (xn) + (yn); \\\n      if (__t == 0) t1n--;\t\t\t\\\n      __f = sizetwo(t1,t1n); \\\n      __f = __f - (B);\t\t\t\\\n      if (__f > 0) { \\\n         shiftrights((xv), (xn), t1, t1n, __f); \\\n         (xp) += __f; \\\n      }\t\\\n      else \\\n      { \\\n         
MPN_COPY_INCR((xv), t1, t1n); \\\n         (xn) = t1n; \\\n      }\t\t\\\n   } while(0)\n\n/* bigsqrtrunc requires an extra gmp_numb_bits */\n#define bigsqrtrunc(xv, xn, xp, B)\t\t\t\t\\\n   do { \\\n      signed long __f;\t\t\t\t\t\\\n      (xp) += (xp);\t\t\t\t\t\t\\\n      mpn_sqr(t1, (xv), (xn));\t\t\t\t\\\n      t1n = (xn)*2; \\\n      if (t1[t1n - 1] == 0) t1n--;\t\t\t\\\n      __f = sizetwo(t1,t1n); \\\n      __f = __f - (B);\t\t\t\\\n      if (__f > 0) { \\\n         shiftrights((xv), (xn), t1, t1n, __f); \\\n         (xp) += __f; \\\n      }\t\\\n      else \\\n      { \\\n         MPN_COPY_INCR((xv), t1, t1n); \\\n         (xn) = t1n; \\\n      }\t\t\\\n   } while(0)\n\n/* must have y > z value wise */\n#define subtract(x, xn, y, yn, z, zn)\t\t\t\\\n   do { \\\n      mpn_sub((x), (y), (yn), (z), (zn)); /* no carry */\t\\\n      (xn) = (yn); \\\n      while ((x)[(xn) - 1] == 0)(xn)--;\t\t\\\n   } while(0)\n\n/* returns ceil(lg(x)) where x != 0 */\nsigned long\nclg (unsigned long x)\n{\n   mp_limb_t t;\n\n   ASSERT (x != 0);\n\n#if BITS_PER_ULONG <= GMP_LIMB_BITS\n   if (x == 1)\n      return 0;\n   \n   count_leading_zeros (t, (mp_limb_t) (x - 1));\n   \n   return GMP_LIMB_BITS - t;\n#endif\n\n#if BITS_PER_ULONG > GMP_LIMB_BITS\n#error FIXME\n#endif\n}\n\n// returns sizeinbase(x,2) x!=0\nstatic inline signed long\nsizetwo (mp_srcptr x, mp_size_t xn)\n{\n  signed long r;\n  count_leading_zeros (r, x[xn - 1]);\n  return xn * GMP_NUMB_BITS + GMP_NAIL_BITS - r;\n}\n\n//          returns sizeinbase(x-1,2) and returns 0 if x=1\nstatic inline signed long\nsizetwom1 (mp_srcptr x, mp_size_t xn)\n{\n  signed long r, i;\n  mp_limb_t v;\n\n  ASSERT (xn > 1 || (xn == 1 && x[0] != 0));\n  if (xn == 1 && x[0] == 1)\n    return 0;\n  r = sizetwo (x, xn);\n  i = xn - 1;\n  v = x[i];\n  if ((v & (v - 1)) != 0)\n    return r;\n  for (i--; i >= 0; i--)\n    if (x[i] != 0)\n      return r;\n  return r - 1;\n}\n\n\n/* Algorithm B\n\n   Calculates Z such that Z*(1-2^(-b)) < 
Y^(-1/k) < Z*(1+2^(-b)) \n   ie a b bit approximation the reciprocal of the kth root of Y\n   where Z,Y>0 are real , 1<=b<=3+ceil(lg(k)) is an int , k>=1 is an int\n   \n   Z={z,zn}*2^zp\twhere zp is the return value , and zn is modified\n   Y={y,yn}*2^yp\twhere {y,yn}>=2 and leading limb of {y,yn} is not zero\n   {z,zn} requires space for GMP_LIMB_BITS+4 bits\n   {z,zn} and {y,yn} must be completly distinct\n*/\nstatic signed long\nnroot_small (mp_ptr z, mp_size_t * zn, mp_srcptr y, mp_size_t yn,\n\t     signed long yp, mp_limb_t b, mp_limb_t k)\n{\n  signed long zp, f, j, g, B, t2p, t3p, t4p;\n  int ret;\n  mp_limb_t mask;\n  mp_size_t t1n, t2n, t3n, t4n;\n  mp_limb_t t1[BITS_TO_LIMBS (2 * (GMP_LIMB_BITS + 8) + GMP_NUMB_BITS)];\n  mp_limb_t t2[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + GMP_NUMB_BITS)];\n  mp_limb_t t3[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + 2 + GMP_NUMB_BITS)];\n  mp_limb_t t4[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + GMP_NUMB_BITS)];\n\n  ASSERT (k != 0);\n  ASSERT (b >= 1);\n  ASSERT (b <= (mp_limb_t) (clg (k) + 3));\t// bit counts are maximums , ie can have less\n  ASSERT (yn > 1 || (yn == 1 && y[0] >= 2));\n  ASSERT (y[yn - 1] != 0);\n  g = sizetwom1 (y, yn);\n  g = g + yp;\n  g = -g;\n  if (g >= 0)\n    {\n      g = g / k;\n    }\n  else\n    {\n      g = -((k - 1 - g) / k);\n    }\n  B = 66 * (2 * k + 1);\n  B = clg (B);\n  ASSERT (B <= GMP_LIMB_BITS + 8);\n  ASSERT (b + 1 <= GMP_LIMB_BITS + 4);\n  f = sizetwo (y, yn);\n  if (f > B)\n    {\n      shiftrights (t4, t4n, y, yn, f - B);\n      t4p = yp + f - B;\n    }\n  else\n    {\n      MPN_COPY_INCR (t4, y, yn);\n      t4n = yn;\n      t4p = yp;\n    }\t\t\t\t// t4 has B bits+numb space\n  *zn = 1;\n  z[0] = 3;\n  zp = g - 1;\t\t\t// z has 2 bits\n  for (j = 1; (unsigned long) j < b; j++)\n    {\n      f = sizetwo (z, *zn);\n      if (f > B)\n\t{\n\t  shiftrights (t2, t2n, z, *zn, f - B);\n\t  t2p = zp + f - B;\n\t}\n      else\n\t{\n\t  MPN_COPY_INCR (t2, z, *zn);\n\t  t2n = *zn;\n\t  t2p = zp;\n\t}\t\t\t// 
t2 has B bits+numb space\n      if (k != 1)\n\t{\n\t  MPN_COPY_INCR (t3, t2, t2n);\n\t  t3n = t2n;\n\t  t3p = t2p;\t\t// t3 has B bits\n\t  mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));\n\t  while ((mask & k) == 0)\n\t    mask >>= 1;\n\t  mask >>= 1;\n\t  for (; mask != 0; mask >>= 1)\n\t    {\n\t      bigsqrtrunc (t2, t2n, t2p, B);\t// t2 has B bits+numb space , t1 has 2*B bits+numb space\n\t      if ((k & mask) != 0)\n\t\t{\n\t\t  bigmultrunc (t2, t2n, t2p, t3, t3n, t3p, B);\n\t\t}\n\t    }\n\t}\t\t\t// t2 has B bits+numb space , t1 has 2*B bits+numb space\n      bigmultrunc (t2, t2n, t2p, t4, t4n, t4p, B);\t// t2 has B bits+numb space , t1 has 2*B bits+numb space\n      ret = 0;\n      f = sizetwo (t2, t2n);\n      if (f - 1 <= 8 - (t2p + 10))\n\tret = 1;\n      if (f - 1 >= 10 - (t2p + 10))\n\tret = 0;\n      if (f - 1 == 9 - (t2p + 10))\n\t{\t\t\t// so 512 <= t2.2^(t2p+10) < 1024\n\t  if (t2p + 10 >= 0)\n\t    {\n\t      shiftlefts (t3, t3n, t2, t2n, t2p + 10);\n\t    }\t\t\t// t3 has 10 bits\n\t  else\n\t    {\n\t      shiftrights (t3, t3n, t2, t2n, -t2p - 10);\n\t    }\t\t\t// t3 has 10 bits+numb space\n\t  if (t3n == 1 && t3[0] <= 993)\n\t    ret = 1;\n\t}\n      if (ret != 0)\n\t{\n\t  shiftleft (z, *zn, zp - (g - j - 1));\t// z has j+2 bits\n\t  {\n\t    mp_limb_t __t;\n\t    __t = mpn_add_1 (z, z, *zn, 1);\n\t    if (__t != 0)\n\t      {\n\t\tz[*zn] = 1;\n\t\t(*zn)++;\n\t      }\n\t  }\n\t  zp = g - j - 1;\n\t  continue;\n\t}\n      f = sizetwom1 (t2, t2n);\n      if (f + t2p >= 1)\n\t{\n\t  shiftleft (z, *zn, zp - (g - j - 1));\n\t  mpn_sub_1 (z, z, *zn, 1);\n\t  zp = g - j - 1;\n\t}\n    }\t\t\t\t// z has j+2 bits\n  return zp;\n}\t\t\t\t// z has b+1 bits\n\n\n/* Algorithm B for k<=NROOT_VSMALL_MIN=(((((mp_limb_t)1)<<(GMP_LIMB_BITS-1))-33)/66)\n\n   Calculates Z such that Z*(1-2^(-b)) < Y^(-1/k) < Z*(1+2^(-b)) \n   ie a b bit approximation the reciprocal of the kth root of Y\n   where Z,Y>0 are real , 1<=b<=3+ceil(lg(k)) is an int , k>=1 is an int\n 
  \n   Z=z[0]*2^zp\t\twhere zp is the return value\n   Y={y,yn}*2^yp\twhere {y,yn}>=2 and leading limb of {y,yn} is not zero\n   {z,1} and {y,yn} must be completly distinct\n   \n   Note : the restriction on k allows calculations to be less than limb sized\n   assumes GMP_LIMB_BITS>=10\n   \n*/\nstatic signed long\nnroot_vsmall (mp_ptr z, mp_srcptr y, mp_size_t yn, signed long yp,\n\t      mp_limb_t b, mp_limb_t k)\n{\n  signed long f1, zp, f, j, g, B, t1p, t2p, t3p;\n  int ret;\n  mp_limb_t t1, t2, t3, qh, ql, mask;\n\n  ASSERT (k != 0);\n  ASSERT (b >= 1);\n  ASSERT (b <= (mp_limb_t) (clg (k) + 3));\n  ASSERT (yn > 1 || (yn == 1 && y[0] >= 2));\n  ASSERT (y[yn - 1] != 0);\n  ASSERT (GMP_LIMB_BITS >= 10);\n  ASSERT (k <= NROOT_VSMALL_MIN);\n\n  g = sizetwom1 (y, yn);\n  B = 66 * (2 * k + 1);\n  B = clg (B);\n\n  ASSERT (B <= GMP_LIMB_BITS);\n  ASSERT (b <= GMP_LIMB_BITS - 1);\n\n#if GMP_NAIL_BITS == 0\n  t3p = yp;\n  t3 = y[yn - 1];\n  count_leading_zeros (f1, t3);\n  f = yn * GMP_NUMB_BITS + GMP_NAIL_BITS - f1;\t//related to g(internally)\n  f1 = GMP_LIMB_BITS - f1;\n  if (f1 >= B)\n    {\n      t3 >>= f1 - B;\n      t3p += f - B;\n    }\n  else\n    {\n      if (yn != 1)\n\t{\n\t  t3 = (t3 << (B - f1)) | ((y[yn - 2]) >> (GMP_LIMB_BITS - B + f1));\n\t  t3p += f - B;\n\t}\n    }\n#endif\n\n#if GMP_NAIL_BITS != 0\n#if GMP_NUMB_BITS*2 < GMP_LIMB_BITS\n#error not supported\n#endif\n\n  f = sizetwo (y, yn);\n  if (f > B)\n    {\n      mp_limb_t t3t[2];\n      mp_size_t t3n;\n      t3p = yp + f - B;\n      shiftrights (t3t, t3n, y, yn, f - B);\n      t3 = t3t[0];\n    }\n  else\n    {\n      t3p = yp;\n      if (f <= GMP_NUMB_BITS)\n\t{\n\t  t3 = y[0];\n\t}\n      else\n\t{\n\t  t3 = (y[0] | (y[1] << (GMP_NUMB_BITS)));\n\t}\n    }\n#endif\n\n  g = g + yp;\n  g = -g;\n  if (g >= 0)\n    {\n      g = g / k;\n    }\n  else\n    {\n      g = -((k - 1 - g) / k);\n    }\n  z[0] = 3;\n  zp = g - 1;\n  for (j = 1; (unsigned long) j < b; j++)\n    {\n      count_leading_zeros 
(f, z[0]);\n      f = GMP_LIMB_BITS - f;\n      if (f > B)\n\t{\n\t  t1 = (z[0] >> (f - B));\n\t  t1p = zp + f - B;\n\t}\n      else\n\t{\n\t  t1 = z[0];\n\t  t1p = zp;\n\t}\n      if (k != 1)\n\t{\n\t  t2 = t1;\n\t  t2p = t1p;\n\t  mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));\n\t  while ((mask & k) == 0)\n\t    mask >>= 1;\n\t  mask >>= 1;\n\t  for (; mask != 0; mask >>= 1)\n\t    {\n\t      umul_ppmm (qh, ql, t1, t1);\n\t      t1p += t1p;\n\t      if (qh == 0)\n\t\t{\t\t//count_leading_zeros(f,ql);f=GMP_LIMB_BITS-f;f=f-B;if(f>0){t1=(ql>>f);t1p+=f;}else{t1=ql;}\n\t\t  t1 = ql;\t// be lazy\n\t\t}\n\t      else\n\t\t{\n\t\t  count_leading_zeros (f, qh);\n\t\t  f = 2 * GMP_LIMB_BITS - f;\n\t\t  f = f - B;\n\t\t  t1p += f;\t//only need these cases when B>=16\n\t\t  if (f < GMP_LIMB_BITS)\n\t\t    {\n\t\t      t1 = (ql >> f);\n\t\t      t1 |= (qh << (GMP_LIMB_BITS - f));\n\t\t    }\n\t\t  else\n\t\t    {\n\t\t      t1 = (qh >> (f - GMP_LIMB_BITS));\n\t\t    }\n\t\t}\n\t      if ((k & mask) != 0)\n\t\t{\n\t\t  umul_ppmm (qh, ql, t1, t2);\n\t\t  t1p += t2p;\n\t\t  if (qh == 0)\n\t\t    {\t\t//count_leading_zeros(f,ql);f=GMP_LIMB_BITS-f;f=f-B;if(f>0){t1=(ql>>f);t1p+=f;}else{t1=ql;}\n\t\t      t1 = ql;\t// be lazy\n\t\t    }\n\t\t  else\n\t\t    {\n\t\t      count_leading_zeros (f, qh);\n\t\t      f = 2 * GMP_LIMB_BITS - f;\n\t\t      f = f - B;\n\t\t      t1p += f;\n\t\t      if (f < GMP_LIMB_BITS)\n\t\t\t{\n\t\t\t  t1 = (ql >> f);\n\t\t\t  t1 |= (qh << (GMP_LIMB_BITS - f));\n\t\t\t}\n\t\t      else\n\t\t\t{\n\t\t\t  t1 = (qh >> (f - GMP_LIMB_BITS));\n\t\t\t}\n\t\t    }\n\t\t}\n\t    }\n\t}\n      umul_ppmm (qh, ql, t1, t3);\n      t1p += t3p;\n      if (qh == 0)\n\t{\n\t  count_leading_zeros (f, ql);\n\t  f = GMP_LIMB_BITS - f;\n\t  f = f - B;\n\t  if (f > 0)\n\t    {\n\t      t1 = (ql >> f);\n\t      t1p += f;\n\t    }\n\t  else\n\t    {\n\t      t1 = ql;\n\t    }\n\t  // dont be lazy here as it could screw up the compairison below\n\t}\n      else\n\t{\n\t  
count_leading_zeros (f, qh);\n\t  f = 2 * GMP_LIMB_BITS - f;\n\t  f = f - B;\n\t  t1p += f;\n\t  if (f < GMP_LIMB_BITS)\n\t    {\n\t      t1 = (ql >> f);\n\t      t1 |= (qh << (GMP_LIMB_BITS - f));\n\t    }\n\t  else\n\t    {\n\t      t1 = (qh >> (f - GMP_LIMB_BITS));\n\t    }\n\t}\n      ret = 0;\n      ASSERT (t1 != 0);\n      count_leading_zeros (f, t1);\n      f = GMP_LIMB_BITS - f;\n      if (f - 1 <= 8 - (t1p + 10))\n\tret = 1;\n      if (f - 1 >= 10 - (t1p + 10))\n\tret = 0;\n      if (f - 1 == 9 - (t1p + 10))\n\t{\t\t\t// so 512 <= t1.2^(t1p+10) < 1024\n\t  if (t1p + 10 >= 0)\n\t    {\n\t      t2 = (t1 << (t1p + 10));\n\t    }\n\t  else\n\t    {\n\t      t2 = (t1 >> (-t1p - 10));\n\t    }\n\t  if (t2 <= 993)\n\t    ret = 1;\n\t}\n      if (ret != 0)\n\t{\n\t  z[0] = (z[0] << (zp - (g - j - 1)));\n\t  z[0]++;\n\t  zp = g - j - 1;\n\t  continue;\n\t}\n      if (t1 == 1)\n\t{\n\t  f = 0;\n\t}\n      else\n\t{\n\t  count_leading_zeros (f, t1 - 1);\n\t  f = GMP_LIMB_BITS - f;\n\t}\n      if (f + t1p >= 1)\n\t{\n\t  z[0] = (z[0] << (zp - (g - j - 1)));\n\t  z[0]--;\n\t  zp = g - j - 1;\n\t}\n    }\n  return zp;\n}\n\n/* Algorithm N\n\n   Calculates Z such that Z*(1 - 2^(-b)) < Y^(-1/k) < Z*(1 + 2^(-b)) \n   ie a b bit approximation the reciprocal of the kth root of Y\n   where Z,Y > 0 are real , b >= 1 is an int, k >= 1 is an int\n   \n   Z={z, zn}*2^zp\twhere zp is the return value , and zn is modified\n   Y={y, yn}*2^yp\twhere {y, yn} >= 2 and leading limb of {y, yn} is not zero\n\n   z  satisfies 1 <= z < 2^(b + 7)\n   zp satisfies -lg(Y)/k - b - 7 - lg(3/2) < zp < -lg(Y)/k + 1\n   \n   {z, zn} and {y, yn} and temps t1, t2, t3 must be completely distinct\n   z  requires b + 6 + GMP_NUMB_BITS + max(1, clgk)\n   t1 requires max(2*b + 12 + GMP_NUMB_BITS, b + 6 + clg(k + 1))   \n   t2 requires b + 6 + GMP_NUMB_BITS\n   t3 requires b + 6 + GMP_NUMB_BITS\n*/\nstatic signed long\nnroot(mp_ptr z, mp_size_t * zn, mp_srcptr y, mp_size_t yn, signed long yp,\n       
mp_limb_t b, mp_limb_t k, signed long clgk, mp_ptr t1, mp_ptr t2, mp_ptr t3)\n{ \n   mp_size_t t1n, t2n, t3n; \n   mp_limb_t mask, kpow2, k1pow2;\n   signed long t1p, zp, t2p, t3p, f, bd, bs[GMP_LIMB_BITS * 2], c;\t/* FIXME how many */\n  \n   ASSERT(k != 0);\n   ASSERT(yn > 1 || (yn == 1 && y[0] >= 2));\n   ASSERT (y[yn - 1] != 0);\n  \n   bs[0] = b;\t\t\t/* bit counts are maximums, i.e. can have less */\n  \n   for (c = 0; ; c++) / *bs[c] <= 3 + clgk */\n   { \n      if (bs[c] <= 3 + clgk)\n\t      break;\n      \n      bs[c + 1] = 1 + (bs[c] + clgk) / 2;   \n   }\t\t\n\n#if GMP_LIMB_BITS >= 10 && TESTSMALL == 0\n  if (k <= NROOT_VSMALL_MIN)\n  { \n     zp = nroot_vsmall(z, y, yn, yp, bs[c], k); \n     *zn = 1; \n  }\n  else\n  { \n     zp = nroot_small(z, (mp_limb_t *) zn, y, yn, yp, bs[c], k); \n  }\n#endif\n\n/* bs[1] = 1 + floor((b + clgk)/2) max bd = b + 6, z has bs[c]+1 bits */\n#if GMP_LIMB_BITS < 10 || TESTSMALL == 1 \n  zp = nroot_small (z, zn, y, yn, yp, bs[c], k);\n#endif \n  \n  kpow2 = 0;\n  k1pow2 = 0; /* shortcut for div, mul to a shift instead */\n  \n  if (POW2_P(k)) /* k=2^(kpow2 - 1) */\n  {\n     count_leading_zeros(kpow2, k);\n     kpow2 = GMP_LIMB_BITS - kpow2;\n  }\n\n  if (POW2_P(k + 1)) /* k + 1 = 2^(k1pow2-1) */\n  {\n     count_leading_zeros(k1pow2, k + 1);\n     k1pow2 = GMP_LIMB_BITS - k1pow2;\n  }\t\n  \n  for ( ; c != 0; c--)\n  {  bd = 2 * bs[c] + 4 - clgk;\n     f = sizetwo (z, *zn);\t/* is this trunc ever going to do something real? 
*/\n     if (f > bd) \n     { \n        shiftright (z, *zn, f - bd); \n        zp = zp + f - bd;\n     }\t\t\t\n     /* z has bd bits + numb space */\n\n     MPN_COPY_INCR (t3, z, *zn); t3n = *zn;t3p = zp;\n      \n     mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));\t/* t3 has bd bits */\n     while ((mask & (k + 1)) == 0) mask >>= 1;\n      \n     for (mask >>= 1; mask != 0; mask >>= 1)\n\t  { \n        /* t3 has bd bits + numb space, t1 has 2*bd bits + numb space */\n        bigsqrtrunc (t3, t3n, t3p, bd);\t\n\t  \n        if (((k + 1) & mask) != 0)\n        {\n           bigmultrunc (t3, t3n, t3p, z, *zn, zp, bd);\n        }\n     }\n     \n     /* t3 has bd bits + numb space t1 has 2*bd bits + numb space */\n      \n     if (k1pow2)\n     {\n        shiftleft(z, *zn, k1pow2 - 1);\n     } else\n     {\n        mul_ui(z, *zn, k + 1);\n     }\n     \n     /* z has bd+clg(k + 1) bits */\n      \n     f = sizetwo (y, yn);\n     if (f > bd) { \n        shiftrights (t2, t2n, y, yn, f - bd);\n        t2p = yp + f - bd;\n\n        /* t2 has bd bits + numb space */\n     }\t   \n     else /* this case may not happen if this is only called by mpn_root */\n     { \n        MPN_COPY_INCR (t2, y, yn);\n        t2n = yn;\n        t2p = yp;\n     }\t\n\n     bigmultrunc (t3, t3n, t3p, t2, t2n, t2p, bd);\t\n     \n     /* t3 has bd bits + numb space t1 has 2*bd bits + numb space */\n      \n     if (zp <= t3p) /* which branch depends on yp ? 
and only want top bd + clgk bits exactly */\n\t  { \n        shiftlefts(t1, t1n, t3, t3n, t3p - zp);\t/* t1 has bd + clg(k + 1) bits */\n\t     subtract(t1, t1n, z, *zn, t1, t1n);\n\t     t1p = zp;\n        \n        /* t1 has bd + clg(k + 1) bits */\n     } else\n\t  { \n        ASSERT(zp - t3p + sizetwo (z, *zn) <= 2 * b + 12 + GMP_NUMB_BITS); //* not allocated enough mem */\n\t  \n        shiftlefts(t1, t1n, z, *zn, zp - t3p);\t/* t1 has 2*b+12+numb */\n\t     subtract(t1, t1n, t1, t1n, t3, t3n);\n\t     t1p = t3p;\n     }\t\t\t\n     \n     /* t1 has 2*b + 12 + numb */\n      \n     f = sizetwo (t1, t1n);\n     if (f >= bd + clgk)\n        shiftrights(z, *zn, t1, t1n, f - bd - clgk);\n     else\n        shiftlefts(z, *zn, t1, t1n, bd + clgk - f); \n       \n     /* z has bd + clgk bits + numb space */\n\n     zp = t1p + f - bd - clgk;\n     if (kpow2)\n        shiftright(z, *zn, kpow2 - 1);\n     else\n        tdiv_q_ui (z, *zn, k);\n    }\t\n  \n     /* z has bd + 1 bits + numb space (maybe prove just bd bits ?) 
*/\n  return zp;\n}\n\n/* same as Algorithm N but for k = 1\n\n   Calculates Z such that Z*(1 - 2^(-b)) < Y^(-1/k) < Z*(1 + 2^(-b)) \n   ie a b bit approximation the reciprocal of the kth root of Y\n   where Z,Y > 0 are real, b >= 1 is an int, k >= 1 is an int\n   \n   Z={z, zn}*2^zp\twhere zp is the return value, and zn is modified\n   Y={y, yn}*2^yp\twhere {y, yn} >= 2 and leading limb of {y, yn} is not zero\n\n   and z  satisfies 2^b <= z <= 2^(b + 1)\n   and zp satisfies zp = -sizetwo(y, yn) - b - yp\n\n   {z, zn} and {y, yn} and temps t1, t2 must be completely distinct\n   z  requires 2 + floor(((sizetwo(y, yn) + b + 1)/GMP_NUMB_BITS) - yn limbs\n   t1 requires 1 + floor((sizetwo(y, yn) + b + 1)/GMP_NUMB_BITS) limbs\n   t2 requires yn limbs   \n*/\nstatic signed long\nfinv_fast (mp_ptr z, int *zn, mp_srcptr y, mp_size_t yn, signed long yp,\n\t   unsigned long b, mp_ptr t1, mp_ptr t2)\n{\n  signed long c;\n  signed long zp;\n  mp_size_t t1n;\n\n  c = sizetwo (y, yn) + b;\n  MPN_COPY_INCR (t1, y, yn);\n  t1n = yn;\t\t\t/* t1 has yn limbs */\n  \n  MPN_ZERO(t1 + t1n, (c + 1) / GMP_NUMB_BITS + 1 - t1n);\t \n  /* t1 has 1 + floor((c+1)/numb) limbs */\n  \n  t1[(c + 1) / GMP_NUMB_BITS] = (((mp_limb_t) 1) << ((c + 1) % GMP_NUMB_BITS));\t\n  /*  t1 has 1+floor((c+1)/numb) limbs */\n  \n  t1n = (c + 1) / GMP_NUMB_BITS + 1;\n  \n  ASSERT (y[yn - 1] != 0);\n  \n  mpn_tdiv_qr (z, t2, 0, t1, t1n, y, yn);\t/*bdivmod could be faster */\n  /* z has 2 + floor((c + 1)/numb) - yn, t2 has yn limbs */\n\n  *zn = t1n - yn + 1;\n  \n  while (*zn != 0 && z[*zn - 1] == 0)\n    (*zn)--;\n  \n  shiftright (z, *zn, 1);\n  zp = -c - yp;\n  \n  return zp;\n}\n\n\n/* calculates X and R such that X^k <= Y and (X + 1)^k > Y\n   where X = {x, xn}, Y = {y, yn}, R = {r, rn}, only calculates R if r != NULL\n   \n   R satisfies R < (X+1)^k-X^k\n   X satisfies X^k <= Y\n   \n   X needs ceil(yn/k) limb space\n   R needs yn limb space if r != 0\n   return sizeof remainder if r != 0\n*/\nmp_size_t 
mpn_rootrem(mp_ptr xp, mp_ptr r, mp_srcptr y,mp_size_t yn, mp_limb_t k)\n{\n  unsigned long b, clgk;\n  signed long d, tp, zp;\n  mpz_t t4, t3;\n  mp_ptr x, t1, t2;\n  mp_size_t t2n, xn, rn;\n  mp_limb_t val;\n  mp_size_t pos, bit;\n  \n  if (BELOW_THRESHOLD(yn, ROOTREM_THRESHOLD))\n     return mpn_rootrem_basecase(xp, r, y, yn, k);\n\n  d = 8;\t\t\t/* any d >= 1 will do, for testing to its limits use d = 1 TUNEME */\n  b = sizetwo (y, yn);\n  b = (b + k - 1) / k + 2 + d;\n  clgk = clg(k);\n\n  x = __GMP_ALLOCATE_FUNC_LIMBS(BITS_TO_LIMBS(b + 7 + GMP_NUMB_BITS));\n  t1 = __GMP_ALLOCATE_FUNC_LIMBS(BITS_TO_LIMBS (2 * b + 12 + GMP_NUMB_BITS));\n  t2 = __GMP_ALLOCATE_FUNC_LIMBS(BITS_TO_LIMBS (b + 6 + clgk + 1 + GMP_NUMB_BITS));\n  \n  mpz_init2(t3, b + 6 + GMP_NUMB_BITS * 2);\n  mpz_init2(t4, b + 6 + GMP_NUMB_BITS);\n  \n  zp = nroot(t2, &t2n, y, yn, 0, b, k, clgk, t1, PTR (t3), PTR (t4));\n\n  /*  1 <= t2 < 2^(b+7), -lg(Y)/k - b - 7 - lg(3/2) < zp < -lg(Y)/k + 1  where Y = {y,yn} */\n  tp = finv_fast (PTR (t3), &SIZ (t3), t2, t2n, zp, b, t1, PTR (t4)); /* t3 is our approx root */\n\n  /*  2^b <= t3 <= 2^(b+1)    tp=-sizetwo(t2,t2n)-b-zp  */\n  ASSERT (tp <= -d - 1);\n  \n  pos = (-tp - d - 1 + 1) / GMP_NUMB_BITS;\n  bit = (-tp - d - 1 + 1) % GMP_NUMB_BITS;\n  val = (((mp_limb_t) 1) << bit);\n  mpn_sub_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val);\n\n  if (PTR (t3)[SIZ (t3) - 1] == 0)\n    SIZ (t3)--;\n\n  shiftrights (PTR (t4), SIZ (t4), PTR (t3), SIZ (t3), -tp);\n  \n  if (mpn_add_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val))\n  {\n     PTR (t3)[SIZ (t3)] = 1;\n     SIZ (t3)++;\n  }\n  \n  pos = (-tp - d - 1) / GMP_NUMB_BITS;\n  bit = (-tp - d - 1) % GMP_NUMB_BITS;\n  val = (((mp_limb_t) 1) << bit);\n  \n  if (mpn_add_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val))\n  {\n     PTR (t3)[SIZ (t3)] = 1;\n     SIZ (t3)++;\n  }\n \n  shiftright (PTR (t3), SIZ (t3), -tp);\n\n  if (mpz_cmp (t4, t3) == 0)\n  {\n     xn = SIZ (t3);\n     
MPN_COPY_INCR (x, PTR (t3), xn);\n     \n     if (r != 0)\n\t  {\n\t     mpz_pow_ui (t4, t3, k);\n\t     mpn_sub (r, y, yn, PTR (t4), SIZ (t4));\t/* no carry */\n\t     rn = yn;\n\t     while (rn != 0 && r[rn - 1] == 0) rn--;\n\t  }\n     \n     mpz_clear (t4);\n     mpz_clear (t3);\n     \n     MPN_COPY(xp, x, (yn + k - 1)/k);\n      \n     __GMP_FREE_FUNC_LIMBS(x, BITS_TO_LIMBS(b + 7 + GMP_NUMB_BITS));\n     __GMP_FREE_FUNC_LIMBS(t1, BITS_TO_LIMBS(2*b + 12 + GMP_NUMB_BITS));\n     __GMP_FREE_FUNC_LIMBS(t2, BITS_TO_LIMBS(b + 6 + clgk + 1 + GMP_NUMB_BITS));\n      \n     return rn;\n  }\n\n  mpz_pow_ui (t4, t3, k);\n\n  if (SIZ (t4) > yn || (SIZ (t4) == yn && mpn_cmp(PTR (t4), y, yn) > 0))\n  {\n     mpz_sub_ui (t3, t3, 1);\n     xn = SIZ (t3);\n     MPN_COPY_INCR(x, PTR (t3), xn);\n     if (r != 0)\n\t  {\n\t     mpz_pow_ui(t4, t3, k);\n\t     mpn_sub(r, y, yn, PTR (t4), SIZ (t4));\t/* no carry */\n\t     rn = yn;\n\t     while (rn != 0 && r[rn - 1] == 0) rn--;\n     }\n      \n     mpz_clear (t4);\n     mpz_clear (t3);\n      \n     MPN_COPY(xp,x,(yn+k-1)/k);\n      \n     __GMP_FREE_FUNC_LIMBS(x, BITS_TO_LIMBS(b + 7 + GMP_NUMB_BITS));\n     __GMP_FREE_FUNC_LIMBS(t1,BITS_TO_LIMBS(2*b+ 12 + GMP_NUMB_BITS));\n     __GMP_FREE_FUNC_LIMBS(t2,BITS_TO_LIMBS(b + 6 + clgk + 1 + GMP_NUMB_BITS));\n     \n     return rn;\n  }\n  \n  xn = SIZ(t3);\n  MPN_COPY_INCR(x, PTR (t3), xn);\n  \n  if (r != 0)\n  {\n     mpn_sub(r, y, yn, PTR (t4), SIZ (t4));\t/* no carry */\n     rn = yn;\n     \n     while(rn != 0 && r[rn - 1] == 0) rn--;\n  }\n  \n  mpz_clear(t4);\n  mpz_clear(t3);\n  \n  MPN_COPY(xp, x, (yn + k - 1)/k);\n  \n  __GMP_FREE_FUNC_LIMBS(x, BITS_TO_LIMBS(b + 7 + GMP_NUMB_BITS));\n  __GMP_FREE_FUNC_LIMBS(t1, BITS_TO_LIMBS(2*b + 12 + GMP_NUMB_BITS));\n  __GMP_FREE_FUNC_LIMBS(t2, BITS_TO_LIMBS(b + 6 + clgk + 1 + GMP_NUMB_BITS));\n  \n  return rn;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/rrandom.c",
    "content": "/* mpn_rrandom -- Generate random numbers with relatively long strings\n   of ones and zeroes.  Suitable for border testing.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nstatic void gmp_rrandomb(mp_ptr rp, gmp_randstate_t rstate, mpir_ui nbits);\n\n/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.\n   Thus, we get the same random number sequence in the common cases.\n   FIXME: We should always generate the same random number sequence! */\n#if GMP_NUMB_BITS < 32\n#define BITS_PER_RANDCALL GMP_NUMB_BITS\n#else\n#define BITS_PER_RANDCALL 32\n#endif\n\nvoid\nmpn_rrandom (mp_ptr rp, gmp_randstate_t rnd, mp_size_t n)\n{\n  int bit_pos;\t\t\t/* bit number of least significant bit where\n\t\t\t\t            next bit field to be inserted */\n  mp_limb_t ran, ranm; /* buffer for random bits */\n\n  /* FIXME: Is n == 0 supposed to be allowed? */\n  ASSERT (n >= 0);\n\n  _gmp_rand (&ranm, rnd, BITS_PER_RANDCALL);\n  ran = ranm;\n\n  /* Start off at a random bit position in the most significant limb. 
*/\n  bit_pos = ran % GMP_NUMB_BITS;\n\n  gmp_rrandomb (rp, rnd, n * GMP_NUMB_BITS - bit_pos);\n}\n\nstatic void\ngmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mpir_ui nbits)\n{\n  mpir_ui bi;\n  mp_limb_t ranm; /* buffer for random bits */\n  unsigned cap_chunksize, chunksize;\n  mp_size_t i;\n\n  /* Set entire result to 111..1  */\n  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;\n  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;\n  for (i = i - 1; i >= 0; i--)\n    rp[i] = GMP_NUMB_MAX;\n\n  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n  cap_chunksize = nbits / (ranm % 4 + 1);\n  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */\n\n  bi = nbits;\n\n  for (;;)\n    {\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      if (bi == 0)\n\tbreak;\t\t\t/* low chunk is ...1 */\n\n      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;\n\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);\n\n      if (bi == 0)\n\tbreak; /* low chunk is ...0 */\n    }\n}\n"
  },
  {
    "path": "mpn/generic/rsh_divrem_hensel_qr_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t mpn_rsh_divrem_hensel_qr_1(mp_ptr qp, mp_srcptr xp, \n                               mp_size_t n, mp_limb_t d, int s, mp_limb_t cin)\n{\n   ASSERT(n > 0);\n   ASSERT(s >= 0);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(d%2 == 1);\n\n   if (BELOW_THRESHOLD(n, RSH_DIVREM_HENSEL_QR_1_THRESHOLD))\n      return mpn_rsh_divrem_hensel_qr_1_1(qp, xp, n, d, s, cin);\n\n   return mpn_rsh_divrem_hensel_qr_1_2(qp, xp, n, d, s, cin);\n}\n"
  },
  {
    "path": "mpn/generic/rsh_divrem_hensel_qr_1_1.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t mpn_rsh_divrem_hensel_qr_1_1(mp_ptr qp, mp_srcptr xp, \n                                mp_size_t n, mp_limb_t d, int s, mp_limb_t cin)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t, m, qo, qb;\n\n   ASSERT(n > 0);\n   ASSERT(d%2 == 1);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(s >= 0);\n\n   modlimb_invert(m, d); /*should we allow s = 0 ?? 
*/\n   h1 = xp[0];\n   c = 0;\n   h = cin;\n   t = h + c;\n   \n   if (t > h1)\n   {\n      h1 = h1 - t;\n      c = 1;\n   } \n   else\n   {\n      h1 = h1 - t;\n      c = 0;\n   }\n\n   q = h1*m;\n   qo = q>>s;\n   umul_ppmm(h, dummy, q, d);\n\n   for (j = 1; j <= n - 1; j++)\n   {\n      h1 = xp[j];\n      t = h + c;\n      if (t > h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n    \n      q = h1*m;\n      qo = qo | (q<<(GMP_LIMB_BITS - 1 - s)<<1);\n      qp[j - 1] = qo;\n      qo = q>>s;\n      umul_ppmm(h, dummy, q, d);\n    \n      ASSERT(dummy == h1);\n   }\n\n   qp[n - 1] = qo;\n   return h + c;\n}   \n"
  },
  {
    "path": "mpn/generic/rsh_divrem_hensel_qr_1_2.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* \n   using a two limb inverse of a one limb divisor \n   (xp,n) = (qp,n)*d - ret*B^n and 0 <= ret < d\n*/\nmp_limb_t mpn_rsh_divrem_hensel_qr_1_2(mp_ptr qp, mp_srcptr xp, \n                                mp_size_t n, mp_limb_t d, int s, mp_limb_t cin)\n{\n   mp_size_t j;\n   mp_limb_t c, h, q, dummy, h1, t, ml, mh, xl, xh, ql, qh, qo;\n\n   ASSERT(n >= 2);\n   ASSERT_MPN(xp, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));\n   ASSERT(d%2 == 1);\n   ASSERT(s >= 0);\n  \n   modlimb_invert(ml, d);\n   umul_ppmm(h, dummy, d, ml);\n\n   ASSERT(dummy == 1);\n\n   h = -h;\n   mh = ml*h; /* (mh, ml) is our two limb inverse */\n   h1 = xp[0];\n   h = cin;\n   c = 0;\n   t = h + c;\n   \n   if (t > h1)\n   {\n      h1 = h1 - t;\n      c = 1;\n   } else\n   {\n      h1 = h1 - t;\n      c = 0;\n   }\n\n   q = h1*ml;\n   qo = q>>s;\n   umul_ppmm(h, dummy, q, d);\n\n   for (j = 1; j + 1 <= n - 1; j += 2)\n   {\n      xl = xp[j];\n      xh = xp[j + 1];\n      t = h + c;\n      \n      if (xh == 0 && t > xl)\n         c = 1;\n      else\n         c = 0;\n\n      
sub_ddmmss(xh, xl, xh, xl, 0, t);\n      umul_ppmm(qh, ql, xl, ml);\n      qh = qh + xh*ml + xl*mh;\n\n      qo = qo|(ql<<(GMP_LIMB_BITS - 1 - s)<<1);\n      qp[j - 1] = qo;\n      qo = ql>>s;\n    \n      qo = qo | (qh<<(GMP_LIMB_BITS - 1 - s)<<1);\n      qp[j + 1 - 1] = qo;\n      qo = qh>>s;\n    \n      umul_ppmm(h, h1, qh, d);\n    \n      if (h1 > xh)\n         h++;\n   }\n\n   if (j <= n-1)\n   {\n      h1 = xp[j];\n      t = h + c;\n      \n      if (t > h1)\n      {\n         h1 = h1 - t;\n         c = 1;\n      }\n      else\n      {\n         h1 = h1 - t;\n         c = 0;\n      }\n    \n      q = h1*ml;\n      qo = qo | (q<<(GMP_LIMB_BITS - 1 - s)<<1);\n      qp[j - 1] = qo;\n      qo = q>>s;\n      umul_ppmm(h, dummy, q, d);\n      \n      ASSERT(dummy == h1);\n   }\n\n   qp[n - 1] = qo;\n   \n   return h+c;\n}\n"
  },
  {
    "path": "mpn/generic/rshift.c",
    "content": "/* mpn_rshift -- Shift right low level.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Shift U (pointed to by up and N limbs long) cnt bits to the right\n   and store the n least significant limbs of the result at rp.\n   The bits shifted out to the right are returned.\n\n   Argument constraints:\n   1. 0 < cnt < GMP_NUMB_BITS.\n   2. If the result is to be written over the input, rp must be <= up.\n*/\n\nmp_limb_t\nmpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)\n{\n  mp_limb_t high_limb, low_limb;\n  unsigned int tnc;\n  mp_size_t i;\n  mp_limb_t retval;\n\n  ASSERT (n >= 1);\n  ASSERT (cnt >= 1);\n  ASSERT (cnt < GMP_NUMB_BITS);\n  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));\n\n  tnc = GMP_NUMB_BITS - cnt;\n  high_limb = *up++;\n  retval = (high_limb << tnc) & GMP_NUMB_MASK;\n  low_limb = high_limb >> cnt;\n\n  for (i = n - 1; i != 0; i--)\n    {\n      high_limb = *up++;\n      *rp++ = low_limb | ((high_limb << tnc) & GMP_NUMB_MASK);\n      low_limb = high_limb >> cnt;\n    }\n  *rp = low_limb;\n\n  return retval;\n}\n"
  },
  {
    "path": "mpn/generic/sb_bdiv_q.c",
    "content": "/* mpn_sb_bdiv_q -- schoolbook Hensel division with precomputed inverse,\n   returning quotient only.\n\n   Contributed to the GNU project by Niels Möller.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.\n   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS\n   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2005, 2006, 2009 Free Software Foundation, Inc.\nCopyright 2009 David Havey\nCopyright 2010 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Computes Q = N / D mod B^nn, with an overflow W. Destroys N.\n\n   D must be odd. dinv is D^-1 mod B.\n\n   Note Q will be nn limbs.\n\n   To compute Q we cancel one limb at a time, using\n\n     qp[i] = D^{-1} * np[i] (mod B)\n     N -= B^i * qp[i] * D\n\n   The overflow is defined as follows: \n   Let X = sum(dp[i]*qp[j]*B^(i+j), 0 <= i < dn, 0 <= i+j < nn).\n   Then X is nn + 2 limbs. The low nn limbs by definition agree with N. 
\n   The overflow is the remaining high two limbs.\n*/\n\nvoid\nmpn_sb_bdiv_q (mp_ptr qp, mp_ptr wp,\n\t\t  mp_ptr np, mp_size_t nn,\n\t\t  mp_srcptr dp, mp_size_t dn,\n\t\t  mp_limb_t dinv)\n{\n  mp_size_t i;\n  mp_limb_t cy, q, hi, w0, w1;\n\n  ASSERT (dn > 0);\n  ASSERT ((dp[0] & 1) != 0);\n  ASSERT (((dinv * dp[0]) & GMP_NUMB_MASK) == 1);\n  ASSERT (nn >= dn);\n\n  w0 = 0;\n\n  for (i = nn - dn; i > 0; i--)\n    {\n      q = dinv * np[0];\n      cy = mpn_submul_1 (np, dp, dn, q);\n      w0 += mpn_sub_1 (np + dn, np + dn, i, cy);\n      ASSERT (np[0] == 0);\n      qp[0] = q;\n      qp++;\n      np++;\n    }\n\n  w1 = 0;\n\n  for (i = dn; i > 0; i--)\n    {\n      q = dinv * np[0];\n      hi = mpn_submul_1 (np, dp, i, q);\n      ADDC_LIMB(hi, w0, w0, hi);\n      w1 += hi;\n      ASSERT (np[0] == 0);\n      qp[0] = q;\n      qp++;\n      np++;\n    }\n\n  wp[0] = w0;\n  wp[1] = w1;\n}\n"
  },
  {
    "path": "mpn/generic/sb_bdiv_qr.c",
    "content": "/* mpn_sb_bdiv_qr -- schoolbook Hensel division with precomputed inverse,\n   returning quotient and remainder.\n\n   Contributed to the GNU project by Niels Möller.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.\n   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS\n   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2006, 2009 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (minor modifications)\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Computes a binary quotient of size qn = nn - dn.\n   Output:\n\n      Q = N * D^{-1} mod B^qn,\n\n      R = (N - Q * D) * B^(-qn)\n\n   Stores the dn least significant limbs of R at {np + nn - dn, dn},\n   and returns the borrow from the subtraction N - Q*D.\n\n   D must be odd. dinv is D^-1 mod B. 
*/\n\nmp_limb_t\nmpn_sb_bdiv_qr (mp_ptr qp,\n\t\t   mp_ptr np, mp_size_t nn,\n\t\t   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)\n{\n  mp_size_t qn;\n  mp_size_t i;\n  mp_limb_t rh;\n  mp_limb_t ql;\n\n  ASSERT (dn > 0);\n  ASSERT (nn > dn);\n  ASSERT ((dp[0] & 1) != 0);\n  ASSERT (((dinv * dp[0]) & GMP_NUMB_MASK) == 1);\n\n  dinv = -dinv;\n\n  qn = nn - dn;\n\n  rh = 0;\n\n  /* To complete the negation, this value is added to q. */\n  ql = 1;\n  while (qn > dn)\n    {\n      for (i = 0; i < dn; i++)\n\t{\n\t  mp_limb_t q;\n\n\t  q = dinv * np[i];\n\t  qp[i] = ~q;\n\n\t  np[i] = mpn_addmul_1 (np + i, dp, dn, q);\n\t}\n      rh += mpn_add (np + dn, np + dn, qn, np, dn);\n      ql = mpn_add_1 (qp, qp, dn, ql);\n\n      qp += dn; qn -= dn;\n      np += dn; nn -= dn;\n    }\n\n  for (i = 0; i < qn; i++)\n    {\n      mp_limb_t q;\n\n      q = dinv * np[i];\n      qp[i] = ~q;\n\n      np[i] = mpn_addmul_1 (np + i, dp, dn, q);\n    }\n\n  rh += mpn_add_n (np + dn, np + dn, np, qn);\n  ql = mpn_add_1 (qp, qp, qn, ql);\n\n  if (UNLIKELY (ql > 0))\n    {\n      /* q == 0 */\n      ASSERT (rh == 0);\n      return 0;\n    }\n  else\n    {\n      mp_limb_t cy;\n\n      cy = mpn_sub_n (np + qn, np + qn, dp, dn);\n      ASSERT (cy >= rh);\n      return cy - rh;\n    }\n}\n"
  },
  {
    "path": "mpn/generic/sb_div_q.c",
    "content": "/* mpn_sb_div_q -- Schoolbook division using the Möller-Granlund 3/2\n   division algorithm.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2007, 2009 Free Software Foundation, Inc.\n\nCopyright 2010, 2013 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_sb_div_q (mp_ptr qp,\n\t\t mp_ptr np, mp_size_t nn,\n\t\t mp_srcptr dp, mp_size_t dn,\n\t\t mp_limb_t dinv)\n{\n  mp_limb_t qh;\n  mp_size_t qn, i;\n  mp_limb_t n1, n0;\n  mp_limb_t d1, d0;\n  mp_limb_t cy, cy1;\n  mp_limb_t q;\n  mp_limb_t flag;\n\n  mp_size_t dn_orig = dn, qn_orig;\n  mp_srcptr dp_orig = dp;\n  mp_ptr np_orig = np;\n\n  ASSERT (dn > 2);\n  ASSERT (nn >= dn);\n  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);\n\n  np += nn;\n\n  qn = nn - dn;\n  if (qn + 1 < dn)\n    {\n      dp += dn - (qn + 1);\n      dn = qn + 1;\n    }\n\n  qh = mpn_cmp (np - dn, dp, dn) >= 0;\n  if (qh != 0)\n    mpn_sub_n (np - dn, np - dn, dp, dn);\n\n  qp += qn;\n\n  dn -= 2;\t\t\t/* offset dn by 2 for main division loops,\n\t\t\t\t   saving two iterations in mpn_submul_1.  */\n  d1 = dp[dn + 1];\n  d0 = dp[dn + 0];\n\n  np -= 2;\n\n  n1 = np[1];\n\n  for (i = qn - (dn + 2); i >= 0; i--)\n    {\n      np--;\n      if (UNLIKELY (n1 == d1) && np[1] == d0)\n\t{\n\t  q = GMP_NUMB_MASK;\n\t  mpn_submul_1 (np - dn, dp, dn + 2, q);\n\t  n1 = np[1];\t\t/* update n1, last loop's value will now be invalid */\n\t}\n      else\n\t{\n\t  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);\n\n\t  cy = mpn_submul_1 (np - dn, dp, dn, q);\n\n\t  cy1 = n0 < cy;\n\t  n0 = (n0 - cy) & GMP_NUMB_MASK;\n\t  cy = n1 < cy1;\n\t  n1 -= cy1;\n\t  np[0] = n0;\n\n\t  if (UNLIKELY (cy != 0))\n\t    {\n\t      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);\n\t      q--;\n\t    }\n\t}\n\n      *--qp = q;\n    }\n\n  flag = ~CNST_LIMB(0);\n\n  if (dn >= 0)\n    {\n      for (i = dn; i > 0; i--)\n\t{\n\t  np--;\n\t  if (UNLIKELY (n1 >= (d1 & flag)))\n\t    {\n\t      q = GMP_NUMB_MASK;\n\t      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);\n\n\t      if (UNLIKELY (n1 != cy))\n\t\t{\n\t\t  if (n1 < (cy & flag))\n\t\t    {\n\t\t      q--;\n\t\t      mpn_add_n (np - dn, np - dn, dp, dn + 2);\n\t\t    }\n\t\t 
 else\n\t\t    flag = 0;\n\t\t}\n\t      n1 = np[1];\n\t    }\n\t  else\n\t    {\n\t      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);\n\n\t      cy = mpn_submul_1 (np - dn, dp, dn, q);\n\n\t      cy1 = n0 < cy;\n\t      n0 = (n0 - cy) & GMP_NUMB_MASK;\n\t      cy = n1 < cy1;\n\t      n1 -= cy1;\n\t      np[0] = n0;\n\n\t      if (UNLIKELY (cy != 0))\n\t\t{\n\t\t  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);\n\t\t  q--;\n\t\t}\n\t    }\n\n\t  *--qp = q;\n\n\t  /* Truncate operands.  */\n\t  dn--;\n\t  dp++;\n\t}\n\n      np--;\n      if (UNLIKELY (n1 >= (d1 & flag)))\n\t{\n\t  q = GMP_NUMB_MASK;\n\t  cy = mpn_submul_1 (np, dp, 2, q);\n\n\t  if (UNLIKELY (n1 != cy))\n\t    {\n\t      if (n1 < (cy & flag))\n\t\t{\n\t\t  q--;\n\t\t  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);\n\t\t}\n\t      else\n\t\tflag = 0;\n\t    }\n\t  n1 = np[1];\n\t}\n      else\n\t{\n\t  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);\n\n\t  np[0] = n0;\n\t  np[1] = n1;\n\t}\n\n      *--qp = q;\n    }\n  ASSERT_ALWAYS (np[1] == n1);\n\n  np += 2;\n\n  dn = dn_orig;\n  if (UNLIKELY (n1 < (dn & flag)))\n    {\n      mp_limb_t q, x;\n\n      /* The quotient may be too large if the remainder is small.  Recompute\n\t for above ignored operand parts, until the remainder spills.\n\n\t FIXME: The quality of this code isn't the same as the code above.\n\t 1. We don't compute things in an optimal order, high-to-low, in order\n\t    to terminate as quickly as possible.\n\t 2. We mess with pointers and sizes, adding and subtracting and\n\t    adjusting to get things right.  It surely could be streamlined.\n\t 3. The only termination criteria are that we determine that the\n\t    quotient needs to be adjusted, or that we have recomputed\n\t    everything.  We should stop when the remainder is so large\n\t    that no additional subtracting could make it spill.\n\t 4. 
If nothing else, we should not do two loops of submul_1 over the\n\t    data, instead handle both the triangularization and chopping at\n\t    once.  */\n\n      x = n1;\n\n      if (dn > 2)\n\t{\n\t  /* Compensate for triangularization.  */\n\t  mp_limb_t y;\n\n\t  dp = dp_orig;\n\t  if (qn + 1 < dn)\n\t    {\n\t      dp += dn - (qn + 1);\n\t      dn = qn + 1;\n\t    }\n\n\t  y = np[-2];\n\n\t  for (i = dn - 3; i >= 0; i--)\n\t    {\n\t      q = qp[i];\n\t      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);\n\n\t      if (y < cy)\n\t\t{\n\t\t  if (x == 0)\n\t\t    {\n\t\t      cy = mpn_sub_1 (qp, qp, qn, 1);\n\t\t      ASSERT_ALWAYS (cy == 0);\n\t\t      return qh - cy;\n\t\t    }\n\t\t  x--;\n\t\t}\n\t      y -= cy;\n\t    }\n\t  np[-2] = y;\n\t}\n\n      dn = dn_orig;\n      if (qn + 1 < dn)\n\t{\n\t  /* Compensate for ignored dividend and divisor tails.  */\n\n\t  dp = dp_orig;\n\t  np = np_orig;\n\n\t  if (qh != 0)\n\t    {\n\t      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));\n\t      if (cy != 0)\n\t\t{\n\t\t  if (x == 0)\n\t\t    {\n\t\t      if (qn != 0)\n\t\t\tcy = mpn_sub_1 (qp, qp, qn, 1);\n\t\t      return qh - cy;\n\t\t    }\n\t\t  x--;\n\t\t}\n\t    }\n\n\t  if (qn == 0)\n\t    return qh;\n\n\t  for (i = dn - qn - 2; i >= 0; i--)\n\t    {\n\t      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);\n\t      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);\n\t      if (cy != 0)\n\t\t{\n\t\t  if (x == 0)\n\t\t    {\n\t\t      cy = mpn_sub_1 (qp, qp, qn, 1);\n\t\t      return qh;\n\t\t    }\n\t\t  x--;\n\t\t}\n\t    }\n\t}\n    }\n\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/sb_div_qr.c",
    "content": "/* mpn_sb_div_qr -- Schoolbook division using the Möller-Granlund 3/2\n   division algorithm.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2007, 2009 Free Software Foundation, Inc.\n\nCopyright 2010, 2013 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_sb_div_qr (mp_ptr qp,\n\t\t  mp_ptr np, mp_size_t nn,\n\t\t  mp_srcptr dp, mp_size_t dn,\n\t\t  mp_limb_t dinv)\n{\n  mp_limb_t qh;\n  mp_size_t i;\n  mp_limb_t n1, n0;\n  mp_limb_t d1, d0;\n  mp_limb_t cy, cy1, cy2;\n  mp_limb_t q;\n\n  ASSERT (dn > 2);\n  ASSERT (nn >= dn);\n  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);\n\n  np += nn;\n\n  qh = mpn_cmp (np - dn, dp, dn) >= 0;\n  if (qh != 0)\n    mpn_sub_n (np - dn, np - dn, dp, dn);\n\n  d1 = dp[dn - 1];\n\n  qp += nn - dn;\n\n  dn -= 2;\t\t\t/* offset dn by 2 for main division loops,\n\t\t\t\t   saving two iterations in mpn_submul_1.  
*/\n  d0 = dp[dn];\n\n  np -= 2;\n\n  n1 = np[1];\n\n  for (i = nn - (dn + 2); i > 0; i--)\n    {\n      np--;\n      if (UNLIKELY (n1 == d1) && np[1] == d0)\n\t{\n\t  q = GMP_NUMB_MASK;\n\t  mpn_submul_1 (np - dn, dp, dn + 2, q);\n\t  n1 = np[1];\t\t/* update n1, last loop's value will now be invalid */\n\t}\n      else\n\t{\n\t  udiv_qr_3by2(q, n1, n0, n1, np[1], np[0], d1, d0, dinv);\n\n     cy2 = mpn_submul_1 (np - dn, dp, dn, q);\n\n\t  sub_333(cy, n1, n0, 0, n1, n0, 0, 0, cy2);\n\n\t  np[0] = n0;\n\n\t  if (UNLIKELY (cy != 0))\n\t    {\n\t      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);\n         q--;\n\t    }\n\t}\n\n      *--qp = q;\n    }\n  np[1] = n1;\n\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/sb_divappr_q.c",
    "content": "/* mpn_sb_divappr_q -- Schoolbook division using the Möller-Granlund 3/2\n   division algorithm, returning approximate quotient.  The quotient returned\n   is either correct, or one too large.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.\n\nCopyright 2007, 2009 Free Software Foundation, Inc.\n\nCopyright 2010, 2013 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid __divappr_helper(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t qn)\n{   \n   mpn_sub_n(np + 1, np + 1, dp, qn + 1);\n   add_ssaaaa(np[2], np[1], np[2], np[1], 0, dp[qn]);\n   \n   for (qn--; qn >= 0; qn--)\n   {\n      qp[qn] = ~CNST_LIMB(0);\n      mpn_add_1(np, np, 3, dp[qn]);\n   }\n}\n\n           \nmp_limb_t\nmpn_sb_divappr_q (mp_ptr qp,\n\t\t     mp_ptr np, mp_size_t nn,\n\t\t     mp_srcptr dp, mp_size_t dn,\n\t\t     mp_limb_t dinv)\n{\n  mp_limb_t qh;\n  mp_size_t qn, i;\n  mp_limb_t n1, n0;\n  mp_limb_t d1, d0, r1, r2;\n  mp_limb_t cy, cy1, cy2;\n  mp_limb_t q;\n  \n  ASSERT (dn > 2);\n  ASSERT (nn >= dn);\n  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);\n\n  np += nn;\n\n  qn = nn - dn;\n  if (qn + 1 < dn)\n    {\n      dp += dn - (qn + 1);\n      dn = qn + 1;\n    }\n\n  qh = mpn_cmp (np - dn, dp, dn) >= 0;\n  if (qh != 0)\n    mpn_sub_n (np - dn, np - dn, dp, dn);\n\n  d1 = dp[dn - 1];\n  d0 = dp[dn - 2];\n  \n  qn--, np--;\n  \n  if (qn > dn - 2)\n  {\n   cy = np[0];\n   n1 = np[-1];\n\n   /* Reduce until dn - 2 >= qn */\n   for ( ; qn > dn - 2; qn--)\n     {\n       np --;\n       \n       if (UNLIKELY(cy == d1 && n1 == d0))\n       {\n          q = ~CNST_LIMB(0);\n\n          /* np -= dp*q */\n          np[0] = n1;\n          cy2 = cy - mpn_submul_1(np - dn + 1, dp, dn, q);\n          cy = np[0];\n          n1 = np[-1];\n       }\n       else\n       {\n          udiv_qr_3by2(q, cy, n1, cy, n1, np[-1], d1, d0, dinv);\n          \n          /* np -= dp*q */\n          cy1 = mpn_submul_1(np - dn + 1, dp, dn - 2, q);\n          sub_333(cy2, cy, n1, 0, cy, n1, 0, 0, cy1);\n       }\n\n       /* correct if remainder is too large */\n       if (UNLIKELY(cy2 != 0))\n         {\n          q--;\n          cy1 = mpn_add_n(np - dn + 1, np - dn + 1, dp, dn - 2);\n          add_ssaaaa(cy, n1, cy, n1, d1, d0);\n          add_ssaaaa(cy, n1, cy, n1, 0, cy1);\n         }\n       \n   
    qp[qn] = q;\n     }\n   \n   np[0] = cy;\n   np[-1] = n1;\n  }\n\n   dp = dp + dn - qn - 2; /* make dp length qn + 1 */\n   np--;\n   \n   if (qn > 0)\n  {\n   cy = np[1];\n   n1 = np[0];\n\n   for ( ; qn > 0; qn--)\n     {\n       /* fetch next word */\n       np--;\n\n       /* rare case where truncation ruins normalisation */\n       if (UNLIKELY(cy >= d1))\n         {\n\n       np[1] = n1;\n       \n       if (cy > d1 || (cy == d1 && mpn_cmp(np - qn + 1, dp, qn + 1) >= 0))\n         {\n       __divappr_helper(qp, np - qn, dp, qn + 1);\n       return qh;\n         }\n\n       if (n1 >= d0)\n       {\n          q = ~CNST_LIMB(0);       \n\n          /* np -= dp*q */\n          cy2 = cy - mpn_submul_1(np - qn, dp, qn + 2, q);\n          cy = np[1];\n          n1 = np[0];\n       }\n       else\n       {\n          udiv_qr_3by2(q, cy, n1, cy, n1, np[0], d1, d0, dinv);\n          \n          /* np -= dp*q */\n          cy1 = mpn_submul_1(np - qn, dp, qn, q);\n          sub_333(cy2, cy, n1, 0, cy, n1, 0, 0, cy1);\n       }\n\n         } \n       else\n       {\n          udiv_qr_3by2(q, cy, n1, cy, n1, np[0], d1, d0, dinv);\n\n          /* np -= dp*q */\n          cy1 = mpn_submul_1(np - qn, dp, qn, q);\n          sub_333(cy2, cy, n1, 0, cy, n1, 0, 0, cy1);\n       }\n         \n       /* correct if quotient is too large */\n       if (UNLIKELY(cy2 != 0))\n         {\n       q--;\n       cy1 = mpn_add_n(np - qn, np - qn, dp, qn);\n       add_ssaaaa(cy, n1, cy, n1, d1, d0);\n       add_ssaaaa(cy, n1, cy, n1, 0, cy1);\n         }\n       \n       qp[qn] = q;\n       dp++;\n     }\n\n   np[1] = cy;\n   np[0] = n1;\n   }\n\n\n     {\n       /* fetch next word */\n       cy = np[1];\n \n       np--;\n\n       /* rare case where truncation ruins normalisation */\n       if (UNLIKELY(cy >= d1))\n         {\n       if (cy > d1 || (cy == d1 && np[1] >= dp[0]))\n         {\n       __divappr_helper(qp, np, dp, 1);\n       return qh;\n         }\n       if (np[1] >= d0)\n    
   {\n          q = ~CNST_LIMB(0);\n\n          /* np -= dp*q */\n          cy -= mpn_submul_1(np, dp, 2, q);\n       \n          /* correct if quotient is too large */\n          if (UNLIKELY(cy != 0))\n          {\n             q--;\n             np[2] = cy + mpn_add_n(np, np, dp, 2);\n          }\n       } else\n       {\n          udiv_qr_3by2(q, np[1], np[0], cy, np[1], np[0], d1, d0, dinv);\n\n          np[2] = 0;\n       }\n\n         }\n       else\n       {\n          udiv_qr_3by2(q, np[1], np[0], cy, np[1], np[0], d1, d0, dinv);\n\n          np[2] = 0;\n       }       \n       \n       qp[0] = q;\n     }\n\n  return qh;\n}\n"
  },
  {
    "path": "mpn/generic/scan0.c",
    "content": "/* mpn_scan0 -- Scan from a given bit position for the next clear bit.\n\nCopyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Argument constraints:\n   1. U must sooner or later have a limb with a clear bit.\n */\n\nmp_bitcnt_t\nmpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)\n{\n  mp_size_t starting_word;\n  mp_limb_t alimb;\n  int cnt;\n  mp_srcptr p;\n\n  /* Start at the word implied by STARTING_BIT.  */\n  starting_word = starting_bit / GMP_NUMB_BITS;\n  p = up + starting_word;\n  alimb = *p++ ^ GMP_NUMB_MASK;\n\n  /* Mask off any bits before STARTING_BIT in the first limb.  */\n  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);\n\n  while (alimb == 0)\n    alimb = *p++ ^ GMP_NUMB_MASK;\n\n  count_trailing_zeros (cnt, alimb);\n  return (p - up - 1) * GMP_NUMB_BITS + cnt;\n}\n"
  },
  {
    "path": "mpn/generic/scan1.c",
    "content": "/* mpn_scan1 -- Scan from a given bit position for the next set bit.\n\nCopyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* Argument constraints:\n   1. U must sooner or later have a limb != 0.\n */\n\nmp_bitcnt_t\nmpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)\n{\n  mp_size_t starting_word;\n  mp_limb_t alimb;\n  int cnt;\n  mp_srcptr p;\n\n  /* Start at the word implied by STARTING_BIT.  */\n  starting_word = starting_bit / GMP_NUMB_BITS;\n  p = up + starting_word;\n  alimb = *p++;\n\n  /* Mask off any bits before STARTING_BIT in the first limb.  */\n  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);\n\n  while (alimb == 0)\n    alimb = *p++;\n\n  count_trailing_zeros (cnt, alimb);\n  return (p - up - 1) * GMP_NUMB_BITS + cnt;\n}\n"
  },
  {
    "path": "mpn/generic/set_str.c",
    "content": "/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) --\n   Convert a STR_LEN long base BASE byte string pointed to by STR to a limb\n   vector pointed to by RES_PTR.  Return the number of limbs in RES_PTR.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\n   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH A MUTABLE\n   INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN\n   FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE\n   GNU MP RELEASE.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,\n2008 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n\n/* TODO:\n\n      Perhaps do not compute the highest power?\n      Instead, multiply twice by the 2nd highest power:\n\n\t       _______\n\t      |_______|  hp\n\t      |_______|  pow\n       _______________\n      |_______________|  final result\n\n\n\t       _______\n\t      |_______|  hp\n\t\t  |___|  pow[-1]\n\t   ___________\n\t  |___________|  intermediate result\n\t\t  |___|  pow[-1]\n       _______________\n      |_______________|  final result\n\n      Generalizing that idea, perhaps we should make powtab contain successive\n      cubes, not squares.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_size_t\nmpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)\n{\n  if (POW2_P (base))\n    {\n      /* The base is a power of 2.  Read the input string from least to most\n\t significant character/digit.  */\n\n      const unsigned char *s;\n      int next_bitpos;\n      mp_limb_t res_digit;\n      mp_size_t size;\n      int bits_per_indigit = mp_bases[base].big_base;\n\n      size = 0;\n      res_digit = 0;\n      next_bitpos = 0;\n\n      for (s = str + str_len - 1; s >= str; s--)\n\t{\n\t  int inp_digit = *s;\n\n\t  res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;\n\t  next_bitpos += bits_per_indigit;\n\t  if (next_bitpos >= GMP_NUMB_BITS)\n\t    {\n\t      rp[size++] = res_digit;\n\t      next_bitpos -= GMP_NUMB_BITS;\n\t      res_digit = inp_digit >> (bits_per_indigit - next_bitpos);\n\t    }\n\t}\n\n      if (res_digit != 0)\n\trp[size++] = res_digit;\n      return size;\n    }\n\n  if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))\n    return mpn_bc_set_str (rp, str, str_len, base);\n  else\n    {\n      mp_ptr powtab_mem, tp;\n      powers_t powtab[GMP_LIMB_BITS];\n      int chars_per_limb;\n      mp_size_t size;\n      mp_size_t un;\n      TMP_DECL;\n\n      TMP_MARK;\n\n      chars_per_limb = mp_bases[base].chars_per_limb;\n\n      un = str_len / 
chars_per_limb + 1;\n\n      /* Allocate one large block for the powers of big_base.  */\n      powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));\n\n      mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);\n\n      tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));\n      size = mpn_dc_set_str (rp, str, str_len, powtab, tp);\n\n      TMP_FREE;\n      return size;\n    }\n}\n\nvoid\nmpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)\n{\n  mp_ptr powtab_mem_ptr;\n  long i, pi;\n  mp_size_t n;\n  mp_ptr p, t;\n  unsigned normalization_steps;\n  mp_limb_t big_base, big_base_inverted;\n  int chars_per_limb;\n  size_t digits_in_base;\n  mp_size_t shift;\n\n  powtab_mem_ptr = powtab_mem;\n\n  chars_per_limb = mp_bases[base].chars_per_limb;\n  big_base = mp_bases[base].big_base;\n  big_base_inverted = mp_bases[base].big_base_inverted;\n  count_leading_zeros (normalization_steps, big_base);\n\n  p = powtab_mem_ptr;\n  powtab_mem_ptr += 1;\n\n  digits_in_base = chars_per_limb;\n\n  p[0] = big_base;\n  n = 1;\n\n  count_leading_zeros (i, un - 1);\n  i = GMP_LIMB_BITS - 1 - i;\n\n  powtab[i].p = p;\n  powtab[i].n = n;\n  powtab[i].digits_in_base = digits_in_base;\n  powtab[i].base = base;\n  powtab[i].shift = 0;\n\n  shift = 0;\n  for (pi = i - 1; pi >= 0; pi--)\n    {\n      t = powtab_mem_ptr;\n      powtab_mem_ptr += 2 * n;\n\n      ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));\n\n      mpn_sqr (t, p, n);\n      n = 2 * n - 1; n += t[n] != 0;\n      digits_in_base *= 2;\n#if 1\n      if ((((un - 1) >> pi) & 2) == 0)\n\t{\n\t  mpn_divexact_1 (t, t, n, big_base);\n\t  n -= t[n - 1] == 0;\n\t  digits_in_base -= chars_per_limb;\n\t}\n#else\n      if (CLEVER_CONDITION_1 ())\n\t{\n\t  /* perform adjustment operation of previous */\n\t  cy = mpn_mul_1 (p, p, n, big_base);\n\t}\n      if (CLEVER_CONDITION_2 ())\n\t{\n\t  /* perform adjustment operation of new */\n\t  cy = mpn_mul_1 (t, t, 
n, big_base);\n\t}\n#endif\n      shift *= 2;\n      /* Strip low zero limbs, but be careful to keep the result divisible by\n\t big_base.  */\n      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)\n\t{\n\t  t++;\n\t  n--;\n\t  shift++;\n\t}\n      p = t;\n      powtab[pi].p = p;\n      powtab[pi].n = n;\n      powtab[pi].digits_in_base = digits_in_base;\n      powtab[pi].base = base;\n      powtab[pi].shift = shift;\n    }\n}\n\nmp_size_t\nmpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,\n\t\tconst powers_t *powtab, mp_ptr tp)\n{\n  size_t len_lo, len_hi;\n  mp_limb_t cy;\n  mp_size_t ln, hn, n, sn;\n\n  len_lo = powtab->digits_in_base;\n\n  if (str_len <= len_lo)\n    {\n      if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))\n\treturn mpn_bc_set_str (rp, str, str_len, powtab->base);\n      else\n\treturn mpn_dc_set_str (rp, str, str_len, powtab + 1, tp);\n    }\n\n  len_hi = str_len - len_lo;\n  ASSERT (len_lo >= len_hi);\n\n  if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))\n    hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);\n  else\n    hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp);\n\n  sn = powtab->shift;\n\n  if (hn == 0)\n    {\n      MPN_ZERO (rp, powtab->n + sn);\n    }\n  else\n    {\n      if (powtab->n > hn)\n\tmpn_mul (rp + sn, powtab->p, powtab->n, tp, hn);\n      else\n\tmpn_mul (rp + sn, tp, hn, powtab->p, powtab->n);\n      MPN_ZERO (rp, sn);\n    }\n\n  str = str + str_len - len_lo;\n  if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))\n    ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);\n  else\n    ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + sn + 1);\n\n  if (ln != 0)\n    {\n      cy = mpn_add_n (rp, rp, tp, ln);\n      mpn_incr_u (rp + ln, cy);\n    }\n  n = hn + powtab->n + sn;\n  return n - (rp[n - 1] == 0);\n}\n\nmp_size_t\nmpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)\n{\n  mp_size_t size;\n  size_t i;\n  long j;\n  
mp_limb_t cy_limb;\n\n  mp_limb_t big_base;\n  int chars_per_limb;\n  mp_limb_t res_digit;\n\n  ASSERT (base >= 2);\n  ASSERT (base < numberof (mp_bases));\n  ASSERT (str_len >= 1);\n\n  big_base = mp_bases[base].big_base;\n  chars_per_limb = mp_bases[base].chars_per_limb;\n\n  size = 0;\n  for (i = chars_per_limb; i < str_len; i += chars_per_limb)\n    {\n      res_digit = *str++;\n      if (base == 10)\n\t{ /* This is a common case.\n\t     Help the compiler to avoid multiplication.  */\n\t  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)\n\t    res_digit = res_digit * 10 + *str++;\n\t}\n      else\n\t{\n\t  for (j = chars_per_limb - 1; j != 0; j--)\n\t    res_digit = res_digit * base + *str++;\n\t}\n\n      if (size == 0)\n\t{\n\t  if (res_digit != 0)\n\t    {\n\t      rp[0] = res_digit;\n\t      size = 1;\n\t    }\n\t}\n      else\n\t{\n#if HAVE_NATIVE_mpn_mul_1c\n\t  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);\n#else\n\t  cy_limb = mpn_mul_1 (rp, rp, size, big_base);\n\t  cy_limb += mpn_add_1 (rp, rp, size, res_digit);\n#endif\n\t  if (cy_limb != 0)\n\t    rp[size++] = cy_limb;\n\t}\n    }\n\n  big_base = base;\n  res_digit = *str++;\n  if (base == 10)\n    { /* This is a common case.\n\t Help the compiler to avoid multiplication.  
*/\n      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)\n\t{\n\t  res_digit = res_digit * 10 + *str++;\n\t  big_base *= 10;\n\t}\n    }\n  else\n    {\n      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)\n\t{\n\t  res_digit = res_digit * base + *str++;\n\t  big_base *= base;\n\t}\n    }\n\n  if (size == 0)\n    {\n      if (res_digit != 0)\n\t{\n\t  rp[0] = res_digit;\n\t  size = 1;\n\t}\n    }\n  else\n    {\n#if HAVE_NATIVE_mpn_mul_1c\n      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);\n#else\n      cy_limb = mpn_mul_1 (rp, rp, size, big_base);\n      cy_limb += mpn_add_1 (rp, rp, size, res_digit);\n#endif\n      if (cy_limb != 0)\n\trp[size++] = cy_limb;\n    }\n  return size;\n}\n"
  },
  {
    "path": "mpn/generic/sizeinbase.c",
    "content": "/* mpn_sizeinbase -- approximation to chars required for an mpn.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Same as mpz_sizeinbase, meaning exact for power-of-2 bases, and either\n   exact or 1 too big for other bases.  */\n\nsize_t\nmpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)\n{\n  int lb_base, cnt;\n  mp_size_t totbits;\n\n  ASSERT (xsize >= 0);\n  ASSERT (base >= 2);\n  ASSERT (base < numberof (__mp_bases));\n\n  /* Special case for X == 0.  */\n  if (xsize == 0)\n    return 1;\n\n  /* Calculate the total number of significant bits of X.  */\n  count_leading_zeros (cnt, xp[xsize-1]);\n  totbits = xsize * BITS_PER_MP_LIMB - cnt;\n\n  if (POW2_P (base))\n    {\n      /* Special case for powers of 2, giving exact result.  
*/\n      lb_base = __mp_bases[base].big_base;\n      return (totbits + lb_base - 1) / lb_base;\n    }\n  else\n    return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1;\n}\n"
  },
  {
    "path": "mpn/generic/sqr_basecase.c",
    "content": "/* mpn_sqr_basecase -- Internal routine to square a natural number\n   of length n.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,\n2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if HAVE_NATIVE_mpn_sqr_diagonal\n#define MPN_SQR_DIAGONAL(rp, up, n)\t\t\t\t\t\\\n  mpn_sqr_diagonal (rp, up, n)\n#else\n#define MPN_SQR_DIAGONAL(rp, up, n)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_size_t _i;\t\t\t\t\t\t\t\\\n    for (_i = 0; _i < (n); _i++)\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tmp_limb_t ul, lpl;\t\t\t\t\t\t\\\n\tul = (up)[_i];\t\t\t\t\t\t\t\\\n\tumul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);\t\\\n\t(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n\n#undef READY_WITH_mpn_sqr_basecase\n\n\n#if ! 
defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s\nvoid\nmpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)\n{\n  mp_size_t i;\n  mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];\n  mp_ptr tp = tarr;\n  mp_limb_t cy;\n\n  /* must fit 2*n limbs in tarr */\n  ASSERT (n <= SQR_KARATSUBA_THRESHOLD);\n\n  if ((n & 1) != 0)\n    {\n      if (n == 1)\n\t{\n\t  mp_limb_t ul, lpl;\n\t  ul = up[0];\n\t  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);\n\t  rp[0] = lpl >> GMP_NAIL_BITS;\n\t  return;\n\t}\n\n      MPN_ZERO (tp, n);\n\n      for (i = 0; i <= n - 2; i += 2)\n\t{\n\t  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);\n\t  tp[n + i] = cy;\n\t}\n    }\n  else\n    {\n      if (n == 2)\n\t{\n\t  rp[0] = 0;\n\t  rp[1] = 0;\n\t  rp[3] = mpn_addmul_2 (rp, up, 2, up);\n\t  return;\n\t}\n\n      MPN_ZERO (tp, n);\n\n      for (i = 0; i <= n - 4; i += 2)\n\t{\n\t  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);\n\t  tp[n + i] = cy;\n\t}\n      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);\n      tp[2 * n - 3] = cy;\n    }\n\n  MPN_SQR_DIAGONAL (rp, up, n);\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);\n#else\n  cy = mpn_double (tp, 2 * n - 2);\n  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);\n  rp[2 * n - 1] += cy;\n#endif\n}\n#define READY_WITH_mpn_sqr_basecase\n#endif\n\n\n#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2\n\n/* mpn_sqr_basecase using plain mpn_addmul_2.\n\n   This is tricky, since we have to let mpn_addmul_2 make some undesirable\n   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.\n   This forces us to conditionally add or subtract the mpn_sqr_diagonal\n   results.  
Examples of the product we form:\n\n   n = 4              n = 5\t\tn = 6\n   u1u0 * u3u2u1      u1u0 * u4u3u2u1\tu1u0 * u5u4u3u2u1\n   u2 * u3\t      u3u2 * u4u3\tu3u2 * u5u4u3\n\t\t\t\t\tu4 * u5\n   add: u0 u2 u3      add: u0 u2 u4\tadd: u0 u2 u4 u5\n   sub: u1\t      sub: u1 u3\tsub: u1 u3\n*/\n\nvoid\nmpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)\n{\n  mp_size_t i;\n  mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];\n  mp_ptr tp = tarr;\n  mp_limb_t cy;\n\n  /* must fit 2*n limbs in tarr */\n  ASSERT (n <= SQR_KARATSUBA_THRESHOLD);\n\n  if ((n & 1) != 0)\n    {\n      mp_limb_t x0, x1;\n\n      if (n == 1)\n\t{\n\t  mp_limb_t ul, lpl;\n\t  ul = up[0];\n\t  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);\n\t  rp[0] = lpl >> GMP_NAIL_BITS;\n\t  return;\n\t}\n\n      /* The code below doesn't like unnormalized operands.  Since such\n\t operands are unusual, handle them with a dumb recursion.  */\n      if (up[n - 1] == 0)\n\t{\n\t  rp[2 * n - 2] = 0;\n\t  rp[2 * n - 1] = 0;\n\t  mpn_sqr_basecase (rp, up, n - 1);\n\t  return;\n\t}\n\n      MPN_ZERO (tp, n);\n\n      for (i = 0; i <= n - 2; i += 2)\n\t{\n\t  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);\n\t  tp[n + i] = cy;\n\t}\n\n      MPN_SQR_DIAGONAL (rp, up, n);\n\n      for (i = 2;; i += 4)\n\t{\n\t  x0 = rp[i + 0];\n\t  rp[i + 0] = (-x0) & GMP_NUMB_MASK;\n\t  x1 = rp[i + 1];\n\t  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;\n\t  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);\n\t  if (i + 4 >= 2 * n)\n\t    break;\n\t  mpn_incr_u (rp + i + 4, cy);\n\t}\n    }\n  else\n    {\n      mp_limb_t x0, x1;\n\n      if (n == 2)\n\t{\n\t  rp[0] = 0;\n\t  rp[1] = 0;\n\t  rp[3] = mpn_addmul_2 (rp, up, 2, up);\n\t  return;\n\t}\n\n      /* The code below doesn't like unnormalized operands.  Since such\n\t operands are unusual, handle them with a dumb recursion.  
*/\n      if (up[n - 1] == 0)\n\t{\n\t  rp[2 * n - 2] = 0;\n\t  rp[2 * n - 1] = 0;\n\t  mpn_sqr_basecase (rp, up, n - 1);\n\t  return;\n\t}\n\n      MPN_ZERO (tp, n);\n\n      for (i = 0; i <= n - 4; i += 2)\n\t{\n\t  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);\n\t  tp[n + i] = cy;\n\t}\n      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);\n      tp[2 * n - 3] = cy;\n\n      MPN_SQR_DIAGONAL (rp, up, n);\n\n      for (i = 2;; i += 4)\n\t{\n\t  x0 = rp[i + 0];\n\t  rp[i + 0] = (-x0) & GMP_NUMB_MASK;\n\t  x1 = rp[i + 1];\n\t  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;\n\t  if (i + 6 >= 2 * n)\n\t    break;\n\t  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);\n\t  mpn_incr_u (rp + i + 4, cy);\n\t}\n      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);\n    }\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);\n#else\n  cy = mpn_double (tp, 2 * n - 2);\n  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);\n#endif\n  rp[2 * n - 1] += cy;\n}\n#define READY_WITH_mpn_sqr_basecase\n#endif\n\n\n#if ! defined (READY_WITH_mpn_sqr_basecase)\n\n/* Default mpn_sqr_basecase using mpn_addmul_1.  */\n\nvoid\nmpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)\n{\n  mp_size_t i;\n\n  ASSERT (n >= 1);\n  ASSERT (! 
MPN_OVERLAP_P (rp, 2*n, up, n));\n\n  {\n    mp_limb_t ul, lpl;\n    ul = up[0];\n    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);\n    rp[0] = lpl >> GMP_NAIL_BITS;\n  }\n  if (n > 1)\n    {\n      mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];\n      mp_ptr tp = tarr;\n      mp_limb_t cy;\n\n      /* must fit 2*n limbs in tarr */\n      ASSERT (n <= SQR_KARATSUBA_THRESHOLD);\n\n      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);\n      tp[n - 1] = cy;\n      for (i = 2; i < n; i++)\n\t{\n\t  mp_limb_t cy;\n\t  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);\n\t  tp[n + i - 2] = cy;\n\t}\n      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);\n\n      {\n\tmp_limb_t cy;\n#if HAVE_NATIVE_mpn_addlsh1_n\n\tcy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);\n#else\n\tcy = mpn_double (tp, 2 * n - 2);\n\tcy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);\n#endif\n\trp[2 * n - 1] += cy;\n      }\n    }\n}\n#endif\n"
  },
  {
    "path": "mpn/generic/sqrtrem.c",
    "content": "/* mpn_sqrtrem -- square root and remainder\n\nCopyright 1999, 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Contributed by Paul Zimmermann.\n   See \"Karatsuba Square Root\", reference in gmp.texi.  */\n\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mp_limb_t mpn_intdivrem (mp_ptr qp, mp_size_t qxn,\n\t    mp_ptr np, mp_size_t nn,\n\t    mp_srcptr dp, mp_size_t dn)\n{\n  ASSERT (qxn >= 0);\n  ASSERT (nn >= dn);\n  ASSERT (dn >= 1);\n  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);\n  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));\n  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);\n  ASSERT (! 
MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));\n  ASSERT_MPN (np, nn);\n  ASSERT_MPN (dp, dn);\n\n  if (dn == 1)\n    {\n      mp_limb_t ret;\n      mp_ptr q2p;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\n      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);\n      qn = nn + qxn - 1;\n      MPN_COPY (qp, q2p, qn);\n      ret = q2p[qn];\n\n      TMP_FREE;\n      return ret;\n    }\n  else if (dn == 2)\n    {\n      return mpn_divrem_2 (qp, qxn, np, nn, dp);\n    }\n  else\n    {\n      mp_ptr rp, q2p;\n      mp_limb_t qhl;\n      mp_size_t qn;\n      TMP_DECL;\n\n      TMP_MARK;\n      if (UNLIKELY (qxn != 0))\n\t{\n\t  mp_ptr n2p;\n\t  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);\n\t  MPN_ZERO (n2p, qxn);\n\t  MPN_COPY (n2p + qxn, np, nn);\n\t  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\n\t  qn = nn - dn + qxn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      else\n\t{\n\t  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);\n\t  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);\n\t  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);\n\t  MPN_COPY (np, rp, dn);\t/* overwrite np area with remainder */\n\t  qn = nn - dn;\n\t  MPN_COPY (qp, q2p, qn);\n\t  qhl = q2p[qn];\n\t}\n      TMP_FREE;\n      return qhl;\n    }\n}\n\n\n\n\n/* Square roots table.  
Generated by the following program:\n#include \"mpir.h\"\nmain(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);\nmpz_sqrt(x,x);mpz_out_str(0,10,x);printf(\",\");if(i%16==15)printf(\"\\n\");}}\n*/\nstatic const unsigned char approx_tab[192] =\n  {\n    128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,\n    143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,\n    156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,\n    169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,\n    181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,\n    192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,\n    202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,\n    212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,\n    221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,\n    230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,\n    239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,\n    247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255\n  };\n\n#define HALF_NAIL (GMP_NAIL_BITS / 2)\n\n/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */\nstatic mp_size_t\nmpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)\n{\n  mp_limb_t np0, s, r, q, u;\n  int prec;\n\n  ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);\n  ASSERT (GMP_LIMB_BITS >= 16);\n\n  /* first compute a 8-bit approximation from the high 8-bits of np[0] */\n  np0 = np[0] << GMP_NAIL_BITS;\n  q = np0 >> (GMP_LIMB_BITS - 8);\n  /* 2^6 = 64 <= q < 256 = 2^8 */\n  s = approx_tab[q - 64];\t\t\t\t/* 128 <= s < 255 */\n  r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s;\t\t/* r <= 2*s */\n  if (r > 2 * s)\n    {\n      r -= 2 * s + 1;\n      s++;\n    }\n\n  prec = 8;\n  np0 <<= 2 * prec;\n  while (2 * prec < GMP_LIMB_BITS)\n    {\n      /* invariant: s has prec bits, and r <= 2*s */\n      r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));\n      np0 <<= prec;\n      
u = 2 * s;\n      q = r / u;\n      u = r - q * u;\n      s = (s << prec) + q;\n      u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));\n      q = q * q;\n      r = u - q;\n      if (u < q)\n\t{\n\t  r += 2 * s - 1;\n\t  s --;\n\t}\n      np0 <<= prec;\n      prec = 2 * prec;\n    }\n\n  ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */\n\n  /* normalize back, assuming GMP_NAIL_BITS is even */\n  ASSERT (GMP_NAIL_BITS % 2 == 0);\n  sp[0] = s >> HALF_NAIL;\n  u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */\n  r += u * ((sp[0] << (HALF_NAIL + 1)) + u);\n  r = r >> GMP_NAIL_BITS;\n\n  if (rp != NULL)\n    rp[0] = r;\n  return r != 0 ? 1 : 0;\n}\n\n\n#define Prec (GMP_NUMB_BITS >> 1)\n\n/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized\n   return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */\nstatic mp_limb_t\nmpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)\n{\n  mp_limb_t qhl, q, u, np0;\n  int cc;\n\n  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);\n\n  np0 = np[0];\n  mpn_sqrtrem1 (sp, rp, np + 1);\n  qhl = 0;\n  while (rp[0] >= sp[0])\n    {\n      qhl++;\n      rp[0] -= sp[0];\n    }\n  /* now rp[0] < sp[0] < 2^Prec */\n  rp[0] = (rp[0] << Prec) + (np0 >> Prec);\n  u = 2 * sp[0];\n  q = rp[0] / u;\n  u = rp[0] - q * u;\n  q += (qhl & 1) << (Prec - 1);\n  qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */\n  /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */\n  sp[0] = ((sp[0] + qhl) << Prec) + q;\n  cc = u >> Prec;\n  rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));\n  /* subtract q * q or qhl*2^(2*Prec) from rp */\n  cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;\n  /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */\n  if (cc < 0)\n    {\n      cc += sp[0] != 0 ? 
mpn_add_1 (rp, rp, 1, sp[0]) : 1;\n      cc += mpn_add_1 (rp, rp, 1, --sp[0]);\n    }\n\n  return cc;\n}\n\n/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},\n   and in {np, n} the low n limbs of the remainder, returns the high\n   limb of the remainder (which is 0 or 1).\n   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4\n   where B=2^GMP_NUMB_BITS.  */\nstatic mp_limb_t\nmpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)\n{\n  mp_limb_t q;\t\t\t/* carry out of {sp, n} */\n  int c, b;\t\t\t/* carry out of remainder */\n  mp_size_t l, h;\n\n  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);\n\n  if (n == 1)\n    c = mpn_sqrtrem2 (sp, np, np);\n  else\n    {\n      l = n / 2;\n      h = n - l;\n      q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);\n      if (q != 0)\n        mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);\n      q += mpn_intdivrem (sp, 0, np + l, n, sp + l, h);\n      c = sp[0] & 1;\n      mpn_half (sp, l);\n      sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;\n      q >>= 1;\n      if (c != 0)\n        c = mpn_add_n (np + l, np + l, sp + l, h);\n      mpn_sqr (np + n, sp, l);\n      b = q + mpn_sub_n (np, np, np + n, 2 * l);\n      c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);\n      q = mpn_add_1 (sp + l, sp + l, h, q);\n\n      if (c < 0)\n        {\n          c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;\n          c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));\n          q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));\n        }\n    }\n\n  return c;\n}\n\n\nmp_size_t\nmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)\n{\n  mp_limb_t *tp, s0[1], cc, high, rl;\n  int c;\n  mp_size_t rn, tn;\n  TMP_DECL;\n\n  ASSERT (nn >= 0);\n  ASSERT_MPN (np, nn);\n\n  /* If OP is zero, both results are zero.  */\n  if (nn == 0)\n    return 0;\n\n  ASSERT (np[nn - 1] != 0);\n  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));\n  ASSERT (rp == NULL || ! 
MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));\n  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));\n\n  high = np[nn - 1];\n  if (nn == 1 && (high & GMP_NUMB_HIGHBIT))\n    return mpn_sqrtrem1 (sp, rp, np);\n  count_leading_zeros (c, high);\n  c -= GMP_NAIL_BITS;\n\n  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */\n  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */\n\n  TMP_MARK;\n  if (nn % 2 != 0 || c > 0)\n    {\n      tp = TMP_ALLOC_LIMBS (2 * tn);\n      tp[0] = 0;\t     /* needed only when 2*tn > nn, but saves a test */\n      if (c != 0)\n\tmpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);\n      else\n\tMPN_COPY (tp + 2 * tn - nn, np, nn);\n      rl = mpn_dc_sqrtrem (sp, tp, tn);\n      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,\n\t thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */\n      c += (nn % 2) * GMP_NUMB_BITS / 2;\t\t/* c now represents k */\n      s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);\t/* S mod 2^k */\n      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);\t/* R = R + 2*s0*S */\n      cc = mpn_submul_1 (tp, s0, 1, s0[0]);\n      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;\n      mpn_rshift (sp, sp, tn, c);\n      tp[tn] = rl;\n      if (rp == NULL)\n\trp = tp;\n      c = c << 1;\n      if (c < GMP_NUMB_BITS)\n\ttn++;\n      else\n\t{\n\t  tp++;\n\t  c -= GMP_NUMB_BITS;\n\t}\n      if (c != 0)\n\tmpn_rshift (rp, tp, tn, c);\n      else\n\tMPN_COPY_INCR (rp, tp, tn);\n      rn = tn;\n    }\n  else\n    {\n      if (rp == NULL)\n\trp = TMP_ALLOC_LIMBS (nn);\n      if (rp != np)\n\tMPN_COPY (rp, np, nn);\n      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));\n    }\n\n  MPN_NORMALIZE (rp, rn);\n\n  TMP_FREE;\n  return rn;\n}\n"
  },
  {
    "path": "mpn/generic/sub.c",
    "content": "/* mpn_sub - subtract mpn from mpn.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpn_sub 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/sub_1.c",
    "content": "/* mpn_sub_1 - subtract limb from mpn.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#define __GMP_FORCE_mpn_sub_1 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/generic/sub_err1_n.c",
    "content": "/* mpn_sub_err1_n -- sub_n with single error term\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/*\n  Computes:\n\n  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,\n  return value is borrow out.\n\n  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).\n  Computes c[1]*yp[n-1] + ... 
+ c[n]*yp[0], stores two-limb result at ep.\n\n  Assumes n >= 1.\n */\nmp_limb_t\nmpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,\n                mp_size_t n, mp_limb_t cy)\n{\n  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp, n));\n\n  /* FIXME: first addition into eh:el is redundant */\n\n  yp += n - 1;\n  el = eh = 0;\n\n  do\n    {\n      yl = *yp--;\n      ul = *up++;\n      vl = *vp++;\n\n      /* ordinary sub_n */\n      SUBC_LIMB (cy1, sl, ul, vl);\n      SUBC_LIMB (cy2, rl, sl, cy);\n      cy = cy1 | cy2;\n      *rp++ = rl;\n\n      /* update (eh:el) */\n      zl = cy ? yl : 0;\n      el += zl;\n      eh += el < zl;\n    }\n  while (--n != 0);\n\n#if GMP_NAIL_BITS != 0\n  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);\n  el &= GMP_NUMB_MASK;\n#endif\n\n  ep[0] = el;\n  ep[1] = eh;\n\n  return cy;\n}\n"
  },
  {
    "path": "mpn/generic/sub_err2_n.c",
    "content": "/* mpn_sub_err2_n -- sub_n with two error terms\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/*\n  Same as mpn_sub_err1_n, but computes both\n\n          c[1]*yp1[n-1] + ... + c[n]*yp1[0]\n    and   c[1]*yp2[n-1] + ... 
+ c[n]*yp2[0],\n\n  storing results at {ep,2}, {ep+2,2} respectively.\n */\nmp_limb_t\nmpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,\n                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,\n                mp_size_t n, mp_limb_t cy)\n{\n  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp1, n));\n  ASSERT (!MPN_OVERLAP_P (rp, n, yp2, n));\n  \n  /* FIXME: first addition into eh:el is redundant */\n\n  yp1 += n - 1;\n  yp2 += n - 1;\n  el1 = eh1 = 0;\n  el2 = eh2 = 0;\n\n  do\n    {\n      yl1 = *yp1--;\n      yl2 = *yp2--;\n      ul = *up++;\n      vl = *vp++;\n\n      /* ordinary sub_n */\n      SUBC_LIMB (cy1, sl, ul, vl);\n      SUBC_LIMB (cy2, rl, sl, cy);\n      cy = cy1 | cy2;\n      *rp++ = rl;\n\n      /* update (eh1:el1) */\n      zl1 = cy ? yl1 : 0;\n      el1 += zl1;\n      eh1 += el1 < zl1;\n\n      /* update (eh2:el2) */\n      zl2 = cy ? yl2 : 0;\n      el2 += zl2;\n      eh2 += el2 < zl2;\n    }\n  while (--n != 0);\n\n#if GMP_NAIL_BITS != 0\n  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);\n  el1 &= GMP_NUMB_MASK;\n  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);\n  el2 &= GMP_NUMB_MASK;\n#endif\n\n  ep[0] = el1;\n  ep[1] = eh1;\n  ep[2] = el2;\n  ep[3] = eh2;\n\n  return cy;\n}\n"
  },
  {
    "path": "mpn/generic/sub_n.c",
    "content": "/* mpn_sub_n -- Subtract equal length limb vectors.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#if GMP_NAIL_BITS == 0\n\nmp_limb_t\nmpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n\n  cy = 0;\n  do\n    {\n      ul = *up++;\n      vl = *vp++;\n      sl = ul - vl;\n      cy1 = sl > ul;\n      rl = sl - cy;\n      cy2 = rl > sl;\n      cy = cy1 | cy2;\n      *rp++ = rl;\n    }\n  while (--n != 0);\n\n  return cy;\n}\n\n#endif\n\n#if GMP_NAIL_BITS >= 1\n\nmp_limb_t\nmpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t ul, vl, rl, cy;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));\n\n  cy = 0;\n  do\n    {\n      ul = *up++;\n      vl = *vp++;\n      rl = ul - vl - cy;\n      cy = rl >> (GMP_LIMB_BITS - 1);\n      *rp++ = rl & GMP_NUMB_MASK;\n    }\n  while (--n != 0);\n\n  return 
cy;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/subadd_n.c",
    "content": "/*   Copyright 2012 The Code cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* \n   t = x - y - z or t = x - (y + z) which explains the name \n*/\nmp_limb_t mpn_subadd_n(mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)\n{\n   mp_limb_t ret;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(x, n);\n   ASSERT_MPN(y, n);\n   ASSERT_MPN(z, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, y, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(t, z, n));\n\n   if (t == x && t == y && t == z)\n      return mpn_neg(t,z,n);  \n\n   if (t == x && t == y)\n   {\n      ret = mpn_sub_n(t, x, y, n);\n      ret += mpn_sub_n(t, t, z, n);\n      \n      return ret;\n   }\n\n   if (t == x && t == z)\n   {\n      ret = mpn_sub_n(t, x, z, n);\n      ret += mpn_sub_n(t, t, y, n);\n   \n      return ret;\n   }\n\n   if (t == y && t == z)\n   {\n      ret = mpn_add_n(t, y, z, n);\n      ret += mpn_sub_n(t, x, t, n);\n   \n      return ret;\n   }\n\n   if (t == x)\n   {\n      ret = mpn_sub_n(t, x, y, n);\n      ret += mpn_sub_n(t, t, z, n);\n   \n      return ret;\n   }\n \n   if (t == y)\n   {\n      ret = mpn_sub_n(t, x, y, n);\n      ret += 
mpn_sub_n(t, t, z, n);\n   \n      return ret;\n   }\n\n   if (t == z)\n   {\n      ret = mpn_sub_n(t, x, z, n);\n      ret += mpn_sub_n(t, t, y, n);\n   \n      return ret;\n   }\n\n   ret = mpn_sub_n(t, x, z, n);\n   ret += mpn_sub_n(t, t, y, n);\n\n   return ret;\n}\n"
  },
  {
    "path": "mpn/generic/submul_1.c",
    "content": "/* mpn_submul_1 -- multiply the N long limb vector pointed to by UP by VL,\n   subtract the N least significant limbs of the product from the limb\n   vector pointed to by RP.  Return the most significant limb of the\n   product, adjusted for carry-out from the subtraction.\n\nCopyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if GMP_NAIL_BITS == 0\n\nmp_limb_t\nmpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t ul, cl, hpl, lpl, rl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n\n  cl = 0;\n  do\n    {\n      ul = *up++;\n      umul_ppmm (hpl, lpl, ul, vl);\n\n      lpl += cl;\n      cl = (lpl < cl) + hpl;\n\n      rl = *rp;\n      lpl = rl - lpl;\n      cl += lpl > rl;\n      *rp++ = lpl;\n    }\n  while (--n != 0);\n\n  return cl;\n}\n\n#endif\n\n#if GMP_NAIL_BITS == 1\n\nmp_limb_t\nmpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, cl, xl, c1, c2, c3;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT_MPN (rp, n);\n  ASSERT_MPN (up, n);\n  ASSERT_LIMB (vl);\n\n  shifted_vl = vl << GMP_NAIL_BITS;\n  cl = 0;\n  prev_hpl = 0;\n  do\n    {\n      ul = *up++;\n      rl = *rp;\n      umul_ppmm (hpl, lpl, ul, shifted_vl);\n      lpl >>= GMP_NAIL_BITS;\n      SUBC_LIMB (c1, xl, rl, prev_hpl);\n      SUBC_LIMB (c2, xl, xl, lpl);\n      SUBC_LIMB (c3, xl, xl, cl);\n      cl = c1 + c2 + c3;\n      *rp++ = xl;\n      prev_hpl = hpl;\n    }\n  while (--n != 0);\n\n  return prev_hpl + cl;\n}\n\n#endif\n\n#if GMP_NAIL_BITS >= 2\n\nmp_limb_t\nmpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)\n{\n  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, xw, cl, xl;\n\n  ASSERT (n >= 1);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));\n  ASSERT_MPN (rp, n);\n  ASSERT_MPN (up, n);\n  ASSERT_LIMB (vl);\n\n  shifted_vl = vl << GMP_NAIL_BITS;\n  cl = 0;\n  prev_hpl = 0;\n  do\n    {\n      ul = *up++;\n      rl = *rp;\n      umul_ppmm (hpl, lpl, ul, shifted_vl);\n      lpl >>= GMP_NAIL_BITS;\n      xw = rl - (prev_hpl + lpl) + cl;\n      cl = (mp_limb_signed_t) xw >> GMP_NUMB_BITS; /* FIXME: non-portable */\n      xl = xw & GMP_NUMB_MASK;\n      *rp++ = xl;\n    
  prev_hpl = hpl;\n    }\n  while (--n != 0);\n\n  return prev_hpl - cl;\n}\n\n#endif\n"
  },
  {
    "path": "mpn/generic/sumdiff_n.c",
    "content": "/*   Copyright 2012 The Code cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_limb_t mpn_sumdiff_n(mp_ptr s, mp_ptr d, mp_srcptr x, mp_srcptr y, mp_size_t n)\n{\n   mp_limb_t ret;\n   mp_ptr t;\n\n   ASSERT_MPN(x, n);\n   ASSERT_MPN(y, n);\n   ASSERT(MPN_SAME_OR_SEPARATE_P(s, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(s, y, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(d, x, n));\n   ASSERT(MPN_SAME_OR_SEPARATE_P(d, y, n));\n   ASSERT(!MPN_OVERLAP_P(s, n, d, n));\n\n   if (n == 0)\n\t   return 0;\n\n   if ((s == x && d == y) || (s == y && d == x))\n   {\n      t = __GMP_ALLOCATE_FUNC_LIMBS(n);\n\n      ret = mpn_sub_n(t, x, y, n);\n      ret += 2*mpn_add_n(s, x, y, n);\n      MPN_COPY(d, t, n);\n\n      __GMP_FREE_FUNC_LIMBS(t, n);\n\n      return ret;\n   }\n\n   if (s == x || s == y)\n   {\n      ret = mpn_sub_n(d, x, y, n);\n      ret += 2*mpn_add_n(s, x, y, n);\n      \n      return ret;\n   }\n\n   ret = 2*mpn_add_n(s, x, y, n);\n   ret += mpn_sub_n(d, x, y, n);\n\n   return ret;\n}\n"
  },
  {
    "path": "mpn/generic/tdiv_q.c",
    "content": "/* mpn_tdiv_q -- division for arbitrary size operands.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\nCopyright 2009, 2010 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (modified to work with MPIR functions).\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/* The DIV_QR_THRESHOLDs should be replaced with DIV_Q_THRESHOLDs */\n\n/* Compute Q = N/D with truncation.\n     N = {np,nn}\n     D = {dp,dn}\n     Q = {qp,nn-dn+1}\n     T = {scratch,nn+1} is scratch space\n   N and D are both untouched by the computation.\n   N and T may overlap; pass the same space if N is irrelevant after the call,\n   but note that tp needs an extra limb.\n\n   Operand requirements:\n     N >= D > 0\n     dp[dn-1] != 0\n     No overlap between the N, D, and Q areas.\n\n   This division function does not clobber its input operands, since it is\n   intended to support average-O(qn) division, and for that to be effective, it\n   cannot put requirements on callers to copy a O(nn) operand.\n\n   If a caller does not care about the value of {np,nn+1} after calling this\n   function, it should pass np also for the scratch argument.  
This function\n   will then save some time and space by avoiding allocation and copying.\n   (FIXME: Is this a good design?  We only really save any copying for\n   already-normalised divisors, which should be rare.  It also prevents us from\n   reasonably asking for all scratch space we need.)\n\n   We write nn-dn+1 limbs for the quotient, but return void.  Why not return\n   the most significant quotient limb?  Look at the 4 main code blocks below\n   (consisting of an outer if-else where each arm contains an if-else). It is\n   tricky for the first code block, since the mpn_*_div_q calls will typically\n   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless\n   we generate the most significant quotient limb here, before calling\n   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a\n   critical division case (the SB sub-case in particular) copying is not a good\n   idea.\n\n   It might make sense to split the if-else parts of the (qn + FUDGE\n   >= dn) blocks into separate functions, since we could promise quite\n   different things to callers in these two cases.  The 'then' case\n   benefits from np=scratch, and it could perhaps even tolerate qp=np,\n   saving some headache for many callers.\n\n   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size\n   operands, we do not reuse the huge scratch for adjustments.  This can be a\n   serious waste of memory for the largest operands.\n*/\n\n/* FUDGE determines when to try getting an approximate quotient from the upper\n   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2\n   for the code to be correct.  
*/\n#define FUDGE 5\t\t\t/* FIXME: tune this */\n\nvoid\nmpn_tdiv_q (mp_ptr qp,\n\t   mp_srcptr np, mp_size_t nn,\n\t   mp_srcptr dp, mp_size_t dn)\n{\n  mp_ptr new_dp, new_np, tp, rp, scratch;\n  mp_limb_t cy, dh, qh;\n  mp_size_t new_nn, qn;\n  mp_limb_t dinv;\n  int cnt;\n  TMP_DECL;\n  TMP_MARK;\n\n  ASSERT (nn >= dn);\n  ASSERT (dn > 0);\n  ASSERT (dp[dn - 1] != 0);\n  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));\n  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));\n\n  ASSERT_ALWAYS (FUDGE >= 2);\n  \n  if (dn == 1)\n    {\n      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);\n      return;\n    }\n\n  scratch = TMP_ALLOC_LIMBS(nn + 1);\n  \n  qn = nn - dn + 1;\t\t/* Quotient size, high limb might be zero */\n\n  if (qn + FUDGE >= dn)\n    {\n      /* |________________________|\n                          |_______|  */\n      new_np = scratch;\n\n      dh = dp[dn - 1];\n      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))\n\t{\n\t  count_leading_zeros (cnt, dh);\n\n\t  cy = mpn_lshift (new_np, np, nn, cnt);\n\t  new_np[nn] = cy;\n\t  new_nn = nn + (cy != 0);\n\n\t  new_dp = TMP_ALLOC_LIMBS (dn);\n\t  mpn_lshift (new_dp, dp, dn, cnt);\n\n\t  if (dn == 2)\n\t    {\n\t      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);\n\t    }\n\t  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||\n\t\t   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))\n\t    {\n          mpir_invert_pi1(dinv, new_dp[dn - 1], new_dp[dn - 2]);\n\t      qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv);\n\t    }\n\t  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || \n\t\t   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD)) \n\t    {\n          mpir_invert_pi1(dinv, new_dp[dn - 1], new_dp[dn - 2]);\n          qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv);\n\t    }\n\t  else\n\t    {\n           mp_ptr inv = TMP_ALLOC_LIMBS(dn);\n           mpn_invert(inv, new_dp, dn);\n           qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv);\n\t    }\n\t  if (cy == 
0)\n\t    qp[qn - 1] = qh;\n\t  else if (UNLIKELY (qh != 0))\n\t    {\n\t      /* This happens only when the quotient is close to B^n and\n\t\t mpn_*_divappr_q returned B^n.  */\n\t      mp_size_t i, n;\n\t      n = new_nn - dn;\n\t      for (i = 0; i < n; i++)\n\t\tqp[i] = GMP_NUMB_MAX;\n\t      qh = 0;\t\t/* currently ignored */\n\t    }\n\t}\n      else  /* divisor is already normalised */\n\t{\n\t  MPN_COPY (new_np, np, nn);\n\n\t  if (dn == 2)\n\t    {\n\t      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);\n\t    }\n\t  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||\n\t\t   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))\n\t    {\n           mpir_invert_pi1(dinv, dh, dp[dn - 2]);\n           qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv);\n\t    }\n\t  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) || \n\t\t   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))\n\t    {\n           mpir_invert_pi1(dinv, dh, dp[dn - 2]);\n           qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv);\n\t    }\n\t  else\n\t    {\n           mp_ptr inv = TMP_ALLOC_LIMBS(dn);\n           mpn_invert(inv, dp, dn);\n           qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv);\n\t    }\n\t  qp[nn - dn] = qh;\n\t}\n    }\n  else\n    {\n      /* |________________________|\n                |_________________|  */\n      tp = TMP_ALLOC_LIMBS (qn + 1);\n\n      new_np = scratch;\n      new_nn = 2 * qn + 1;\n      \n      dh = dp[dn - 1];\n      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))\n\t{\n\t  count_leading_zeros (cnt, dh);\n\n\t  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);\n\t  new_np[new_nn] = cy;\n\n\t  new_nn += (cy != 0);\n\n\t  new_dp = TMP_ALLOC_LIMBS (qn + 1);\n\t  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);\n\t  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);\n\n\t  if (qn + 1 == 2)\n\t    {\n\t      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);\n\t    }\n\t  else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))\n\t    {\n       
   mpir_invert_pi1(dinv, new_dp[qn], new_dp[qn - 1]);\n\t      qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);\n\t    }\n\t  else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))\n\t    {\n          mpir_invert_pi1(dinv, new_dp[qn], new_dp[qn - 1]);\n\t      qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);\n\t    }\n\t  else\n\t    {\n           mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);\n           mpn_invert(inv, new_dp, qn + 1);\n           qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv); \n\t    }\n\t  if (cy == 0)\n\t    tp[qn] = qh;\n\t  else if (UNLIKELY (qh != 0))\n\t    {\n\t      /* This happens only when the quotient is close to B^n and\n\t\t mpn_*_divappr_q returned B^n.  */\n\t      mp_size_t i, n;\n\t      n = new_nn - (qn + 1);\n\t      for (i = 0; i < n; i++)\n\t\ttp[i] = GMP_NUMB_MAX;\n\t      qh = 0;\t\t/* currently ignored */\n\t    }\n\t}\n      else  /* divisor is already normalised */\n\t{\n\t  MPN_COPY (new_np, np + nn - new_nn, new_nn); \n\n\t  new_dp = (mp_ptr) dp + dn - (qn + 1);\n\n\t  if (qn == 2 - 1)\n\t    {\n\t      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);\n\t    }\n\t  else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))\n\t    {\n          mpir_invert_pi1(dinv, dh, new_dp[qn - 1]);\n          qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);\n\t    }\n\t  else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))\n\t    {\n          mpir_invert_pi1(dinv, dh, new_dp[qn - 1]);\n          qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);\n\t    }\n\t  else\n\t    {\n          mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);\n\t       mpn_invert(inv, new_dp, qn + 1);\n          qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv);\n\t    }\n\t  tp[qn] = qh;\n\t}\n\n      MPN_COPY (qp, tp + 1, qn);\n      if (UNLIKELY(tp[0] <= 4))\n        {\n\t  mp_size_t rn;\n\n          rp = TMP_ALLOC_LIMBS (dn + qn);\n          mpn_mul (rp, dp, dn, 
tp + 1, qn);\n\t  rn = dn + qn;\n\t  rn -= rp[rn - 1] == 0;\n\n          if (rn > nn || mpn_cmp (np, rp, nn) < 0)\n            mpn_decr_u (qp, 1);\n        }\n    }\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpn/generic/tdiv_qr.c",
    "content": "/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and\n   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If\n   qxn is non-zero, generate that many fraction limbs and append them after the\n   other quotient limbs, and update the remainder accordingly.  The input\n   operands are unaffected.\n\n   Preconditions:\n   1. The most significant limb of the divisor must be non-zero.\n   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)\n\n   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time\n   complexity of multiplication.\n\nCopyright 1997, 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.\n\nCopyright 2010 William Hart (modified to work with MPIR functions).\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,\n\t     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)\n{\n  ASSERT_ALWAYS (qxn == 0);\n\n  ASSERT (nn >= 0);\n  ASSERT (dn >= 0);\n  ASSERT (dn == 0 || dp[dn - 1] != 0);\n  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));\n  ASSERT (! 
MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));\n\n  switch (dn)\n    {\n    case 0:\n      DIVIDE_BY_ZERO;\n\n    case 1:\n      {\n\trp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);\n\treturn;\n      }\n\n    case 2:\n      {\n\tmp_ptr n2p, d2p;\n\tmp_limb_t qhl, cy;\n\tTMP_DECL;\n\tTMP_MARK;\n\tif ((dp[1] & GMP_NUMB_HIGHBIT) == 0)\n\t  {\n\t    int cnt;\n\t    mp_limb_t dtmp[2];\n\t    count_leading_zeros (cnt, dp[1]);\n\t    cnt -= GMP_NAIL_BITS;\n\t    d2p = dtmp;\n\t    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));\n\t    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;\n\t    n2p = TMP_ALLOC_LIMBS (nn + 1);\n\t    cy = mpn_lshift (n2p, np, nn, cnt);\n\t    n2p[nn] = cy;\n\t    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);\n\t    if (cy == 0)\n\t      qp[nn - 2] = qhl;\t/* always store nn-2+1 quotient limbs */\n\t    rp[0] = (n2p[0] >> cnt)\n\t      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);\n\t    rp[1] = (n2p[1] >> cnt);\n\t  }\n\telse\n\t  {\n\t    d2p = (mp_ptr) dp;\n\t    n2p = TMP_ALLOC_LIMBS (nn);\n\t    MPN_COPY (n2p, np, nn);\n\t    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);\n\t    qp[nn - 2] = qhl;\t/* always store nn-2+1 quotient limbs */\n\t    rp[0] = n2p[0];\n\t    rp[1] = n2p[1];\n\t  }\n\tTMP_FREE;\n\treturn;\n      }\n\n    default:\n      {\n\tint adjust;\n\tmp_limb_t dinv;\n\tTMP_DECL;\n\tTMP_MARK;\n\tadjust = np[nn - 1] >= dp[dn - 1];\t/* conservative tests for quotient size */\n\tif (nn + adjust >= 2 * dn)\n\t  {\n\t    mp_ptr n2p, d2p;\n\t    mp_limb_t cy;\n\t    int cnt;\n\n\t    qp[nn - dn] = 0;\t\t\t  /* zero high quotient limb */\n\t    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */\n\t      {\n\t\tcount_leading_zeros (cnt, dp[dn - 1]);\n\t\tcnt -= GMP_NAIL_BITS;\n\t\td2p = TMP_ALLOC_LIMBS (dn);\n\t\tmpn_lshift (d2p, dp, dn, cnt);\n\t\tn2p = TMP_ALLOC_LIMBS (nn + 1);\n\t\tcy = mpn_lshift (n2p, np, nn, cnt);\n\t\tn2p[nn] = cy;\n\t\tnn += adjust;\n\t      }\n\t    else\n\t      
{\n\t\tcnt = 0;\n\t\td2p = (mp_ptr) dp;\n\t\tn2p = TMP_ALLOC_LIMBS (nn + 1);\n\t\tMPN_COPY (n2p, np, nn);\n\t\tn2p[nn] = 0;\n\t\tnn += adjust;\n\t      }\n\n\t    mpir_invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);\n\t    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))\n\t      ASSERT_NOCARRY(mpn_sb_div_qr (qp, n2p, nn, d2p, dn, dinv));\n\t    else if (BELOW_THRESHOLD (dn, INV_DIV_QR_THRESHOLD) ||\n\t\t     BELOW_THRESHOLD (nn, 2 * INV_DIV_QR_THRESHOLD))     \n\t      ASSERT_NOCARRY(mpn_dc_div_qr (qp, n2p, nn, d2p, dn, dinv));\n\t    else\n\t\t{\n          mp_ptr dinv2 = TMP_ALLOC_LIMBS(dn);\n\t\t  mpn_invert(dinv2, d2p, dn);\n\t\t  ASSERT_NOCARRY(mpn_inv_div_qr (qp, n2p, nn, d2p, dn, dinv2));\n\t\t}\n\n\t    if (cnt != 0)\n\t      mpn_rshift (rp, n2p, dn, cnt);\n\t    else\n\t      MPN_COPY (rp, n2p, dn);\n\t    TMP_FREE;\n\t    return;\n\t  }\n\n\t/* When we come here, the numerator/partial remainder is less\n\t   than twice the size of the denominator.  */\n\n\t  {\n\t    /* Problem:\n\n\t       Divide a numerator N with nn limbs by a denominator D with dn\n\t       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small\n\t       compared to dn, conventional division algorithms perform poorly.\n\t       We want an algorithm that has an expected running time that is\n\t       dependent only on qn.\n\n\t       Algorithm (very informally stated):\n\n\t       1) Divide the 2 x qn most significant limbs from the numerator\n\t\t  by the qn most significant limbs from the denominator.  Call\n\t\t  the result qest.  This is either the correct quotient, but\n\t\t  might be 1 or 2 too large.  Compute the remainder from the\n\t\t  division.  (This step is implemented by a mpn_divrem call.)\n\n\t       2) Is the most significant limb from the remainder < p, where p\n\t\t  is the product of the most significant limb from the quotient\n\t\t  and the next(d)?  (Next(d) denotes the next ignored limb from\n\t\t  the denominator.)  
If it is, decrement qest, and adjust the\n\t\t  remainder accordingly.\n\n\t       3) Is the remainder >= qest?  If it is, qest is the desired\n\t\t  quotient.  The algorithm terminates.\n\n\t       4) Subtract qest x next(d) from the remainder.  If there is\n\t\t  borrow out, decrement qest, and adjust the remainder\n\t\t  accordingly.\n\n\t       5) Skip one word from the denominator (i.e., let next(d) denote\n\t\t  the next less significant limb.  */\n\n\t    mp_size_t qn;\n\t    mp_ptr n2p, d2p;\n\t    mp_ptr tp;\n\t    mp_limb_t cy;\n\t    mp_size_t in, rn;\n\t    mp_limb_t quotient_too_large;\n\t    unsigned int cnt;\n\n\t    qn = nn - dn;\n\t    qp[qn] = 0;\t\t\t\t/* zero high quotient limb */\n\t    qn += adjust;\t\t\t/* qn cannot become bigger */\n\n\t    if (qn == 0)\n\t      {\n\t\tMPN_COPY (rp, np, dn);\n\t\tTMP_FREE;\n\t\treturn;\n\t      }\n\n\t    in = dn - qn;\t\t/* (at least partially) ignored # of limbs in ops */\n\t    /* Normalize denominator by shifting it to the left such that its\n\t       most significant bit is set.  Then shift the numerator the same\n\t       amount, to mathematically preserve quotient.  
*/\n\t    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)\n\t      {\n\t\tcount_leading_zeros (cnt, dp[dn - 1]);\n\t\tcnt -= GMP_NAIL_BITS;\n\n\t\td2p = TMP_ALLOC_LIMBS (qn);\n\t\tmpn_lshift (d2p, dp + in, qn, cnt);\n\t\td2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);\n\n\t\tn2p = TMP_ALLOC_LIMBS (2 * qn + 1);\n\t\tcy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);\n\t\tif (adjust)\n\t\t  {\n\t\t    n2p[2 * qn] = cy;\n\t\t    n2p++;\n\t\t  }\n\t\telse\n\t\t  {\n\t\t    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);\n\t\t  }\n\t      }\n\t    else\n\t      {\n\t\tcnt = 0;\n\t\td2p = (mp_ptr) dp + in;\n\n\t\tn2p = TMP_ALLOC_LIMBS (2 * qn + 1);\n\t\tMPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);\n\t\tif (adjust)\n\t\t  {\n\t\t    n2p[2 * qn] = 0;\n\t\t    n2p++;\n\t\t  }\n\t      }\n\n\t    /* Get an approximate quotient using the extracted operands.  */\n\t    if (qn == 1)\n\t      {\n\t\tmp_limb_t q0, r0;\n\t\tudiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);\n\t\tn2p[0] = r0 >> GMP_NAIL_BITS;\n\t\tqp[0] = q0;\n\t      }\n\t    else if (qn == 2)\n\t      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */\n\t    else\n\t      {\n\t\tmpir_invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);\n\t\tif (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))\n\t\t  ASSERT_NOCARRY(mpn_sb_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv));\n\t\telse if (BELOW_THRESHOLD (qn, INV_DIV_QR_THRESHOLD))\n\t\t{\n\t\t\tmp_ptr temp = TMP_ALLOC_LIMBS(DC_DIVAPPR_Q_N_ITCH(qn));\n\t\t\tASSERT_NOCARRY(mpn_dc_div_qr_n (qp, n2p, d2p, qn, dinv, temp));\n\t\t} else\n\t\t{\n\t      mp_ptr dinv2 = TMP_ALLOC_LIMBS(qn);\n\t\t  mpn_invert(dinv2, d2p, qn);\n          ASSERT_NOCARRY(mpn_inv_div_qr_n (qp, n2p, d2p, qn, dinv2));\n\t\t}  \n\t      }\n\n\t    rn = qn;\n\t    /* Multiply the first ignored divisor limb by the most significant\n\t       quotient limb.  If that product is > the partial remainder's\n\t       most significant limb, we know the quotient is too large.  
This\n\t       test quickly catches most cases where the quotient is too large;\n\t       it catches all cases where the quotient is 2 too large.  */\n\t    {\n\t      mp_limb_t dl, x;\n\t      mp_limb_t h, dummy;\n\n\t      if (in - 2 < 0)\n\t\tdl = 0;\n\t      else\n\t\tdl = dp[in - 2];\n\n#if GMP_NAIL_BITS == 0\n\t      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));\n#else\n\t      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;\n\t      if (cnt != 0)\n\t\tx |= dl >> (GMP_NUMB_BITS - cnt);\n#endif\n\t      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);\n\n\t      if (n2p[qn - 1] < h)\n\t\t{\n\t\t  mp_limb_t cy;\n\n\t\t  mpn_decr_u (qp, (mp_limb_t) 1);\n\t\t  cy = mpn_add_n (n2p, n2p, d2p, qn);\n\t\t  if (cy)\n\t\t    {\n\t\t      /* The partial remainder is safely large.  */\n\t\t      n2p[qn] = cy;\n\t\t      ++rn;\n\t\t    }\n\t\t}\n\t    }\n\n\t    quotient_too_large = 0;\n\t    if (cnt != 0)\n\t      {\n\t\tmp_limb_t cy1, cy2;\n\n\t\t/* Append partially used numerator limb to partial remainder.  */\n\t\tcy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);\n\t\tn2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);\n\n\t\t/* Update partial remainder with partially used divisor limb.  */\n\t\tcy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));\n\t\tif (qn != rn)\n\t\t  {\n\t\t    ASSERT_ALWAYS (n2p[qn] >= cy2);\n\t\t    n2p[qn] -= cy2;\n\t\t  }\n\t\telse\n\t\t  {\n\t\t    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */\n\n\t\t    quotient_too_large = (cy1 < cy2);\n\t\t    ++rn;\n\t\t  }\n\t\t--in;\n\t      }\n\t    /* True: partial remainder now is neutral, i.e., it is not shifted up.  
*/\n\n\t    tp = TMP_ALLOC_LIMBS (dn);\n\n\t    if (in < qn)\n\t      {\n\t\tif (in == 0)\n\t\t  {\n\t\t    MPN_COPY (rp, n2p, rn);\n\t\t    ASSERT_ALWAYS (rn == dn);\n\t\t    goto foo;\n\t\t  }\n\t\tmpn_mul (tp, qp, qn, dp, in);\n\t      }\n\t    else\n\t      mpn_mul (tp, dp, in, qp, qn);\n\n\t    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);\n\t    MPN_COPY (rp + in, n2p, dn - in);\n\t    quotient_too_large |= cy;\n\t    cy = mpn_sub_n (rp, np, tp, in);\n\t    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);\n\t    quotient_too_large |= cy;\n\t  foo:\n\t    if (quotient_too_large)\n\t      {\n\t\tmpn_decr_u (qp, (mp_limb_t) 1);\n\t\tmpn_add_n (rp, rp, dp, dn);\n\t      }\n\t  }\n\tTMP_FREE;\n\treturn;\n      }\n    }\n}\n"
  },
  {
    "path": "mpn/generic/toom3_mul.c",
    "content": "/* mpn_toom3_mul and helper functions -- Multiply/square natural numbers.\n\n   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)\n   ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH\n   THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED\n   THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,\n2005, Free Software Foundation, Inc.\n\nCopyright 2009 Jason Moxham\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/******************************************************************************\n *                                                                            *\n *                      Toom 3-way multiplication                             *\n *                                                                            *\n *****************************************************************************/\n\n/*\n   We have\n\n\t{v0,2k} {v1,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \tv1       {-}vinf\n\n\tvinf0 is the first limb of vinf, which is overwritten by v1\n\n\t{vm1,2k+1} {v2, 2k+1}\n\n\tws is temporary space\n\n\tsa is the sign of vm1\n\n\trr2 is r+r2\n\n\tWe want to compute\n\n     t1   <- (3*v0+2*vm1+v2)/6-2*vinf\n     t2   <- (v1+vm1)/2\n  then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where\n     c0   <- v0\n     c1   <- v1 - t1\n     c2   <- t2 - v0 - vinf\n     c3   <- t1 - t2\n     c4   <- vinf\n*/ \nvoid\nmpn_toom3_interpolate (mp_ptr c, mp_ptr v1, mp_ptr v2, mp_ptr vm1,\n\t\t                 mp_ptr vinf, mp_size_t k, mp_size_t rr2, int sa,\n\t\t                                       mp_limb_t vinf0, mp_ptr ws)\n{\n  mp_limb_t cy, saved;\n  mp_size_t twok = k + k;\n  mp_size_t kk1 = twok + 1;\n  mp_ptr c1, c2, c3, c4, c5;\n  mp_limb_t cout; /* final carry, should be zero at the end */\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  c5 = c4 + k;\n\n#define v0 (c)\n\n/* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t v1          {-}vinf\n\n\t{vm1,2k+1} {v2, 2k+1}\n*/\n\n  /* v2 <- v2 - vm1 */\n  if (sa < 0)\n  {\n\t  mpn_add_n(v2, v2, vm1, kk1);\n  } else\n  {\n\t  mpn_sub_n(v2, v2, vm1, kk1);\n  }\n\n  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */\n\n /* vm1 <- t2 := (v1 - sa*vm1) / 2\n */\n  if (sa < 0)\n    {\n#ifdef HAVE_NATIVE_mpn_rsh1add_n\n      mpn_rsh1add_n (vm1, v1, vm1, kk1);\n#else\n      mpn_add_n (vm1, vm1, v1, kk1);\n      
mpn_half (vm1, kk1);\n#endif\n    }\n  else\n    {\n#ifdef HAVE_NATIVE_mpn_rsh1sub_n\n      mpn_rsh1sub_n (vm1, v1, vm1, kk1);\n#else\n      mpn_sub_n (vm1, v1, vm1, kk1);\n      mpn_half (vm1, kk1);\n#endif\n    }\n\n  /* v1 <- v1 - v0 - vinf */\n\n  saved = c4[0];\n  c4[0] = vinf0;\n#if HAVE_NATIVE_mpn_subadd_n\n  cy = mpn_subadd_n(v1, v1, v0, c4, rr2);\n#else\n  cy = mpn_sub_n(v1, v1, v0, rr2);\n  cy += mpn_sub_n(v1, v1, c4, rr2);\n#endif\n  c4[0] = saved;\n  if (rr2 < twok)\n  {\n\t  v1[twok] -= mpn_sub_n(v1 + rr2, v1 + rr2, v0 + rr2, twok - rr2); \n\t  MPN_DECR_U(v1 + rr2, kk1 - rr2, cy);\n  }\n  else v1[twok] -= cy;\n\n  saved = c4[0];\n  c4[0] = vinf0;\n/* subtract 5*vinf from v2,\n  */\n  cy = mpn_submul_1 (v2, c4, rr2, CNST_LIMB(5));\n  MPN_DECR_U (v2 + rr2, kk1 - rr2, cy);\n  c4[0] = saved;\n\n  /* v2 = (v2 - v1)/2 (exact)\n  */\n#ifdef HAVE_NATIVE_mpn_rsh1sub_n\n  mpn_rsh1sub_n (v2, v2, v1, kk1);\n#else\n  mpn_sub_n (v2, v2, v1, kk1);\n  mpn_half (v2, kk1);\n#endif\n\n  /* v1 = v1 - vm1\n  */\n  mpn_sub_n(v1, v1, vm1, kk1);\n\n  /* vm1 = vm1 - v2 and add vm1 in {c+k, ...} */\n#if HAVE_NATIVE_mpn_addsub_n\n  cy = mpn_addsub_n(c1, c1, vm1, v2, kk1);\n#else\n  mpn_sub_n(vm1, vm1, v2, kk1);\n  cy = mpn_add_n (c1, c1, vm1, kk1);\n#endif\n  ASSERT_NOCARRY (mpn_add_1(c3 + 1, c3 + 1, rr2 + k - 1, cy)); /* 4k+rr2-(3k+1) = rr2+k-1 */\n\n  /* don't forget to add vinf0 in {c+4k, ...} */\n  ASSERT_NOCARRY (mpn_add_1(c4, c4, rr2, vinf0));\n\n  /* add v2 in {c+3k, ...} */\n  if (rr2 <= k + 1)\n     ASSERT_NOCARRY (mpn_add_n (c3, c3, v2, k+rr2));\n  else\n  {\n\t  cy = mpn_add_n (c3, c3, v2, kk1);\n     if (cy) ASSERT_NOCARRY (mpn_add_1(c5 + 1, c5 + 1, rr2 - k - 1, cy)); /* 4k+rr2-(5k+1) = rr2-k-1 */\n  }\n\n#undef v0\n}\n\n#define TOOM3_MUL_REC(p, a, b, n, ws) \\\n  do {\t\t\t\t\t\t\t\t\\\n    if (MUL_TOOM3_THRESHOLD / 3 < MUL_KARATSUBA_THRESHOLD\t\\\n\t&& BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\t\\\n      mpn_mul_basecase (p, a, n, b, n);\t\t\t\t\\\n    else if 
(BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))\t\t\\\n      mpn_kara_mul_n (p, a, b, n, ws);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\\\n      mpn_toom3_mul_n (p, a, b, n, ws);\t\t\t\t\\\n  } while (0)\n\n/* \n   This code is based on mpn_toom3_mul_n. Here a and b may not have the same size, but an >= bn.\n\n  The algorithm is the following:\n\n  0. k = ceil(an/3), r = an - 2k, r2 = bn - 2k,  B = 2^(GMP_NUMB_BITS), t = B^k\n  1. split a and b in three parts each a0, a1, a2 and b0, b1, b2\n     with a0, a1, b0, b1 of k limbs, and a2, b2 of r, r2 limbs, respectively\n  2. v0   <- a0*b0\n     v1   <- (a0+a1+a2)*(b0+b1+b2)\n     v2   <- (a0+2*a1+4*a2)*(b0+2*b1+4*b2)\n     vm1  <- (a0-a1+a2)*(b0-b1+b2)\n     vinf <- a2*b2\n\n  As with mpn_toom3_mul_n we do our best to save space. \n\n  In mpn_toom3_interpolate we are going to do:\n\n     t1   <- (3*v0+2*vm1+v2)/6-2*vinf\n     t2   <- (v1+vm1)/2\n  then the result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where\n     c0   <- v0\n     c1   <- v1 - t1\n     c2   <- t2 - v0 - vinf\n     c3   <- t1 - t2\n     c4   <- vinf\n\n  In particular we know that v0 will be 2k limbs, and vinf will be r+r2 limbs.\n  We may as well stick those into place immediately. That leaves 2k limbs between,\n  which we can store v1 in (though we have to preserve the bottom limb of vinf\n  which will get overwritten - we pass this to the interpolate function).\n\n  We don't have as much space to play with as in mpn_toom3_mul_n as an may be 3k-2\n  and bn may be as little as 2k + 1. 
Thus the output may have as little as 5k-1 limbs\n  of space.\n\n  We'll pass v2 and vm1 to the interpolate function in temporary space.\n\n  Temporary space needed is t(na) = 4k+4 + t(k+1) where k = (na + 2)/3\n  i.e t(na) = (4na + 20)/3 + t((na+5)/3) <= 5na/3 + t(5na/12) \n                                         <= (5na/3)/(1-5/12) = 60na/21 < 3na\n  for na >= 20.\n */\n\nvoid\nmpn_toom3_mul (mp_ptr c, mp_srcptr a, mp_size_t an, mp_srcptr b, mp_size_t bn, mp_ptr t)\n{\n  mp_size_t k, k1, kk1, r, r2, twok, rr2;\n  mp_limb_t cy, cc, saved, vinf0, c20, c21;\n  mp_ptr trec;\n  int sa, sb;\n  mp_ptr c1, c2, c3, c4, t1, t2, t3, t4;\n\n  ASSERT(GMP_NUMB_BITS >= 6);\n\n  k = (an + 2) / 3; /* ceil(an/3) */\n  ASSERT(bn > 2*k);\n  ASSERT(an >= 20);\n  \n  twok = 2 * k;\n  k1 = k + 1;\n  kk1 = k + k1;\n  r = an - twok;   /* last chunk */\n  r2 = bn - twok;   /* last chunk */\n  rr2 = r + r2;\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  \n  t1 = t + k;\n  t2 = t1 + k;\n  t3 = t2 + k;\n  t4 = t3 + k;\n\n  trec = t + 4 * k + 4; \n\n  /* put a0+a2 in {t, k+1}, and b0+b2 in {t1 + 1, k+1};\n     put a0+a1+a2 in {t2 + 2, k+1} and b0+b1+b2 in {t3 + 3,k+1}\n  */\n  cy = mpn_add_n (t, a, a + twok, r);\n  cc = mpn_add_n (t1 + 1, b, b + twok, r2);\n  if (r < k)\n    {\n      __GMPN_ADD_1 (cy, t + r, a + r, k - r, cy);\n    }\n  if (r2 < k)\n    {\n\t  __GMPN_ADD_1 (cc, t1 + 1 + r2, b + r2, k - r2, cc);\n    }\n  t3[2] = (t1[0] = cy) + mpn_add_n (t2 + 2, t, a + k, k);\n  t4[3] = (t2[1] = cc) + mpn_add_n (t3 + 3, t1 + 1, b + k, k);\n\n  /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {c2, 2k+1};\n     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */\n  TOOM3_MUL_REC (c2, t2 + 2, t3 + 3, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n  */\n\n  /* put |a0-a1+a2| in {t2 + 2, k+1} and |b0-b1+b2| in {t3 + 3,k+1} */\n  /* sa = sign(a0-a1+a2) */\n  /* sb = sign(b0-b1+b2) */\n  sa = (t[k] != 0) ? 
1 : mpn_cmp (t, a + k, k);\n  t3[2] = (sa >= 0) ? t[k] - mpn_sub_n (t2 + 2, t, a + k, k)\n\t\t   : mpn_sub_n (t2 + 2, a + k, t, k);\n  /* b0+b2 is in {c+k+1, k+1} now */\n  sb = (t2[1] != 0) ? 1 : mpn_cmp (t1 + 1, b + k, k);\n  t4[3] = (sb >= 0) ? t2[1] - mpn_sub_n (t3 + 3, t1 + 1, b + k, k)\n\t\t    : mpn_sub_n (t3 + 3, b + k, t1 + 1, k);\n  sa *= sb; /* sign of vm1 */\n\n  /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {t, 2k+1};\n     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */\n  TOOM3_MUL_REC (t, t2 + 2, t3 + 3, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1\n  */\n\n  c20 = c2[0]; /* save c2[0] and c2[1] giving space 2k+2 at c */\n  c21 = c2[1];\n\n  /* \n     compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c1 + 1, k+1}\n  */\n#if HAVE_NATIVE_mpn_addlsh1_n\n  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);\n  c2[1] = mpn_addlsh1_n (c1 + 1, b + k, b + twok, r2);\n  if (r < k)\n    {\n      __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);\n    }\n  if (r2 < k)\n    {__GMPN_ADD_1 (c2[1], c1 + 1 + r2, b + k + r2, k - r2, c2[1]);\n    }\n  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);\n  c2[1] = 2 * c2[1] + mpn_addlsh1_n (c1 + 1, b, c1 + 1, k);\n#else\n  c[r] = mpn_lshift1 (c, a + twok, r);\n  c1[r2 + 1] = mpn_lshift1 (c1 + 1, b + twok, r2);\n  if (r  < k)\n    {\n      MPN_ZERO(c + r + 1, k - r);\n    }\n  if (r2 < k)\n    {\n      MPN_ZERO(c1 + r2 + 2, k - r2);\n    }\n  c1[0] += mpn_add_n (c, c, a + k, k);\n  c2[1] += mpn_add_n (c1 + 1, c1 + 1, b + k, k);\n  mpn_double (c, k1);\n  mpn_double (c1 + 1, k1);\n  c1[0] += mpn_add_n (c, c, a, k);\n  c2[1] += mpn_add_n (c1 + 1, c1 + 1, b, k);\n#endif\n\n#define v2 (t+2*k+1)\n\n  /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}\n     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */\n  TOOM3_MUL_REC (v2, c, c1 + 1, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k 
+ 1}\n\t     vm1        v2\n  */\n\n  c2[0] = c20; /* restore c2[0] and c2[1] */\n  c2[1] = c21;\n\n  /* compute v0 := a0*b0 in {c, 2k} */\n  TOOM3_MUL_REC (c, a, b, k, trec);\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n#define vinf (c+4*k)\n\n  /* compute vinf := a2*b2 in {c4, r + r2},\n  */\n  saved = c4[0];\n\n  if (r == r2) TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec);\n  else if (r > r2) mpn_mul(c4, a + twok, r, b + twok, r2);\n  else mpn_mul(c4, b + twok, r2, a + twok, r);\n\n  vinf0 = c4[0];\n  c4[0] = saved;\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1        {-}vinf\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n\n\t  vinf0 = {-}\n  */\n\n  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, sa, vinf0, t4+2);\n\n#undef v2\n#undef vinf\n}\n\nvoid\nmpn_toom42_mul (mp_ptr c, mp_srcptr a, mp_size_t an, \n                                      mp_srcptr b, mp_size_t bn, mp_ptr t)\n{\n  mp_size_t k, k1, kk1, r, r2, twok, threek, rr2, n1, n2;\n  mp_limb_t cy, cc, saved, vinf0, c20, c21;\n  mp_ptr trec;\n  int sa, sb;\n  mp_ptr c1, c2, c3, c4, t1, t2, t3, t4;\n\n  ASSERT(GMP_NUMB_BITS >= 6);\n\n  k = (an + 3) / 4; /* ceil(an/4) */\n  ASSERT(bn > k);\n  ASSERT(bn <= 2*k);\n  ASSERT(an >= 20);\n  \n  twok = 2 * k;\n  threek = 3 * k;\n  k1 = k + 1;\n  kk1 = k + k1;\n  r = an - threek;   /* last chunk */\n  r2 = bn - k;   /* last chunk */\n  rr2 = r + r2;\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  \n  t1 = t + k;\n  t2 = t1 + k;\n  t3 = t2 + k;\n  t4 = t3 + k;\n\n  trec = t + 4 * k + 4; \n\n  /* put a0+a2 in {t, k+1}, and b0+b1 in {t1 + 1, k+1};\n     put a1+a3 in {t3+3, k+1}, put a0+a1+a2+a3 in {t2 + 2, k+1} \n  */\n  t[k] = mpn_add_n (t, a, a + twok, k);\n  \n  t4[3] = mpn_add_n (t3 + 3, a + k, a + threek, r);\n  if (k > r)\n     t4[3] = mpn_add_1(t3 + r + 3, a + k + r, k - r, t4[3]);\n\n  mpn_add_n (t2 + 2, t, t3 + 3, k1);\n\n  t2[1] = 
mpn_add_n (t1 + 1, b, b + k, r2);\n  if (k > r2)\n\t  t2[1] = mpn_add_1(t1 + 1 + r2, b + r2, k - r2, t2[1]);\n\n  /* compute v1 := (a0+a1+a2+a3)*(b0+b1) in {c2, 2k+1};\n     since v1 < 6*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 3 */\n  TOOM3_MUL_REC (c2, t2 + 2, t1 + 1, k1, trec);\n\n  ASSERT(c2[k+k] < 6);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n  */\n\n  /* put |a0-a1+a2-a3| in {t2 + 2, k+1} and |b0-b1| in {t3 + 3,k+1} */\n  /* sa = sign(a0-a1+a2-a3) */\n  /* sb = sign(b0-b1) */\n  sa = mpn_cmp (t, t3 + 3, k1);\n  if (sa >= 0) mpn_sub_n (t2 + 2, t, t3 + 3, k1);\n  else mpn_sub_n (t2 + 2, t3 + 3, t, k1);\n\n  n1 = k;\n  n2 = r2;\n  MPN_NORMALIZE(b, n1);\n  MPN_NORMALIZE(b+k, n2);\n  if (n1 != n2) sb = (n1 > n2) ? 1 : -1;\n  else sb = mpn_cmp (b, b + k, n2);\n\n  if (sb >= 0) \n  {\n\t  t4[3] = mpn_sub_n (t3 + 3, b, b + k, r2);\n\t  if (k > r2) t4[3] = -mpn_sub_1(t3 + 3 + r2, b + r2, k - r2, t4[3]);\n  } else\n  {\n\t  mpn_sub_n (t3 + 3, b + k, b, r2);\n\t  MPN_ZERO(t3 + r2 + 3, k1 - r2);\n  }\n  sa *= sb; /* sign of vm1 */\n\n  /* compute vm1 := (a0-a1+a2-a3)*(b0-b1) in {t, 2k+1};\n     since |vm1| < 2*B^(2k), vm1 uses only 2k+1 limbs */\n  TOOM3_MUL_REC (t, t2 + 2, t3 + 3, k1, trec);\n\n  ASSERT(t[k+k] < 2);\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1\n  */\n\n  c20 = c2[0]; /* save c2[0] and c2[1] giving space 2k+2 at c */\n  c21 = c2[1];\n\n  /* \n     compute a0+2a1+4a2+8a3 in {c, k+1} and b0+2b1 in {c1 + 1, k+1}\n  */\n#if HAVE_NATIVE_mpn_addlsh1_n\n  c1[0] = mpn_addlsh1_n (c, a + twok, a + threek, r);\n  if (r < k)\n    {\n      c1[0] = mpn_add_1 (c + r, a + twok + r, k - r, c1[0]);\n    }\n  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a + k, c, k);\n  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);\n  \n  c2[1] = mpn_addlsh1_n (c1 + 1, b, b + k, r2);\n  if (r2 < k)\n    {\n\t\t c2[1] = mpn_add_1(c1 + 1 + r2, b + r2, k - r2, c2[1]);\n    }\n#else\n  c[r] = mpn_lshift1 (c, a + 
threek, r);\n  if (r < k)\n    {\n      MPN_ZERO(c + r + 1, k - r);\n    }\n  c1[0] += mpn_add_n (c, c, a + twok, k);\n  mpn_double (c, k1);\n  c1[0] += mpn_add_n (c, c, a + k, k);\n  mpn_double (c, k1);\n  c1[0] += mpn_add_n (c, c, a, k);\n  \n  c1[r2 + 1] = mpn_lshift1 (c1 + 1, b + k, r2);\n  if (r2 < k)\n    {\n      MPN_ZERO(c1 + r2 + 2, k - r2);\n    }\n  c2[1] += mpn_add_n (c1 + 1, c1 + 1, b, k);\n#endif\n\n#define v2 (t+2*k+1)\n\n  /* compute v2 := (a0+2a1+4a2+8a3)*(b0+2b1) in {t+2k+1, 2k+1}\n     v2 < 45*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */\n  TOOM3_MUL_REC (v2, c, c1 + 1, k1, trec);\n\n  ASSERT(v2[k+k] < 45);\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n  c2[0] = c20; /* restore c2[0] and c2[1] */\n  c2[1] = c21;\n\n  /* compute v0 := a0*b0 in {c, 2k} */\n  TOOM3_MUL_REC (c, a, b, k, trec);\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n#define vinf (c+4*k)\n\n  /* compute vinf := a3*b1 in {c4, r + r2},\n  */\n  saved = c4[0];\n\n  if (r == r2) TOOM3_MUL_REC (c4, a + threek, b + k, r, trec);\n  else if (r > r2) mpn_mul(c4, a + threek, r, b + k, r2);\n  else mpn_mul(c4, b + k, r2, a + threek, r);\n\n  vinf0 = c4[0];\n  c4[0] = saved;\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1        {-}vinf\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n\n\t  vinf0 = {-}\n  */\n\n  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, sa, vinf0, t4+2);\n\n#undef v2\n#undef vinf\n}\n\n/*\n   We have a 3x2 blocked multiplication and therefore the output is of length \n   4 blocks. Therefore we evaluate at the 4 points 0, inf, -1, 1, i.e. 
we need\n\t(a0*b0), (a2*b1), (a0-a1+a2)*(b0-b1), (a0+a1+a2)*(b0+b1).\n\tThe multiplication will be (2k+r) x (k + r2) and therefore the output has\n\tspace for 3k + rr2 limbs.\n*/\nvoid\nmpn_toom32_mul (mp_ptr c, mp_srcptr a, mp_size_t an, \n                                           mp_srcptr b, mp_size_t bn, mp_ptr t)\n{\n  mp_size_t k, k1, kk1, r, r2, twok, threek, rr2, n1, n2;\n  mp_limb_t cy, cc, saved;\n  mp_ptr trec;\n  int sa, sb;\n  mp_ptr c1, c2, c3, c4, c5, t1, t2, t3, t4;\n\n  ASSERT(GMP_NUMB_BITS >= 6);\n\n  k = (an + 2) / 3; /* ceil(an/3) */\n  ASSERT(bn > k);\n  ASSERT(an >= 20);\n  \n  twok = 2 * k;\n  threek = 3 * k;\n  k1 = k + 1;\n  kk1 = k + k1;\n  r = an - twok;   /* last chunk */\n  r2 = bn - k;   /* last chunk */\n  rr2 = r + r2;\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  c5 = c4 + k;\n  \n  t1 = t + k;\n  t2 = t1 + k;\n  t3 = t2 + k;\n  t4 = t3 + k;\n\n  trec = t + 3 * k + 3; \n\n  /* put a0+a2 in {t, k+1}, and b0+b1 in {t2 + 2, k+1};\n     put a0+a1+a2 in {t1 + 1, k+1}\n  */\n  cy = mpn_add_n (t, a, a + twok, r);\n  t3[2] = mpn_add_n (t2 + 2, b, b + k, r2);\n  if (r < k)\n    {\n      cy = mpn_add_1 (t + r, a + r, k - r, cy);\n    }\n  if (r2 < k)\n    {\n\t  t3[2] = mpn_add_1 (t2 + 2 + r2, b + r2, k - r2, t3[2]);\n    }\n  t2[1] = (t1[0] = cy) + mpn_add_n (t1 + 1, t, a + k, k);\n  \n  /* compute v1 := (a0+a1+a2)*(b0+b1) in {c1, 2k+1};\n     since v1 < 6*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 3 */\n  TOOM3_MUL_REC (c1, t1 + 1, t2 + 2, k1, trec);\n\n  saved = c1[0];\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n  */\n\n  /* put |a0-a1+a2| in {c0, k+1} and |b0-b1| in {t2 + 2,k+1} */\n  /* sa = sign(a0-a1+a2) */\n  /* sb = sign(b0-b1) */\n  sa = (t[k] != 0) ? 
1 : mpn_cmp (t, a + k, k);\n  if (sa >= 0) c[k] = t[k] - mpn_sub_n (c, t, a + k, k);\n  else c[k] = -mpn_sub_n (c, a + k, t, k);\n  \n  n1 = k;\n  n2 = r2;\n  MPN_NORMALIZE(b, n1);\n  MPN_NORMALIZE(b+k, n2);\n  if (n1 != n2) sb = (n1 > n2) ? 1 : -1;\n  else sb = mpn_cmp (b, b + k, n2);\n\n  if (sb >= 0) \n  {\n\t  t3[2] = mpn_sub_n (t2 + 2, b, b + k, r2);\n\t  if (k > r2) t3[2] = -mpn_sub_1(t2 + 2 + r2, b + r2, k - r2, t3[2]);\n  } else\n  {\n\t  mpn_sub_n (t2 + 2, b + k, b, r2);\n\t  MPN_ZERO(t2 + r2 + 2, k1 - r2);\n  }\n  \n  sa *= sb; /* sign of vm1 */\n\n  /* compute vm1 := (a0-a1+a2)*(b0-b1) in {t, 2k+1};\n     since |vm1| < 2*B^(2k), vm1 uses only 2k+1 limbs */\n  TOOM3_MUL_REC (t, t2 + 2, c, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1\n  */\n\n  c1[0] = saved; \n\n\n  /* {c,k} {c+k,2k+1} {c+3k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 2}\n\t     vm1        \n  */\n\n  /* Compute vm1 <-- (vm1 + v1)/2 (note vm1 + v1 is positive) */\n\n  if (sa > 0)\n  {\n#if HAVE_NATIVE_mpn_rsh1add_n\n     mpn_rsh1add_n(t, t, c1, kk1);\n#else\n\t  mpn_add_n(t, t, c1, kk1);\n\t  mpn_half(t, kk1);\n#endif\n  } else\n  {\n#if HAVE_NATIVE_mpn_rsh1sub_n\n\t  mpn_rsh1sub_n(t, c1, t, kk1);\n#else\n\t  mpn_sub_n(t, c1, t, kk1);\n     mpn_half(t, kk1);\n#endif\n  }\n  \n  /* Compute v1 <-- v1 - vm1 */\n\n  mpn_sub_n(c1, c1, t, kk1);\n\n  /* Note we could technically overflow\n     the end of the output if we add\n\t  everything in place without subtracting\n\t  the right things first. 
We get around \n\t  this by throwing away any high limbs\n\t  and carries, which must of necessity\n\t  cancel.\n\n\t  First we add vm1 in its place...\n  */\n\n  n1 = kk1;\n  MPN_NORMALIZE(t, n1);\n\n  if (n1 >= k + rr2) /* if > here, high limb of vm1 and carry may be discarded */\n  { \n\t  cy = mpn_add_n(c2, c2, t, k1);\n\t  mpn_add_1(c3 + 1, t + k1, rr2 - 1, cy);\n\t  n2 = threek + rr2;\n  } else \n  {\n\t  c2[k1] = mpn_add_n(c2, c2, t, k1);\n     if (n1 > k1) c2[n1] = mpn_add_1(c3 + 1, t + k1, n1 - k1, c2[k1]); \n\t  n2 = twok + MAX(n1, k1) + 1;\n  } \n\n  /* Compute vinf := a2*b1 in {t, rr2} */\n\n  if (r == r2) TOOM3_MUL_REC (t, a + twok, b + k, r, trec);\n  else if (r > r2) mpn_mul(t, a + twok, r, b + k, r2);\n  else mpn_mul(t, b + k, r2, a + twok, r);\n\n  /* Add vinf into place */\n\n  cy = mpn_add_n(c3, c3, t, n2 - threek);\n  if (rr2 + threek > n2) \n\t  mpn_add_1(c + n2, t + n2 - threek, rr2 + threek - n2, cy);\n\n  /* v1 <-- v1 - vinf */\n\n  cy = mpn_sub_n(c1, c1, t, rr2);\n  if (cy) mpn_sub_1(c1 + rr2, c1 + rr2, twok, cy);\n  \n  /* compute v0 := a0*b0 in {t, 2k} */\n\n  TOOM3_MUL_REC (t, a, b, k, trec);\n\n  /* Add v0 into place */\n\n  MPN_COPY(c, t, k);\n  cy = mpn_add_n(c + k, c + k, t + k, k);\n  if (cy) mpn_add_1(c + twok, c + twok, k + rr2, cy);\n\n  /* vm1 <-- vm1 - v0 */\n\n  if (twok >= k + rr2)\n     mpn_sub_n(c2, c2, t, k + rr2);\n  else\n  {\n\t  cy = mpn_sub_n(c2, c2, t, twok);\n     mpn_sub_1(c4, c4, rr2 + k - twok, cy);\n  }\n}\n\n"
  },
  {
    "path": "mpn/generic/toom3_mul_n.c",
    "content": "/* mpn_toom3_mul and helper functions -- Multiply/square natural numbers.\n\n   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)\n   ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH\n   THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED\n   THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,\n2005, Free Software Foundation, Inc.\n\nCopyright 2009 Jason Moxham\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n/******************************************************************************\n *                                                                            *\n *              Toom 3-way multiplication and squaring                        *\n *                                                                            *\n *****************************************************************************/\n#define TOOM3_MUL_REC(p, a, b, n, ws) \\\n  do {\t\t\t\t\t\t\t\t\\\n    if (MUL_TOOM3_THRESHOLD / 3 < MUL_KARATSUBA_THRESHOLD\t\\\n\t&& BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))\t\\\n      mpn_mul_basecase (p, a, n, b, n);\t\t\t\t\\\n    else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))\t\t\\\n      mpn_kara_mul_n (p, a, b, n, ws);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\\\n      mpn_toom3_mul_n (p, a, b, n, ws);\t\t\t\t\\\n  } while (0)\n\n#define TOOM3_SQR_REC(p, a, n, ws)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if (SQR_TOOM3_THRESHOLD / 3 < SQR_BASECASE_THRESHOLD\t\\\n\t&& BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))\t\t\\\n      mpn_mul_basecase (p, a, n, a, n);\t\t\t\t\\\n    else if (SQR_TOOM3_THRESHOLD / 3 < SQR_KARATSUBA_THRESHOLD\t\\\n\t&& BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))\t\\\n      mpn_sqr_basecase (p, a, n);\t\t\t\t\\\n    else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))\t\t\\\n      mpn_kara_sqr_n (p, a, n, ws);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\\\n      mpn_toom3_sqr_n (p, a, n, ws);\t\t\t\t\\\n  } while (0)\n\n/* The necessary temporary space T(n) satisfies T(n)=0 for n < THRESHOLD,\n   and T(n) <= max(2n+2, 6k+3, 4k+3+T(k+1)) otherwise, where k = ceil(n/3).\n\n   Assuming T(n) >= 2n, 6k+3 <= 4k+3+T(k+1).\n   Similarly, 2n+2 <= 6k+2 <= 4k+3+T(k+1).\n\n   With T(n) = 2n+S(n), this simplifies to S(n) <= 9 + S(k+1).\n   Since THRESHOLD >= 17, we have n/(k+1) >= 19/8\n   thus S(n) <= S(n/(19/8)) + 9 thus S(n) <= 9*log(n)/log(19/8) <= 8*log2(n).\n\n   We need in 
addition 2*r for mpn_sublsh1_n, so the total is at most\n   8/3*n+8*log2(n).\n*/\nvoid\nmpn_toom3_mul_n (mp_ptr c, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr t)\n{\n  mp_size_t k, k1, kk1, r, twok, rr2;\n  mp_limb_t cy, cc, saved, vinf0;\n  mp_ptr trec;\n  int sa, sb;\n  mp_ptr c1, c2, c3, c4, c5, t1, t2, t3, t4;\n\n  ASSERT(GMP_NUMB_BITS >= 6);\n\n  k = (n + 2) / 3; /* ceil(n/3) */\n  ASSERT(GMP_NUMB_BITS >= 6);\n  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */\n\n  twok = 2 * k;\n  k1 = k + 1;\n  kk1 = k + k1;\n  r = n - twok;   /* last chunk */\n  rr2 = 2*r;\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  c5 = c4 + k;\n  \n  t1 = t + k;\n  t2 = t1 + k;\n  t3 = t2 + k;\n  t4 = t3 + k;\n\n  trec = t + 4 * k + 4; \n\n  /* put a0+a2 in {c, k+1}, and b0+b2 in {c4 + 2, k+1};\n     put a0+a1+a2 in {t2 + 1, k+1} and b0+b1+b2 in {t3 + 2,k+1}\n  */\n  c1[0] = mpn_add_n (c, a, a + twok, r);\n  c5[2] = mpn_add_n (c4 + 2, b, b + twok, r);\n  if (r < k)\n    {\n      c1[0] = mpn_add_1 (c + r, a + r, k - r, c1[0]);\n\t   c5[2] = mpn_add_1 (c4 + 2 + r, b + r, k - r, c5[2]);\n    }\n  t3[1] = c1[0] + mpn_add_n (t2 + 1, c, a + k, k);\n  t4[2] = c5[2] + mpn_add_n (t3 + 2, c4 + 2, b + k, k);\n\n  ASSERT(c1[0] < 2);\n  ASSERT(c5[2] < 2);\n  ASSERT(t3[1] < 3);\n  ASSERT(t4[2] < 3);\n\n  /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {c2, 2k+1};\n     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */\n  TOOM3_MUL_REC (c2, t2 + 1, t3 + 2, k1, trec);\n\n  ASSERT(c2[k+k] < 9);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n  */\n\n  /* put |a0-a1+a2| in {c,k+1} and |b0-b1+b2| in {c4 + 2,k+1} */\n  /* sa = sign(a0-a1+a2) */\n  /* sb = sign(b0-b1+b2) */\n  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);\n  c[k] = (sa >= 0) ? c[k] - mpn_sub_n (c, c, a + k, k)\n\t\t   : mpn_sub_n (c, a + k, c, k);\n  /* b0+b2 is in {c4+2, k+1} now */\n  sb = (c5[2] != 0) ? 1 : mpn_cmp (c4 + 2, b + k, k);\n  c5[2] = (sb >= 0) ? 
c5[2] - mpn_sub_n (c4 + 2, c4 + 2, b + k, k)\n\t\t    : mpn_sub_n (c4 + 2, b + k, c4 + 2, k);\n  \n  ASSERT(c[k] < 2);\n  ASSERT(c5[2] < 2);\n\n  sa *= sb; /* sign of vm1 */\n\n  /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {t, 2k+1};\n     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */\n  TOOM3_MUL_REC (t, c, c4 + 2, k1, trec);\n\n  ASSERT(t[k+k] < 4);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1\n  */\n\n  /* \n     compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c4 + 2, k+1}\n  */\n#if HAVE_NATIVE_mpn_addlsh1_n\n  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);\n  c5[2] = mpn_addlsh1_n (c4 + 2, b + k, b + twok, r);\n  if (r < k)\n    {\n      c1[0] = mpn_add_1(c + r, a + k + r, k - r, c1[0]);\n      c5[2] = mpn_add_1(c4 + 2 + r, b + k + r, k - r, c5[2]);\n    }\n  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);\n  c5[2] = 2 * c5[2] + mpn_addlsh1_n (c4 + 2, b, c4 + 2, k);\n#else\n  c[r] = mpn_lshift1 (c, a + twok, r);\n  c4[r + 2] = mpn_lshift1 (c4 + 2, b + twok, r);\n  if (r < k)\n    {\n      MPN_ZERO(c + r + 1, k - r);\n      MPN_ZERO(c4 + r + 3, k - r);\n    }\n  c1[0] += mpn_add_n (c, c, a + k, k);\n  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b + k, k);\n  mpn_double (c, k1);\n  mpn_double (c4 + 2, k1);\n  c1[0] += mpn_add_n (c, c, a, k);\n  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b, k);\n#endif\n\n  ASSERT(c[k] < 7);\n  ASSERT(c5[2] < 7);\n\n#define v2 (t+2*k+1)\n\n  /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}\n     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */\n  TOOM3_MUL_REC (v2, c, c4 + 2, k1, trec);\n\n  ASSERT(v2[k+k] < 49);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n  /* compute v0 := a0*b0 in {c, 2k} */\n  TOOM3_MUL_REC (c, a, b, k, trec);\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n#define vinf 
(c+4*k)\n\n  /* compute vinf := a2*b2 in {c4, r + r2},\n  */\n  saved = c4[0];\n\n  TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec);\n  \n  vinf0 = c4[0];\n  c4[0] = saved;\n  \n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1        {-}vinf\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n\n\t  vinf0 = {-}\n  */\n\n  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, sa, vinf0, t4+2);\n\n#undef v2\n#undef vinf\n}\n\nvoid\nmpn_toom3_sqr_n (mp_ptr c, mp_srcptr a, mp_size_t n, mp_ptr t)\n{\n  mp_size_t k, k1, kk1, r, twok, rr2;\n  mp_limb_t cy, cc, saved, vinf0;\n  mp_ptr trec;\n  int sa;\n  mp_ptr c1, c2, c3, c4, c5, t1, t2, t3, t4;\n\n  ASSERT(GMP_NUMB_BITS >= 6);\n\n  k = (n + 2) / 3; /* ceil(n/3) */\n  ASSERT(GMP_NUMB_BITS >= 6);\n  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */\n\n  twok = 2 * k;\n  k1 = k + 1;\n  kk1 = k + k1;\n  r = n - twok;   /* last chunk */\n  rr2 = 2*r;\n\n  c1 = c + k;\n  c2 = c1 + k;\n  c3 = c2 + k;\n  c4 = c3 + k;\n  c5 = c4 + k;\n  \n  t1 = t + k;\n  t2 = t1 + k;\n  t3 = t2 + k;\n  t4 = t3 + k;\n\n  trec = t + 4 * k + 3; \n\n  /* put a0+a2 in {c, k+1}\n     put a0+a1+a2 in {t2 + 1, k+1}\n  */\n  cy = mpn_add_n (c, a, a + twok, r);\n  if (r < k)\n    {\n      __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);\n   }\n  t3[1] = (c1[0] = cy) + mpn_add_n (t2 + 1, c, a + k, k);\n\n  /* compute v1 := (a0+a1+a2)^2 in {c2, 2k+1};\n     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */\n  TOOM3_SQR_REC (c2, t2 + 1, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n  */\n\n  /* put |a0-a1+a2| in {c,k+1} */\n  sa = (c[k] != 0) ? 1 : mpn_cmp (c, a + k, k);\n  c[k] = (sa >= 0) ? 
c[k] - mpn_sub_n (c, c, a + k, k)\n\t\t   : mpn_sub_n (c, a + k, c, k);\n  \n  /* compute vm1 := (a0-a1+a2)^2 in {t, 2k+1};\n     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */\n  TOOM3_SQR_REC (t, c, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1\n  */\n\n  /* \n     compute a0+2a1+4a2 in {c, k+1}\n  */\n#if HAVE_NATIVE_mpn_addlsh1_n\n  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);\n  if (r < k)\n    {\n      __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);\n    }\n  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);\n#else\n  c[r] = mpn_lshift1 (c, a + twok, r);\n  if (r < k)\n    {\n      MPN_ZERO(c + r + 1, k - r);\n    }\n  c1[0] += mpn_add_n (c, c, a + k, k);\n  mpn_double (c, k1);\n  c1[0] += mpn_add_n (c, c, a, k);\n#endif\n\n#define v2 (t+2*k+1)\n\n  /* compute v2 := (a0+2a1+4a2)^2 in {t+2k+1, 2k+1}\n     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */\n  TOOM3_SQR_REC (v2, c, k1, trec);\n\n  /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\t\t\t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n  /* compute v0 := a0^2 in {c, 2k} */\n  TOOM3_SQR_REC (c, a, k, trec);\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n  */\n\n#define vinf (c+4*k)\n\n  /* compute vinf := a2*b2 in {c4, r + r2},\n  */\n  saved = c4[0];\n\n  TOOM3_SQR_REC (c4, a + twok, r, trec);\n  \n  vinf0 = c4[0];\n  c4[0] = saved;\n\n /* {c,2k} {c+2k,2k+1} {c+4k+1,r+r2-1} \n\t\tv0 \t\tv1        {-}vinf\n\n\t  {t, 2k+1} {t+2k+1, 2k + 1}\n\t     vm1        v2\n\n\t  vinf0 = {-}\n  */\n\n  mpn_toom3_interpolate (c, c2, v2, t, vinf, k, rr2, 1, vinf0, t4+2);\n\n#undef v2\n#undef vinf\n}\n"
  },
  {
    "path": "mpn/generic/toom42_mulmid.c",
    "content": "/* mpn_toom42_mulmid -- toom42 middle product\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/*\n  Let a = sum_0^{m-1} a_i B^i and b = sum_0^{n-1} b_j B^j\n\n  then MP(a, m, b, n) = sum_{0<=i<m, 0<=j<n, n-1<=i+j<=m-1} a_ib_j B^{i+j-n+1}\n\n  Note there are m-n+1 different values of i+j and each product a_ib_j will be two limbs. Thus when added together, the sum must take up n-m+3 limbs of space.\n\n  This function computes the middle product of {ap,2n-1} and {bp,n}, \n  output written to {rp,n+2}, i.e. 
it computes MP(ap,2n-1,bp,n).\n\n  Neither ap nor bp may overlap rp.\n\n  Must have n >= 4.\n\n  See mpn_toom42_mulmid_itch() for the amount of scratch space required.\n\n  FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact\n  requirements should be clarified.\n*/\nvoid\nmpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,\n                   mp_ptr scratch)\n{\n  mp_limb_t cy, e[12], t0, t1;\n  mp_size_t m;\n  int neg;\n\n  ASSERT (n >= 4);\n  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));\n  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));\n\n  ap += n & 1;   /* handle odd row and diagonal later */\n  m = n / 2;\n\n  /* (e0h:e0l) etc are correction terms, in 2's complement */\n#define e0l (e[0])\n#define e0h (e[1])\n#define e1l (e[2])\n#define e1h (e[3])\n#define e2l (e[4])\n#define e2h (e[5])\n#define e3l (e[6])\n#define e3h (e[7])\n#define e4l (e[8])\n#define e4h (e[9])\n#define e5l (e[10])\n#define e5h (e[11])\n\n#define s scratch\n#define d rp\n#define p0 rp\n#define p1 (scratch + 2*m - 1)\n#define p2 (rp + m)\n#define rec_scratch (scratch + 3*m + 1)\n\n  /*\n            rp                            scratch\n  |---------|-----------|    |---------|---------|----------|\n  0         m         2m+2   0         m         2m        3m+1\n  <----d---><---p2----->     <-------------s------------->\n  <----p0---->                                  <----p1---->\n  */\n\n  /* transposed interpolation, and compute correction terms */\n  cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);\n  cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,\n\t\t       bp + m, bp, m, cy);\n  mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);\n\n  if (mpn_cmp (bp + m, bp, m) < 0)\n    {\n      ASSERT_NOCARRY (mpn_sub_err2_n (d, bp, bp + m, &e4l,\n\t\t\t\t      ap + m - 1, ap + 2*m - 1, m, 0));\n      neg = 1;\n    }\n  else\n    {\n      ASSERT_NOCARRY (mpn_sub_err2_n (d, bp + m, bp, 
&e4l,\n\t\t\t\t      ap + m - 1, ap + 2*m - 1, m, 0));\n      neg = 0;\n    }\n\n  /* recursive middle products. The picture is:\n\n      b[2m-1]   A   A   A   B   B   B   -   -   -   -   -\n      ...       -   A   A   A   B   B   B   -   -   -   -\n      b[m]      -   -   A   A   A   B   B   B   -   -   -\n      b[m-1]    -   -   -   C   C   C   D   D   D   -   -\n      ...       -   -   -   -   C   C   C   D   D   D   -\n      b[0]      -   -   -   -   -   C   C   C   D   D   D\n               a[0]   ...  a[m]  ...  a[2m]    ...    a[4m-2]\n  */\n\n  if (m < MULMID_TOOM42_THRESHOLD)\n    {\n      /* C + D */\n      mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);\n      t0 = p2[0], t1 = p2[1];    /* save first two limbs of p2 */\n      /* B - C  (or C - B if neg == 1) */\n      mpn_mulmid_basecase (p1, ap + m, 2*m - 1, d, m);\n      /* A + B */\n      mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);\n    }\n  else\n    {\n      /* as above, but call toom42 instead */\n      mpn_toom42_mulmid (p2, s + m, bp, m, rec_scratch);\n      t0 = p2[0], t1 = p2[1];\n      mpn_toom42_mulmid (p1, ap + m, d, m, rec_scratch);\n      mpn_toom42_mulmid (p0, s, bp + m, m, rec_scratch);\n    }\n\n  /* apply correction terms */\n\n  /* -e0 at p0[0] */\n  SUBC_LIMB (cy, p0[0], p0[0], e0l);\n  e0h += cy;\n  SUBC_LIMB (cy, p0[1], p0[1], e0h);\n  if (UNLIKELY (cy))\n    e2h += mpn_sub_1 (p0 + 2, p0 + 2, m - 1, 1);\n\n  /* absorb t0 into e1 */\n  ADDC_LIMB (cy, e1l, e1l, t0);\n  e1h += cy;\n\n  /* e1 at p0[m] */\n  ADDC_LIMB (cy, p0[m], p0[m], e1l);\n  p0[m + 1] += e1h + cy;\n\n  /* add back t1 */\n  ADDC_LIMB (cy, p2[1], p2[1], t1);\n  if (UNLIKELY (cy))\n    mpn_add_1 (p2 + 2, p2 + 2, m, 1);\n\n  /* -e2 at p2[0] */\n  SUBC_LIMB (cy, p2[0], p2[0], e2l);\n  e2h += cy;\n  SUBC_LIMB (cy, p2[1], p2[1], e2h);\n  if (UNLIKELY (cy))\n    mpn_sub_1 (p2 + 2, p2 + 2, m, 1);\n\n  /* e3 at p2[m] */\n  ADDC_LIMB (cy, p2[m], p2[m], e3l);\n  p2[m + 1] = (p2[m + 1] + e3h + cy) & GMP_NUMB_MASK;\n\n  /* 
e4 at p1[0] */\n  ADDC_LIMB (cy, p1[0], p1[0], e4l);\n  e4h += cy;\n  ADDC_LIMB (cy, p1[1], p1[1], e4h);\n  if (UNLIKELY (cy))\n    mpn_add_1 (p1 + 2, p1 + 2, m, 1);\n\n  /* -e5 at p1[m] */\n  SUBC_LIMB (cy, p1[m], p1[m], e5l);\n  p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;\n\n  /* adjustment if p1 ends up negative */\n  cy = (p1[m + 1] & GMP_NUMB_HIGHBIT) != 0;\n\n  /* transposed evaluation */\n  if (neg)\n    {\n      mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);\n      mpn_add (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */\n      mpn_sub_n (rp + m, rp + m, p1, m + 2);            /* B + D */\n    }\n  else\n    {\n      mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);\n      mpn_sub (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */\n      mpn_add_n (rp + m, rp + m, p1, m + 2);            /* B + D */\n    }\n\n  /* odd row and diagonal */\n  if (n & 1)\n    {\n      /*\n        Cells marked E are already done. We need to do cells marked O.\n\n        OOOOO----\n        -EEEEO---\n        --EEEEO--\n        ---EEEEO-\n        ----EEEEO\n       */\n\n      /* first row of O's */\n      cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);\n      ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);\n\n      /* O's on diagonal */\n      /* FIXME: should probably define an interface \"mpn_mulmid_diag_1\"\n         that can handle the sum below. Currently we're relying on\n         mulmid_basecase being pretty fast for a diagonal sum like this,\n\t which is true at least for the K8 asm verion, but surely false\n\t for the generic version. */\n      mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);\n      mpn_add_n (rp + n - 1, rp + n - 1, e, 3);\n    }\n}\n"
  },
  {
    "path": "mpn/generic/toom4_mul.c",
    "content": "/* mpn_toom4_mul -- Internal routine to multiply two natural numbers\n   using unbalanced toom4.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n*/\n\n/* Implementation of the Bodrato-Zanoni algorithm for Toom-Cook 4-way.\n\nCopyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n/*\n   This implementation is based on that of Paul Zimmmermann, which is available\n\tfor mpz_t's at http://www.loria.fr/~zimmerma/software/toom4.c\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#define MUL_TC4_UNSIGNED(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \\\n   do \\\n   { \\\n      if ((n1xx != 0) && (n2xx != 0)) \\\n      { mp_size_t len; \\\n\t      if (n1xx == n2xx) \\\n\t\t   { \\\n\t\t\t   if (n1xx > MUL_TOOM4_THRESHOLD) mpn_toom4_mul_n(r3xx, r1xx, r2xx, n1xx); \\\n            else mpn_mul_n(r3xx, r1xx, r2xx, n1xx); \\\n\t\t   } else if (n1xx > n2xx) \\\n\t\t      mpn_mul(r3xx, r1xx, n1xx, r2xx, n2xx); \\\n\t\t   else \\\n\t\t      mpn_mul(r3xx, r2xx, n2xx, r1xx, n1xx); \\\n\t      len = n1xx + n2xx; \\\n\t\t   MPN_NORMALIZE(r3xx, len); \\\n\t\t   n3xx = len; \\\n      } else \\\n         n3xx = 0; \\\n   } while (0)\n\n#define MUL_TC4(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \\\n\tdo \\\n\t{ \\\n\t   mp_size_t sign = n1xx ^ n2xx; \\\n\t   mp_size_t un1 = ABS(n1xx); \\\n\t   mp_size_t un2 = ABS(n2xx); \\\n\t\tMUL_TC4_UNSIGNED(r3xx, n3xx, r1xx, un1, r2xx, un2); \\\n\t\tif (sign < 0) n3xx = -n3xx; \\\n\t} while (0)\n\n#define TC4_NORM(rxx, nxx, sxx) \\\n\tdo \\\n\t{ \\\n\t   nxx = sxx; \\\n\t   MPN_NORMALIZE(rxx, nxx); \\\n\t} while(0)\n\n/* Zero out limbs to end of integer */\n#define TC4_DENORM(rxx, nxx, sxx) \\\n\tdo { \\\n\tMPN_ZERO(rxx + ABS(nxx), sxx - ABS(nxx)); \\\n\t} while (0)\n\n/* Two's complement divexact by power of 2 */\n#define TC4_DIVEXACT_2EXP(rxx, nxx, sxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - sxx)); \\\n      mpn_rshift(rxx, rxx, nxx, sxx); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n\n#if HAVE_NATIVE_mpn_rshift1\n#define TC4_RSHIFT1(rxx, nxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \\\n       mpn_half(rxx, nxx); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n#else\n#define TC4_RSHIFT1(rxx, 
nxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \\\n       mpn_rshift(rxx, rxx, nxx, 1); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n#endif\n\n#define r1 (tp)\n#define r2 (tp + t4)\n#define r4 (tp + 2*t4)\n#define r6 (tp + 3*t4)\n\n#define r3 (rp + 4*sn)\n#define r5 (rp + 2*sn)\n#define r7 (rp)\n\n#define mpn_clearit(rxx, nxx) \\\n  do { \\\n     mp_size_t ind = 0; \\\n     for ( ; ind < nxx; ind++) \\\n        (rxx)[ind] = CNST_LIMB(0); \\\n  } while (0)\n\n/* Multiply {up, un} by {vp, vn} and write the result to\n   {prodp, un + vn} assuming vn > 3*ceil(un/4).\n\n   Note that prodp gets un + vn limbs stored, even if the actual \n   result only needs un + vn - 1.\n*/\nvoid\nmpn_toom4_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,\n\t\t          mp_srcptr vp, mp_size_t vn)\n{\n  mp_size_t ind;\n  mp_limb_t cy, cy2, r30, r31;\n  mp_ptr tp;\n  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1, h2;\n  TMP_DECL;\n\n  sn = (un + 3) / 4;\n\n  h1 = un - 3*sn;\n  h2 = vn - 3*sn;\n\n  ASSERT (vn > 3*sn);\n  \n#define a0 (up)\n#define a1 (up + sn)\n#define a2 (up + 2*sn)\n#define a3 (up + 3*sn)\n#define b0 (vp)\n#define b1 (vp + sn)\n#define b2 (vp + 2*sn)\n#define b3 (vp + 3*sn)\n\n   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs\n\n   TMP_MARK;\n\n   tp = TMP_ALLOC_LIMBS(4*t4 + 5*(sn + 1));\n\n#define u2 (tp + 4*t4)\n#define u3 (tp + 4*t4 + (sn+1))\n#define u4 (tp + 4*t4 + 2*(sn+1))\n#define u5 (tp + 4*t4 + 3*(sn+1))\n#define u6 (tp + 4*t4 + 4*(sn+1))\n\n   u6[sn] = mpn_add(u6, a1, sn, a3, h1);\n   u5[sn] = mpn_add_n(u5, a2, a0, sn);\n   mpn_add_n(u3, u5, u6, sn + 1);\n   n4 = sn + 1;\n   if (mpn_cmp(u5, u6, sn + 1) >= 0)\n      mpn_sub_n(u4, u5, u6, sn + 1);\n   else\n   {  \n      mpn_sub_n(u4, u6, u5, sn + 1);\n      n4 = -n4;\n   }\n\n   u6[sn] = mpn_add(u6, b1, sn, b3, h2);\n   u5[sn] = mpn_add_n(u5, b2, b0, sn);\n   mpn_add_n(r2, u5, u6, sn + 1);\n   n5 = sn + 1;\n   if (mpn_cmp(u5, u6, sn + 1) 
>= 0)\n      mpn_sub_n(u5, u5, u6, sn + 1);\n   else\n   {  \n      mpn_sub_n(u5, u6, u5, sn + 1);\n      n5 = -n5;\n   }\n \n   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */\n   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */\n   \n#if HAVE_NATIVE_mpn_addlsh_n\n   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);\n   mpn_lshift(r1, r1, sn + 1, 1);\n   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);\n#else\n   r1[sn] = mpn_lshift(r1, a2, sn, 1);\n   MPN_COPY(r2, a3, h1);\n   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);\n   cy = mpn_addmul_1(r2, a1, h1, 4);\n#endif\n   if (sn > h1) \n   {\n      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);\n      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);\n   }\n   r2[sn] = cy;\n   mpn_add_n(u5, r1, r2, sn + 1);\n   n6 = sn + 1;\n   if (mpn_cmp(r1, r2, sn + 1) >= 0)\n      mpn_sub_n(u6, r1, r2, sn + 1);\n   else\n   {  \n      mpn_sub_n(u6, r2, r1, sn + 1);\n      n6 = -n6;\n   }\n \n#if HAVE_NATIVE_mpn_addlsh_n\n   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);\n   mpn_lshift(r1, r1, sn + 1, 1);\n   cy = mpn_addlsh_n(r2, b3, b1, h2, 2);\n#else\n   r1[sn] = mpn_lshift(r1, b2, sn, 1);\n   MPN_COPY(r2, b3, h2);\n   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);\n   cy = mpn_addmul_1(r2, b1, h2, 4);\n#endif\n   if (sn > h2) \n   {\n      cy2 = mpn_lshift(r2 + h2, b1 + h2, sn - h2, 2);\n      cy = cy2 + mpn_add_1(r2 + h2, r2 + h2, sn - h2, cy);\n   }\n   r2[sn] = cy;\n   mpn_add_n(u2, r1, r2, sn + 1);\n   n8 = sn + 1;\n   if (mpn_cmp(r1, r2, sn + 1) >= 0)\n      mpn_sub_n(r2, r1, r2, sn + 1);\n   else\n   {  \n      mpn_sub_n(r2, r2, r1, sn + 1);\n      n8 = -n8;\n   }\n    \n   r30 = r3[0];\n   r31 = r3[1];\n   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */\n   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */\n   r3[1] = r31;\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n   cy = mpn_addlsh1_n(u2, a2, a3, h1);\n   if (sn > h1)\n      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); \n   u2[sn] = cy;\n   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     \n 
  u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     \n#else\n   MPN_COPY(u2, a0, sn);\n   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);\n   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);\n   cy = mpn_addmul_1(u2, a3, h1, 8);\n   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);\n   u2[sn] += cy;\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n   cy = mpn_addlsh1_n(r1, b2, b3, h2);\n   if (sn > h2)\n      cy = mpn_add_1(r1 + h2, b2 + h2, sn - h2, cy); \n   r1[sn] = cy;\n   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     \n   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     \n#else\n   MPN_COPY(r1, b0, sn);\n   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);\n   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);\n   cy = mpn_addmul_1(r1, b3, h2, 8);\n   if (sn > h2) cy = mpn_add_1(r1 + h2, r1 + h2, sn - h2, cy);\n   r1[sn] += cy;\n#endif\n   \n   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */\n   \n   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h2); /* oo */\n   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */\n\n   TC4_DENORM(r1, n1, t4 - 1);\n\n/*\trp        rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                       <-------------r3------------->\n\n              <-------------r6------------->                        < -----------r2------------>{           }\n                                         <-------------r4-------------->         <--------------r1---->\n*/\n\n   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);\n\n   if (rpn != un + vn) \n   {\n\t  MPN_ZERO((rp + rpn), un + vn - rpn);\n   }\n\n   TMP_FREE;\n}\n\n/* Multiply {up, un} by {vp, vn} and write the result to\n   {prodp, un + vn} assuming vn > 2*ceil(un/5).\n\n   Note that prodp gets un + vn limbs stored, even if the actual \n   result only needs un + vn - 1.\n*/\nvoid\nmpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,\n\t\t          mp_srcptr 
vp, mp_size_t vn)\n{\n  mp_size_t ind;\n  mp_limb_t cy, r30, r31;\n  mp_ptr tp;\n  mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn; \n  mp_size_t n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4;\n\n  sn = (un + 4) / 5;\n\n  ASSERT (vn > 2*sn);\n  \n#define a0 (up)\n#define a1 (up + sn)\n#define a2 (up + 2*sn)\n#define a3 (up + 3*sn)\n#define a4 (up + 4*sn)\n#define b0 (vp)\n#define b1 (vp + sn)\n#define b2 (vp + 2*sn)\n\n   TC4_NORM(a0, a0n, sn);\n   TC4_NORM(a1, a1n, sn);\n   TC4_NORM(a2, a2n, sn);\n   TC4_NORM(a3, a3n, sn);\n   TC4_NORM(a4, a4n, un - 4*sn); \n   TC4_NORM(b0, b0n, sn);\n   TC4_NORM(b1, b1n, sn);\n   TC4_NORM(b2, b2n, vn - 2*sn); \n\n   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs\n\n   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));\n\n#define u2 (tp + 4*t4)\n#define u3 (tp + 4*t4 + (sn+1))\n#define u4 (tp + 4*t4 + 2*(sn+1))\n#define u5 (tp + 4*t4 + 3*(sn+1))\n\n   tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n); \n   tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n); \n   tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n); \n   tc4_add_unsigned(u3, &n3, u5, n5, u2, n2); \n   tc4_sub(u4, &n4, u5, n5, u2, n2);\n\n   tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n);\n   tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n); \n   tc4_sub(u5, &n5, u5, n5, b1, b1n);\n\n   MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */\n   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */\n   \n   tc4_lshift(r1, &n1, a0, a0n, 4);\n   tc4_lshift(u3, &n9, a2, a2n, 2);\n   tc4_add_unsigned(r1, &n1, r1, n1, u3, n9);\n   tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n);\n   tc4_lshift(r2, &n8, a1, a1n, 3);\n   tc4_addlsh1_unsigned(r2, &n8, a3, a3n);\n   tc4_add_unsigned(u5, &n5, r1, n1, r2, n8);\n   tc4_sub(u3, &n9, r1, n1, r2, n8);\n\n   tc4_lshift(r1, &n1, b0, b0n, 2);\n   tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n);\n   tc4_lshift(u4, &n10, b1, b1n, 1);\n   tc4_add_unsigned(u2, &n2, r1, n1, u4, n10);\n   tc4_sub(r2, &n8, r1, n1, u4, n10);\n   \n   r30 = r3[0];\n   if (!n3) r30 = CNST_LIMB(0);\n  
 r31 = r3[1];\n   MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */\n   MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */\n   r3[1] = r31;\n\n   tc4_lshift(u2, &n2, a4, a4n, 4);\n   tc4_addmul_1(u2, &n2, a3, a3n, 8);\n   tc4_addmul_1(u2, &n2, a2, a2n, 4);\n   tc4_addlsh1_unsigned(u2, &n2, a1, a1n);\n   tc4_add(u2, &n2, u2, n2, a0, a0n);\n\n   tc4_lshift(r1, &n1, b2, b2n, 2);\n   tc4_addlsh1_unsigned(r1, &n1, b1, b1n);\n   tc4_add(r1, &n1, r1, n1, b0, b0n);\n   \n   MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */\n\n   MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */\n   MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */\n\n   TC4_DENORM(r1, n1,  t4 - 1);\n   TC4_DENORM(r2, n2,  t4 - 1);\n   if (n3)\n     TC4_DENORM(r3, n3,  t4 - 1); \n   else {\n     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */\n     for (ind = 1 ; ind < t4 - 1; ind++) \n        (r3)[ind] = CNST_LIMB(0); \n   }\n   TC4_DENORM(r4, n4,  t4 - 1);\n   TC4_DENORM(r5, n5,  t4 - 1);\n   TC4_DENORM(r6, n6,  t4 - 1);\n   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)\n\n/*\trp        rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                       <-------------r3------------->\n\n              <-------------r6------------->                        < -----------r2------------>{           }\n                                         <-------------r4-------------->         <--------------r1---->\n*/\n\n   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);\n\n   if (rpn != un + vn) \n   {\n\t  MPN_ZERO((rp + rpn), un + vn - rpn);\n   }\n\n   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));\n}\n\n/*\n   Toom 4 interpolation. 
Interpolates the value at 2^(sn*B) of a \n\tpolynomial p(x) with 7 coefficients given the values \n\tp(oo), p(2), p(1), p(-1), 2^6*p(1/2), 2^6*p(-1/2), p(0).\n\tThe output is placed in rp and the final number of limbs of the\n\toutput is given in rpn.\n\tThe 4th and 6th values may be negative, and if so, n4 and n6 \n\tshould be set to a negative value respectively.\n   To save space we pass r3, r5, r7 in place in the output rp.\n\tThe other r's are stored separately in space tp.\n\tThe low limb of r3 is stored in r30, as it will be overwritten\n\tby the high limb of r5.\n\nrp          rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                      <-------------r3------------->\n\n   We assume that r1 is stored at tp, r2 at (tp + t4), r4 at (tp + 2*t4) \n\tand r6 (tp + 3*t4). Each of these r's has t4 = s4 + 1 limbs allocated.\n*/\n"
  },
  {
    "path": "mpn/generic/toom4_mul_n.c",
    "content": "/* mpn_toom4_mul_n -- Internal routine to multiply two natural numbers\n   of length n.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.\n*/\n\n/* Implementation of the Bodrato-Zanoni algorithm for Toom-Cook 4-way.\n\nCopyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n/*\n   This implementation is based on that of Paul Zimmmermann, which is available\n\tfor mpz_t's at http://www.loria.fr/~zimmerma/software/toom4.c\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpn_toom4_mul_n (mp_ptr rp, mp_srcptr up,\n\t\t          mp_srcptr vp, mp_size_t n);\n\nvoid _tc4_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, \n                                                 mp_srcptr r2, mp_size_t r2n)\n{\n   mp_limb_t cy;\n   mp_size_t s1 = ABS(r1n);\n   mp_size_t s2 = ABS(r2n);\n   \n   if (!s1)\n   {\n      *rn = 0;\n   } else if (!s2)\n   {\n      if (rp != r1) MPN_COPY(rp, r1, s1);\n\t\t*rn = r1n;\n   } else if ((r1n ^ r2n) >= 0)\n   {\n      *rn = r1n;\n      cy = mpn_add(rp, r1, s1, r2, s2);\n      if (cy) \n      {\n         rp[s1] = cy;\n         if ((*rn) < 0) (*rn)--;\n         else (*rn)++;\n      }\n   } else\n   {\n      mp_size_t ct;\n\t\tif (s1 != s2) ct = 1;\n\t\telse MPN_CMP(ct, r1, r2, s1); \n\t\t    \n      if (!ct) *rn = 0;\n      else if (ct > 0) \n      {\n         mpn_sub(rp, r1, s1, r2, s2);\n         *rn = s1;\n         MPN_NORMALIZE(rp, (*rn));\n\t\t\tif (r1n < 0) *rn = -(*rn);\n      }\n      else\n      {\n         mpn_sub_n(rp, r2, r1, s1);\n         *rn = s1;\n         MPN_NORMALIZE(rp, (*rn));\n\t\t\tif (r1n > 0) *rn = -(*rn);\n      }\n   }\n}\n\nvoid tc4_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, \n                                                 mp_srcptr r2, mp_size_t r2n)\n{\n   mp_size_t s1 = ABS(r1n);\n   mp_size_t s2 = ABS(r2n);\n   \n   if (s1 < s2) _tc4_add(rp, rn, r2, r2n, r1, r1n);\n\telse _tc4_add(rp, rn, r1, r1n, r2, r2n);\n} \n\nvoid _tc4_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, \n                                   mp_size_t r1n, mp_srcptr r2, mp_size_t r2n)\n{\n   mp_limb_t cy;\n   mp_size_t s1 = r1n;\n   mp_size_t s2 = r2n;\n   \n   if (!s2)\n   {\n      if (!s1) *rn = 0;\n      else\n      {\n         if (rp != r1) 
MPN_COPY(rp, r1, s1);\n\t\t   *rn = r1n;\n\t\t}\n   } else\n   {\n      *rn = r1n;\n      cy = mpn_add(rp, r1, s1, r2, s2);\n      if (cy) \n      {\n         rp[s1] = cy;\n         if ((*rn) < 0) (*rn)--;\n         else (*rn)++;\n      }\n   } \n}\n\nvoid tc4_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, \n                                    mp_size_t r1n, mp_srcptr r2, mp_size_t r2n)\n{\n   if (r1n < r2n) _tc4_add_unsigned(rp, rn, r2, r2n, r1, r1n);\n\telse _tc4_add_unsigned(rp, rn, r1, r1n, r2, r2n);\n} \n\nvoid tc4_sub(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, \n                                                   mp_srcptr r2, mp_size_t r2n)\n{\n   tc4_add(rp, rn, r1, r1n, r2, -r2n);\n}\n\t\nvoid tc4_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, \n                                                  mp_size_t xn, mp_size_t bits)\n{\n   if (xn == 0) *rn = 0;\n   else\n\t{\n\t\tmp_size_t xu = ABS(xn);\n\t\tmp_limb_t msl = mpn_lshift(rp, xp, xu, bits);\n      if (msl) \n\t\t{\n\t\t\trp[xu] = msl;\n\t\t\t*rn = (xn >= 0 ? 
xn + 1 : xn - 1);\n\t\t} else\n\t\t   *rn = xn;\n\t}\n}\n\nvoid tc4_rshift_inplace(mp_ptr rp, mp_size_t * rn, mp_size_t bits)\n{\n   if (*rn)\n\t{\n\t\tif ((*rn) > 0) \n\t\t{\n\t\t\tmpn_rshift(rp, rp, *rn, bits);\n\t      if (rp[(*rn) - 1] == CNST_LIMB(0)) (*rn)--;\n\t\t} else \n\t\t{\n\t\t\tmpn_rshift(rp, rp, -(*rn), bits);\n\t      if (rp[-(*rn) - 1] == CNST_LIMB(0)) (*rn)++;\n\t\t}\n\t}\n}\n\nvoid tc4_addlsh1_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn)\n{\n\tif (xn)\n\t{\n\t\tif (xn >= *rn)\n\t\t{\n            mp_limb_t cy;\n\t\t\tif (xn > *rn) MPN_ZERO(rp + *rn, xn - *rn);\n#if HAVE_NATIVE_mpn_addlsh1_n\n            cy = mpn_addlsh1_n(rp, rp, xp, xn);\n#else\n            cy = mpn_add_n(rp, rp, xp, xn);\n            cy += mpn_add_n(rp, rp, xp, xn);\n#endif\n\t\t\tif (cy) \n\t\t\t{\n\t\t\t\trp[xn] = cy;\n\t\t\t\t*rn = xn + 1;\n\t\t\t} else *rn = xn;\n\t\t} else\n\t   {\n\t\t   mp_limb_t cy;\n#if HAVE_NATIVE_mpn_addlsh1_n\n            cy = mpn_addlsh1_n(rp, rp, xp, xn);\n#else\n            cy = mpn_add_n(rp, rp, xp, xn);\n            cy += mpn_add_n(rp, rp, xp, xn);\n#endif\n\t      if (cy) cy = mpn_add_1(rp + xn, rp + xn, *rn - xn, cy);\n\t\t   if (cy) \n\t\t   {\n\t\t\t   rp[*rn] = cy;\n\t\t\t   (*rn)++;\n\t\t   }\n\t\t}\n\t}\n}\n\nvoid tc4_divexact_ui(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn, mp_limb_t c)\n{\n  mp_size_t abs_size;\n  if (xn == 0)\n    {\n      *rn = 0;\n      return;\n    }\n  abs_size = ABS (xn);\n\n  MPN_DIVREM_OR_DIVEXACT_1 (rp, x, abs_size, c);\n  abs_size -= (rp[abs_size-1] == 0);\n  *rn = (xn >= 0 ? 
abs_size : -abs_size);\n}\n\nvoid tc4_divexact_by3(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn)\n{\n\tif (xn)\n\t{\n\t\tmp_size_t xu = ABS(xn);\n\t\tmpn_divexact_by3(rp, x, xu);\n\t\tif (xn > 0)\n\t\t{\n\t\t\tif (rp[xu - 1] == CNST_LIMB(0)) *rn = xn - 1;\n\t\t\telse *rn = xn;\n\t\t} else\n\t\t{\n\t\t\tif (rp[xu - 1] == CNST_LIMB(0)) *rn = xn + 1;\n\t\t\telse *rn = xn;\n\t\t}\t\n\t} else *rn = 0;\n}\n\nvoid tc4_divexact_by15(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn)\n{\n\tif (xn)\n\t{\n\t\tmp_size_t xu = ABS(xn);\n\t\tmpn_divexact_byfobm1(rp, x, xu, CNST_LIMB(15), CNST_LIMB((~0)/15)); /* works for 32 and 64 bits */\n\t\tif (xn > 0)\n\t\t{\n\t\t\tif (rp[xu - 1] == CNST_LIMB(0)) *rn = xn - 1;\n\t\t\telse *rn = xn;\n\t\t} else\n\t\t{\n\t\t\tif (rp[xu - 1] == CNST_LIMB(0)) *rn = xn + 1;\n\t\t\telse *rn = xn;\n\t\t}\t\n\t} else *rn = 0;\n}\n\n#if HAVE_NATIVE_mpn_mul_1c\n#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \\\n  do {                                                  \\\n    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \\\n  } while (0)\n#else\n#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \\\n  do {                                                  \\\n    mp_limb_t __cy;                                     \\\n    __cy = mpn_mul_1 (dst, src, size, n);               \\\n    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \\\n  } while (0)\n#endif\n\nvoid tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y)\n{\n  mp_size_t  sign, wu, xu, ws, new_wn, min_size, dsize;\n  mp_limb_t  cy;\n\n  /* w unaffected if x==0 or y==0 */\n  if (xn == 0 || y == 0)\n    return;\n\n  sign = xn;\n  xu = ABS (xn);\n\n  ws = *wn;\n  if (*wn == 0)\n  {\n      /* nothing to add to, just set x*y, \"sign\" gives the sign */\n      cy = mpn_mul_1 (wp, xp, xu, y);\n      if (cy)\n\t\t{\n\t\t\twp[xu] = cy;\n         xu = xu + 1;\n\t\t} \n      *wn = (sign >= 0 ? 
xu : -xu);\n      return;\n  }\n  \n  sign ^= *wn;\n  wu = ABS (*wn);\n\n  new_wn = MAX (wu, xu);\n  min_size = MIN (wu, xu);\n\n  if (sign >= 0)\n  {\n      /* addmul of absolute values */\n\n      cy = mpn_addmul_1 (wp, xp, min_size, y);\n      \n      dsize = xu - wu;\n#if HAVE_NATIVE_mpn_mul_1c\n      if (dsize > 0)\n        cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy);\n      else if (dsize < 0)\n      {\n          dsize = -dsize;\n          cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);\n      }\n#else\n      if (dsize != 0)\n      {\n          mp_limb_t cy2;\n          if (dsize > 0)\n            cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y);\n          else\n          {\n              dsize = -dsize;\n              cy2 = 0;\n          }\n          cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);\n      }\n#endif\n\n      if (cy)\n\t\t{\n\t\t\twp[dsize + min_size] = cy;\n         new_wn ++;\n\t\t}\n   } else\n   {\n      /* submul of absolute values */\n\n      cy = mpn_submul_1 (wp, xp, min_size, y);\n      if (wu >= xu)\n      {\n          /* if w bigger than x, then propagate borrow through it */\n          if (wu != xu)\n            cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy);\n\n          if (cy != 0)\n          {\n              /* Borrow out of w, take twos complement negative to get\n                 absolute value, flip sign of w.  */\n              wp[new_wn] = ~-cy;  /* extra limb is 0-cy */\n              mpn_not (wp, new_wn);\n              new_wn++;\n              MPN_INCR_U (wp, new_wn, CNST_LIMB(1));\n              ws = -*wn;\n          }\n      } else /* wu < xu */\n      {\n          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so\n             take twos complement and use an mpn_mul_1 for the rest.  
*/\n\n          mp_limb_t  cy2;\n\n          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */\n          mpn_not (wp, wu);\n          cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1));\n          cy -= 1;\n\n          /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never\n             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */\n          cy2 = (cy == MP_LIMB_T_MAX);\n          cy += cy2;\n          MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy);\n          wp[new_wn] = cy;\n          new_wn += (cy != 0);\n\n          /* Apply any -1 from above.  The value at wp+wsize is non-zero\n             because y!=0 and the high limb of x will be non-zero.  */\n          if (cy2)\n            MPN_DECR_U (wp+wu, new_wn - wu, CNST_LIMB(1));\n\n          ws = -*wn;\n        }\n\n      /* submul can produce high zero limbs due to cancellation, both when w\n         has more limbs or x has more  */\n      MPN_NORMALIZE (wp, new_wn);\n  }\n\n  *wn = (ws >= 0 ? new_wn : -new_wn);\n\n  ASSERT (new_wn == 0 || wp[new_wn - 1] != 0);\n}\n\nvoid tc4_submul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr x, mp_size_t xn, mp_limb_t y)\n{\n\ttc4_addmul_1(wp, wn, x, -xn, y);\n}\n\nvoid tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn)\n{\n  mp_size_t yu = ABS(*yn);\n  mp_size_t xu = ABS(xn);\n  mp_limb_t cy = 0;\n\n  if (xn == 0)\n    return;\n\n  if (offset < yu) /* low part of x overlaps with y */\n  {\n      if (offset + xu <= yu) /* x entirely inside y */\n      {\n          cy = mpn_add_n (yp + offset, yp + offset, xp, xu);\n          if (offset + xu < yu)\n            cy = mpn_add_1 (yp + offset + xu, yp + offset + xu,\n                            yu - (offset + xu), cy);\n      } else\n        cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset);\n      /* now cy is the carry at yp + yu */\n      if (xu + offset > yu) /* high part of x exceeds y */\n      {\n          MPN_COPY (yp + yu, xp + yu - offset, xu + offset - 
yu);\n          cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy);\n          yu = xu + offset;\n      }\n      /* now cy is the carry at yp + yn */\n      if (cy)\n        yp[yu++] = cy;\n      MPN_NORMALIZE(yp, yu);\n      *yn = yu;\n  } else /* x does not overlap */\n  {\n      if (offset > yu)\n        MPN_ZERO (yp + yu, offset - yu);\n      MPN_COPY (yp + offset, xp, xu);\n      *yn = offset + xu;\n  }\n}\n\n#define MUL_TC4_UNSIGNED(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \\\n   do \\\n   { \\\n      if ((n1xx != 0) && (n2xx != 0)) \\\n      { mp_size_t len; \\\n\t      if (n1xx == n2xx) \\\n\t\t   { \\\n\t\t\t   if (n1xx > MUL_TOOM4_THRESHOLD) mpn_toom4_mul_n(r3xx, r1xx, r2xx, n1xx); \\\n            else mpn_mul_n(r3xx, r1xx, r2xx, n1xx); \\\n\t\t   } else if (n1xx > n2xx) \\\n\t\t      mpn_mul(r3xx, r1xx, n1xx, r2xx, n2xx); \\\n\t\t   else \\\n\t\t      mpn_mul(r3xx, r2xx, n2xx, r1xx, n1xx); \\\n\t      len = n1xx + n2xx; \\\n\t\t   MPN_NORMALIZE(r3xx, len); \\\n\t\t   n3xx = len; \\\n      } else \\\n         n3xx = 0; \\\n   } while (0)\n\n#define MUL_TC4(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \\\n\tdo \\\n\t{ \\\n\t   mp_size_t sign = n1xx ^ n2xx; \\\n\t   mp_size_t un1 = ABS(n1xx); \\\n\t   mp_size_t un2 = ABS(n2xx); \\\n\t\tMUL_TC4_UNSIGNED(r3xx, n3xx, r1xx, un1, r2xx, un2); \\\n\t\tif (sign < 0) n3xx = -n3xx; \\\n\t} while (0)\n\n#define SQR_TC4_UNSIGNED(r3xx, n3xx, r1xx, n1xx) \\\n   do \\\n   { \\\n      if (n1xx != 0) \\\n      { mp_size_t len; \\\n\t      if (n1xx > MUL_TOOM4_THRESHOLD) mpn_toom4_sqr_n(r3xx, r1xx, n1xx); \\\n         else mpn_sqr(r3xx, r1xx, n1xx); \\\n\t\t   len = 2*n1xx; \\\n\t\t   MPN_NORMALIZE(r3xx, len); \\\n\t\t   n3xx = len; \\\n      } else \\\n         n3xx = 0; \\\n   } while (0)\n\n#define SQR_TC4(r3xx, n3xx, r1xx, n1xx) \\\n\tdo \\\n\t{ \\\n\t   mp_size_t un1 = ABS(n1xx); \\\n\t   SQR_TC4_UNSIGNED(r3xx, n3xx, r1xx, un1); \\\n\t} while (0)\n\n#define TC4_NORM(rxx, nxx, sxx) \\\n\tdo \\\n\t{ \\\n\t   nxx = sxx; \\\n\t   
MPN_NORMALIZE(rxx, nxx); \\\n\t} while(0)\n\n/* Zero out limbs to end of integer */\n#define TC4_DENORM(rxx, nxx, sxx) \\\n\tdo { \\\n\tMPN_ZERO(rxx + ABS(nxx), sxx - ABS(nxx)); \\\n\t} while (0)\n\n/* Two's complement divexact by power of 2 */\n#define TC4_DIVEXACT_2EXP(rxx, nxx, sxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - sxx)); \\\n      mpn_rshift(rxx, rxx, nxx, sxx); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n\n#if HAVE_NATIVE_mpn_rshift1\n#define TC4_RSHIFT1(rxx, nxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \\\n       mpn_half(rxx, nxx); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n#else\n#define TC4_RSHIFT1(rxx, nxx) \\\n\tdo { \\\n\t   mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \\\n       mpn_rshift(rxx, rxx, nxx, 1); \\\n\t   rxx[nxx-1] |= sign; \\\n\t} while (0)\n#endif\n\n#define r1 (tp)\n#define r2 (tp + t4)\n#define r4 (tp + 2*t4)\n#define r6 (tp + 3*t4)\n\n#define r3 (rp + 4*sn)\n#define r5 (rp + 2*sn)\n#define r7 (rp)\n\n/* Multiply {up, n} by {vp, n} and write the result to\n   {prodp, 2n}.\n\n   Note that prodp gets 2n limbs stored, even if the actual result\n   only needs 2n - 1.\n*/\n\n#define mpn_clearit(rxx, nxx) \\\n  do { \\\n     mp_size_t ind = 0; \\\n     for ( ; ind < nxx; ind++) \\\n        (rxx)[ind] = CNST_LIMB(0); \\\n  } while (0)\n\nvoid\nmpn_toom4_mul_n (mp_ptr rp, mp_srcptr up,\n\t\t          mp_srcptr vp, mp_size_t n)\n{\n  mp_size_t ind;\n  mp_limb_t cy, cy2, r30, r31;\n  mp_ptr tp;\n  mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, rpn, t4, h1;\n  TMP_DECL;\n\n  sn = (n + 3) / 4;\n\n  h1 = n - 3*sn;\n  \n#define a0 (up)\n#define a1 (up + sn)\n#define a2 (up + 2*sn)\n#define a3 (up + 3*sn)\n#define b0 (vp)\n#define b1 (vp + sn)\n#define b2 (vp + 2*sn)\n#define b3 (vp + 3*sn)\n\n   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs\n\n   TMP_MARK;\n\n   tp = TMP_ALLOC_LIMBS(4*t4 
+ 5*(sn + 1));\n\n#define u2 (tp + 4*t4)\n#define u3 (tp + 4*t4 + (sn+1))\n#define u4 (tp + 4*t4 + 2*(sn+1))\n#define u5 (tp + 4*t4 + 3*(sn+1))\n#define u6 (tp + 4*t4 + 4*(sn+1))\n\n   u6[sn] = mpn_add(u6, a1, sn, a3, h1);\n   u5[sn] = mpn_add_n(u5, a2, a0, sn);\n   mpn_add_n(u3, u5, u6, sn + 1);\n   n4 = sn + 1;\n   if (mpn_cmp(u5, u6, sn + 1) >= 0)\n      mpn_sub_n(u4, u5, u6, sn + 1);\n   else\n   {  \n      mpn_sub_n(u4, u6, u5, sn + 1);\n      n4 = -n4;\n   }\n\n   u6[sn] = mpn_add(u6, b1, sn, b3, h1);\n   u5[sn] = mpn_add_n(u5, b2, b0, sn);\n   mpn_add_n(r2, u5, u6, sn + 1);\n   n5 = sn + 1;\n   if (mpn_cmp(u5, u6, sn + 1) >= 0)\n      mpn_sub_n(u5, u5, u6, sn + 1);\n   else\n   {  \n      mpn_sub_n(u5, u6, u5, sn + 1);\n      n5 = -n5;\n   }\n \n   MUL_TC4_UNSIGNED(r3, n3, u3, sn + 1, r2, sn + 1); /* 1 */\n   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */\n   \n#if HAVE_NATIVE_mpn_addlsh_n\n   r1[sn] = mpn_addlsh_n(r1, a2, a0, sn, 2);\n   mpn_lshift(r1, r1, sn + 1, 1);\n   cy = mpn_addlsh_n(r2, a3, a1, h1, 2);\n#else\n   r1[sn] = mpn_lshift(r1, a2, sn, 1);\n   MPN_COPY(r2, a3, h1);\n   r1[sn] += mpn_addmul_1(r1, a0, sn, 8);\n   cy = mpn_addmul_1(r2, a1, h1, 4);\n#endif\n   if (sn > h1) \n   {\n      cy2 = mpn_lshift(r2 + h1, a1 + h1, sn - h1, 2);\n      cy = cy2 + mpn_add_1(r2 + h1, r2 + h1, sn - h1, cy);\n   }\n   r2[sn] = cy;\n   mpn_add_n(u5, r1, r2, sn + 1);\n   n6 = sn + 1;\n   if (mpn_cmp(r1, r2, sn + 1) >= 0)\n      mpn_sub_n(u6, r1, r2, sn + 1);\n   else\n   {  \n      mpn_sub_n(u6, r2, r1, sn + 1);\n      n6 = -n6;\n   }\n \n#if HAVE_NATIVE_mpn_addlsh_n\n   r1[sn] = mpn_addlsh_n(r1, b2, b0, sn, 2);\n   mpn_lshift(r1, r1, sn + 1, 1);\n   cy = mpn_addlsh_n(r2, b3, b1, h1, 2);\n#else\n   r1[sn] = mpn_lshift(r1, b2, sn, 1);\n   MPN_COPY(r2, b3, h1);\n   r1[sn] += mpn_addmul_1(r1, b0, sn, 8);\n   cy = mpn_addmul_1(r2, b1, h1, 4);\n#endif\n   if (sn > h1) \n   {\n      cy2 = mpn_lshift(r2 + h1, b1 + h1, sn - h1, 2);\n      cy = cy2 + mpn_add_1(r2 + h1, r2 + 
h1, sn - h1, cy);\n   }\n   r2[sn] = cy;\n   mpn_add_n(u2, r1, r2, sn + 1);\n   n8 = sn + 1;\n   if (mpn_cmp(r1, r2, sn + 1) >= 0)\n      mpn_sub_n(r2, r1, r2, sn + 1);\n   else\n   {  \n      mpn_sub_n(r2, r2, r1, sn + 1);\n      n8 = -n8;\n   }\n    \n   r30 = r3[0];\n   r31 = r3[1];\n   MUL_TC4_UNSIGNED(r5, n5, u5, sn + 1, u2, sn + 1); /* 1/2 */\n   MUL_TC4(r6, n6, u6, n6, r2, n8); /* -1/2 */\n   r3[1] = r31;\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n   cy = mpn_addlsh1_n(u2, a2, a3, h1);\n   if (sn > h1)\n      cy = mpn_add_1(u2 + h1, a2 + h1, sn - h1, cy); \n   u2[sn] = cy;\n   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a1, u2, sn);     \n   u2[sn] = 2*u2[sn] + mpn_addlsh1_n(u2, a0, u2, sn);     \n#else\n   MPN_COPY(u2, a0, sn);\n   u2[sn] = mpn_addmul_1(u2, a1, sn, 2);\n   u2[sn] += mpn_addmul_1(u2, a2, sn, 4);\n   cy = mpn_addmul_1(u2, a3, h1, 8);\n   if (sn > h1) cy = mpn_add_1(u2 + h1, u2 + h1, sn - h1, cy);\n   u2[sn] += cy;\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n   cy = mpn_addlsh1_n(r1, b2, b3, h1);\n   if (sn > h1)\n      cy = mpn_add_1(r1 + h1, b2 + h1, sn - h1, cy); \n   r1[sn] = cy;\n   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b1, r1, sn);     \n   r1[sn] = 2*r1[sn] + mpn_addlsh1_n(r1, b0, r1, sn);     \n#else\n   MPN_COPY(r1, b0, sn);\n   r1[sn] = mpn_addmul_1(r1, b1, sn, 2);\n   r1[sn] += mpn_addmul_1(r1, b2, sn, 4);\n   cy = mpn_addmul_1(r1, b3, h1, 8);\n   if (sn > h1) cy = mpn_add_1(r1 + h1, r1 + h1, sn - h1, cy);\n   r1[sn] += cy;\n#endif\n   \n   MUL_TC4_UNSIGNED(r2, n2, u2, sn + 1, r1, sn + 1); /* 2 */\n   \n   MUL_TC4_UNSIGNED(r1, n1, a3, h1, b3, h1); /* oo */\n   MUL_TC4_UNSIGNED(r7, n7, a0, sn, b0, sn); /* 0 */\n\n   TC4_DENORM(r1, n1, t4 - 1);\n\n/*\trp        rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                       <-------------r3------------->\n\n              <-------------r6------------->       
                 < -----------r2------------>{           }\n                                         <-------------r4-------------->         <--------------r1---->\n*/\n\n   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);\n\n   if (rpn != 2*n) \n   {\n\t  MPN_ZERO((rp + rpn), 2*n - rpn);\n   }\n\n   TMP_FREE;\n}\n\n/* Square {up, n} and write the result to {prodp, 2n}.\n\n   Note that prodp gets 2n limbs stored, even if the actual result\n   only needs 2n - 1.\n*/\n\nvoid\nmpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n)\n{\n  mp_size_t len1, ind;\n  mp_limb_t cy, r30, r31;\n  mp_ptr tp;\n  mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4;\n\n  len1 = n;\n  ASSERT (n >= 1);\n\n  MPN_NORMALIZE(up, len1);\n  \n  sn = (n - 1) / 4 + 1;\n\n  /* a0 - a3 are defined in mpn_toom4_mul_n above */\n  \n   TC4_NORM(a0, a0n, sn);\n\tTC4_NORM(a1, a1n, sn);\n\tTC4_NORM(a2, a2n, sn);\n\tTC4_NORM(a3, a3n, n - 3*sn); \n\n   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs\n\n   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));\n\n   tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n); \n   tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n); \n\ttc4_add_unsigned(u2, &n2, u4, n4, u5, n5); \n   tc4_sub(u3, &n3, u4, n4, u5, n5);\n\n\tSQR_TC4(r4, n4, u3, n3);\n   SQR_TC4_UNSIGNED(r3, n3, u2, n2);\n\t\n\ttc4_lshift(r1, &n1, a0, a0n, 3);\n\ttc4_addlsh1_unsigned(r1, &n1, a2, a2n);\n \ttc4_lshift(r2, &n8, a1, a1n, 2);\n   tc4_add(r2, &n8, r2, n8, a3, a3n);\n   tc4_add(u4, &n9, r1, n1, r2, n8);\n   tc4_sub(u5, &n5, r1, n1, r2, n8);\n   \n\tr30 = r3[0];\n\tif (!n3) r30 = CNST_LIMB(0);\n   r31 = r3[1];\n\tSQR_TC4(r6, n6, u5, n5);\n   SQR_TC4_UNSIGNED(r5, n5, u4, n9);\n   r3[1] = r31;\n\n   tc4_lshift(u2, &n8, a3, a3n, 3);\n   tc4_addmul_1(u2, &n8, a2, a2n, 4);\n\ttc4_addlsh1_unsigned(u2, &n8, a1, a1n);\n\ttc4_add(u2, &n8, u2, n8, a0, a0n);\n   \n\tSQR_TC4_UNSIGNED(r2, n2, u2, n8);\n   SQR_TC4_UNSIGNED(r1, n1, a3, a3n);\n   SQR_TC4_UNSIGNED(r7, n7, a0, 
a0n);\n\n\tTC4_DENORM(r1, n1,  t4 - 1);\n   TC4_DENORM(r2, n2,  t4 - 1);\n   if (n3)\n     TC4_DENORM(r3, n3,  t4 - 1);\n   else {\n     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */\n     for (ind = 1 ; ind < t4 - 1; ind++)\n        (r3)[ind] = CNST_LIMB(0);\n   }\n   TC4_DENORM(r4, n4,  t4 - 1);\n   TC4_DENORM(r5, n5,  t4 - 1);\n   TC4_DENORM(r6, n6,  t4 - 1);\n   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)\n\n/*\trp        rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                       <-------------r3------------->\n\n              <-------------r6------------->                        < -----------r2------------>{           }\n                                         <-------------r4-------------->         <--------------r1---->\n*/\n\n\tmpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);\n\n\tif (rpn != 2*n) \n\t{\n\t\tMPN_ZERO((rp + rpn), 2*n - rpn);\n\t}\n\n   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));\n}\n\n/*\n   Toom 4 interpolation. 
Interpolates the value at 2^(sn*B) of a \n\tpolynomial p(x) with 7 coefficients given the values \n\tp(oo), p(2), p(1), p(-1), 2^6*p(1/2), 2^6*p(-1/2), p(0).\n\tThe output is placed in rp and the final number of limbs of the\n\toutput is given in rpn.\n\tThe 4th and 6th values may be negative, and if so, n4 and n6 \n\tshould be set to a negative value respectively.\n   To save space we pass r3, r5, r7 in place in the output rp.\n\tThe other r's are stored separately in space tp.\n\tThe low limb of r3 is stored in r30, as it will be overwritten\n\tby the high limb of r5.\n\nrp          rp1          rp2           rp3          rp4           rp5         rp6           rp7\n<----------- r7-----------><------------r5-------------->            \n                                                      <-------------r3------------->\n\n   We assume that r1 is stored at tp, r2 at (tp + t4), r4 at (tp + 2*t4) \n\tand r6 (tp + 3*t4). Each of these r's has t4 = s4 + 1 limbs allocated.\n*/\nvoid mpn_toom4_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn,  \n\t\t       mp_ptr tp, mp_size_t s4, mp_size_t n4, mp_size_t n6, mp_limb_t r30)\n{\n\tmp_size_t n1, n2, n3, n5, n7, t4;\n\tmp_limb_t saved, saved2, cy;\n\n   t4 = s4 + 1; \n   \n\tmpn_add_n(r2, r2, r5, s4);\n\n   if (n6 < 0) \n\t\tmpn_add_n(r6, r5, r6, s4);\n\telse\n      mpn_sub_n(r6, r5, r6, s4);\n\t/* r6 is now in twos complement format */\n\n\tsaved = r3[0];\n\tr3[0] = r30;\n\tif (n4 < 0) \n\t\tmpn_add_n(r4, r3, r4, s4);\n\telse\n      mpn_sub_n(r4, r3, r4, s4);\n\tr3[0] = saved;\n\t/* r4 is now in twos complement format */\n\t\n\tmpn_sub_n(r5, r5, r1, s4);\n\n#if HAVE_NATIVE_mpn_sublsh_n\n\tr5[s4-1] -= mpn_sublsh_n(r5, r5, r7, s4-1, 6);\n#else\n\tr5[s4-1] -= mpn_submul_1(r5, r7, s4-1, 64);\n#endif\n   \n   TC4_RSHIFT1(r4, s4); \n\t\n\tsaved = r3[0];\n\tr3[0] = r30;\n\tmpn_sub_n(r3, r3, r4, s4);\n\tr30 = r3[0];\n\tr3[0] = saved;\n\n\tmpn_double(r5, s4); \n\n\tmpn_sub_n(r5, r5, r6, s4);\n\n   saved = r3[0];\n\tr3[0] = 
r30;\n\tmpn_submul_1(r2, r3, s4, 65);\n   r3[0] = saved;\n\t\n\tsaved2 = r7[s4-1];\n\tr7[s4-1] = CNST_LIMB(0); // r7 is always positive so no sign extend needed\n\tsaved = r3[0];\n\tr3[0] = r30;\n#if HAVE_NATIVE_mpn_subadd_n\n\tmpn_subadd_n(r3, r3, r7, r1, s4);\n#else\n    mpn_sub_n(r3, r3, r7, s4);\n    mpn_sub_n(r3, r3, r1, s4);\n#endif\n\tr7[s4-1] = saved2;\n   r30 = r3[0];\n\t\n   mpn_addmul_1(r2, r3, s4, 45);\n\n#if HAVE_NATIVE_mpn_sublsh_n\n   cy = mpn_sublsh_n(r5, r5, r3, s4 - 1, 3);\n#else\n   cy = mpn_submul_1(r5, r3, s4 - 1, 8);\n#endif\n   r3[0] = saved;\n\tr3[0] -= (cy + 8*r3[s4-1]);\n   \n\tmpn_rshift(r5, r5, s4, 3); \n\n\tmpn_divexact_by3(r5, r5, s4); \n   \n\tmpn_sub_n(r6, r6, r2, s4);\n\n#if HAVE_NATIVE_mpn_sublsh_n\n\tmpn_sublsh_n(r2, r2, r4, s4, 4);\n#else\n\tmpn_submul_1(r2, r4, s4, 16);\n#endif\n   \n   mpn_rshift(r2, r2, s4, 1); \n\n\tmpn_divexact_by3(r2, r2, s4); \n\n   mpn_divexact_by3(r2, r2, s4); \n   \n   saved = r3[0];\n\tr3[0] = r30;\n   cy = mpn_sub_n(r3, r3, r5, s4 - 1);\n   r30 = r3[0];\n\tr3[0] = saved;\n\tr3[s4-1] -= (cy + r5[s4-1]);\n   \n\tmpn_sub_n(r4, r4, r2, s4);\n\t\n\tmpn_addmul_1(r6, r2, s4, 30);\n\n   mpn_divexact_byfobm1(r6, r6, s4, CNST_LIMB(15), CNST_LIMB(~0/15));\n\n\tmpn_rshift(r6, r6, s4, 2);\n\n\tmpn_sub_n(r2, r2, r6, s4);\n\n\tTC4_NORM(r1, n1, s4);\n   TC4_NORM(r2, n2, s4);\n   \n   (*rpn) = 6*sn+1;\n\tcy = mpn_add_1(r3, r3, *rpn - 4*sn, r30); /* don't forget to add r3[0] back in */\n   if (cy) \n\t{\n\t\trp[*rpn] = cy;\n\t   (*rpn)++;\n\t}\n\n\ttc4_copy(rp, rpn, 5*sn, r2, n2);\n   tc4_copy(rp, rpn, 6*sn, r1, n1);\n\n\ttc4_copy(rp, rpn, sn, r6, s4);\n   tc4_copy(rp, rpn, 3*sn, r4, s4); \n}\n"
  },
  {
    "path": "mpn/generic/toom8_sqr_n.c",
    "content": "/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.\n\n   Contributed to the GNU project by Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_NUMB_BITS < 29\n#error Not implemented.\n#endif\n\n#if GMP_NUMB_BITS < 43\n#define BIT_CORRECTION 1\n#define CORRECTION_BITS GMP_NUMB_BITS\n#else\n#define BIT_CORRECTION 0\n#define CORRECTION_BITS 0\n#endif\n\n#ifndef SQR_TOOM8_THRESHOLD\n#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD\n#endif\n\n#define TOOM8_SQR_REC(p, a, n)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))\t\t\\\n      mpn_sqr (p, a, n);\t\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      mpn_toom8_sqr_n (p, a, n);\t\t\t\t\t\\\n  } while (0)\n\nvoid\nmpn_toom8_sqr_n  (mp_ptr pp, mp_srcptr ap, mp_size_t an)\n{\n  mp_size_t n, s;\n  mp_ptr scratch;\n\n  TMP_DECL;\n  \n  TMP_MARK;\n\n  /***************************** decomposition *******************************/\n\n  ASSERT ( an >= 40 );\n\n  n = 1 + ((an - 1)>>3);\n\n  s = an - 7 * n;\n\n  ASSERT (0 < s && s <= n);\n  ASSERT ( s + s > 3 );\n\n  scratch = TMP_ALLOC_LIMBS (30 * n + 6);\n\n#define   r6    (pp + 3 * n)\t\t\t/* 3n+1 */\n#define   r4    (pp + 7 * n)\t\t\t/* 3n+1 */\n#define   r2    (pp +11 * n)\t\t\t/* 3n+1 */\n#define   r0    (pp +15 * n)\t\t\t/* s+t <= 2*n */\n#define   r7    (scratch)\t\t\t/* 3n+1 */\n#define   r5    (scratch + 3 * n + 1)\t\t/* 3n+1 */\n#define   r3    (scratch + 6 * n + 2)\t\t/* 3n+1 */\n#define   r1    (scratch + 9 * n + 3)\t\t/* 3n+1 */\n#define   v0    (pp +11 * n)\t\t\t/* n+1 */\n#define   v2    (pp +13 * n+2)\t\t\t/* n+1 */\n#define   wse   (scratch +12 * n + 4)\t\t/* 3n+1 */\n\n  /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may\n     need all of them, when DO_mpn_sublsh_n use a a scratch  */\n\n  /********************** evaluation and recursive calls *********************/\n  /* $\\pm1/8$ */\n  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-1/8)*B(-1/8)*8^. */\n  TOOM8_SQR_REC(r7, v2, n + 1); /* A(+1/8)*B(+1/8)*8^. 
*/\n  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);\n\n  /* $\\pm1/4$ */\n  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-1/4)*B(-1/4)*4^. */\n  TOOM8_SQR_REC(r5, v2, n + 1); /* A(+1/4)*B(+1/4)*4^. */\n  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);\n\n  /* $\\pm2$ */\n  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-2)*B(-2) */\n  TOOM8_SQR_REC(r3, v2, n + 1); /* A(+2)*B(+2) */\n  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);\n\n  /* $\\pm8$ */\n  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-8)*B(-8) */\n  TOOM8_SQR_REC(r1, v2, n + 1); /* A(+8)*B(+8) */\n  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);\n\n  /* $\\pm1/2$ */\n  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-1/2)*B(-1/2)*2^. */\n  TOOM8_SQR_REC(r6, v2, n + 1); /* A(+1/2)*B(+1/2)*2^. */\n  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);\n\n  /* $\\pm1$ */\n  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-1)*B(-1) */\n  TOOM8_SQR_REC(r4, v2, n + 1); /* A(1)*B(1) */\n  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);\n\n  /* $\\pm4$ */\n  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);\n  TOOM8_SQR_REC(pp, v0, n + 1); /* A(-4)*B(-4) */\n  TOOM8_SQR_REC(r2, v2, n + 1); /* A(+4)*B(+4) */\n  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);\n\n#undef v0\n#undef v2\n\n  /* A(0)*B(0) */\n  TOOM8_SQR_REC(pp, ap, n);\n\n  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);\n\n  TMP_FREE;\n\n#undef r0\n#undef r1\n#undef r2\n#undef r3\n#undef r4\n#undef r5\n#undef r6\n#undef wse\n\n}\n"
  },
  {
    "path": "mpn/generic/toom8h_mul.c",
    "content": "/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.\n\n   Contributed to the GNU project by Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#if GMP_NUMB_BITS < 29\n#error Not implemented.\n#endif\n\n#if GMP_NUMB_BITS < 43\n#define BIT_CORRECTION 1\n#define CORRECTION_BITS GMP_NUMB_BITS\n#else\n#define BIT_CORRECTION 0\n#define CORRECTION_BITS 0\n#endif\n\n#define TOOM8H_MUL_N_REC(p, a, b, n)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))\t\t\\\n      mpn_mul_n (p, a, b, n);\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      mpn_toom8h_mul (p, a, n, b, n);\t\t\t\t\\\n  } while (0)\n\n#define TOOM8H_MUL_REC(p, a, na, b, nb)\t\t\\\n  do {\tmpn_mul (p, a, na, b, nb);\t\t\t\\\n  } while (0)\n\n/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}\n   With: an >= bn >= 86, an*5 <  bn * 11.\n   It _may_ work with bn<=?? and bn*?? < an*? 
< bn*??\n\n   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.\n*/\n/* Estimate on needed scratch:\n   S(n) <= (n+7)\\8*13+5+MAX(S((n+7)\\8),1+2*(n+7)\\8),\n   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\\8 < n*15\\8 + lg2(n)*6\n */\n\nvoid\nmpn_toom8h_mul   (mp_ptr pp,\n\t\t  mp_srcptr ap, mp_size_t an,\n\t\t  mp_srcptr bp, mp_size_t bn)\n{\n  mp_size_t n, s, t;\n  int p, q, half;\n  int sign;\n  mp_ptr scratch;\n\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /***************************** decomposition *******************************/\n\n  ASSERT (an >= bn);\n  /* Can not handle too small operands */\n  ASSERT (bn >= 86);\n  /* Can not handle too much unbalancement */\n  ASSERT (an*4 <= bn*13);\n  ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);\n  ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);\n  ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);\n  ASSERT (GMP_NUMB_BITS >  9*3 || an*7 <= bn* 9);\n\n  /* Limit num/den is a rational number between\n     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */\n#define LIMIT_numerator (21)\n#define LIMIT_denominat (20)\n\n  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... 
*/\n    {\n      half = 0;\n      n = 1 + ((an - 1)>>3);\n      p = q = 7;\n      s = an - p * n;\n      t = bn - q * n;\n    }\n  else\n    {\n      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */\n\t{ p = 9; q = 8; }\n      else if (GMP_NUMB_BITS <= 9*3 ||\n\t       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))\n\t{ p = 9; q = 7; }\n      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */\n\t{ p =10; q = 7; }\n      else if (GMP_NUMB_BITS <= 10*3 ||\n\t       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)\n\t{ p =10; q = 6; }\n      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/\n\t{ p =11; q = 6; }\n      else if (GMP_NUMB_BITS <= 11*3 ||\n\t       an * 4 < 9 * bn)\n\t{ p =11; q = 5; }\n      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn )  /* is 4*... <12*... */\n\t{ p =12; q = 5; }\n      else if (GMP_NUMB_BITS <= 12*3 ||\n\t       an * 9 < 28 * bn )  /* is 4*... <12*... */\n\t{ p =12; q = 4; }\n      else\n\t{ p =13; q = 4; }\n\n      half = (p+q)&1;\n      n = 1 + (q * an >= p * bn ? 
(an - 1) / (size_t) p : (bn - 1) / (size_t) q);\n      p--; q--;\n\n      s = an - p * n;\n      t = bn - q * n;\n\n      if(half) { /* Recover from badly chosen splitting */\n\tif (s<1) {p--; s+=n; half=0;}\n\telse if (t<1) {q--; t+=n; half=0;}\n      }\n    }\n#undef LIMIT_numerator\n#undef LIMIT_denominat\n\n  ASSERT (0 < s && s <= n);\n  ASSERT (0 < t && t <= n);\n  ASSERT (half || s + t > 3);\n  ASSERT (n > 2);\n\n  scratch = TMP_ALLOC_LIMBS(n*15 + GMP_LIMB_BITS*6);\n  \n#define   r6    (pp + 3 * n)\t\t\t/* 3n+1 */\n#define   r4    (pp + 7 * n)\t\t\t/* 3n+1 */\n#define   r2    (pp +11 * n)\t\t\t/* 3n+1 */\n#define   r0    (pp +15 * n)\t\t\t/* s+t <= 2*n */\n#define   r7    (scratch)\t\t\t/* 3n+1 */\n#define   r5    (scratch + 3 * n + 1)\t\t/* 3n+1 */\n#define   r3    (scratch + 6 * n + 2)\t\t/* 3n+1 */\n#define   r1    (scratch + 9 * n + 3)\t\t/* 3n+1 */\n#define   v0    (pp +11 * n)\t\t\t/* n+1 */\n#define   v1    (pp +12 * n+1)\t\t\t/* n+1 */\n#define   v2    (pp +13 * n+2)\t\t\t/* n+1 */\n#define   v3    (scratch +12 * n + 4)\t\t/* n+1 */\n#define   wsi   (scratch +12 * n + 4)\t\t/* 3n+1 */\n\n  /********************** evaluation and recursive calls *********************/\n\n  /* $\\pm1/8$ */\n  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^\n\t mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-1/8)*B(-1/8)*8^. */\n  TOOM8H_MUL_N_REC(r7, v2, v3, n + 1); /* A(+1/8)*B(+1/8)*8^. */\n  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));\n\n  /* $\\pm1/4$ */\n  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^\n\t mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-1/4)*B(-1/4)*4^. */\n  TOOM8H_MUL_N_REC(r5, v2, v3, n + 1); /* A(+1/4)*B(+1/4)*4^. 
*/\n  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));\n\n  /* $\\pm2$ */\n  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^\n\t mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-2)*B(-2) */\n  TOOM8H_MUL_N_REC(r3, v2, v3, n + 1); /* A(+2)*B(+2) */\n  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);\n\n  /* $\\pm8$ */\n  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^\n\t mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-8)*B(-8) */\n  TOOM8H_MUL_N_REC(r1, v2, v3, n + 1); /* A(+8)*B(+8) */\n  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);\n\n  /* $\\pm1/2$ */\n  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^\n\t mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-1/2)*B(-1/2)*2^. */\n  TOOM8H_MUL_N_REC(r6, v2, v3, n + 1); /* A(+1/2)*B(+1/2)*2^. */\n  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);\n\n  /* $\\pm1$ */\n  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);\n  if (q == 3)\n    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);\n  else\n    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-1)*B(-1) */\n  TOOM8H_MUL_N_REC(r4, v2, v3, n + 1); /* A(1)*B(1) */\n  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);\n\n  /* $\\pm4$ */\n  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^\n\t mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);\n  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1); /* A(-4)*B(-4) */\n  TOOM8H_MUL_N_REC(r2, v2, v3, n + 1); /* A(+4)*B(+4) */\n  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);\n\n#undef v0\n#undef v1\n#undef v2\n#undef v3\n\n  /* A(0)*B(0) */\n  TOOM8H_MUL_N_REC(pp, ap, bp, n);\n\n  /* Infinity */\n  if( half != 0) {\n    if(s>t) {\n      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t);\n    } 
else {\n      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s);\n    };\n  };\n\n  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);\n\n  TMP_FREE;\n\n#undef r0\n#undef r1\n#undef r2\n#undef r3\n#undef r4\n#undef r5\n#undef r6\n#undef wsi\n}\n"
  },
  {
    "path": "mpn/generic/toom_couple_handling.c",
    "content": "/* Helper function for high degree Toom-Cook algorithms.\n\n   Contributed to the GNU project by Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Gets {pp,n} and (sign?-1:1)*{np,n}. 
Computes at once:\n     {pp,n} <- ({pp,n}+{np,n})/2^{ps+1}\n     {pn,n} <- ({pp,n}-{np,n})/2^{ns+1}\n   Finally recompose them obtaining:\n     {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}\n*/\nvoid\nmpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,\n\t\t\t  int nsign, mp_size_t off, int ps, int ns)\n{\n  if (nsign) {\n#ifdef HAVE_NATIVE_mpn_rsh1sub_n\n    mpn_rsh1sub_n (np, pp, np, n);\n#else\n    mpn_sub_n (np, pp, np, n);\n    mpn_rshift (np, np, n, 1);\n#endif\n  } else {\n#ifdef HAVE_NATIVE_mpn_rsh1add_n\n    mpn_rsh1add_n (np, pp, np, n);\n#else\n    mpn_add_n (np, pp, np, n);\n    mpn_rshift (np, np, n, 1);\n#endif\n  }\n\n#ifdef HAVE_NATIVE_mpn_rsh1sub_n\n  if (ps == 1)\n    mpn_rsh1sub_n (pp, pp, np, n);\n  else\n#endif\n  {\n    mpn_sub_n (pp, pp, np, n);\n    if (ps > 0)\n      mpn_rshift (pp, pp, n, ps);\n  }\n  if (ns > 0)\n    mpn_rshift (np, np, n, ns);\n  pp[n] = mpn_add_n (pp+off, pp+off, np, n-off);\n  ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) );\n}\n"
  },
  {
    "path": "mpn/generic/toom_eval_dgr3_pm1.c",
    "content": "/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1\n\n   Contributed to the GNU project by Niels Möller\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\nCopyright 2011 Jason Moxham\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,\n\t\t\tmp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)\n{\n  int neg;\n\n  ASSERT (x3n > 0);\n  ASSERT (x3n <= n);\n\n  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);\n  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);\n\n  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  if (neg)\n    mpn_sumdiff_n (xp1, xm1, tp, xp1, n + 1);\n  else\n    mpn_sumdiff_n (xp1, xm1, xp1, tp, n + 1);\n#else\n  if (neg)\n    mpn_sub_n (xm1, tp, xp1, n + 1);\n  else\n    mpn_sub_n (xm1, xp1, tp, n + 1);\n\n  mpn_add_n (xp1, xp1, tp, n + 1);\n#endif\n\n  ASSERT (xp1[n] <= 3);\n  ASSERT (xm1[n] <= 1);\n\n  return neg;\n}\n"
  },
  {
    "path": "mpn/generic/toom_eval_dgr3_pm2.c",
    "content": "/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2\n\n   Contributed to the GNU project by Niels Möller\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Needs n+1 limbs of temporary storage. 
*/\nint\nmpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,\n\t\t\tmp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)\n{\n  mp_limb_t cy;\n  int neg;\n\n  ASSERT (x3n > 0);\n  ASSERT (x3n <= n);\n\n  /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */\n#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n\n#if HAVE_NATIVE_mpn_addlsh2_n\n  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);\n\n  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);\n#else /* HAVE_NATIVE_mpn_addlsh_n */\n  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);\n\n  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);\n#endif\n  if (x3n < n)\n    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);\n  tp[n] = cy;\n#else\n  cy = mpn_lshift (tp, xp + 2*n, n, 2);\n  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);\n\n  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);\n  if (x3n < n)\n    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);\n  else\n    tp[n] += mpn_add_n (tp, xp + n, tp, n);\n#endif\n  mpn_lshift (tp, tp, n+1, 1);\n\n  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  if (neg)\n    mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1);\n  else\n    mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1);\n#else\n  if (neg)\n    mpn_sub_n (xm2, tp, xp2, n + 1);\n  else\n    mpn_sub_n (xm2, xp2, tp, n + 1);\n\n  mpn_add_n (xp2, xp2, tp, n + 1);\n#endif\n\n  ASSERT (xp2[n] < 15);\n  ASSERT (xm2[n] < 10);\n\n  return neg;\n}\n"
  },
  {
    "path": "mpn/generic/toom_eval_pm1.c",
    "content": "/* toom_eval_pm1\n\nCopyright 2011 The Code Cavern\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n// k degree poly so have k+1 coeffs and first k are size n\n// k>3 so we can do the first add unconditionally \nint\tmpn_toom_eval_pm1(mp_ptr pp,mp_ptr mp,unsigned int k,mp_srcptr xp,mp_size_t n,mp_size_t m,mp_ptr tp)\n{int isneg=0;unsigned int i;\n\nASSERT(k>3);ASSERT(n>=m);ASSERT(m>0);ASSERT_MPN(xp,n*k+m);\n//ASSERT_SPACE(pp,n+1);ASSERT_SPACE(mp,n+1);ASSERT_SPACE(tp,n+1);\nASSERT(!MPN_OVERLAP_P(pp,n+1,mp,n+1));ASSERT(!MPN_OVERLAP_P(pp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(pp,n+1,tp,n+1));\nASSERT(!MPN_OVERLAP_P(mp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(xp,n*k+m,tp,n+1));\n#if ! 
HAVE_NATIVE_mpn_sumdiff_n\nASSERT(!MPN_OVERLAP_P(mp,n+1,tp,n+1));\n#endif\n#if HAVE_NATIVE_mpn_addadd_n\nif(k==4){pp[n]=mpn_add_n(pp,xp,xp+2*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else\nif(k==5){pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else\n  {pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_addadd_n(tp,xp+n,xp+3*n,xp+5*n,n);\n   for(i=7;i<k-2;i+=4){pp[n]+=mpn_addadd_n(pp,pp,xp+(i-1)*n,xp+(i+1)*n,n);tp[n]+=mpn_addadd_n(tp,tp,xp+i*n,xp+(i+2)*n,n);}\n   if(k%4==3){pp[n]+=mpn_add_n(pp,pp,xp+(k-1)*n,n);}\n   if(k%4==0){pp[n]+=mpn_add_n(pp,pp,xp+(k-2)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+(k-1)*n,n);}\n   if(k%4==1){pp[n]+=mpn_addadd_n(pp,pp,xp+(k-3)*n,xp+(k-1)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+(k-2)*n,n);}}\nif(k%2==0){pp[n]+=mpn_add(pp,pp,n,xp+k*n,m);}else{tp[n]+=mpn_add(tp,tp,n,xp+k*n,m);}\n#else\n// pp is xp+0 xp+2n xp+4n xp+6n ... xp+jn where j<=k-1\n// mp is xp+1 xp+3n xp+5n xp+7n ... xp+jn where j<=k-1\npp[n]=mpn_add_n(pp,xp,xp+2*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);\nfor(i=5;i<k;i+=2){pp[n]+=mpn_add_n(pp,pp,xp+(i-1)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+i*n,n);}\nif(k%2==1){pp[n]+=mpn_add_n(pp,pp,xp+(k-1)*n,n);tp[n]+=mpn_add(tp,tp,n,xp+k*n,m);}else{pp[n]+=mpn_add(pp,pp,n,xp+k*n,m);}\n#endif\nif(mpn_cmp(tp,pp,n+1)>0)isneg=-1;\n#if HAVE_NATIVE_mpn_sumdiff_n\nif(isneg==0){mpn_sumdiff_n(pp,mp,pp,tp,n+1);}else{mpn_sumdiff_n(pp,mp,tp,pp,n+1);}\n#else\nif(isneg==0){mpn_sub_n(mp,pp,tp,n+1);}else{mpn_sub_n(mp,tp,pp,n+1);}\nmpn_add_n(pp,pp,tp,n+1);  \n#endif\nreturn isneg;}\n"
  },
  {
    "path": "mpn/generic/toom_eval_pm2.c",
    "content": "/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2\n\n   Contributed to the GNU project by Niels Möller and Marco Bodrato\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it\n   can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. 
*/\n#if HAVE_NATIVE_mpn_addlsh2_n\n#define DO_addlsh2(d, a, b, n, cy)\t\\\ndo {\t\t\t\t\t\\\n  (cy) <<= 2;\t\t\t\t\\\n  (cy) += mpn_addlsh2_n(d, a, b, n);\t\\\n} while (0)\n#else\n#if HAVE_NATIVE_mpn_addlsh_n\n#define DO_addlsh2(d, a, b, n, cy)\t\\\ndo {\t\t\t\t\t\\\n  (cy) <<= 2;\t\t\t\t\\\n  (cy) += mpn_addlsh_n(d, a, b, n, 2);\t\\\n} while (0)\n#else\n/* The following is not a general substitute for addlsh2.\n   It is correct if d == b, but it is not if d == a.\t*/\n#define DO_addlsh2(d, a, b, n, cy)\t\\\ndo {\t\t\t\t\t\\\n  (cy) <<= 2;\t\t\t\t\\\n  (cy) += mpn_lshift(d, b, n, 2);\t\\\n  (cy) += mpn_add_n(d, d, a, n);\t\\\n} while (0)\n#endif\n#endif\n\n/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the\n   points +2 and -2. */\nint\nmpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,\n\t\t   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)\n{\n  int i;\n  int neg;\n  mp_limb_t cy;\n\n  ASSERT (k >= 3);\n  ASSERT (k < GMP_NUMB_BITS);\n\n  ASSERT (hn > 0);\n  ASSERT (hn <= n);\n\n  /* The degree k is also the number of full-size coefficients, so\n   * that last coefficient, of size hn, starts at xp + k*n. */\n\n  cy = 0;\n  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);\n  if (hn != n)\n    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);\n  for (i = k - 4; i >= 0; i -= 2)\n    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);\n  xp2[n] = cy;\n\n  k--;\n\n  cy = 0;\n  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);\n  for (i = k - 4; i >= 0; i -= 2)\n    DO_addlsh2 (tp, xp + i * n, tp, n, cy);\n  tp[n] = cy;\n\n  if (k & 1)\n    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));\n  else\n    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));\n\n  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? 
~0 : 0;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  if (neg)\n    mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1);\n  else\n    mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1);\n#else \n  if (neg)\n    mpn_sub_n (xm2, tp, xp2, n + 1);\n  else\n    mpn_sub_n (xm2, xp2, tp, n + 1);\n\n  mpn_add_n (xp2, xp2, tp, n + 1);\n#endif\n\n  ASSERT (xp2[n] < (1<<(k+2))-1);\n  ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);\n\n  neg ^= ((k & 1) - 1);\n\n  return neg;\n}\n\n#undef DO_addlsh2\n"
  },
  {
    "path": "mpn/generic/toom_eval_pm2exp.c",
    "content": "/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k\n\n   Contributed to the GNU project by Niels Möller\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */\nint\nmpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,\n\t\t      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,\n\t\t      mp_ptr tp)\n{\n  unsigned i;\n  int neg;\n#if HAVE_NATIVE_mpn_addlsh_n\n  mp_limb_t cy;\n#endif\n\n  ASSERT (k >= 3);\n  ASSERT (shift*k < GMP_NUMB_BITS);\n\n  ASSERT (hn > 0);\n  ASSERT (hn <= n);\n\n  /* The degree k is also the number of full-size coefficients, so\n   * that last coefficient, of size hn, starts at xp + k*n. 
*/\n\n#if HAVE_NATIVE_mpn_addlsh_n\n  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);\n  for (i = 4; i < k; i += 2)\n    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);\n\n  tp[n] = mpn_lshift (tp, xp+n, n, shift);\n  for (i = 3; i < k; i+= 2)\n    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);\n\n  if (k & 1)\n    {\n      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);\n      MPN_INCR_U (tp + hn, n+1 - hn, cy);\n    }\n  else\n    {\n      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);\n      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);\n    }\n\n#else /* !HAVE_NATIVE_mpn_addlsh_n */\n  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);\n  xp2[n] += mpn_add_n (xp2, xp, tp, n);\n  for (i = 4; i < k; i += 2)\n    {\n      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);\n      xp2[n] += mpn_add_n (xp2, xp2, tp, n);\n    }\n\n  tp[n] = mpn_lshift (tp, xp+n, n, shift);\n  for (i = 3; i < k; i+= 2)\n    {\n      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);\n      tp[n] += mpn_add_n (tp, tp, xm2, n);\n    }\n\n  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);\n  if (k & 1)\n    mpn_add (tp, tp, n+1, xm2, hn+1);\n  else\n    mpn_add (xp2, xp2, n+1, xm2, hn+1);\n#endif /* !HAVE_NATIVE_mpn_addlsh_n */\n\n  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  if (neg)\n    mpn_sumdiff_n (xp2, xm2, tp, xp2, n + 1);\n  else\n    mpn_sumdiff_n (xp2, xm2, xp2, tp, n + 1);\n#else \n  if (neg)\n    mpn_sub_n (xm2, tp, xp2, n + 1);\n  else\n    mpn_sub_n (xm2, xp2, tp, n + 1);\n\n  mpn_add_n (xp2, xp2, tp, n + 1);\n#endif\n\n  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */\n  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||\n\t  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));\n  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||\n\t  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));\n\n  return neg;\n}\n"
  },
  {
    "path": "mpn/generic/toom_eval_pm2rexp.c",
    "content": "/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k\n\n   Contributed to the GNU project by Marco Bodrato\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if HAVE_NATIVE_mpn_addlsh_n\n#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)\n#else\nstatic mp_limb_t\nDO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)\n{\n#if USE_MUL_1 && 0\n  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));\n#else\n  mp_limb_t __cy;\n  __cy = mpn_lshift(ws,src,n,s);\n  return    __cy + mpn_add_n(dst,dst,ws,n);\n#endif\n}\n#endif\n\n/* Evaluates a polynomial of degree k >= 3. 
*/\nint\nmpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,\n\t\t      unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,\n\t\t      unsigned int s, mp_ptr ws)\n{\n  unsigned int i;\n  int neg;\n  /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/\n  ASSERT (n >= t);\n  ASSERT (s != 0); /* or _eval_pm1 should be used */\n  ASSERT (q > 1);\n  ASSERT (s*q < GMP_NUMB_BITS);\n  rp[n] = mpn_lshift(rp, ap, n, s*q);\n  ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));\n  if( (q & 1) != 0) {\n    ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));\n    rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);\n  } else {\n    ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));\n  }\n  for(i=2; i<q-1; i++)\n  {\n    rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);\n    i++;\n    ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);\n  };\n\n  neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  if (neg)\n    mpn_sumdiff_n (rp, rm, ws, rp, n + 1);\n  else\n    mpn_sumdiff_n (rp, rm, rp, ws, n + 1);\n#else \n  if (neg)\n    mpn_sub_n (rm, ws, rp, n + 1);\n  else\n    mpn_sub_n (rm, rp, ws, n + 1);\n\n  ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));\n#endif\n\n  return neg;\n}\n"
  },
  {
    "path": "mpn/generic/toom_interpolate_16pts.c",
    "content": "/* Interpolaton for the algorithm Toom-Cook 8.5-way.\n\n   Contributed to the GNU project by Marco Bodrato.\n\n   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY\n   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009, 2010, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_NUMB_BITS < 29\n#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.\n#endif\n\n#if GMP_NUMB_BITS < 28\n#error Not implemented: divexact_by188513325 and _by182712915 will not work.\n#endif\n\n\n#if HAVE_NATIVE_mpn_sublsh_n\n#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)\n#else\nstatic mp_limb_t\nDO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)\n{\n#if USE_MUL_1 && 0\n  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));\n#else\n  mp_limb_t __cy;\n  __cy = mpn_lshift(ws,src,n,s);\n  return    __cy + mpn_sub_n(dst,dst,ws,n);\n#endif\n}\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh_n\n#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)\n#else\nstatic mp_limb_t\nDO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)\n{\n#if USE_MUL_1 && 0\n  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));\n#else\n  mp_limb_t __cy;\n  __cy = mpn_lshift(ws,src,n,s);\n  return    __cy + mpn_add_n(dst,dst,ws,n);\n#endif\n}\n#endif\n\n#if HAVE_NATIVE_mpn_subrsh\n#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)\n#else\n/* FIXME: This is not a correct definition, it assumes no carry */\n#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)\t\t\t\t\\\ndo {\t\t\t\t\t\t\t\t\t\\\n  mp_limb_t __cy;\t\t\t\t\t\t\t\\\n  MPN_DECR_U (dst, nd, src[0] >> s);\t\t\t\t\t\\\n  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);\t\\\n  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);\t\t\t\t\\\n} while (0)\n#endif\n\n\n/* FIXME: tuneup should decide the best variant */\n#ifndef AORSMUL_FASTER_AORS_AORSLSH\n#define AORSMUL_FASTER_AORS_AORSLSH 1\n#endif\n#ifndef AORSMUL_FASTER_AORS_2AORSLSH\n#define AORSMUL_FASTER_AORS_2AORSLSH 1\n#endif\n#ifndef AORSMUL_FASTER_2AORSLSH\n#define AORSMUL_FASTER_2AORSLSH 1\n#endif\n#ifndef AORSMUL_FASTER_3AORSLSH\n#define AORSMUL_FASTER_3AORSLSH 
1\n#endif\n\n#if GMP_NUMB_BITS < 43\n#define BIT_CORRECTION 1\n#define CORRECTION_BITS GMP_NUMB_BITS\n#else\n#define BIT_CORRECTION 0\n#define CORRECTION_BITS 0\n#endif\n\n#define BINVERT_9 \\\n  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)\n\n#define BINVERT_255 \\\n  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))\n\n  /* FIXME: find some more general expressions for inverses */\n#if GMP_LIMB_BITS == 32\n#define BINVERT_2835  (GMP_NUMB_MASK &\t\tCNST_LIMB(0x53E3771B))\n#define BINVERT_42525 (GMP_NUMB_MASK &\t\tCNST_LIMB(0x9F314C35))\n#define BINVERT_182712915 (GMP_NUMB_MASK &\tCNST_LIMB(0x550659DB))\n#define BINVERT_188513325 (GMP_NUMB_MASK &\tCNST_LIMB(0xFBC333A5))\n#define BINVERT_255x182712915L (GMP_NUMB_MASK &\tCNST_LIMB(0x6FC4CB25))\n#define BINVERT_255x188513325L (GMP_NUMB_MASK &\tCNST_LIMB(0x6864275B))\n#if GMP_NAIL_BITS == 0\n#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)\n#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)\n#else /* GMP_NAIL_BITS != 0 */\n#define BINVERT_255x182712915H \\\n  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))\n#define BINVERT_255x188513325H \\\n  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))\n#endif\n#else\n#if GMP_LIMB_BITS == 64\n#define BINVERT_2835  (GMP_NUMB_MASK &\tCNST_LIMB(0x938CC70553E3771B))\n#define BINVERT_42525 (GMP_NUMB_MASK &\tCNST_LIMB(0xE7B40D449F314C35))\n#define BINVERT_255x182712915  (GMP_NUMB_MASK &\tCNST_LIMB(0x1B649A076FC4CB25))\n#define BINVERT_255x188513325  (GMP_NUMB_MASK &\tCNST_LIMB(0x06DB993A6864275B))\n#endif\n#endif\n\n#ifndef mpn_divexact_by255\n#if GMP_NUMB_BITS % 8 == 0\n#define mpn_divexact_by255(dst,src,size) \\\n  (255 & 1 * mpn_divexact_byfobm1(dst, src, size, 255, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))\n#else\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1\n#define mpn_divexact_by255(dst,src,size) 
mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)\n#else\n#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))\n#endif\n#endif\n#endif\n\n#ifndef mpn_divexact_by255x4\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1\n#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)\n#else\n#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)\n#endif\n#endif\n\n#ifndef mpn_divexact_by9x16\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1\n#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)\n#else\n#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)\n#endif\n#endif\n\n#ifndef mpn_divexact_by42525x16\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)\n#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)\n#else\n#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)\n#endif\n#endif\n\n#ifndef mpn_divexact_by2835x64\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)\n#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)\n#else\n#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)\n#endif\n#endif\n\n#ifndef  mpn_divexact_by255x182712915\n#if GMP_NUMB_BITS < 36\n#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)\n/* FIXME: use mpn_bdiv_q_2_pi2 */\n#endif\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)\n#define mpn_divexact_by255x182712915(dst,src,size)\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\t\\\n    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);\t\\\n    mpn_divexact_by255(dst,dst,size);\t\t\t\t\t\t\\\n  } while(0)\n#else\n#define mpn_divexact_by255x182712915(dst,src,size)\t\\\n  do {\t\t\t\t\t\t\t\\\n    
mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));\t\\\n    mpn_divexact_by255(dst,dst,size);\t\t\t\\\n  } while(0)\n#endif\n#else /* GMP_NUMB_BITS > 35 */\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)\n#define mpn_divexact_by255x182712915(dst,src,size) \\\n  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)\n#else\n#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))\n#endif\n#endif /* GMP_NUMB_BITS >?< 36 */\n#endif\n\n#ifndef  mpn_divexact_by255x188513325\n#if GMP_NUMB_BITS < 36\n#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)\n/* FIXME: use mpn_bdiv_q_1_pi2 */\n#endif\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)\n#define mpn_divexact_by255x188513325(dst,src,size)\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);\t\\\n    mpn_divexact_by255(dst,dst,size);\t\t\t\t\t\\\n  } while(0)\n#else\n#define mpn_divexact_by255x188513325(dst,src,size)\t\\\n  do {\t\t\t\t\t\t\t\\\n    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));\t\\\n    mpn_divexact_by255(dst,dst,size);\t\t\t\\\n  } while(0)\n#endif\n#else /* GMP_NUMB_BITS > 35 */\n#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)\n#define mpn_divexact_by255x188513325(dst,src,size) \\\n  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)\n#else\n#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))\n#endif\n#endif /* GMP_NUMB_BITS >?< 36 */\n#endif\n\n/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation\n   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,\n   +-1/8, 0. More precisely, we want to compute\n   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or\n   14), given the 16 (rsp. 
15) values:\n\n     r0 = limit at infinity of f(x) / x^7,\n     r1 = f(8),f(-8),\n     r2 = f(4),f(-4),\n     r3 = f(2),f(-2),\n     r4 = f(1),f(-1),\n     r5 = f(1/4),f(-1/4),\n     r6 = f(1/2),f(-1/2),\n     r7 = f(1/8),f(-1/8),\n     r8 = f(0).\n\n   All couples of the form f(n),f(-n) must be already mixed with\n   toom_couple_handling(f(n),...,f(-n),...)\n\n   The result is stored in {pp, spt + 7*n (or 8*n)}.\n   At entry, r8 is stored at {pp, 2n},\n   r6 is stored at {pp + 3n, 3n + 1}.\n   r4 is stored at {pp + 7n, 3n + 1}.\n   r2 is stored at {pp +11n, 3n + 1}.\n   r0 is stored at {pp +15n, spt}.\n\n   The other values are 3n+1 limbs each (with most significant limbs small).\n\n   Negative intermediate results are stored two-complemented.\n   Inputs are destroyed.\n*/\n\nvoid\nmpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,\n\t\t\tmp_size_t n, mp_size_t spt, int half, mp_ptr wsi)\n{\n  mp_limb_t cy;\n  mp_size_t n3;\n  mp_size_t n3p1;\n  n3 = 3 * n;\n  n3p1 = n3 + 1;\n\n#define   r6    (pp + n3)\t\t\t/* 3n+1 */\n#define   r4    (pp + 7 * n)\t\t\t/* 3n+1 */\n#define   r2    (pp +11 * n)\t\t\t/* 3n+1 */\n#define   r0    (pp +15 * n)\t\t\t/* s+t <= 2*n */\n\n  ASSERT( spt <= 2 * n );\n  /******************************* interpolation *****************************/\n  if( half != 0) {\n    cy = mpn_sub_n (r4, r4, r0, spt);\n    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);\n\n    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);\n    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);\n    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);\n\n    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);\n    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);\n    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);\n\n    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);\n    //MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);\n#if BIT_CORRECTION\n    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,\n                    n3p1 
- spt - BIT_CORRECTION, cy);\n    ASSERT (BIT_CORRECTION > 0 || cy == 0);\n    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */\n    cy = r7[n3p1];\n    r7[n3p1] = 0x80;\n#else\n    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);\n#endif\n    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);\n#if BIT_CORRECTION\n    /* FIXME: assumes r7[n3p1] is writable. */\n    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );\n    r7[n3p1] = cy;\n#endif\n  };\n\n  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);\n  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  mpn_sumdiff_n (r2, wsi, r5, r2, n3p1);\n  MP_PTR_SWAP(r5,wsi);\n#else\n  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */\n  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));\n  MP_PTR_SWAP(r5, wsi);\n#endif\n\n  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);\n  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  mpn_sumdiff_n (wsi, r6, r6, r3, n3p1);\n  MP_PTR_SWAP(r3, wsi);\n#else\n  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));\n  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */\n  MP_PTR_SWAP(r3, wsi);\n#endif\n\n  r7[n3] -= DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi)\n\t    * (1-BIT_CORRECTION); /* if BIT_CORRECTION != 0, discard the carry. */\n#if BIT_CORRECTION\n  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);\n  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);\n  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);\n  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );\n#else\n  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);\n#endif\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  mpn_sumdiff_n (r1, wsi, r7, r1, n3p1);\n  MP_PTR_SWAP(r7, wsi);\n#else\n  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */\n  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. 
*/\n  MP_PTR_SWAP(r7, wsi);\n#endif\n\n  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);\n\n#if AORSMUL_FASTER_2AORSLSH\n  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */\n#else\n  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */\n  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */\n#endif\n\n  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */\n#if AORSMUL_FASTER_3AORSLSH\n  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */\n#else\n  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */\n  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */\n  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */\n#endif\n  mpn_divexact_by255x188513325(r7, r7, n3p1);\n\n  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */\n  /* A division by 2835x64 followsi. Warning: the operand can be negative! */\n  mpn_divexact_by2835x64(r5, r5, n3p1);\n  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)\n    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));\n\n#if AORSMUL_FASTER_AORS_AORSLSH\n  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */\n#else\n  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */\n  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */\n#endif\n#if AORSMUL_FASTER_2AORSLSH\n  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */\n#else\n  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */\n  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */\n#endif\n  /* A division by 255x4 followsi. Warning: the operand can be negative! 
*/\n  mpn_divexact_by255x4(r6, r6, n3p1);\n  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)\n    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));\n\n  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));\n\n  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));\n  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));\n\n  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/\n  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);\n  mpn_submul_1 (r1, r2, n3p1, 1428);\n  mpn_submul_1 (r1, r3, n3p1, 112896);\n  mpn_divexact_by255x182712915(r1, r1, n3p1);\n\n  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));\n  mpn_divexact_by42525x16(r2, r2, n3p1);\n\n#if AORSMUL_FASTER_AORS_2AORSLSH\n  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));\n#else\n  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));\n  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));\n  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));\n#endif\n  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));\n  mpn_divexact_by9x16(r3, r3, n3p1);\n\n  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));\n  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));\n  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));\n\n  mpn_add_n (r6, r2, r6, n3p1);\n  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));\n  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));\n\n  mpn_sub_n (r5, r3, r5, n3p1);\n  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));\n  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));\n\n  mpn_add_n (r7, r1, r7, n3p1);\n  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));\n  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));\n\n  /* last interpolation steps... */\n  /* ... 
could be mixed with recomposition\n\t||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|\n  */\n\n  /***************************** recomposition *******************************/\n  /*\n    pp[] prior to operations:\n    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp\n\n    summation scheme for remaining operations:\n    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp\n    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp\n\t||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|\n  */\n\n  cy = mpn_add_n (pp + n, pp + n, r7, n);\n  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);\n#if HAVE_NATIVE_mpn_add_nc\n  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);\n#else\n  MPN_INCR_U (r7 + 2 * n, n + 1, cy);\n  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);\n#endif\n  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);\n\n  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);\n  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);\n#if HAVE_NATIVE_mpn_add_nc\n  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);\n#else\n  MPN_INCR_U (r5 + 2 * n, n + 1, cy);\n  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);\n#endif\n  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);\n\n  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);\n  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);\n#if HAVE_NATIVE_mpn_add_nc\n  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);\n#else\n  MPN_INCR_U (r3 + 2 * n, n + 1, cy);\n  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);\n#endif\n  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);\n\n  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);\n  if ( half ) {\n    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);\n#if HAVE_NATIVE_mpn_add_nc\n    if(LIKELY(spt > n)) {\n      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);\n  
    MPN_INCR_U (pp + 16 * n, spt - n, cy);\n    } else {\n      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));\n    }\n#else\n    MPN_INCR_U (r1 + 2 * n, n + 1, cy);\n    if(LIKELY(spt > n)) {\n      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);\n      MPN_INCR_U (pp + 16 * n, spt - n, cy);\n    } else {\n      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));\n    }\n#endif\n  } else {\n    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));\n  }\n\n#undef   r0\n#undef   r2\n#undef   r4\n#undef   r6\n}\n"
  },
  {
    "path": "mpn/generic/udiv_w_sdiv.c",
    "content": "/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed\n   division.\n\n   Contributed by Peter L. Montgomery.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY SAFE\n   TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS\n   ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE\n   GNU MP RELEASE.\n\n\nCopyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmp_limb_t\nmpn_udiv_w_sdiv (rp, a1, a0, d)\n     mp_limb_t *rp, a1, a0, d;\n{\n  mp_limb_t q, r;\n  mp_limb_t c0, c1, b1;\n\n  ASSERT (d != 0);\n  ASSERT (a1 < d);\n\n  if ((mp_limb_signed_t) d >= 0)\n    {\n      if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))\n\t{\n\t  /* dividend, divisor, and quotient are nonnegative */\n\t  sdiv_qrnnd (q, r, a1, a0, d);\n\t}\n      else\n\t{\n\t  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */\n\t  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));\n\t  /* Divide (c1*2^32 + c0) by d */\n\t  sdiv_qrnnd (q, r, c1, c0, d);\n\t  /* Add 2^31 to quotient */\n\t  q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);\n\t}\n    }\n  else\n    {\n      b1 = d >> 1;\t\t\t/* d/2, between 2^30 and 2^31 - 1 */\n      c1 = a1 >> 1;\t\t\t/* A/2 */\n      c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);\n\n      if (a1 < b1)\t\t\t/* A < 2^32*b1, so A/2 < 2^31*b1 */\n\t{\n\t  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */\n\n\t  r = 2*r + (a0 & 1);\t\t/* Remainder from A/(2*b1) */\n\t  if ((d & 1) != 0)\n\t    {\n\t      if (r >= q)\n\t\tr = r - q;\n\t      else if (q - r <= d)\n\t\t{\n\t\t  r = r - q + d;\n\t\t  q--;\n\t\t}\n\t      else\n\t\t{\n\t\t  r = r - q + 2*d;\n\t\t  q -= 2;\n\t\t}\n\t    }\n\t}\n      else if (c1 < b1)\t\t\t/* So 2^31 <= (A/2)/b1 < 2^32 */\n\t{\n\t  c1 = (b1 - 1) - c1;\n\t  c0 = ~c0;\t\t\t/* logical NOT */\n\n\t  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */\n\n\t  q = ~q;\t\t\t/* (A/2)/b1 */\n\t  r = (b1 - 1) - r;\n\n\t  r = 2*r + (a0 & 1);\t\t/* A/(2*b1) */\n\n\t  if ((d & 1) != 0)\n\t    {\n\t      if (r >= q)\n\t\tr = r - q;\n\t      else if (q - r <= d)\n\t\t{\n\t\t  r = r - q + d;\n\t\t  q--;\n\t\t}\n\t      else\n\t\t{\n\t\t  r = r - q + 2*d;\n\t\t  q -= 2;\n\t\t}\n\t    }\n\t}\n      else\t\t\t\t/* Implies c1 = b1 */\n\t{\t\t\t\t/* Hence a1 = d - 1 = 2*b1 - 1 */\n\t  if (a0 >= -d)\n\t    {\n\t      q = -1;\n\t   
   r = a0 + d;\n\t    }\n\t  else\n\t    {\n\t      q = -2;\n\t      r = a0 + 2*d;\n\t    }\n\t}\n    }\n\n  *rp = r;\n  return q;\n}\n"
  },
  {
    "path": "mpn/generic/urandomb.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid mpn_urandomb(mp_ptr rp, gmp_randstate_t rnd, mpir_ui n)\n{\n   _gmp_rand(rp, rnd, n);\n}\n"
  },
  {
    "path": "mpn/generic/urandomm.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid mpn_urandomm(mp_ptr rp, gmp_randstate_t rnd, mp_srcptr mp, mp_size_t n)\n{\n   mp_size_t c, b;\n\n   ASSERT(n > 0);\n   ASSERT_MPN(mp, n);\n   ASSERT(mp[n - 1] != 0);\n\n   count_leading_zeros(c, mp[n - 1]); /* compute number of bits of n */\n   c = GMP_LIMB_BITS - c;\n   b = GMP_NUMB_BITS*(n - 1) + c;\n\n   do\n   {\n      _gmp_rand(rp, rnd, b);\n   } while(mpn_cmp(rp, mp, n) >= 0);\nreturn;}\n"
  },
  {
    "path": "mpn/generic/xnor_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_xnor_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(xnor)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/xor_n.c",
    "content": "/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#define OPERATION_xor_n\t1\n\n#ifndef _MSC_VER\n\n#ifdef OPERATION_and_n\n#define func __MPN(and_n)\n#define call mpn_and_n\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __MPN(andn_n)\n#define call mpn_andn_n\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __MPN(nand_n)\n#define call mpn_nand_n\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __MPN(ior_n)\n#define call mpn_ior_n\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __MPN(iorn_n)\n#define call mpn_iorn_n\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __MPN(nior_n)\n#define call mpn_nior_n\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __MPN(xor_n)\n#define call mpn_xor_n\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __MPN(xnor_n)\n#define call mpn_xnor_n\n#endif\n\nvoid\nfunc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  call (rp, up, vp, n);\n}\n\n#else\n\n#define _logicop(x) void __MPN(x ## _n)(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) { mpn_ ## x ## _n(rp, up, vp, n); }\n\n_logicop(xor)\n\n#endif\n"
  },
  {
    "path": "mpn/generic/zero.c",
    "content": "/* mpn_zero\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpn_zero (mp_ptr rp, mp_size_t n)\n{\n  mp_size_t i;\n\n  rp += n;\n  for (i = -n; i != 0; i++)\n    rp[i] = 0;\n}\n"
  },
  {
    "path": "mpn/generic/zero_p.c",
    "content": "/* mpn_zero_p (x,xsize) -- Return 1 if X is zero, 0 if it is non-zero.\n\nCopyright 2015 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#define __GMP_FORCE_mpn_zero_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpn/ia64/README",
    "content": "Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n                      IA-64 MPN SUBROUTINES\n\n\nThis directory contains mpn functions for the IA-64 architecture.\n\n\nCODE ORGANIZATION\n\n\tmpn/ia64          itanium-2, and generic ia64\n\nThe code here has been optimized primarily for Itanium 2.  Very few Itanium 1\nchips were ever sold, and Itanium 2 is more powerful, so the latter is what\nwe concentrate on.\n\n\n\nCHIP NOTES\n\nThe IA-64 ISA keeps instructions three and three in 128 bit bundles.\nProgrammers/compilers need to put explicit breaks `;;' when there are WAW or\nRAW dependencies, with some notable exceptions.  Such \"breaks\" are typically\nat the end of a bundle, but can be put between operations within some bundle\ntypes too.\n\nThe Itanium 1 and Itanium 2 implementations can under ideal conditions\nexecute two bundles per cycle.  
The Itanium 1 allows 4 of these instructions\nto do integer operations, while the Itanium 2 allows all 6 to be integer\noperations.\n\nTaken cloop branches seem to insert a bubble into the pipeline most of the\ntime on Itanium 1.\n\nLoads to the fp registers bypass the L1 cache and thus get extremely long\nlatencies, 9 cycles on the Itanium 1 and 6 cycles on the Itanium 2.\n\nThe software pipeline stuff using br.ctop instruction causes delays, since\nmany issue slots are taken up by instructions with zero predicates, and\nsince many extra instructions are needed to set things up.  These features\nare clearly designed for code density, not speed.\n\nMisc pipeline limitations (Itanium 1):\n* The getf.sig instruction can only execute in M0.\n* At most four integer instructions/cycle.\n* Nops take up resources like any plain instructions.\n\nMisc pipeline limitations (Itanium 2):\n* The getf.sig instruction can only execute in M0.\n* Nops take up resources like any plain instructions.\n\n\nASSEMBLY SYNTAX\n\n.align pads with nops in a text segment, but gas 2.14 and earlier\nincorrectly byte-swaps its nop bundle in big endian mode (eg. hpux), making\nit come out as break instructions.  We use the ALIGN() macro in\nmpn/ia64/ia64-defs.m4 when it might be executed across.  That macro\nsuppresses any .align if the problem is detected by configure.  Lack of\nalignment might hurt performance but will at least be correct.\n\nfoo:: to create a global symbol is not accepted by gas.  Use separate\n\".global foo\" and \"foo:\" instead.\n\n.global is the standard global directive.  gas accepts .globl, but hpux \"as\"\ndoesn't.\n\n.proc / .endp generates the appropriate .type and .size information for ELF,\nso the latter directives don't need to be given explicitly.\n\n.pred.rel \"mutex\"... is standard for annotating predicate register\nrelationships.  
gas also accepts .pred.rel.mutex, but hpux \"as\" doesn't.\n\n.pred directives can't be put on a line with a label, like\n\".Lfoo: .pred ...\", the HP assembler on HP-UX 11.23 rejects that.\ngas is happy with it, and past versions of HP had seemed ok.\n\n// is the standard comment sequence, but we prefer \"C\" since it inhibits m4\nmacro expansion.  See comments in ia64-defs.m4.\n\n\nREGISTER USAGE\n\nSpecial:\n   r0: constant 0\n   r1: global pointer (gp)\n   r8: return value\n   r12: stack pointer (sp)\n   r13: thread pointer (tp)\nCaller-saves: r8-r11 r14-r31 f6-f15 f32-f127\nCaller-saves but rotating: r32-\n\n\nREFERENCES\n\nIntel Itanium Architecture Software Developer's Manual, volumes 1 to 3,\nIntel document 245317-004, 245318-004, 245319-004 October 2002.  Volume 1\nincludes an Itanium optimization guide.\n\nIntel Itanium Processor-specific Application Binary Interface (ABI), Intel\ndocument 245370-003, May 2001.  Describes C type sizes, dynamic linking,\netc.\n\nIntel Itanium Architecture Assembly Language Reference Guide, Intel document\n248801-004, 2000-2002.  Describes assembly instruction syntax and other\ndirectives.\n\nItanium Software Conventions and Runtime Architecture Guide, Intel document\n245358-003, May 2001.  Describes calling conventions, including stack\nunwinding requirements.\n\nIntel Itanium Processor Reference Manual for Software Optimization, Intel\ndocument 245473-003, November 2001.\n\nIntel Itanium-2 Processor Reference Manual for Software Development and\nOptimization, Intel document 251110-003, May 2004.\n\nAll the above documents can be found online at\n\n    http://developer.intel.com/design/itanium/manuals.htm\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 76\nEnd:\n"
  },
  {
    "path": "mpn/ia64/add_n.asm",
    "content": "dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2.67\nC Itanium 2:    1.25\n\nC TODO\nC  * Consider using special code for small n, using something like\nC    \"switch (8 * (n >= 8) + (n mod 8))\" to enter it and feed-in code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_add_n',1)\n\nifdef(`OPERATION_add_n',`\n  define(ADDSUB,\tadd)\n  define(PRED,\t\tltu)\n  define(INCR,\t\t1)\n  define(LIM,\t\t-1)\n  define(func, mpn_add_n)\n')\nifdef(`OPERATION_sub_n',`\n  define(ADDSUB,\tsub)\n  define(PRED,\t\tgtu)\n  define(INCR,\t\t-1)\n  define(LIM,\t\t0)\n  define(func, mpn_sub_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')\ndefine(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')\ndefine(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')\ndefine(`w0',`r22') define(`w1',`r9') 
define(`w2',`r8') define(`w3',`r23')\ndefine(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')\ndefine(`rpx',`r3')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\t\tC 00\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 7, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 8, n\t\tC\t\t\tM I\n\tadd\t\tn = -8, n\t\tC\t\t\tM I\n\t;;\n}\n{.mmi\t\tC 01\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb001\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb010\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb011\t\t\tC\t\t\tB\n\t;;\n}\n{.mmi\t\tC 02\n\tcmp.eq\t\tp9, p0 = 4, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp10, p0 = 5, r14\tC\t\t\tM I\n\tcmp.eq\t\tp11, p0 = 6, r14\tC\t\t\tM I\n}\n{.bbb\n   (p9)\tbr.dptk\t\t.Lb100\t\t\tC\t\t\tB\n  (p10)\tbr.dptk\t\t.Lb101\t\t\tC\t\t\tB\n  (p11)\tbr.dptk\t\t.Lb110\t\t\tC\t\t\tB\n\t;;\n}\t\tC 03\n{.mmb\n\tcmp.eq\t\tp12, p0 = 7, r14\tC\t\t\tM I\n\tadd\t\tn = -1, n\t\tC loop count\t\tM I\n  (p12)\tbr.dptk\t\t.Lb111\t\t\tC\t\t\tB\n}\n\n\n.Lb000:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 8, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 
8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n  (p14)\tbr.cond.dptk\t.Lcj8\t\t\tC\t\t\tB\n\t;;\n\n.grt8:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tbr\t\t.LL000\t\t\tC\t\t\tB\n\n.Lb001:\tadd\t\trpx = 16, rp\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tmov\t\tr8 = 0\t\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.ne\t\tp9, p0 = r0, r0\t\tC read near Loop\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu0 = [up], 
8\t\tC\t\t\tM01\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj9\t\t\tC\t\t\tB\n\n.Lb010:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 24, rp\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp9, p0 = w7, r10\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL01x\t\t\tC\t\t\tB\n\n.Lb011:\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp8, p0 = w6, r10\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w6, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 32, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, r10\tC\t\t\tM 
I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tst8\t\t[rp] = w6, 8\t\tC\t\t\tM23\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tbr\t\t.LL01x\t\t\tC\t\t\tB\n\n.Lb100:\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 8, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp7, p0 = w5, r10\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\tcmp.PRED\tp7, p0 = w5, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 
= 512, up\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\tbr\t\t.LL100\t\t\tC\t\t\tB\n\n.Lb101:\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 16, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w4, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n  (p15)\tbr.cond.dpnt\t.grt5\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL101\t\t\tC\t\t\tB\n\n.Lb110:\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 24, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 
8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n  (p14)\tbr.cond.dptk\t.Lcj67\t\t\tC\t\t\tB\n\t;;\n\n.grt6:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp9, p0 = w3, r10\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL11x\t\t\tC\t\t\tB\n\n.Lb111:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 32, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt7\t\t\tC\t\t\tB\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\tbr\t\t.Lcj67\t\t\tC\t\t\tB\n\n.grt7:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM 
I\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\tbr\t\t.LL11x\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w1\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.LL000:\tst8\t\t[rp] = w1, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w2, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n\tlfetch\t\t[r10], 64\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n.LL11x:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w4, u4\t\tC\t\t\tM I\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w4\tC\t\t\tM I\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n   (p9)\tadd\t\tw4 = INCR, w4\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n.LL101:\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w5\tC\t\t\tM I\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n   (p6)\tadd\t\tw5 = INCR, w5\t\tC\t\t\tM I\n\tADDSUB\t\tw7 
= u7, v7\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w4, 8\t\tC\t\t\tM23\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\t;;\n.LL100:\tst8\t\t[rp] = w5, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w6, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n\tlfetch\t\t[r11], 64\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n.LL01x:\tst8\t\t[rp] = w7, 8\t\tC\t\t\tM23\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n.Lcj9:\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w1\tC\t\t\tM I\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj8:\tst8\t\t[rp] = w1, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w2, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n.Lcj67:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w4, u4\t\tC\t\t\tM I\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w4\tC\t\t\tM I\n   (p9)\tadd\t\tw4 = INCR, w4\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w5\tC\t\t\tM 
I\n\tst8\t\t[rp] = w4, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tw5 = INCR, w5\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w5, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w6, 32\t\tC\t\t\tM23\n.Lcj3:\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = w7, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tmov\t\tr8 = 0\t\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n   (p6)\tmov\t\tr8 = 1\t\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/addlsh1_n.asm",
    "content": "dnl  IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      3.0\nC Itanium 2:    1.5\n\nC TODO\nC  * Use shladd in feed-in code (for mpn_addlsh1_n).\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_addlsh1_n',1)\n\nifdef(`OPERATION_addlsh1_n',`\n  define(ADDSUB,       add)\n  define(PRED,\t       ltu)\n  define(INCR,\t       1)\n  define(LIM,\t       -1)\n  define(func, mpn_addlsh1_n)\n')\nifdef(`OPERATION_sublsh1_n',`\n  define(ADDSUB,       sub)\n  define(PRED,\t       gtu)\n  define(INCR,\t       -1)\n  define(LIM,\t       0)\n  define(func, mpn_sublsh1_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')\ndefine(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')\ndefine(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')\ndefine(`x0',`r30') define(`x1',`r31') define(`x2',`r30') 
define(`x3',`r31')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n {.mmi;\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p0 = 4, n\t\tC\t\t\tM I\n\tadd\t\tn = -4, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tadd\t\tx3 = r11, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, x3\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tshrp\t\tx0 = v0, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, r10\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx0 = v0, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, r10\tC\t\t\tM I\n\tadd\t\tn = -1, n\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tx2 = r11, r11\t\tC\t\t\tM 
I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.dpnt\t\t.grt1\t\t\tC\t\t\tB\n\t;;\n\tADDSUB\t\tw2 = r10, x2\t\tC\t\t\tM I\n\tshr.u\t\tr8 = r11, 63\t\tC retval\t\tI0\n\t;;\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tr8 = 1, r8\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC FIXME swap with next\tI0\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = r10, x2\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, r11, 63\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tadd\t\tx1 = r11, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt2\t\t\tC\t\t\tB\n\t;;\n\tADDSUB\t\tw1 = r10, x1\t\tC\t\t\tM I\n\tshrp\t\tx2 = v2, r11, 63\tC\t\t\tI0\n\t;;\n\tcmp.PRED\tp8, p0 = w1, r10\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\tshr.u\t\tr8 = v2, 63\t\tC retval\t\tI0\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = r10, x1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx2 = v2, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = 
w1, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, v2, 63\t\tC\t\t\tI0\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dpnt\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lskip\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tadd\t\tx0 = r11, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.dpnt\t\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tshrp\t\tx1 = v1, r11, 63\tC\t\t\tI0\n\tADDSUB\t\tw0 = r10, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tshrp\t\tx1 = v1, r11, 63\tC\t\t\tI0\n\tADDSUB\t\tw0 = r10, x0\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = w1, 8\t\tC\t\t\tM23\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n.LL01:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, 
x0\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n.LL00:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p8)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n.LL11:\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, v2, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w1\tC\t\t\tM I\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n.Lskip:\tst8\t\t[rp] = w1, 8\t\tC\t\t\tM23\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p8)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tshr.u\t\tr8 = v2, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w1\tC\t\t\tM I\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = w1, 
8\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n   (p6)\tadd\t\tr8 = 1, r8\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/addmul_1.asm",
    "content": "dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the\ndnl  result to a second limb vector.\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    3.0\nC Itanium 2:  2.0\n\nC TODO\nC  * Further optimize feed-in and wind-down code, both for speed and code size.\nC  * Handle low limb input and results specially, using a common stf8 in the\nC    epilogue.\nC  * Use 1 c/l carry propagation scheme in wind-down code.\nC  * Use extra pointer registers for `up' and rp to speed up feed-in loads.\nC  * Work out final differences with mul_1.asm.  
That function is 300 bytes\nC    smaller than this due to better loop scheduling and thus simpler feed-in\nC    code.\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`n', `r34')\ndefine(`vl', `r35')\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\n\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC M I\n\taddp4\t\tup = 0, up\t\tC M I\n\tzxt4\t\tn = n\t\t\tC I\n\t;;\n')\n{.mmi\n\tadds\t\tr15 = -1, n\t\tC M I\n\tmov\t\tr20 = rp\t\tC M I\n\tmov.i\t\tr2 = ar.lc\t\tC I0\n}\n{.mmi\n\tldf8\t\tf7 = [up], 8\t\tC M\n\tldf8\t\tf8 = [rp], 8\t\tC M\n\tand\t\tr14 = 3, n\t\tC M I\n\t;;\n}\n{.mmi\n\tsetf.sig\tf6 = vl\t\t\tC M2 M3\n\tcmp.eq\t\tp10, p0 = 0, r14\tC M I\n\tshr.u\t\tr31 = r15, 2\t\tC I0\n}\n{.mmi\n\tcmp.eq\t\tp11, p0 = 2, r14\tC M I\n\tcmp.eq\t\tp12, p0 = 3, r14\tC M I\n\tnop.i\t\t0\t\t\tC I\n\t;;\n}\n{.mii\n\tcmp.ne\t\tp6, p7 = r0, r0\t\tC M I\n\tmov.i\t\tar.lc = r31\t\tC I0\n\tcmp.ne\t\tp8, p9 = r0, r0\t\tC M I\n}\n{.bbb\n  (p10)\tbr.dptk\t\t.Lb00\t\t\tC B\n  (p11)\tbr.dptk\t\t.Lb10\t\t\tC B\n  (p12)\tbr.dptk\t\t.Lb11\t\t\tC B\n\t;;\n}\n\n.Lb01:\tbr.cloop.dptk\t.grt1\t\t\tC B\n\n\txma.l\t\tf39 = f7, f6, f8\tC F\n\txma.hu\t\tf43 = f7, f6, f8\tC F\n\t;;\n\tgetf.sig\tr8 = f43\t\tC M2\n\tstf8\t\t[r20] = f39\t\tC M2 M3\n\tmov.i\t\tar.lc = r2\t\tC I0\n\tbr.ret.sptk.many b0\t\t\tC B\n\n.grt1:\n\tldf8\t\tf32 = [up], 8\n\tldf8\t\tf44 = [rp], 8\n\t;;\n\tldf8\t\tf33 = [up], 8\n\tldf8\t\tf45 = [rp], 8\n\t;;\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f7, f6, f8\n\tldf8\t\tf46 = [rp], 8\n\txma.hu\t\tf43 = f7, f6, f8\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt5\n\n\txma.l\t\tf36 = f32, f6, f44\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tstf8\t\t[r20] = f39, 8\n\txma.l\t\tf37 = f33, f6, f45\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tgetf.sig\tr31 = f43\n\tgetf.sig\tr24 = f36\n\txma.l\t\tf38 = f34, f6, f46\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tgetf.sig\tr28 = f40\n\tgetf.sig\tr25 = 
f37\n\txma.l\t\tf39 = f35, f6, f47\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tgetf.sig\tr29 = f41\n\tgetf.sig\tr26 = f38\n\tbr\t\t.Lcj5\n\n.grt5:\n\tmov\t\tr30 = 0\n\txma.l\t\tf36 = f32, f6, f44\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f45\n\tldf8\t\tf44 = [rp], 8\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tldf8\t\tf33 = [up], 8\n\tgetf.sig\tr27 = f39\n\t;;\n\tgetf.sig\tr31 = f43\n\txma.l\t\tf38 = f34, f6, f46\n\tldf8\t\tf45 = [rp], 8\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tldf8\t\tf34 = [up], 8\n\tgetf.sig\tr24 = f36\n\t;;\n\tgetf.sig\tr28 = f40\n\txma.l\t\tf39 = f35, f6, f47\n\tldf8\t\tf46 = [rp], 8\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tgetf.sig\tr25 = f37\n\tbr.cloop.dptk\t.Loop\n\tbr\t\t.Le0\n\n\n.Lb10:\tldf8\t\tf35 = [up], 8\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt2\n\n\txma.l\t\tf38 = f7, f6, f8\n\txma.hu\t\tf42 = f7, f6, f8\n\t;;\n\txma.l\t\tf39 = f35, f6, f47\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tgetf.sig\tr30 = f42\n\tstf8\t\t[r20] = f38, 8\n\tgetf.sig\tr27 = f39\n\tgetf.sig\tr8 = f43\n\tbr\t\t.Lcj2\n\n.grt2:\n\tldf8\t\tf32 = [up], 8\n\tldf8\t\tf44 = [rp], 8\n\t;;\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f7, f6, f8\n\tldf8\t\tf45 = [rp], 8\n\txma.hu\t\tf42 = f7, f6, f8\n\t;;\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f47\n\tldf8\t\tf46 = [rp], 8\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt6\n\n\tstf8\t\t[r20] = f38, 8\n\txma.l\t\tf36 = f32, f6, f44\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tgetf.sig\tr30 = f42\n\tgetf.sig\tr27 = f39\n\txma.l\t\tf37 = f33, f6, f45\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tgetf.sig\tr31 = f43\n\tgetf.sig\tr24 = f36\n\txma.l\t\tf38 = f34, f6, f46\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tgetf.sig\tr28 = f40\n\tgetf.sig\tr25 = f37\n\txma.l\t\tf39 = f35, f6, f47\n\txma.hu\t\tf43 = f35, f6, f47\n\tbr\t\t.Lcj6\n\n.grt6:\n\tmov\t\tr29 = 0\n\txma.l\t\tf36 = f32, f6, 
f44\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tldf8\t\tf32 = [up], 8\n\tgetf.sig\tr26 = f38\n\t;;\n\tgetf.sig\tr30 = f42\n\txma.l\t\tf37 = f33, f6, f45\n\tldf8\t\tf44 = [rp], 8\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tldf8\t\tf33 = [up], 8\n\tgetf.sig\tr27 = f39\n\t;;\n\tgetf.sig\tr31 = f43\n\txma.l\t\tf38 = f34, f6, f46\n\tldf8\t\tf45 = [rp], 8\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tldf8\t\tf34 = [up], 8\n\tgetf.sig\tr24 = f36\n\tbr\t\t.LL10\n\n\n.Lb11:\tldf8\t\tf34 = [up], 8\n\tldf8\t\tf46 = [rp], 8\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt3\n\t;;\n\n\txma.l\t\tf37 = f7, f6, f8\n\txma.hu\t\tf41 = f7, f6, f8\n\txma.l\t\tf38 = f34, f6, f46\n\txma.hu\t\tf42 = f34, f6, f46\n\txma.l\t\tf39 = f35, f6, f47\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tgetf.sig\tr29 = f41\n\tstf8\t\t[r20] = f37, 8\n\tgetf.sig\tr26 = f38\n\tgetf.sig\tr30 = f42\n\tgetf.sig\tr27 = f39\n\tgetf.sig\tr8 = f43\n\tbr\t\t.Lcj3\n\n.grt3:\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f7, f6, f8\n\tldf8\t\tf44 = [rp], 8\n\txma.hu\t\tf41 = f7, f6, f8\n\t;;\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f46\n\tldf8\t\tf45 = [rp], 8\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f47\n\tldf8\t\tf46 = [rp], 8\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tgetf.sig\tr25 = f37\t\tC FIXME\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt7\n\n\tgetf.sig\tr29 = f41\n\tstf8\t\t[r20] = f37, 8\t\tC FIXME\n\txma.l\t\tf36 = f32, f6, f44\n\tgetf.sig\tr26 = f38\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tgetf.sig\tr30 = f42\n\txma.l\t\tf37 = f33, f6, f45\n\tgetf.sig\tr27 = f39\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tgetf.sig\tr31 = f43\n\txma.l\t\tf38 = f34, f6, f46\n\tgetf.sig\tr24 = f36\n\txma.hu\t\tf42 = f34, f6, f46\n\tbr\t\t.Lcj7\n\n.grt7:\n\tgetf.sig\tr29 = f41\n\txma.l\t\tf36 = f32, f6, f44\n\tmov\t\tr28 = 0\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tldf8\t\tf32 = [up], 8\n\tgetf.sig\tr26 = f38\n\t;;\n\tgetf.sig\tr30 = 
f42\n\txma.l\t\tf37 = f33, f6, f45\n\tldf8\t\tf44 = [rp], 8\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tldf8\t\tf33 = [up], 8\n\tgetf.sig\tr27 = f39\n\tbr\t\t.LL11\n\n\n.Lb00:\tldf8\t\tf33 = [up], 8\n\tldf8\t\tf45 = [rp], 8\n\t;;\n\tldf8\t\tf34 = [up], 8\n\tldf8\t\tf46 = [rp], 8\n\t;;\n\tldf8\t\tf35 = [up], 8\n\txma.l\t\tf36 = f7, f6, f8\n\tldf8\t\tf47 = [rp], 8\n\txma.hu\t\tf40 = f7, f6, f8\n\tbr.cloop.dptk\t.grt4\n\n\txma.l\t\tf37 = f33, f6, f45\n\txma.hu\t\tf41 = f33, f6, f45\n\txma.l\t\tf38 = f34, f6, f46\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tgetf.sig\tr28 = f40\n\tstf8\t\t[r20] = f36, 8\n\txma.l\t\tf39 = f35, f6, f47\n\tgetf.sig\tr25 = f37\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tgetf.sig\tr29 = f41\n\tgetf.sig\tr26 = f38\n\tgetf.sig\tr30 = f42\n\tgetf.sig\tr27 = f39\n\tbr\t\t.Lcj4\n\n.grt4:\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f45\n\tldf8\t\tf44 = [rp], 8\n\txma.hu\t\tf41 = f33, f6, f45\n\t;;\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f46\n\tldf8\t\tf45 = [rp], 8\n\txma.hu\t\tf42 = f34, f6, f46\n\t;;\n\tldf8\t\tf34 = [up], 8\n\tgetf.sig\tr24 = f36\t\tC FIXME\n\txma.l\t\tf39 = f35, f6, f47\n\tldf8\t\tf46 = [rp], 8\n\tgetf.sig\tr28 = f40\n\txma.hu\t\tf43 = f35, f6, f47\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tgetf.sig\tr25 = f37\n\tldf8\t\tf47 = [rp], 8\n\tbr.cloop.dptk\t.grt8\n\n\tgetf.sig\tr29 = f41\n\tstf8\t\t[r20] = f36, 8\t\tC FIXME\n\txma.l\t\tf36 = f32, f6, f44\n\tgetf.sig\tr26 = f38\n\tgetf.sig\tr30 = f42\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\txma.l\t\tf37 = f33, f6, f45\n\tgetf.sig\tr27 = f39\n\txma.hu\t\tf41 = f33, f6, f45\n\tbr\t\t.Lcj8\n\n.grt8:\n\tgetf.sig\tr29 = f41\n\txma.l\t\tf36 = f32, f6, f44\n\tmov\t\tr31 = 0\n\txma.hu\t\tf40 = f32, f6, f44\n\t;;\n\tldf8\t\tf32 = [up], 8\n\tgetf.sig\tr26 = f38\n\tbr\t\t.LL00\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\t\t\t\tC insn\tfed\tcycle #\n.Loop:\n\t.pred.rel \"mutex\", p6, p7\t\tC num\tby\ti1 i2\n\tgetf.sig\tr29 = f41\t\tC 00\t16\t0   0\n\txma.l\t\tf36 = f32, f6, f44\tC 01\t06,15\t0 
  0\n   (p6)\tadd\t\tr14 = r30, r27, 1\tC 02\t\t0   0\n\tldf8\t\tf47 = [rp], 8\t\tC 03\t\t0   0\n\txma.hu\t\tf40 = f32, f6, f44\tC 04\t06,15\t0   0\n   (p7)\tadd\t\tr14 = r30, r27\t\tC 05\t\t0   0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tldf8\t\tf32 = [up], 8\t\tC 06\t\t1   1\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r27\tC 07\t\t1   1\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r27\tC 08\t\t1   1\n\tgetf.sig\tr26 = f38\t\tC 09\t25\t2   1\n\tst8\t\t[r20] = r14, 8\t\tC 10\t\t2   1\n\tnop.b\t\t0\t\t\tC 11\t\t2   1\n\t;;\n.LL00:\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tr30 = f42\t\tC 12\t28\t3   2\n\txma.l\t\tf37 = f33, f6, f45\tC 13\t18,27\t3   2\n   (p8)\tadd\t\tr16 = r31, r24, 1\tC 14\t\t3   2\n\tldf8\t\tf44 = [rp], 8\t\tC 15\t\t3   2\n\txma.hu\t\tf41 = f33, f6, f45\tC 16\t18,27\t3   2\n   (p9)\tadd\t\tr16 = r31, r24\t\tC 17\t\t3   2\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tldf8\t\tf33 = [up], 8\t\tC 18\t\t4   3\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r24\tC 19\t\t4   3\n   (p9)\tcmp.ltu\t\tp6, p7 = r16, r24\tC 20\t\t4   3\n\tgetf.sig\tr27 = f39\t\tC 21\t37\t5   3\n\tst8\t\t[r20] = r16, 8\t\tC 22\t\t5   3\n\tnop.b\t\t0\t\t\tC 23\t\t5   3\n\t;;\n.LL11:\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tr31 = f43\t\tC 24\t40\t6   4\n\txma.l\t\tf38 = f34, f6, f46\tC 25\t30,39\t6   4\n   (p6)\tadd\t\tr14 = r28, r25, 1\tC 26\t\t6   4\n\tldf8\t\tf45 = [rp], 8\t\tC 27\t\t6   4\n\txma.hu\t\tf42 = f34, f6, f46\tC 28\t30,39\t6   4\n   (p7)\tadd\t\tr14 = r28, r25\t\tC 29\t\t6   4\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tldf8\t\tf34 = [up], 8\t\tC 30\t\t7   5\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r25\tC 31\t\t7   5\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r25\tC 32\t\t7   5\n\tgetf.sig\tr24 = f36\t\tC 33\t01\t8   5\n\tst8\t\t[r20] = r14, 8\t\tC 34\t\t8   5\n\tnop.b\t\t0\t\t\tC 35\t\t8   5\n\t;;\n.LL10:\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tr28 = f40\t\tC 36\t04\t9   6\n\txma.l\t\tf39 = f35, f6, f47\tC 37\t42,03\t9   6\n   (p8)\tadd\t\tr16 = r29, r26, 1\tC 38\t\t9   6\n\tldf8\t\tf46 = [rp], 8\t\tC 39\t\t9   
6\n\txma.hu\t\tf43 = f35, f6, f47\tC 40\t42,03\t9   6\n   (p9)\tadd\t\tr16 = r29, r26\t\tC 41\t\t9   6\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tldf8\t\tf35 = [up], 8\t\tC 42\t       10   7\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r26\tC 43\t       10   7\n   (p9)\tcmp.ltu\t\tp6, p7 = r16, r26\tC 44\t       10   7\n\tgetf.sig\tr25 = f37\t\tC 45\t13     11   7\n\tst8\t\t[r20] = r16, 8\t\tC 46\t       11   7\n\tbr.cloop.dptk\t.Loop\t\t\tC 47\t       11   7\nC *** MAIN LOOP END ***\n\t;;\n.Le0:\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tr29 = f41\t\tC\n\txma.l\t\tf36 = f32, f6, f44\tC\n   (p6)\tadd\t\tr14 = r30, r27, 1\tC\n\tldf8\t\tf47 = [rp], 8\t\tC\n\txma.hu\t\tf40 = f32, f6, f44\tC\n   (p7)\tadd\t\tr14 = r30, r27\t\tC\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r27\tC\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r27\tC\n\tgetf.sig\tr26 = f38\t\tC\n\tst8\t\t[r20] = r14, 8\t\tC\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tr30 = f42\t\tC\n\txma.l\t\tf37 = f33, f6, f45\tC\n   (p8)\tadd\t\tr16 = r31, r24, 1\tC\n\txma.hu\t\tf41 = f33, f6, f45\tC\n   (p9)\tadd\t\tr16 = r31, r24\t\tC\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r24\tC\n   (p9)\tcmp.ltu\t\tp6, p7 = r16, r24\tC\n\tgetf.sig\tr27 = f39\t\tC\n\tst8\t\t[r20] = r16, 8\t\tC\n\t;;\n.Lcj8:\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tr31 = f43\t\tC\n\txma.l\t\tf38 = f34, f6, f46\tC\n   (p6)\tadd\t\tr14 = r28, r25, 1\tC\n\txma.hu\t\tf42 = f34, f6, f46\tC\n   (p7)\tadd\t\tr14 = r28, r25\t\tC\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r25\tC\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r25\tC\n\tgetf.sig\tr24 = f36\t\tC\n\tst8\t\t[r20] = r14, 8\t\tC\n\t;;\n.Lcj7:\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tr28 = f40\t\tC\n\txma.l\t\tf39 = f35, f6, f47\tC\n   (p8)\tadd\t\tr16 = r29, r26, 1\tC\n\txma.hu\t\tf43 = f35, f6, f47\tC\n   (p9)\tadd\t\tr16 = r29, r26\t\tC\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r26\tC\n   
(p9)\tcmp.ltu\t\tp6, p7 = r16, r26\tC\n\tgetf.sig\tr25 = f37\t\tC\n\tst8\t\t[r20] = r16, 8\t\tC\n\t;;\n.Lcj6:\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tr29 = f41\t\tC\n   (p6)\tadd\t\tr14 = r30, r27, 1\tC\n   (p7)\tadd\t\tr14 = r30, r27\t\tC\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r27\tC\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r27\tC\n\tgetf.sig\tr26 = f38\t\tC\n\tst8\t\t[r20] = r14, 8\t\tC\n\t;;\n.Lcj5:\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tr30 = f42\t\tC\n   (p8)\tadd\t\tr16 = r31, r24, 1\tC\n   (p9)\tadd\t\tr16 = r31, r24\t\tC\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r24\tC\n   (p9)\tcmp.ltu\t\tp6, p7 = r16, r24\tC\n\tgetf.sig\tr27 = f39\t\tC\n\tst8\t\t[r20] = r16, 8\t\tC\n\t;;\n.Lcj4:\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tr8 = f43\t\tC\n   (p6)\tadd\t\tr14 = r28, r25, 1\tC\n   (p7)\tadd\t\tr14 = r28, r25\t\tC\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tst8\t\t[r20] = r14, 8\t\tC\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r25\tC\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r25\tC\n\t;;\n.Lcj3:\n\t.pred.rel \"mutex\", p8, p9\n   (p8)\tadd\t\tr16 = r29, r26, 1\tC\n   (p9)\tadd\t\tr16 = r29, r26\t\tC\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tst8\t\t[r20] = r16, 8\t\tC\n   (p8)\tcmp.leu\t\tp6, p7 = r16, r26\tC\n   (p9)\tcmp.ltu\t\tp6, p7 = r16, r26\tC\n\t;;\n.Lcj2:\n\t.pred.rel \"mutex\", p6, p7\n   (p6)\tadd\t\tr14 = r30, r27, 1\tC\n   (p7)\tadd\t\tr14 = r30, r27\t\tC\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tst8\t\t[r20] = r14\t\tC\n   (p6)\tcmp.leu\t\tp8, p9 = r14, r27\tC\n   (p7)\tcmp.ltu\t\tp8, p9 = r14, r27\tC\n\t;;\n   (p8)\tadd\t\tr8 = 1, r8\t\tC M I\n\tmov.i\t\tar.lc = r2\t\tC I0\n\tbr.ret.sptk.many b0\t\t\tC B\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/addmul_2.asm",
    "content": "dnl  IA-64 mpn_addmul_2 -- Multiply a n-limb number with a 2-limb number and\ndnl  add the result to a (n+1)-limb number.\n\ndnl  Copyright 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    3.65\nC Itanium 2:  1.625\n\nC Note that this is very similar to mul_2.asm.  If you change this file,\nC please change that file too.\n\nC TODO\nC  * Clean up variable names, and try to decrease the number of distinct\nC    registers used.\nC  * Cleanup feed-in code to not require zeroing several registers.\nC  * Make sure we don't depend on uninitialized predicate registers.\nC  * We currently cross-jump very aggressively, at the expense of a few cycles\nC    per operation.  Consider changing that.\nC  * Could perhaps save a few cycles by using 1 c/l carry propagation in\nC    wind-down code.\nC  * Ultimately rewrite.  The problem with this code is that it first uses a\nC    loaded u value in one xma pair, then leaves it live over several unrelated\nC    xma pairs, before it uses it again.  It should actually be quite possible\nC    to just swap some aligned xma pairs around.  
But we should then schedule\nC    u loads further from the first use.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`n',`r34')\ndefine(`vp',`r35')\n\ndefine(`srp',`r3')\n\ndefine(`v0',`f6')\ndefine(`v1',`f7')\n\ndefine(`s0',`r14')\ndefine(`acc0',`r15')\n\ndefine(`pr0_0',`r16') define(`pr0_1',`r17')\ndefine(`pr0_2',`r18') define(`pr0_3',`r19')\n\ndefine(`pr1_0',`r20') define(`pr1_1',`r21')\ndefine(`pr1_2',`r22') define(`pr1_3',`r23')\n\ndefine(`acc1_0',`r24') define(`acc1_1',`r25')\ndefine(`acc1_2',`r26') define(`acc1_3',`r27')\n\ndnl define(`',`r28')\ndnl define(`',`r29')\ndnl define(`',`r30')\ndnl define(`',`r31')\n\ndefine(`fp0b_0',`f8') define(`fp0b_1',`f9')\ndefine(`fp0b_2',`f10') define(`fp0b_3',`f11')\n\ndefine(`fp1a_0',`f12') define(`fp1a_1',`f13')\ndefine(`fp1a_2',`f14') define(`fp1a_3',`f15')\n\ndefine(`fp1b_0',`f32') define(`fp1b_1',`f33')\ndefine(`fp1b_2',`f34') define(`fp1b_3',`f35')\n\ndefine(`fp2a_0',`f36') define(`fp2a_1',`f37')\ndefine(`fp2a_2',`f38') define(`fp2a_3',`f39')\n\ndefine(`r_0',`f40') define(`r_1',`f41')\ndefine(`r_2',`f42') define(`r_3',`f43')\n\ndefine(`u_0',`f44') define(`u_1',`f45')\ndefine(`u_2',`f46') define(`u_3',`f47')\n\ndefine(`rx',`f48')\ndefine(`ux',`f49')\ndefine(`ry',`f50')\ndefine(`uy',`f51')\n\nASM_START()\nPROLOGUE(mpn_addmul_2)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\n\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;')\n\n{.mmi\t\tC 00\n\tldf8\t\tux = [up], 8\t\tC\t\t\tM\n\tldf8\t\tv0 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi\n\tldf8\t\trx = [rp], 8\t\tC\t\t\tM\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n}{.mmi\t\tC 01\n\tldf8\t\tuy = [up], 8\t\tC\t\t\tM\n\tldf8\t\tv1 = [vp]\t\tC\t\t\tM\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n}{.mmi\n\tldf8\t\try = [rp], -8\t\tC\t\t\tM\n\tcmp.eq\t\tp10, p0 = 1, r14\tC\t\t\tM 
I\n\tcmp.eq\t\tp11, p0 = 2, r14\tC\t\t\tM I\n\t;;\n}{.mmi\t\tC 02\n\tadd\t\tsrp = 16, rp\t\tC\t\t\tM I\n\tcmp.eq\t\tp12, p0 = 3, r14\tC\t\t\tM I\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n}{.bbb\n  (p10) br.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p11) br.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p12) br.dptk\t\t.Lb11\t\t\tC\t\t\tB\n\t;;\n}\n\n\tALIGN(32)\n.Lb00:\tldf8\t\tr_1 = [srp], 8\n\tldf8\t\tu_1 = [up], 8\n\tmov\t\tacc1_2 = 0\n\tmov\t\tpr1_2 = 0\n\tmov\t\tpr0_3 = 0\n\tcmp.ne\t\tp8, p9 = r0, r0\n\t;;\n\tldf8\t\tr_2 = [srp], 8\n\txma.l\t\tfp0b_3 = ux, v0, rx\n\tcmp.ne\t\tp12, p13 = r0, r0\n\tldf8\t\tu_2 = [up], 8\n\txma.hu\t\tfp1a_3 = ux, v0, rx\n\tbr.cloop.dptk\t.grt4\n\n\txma.l\t\tfp0b_0 = uy, v0, ry\n\txma.hu\t\tfp1a_0 = uy, v0, ry\n\t;;\n\tgetf.sig\tacc0 = fp0b_3\n\txma.l\t\tfp1b_3 = ux, v1, fp1a_3\n\txma.hu\t\tfp2a_3 = ux, v1, fp1a_3\n\t;;\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\n\t;;\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = uy, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = uy, v1, fp1a_0\n\t;;\n\tgetf.sig\tpr1_3 = fp1b_3\n\tgetf.sig\tacc1_3 = fp2a_3\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\tbr\t\t.Lcj4\n\n.grt4:\txma.l\t\tfp0b_0 = uy, v0, ry\n\txma.hu\t\tfp1a_0 = uy, v0, ry\n\t;;\n\tldf8\t\tr_3 = [srp], 8\n\tgetf.sig\tacc0 = fp0b_3\n\txma.l\t\tfp1b_3 = ux, v1, fp1a_3\n\tldf8\t\tu_3 = [up], 8\n\txma.hu\t\tfp2a_3 = ux, v1, fp1a_3\n\t;;\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\n\t;;\n\tldf8\t\tr_0 = [srp], 8\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = uy, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = uy, v1, fp1a_0\n\t;;\n\tldf8\t\tu_0 = [up], 8\n\tgetf.sig\tpr1_3 = fp1b_3\n\t;;\n\tgetf.sig\tacc1_3 = fp2a_3\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\tbr\t\t.LL00\n\n\n\tALIGN(32)\n.Lb01:\tldf8\t\tr_0 = [srp], 8\t\tC M\n\tldf8\t\tu_0 = [up], 8\t\tC M\n\tmov\t\tacc1_1 = 0\t\tC M I\n\tmov\t\tpr1_1 = 0\t\tC M I\n\tmov\t\tpr0_2 = 0\t\tC M I\n\tcmp.ne\t\tp6, p7 = r0, r0\t\tC M 
I\n\t;;\n\tldf8\t\tr_1 = [srp], 8\t\tC M\n\txma.l\t\tfp0b_2 = ux, v0, rx\tC F\n\tcmp.ne\t\tp10, p11 = r0, r0\tC M I\n\tldf8\t\tu_1 = [up], 8\t\tC M\n\txma.hu\t\tfp1a_2 = ux, v0, rx\tC F\n\t;;\n\txma.l\t\tfp0b_3 = uy, v0, ry\tC F\n\txma.hu\t\tfp1a_3 = uy, v0, ry\tC F\n\t;;\n\tgetf.sig\tacc0 = fp0b_2\t\tC M\n\tldf8\t\tr_2 = [srp], 8\t\tC M\n\txma.l\t\tfp1b_2 = ux, v1,fp1a_2\tC F\n\txma.hu\t\tfp2a_2 = ux, v1,fp1a_2\tC F\n\tldf8\t\tu_2 = [up], 8\t\tC M\n\tbr.cloop.dptk\t.grt5\n\n\txma.l\t\tfp0b_0 = u_0, v0, r_0\tC F\n\txma.hu\t\tfp1a_0 = u_0, v0, r_0\tC F\n\t;;\n\tgetf.sig\tpr0_3 = fp0b_3\t\tC M\n\txma.l\t\tfp1b_3 = uy, v1,fp1a_3\tC F\n\txma.hu\t\tfp2a_3 = uy, v1,fp1a_3\tC F\n\t;;\n\tgetf.sig\tpr1_2 = fp1b_2\t\tC M\n\tgetf.sig\tacc1_2 = fp2a_2\t\tC M\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\tC F\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\tC F\n\tbr\t\t.Lcj5\n\n.grt5:\txma.l\t\tfp0b_0 = u_0, v0, r_0\n\txma.hu\t\tfp1a_0 = u_0, v0, r_0\n\t;;\n\tgetf.sig\tpr0_3 = fp0b_3\n\tldf8\t\tr_3 = [srp], 8\n\txma.l\t\tfp1b_3 = uy, v1, fp1a_3\n\txma.hu\t\tfp2a_3 = uy, v1, fp1a_3\n\t;;\n\tldf8\t\tu_3 = [up], 8\n\tgetf.sig\tpr1_2 = fp1b_2\n\t;;\n\tgetf.sig\tacc1_2 = fp2a_2\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\n\tbr\t\t.LL01\n\n\n\tALIGN(32)\n.Lb10:\t\tC 03\n\tbr.cloop.dptk\t.grt2\n\t\tC 04\n\t\tC 05\n\t\tC 06\n\txma.l\t\tfp0b_1 = ux, v0, rx\n\txma.hu\t\tfp1a_1 = ux, v0, rx\n\t;;\tC 07\n\txma.l\t\tfp0b_2 = uy, v0, ry\n\txma.hu\t\tfp1a_2 = uy, v0, ry\n\t;;\tC 08\n\t\tC 09\n\t\tC 10\n\tstf8\t\t[rp] = fp0b_1, 8\n\txma.l\t\tfp1b_1 = ux, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = ux, v1, fp1a_1\n\t;;\tC 11\n\tgetf.sig\tacc0 = fp0b_2\n\txma.l\t\tfp1b_2 = uy, v1, fp1a_2\n\txma.hu\t\tfp2a_2 = uy, v1, fp1a_2\n\t;;\tC 12\n\t\tC 13\n\t\tC 14\n\tgetf.sig\tpr1_1 = fp1b_1\n\t\tC 15\n\tgetf.sig\tacc1_1 = fp2a_1\n\t\tC 16\n\tgetf.sig\tpr1_2 = fp1b_2\n\t\tC 17\n\tgetf.sig\tr8 = fp2a_2\n\t;;\tC 18\n\t\tC 19\n\tadd\t\ts0 = pr1_1, acc0\n\t;;\tC 20\n\tst8\t\t[rp] = s0, 8\n\tcmp.ltu\t\tp8, p9 = s0, 
pr1_1\n\tsub\t\tr31 = -1, acc1_1\n\t;;\tC 21\n\t.pred.rel \"mutex\", p8, p9\n  (p8)\tadd\t\tacc0 = pr1_2, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr1_2, acc1_1\n  (p8)\tcmp.leu\t\tp10, p0 = r31, pr1_2\n  (p9)\tcmp.ltu\t\tp10, p0 = r31, pr1_2\n\t;;\tC 22\n\tst8\t\t[rp] = acc0, 8\n\tmov.i\t\tar.lc = r2\n  (p10)\tadd\t\tr8 = 1, r8\n\tbr.ret.sptk.many b0\n\n\n.grt2:\tldf8\t\tr_3 = [srp], 8\n\tldf8\t\tu_3 = [up], 8\n\tmov\t\tacc1_0 = 0\n\t;;\n\tldf8\t\tr_0 = [srp], 8\n\txma.l\t\tfp0b_1 = ux, v0, rx\n\tmov\t\tpr1_0 = 0\n\tldf8\t\tu_0 = [up], 8\n\txma.hu\t\tfp1a_1 = ux, v0, rx\n\tmov\t\tpr0_1 = 0\n\t;;\n\txma.l\t\tfp0b_2 = uy, v0, ry\n\txma.hu\t\tfp1a_2 = uy, v0, ry\n\t;;\n\tgetf.sig\tacc0 = fp0b_1\n\tldf8\t\tr_1 = [srp], 8\n\txma.l\t\tfp1b_1 = ux, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = ux, v1, fp1a_1\n\t;;\n\tldf8\t\tu_1 = [up], 8\n\txma.l\t\tfp0b_3 = u_3, v0, r_3\n\txma.hu\t\tfp1a_3 = u_3, v0, r_3\n\t;;\n\tgetf.sig\tpr0_2 = fp0b_2\n\tldf8\t\tr_2 = [srp], 8\n\txma.l\t\tfp1b_2 = uy, v1, fp1a_2\n\txma.hu\t\tfp2a_2 = uy, v1, fp1a_2\n\t;;\n\tldf8\t\tu_2 = [up], 8\n\tgetf.sig\tpr1_1 = fp1b_1\n\t;;\n\tgetf.sig\tacc1_1 = fp2a_1\n\txma.l\t\tfp0b_0 = u_0, v0, r_0\n\tcmp.ne\t\tp8, p9 = r0, r0\n\tcmp.ne\t\tp12, p13 = r0, r0\n\txma.hu\t\tfp1a_0 = u_0, v0, r_0\n\tbr\t\t.LL10\n\n\n\tALIGN(32)\n.Lb11:\tmov\t\tacc1_3 = 0\n\tmov\t\tpr1_3 = 0\n\tmov\t\tpr0_0 = 0\n\tcmp.ne\t\tp6, p7 = r0, r0\n\t;;\n\tldf8\t\tr_2 = [srp], 8\n\tldf8\t\tu_2 = [up], 8\n\tbr.cloop.dptk\t.grt3\n\t;;\n\txma.l\t\tfp0b_0 = ux, v0, rx\n\txma.hu\t\tfp1a_0 = ux, v0, rx\n\t;;\n\tcmp.ne\t\tp10, p11 = r0, r0\n\txma.l\t\tfp0b_1 = uy, v0, ry\n\txma.hu\t\tfp1a_1 = uy, v0, ry\n\t;;\n\tgetf.sig\tacc0 = fp0b_0\n\txma.l\t\tfp1b_0 = ux, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = ux, v1, fp1a_0\n\t;;\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\t;;\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = uy, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = uy, v1, fp1a_1\n\t;;\n\tgetf.sig\tpr1_0 = fp1b_0\n\tgetf.sig\tacc1_0 = 
fp2a_0\n\tbr\t\t.Lcj3\n\n.grt3:\tldf8\t\tr_3 = [srp], 8\n\txma.l\t\tfp0b_0 = ux, v0, rx\n\tcmp.ne\t\tp10, p11 = r0, r0\n\tldf8\t\tu_3 = [up], 8\n\txma.hu\t\tfp1a_0 = ux, v0, rx\n\t;;\n\txma.l\t\tfp0b_1 = uy, v0, ry\n\txma.hu\t\tfp1a_1 = uy, v0, ry\n\t;;\n\tgetf.sig\tacc0 = fp0b_0\n\tldf8\t\tr_0 = [srp], 8\n\txma.l\t\tfp1b_0 = ux, v1, fp1a_0\n\tldf8\t\tu_0 = [up], 8\n\txma.hu\t\tfp2a_0 = ux, v1, fp1a_0\n\t;;\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\t;;\n\tgetf.sig\tpr0_1 = fp0b_1\n\tldf8\t\tr_1 = [srp], 8\n\txma.l\t\tfp1b_1 = uy, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = uy, v1, fp1a_1\n\t;;\n\tldf8\t\tu_1 = [up], 8\n\tgetf.sig\tpr1_0 = fp1b_0\n\t;;\n\tgetf.sig\tacc1_0 = fp2a_0\n\txma.l\t\tfp0b_3 = u_3, v0, r_3\n\txma.hu\t\tfp1a_3 = u_3, v0, r_3\n\tbr\t\t.LL11\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\t\t\t\t\t\tC 00\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_3 = fp0b_3\n\tldf8\t\tr_3 = [srp], 8\n\txma.l\t\tfp1b_3 = u_3, v1, fp1a_3\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\txma.hu\t\tfp2a_3 = u_3, v1, fp1a_3\n\t;;\t\t\t\t\tC 01\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tldf8\t\tu_3 = [up], 8\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\t\t\t\t\tC 02\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\n\t;;\t\t\t\t\tC 03\n.LL01:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_0 = fp0b_0\n\tldf8\t\tr_0 = [srp], 8\n\txma.l\t\tfp1b_0 = u_0, v1, fp1a_0\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\txma.hu\t\tfp2a_0 = u_0, v1, fp1a_0\n\t;;\t\t\t\t\tC 04\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, 
p11\n\tldf8\t\tu_0 = [up], 8\n\tgetf.sig\tpr1_3 = fp1b_3\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\t\t\t\t\tC 05\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_3 = fp2a_3\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n  (p8)\tadd\t\tacc0 = pr0_3, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr0_3, acc1_1\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\t;;\t\t\t\t\tC 06\n.LL00:\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_1 = fp0b_1\n\tldf8\t\tr_1 = [srp], 8\n\txma.l\t\tfp1b_1 = u_1, v1, fp1a_1\n  (p12)\tadd\t\ts0 = pr1_2, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_2, acc0\n\txma.hu\t\tfp2a_1 = u_1, v1, fp1a_1\n\t;;\t\t\t\t\tC 07\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tldf8\t\tu_1 = [up], 8\n\tgetf.sig\tpr1_0 = fp1b_0\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_3\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_3\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_2\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_2\n\t;;\t\t\t\t\tC 08\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_0 = fp2a_0\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_3 = u_3, v0, r_3\n  (p6)\tadd\t\tacc0 = pr0_0, acc1_2, 1\n  (p7)\tadd\t\tacc0 = pr0_0, acc1_2\n\txma.hu\t\tfp1a_3 = u_3, v0, r_3\n\t;;\t\t\t\t\tC 09\n.LL11:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_2 = fp0b_2\n\tldf8\t\tr_2 = [srp], 8\n\txma.l\t\tfp1b_2 = u_2, v1, fp1a_2\n  (p10)\tadd\t\ts0 = pr1_3, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_3, acc0\n\txma.hu\t\tfp2a_2 = u_2, v1, fp1a_2\n\t;;\t\t\t\t\tC 10\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tldf8\t\tu_2 = [up], 8\n\tgetf.sig\tpr1_1 = fp1b_1\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_0\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_0\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_3\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_3\n\t;;\t\t\t\t\tC 11\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_1 = fp2a_1\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_0 = u_0, v0, r_0\n  
(p8)\tadd\t\tacc0 = pr0_1, acc1_3, 1\n  (p9)\tadd\t\tacc0 = pr0_1, acc1_3\n\txma.hu\t\tfp1a_0 = u_0, v0, r_0\n.LL10:\tbr.cloop.dptk\t.Loop\t\t\tC 12\n\t;;\nC *** MAIN LOOP END ***\n\n.Lcj6:\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_3 = fp0b_3\n\txma.l\t\tfp1b_3 = u_3, v1, fp1a_3\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\txma.hu\t\tfp2a_3 = u_3, v1, fp1a_3\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_1 = u_1, v0, r_1\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\txma.hu\t\tfp1a_1 = u_1, v0, r_1\n\t;;\n.Lcj5:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = u_0, v1, fp1a_0\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\txma.hu\t\tfp2a_0 = u_0, v1, fp1a_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr1_3 = fp1b_3\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_3 = fp2a_3\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_2 = u_2, v0, r_2\n  (p8)\tadd\t\tacc0 = pr0_3, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr0_3, acc1_1\n\txma.hu\t\tfp1a_2 = u_2, v0, r_2\n\t;;\n.Lcj4:\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = u_1, v1, fp1a_1\n  (p12)\tadd\t\ts0 = pr1_2, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_2, acc0\n\txma.hu\t\tfp2a_1 = u_1, v1, fp1a_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_0 = fp1b_0\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_3\n  (p9)\tcmp.ltu\t\tp6, 
p7 = acc0, pr0_3\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_2\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_2\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_0 = fp2a_0\n\tst8\t\t[rp] = s0, 8\n  (p6)\tadd\t\tacc0 = pr0_0, acc1_2, 1\n  (p7)\tadd\t\tacc0 = pr0_0, acc1_2\n\t;;\n.Lcj3:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_2 = fp0b_2\n\txma.l\t\tfp1b_2 = u_2, v1, fp1a_2\n  (p10)\tadd\t\ts0 = pr1_3, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_3, acc0\n\txma.hu\t\tfp2a_2 = u_2, v1, fp1a_2\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr1_1 = fp1b_1\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_0\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_0\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_3\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_3\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_1 = fp2a_1\n\tst8\t\t[rp] = s0, 8\n  (p8)\tadd\t\tacc0 = pr0_1, acc1_3, 1\n  (p9)\tadd\t\tacc0 = pr0_1, acc1_3\n\t;;\n.Lcj2:\n\t.pred.rel \"mutex\", p12, p13\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\t;;\n\t.pred.rel \"mutex\", p10, p11\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tst8\t\t[rp] = s0, 8\n  (p8)\tadd\t\tacc0 = pr1_2, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr1_2, acc1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n  
(p8)\tcmp.leu\t\tp10, p11 = acc0, pr1_2\n  (p9)\tcmp.ltu\t\tp10, p11 = acc0, pr1_2\n  (p12)\tadd\t\tacc0 = 1, acc0\n\t;;\n\tst8\t\t[rp] = acc0, 8\n  (p12)\tcmp.eq.or\tp10, p0 = 0, acc0\n\tmov\t\tr8 = acc1_2\n\t;;\n\t.pred.rel \"mutex\", p10, p11\n  (p10)\tadd\t\tr8 = 1, r8\n\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/addmul_4.asm",
    "content": "include(`../config.m4')\nC FILE: addmul_4_asm.s\nC\nC AUTHOR: Jason Worth Martin <jason.worth.martin@gmail.com>\nC\nC LICENSE: GNU LGPLv3+\nC\nC DESCRIPTION: uint64_t addmul_4(uint64_t *rp,\nC                                uint64_t *up,\nC                                uint64_t n,\nC                                uint64_t *vp)\nC\nC Form the product of the n-limb number pointed to by up with the\nC 4-limb number pointed to by vp.  Accumulate n limbs of the product\nC to the n-limb number pointed to by rp.  Returns the sum of the\nC most significant limb of the product with the last carry from the\nC addition.\nC\nC WARNING: This code will only work correctly if n > 1.  This is okay since\nC for n = 1 it isn't worth the overhead of starting up all the pipelines.\nC So, mul_basecase needs to check for the size of n and use a smaller routine,\nC such as addmul_2 or addmul_1 if n is small.\nC\nC HOW THIS CODE WORKS\nC -------------------\nC\nC This code is highly pipelined.  There are four multiplication pipelines,\nC two load pipelines, and four addition pipelines.  All the pipelines are run\nC in parallel to hide the latency of the operations.  This means we have a\nC large startup penalty, but once the pipelines are full we get four\nC multiplications every six cycles for a throughput of 1.5 cycles/mul.\nC\nC The multiplication pipelines are m1, m2, m3, m4 with the mklo and mkhi\nC for k=1,2,3,4 denoting the lo and hi words.\nC\nC The addition pipelines are a1, a2, a3, a4.  The addition pipelines have\nC varying lengths to accomidate the staggered additions.  In other words,\nC we have add lines that look like this:\nC\nC **** **** **** a1_1 a1_0\nC **** **** a2_2 a2_1 a2_0\nC **** a3_3 a3_2 a3_1 a3_0\nC a4_4 a4_3 a4_2 a4_1 a4_0\nC\nC where the additions are performed in columns.  We put the results from mul1\nC into a1, mul2 into a2, etc.\nC\nC Note that we use the add+add+cmp+cmp carry propigation method instead of\nC the add+cmp+add method.  
This is not a disadvantage because the main loop\nC needs the ldf8, ld8, st8, and br instructions anyway, so we bury the extra\nC compares in the bundles used by those instructions.  This means that we are\nC propigatin single bit carries along each of these add lines separately instead\nC of dealing with word-wise carries.  Using the word-wise carry propigation\nC would reduce the number of instructions needed (add+cmp+add) but increases\nC the latency.  At this point, we need at least 16 limbs in the input before\nC we break even with two times the addmul_2 routine, so it isn't worth using\nC the longer latency loops because somewhere between 50 and 60 limbs we\nC transition into Karasuba multiplication anyway.\nC\nC The code uses the register rotation supported on Itanium.  On each iteration\nC of the main loop, the value appears to move up a register.  We label the\nC registers to take this into account so that, for eaxmple, the value in a1_1\nC appears in a1_0 on the next loop cycle.\nC\nC We also use the rotating predicate registers to turn on and off the loads and\nC stores.  This is because we need to wait until the first result has moved\nC all the way through the pipelines before storing it.  Likewise, we stop the\nC loads in the epilogue stages of the loop but continue the stores.  See the\nC Itanium documentation for a full explaination of register rotation and the\nC br.ctop instruction that supports it.\nC\n\n\nC\nC General Register use:\nC\nC r0      - Always 0\nC r1      - Global Pointer (preserve)\nC r2      - stores ar.pfs (scratch)\nC r3      - scratch\nC r4-r7   - preserve\nC r8      - return0 (structure/union return pointer on entry)\nC r9      - lc store (return1)\nC r10     - small loop trip count (return2)\nC r11     - scratch (return3)\nC r12     - stack pointer\nC r13     - thread pointer (preserve)\n\n\nC v1p = vp -- only used at beginning of routine.  
Originally in r35\nC v2p = vp + 1*8\nC v3p = vp + 2*8\nC v4p = vp + 3*8\nC numlim = number of limbs\nC res1 = (*rp) + a1_0\nC res2 = res1  + a2_0\nC res3 = res2  + a3_0\nC res4 = res3  + a4_0\nC sp = store pointer.  This is where we write the result.  It is originally rp\npr_store = r3\nlc_store = r9\ntripcnt_s = r10\nrptr\t= r14\nuptr\t= r15\nv1p\t= r35\nv2p\t= r16\nv3p\t= r17\nv4p\t= r18\ntripcnt\t= r19\nres1\t= r20\nres2\t= r21\nres3\t= r22\nres4\t= r8\nsptr\t= r23\nm1hi_s\t= r24\nm2hi_s\t= r25\nm3hi_s\t= r26\nm4hi_s\t= r27\n\t\nC\nC General Register Rotating Pipelines\nC\nC\nC r32     - add line 1 stage 1 (rp on entry)\nC r33     - add line 1 stage 0 (uptr on entry)\na1_1\t= r32\na1_0\t= r33\n\nC r34     - add line 2 stage 2 (n on entry)\nC r35     - add line 2 stage 1 (vp on entry)\nC r36     - add line 2 stage 0 (vp on entry)\na2_2\t= r34\na2_1\t= r35\na2_0\t= r36\n\nC r37     - add line 3 stage 3\nC r38     - add line 3 stage 2\nC r39     - add line 3 stage 1\nC r40     - add line 3 stage 0\na3_3\t= r37\na3_2\t= r38\na3_1\t= r39\na3_0\t= r40\n\t\nC r41     - add line 4 stage 4\nC r42     - add line 4 stage 3\nC r43     - add line 4 stage 2\nC r44     - add line 4 stage 1\nC r45     - add line 4 stage 0\na4_4\t= r41\na4_3\t= r42\na4_2\t= r43\na4_1\t= r44\na4_0\t= r45\n\nC rp input pipeline\nC rpin_c\t= r46\nrpin_c\t= r47\nrpin_3\t= r48\nrpin_2\t= r49\nrpin_1\t= r50\nrpin_0\t= r51\n\nC Remainder of rotating registers\ntoprot\t= r55\n\n\nC\nC Floating Point Register Use\nC\nv1\t= f6\nv2\t= f7\nv3\t= f8\nv4\t= f9\n\nC\nC Floating point rotating register pipelines:\nC\n\nC fp load pipeline\nupld_c  = f127\nupld_2\t= f32\nupld_1\t= f33\nupld_0\t= f34\n\nC mul_line_1:\nm1lo_1\t= f35\nm1lo_0\t= f36\nm1hi_1\t= f37\nm1hi_0\t= f38\n\t\nC mul_line_2:\nm2lo_1\t= f39\nm2lo_0\t= f40\nm2hi_1\t= f41\nm2hi_0\t= f42\n\t\nC mul_line_3:\nm3lo_1\t= f43\nm3lo_0\t= f44\nm3hi_1\t= f45\nm3hi_0\t= f46\n\t\nC mul_line_4:\nm4lo_1\t= f47\nm4lo_0\t= f48\nm4hi_1\t= f49\nm4hi_0\t= 
f50\n\n\n\nC\nC Predicate Register Use\nC\nc1_1\t= p47\nc1_0\t= p48\nc1n_1\t= p49\nc1n_0\t= p50\n\nc2_1\t= p51\nc2_0\t= p52\nc2n_1\t= p53\nc2n_0\t= p54\n\nc3_1\t= p55\nc3_0\t= p56\nc3n_1\t= p57\nc3n_0\t= p58\n\nc4_1\t= p59\nc4_0\t= p60\nc4n_1\t= p61\nc4n_0\t= p62\n\n\n\nASM_START()\nPROLOGUE(mpn_addmul_4)\n\t.explicit\n\t.pred.rel \"mutex\", p14, p15\n\t.pred.rel \"mutex\", c1_0, c1n_0\n\t.pred.rel \"mutex\", c1_1, c1n_1\n\t.pred.rel \"mutex\", c2_0, c2n_0\n\t.pred.rel \"mutex\", c2_1, c2n_1\n\t.pred.rel \"mutex\", c3_0, c3n_0\n\t.pred.rel \"mutex\", c3_1, c3n_1\n\t.pred.rel \"mutex\", c4_0, c4n_0\n\t.pred.rel \"mutex\", c4_1, c4n_1\n\tC cycle 1\n{\n\t.prologue\n\t.save\tar.pfs, r2\n\talloc\tr2 = ar.pfs,4,20,0,24\n\t.save\tar.lc, lc_store\n\tmov\tlc_store = ar.lc\n\t.save\tpr, pr_store\n\tmov\tpr_store = pr\n}\n{\n\t.body\n\tmov\trptr = r32\n\tmov\tsptr = r32\n\tmov\tuptr = r33\n}\n\t;; \n\tC cycle 2\n{\n\tldf8\tv1 = [v1p]\t\n\tcmp.eq\tp14,p15 = 1, r34\n\tadd\tv2p = 8, r35\n}\n{\n\tldf8\tupld_0 = [uptr], 8\n\tadd\tv3p = 16, r35\n\tadd\tv4p = 24, r35\n}\n\t;;\n\tC cycle 3\n{\n\tldf8\tv2 = [v2p]\n(p15)\tadd\ttripcnt = -2, r34\n\tmov\tm1lo_0 = f0\n}\n{\n\tldf8\tv3 = [v3p]\n(p14)\tmov\ttripcnt = 0\n\tmov\tm1hi_0 = f0\n}\n\tC cycle 4\n{\n\tldf8\tv4 = [v4p]\n\tmov\tm2lo_0 = f0\n(p15)\tmov\tpr.rot = 0x0fff0000 C PR16 = 16-27 set to 1\n}\n{\n\tld8\trpin_2 = [rptr], 8\n\tmov\tm2hi_0 = f0\n(p14)\tmov\tpr.rot = 0x0ffe0000 C Single Limb\n}\n\t;;\n\tC cycle 5\n{\n\tmov\trpin_3 = 0\n\tmov\trpin_1 = 0\n\tmov\tm3lo_0 = f0\n}\n{\n\tmov\trpin_0 = 0\n\tcmp.ne\tc3_0, c3n_0 = r0, r0\n\tmov\tm3hi_0 = f0\n}\n\tC cycle 6\n{\n\tcmp.ne\tc4_0, c4n_0 = r0, r0\n\tmov\ta1_1 = 0\n\tmov\tm4lo_0 = f0\n}\n{\n\tmov\ta1_0 = 0\n\tmov\ta2_2 = 0\n\tmov\tm4hi_0 = f0\n}\n\tC cycle 7\n{\n\tmov\ta2_1 = 0\n\tmov\ta2_0 = 0\n\tmov\tupld_1 = f0\n}\n{\n\tmov\ta3_3 = 0\n\tmov\tupld_2 = f0\n(p14)\tmov\tar.ec = 7\n}\n\t;;\n\tC cycle 8\n{\n\tmov\ta3_2 = 0\n\tmov\tupld_c = f0\n(p15)\tmov\tar.ec = 
8\n}\n{\n\tcmp.ne\tc1_0, c1n_0 = r0, r0\n\tmov\ttoprot = 0\n\tmov\tar.lc = tripcnt\n}\n\tC cycle 9\n{\n\tcmp.ne\tc2_0, c2n_0 = r0, r0\n\tmov\ta3_0 = 0\n\tmov\ta3_1 = 0\n}\n{\n\tmov\ta4_4 = 0\n\tmov\ta4_3 = 0\n\tmov\ta4_0 = 0\n}\n\tC cycle 10\n{\n\tmov\ta4_1 = 0\n\tmov\ta4_2 = 0\n\tnop\t0\n}\n{\n\tnop\t0\n\tnop\t0\n\tnop\t0\n}\n\t;;\t\naddmul_4_main_loop:\n\t.pred.rel \"mutex\", c1_0, c1n_0\n\t.pred.rel \"mutex\", c1_1, c1n_1\n\t.pred.rel \"mutex\", c2_0, c2n_0\n\t.pred.rel \"mutex\", c2_1, c2n_1\n\t.pred.rel \"mutex\", c3_0, c3n_0\n\t.pred.rel \"mutex\", c3_1, c3n_1\n\t.pred.rel \"mutex\", c4_0, c4n_0\n\t.pred.rel \"mutex\", c4_1, c4n_1\n\tC loop cycle 1\n{\n\tgetf.sig\ta1_1 = m1lo_0\n(p16)\tldf8\tupld_1 = [uptr], 8\n\txma.lu\tm1lo_1 = upld_0, v1, m1hi_0\n}\n{\n\tnop\t0\n\tnop\t0\n\txma.hu\tm1hi_1 = upld_0, v1, m1hi_0\n}\n\t;; \n\tC loop cycle 2\n{\n\tgetf.sig\ta2_2 = m2lo_0\n\txma.lu\tm2lo_1 = upld_0, v2, m2hi_0\n(c1_0)\tadd\tres1 = rpin_0, a1_0, 1\n}\n{\n(p30)\tst8\t[sptr] = res4, 8\n(c1n_0)\tadd\tres1 = rpin_0, a1_0\n\txma.hu\tm2hi_1 = upld_0, v2, m2hi_0\n}\n\t;; \n\tC loop cycle 3\n{\n\tgetf.sig\ta3_3 = m3lo_0\n(p16)\tld8\trpin_3 = [rptr], 8\n\txma.lu\tm3lo_1 = upld_0, v3, m3hi_0\n}\n{\n(c2_0)\tadd\tres2 = res1, a2_0, 1\n(c2n_0)\tadd\tres2 = res1, a2_0\n\txma.hu\tm3hi_1 = upld_0, v3, m3hi_0\t\n}\n\t;; \n\tC loop cycle 4\n{\n\tgetf.sig\ta4_4 = m4lo_0\n\txma.lu\tm4lo_1 = upld_0, v4, m4hi_0\n(c1n_0)\tcmp.ltu\tc1_1, c1n_1 = res1, rpin_0\n}\n{\n(c3_0)\tadd\tres3 = res2, a3_0, 1\n(c3n_0)\tadd\tres3 = res2, a3_0\n\txma.hu\tm4hi_1 = upld_0, v4, m4hi_0\t\n}\n\t;; \n\tC loop cycle 5\n{\n(c1_0)\tcmp.leu\tc1_1, c1n_1 = res1, rpin_0\n(c4_0)\tadd\tres4 = res3, a4_0, 1\n(c4n_0)\tadd\tres4 = res3, a4_0\n}\n{\n(c2n_0)\tcmp.ltu\tc2_1, c2n_1 = res2, res1\n(c2_0)\tcmp.leu\tc2_1, c2n_1 = res2, res1\n(c3n_0)\tcmp.ltu\tc3_1, c3n_1 = res3, res2\n}\n\t;;\n\tC loop cycle 6\n{\n(c3_0)\tcmp.leu\tc3_1, c3n_1 = res3, res2\n\tmov\tupld_c = f0\n\tmov\trpin_c = 0\n}\n{\n(c4_0)\tcmp.leu\tc4_1, 
c4n_1 = res4, res3\n(c4n_0)\tcmp.ltu\tc4_1, c4n_1 = res4, res3\n\tbr.ctop.sptk.many\taddmul_4_main_loop\n}\n\t;;\n\t\n\t.auto\n\tmov\tpr = pr_store,-1\n\tmov\tar.lc = lc_store\n\tmov\tar.pfs = r2\n\tbr.ret.sptk.many\tb0\nEPILOGUE()\nASM_END()\n\n"
  },
  {
    "path": "mpn/ia64/and_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_and_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = 
$3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, r14)\t\tC\t\t\tM 
I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 
8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/andn_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_andn_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 
= $3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, 
r14)\t\tC\t\t\tM I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 
= [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/copyd.asm",
    "content": "dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.\n\ndnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    1\nC Itanium 2:  0.5\n\nC INPUT PARAMETERS\nC rp = r32\nC sp = r33\nC n = r34\n\nASM_START()\nPROLOGUE(mpn_copyd)\n\t.prologue\n\t.save ar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tr32 = 0, r32\n\taddp4\t\tr33 = 0, r33\n\tsxt4\t\tr34 = r34\n\t;;\n')\n{.mmi\n\tshladd\t\tr32 = r34, 3, r32\n\tshladd\t\tr33 = r34, 3, r33\n\tmov.i\t\tr2 = ar.lc\n}\n{.mmi\n\tand\t\tr14 = 3, r34\n\tcmp.ge\t\tp14, p15 = 3, r34\n\tadd\t\tr34 = -4, r34\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp8, p0 = 1, r14\n\tcmp.eq\t\tp10, p0 = 2, r14\n\tcmp.eq\t\tp12, p0 = 3, r14\n}\n{.bbb\n  (p8)\tbr.dptk\t\t.Lb01\n  (p10)\tbr.dptk\t\t.Lb10\n  (p12)\tbr.dptk\t\t.Lb11\n}\n\n.Lb00:\tC  n = 0, 4, 8, 12, ...\n\tadd\t\tr32 = -8, r32\n\tadd\t\tr33 = -8, r33\n  (p14)\tbr.dptk\t\t.Ls00\n\t;;\n\tadd\t\tr21 = -8, r33\n\tld8\t\tr16 = [r33], -16\n\tshr\t\tr15 = r34, 2\n\t;;\n\tld8\t\tr17 = [r21], -16\n\tmov.i\t\tar.lc = r15\n\tld8\t\tr18 = [r33], -16\n\tadd\t\tr20 = -8, r32\n\t;;\n\tld8\t\tr19 = [r21], 
-16\n\tbr.cloop.dptk\t.Loop\n\t;;\n\tbr.sptk\t\t.Lend\n\t;;\n\n.Lb01:\tC  n = 1, 5, 9, 13, ...\n\tadd\t\tr21 = -8, r33\n\tadd\t\tr20 = -8, r32\n\tadd\t\tr33 = -16, r33\n\tadd\t\tr32 = -16, r32\n\t;;\n\tld8\t\tr19 = [r21], -16\n\tshr\t\tr15 = r34, 2\n  (p14)\tbr.dptk\t\t.Ls01\n\t;;\n\tld8\t\tr16 = [r33], -16\n\tmov.i\t\tar.lc = r15\n\t;;\n\tld8\t\tr17 = [r21], -16\n\tld8\t\tr18 = [r33], -16\n\tbr.sptk\t\t.Li01\n\t;;\n\n.Lb10:\tC  n = 2,6, 10, 14, ...\n\tadd\t\tr21 = -16, r33\n\tshr\t\tr15 = r34, 2\n\tadd\t\tr20 = -16, r32\n\tadd\t\tr32 = -8, r32\n\tadd\t\tr33 = -8, r33\n\t;;\n\tld8\t\tr18 = [r33], -16\n\tld8\t\tr19 = [r21], -16\n\tmov.i\t\tar.lc = r15\n  (p14)\tbr.dptk\t\t.Ls10\n\t;;\n\tld8\t\tr16 = [r33], -16\n\tld8\t\tr17 = [r21], -16\n\tbr.sptk\t\t.Li10\n\t;;\n\n.Lb11:\tC  n = 3, 7, 11, 15, ...\n\tadd\t\tr21 = -8, r33\n\tadd\t\tr20 = -8, r32\n\tadd\t\tr33 = -16, r33\n\tadd\t\tr32 = -16, r32\n\t;;\n\tld8\t\tr17 = [r21], -16\n\tshr\t\tr15 = r34, 2\n\t;;\n\tld8\t\tr18 = [r33], -16\n\tmov.i\t\tar.lc = r15\n\tld8\t\tr19 = [r21], -16\n  (p14)\tbr.dptk\t\t.Ls11\n\t;;\n\tld8\t\tr16 = [r33], -16\n\tbr.sptk\t\t.Li11\n\t;;\n\n\tALIGN(32)\n.Loop:\n.Li00:\n{.mmb\n\tst8\t\t[r32] = r16, -16\n\tld8\t\tr16 = [r33], -16\n\tnop.b\t\t0\n}\n.Li11:\n{.mmb\n\tst8\t\t[r20] = r17, -16\n\tld8\t\tr17 = [r21], -16\n\tnop.b\t\t0\n\t;;\n}\n.Li10:\n{.mmb\n\tst8\t\t[r32] = r18, -16\n\tld8\t\tr18 = [r33], -16\n\tnop.b\t\t0\n}\n.Li01:\n{.mmb\n\tst8\t\t[r20] = r19, -16\n\tld8\t\tr19 = [r21], -16\n\tbr.cloop.dptk\t.Loop\n\t;;\n}\n.Lend:\tst8\t\t[r32] = r16, -16\n.Ls11:\tst8\t\t[r20] = r17, -16\n\t;;\n.Ls10:\tst8\t\t[r32] = r18, -16\n.Ls01:\tst8\t\t[r20] = r19, -16\n.Ls00:\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/copyi.asm",
    "content": "dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.\n\ndnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    1\nC Itanium 2:  0.5\n\nC INPUT PARAMETERS\nC rp = r32\nC sp = r33\nC n = r34\n\nASM_START()\nPROLOGUE(mpn_copyi)\n\t.prologue\n\t.save ar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tr32 = 0, r32\n\taddp4\t\tr33 = 0, r33\n\tsxt4\t\tr34 = r34\n\t;;\n')\n{.mmi\n\tnop\t\t0\n\tnop\t\t0\n\tmov.i\t\tr2 = ar.lc\n}\n{.mmi\n\tand\t\tr14 = 3, r34\n\tcmp.ge\t\tp14, p15 = 3, r34\n\tadd\t\tr34 = -4, r34\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp8, p0 = 1, r14\n\tcmp.eq\t\tp10, p0 = 2, r14\n\tcmp.eq\t\tp12, p0 = 3, r14\n}\n{.bbb\n  (p8)\tbr.dptk\t\t.Lb01\n  (p10)\tbr.dptk\t\t.Lb10\n  (p12)\tbr.dptk\t\t.Lb11\n}\n\n.Lb00:\tC  n = 0, 4, 8, 12, ...\n  (p14)\tbr.dptk\t\t.Ls00\n\t;;\n\tadd\t\tr21 = 8, r33\n\tld8\t\tr16 = [r33], 16\n\tshr\t\tr15 = r34, 2\n\t;;\n\tld8\t\tr17 = [r21], 16\n\tmov.i\t\tar.lc = r15\n\tld8\t\tr18 = [r33], 16\n\tadd\t\tr20 = 8, r32\n\t;;\n\tld8\t\tr19 = [r21], 16\n\tbr.cloop.dptk\t.Loop\n\t;;\n\tbr.sptk\t\t.Lend\n\t;;\n\n.Lb01:\tC  n = 1, 5, 9, 13, ...\n\tadd\t\tr21 = 0, r33\n\tadd\t\tr20 = 0, r32\n\tadd\t\tr33 = 8, 
r33\n\tadd\t\tr32 = 8, r32\n\t;;\n\tld8\t\tr19 = [r21], 16\n\tshr\t\tr15 = r34, 2\n  (p14)\tbr.dptk\t\t.Ls01\n\t;;\n\tld8\t\tr16 = [r33], 16\n\tmov.i\t\tar.lc = r15\n\t;;\n\tld8\t\tr17 = [r21], 16\n\tld8\t\tr18 = [r33], 16\n\tbr.sptk\t\t.Li01\n\t;;\n\n.Lb10:\tC  n = 2,6, 10, 14, ...\n\tadd\t\tr21 = 8, r33\n\tadd\t\tr20 = 8, r32\n\tld8\t\tr18 = [r33], 16\n\tshr\t\tr15 = r34, 2\n\t;;\n\tld8\t\tr19 = [r21], 16\n\tmov.i\t\tar.lc = r15\n  (p14)\tbr.dptk\t\t.Ls10\n\t;;\n\tld8\t\tr16 = [r33], 16\n\tld8\t\tr17 = [r21], 16\n\tbr.sptk\t\t.Li10\n\t;;\n\n.Lb11:\tC  n = 3, 7, 11, 15, ...\n\tadd\t\tr21 = 0, r33\n\tadd\t\tr20 = 0, r32\n\tadd\t\tr33 = 8, r33\n\tadd\t\tr32 = 8, r32\n\t;;\n\tld8\t\tr17 = [r21], 16\n\tshr\t\tr15 = r34, 2\n\t;;\n\tld8\t\tr18 = [r33], 16\n\tmov.i\t\tar.lc = r15\n\tld8\t\tr19 = [r21], 16\n  (p14)\tbr.dptk\t\t.Ls11\n\t;;\n\tld8\t\tr16 = [r33], 16\n\tbr.sptk\t\t.Li11\n\t;;\n\n\tALIGN(32)\n.Loop:\n.Li00:\n{.mmb\n\tst8\t\t[r32] = r16, 16\n\tld8\t\tr16 = [r33], 16\n\tnop.b\t\t0\n}\n.Li11:\n{.mmb\n\tst8\t\t[r20] = r17, 16\n\tld8\t\tr17 = [r21], 16\n\tnop.b\t\t0\n\t;;\n}\n.Li10:\n{.mmb\n\tst8\t\t[r32] = r18, 16\n\tld8\t\tr18 = [r33], 16\n\tnop.b\t\t0\n}\n.Li01:\n{.mmb\n\tst8\t\t[r20] = r19, 16\n\tld8\t\tr19 = [r21], 16\n\tbr.cloop.dptk\t.Loop\n\t;;\n}\n.Lend:\tst8\t\t[r32] = r16, 16\n.Ls11:\tst8\t\t[r20] = r17, 16\n\t;;\n.Ls10:\tst8\t\t[r32] = r18, 16\n.Ls01:\tst8\t\t[r20] = r19, 16\n.Ls00:\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/divexact_1.asm",
    "content": "dnl  IA-64 mpn_divexact_1 -- mpn by limb exact division.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC            cycles/limb\nC Itanium:      16\nC Itanium 2:     8\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`n',  `r34')\ndefine(`divisor', `r35')\n\ndefine(`lshift', `r24')\ndefine(`rshift', `r25')\n\nC This code is a bit messy, and not as similar to mode1o.asm as desired.\n\nC The critical path during initialization is for computing the inverse of the\nC divisor.  
Since odd divisors are probably common, we conditionally execute\nC the initial count_traling_zeros code and the downshift.\n\nC Possible improvement: Merge more of the feed-in code into the inverse\nC computation.\n\nASM_START()\n\t.text\n\t.align\t32\n.Ltab:\ndata1\t0,0x01, 0,0xAB, 0,0xCD, 0,0xB7, 0,0x39, 0,0xA3, 0,0xC5, 0,0xEF\ndata1\t0,0xF1, 0,0x1B, 0,0x3D, 0,0xA7, 0,0x29, 0,0x13, 0,0x35, 0,0xDF\ndata1\t0,0xE1, 0,0x8B, 0,0xAD, 0,0x97, 0,0x19, 0,0x83, 0,0xA5, 0,0xCF\ndata1\t0,0xD1, 0,0xFB, 0,0x1D, 0,0x87, 0,0x09, 0,0xF3, 0,0x15, 0,0xBF\ndata1\t0,0xC1, 0,0x6B, 0,0x8D, 0,0x77, 0,0xF9, 0,0x63, 0,0x85, 0,0xAF\ndata1\t0,0xB1, 0,0xDB, 0,0xFD, 0,0x67, 0,0xE9, 0,0xD3, 0,0xF5, 0,0x9F\ndata1\t0,0xA1, 0,0x4B, 0,0x6D, 0,0x57, 0,0xD9, 0,0x43, 0,0x65, 0,0x8F\ndata1\t0,0x91, 0,0xBB, 0,0xDD, 0,0x47, 0,0xC9, 0,0xB3, 0,0xD5, 0,0x7F\ndata1\t0,0x81, 0,0x2B, 0,0x4D, 0,0x37, 0,0xB9, 0,0x23, 0,0x45, 0,0x6F\ndata1\t0,0x71, 0,0x9B, 0,0xBD, 0,0x27, 0,0xA9, 0,0x93, 0,0xB5, 0,0x5F\ndata1\t0,0x61, 0,0x0B, 0,0x2D, 0,0x17, 0,0x99, 0,0x03, 0,0x25, 0,0x4F\ndata1\t0,0x51, 0,0x7B, 0,0x9D, 0,0x07, 0,0x89, 0,0x73, 0,0x95, 0,0x3F\ndata1\t0,0x41, 0,0xEB, 0,0x0D, 0,0xF7, 0,0x79, 0,0xE3, 0,0x05, 0,0x2F\ndata1\t0,0x31, 0,0x5B, 0,0x7D, 0,0xE7, 0,0x69, 0,0x53, 0,0x75, 0,0x1F\ndata1\t0,0x21, 0,0xCB, 0,0xED, 0,0xD7, 0,0x59, 0,0xC3, 0,0xE5, 0,0x0F\ndata1\t0,0x11, 0,0x3B, 0,0x5D, 0,0xC7, 0,0x49, 0,0x33, 0,0x55, 0,0xFF\n\n\nPROLOGUE(mpn_divexact_1)\n\t.prologue\n\t.save\t\tar.lc, r2\n\t.body\n\n {.mmi;\tadd\t\tr8 = -1, divisor\tC M0\n\tnop\t\t0\t\t\tC M1\n\ttbit.z\t\tp8, p9 = divisor, 0\tC I0\n}\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC M2  rp extend\n\taddp4\t\tup = 0, up\t\tC M3  up extend\n\tsxt4\t\tn = n')\t\t\tC I1  size extend\n\t;;\n.Lhere:\n {.mmi;\tld8\t\tr20 = [up], 8\t\tC M0  up[0]\n  (p8)\tandcm\t\tr8 = r8, divisor\tC M1\n\tmov\t\tr15 = ip\t\tC I0  .Lhere\n\t;;\n}{.mii\n\t.pred.rel \"mutex\", p8, p9\n  (p9)\tmov\t\trshift = 0\t\tC M0\n  (p8)\tpopcnt\t\trshift = r8\t\tC I0 r8 = 
cnt_lo_zeros(divisor)\n\tcmp.eq\t\tp6, p10 = 1, n\t\tC I1\n\t;;\n}{.mii;\tadd\t\tr9 = .Ltab-.Lhere, r15\tC M0\n  (p8)\tshr.u\t\tdivisor = divisor, rshift C I0\n\tnop\t\t0\t\t\tC I1\n\t;;\n}{.mmi;\tadd\t\tn = -4, n\t\tC M0  size-1\n  (p10)\tld8\t\tr21 = [up], 8\t\tC M1  up[1]\n\tmov\t\tr14 = 2\t\t\tC M1  2\n}{.mfi;\tsetf.sig\tf6 = divisor\t\tC M2  divisor\n\tmov\t\tf9 = f0\t\t\tC M3  carry\t\tFIXME\n\tzxt1\t\tr3 = divisor\t\tC I1  divisor low byte\n\t;;\n}{.mmi;\tadd\t\tr3 = r9, r3\t\tC M0  table offset ip and index\n\tsub\t\tr16 = 0, divisor\tC M1  -divisor\n\tmov\t\tr2 = ar.lc\t\tC I0\n}{.mmi;\tsub\t\tlshift = 64, rshift\tC M2\n\tsetf.sig\tf13 = r14\t\tC M3  2 in significand\n\tmov\t\tr17 = -1\t\tC I1  -1\n\t;;\n}{.mmi;\tld1\t\tr3 = [r3]\t\tC M0  inverse, 8 bits\n\tnop\t\t0\t\t\tC M1\n\tmov\t\tar.lc = n\t\tC I0  size-1 loop count\n}{.mmi;\tsetf.sig\tf12 = r16\t\tC M2  -divisor\n\tsetf.sig\tf8 = r17\t\tC M3  -1\n\tcmp.eq\t\tp7, p0 = -2, n\t\tC I1\n\t;;\n}{.mmi;\tsetf.sig\tf7 = r3\t\t\tC M2  inverse, 8 bits\n\tcmp.eq\t\tp8, p0 = -1, n\t\tC M0\n\tshr.u\t\tr23 = r20, rshift\tC I0\n\t;;\n}\n\n\tC f6\tdivisor\n\tC f7\tinverse, being calculated\n\tC f8\t-1, will be -inverse\n\tC f9\tcarry\n\tC f12\t-divisor\n\tC f13\t2\n\tC f14\tscratch\n\n\txmpy.l\t\tf14 = f13, f7\t\tC Newton 2*i\n\txmpy.l\t\tf7 = f7, f7\t\tC Newton i*i\n\t;;\n\txma.l\t\tf7 = f7, f12, f14\tC Newton i*i*-d + 2*i, 16 bits\n\t;;\n\tsetf.sig\tf10 = r23\t\tC speculative, used iff n = 1\n\txmpy.l\t\tf14 = f13, f7\t\tC Newton 2*i\n\tshl\t\tr22 = r21, lshift\tC speculative, used iff n > 1\n\txmpy.l\t\tf7 = f7, f7\t\tC Newton i*i\n\t;;\n\tor\t\tr31 = r22, r23\t\tC speculative, used iff n > 1\n\txma.l\t\tf7 = f7, f12, f14\tC Newton i*i*-d + 2*i, 32 bits\n\tshr.u\t\tr23 = r21, rshift\tC speculative, used iff n > 1\n\t;;\n\tsetf.sig\tf11 = r31\t\tC speculative, used iff n > 1\n\txmpy.l\t\tf14 = f13, f7\t\tC Newton 2*i\n\txmpy.l\t\tf7 = f7, f7\t\tC Newton i*i\n\t;;\n\txma.l\t\tf7 = f7, f12, f14\tC Newton i*i*-d + 
2*i, 64 bits\n\n  (p7)\tbr.cond.dptk\t.Ln2\n  (p10)\tbr.cond.dptk\t.grt3\n\t;;\n\n.Ln1:\txmpy.l\t\tf12 = f10, f7\t\tC q = ulimb * inverse\n\tbr\t\t.Lx1\n\n.Ln2:\n\txmpy.l\t\tf8 = f7, f8\t\tC -inverse = inverse * -1\n\txmpy.l\t\tf12 = f11, f7\t\tC q = ulimb * inverse\n\tsetf.sig\tf11 = r23\n\tbr\t\t.Lx2\n\n.grt3:\n\tld8\t\tr21 = [up], 8\t\tC up[2]\n\txmpy.l\t\tf8 = f7, f8\t\tC -inverse = inverse * -1\n\t;;\n\tshl\t\tr22 = r21, lshift\n\t;;\n\txmpy.l\t\tf12 = f11, f7\t\tC q = ulimb * inverse\n\t;;\n\tor\t\tr31 = r22, r23\n\tshr.u\t\tr23 = r21, rshift\n\t;;\n\tsetf.sig\tf11 = r31\n  (p8)\tbr.cond.dptk\t.Lx3\t\t\tC branch for n = 3\n\t;;\n\tld8\t\tr21 = [up], 8\n\tbr\t\t.Lent\n\n.Loop:\tld8\t\tr21 = [up], 8\n\txma.l\t\tf12 = f9, f8, f10\tC q = c * -inverse + si\n\t;;\n.Lent:\tadd\t\tr16 = 160, up\n\tshl\t\tr22 = r21, lshift\n\t;;\n\tstf8\t\t[rp] = f12, 8\n\txma.hu\t\tf9 = f12, f6, f9\tC c = high(q * divisor + c)\n\txmpy.l\t\tf10 = f11, f7\t\tC si = ulimb * inverse\n\t;;\n\tor\t\tr31 = r22, r23\n\tshr.u\t\tr23 = r21, rshift\n\t;;\n\tlfetch\t\t[r16]\n\tsetf.sig\tf11 = r31\n\tbr.cloop.sptk.few.clr .Loop\n\n\n\txma.l\t\tf12 = f9, f8, f10\tC q = c * -inverse + si\n\t;;\n.Lx3:\tstf8\t\t[rp] = f12, 8\n\txma.hu\t\tf9 = f12, f6, f9\tC c = high(q * divisor + c)\n\txmpy.l\t\tf10 = f11, f7\t\tC si = ulimb * inverse\n\t;;\n\tsetf.sig\tf11 = r23\n\t;;\n\txma.l\t\tf12 = f9, f8, f10\tC q = c * -inverse + si\n\t;;\n.Lx2:\tstf8\t\t[rp] = f12, 8\n\txma.hu\t\tf9 = f12, f6, f9\tC c = high(q * divisor + c)\n\txmpy.l\t\tf10 = f11, f7\t\tC si = ulimb * inverse\n\t;;\n\txma.l\t\tf12 = f9, f8, f10\tC q = c * -inverse + si\n\t;;\n.Lx1:\tstf8\t\t[rp] = f12, 8\n\tmov\t\tar.lc = r2\t\tC I0\n\tbr.ret.sptk.many b0\nEPILOGUE()\n"
  },
  {
    "path": "mpn/ia64/divrem_2.asm",
    "content": "dnl  IA-64 mpn_divrem_2 -- Divide an n-limb number by a 2-limb number.\n\ndnl  Copyright 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    63\nC Itanium 2:  46\n\n\nC TODO\nC  * Further optimize the loop.  We could probably do some more trickery with\nC    arithmetic in the FPU, or perhaps use a non-zero addend of xma in more\nC    places.\nC  * Software pipeline for perhaps 5 saved cycles, around the end and start of\nC    the loop.\nC  * Schedule code outside of loop better.\nC  * Update the comments.  
They are now using the same name for the same\nC    logical quantity.\nC  * Handle conditional zeroing of r31 in loop more cleanly.\nC  * Inline mpn_invert_limb and schedule its insns across the entire init code.\nC  * Ultimately, use 2-limb, or perhaps 3-limb or 4-limb inverse.\n\ndefine(`qp',`r32')\ndefine(`qxn',`r33')\ndefine(`np',`r34')\ndefine(`nn',`r35')\ndefine(`dp',`r36')\n\ndefine(`fnh',`f11')\ndefine(`fminus1',`f10')\ndefine(`fd0',`f13')\ndefine(`fd1',`f14')\ndefine(`d0',`r39')\ndefine(`d1',`r36')\ndefine(`fnl',`f32')\ndefine(`fdinv',`f12')\n\ndefine(`R1',`r38') define(`R0',`r37')\ndefine(`P1',`r28') define(`P0',`r27')\n\nASM_START()\n\nC HP's assembler requires these declarations for importing mpn_invert_limb\n\t.global\tmpn_invert_limb\n\t.type\tmpn_invert_limb,@function\n\nPROLOGUE(mpn_divrem_2)\n\t.prologue\n\t.save ar.pfs, r42\n\t.save ar.lc, r44\n\t.save rp, r41\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tqp = 0, qp\t\tC M I\n\taddp4\t\tnp = 0, np\t\tC M I\n\taddp4\t\tdp = 0, dp\t\tC M I\n\tzxt4\t\tnn = nn\t\t\tC I\n\tzxt4\t\tqxn = qxn\t\tC I\n\t;;\n')\n\n\talloc\t\tr42 = ar.pfs, 5,8,1,0\tC M2\n\tld8\t\td0 = [dp], 8\t\tC M0M1\td0\n\tmov\t\tr44 = ar.lc\t\tC I0\n\tshladd\t\tnp = nn, 3, np\t\tC M I\n\t;;\n\tld8\t\td1 = [dp]\t\tC M0M1\td1\n\tmov\t\tr41 = b0\t\tC I0\n\tadd\t\tr15 = -8, np\t\tC M I\n\tadd\t\tnp = -16, np\t\tC M I\n\tmov\t\tr40 = r0\t\tC M I\n\t;;\n\tld8\t\tR1 = [r15]\t\tC M0M1\tn1\n\tld8\t\tR0 = [r34], -8\t\tC M0M1\tn0\n\t;;\n\tcmp.ltu\t\tp6, p0 = d1, R1\t\tC M I\n\tcmp.eq\t\tp8, p0 = d1, R1\t\tC M I\n\t;;\n  (p8)\tcmp.leu\t\tp6, p0 = d0, R0\n\tcmp.ltu\t\tp8, p9 = R0, d0\n  (p6)\tbr.cond.dpnt\t.L_high_limb_1\t\tC FIXME: inline!\n.L8:\n\n\tmov\t\tr45 = d1\n\tbr.call.sptk.many b0 = mpn_invert_limb\tC FIXME: inline+schedule\n\t;;\n\tsetf.sig\tfd1 = d1\t\tC d1\n\tsetf.sig\tfd0 = d0\t\tC d0\n\tadd\t\tr14 = r33, r35\t\tC nn + qxn\n\t;;\n\tsetf.sig\tfdinv = r8\t\tC dinv\n\tmov\t\tr9 = -1\n\tadd\t\tr35 = -3, r14\n\t;;\n\tsetf.sig\tfminus1 = 
r9\n\tcmp.gt\t\tp6, p0 = r0, r35\n\tshladd\t\tqp = r35, 3, qp\n\tmov\t\tar.lc = r35\n\tmov\t\tr31 = 0\t\t\tC n0\n  (p6)\tbr.cond.dpnt\t.Ldone\n\t;;\n\tALIGN(16)\nC *** MAIN LOOP START ***\n.Loop:\t\tC 00\n\tmov\t\tr15 = R0\t\tC nadj = n10\n\tcmp.le\t\tp14, p15 = 0, R0\tC check high bit of R0\n\tcmp.le\t\tp8, p0 = r33, r35\tC dividend limbs remaining?\n\t;;\tC 01\n\t.pred.rel \"mutex\", p14, p15\n  (p8)\tld8\t\tr31 = [r34], -8\t\tC n0\n  (p15)\tadd\t\tr15 = d1, R0\t\tC nadj = n10 + d1\n  (p15)\tadd\t\tr14 = 1, R1\t\tC nh + (nl:63)\n  (p14)\tmov\t\tr14 = R1\t\tC nh\n\tcmp.eq\t\tp6, p0 = d1, R1\t\tC nh == d1\n  (p6)\tbr.cond.spnt\t.L_R1_eq_d1\n\t;;\tC 02\n\tsetf.sig\tf8 = r14\t\tC n2 + (nl:63)\n\tsetf.sig\tf15 = r15\t\tC nadj\n\tsub\t\tr23 = -1, R1\t\tC r23 = ~nh\n\t;;\tC 03\n\tsetf.sig\tfnh = r23\n\tsetf.sig\tfnl = R0\n\t;;\tC 08\n\txma.hu\t\tf7 = fdinv, f8, f15\tC xh = HI(dinv*(nh-nmask)+nadj)\n\t;;\tC 12\n\txma.l\t\tf7 = f7, fminus1, fnh\tC nh + xh\n\t;;\tC 16\n\tgetf.sig\tr14 = f7\n\txma.hu\t\tf9 = f7, fd1, fnl\tC xh = HI(q1*d1+nl)\n\txma.l\t\tf33 = f7, fd1, fnl\tC xh = LO(q1*d1+nl)\n\t;;\tC 20\n\tgetf.sig\tr16 = f9\n\tsub\t\tr24 = d1, R1\n\t\tC 21\n\tgetf.sig\tr17 = f33\n\t;;\tC 25\n\tcmp.eq\t\tp6, p7 = r16, r24\n\t;;\tC 26\n\t.pred.rel \"mutex\", p6, p7\n  (p6)\txma.l\t\tf8 = f7, fminus1, f0\tC f8 = -f7\n  (p7)\txma.l\t\tf8 = f7,fminus1,fminus1\tC f8 = -f7-1\n\t;;\tC 27\n\t.pred.rel \"mutex\", p6, p7\n  (p6)\tsub\t\tr18 = 0, r14\t\tC q = -q1\n  (p7)\tsub\t\tr18 = -1, r14\t\tC q = -q1-1\n  (p6)\tadd\t\tr14 = 0, r17\t\tC n1 = xl\n  (p7)\tadd\t\tr14 = d1, r17\t\tC n1 = xl + d1\n\t;;\tC 30\n\txma.hu\t\tf9 = fd0, f8, f0\tC d0*(-f7-1) = -d0*f7-d0\n\txma.l\t\tf35 = fd0, f8, f0\n\t;;\tC 34\n\tgetf.sig\tP1 = f9\t\tC P1\n\t\tC 35\n\tgetf.sig\tP0 = f35\t\tC P0\n\t;;\n.L_adj:\t\tC 40\n\tcmp.ltu\t\tp8, p0 = r31, P0\tC p8 = cy from low limb\n\tcmp.ltu\t\tp6, p0 = r14, P1\tC p6 = prel cy from high limb\n\tsub\t\tR0 = r31, P0\n\tsub\t\tR1 = r14, P1\n\t;;\tC 41\n  
(p8)\tcmp.eq.or\tp6, p0 = 0, R1\t\tC p6 = final cy from high limb\n  (p8)\tadd\t\tR1 = -1, R1\n\tcmp.ne\t\tp10, p0 = r0, r0\tC clear p10 FIXME: use unc below!\n\tcmp.ne\t\tp13, p0 = r0, r0\tC clear p13 FIXME: use unc below!\n\t;;\tC 42\n  (p6)\tadd\t\tR0 = R0, d0\n  (p6)\tadd\t\tR1 = R1, d1\n  (p6)\tadd\t\tr18 = -1, r18\t\tC q--\n\t;;\tC 43\n  (p6)\tcmp.ltu\t\tp10, p0 = R0, d0\n  (p6)\tcmp.ltu\t\tp0, p13 = R1, d1\n\t;;\tC 44\n  (p10)\tcmp.ne.and\tp0, p13 = -1, R1\tC p13 = !cy\n  (p10)\tadd\t\tR1 = 1, R1\n  (p13)\tbr.cond.spnt\t.L_two_too_big\t\tC jump if not cy\n\t;;\tC 45\n\tst8\t\t[qp] = r18, -8\n\tadd\t\tr35 = -1, r35\n\tmov\t\tr31 = 0\t\t\tC n0, next iteration\n\tbr.cloop.sptk\t.Loop\nC *** MAIN LOOP END ***\n\t;;\n.Ldone:\n\tmov\t\tr8 = r40\n\tmov\t\tb0 = r41\n\tadd\t\tr21 = 8, r34\n\tadd\t\tr22 = 16, r34\n\t;;\n\tst8\t\t[r21] = R0\n\tst8\t\t[r22] = R1\n\tmov\t\tar.pfs = r42\n\tmov\t\tar.lc = r44\n\tbr.ret.sptk.many b0\n\n.L_high_limb_1:\n\t.pred.rel \"mutex\", p8, p9\n\tsub\t\tR0 = R0, d0\n  (p8)\tsub\t\tR1 = R1, d1, 1\n  (p9)\tsub\t\tR1 = R1, d1\n\tmov\t\tr40 = 1\n\tbr.sptk\t\t.L8\n\t;;\n\n.L_two_too_big:\n\tadd\t\tR0 = R0, d0\n\tadd\t\tR1 = R1, d1\n\t;;\n\tadd\t\tr18 = -1, r18\t\tC q--\n\tcmp.ltu\t\tp10, p0 = R0, d0\n\t;;\n  (p10)\tadd\t\tR1 = 1, R1\n\tst8\t\t[qp] = r18, -8\n\tadd\t\tr35 = -1, r35\n\tmov\t\tr31 = 0\t\t\tC n0, next iteration\n\tbr.cloop.sptk\t.Loop\n\tbr.sptk\t\t.Ldone\n\n.L_R1_eq_d1:\n\tadd\t\tr14 = R0, d1\t\tC r = R0 + d1\n\tmov\t\tr18 = -1\t\tC q = -1\n\t;;\n\tcmp.leu\t\tp6, p0 = R0, r14\n (p6)\tbr.cond.spnt\t.L20\t\t\tC jump unless cy\n\t;;\n\tsub\t\tP1 = r14, d0\n\tadd\t\tR0 = r31, d0\n\t;;\n\tcmp.ltu\t\tp8, p9 = R0, r31\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tst8\t\t[qp] = r18, -8\n  (p8)\tadd\t\tR1 = r0, P1, 1\t\tC R1 = n1 - P1 - cy\n  (p9)\tadd\t\tR1 = r0, P1\t\tC R1 = n1 - P1\n\tadd\t\tr35 = -1, r35\n\tmov\t\tr31 = 0\t\t\tC n0, next iteration\n\tbr.cloop.sptk\t.Loop\n\tbr.sptk\t\t.Ldone\n\t;;\n.L20:\tcmp.ne\t\tp6, p7 = 0, 
d0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n  (p6)\tadd\t\tP1 = -1, d0\n  (p7)\tmov\t\tP1 = d0\n\tsub\t\tP0 = r0, d0\n\tbr.sptk\t\t.L_adj\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/divrem_euclidean_qr_1.asm",
    "content": "dnl  IA-64 mpn_divrem_euclidean_qr_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an\ndnl  unnormalized limb.\n\ndnl  Copyright 2002, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\n\nC         cycles/limb\nC Itanium:    40-42\nC Itanium 2:  29-30\n\nC This was generated by gcc, then the loops were optimized.  The preinv entry\nC point was shoehorned into the file.  Lots of things outside the loops could\nC be streamlined.  It would probably be a good idea to merge the loops for\nC normalized and unnormalized divisor, since the shifting stuff is done for\nC free in parallel with other operations.  
It would even be possible to merge\nC all loops, if the ld8 were made conditional.\n\nC TODO\nC  * Consider delaying inversion for normalized mpn_divrem_1 entry till after\nC    computing leading limb.\nC  * Inline and interleave limb inversion code with loop setup code.\n\nASM_START()\n\nC HP's assembler requires these declarations for importing mpn_invert_limb\n\t.global\tmpn_invert_limb\n\t.type\tmpn_invert_limb,@function\n\nC INPUT PARAMETERS\nC rp    = r32\nC qxn   = r33\nC up    = r34\nC n     = r35\nC vl    = r36\nC vlinv = r37  (preinv only)\nC cnt = r38    (preinv only)\n\nPROLOGUE(mpn_preinv_divrem_1)\n\t.prologue\n\t.save\tar.pfs, r42\n\talloc\t\tr42 = ar.pfs, 7, 8, 1, 0\n\t.save\tar.lc, r44\n\tmov\t\tr44 = ar.lc\n\t.save\trp, r41\n\tmov\t\tr41 = b0\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tr32 = 0, r32\n\tsxt4\t\tr33 = r33\n\taddp4\t\tr34 = 0, r34\n\tsxt4\t\tr35 = r35\n\t;;\n')\n\tmov\t\tr40 = r38\n\tshladd\t\tr34 = r35, 3, r34\n\t;;\n\tadds\t\tr34 = -8, r34\n\t;;\n\tld8\t\tr39 = [r34], -8\n\t;;\n\n\tadd\t\tr15 = r35, r33\n\t;;\n\tmov\t\tr8 = r37\n\tshladd\t\tr32 = r15, 3, r32\tC r32 = rp + n + qxn\n\tcmp.le\t\tp8, p0 = 0, r36\n\t;;\n\tadds\t\tr32 = -8, r32\t\tC r32 = rp + n + qxn - 1\n\tcmp.leu\t\tp6, p7 = r36, r39\n   (p8)\tbr.cond.dpnt\t.Lpunnorm\n\t;;\n\n   (p6)\taddl\t\tr15 = 1, r0\n   (p7)\tmov\t\tr15 = r0\n\t;;\n   (p6)\tsub\t\tr38 = r39, r36\n   (p7)\tmov\t\tr38 = r39\n\tst8\t\t[r32] = r15, -8\n\tadds\t\tr35 = -2, r35\t\tC un -= 2\n\tbr\t.Lpn\n\n.Lpunnorm:\n   (p6)\tadd\t\tr34 = 8, r34\n\tmov\t\tr38 = 0\t\t\tC r = 0\n\tshl\t\tr36 = r36, r40\n   (p6)\tbr.cond.dptk\t.Lpu\n\t;;\n\tshl\t\tr38 = r39, r40\t\tC r = ahigh << cnt\n\tcmp.ne\t\tp8, p0 = 1, r35\n\tst8\t\t[r32] = r0, -8\n\tadds\t\tr35 = -1, r35\t\tC un--\n   (p8)\tbr.cond.dpnt\t.Lpu\n\n\tmov\t\tr23 = 1\n\t;;\n\tsetf.sig\tf6 = r8\n\tsetf.sig\tf12 = r23\n\tbr\t\t.L435\nEPILOGUE()\n\n\nPROLOGUE(mpn_divrem_euclidean_qr_1)\n\t.prologue\n\t.save\tar.pfs, r42\n\talloc\t\tr42 = ar.pfs, 5, 8, 1, 
0\n\t.save\tar.lc, r44\n\tmov\t\tr44 = ar.lc\n\t.save\trp, r41\n\tmov\t\tr41 = b0\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tr32 = 0, r32\n\tsxt4\t\tr33 = r33\n\taddp4\t\tr34 = 0, r34\n\tsxt4\t\tr35 = r35\n\t;;\n')\n\tmov\t\tr38 = r0\n\tadd\t\tr15 = r35, r33\n\t;;\n\tcmp.ne\t\tp6, p7 = 0, r15\n\t;;\n   (p7)\tmov\t\tr8 = r0\n   (p7)\tbr.cond.dpnt\t.Lret\n\tshladd\t\tr14 = r15, 3, r32\tC r14 = rp + n + qxn\n\tcmp.le\t\tp6, p7 = 0, r36\n\t;;\n\tadds\t\tr32 = -8, r14\t\tC r32 = rp + n + qxn - 1\n   (p6)\tbr.cond.dpnt\t.Lunnorm\n\tcmp.eq\t\tp6, p7 = 0, r35\n   (p6)\tbr.cond.dpnt\t.L179\n\tshladd\t\tr14 = r35, 3, r34\n\t;;\n\tadds\t\tr14 = -8, r14\n\tadds\t\tr35 = -1, r35\n\t;;\n\tld8\t\tr38 = [r14]\n\t;;\n\tcmp.leu\t\tp6, p7 = r36, r38\n\t;;\n   (p6)\taddl\t\tr15 = 1, r0\n   (p7)\tmov\t\tr15 = r0\n\t;;\n\tst8\t\t[r32] = r15, -8\n  (p6)\tsub\t\tr38 = r38, r36\n\n.L179:\n\tmov\t\tr45 = r36\n\tadds\t\tr35 = -1, r35\n\tbr.call.sptk.many b0 = mpn_invert_limb\n\t;;\n\tshladd\t\tr34 = r35, 3, r34\n.Lpn:\n\tmov\t\tr23 = 1\n\t;;\n\tsetf.sig\tf6 = r8\n\tsetf.sig\tf12 = r23\n\tcmp.le\t\tp6, p7 = 0, r35\n\tmov\t\tr40 = 0\n   (p7)\tbr.cond.dpnt\t.L435\n\tsetf.sig\tf10 = r36\n\tmov\t\tar.lc = r35\n\tsetf.sig\tf7 = r38\n\t;;\n\tsub\t\tr28 = -1, r36\nC Develop quotient limbs for normalized divisor\n.Loop1:\t\tC 00\t\t\t\tC q=r18 nh=r38/f7\n\tld8\t\tr20 = [r34], -8\n\txma.hu\t\tf11 = f7, f6, f0\n\t;;\tC 04\n\txma.l\t\tf8 = f11, f12, f7\tC q = q + nh\n\t;;\tC 08\n\tgetf.sig\tr18 = f8\n\txma.hu\t\tf9 = f8, f10, f0\n\txma.l\t\tf8 = f8, f10, f0\n\t;;\tC 12\n\tgetf.sig\tr16 = f9\n\t\tC 13\n\tgetf.sig\tr15 = f8\n\t;;\tC 18\n\tcmp.ltu\t\tp6, p7 = r20, r15\n\tsub\t\tr15 = r20, r15\n\tsub\t\tr16 = r38, r16\n\t;;\tC 19\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0?\n   (p6)\tadd\t\tr16 = -1, r16\n   (p0)\tcmp.ne.unc\tp6, p7 = r0, r0\n\t;;\tC 20\n   (p8)\tcmp.ltu\t\tp6, p7 = r15, r36\n   (p8)\tsub\t\tr15 = r15, r36\n   (p8)\tadd\t\tr18 = 
1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\tC 21\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0 still?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0 still?\n\tcmp.ltu\t\tp6, p7 = r15, r36\tC speculative\n\tsub\t\tr28 = r15, r36\t\tC speculative, just for cmp\n\t;;\tC 22\n   (p8)\tcmp.ltu\t\tp6, p7 = r28, r36\tC redo last cmp if needed\n   (p8)\tmov\t\tr15 = r28\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\tC 23\n   (p6)\tsetf.sig\tf7 = r15\n   (p7)\tsub\t\tr15 = r15, r36\n   (p7)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\tC 24\n   (p7)\tsetf.sig\tf7 = r15\n\tst8\t\t[r32] = r18, -8\n\tmov\t\tr38 = r15\n\tbr.cloop.dptk\t.Loop1\n\t\tC 29/30\n\tbr.sptk\t\t.L435\n\t;;\n.Lunnorm:\n\tmux1\t\tr16 = r36, @rev\n\tcmp.eq\t\tp6, p7 = 0, r35\n   (p6)\tbr.cond.dpnt\t.L322\n\tshladd\t\tr34 = r35, 3, r34\n\t;;\n\tadds\t\tr34 = -8, r34\n\t;;\n\tld8\t\tr39 = [r34]\n\t;;\n\tcmp.leu\t\tp6, p7 = r36, r39\n   (p6)\tbr.cond.dptk\t.L322\n\tadds\t\tr34 = -8, r34\n\t;;\n\tmov\t\tr38 = r39\n\t;;\n\tcmp.ne\t\tp6, p7 = 1, r15\n\tst8\t\t[r32] = r0, -8\n\t;;\n   (p7)\tmov\t\tr8 = r38\n   (p7)\tbr.cond.dpnt\t.Lret\n\tadds\t\tr35 = -1, r35\n.L322:\n\tsub\t\tr14 = r0, r16\n\t;;\n\tor\t\tr14 = r16, r14\n\t;;\n\tmov\t\tr16 = -8\n\tczx1.l\t\tr14 = r14\n\t;;\n\tshladd\t\tr16 = r14, 3, r16\n\t;;\n\tshr.u\t\tr14 = r36, r16\n\t;;\n\tcmp.geu\t\tp6, p7 = 15, r14\n\t;;\n   (p7)\tshr.u\t\tr14 = r14, 4\n   (p7)\tadds\t\tr16 = 4, r16\n\t;;\n\tcmp.geu\t\tp6, p7 = 3, r14\n\t;;\n   (p7)\tshr.u\t\tr14 = r14, 2\n   (p7)\tadds\t\tr16 = 2, r16\n\t;;\n\ttbit.nz\t\tp6, p7 = r14, 1\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n  (p6)\tsub\t\tr40 = 62, r16\n  (p7)\tsub\t\tr40 = 63, r16\n\t;;\n\tshl\t\tr45 = r36, r40\n\tshl\t\tr36 = r36, r40\n\tshl\t\tr38 = r38, r40\n\tbr.call.sptk.many b0 = mpn_invert_limb\n\t;;\n.Lpu:\n\tmov\t\tr23 = 1\n\t;;\n\tsetf.sig\tf6 = r8\n\tsetf.sig\tf12 = r23\n\tcmp.eq\t\tp6, p7 = 0, r35\n   
(p6)\tbr.cond.dpnt\t.L435\n\tsub\t\tr16 = 64, r40\n\tadds\t\tr35 = -2, r35\n\t;;\n\tld8\t\tr39 = [r34], -8\n\tcmp.le\t\tp6, p7 = 0, r35\n\t;;\n\tshr.u\t\tr14 = r39, r16\n\t;;\n\tor\t\tr38 = r14, r38\n   (p7)\tbr.cond.dpnt\t.Lend3\n\t;;\n\tmov\t\tr22 = r16\n\tsetf.sig\tf10 = r36\n\tsetf.sig\tf7 = r38\n\tmov\t\tar.lc = r35\n\t;;\nC Develop quotient limbs for unnormalized divisor\n.Loop3:\n\tld8\t\tr14 = [r34], -8\n\txma.hu\t\tf11 = f7, f6, f0\n\t;;\n\txma.l\t\tf8 = f11, f12, f7\tC q = q + nh\n\t;;\n\tgetf.sig\tr18 = f8\n\txma.hu\t\tf9 = f8, f10, f0\n\tshl\t\tr20 = r39, r40\n\txma.l\t\tf8 = f8, f10, f0\n\tshr.u\t\tr24 = r14, r22\n\t;;\n\tgetf.sig\tr16 = f9\n\tgetf.sig\tr15 = f8\n\tor\t\tr20 = r24, r20\n\t;;\n\tcmp.ltu\t\tp6, p7 = r20, r15\n\tsub\t\tr15 = r20, r15\n\tsub\t\tr16 = r38, r16\n\t;;\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0?\n   (p6)\tadd\t\tr16 = -1, r16\n   (p0)\tcmp.ne.unc\tp6, p7 = r0, r0\n\t;;\n   (p8)\tcmp.ltu\t\tp6, p7 = r15, r36\n   (p8)\tsub\t\tr15 = r15, r36\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0 still?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0 still?\n\tcmp.ltu\t\tp6, p7 = r15, r36\tC speculative\n\tsub\t\tr28 = r15, r36\t\tC speculative, just for cmp\n\t;;\n   (p8)\tcmp.ltu\t\tp6, p7 = r28, r36\tC redo last cmp if needed\n   (p8)\tmov\t\tr15 = r28\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n   (p6)\tsetf.sig\tf7 = r15\n   (p7)\tsub\t\tr15 = r15, r36\n   (p7)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n   (p7)\tsetf.sig\tf7 = r15\n\tst8\t\t[r32] = r18, -8\n\tmov\t\tr39 = r14\n\tmov\t\tr38 = r15\n\tbr.cloop.dptk\t.Loop3\n\t;;\n.Lend3:\n\tsetf.sig\tf10 = r36\n\tsetf.sig\tf7 = r38\n\t;;\n\txma.hu\t\tf11 = f7, f6, f0\n\t;;\n\txma.l\t\tf8 = f11, f12, f7\tC q = q + nh\n\t;;\n\tgetf.sig\tr18 = f8\n\txma.hu\t\tf9 = f8, f10, 
f0\n\tshl\t\tr20 = r39, r40\n\txma.l\t\tf8 = f8, f10, f0\n\t;;\n\tgetf.sig\tr16 = f9\n\tgetf.sig\tr15 = f8\n\t;;\n\tcmp.ltu\t\tp6, p7 = r20, r15\n\tsub\t\tr15 = r20, r15\n\tsub\t\tr16 = r38, r16\n\t;;\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0?\n   (p6)\tadd\t\tr16 = -1, r16\n   (p0)\tcmp.ne.unc\tp6, p7 = r0, r0\n\t;;\n   (p8)\tcmp.ltu\t\tp6, p7 = r15, r36\n   (p8)\tsub\t\tr15 = r15, r36\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0 still?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0 still?\n\t;;\n   (p8)\tsub\t\tr15 = r15, r36\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n\tcmp.ltu\t\tp6, p7 = r15, r36\n\t;;\n   (p7)\tsub\t\tr15 = r15, r36\n   (p7)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n\tst8\t\t[r32] = r18, -8\n\tmov\t\tr38 = r15\n.L435:\n\tadds\t\tr35 = -1, r33\n\tcmp.le\t\tp6, p7 = 1, r33\n   (p7)\tbr.cond.dpnt\t.Lend4\n\t;;\n\tsetf.sig\tf7 = r38\n\tsetf.sig\tf10 = r36\n\tmov\t\tar.lc = r35\n\t;;\n.Loop4:\n\txma.hu\t\tf11 = f7, f6, f0\n\t;;\n\txma.l\t\tf8 = f11, f12, f7\tC q = q + nh\n\t;;\n\tgetf.sig\tr18 = f8\n\txma.hu\t\tf9 = f8, f10, f0\n\txma.l\t\tf8 = f8, f10, f0\n\t;;\n\tgetf.sig\tr16 = f9\n\tgetf.sig\tr15 = f8\n\t;;\n\tcmp.ltu\t\tp6, p7 = 0, r15\n\tsub\t\tr15 = 0, r15\n\tsub\t\tr16 = r38, r16\n\t;;\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0?\n   (p6)\tadd\t\tr16 = -1, r16\n   (p0)\tcmp.ne.unc\tp6, p7 = r0, r0\n\t;;\n   (p8)\tcmp.ltu\t\tp6, p7 = r15, r36\n   (p8)\tsub\t\tr15 = r15, r36\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tcmp.ne\t\tp8, p9 = 1, r16\t\tC is rH != 0 still?\n   (p7)\tcmp.ne\t\tp8, p9 = 0, r16\t\tC is rH != 0 still?\n\tcmp.ltu\t\tp6, p7 = r15, r36\tC speculative\n\tsub\t\tr28 = r15, r36\t\tC 
speculative, just for cmp\n\t;;\n   (p8)\tcmp.ltu\t\tp6, p7 = r28, r36\tC redo last cmp if needed\n   (p8)\tmov\t\tr15 = r28\n   (p8)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n   (p6)\tsetf.sig\tf7 = r15\n   (p7)\tsub\t\tr15 = r15, r36\n   (p7)\tadd\t\tr18 = 1, r18\t\tC q = q + 1;\tdone if: rH > 0\n\t;;\n   (p7)\tsetf.sig\tf7 = r15\n\tst8\t\t[r32] = r18, -8\n\tmov\t\tr38 = r15\n\tbr.cloop.dptk\t.Loop4\n\t;;\n.Lend4:\n\tshr.u\t\tr8 = r38, r40\n.Lret:\n\tmov\t\tar.pfs = r42\n\tmov\t\tar.lc = r44\n\tmov\t\tb0 = r41\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/gcd_1.asm",
    "content": "dnl  Itanium-2 mpn_gcd_1 -- mpn by 1 gcd.\n\ndnl  Copyright 2002, 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\n\nC           cycles/bitpair (1x1 gcd)\nC Itanium:      14 (approx)\nC Itanium 2:     6.3\n\n\nC mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);\nC\nC The entry sequence is designed to expect xsize>1 and hence a modexact\nC call.  This ought to be more common than a 1x1 operation.  Our critical\nC path is thus stripping factors of 2 from y, calling modexact, then\nC stripping factors of 2 from the x remainder returned.\nC\nC The common factors of 2 between x and y must be determined using the\nC original x, not the remainder from the modexact.  This is done with\nC x_orig which is xp[0].  There's plenty of time to do this while the rest\nC of the modexact etc is happening.\nC\nC It's possible xp[0] is zero.  In this case the trailing zeros calculation\nC popc((x-1)&~x) gives 63, and that's clearly no less than what y will\nC have, making min(x_twos,y_twos) == y_twos.\nC\nC The main loop consists of transforming x,y to abs(x-y),min(x,y), and then\nC stripping factors of 2 from abs(x-y).  
Those factors of two are\nC determined from just y-x, without the abs(), since there's the same\nC number of trailing zeros on n or -n in twos complement.  That makes the\nC dependent chain\nC\nC\tcycles\nC\t  1    sub     x-y and x-y-1\nC\t  3    andcm   (x-y-1)&~(x-y)\nC\t  2    popcnt  trailing zeros\nC\t  3    shr.u   strip abs(x-y)\nC\t ---\nC\t  9\nC\nC The selection of x-y versus y-x for abs(x-y), and the selection of the\nC minimum of x and y, is done in parallel with the above.\nC\nC The algorithm takes about 0.68 iterations per bit (two N bit operands) on\nC average, hence the final 6.3 cycles/bitpair.\nC\nC The loop is not as fast as one might hope, since there's extra latency\nC from andcm going across to the `multimedia' popcnt, and vice versa from\nC multimedia shr.u back to the integer sub.\nC\nC The loop branch is .sptk.clr since we usually expect a good number of\nC iterations, and the iterations are data dependent so it's unlikely past\nC results will predict anything much about the future.\nC\nC Not done:\nC\nC An alternate algorithm which didn't strip all twos, but instead applied\nC tbit and predicated extr on x, and then y, was attempted.  The loop was 6\nC cycles, but the algorithm is an average 1.25 iterations per bitpair for a\nC total 7.25 c/bp, which is slower than the current approach.\nC\nC Alternatives:\nC\nC Perhaps we could do something tricky by extracting a few high bits and a\nC few low bits from the operands, and looking up a table which would give a\nC set of predicates to control some shifts or subtracts or whatever.  That\nC could knock off multiple bits per iteration.\nC\nC The right shifts are a bit of a bottleneck (shr at 2 or 3 cycles, or extr\nC only going down I0), perhaps it'd be possible to shift left instead,\nC using add.  
That would mean keeping track of the lowest not-yet-zeroed\nC bit, using some sort of mask.\nC\nC Itanium-1:\nC\nC This code is not designed for itanium-1 and in fact doesn't run well on\nC that chip.  The loop seems to be about 21 cycles, probably because we end\nC up with a 10 cycle replay for not forcibly scheduling the shr.u latency.\nC Lack of branch hints might introduce a couple of bubbles too.\nC\n\nASM_START()\n\t.explicit\t\t\t\tC What does this mean?\n\nC HP's assembler requires these declarations for importing mpn_modexact_1c_odd\n\t.global\tmpn_modexact_1c_odd\n\t.type\tmpn_modexact_1c_odd,@function\n\nPROLOGUE(mpn_gcd_1)\n\n\t\tC r32\txp\n\t\tC r33\txsize\n\t\tC r34\ty\n\ndefine(x,           r8)\ndefine(xp_orig,     r32)\ndefine(xsize,       r33)\ndefine(y,           r34)  define(inputs, 3)\ndefine(save_rp,     r35)\ndefine(save_pfs,    r36)\ndefine(x_orig,      r37)\ndefine(x_orig_one,  r38)\ndefine(y_twos,      r39)  define(locals, 5)\ndefine(out_xp,      r40)\ndefine(out_xsize,   r41)\ndefine(out_divisor, r42)\ndefine(out_carry,   r43)  define(outputs, 4)\n\n\t.prologue\n{ .mmi;\nifdef(`HAVE_ABI_32',\n`\t\taddp4\tr9 = 0, xp_orig   define(xp,r9)',\tC M0\n`\t\t\t\t\t  define(xp,xp_orig)')\n\t.save ar.pfs, save_pfs\n\t\talloc\tsave_pfs = ar.pfs, inputs, locals, outputs, 0 C M2\n\t.save rp, save_rp\n\t\tmov\tsave_rp = b0\t\tC I0\n}{\t.body\n\t\tadd\tr10 = -1, y\t\tC M3  y-1\n}\t\t;;\n\n{ .mmi;\t\tld8\tx = [xp]\t\tC M0  x = xp[0] if no modexact\n\t\tld8\tx_orig = [xp]\t\tC M1  orig x for common twos\n\t\tcmp.ne\tp6,p0 = 1, xsize\tC I0\n}{ .mmi;\tandcm\ty_twos = r10, y\t\tC M2  (y-1)&~y\n\t\tmov\tout_xp = xp_orig\tC M3\n\t\tmov\tout_xsize = xsize\tC I1\n}\t\t;;\n\n\t\tmov\tout_carry = 0\n\n\t\tC\n\n\t\tpopcnt\ty_twos = y_twos\t\tC I0  y twos\n\t\t;;\n\n\t\tC\n\n{ .mmi;\t\tadd\tx_orig_one = -1, x_orig\tC M0  orig x-1\n\t\tshr.u\tout_divisor = y, y_twos\tC I0  y without twos\n}{\t\tshr.u\ty = y, y_twos\t\tC I1  y without twos\n\t(p6)\tbr.call.sptk.many b0 
= mpn_modexact_1c_odd  C if xsize>1\n}\t\t;;\n\n\t\tC modexact can leave x==0\n{ .mmi;\t\tcmp.eq\tp6,p0 = 0, x\t\tC M0  if {xp,xsize} % y == 0\n\t\tandcm\tx_orig = x_orig_one, x_orig\tC M1  orig (x-1)&~x\n\t\tadd\tr9 = -1, x\t\tC I0  x-1\n}\t\t;;\n\n{ .mmi;\t\tandcm\tr9 = r9, x\t\tC M0  (x-1)&~x\n\t\tmov\tb0 = save_rp\t\tC I0\n}\t\t;;\n\n\t\tC\n\n\t\tpopcnt\tx_orig = x_orig\t\tC I0  orig x twos\n\n\t\tpopcnt\tr9 = r9\t\t\tC I0  x twos\n\t\t;;\n\n\t\tC\n\n{\t\tcmp.lt\tp7,p0 = x_orig, y_twos\tC M0  orig x_twos < y_twos\n\t\tshr.u\tx = x, r9\t\tC I0  x odd\n}\t\t;;\n\n{\t(p7)\tmov\ty_twos = x_orig\t\tC M0  common twos\n\t\tadd\tr10 = -1, y\t\tC I0  y-1\n\t(p6)\tbr.dpnt.few .Ldone_y\t\tC B0  x%y==0 then result y\n}\t\t;;\n\n\t\tC\n\n\n\t\tC No noticable difference in speed for the loop aligned to\n\t\tC 32 or just 16.\n.Ltop:\n\t\tC r8\tx\n\t\tC r10  y-1\n\t\tC r34\ty\n\t\tC r38\tcommon twos, for use at end\n\n{  .mmi;\tcmp.gtu\tp8,p9 = x, y\tC M0  x>y\n\t\tcmp.ne\tp10,p0 = x, y\tC M1  x==y\n\t\tsub\tr9 = y, x\tC I0  d = y - x\n}{ .mmi;\tsub\tr10 = r10, x\tC M2  d-1 = y - x - 1\n}\t\t;;\n\n{ .mmi;\t.pred.rel \"mutex\", p8, p9\n\t(p8)\tsub\tx = x, y\tC M0  x>y  use x=x-y, y unchanged\n\t(p9)\tmov\ty = x\t\tC M1  y>=x use y=x\n\t(p9)\tmov\tx = r9\t\tC I0  y>=x use x=y-x\n}{ .mmi;\tandcm\tr9 = r10, r9\tC M2  (d-1)&~d\n\t\t;;\n\n\t\tadd\tr10 = -1, y\tC M0  new y-1\n\t\tpopcnt\tr9 = r9\t\tC I0  twos on x-y\n}\t\t;;\n\n{\t\tshr.u\tx = x, r9\tC I0   new x without twos\n\t(p10)\tbr.sptk.few.clr .Ltop\n}\t\t;;\n\n\n\n\t\tC result is y\n.Ldone_y:\n\t\tshl\tr8 = y, y_twos\t\tC I   common factors of 2\n\t\t;;\n\t\tmov\tar.pfs = save_pfs\tC I0\n\t\tbr.ret.sptk.many b0\n\nEPILOGUE()\n"
  },
  {
    "path": "mpn/ia64/hamdist.asm",
    "content": "dnl  IA-64 mpn_hamdist -- mpn hamming distance.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:       2\nC Itanium 2:     1\n\nC INPUT PARAMETERS\ndefine(`up', `r32')\ndefine(`vp', `r33')\ndefine(`n', `r34')\n\ndefine(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')\ndefine(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')\ndefine(`x0',`r24') define(`x1',`r25') define(`x2',`r26') define(`x3',`r27')\ndefine(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')\ndefine(`s',`r8')\n\n\nASM_START()\nPROLOGUE(mpn_hamdist)\n\t.prologue\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n\n {.mmi;\tld8\t\tr10 = [up], 8\t\tC load first ulimb\tM01\n\tld8\t\tr11 = [vp], 8\t\tC load first vlimb\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC save ar.lc\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p0 = 4, n\t\tC small count?\t\tM I\n\tadd\t\tn = -5, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 
3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n\n.Lb00:\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\txor\t\tx0 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n  (p15)\tbr.cond.dptk\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dpnt\t.grt8\t\t\tC\t\t\tB\n\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\tbr\t\t.Lcj8\t\t\tC\t\t\tB\n\n.grt8:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n\n.Lb01:\txor\t\tx3 = r10, r11\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.cond.dptk\t.grt1\t\t\tC\t\t\tB\n\t;;\n\tpopcnt\t\tr8 = x3\t\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 
8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dpnt\t.grt5\t\t\tC\t\t\tB\n\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dpnt\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lend\t\t\tC\t\t\tB\n\n\n.Lb10:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx2 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.cond.dptk\t.grt2\t\t\tC\t\t\tB\n\t;;\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\t;;\n\tadd\t\ts = c2, c3\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 
8\t\tC\t\t\tM01\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt6\t\t\tC\t\t\tB\n\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.grt6:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\tbr\t\t.LL10\t\t\tC\t\t\tB\n\n\n.Lb11:\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\txor\t\tx1 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n  (p15)\tbr.cond.dptk\t.grt3\t\t\tC\t\t\tB\n\t;;\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\t;;\n\tadd\t\ts = c1, c2\t\tC\t\t\tM I\n\t;;\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = 
x1\t\t\tC\t\t\tI0\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt7\t\t\tC\t\t\tB\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\tbr\t\t.Lcj7\t\t\tC\t\t\tB\n\n.grt7:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\n\tALIGN(32)\n.Loop:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\tnop.b\t\t1\t\t\tC\t\t\t-\n\t;;\n.LL00:\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c0\t\tC\t\t\tM I\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\tnop.b\t\t1\t\t\tC\t\t\t-\n\t;;\n.LL11:\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c1\t\tC\t\t\tM I\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\tnop.b\t\t1\t\t\tC\t\t\t-\n\t;;\n.LL10:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c2\t\tC\t\t\tM I\n\txor\t\tx0 = u0, v0\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\n\n.Lend:\tpopcnt\t\tc2 = x2\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\txor\t\tx1 = u1, v1\t\tC\t\t\tM I\n\t;;\n.Lcj8:\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c0\t\tC\t\t\tM I\n\txor\t\tx2 = u2, v2\t\tC\t\t\tM I\n\t;;\n.Lcj7:\tpopcnt\t\tc0 = x0\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c1\t\tC\t\t\tM I\n\txor\t\tx3 = u3, v3\t\tC\t\t\tM I\n\t;;\n.Lcj6:\tpopcnt\t\tc1 = x1\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c2\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tpopcnt\t\tc2 = 
x2\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tpopcnt\t\tc3 = x3\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c0\t\tC\t\t\tM I\n\t;;\n\tadd\t\ts = s, c1\t\tC\t\t\tM I\n\t;;\n\tadd\t\ts = s, c2\t\tC\t\t\tM I\n\t;;\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/ia64-defs.m4",
    "content": "divert(-1)\n\n\ndnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  ia64 assembler comments are C++ style \"//\" to the end of line.  gas\ndnl  also accepts \"#\" as a comment, if it's the first non-blank on a line.\ndnl\ndnl  BSD m4 can't handle a multi-character comment like \"//\" (see notes in\ndnl  mpn/asm-defs.m4).  For now the default \"#\" is left, but with care taken\ndnl  not to put any macros after \"foo#\" (since of course they won't expand).\n\n\ndefine(`ASM_START',\nm4_assert_numargs(0)\n`')\n\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  32-byte alignment is used for the benefit of itanium-2, where the code\ndnl  fetcher will only take 2 bundles from a 32-byte aligned target.  At\ndnl  16mod32 it only reads 1 in the first cycle.  
This might not make any\ndnl  difference if the rotate buffers are full or there's other work holding\ndnl  up execution, but we use 32-bytes to give the best chance of peak\ndnl  throughput.\ndnl\ndnl  We can use .align here despite the gas bug noted in mpn/ia64/README,\ndnl  since we're not expecting to execute across a PROLOGUE(), at least not\ndnl  currently.\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.text\n\t.align\t32\n\t.global\t$1#\n\t.proc\t$1#\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.endp\t$1#\n')\n\ndefine(`DATASTART',\n\t`dnl\n\tDATA\n$1:')\ndefine(`DATAEND',`dnl')\n\ndefine(`ASM_END',`dnl')\n\n\ndnl  Usage: ALIGN(bytes)\ndnl\ndnl  Emit a \".align\" directive.  \"bytes\" is eval()ed, so can be an\ndnl  expression.\ndnl\ndnl  This version overrides the definition in mpn/asm-defs.m4.  We supress\ndnl  any .align if the gas byte-swapped-nops bug was detected by configure\ndnl  GMP_ASM_IA64_ALIGN_OK.\n\ndefine(`ALIGN',\nm4_assert_numargs(1)\nm4_assert_defined(`IA64_ALIGN_OK')\n`ifelse(IA64_ALIGN_OK,no,,\n`.align\teval($1)')')\n\n\ndnl  Usage: ASSERT([pr] [,code])\ndnl\ndnl  Require that the given predictate register is true after executing the\ndnl  test code.  For example,\ndnl\ndnl         ASSERT(p6,\ndnl         `       cmp.eq  p6,p0 = r3, r4')\ndnl\ndnl  If the predicate register argument is empty then nothing is tested, the\ndnl  code is just executed.  This can be used for setups required by later\ndnl  ASSERTs.  
The code argument can be omitted to just test a predicate\ndnl  with no special setup code.\ndnl\ndnl  For convenience, stops are inserted before and after the code emitted.\n\ndefine(ASSERT,\nm4_assert_numargs_range(1,2)\nm4_assert_defined(`WANT_ASSERT')\n`ifelse(WANT_ASSERT,1,\n`\t;;\nifelse(`$2',,,\n`$2\n\t;;\n')\nifelse(`$1',,,\n`($1)\tbr\t.LASSERTok`'ASSERT_label_counter ;;\n\tcmp.ne\tp6,p6 = r0, r0\tC illegal instruction\n\t;;\n.LASSERTok`'ASSERT_label_counter:\ndefine(`ASSERT_label_counter',eval(ASSERT_label_counter+1))\n')\n')')\ndefine(`ASSERT_label_counter',1)\n\n\ndivert\n"
  },
  {
    "path": "mpn/ia64/invert_limb.asm",
    "content": "dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.\n\ndnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC d = r32\n\nC           cycles\nC Itanium:    74\nC Itanium 2:  50+6\n\nC It should be possible to avoid the xmpy.hu and the following tests by\nC explicitly chopping in the last fma.  
That would save about 10 cycles.\n\nASM_START()\n\t.sdata\n\t.align 16\nifdef(`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN',`\n.LC0:\tdata4 0x00000000, 0x80000000, 0x0000403f, 0x00000000\tC 2^64\n.LC1:\tdata4 0x00000000, 0x80000000, 0x0000407f, 0x00000000\tC 2^128\n\n',`ifdef(`HAVE_DOUBLE_IEEE_BIG_ENDIAN',`\n.LC0:\tdata4 0x403f8000, 0x00000000, 0x00000000, 0x00000000\tC 2^64\n.LC1:\tdata4 0x407f8000, 0x00000000, 0x00000000, 0x00000000\tC 2^128\n\n',`m4_error(`Oops, need to know float endianness\n')')')\n\n\nPROLOGUE(mpn_invert_limb)\n\t\tC 00\n\taddl\t\tr14 = @gprel(.LC0), gp\n\taddl\t\tr15 = @gprel(.LC1), gp\n\tsetf.sig\tf7 = r32\n\tadd\t\tr9 = r32, r32\t\tC check for d = 2^63\n\t;;\tC 01\n\tldfe\t\tf10 = [r14]\t\tC 2^64\n\tldfe\t\tf8 = [r15]\t\tC 2^128\n\tcmp.eq\t\tp6, p0 = 0, r9\t\tC check for d = 2^63\n\tmov\t\tr8 = -1\t\t\tC retval for 2^63\n   (p6)\tbr.ret.spnt.many b0\n\t;;\tC 07\n\tfmpy.s1\t\tf11 = f7, f10\t\tC f11 = d * 2^64\n\tfnma.s1\t\tf6 = f7, f10, f8\tC f6 = 2^128 - d * 2^64\n\t;;\tC 11\n\tfrcpa.s1\tf8, p6 = f6, f7\n\t;;\tC 15\n   (p6)\tfnma.s1\t\tf9 = f7, f8, f1\n   (p6)\tfmpy.s1\t\tf10 = f6, f8\n\t;;\tC 19\n   (p6)\tfmpy.s1\t\tf11 = f9, f9\n   (p6)\tfma.s1\t\tf10 = f9, f10, f10\n\t;;\tC 23\n   (p6)\tfma.s1\t\tf8 = f9, f8, f8\n   (p6)\tfma.s1\t\tf9 = f11, f10, f10\n\t;;\tC 27\n   (p6)\tfma.s1\t\tf8 = f11, f8, f8\n   (p6)\tfnma.s1\t\tf10 = f7, f9, f6\n\t;;\tC 31\n   (p6)\tfma.s1\t\tf8 = f10, f8, f9\n\t;;\tC 35\n\tfcvt.fxu.trunc.s1 f8 = f8\n\t;;\tC 39\n\tgetf.sig\tr8 = f8\n\txmpy.hu\t\tf10 = f8, f7\t\tC di * d\n\t;;\tC 43\n\tgetf.sig\tr14 = f10\n\tandcm\t\tr9 = -1, r32\t\tC one's complement\n\t;;\tC 48\n\tcmp.ltu\t\tp6, p0 = r9, r14\tC got overflow?\n\t;;\tC 49\n   (p6)\tadd\t\tr8 = -1, r8\t\tC adjust di down\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/ior_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_ior_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = 
$3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, r14)\t\tC\t\t\tM 
I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 
8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/iorn_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_iorn_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 
= $3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, 
r14)\t\tC\t\t\tM I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 
= [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* This form encourages gcc (pre-release 3.4 at least) to emit predicated\n   \"sub r=r,r\" and \"sub r=r,r,1\", giving a 2 cycle latency.  The generic\n   code using \"al<bl\" arithmetically comes out making an actual 0 or 1 in a\n   register, which takes an extra cycle.  */\n#define sub_ddmmss(sh, sl, ah, al, bh, bl)      \\\n  do {                                          \\\n    UWtype __x;                                 \\\n    __x = (al) - (bl);                          \\\n    if ((al) < (bl))                            \\\n      (sh) = (ah) - (bh) - 1;                   \\\n    else                                        \\\n      (sh) = (ah) - (bh);                       \\\n    (sl) = __x;                                 \\\n  } while (0)\n#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)\n/* Do both product parts in assembly, since that gives better code with\n   all gcc versions.  Some callers will just use the upper part, and in\n   that situation we waste an instruction, but not any cycles.  
*/\n#define umul_ppmm(ph, pl, m0, m1) \\\n    __asm__ (\"xma.hu %0 = %2, %3, f0\\n\\txma.l %1 = %2, %3, f0\"\t\t\\\n\t     : \"=&f\" (ph), \"=f\" (pl)\t\t\t\t\t\\\n\t     : \"f\" (m0), \"f\" (m1))\n#define count_leading_zeros(count, x) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype _x = (x), _y, _a, _c;\t\t\t\t\t\\\n    __asm__ (\"mux1 %0 = %1, @rev\" : \"=r\" (_y) : \"r\" (_x));\t\t\\\n    __asm__ (\"czx1.l %0 = %1\" : \"=r\" (_a) : \"r\" (-_y | _y));\t\t\\\n    _c = (_a - 1) << 3;\t\t\t\t\t\t\t\\\n    _x >>= _c;\t\t\t\t\t\t\t\t\\\n    if (_x >= 1 << 4)\t\t\t\t\t\t\t\\\n      _x >>= 4, _c += 4;\t\t\t\t\t\t\\\n    if (_x >= 1 << 2)\t\t\t\t\t\t\t\\\n      _x >>= 2, _c += 2;\t\t\t\t\t\t\\\n    _c += _x >> 1;\t\t\t\t\t\t\t\\\n    (count) =  W_TYPE_SIZE - 1 - _c;\t\t\t\t\t\\\n  } while (0)\n/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1\n   based, and we don't need a special case for x==0 here */\n#define count_trailing_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype __ctz_x = (x);\t\t\t\t\t\t\\\n    __asm__ (\"popcnt %0 = %1\"\t\t\t\t\t\t\\\n\t     : \"=r\" (count)\t\t\t\t\t\t\\\n\t     : \"r\" ((__ctz_x-1) & ~__ctz_x));\t\t\t\t\\\n  } while (0)\n#endif\n#if defined (__INTEL_COMPILER)\n#include <ia64intrin.h>\n#define umul_ppmm(ph, pl, m0, m1)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UWtype _m0 = (m0), _m1 = (m1);\t\t\t\t\t\\\n    ph = _m64_xmahu (_m0, _m1, 0);\t\t\t\t\t\\\n    pl = _m0 * _m1;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n#ifndef LONGLONG_STANDALONE\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do { UWtype __di;\t\t\t\t\t\t\t\\\n    __di = __MPN(invert_limb) (d);\t\t\t\t\t\\\n    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);\t\t\t\t\\\n  } while (0)\n#define UDIV_PREINV_ALWAYS  1\n#define UDIV_NEEDS_NORMALIZATION 1\n#endif\n\n#if !defined(ULONG_PARITY) && defined (__GNUC__) && ! 
defined (__INTEL_COMPILER)\n/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend\n   to a 64 bit unsigned long long for popcnt */\n#define ULONG_PARITY(p, n)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    unsigned long long  __n = (unsigned long) (n);\t\t\t\\\n    int  __p;\t\t\t\t\t\t\t\t\\\n    __asm__ (\"popcnt %0 = %1\" : \"=r\" (__p) : \"r\" (__n));\t\t\\\n    (p) = __p & 1;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n#if !defined(BSWAP_LIMB) && defined (__GNUC__) && ! defined (__INTEL_COMPILER)\n#define BSWAP_LIMB(dst, src)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    __asm__ (\"mux1 %0 = %1, @rev\" : \"=r\" (dst) :  \"r\" (src));\t\t\\\n  } while (0)\n#endif\n\n\n#if !defined(popc_limb) && defined (__GNUC__) && ! defined (__INTEL_COMPILER)\n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    __asm__ (\"popcnt %0 = %1\" : \"=r\" (result) : \"r\" (input));\t\t\\\n  } while (0)\n#endif\n"
  },
  {
    "path": "mpn/ia64/lshift.asm",
    "content": "dnl  IA-64 mpn_lshift/mpn_rshift.\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2.0\nC Itanium 2:    1.0\n\nC This code is scheduled deeply since the plain shift instructions shr and shl\nC have a latency of 4 (on Itanium) or 3 (on Itanium 2).  
Poor scheduling of\nC these instructions cause a 10 cycle replay trap on Itanium.\n\nC TODO\nC  * Optimize function entry and feed-in code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`n',`r34')\ndefine(`cnt',`r35')\n\ndefine(`tnc',`r9')\n\ndefine(`OPERATION_lshift',1)\n\nifdef(`OPERATION_lshift',`\n\tdefine(`FSH',`shl')\n\tdefine(`BSH',`shr.u')\n\tdefine(`UPD',`-8')\n\tdefine(`POFF',`-512')\n\tdefine(`PUPD',`-32')\n\tdefine(`func',`mpn_lshift')\n')\nifdef(`OPERATION_rshift',`\n\tdefine(`FSH',`shr.u')\n\tdefine(`BSH',`shl')\n\tdefine(`UPD',`8')\n\tdefine(`POFF',`512')\n\tdefine(`PUPD',`32')\n\tdefine(`func',`mpn_rshift')\n')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\t\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\tsxt4\t\tn = n\t\t\tC\t\t\tM I\n\tzxt4\t\tcnt = cnt\t\tC\t\t\tI\n\t;;\n')\n\n {.mmi;\tcmp.lt\t\tp14, p15 = 4, n\t\tC\t\t\tM I\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tadd\t\tr15 = -1, n\t\tC\t\t\tM I\n\tsub\t\ttnc = 64, cnt\t\tC\t\t\tM I\n\tadd\t\tr16 = -5, n\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tshr.u\t\tn = r16, 2\t\tC\t\t\tI0\n}{.mmi;\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\nifdef(`OPERATION_lshift',\n`\tshladd\t\tup = r15, 3, up\t\tC\t\t\tM I\n\tshladd\t\trp = r15, 3, rp')\tC\t\t\tM I\n\t;;\n}{.mmi;\tadd\t\tr11 = POFF, up\t\tC\t\t\tM I\n\tld8\t\tr10 = [up], UPD\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n}{.bbb;\n   (p6)\tbr.dptk\t\t.Lb01\n   (p7)\tbr.dptk\t\t.Lb10\n   (p8)\tbr.dptk\t\t.Lb11\n\t;;\n}\n\n.Lb00:\tld8\t\tr19 = [up], UPD\n\t;;\n\tld8\t\tr16 = [up], UPD\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n  (p14)\tbr.cond.dptk\t.grt4\n\n\tFSH\t\tr24 = r10, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, 
tnc\n\t;;\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r10, tnc\n\tbr\t\t.Lr4\n\n.grt4:\tFSH\t\tr24 = r10, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tor\t\tr14 = r25, r24\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.Ltop\n\tbr\t\t.Lbot\n\n.Lb01:\n  (p15)\tBSH\t\tr8 = r10, tnc\t\tC function return value\tI\n  (p15)\tFSH\t\tr22 = r10, cnt\t\tC\t\t\tI\n  (p15)\tbr.cond.dptk\t.Lr1\t\t\tC return\t\tB\n\n.grt1:\tld8\t\tr18 = [up], UPD\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r10, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt5\n\t;;\n\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tor\t\tr15 = r23, r22\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\tbr\t\t.Lr5\n\n.grt5:\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr15 = r23, r22\n\tld8\t\tr16 = [up], UPD\n\tbr\t\t.LL01\n\n\n.Lb10:\tld8\t\tr17 = [up], UPD\n  (p14)\tbr.cond.dptk\t.grt2\n\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tFSH\t\tr20 = r10, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr14 = r21, r20\n\tFSH\t\tr22 = r17, cnt\n\tbr\t\t.Lr2\t\t\tC return\n\n.grt2:\tld8\t\tr18 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r10, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt6\n\t;;\n\n\tor\t\tr14 
= r21, r20\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\tbr\t\t.Lr6\n\n.grt6:\tor\t\tr14 = r21, r20\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tbr\t\t.LL10\n\n\n.Lb11:\tld8\t\tr16 = [up], UPD\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n  (p14)\tbr.cond.dptk\t.grt3\n\t;;\n\n\tFSH\t\tr26 = r10, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr15 = r27, r26\n\tFSH\t\tr22 = r17, cnt\n\tbr\t\t.Lr3\t\t\tC return\n\n.grt3:\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r10, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt7\n\n\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tbr\t\t.Lr7\n\n.grt7:\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tld8\t\tr18 = [up], UPD\n\tbr\t\t.LL11\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Ltop:\n {.mmi;\tst8\t\t[rp] = r14, UPD\t\tC M2\n\tor\t\tr15 = r27, r26\t\tC M3\n\tFSH\t\tr24 = r18, cnt\t\tC I0\n}{.mmi;\tld8\t\tr18 = [up], UPD\t\tC M1\n\tlfetch\t\t[r11], PUPD\n\tBSH\t\tr25 = r19, tnc\t\tC I1\n\t;; }\n.LL11:\n {.mmi;\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r21, r20\n\tFSH\t\tr26 = r19, cnt\n}{.mmi;\tld8\t\tr19 = [up], UPD\n\tnop.m\t\t0\n\tBSH\t\tr27 = r16, tnc\n\t;; }\n.LL10:\n {.mmi;\tst8\t\t[rp] = r14, UPD\n\tor\t\tr15 = r23, r22\n\tFSH\t\tr20 = r16, cnt\n}{.mmi;\tld8\t\tr16 = [up], UPD\n\tnop.m\t\t0\n\tBSH\t\tr21 = r17, tnc\n\t;; }\n.LL01:\n {.mmi;\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n}{.mib;\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr23 = r18, tnc\n\tbr.cloop.dptk\t.Ltop\n\t;; }\n\nC *** 
MAIN LOOP END ***\n\n.Lbot:\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr7:\tor\t\tr14 = r21, r20\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\tst8\t\t[rp] = r15, UPD\n\t;;\n.Lr6:\tor\t\tr15 = r23, r22\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr5:\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n\t;;\n.Lr4:\tor\t\tr15 = r27, r26\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr3:\tor\t\tr14 = r21, r20\n\tst8\t\t[rp] = r15, UPD\n\t;;\n.Lr2:\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr1:\tst8\t\t[rp] = r22, UPD\t\tC\t\t\tM23\n\tmov\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE(func)\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/modexact_1c_odd.asm",
    "content": "dnl  Itanium-2 mpn_modexact_1c_odd -- mpn by 1 exact remainder.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 3 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\n\nC            cycles/limb\nC Itanium:      15\nC Itanium 2:     8\n\n\ndnl  Usage: ABI32(`code')\ndnl\ndnl  Emit the given code only under HAVE_ABI_32.\ndnl\ndefine(ABI32,\nm4_assert_onearg()\n`ifdef(`HAVE_ABI_32',`$1')')\n\n\nC mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,\nC                                mp_limb_t divisor, mp_limb_t carry);\nC\nC The modexact algorithm is usually conceived as a dependent chain\nC\nC\tl = src[i] - c\nC\tq = low(l * inverse)\nC\tc = high(q*divisor) + (src[i]<c)\nC\nC but we can work the src[i]-c into an xma by calculating si=src[i]*inverse\nC separately (off the dependent chain) and using\nC\nC\tq = low(c * inverse + si)\nC\tc = high(q*divisor + c)\nC\nC This means the dependent chain is simply xma.l followed by xma.hu, for a\nC total 8 cycles/limb on itanium-2.\nC\nC The reason xma.hu works for the new c is that the low of q*divisor is\nC src[i]-c (being the whole purpose of the q generated, and it can be\nC verified algebraically).  
If there was an underflow from src[i]-c, then\nC there will be an overflow from (src-c)+c, thereby adding 1 to the new c\nC the same as the borrow bit (src[i]<c) gives in the first style shown.\nC\nC Incidentally, fcmp is not an option for treating src[i]-c, since it\nC apparently traps to the kernel for unnormalized operands like those used\nC and generated by ldf8 and xma.  On one GNU/Linux system it took about 1200\nC cycles.\nC\nC\nC First Limb:\nC\nC The first limb uses q = (src[0]-c) * inverse shown in the first style.\nC This lets us get the first q as soon as the inverse is ready, without\nC going through si=s*inverse.  Basically at the start we have c and can use\nC it while waiting for the inverse, whereas for the second and subsequent\nC limbs it's the other way around, ie. we have the inverse and are waiting\nC for c.\nC\nC At .Lentry the first two instructions in the loop have been done already.\nC The load of f11=src[1] at the start (predicated on size>=2), and the\nC calculation of q by the initial different scheme.\nC\nC\nC Entry Sequence:\nC\nC In the entry sequence, the critical path is the calculation of the\nC inverse, so this is begun first and optimized.  Apart from that, ar.lc is\nC established nice and early so the br.cloop's should predict perfectly.\nC And the load for the low limbs src[0] and src[1] can be initiated long\nC ahead of where they're needed.\nC\nC\nC Inverse Calculation:\nC\nC The initial 8-bit inverse is calculated using a table lookup.  If it hits\nC L1 (which is likely if we're called several times) then it should take a\nC total 4 cycles, otherwise hopefully L2 for 9 cycles.  This is considered\nC the best approach, on balance.  It could be done bitwise, but that would\nC probably be about 14 cycles (2 per bit beyond the first couple).  
Or it\nC could be taken from 4 bits to 8 with xmpy doubling as used beyond 8 bits,\nC but that would be about 11 cycles.\nC\nC The table is not the same as binvert_limb_table, instead it's 256 bytes,\nC designed to be indexed by the low byte of the divisor.  The divisor is\nC always odd, so the relevant data is every second byte in the table.  The\nC padding lets us use zxt1 instead of extr.u, the latter would cost an extra\nC cycle because it must go down I0, and we're using the first I0 slot to get\nC ip.  The extra 128 bytes of padding should be insignificant compared to\nC typical ia64 code bloat.\nC\nC Having the table in .text allows us to use IP-relative addressing,\nC avoiding a fetch from ltoff.  .rodata is apparently not suitable for use\nC IP-relative, it gets a linker relocation overflow on GNU/Linux.\nC\nC\nC Load Scheduling:\nC\nC In the main loop, the data loads are scheduled for an L2 hit, which means\nC 6 cycles for the data ready to use.  In fact we end up 7 cycles ahead.  In\nC any case that scheduling is achieved simply by doing the load (and xmpy.l\nC for \"si\") in the immediately preceding iteration.\nC\nC The main loop requires size >= 2, and we handle size==1 by an initial\nC br.cloop to enter the loop only if size>1.  Since ar.lc is established\nC early, this should predict perfectly.\nC\nC\nC Not done:\nC\nC Consideration was given to using a plain \"(src[0]-c) % divisor\" for\nC size==1, but cycle counting suggests about 50 for the sort of approach\nC taken by gcc __umodsi3, versus about 47 for the modexact.  (Both assuming\nC L1 hits for their respective fetching.)\nC\nC Consideration was given to a test for high<divisor and replacing the last\nC loop iteration with instead c-=src[size-1] followed by c+=d if underflow.\nC Branching on high<divisor wouldn't be good since a mispredict would cost\nC more than the loop iteration saved, and the condition is of course data\nC dependent.  
So the theory would be to shorten the loop count if\nC high<divisor, and predicate extra operations at the end.  That would mean\nC a gain of 6 when high<divisor, or a cost of 2 if not.\nC\nC Whether such a tradeoff is a win on average depends on assumptions about\nC how many bits in the high and the divisor.  If both are uniformly\nC distributed then high<divisor about 50% of the time.  But smallish\nC divisors (less chance of high<divisor) might be more likely from\nC applications (mpz_divisible_ui, mpz_gcd_ui, etc).  Though biggish divisors\nC would be normal internally from say mpn/generic/perfsqr.c.  On balance,\nC for the moment, it's felt the gain is not really enough to be worth the\nC trouble.\nC\nC\nC Enhancement:\nC\nC Process two source limbs per iteration using a two-limb inverse and a\nC sequence like\nC\nC\tql  = low (c * il + sil)\tquotient low limb\nC\tqlc = high(c * il + sil)\nC\tqh1 = low (c * ih + sih)\tquotient high, partial\nC\nC\tcl = high (ql * d + c)\t\tcarry out of low\nC\tqh = low (qlc * 1 + qh1)\tquotient high limb\nC\nC\tnew c = high (qh * d + cl)\tcarry out of high\nC\nC This would be 13 cycles/iteration, giving 6.5 cycles/limb.  The two limb\nC s*inverse as sih:sil = sh:sl * ih:il would be calculated off the dependent\nC chain with 4 multiplies.  
The bigger inverse would take extra time to\nC calculate, but a one limb iteration to handle an odd size could be done as\nC soon as 64-bits of inverse were ready.\nC\nC Perhaps this could even extend to a 3 limb inverse, which might promise 17\nC or 18 cycles for 3 limbs, giving 5.66 or 6.0 cycles/limb.\nC\n\nASM_START()\n\t.explicit\n\n\t.text\n\t.align\t32\n.Ltable:\ndata1\t0,0x01, 0,0xAB, 0,0xCD, 0,0xB7, 0,0x39, 0,0xA3, 0,0xC5, 0,0xEF\ndata1\t0,0xF1, 0,0x1B, 0,0x3D, 0,0xA7, 0,0x29, 0,0x13, 0,0x35, 0,0xDF\ndata1\t0,0xE1, 0,0x8B, 0,0xAD, 0,0x97, 0,0x19, 0,0x83, 0,0xA5, 0,0xCF\ndata1\t0,0xD1, 0,0xFB, 0,0x1D, 0,0x87, 0,0x09, 0,0xF3, 0,0x15, 0,0xBF\ndata1\t0,0xC1, 0,0x6B, 0,0x8D, 0,0x77, 0,0xF9, 0,0x63, 0,0x85, 0,0xAF\ndata1\t0,0xB1, 0,0xDB, 0,0xFD, 0,0x67, 0,0xE9, 0,0xD3, 0,0xF5, 0,0x9F\ndata1\t0,0xA1, 0,0x4B, 0,0x6D, 0,0x57, 0,0xD9, 0,0x43, 0,0x65, 0,0x8F\ndata1\t0,0x91, 0,0xBB, 0,0xDD, 0,0x47, 0,0xC9, 0,0xB3, 0,0xD5, 0,0x7F\ndata1\t0,0x81, 0,0x2B, 0,0x4D, 0,0x37, 0,0xB9, 0,0x23, 0,0x45, 0,0x6F\ndata1\t0,0x71, 0,0x9B, 0,0xBD, 0,0x27, 0,0xA9, 0,0x93, 0,0xB5, 0,0x5F\ndata1\t0,0x61, 0,0x0B, 0,0x2D, 0,0x17, 0,0x99, 0,0x03, 0,0x25, 0,0x4F\ndata1\t0,0x51, 0,0x7B, 0,0x9D, 0,0x07, 0,0x89, 0,0x73, 0,0x95, 0,0x3F\ndata1\t0,0x41, 0,0xEB, 0,0x0D, 0,0xF7, 0,0x79, 0,0xE3, 0,0x05, 0,0x2F\ndata1\t0,0x31, 0,0x5B, 0,0x7D, 0,0xE7, 0,0x69, 0,0x53, 0,0x75, 0,0x1F\ndata1\t0,0x21, 0,0xCB, 0,0xED, 0,0xD7, 0,0x59, 0,0xC3, 0,0xE5, 0,0x0F\ndata1\t0,0x11, 0,0x3B, 0,0x5D, 0,0xC7, 0,0x49, 0,0x33, 0,0x55, 0,0xFF\n\n\nPROLOGUE(mpn_modexact_1c_odd)\n\n\tC r32\tsrc\n\tC r33\tsize\n\tC r34\tdivisor\n\tC r35\tcarry\n\n\t.prologue\n.Lhere:\n{ .mmi;\tadd\tr33 = -1, r33\t\tC M0  size-1\n\tmov\tr14 = 2\t\t\tC M1  2\n\tmov\tr15 = ip\t\tC I0  .Lhere\n}{.mmi;\tsetf.sig f6 = r34\t\tC M2  divisor\n\tsetf.sig f9 = r35\t\tC M3  carry\n\tzxt1\tr3 = r34\t\tC I1  divisor low byte\n}\t;;\n\n{ .mmi;\tadd\tr3 = .Ltable-.Lhere, r3\tC M0  table offset ip and index\n\tsub\tr16 = 0, r34\t\tC M1  
-divisor\n\t.save\tar.lc, r2\n\tmov\tr2 = ar.lc\t\tC I0\n}{.mmi;\t.body\n\tsetf.sig f13 = r14\t\tC M2  2 in significand\n\tmov\tr17 = -1\t\tC M3  -1\nABI32(`\tzxt4\tr33 = r33')\t\tC I1  size extend\n}\t;;\n\n{ .mmi;\tadd\tr3 = r3, r15\t\tC M0  table entry address\nABI32(` addp4\tr32 = 0, r32')\t\tC M1  src extend\n\tmov\tar.lc = r33\t\tC I0  size-1 loop count\n}{.mmi;\tsetf.sig f12 = r16\t\tC M2  -divisor\n\tsetf.sig f8 = r17\t\tC M3  -1\n}\t;;\n\n{ .mmi;\tld1\tr3 = [r3]\t\tC M0  inverse, 8 bits\n\tldf8\tf10 = [r32], 8\t\tC M1  src[0]\n\tcmp.ne\tp6,p0 = 0, r33\t\tC I0  test size!=1\n}\t;;\n\n\tC Wait for table load.\n\tC Hope for an L1 hit of 1 cycles to ALU, but could be more.\n\tsetf.sig f7 = r3\t\tC M2  inverse, 8 bits\n(p6)\tldf8\tf11 = [r32], 8\t\tC M1  src[1], if size!=1\n\t;;\n\n\tC 5 cycles\n\n\tC f6\tdivisor\n\tC f7\tinverse, being calculated\n\tC f8\t-1, will be -inverse\n\tC f9\tcarry\n\tC f10\tsrc[0]\n\tC f11\tsrc[1]\n\tC f12\t-divisor\n\tC f13\t2\n\tC f14\tscratch\n\n\txmpy.l\tf14 = f13, f7\t\tC 2*i\n\txmpy.l\tf7 = f7, f7\t\tC i*i\n\t;;\n\txma.l\tf7 = f7, f12, f14\tC i*i*-d + 2*i, inverse 16 bits\n\t;;\n\n\txmpy.l\tf14 = f13, f7\t\tC 2*i\n\txmpy.l\tf7 = f7, f7\t\tC i*i\n\t;;\n\txma.l\tf7 = f7, f12, f14\tC i*i*-d + 2*i, inverse 32 bits\n\t;;\n\n\txmpy.l\tf14 = f13, f7\t\tC 2*i\n\txmpy.l\tf7 = f7, f7\t\tC i*i\n\t;;\n\n\txma.l\tf7 = f7, f12, f14\tC i*i*-d + 2*i, inverse 64 bits\n\txma.l\tf10 = f9, f8, f10\tC sc = c * -1 + src[0]\n\t;;\nC ASSERT(p6, `\nC\txmpy.l\tf15 = f6, f7 ;;\tC divisor*inverse\nC\tgetf.sig r31 = f15 ;;\nC\tcmp.eq\tp6,p0 = 1, r31\tC should == 1\nC ')\n\n\txmpy.l\tf10 = f10, f7\t\tC q = sc * inverse\n\txmpy.l\tf8 = f7, f8\t\tC -inverse = inverse * -1\n\tbr.cloop.sptk.few.clr .Lentry\tC main loop, if size > 1\n\t;;\n\n\tC size==1, finish up now\n\txma.hu\tf9 = f10, f6, f9\tC c = high(q * divisor + c)\n\tmov\tar.lc = r2\t\tC I0\n\t;;\n\tgetf.sig r8 = f9\t\tC M2  return c\n\tbr.ret.sptk.many b0\n\n\n\n.Ltop:\n\tC r2\tsaved ar.lc\n\tC 
f6\tdivisor\n\tC f7\tinverse\n\tC f8\t-inverse\n\tC f9\tcarry\n\tC f10\tsrc[i] * inverse\n\tC f11\tscratch src[i+1]\n\n\tadd\tr16 = 160, r32\n\tldf8\tf11 = [r32], 8\t\tC src[i+1]\n\t;;\n\tC 2 cycles\n\n\tlfetch\t[r16]\n\txma.l\tf10 = f9, f8, f10\tC q = c * -inverse + si\n\t;;\n\tC 3 cycles\n\n.Lentry:\n\txma.hu\tf9 = f10, f6, f9\tC c = high(q * divisor + c)\n\txmpy.l\tf10 = f11, f7\t\tC si = src[i] * inverse\n\tbr.cloop.sptk.few.clr .Ltop\n\t;;\n\n\n\n\txma.l\tf10 = f9, f8, f10\tC q = c * -inverse + si\n\tmov\tar.lc = r2\t\tC I0\n\t;;\n\txma.hu\tf9 = f10, f6, f9\tC c = high(q * divisor + c)\n\t;;\n\tgetf.sig r8 = f9\t\tC M2  return c\n\tbr.ret.sptk.many b0\n\nEPILOGUE()\n"
  },
  {
    "path": "mpn/ia64/mul_1.asm",
    "content": "dnl  IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and\ndnl  store the result in a second limb vector.\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    4.0\nC Itanium 2:  2.0\n\nC TODO\nC  * Further optimize feed-in and wind-down code, both for speed and code size.\nC  * Handle low limb input and results specially, using a common stf8 in the\nC    epilogue.\nC  * Use 1 c/l carry propagation scheme in wind-down code.\nC  * Use extra pointer register for `up' to speed up feed-in loads.\nC  * Work out final differences with addmul_1.asm.\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`n', `r34')\ndefine(`vl', `r35')\ndefine(`cy', `r36')\tC for mpn_mul_1c\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\n\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC M I\n\taddp4\t\tup = 0, up\t\tC M I\n\tzxt4\t\tn = n\t\t\tC I\n\t;;\n')\n{.mfi\n\tadds\t\tr15 = -1, n\t\tC M I\n\tmov\t\tf9 = f0\t\t\tC F\n\tmov.i\t\tr2 = ar.lc\t\tC I0\n}\n{.mmi\n\tldf8\t\tf7 = [up], 8\t\tC M\n\tnop.m\t\t0\t\t\tC M\n\tand\t\tr14 = 3, n\t\tC M 
I\n\t;;\n}\n.Lcommon:\n{.mii\n\tsetf.sig\tf6 = vl\t\t\tC M2 M3\n\tshr.u\t\tr31 = r15, 2\t\tC I0\n\tcmp.eq\t\tp10, p0 = 0, r14\tC M I\n}\n{.mii\n\tcmp.eq\t\tp11, p0 = 2, r14\tC M I\n\tcmp.eq\t\tp12, p0 = 3, r14\tC M I\n\tnop.i\t\t0\t\t\tC I\n\t;;\n}\n{.mii\n\tcmp.ne\t\tp6, p7 = r0, r0\t\tC M I\n\tmov.i\t\tar.lc = r31\t\tC I0\n\tcmp.ne\t\tp8, p9 = r0, r0\t\tC M I\n}\n{.bbb\n  (p10)\tbr.dptk\t\t.Lb00\t\t\tC B\n  (p11)\tbr.dptk\t\t.Lb10\t\t\tC B\n  (p12)\tbr.dptk\t\t.Lb11\t\t\tC B\n\t;;\n}\n\n.Lb01:\tmov\t\tr20 = 0\n\tbr.cloop.dptk\t.grt1\t\t\tC B\n\n\txma.l\t\tf39 = f7, f6, f9\tC F\n\txma.hu\t\tf43 = f7, f6, f9\tC F\n\t;;\n\tgetf.sig\tr8 = f43\t\tC M2\n\tstf8\t\t[rp] = f39\t\tC M2 M3\n\tmov.i\t\tar.lc = r2\t\tC I0\n\tbr.ret.sptk.many b0\t\t\tC B\n\n.grt1:\n\tldf8\t\tf32 = [up], 8\n\t;;\n\tldf8\t\tf33 = [up], 8\n\t;;\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f7, f6, f9\n\txma.hu\t\tf43 = f7, f6, f9\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tbr.cloop.dptk\t.grt5\n\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tstf8\t\t[rp] = f39, 8\n\txma.l\t\tf37 = f33, f6, f0\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr21 = f43\n\tgetf.sig\tr18 = f36\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\t;;\n\tgetf.sig\tr22 = f40\n\tgetf.sig\tr19 = f37\n\txma.l\t\tf39 = f35, f6, f0\n\txma.hu\t\tf43 = f35, f6, f0\n\t;;\n\tgetf.sig\tr23 = f41\n\tgetf.sig\tr16 = f38\n\tbr\t\t.Lcj5\n\n.grt5:\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr17 = f39\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f0\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr21 = f43\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f0\n\t;;\n\tgetf.sig\tr18 = f36\n\txma.hu\t\tf42 = f34, f6, f0\n\t;;\n\tgetf.sig\tr22 = f40\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f0\n\t;;\n\tgetf.sig\tr19 = f37\n\txma.hu\t\tf43 = f35, f6, f0\n\tbr\t\t.LL01\n\n\n.Lb10:\tldf8\t\tf35 = [up], 8\n\tmov\t\tr23 = 0\n\tbr.cloop.dptk\t.grt2\n\n\txma.l\t\tf38 = 
f7, f6, f9\n\txma.hu\t\tf42 = f7, f6, f9\n\t;;\n\tstf8\t\t[rp] = f38, 8\n\txma.l\t\tf39 = f35, f6, f42\n\txma.hu\t\tf43 = f35, f6, f42\n\t;;\n\tgetf.sig\tr8 = f43\n\tstf8\t\t[rp] = f39\n\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\n\n\n.grt2:\n\tldf8\t\tf32 = [up], 8\n\t;;\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f7, f6, f9\n\txma.hu\t\tf42 = f7, f6, f9\n\t;;\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f0\n\txma.hu\t\tf43 = f35, f6, f0\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tbr.cloop.dptk\t.grt6\n\n\tstf8\t\t[rp] = f38, 8\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr20 = f42\n\tgetf.sig\tr17 = f39\n\txma.l\t\tf37 = f33, f6, f0\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr21 = f43\n\tgetf.sig\tr18 = f36\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\t;;\n\tgetf.sig\tr22 = f40\n\tgetf.sig\tr19 = f37\n\txma.l\t\tf39 = f35, f6, f0\n\txma.hu\t\tf43 = f35, f6, f0\n\tbr\t\t.Lcj6\n\n.grt6:\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr20 = f42\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f0\n\t;;\n\tgetf.sig\tr17 = f39\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr21 = f43\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f0\n\t;;\n\tgetf.sig\tr18 = f36\n\txma.hu\t\tf42 = f34, f6, f0\n\tbr\t\t.LL10\n\n\n.Lb11:\tldf8\t\tf34 = [up], 8\n\tmov\t\tr22 = 0\n\t;;\n\tldf8\t\tf35 = [up], 8\n\tbr.cloop.dptk\t.grt3\n\t;;\n\n\txma.l\t\tf37 = f7, f6, f9\n\txma.hu\t\tf41 = f7, f6, f9\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\txma.l\t\tf39 = f35, f6, f0\n\txma.hu\t\tf43 = f35, f6, f0\n\t;;\n\tgetf.sig\tr23 = f41\n\tstf8\t\t[rp] = f37, 8\n\tgetf.sig\tr16 = f38\n\tgetf.sig\tr20 = f42\n\tgetf.sig\tr17 = f39\n\tgetf.sig\tr8 = f43\n\tbr\t\t.Lcj3\n\n.grt3:\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f7, f6, f9\n\txma.hu\t\tf41 = f7, f6, f9\n\t;;\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, 
f0\n\t;;\n\tgetf.sig\tr19 = f37\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f0\n\txma.hu\t\tf43 = f35, f6, f0\n\t;;\n\tgetf.sig\tr23 = f41\n\tldf8\t\tf35 = [up], 8\n\tbr.cloop.dptk\t.grt7\n\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n\tgetf.sig\tr20 = f42\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr17 = f39\n\txma.l\t\tf37 = f33, f6, f0\n\tgetf.sig\tr21 = f43\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr18 = f36\n\tst8\t\t[rp] = r19, 8\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\tbr\t\t.Lcj7\n\n.grt7:\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr20 = f42\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f0\n\t;;\n\tgetf.sig\tr17 = f39\n\txma.hu\t\tf41 = f33, f6, f0\n\tbr\t\t.LL11\n\n\n.Lb00:\tldf8\t\tf33 = [up], 8\n\tmov\t\tr21 = 0\n\t;;\n\tldf8\t\tf34 = [up], 8\n\t;;\n\tldf8\t\tf35 = [up], 8\n\txma.l\t\tf36 = f7, f6, f9\n\txma.hu\t\tf40 = f7, f6, f9\n\tbr.cloop.dptk\t.grt4\n\n\txma.l\t\tf37 = f33, f6, f0\n\txma.hu\t\tf41 = f33, f6, f0\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\t;;\n\tgetf.sig\tr22 = f40\n\tstf8\t\t[rp] = f36, 8\n\txma.l\t\tf39 = f35, f6, f0\n\tgetf.sig\tr19 = f37\n\txma.hu\t\tf43 = f35, f6, f0\n\t;;\n\tgetf.sig\tr23 = f41\n\tgetf.sig\tr16 = f38\n\tgetf.sig\tr20 = f42\n\tgetf.sig\tr17 = f39\n\tbr\t\t.Lcj4\n\n.grt4:\n\tldf8\t\tf32 = [up], 8\n\txma.l\t\tf37 = f33, f6, f0\n\txma.hu\t\tf41 = f33, f6, f0\n\t;;\n\tgetf.sig\tr18 = f36\n\tldf8\t\tf33 = [up], 8\n\txma.l\t\tf38 = f34, f6, f0\n\txma.hu\t\tf42 = f34, f6, f0\n\t;;\n\tgetf.sig\tr22 = f40\n\tldf8\t\tf34 = [up], 8\n\txma.l\t\tf39 = f35, f6, f0\n\t;;\n\tgetf.sig\tr19 = f37\n\tgetf.sig\tr23 = f41\n\txma.hu\t\tf43 = f35, f6, f0\n\tldf8\t\tf35 = [up], 8\n\tbr.cloop.dptk\t.grt8\n\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n\tgetf.sig\tr20 = f42\n\txma.hu\t\tf40 = f32, f6, f0\n\t;;\n\tgetf.sig\tr17 = f39\n\tst8\t\t[rp] = r18, 8\n\txma.l\t\tf37 = f33, f6, 
f0\n\txma.hu\t\tf41 = f33, f6, f0\n\tbr\t\t.Lcj8\n\n.grt8:\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n\txma.hu\t\tf40 = f32, f6, f0\n\tbr\t\t.LL00\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r17\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf40 = f32, f6, f0\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r17\n\t;;\n.LL00:\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr20 = f42\n   (p8)\tadd\t\tr24 = r18, r21, 1\n\tnop.b\t\t0\n\tldf8\t\tf32 = [up], 8\n   (p9)\tadd\t\tr24 = r18, r21\n\tnop.b\t\t0\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr17 = f39\n\txma.l\t\tf37 = f33, f6, f0\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r18\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf41 = f33, f6, f0\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r18\n\t;;\n.LL11:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr21 = f43\n   (p6)\tadd\t\tr24 = r19, r22, 1\n\tnop.b\t\t0\n\tldf8\t\tf33 = [up], 8\n   (p7)\tadd\t\tr24 = r19, r22\n\tnop.b\t\t0\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr18 = f36\n\txma.l\t\tf38 = f34, f6, f0\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r19\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf42 = f34, f6, f0\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r19\n\t;;\n.LL10:\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr22 = f40\n   (p8)\tadd\t\tr24 = r16, r23, 1\n\tnop.b\t\t0\n\tldf8\t\tf34 = [up], 8\n   (p9)\tadd\t\tr24 = r16, r23\n\tnop.b\t\t0\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr19 = f37\n\txma.l\t\tf39 = f35, f6, f0\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r16\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf43 = f35, f6, f0\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r16\n\t;;\n.LL01:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr23 = f41\n   (p6)\tadd\t\tr24 = r17, r20, 1\n\tnop.b\t\t0\n\tldf8\t\tf35 = [up], 8\n   (p7)\tadd\t\tr24 = r17, r20\n\tbr.cloop.dptk\t.Loop\nC *** MAIN LOOP END ***\n\t;;\n\n.Lcj9:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr16 = f38\n\txma.l\t\tf36 = f32, f6, f0\n   (p6)\tcmp.leu\t\tp8, p9 = r24, 
r17\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf40 = f32, f6, f0\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r17\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr20 = f42\n   (p8)\tadd\t\tr24 = r18, r21, 1\n   (p9)\tadd\t\tr24 = r18, r21\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr17 = f39\n\txma.l\t\tf37 = f33, f6, f0\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r18\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf41 = f33, f6, f0\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r18\n\t;;\n.Lcj8:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr21 = f43\n   (p6)\tadd\t\tr24 = r19, r22, 1\n   (p7)\tadd\t\tr24 = r19, r22\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr18 = f36\n\txma.l\t\tf38 = f34, f6, f0\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r19\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf42 = f34, f6, f0\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r19\n\t;;\n.Lcj7:\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr22 = f40\n   (p8)\tadd\t\tr24 = r16, r23, 1\n   (p9)\tadd\t\tr24 = r16, r23\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr19 = f37\n\txma.l\t\tf39 = f35, f6, f0\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r16\n\tst8\t\t[rp] = r24, 8\n\txma.hu\t\tf43 = f35, f6, f0\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r16\n\t;;\n.Lcj6:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr23 = f41\n   (p6)\tadd\t\tr24 = r17, r20, 1\n   (p7)\tadd\t\tr24 = r17, r20\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r17\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r17\n\tgetf.sig\tr16 = f38\n\tst8\t\t[rp] = r24, 8\n\t;;\n.Lcj5:\n\t.pred.rel \"mutex\",p8,p9\n\tgetf.sig\tr20 = f42\n   (p8)\tadd\t\tr24 = r18, r21, 1\n   (p9)\tadd\t\tr24 = r18, r21\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r18\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r18\n\tgetf.sig\tr17 = f39\n\tst8\t\t[rp] = r24, 8\n\t;;\n.Lcj4:\n\t.pred.rel \"mutex\",p6,p7\n\tgetf.sig\tr8 = f43\n   (p6)\tadd\t\tr24 = r19, r22, 1\n   (p7)\tadd\t\tr24 = r19, r22\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n\tst8\t\t[rp] = r24, 8\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r19\n   (p7)\tcmp.ltu\t\tp8, p9 = 
r24, r19\n\t;;\n.Lcj3:\n\t.pred.rel \"mutex\",p8,p9\n   (p8)\tadd\t\tr24 = r16, r23, 1\n   (p9)\tadd\t\tr24 = r16, r23\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n\tst8\t\t[rp] = r24, 8\n   (p8)\tcmp.leu\t\tp6, p7 = r24, r16\n   (p9)\tcmp.ltu\t\tp6, p7 = r24, r16\n\t;;\n.Lcj2:\n\t.pred.rel \"mutex\",p6,p7\n   (p6)\tadd\t\tr24 = r17, r20, 1\n   (p7)\tadd\t\tr24 = r17, r20\n\t;;\n\t.pred.rel \"mutex\",p6,p7\n\tst8\t\t[rp] = r24, 8\n   (p6)\tcmp.leu\t\tp8, p9 = r24, r17\n   (p7)\tcmp.ltu\t\tp8, p9 = r24, r17\n\t;;\n\t.pred.rel \"mutex\",p8,p9\n   (p8)\tadd\t\tr8 = 1, r8\n\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\nEPILOGUE()\n\nPROLOGUE(mpn_mul_1c)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\n\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC M I\n\taddp4\t\tup = 0, up\t\tC M I\n\tzxt4\t\tn = n\t\t\tC I\n\t;;\n')\n{.mmi\n\tadds\t\tr15 = -1, n\t\tC M I\n\tsetf.sig\tf9 = cy\t\t\tC M2 M3\n\tmov.i\t\tr2 = ar.lc\t\tC I0\n}\n{.mmb\n\tldf8\t\tf7 = [up], 8\t\tC M\n\tand\t\tr14 = 3, n\t\tC M I\n\tbr.sptk\t\t.Lcommon\n\t;;\n}\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/mul_2.asm",
    "content": "dnl  IA-64 mpn_mul_2 -- Multiply a n-limb number with a 2-limb number and store\ndnl  store the result to a (n+1)-limb number.\n\ndnl  Copyright 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    3.15\nC Itanium 2:  1.625\n\nC Note that this is very similar to addmul_2.asm.  If you change this file,\nC please change that file too.\n\nC TODO\nC  * Clean up variable names, and try to decrease the number of distinct\nC    registers used.\nC  * Cleanup feed-in code to not require zeroing several registers.\nC  * Make sure we don't depend on uninitialized predicate registers.\nC  * We currently cross-jump very aggressively, at the expense of a few cycles\nC    per operation.  Consider changing that.\nC  * Could perhaps save a few cycles by using 1 c/l carry propagation in\nC    wind-down code.\nC  * Ultimately rewrite.  The problem with this code is that it first uses a\nC    loaded u value in one xma pair, then leaves it live over several unrelated\nC    xma pairs, before it uses it again.  It should actually be quite possible\nC    to just swap some aligned xma pairs around.  
But we should then schedule\nC    u loads further from the first use.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`n',`r34')\ndefine(`vp',`r35')\n\ndefine(`srp',`r3')\n\ndefine(`v0',`f6')\ndefine(`v1',`f7')\n\ndefine(`s0',`r14')\ndefine(`acc0',`r15')\n\ndefine(`pr0_0',`r16') define(`pr0_1',`r17')\ndefine(`pr0_2',`r18') define(`pr0_3',`r19')\n\ndefine(`pr1_0',`r20') define(`pr1_1',`r21')\ndefine(`pr1_2',`r22') define(`pr1_3',`r23')\n\ndefine(`acc1_0',`r24') define(`acc1_1',`r25')\ndefine(`acc1_2',`r26') define(`acc1_3',`r27')\n\ndnl define(`',`r28')\ndnl define(`',`r29')\ndnl define(`',`r30')\ndnl define(`',`r31')\n\ndefine(`fp0b_0',`f8') define(`fp0b_1',`f9')\ndefine(`fp0b_2',`f10') define(`fp0b_3',`f11')\n\ndefine(`fp1a_0',`f12') define(`fp1a_1',`f13')\ndefine(`fp1a_2',`f14') define(`fp1a_3',`f15')\n\ndefine(`fp1b_0',`f32') define(`fp1b_1',`f33')\ndefine(`fp1b_2',`f34') define(`fp1b_3',`f35')\n\ndefine(`fp2a_0',`f36') define(`fp2a_1',`f37')\ndefine(`fp2a_2',`f38') define(`fp2a_3',`f39')\n\ndefine(`u_0',`f44') define(`u_1',`f45')\ndefine(`u_2',`f46') define(`u_3',`f47')\n\ndefine(`ux',`f49')\ndefine(`uy',`f51')\n\nASM_START()\nPROLOGUE(mpn_mul_2)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\n\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;')\n\n{.mmi\t\tC 00\n\tldf8\t\tux = [up], 8\t\tC\t\t\tM\n\tldf8\t\tv0 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi\n\tcmp.eq\t\tp6, p7 = 1, n\t\tC Test for single limb case\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n}{.mmi\t\tC 01\n(p7)\tldf8\t\tuy = [up], 8\t\tC\t\t\tM\n\tldf8\t\tv1 = [vp]\t\tC\t\t\tM\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI\n}{.mmb\n\tcmp.eq\t\tp10, p0 = 1, r14\tC\t\t\tM I\n\tcmp.eq\t\tp11, p0 = 2, r14\tC\t\t\tM I\n(p6)\tbr.dpnt\t\tsingle_limb_case\tC\n\t;;\n}{.mmi\t\tC 02\n\tnop\t\t0\t\t\tC\t\t\tM\n\tcmp.eq\t\tp12, p0 = 3, 
r14\tC\t\t\tM I\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n}{.bbb\n  (p10) br.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p11) br.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p12) br.dptk\t\t.Lb11\t\t\tC\t\t\tB\n\t;;\n}\n\n\tALIGN(32)\n.Lb00:\tldf8\t\tu_1 = [up], 8\n\tmov\t\tacc1_2 = 0\n\tmov\t\tpr1_2 = 0\n\tmov\t\tpr0_3 = 0\n\tcmp.ne\t\tp8, p9 = r0, r0\n\t;;\n\txma.l\t\tfp0b_3 = ux, v0, f0\n\tcmp.ne\t\tp12, p13 = r0, r0\n\tldf8\t\tu_2 = [up], 8\n\txma.hu\t\tfp1a_3 = ux, v0, f0\n\tbr.cloop.dptk\t.grt4\n\n\txma.l\t\tfp0b_0 = uy, v0, f0\n\txma.hu\t\tfp1a_0 = uy, v0, f0\n\t;;\n\tgetf.sig\tacc0 = fp0b_3\n\txma.l\t\tfp1b_3 = ux, v1, fp1a_3\n\txma.hu\t\tfp2a_3 = ux, v1, fp1a_3\n\t;;\n\txma.l\t\tfp0b_1 = u_1, v0, f0\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\n\t;;\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = uy, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = uy, v1, fp1a_0\n\t;;\n\tgetf.sig\tpr1_3 = fp1b_3\n\tgetf.sig\tacc1_3 = fp2a_3\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\tbr\t\t.Lcj4\n\n.grt4:\txma.l\t\tfp0b_0 = uy, v0, f0\n\txma.hu\t\tfp1a_0 = uy, v0, f0\n\t;;\n\tgetf.sig\tacc0 = fp0b_3\n\txma.l\t\tfp1b_3 = ux, v1, fp1a_3\n\tldf8\t\tu_3 = [up], 8\n\txma.hu\t\tfp2a_3 = ux, v1, fp1a_3\n\t;;\n\txma.l\t\tfp0b_1 = u_1, v0, f0\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\n\t;;\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = uy, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = uy, v1, fp1a_0\n\t;;\n\tldf8\t\tu_0 = [up], 8\n\tgetf.sig\tpr1_3 = fp1b_3\n\t;;\n\tgetf.sig\tacc1_3 = fp2a_3\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\tbr\t\t.LL00\n\n\n\tALIGN(32)\n.Lb01:\tldf8\t\tu_0 = [up], 8\t\tC M\n\tmov\t\tacc1_1 = 0\t\tC M I\n\tmov\t\tpr1_1 = 0\t\tC M I\n\tmov\t\tpr0_2 = 0\t\tC M I\n\tcmp.ne\t\tp6, p7 = r0, r0\t\tC M I\n\t;;\n\txma.l\t\tfp0b_2 = ux, v0, f0\tC F\n\tcmp.ne\t\tp10, p11 = r0, r0\tC M I\n\tldf8\t\tu_1 = [up], 8\t\tC M\n\txma.hu\t\tfp1a_2 = ux, v0, f0\tC F\n\t;;\n\txma.l\t\tfp0b_3 = uy, v0, f0\tC F\n\txma.hu\t\tfp1a_3 = uy, v0, f0\tC F\n\t;;\n\tgetf.sig\tacc0 = fp0b_2\t\tC 
M\n\txma.l\t\tfp1b_2 = ux, v1,fp1a_2\tC F\n\txma.hu\t\tfp2a_2 = ux, v1,fp1a_2\tC F\n\tldf8\t\tu_2 = [up], 8\t\tC M\n\tbr.cloop.dptk\t.grt5\n\n\txma.l\t\tfp0b_0 = u_0, v0, f0\tC F\n\txma.hu\t\tfp1a_0 = u_0, v0, f0\tC F\n\t;;\n\tgetf.sig\tpr0_3 = fp0b_3\t\tC M\n\txma.l\t\tfp1b_3 = uy, v1,fp1a_3\tC F\n\txma.hu\t\tfp2a_3 = uy, v1,fp1a_3\tC F\n\t;;\n\tgetf.sig\tpr1_2 = fp1b_2\t\tC M\n\tgetf.sig\tacc1_2 = fp2a_2\t\tC M\n\txma.l\t\tfp0b_1 = u_1, v0, f0\tC F\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\tC F\n\tbr\t\t.Lcj5\n\n.grt5:\txma.l\t\tfp0b_0 = u_0, v0, f0\n\txma.hu\t\tfp1a_0 = u_0, v0, f0\n\t;;\n\tgetf.sig\tpr0_3 = fp0b_3\n\txma.l\t\tfp1b_3 = uy, v1, fp1a_3\n\txma.hu\t\tfp2a_3 = uy, v1, fp1a_3\n\t;;\n\tldf8\t\tu_3 = [up], 8\n\tgetf.sig\tpr1_2 = fp1b_2\n\t;;\n\tgetf.sig\tacc1_2 = fp2a_2\n\txma.l\t\tfp0b_1 = u_1, v0, f0\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\n\tbr\t\t.LL01\n\n\n\nC\nC This is the n = 1 case.  In general, this routine shouldn't be\nC called for n = 1, but we don't want to create an error if it is.\nC\n\tALIGN(32)\nsingle_limb_case:\n\txma.lu\t\tfp1a_0 = ux, v0, f0\n\txma.hu\t\tfp1a_1 = ux, v0, f0\n\t;;\n\tgetf.sig\tacc1_0 = fp1a_0\n\txma.lu\t\tfp2a_0 = ux, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = ux, v1, fp1a_1\n\t;;\n\tst8\t\t[rp] = acc1_0, 8\n\tgetf.sig\tacc1_1 = fp2a_0\n\tgetf.sig\tr8 = fp2a_1\n\t;;\n\tst8\t\t[rp] = acc1_1, 8\n\tmov\t\tar.lc = r2\n\tbr.ret.sptk.many b0\n\n\n\nC We have two variants for n = 2.  They turn out to run at exactly the same\nC speed.  
But the first, odd variant might allow one cycle to be trimmed.\n\tALIGN(32)\nifdef(`',`\n.Lb10:\t\tC 03\n\tbr.cloop.dptk\t.grt2\n\t\tC 04\n\t\tC 05\n\t\tC 06\n\txma.l\t\tfp0b_1 = ux, v0, f0\tC 0\n\txma.hu\t\tfp1a_1 = ux, v0, f0\tC 1\n\t;;\tC 07\n\txma.l\t\tfp0b_2 = uy, v0, f0\tC 1\n\txma.l\t\tfp1b_1 = ux, v1, f0\tC 1\n\t;;\tC 08\n\txma.hu\t\tfp1a_2 = uy, v0, f0\tC 2\n\txma.hu\t\tfp2a_1 = ux, v1, f0\tC 2\n\t;;\tC 09\n\txma.l\t\tfp1b_2 = uy, v1, f0\tC 2\n\txma.hu\t\tfp2a_2 = uy, v1, f0\tC 3\n\t;;\tC 10\n\tgetf.sig\tr16 = fp1a_1\n\tstf8\t\t[rp] = fp0b_1, 8\n\t;;\tC 11\n\tgetf.sig\tr17 = fp0b_2\n\t\tC 12\n\tgetf.sig\tr18 = fp1b_1\n\t\tC 13\n\tgetf.sig\tr19 = fp1a_2\n\t\tC 14\n\tgetf.sig\tr20 = fp2a_1\n\t\tC 15\n\tgetf.sig\tr21 = fp1b_2\n\t;;\tC 16\n\tgetf.sig\tr8 = fp2a_2\n\tadd\t\tr24 = r16, r17\n\t;;\tC 17\n\tcmp.ltu\t\tp6, p7 = r24, r16\n\tadd\t\tr26 = r24, r18\n\t;;\tC 18\n\tcmp.ltu\t\tp8, p9 = r26, r24\n\t;;\tC 19\n\tst8\t\t[rp] = r26, 8\n  (p6)\tadd\t\tr25 = r19, r20, 1\n  (p7)\tadd\t\tr25 = r19, r20\n\t;;\tC 20\n  (p8)\tadd\t\tr27 = r25, r21, 1\n  (p9)\tadd\t\tr27 = r25, r21\n  (p6)\tcmp.leu\t\tp10, p0 = r25, r19\n  (p7)\tcmp.ltu\t\tp10, p0 = r25, r19\n\t;;\tC 21\n  (p10)\tadd\t\tr8 = 1, r8\n  (p8)\tcmp.leu\t\tp12, p0 = r27, r25\n  (p9)\tcmp.ltu\t\tp12, p0 = r27, r25\n\t;;\tC 22\n\tst8\t\t[rp] = r27, 8\n\tmov.i\t\tar.lc = r2\n  (p12)\tadd\t\tr8 = 1, r8\n\tbr.ret.sptk.many b0\n')\n\n.Lb10:\t\tC 03\n\tbr.cloop.dptk\t.grt2\n\t\tC 04\n\t\tC 05\n\t\tC 06\n\txma.l\t\tfp0b_1 = ux, v0, f0\n\txma.hu\t\tfp1a_1 = ux, v0, f0\n\t;;\tC 07\n\txma.l\t\tfp0b_2 = uy, v0, f0\n\txma.hu\t\tfp1a_2 = uy, v0, f0\n\t;;\tC 08\n\t\tC 09\n\t\tC 10\n\tstf8\t\t[rp] = fp0b_1, 8\n\txma.l\t\tfp1b_1 = ux, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = ux, v1, fp1a_1\n\t;;\tC 11\n\tgetf.sig\tacc0 = fp0b_2\n\txma.l\t\tfp1b_2 = uy, v1, fp1a_2\n\txma.hu\t\tfp2a_2 = uy, v1, fp1a_2\n\t;;\tC 12\n\t\tC 13\n\t\tC 14\n\tgetf.sig\tpr1_1 = fp1b_1\n\t\tC 15\n\tgetf.sig\tacc1_1 = fp2a_1\n\t\tC 16\n\tgetf.sig\tpr1_2 = 
fp1b_2\n\t\tC 17\n\tgetf.sig\tr8 = fp2a_2\n\t;;\tC 18\n\t\tC 19\n\tadd\t\ts0 = pr1_1, acc0\n\t;;\tC 20\n\tst8\t\t[rp] = s0, 8\n\tcmp.ltu\t\tp8, p9 = s0, pr1_1\n\tsub\t\tr31 = -1, acc1_1\n\t;;\tC 21\n\t.pred.rel \"mutex\", p8, p9\n  (p8)\tadd\t\tacc0 = pr1_2, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr1_2, acc1_1\n  (p8)\tcmp.leu\t\tp10, p0 = r31, pr1_2\n  (p9)\tcmp.ltu\t\tp10, p0 = r31, pr1_2\n\t;;\tC 22\n\tst8\t\t[rp] = acc0, 8\n\tmov.i\t\tar.lc = r2\n  (p10)\tadd\t\tr8 = 1, r8\n\tbr.ret.sptk.many b0\n\n\n.grt2:\tldf8\t\tu_3 = [up], 8\n\tmov\t\tacc1_0 = 0\n\tmov\t\tpr1_0 = 0\n\t;;\n\tmov\t\tpr0_1 = 0\n\txma.l\t\tfp0b_1 = ux, v0, f0\n\tldf8\t\tu_0 = [up], 8\n\txma.hu\t\tfp1a_1 = ux, v0, f0\n\t;;\n\txma.l\t\tfp0b_2 = uy, v0, f0\n\txma.hu\t\tfp1a_2 = uy, v0, f0\n\t;;\n\tgetf.sig\tacc0 = fp0b_1\n\txma.l\t\tfp1b_1 = ux, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = ux, v1, fp1a_1\n\t;;\n\tldf8\t\tu_1 = [up], 8\n\txma.l\t\tfp0b_3 = u_3, v0, f0\n\txma.hu\t\tfp1a_3 = u_3, v0, f0\n\t;;\n\tgetf.sig\tpr0_2 = fp0b_2\n\txma.l\t\tfp1b_2 = uy, v1, fp1a_2\n\txma.hu\t\tfp2a_2 = uy, v1, fp1a_2\n\t;;\n\tldf8\t\tu_2 = [up], 8\n\tgetf.sig\tpr1_1 = fp1b_1\n\t;;\n\tgetf.sig\tacc1_1 = fp2a_1\n\txma.l\t\tfp0b_0 = u_0, v0, f0\n\tcmp.ne\t\tp8, p9 = r0, r0\n\tcmp.ne\t\tp12, p13 = r0, r0\n\txma.hu\t\tfp1a_0 = u_0, v0, f0\n\tbr\t\t.LL10\n\n\n\tALIGN(32)\n.Lb11:\tmov\t\tacc1_3 = 0\n\tmov\t\tpr1_3 = 0\n\tmov\t\tpr0_0 = 0\n\tcmp.ne\t\tp6, p7 = r0, r0\n\t;;\n\tldf8\t\tu_2 = [up], 8\n\tbr.cloop.dptk\t.grt3\n\t;;\n\txma.l\t\tfp0b_0 = ux, v0, f0\n\txma.hu\t\tfp1a_0 = ux, v0, f0\n\t;;\n\tcmp.ne\t\tp10, p11 = r0, r0\n\txma.l\t\tfp0b_1 = uy, v0, f0\n\txma.hu\t\tfp1a_1 = uy, v0, f0\n\t;;\n\tgetf.sig\tacc0 = fp0b_0\n\txma.l\t\tfp1b_0 = ux, v1, fp1a_0\n\txma.hu\t\tfp2a_0 = ux, v1, fp1a_0\n\t;;\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\t;;\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = uy, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = uy, v1, fp1a_1\n\t;;\n\tgetf.sig\tpr1_0 = fp1b_0\n\tgetf.sig\tacc1_0 = 
fp2a_0\n\tbr\t\t.Lcj3\n\n.grt3:\txma.l\t\tfp0b_0 = ux, v0, f0\n\tcmp.ne\t\tp10, p11 = r0, r0\n\tldf8\t\tu_3 = [up], 8\n\txma.hu\t\tfp1a_0 = ux, v0, f0\n\t;;\n\txma.l\t\tfp0b_1 = uy, v0, f0\n\txma.hu\t\tfp1a_1 = uy, v0, f0\n\t;;\n\tgetf.sig\tacc0 = fp0b_0\n\txma.l\t\tfp1b_0 = ux, v1, fp1a_0\n\tldf8\t\tu_0 = [up], 8\n\txma.hu\t\tfp2a_0 = ux, v1, fp1a_0\n\t;;\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\t;;\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = uy, v1, fp1a_1\n\txma.hu\t\tfp2a_1 = uy, v1, fp1a_1\n\t;;\n\tldf8\t\tu_1 = [up], 8\n\tgetf.sig\tpr1_0 = fp1b_0\n\t;;\n\tgetf.sig\tacc1_0 = fp2a_0\n\txma.l\t\tfp0b_3 = u_3, v0, f0\n\txma.hu\t\tfp1a_3 = u_3, v0, f0\n\tbr\t\t.LL11\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\t\t\t\t\t\tC 00\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_3 = fp0b_3\n\txma.l\t\tfp1b_3 = u_3, v1, fp1a_3\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\txma.hu\t\tfp2a_3 = u_3, v1, fp1a_3\n\t;;\t\t\t\t\tC 01\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tldf8\t\tu_3 = [up], 8\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\t\t\t\t\tC 02\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_1 = u_1, v0, f0\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\n\t;;\t\t\t\t\tC 03\n.LL01:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = u_0, v1, fp1a_0\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\txma.hu\t\tfp2a_0 = u_0, v1, fp1a_0\n\t;;\t\t\t\t\tC 04\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tldf8\t\tu_0 = [up], 8\n\tgetf.sig\tpr1_3 = fp1b_3\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  
(p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\t\t\t\t\tC 05\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_3 = fp2a_3\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n  (p8)\tadd\t\tacc0 = pr0_3, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr0_3, acc1_1\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\t;;\t\t\t\t\tC 06\n.LL00:\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = u_1, v1, fp1a_1\n  (p12)\tadd\t\ts0 = pr1_2, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_2, acc0\n\txma.hu\t\tfp2a_1 = u_1, v1, fp1a_1\n\t;;\t\t\t\t\tC 07\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tldf8\t\tu_1 = [up], 8\n\tgetf.sig\tpr1_0 = fp1b_0\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_3\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_3\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_2\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_2\n\t;;\t\t\t\t\tC 08\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_0 = fp2a_0\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_3 = u_3, v0, f0\n  (p6)\tadd\t\tacc0 = pr0_0, acc1_2, 1\n  (p7)\tadd\t\tacc0 = pr0_0, acc1_2\n\txma.hu\t\tfp1a_3 = u_3, v0, f0\n\t;;\t\t\t\t\tC 09\n.LL11:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_2 = fp0b_2\n\txma.l\t\tfp1b_2 = u_2, v1, fp1a_2\n  (p10)\tadd\t\ts0 = pr1_3, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_3, acc0\n\txma.hu\t\tfp2a_2 = u_2, v1, fp1a_2\n\t;;\t\t\t\t\tC 10\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tldf8\t\tu_2 = [up], 8\n\tgetf.sig\tpr1_1 = fp1b_1\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_0\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_0\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_3\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_3\n\t;;\t\t\t\t\tC 11\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_1 = fp2a_1\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_0 = u_0, v0, f0\n  (p8)\tadd\t\tacc0 = pr0_1, acc1_3, 1\n  (p9)\tadd\t\tacc0 = pr0_1, acc1_3\n\txma.hu\t\tfp1a_0 = u_0, v0, f0\n.LL10:\tbr.cloop.dptk\t.Loop\t\t\tC 12\n\t;;\nC *** MAIN LOOP END ***\n\n.Lcj6:\n\t.pred.rel 
\"mutex\", p12, p13\n\tgetf.sig\tpr0_3 = fp0b_3\n\txma.l\t\tfp1b_3 = u_3, v1, fp1a_3\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\txma.hu\t\tfp2a_3 = u_3, v1, fp1a_3\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_1 = u_1, v0, f0\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\txma.hu\t\tfp1a_1 = u_1, v0, f0\n\t;;\n.Lcj5:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_0 = fp0b_0\n\txma.l\t\tfp1b_0 = u_0, v1, fp1a_0\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\txma.hu\t\tfp2a_0 = u_0, v1, fp1a_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr1_3 = fp1b_3\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_3 = fp2a_3\n\tst8\t\t[rp] = s0, 8\n\txma.l\t\tfp0b_2 = u_2, v0, f0\n  (p8)\tadd\t\tacc0 = pr0_3, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr0_3, acc1_1\n\txma.hu\t\tfp1a_2 = u_2, v0, f0\n\t;;\n.Lcj4:\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr0_1 = fp0b_1\n\txma.l\t\tfp1b_1 = u_1, v1, fp1a_1\n  (p12)\tadd\t\ts0 = pr1_2, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_2, acc0\n\txma.hu\t\tfp2a_1 = u_1, v1, fp1a_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_0 = fp1b_0\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_3\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_3\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_2\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_2\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_0 = fp2a_0\n\tst8\t\t[rp] = s0, 8\n  
(p6)\tadd\t\tacc0 = pr0_0, acc1_2, 1\n  (p7)\tadd\t\tacc0 = pr0_0, acc1_2\n\t;;\n.Lcj3:\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr0_2 = fp0b_2\n\txma.l\t\tfp1b_2 = u_2, v1, fp1a_2\n  (p10)\tadd\t\ts0 = pr1_3, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_3, acc0\n\txma.hu\t\tfp2a_2 = u_2, v1, fp1a_2\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n\tgetf.sig\tpr1_1 = fp1b_1\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_0\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_0\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_3\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_3\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tgetf.sig\tacc1_1 = fp2a_1\n\tst8\t\t[rp] = s0, 8\n  (p8)\tadd\t\tacc0 = pr0_1, acc1_3, 1\n  (p9)\tadd\t\tacc0 = pr0_1, acc1_3\n\t;;\n\t.pred.rel \"mutex\", p12, p13\n  (p12)\tadd\t\ts0 = pr1_0, acc0, 1\n  (p13)\tadd\t\ts0 = pr1_0, acc0\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\t.pred.rel \"mutex\", p12, p13\n\tgetf.sig\tpr1_2 = fp1b_2\n  (p8)\tcmp.leu\t\tp6, p7 = acc0, pr0_1\n  (p9)\tcmp.ltu\t\tp6, p7 = acc0, pr0_1\n  (p12)\tcmp.leu\t\tp10, p11 = s0, pr1_0\n  (p13)\tcmp.ltu\t\tp10, p11 = s0, pr1_0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\tgetf.sig\tacc1_2 = fp2a_2\n\tst8\t\t[rp] = s0, 8\n  (p6)\tadd\t\tacc0 = pr0_2, acc1_0, 1\n  (p7)\tadd\t\tacc0 = pr0_2, acc1_0\n\t;;\n\t.pred.rel \"mutex\", p10, p11\n  (p10)\tadd\t\ts0 = pr1_1, acc0, 1\n  (p11)\tadd\t\ts0 = pr1_1, acc0\n\t;;\n\t.pred.rel \"mutex\", p6, p7\n\t.pred.rel \"mutex\", p10, p11\n  (p6)\tcmp.leu\t\tp8, p9 = acc0, pr0_2\n  (p7)\tcmp.ltu\t\tp8, p9 = acc0, pr0_2\n  (p10)\tcmp.leu\t\tp12, p13 = s0, pr1_1\n  (p11)\tcmp.ltu\t\tp12, p13 = s0, pr1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n\tst8\t\t[rp] = s0, 8\n  (p8)\tadd\t\tacc0 = pr1_2, acc1_1, 1\n  (p9)\tadd\t\tacc0 = pr1_2, acc1_1\n\t;;\n\t.pred.rel \"mutex\", p8, p9\n  (p8)\tcmp.leu\t\tp10, p11 = acc0, pr1_2\n  (p9)\tcmp.ltu\t\tp10, p11 = acc0, pr1_2\n  (p12)\tadd\t\tacc0 = 1, acc0\n\t;;\n\tst8\t\t[rp] = acc0, 8\n  (p12)\tcmp.eq.or\tp10, p0 = 0, acc0\n\tmov\t\tr8 = 
acc1_2\n\t;;\n\t.pred.rel \"mutex\", p10, p11\n  (p10)\tadd\t\tr8 = 1, r8\n\tmov.i\t\tar.lc = r2\n\tbr.ret.sptk.many b0\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/nand_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_nand_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 
= $3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, 
r14)\t\tC\t\t\tM I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 
= [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/nior_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_nior_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 
= $3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, 
r14)\t\tC\t\t\tM I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 
= [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/popcount.asm",
    "content": "dnl  IA-64 mpn_popcount -- mpn population count.\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:       1.5\nC Itanium 2:     1\n\nC INPUT PARAMETERS\ndefine(`up', `r32')\ndefine(`n', `r33')\n\ndefine(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')\ndefine(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')\ndefine(`s',`r8')\n\n\nASM_START()\nPROLOGUE(mpn_popcount)\n\t.prologue\nifdef(`HAVE_ABI_32',\n`\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n\n {.mmi;\tadd\t\tr9 = 512, up\t\tC prefetch pointer\tM I\n\tld8\t\tr10 = [up], 8\t\tC load first limb\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC save ar.lc\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC small count?\t\tM I\n\tadd\t\tn = -5, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n\n.Lb00:\tld8\t\tu1 = [up], 
8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = r10\t\tC\t\t\tI0\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = u1\t\t\tC\t\t\tI0\n  (p15)\tbr.cond.dptk\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tnop.m\t0\t\t\t\tC\t\t\t-\n\tnop.m\t0\t\t\t\tC\t\t\t-\n\tpopcnt\t\tc2 = u2\t\t\tC\t\t\tI0\n\t;;\n\tmov\t\ts = c0\t\t\tC\t\t\tM I\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = u2\t\t\tC\t\t\tI0\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n\n.Lb01:\n\tpopcnt\t\ts = r10\t\t\tC\t\t\tI0\n  (p14)\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = u0\t\t\tC\t\t\tI0\n\tmov\t\tc3 = 0\t\t\tC\t\t\tI0\n\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = u1\t\t\tC\t\t\tI0\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lend\t\t\tC\t\t\tB\n\n\n.Lb10:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.cond.dptk\t.grt2\t\t\tC\t\t\tB\n\n\tpopcnt\t\ts = r10\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tpopcnt\t\tc2 = r10\t\tC\t\t\tI0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = u0\t\t\tC\t\t\tI0\n\tbr\t\t.LL10\t\t\tC\t\t\tB\n\n\n.Lb11:\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tmov\t\ts = 0\t\t\tC\t\t\tM I\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\ts = r10\t\t\tC\t\t\tI0\n  (p15)\tbr.cond.dptk\t.grt3\t\t\tC\t\t\tB\n\n\tpopcnt\t\tc2 = 
u2\t\t\tC\t\t\tI0\n\t;;\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = u2\t\t\tC\t\t\tI0\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tmov\t\tc1 = 0\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\n.Loop:\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc2 = u2\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\t;;\n.LL00:\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c0\t\tC\t\t\tM I\n\t;;\n.LL11:\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc0 = u0\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c1\t\tC\t\t\tM I\n\t;;\n.LL10:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tpopcnt\t\tc1 = u1\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c2\t\tC\t\t\tM I\n\tlfetch\t\t[r9], 32\t\tC\t\t\tM01\n\tnop.m\t\t0\t\t\tC\t\t\t-\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\n\n.Lend:\tpopcnt\t\tc2 = u2\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\t;;\n\tpopcnt\t\tc3 = u3\t\t\tC\t\t\tI0\n\tadd\t\ts = s, c0\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tadd\t\ts = s, c1\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tadd\t\ts = s, c2\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tadd\t\ts = s, c3\t\tC\t\t\tM I\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/rsh1add_n.asm",
    "content": "dnl  IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    2.5\nC Itanium 2:  1.5\n\nC TODO\nC  * Rewrite function entry code using aorslsh1_n.asm style.\nC  * Micro-optimize feed-in and wind-down code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_rsh1add_n',1)\n\nifdef(`OPERATION_rsh1add_n',`\n  define(ADDSUB,       add)\n  define(PRED,\t       ltu)\n  define(INCR,\t       1)\n  define(LIM,\t       -1)\n  define(func, mpn_rsh1add_n)\n')\nifdef(`OPERATION_rsh1sub_n',`\n  define(ADDSUB,       sub)\n  define(PRED,\t       gtu)\n  define(INCR,\t       -1)\n  define(LIM,\t       0)\n  define(func, mpn_rsh1sub_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')\ndefine(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')\ndefine(`x0',`r26') define(`x1',`r9') define(`x2',`r30') 
define(`x3',`r31')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n {.mmi;\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p0 = 4, n\t\tC\t\t\tM I\n\tadd\t\tn = -4, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.dpnt\t\t.grt4\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, r10\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tand\t\tr8 = 1, w3\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tn = -1, n\n\t;;\n\tcmp.PRED\tp8, p0 = w0, 
u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n\n.Lb01:\tADDSUB\t\tw2 = r10, r11\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.dpnt\t\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp6, p7 = w2, r10\tC\t\t\tM I\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\tand\t\tr8 = 1, w2\t\tC\t\t\tM I\n\t;;\n   (p6)\tdep\t\tx2 = -1, x2, 63, 1\tC\t\t\tI0\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC FIXME swap with next\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w2\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tld8\t\tv3 = [vp], 
8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n\n.Lb10:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt2\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp9, p0 = w1, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, r10\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tand\t\tr8 = 1, w1\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt6\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, 
w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.grt6:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tbr\t\t.LL10\t\t\tC\t\t\tB\n\n\n.Lb11:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp8, p0 = w0, r10\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tand\t\tr8 = 1, w0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w0, r10\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tand\t\tr8 = 1, w0\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = 
[vp], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt7\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.Lcj7\t\t\tC\t\t\tB\n\n.grt7:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.LL11:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n.LL10:\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tnop.b\t\t0\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tnop.b\t\t0\n\t;;\n\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n.LL01:\tst8\t\t[rp] = x2, 8\t\tC\t\t\tM23\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, 
w1\t\tC\t\t\tM I\n\t;;\n.LL00:\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\tnop.b\t\t0\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n.Lskip:\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj7:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n.Lcj6:\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = x2, 8\t\tC\t\t\tM23\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\t;;\n\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\t;;\n.Lcj2:\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n   (p6)\tdep\t\tx2 = -1, x2, 63, 1\tC\t\t\tI0\n\t;;\n.Lcj1:\tst8\t\t[rp] = x2\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\n"
  },
  {
    "path": "mpn/ia64/rsh1sub_n.asm",
    "content": "dnl  IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    2.5\nC Itanium 2:  1.5\n\nC TODO\nC  * Rewrite function entry code using aorslsh1_n.asm style.\nC  * Micro-optimize feed-in and wind-down code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_rsh1sub_n',1)\n\nifdef(`OPERATION_rsh1add_n',`\n  define(ADDSUB,       add)\n  define(PRED,\t       ltu)\n  define(INCR,\t       1)\n  define(LIM,\t       -1)\n  define(func, mpn_rsh1add_n)\n')\nifdef(`OPERATION_rsh1sub_n',`\n  define(ADDSUB,       sub)\n  define(PRED,\t       gtu)\n  define(INCR,\t       -1)\n  define(LIM,\t       0)\n  define(func, mpn_rsh1sub_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')\ndefine(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')\ndefine(`x0',`r26') define(`x1',`r9') define(`x2',`r30') 
define(`x3',`r31')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n {.mmi;\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p0 = 4, n\t\tC\t\t\tM I\n\tadd\t\tn = -4, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.dpnt\t\t.grt4\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, r10\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tand\t\tr8 = 1, w3\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tn = -1, n\n\t;;\n\tcmp.PRED\tp8, p0 = w0, 
u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n\n.Lb01:\tADDSUB\t\tw2 = r10, r11\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.dpnt\t\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp6, p7 = w2, r10\tC\t\t\tM I\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\tand\t\tr8 = 1, w2\t\tC\t\t\tM I\n\t;;\n   (p6)\tdep\t\tx2 = -1, x2, 63, 1\tC\t\t\tI0\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC FIXME swap with next\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w2\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tld8\t\tv3 = [vp], 
8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n\n.Lb10:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt2\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp9, p0 = w1, r10\tC\t\t\tM I\n\tand\t\tr8 = 1, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, r10\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tand\t\tr8 = 1, w1\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt6\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, 
w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.grt6:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tbr\t\t.LL10\t\t\tC\t\t\tB\n\n\n.Lb11:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp8, p0 = w0, r10\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tand\t\tr8 = 1, w0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w0, r10\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tand\t\tr8 = 1, w0\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = 
[vp], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt7\t\t\tC\t\t\tB\n\t;;\n\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.Lcj7\t\t\tC\t\t\tB\n\n.grt7:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.LL11:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n.LL10:\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tnop.b\t\t0\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tnop.b\t\t0\n\t;;\n\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n.LL01:\tst8\t\t[rp] = x2, 8\t\tC\t\t\tM23\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, 
w1\t\tC\t\t\tM I\n\t;;\n.LL00:\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\tnop.b\t\t0\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n.Lskip:\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp7, p0 = w3, u3\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj7:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\t;;\n.Lcj6:\tcmp.PRED\tp8, p0 = w0, u0\t\tC\t\t\tM I\n\tshrp\t\tx2 = w3, w2, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp9, p0 = w1, u1\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w0\tC\t\t\tM I\n   (p7)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = x2, 8\t\tC\t\t\tM23\n\tshrp\t\tx3 = w0, w3, 1\t\tC\t\t\tI0\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w1\tC\t\t\tM I\n   (p8)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tshrp\t\tx0 = w1, w0, 1\t\tC\t\t\tI0\n\t;;\n\tst8\t\t[rp] = x3, 8\t\tC\t\t\tM23\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p9)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = x0, 8\t\tC\t\t\tM23\n\tshrp\t\tx1 = w2, w1, 1\t\tC\t\t\tI0\n\tshr.u\t\tx2 = w2, 1\t\tC\t\t\tI0\n\t;;\n.Lcj2:\tst8\t\t[rp] = x1, 8\t\tC\t\t\tM23\n   (p6)\tdep\t\tx2 = -1, x2, 63, 1\tC\t\t\tI0\n\t;;\n.Lcj1:\tst8\t\t[rp] = x2\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\n"
  },
  {
    "path": "mpn/ia64/rshift.asm",
    "content": "dnl  IA-64 mpn_lshift/mpn_rshift.\n\ndnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2.0\nC Itanium 2:    1.0\n\nC This code is scheduled deeply since the plain shift instructions shr and shl\nC have a latency of 4 (on Itanium) or 3 (on Itanium 2).  
Poor scheduling of\nC these instructions cause a 10 cycle replay trap on Itanium.\n\nC TODO\nC  * Optimize function entry and feed-in code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`n',`r34')\ndefine(`cnt',`r35')\n\ndefine(`tnc',`r9')\n\ndefine(`OPERATION_rshift',1)\n\nifdef(`OPERATION_lshift',`\n\tdefine(`FSH',`shl')\n\tdefine(`BSH',`shr.u')\n\tdefine(`UPD',`-8')\n\tdefine(`POFF',`-512')\n\tdefine(`PUPD',`-32')\n\tdefine(`func',`mpn_lshift')\n')\nifdef(`OPERATION_rshift',`\n\tdefine(`FSH',`shr.u')\n\tdefine(`BSH',`shl')\n\tdefine(`UPD',`8')\n\tdefine(`POFF',`512')\n\tdefine(`PUPD',`32')\n\tdefine(`func',`mpn_rshift')\n')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\t\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\tsxt4\t\tn = n\t\t\tC\t\t\tM I\n\tzxt4\t\tcnt = cnt\t\tC\t\t\tI\n\t;;\n')\n\n {.mmi;\tcmp.lt\t\tp14, p15 = 4, n\t\tC\t\t\tM I\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tadd\t\tr15 = -1, n\t\tC\t\t\tM I\n\tsub\t\ttnc = 64, cnt\t\tC\t\t\tM I\n\tadd\t\tr16 = -5, n\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tshr.u\t\tn = r16, 2\t\tC\t\t\tI0\n}{.mmi;\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\nifdef(`OPERATION_lshift',\n`\tshladd\t\tup = r15, 3, up\t\tC\t\t\tM I\n\tshladd\t\trp = r15, 3, rp')\tC\t\t\tM I\n\t;;\n}{.mmi;\tadd\t\tr11 = POFF, up\t\tC\t\t\tM I\n\tld8\t\tr10 = [up], UPD\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n}{.bbb;\n   (p6)\tbr.dptk\t\t.Lb01\n   (p7)\tbr.dptk\t\t.Lb10\n   (p8)\tbr.dptk\t\t.Lb11\n\t;;\n}\n\n.Lb00:\tld8\t\tr19 = [up], UPD\n\t;;\n\tld8\t\tr16 = [up], UPD\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n  (p14)\tbr.cond.dptk\t.grt4\n\n\tFSH\t\tr24 = r10, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, 
tnc\n\t;;\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r10, tnc\n\tbr\t\t.Lr4\n\n.grt4:\tFSH\t\tr24 = r10, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tor\t\tr14 = r25, r24\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.Ltop\n\tbr\t\t.Lbot\n\n.Lb01:\n  (p15)\tBSH\t\tr8 = r10, tnc\t\tC function return value\tI\n  (p15)\tFSH\t\tr22 = r10, cnt\t\tC\t\t\tI\n  (p15)\tbr.cond.dptk\t.Lr1\t\t\tC return\t\tB\n\n.grt1:\tld8\t\tr18 = [up], UPD\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r10, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt5\n\t;;\n\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tor\t\tr15 = r23, r22\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\tbr\t\t.Lr5\n\n.grt5:\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr15 = r23, r22\n\tld8\t\tr16 = [up], UPD\n\tbr\t\t.LL01\n\n\n.Lb10:\tld8\t\tr17 = [up], UPD\n  (p14)\tbr.cond.dptk\t.grt2\n\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tFSH\t\tr20 = r10, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr14 = r21, r20\n\tFSH\t\tr22 = r17, cnt\n\tbr\t\t.Lr2\t\t\tC return\n\n.grt2:\tld8\t\tr18 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r10, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt6\n\t;;\n\n\tor\t\tr14 
= r21, r20\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\tbr\t\t.Lr6\n\n.grt6:\tor\t\tr14 = r21, r20\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\t;;\n\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tbr\t\t.LL10\n\n\n.Lb11:\tld8\t\tr16 = [up], UPD\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr8 = r10, tnc\t\tC function return value\n  (p14)\tbr.cond.dptk\t.grt3\n\t;;\n\n\tFSH\t\tr26 = r10, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tor\t\tr15 = r27, r26\n\tFSH\t\tr22 = r17, cnt\n\tbr\t\t.Lr3\t\t\tC return\n\n.grt3:\tld8\t\tr18 = [up], UPD\n\tFSH\t\tr26 = r10, cnt\n\tBSH\t\tr27 = r16, tnc\n\t;;\n\tld8\t\tr19 = [up], UPD\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\t;;\n\tld8\t\tr16 = [up], UPD\n\tFSH\t\tr22 = r17, cnt\n\tBSH\t\tr23 = r18, tnc\n\t;;\n\tld8\t\tr17 = [up], UPD\n\tbr.cloop.dpnt\t.grt7\n\n\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tbr\t\t.Lr7\n\n.grt7:\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tld8\t\tr18 = [up], UPD\n\tbr\t\t.LL11\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Ltop:\n {.mmi;\tst8\t\t[rp] = r14, UPD\t\tC M2\n\tor\t\tr15 = r27, r26\t\tC M3\n\tFSH\t\tr24 = r18, cnt\t\tC I0\n}{.mmi;\tld8\t\tr18 = [up], UPD\t\tC M1\n\tlfetch\t\t[r11], PUPD\n\tBSH\t\tr25 = r19, tnc\t\tC I1\n\t;; }\n.LL11:\n {.mmi;\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r21, r20\n\tFSH\t\tr26 = r19, cnt\n}{.mmi;\tld8\t\tr19 = [up], UPD\n\tnop.m\t\t0\n\tBSH\t\tr27 = r16, tnc\n\t;; }\n.LL10:\n {.mmi;\tst8\t\t[rp] = r14, UPD\n\tor\t\tr15 = r23, r22\n\tFSH\t\tr20 = r16, cnt\n}{.mmi;\tld8\t\tr16 = [up], UPD\n\tnop.m\t\t0\n\tBSH\t\tr21 = r17, tnc\n\t;; }\n.LL01:\n {.mmi;\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n}{.mib;\tld8\t\tr17 = [up], UPD\n\tBSH\t\tr23 = r18, tnc\n\tbr.cloop.dptk\t.Ltop\n\t;; }\n\nC *** 
MAIN LOOP END ***\n\n.Lbot:\tor\t\tr15 = r27, r26\n\tFSH\t\tr24 = r18, cnt\n\tBSH\t\tr25 = r19, tnc\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr7:\tor\t\tr14 = r21, r20\n\tFSH\t\tr26 = r19, cnt\n\tBSH\t\tr27 = r16, tnc\n\tst8\t\t[rp] = r15, UPD\n\t;;\n.Lr6:\tor\t\tr15 = r23, r22\n\tFSH\t\tr20 = r16, cnt\n\tBSH\t\tr21 = r17, tnc\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr5:\tst8\t\t[rp] = r15, UPD\n\tor\t\tr14 = r25, r24\n\tFSH\t\tr22 = r17, cnt\n\t;;\n.Lr4:\tor\t\tr15 = r27, r26\n\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr3:\tor\t\tr14 = r21, r20\n\tst8\t\t[rp] = r15, UPD\n\t;;\n.Lr2:\tst8\t\t[rp] = r14, UPD\n\t;;\n.Lr1:\tst8\t\t[rp] = r22, UPD\t\tC\t\t\tM23\n\tmov\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE(func)\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/sqr_diagonal.asm",
    "content": "dnl  IA-64 mpn_sqr_diagonal.  Helper for sqr_basecase.\n\ndnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC         cycles/limb\nC Itanium:    4\nC Itanium 2:  2\n\nC TODO\nC  * Perhaps avoid ctop loop.  
Unfortunately, a cloop loop running at 1 c/l\nC    would need prohibitive 8-way unrolling.\nC  * Instead of messing too much with this, write a nifty mpn_sqr_basecase.\n\nC INPUT PARAMETERS\nC rp = r32\nC sp = r33\nC n = r34\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\n\t.prologue\n\t.save\tar.lc, r2\n\t.save\tpr, r15\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\tr32 = 0, r32\n\taddp4\tr33 = 0, r33\n\tzxt4\tr34 = r34\n\t;;\n')\n\tldf8\t\tf32 = [r33], 8\t\tC M\tload rp[0] early\n\tmov\t\tr2 = ar.lc\t\tC I0\n\tmov\t\tr14 = ar.ec\t\tC I0\n\tmov\t\tr15 = pr\t\tC I0\n\tadd\t\tr19 = -1, r34\t\tC M I\tdecr n\n\tadd\t\tr18 = 8, r32\t\tC M I\trp for high limb\n\t;;\n\tmov\t\tar.lc = r19\t\tC I0\n\tmov\t\tar.ec = 5\t\tC I0\n\tmov\t\tpr.rot = 1<<16\t\tC I0\n\t;;\n\tbr.cexit.spnt\t.Ldone\t\t\tC B\n\t;;\n\tALIGN(32)\n.Loop:\n  (p16)\tldf8\t\tf32 = [r33], 8\t\tC M\n  (p19)\txma.l\t\tf36 = f35, f35, f0\tC F\n  (p21)\tstf8\t\t[r32] = f38, 16\t\tC M2 M3\n  (p19)\txma.hu\t\tf40 = f35, f35, f0\tC F\n  (p21)\tstf8\t\t[r18] = f42, 16\t\tC M2 M3\n\tbr.ctop.dptk\t.Loop\t\t\tC B\n\t;;\n.Ldone:\n\tstf8\t\t[r32] = f38\t\tC M2 M3\n\tstf8\t\t[r18] = f42\t\tC M2 M3\n\tmov\t\tar.ec = r14\t\tC I0\n\t;;\n\tmov\t\tpr = r15, 0x1ffff\tC I0\n\tmov\t\tar.lc = r2\t\tC I0\n\tbr.ret.sptk.many b0\t\t\tC B\nEPILOGUE(mpn_sqr_diagonal)\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/sub_n.asm",
    "content": "dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2.67\nC Itanium 2:    1.25\n\nC TODO\nC  * Consider using special code for small n, using something like\nC    \"switch (8 * (n >= 8) + (n mod 8))\" to enter it and feed-in code.\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_sub_n',1)\n\nifdef(`OPERATION_add_n',`\n  define(ADDSUB,\tadd)\n  define(PRED,\t\tltu)\n  define(INCR,\t\t1)\n  define(LIM,\t\t-1)\n  define(func, mpn_add_n)\n')\nifdef(`OPERATION_sub_n',`\n  define(ADDSUB,\tsub)\n  define(PRED,\t\tgtu)\n  define(INCR,\t\t-1)\n  define(LIM,\t\t0)\n  define(func, mpn_sub_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')\ndefine(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')\ndefine(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')\ndefine(`w0',`r22') define(`w1',`r9') 
define(`w2',`r8') define(`w3',`r23')\ndefine(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')\ndefine(`rpx',`r3')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\t\tC 00\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 7, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 8, n\t\tC\t\t\tM I\n\tadd\t\tn = -8, n\t\tC\t\t\tM I\n\t;;\n}\n{.mmi\t\tC 01\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb001\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb010\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb011\t\t\tC\t\t\tB\n\t;;\n}\n{.mmi\t\tC 02\n\tcmp.eq\t\tp9, p0 = 4, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp10, p0 = 5, r14\tC\t\t\tM I\n\tcmp.eq\t\tp11, p0 = 6, r14\tC\t\t\tM I\n}\n{.bbb\n   (p9)\tbr.dptk\t\t.Lb100\t\t\tC\t\t\tB\n  (p10)\tbr.dptk\t\t.Lb101\t\t\tC\t\t\tB\n  (p11)\tbr.dptk\t\t.Lb110\t\t\tC\t\t\tB\n\t;;\n}\t\tC 03\n{.mmb\n\tcmp.eq\t\tp12, p0 = 7, r14\tC\t\t\tM I\n\tadd\t\tn = -1, n\t\tC loop count\t\tM I\n  (p12)\tbr.dptk\t\t.Lb111\t\t\tC\t\t\tB\n}\n\n\n.Lb000:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 8, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 
8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n  (p14)\tbr.cond.dptk\t.Lcj8\t\t\tC\t\t\tB\n\t;;\n\n.grt8:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tbr\t\t.LL000\t\t\tC\t\t\tB\n\n.Lb001:\tadd\t\trpx = 16, rp\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tmov\t\tr8 = 0\t\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.ne\t\tp9, p0 = r0, r0\t\tC read near Loop\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu0 = [up], 
8\t\tC\t\t\tM01\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj9\t\t\tC\t\t\tB\n\n.Lb010:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 24, rp\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = r10, r11\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp9, p0 = w7, r10\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL01x\t\t\tC\t\t\tB\n\n.Lb011:\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp8, p0 = w6, r10\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w6, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 32, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, r10\tC\t\t\tM 
I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tst8\t\t[rp] = w6, 8\t\tC\t\t\tM23\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\tbr\t\t.LL01x\t\t\tC\t\t\tB\n\n.Lb100:\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 8, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp7, p0 = w5, r10\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\tcmp.PRED\tp7, p0 = w5, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 
= 512, up\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\tbr\t\t.LL100\t\t\tC\t\t\tB\n\n.Lb101:\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 16, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w4, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n  (p15)\tbr.cond.dpnt\t.grt5\t\t\tC\t\t\tB\n\t;;\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL101\t\t\tC\t\t\tB\n\n.Lb110:\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 24, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 
8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n  (p14)\tbr.cond.dptk\t.Lcj67\t\t\tC\t\t\tB\n\t;;\n\n.grt6:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tcmp.PRED\tp9, p0 = w3, r10\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tbr\t\t.LL11x\t\t\tC\t\t\tB\n\n.Lb111:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tadd\t\trpx = 32, rp\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = r10, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.cond.dpnt\t.grt7\t\t\tC\t\t\tB\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\tbr\t\t.Lcj67\t\t\tC\t\t\tB\n\n.grt7:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 3\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n\tnop.i\t\t0\n\tnop.b\t\t0\n\t;;\n\tadd\t\tr11 = 512, vp\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tadd\t\tr10 = 512, up\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM 
I\n\tnop.b\t\t0\n\t;;\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\tbr\t\t.LL11x\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w1\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.LL000:\tst8\t\t[rp] = w1, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w2, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n\tlfetch\t\t[r10], 64\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n.LL11x:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tld8\t\tv4 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w4, u4\t\tC\t\t\tM I\n\tld8\t\tu4 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv5 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w4\tC\t\t\tM I\n\tld8\t\tu5 = [up], 8\t\tC\t\t\tM01\n   (p9)\tadd\t\tw4 = INCR, w4\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n.LL101:\tld8\t\tv6 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w5\tC\t\t\tM I\n\tld8\t\tu6 = [up], 8\t\tC\t\t\tM01\n   (p6)\tadd\t\tw5 = INCR, w5\t\tC\t\t\tM I\n\tADDSUB\t\tw7 
= u7, v7\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w4, 8\t\tC\t\t\tM23\n\tld8\t\tv7 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n\tld8\t\tu7 = [up], 8\t\tC\t\t\tM01\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\t;;\n.LL100:\tst8\t\t[rp] = w5, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w6, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n\tlfetch\t\t[r11], 64\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n.LL01x:\tst8\t\t[rp] = w7, 8\t\tC\t\t\tM23\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, v1\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n\tcmp.PRED\tp7, p0 = w1, u1\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, v2\t\tC\t\t\tM I\n\t;;\n.Lcj9:\tcmp.PRED\tp8, p0 = w2, u2\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w1\tC\t\t\tM I\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, v3\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp9, p0 = w3, u3\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w2\tC\t\t\tM I\n   (p7)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj8:\tst8\t\t[rp] = w1, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w2, 32\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w3\tC\t\t\tM I\n   (p8)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw4 = u4, v4\t\tC\t\t\tM I\n\t;;\n.Lcj67:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w4, u4\t\tC\t\t\tM I\n\tADDSUB\t\tw5 = u5, v5\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp7, p0 = w5, u5\t\tC\t\t\tM I\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w4\tC\t\t\tM I\n   (p9)\tadd\t\tw4 = INCR, w4\t\tC\t\t\tM I\n\tADDSUB\t\tw6 = u6, v6\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tcmp.PRED\tp8, p0 = w6, u6\t\tC\t\t\tM I\n   (p6)\tcmp.eq.or\tp7, p0 = LIM, w5\tC\t\t\tM 
I\n\tst8\t\t[rp] = w4, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tw5 = INCR, w5\t\tC\t\t\tM I\n\tADDSUB\t\tw7 = u7, v7\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tcmp.PRED\tp9, p0 = w7, u7\t\tC\t\t\tM I\n   (p7)\tcmp.eq.or\tp8, p0 = LIM, w6\tC\t\t\tM I\n   (p7)\tadd\t\tw6 = INCR, w6\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w5, 16\t\tC\t\t\tM23\n\tst8\t\t[rpx] = w6, 32\t\tC\t\t\tM23\n.Lcj3:\n   (p8)\tcmp.eq.or\tp9, p0 = LIM, w7\tC\t\t\tM I\n   (p8)\tadd\t\tw7 = INCR, w7\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, v0\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = w7, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\t;;\n   (p9)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p9)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tmov\t\tr8 = 0\t\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n   (p6)\tmov\t\tr8 = 1\t\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/sublsh1_n.asm",
    "content": "dnl  IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      3.0\nC Itanium 2:    1.5\n\nC TODO\nC  * Use shladd in feed-in code (for mpn_addlsh1_n).\n\nC INPUT PARAMETERS\ndefine(`rp',`r32')\ndefine(`up',`r33')\ndefine(`vp',`r34')\ndefine(`n',`r35')\n\ndefine(`OPERATION_sublsh1_n',1)\n\nifdef(`OPERATION_addlsh1_n',`\n  define(ADDSUB,       add)\n  define(PRED,\t       ltu)\n  define(INCR,\t       1)\n  define(LIM,\t       -1)\n  define(func, mpn_addlsh1_n)\n')\nifdef(`OPERATION_sublsh1_n',`\n  define(ADDSUB,       sub)\n  define(PRED,\t       gtu)\n  define(INCR,\t       -1)\n  define(LIM,\t       0)\n  define(func, mpn_sublsh1_n)\n')\n\nC Some useful aliases for registers we use\ndefine(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')\ndefine(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')\ndefine(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')\ndefine(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')\ndefine(`x0',`r30') define(`x1',`r31') define(`x2',`r30') 
define(`x3',`r31')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',`\n\taddp4\t\trp = 0, rp\t\tC\t\t\tM I\n\taddp4\t\tup = 0, up\t\tC\t\t\tM I\n\taddp4\t\tvp = 0, vp\t\tC\t\t\tM I\n\tzxt4\t\tn = n\t\t\tC\t\t\tI\n\t;;\n')\n {.mmi;\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}{.mmi;\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p0 = 4, n\t\tC\t\t\tM I\n\tadd\t\tn = -4, n\t\tC\t\t\tM I\n\t;;\n}{.mmi;\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}{.bbb\n  (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n  (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n  (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tadd\t\tx3 = r11, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = r10, x3\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt4\t\t\tC\t\t\tB\n\t;;\n\tshrp\t\tx0 = v0, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, r10\tC\t\t\tM I\n\t;;\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx0 = v0, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, r10\tC\t\t\tM I\n\tadd\t\tn = -1, n\n\t;;\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tx2 = r11, r11\t\tC\t\t\tM 
I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n  (p15)\tbr.dpnt\t\t.grt1\t\t\tC\t\t\tB\n\t;;\n\tADDSUB\t\tw2 = r10, x2\t\tC\t\t\tM I\n\tshr.u\t\tr8 = r11, 63\t\tC retval\t\tI0\n\t;;\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n   (p6)\tadd\t\tr8 = 1, r8\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC FIXME swap with next\tI0\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = r10, x2\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, r11, 63\tC\t\t\tI0\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w2, r10\tC\t\t\tM I\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tadd\t\tx1 = r11, r11\t\tC\t\t\tM I\n  (p15)\tbr.dpnt\t\t.grt2\t\t\tC\t\t\tB\n\t;;\n\tADDSUB\t\tw1 = r10, x1\t\tC\t\t\tM I\n\tshrp\t\tx2 = v2, r11, 63\tC\t\t\tI0\n\t;;\n\tcmp.PRED\tp8, p0 = w1, r10\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\tshr.u\t\tr8 = v2, 63\t\tC retval\t\tI0\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = r10, x1\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tshrp\t\tx2 = v2, r11, 63\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = 
w1, r10\tC\t\t\tM I\n\t;;\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, v2, 63\t\tC\t\t\tI0\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dpnt\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lskip\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\tadd\t\tx0 = r11, r11\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n  (p15)\tbr.dpnt\t\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tshrp\t\tx1 = v1, r11, 63\tC\t\t\tI0\n\tADDSUB\t\tw0 = r10, x0\t\tC\t\t\tM I\n\t;;\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tshrp\t\tx1 = v1, r11, 63\tC\t\t\tI0\n\tADDSUB\t\tw0 = r10, x0\t\tC\t\t\tM I\n\t;;\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp6, p0 = w0, r10\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = w1, 8\t\tC\t\t\tM23\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tld8\t\tv3 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n.LL01:\tld8\t\tu3 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tld8\t\tv0 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw0 = u0, 
x0\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tld8\t\tu0 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n.LL00:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p8)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tld8\t\tv1 = [vp], 8\t\tC\t\t\tM01\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n.LL11:\tld8\t\tu1 = [up], 8\t\tC\t\t\tM01\n\tshrp\t\tx3 = v3, v2, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w1\tC\t\t\tM I\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tld8\t\tv2 = [vp], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\tld8\t\tu2 = [up], 8\t\tC\t\t\tM01\n\tADDSUB\t\tw3 = u3, x3\t\tC\t\t\tM I\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\t;;\nC *** MAIN LOOP END ***\n\n.Lskip:\tst8\t\t[rp] = w1, 8\t\tC\t\t\tM23\n\tshrp\t\tx0 = v0, v3, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\tcmp.PRED\tp8, p0 = w3, u3\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tshrp\t\tx1 = v1, v0, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w3\tC\t\t\tM I\n   (p6)\tadd\t\tw3 = INCR, w3\t\tC\t\t\tM I\n\tADDSUB\t\tw0 = u0, x0\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w0, u0\t\tC\t\t\tM I\n\tADDSUB\t\tw1 = u1, x1\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = w3, 8\t\tC\t\t\tM23\n\tshrp\t\tx2 = v2, v1, 63\t\tC\t\t\tI0\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w0\tC\t\t\tM I\n   (p8)\tadd\t\tw0 = INCR, w0\t\tC\t\t\tM I\n\tcmp.PRED\tp8, p0 = w1, u1\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tshr.u\t\tr8 = v2, 63\t\tC\t\t\tI0\n   (p6)\tcmp.eq.or\tp8, p0 = LIM, w1\tC\t\t\tM I\n   (p6)\tadd\t\tw1 = INCR, w1\t\tC\t\t\tM I\n\tADDSUB\t\tw2 = u2, x2\t\tC\t\t\tM I\n\t;;\n\tst8\t\t[rp] = w0, 8\t\tC\t\t\tM23\n\tcmp.PRED\tp6, p0 = w2, u2\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = w1, 
8\t\tC\t\t\tM23\n   (p8)\tcmp.eq.or\tp6, p0 = LIM, w2\tC\t\t\tM I\n   (p8)\tadd\t\tw2 = INCR, w2\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = w2, 8\t\tC\t\t\tM23\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n   (p6)\tadd\t\tr8 = 1, r8\t\tC\t\t\tM I\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/submul_1.c",
    "content": "/* IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract\n   the result from a second limb vector.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n\nThis code runs at about 9 cycles/limb on the Itanium.  That's far from the peak\nexecution speed of the Itanium pipeline.  With well-tuned assembly code we\nshould be able to get to around 3 cycles/limb.  See README.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_limb_t\nmpn_submul_1 (mp_ptr rp, mp_srcptr s1p, mp_size_t n, mp_limb_t s2d)\n{\n  mp_ptr tp;\n  mp_limb_t cy;\n  TMP_DECL;\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (n);\n  cy = mpn_mul_1 (tp, s1p, n, s2d);\n  cy += mpn_sub_n (rp, rp, tp, n);\n  TMP_FREE;\n  return cy;\n}\n"
  },
  {
    "path": "mpn/ia64/xnor_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_xnor_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 
= $3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, 
r14)\t\tC\t\t\tM I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 
= [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/ia64/xor_n.asm",
    "content": "dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,\ndnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.\n\ndnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC           cycles/limb\nC Itanium:      2\nC Itanium 2:    1\n\nC TODO\nC  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in\nC    wind-down code).\n\nC INPUT PARAMETERS\ndefine(`rp', `r32')\ndefine(`up', `r33')\ndefine(`vp', `r34')\ndefine(`n', `r35')\n\ndefine(`OPERATION_xor_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',`mpn_and_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',`mpn_andn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',`mpn_nand_n')\n\tdefine(`logop',\t\t`and\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',`mpn_ior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',`mpn_iorn_n')\n\tdefine(`logop',\t\t`andcm\t$1 = 
$3, $2')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',`mpn_nior_n')\n\tdefine(`logop',\t\t`or\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',`mpn_xor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`mov\t$1 = $2')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logop',\t\t`xor\t$1 = $2, $3')\n\tdefine(`notormov',\t`sub\t$1 = -1, $2')')\n\nASM_START()\nPROLOGUE(func)\n\t.prologue\n\t.save\tar.lc, r2\n\t.body\nifdef(`HAVE_ABI_32',\n`\taddp4\trp = 0, rp\t\t\tC\t\t\tM I\n\taddp4\tup = 0, up\t\t\tC\t\t\tM I\n\taddp4\tvp = 0, vp\t\t\tC\t\t\tM I\n\tzxt4\tn = n\t\t\t\tC\t\t\tI\n\t;;\n')\n{.mmi\n\tld8\t\tr10 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr11 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tr2 = ar.lc\t\tC\t\t\tI0\n}\n{.mmi\n\tand\t\tr14 = 3, n\t\tC\t\t\tM I\n\tcmp.lt\t\tp15, p14 = 4, n\t\tC\t\t\tM I\n\tshr.u\t\tn = n, 2\t\tC\t\t\tI0\n\t;;\n}\n{.mmi\n\tcmp.eq\t\tp6, p0 = 1, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp7, p0 = 2, r14\t\tC\t\t\tM I\n\tcmp.eq\t\tp8, p0 = 3, r14\t\tC\t\t\tM I\n}\n{.bbb\n   (p6)\tbr.dptk\t\t.Lb01\t\t\tC\t\t\tB\n   (p7)\tbr.dptk\t\t.Lb10\t\t\tC\t\t\tB\n   (p8)\tbr.dptk\t\t.Lb11\t\t\tC\t\t\tB\n}\n\n.Lb00:\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -2, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt4\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj4\t\t\tC\t\t\tB\n\n.grt4:\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\tnotormov(\tr8, r14)\t\tC\t\t\tM 
I\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL00\t\t\tC\t\t\tB\n\n.Lb01:\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt1\t\t\tC\t\t\tB\n\t;;\n\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj1\t\t\tC\t\t\tB\n\n.grt1:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.grt5\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj5\t\t\tC\t\t\tB\n\n.grt5:\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL01\t\t\tC\t\t\tB\n\n.Lb10:\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n  (p15)\tbr.cond.dpnt\t.grt2\t\t\tC\t\t\tB\n\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tbr\t\t.Lcj2\t\t\tC\t\t\tB\n\n.grt2:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r10, r11)\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t\t\tC\t\t\tB\n\tbr\t\t.Lcj6\t\t\tC\t\t\tB\n\n.Lb11:\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tadd\t\tn = -1, n\t\tC\t\t\tM I\n\t;;\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 
8\t\tC\t\t\tM\n\tlogop(\t\tr15, r10, r11)\t\tC\t\t\tM I\n  (p15)\tbr.cond.dpnt\t.grt3\t\t\tC\t\t\tB\n\t;;\n\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tbr\t\t.Lcj3\t\t\tC\t\t\tB\n\n.grt3:\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\t;;\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = n\t\tC\t\t\tI0\n\t;;\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tbr\t\t.LL11\t\t\tC\t\t\tB\n\nC *** MAIN LOOP START ***\n\tALIGN(32)\n.Loop:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr16 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr20 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL01:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr17 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr21 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL00:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\tld8\t\tr18 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr22 = [vp], 8\t\tC\t\t\tM\n\tnop.b\t\t0\n\t;;\n.LL11:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\tld8\t\tr19 = [up], 8\t\tC\t\t\tM\n\tld8\t\tr23 = [vp], 8\t\tC\t\t\tM\n\tbr.cloop.dptk\t.Loop\t;;\t\tC\t\t\tB\nC *** MAIN LOOP END ***\n\n.Lcj6:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r16, r20)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj5:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r17, r21)\t\tC\t\t\tM I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj4:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tlogop(\t\tr14, r18, r22)\t\tC\t\t\tM I\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj3:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tlogop(\t\tr15, r19, r23)\t\tC\t\t\tM 
I\n\tnotormov(\tr8, r14)\t\tC\t\t\tM I\n\t;;\n.Lcj2:\tst8\t\t[rp] = r8, 8\t\tC\t\t\tM\n\tnotormov(\tr9, r15)\t\tC\t\t\tM I\n\t;;\n.Lcj1:\tst8\t\t[rp] = r9, 8\t\tC\t\t\tM\n\tmov.i\t\tar.lc = r2\t\tC\t\t\tI0\n\tbr.ret.sptk.many b0\t\t\tC\t\t\tB\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/m4-ccas",
    "content": "#!/bin/sh\n#\n# A helper script for Makeasm.am .asm.lo rule.\n\n# Copyright 2001 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage: m4-ccas --m4=M4 CC ... file.asm ...\n#\n# Process file.asm with the given M4 plus any -D arguments, then\n# assemble with the given CC plus all arguments.\n#\n# The M4 command must be in a single --m4= argument, and will be split\n# on whitespace.  
When CC is invoked file.asm is replaced with a\n# temporary .s file which is the M4 output.\n#\n# To allow parallel builds, the temp file name is based on the .asm\n# file name, which will be the output object filename for all uses we\n# put this script to.\n\nM4=\nCC=\nDEFS=\nASM=\nSEEN_O=no\n\nfor i in \"$@\"; do\n  case $i in\n    --m4=*)\n      M4=`echo \"$i\" | sed 's/^--m4=//'`\n      ;;\n    -D*)\n      DEFS=\"$DEFS $i\"\n      CC=\"$CC $i\"\n      ;;\n    *.asm)\n      if test -n \"$ASM\"; then\n        echo \"Only one .asm file permitted\"\n        exit 1\n      fi\n      BASENAME=`echo \"$i\" | sed -e 's/\\.asm$//' -e 's/^.*[\\\\/:]//'`\n      TMP=tmp-$BASENAME.s\n      ASM=$i\n      CC=\"$CC $TMP\"\n      ;;\n    -o)\n      SEEN_O=yes\n      CC=\"$CC $i\"\n      ;;\n    *)\n      CC=\"$CC $i\"\n      ;;\n  esac\ndone\n\nif test -z \"$M4\"; then\n  echo \"No --m4 specified\"\n  exit 1\nfi\n\nif test -z \"$ASM\"; then\n  echo \"No .asm specified\"\n  exit 1\nfi\n\n# Libtool adds it's own -o when sending output to .libs/foo.o, but not\n# when just wanting foo.o in the current directory.  We need an\n# explicit -o in both cases since we're assembling tmp-foo.s.\n#\nif test $SEEN_O = no; then\n  CC=\"$CC -o $BASENAME.o\"\nfi\n\necho \"$M4 $DEFS $ASM >$TMP\"\n$M4 $DEFS $ASM >$TMP || exit\n\necho \"$CC\"\n$CC || exit\n\n# Comment this out to preserve .s intermediates\nrm -f $TMP\n"
  },
  {
    "path": "mpn/mips32/add_n.asm",
    "content": "dnl  MIPS32 mpn_add_n -- Add two limb vectors of the same length > 0 and store\ndnl  sum in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC s2_ptr\t$6\nC size\t\t$7\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\n\tlw\t$10,0($5)\n\tlw\t$11,0($6)\n\n\taddiu\t$7,$7,-1\n\tand\t$9,$7,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t move\t$2,$0\n\n\tsubu\t$7,$7,$9\n\n.Loop0:\taddiu\t$9,$9,-1\n\tlw\t$12,4($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,4($6)\n\tsltu\t$8,$11,$2\n\taddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsw\t$11,0($4)\n\tor\t$2,$2,$8\n\n\taddiu\t$5,$5,4\n\taddiu\t$6,$6,4\n\tmove\t$10,$12\n\tmove\t$11,$13\n\tbne\t$9,$0,.Loop0\n\t addiu\t$4,$4,4\n\n.L0:\tbeq\t$7,$0,.Lend\n\t 
nop\n\n.Loop:\taddiu\t$7,$7,-4\n\n\tlw\t$12,4($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,4($6)\n\tsltu\t$8,$11,$2\n\taddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsw\t$11,0($4)\n\tor\t$2,$2,$8\n\n\tlw\t$10,8($5)\n\taddu\t$13,$13,$2\n\tlw\t$11,8($6)\n\tsltu\t$8,$13,$2\n\taddu\t$13,$12,$13\n\tsltu\t$2,$13,$12\n\tsw\t$13,4($4)\n\tor\t$2,$2,$8\n\n\tlw\t$12,12($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,12($6)\n\tsltu\t$8,$11,$2\n\taddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsw\t$11,8($4)\n\tor\t$2,$2,$8\n\n\tlw\t$10,16($5)\n\taddu\t$13,$13,$2\n\tlw\t$11,16($6)\n\tsltu\t$8,$13,$2\n\taddu\t$13,$12,$13\n\tsltu\t$2,$13,$12\n\tsw\t$13,12($4)\n\tor\t$2,$2,$8\n\n\taddiu\t$5,$5,16\n\taddiu\t$6,$6,16\n\n\tbne\t$7,$0,.Loop\n\t addiu\t$4,$4,16\n\n.Lend:\taddu\t$11,$11,$2\n\tsltu\t$8,$11,$2\n\taddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsw\t$11,0($4)\n\tj\t$31\n\tor\t$2,$2,$8\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/mips32/addmul_1.asm",
    "content": "dnl  MIPS32 mpn_addmul_1 -- Multiply a limb vector with a single limb and add\ndnl  the product to a second limb vector.\n\ndnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\nC feed-in phase 0\n\tlw\t$8,0($5)\n\nC feed-in phase 1\n\taddiu\t$5,$5,4\n\tmultu\t$8,$7\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddiu\t$5,$5,4\n\taddu\t$3,$3,$2\tC add old carry limb to low product limb\n\tmultu\t$8,$7\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\taddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$3,$2\tC carry from previous addition -> $2\n\taddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\taddiu\t$4,$4,4\n\tbne\t$6,$0,Loop\n\t addu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 
1\n$LC1:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tmultu\t$8,$7\n\taddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\taddiu\t$4,$4,4\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\taddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\tj\t$31\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/mips32/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 , 2007 , 2008 , 2009 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)\n#define umul_ppmm(w1, w0, u, v) \\\n  do {                                                                  \\\n    UDItype __ll = (UDItype)(u) * (v);                                  \\\n    w1 = __ll >> 32;                                                    \\\n    w0 = __ll;                                                          \\\n  } while (0)\n#endif\n#if !defined (umul_ppmm) && (__GNUC__ > 2 || __GNUC_MINOR__ >= 7)\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"multu %2,%3\" : \"=l\" (w0), \"=h\" (w1) : \"d\" (u), \"d\" (v))\n#endif\n#if !defined (umul_ppmm)\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"multu %2,%3\\n\\tmflo %0\\n\\tmfhi %1\"\t\t\t\t\\\n\t   : \"=d\" (w0), \"=d\" (w1) : \"d\" (u), \"d\" (v))\n#endif\n\n#endif\n"
  },
  {
    "path": "mpn/mips32/lshift.asm",
    "content": "dnl  MIPS32 mpn_lshift -- Left shift.\n\ndnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC src_ptr\t$5\nC size\t\t$6\nC cnt\t\t$7\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\tsll\t$2,$6,2\n\taddu\t$5,$5,$2\tC make r5 point at end of src\n\tlw\t$10,-4($5)\tC load first limb\n\tsubu\t$13,$0,$7\n\taddu\t$4,$4,$2\tC make r4 point at end of res\n\taddiu\t$6,$6,-1\n\tand\t$9,$6,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t srl\t$2,$10,$13\tC compute function result\n\n\tsubu\t$6,$6,$9\n\n.Loop0:\tlw\t$3,-8($5)\n\taddiu\t$4,$4,-4\n\taddiu\t$5,$5,-4\n\taddiu\t$9,$9,-1\n\tsll\t$11,$10,$7\n\tsrl\t$12,$3,$13\n\tmove\t$10,$3\n\tor\t$8,$11,$12\n\tbne\t$9,$0,.Loop0\n\t sw\t$8,0($4)\n\n.L0:\tbeq\t$6,$0,.Lend\n\t 
nop\n\n.Loop:\tlw\t$3,-8($5)\n\taddiu\t$4,$4,-16\n\taddiu\t$6,$6,-4\n\tsll\t$11,$10,$7\n\tsrl\t$12,$3,$13\n\n\tlw\t$10,-12($5)\n\tsll\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsw\t$8,12($4)\n\tsrl\t$9,$10,$13\n\n\tlw\t$3,-16($5)\n\tsll\t$11,$10,$7\n\tor\t$8,$14,$9\n\tsw\t$8,8($4)\n\tsrl\t$12,$3,$13\n\n\tlw\t$10,-20($5)\n\tsll\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsw\t$8,4($4)\n\tsrl\t$9,$10,$13\n\n\taddiu\t$5,$5,-16\n\tor\t$8,$14,$9\n\tbgtz\t$6,.Loop\n\t sw\t$8,0($4)\n\n.Lend:\tsll\t$8,$10,$7\n\tj\t$31\n\tsw\t$8,-4($4)\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/mips32/mips-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for MIPS assembly code (both 32-bit and 64-bit).\n\n\ndnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\ndnl  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\ndnl  MA 02110-1301, USA.\n\n\ndnl  Usage: ASM_START()\ndefine(`ASM_START',\nm4_assert_numargs(0)\n`\t.set noreorder\n\t.set noat\n\t.set nomacro')\n\ndnl  Usage: X(value)\ndefine(`X',\nm4_assert_numargs(1)\n`0x$1')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n`\t.text\n\t.align\t4\n\t.globl\t$1\n\t.ent\t$1\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.end\t$1')\n\n\ndnl  Usage: r0 ... r31\ndnl         f0 ... f31\ndnl\ndnl  Map register names r0 to $0, and f0 to $f0, etc.\ndnl\ndnl  defreg() is used to protect the $ in $0 (otherwise it would represent a\ndnl  macro argument).  Double quoting is used to protect the f0 in $f0\ndnl  (otherwise it would be an infinite recursion).\n\nforloop(i,0,31,`defreg(`r'i,$i)')\nforloop(i,0,31,`deflit(`f'i,``$f''i)')\n\n\ndnl  Usage: ASM_END()\ndefine(`ASM_END',\nm4_assert_numargs(0)\n)\n\ndivert\n"
  },
  {
    "path": "mpn/mips32/mips.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for MIPS assembly code.\n\n\ndnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\ndnl  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\ndnl  MA 02110-1301, USA.\n\n\ndnl  Usage: ASM_START()\ndefine(`ASM_START',\nm4_assert_numargs(0)\n`\t.set noreorder\n\t.set noat\n\t.set nomacro')\n\ndnl  Usage: X(value)\ndefine(`X',\nm4_assert_numargs(1)\n`0x$1')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n`\t.text\n\t.align\t4\n\t.globl\t$1\n\t.ent\t$1\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.end\t$1')\n\n\ndnl  Usage: r0 ... r31\ndnl         f0 ... f31\ndnl\ndnl  Map register names r0 to $0, and f0 to $f0, etc.\ndnl\ndnl  defreg() is used to protect the $ in $0 (otherwise it would represent a\ndnl  macro argument).  Double quoting is used to protect the f0 in $f0\ndnl  (otherwise it would be an infinite recursion).\n\nforloop(i,0,31,`defreg(`r'i,$i)')\nforloop(i,0,31,`deflit(`f'i,``$f''i)')\n\n\ndnl  Usage: ASM_END()\ndefine(`ASM_END',\nm4_assert_numargs(0)\n)\n\ndivert\n"
  },
  {
    "path": "mpn/mips32/mul_1.asm",
    "content": "dnl  MIPS32 mpn_mul_1 -- Multiply a limb vector with a single limb and store\ndnl  the product in a second limb vector.\n\ndnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\nC feed-in phase 0\n\tlw\t$8,0($5)\n\nC feed-in phase 1\n\taddiu\t$5,$5,4\n\tmultu\t$8,$7\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tmflo\t$10\n\tmfhi\t$9\n\taddiu\t$5,$5,4\n\taddu\t$10,$10,$2\tC add old carry limb to low product limb\n\tmultu\t$8,$7\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\taddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$10,$2\tC carry from previous addition -> $2\n\tsw\t$10,0($4)\n\taddiu\t$4,$4,4\n\tbne\t$6,$0,Loop\n\t addu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 
1\n$LC1:\tmflo\t$10\n\tmfhi\t$9\n\taddu\t$10,$10,$2\n\tsltu\t$2,$10,$2\n\tmultu\t$8,$7\n\tsw\t$10,0($4)\n\taddiu\t$4,$4,4\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tmflo\t$10\n\tmfhi\t$9\n\taddu\t$10,$10,$2\n\tsltu\t$2,$10,$2\n\tsw\t$10,0($4)\n\tj\t$31\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/mips32/rshift.asm",
    "content": "dnl  MIPS32 mpn_rshift -- Right shift.\n\ndnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC src_ptr\t$5\nC size\t\t$6\nC cnt\t\t$7\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tlw\t$10,0($5)\tC load first limb\n\tsubu\t$13,$0,$7\n\taddiu\t$6,$6,-1\n\tand\t$9,$6,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t sll\t$2,$10,$13\tC compute function result\n\n\tsubu\t$6,$6,$9\n\n.Loop0:\tlw\t$3,4($5)\n\taddiu\t$4,$4,4\n\taddiu\t$5,$5,4\n\taddiu\t$9,$9,-1\n\tsrl\t$11,$10,$7\n\tsll\t$12,$3,$13\n\tmove\t$10,$3\n\tor\t$8,$11,$12\n\tbne\t$9,$0,.Loop0\n\t sw\t$8,-4($4)\n\n.L0:\tbeq\t$6,$0,.Lend\n\t 
nop\n\n.Loop:\tlw\t$3,4($5)\n\taddiu\t$4,$4,16\n\taddiu\t$6,$6,-4\n\tsrl\t$11,$10,$7\n\tsll\t$12,$3,$13\n\n\tlw\t$10,8($5)\n\tsrl\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsw\t$8,-16($4)\n\tsll\t$9,$10,$13\n\n\tlw\t$3,12($5)\n\tsrl\t$11,$10,$7\n\tor\t$8,$14,$9\n\tsw\t$8,-12($4)\n\tsll\t$12,$3,$13\n\n\tlw\t$10,16($5)\n\tsrl\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsw\t$8,-8($4)\n\tsll\t$9,$10,$13\n\n\taddiu\t$5,$5,16\n\tor\t$8,$14,$9\n\tbgtz\t$6,.Loop\n\t sw\t$8,-4($4)\n\n.Lend:\tsrl\t$8,$10,$7\n\tj\t$31\n\tsw\t$8,0($4)\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/mips32/sub_n.asm",
    "content": "dnl  MIPS32 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC s2_ptr\t$6\nC size\t\t$7\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tlw\t$10,0($5)\n\tlw\t$11,0($6)\n\n\taddiu\t$7,$7,-1\n\tand\t$9,$7,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t move\t$2,$0\n\n\tsubu\t$7,$7,$9\n\n.Loop0:\taddiu\t$9,$9,-1\n\tlw\t$12,4($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,4($6)\n\tsltu\t$8,$11,$2\n\tsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsw\t$11,0($4)\n\tor\t$2,$2,$8\n\n\taddiu\t$5,$5,4\n\taddiu\t$6,$6,4\n\tmove\t$10,$12\n\tmove\t$11,$13\n\tbne\t$9,$0,.Loop0\n\t addiu\t$4,$4,4\n\n.L0:\tbeq\t$7,$0,.Lend\n\t 
nop\n\n.Loop:\taddiu\t$7,$7,-4\n\n\tlw\t$12,4($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,4($6)\n\tsltu\t$8,$11,$2\n\tsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsw\t$11,0($4)\n\tor\t$2,$2,$8\n\n\tlw\t$10,8($5)\n\taddu\t$13,$13,$2\n\tlw\t$11,8($6)\n\tsltu\t$8,$13,$2\n\tsubu\t$13,$12,$13\n\tsltu\t$2,$12,$13\n\tsw\t$13,4($4)\n\tor\t$2,$2,$8\n\n\tlw\t$12,12($5)\n\taddu\t$11,$11,$2\n\tlw\t$13,12($6)\n\tsltu\t$8,$11,$2\n\tsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsw\t$11,8($4)\n\tor\t$2,$2,$8\n\n\tlw\t$10,16($5)\n\taddu\t$13,$13,$2\n\tlw\t$11,16($6)\n\tsltu\t$8,$13,$2\n\tsubu\t$13,$12,$13\n\tsltu\t$2,$12,$13\n\tsw\t$13,12($4)\n\tor\t$2,$2,$8\n\n\taddiu\t$5,$5,16\n\taddiu\t$6,$6,16\n\n\tbne\t$7,$0,.Loop\n\t addiu\t$4,$4,16\n\n.Lend:\taddu\t$11,$11,$2\n\tsltu\t$8,$11,$2\n\tsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsw\t$11,0($4)\n\tj\t$31\n\tor\t$2,$2,$8\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/mips32/submul_1.asm",
    "content": "dnl  MIPS32 mpn_submul_1 -- Multiply a limb vector with a single limb and\ndnl  subtract the product from a second limb vector.\n\ndnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\nC feed-in phase 0\n\tlw\t$8,0($5)\n\nC feed-in phase 1\n\taddiu\t$5,$5,4\n\tmultu\t$8,$7\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\taddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddiu\t$5,$5,4\n\taddu\t$3,$3,$2\tC add old carry limb to low product limb\n\tmultu\t$8,$7\n\tlw\t$8,0($5)\tC load new s1 limb as early as possible\n\taddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$3,$2\tC carry from previous addition -> $2\n\tsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\taddiu\t$4,$4,4\n\tbne\t$6,$0,Loop\n\t addu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down 
phase 1\n$LC1:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tmultu\t$8,$7\n\tsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\taddiu\t$4,$4,4\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tlw\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\taddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\taddu\t$2,$2,$10\n\tsw\t$3,0($4)\n\tj\t$31\n\taddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/mips32/umul.asm",
    "content": "dnl  MIPS32 umul_ppmm -- longlong.h support.\n\ndnl  Copyright 1999, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC plp   $4\nC u     $5\nC v     $6\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\tmultu\t$5,$6\n\tmflo\t$3\n\tmfhi\t$2\n\tj\t$31\n\tsw\t$3,0($4)\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/mips64/README",
    "content": "Copyright 1996 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\nThis directory contains mpn functions optimized for MIPS3.  Example of\nprocessors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000.\n\nRELEVANT OPTIMIZATION ISSUES\n\n1. On the R4000 and R4400, branches, both the plain and the \"likely\" ones,\n   take 3 cycles to execute.  (The fastest possible loop will take 4 cycles,\n   because of the delay insn.)\n\n   On the R4600, branches takes a single cycle\n\n   On the R8000, branches often take no noticable cycles, as they are\n   executed in a separate function unit..\n\n2. The R4000 and R4400 have a load latency of 4 cycles.\n\n3. On the R4000 and R4400, multiplies take a data-dependent number of\n   cycles, contrary to the SGI documentation.  There seem to be 3 or 4\n   possible latencies.\n\n4. The R1x000 processors can issue one floating-point operation, two integer\n   operations, and one memory operation per cycle.  The FPU has very short\n   latencies, while the integer multiply unit is non-pipelined.  We should\n   therefore write fp based mpn_Xmul_1.\n\nSTATUS\n\nGood...\n"
  },
  {
    "path": "mpn/mips64/add_n.asm",
    "content": "dnl  MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store\ndnl  sum in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC s2_ptr\t$6\nC size\t\t$7\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tld\t$10,0($5)\n\tld\t$11,0($6)\n\n\tdaddiu\t$7,$7,-1\n\tand\t$9,$7,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t move\t$2,$0\n\n\tdsubu\t$7,$7,$9\n\n.Loop0:\tdaddiu\t$9,$9,-1\n\tld\t$12,8($5)\n\tdaddu\t$11,$11,$2\n\tld\t$13,8($6)\n\tsltu\t$8,$11,$2\n\tdaddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsd\t$11,0($4)\n\tor\t$2,$2,$8\n\n\tdaddiu\t$5,$5,8\n\tdaddiu\t$6,$6,8\n\tmove\t$10,$12\n\tmove\t$11,$13\n\tbne\t$9,$0,.Loop0\n\t daddiu\t$4,$4,8\n\n.L0:\tbeq\t$7,$0,.Lend\n\t 
nop\n\n.Loop:\tdaddiu\t$7,$7,-4\n\n\tld\t$12,8($5)\n\tdaddu\t$11,$11,$10\n\tld\t$13,8($6)\n\tsltu\t$8,$11,$10\n\tdaddu\t$11,$11,$2\n\tsltu\t$2,$11,$2\n\tsd\t$11,0($4)\n\tor\t$2,$2,$8\n\n\tld\t$10,16($5)\n\tdaddu\t$13,$13,$12\n\tld\t$11,16($6)\n\tsltu\t$8,$13,$12\n\tdaddu\t$13,$13,$2\n\tsltu\t$2,$13,$2\n\tsd\t$13,8($4)\n\tor\t$2,$2,$8\n\n\tld\t$12,24($5)\n\tdaddu\t$11,$11,$10\n\tld\t$13,24($6)\n\tsltu\t$8,$11,$10\n\tdaddu\t$11,$11,$2\n\tsltu\t$2,$11,$2\n\tsd\t$11,16($4)\n\tor\t$2,$2,$8\n\n\tld\t$10,32($5)\n\tdaddu\t$13,$13,$12\n\tld\t$11,32($6)\n\tsltu\t$8,$13,$12\n\tdaddu\t$13,$13,$2\n\tsltu\t$2,$13,$2\n\tsd\t$13,24($4)\n\tor\t$2,$2,$8\n\n\tdaddiu\t$5,$5,32\n\tdaddiu\t$6,$6,32\n\n\tbne\t$7,$0,.Loop\n\t daddiu\t$4,$4,32\n\n.Lend:\tdaddu\t$11,$11,$2\n\tsltu\t$8,$11,$2\n\tdaddu\t$11,$10,$11\n\tsltu\t$2,$11,$10\n\tsd\t$11,0($4)\n\tj\t$31\n\tor\t$2,$2,$8\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/mips64/addmul_1.asm",
    "content": "dnl  MIPS64 mpn_addmul_1 -- Multiply a limb vector with a single limb and add\ndnl  the product to a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\nC feed-in phase 0\n\tld\t$8,0($5)\n\nC feed-in phase 1\n\tdaddiu\t$5,$5,8\n\tdmultu\t$8,$7\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddiu\t$5,$5,8\n\tdaddu\t$3,$3,$2\tC add old carry limb to low product limb\n\tdmultu\t$8,$7\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\tdaddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$3,$2\tC carry from previous addition -> $2\n\tdaddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tdaddiu\t$4,$4,8\n\tbne\t$6,$0,Loop\n\t daddu\t$2,$9,$2\tC add high product limb and carry from 
addition\n\nC wind-down phase 1\n$LC1:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tdmultu\t$8,$7\n\tdaddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tdaddiu\t$4,$4,8\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tdaddu\t$3,$10,$3\n\tsltu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tj\t$31\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/mips64/divrem_1.asm",
    "content": "dnl  MIPS64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC qp  = r4\nC qxn = r5\nC up  = r6\nC n   = r7\nC vl  = r8\n\nC\t cycles/limb\nC R4000:     ??\nC R1x000:    35\n\nC This was generated by gcc, then the code was manually edited.  Lots of things\nC could be streamlined.  
It would probably be a good idea to merge the loops\nC for normalized and unnormalized divisor, since the shifting stuff is done for\nC free in parallel with other operations.\n\nASM_START()\nPROLOGUE(mpn_divrem_1)\n\tdsubu\t$sp,$sp,32\n\tsd\t$28,16($sp)\n\tlui\t$1,%hi(%neg(%gp_rel(__gmpn_divrem_1)))\n\taddiu\t$1,$1,%lo(%neg(%gp_rel(__gmpn_divrem_1)))\n\tdaddu\t$gp,$1,$25\n\tmove\t$14,$4\n\tmove\t$15,$5\n\tdaddu\t$4,$7,$15\n\tbne\t$4,$0,.L176\n\tmove\t$13,$0\n\tb\t.L490\n\tmove\t$2,$0\n.L491:\n\tb\t.L229\n\tdli\t$5,-1\n.L176:\n\tdsll\t$2,$4,3\n\tdaddu\t$2,$2,-8\n\tdli\t$3,0x8000000000000000\n\tand\t$3,$8,$3\n\tbeq\t$3,$0,.L177\n\tdaddu\t$14,$14,$2\n\tbeq\t$7,$0,.L494\n\tdsll\t$2,$8,1\n\tdsll\t$2,$7,3\n\tdaddu\t$2,$2,$6\n\tld\t$13,-8($2)\n\tdaddu\t$7,$7,-1\n\tsltu\t$2,$13,$8\n\txori\t$2,$2,0x1\n\tdsll\t$2,$2,32\n\tdsra\t$2,$2,32\n\tdsll\t$2,$2,32\n\tdsrl\t$2,$2,32\n\tsd\t$2,0($14)\n\tdaddu\t$14,$14,-8\n\tdsubu\t$2,$0,$2\n\tand\t$2,$8,$2\n\tdsubu\t$13,$13,$2\n\tdsll\t$2,$8,1\n.L494:\n\tbeq\t$2,$0,.L491\n\tdsrl\t$9,$8,32\n\tdsubu\t$4,$0,$8\n\tddivu\t$0,$4,$9\n\tdli\t$2,0xffffffff\n\tand\t$12,$8,$2\n\tmflo\t$5\n\tmove\t$11,$5\n\tdmult\t$11,$9\n\tmflo\t$3\n\tnop\n\tnop\n\tdmult\t$11,$12\n\tmflo\t$10\n\tdli\t$2,0x100000000\n\tdsubu\t$4,$4,$3\n\tdmult\t$4,$2\n\tmflo\t$4\n\tsltu\t$3,$4,$10\n\tbeql\t$3,$0,.L495\n\tdsubu\t$4,$4,$10\n\tdaddu\t$4,$4,$8\n\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L248\n\tdsubu\t$11,$11,1\n\tsltu\t$2,$4,$10\n\tbeql\t$2,$0,.L495\n\tdsubu\t$4,$4,$10\n\tdsubu\t$11,$11,1\n\tdaddu\t$4,$4,$8\n.L248:\n\tdsubu\t$4,$4,$10\n.L495:\n\tddivu\t$0,$4,$9\n\tmflo\t$2\n\tmove\t$5,$2\n\tdmult\t$5,$9\n\tmflo\t$3\n\tnop\n\tnop\n\tdmult\t$5,$12\n\tmflo\t$10\n\tdli\t$2,0x100000000\n\tdsubu\t$4,$4,$3\n\tdmult\t$4,$2\n\tmflo\t$4\n\tsltu\t$3,$4,$10\n\tbeq\t$3,$0,.L504\n\tdaddu\t$4,$4,$8\n\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L251\n\tdsubu\t$5,$5,1\n\tsltu\t$2,$4,$10\n\tbnel\t$2,$0,.L251\n\tdsubu\t$5,$5,1\n.L251:\n\tdli\t$2,0x100000000\n.L504:\n\tdmult\t$11,$2\n\tmflo\t$2\n\tor\t$5,$2,$5\
n.L229:\n\tdaddu\t$7,$7,-1\n\tbltz\t$7,.L257\n\tdaddu\t$25,$15,-1\n\tdsll\t$2,$7,3\n\tdaddu\t$6,$2,$6\n.Loop1:\n\tld\t$11,0($6)\n\tdmultu\t$13,$5\n\tmfhi\t$10\n\tdaddu\t$10,$10,$13\n\tnop\n\tdmultu\t$10,$8\n\tmflo\t$2\n\tmfhi\t$9\n\tdsubu\t$2,$11,$2\n\tdsubu\t$3,$13,$9\n\tsltu\t$9,$11,$2\n\tdsubu\t$9,$3,$9\n\tbeq\t$9,$0,.L271\n\tmove\t$4,$2\n\tdsubu\t$2,$4,$8\n\tsltu\t$3,$4,$2\n\tmove\t$4,$2\n\tbeq\t$9,$3,.L271\n\tdaddu\t$10,$10,1\n\tdsubu\t$4,$4,$8\n\tdaddu\t$10,$10,1\n.L271:\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L496\n\tmove\t$13,$4\n\tdsubu\t$4,$4,$8\n\tdaddu\t$10,$10,1\n\tmove\t$13,$4\n.L496:\tsd\t$10,0($14)\n\tdaddu\t$14,$14,-8\n\tdaddu\t$7,$7,-1\n\tbgez\t$7,.Loop1\n\tdaddu\t$6,$6,-8\n.L257:\n\tmove\t$7,$25\n\tbltz\t$7,.L490\n\tmove\t$2,$13\n.Loop2:\n\tdmultu\t$13,$5\n\tmfhi\t$9\n\tdaddu\t$9,$9,$13\n\tnop\n\tdmultu\t$9,$8\n\tmflo\t$2\n\tmfhi\t$6\n\tdsubu\t$2,$0,$2\n\tdsubu\t$3,$13,$6\n\tsltu\t$6,$0,$2\n\tdsubu\t$6,$3,$6\n\tbeq\t$6,$0,.L295\n\tmove\t$4,$2\n\tdsubu\t$2,$4,$8\n\tsltu\t$3,$4,$2\n\tmove\t$4,$2\n\tbeq\t$6,$3,.L295\n\tdaddu\t$9,$9,1\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n.L295:\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L497\n\tmove\t$13,$4\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n\tmove\t$13,$4\n.L497:\tsd\t$9,0($14)\n\tdaddu\t$7,$7,-1\n\tbgez\t$7,.Loop2\n\tdaddu\t$14,$14,-8\n\tb\t.L490\n\tmove\t$2,$13\n.L177:\n\tbeq\t$7,$0,.L308\n\tdsll\t$2,$7,3\n\tdaddu\t$2,$2,$6\n\tld\t$12,-8($2)\n\tsltu\t$3,$12,$8\n\tbeq\t$3,$0,.L308\n\tdaddu\t$4,$4,-1\n\tmove\t$13,$12\n\tsd\t$0,0($14)\n\tbne\t$4,$0,.L307\n\tdaddu\t$14,$14,-8\n\tb\t.L490\n\tmove\t$2,$13\n.L492:\n\tb\t.L395\n\tdli\t$5,-1\n.L307:\n\tdaddu\t$7,$7,-1\n.L308:\n\tdli\t$5,0x38\n\tdsrl\t$2,$8,56\n\tandi\t$2,$2,0xff\n\tdla\t$3,__gmpn_clz_tab\n\tbne\t$2,$0,.L321\n\tdaddu\t$25,$15,-1\n\tdsubu\t$5,$5,8\n.L499:\n\tbeql\t$5,$0,.L498\n\tdaddu\t$5,$5,1\n\tdsll\t$2,$5,32\n\tdsra\t$2,$2,32\n\tdsrl\t$2,$8,$2\n\tandi\t$2,$2,0xff\n\tbeql\t$2,$0,.L499\n\tdsubu\t$5,$5,8\n.L321:\n\tdaddu\t$5,$5,1\n.L498:\n\tdsll\t$2,$5,32\n\tdsra\t$2,$2,32\n\
tdsrl\t$2,$8,$2\n\tdsll\t$2,$2,32\n\tdsra\t$2,$2,32\n\tdaddu\t$2,$2,$3\n\tlbu\t$4,0($2)\n\tdli\t$3,0x41\n\tdsubu\t$3,$3,$4\n\tdsubu\t$3,$3,$5\n\tdsll\t$24,$3,32\n\tdsra\t$24,$24,32\n\tdsll\t$8,$8,$24\n\tdsll\t$2,$8,1\n\tbeq\t$2,$0,.L492\n\tdsll\t$13,$13,$24\n\tdsrl\t$9,$8,32\n\tdsubu\t$4,$0,$8\n\tddivu\t$0,$4,$9\n\tdli\t$2,0xffffffff\n\tand\t$12,$8,$2\n\tmflo\t$5\n\tmove\t$11,$5\n\tdmult\t$11,$9\n\tmflo\t$3\n\tnop\n\tnop\n\tdmult\t$11,$12\n\tmflo\t$10\n\tdli\t$2,0x100000000\n\tdsubu\t$4,$4,$3\n\tdmult\t$4,$2\n\tmflo\t$4\n\tsltu\t$3,$4,$10\n\tbeql\t$3,$0,.L500\n\tdsubu\t$4,$4,$10\n\tdaddu\t$4,$4,$8\n\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L414\n\tdsubu\t$11,$11,1\n\tsltu\t$2,$4,$10\n\tbeql\t$2,$0,.L500\n\tdsubu\t$4,$4,$10\n\tdsubu\t$11,$11,1\n\tdaddu\t$4,$4,$8\n.L414:\n\tdsubu\t$4,$4,$10\n.L500:\n\tddivu\t$0,$4,$9\n\tmflo\t$2\n\tmove\t$5,$2\n\tdmult\t$5,$9\n\tmflo\t$3\n\tnop\n\tnop\n\tdmult\t$5,$12\n\tmflo\t$10\n\tdli\t$2,0x100000000\n\tdsubu\t$4,$4,$3\n\tdmult\t$4,$2\n\tmflo\t$4\n\tsltu\t$3,$4,$10\n\tbeq\t$3,$0,.L505\n\tdaddu\t$4,$4,$8\n\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L417\n\tdsubu\t$5,$5,1\n\tsltu\t$2,$4,$10\n\tbnel\t$2,$0,.L417\n\tdsubu\t$5,$5,1\n.L417:\n\tdli\t$2,0x100000000\n.L505:\n\tdmult\t$11,$2\n\tmflo\t$2\n\tor\t$5,$2,$5\n.L395:\n\tbeq\t$7,$0,.L422\n\tdsll\t$2,$7,3\n\tdaddu\t$2,$2,$6\n\tld\t$12,-8($2)\n\tdaddu\t$7,$7,-2\n\tli\t$2,64\n\tsubu\t$2,$2,$24\n\tdsrl\t$3,$12,$2\n\tbltz\t$7,.L424\n\tor\t$13,$13,$3\n\tmove\t$15,$2\n\tdsll\t$2,$7,3\n\tdaddu\t$6,$2,$6\n.Loop3:\n\tld\t$11,0($6)\n\tdmultu\t$13,$5\n\tmfhi\t$9\n\tdaddu\t$9,$9,$13\n\tnop\n\tdmultu\t$9,$8\n\tmflo\t$4\n\tmfhi\t$10\n\tdsll\t$3,$12,$24\n\tdsrl\t$2,$11,$15\n\tor\t$3,$3,$2\n\tdsubu\t$4,$3,$4\n\tdsubu\t$2,$13,$10\n\tsltu\t$10,$3,$4\n\tdsubu\t$10,$2,$10\n\tbeq\t$10,$0,.L438\n\tdsubu\t$2,$4,$8\n\tsltu\t$3,$4,$2\n\tmove\t$4,$2\n\tbeq\t$10,$3,.L438\n\tdaddu\t$9,$9,1\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n.L438:\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L501\n\tmove\t$13,$4\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n\tmove\t$1
3,$4\n.L501:\tsd\t$9,0($14)\n\tdaddu\t$14,$14,-8\n\tmove\t$12,$11\n\tdaddu\t$7,$7,-1\n\tbgez\t$7,.Loop3\n\tdaddu\t$6,$6,-8\n.L424:\n\tdmultu\t$13,$5\n\tmfhi\t$7\n\tdaddu\t$7,$7,$13\n\tnop\n\tdmultu\t$7,$8\n\tmflo\t$2\n\tmfhi\t$6\n\tdsll\t$3,$12,$24\n\tdsubu\t$2,$3,$2\n\tdsubu\t$4,$13,$6\n\tsltu\t$6,$3,$2\n\tdsubu\t$6,$4,$6\n\tbeq\t$6,$0,.L458\n\tmove\t$4,$2\n\tdsubu\t$2,$4,$8\n\tsltu\t$3,$4,$2\n\tmove\t$4,$2\n\tbeq\t$6,$3,.L458\n\tdaddu\t$7,$7,1\n\tdsubu\t$4,$4,$8\n\tdaddu\t$7,$7,1\n.L458:\n\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L502\n\tmove\t$13,$4\n\tdsubu\t$4,$4,$8\n\tdaddu\t$7,$7,1\n\tmove\t$13,$4\n.L502:\n\tsd\t$7,0($14)\n\tdaddu\t$14,$14,-8\n.L422:\n\tmove\t$7,$25\n\tbltz\t$7,.L490\n\tdsrl\t$2,$13,$24\n.Loop4:\n\tdmultu\t$13,$5\n\tmfhi\t$9\n\tdaddu\t$9,$9,$13\n\tnop\n\tdmultu\t$9,$8\n\tmflo\t$2\n\tmfhi\t$6\n\tdsubu\t$2,$0,$2\n\tdsubu\t$3,$13,$6\n\tsltu\t$6,$0,$2\n\tdsubu\t$6,$3,$6\n\tbeq\t$6,$0,.L481\n\tmove\t$4,$2\n\tdsubu\t$2,$4,$8\n\tsltu\t$3,$4,$2\n\tmove\t$4,$2\n\tbeq\t$6,$3,.L481\n\tdaddu\t$9,$9,1\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n.L481:\tsltu\t$2,$4,$8\n\tbne\t$2,$0,.L503\n\tmove\t$13,$4\n\tdsubu\t$4,$4,$8\n\tdaddu\t$9,$9,1\n\tmove\t$13,$4\n.L503:\tsd\t$9,0($14)\n\tdaddu\t$7,$7,-1\n\tbgez\t$7,.Loop4\n\tdaddu\t$14,$14,-8\n\tdsrl\t$2,$13,$24\n.L490:\n\tld\t$28,16($sp)\n\tj\t$31\n\tdaddu\t$sp,$sp,32\nEPILOGUE(mpn_divrem_1)\n"
  },
  {
    "path": "mpn/mips64/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 , 2007 , 2008 , 2009 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n#if __mips >= 3\n#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)\n#define umul_ppmm(w1, w0, u, v) \\\n  do {                                                                  \\\n    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));        \\\n    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);                        \\\n    w1 = __ll >> 64;                                                    \\\n    w0 = __ll;                                                          \\\n  } while (0)\n#endif\n#if !defined (umul_ppmm) && (__GNUC__ > 2 || __GNUC_MINOR__ >= 7)\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"dmultu %2,%3\" : \"=l\" (w0), \"=h\" (w1) : \"d\" (u), \"d\" (v))\n#endif\n#if !defined (umul_ppmm)\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"dmultu %2,%3\\n\\tmflo %0\\n\\tmfhi %1\"\t\t\t\t\\\n\t   : \"=d\" (w0), \"=d\" (w1) : \"d\" (u), \"d\" (v))\n#endif\n#endif\n#endif\n"
  },
  {
    "path": "mpn/mips64/lshift.asm",
    "content": "dnl  MIPS64 mpn_lshift -- Left shift.\n\ndnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC src_ptr\t$5\nC size\t\t$6\nC cnt\t\t$7\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\tdsll\t$2,$6,3\n\tdaddu\t$5,$5,$2\tC make r5 point at end of src\n\tld\t$10,-8($5)\tC load first limb\n\tdsubu\t$13,$0,$7\n\tdaddu\t$4,$4,$2\tC make r4 point at end of res\n\tdaddiu\t$6,$6,-1\n\tand\t$9,$6,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t dsrl\t$2,$10,$13\tC compute function result\n\n\tdsubu\t$6,$6,$9\n\n.Loop0:\tld\t$3,-16($5)\n\tdaddiu\t$4,$4,-8\n\tdaddiu\t$5,$5,-8\n\tdaddiu\t$9,$9,-1\n\tdsll\t$11,$10,$7\n\tdsrl\t$12,$3,$13\n\tmove\t$10,$3\n\tor\t$8,$11,$12\n\tbne\t$9,$0,.Loop0\n\t sd\t$8,0($4)\n\n.L0:\tbeq\t$6,$0,.Lend\n\t 
nop\n\n.Loop:\tld\t$3,-16($5)\n\tdaddiu\t$4,$4,-32\n\tdaddiu\t$6,$6,-4\n\tdsll\t$11,$10,$7\n\tdsrl\t$12,$3,$13\n\n\tld\t$10,-24($5)\n\tdsll\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsd\t$8,24($4)\n\tdsrl\t$9,$10,$13\n\n\tld\t$3,-32($5)\n\tdsll\t$11,$10,$7\n\tor\t$8,$14,$9\n\tsd\t$8,16($4)\n\tdsrl\t$12,$3,$13\n\n\tld\t$10,-40($5)\n\tdsll\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsd\t$8,8($4)\n\tdsrl\t$9,$10,$13\n\n\tdaddiu\t$5,$5,-32\n\tor\t$8,$14,$9\n\tbgtz\t$6,.Loop\n\t sd\t$8,0($4)\n\n.Lend:\tdsll\t$8,$10,$7\n\tj\t$31\n\tsd\t$8,-8($4)\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/mips64/mul_1.asm",
    "content": "dnl  MIPS64 mpn_mul_1 -- Multiply a limb vector with a single limb and store\ndnl  the product in a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\nC feed-in phase 0\n\tld\t$8,0($5)\n\nC feed-in phase 1\n\tdaddiu\t$5,$5,8\n\tdmultu\t$8,$7\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tnop\n\tmflo\t$10\n\tmfhi\t$9\n\tdaddiu\t$5,$5,8\n\tdaddu\t$10,$10,$2\tC add old carry limb to low product limb\n\tdmultu\t$8,$7\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\tdaddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$10,$2\tC carry from previous addition -> $2\n\tnop\n\tnop\n\tsd\t$10,0($4)\n\tdaddiu\t$4,$4,8\n\tbne\t$6,$0,Loop\n\t daddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 
1\n$LC1:\tmflo\t$10\n\tmfhi\t$9\n\tdaddu\t$10,$10,$2\n\tsltu\t$2,$10,$2\n\tdmultu\t$8,$7\n\tsd\t$10,0($4)\n\tdaddiu\t$4,$4,8\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tmflo\t$10\n\tmfhi\t$9\n\tdaddu\t$10,$10,$2\n\tsltu\t$2,$10,$2\n\tsd\t$10,0($4)\n\tj\t$31\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/mips64/rshift.asm",
    "content": "dnl  MIPS64 mpn_rshift -- Right shift.\n\ndnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC src_ptr\t$5\nC size\t\t$6\nC cnt\t\t$7\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tld\t$10,0($5)\tC load first limb\n\tdsubu\t$13,$0,$7\n\tdaddiu\t$6,$6,-1\n\tand\t$9,$6,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t dsll\t$2,$10,$13\tC compute function result\n\n\tdsubu\t$6,$6,$9\n\n.Loop0:\tld\t$3,8($5)\n\tdaddiu\t$4,$4,8\n\tdaddiu\t$5,$5,8\n\tdaddiu\t$9,$9,-1\n\tdsrl\t$11,$10,$7\n\tdsll\t$12,$3,$13\n\tmove\t$10,$3\n\tor\t$8,$11,$12\n\tbne\t$9,$0,.Loop0\n\t sd\t$8,-8($4)\n\n.L0:\tbeq\t$6,$0,.Lend\n\t 
nop\n\n.Loop:\tld\t$3,8($5)\n\tdaddiu\t$4,$4,32\n\tdaddiu\t$6,$6,-4\n\tdsrl\t$11,$10,$7\n\tdsll\t$12,$3,$13\n\n\tld\t$10,16($5)\n\tdsrl\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsd\t$8,-32($4)\n\tdsll\t$9,$10,$13\n\n\tld\t$3,24($5)\n\tdsrl\t$11,$10,$7\n\tor\t$8,$14,$9\n\tsd\t$8,-24($4)\n\tdsll\t$12,$3,$13\n\n\tld\t$10,32($5)\n\tdsrl\t$14,$3,$7\n\tor\t$8,$11,$12\n\tsd\t$8,-16($4)\n\tdsll\t$9,$10,$13\n\n\tdaddiu\t$5,$5,32\n\tor\t$8,$14,$9\n\tbgtz\t$6,.Loop\n\t sd\t$8,-8($4)\n\n.Lend:\tdsrl\t$8,$10,$7\n\tj\t$31\n\tsd\t$8,0($4)\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/mips64/sqr_diagonal.asm",
    "content": "dnl  MIPS64 mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ndnl  INPUT PARAMETERS\ndnl  rp\t\t$4\ndnl  up\t\t$5\ndnl  n\t\t$6\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\n\tld\tr8,0(r5)\n\tdaddiu\tr6,r6,-2\n\tdmultu\tr8,r8\n\tbltz\tr6,$Lend1\n\tnop\n\tld\tr8,8(r5)\n\tbeq\tr6,r0,$Lend2\n\tnop\n\n$Loop:\tmflo\tr10\n\tmfhi\tr9\n\tdaddiu\tr6,r6,-1\n\tsd\tr10,0(r4)\n\tsd\tr9,8(r4)\n\tdmultu\tr8,r8\n\tld\tr8,16(r5)\n\tdaddiu\tr5,r5,8\n\tbne\tr6,r0,$Loop\n\tdaddiu\tr4,r4,16\n\n$Lend2: mflo\tr10\n\tmfhi\tr9\n\tsd\tr10,0(r4)\n\tsd\tr9,8(r4)\n\tdmultu\tr8,r8\n\tmflo\tr10\n\tmfhi\tr9\n\tsd\tr10,16(r4)\n\tj\tr31\n\tsd\tr9,24(r4)\n\n$Lend1: mflo\tr10\n\tmfhi\tr9\n\tsd\tr10,0(r4)\n\tj\tr31\n\tsd\tr9,8(r4)\nEPILOGUE(mpn_sqr_diagonal)\n"
  },
  {
    "path": "mpn/mips64/sub_n.asm",
    "content": "dnl  MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC s2_ptr\t$6\nC size\t\t$7\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tld\t$10,0($5)\n\tld\t$11,0($6)\n\n\tdaddiu\t$7,$7,-1\n\tand\t$9,$7,4-1\tC number of limbs in first loop\n\tbeq\t$9,$0,.L0\tC if multiple of 4 limbs, skip first loop\n\t move\t$2,$0\n\n\tdsubu\t$7,$7,$9\n\n.Loop0:\tdaddiu\t$9,$9,-1\n\tld\t$12,8($5)\n\tdaddu\t$11,$11,$2\n\tld\t$13,8($6)\n\tsltu\t$8,$11,$2\n\tdsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsd\t$11,0($4)\n\tor\t$2,$2,$8\n\n\tdaddiu\t$5,$5,8\n\tdaddiu\t$6,$6,8\n\tmove\t$10,$12\n\tmove\t$11,$13\n\tbne\t$9,$0,.Loop0\n\t daddiu\t$4,$4,8\n\n.L0:\tbeq\t$7,$0,.Lend\n\t 
nop\n\n.Loop:\tdaddiu\t$7,$7,-4\n\n\tld\t$12,8($5)\n\tdsubu\t$11,$10,$11\n\tld\t$13,8($6)\n\tsltu\t$8,$10,$11\n\tdsubu\t$14,$11,$2\n\tsltu\t$2,$11,$14\n\tsd\t$14,0($4)\n\tor\t$2,$2,$8\n\n\tld\t$10,16($5)\n\tdsubu\t$13,$12,$13\n\tld\t$11,16($6)\n\tsltu\t$8,$12,$13\n\tdsubu\t$14,$13,$2\n\tsltu\t$2,$13,$14\n\tsd\t$14,8($4)\n\tor\t$2,$2,$8\n\n\tld\t$12,24($5)\n\tdsubu\t$11,$10,$11\n\tld\t$13,24($6)\n\tsltu\t$8,$10,$11\n\tdsubu\t$14,$11,$2\n\tsltu\t$2,$11,$14\n\tsd\t$14,16($4)\n\tor\t$2,$2,$8\n\n\tld\t$10,32($5)\n\tdsubu\t$13,$12,$13\n\tld\t$11,32($6)\n\tsltu\t$8,$12,$13\n\tdsubu\t$14,$13,$2\n\tsltu\t$2,$13,$14\n\tsd\t$14,24($4)\n\tor\t$2,$2,$8\n\n\tdaddiu\t$5,$5,32\n\tdaddiu\t$6,$6,32\n\n\tbne\t$7,$0,.Loop\n\t daddiu\t$4,$4,32\n\n.Lend:\tdaddu\t$11,$11,$2\n\tsltu\t$8,$11,$2\n\tdsubu\t$11,$10,$11\n\tsltu\t$2,$10,$11\n\tsd\t$11,0($4)\n\tj\t$31\n\tor\t$2,$2,$8\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/mips64/submul_1.asm",
    "content": "dnl  MIPS64 mpn_submul_1 -- Multiply a limb vector with a single limb and\ndnl  subtract the product from a second limb vector.\n\ndnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t$4\nC s1_ptr\t$5\nC size\t\t$6\nC s2_limb\t$7\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\nC feed-in phase 0\n\tld\t$8,0($5)\n\nC feed-in phase 1\n\tdaddiu\t$5,$5,8\n\tdmultu\t$8,$7\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC0\n\t move\t$2,$0\t\tC zero cy2\n\n\tdaddiu\t$6,$6,-1\n\tbeq\t$6,$0,$LC1\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\nLoop:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddiu\t$5,$5,8\n\tdaddu\t$3,$3,$2\tC add old carry limb to low product limb\n\tdmultu\t$8,$7\n\tld\t$8,0($5)\tC load new s1 limb as early as possible\n\tdaddiu\t$6,$6,-1\tC decrement loop counter\n\tsltu\t$2,$3,$2\tC carry from previous addition -> $2\n\tdsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tdaddiu\t$4,$4,8\n\tbne\t$6,$0,Loop\n\t daddu\t$2,$9,$2\tC add high product limb and carry from 
addition\n\nC wind-down phase 1\n$LC1:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tdmultu\t$8,$7\n\tdsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tdaddiu\t$4,$4,8\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\n\nC wind-down phase 0\n$LC0:\tld\t$10,0($4)\n\tmflo\t$3\n\tmfhi\t$9\n\tdaddu\t$3,$3,$2\n\tsltu\t$2,$3,$2\n\tdsubu\t$3,$10,$3\n\tsgtu\t$10,$3,$10\n\tdaddu\t$2,$2,$10\n\tsd\t$3,0($4)\n\tj\t$31\n\tdaddu\t$2,$9,$2\tC add high product limb and carry from addition\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/mips64/umul.asm",
    "content": "dnl  MIPS64 umul_ppmm -- longlong.h support.\n\ndnl  Copyright 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC plp   $4\nC u     $5\nC v     $6\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\tdmultu\t$5,$6\n\tmflo\t$3\n\tmfhi\t$2\n\tj\t$31\n\tsd\t$3,0($4)\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/powerpc32/750/com_n.asm",
    "content": "dnl  PowerPC 750 mpn_com_n -- mpn bitwise one's complement\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            3.0\nC 75x (G3):        2.0\nC 7400,7410 (G4):  2.0\nC 744x,745x (G4+): 3.0\n\nC void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);\nC\nC This loop form is necessary for the claimed speed.\n\nASM_START()\nPROLOGUE(mpn_com_n)\n\n\tC r3\tdst\n\tC r4\tsrc\n\tC r5\tsize\n\n\tmtctr\tr5\t\tC size\n\tlwz\tr5, 0(r4)\tC src low limb\n\n\tsub\tr4, r4, r3\tC src-dst\n\tsubi\tr3, r3, 4\tC dst-4\n\n\taddi\tr4, r4, 8\tC src-dst+8\n\tbdz\tL(one)\n\nL(top):\n\tC r3\t&dst[i-1]\n\tC r4\tsrc-dst\n\tC r5\tsrc[i]\n\tC r6\tscratch\n\n\tnot\tr6, r5\t\tC ~src[i]\n\tlwzx\tr5, r4,r3\tC src[i+1]\n\n\tstwu\tr6, 4(r3)\tC dst[i]\n\tbdnz\tL(top)\n\nL(one):\n\tnot\tr6, r5\n\n\tstw\tr6, 4(r3)\tC dst[size-1]\n\tblr\n\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/750/lshift.asm",
    "content": "dnl  PowerPC 750 mpn_lshift -- mpn left shift.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC       cycles/limb\nC 750:     3.0\nC 7400:    3.0\n\n\nC mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,\nC                       unsigned shift);\nC\nC This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but\nC smaller and saving about 30 or so cycles of overhead.\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\n\tC r3\tdst\n\tC r4\tsrc\n\tC r5\tsize\n\tC r6\tshift\n\n\tmtctr\tr5\t\tC size\n\tslwi\tr5, r5, 2\tC 4*size\n\n\tsubfic\tr7, r6, 32\tC 32-shift\n\tadd\tr4, r4, r5\tC &src[size]\n\n\tadd\tr5, r3, r5\tC &dst[size]\n\tlwz\tr8, -4(r4)\tC src[size-1]\n\tbdz\tL(one)\n\n\tlwzu\tr9, -8(r4)\tC src[size-2]\n\n\tsrw\tr3, r8, r7\tC return value\n\tslw\tr8, r8, r6\tC src[size-1] << shift\n\tbdz\tL(two)\n\n\nL(top):\n\tC r3\treturn value\n\tC r4\tsrc, incrementing\n\tC r5\tdst, incrementing\n\tC r6\tlshift\n\tC r7\t32-shift\n\tC r8\tsrc[i+1] << shift\n\tC r9\tsrc[i]\n\tC r10\n\n\tlwzu\tr10, -4(r4)\n\tsrw\tr11, r9, r7\n\n\tor\tr8, r8, 
r11\n\tstwu\tr8, -4(r5)\n\n\tslw\tr8, r9, r6\n\tbdz\tL(odd)\n\n\tC r8\tsrc[i+1] << shift\n\tC r9\n\tC r10\tsrc[i]\n\n\tlwzu\tr9, -4(r4)\n\tsrw\tr11, r10, r7\n\n\tor\tr8, r8, r11\n\tstwu\tr8, -4(r5)\n\n\tslw\tr8, r10, r6\n\tbdnz\tL(top)\n\n\nL(two):\n\tC r3\treturn value\n\tC r4\n\tC r5\t&dst[2]\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[1] << shift\n\tC r9\tsrc[0]\n\tC r10\n\n\tsrw\tr11, r9, r7\n\tslw\tr12, r9, r6\tC src[0] << shift\n\n\tor\tr8, r8, r11\n\tstw\tr12, -8(r5)\tC dst[0]\n\n\tstw\tr8, -4(r5)\tC dst[1]\n\tblr\n\n\nL(odd):\n\tC r3\treturn value\n\tC r4\n\tC r5\t&dst[2]\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[1] << shift\n\tC r9\n\tC r10\tsrc[0]\n\n\tsrw\tr11, r10, r7\n\tslw\tr12, r10, r6\n\n\tor\tr8, r8, r11\n\tstw\tr12, -8(r5)\tC dst[0]\n\n\tstw\tr8, -4(r5)\tC dst[1]\n\tblr\n\n\nL(one):\n\tC r5\t&dst[1]\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[0]\n\n\tsrw\tr3, r8, r7\tC return value\n\tslw\tr8, r8, r6\tC src[size-1] << shift\n\n\tstw\tr8, -4(r5)\tC dst[0]\n\tblr\n\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/powerpc32/750/rshift.asm",
    "content": "dnl  PowerPC 750 mpn_rshift -- mpn right shift.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC       cycles/limb\nC 750:     3.0\nC 7400:    3.0\n\n\nC mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,\nC                       unsigned shift);\nC\nC This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but\nC smaller and saving about 30 or so cycles of overhead.\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\n\tC r3\tdst\n\tC r4\tsrc\n\tC r5\tsize\n\tC r6\tshift\n\n\tmtctr\tr5\t\tC size\n\tlwz\tr8, 0(r4)\tC src[0]\n\n\tsubfic\tr7, r6, 32\tC 32-shift\n\taddi\tr5, r3, -4\tC dst-4\n\n\tslw\tr3, r8, r7\tC return value\n\tbdz\tL(one)\n\n\tlwzu\tr9, 4(r4)\tC src[1]\n\tsrw\tr8, r8, r6\tC src[0] >> shift\n\tbdz\tL(two)\n\n\nL(top):\n\tC r3\treturn value\n\tC r4\tsrc, incrementing\n\tC r5\tdst, incrementing\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[i-1] >> shift\n\tC r9\tsrc[i]\n\tC r10\n\n\tlwzu\tr10, 4(r4)\n\tslw\tr11, r9, r7\n\n\tor\tr8, r8, r11\n\tstwu\tr8, 4(r5)\n\n\tsrw\tr8, r9, r6\n\tbdz\tL(odd)\n\n\tC r8\tsrc[i-1] >> 
shift\n\tC r9\n\tC r10\tsrc[i]\n\n\tlwzu\tr9, 4(r4)\n\tslw\tr11, r10, r7\n\n\tor\tr8, r8, r11\n\tstwu\tr8, 4(r5)\n\n\tsrw\tr8, r10, r6\n\tbdnz\tL(top)\n\n\nL(two):\n\tC r3\treturn value\n\tC r4\n\tC r5\t&dst[size-2]\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[size-2] >> shift\n\tC r9\tsrc[size-1]\n\tC r10\n\n\tslw\tr11, r9, r7\n\tsrw\tr12, r9, r6\tC src[size-1] >> shift\n\n\tor\tr8, r8, r11\n\tstw\tr12, 8(r5)\tC dst[size-1]\n\n\tstw\tr8, 4(r5)\tC dst[size-2]\n\tblr\n\n\nL(odd):\n\tC r3\treturn value\n\tC r4\n\tC r5\t&dst[size-2]\n\tC r6\tshift\n\tC r7\t32-shift\n\tC r8\tsrc[size-2] >> shift\n\tC r9\n\tC r10\tsrc[size-1]\n\n\tslw\tr11, r10, r7\n\tsrw\tr12, r10, r6\n\n\tor\tr8, r8, r11\n\tstw\tr12, 8(r5)\tC dst[size-1]\n\n\tstw\tr8, 4(r5)\tC dst[size-2]\n\tblr\n\n\nL(one):\n\tC r3\treturn value\n\tC r4\n\tC r5\tdst-4\n\tC r6\tshift\n\tC r7\n\tC r8\tsrc[0]\n\n\tsrw\tr8, r8, r6\n\n\tstw\tr8, 4(r5)\tC dst[0]\n\tblr\n\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/powerpc32/README",
    "content": "Copyright 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\n                    POWERPC 32-BIT MPN SUBROUTINES\n\n\nThis directory contains mpn functions for various 32-bit PowerPC chips.\n\n\nCODE ORGANIZATION\n\n\tdirectory\t  used for\n\t================================================\n\tpowerpc           generic, 604, 604e, 744x, 745x\n\tpowerpc/750       740, 750, 7400, 7410\n\n\nThe top-level powerpc directory is currently mostly aimed at 604/604e but\nshould be reasonable on all powerpcs.\n\n\n\nSTATUS\n\nThe code is quite well optimized for the 604e, other chips have had less\nattention.\n\nAltivec SIMD available in 74xx might hold some promise, but unfortunately\nGMP only guarantees 32-bit data alignment, so there's lots of fiddling\naround with partial operations at the start and end of limb vectors.  
A\n128-bit limb would be a novel idea, but is unlikely to be practical, since\nit would have to work with ordinary +, -, * etc in the C code.\n\nAlso, Altivec isn't very well suited for the GMP multiplication needs.\nUsing floating-point based multiplication has much better performance\npotential for all current powerpcs, both the ones with slow integer multiply\nunits (603, 740, 750, 7400, 7410) and those with fast (604, 604e, 744x,\n745x).  This is because all powerpcs do some level of pipelining in the FPU:\n\n603 and 750 can sustain one fmadd every 2nd cycle.\n604 and 604e can sustain one fmadd per cycle.\n7400 and 7410 can sustain 3 fmadd in 4 cycles.\n744x and 745x can sustain 4 fmadd in 5 cycles.\n\n\n\nREGISTER NAMES\n\nThe normal powerpc convention is to give registers as plain numbers, like\n\"mtctr 6\", but on Apple MacOS X (powerpc*-*-rhapsody* and\npowerpc*-*-darwin*) the assembler demands an \"r\" like \"mtctr r6\".  Note\nhowever when register 0 in an instruction means a literal zero the \"r\" is\nomitted, for instance \"lwzx r6,0,r7\".\n\nThe GMP code uses the \"r\" forms, powerpc-defs.m4 transforms them to plain\nnumbers according to what GMP_ASM_POWERPC_R_REGISTERS finds is needed.\n(Note that this style isn't fully general, as the identifier r4 and the\nregister r4 will not be distinguishable on some systems.  
However, this is\nnot a problem for the limited GMP assembly usage.)\n\n\n\nGLOBAL REFERENCES\n\nLinux non-PIC\n\tlis\t9, __gmp_modlimb_invert_table@ha\n\trlwinm\t11, 5, 31, 25, 31\n\tla\t9, __gmp_modlimb_invert_table@l(9)\n\tlbzx\t11, 9, 11\n\nLinux PIC (FIXME)\n.LCL0:\n\t.long .LCTOC1-.LCF0\n\tbcl\t20, 31, .LCF0\n.LCF0:\n\tmflr\t30\n\tlwz\t7, .LCL0-.LCF0(30)\n\tadd\t30, 7, 30\n\tlwz\t11, .LC0-.LCTOC1(30)\n\trlwinm\t3, 5, 31, 25, 31\n\tlbzx\t7, 11, 3\n\nAIX (always PIC)\nLC..0:\n\t.tc __gmp_modlimb_invert_table[TC],__gmp_modlimb_invert_table[RW]\n\tlwz\t9, LC..0(2)\n\trlwinm\t0, 5, 31, 25, 31\n\tlbzx\t0, 9, 0\n\nDarwin (non-PIC)\n\tlis\tr2, ha16(___gmp_modlimb_invert_table)\n\trlwinm\tr9, r5, 31, 25, 31\n\tla\tr2, lo16(___gmp_modlimb_invert_table)(r2)\n\tlbzx\tr0, r2, r9\nDarwin (PIC)\n\tmflr\tr0\n\tbcl\t20, 31, L0001$pb\nL0001$pb:\n\tmflr\tr7\n\tmtlr\tr0\n\taddis\tr2, r7, ha16(L___gmp_modlimb_invert_table$non_lazy_ptr-L0001$pb)\n\trlwinm\tr9, r5, 31, 25, 31\n\tlwz\tr2, lo16(L___gmp_modlimb_invert_table$non_lazy_ptr-L0001$pb)(r2)\n\tlbzx\tr0, r2, r9\n------\n\t.non_lazy_symbol_pointer\nL___gmp_modlimb_invert_table$non_lazy_ptr:\n\t.indirect_symbol ___gmp_modlimb_invert_table\n\t.long\t0\n\t.subsections_via_symbols\n\n\nFor GNU/Linux and Darwin, we might want to duplicate __gmp_modlimb_invert_table\ninto the text section in this file.  
We should thus be able to reach it like\nthis:\n\n\tblr\tL0\nL0:\tmflr\tr2\n\trlwinm\tr9, r5, 31, 25, 31\n\taddi\tr9, r9, lo16(local_modlimb_table-L0)\n\tlbzx\tr0, r2, r9\n\n\n\nREFERENCES\n\nPowerPC Microprocessor Family: The Programming Environments for 32-bit\nMicroprocessors, IBM document G522-0290-01, 2000.\n\nPowerPC 604e RISC Microprocessor User's Manual with Supplement for PowerPC\n604 Microprocessor, IBM document G552-0330-00, Motorola document\nMPC604EUM/AD, 1998.\n\nMPC7400 RISC Microprocessor User's Manual, Motorola document MPC7400UM/D,\nrev 0, 3/2000.\n\nThe above are available online from\n\n\thttp://chips.ibm.com/techlib/products/powerpc/manuals\n\thttp://www.mot.com/PowerPC\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 76\nEnd:\n"
  },
  {
    "path": "mpn/powerpc32/add_n.asm",
    "content": "dnl  PowerPC-32 mpn_add_n -- add limb vectors.\n\ndnl  Copyright 2002, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            3.25\nC 75x (G3):        3.5\nC 7400,7410 (G4):  3.5\nC 744x,745x (G4+): 4.25\nC power4/ppc970:   2.0\nC power5:          2.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC s1p\tr4\nC s2p\tr5\nC n\tr6\nC cy\tr7\n\nASM_START()\nPROLOGUE(mpn_add_nc)\n\taddic\tr0,r7,-1\tC set hw cy from cy argument\n\tcmpwi\tcr0,r6,15\tC more than 15 limbs?\n\tble\tL(com)\t\tC branch if <= 15 limbs\n\tb\tL(BIG)\nEPILOGUE(mpn_add_nc)\nPROLOGUE(mpn_add_n)\n\taddic\tr0,r0,0\t\tC clear hw cy\n\tcmpwi\tcr0,r6,15\tC more than 15 limbs?\n\tbgt\tL(BIG)\t\tC branch if > 15 limbs\n\nL(com):\tmtctr\tr6\t\tC copy size into CTR\n\taddi\tr3,r3,-4\tC offset rp, it's updated before it's used\n\tlwz\tr0,0(r4)\tC load s1 limb\n\tlwz\tr7,0(r5)\tC load s2 limb\n\tadde\tr10,r7,r0\n\tbdz\tL(endS)\nL(loopS):\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tstwu\tr10,4(r3)\tC store result 
limb\n\tadde\tr10,r7,r0\n\tbdnz\tL(loopS)\nL(endS):\n\tstwu\tr10,4(r3)\tC store result limb\n\tli\tr3,0\n\taddze\tr3,r3\n\tblr\n\nL(BIG):\n\tstmw\tr30,-8(r1)\tC should avoid this for small sizes!\n\tandi.\tr12,r6,3\n\tmtctr\tr12\t\tC copy size into CTR\n\taddi\tr4,r4,-4\n\taddi\tr5,r5,-4\n\taddi\tr3,r3,-4\n\tbeq\tL(multiple_of_4)\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tadde\tr10,r7,r0\n\tbdz\tL(end0)\nL(loop0):\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tstwu\tr10,4(r3)\tC store result limb\n\tadde\tr10,r7,r0\n\tbdnz\tL(loop0)\nL(end0):\n\tstwu\tr10,4(r3)\tC store result limb\nL(multiple_of_4):\n\tsrwi\tr6,r6,2\n\tmtctr\tr6\t\tC copy size into CTR\n\n\tlwz\tr0,4(r4)\tC load s1 limb\n\tlwz\tr7,4(r5)\tC load s2 limb\n\tlwz\tr8,8(r4)\tC load s1 limb\n\tlwz\tr9,8(r5)\tC load s2 limb\n\tlwz\tr10,12(r4)\tC load s1 limb\n\tlwz\tr11,12(r5)\tC load s2 limb\n\tlwzu\tr12,16(r4)\tC load s1 limb\n\tadde\tr31,r7,r0\tC add limbs with cy, set cy\n\tlwzu\tr6,16(r5)\tC load s2 limb\n\tbdz\tL(enda)\n\nL(loop):\n\tlwz\tr0,4(r4)\tC load s1 limb\n\tadde\tr30,r9,r8\tC add limbs with cy, set cy\n\tlwz\tr7,4(r5)\tC load s2 limb\n\tstw\tr31,4(r3)\tC store result limb\n\tlwz\tr8,8(r4)\tC load s1 limb\n\tadde\tr31,r11,r10\tC add limbs with cy, set cy\n\tlwz\tr9,8(r5)\tC load s2 limb\n\tstw\tr30,8(r3)\tC store result limb\n\tlwz\tr10,12(r4)\tC load s1 limb\n\tadde\tr30,r6,r12\tC add limbs with cy, set cy\n\tlwz\tr11,12(r5)\tC load s2 limb\n\tstw\tr31,12(r3)\tC store result limb\n\tlwzu\tr12,16(r4)\tC load s1 limb\n\tadde\tr31,r7,r0\tC add limbs with cy, set cy\n\tstwu\tr30,16(r3)\tC store result limb\n\tlwzu\tr6,16(r5)\tC load s2 limb\n\tbdnz\tL(loop)\t\tC decrement CTR and loop back\nL(enda):\n\tadde\tr30,r9,r8\tC add limbs with cy, set cy\n\tstw\tr31,4(r3)\tC store result limb\n\tadde\tr31,r11,r10\tC add limbs with cy, set cy\n\tstw\tr30,8(r3)\tC store result limb\n\tadde\tr30,r6,r12\tC add limbs with cy, set cy\n\tstw\tr31,12(r3)\tC 
store result limb\n\tstw\tr30,16(r3)\tC store result limb\nL(end):\n\tli\tr3,0\n\taddze\tr3,r3\n\tlmw\tr30,-8(r1)\n\tblr\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/powerpc32/addlsh1_n.asm",
    "content": "dnl  PowerPC-32 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            4.0\nC 75x (G3):        5.0\nC 7400,7410 (G4):  5.0\nC 744x,745x (G4+): 5.0\nC power4/ppc970:   4.25\nC power5:          5.0\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC vp\tr5\nC n\tr6\n\ndefine(`rp',`r3')\ndefine(`up',`r4')\ndefine(`vp',`r5')\n\ndefine(`s0',`r6')\ndefine(`s1',`r7')\ndefine(`u0',`r8')\ndefine(`v0',`r10')\ndefine(`v1',`r11')\n\nASM_START()\nPROLOGUE(mpn_addlsh1_n)\n\tmtctr\tr6\t\tC copy n in ctr\n\taddic\tr31, r31, 0\tC clear cy\n\n\tlwz\tv0, 0(vp)\tC load v limb\n\tlwz\tu0, 0(up)\tC load u limb\n\taddi\tup, up, -4\tC update up\n\taddi\trp, rp, -4\tC update rp\n\tslwi\ts1, v0, 1\n\tbdz\t.Lend\t\tC If done, skip loop\n\n.Loop:\tlwz\tv1, 4(vp)\tC load v limb\n\tadde\ts1, s1, u0\tC add limbs with cy, set cy\n\tsrwi\ts0, v0, 31\tC shift down previous v limb\n\tstw\ts1, 4(rp)\tC store result limb\n\tlwzu\tu0, 8(up)\tC load u limb and update up\n\trlwimi\ts0, v1, 1, 
0,30\tC left shift v limb and merge with prev v limb\n\n\tbdz\t.Lexit\t\tC decrement ctr and exit if done\n\n\tlwzu\tv0, 8(vp)\tC load v limb and update vp\n\tadde\ts0, s0, u0\tC add limbs with cy, set cy\n\tsrwi\ts1, v1, 31\tC shift down previous v limb\n\tstwu\ts0, 8(rp)\tC store result limb and update rp\n\tlwz\tu0, 4(up)\tC load u limb\n\trlwimi\ts1, v0, 1, 0,30\tC left shift v limb and merge with prev v limb\n\n\tbdnz\t.Loop\t\tC decrement ctr and loop back\n\n.Lend:\tadde\tr7, s1, u0\n\tsrwi\tr4, v0, 31\n\tstw\tr7, 4(rp)\tC store last result limb\n\taddze\tr3, r4\n\tblr\n.Lexit:\tadde\tr7, s0, u0\n\tsrwi\tr4, v1, 31\n\tstw\tr7, 8(rp)\tC store last result limb\n\taddze\tr3, r4\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/addmul_1.asm",
    "content": "dnl  PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add the\ndnl  result to a second limb vector.\n\ndnl  Copyright 1995, 1997, 1998, 2000, 2001, 2002, 2003, 2005 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            6.75\nC 75x (G3):        8.7-14.3\nC 7400,7410 (G4):  8.7-14.3\nC 744x,745x (G4+): 9.5\nC power4/ppc970:   6.25\nC power5:          6.25\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC vl\tr6\n\nC This is optimized for the PPC604.  
It has not been tuned for other\nC PowerPC processors.\nC\nC Loop Analysis for the 604:\nC 12 mem insn\nC 8 serializing insn\nC 8 int multiply\nC 25 int reg write\nC 9 int ops (8 of which serialize)\nC\nC The multiply insns need 16 cycles/4limb.\nC The integer register writes will need 13 cycles/4limb.\nC All-in-all, it should be possible to get to 4 or 5 cycles/limb on PPC604,\nC but that will require some clever FPNOPS and BNOPS for exact\nC issue control.\n\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tcmpwi\tcr0,r5,9\tC more than 9 limbs?\n\tbgt\tcr0,L(big)\tC branch if more than 9 limbs\n\n\tmtctr\tr5\n\tlwz\tr0,0(r4)\n\tmullw\tr7,r0,r6\n\tmulhwu\tr10,r0,r6\n\tlwz\tr9,0(r3)\n\taddc\tr8,r7,r9\n\taddi\tr3,r3,-4\n\tbdz\tL(end)\nL(loop):\n\tlwzu\tr0,4(r4)\n\tstwu\tr8,4(r3)\n\tmullw\tr8,r0,r6\n\tadde\tr7,r8,r10\n\tmulhwu\tr10,r0,r6\n\tlwz\tr9,4(r3)\n\taddze\tr10,r10\n\taddc\tr8,r7,r9\n\tbdnz\tL(loop)\nL(end):\tstw\tr8,4(r3)\n\taddze\tr3,r10\n\tblr\n\nL(big):\tstmw\tr30,-32(r1)\n\taddi\tr5,r5,-1\n\tsrwi\tr0,r5,2\n\tmtctr\tr0\n\n\tlwz\tr7,0(r4)\n\tmullw\tr8,r7,r6\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,0(r3)\n\taddc\tr8,r8,r7\n\tstw\tr8,0(r3)\n\nL(loopU):\n\tlwz\tr7,4(r4)\n\tlwz\tr12,8(r4)\n\tlwz\tr30,12(r4)\n\tlwzu\tr31,16(r4)\n\tmullw\tr8,r7,r6\n\tmullw\tr9,r12,r6\n\tmullw\tr10,r30,r6\n\tmullw\tr11,r31,r6\n\tadde\tr8,r8,r0\tC add cy_limb\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,4(r3)\n\tadde\tr9,r9,r0\n\tmulhwu\tr0,r12,r6\n\tlwz\tr12,8(r3)\n\tadde\tr10,r10,r0\n\tmulhwu\tr0,r30,r6\n\tlwz\tr30,12(r3)\n\tadde\tr11,r11,r0\n\tmulhwu\tr0,r31,r6\n\tlwz\tr31,16(r3)\n\taddze\tr0,r0\t\tC new cy_limb\n\taddc\tr8,r8,r7\n\tstw\tr8,4(r3)\n\tadde\tr9,r9,r12\n\tstw\tr9,8(r3)\n\tadde\tr10,r10,r30\n\tstw\tr10,12(r3)\n\tadde\tr11,r11,r31\n\tstwu\tr11,16(r3)\n\tbdnz\tL(loopU)\n\n\tandi.\tr31,r5,3\n\tmtctr\tr31\n\tbeq\tcr0,L(endx)\n\nL(loopE):\n\tlwzu\tr7,4(r4)\n\tmullw\tr8,r7,r6\n\tadde\tr8,r8,r0\tC add cy_limb\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,4(r3)\n\taddze\tr0,r0\t\tC new 
cy_limb\n\taddc\tr8,r8,r7\n\tstwu\tr8,4(r3)\n\tbdnz\tL(loopE)\nL(endx):\n\taddze\tr3,r0\n\tlmw\tr30,-32(r1)\n\tblr\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/powerpc32/aix.m4",
    "content": "divert(-1)\ndnl  m4 macros for AIX 32-bit assembly.\n\ndnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',\n`\t.toc')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  Don't want ELF style .size in the epilogue.\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.globl\t$1\n\t.globl\t.$1\n\t.csect\t[DS], 2\n$1:\n\t.long\t.$1, TOC[tc0], 0\n\t.csect\t[PR]\n\t.align\t2\n.$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`')\n\ndefine(`TOC_ENTRY', `')\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`define(`TOC_ENTRY',\n`\t.toc\ntc$2:\n\t.tc\t$2[TC], $2')'\n`\tlwz\t$1, tc$2(2)')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`\t.globl\t$1')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\t.csect\t[RO], 3\n\tALIGN(ifelse($#,1,2,$2))\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1))\n\ndefine(`ASM_END', `TOC_ENTRY')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc32/darwin.m4",
    "content": "divert(-1)\ndnl  m4 macros for Mac OS 32-bit assembly.\n\ndnl  Copyright 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',`')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n`\t.text\n\t.globl\t$1\n\t.align\t3\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1))\n\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`ifdef(`PIC',\n`\tmflr\tr0\t\t\tC save return address\n\tbcl\t20, 31, 1f\n1:\tmflr\t$1\n\taddis\t$1, $1, ha16($2-1b)\n\tla\t$1, lo16($2-1b)($1)\n\tmtlr\tr0\t\t\tC restore return address\n',`\n\tlis\t$1, ha16($2)\n\tla\t$1, lo16($2)($1)\n')')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`dnl')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\t.const\n\tALIGN(ifelse($#,1,2,$2))\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1))\n\ndefine(`ASM_END', `dnl')\n\nifdef(`PIC',`\ndefine(`PIC_SLOW')')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc32/divexact_by3c.asm",
    "content": "dnl  PowerPC-32 mpn_divexact_by3c -- mpn by 3 exact division\n\ndnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:              ?\nC 604e:              ?\nC 75x (G3):          ?\nC 7400,7410 (G4):    8\nC 744x,745x (G4+):   6\nC power4/ppc970:    12\nC power5:            ?\n\nC mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,\nC                              mp_limb_t carry);\nC\nC We avoid the slow subfe instruction and instead rely on an extremely unlikely\nC branch.\nC\nC The mullw has the inverse in the first operand, since 0xAA..AB won't allow\nC any early-out.  The src[] data normally won't either, but there's at least\nC a chance, whereas 0xAA..AB never will.  
If, for instance, src[] is all\nC zeros (not a sensible input of course) we run at 7.0 c/l on ppc750.\nC\nC The mulhwu has the \"3\" multiplier in the second operand, which lets 750 and\nC 7400 use an early-out.\n\nC INPUT PARAMETERS\ndefine(`rp', `r3')\ndefine(`up', `r4')\ndefine(`n',  `r5')\ndefine(`cy', `r6')\n\nASM_START()\nPROLOGUE(mpn_divexact_by3c)\n\tlwz\tr11, 0(up)\n\tmtctr\tn\n\tlis\tr12, 0xAAAA\n\tori\tr12, r12, 0xAAAB\n\tli\tr10, 3\n\n\tcmplw\tcr7, cy, r11\n\tsubf\tr11, cy, r11\n\n\tmullw\tr0, r11, r12\n\tstw\tr0, 0(rp)\n\tbdz\tL(one)\n\nL(top):\tlwzu\tr9, 4(up)\n\tmulhwu\tr7, r0, r10\n\tbgt-\tcr7, L(adj)\t\tC very unlikely branch\nL(bko):\tcmplw\tcr7, r7, r9\n\tsubf\tr0, r7, r9\n\tmullw\tr0, r12, r0\n\tstwu\tr0, 4(rp)\n\tbdnz\tL(top)\n\nL(one):\tmulhwu\tr3, r0, r10\n\tblelr+\tcr7\n\taddi\tr3, r3, 1\n\tblr\n\nL(adj):\taddi\tr7, r7, 1\n\tb\tL(bko)\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/powerpc32/eabi.m4",
    "content": "divert(-1)\ndnl  m4 macros for powerpc32 eABI assembly.\n\ndnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',`')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.section\t\".text\"\n\t.align\t3\n\t.globl\t$1\n\t.type\t$1, @function\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.size\t$1, .-$1')\n\ndnl  This ought to support PIC, but it is unclear how that is done for eABI\ndefine(`LEA',\nm4_assert_numargs(2)\n`\n\tlis\t$1, $2@ha\n\tla\t$1, $2@l($1)\n')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`dnl')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\n\t.section\t.rodata\n\tALIGN(ifelse($#,1,2,$2))\n\t.type\t$1, @object\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1)\n`\t.size\t$1, .-$1')\n\ndefine(`ASM_END', `dnl')\n\nifdef(`PIC',`\ndefine(`PIC_SLOW')')\n\ndnl  64-bit \"long long\" parameters are put in an even-odd pair, skipping an\ndnl  even register if that was in turn.  
I wish somebody could explain why that\ndnl  is a good idea.\ndefine(`BROKEN_LONGLONG_PARAM')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc32/elf.m4",
    "content": "divert(-1)\ndnl  m4 macros for powerpc32 GNU/Linux assembly.\n\ndnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',`')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.section\t\".text\"\n\t.align\t3\n\t.globl\t$1\n\t.type\t$1, @function\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.size\t$1, .-$1')\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`ifdef(`PIC',`\n\tmflr\tr0\n\tbl\t_GLOBAL_OFFSET_TABLE_@local-4\n\tmflr\t$1\n\tmtlr\tr0\n\tlwz\t$1, $2@got($1)\n',`\n\tlis\t$1, $2@ha\n\tla\t$1, $2@l($1)\n')')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`dnl')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\n\t.section\t.rodata\n\tALIGN(ifelse($#,1,2,$2))\n\t.type\t$1, @object\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1)\n`\t.size\t$1, .-$1')\n\ndefine(`ASM_END', `dnl')\n\nifdef(`PIC',`\ndefine(`PIC_SLOW')')\n\ndnl  64-bit \"long long\" parameters are put in an even-odd pair, skipping an\ndnl  even register if that was in turn. 
 I wish somebody could explain why that\ndnl  is a good idea.\ndefine(`BROKEN_LONGLONG_PARAM')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc32/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (__builtin_constant_p (bh) && (bh) == 0)\t\t\t\t\\\n      __asm__ (\"{a%I4|add%I4c} %1,%3,%4\\n\\t{aze|addze} %0,%2\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"%r\" (al), \"rI\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)\t\t\\\n      __asm__ (\"{a%I4|add%I4c} %1,%3,%4\\n\\t{ame|addme} %0,%2\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"%r\" (al), \"rI\" (bl));\\\n    else\t\t\t\t\t\t\t\t\\\n      __asm__ (\"{a%I5|add%I5c} %1,%4,%5\\n\\t{ae|adde} %0,%2,%3\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t     : \"r\" (ah), \"r\" (bh), \"%r\" (al), \"rI\" (bl));\t\t\\\n  } while (0)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (__builtin_constant_p (ah) && (ah) == 0)\t\t\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{sfze|subfze} %0,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (bh), \"rI\" (al), \"r\" 
(bl));\\\n    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{sfme|subfme} %0,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (bh), \"rI\" (al), \"r\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == 0)\t\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{ame|addme} %0,%2\"\t\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"rI\" (al), \"r\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{aze|addze} %0,%2\"\t\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"rI\" (al), \"r\" (bl));\\\n    else\t\t\t\t\t\t\t\t\\\n      __asm__ (\"{sf%I4|subf%I4c} %1,%5,%4\\n\\t{sfe|subfe} %0,%3,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t       : \"r\" (ah), \"r\" (bh), \"rI\" (al), \"r\" (bl));\t\t\\\n  } while (0)\n#define count_leading_zeros(count, x) \\\n  __asm__ (\"{cntlz|cntlzw} %0,%1\" : \"=r\" (count) : \"r\" (x))\n#define COUNT_LEADING_ZEROS_0 32\n\n#define umul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    USItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    __asm__ (\"mulhwu %0,%1,%2\" : \"=r\" (ph) : \"%r\" (m0), \"r\" (m1));\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n#define smul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    SItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    __asm__ (\"mulhw %0,%1,%2\" : \"=r\" (ph) : \"%r\" (m0), \"r\" (m1));\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n\n#endif\n\n/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of\n   version 3.1 at least) doesn't seem to know how to generate rlwimi for\n   anything other than bit-fields, so use \"asm\".  
*/\n#if !defined(BSWAP_LIMB) && defined (__GNUC__)\n#define BSWAP_LIMB(dst, src)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_limb_t  __bswapl_src = (src);\t\t\t\t\t\\\n    mp_limb_t  __tmp1 = __bswapl_src >> 24;\t\t/* low byte */\t\\\n    mp_limb_t  __tmp2 = __bswapl_src << 24;\t\t/* high byte */\t\\\n    __asm__ (\"rlwimi %0, %2, 24, 16, 23\"\t\t/* 2nd low */\t\\\n\t : \"=r\" (__tmp1) : \"0\" (__tmp1), \"r\" (__bswapl_src));\t\t\\\n    __asm__ (\"rlwimi %0, %2,  8,  8, 15\"\t\t/* 3rd high */\t\\\n\t : \"=r\" (__tmp2) : \"0\" (__tmp2), \"r\" (__bswapl_src));\t\t\\\n    (dst) = __tmp1 | __tmp2;\t\t\t\t/* whole */\t\\\n  } while (0)\n#endif\n\n/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to\n   those we know are fast.  */\n#if !defined(BSWAP_LIMB_FETCH) && defined (__GNUC__) && HAVE_LIMB_BIG_ENDIAN\n#define BSWAP_LIMB_FETCH(limb, src)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_srcptr  __blf_src = (src);\t\t\t\t\t\\\n    mp_limb_t  __limb;\t\t\t\t\t\t\t\\\n    __asm__ (\"lwbrx %0, 0, %1\"\t\t\t\t\t\t\\\n\t     : \"=r\" (__limb)\t\t\t\t\t\t\\\n\t     : \"r\" (__blf_src),\t\t\t\t\t\t\\\n\t       \"m\" (*__blf_src));\t\t\t\t\t\\\n    (limb) = __limb;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n\n/* On the same basis that lwbrx might be slow, restrict stwbrx to those we\n   know are fast.  FIXME: Is this necessary?  */\n#if !defined(BSWAP_LIMB_STORE) && defined (__GNUC__) && HAVE_LIMB_BIG_ENDIAN\n#define BSWAP_LIMB_STORE(dst, limb)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     __dst = (dst);\t\t\t\t\t\t\\\n    mp_limb_t  __limb = (limb);\t\t\t\t\t\t\\\n    __asm__ (\"stwbrx %1, 0, %2\"\t\t\t\t\t\t\\\n\t     : \"=m\" (*__dst)\t\t\t\t\t\t\\\n\t     : \"r\" (__limb),\t\t\t\t\t\t\\\n\t       \"r\" (__dst));\t\t\t\t\t\t\\\n  } while (0)\n#endif\n"
  },
  {
    "path": "mpn/powerpc32/lshift.asm",
    "content": "dnl  PowerPC-32 mpn_lshift -- Shift a number left.\n\ndnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            3.0\nC 75x (G3):        3.0\nC 7400,7410 (G4):  3.0\nC 7445,7455 (G4+): 2.5\nC 7447,7457 (G4+): 2.25\nC power4/ppc970:   2.5\nC power5:          2.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC cnt\tr6\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\tcmpwi\tcr0, r5, 12\tC more than 12 limbs?\n\tslwi\tr0, r5, 2\n\tadd\tr4, r4, r0\tC make r4 point at end of s1\n\tadd\tr7, r3, r0\tC make r7 point at end of res\n\tbgt\tL(BIG)\t\tC branch if more than 12 limbs\n\n\tmtctr\tr5\t\tC copy size into CTR\n\tsubfic\tr8, r6, 32\n\tlwzu\tr11, -4(r4)\tC load first s1 limb\n\tsrw\tr3, r11, r8\tC compute function return value\n\tbdz\tL(end1)\n\nL(oop):\tlwzu\tr10, -4(r4)\n\tslw\tr9, r11, r6\n\tsrw\tr12, r10, r8\n\tor\tr9, r9, r12\n\tstwu\tr9, -4(r7)\n\tbdz\tL(end2)\n\tlwzu\tr11, -4(r4)\n\tslw\tr9, r10, r6\n\tsrw\tr12, r11, r8\n\tor\tr9, r9, r12\n\tstwu\tr9, 
-4(r7)\n\tbdnz\tL(oop)\n\nL(end1):\n\tslw\tr0, r11, r6\n\tstw\tr0, -4(r7)\n\tblr\nL(end2):\n\tslw\tr0, r10, r6\n\tstw\tr0, -4(r7)\n\tblr\n\nL(BIG):\n\tstmw\tr24, -32(r1)\tC save registers we are supposed to preserve\n\tlwzu\tr9, -4(r4)\n\tsubfic\tr8, r6, 32\n\tsrw\tr3, r9, r8\tC compute function return value\n\tslw\tr0, r9, r6\n\taddi\tr5, r5, -1\n\n\tandi.\tr10, r5, 3\tC count for spill loop\n\tbeq\tL(e)\n\tmtctr\tr10\n\tlwzu\tr28, -4(r4)\n\tbdz\tL(xe0)\n\nL(loop0):\n\tslw\tr12, r28, r6\n\tsrw\tr24, r28, r8\n\tlwzu\tr28, -4(r4)\n\tor\tr24, r0, r24\n\tstwu\tr24, -4(r7)\n\tmr\tr0, r12\n\tbdnz\tL(loop0)\tC taken at most once!\n\nL(xe0):\tslw\tr12, r28, r6\n\tsrw\tr24, r28, r8\n\tor\tr24, r0, r24\n\tstwu\tr24, -4(r7)\n\tmr\tr0, r12\n\nL(e):\tsrwi\tr5, r5, 2\tC count for unrolled loop\n\taddi\tr5, r5, -1\n\tmtctr\tr5\n\tlwz\tr28, -4(r4)\n\tlwz\tr29, -8(r4)\n\tlwz\tr30, -12(r4)\n\tlwzu\tr31, -16(r4)\n\nL(loopU):\n\tslw\tr9, r28, r6\n\tsrw\tr24, r28, r8\n\tlwz\tr28, -4(r4)\n\tslw\tr10, r29, r6\n\tsrw\tr25, r29, r8\n\tlwz\tr29, -8(r4)\n\tslw\tr11, r30, r6\n\tsrw\tr26, r30, r8\n\tlwz\tr30, -12(r4)\n\tslw\tr12, r31, r6\n\tsrw\tr27, r31, r8\n\tlwzu\tr31, -16(r4)\n\tor\tr24, r0, r24\n\tstw\tr24, -4(r7)\n\tor\tr25, r9, r25\n\tstw\tr25, -8(r7)\n\tor\tr26, r10, r26\n\tstw\tr26, -12(r7)\n\tor\tr27, r11, r27\n\tstwu\tr27, -16(r7)\n\tmr\tr0, r12\n\tbdnz\tL(loopU)\n\n\tslw\tr9, r28, r6\n\tsrw\tr24, r28, r8\n\tslw\tr10, r29, r6\n\tsrw\tr25, r29, r8\n\tslw\tr11, r30, r6\n\tsrw\tr26, r30, r8\n\tslw\tr12, r31, r6\n\tsrw\tr27, r31, r8\n\tor\tr24, r0, r24\n\tstw\tr24, -4(r7)\n\tor\tr25, r9, r25\n\tstw\tr25, -8(r7)\n\tor\tr26, r10, r26\n\tstw\tr26, -12(r7)\n\tor\tr27, r11, r27\n\tstw\tr27, -16(r7)\n\n\tstw\tr12, -20(r7)\n\tlmw\tr24, -32(r1)\tC restore registers\n\tblr\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/powerpc32/mod_34lsub1.asm",
    "content": "dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.\n\ndnl  Copyright 2002, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            ?\nC 75x (G3):        3.0\nC 7400,7410 (G4):  3.0\nC 744x,745x (G4+): 3.0\nC power4/ppc970:   2.5\nC power5:          2.5\n\nC mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)\nC\nC There seems no need to schedule the loads back, the code is still 3.0 c/l\nC on 750/7400 no matter where they're placed.\nC\nC Alternatives:\nC\nC Fetching half words would allow add instead for accumulating, instead of\nC adde and its serialization.  An outer loop would be required though, since\nC 2^16 halfwords can overflow.  
lhz+add would be 2.0 c/l, but if there's\nC also a bdz or bdnz for each and a pointer update say every three limbs\nC then the total would be 2.67 c/l which isn't much faster than the current\nC simpler code.\n\nASM_START()\nPROLOGUE(mpn_mod_34lsub1)\n\n\tC r3\tsrc\n\tC r4\tsize\n\n\tmtctr\tr4\n\taddic\tr6, r3, 8\t\tC &src[2], and clear CA\n\n\tlwz\tr3, 0(r3)\t\tC acc0 = src[0]\n\tbdz\tL(done)\n\n\tlwz\tr4, -4(r6)\t\tC acc1 = src[1]\n\tbdz\tL(two)\n\n\tlwz\tr5, 0(r6)\t\tC acc2 = src[2]\n\tlis\tr7, 0\t\t\tC no carry if just three limbs\n\n\tbdz\tL(three)\n\tlis\tr7, 1\t\t\tC 0x10000 carry pos\n\nL(top):\n\tC r3\tacc0\n\tC r4\tacc1\n\tC r5\tacc2\n\tC r6\tsrc, incrementing\n\tC r7\tcarry pos\n\n\tlwz\tr0, 4(r6)\n\tadde\tr3, r3, r0\n\tbdz\tL(end0)\n\n\tlwz\tr0, 8(r6)\n\tadde\tr4, r4, r0\n\tbdz\tL(end1)\n\n\tlwzu\tr0, 12(r6)\n\tadde\tr5, r5, r0\n\tbdnz\tL(top)\n\n\n\tsrwi\tr7, r7, 8\nL(end0):\n\tsrwi\tr7, r7, 8\nL(end1):\n\tsubfe\tr0, r0, r0\t\tC -1 if not CA\n\n\tandc\tr7, r7, r0\t\tC final carry, 0x10000, 0x100, 1 or 0\nL(three):\n\trlwinm\tr6, r3, 0,8,31\t\tC acc0 low\n\n\tadd\tr7, r7, r6\n\trlwinm\tr6, r3, 8,24,31\t\tC acc0 high\n\n\tadd\tr7, r7, r6\n\trlwinm\tr6, r4, 8,8,23\t\tC acc1 low\n\n\tadd\tr7, r7, r6\n\trlwinm\tr6, r4, 16,16,31\tC acc1 high\n\n\tadd\tr7, r7, r6\n\trlwinm\tr6, r5, 16,8,15\t\tC acc2 low\n\n\tadd\tr7, r7, r6\n\trlwinm\tr6, r5, 24,8,31\t\tC acc2 high\n\n\tadd\tr3, r7, r6\n\nL(done):\n\tblr\n\nL(two):\n\tC r3\tacc0\n\tC r4\tacc1\n\n\trlwinm\tr5, r3, 8,24,31\t\tC acc0 high\n\trlwinm\tr3, r3, 0,8,31\t\tC acc0 low\n\n\tadd\tr3, r3, r5\t\tC acc0 high + low\n\trlwinm\tr5, r4, 16,16,31\tC acc1 high\n\n\tadd\tr3, r3, r5\t\tC add acc1 high\n\trlwinm\tr5, r4, 8,8,23\t\tC acc1 low\n\n\tadd\tr3, r3, r5\t\tC add acc1 low\n\n\tblr\n\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/modexact_1c_odd.asm",
    "content": "dnl  PowerPC-32 mpn_modexact_1_odd -- mpn by limb exact remainder.\n\ndnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC                cycles/limb\nC 603e:             ?\nC 604e:             6.0\nC 75x (G3):         6.0-13.0, depending on divisor\nC 7400,7410 (G4):   6.0-13.0, depending on divisor\nC 744x,745x (G4+):  8.0-10.0, depending on divisor\nC power4/ppc970:   12.0\nC power5:          12.0\n\n\nC mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,\nC                               mp_limb_t divisor);\nC mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,\nC                                mp_limb_t divisor, mp_limb_t carry);\nC\nC For PIC, the inverse is established arithmetically since it measures about\nC 5 cycles faster than the nonsense needed to access modlimb_invert_table in\nC SVR4 or Darwin style PIC.  AIX might be better, since it avoids bl/mflr to\nC get at the GOT/TOC/whatever.\nC\nC Using divwu for size==1 measured about 10 cycles slower on 604e, or about\nC 3-5 cycles faster on 750.  
For now it doesn't seem worth bothering with.\nC\nC The loop allows an early-out on mullw for the inverse, and on mulhwu for\nC the divisor.  So the fastest is for instance divisor==1 (inverse==-1), and\nC the slowest is anything giving a full 32-bits in both, such as\nC divisor==0xDEADBEEF (inverse==0x904B300F).  These establish the stated\nC range above for 750 and 7400.\n\n\nASM_START()\n\nEXTERN(modlimb_invert_table)\n\nPROLOGUE(mpn_modexact_1_odd)\n\tli\tr6, 0\n\nPROLOGUE(mpn_modexact_1c_odd)\n\n\tmtctr\tr4\t\t\tC size\n\nifdef(`PIC_SLOW',`\nC Load from our table with PIC is so slow on Linux and Darwin that we avoid it\n\trlwinm\tr7, r5, 1,28,28\t\tC (divisor << 1) & 8\n\trlwinm\tr8, r5, 2,28,28\t\tC (divisor << 2) & 8\n\txor\tr7, r7, r8\t\tC ((divisor << 1) ^ (divisor << 2)) & 8\n\trlwinm\tr4, r5, 0,28,31\t\tC divisor low 4 bits, speedup mullw\n\txor\tr4, r4, r7\t\tC inverse, 4 bits\n\tmullw\tr7, r4, r4\t\tC i*i\n\tslwi\tr4, r4, 1\t\tC 2*i\n\trlwinm\tr8, r5, 0,24,31\t\tC divisor low 8 bits, speedup mullw\n\tmullw\tr7, r7, r8\t\tC i*i*d\n\tsub\tr4, r4, r7\t\tC inverse, 8 bits\n',`\n\tLEA(\tr7, modlimb_invert_table)\n\trlwinm\tr4, r5, 31,25,31\tC (divisor/2) & 0x7F\n\tlbzx\tr4, r4,r7\t\tC inverse, 8 bits\n')\n\n\tmullw\tr7, r4, r4\t\tC i*i\n\tslwi\tr4, r4, 1\t\tC 2*i\n\tmullw\tr7, r5, r7\t\tC i*i*d   [i*i is 16 bits, so second operand]\n\tsub\tr4, r4, r7\t\tC inverse, 16 bits\n\tmullw\tr7, r4, r4\t\tC i*i\n\tslwi\tr4, r4, 1\t\tC 2*i\n\tmullw\tr7, r7, r5\t\tC i*i*d\n\tlwz\tr0, 0(r3)\t\tC src[0]\n\tsub\tr4, r4, r7\t\tC inverse, 32 bits\n\tsubfc\tr7, r6, r0\t\tC l = src[0] - carry\n\n\tmullw\tr7, r7, r4\t\tC q = l * inverse\n\tbdz\tL(one)\n\n\tlwzu\tr0, 4(r3)\t\tC src[1]\n\tmulhwu\tr6, r7, r5\t\tC carry = high(q*divisor)\n\tsubfe\tr7, r6, r0\t\tC l = src[1] - carry\n\tbdz\tL(two)\n\nL(top):\n\tmullw\tr7, r7, r4\t\tC q = l * inverse\n\tlwzu\tr0, 4(r3)\t\tC src[i]\n\tmulhwu\tr6, r7, r5\t\tC carry = high(q*divisor)\n\tsubfe\tr7, r6, r0\t\tC l = src[i] - 
carry\n\tbdnz\tL(top)\n\nL(two):\tmullw\tr7, r7, r4\t\tC q = l * inverse\nL(one):\tsubfe\tr3, r3, r3\t\tC ca 0 or -1\n\tmulhwu\tr6, r7, r5\t\tC carry = high(q*divisor)\n\tsubf\tr3, r3, r6\t\tC carry + ca\n\tblr\n\nEPILOGUE(mpn_modexact_1c_odd)\nEPILOGUE(mpn_modexact_1_odd)\nASM_END()\n"
  },
  {
    "path": "mpn/powerpc32/mul_1.asm",
    "content": "dnl  PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store the\ndnl  result in a second limb vector.\n\ndnl  Copyright 1995, 1997, 2000, 2002, 2003, 2005 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            4.0\nC 75x (G3):        4.5-11\nC 7400,7410 (G4):  4.5-11\nC 744x,745x (G4+): 6.0\nC power4/ppc970:   6.0\nC power5:          5.63\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC vl\tr6\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tmtctr\tr5\n\taddi\tr3,r3,-4\tC adjust res_ptr, it's offset before it's used\n\tli\tr12,0\t\tC clear upper product reg\n\taddic\tr0,r0,0\t\tC clear cy\nC Start software pipeline\n\tlwz\tr8,0(r4)\n\tbdz\tL(end3)\n\tlwzu\tr9,4(r4)\n\tmullw\tr11,r8,r6\n\tmulhwu\tr0,r8,r6\n\tbdz\tL(end1)\nC Software pipelined main loop\nL(loop):\n\tlwz\tr8,4(r4)\n\tmullw\tr10,r9,r6\n\tadde\tr5,r11,r12\n\tmulhwu\tr12,r9,r6\n\tstw\tr5,4(r3)\n\tbdz\tL(end2)\n\tlwzu\tr9,8(r4)\n\tmullw\tr11,r8,r6\n\tadde\tr7,r10,r0\n\tmulhwu\tr0,r8,r6\n\tstwu\tr7,8(r3)\n\tbdnz\tL(loop)\nC Finish software 
pipeline\nL(end1):\n\tmullw\tr10,r9,r6\n\tadde\tr5,r11,r12\n\tmulhwu\tr12,r9,r6\n\tstw\tr5,4(r3)\n\tadde\tr7,r10,r0\n\tstwu\tr7,8(r3)\n\taddze\tr3,r12\n\tblr\nL(end2):\n\tmullw\tr11,r8,r6\n\tadde\tr7,r10,r0\n\tmulhwu\tr0,r8,r6\n\tstwu\tr7,8(r3)\n\tadde\tr5,r11,r12\n\tstw\tr5,4(r3)\n\taddze\tr3,r0\n\tblr\nL(end3):\n\tmullw\tr11,r8,r6\n\tstw\tr11,4(r3)\n\tmulhwu\tr3,r8,r6\n\tblr\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/powerpc32/powerpc-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for PowerPC assembler (32 and 64 bit).\n\ndnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  This is the same as the default in mpn/asm-defs.m4, but with ALIGN(4)\ndnl  not 8.\ndnl\ndnl  4-byte alignment is normally enough, certainly it's what gcc gives.  We\ndnl  don't want bigger alignment within PROLOGUE since it can introduce\ndnl  padding into multiple-entrypoint routines, and with gas such padding is\ndnl  zero words, which are not valid instructions.\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n`\tTEXT\n\tALIGN(4)\n\tGLOBL\t`$1' GLOBL_ATTR\n\tTYPE(`$1',`function')\n`$1'LABEL_SUFFIX')\n\n\ndnl  Usage: r0 ... r31, cr0 ... 
cr7\ndnl\ndnl  Registers names, either left as \"r0\" etc or mapped to plain 0 etc,\ndnl  according to the result of the GMP_ASM_POWERPC_REGISTERS configure\ndnl  test.\n\nifelse(WANT_R_REGISTERS,no,`\nforloop(i,0,31,`deflit(`r'i,i)')\nforloop(i,0,31,`deflit(`v'i,i)')\nforloop(i,0,31,`deflit(`f'i,i)')\nforloop(i,0,7, `deflit(`cr'i,i)')\n')\n\n\ndnl  Usage: ASSERT(cond,instructions)\ndnl\ndnl  If WANT_ASSERT is 1, output the given instructions and expect the given\ndnl  flags condition to then be satisfied.  For example,\ndnl\ndnl         ASSERT(eq, `cmpwi r6, 123')\ndnl\ndnl  The instructions can be omitted to just assert a flags condition with\ndnl  no extra calculation.  For example,\ndnl\ndnl         ASSERT(ne)\ndnl\ndnl  The condition can be omitted to just output the given instructions when\ndnl  assertion checking is wanted.  For example,\ndnl\ndnl         ASSERT(, `mr r11, r0')\ndnl\ndnl  Using a zero word for an illegal instruction is probably not ideal,\ndnl  since it marks the beginning of a traceback table in the 64-bit ABI.\ndnl  But assertions are only for development, so it doesn't matter too much.\n\ndefine(ASSERT,\nm4_assert_numargs_range(1,2)\nm4_assert_defined(`WANT_ASSERT')\n`ifelse(WANT_ASSERT,1,\n\t`C ASSERT\n\t$2\nifelse(`$1',,,\n`\tb$1\tL(ASSERT_ok`'ASSERT_counter)\n\tW32\t0\tC assertion failed\nL(ASSERT_ok`'ASSERT_counter):\ndefine(`ASSERT_counter',incr(ASSERT_counter))\n')')')\n\ndefine(ASSERT_counter,1)\n\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc32/rshift.asm",
    "content": "dnl  PowerPC-32 mpn_rshift -- Shift a number right.\n\ndnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            3.0\nC 75x (G3):        3.0\nC 7400,7410 (G4):  3.0\nC 7445,7455 (G4+): 2.5\nC 7447,7457 (G4+): 2.25\nC power4/ppc970:   2.5\nC power5:          2.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC cnt\tr6\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tcmpwi\tcr0, r5, 12\tC more than 12 limbs?\n\taddi\tr7, r3, -4\tC dst-4\n\tbgt\tL(BIG)\t\tC branch if more than 12 limbs\n\n\tmtctr\tr5\t\tC copy size into CTR\n\tsubfic\tr8, r6, 32\n\tlwz\tr11, 0(r4)\tC load first s1 limb\n\tslw\tr3, r11, r8\tC compute function return value\n\tbdz\tL(end1)\n\nL(oop):\tlwzu\tr10, 4(r4)\n\tsrw\tr9, r11, r6\n\tslw\tr12, r10, r8\n\tor\tr9, r9, r12\n\tstwu\tr9, 4(r7)\n\tbdz\tL(end2)\n\tlwzu\tr11, 4(r4)\n\tsrw\tr9, r10, r6\n\tslw\tr12, r11, r8\n\tor\tr9, r9, r12\n\tstwu\tr9, 4(r7)\n\tbdnz\tL(oop)\n\nL(end1):\n\tsrw\tr0, r11, r6\n\tstw\tr0, 4(r7)\n\tblr\nL(end2):\n\tsrw\tr0, 
r10, r6\n\tstw\tr0, 4(r7)\n\tblr\n\nL(BIG):\n\tstmw\tr24, -32(r1)\tC save registers we are supposed to preserve\n\tlwz\tr9, 0(r4)\n\tsubfic\tr8, r6, 32\n\tslw\tr3, r9, r8\tC compute function return value\n\tsrw\tr0, r9, r6\n\taddi\tr5, r5, -1\n\n\tandi.\tr10, r5, 3\tC count for spill loop\n\tbeq\tL(e)\n\tmtctr\tr10\n\tlwzu\tr28, 4(r4)\n\tbdz\tL(xe0)\n\nL(loop0):\n\tsrw\tr12, r28, r6\n\tslw\tr24, r28, r8\n\tlwzu\tr28, 4(r4)\n\tor\tr24, r0, r24\n\tstwu\tr24, 4(r7)\n\tmr\tr0, r12\n\tbdnz\tL(loop0)\tC taken at most once!\n\nL(xe0):\tsrw\tr12, r28, r6\n\tslw\tr24, r28, r8\n\tor\tr24, r0, r24\n\tstwu\tr24, 4(r7)\n\tmr\tr0, r12\n\nL(e):\tsrwi\tr5, r5, 2\tC count for unrolled loop\n\taddi\tr5, r5, -1\n\tmtctr\tr5\n\tlwz\tr28, 4(r4)\n\tlwz\tr29, 8(r4)\n\tlwz\tr30, 12(r4)\n\tlwzu\tr31, 16(r4)\n\nL(loopU):\n\tsrw\tr9, r28, r6\n\tslw\tr24, r28, r8\n\tlwz\tr28, 4(r4)\n\tsrw\tr10, r29, r6\n\tslw\tr25, r29, r8\n\tlwz\tr29, 8(r4)\n\tsrw\tr11, r30, r6\n\tslw\tr26, r30, r8\n\tlwz\tr30, 12(r4)\n\tsrw\tr12, r31, r6\n\tslw\tr27, r31, r8\n\tlwzu\tr31, 16(r4)\n\tor\tr24, r0, r24\n\tstw\tr24, 4(r7)\n\tor\tr25, r9, r25\n\tstw\tr25, 8(r7)\n\tor\tr26, r10, r26\n\tstw\tr26, 12(r7)\n\tor\tr27, r11, r27\n\tstwu\tr27, 16(r7)\n\tmr\tr0, r12\n\tbdnz\tL(loopU)\n\n\tsrw\tr9, r28, r6\n\tslw\tr24, r28, r8\n\tsrw\tr10, r29, r6\n\tslw\tr25, r29, r8\n\tsrw\tr11, r30, r6\n\tslw\tr26, r30, r8\n\tsrw\tr12, r31, r6\n\tslw\tr27, r31, r8\n\tor\tr24, r0, r24\n\tstw\tr24, 4(r7)\n\tor\tr25, r9, r25\n\tstw\tr25, 8(r7)\n\tor\tr26, r10, r26\n\tstw\tr26, 12(r7)\n\tor\tr27, r11, r27\n\tstw\tr27, 16(r7)\n\n\tstw\tr12, 20(r7)\n\tlmw\tr24, -32(r1)\tC restore registers\n\tblr\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/powerpc32/sqr_diagonal.asm",
    "content": "dnl  PowerPC-32 mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:             ?\nC 604e:             4.0\nC 75x (G3):        10.5\nC 7400,7410 (G4):  10.5\nC 744x,745x (G4+):  4.0\nC power4/ppc970:    8.6\nC power5:           7.0\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC 
n\tr5\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\n\tlwz\tr6,0(r4)\n\tmtctr\tr5\n\n\taddi\tr3,r3,-4\n\tbdz\tL(end1)\n\n\tlwzu\tr7,4(r4)\n\tmullw\tr9,r6,r6\n\tmulhwu\tr11,r6,r6\n\tbdz\tL(end2)\n\n\tlwzu\tr6,4(r4)\n\tmullw\tr8,r7,r7\n\tmulhwu\tr10,r7,r7\n\tbdz\tL(ende)\n\nL(loop):\n\tlwzu\tr7,4(r4)\n\tstw\tr9,4(r3)\n\tmullw\tr9,r6,r6\n\tstwu\tr11,8(r3)\n\tmulhwu\tr11,r6,r6\n\tbdz\tL(endo)\n\tlwzu\tr6,4(r4)\n\tstw\tr8,4(r3)\n\tmullw\tr8,r7,r7\n\tstwu\tr10,8(r3)\n\tmulhwu\tr10,r7,r7\n\tbdnz\tL(loop)\n\nL(ende):\n\tstw\tr9,4(r3)\n\tmullw\tr9,r6,r6\n\tstw\tr11,8(r3)\n\tmulhwu\tr11,r6,r6\n\tstw\tr8,12(r3)\n\tstw\tr10,16(r3)\n\tstw\tr9,20(r3)\n\tstw\tr11,24(r3)\n\tblr\nL(endo):\n\tstw\tr8,4(r3)\n\tmullw\tr8,r7,r7\n\tstw\tr10,8(r3)\n\tmulhwu\tr10,r7,r7\n\tstw\tr9,12(r3)\n\tstw\tr11,16(r3)\n\tstw\tr8,20(r3)\n\tstw\tr10,24(r3)\n\tblr\n\nL(end2):\n\tmullw\tr8,r7,r7\n\tstw\tr9,4(r3)\n\tmulhwu\tr10,r7,r7\n\tstw\tr11,8(r3)\n\tstw\tr8,12(r3)\n\tstw\tr10,16(r3)\n\tblr\nL(end1):\n\tmullw\tr9,r6,r6\n\tmulhwu\tr11,r6,r6\n\tstw\tr9,4(r3)\n\tstw\tr11,8(r3)\n\tblr\nEPILOGUE(mpn_sqr_diagonal)\n"
  },
  {
    "path": "mpn/powerpc32/sub_n.asm",
    "content": "dnl  PowerPC-32 mpn_sub_n -- subtract limb vectors.\n\ndnl  Copyright 2002, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            3.25\nC 75x (G3):        3.5\nC 7400,7410 (G4):  3.5\nC 744x,745x (G4+): 4.25\nC power4/ppc970:   2.0\nC power5:          2.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC s1p\tr4\nC s2p\tr5\nC n\tr6\nC cy\tr7\n\nASM_START()\nPROLOGUE(mpn_sub_nc)\n\tsubfic\tr0,r7,0\t\tC set hw cy from cy argument\n\tcmpwi\tcr0,r6,15\tC more than 15 limbs?\n\tble\tL(com)\t\tC branch if <= 15 limbs\n\tb\tL(BIG)\nEPILOGUE(mpn_sub_nc)\nPROLOGUE(mpn_sub_n)\n\tsubfc\tr0,r0,r0\tC set hw cy\n\tcmpwi\tcr0,r6,15\tC more than 15 limbs?\n\tbgt\tL(BIG)\t\tC branch if > 15 limbs\n\nL(com):\tmtctr\tr6\t\tC copy size into CTR\n\taddi\tr3,r3,-4\tC offset rp, it's updated before it's used\n\tlwz\tr0,0(r4)\tC load s1 limb\n\tlwz\tr7,0(r5)\tC load s2 limb\n\tsubfe\tr10,r7,r0\n\tbdz\tL(endS)\nL(loopS):\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tstwu\tr10,4(r3)\tC store result 
limb\n\tsubfe\tr10,r7,r0\n\tbdnz\tL(loopS)\nL(endS):\n\tstwu\tr10,4(r3)\tC store result limb\n\tsubfe\tr3, r0, r0\tC 0 or -1\n\tsubfic\tr3, r3, 0\tC 0 or 1\n\tblr\n\nL(BIG):\n\tstmw\tr30,-8(r1)\tC should avoid this for small sizes!\n\tandi.\tr12,r6,3\n\tmtctr\tr12\t\tC copy size into CTR\n\taddi\tr4,r4,-4\n\taddi\tr5,r5,-4\n\taddi\tr3,r3,-4\n\tbeq\tL(multiple_of_4)\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tsubfe\tr10,r7,r0\n\tbdz\tL(end0)\nL(loop0):\n\tlwzu\tr0,4(r4)\tC load s1 limb\n\tlwzu\tr7,4(r5)\tC load s2 limb\n\tstwu\tr10,4(r3)\tC store result limb\n\tsubfe\tr10,r7,r0\n\tbdnz\tL(loop0)\nL(end0):\n\tstwu\tr10,4(r3)\tC store result limb\nL(multiple_of_4):\n\tsrwi\tr6,r6,2\n\tmtctr\tr6\t\tC copy size into CTR\n\n\tlwz\tr0,4(r4)\tC load s1 limb\n\tlwz\tr7,4(r5)\tC load s2 limb\n\tlwz\tr8,8(r4)\tC load s1 limb\n\tlwz\tr9,8(r5)\tC load s2 limb\n\tlwz\tr10,12(r4)\tC load s1 limb\n\tlwz\tr11,12(r5)\tC load s2 limb\n\tlwzu\tr12,16(r4)\tC load s1 limb\n\tsubfe\tr31,r7,r0\tC add limbs with cy, set cy\n\tlwzu\tr6,16(r5)\tC load s2 limb\n\tbdz\tL(enda)\n\nL(loop):\n\tlwz\tr0,4(r4)\tC load s1 limb\n\tsubfe\tr30,r9,r8\tC add limbs with cy, set cy\n\tlwz\tr7,4(r5)\tC load s2 limb\n\tstw\tr31,4(r3)\tC store result limb\n\tlwz\tr8,8(r4)\tC load s1 limb\n\tsubfe\tr31,r11,r10\tC add limbs with cy, set cy\n\tlwz\tr9,8(r5)\tC load s2 limb\n\tstw\tr30,8(r3)\tC store result limb\n\tlwz\tr10,12(r4)\tC load s1 limb\n\tsubfe\tr30,r6,r12\tC add limbs with cy, set cy\n\tlwz\tr11,12(r5)\tC load s2 limb\n\tstw\tr31,12(r3)\tC store result limb\n\tlwzu\tr12,16(r4)\tC load s1 limb\n\tsubfe\tr31,r7,r0\tC add limbs with cy, set cy\n\tstwu\tr30,16(r3)\tC store result limb\n\tlwzu\tr6,16(r5)\tC load s2 limb\n\tbdnz\tL(loop)\t\tC decrement CTR and loop back\nL(enda):\n\tsubfe\tr30,r9,r8\tC add limbs with cy, set cy\n\tstw\tr31,4(r3)\tC store result limb\n\tsubfe\tr31,r11,r10\tC add limbs with cy, set cy\n\tstw\tr30,8(r3)\tC store result limb\n\tsubfe\tr30,r6,r12\tC 
add limbs with cy, set cy\n\tstw\tr31,12(r3)\tC store result limb\n\tstw\tr30,16(r3)\tC store result limb\nL(end):\n\tsubfe\tr3, r0, r0\tC 0 or -1\n\tsubfic\tr3, r3, 0\tC 0 or 1\n\tlmw\tr30,-8(r1)\n\tblr\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/powerpc32/sublsh1_n.asm",
    "content": "dnl  PowerPC-32 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            4.0\nC 75x (G3):        5.0\nC 7400,7410 (G4):  5.0\nC 744x,745x (G4+): 5.0\nC power4/ppc970:   4.25\nC power5:          5.0\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC vp\tr5\nC n\tr6\n\ndefine(`rp',`r3')\ndefine(`up',`r4')\ndefine(`vp',`r5')\n\ndefine(`s0',`r6')\ndefine(`s1',`r7')\ndefine(`u0',`r8')\ndefine(`v0',`r10')\ndefine(`v1',`r11')\n\nASM_START()\nPROLOGUE(mpn_sublsh1_n)\n\tmtctr\tr6\t\tC copy n in ctr\n\n\tlwz\tv0, 0(vp)\tC load v limb\n\tlwz\tu0, 0(up)\tC load u limb\n\taddic\tup, up, -4\tC update up; set cy\n\taddi\trp, rp, -4\tC update rp\n\tslwi\ts1, v0, 1\n\tbdz\t.Lend\t\tC If done, skip loop\n\n.Loop:\tlwz\tv1, 4(vp)\tC load v limb\n\tsubfe\ts1, s1, u0\tC add limbs with cy, set cy\n\tsrwi\ts0, v0, 31\tC shift down previous v limb\n\tstw\ts1, 4(rp)\tC store result limb\n\tlwzu\tu0, 8(up)\tC load u limb and update up\n\trlwimi\ts0, v1, 1, 0,30\tC left shift v 
limb and merge with prev v limb\n\n\tbdz\t.Lexit\t\tC decrement ctr and exit if done\n\n\tlwzu\tv0, 8(vp)\tC load v limb and update vp\n\tsubfe\ts0, s0, u0\tC add limbs with cy, set cy\n\tsrwi\ts1, v1, 31\tC shift down previous v limb\n\tstwu\ts0, 8(rp)\tC store result limb and update rp\n\tlwz\tu0, 4(up)\tC load u limb\n\trlwimi\ts1, v0, 1, 0,30\tC left shift v limb and merge with prev v limb\n\n\tbdnz\t.Loop\t\tC decrement ctr and loop back\n\n.Lend:\tsubfe\tr7, s1, u0\n\tsrwi\tr4, v0, 31\n\tstw\tr7, 4(rp)\tC store last result limb\n\tsubfze\tr3, r4\n\tneg\tr3, r3\n\tblr\n.Lexit:\tsubfe\tr7, s0, u0\n\tsrwi\tr4, v1, 31\n\tstw\tr7, 8(rp)\tC store last result limb\n\tsubfze\tr3, r4\n\tneg\tr3, r3\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/submul_1.asm",
    "content": "dnl  PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl  the result from a second limb vector.\n\ndnl  Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                cycles/limb\nC 603e:            ?\nC 604e:            7.5\nC 75x (G3):        9.3-15\nC 7400,7410 (G4):  9.3-15\nC 744x,745x (G4+): 10.5\nC power4/ppc970:   6.75\nC power5:          6.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC vl\tr6\n\nC This is optimized for the PPC604.  
See addmul_1.asm for additional comments.\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tcmpwi\tcr0,r5,9\tC more than 9 limbs?\n\tbgt\tcr0,L(big)\tC branch if more than 9 limbs\n\n\tmtctr\tr5\n\tlwz\tr0,0(r4)\n\tmullw\tr7,r0,r6\n\tmulhwu\tr10,r0,r6\n\tlwz\tr9,0(r3)\n\tsubfc\tr8,r7,r9\n\taddc\tr7,r7,r8\tC invert cy (r7 is junk)\n\taddi\tr3,r3,-4\n\tbdz\tL(end)\nL(loop):\n\tlwzu\tr0,4(r4)\n\tstwu\tr8,4(r3)\n\tmullw\tr8,r0,r6\n\tadde\tr7,r8,r10\n\tmulhwu\tr10,r0,r6\n\tlwz\tr9,4(r3)\n\taddze\tr10,r10\n\tsubfc\tr8,r7,r9\n\taddc\tr7,r7,r8\tC invert cy (r7 is junk)\n\tbdnz\tL(loop)\nL(end):\tstw\tr8,4(r3)\n\taddze\tr3,r10\n\tblr\n\nL(big):\tstmw\tr30,-32(r1)\n\taddi\tr5,r5,-1\n\tsrwi\tr0,r5,2\n\tmtctr\tr0\n\n\tlwz\tr7,0(r4)\n\tmullw\tr8,r7,r6\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,0(r3)\n\tsubfc\tr7,r8,r7\n\taddc\tr8,r8,r7\n\tstw\tr7,0(r3)\n\nL(loopU):\n\tlwz\tr7,4(r4)\n\tlwz\tr12,8(r4)\n\tlwz\tr30,12(r4)\n\tlwzu\tr31,16(r4)\n\tmullw\tr8,r7,r6\n\tmullw\tr9,r12,r6\n\tmullw\tr10,r30,r6\n\tmullw\tr11,r31,r6\n\tadde\tr8,r8,r0\tC add cy_limb\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,4(r3)\n\tadde\tr9,r9,r0\n\tmulhwu\tr0,r12,r6\n\tlwz\tr12,8(r3)\n\tadde\tr10,r10,r0\n\tmulhwu\tr0,r30,r6\n\tlwz\tr30,12(r3)\n\tadde\tr11,r11,r0\n\tmulhwu\tr0,r31,r6\n\tlwz\tr31,16(r3)\n\taddze\tr0,r0\t\tC new cy_limb\n\tsubfc\tr7,r8,r7\n\tstw\tr7,4(r3)\n\tsubfe\tr12,r9,r12\n\tstw\tr12,8(r3)\n\tsubfe\tr30,r10,r30\n\tstw\tr30,12(r3)\n\tsubfe\tr31,r11,r31\n\tstwu\tr31,16(r3)\n\tsubfe\tr11,r11,r11\tC invert ...\n\taddic\tr11,r11,1\tC ... carry\n\tbdnz\tL(loopU)\n\n\tandi.\tr31,r5,3\n\tmtctr\tr31\n\tbeq\tcr0,L(endx)\n\nL(loopE):\n\tlwzu\tr7,4(r4)\n\tmullw\tr8,r7,r6\n\tadde\tr8,r8,r0\tC add cy_limb\n\tmulhwu\tr0,r7,r6\n\tlwz\tr7,4(r3)\n\taddze\tr0,r0\t\tC new cy_limb\n\tsubfc\tr7,r8,r7\n\taddc\tr8,r8,r7\n\tstwu\tr7,4(r3)\n\tbdnz\tL(loopE)\nL(endx):\n\taddze\tr3,r0\n\tlmw\tr30,-32(r1)\n\tblr\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/powerpc32/umul.asm",
    "content": "dnl PowerPC-32 umul_ppmm -- support for longlong.h\n\ndnl Copyright 2000, 2001 Free Software Foundation, Inc.\ndnl\ndnl This file is part of the GNU MP Library.\ndnl\ndnl The GNU MP Library is free software; you can redistribute it and/or\ndnl modify it under the terms of the GNU Lesser General Public License as\ndnl published by the Free Software Foundation; either version 2.1 of the\ndnl License, or (at your option) any later version.\ndnl\ndnl The GNU MP Library is distributed in the hope that it will be useful,\ndnl but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser\ndnl General Public License for more details.\ndnl\ndnl You should have received a copy of the GNU Lesser General Public License\ndnl along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);\nC\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\n\tC r3\tlowptr\n\tC r4\tm1\n\tC r5\tm2\n\n\tmullw\tr0, r4, r5\n\tmulhwu\tr9, r4, r5\n\tstw\tr0, 0(r3)\n\tmr\tr3, r9\n\tblr\n\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/powerpc32/vmx/and_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_and_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB32
',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/andn_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_andn_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB3
2',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/copyd.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyd.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                16-byte coaligned      unaligned\nC                   cycles/limb        cycles/limb\nC 7400,7410 (G4):       0.5                0.64\nC 744x,745x (G4+):      0.75               0.82\nC 970 (G5):             0.78               1.02\t\t(64-bit limbs)\n\nC STATUS\nC  * Works for all sizes and alignments.\n\nC TODO\nC  * Optimize unaligned case.  Some basic tests with 2-way and 4-way unrolling\nC    indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80\nC    c/l for 970.\nC  * Consider using VMX instructions also for head and tail, by using some\nC    read-modify-write tricks.\nC  * The VMX code is used from the smallest sizes it handles, but measurements\nC    show a large speed bump at the cutoff points.  
Small copying (perhaps\nC    using some read-modify-write technique) should be optimized.\nC  * Make a mpn_com_n based on this code.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\n\nifelse(GMP_LIMB_BITS,32,`\n\tdefine(`LIMB32',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`n',\t`r5')\n\ndefine(`us',\t`v4')\n\n\nASM_START()\nPROLOGUE(mpn_copyd)\n\nLIMB32(`slwi.\tr0, n, 2\t')\nLIMB64(`sldi.\tr0, n, 3\t')\n\tadd\trp, rp, r0\n\tadd\tup, up, r0\n\nLIMB32(`cmpi\tcr7, n, 11\t')\nLIMB64(`cmpdi\tcr7, n, 5\t')\n\tbge\tcr7, L(big)\n\n\tbeqlr\tcr0\n\nC Handle small cases with plain operations\n\tmtctr\tn\nL(topS):\nLIMB32(`lwz\tr0, -4(up)\t')\nLIMB64(`ld\tr0, -8(up)\t')\n\taddi\tup, up, -GMP_LIMB_BYTES\nLIMB32(`stw\tr0, -4(rp)\t')\nLIMB64(`std\tr0, -8(rp)\t')\n\taddi\trp, rp, -GMP_LIMB_BYTES\n\tbdnz\tL(topS)\n\tblr\n\nC Handle large cases with VMX operations\nL(big):\n\taddi\trp, rp, -16\n\taddi\tup, up, -16\n\tmfspr\tr12, 256\n\toris\tr0, r12, 0xf800\t\tC Set VRSAVE bit 0-4\n\tmtspr\t256, r0\n\nLIMB32(`rlwinm.\tr7, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr7, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(rp_aligned)\n\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr0, 12(up)\t')\nLIMB64(`ld\tr0, 8(up)\t')\n\taddi\tup, up, -GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\nLIMB32(`stw\tr0, 12(rp)\t')\nLIMB64(`std\tr0, 8(rp)\t')\n\taddi\trp, rp, -GMP_LIMB_BYTES\nLIMB32(`bne\tL(top0)\t\t')\n\nL(rp_aligned):\n\nLIMB32(`rlwinm.\tr0, up, 30,30,31')\tC (up >> 2) mod 4\nLIMB64(`rlwinm.\tr0, up, 29,31,31')\tC (up >> 3) mod 2\n\nLIMB64(`srdi\tr7, n, 2\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 3\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, -16\n\n\tbeq\tL(up_aligned)\n\n\tlvsl\tus, 0, 
up\n\n\taddi\tup, up, 16\nLIMB32(`andi.\tr0, n, 0x4\t')\nLIMB64(`andi.\tr0, n, 0x2\t')\n\tbeq\tL(1)\n\tlvx\tv0, 0, up\n\tlvx\tv2, r10, up\n\tvperm\tv3, v2, v0, us\n\tstvx\tv3, 0, rp\n\taddi\tup, up, -32\n\taddi\trp, rp, -16\n\tb\tL(lpu)\nL(1):\tlvx\tv2, 0, up\n\taddi\tup, up, -16\n\tb\tL(lpu)\n\n\tALIGN(32)\nL(lpu):\tlvx\tv0, 0, up\n\tvperm\tv3, v0, v2, us\n\tstvx\tv3, 0, rp\n\tlvx\tv2, r10, up\n\taddi\tup, up, -32\n\tvperm\tv3, v2, v0, us\n\tstvx\tv3, r10, rp\n\taddi\trp, rp, -32\n\tbdnz\tL(lpu)\n\n\tb\tL(tail)\n\nL(up_aligned):\n\nLIMB32(`andi.\tr0, n, 0x4\t')\nLIMB64(`andi.\tr0, n, 0x2\t')\n\tbeq\tL(lpa)\n\tlvx\tv0, 0,   up\n\tstvx\tv0, 0,   rp\n\taddi\tup, up, -16\n\taddi\trp, rp, -16\n\tb\tL(lpa)\n\n\tALIGN(32)\nL(lpa):\tlvx\tv0, 0,   up\n\tlvx\tv1, r10, up\n\taddi\tup, up, -32\n\tnop\n\tstvx\tv0, 0,   rp\n\tstvx\tv1, r10, rp\n\taddi\trp, rp, -32\n\tbdnz\tL(lpa)\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\nLIMB32(`li\tr10, 12\t\t')\nL(top2):\nLIMB32(`lwzx\tr0, r10, up\t')\nLIMB64(`ld\tr0, 8(up)\t')\nLIMB32(`addic.\tr7, r7, -1\t')\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 8(rp)\t')\nLIMB32(`addi\tr10, r10, -GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/vmx/copyi.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyi.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                16-byte coaligned      unaligned\nC                   cycles/limb        cycles/limb\nC 7400,7410 (G4):       0.5                0.64\nC 744x,745x (G4+):      0.75               0.82\nC 970 (G5):             0.78               1.02\t\t(64-bit limbs)\n\nC STATUS\nC  * Works for all sizes and alignments.\n\nC TODO\nC  * Optimize unaligned case.  Some basic tests with 2-way and 4-way unrolling\nC    indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80\nC    c/l for 970.\nC  * Consider using VMX instructions also for head and tail, by using some\nC    read-modify-write tricks.\nC  * The VMX code is used from the smallest sizes it handles, but measurements\nC    show a large speed bump at the cutoff points.  
Small copying (perhaps\nC    using some read-modify-write technique) should be optimized.\nC  * Make a mpn_com_n based on this code.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\n\nifelse(GMP_LIMB_BITS,32,`\n\tdefine(`LIMB32',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`n',\t`r5')\n\ndefine(`us',\t`v4')\n\n\nASM_START()\nPROLOGUE(mpn_copyi)\n\nLIMB32(`cmpi\tcr7, n, 11\t')\nLIMB64(`cmpdi\tcr7, n, 5\t')\n\tbge\tcr7, L(big)\n\n\tor.\tr0, n, n\n\tbeqlr\tcr0\n\nC Handle small cases with plain operations\n\tmtctr\tn\nL(topS):\nLIMB32(`lwz\tr0, 0(up)\t')\nLIMB64(`ld\tr0, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\n\taddi\trp, rp, GMP_LIMB_BYTES\n\tbdnz\tL(topS)\n\tblr\n\nC Handle large cases with VMX operations\nL(big):\n\tmfspr\tr12, 256\n\toris\tr0, r12, 0xf800\t\tC Set VRSAVE bit 0-4\n\tmtspr\t256, r0\n\nLIMB32(`rlwinm.\tr7, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr7, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(rp_aligned)\n\n\tsubfic\tr7, r7, LIMBS_PER_VR\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr0, 0(up)\t')\nLIMB64(`ld\tr0, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\n\taddi\trp, rp, GMP_LIMB_BYTES\nLIMB32(`bne\tL(top0)\t\t')\n\nL(rp_aligned):\n\nLIMB32(`rlwinm.\tr0, up, 30,30,31')\tC (up >> 2) mod 4\nLIMB64(`rlwinm.\tr0, up, 29,31,31')\tC (up >> 3) mod 2\n\nLIMB64(`srdi\tr7, n, 2\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 3\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\n\tbeq\tL(up_aligned)\n\n\tlvsl\tus, 0, up\n\nLIMB32(`andi.\tr0, n, 0x4\t')\nLIMB64(`andi.\tr0, n, 0x2\t')\n\tbeq\tL(1)\n\tlvx\tv0, 0, up\n\tlvx\tv2, r10, 
up\n\tvperm\tv3, v0, v2, us\n\tstvx\tv3, 0, rp\n\taddi\tup, up, 32\n\taddi\trp, rp, 16\n\tb\tL(lpu)\nL(1):\tlvx\tv2, 0, up\n\taddi\tup, up, 16\n\tb\tL(lpu)\n\n\tALIGN(32)\nL(lpu):\tlvx\tv0, 0, up\n\tvperm\tv3, v2, v0, us\n\tstvx\tv3, 0, rp\n\tlvx\tv2, r10, up\n\taddi\tup, up, 32\n\tvperm\tv3, v0, v2, us\n\tstvx\tv3, r10, rp\n\taddi\trp, rp, 32\n\tbdnz\tL(lpu)\n\n\taddi\tup, up, -16\n\tb\tL(tail)\n\nL(up_aligned):\n\nLIMB32(`andi.\tr0, n, 0x4\t')\nLIMB64(`andi.\tr0, n, 0x2\t')\n\tbeq\tL(lpa)\n\tlvx\tv0, 0,   up\n\tstvx\tv0, 0,   rp\n\taddi\tup, up, 16\n\taddi\trp, rp, 16\n\tb\tL(lpa)\n\n\tALIGN(32)\nL(lpa):\tlvx\tv0, 0,   up\n\tlvx\tv1, r10, up\n\taddi\tup, up, 32\n\tnop\n\tstvx\tv0, 0,   rp\n\tstvx\tv1, r10, rp\n\taddi\trp, rp, 32\n\tbdnz\tL(lpa)\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\nLIMB32(`li\tr10, 0\t\t')\nL(top2):\nLIMB32(`lwzx\tr0, r10, up\t')\nLIMB64(`ld\tr0, 0(up)\t')\nLIMB32(`addic.\tr7, r7, -1\t')\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc32/vmx/ior_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_ior_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB32
',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/iorn_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_iorn_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB3
2',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/nand_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_nand_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB3
2',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/nior_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_nior_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB3
2',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/popcount.asm",
    "content": "dnl  PowerPC-32/VMX mpn_popcount.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\t\ninclude_mpn(`powerpc64/vmx/popcount.asm')\n\nC                   cycles/limb\nC 7400,7410 (G4):       2.75\nC 744x,745x (G4+):      2.25\nC 970 (G5):             5.3\n"
  },
  {
    "path": "mpn/powerpc32/vmx/xnor_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_xnor_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB3
2',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc32/vmx/xor_n.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,\ndnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise\ndnl  logical operations.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC               and,ior,andn,nior,xor    iorn,xnor         nand\nC                   cycles/limb         cycles/limb    cycles/limb\nC 7400,7410 (G4):       1.39                 ?              ?\nC 744x,745x (G4+):      1.14                1.39           1.39\nC 970:                  1.7                 2.0            2.0\n\nC STATUS\nC  * Works for all sizes and alignment for 32-bit limbs.\nC  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.\nC  * Current performance makes this pointless for 970\n\nC TODO\nC  * Might want to make variants when just one of the source operands needs\nC    vperm, and when neither needs it.  The latter runs 50% faster on 7400.\nC  * Idea: If the source operands are equally aligned, we could do the logops\nC    first, then vperm before storing!  
That means we never need more than one\nC    vperm, ever!\nC  * Perhaps align `rp' after initial alignment loop?\nC  * Instead of having scalar code in the beginning and end, consider using\nC    read-modify-write vector code.\nC  * Software pipeline?  Hopefully not too important, this is hairy enough\nC    already.\nC  * At least be more clever about operand loading, i.e., load v operands before\nC    u operands, since v operands are sometimes negated.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`vnegb', `')\t\tC default neg-before to null\ndefine(`vnega', `')\t\tC default neg-before to null\n\ndefine(`OPERATION_xor_n',1)\n\nifdef(`OPERATION_and_n',\n`\tdefine(`func',\t`mpn_and_n')\n\tdefine(`logopS',`and\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')')\nifdef(`OPERATION_andn_n',\n`\tdefine(`func',\t`mpn_andn_n')\n\tdefine(`logopS',`andc\t$1,$2,$3')\n\tdefine(`logop',\t`vandc\t$1,$2,$3')')\nifdef(`OPERATION_nand_n',\n`\tdefine(`func',\t`mpn_nand_n')\n\tdefine(`logopS',`nand\t$1,$2,$3')\n\tdefine(`logop',\t`vand\t$1,$2,$3')\n\tdefine(`vnega',\t`vnor\t$1,$2,$2')')\nifdef(`OPERATION_ior_n',\n`\tdefine(`func',\t`mpn_ior_n')\n\tdefine(`logopS',`or\t$1,$2,$3')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_iorn_n',\n`\tdefine(`func',\t`mpn_iorn_n')\n\tdefine(`logopS',`orc\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vor\t$1,$2,$3')')\nifdef(`OPERATION_nior_n',\n`\tdefine(`func',\t`mpn_nior_n')\n\tdefine(`logopS',`nor\t$1,$2,$3')\n\tdefine(`logop',\t`vnor\t$1,$2,$3')')\nifdef(`OPERATION_xor_n',\n`\tdefine(`func',\t`mpn_xor_n')\n\tdefine(`logopS',`xor\t$1,$2,$3')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\nifdef(`OPERATION_xnor_n',\n`\tdefine(`func',`mpn_xnor_n')\n\tdefine(`logopS',`eqv\t$1,$2,$3')\n\tdefine(`vnegb',\t`vnor\t$1,$2,$2')\n\tdefine(`logop',\t`vxor\t$1,$2,$3')')\n\nifelse(GMP_LIMB_BITS,`32',`\n\tdefine(`LIMB32
',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC INPUT PARAMETERS\ndefine(`rp',\t`r3')\ndefine(`up',\t`r4')\ndefine(`vp',\t`r5')\ndefine(`n',\t`r6')\n\ndefine(`us',\t`v8')\ndefine(`vs',\t`v9')\n\nASM_START()\nPROLOGUE(func)\n\nLIMB32(`cmpwi\tcr0, n, 8\t')\nLIMB64(`cmpdi\tcr0, n, 4\t')\n\tbge\tL(big)\n\n\tmtctr\tn\n\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB32(`logopS(\tr0, r8, r9)\t')\nLIMB32(`stw\tr0, 0(rp)\t')\nLIMB32(`bdz\tL(endS)\t\t')\n\nL(topS):\nLIMB32(`lwzu\tr8, 4(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\nLIMB64(`addi\tup, up, GMP_LIMB_BYTES\t')\nLIMB32(`lwzu\tr9, 4(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\nLIMB64(`addi\tvp, vp, GMP_LIMB_BYTES\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwu\tr0, 4(rp)\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB64(`addi\trp, rp, GMP_LIMB_BYTES\t')\n\tbdnz\tL(topS)\nL(endS):\n\tblr\n\nL(big):\tmfspr\tr12, 256\n\toris\tr0, r12, 0xfffc\t\tC Set VRSAVE bit 0-13 FIXME\n\tmtspr\t256, r0\n\nC First loop until the destination is 16-byte aligned.  
This will execute 0 or 1\nC times for 64-bit machines, and 0 to 3 times for 32-bit machines.\n\nLIMB32(`rlwinm.\tr0, rp, 30,30,31')\tC (rp >> 2) mod 4\nLIMB64(`rlwinm.\tr0, rp, 29,31,31')\tC (rp >> 3) mod 2\n\tbeq\tL(aligned)\n\n\tsubfic\tr7, r0, LIMBS_PER_VR\nLIMB32(`li\tr10, 0\t\t')\n\tsubf\tn, r7, n\nL(top0):\nLIMB32(`lwz\tr8, 0(up)\t')\nLIMB64(`ld\tr8, 0(up)\t')\n\taddi\tup, up, GMP_LIMB_BYTES\nLIMB32(`lwz\tr9, 0(vp)\t')\nLIMB64(`ld\tr9, 0(vp)\t')\n\taddi\tvp, vp, GMP_LIMB_BYTES\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top0)\t\t')\n\n\taddi\trp, rp, 16\t\tC update rp, but preserve its alignment\n\nL(aligned):\nLIMB64(`srdi\tr7, n, 1\t')\tC loop count corresponding to n\nLIMB32(`srwi\tr7, n, 2\t')\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\n\tli\tr10, 16\n\tlvsl\tus, 0, up\n\tlvsl\tvs, 0, vp\n\n\tlvx\tv2, 0, up\n\tlvx\tv3, 0, vp\n\tbdnz\tL(gt1)\n\tlvx\tv0, r10, up\n\tlvx\tv1, r10, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(gt1):\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\nL(top):\tlvx\tv0, 0, up\n\tlvx\tv1, 0, vp\n\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\tbdz\tL(end)\n\tlvx\tv2, r10, up\n\tlvx\tv3, r10, vp\n\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\taddi\tup, up, 32\n\taddi\tvp, vp, 32\n\taddi\trp, rp, 32\n\tbdnz\tL(top)\n\n\tandi.\tr0, up, 15\n\tvxor\tv0, v0, v0\n\tbeq\t1f\n\tlvx\tv0, 0, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv1, v1, v1\n\tbeq\t1f\n\tlvx\tv1, 0, vp\n1:\tvperm\tv4, v2, v0, us\n\tvperm\tv5, v3, v1, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, 
v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, 0, rp\n\taddi\trp, rp, 4\n\tb\tL(tail)\n\nL(end):\tandi.\tr0, up, 15\n\tvxor\tv2, v2, v2\n\tbeq\t1f\n\tlvx\tv2, r10, up\n1:\tandi.\tr0, vp, 15\n\tvxor\tv3, v3, v3\n\tbeq\t1f\n\tlvx\tv3, r10, vp\n1:\tvperm\tv4, v0, v2, us\n\tvperm\tv5, v1, v3, vs\n\tvnegb(\tv5, v5)\n\tlogop(\tv6, v4, v5)\n\tvnega(\tv6, v6)\n\tstvx\tv6, r10, rp\n\n\taddi\tup, up, 16\n\taddi\tvp, vp, 16\n\taddi\trp, rp, 20\n\nL(tail):\nLIMB32(`rlwinm.\tr7, n, 0,30,31\t')\tC r7 = n mod 4\nLIMB64(`rlwinm.\tr7, n, 0,31,31\t')\tC r7 = n mod 2\n\tbeq\tL(ret)\n\taddi\trp, rp, 15\nLIMB32(`rlwinm\trp, rp, 0,0,27\t')\nLIMB64(`rldicr\trp, rp, 0,59\t')\n\tli\tr10, 0\nL(top2):\nLIMB32(`lwzx\tr8, r10, up\t')\nLIMB64(`ldx\tr8, r10, up\t')\nLIMB32(`lwzx\tr9, r10, vp\t')\nLIMB64(`ldx\tr9, r10, vp\t')\nLIMB32(`addic.\tr7, r7, -1\t')\n\tlogopS(\tr0, r8, r9)\nLIMB32(`stwx\tr0, r10, rp\t')\nLIMB64(`std\tr0, 0(rp)\t')\nLIMB32(`addi\tr10, r10, GMP_LIMB_BYTES')\nLIMB32(`bne\tL(top2)\t\t')\n\nL(ret):\tmtspr\t256, r12\n\tblr\nEPILOGUE()\n\nC This works for 64-bit PowerPC, since a limb ptr can only be aligned\nC in 2 relevant ways, which means we can always find a pair of aligned\nC pointers of rp, up, and vp.\nC process words until rp is 16-byte aligned\nC if (((up | vp) & 15) == 0)\nC   process with VMX without any vperm\nC else if ((up & 15) != 0 && (vp & 15) != 0)\nC   process with VMX using vperm on store data\nC else if ((up & 15) != 0)\nC   process with VMX using vperm on up data\nC else\nC   process with VMX using vperm on vp data\nC\nC\trlwinm,\tr0, up, 0,28,31\nC\trlwinm\tr0, vp, 0,28,31\nC\tcmpwi\tcr7, r0, 0\nC\tcror\tcr6, cr0, cr7\nC\tcrand\tcr0, cr0, cr7\nC\t\n"
  },
  {
    "path": "mpn/powerpc64/README",
    "content": "Copyright 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n                    POWERPC-64 MPN SUBROUTINES\n\n\nThis directory contains mpn functions for 64-bit PowerPC chips.\n\n\nCODE ORGANIZATION\n\n\tmpn/powerpc64          mode-neutral code\n\tmpn/powerpc64/mode32   code for mode32\n\tmpn/powerpc64/mode64   code for mode64\n\n\nThe mode32 and mode64 sub-directories contain code which is for use in the\nrespective chip mode, 32 or 64.  The top-level directory is code that's\nunaffected by the mode.\n\nThe \"adde\" instruction is the main difference between mode32 and mode64.  
It\noperates on either a 32-bit or 64-bit quantity according to the chip mode.\nOther instructions have an operand size in their opcode and hence don't vary.\n\n\n\nPOWER3/PPC630 pipeline information:\n\nDecoding is 4-way + branch and issue is 8-way with some out-of-order\ncapability.\n\nFunctional units:\nLS1  - ld/st unit 1\nLS2  - ld/st unit 2\nFXU1 - integer unit 1, handles any simple integer instruction\nFXU2 - integer unit 2, handles any simple integer instruction\nFXU3 - integer unit 3, handles integer multiply and divide\nFPU1 - floating-point unit 1\nFPU2 - floating-point unit 2\n\nMemory:\t\t  Any two memory operations can issue, but memory subsystem\n\t\t  can sustain just one store per cycle.  No need for data\n\t\t  prefetch; the hardware has very sophisticated prefetch logic.\nSimple integer:\t  2 operations (such as add, rl*)\nInteger multiply: 1 operation every 9th cycle worst case; exact timing depends\n\t\t  on 2nd operand's most significant bit position (10 bits per\n\t\t  cycle).  
Multiply unit is not pipelined, only one multiply\n\t\t  operation in progress is allowed.\nInteger divide:\t  ?\nFloating-point:\t  Any plain 2 arithmetic instructions (such as fmul, fadd, and\n\t\t  fmadd), latency 4 cycles.\nFloating-point divide:\n\t\t  ?\nFloating-point square root:\n\t\t  ?\n\nPOWER3/PPC630 best possible times for the main loops:\nshift:\t      1.5 cycles limited by integer unit contention.\n\t      With 63 special loops, one for each shift count, we could\n\t      reduce the needed integer instructions to 2, which would\n\t      reduce the best possible time to 1 cycle.\nadd/sub:      1.5 cycles, limited by ld/st unit contention.\nmul:\t      18 cycles (average) unless floating-point operations are used,\n\t      but that would only help for multiplies of perhaps 10 and more\n\t      limbs.\naddmul/submul:Same situation as for mul.\n\n\nPOWER4/PPC970 and POWER5 pipeline information:\n\nThis is a very odd pipeline, it is basically a VLIW masquerading as a plain\narchitecture.  Its issue rules are not made public, and since it is so weird,\nit is very hard to figure out any useful information from experimentation.\nAn example:\n\n  A well-aligned loop with nop's take 3, 4, 6, 7, ... cycles.\n    3 cycles for  0,  1,  2,  3,  4,  5,  6,  7 nop's\n    4 cycles for  8,  9, 10, 11, 12, 13, 14, 15 nop's\n    6 cycles for 16, 17, 18, 19, 20, 21, 22, 23 nop's\n    7 cycles for 24, 25, 26, 27 nop's\n    8 cycles for 28, 29, 30, 31 nop's\n    ... continues regularly\n\n\nFunctional units:\nLS1  - ld/st unit 1\nLS2  - ld/st unit 2\nFXU1 - integer unit 1, handles any integer instruction\nFXU2 - integer unit 2, handles any integer instruction\nFPU1 - floating-point unit 1\nFPU2 - floating-point unit 2\n\nWhile this is one integer unit less than POWER3/PPC630, the remaining units\nare more powerful; here they handle multiply and divide.\n\nMemory:\t\t  2 ld/st.  
Stores go to the L2 cache, which can sustain just\n\t\t  one store per cycle.\n\t\t  L1 load latency: to gregs 3-4 cycles, to fregs 5-6 cycles.\n\t\t  Operations that modify the address register might be split\n\t\t  to use also an integer issue slot.\nSimple integer:\t  2 operations every cycle, latency 2.\nInteger multiply: 2 operations every 6th cycle, latency 7 cycles.\nInteger divide:\t  ?\nFloating-point:\t  Any plain 2 arithmetic instructions (such as fmul, fadd, and\n\t\t  fmadd), latency 6 cycles.\nFloating-point divide:\n\t\t  ?\nFloating-point square root:\n\t\t  ?\n\n\nIDEAS\n\n*mul_1: Handling one limb using mulld/mulhdu and two limbs using floating-\npoint operations should give performance of about 20 cycles for 3 limbs, or 7\ncycles/limb.\n\nWe should probably split the single-limb operand in 32-bit chunks, and the\nmulti-limb operand in 16-bit chunks, allowing us to accumulate well in fp\nregisters.\n\nProblem is to get 32-bit or 16-bit words to the fp registers.  Only 64-bit fp\nmemops copy bits without fiddling with them.  We might therefore need to\nload to integer registers with zero extension, store as 64 bits into temp\nspace, and then load to fp regs.  Alternatively, load directly to fp space\nand add well-chosen constants to get cancelation.  (Other part after given by\nsubsequent subtraction.)\n\nPossible code mix for load-via-intregs variant:\n\nlwz,std,lfd\nfmadd,fmadd,fmul,fmul\nfctidz,stfd,ld,fctidz,stfd,ld\nadd,adde\nlwz,std,lfd\nfmadd,fmadd,fmul,fmul\nfctidz,stfd,ld,fctidz,stfd,ld\nadd,adde\nsrd,sld,add,adde,add,adde\n"
  },
  {
    "path": "mpn/powerpc64/aix.m4",
    "content": "divert(-1)\ndnl  m4 macros for AIX 64-bit assembly.\n\ndnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',\n\t`.machine\t\"ppc64\"\n\t.toc')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  Don't want ELF style .size in the epilogue.\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.globl\t$1\n\t.globl\t.$1\n\t.csect\t[DS], 3\n$1:\n\t.llong\t.$1, TOC[tc0], 0\n\t.csect\t[PR]\n\t.align\t4\n.$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`')\n\ndefine(`TOC_ENTRY', `')\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`define(`TOC_ENTRY',\n`\t.toc\n..$2:\t.tc\t$2[TC], $2')'\n\t`ld\t$1, ..$2(2)')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`\t.globl\t$1')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\t.csect\t[RO], 3\n\tALIGN(ifelse($#,1,2,$2))\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1))\n\ndefine(`CALL',\n\t`bl\t.$1\n\tnop')\n\ndefine(`ASM_END', `TOC_ENTRY')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc64/copyd.asm",
    "content": "dnl  PowerPC-64 mpn_copyd\n\ndnl  Copyright 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     1\nC POWER4/PPC970:     1\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\n\nASM_START()\nPROLOGUE(mpn_copyd)\n\trldic.\tr0, r5, 3, 59\tC r0 = (r5 & 3) << 3; cr0 = (n == 4t)?\n\tcmpldi\tcr6, r0, 16\tC cr6 = (n cmp 4t + 2)?\n\nifdef(`HAVE_ABI_mode32',\n`\trldic\tr6, r5, 3, 32',\tC byte count corresponding to n\n`\trldicr\tr6, r5, 3, 60')\tC byte count corresponding to n\n\n\taddi\tr5, r5, 4\tC compute...\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tr5, r5, 62,34',\tC ...branch count\n`\trldicl\tr5, r5, 62, 2')\tC ...branch count\n\tmtctr\tr5\n\n\tadd\tr4, r4, r6\n\tadd\tr3, r3, r6\n\tsub\tr4, r4, r0\tC offset up\n\tsub\tr3, r3, r0\tC offset rp\n\n\tbeq\tcr0, L(L00)\n\tblt\tcr6, L(L01)\n\tbeq\tcr6, L(L10)\n\tb\tL(L11)\n\n\tALIGN(16)\nL(oop):\tld\tr6, 24(r4)\n\tstd\tr6, 24(r3)\nL(L11):\tld\tr6, 16(r4)\n\tstd\tr6, 16(r3)\nL(L10):\tld\tr6, 8(r4)\n\tstd\tr6, 8(r3)\nL(L01):\tld\tr6, 0(r4)\n\tstd\tr6, 0(r3)\nL(L00):\taddi\tr4, r4, -32\n\taddi\tr3, r3, 
-32\n\tbdnz\tL(oop)\n\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/copyi.asm",
    "content": "dnl  PowerPC-64 mpn_copyi.\n\ndnl  Copyright 2004, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     1\nC POWER4/PPC970:     1\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\n\nASM_START()\nPROLOGUE(mpn_copyi)\n\trldic.\tr0, r5, 3, 59\tC r0 = (r5 & 3) << 3; cr0 = (n == 4t)?\n\tcmpldi\tcr6, r0, 16\tC cr6 = (n cmp 4t + 2)?\n\n\taddi\tr5, r5, 4\tC compute...\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tr5, r5, 62,34',\tC ...branch count\n`\trldicl\tr5, r5, 62, 2')\tC ...branch count\n\tmtctr\tr5\n\n\tadd\tr4, r4, r0\tC offset up\n\tadd\tr3, r3, r0\tC offset rp\n\n\tbeq\tcr0, L(L00)\n\tblt\tcr6, L(L01)\n\tbeq\tcr6, L(L10)\n\tb\tL(L11)\n\n\tALIGN(16)\nL(oop):\tld\tr6, -32(r4)\n\tstd\tr6, -32(r3)\nL(L11):\tld\tr6, -24(r4)\n\tstd\tr6, -24(r3)\nL(L10):\tld\tr6, -16(r4)\n\tstd\tr6, -16(r3)\nL(L01):\tld\tr6, -8(r4)\n\tstd\tr6, -8(r3)\nL(L00):\taddi\tr4, r4, 32\n\taddi\tr3, r3, 32\n\tbdnz\tL(oop)\n\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/darwin.m4",
    "content": "divert(-1)\ndnl  m4 macros for Mac OS 64-bit assembly.\n\ndnl  Copyright 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',`')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\n\ndefine(`DARWIN')\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n`\t.text\n\t.globl\t$1\n\t.align\t4\n$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1))\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`ifdef(`PIC',\n`\n\tmflr\tr0\t\t\tC save return address\n\tbcl\t20, 31, 1f\n1:\tmflr\t$1\n\taddis\t$1, $1, ha16($2-1b)\n\tla\t$1, lo16($2-1b)($1)\n\tmtlr\tr0\t\t\tC restore return address\n',`\n\tlis\t$1, ha16($2)\n\tla\t$1, lo16($2)($1)\n')')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`dnl')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\t.const\n\tALIGN(ifelse($#,1,2,$2))\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1))\n\ndefine(`CALL',\n\t`bl\tGSYM_PREFIX`'$1')\n\ndefine(`ASM_END', `dnl')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc64/elf.m4",
    "content": "divert(-1)\ndnl  m4 macros for powerpc64 GNU/Linux assembly.\n\ndnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ndefine(`ASM_START',`')\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`\n\t.globl\t$1\n\t.globl\t.$1\n\t.section\t\".opd\",\"aw\"\n\t.align\t3\n$1:\n\t.llong\t.$1, .TOC.@tocbase, 0\n\t.size\t$1, 24\n\t.type\t.$1, @function\n\t.section\t\".text\"\n\t.align\t4\n.$1:')\n\ndefine(`EPILOGUE_cpu',\nm4_assert_numargs(1)\n`\t.size\t.$1, .-.$1')\n\ndefine(`TOC_ENTRY', `')\n\ndefine(`LEA',\nm4_assert_numargs(2)\n`define(`TOC_ENTRY',\n`\t.section\t\".toc\", \"aw\"\n..$2:\t.tc\t$2[TC], $2')'\n\t`ld\t$1, ..$2@toc(2)')\n\ndefine(`EXTERN',\nm4_assert_numargs(1)\n`dnl')\n\ndefine(`DEF_OBJECT',\nm4_assert_numargs_range(1,2)\n`\n\t.section\t.rodata\n\tALIGN(ifelse($#,1,2,$2))\n\t.type\t$1, @object\n$1:\n')\n\ndefine(`END_OBJECT',\nm4_assert_numargs(1)\n`\t.size\t$1, .-$1')\n\ndefine(`CALL',\n\t`bl\tGSYM_PREFIX`'$1\n\tnop')\n\ndefine(`ASM_END', `TOC_ENTRY')\n\ndivert\n"
  },
  {
    "path": "mpn/powerpc64/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#if !defined (_LONG_LONG_LIMB)\n/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values.  So\n   use adde etc only when not _LONG_LONG_LIMB.  
*/\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (__builtin_constant_p (bh) && (bh) == 0)\t\t\t\t\\\n      __asm__ (\"{a%I4|add%I4c} %1,%3,%4\\n\\t{aze|addze} %0,%2\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"%r\" (al), \"rI\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)\t\t\\\n      __asm__ (\"{a%I4|add%I4c} %1,%3,%4\\n\\t{ame|addme} %0,%2\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"%r\" (al), \"rI\" (bl));\\\n    else\t\t\t\t\t\t\t\t\\\n      __asm__ (\"{a%I5|add%I5c} %1,%4,%5\\n\\t{ae|adde} %0,%2,%3\"\t\t\\\n\t     : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t     : \"r\" (ah), \"r\" (bh), \"%r\" (al), \"rI\" (bl));\t\t\\\n  } while (0)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    if (__builtin_constant_p (ah) && (ah) == 0)\t\t\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{sfze|subfze} %0,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (bh), \"rI\" (al), \"r\" (bl));\\\n    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{sfme|subfme} %0,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (bh), \"rI\" (al), \"r\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == 0)\t\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{ame|addme} %0,%2\"\t\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"rI\" (al), \"r\" (bl));\\\n    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)\t\t\\\n      __asm__ (\"{sf%I3|subf%I3c} %1,%4,%3\\n\\t{aze|addze} %0,%2\"\t\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl) : \"r\" (ah), \"rI\" (al), \"r\" (bl));\\\n    else\t\t\t\t\t\t\t\t\\\n      __asm__ (\"{sf%I4|subf%I4c} %1,%5,%4\\n\\t{sfe|subfe} %0,%3,%2\"\t\\\n\t       : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t       : \"r\" (ah), \"r\" (bh), \"rI\" (al), \"r\" (bl));\t\t\\\n  } while (0)\n#endif /* ! 
_LONG_LONG_LIMB */\n#define count_leading_zeros(count, x) \\\n  __asm__ (\"cntlzd %0,%1\" : \"=r\" (count) : \"r\" (x))\n#define COUNT_LEADING_ZEROS_0 64\n#define umul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UDItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    __asm__ (\"mulhdu %0,%1,%2\" : \"=r\" (ph) : \"%r\" (m0), \"r\" (m1));\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n#define smul_ppmm(ph, pl, m0, m1) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    DItype __m0 = (m0), __m1 = (m1);\t\t\t\t\t\\\n    __asm__ (\"mulhd %0,%1,%2\" : \"=r\" (ph) : \"%r\" (m0), \"r\" (m1));\t\\\n    (pl) = __m0 * __m1;\t\t\t\t\t\t\t\\\n  } while (0)\n\n\n#endif\n"
  },
  {
    "path": "mpn/powerpc64/lshift.asm",
    "content": "dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     1.5\nC POWER4/PPC970:     3.0\n\nC INPUT PARAMETERS\ndefine(`rp',`r3')\ndefine(`up',`r4')\ndefine(`n',`r5')\ndefine(`cnt',`r6')\n\ndefine(`tnc',`r5')\ndefine(`v0',`r0')\ndefine(`v1',`r7')\ndefine(`u0',`r8')\ndefine(`u1',`r9')\ndefine(`h0',`r10')\ndefine(`h1',`r11')\n\n\nASM_START()\nPROLOGUE(mpn_lshift)\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tr7, r5, 0, 32\tC zero extend n\n\tmtctr\tr7',\t\tC copy n to count register\n`\tmtctr\tn')\t\tC copy n to count register\n\nifdef(`HAVE_ABI_mode32',\n`\trldic\tr0, n, 3, 32',\tC byte count corresponding to n\n`\trldicr\tr0, n, 3, 60')\tC byte count corresponding to n\n\n\tadd\trp, rp, r0\tC rp = rp + n\n\tadd\tup, up, r0\tC up = up + n\n\taddi\trp, rp, 8\tC rp now points 16 beyond end\n\taddi\tup, up, -8\tC up now points to last limb\n\tsubfic\ttnc, cnt, 64\tC reverse shift count\n\n\tld\tu0, 0(up)\n\tsld\th0, u0, cnt\n\tsrd\tr12, u0, tnc\tC return value\n\tbdz\tL(1)\t\tC jump for 
n = 1\n\n\tld\tu1, -8(up)\n\tbdz\tL(2)\t\tC jump for n = 2\n\n\tldu\tu0, -16(up)\n\tbdz\tL(end)\t\tC jump for n = 3\n\nL(oop):\tsrd\tv1, u1, tnc\n\tsld\th1, u1, cnt\n\tld\tu1, -8(up)\n\tor\th0, v1, h0\n\tstdu\th0, -16(rp)\n\n\tbdz\tL(exit)\n\n\tsrd\tv0, u0, tnc\n\tsld\th0, u0, cnt\n\tldu\tu0, -16(up)\n\tor\th1, v0, h1\n\tstd\th1, -8(rp)\n\n\tbdnz\tL(oop)\n\nL(end):\tsrd\tv1, u1, tnc\n\tsld\th1, u1, cnt\n\tor\th0, v1, h0\n\tstdu\th0, -16(rp)\n\tsrd\tv0, u0, tnc\n\tsld\th0, u0, cnt\n\tor\th1, v0, h1\n\tstd\th1, -8(rp)\nL(1):\tstd\th0, -16(rp)\nifdef(`HAVE_ABI_mode32',\n`\tsrdi\tr3, r12, 32\n\tmr\tr4, r12\n',`\tmr\tr3, r12\n')\n\tblr\n\nL(exit):\tsrd\tv0, u0, tnc\n\tsld\th0, u0, cnt\n\tor\th1, v0, h1\n\tstd\th1, -8(rp)\nL(2):\tsrd\tv1, u1, tnc\n\tsld\th1, u1, cnt\n\tor\th0, v1, h0\n\tstdu\th0, -16(rp)\n\tstd\th1, -8(rp)\nifdef(`HAVE_ABI_mode32',\n`\tsrdi\tr3, r12, 32\n\tmr\tr4, r12\n',`\tmr\tr3, r12\n')\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/mode32/add_n.asm",
    "content": "dnl  PowerPC-64/mode32 mpn_add_n -- Add two limb vectors of the same length > 0\ndnl  and store sum in a third limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     ?\nC POWER4/PPC970:     4.25\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC vp\tr5\nC n\tr6\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tmtctr\tr6\t\tC copy size into CTR\n\taddic\tr0, r0, 0\tC clear cy\n\tld\tr8, 0(r4)\tC load least significant s1 limb\n\tld\tr0, 0(r5)\tC load least significant s2 limb\n\taddi\tr3, r3, -8\tC offset res_ptr, it's updated before it's used\n\tbdz\tL(end)\t\tC If done, skip loop\n\nL(oop):\tld\tr9, 8(r4)\tC load s1 limb\n\tld\tr10, 8(r5)\tC load s2 limb\n\tadde\tr7, r0, r8\tC add limbs with cy, set cy\n\tsrdi\tr6, r0, 32\n\tsrdi\tr11, r8, 32\n\tadde\tr6, r6, r11\tC add high limb parts, set cy\n\tstd\tr7, 8(r3)\tC store result limb\n\tbdz\tL(exit)\t\tC decrement CTR and exit if done\n\tldu\tr8, 16(r4)\tC load s1 limb and update s1_ptr\n\tldu\tr0, 16(r5)\tC load s2 limb and update s2_ptr\n\tadde\tr7, r10, 
r9\tC add limbs with cy, set cy\n\tsrdi\tr6, r10, 32\n\tsrdi\tr11, r9, 32\n\tadde\tr6, r6, r11\tC add high limb parts, set cy\n\tstdu\tr7, 16(r3)\tC store result limb and update res_ptr\n\tbdnz\tL(oop)\t\tC decrement CTR and loop back\n\nL(end):\tadde\tr7, r0, r8\n\tsrdi\tr6, r0, 32\n\tsrdi\tr11, r8, 32\n\tadde\tr6, r6, r11\tC add limbs with cy, set cy\n\tstd\tr7, 8(r3)\tC store ultimate result limb\n\tli\tr3, 0\t\tC load cy into ...\n\taddze\tr4, r3\t\tC ... return value register\n\tblr\nL(exit):\tadde\tr7, r10, r9\n\tsrdi\tr6, r10, 32\n\tsrdi\tr11, r9, 32\n\tadde\tr6, r6, r11\tC add limbs with cy, set cy\n\tstd\tr7, 16(r3)\n\tli\tr3, 0\t\tC load cy into ...\n\taddze\tr4, r3\t\tC ... return value register\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/mode32/addmul_1.asm",
    "content": "dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add\ndnl  the result to a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     ?\nC POWER4/PPC970:     12.5\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC v\tr6,r7  or  r7,r8\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\nifdef(`BROKEN_LONGLONG_PARAM',\n`\trldimi\tr8, r7, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r8\n',`\n\trldimi\tr7, r6, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r7\n')\n\tli\tr7, 0\t\tC cy_limb = 0\n\tmtctr\tr5\n\taddic\tr0, r0, 0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\n\nL(oop):\tldu\tr0, 8(r4)\n\tmulld\tr9, r0, r6\n\tadde\tr12, r9, r7\tC add old high limb and new low limb\n\tsrdi\tr5, r9, 32\n\tsrdi\tr11, r7, 32\n\tadde\tr5, r5, r11\tC add high limb parts, set cy\n\tmulhdu\tr7, r0, r6\n\taddze\tr7, r7\n\tld\tr10, 8(r3)\n\taddc\tr9, r12, r10\n\tsrdi\tr5, r12, 32\n\tsrdi\tr11, r10, 32\n\tadde\tr5, r5, r11\tC add high limb parts, set 
cy\n\tstdu\tr9, 8(r3)\n\tbdnz\tL(oop)\n\n\taddze\tr4, r7\n\tsrdi\tr3, r4, 32\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/mode32/mul_1.asm",
    "content": "dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and add\ndnl  the result to a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     ?\nC POWER4/PPC970:     10\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC v\tr6,r7  or  r7,r8\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\nifdef(`BROKEN_LONGLONG_PARAM',\n`\trldimi\tr8, r7, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r8\n',`\n\trldimi\tr7, r6, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r7\n')\n\tli\tr7, 0\t\tC cy_limb = 0\n\tmtctr\tr5\n\taddic\tr0, r0, 0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\n\nL(oop):\tldu\tr0, 8(r4)\n\tmulld\tr9, r0, r6\n\tadde\tr12, r9, r7\tC add old high limb and new low limb\n\tsrdi\tr5, r9, 32\n\tsrdi\tr11, r7, 32\n\tadde\tr5, r5, r11\tC add high limb parts, set cy\n\tmulhdu\tr7, r0, r6\n\tstdu\tr12, 8(r3)\n\tbdnz\tL(oop)\n\n\taddze\tr4, r7\n\tsrdi\tr3, r4, 32\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/mode32/sub_n.asm",
    "content": "dnl  PowerPC-64/mode32 mpn_sub_n -- Subtract two limb vectors of the same\ndnl  length and store difference in a third limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     ?\nC POWER4/PPC970:     4.25\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC vp\tr5\nC n\tr6\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tmtctr\tr6\t\tC copy size into CTR\n\taddic\tr0, r6, -1\tC set cy\n\tld\tr8, 0(r4)\tC load least significant s1 limb\n\tld\tr0, 0(r5)\tC load least significant s2 limb\n\taddi\tr3, r3, -8\tC offset res_ptr, it's updated before it's used\n\tbdz\tL(end)\t\tC If done, skip loop\n\nL(oop):\tld\tr9, 8(r4)\tC load s1 limb\n\tld\tr10, 8(r5)\tC load s2 limb\n\tsubfe\tr7, r0, r8\tC subtract limbs with cy, set cy\n\tsrdi\tr6, r0, 32\n\tsrdi\tr11, r8, 32\n\tsubfe\tr6, r6, r11\n\tstd\tr7, 8(r3)\tC store result limb\n\tbdz\tL(exit)\t\tC decrement CTR and exit if done\n\tldu\tr8, 16(r4)\tC load s1 limb and update s1_ptr\n\tldu\tr0, 16(r5)\tC load s2 limb and update s2_ptr\n\tsubfe\tr7, r10, r9\tC subtract 
limbs with cy, set cy\n\tsrdi\tr6, r10, 32\n\tsrdi\tr11, r9, 32\n\tsubfe\tr6, r6, r11\n\tstdu\tr7, 16(r3)\tC store result limb and update res_ptr\n\tbdnz\tL(oop)\t\tC decrement CTR and loop back\n\nL(end):\tsubfe\tr7, r0, r8\n\tsrdi\tr6, r0, 32\n\tsrdi\tr11, r8, 32\n\tsubfe\tr6, r6, r11\n\tstd\tr7, 8(r3)\tC store ultimate result limb\n\tsubfe\tr3, r0, r0\tC load !cy into ...\n\tsubfic\tr4, r3, 0\tC ... return value register\n\tli\tr3, 0\t\tC zero extend return value\n\tblr\nL(exit):\tsubfe\tr7, r10, r9\n\tsrdi\tr6, r10, 32\n\tsrdi\tr11, r9, 32\n\tsubfe\tr6, r6, r11\n\tstd\tr7, 16(r3)\n\tsubfe\tr3, r0, r0\tC load !cy into ...\n\tsubfic\tr4, r3, 0\tC ... return value register\n\tli\tr3, 0\t\tC zero extend return value\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/mode32/submul_1.asm",
    "content": "dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl  the result from a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     ?\nC POWER4/PPC970:     16\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\nC v\tr6,r7  or  r7,r8\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\nifdef(`BROKEN_LONGLONG_PARAM',\n`\trldimi\tr8, r7, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r8\n',`\n\trldimi\tr7, r6, 32,0\tC assemble vlimb from separate 32-bit arguments\n\tmr\tr6, r7\n')\n\tli\tr7, 0\t\tC cy_limb = 0\n\tmtctr\tr5\n\taddic\tr0, r0, 0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\n\nL(oop):\tldu\tr0, 8(r4)\n\tmulld\tr9, r0, r6\n\tadde\tr12, r9, r7\tC add old high limb and new low limb\n\tsrdi\tr5, r9, 32\n\tsrdi\tr11, r7, 32\n\tadde\tr5, r5, r11\tC add high limb parts, set cy\n\tmulhdu\tr7, r0, r6\n\taddze\tr7, r7\n\tld\tr10, 8(r3)\n\tsubfc\tr9, r12, r10\n\tsrdi\tr5, r12, 32\n\tsrdi\tr11, r10, 32\n\tsubfe\tr5, r5, r11\tC subtract high limb parts, set 
cy\n\tstdu\tr9, 8(r3)\n\tsubfe\tr11, r11, r11\tC invert ...\n\taddic\tr11, r11, 1\tC ... carry\n\tbdnz\tL(oop)\n\n\taddze\tr4, r7\n\tsrdi\tr3, r4, 32\n\tblr\nEPILOGUE()\n\n"
  },
  {
    "path": "mpn/powerpc64/mode64/add_n.asm",
    "content": "dnl  PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and\ndnl  store sum in a third limb vector.\n\ndnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\tr3\nC s1_ptr\tr4\nC s2_ptr\tr5\nC size\t\tr6\n\nC This code runs at 1.5 cycles/limb on the PPC630.\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tmtctr\tr6\t\tC copy size into CTR\n\taddic\tr0,r0,0\t\tC clear cy\n\tld\tr8,0(r4)\tC load least significant s1 limb\n\tld\tr0,0(r5)\tC load least significant s2 limb\n\taddi\tr3,r3,-8\tC offset res_ptr, it's updated before it's used\n\tbdz\tL(eno)\t\tC If done, skip loop\nL(top):\tld\tr9,8(r4)\tC load s1 limb\n\tld\tr10,8(r5)\tC load s2 limb\n\tadde\tr7,r0,r8\tC add limbs with cy, set cy\n\tstd\tr7,8(r3)\tC store result limb\n\tbdz\tL(ene)\t\tC decrement CTR and exit if done\n\tldu\tr8,16(r4)\tC load s1 limb and update s1_ptr\n\tldu\tr0,16(r5)\tC load s2 limb and update s2_ptr\n\tadde\tr7,r10,r9\tC add limbs with cy, set cy\n\tstdu\tr7,16(r3)\tC store result limb and update res_ptr\n\tbdnz\tL(top)\t\tC decrement CTR 
and loop back\n\nL(eno):\tadde\tr7,r0,r8\n\tstd\tr7,8(r3)\tC store ultimate result limb\n\tli\tr3,0\t\tC load cy into ...\n\taddze\tr3,r3\t\tC ... return value register\n\tblr\nL(ene):\tadde\tr7,r10,r9\n\tstd\tr7,16(r3)\n\tli\tr3,0\t\tC load cy into ...\n\taddze\tr3,r3\t\tC ... return value register\n\tblr\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/powerpc64/mode64/addmul_1.asm",
    "content": "dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add\ndnl  the result to a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     6-18\nC POWER4/PPC970:     10\n\nC INPUT PARAMETERS\nC res_ptr\tr3\nC s1_ptr\tr4\nC size\t\tr5\nC s2_limb\tr6\nC cy_limb\tr7\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tli\tr7,0\t\t\tC cy_limb = 0\n\nPROLOGUE(mpn_addmul_1c)\n\tmtctr\tr5\n\taddic\tr0,r0,0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\nL(top):\tldu\tr0,8(r4)\n\tld\tr10,8(r3)\n\tmulld\tr9,r0,r6\n\tadde\tr9,r9,r7\n\tmulhdu\tr7,r0,r6\n\taddze\tr7,r7\n\taddc\tr9,r9,r10\n\tstdu\tr9,8(r3)\n\tbdnz\tL(top)\n\n\taddze\tr3,r7\n\tblr\nEPILOGUE(mpn_addmul_1)\nEPILOGUE(mpn_addmul_1c)\n"
  },
  {
    "path": "mpn/powerpc64/mode64/mul_1.asm",
    "content": "dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store\ndnl  the result in a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published by\ndnl  the Free Software Foundation; either version 2.1 of the License, or (at your\ndnl  option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\tr3\nC s1_ptr\tr4\nC size\t\tr5\nC s2_limb\tr6\nC cy_limb\tr7\n\nC PPC630: 6 to 18 cycles/limb, depending on multiplier.  This cannot be\nC improved unless floating-point operations are used instead of the slow\nC mulld/mulhdu.\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tli\tr7,0\t\tC cy_limb = 0\n\nPROLOGUE(mpn_mul_1c)\n\tmtctr\tr5\n\taddic\tr0,r0,0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\nL(top):\tldu\tr0,8(r4)\n\tmulld\tr9,r0,r6\n\tadde\tr9,r9,r7\n\tmulhdu\tr7,r0,r6\n\tstdu\tr9,8(r3)\n\tbdnz\tL(top)\n\n\taddze\tr3,r7\n\tblr\nEPILOGUE(mpn_mul_1)\nEPILOGUE(mpn_mul_1c)\n"
  },
  {
    "path": "mpn/powerpc64/mode64/sub_n.asm",
    "content": "dnl  PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0\ndnl  and store difference in a third limb vector.\n\ndnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\tr3\nC s1_ptr\tr4\nC s2_ptr\tr5\nC size\t\tr6\n\nC This code runs at 1.5 cycles/limb on the PPC630.\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tmtctr\tr6\t\tC copy size into CTR\n\taddic\tr0,r6,-1\tC set cy\n\tld\tr8,0(r4)\tC load least significant s1 limb\n\tld\tr0,0(r5)\tC load least significant s2 limb\n\taddi\tr3,r3,-8\tC offset res_ptr, it's updated before it's used\n\tbdz\tL(eno)\t\tC If done, skip loop\nL(top):\tld\tr9,8(r4)\tC load s1 limb\n\tld\tr10,8(r5)\tC load s2 limb\n\tsubfe\tr7,r0,r8\tC subtract limbs with cy, set cy\n\tstd\tr7,8(r3)\tC store result limb\n\tbdz\tL(ene)\t\tC decrement CTR and exit if done\n\tldu\tr8,16(r4)\tC load s1 limb and update s1_ptr\n\tldu\tr0,16(r5)\tC load s2 limb and update s2_ptr\n\tsubfe\tr7,r10,r9\tC subtract limbs with cy, set cy\n\tstdu\tr7,16(r3)\tC store result limb and update 
res_ptr\n\tbdnz\tL(top)\t\tC decrement CTR and loop back\n\nL(eno):\tsubfe\tr7,r0,r8\n\tstd\tr7,8(r3)\tC store ultimate result limb\n\tsubfe\tr3,r0,r0\tC load !cy into ...\n\tsubfic\tr3,r3,0\t\tC ... return value register\n\tblr\nL(ene):\tsubfe\tr7,r10,r9\n\tstd\tr7,16(r3)\n\tsubfe\tr3,r0,r0\tC load !cy into ...\n\tsubfic\tr3,r3,0\t\tC ... return value register\n\tblr\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/powerpc64/mode64/submul_1.asm",
    "content": "dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl  the result from a second limb vector.\n\ndnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     6-18\nC POWER4/PPC970:     10\n\nC INPUT PARAMETERS\nC res_ptr\tr3\nC s1_ptr\tr4\nC size\t\tr5\nC s2_limb\tr6\nC cy_limb\tr7\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tli\tr7,0\t\t\tC cy_limb = 0\n\nPROLOGUE(mpn_submul_1c)\n\tmtctr\tr5\n\taddic\tr0,r0,0\n\taddi\tr3, r3, -8\n\taddi\tr4, r4, -8\nL(top):\tldu\tr0,8(r4)\n\tld\tr10,8(r3)\n\tmulld\tr9,r0,r6\n\tadde\tr9,r9,r7\n\tmulhdu\tr7,r0,r6\n\taddze\tr7,r7\n\tsubfc\tr9,r9,r10\n\tstdu\tr9,8(r3)\n\tsubfe\tr11,r11,r11\t\tC invert ...\n\taddic\tr11,r11,1\t\tC ... carry\n\tbdnz\tL(top)\n\n\taddze\tr3,r7\n\tblr\nEPILOGUE(mpn_submul_1)\nEPILOGUE(mpn_submul_1c)\n"
  },
  {
    "path": "mpn/powerpc64/mode64/umul.asm",
    "content": "dnl  PowerPC-64 umul_ppmm -- support for longlong.h\n\ndnl  Copyright 2000, 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);\nC\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\n\tC r3\tlowptr\n\tC r4\tm1\n\tC r5\tm2\n\n\tmulld\tr0, r4, r5\n\tmulhdu\tr9, r4, r5\n\tstd\tr0, 0(r3)\n\tmr\tr3, r9\n\tblr\n\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/powerpc64/rshift.asm",
    "content": "dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt\n\ndnl  Copyright 2003, 2005 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:     1.5\nC POWER4/PPC970:     3.0\n\nC INPUT PARAMETERS\ndefine(`rp',`r3')\ndefine(`up',`r4')\ndefine(`n',`r5')\ndefine(`cnt',`r6')\n\ndefine(`tnc',`r5')\ndefine(`v0',`r0')\ndefine(`v1',`r7')\ndefine(`u0',`r8')\ndefine(`u1',`r9')\ndefine(`h0',`r10')\ndefine(`h1',`r11')\n\n\nASM_START()\nPROLOGUE(mpn_rshift)\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tn, n, 0, 32')\tC zero extend n\n\tmtctr\tn\t\tC copy n to count register\n\taddi\trp, rp, -16\n\tsubfic\ttnc, cnt, 64\tC reverse shift count\n\n\tld\tu0, 0(up)\n\tsrd\th0, u0, cnt\n\tsld\tr12, u0, tnc\tC return value\n\tbdz\tL(1)\t\tC jump for n = 1\n\n\tld\tu1, 8(up)\n\tbdz\tL(2)\t\tC jump for n = 2\n\n\tldu\tu0, 16(up)\n\tbdz\tL(end)\t\tC jump for n = 3\n\nL(oop):\tsld\tv1, u1, tnc\n\tsrd\th1, u1, cnt\n\tld\tu1, 8(up)\n\tor\th0, v1, h0\n\tstdu\th0, 16(rp)\n\n\tbdz\tL(exit)\n\n\tsld\tv0, u0, tnc\n\tsrd\th0, u0, cnt\n\tldu\tu0, 16(up)\n\tor\th1, v0, h1\n\tstd\th1, 
8(rp)\n\n\tbdnz\tL(oop)\n\nL(end):\tsld\tv1, u1, tnc\n\tsrd\th1, u1, cnt\n\tor\th0, v1, h0\n\tstdu\th0, 16(rp)\n\tsld\tv0, u0, tnc\n\tsrd\th0, u0, cnt\n\tor\th1, v0, h1\n\tstd\th1, 8(rp)\nL(1):\tstd\th0, 16(rp)\nifdef(`HAVE_ABI_mode32',\n`\tsrdi\tr3, r12, 32\n\tmr\tr4, r12\n',`\tmr\tr3, r12\n')\n\tblr\n\nL(exit):\tsld\tv0, u0, tnc\n\tsrd\th0, u0, cnt\n\tor\th1, v0, h1\n\tstd\th1, 8(rp)\nL(2):\tsld\tv1, u1, tnc\n\tsrd\th1, u1, cnt\n\tor\th0, v1, h0\n\tstdu\th0, 16(rp)\n\tstd\th1, 8(rp)\nifdef(`HAVE_ABI_mode32',\n`\tsrdi\tr3, r12, 32\n\tmr\tr4, r12\n',`\tmr\tr3, r12\n')\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/sqr_diagonal.asm",
    "content": "dnl  PowerPC-64 mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\tcycles/limb\nC POWER3/PPC630:    18\nC POWER4/PPC970:     8\n\nC INPUT PARAMETERS\nC rp\tr3\nC up\tr4\nC n\tr5\n\nASM_START()\nPROLOGUE(mpn_sqr_diagonal)\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tr5, r5, 0, 32')\t\tC zero extend n\n\tmtctr\tr5\n\tld\tr0, 0(r4)\n\tbdz\tL(end)\n\tALIGN(16)\n\nL(top):\tmulld\tr5, r0, r0\n\tmulhdu\tr6, r0, r0\n\tld\tr0, 8(r4)\n\taddi\tr4, r4, 8\n\tstd\tr5, 0(r3)\n\tstd\tr6, 8(r3)\n\taddi\tr3, r3, 16\n\tbdnz\tL(top)\n\nL(end):\tmulld\tr5, r0, r0\n\tmulhdu\tr6, r0, r0\n\tstd\tr5, 0(r3)\n\tstd\tr6, 8(r3)\n\n\tblr\nEPILOGUE()\n"
  },
  {
    "path": "mpn/powerpc64/umul.asm",
    "content": "dnl PowerPC-64 umul_ppmm -- support for longlong.h\n\ndnl Copyright 2000, 2001, 2005 Free Software Foundation, Inc.\ndnl\ndnl This file is part of the GNU MP Library.\ndnl\ndnl The GNU MP Library is free software; you can redistribute it and/or\ndnl modify it under the terms of the GNU Lesser General Public License as\ndnl published by the Free Software Foundation; either version 2.1 of the\ndnl License, or (at your option) any later version.\ndnl\ndnl The GNU MP Library is distributed in the hope that it will be useful,\ndnl but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser\ndnl General Public License for more details.\ndnl\ndnl You should have received a copy of the GNU Lesser General Public License\ndnl along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);\nC\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\n\tC r3\tlowptr\n\tC r4\tm1\n\tC r5\tm2\n\n\tmulld\tr0, r4, r5\n\tmulhdu\tr4, r4, r5\n\tstd\tr0, 0(r3)\nifdef(`HAVE_ABI_mode32',\n`\tsrdi\tr3, r4, 32\n',`\tmr\tr3, r4\n')\n\tblr\n\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/powerpc64/vmx/popcount.asm",
    "content": "dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_popcount.\n\ndnl  Copyright 2006 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                   cycles/limb\nC 7400,7410 (G4):       2.75\nC 744x,745x (G4+):      2.25\nC 970 (G5):             5.3\n\nC STATUS\nC  * Works for all sizes and alignments.\n\nC TODO\nC  * Tune the awkward huge n outer loop code.\nC  * Two lvx, two vperm, and two vxor could make us a similar hamdist.\nC  * For the 970, a combined VMX+intop approach might be best.\nC  * Compress cnsts table in 64-bit mode, only half the values are needed.\n\ndefine(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))\ndefine(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))\ndefine(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))\n\ndefine(`OPERATION_popcount')\n\nifdef(`OPERATION_popcount',`\n  define(`func',`mpn_popcount')\n  define(`up',\t\t`r3')\n  define(`n',\t\t`r4')\n  define(`HAM',\t\t`dnl')\n')\nifdef(`OPERATION_hamdist',`\n  define(`func',`mpn_hamdist')\n  define(`up',\t\t`r3')\n  define(`vp',\t\t`r4')\n  define(`n',\t\t`r5')\n  
define(`HAM',\t\t`$1')\n')\n\ndefine(`x01010101',`v2')\ndefine(`x00110011',`v7')\ndefine(`x00001111',`v10')\ndefine(`cnt1',`v11')\ndefine(`cnt2',`v12')\ndefine(`cnt4',`v13')\n\nifelse(GMP_LIMB_BITS,32,`\n\tdefine(`LIMB32',`\t$1')\n\tdefine(`LIMB64',`')\n',`\n\tdefine(`LIMB32',`')\n\tdefine(`LIMB64',`\t$1')\n')\n\nC The inner loop handles up to 2^34 bits, i.e., 2^31 64-limbs, due to overflow\nC in vsum4ubs.  For large operands, we work in chunks, of size LIMBS_PER_CHUNK.\ndefine(`LIMBS_PER_CHUNK', 0x1000)\ndefine(`LIMBS_CHUNK_THRES', 0x1001)\n\nASM_START()\nPROLOGUE(mpn_popcount)\n\tmfspr\tr10, 256\n\toris\tr0, r10, 0xfffc\t\tC Set VRSAVE bit 0-13\n\tmtspr\t256, r0\n\nifdef(`HAVE_ABI_mode32',\n`\trldicl\tn, n, 0, 32')\t\tC zero extend n\n\nC Load various constants into vector registers\n\tLEA(\tr11, cnsts)\n\tli\tr12, 16\n\tvspltisb cnt1, 1\t\tC 0x0101...01 used as shift count\n\tvspltisb cnt2, 2\t\tC 0x0202...02 used as shift count\n\tvspltisb cnt4, 4\t\tC 0x0404...04 used as shift count\n\tlvx\tx01010101, 0, r11\tC 0x3333...33\n\tlvx\tx00110011, r12, r11\tC 0x5555...55\n\tvspltisb x00001111, 15\t\tC 0x0f0f...0f\n\nLIMB64(`lis\tr0, LIMBS_CHUNK_THRES\t')\nLIMB64(`cmpd\tcr7, n, r0\t\t')\n\n\tlvx\tv0, 0, up\n\taddi\tr7, r11, 96\n\trlwinm\tr6, up, 2,26,29\n\tlvx\tv8, r7, r6\n\tvand\tv0, v0, v8\n\nLIMB32(`rlwinm\tr8, up, 30,30,31\t')\nLIMB64(`rlwinm\tr8, up, 29,31,31\t')\n\tadd\tn, n, r8\t\tC compensate n for rounded down `up'\n\n\tvxor\tv1, v1, v1\n\tli\tr8, 0\t\t\tC grand total count\n\n\tvxor\tv3, v3, v3\t\tC zero total count\n\n\taddic.\tn, n, -LIMBS_PER_VR\n\tble\tL(sum)\n\n\taddic.\tn, n, -LIMBS_PER_VR\n\tble\tL(lsum)\n\nC For 64-bit machines, handle huge n that would overflow vsum4ubs\nLIMB64(`ble\tcr7, L(small)\t\t')\nLIMB64(`addis\tr9, n, -LIMBS_PER_CHUNK\t') C remaining n\nLIMB64(`lis\tn, LIMBS_PER_CHUNK\t')\nL(small):\n\n\nLIMB32(`srwi\tr7, n, 3\t')\tC loop count corresponding to n\nLIMB64(`srdi\tr7, n, 2\t')\tC loop count corresponding to n\n\taddi\tr7, r7, 
1\n\tmtctr\tr7\t\t\tC copy n to count register\n\tb\tL(ent)\n\n\tALIGN(8)\nL(top):\tlvx\tv0, 0, up\n\tli\tr7, 128\t\t\tC prefetch distance\nL(ent):\tlvx\tv1, r12, up\n\taddi\tup, up, 32\n\tvsr\tv4, v0, cnt1\n\tvsr\tv5, v1, cnt1\n\tdcbt\tup, r7\t\t\tC prefetch\n\tvand\tv8, v4, x01010101\n\tvand\tv9, v5, x01010101\n\tvsububm\tv0, v0, v8\t\tC 64 2-bit accumulators (0..2)\n\tvsububm\tv1, v1, v9\t\tC 64 2-bit accumulators (0..2)\n\tvsr\tv4, v0, cnt2\n\tvsr\tv5, v1, cnt2\n\tvand\tv8, v0, x00110011\n\tvand\tv9, v1, x00110011\n\tvand\tv4, v4, x00110011\n\tvand\tv5, v5, x00110011\n\tvaddubm\tv0, v4, v8\t\tC 32 4-bit accumulators (0..4)\n\tvaddubm\tv1, v5, v9\t\tC 32 4-bit accumulators (0..4)\n\tvaddubm\tv8, v0, v1\t\tC 32 4-bit accumulators (0..8)\n\tvsr\tv9, v8, cnt4\n\tvand\tv6, v8, x00001111\n\tvand\tv9, v9, x00001111\n\tvaddubm\tv6, v9, v6\t\tC 16 8-bit accumulators (0..16)\n\tvsum4ubs v3, v6, v3\t\tC sum 4 x 4 bytes into 4 32-bit fields\n\tbdnz\tL(top)\n\n\tandi.\tn, n, eval(LIMBS_PER_2VR-1)\n\tbeq\tL(rt)\n\n\tlvx\tv0, 0, up\n\tvxor\tv1, v1, v1\n\tcmpwi\tn, LIMBS_PER_VR\n\tble\tL(sum)\nL(lsum):\n\tvor\tv1, v0, v0\n\tlvx\tv0, r12, up\nL(sum):\nLIMB32(`rlwinm\tr6, n, 4,26,27\t')\nLIMB64(`rlwinm\tr6, n, 5,26,26\t')\n\taddi\tr7, r11, 32\n\tlvx\tv8, r7, r6\n\tvand\tv0, v0, v8\n\n\tvsr\tv4, v0, cnt1\n\tvsr\tv5, v1, cnt1\n\tvand\tv8, v4, x01010101\n\tvand\tv9, v5, x01010101\n\tvsububm\tv0, v0, v8\t\tC 64 2-bit accumulators (0..2)\n\tvsububm\tv1, v1, v9\t\tC 64 2-bit accumulators (0..2)\n\tvsr\tv4, v0, cnt2\n\tvsr\tv5, v1, cnt2\n\tvand\tv8, v0, x00110011\n\tvand\tv9, v1, x00110011\n\tvand\tv4, v4, x00110011\n\tvand\tv5, v5, x00110011\n\tvaddubm\tv0, v4, v8\t\tC 32 4-bit accumulators (0..4)\n\tvaddubm\tv1, v5, v9\t\tC 32 4-bit accumulators (0..4)\n\tvaddubm\tv8, v0, v1\t\tC 32 4-bit accumulators (0..8)\n\tvsr\tv9, v8, cnt4\n\tvand\tv6, v8, x00001111\n\tvand\tv9, v9, x00001111\n\tvaddubm\tv6, v9, v6\t\tC 16 8-bit accumulators (0..16)\n\tvsum4ubs v3, v6, v3\t\tC sum 4 x 4 bytes 
into 4 32-bit fields\n\nL(rt):\n\tli\tr7, -16\t\t\tC FIXME: does all ppc32 and ppc64 ABIs\n\tstvx\tv3, r7, r1\t\tC FIXME: ...support storing below sp?\n\n\tlwz\tr7, -16(r1)\n\tadd\tr8, r8, r7\n\tlwz\tr7, -12(r1)\n\tadd\tr8, r8, r7\n\tlwz\tr7, -8(r1)\n\tadd\tr8, r8, r7\n\tlwz\tr7, -4(r1)\n\tadd\tr8, r8, r7\n\nC Handle outer loop for huge n.  We inherit cr7 and r0 from above.\nLIMB64(`ble\tcr7, L(ret)\n\tvxor\tv3, v3, v3\t\tC zero total count\n\tmr\tn, r9\n\tcmpd\tcr7, n, r0\n\tble\tcr7, L(2)\n\taddis\tr9, n, -LIMBS_PER_CHUNK\tC remaining n\n\tlis\tn, LIMBS_PER_CHUNK\nL(2):\tsrdi\tr7, n, 2\t\tC loop count corresponding to n\n\tmtctr\tr7\t\t\tC copy n to count register\n\tb\tL(top)\n')\n\nL(ret):\tmr\tr3, r8\n\tmtspr\t256, r10\n\tblr\nEPILOGUE()\n\nDEF_OBJECT(cnsts,16)\n\t.byte\t0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55\n\t.byte\t0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55\n\n\t.byte\t0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33\n\t.byte\t0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33\nC Masks for high end of number\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\n\t.byte\t0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00\n\t.byte\t0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\n\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\t.byte\t0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\n\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\t.byte\t0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00\nC Masks for low end of number\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\n\t.byte\t0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\n\t.byte\t0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\n\t.byte\t0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff\n\n\t.byte\t0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\n\t.byte\t0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff\nEND_OBJECT(cnsts)\nASM_END()\n"
  },
  {
    "path": "mpn/sparc32/README",
    "content": "Copyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\nThis directory contains mpn functions for various SPARC chips.  Code that\nruns only on version 8 SPARC implementations, is in the v8 subdirectory.\n\nRELEVANT OPTIMIZATION ISSUES\n\n  Load and Store timing\n\nOn most early SPARC implementations, the ST instructions takes multiple\ncycles, while a STD takes just a single cycle more than an ST.  For the CPUs\nin SPARCstation I and II, the times are 3 and 4 cycles, respectively.\nTherefore, combining two ST instructions into a STD when possible is a\nsignificant optimization.\n\nLater SPARC implementations have single cycle ST.\n\nFor SuperSPARC, we can perform just one memory instruction per cycle, even\nif up to two integer instructions can be executed in its pipeline.  For\nprograms that perform so many memory operations that there are not enough\nnon-memory operations to issue in parallel with all memory operations, using\nLDD and STD when possible helps.\n\nUltraSPARC-1/2 has very slow integer multiplication.  In the v9 subdirectory,\nwe therefore use floating-point multiplication.\n\nSTATUS\n\n1. 
On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5\n   cycles/limb asymptotically.  We could optimize speed for special counts\n   by using ADDXCC.\n\n2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2\n   cycles/limb asymptotically.\n\n3. mpn_mul_1 runs at what is believed to be optimal speed.\n\n4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a\n   cycle by avoiding one of the add instructions.  See a29k/addmul_1.\n\nThe speed of the code for other SPARC implementations is uncertain.\n"
  },
  {
    "path": "mpn/sparc32/add_n.asm",
    "content": "dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store\ndnl  sum in a third limb vector.\n\ndnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(res_ptr,%o0)\ndefine(s1_ptr,%o1)\ndefine(s2_ptr,%o2)\ndefine(n,%o3)\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\txor\ts2_ptr,res_ptr,%g1\n\tandcc\t%g1,4,%g0\n\tbne\tL(1)\t\t\tC branch if alignment differs\n\tnop\nC **  V1a  **\nL(0):\tandcc\tres_ptr,4,%g0\t\tC res_ptr unaligned? Side effect: cy=0\n\tbe\tL(v1)\t\t\tC if no, branch\n\tnop\nC Add least significant limb separately to align res_ptr and s2_ptr\n\tld\t[s1_ptr],%g4\n\tadd\ts1_ptr,4,s1_ptr\n\tld\t[s2_ptr],%g2\n\tadd\ts2_ptr,4,s2_ptr\n\tadd\tn,-1,n\n\taddcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\tadd\tres_ptr,4,res_ptr\nL(v1):\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\tcmp\tn,2\t\t\tC if n < 2 ...\n\tbl\tL(end2)\t\t\tC ... 
branch to tail code\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\n\tld\t[s1_ptr+0],%g4\n\taddcc\tn,-10,n\n\tld\t[s1_ptr+4],%g1\n\tldd\t[s2_ptr+0],%g2\n\tblt\tL(fin1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 8 limbs until less than 8 limbs remain\nL(loop1):\n\taddxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+8],%g4\n\taddxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+12],%g1\n\tldd\t[s2_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\taddxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+16],%g4\n\taddxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+20],%g1\n\tldd\t[s2_ptr+16],%g2\n\tstd\t%o4,[res_ptr+8]\n\taddxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+24],%g4\n\taddxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+28],%g1\n\tldd\t[s2_ptr+24],%g2\n\tstd\t%o4,[res_ptr+16]\n\taddxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+32],%g4\n\taddxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+36],%g1\n\tldd\t[s2_ptr+32],%g2\n\tstd\t%o4,[res_ptr+24]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tadd\ts1_ptr,32,s1_ptr\n\tadd\ts2_ptr,32,s2_ptr\n\tadd\tres_ptr,32,res_ptr\n\tbge\tL(loop1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\nL(fin1):\n\taddcc\tn,8-2,n\n\tblt\tL(end1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 2 limbs until less than 2 limbs remain\nL(loope1):\n\taddxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+8],%g4\n\taddxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+12],%g1\n\tldd\t[s2_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-2,n\n\tadd\ts1_ptr,8,s1_ptr\n\tadd\ts2_ptr,8,s2_ptr\n\tadd\tres_ptr,8,res_ptr\n\tbge\tL(loope1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(end1):\n\taddxcc\t%g4,%g2,%o4\n\taddxcc\t%g1,%g3,%o5\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\n\tandcc\tn,1,%g0\n\tbe\tL(ret1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add last limb\n\tld\t[s1_ptr+8],%g4\n\tld\t[s2_ptr+8],%g2\n\taddxcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr+8]\n\nL(ret1):\n\tretl\n\taddx\t%g0,%g0,%o0\tC return carry-out from most sign. 
limb\n\nL(1):\txor\ts1_ptr,res_ptr,%g1\n\tandcc\t%g1,4,%g0\n\tbne\tL(2)\n\tnop\nC **  V1b  **\n\tmov\ts2_ptr,%g1\n\tmov\ts1_ptr,s2_ptr\n\tb\tL(0)\n\tmov\t%g1,s1_ptr\n\nC **  V2  **\nC If we come here, the alignment of s1_ptr and res_ptr as well as the\nC alignment of s2_ptr and res_ptr differ.  Since there are only two ways\nC things can be aligned (that we care about) we now know that the alignment\nC of s1_ptr and s2_ptr are the same.\n\nL(2):\tcmp\tn,1\n\tbe\tL(jone)\n\tnop\n\tandcc\ts1_ptr,4,%g0\t\tC s1_ptr unaligned? Side effect: cy=0\n\tbe\tL(v2)\t\t\tC if no, branch\n\tnop\nC Add least significant limb separately to align s1_ptr and s2_ptr\n\tld\t[s1_ptr],%g4\n\tadd\ts1_ptr,4,s1_ptr\n\tld\t[s2_ptr],%g2\n\tadd\ts2_ptr,4,s2_ptr\n\tadd\tn,-1,n\n\taddcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\tadd\tres_ptr,4,res_ptr\n\nL(v2):\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tblt\tL(fin2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 8 limbs until less than 8 limbs remain\nL(loop2):\n\tldd\t[s1_ptr+0],%g2\n\tldd\t[s2_ptr+0],%o4\n\taddxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+0]\n\taddxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+4]\n\tldd\t[s1_ptr+8],%g2\n\tldd\t[s2_ptr+8],%o4\n\taddxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+8]\n\taddxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+12]\n\tldd\t[s1_ptr+16],%g2\n\tldd\t[s2_ptr+16],%o4\n\taddxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+16]\n\taddxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+20]\n\tldd\t[s1_ptr+24],%g2\n\tldd\t[s2_ptr+24],%o4\n\taddxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+24]\n\taddxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+28]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tadd\ts1_ptr,32,s1_ptr\n\tadd\ts2_ptr,32,s2_ptr\n\tadd\tres_ptr,32,res_ptr\n\tbge\tL(loop2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\nL(fin2):\n\taddcc\tn,8-2,n\n\tblt\tL(end2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore 
cy\nL(loope2):\n\tldd\t[s1_ptr+0],%g2\n\tldd\t[s2_ptr+0],%o4\n\taddxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+0]\n\taddxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+4]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-2,n\n\tadd\ts1_ptr,8,s1_ptr\n\tadd\ts2_ptr,8,s2_ptr\n\tadd\tres_ptr,8,res_ptr\n\tbge\tL(loope2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(end2):\n\tandcc\tn,1,%g0\n\tbe\tL(ret2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add last limb\nL(jone):\n\tld\t[s1_ptr],%g4\n\tld\t[s2_ptr],%g2\n\taddxcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\nL(ret2):\n\tretl\n\taddx\t%g0,%g0,%o0\tC return carry-out from most sign. limb\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/sparc32/addmul_1.asm",
    "content": "dnl  SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the\ndnl  result to a second limb vector.\n\ndnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\to0\nC s1_ptr\to1\nC size\t\to2\nC s2_limb\to3\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tC Make S1_PTR and RES_PTR point at the end of their blocks\n\tC and put (- 4 x SIZE) in index/loop counter.\n\tsll\t%o2,2,%o2\n\tadd\t%o0,%o2,%o4\tC RES_PTR in o4 since o0 is retval\n\tadd\t%o1,%o2,%o1\n\tsub\t%g0,%o2,%o2\n\n\tcmp\t%o3,0xfff\n\tbgu\tL(large)\n\tnop\n\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tb\tL(0)\n\t 
add\t%o4,-4,%o4\nL(loop0):\n\taddcc\t%o5,%g1,%g1\n\tld\t[%o1+%o2],%o5\n\taddx\t%o0,%g0,%o0\n\tst\t%g1,[%o4+%o2]\nL(0):\twr\t%g0,%o3,%y\n\tsra\t%o5,31,%g2\n\tand\t%o3,%g2,%g2\n\tandcc\t%g1,0,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,0,%g1\n\tsra\t%g1,20,%g4\n\tsll\t%g1,12,%g1\n\trd\t%y,%g3\n\tsrl\t%g3,20,%g3\n\tor\t%g1,%g3,%g1\n\n\taddcc\t%g1,%o0,%g1\n\taddx\t%g2,%g4,%o0\tC add sign-compensation and cy to hi limb\n\taddcc\t%o2,4,%o2\tC loop counter\n\tbne\tL(loop0)\n\t ld\t[%o4+%o2],%o5\n\n\taddcc\t%o5,%g1,%g1\n\taddx\t%o0,%g0,%o0\n\tretl\n\tst\t%g1,[%o4+%o2]\n\nL(large):\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tsra\t%o3,31,%g4\tC g4 = mask of ones iff S2_LIMB < 0\n\tb\tL(1)\n\t add\t%o4,-4,%o4\nL(loop):\n\taddcc\t%o5,%g3,%g3\n\tld\t[%o1+%o2],%o5\n\taddx\t%o0,%g0,%o0\n\tst\t%g3,[%o4+%o2]\nL(1):\twr\t%g0,%o5,%y\n\tand\t%o5,%g4,%g2\n\tandcc\t%g0,%g0,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%g0,%g1\n\trd\t%y,%g3\n\taddcc\t%g3,%o0,%g3\n\taddx\t%g2,%g1,%o0\n\taddcc\t%o2,4,%o2\n\tbne\tL(loop)\n\t 
ld\t[%o4+%o2],%o5\n\n\taddcc\t%o5,%g3,%g3\n\taddx\t%o0,%g0,%o0\n\tretl\n\tst\t%g3,[%o4+%o2]\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/sparc32/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#define __CLOBBER_CC : \"cc\"\n#define __AND_CLOBBER_CC , \"cc\"\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"addcc %r4,%5,%1\\n\\taddx %r2,%3,%0\"\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"rJ\" (ah), \"rI\" (bh),\"%rJ\" (al), \"rI\" (bl)\t\t\t\\\n\t   __CLOBBER_CC)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"subcc %r4,%5,%1\\n\\tsubx %r2,%3,%0\"\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"rJ\" (ah), \"rI\" (bh), \"rJ\" (al), \"rI\" (bl)\t\\\n\t   __CLOBBER_CC)\n/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h\n   doesn't define anything to indicate that to us, it only sets __sparcv8. */\n#if defined (__sparc_v9__) || defined (__sparcv9)\n/* Perhaps we should use floating-point operations here?  */\n#if 0\n/* Triggers a bug making mpz/tests/t-gcd.c fail.\n   Perhaps we simply need explicitly zero-extend the inputs?  
*/\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0\" :\t\t\\\n\t   \"=r\" (w1), \"=r\" (w0) : \"r\" (u), \"r\" (v) : \"g1\")\n#else\n/* Use v8 umul until above bug is fixed.  */\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"umul %2,%3,%1;rd %%y,%0\" : \"=r\" (w1), \"=r\" (w0) : \"r\" (u), \"r\" (v))\n#endif\n/* Use a plain v8 divide for v9.  */\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    USItype __q;\t\t\t\t\t\t\t\\\n    __asm__ (\"mov %1,%%y;nop;nop;nop;udiv %2,%3,%0\"\t\t\t\\\n\t     : \"=r\" (__q) : \"r\" (n1), \"r\" (n0), \"r\" (d));\t\t\\\n    (r) = (n0) - __q * (d);\t\t\t\t\t\t\\\n    (q) = __q;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#else\n#if defined (__sparc_v8__)   /* gcc normal */\t\t\t\t\\\n  || defined (__sparcv8)     /* gcc solaris */\n/* Don't match immediate range because, 1) it is not often useful,\n   2) the 'I' flag thinks of the range as a 13 bit signed interval,\n   while we want to match a 13 bit interval, sign extended to 32 bits,\n   but INTERPRETED AS UNSIGNED.  */\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"umul %2,%3,%1;rd %%y,%0\" : \"=r\" (w1), \"=r\" (w0) : \"r\" (u), \"r\" (v))\n\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n    USItype __q;\t\t\t\t\t\t\t\\\n    __asm__ (\"mov %1,%%y;nop;nop;nop;udiv %2,%3,%0\"\t\t\t\\\n\t     : \"=r\" (__q) : \"r\" (n1), \"r\" (n0), \"r\" (d));\t\t\\\n    (r) = (n0) - __q * (d);\t\t\t\t\t\t\\\n    (q) = __q;\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n#else /* ! __sparc_v8__ */\n#if defined (__sparclite__)\n/* This has hardware multiply but not divide.  It also has two additional\n   instructions scan (ffs from high bit) and divscc.  */\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"umul %2,%3,%1;rd %%y,%0\" : \"=r\" (w1), \"=r\" (w0) : \"r\" (u), \"r\" (v))\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  __asm__ (\"! Inlined udiv_qrnnd\\n\"\t\t\t\t\t\\\n\"\twr\t%%g0,%2,%%y\t! 
Not a delayed write for sparclite\\n\"\t\\\n\"\ttst\t%%g0\\n\"\t\t\t\t\t\t\t\\\n\"\tdivscc\t%3,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tdivscc\t%%g1,%4,%0\\n\"\t\t\t\t\t\t\\\n\"\trd\t%%y,%1\\n\"\t\t\t\t\t\t\\\n\"\tbl,a 1f\\n\"\t\t\t\t\t\t\t\\\n\"\tadd\t%1,%4,%1\\n\"\t\t\t\t\t\t\\\n\"1:\t! End of inline udiv_qrnnd\"\t\t\t\t\t\\\n\t   : \"=r\" (q), \"=r\" (r) : \"r\" (n1), \"r\" (n0), \"rI\" (d)\t\t\\\n\t   : \"%g1\" __AND_CLOBBER_CC)\n#define count_leading_zeros(count, x) \\\n  __asm__ (\"scan %1,1,%0\" : \"=r\" (count) : \"r\" (x))\n/* Early sparclites return 63 for an argument of 0, but they warn that future\n   implementations might change this.  
Therefore, leave COUNT_LEADING_ZEROS_0\n   undefined.  */\n#endif /* __sparclite__ */\n#endif /* __sparc_v8__ */\n#endif /* __sparc_v9__ */\n/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */\n#ifndef umul_ppmm\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"! Inlined umul_ppmm\\n\"\t\t\t\t\t\\\n\"\twr\t%%g0,%2,%%y\t! SPARC has 0-3 delay insn after a wr\\n\" \\\n\"\tsra\t%3,31,%%g2\t! Don't move this insn\\n\"\t\t\\\n\"\tand\t%2,%%g2,%%g2\t! Don't move this insn\\n\"\t\t\\\n\"\tandcc\t%%g0,0,%%g1\t! Don't move this insn\\n\"\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmulscc\t%%g1,%3,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tmu
lscc\t%%g1,0,%%g1\\n\"\t\t\t\t\t\t\\\n\"\tadd\t%%g1,%%g2,%0\\n\"\t\t\t\t\t\t\\\n\"\trd\t%%y,%1\"\t\t\t\t\t\t\t\\\n\t   : \"=r\" (w1), \"=r\" (w0) : \"%rI\" (u), \"r\" (v)\t\t\t\\\n\t   : \"%g1\", \"%g2\" __AND_CLOBBER_CC)\n#endif\n#ifndef udiv_qrnnd\n#ifndef LONGLONG_STANDALONE\n#define udiv_qrnnd(q, r, n1, n0, d) \\\n  do { UWtype __r;\t\t\t\t\t\t\t\\\n    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));\t\t\t\\\n    (r) = __r;\t\t\t\t\t\t\t\t\\\n  } while (0)\nextern UWtype __MPN(udiv_qrnnd)(UWtype *, UWtype, UWtype, UWtype);\n#endif /* LONGLONG_STANDALONE */\n#endif /* udiv_qrnnd */\n\n#endif\n"
  },
  {
    "path": "mpn/sparc32/lshift.asm",
    "content": "dnl  SPARC mpn_lshift -- Shift a number left.\n\ndnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t%o0\nC src_ptr\t%o1\nC size\t\t%o2\nC cnt\t\t%o3\n\nASM_START()\nPROLOGUE(mpn_lshift)\n\tsll\t%o2,2,%g1\n\tadd\t%o1,%g1,%o1\tC make %o1 point at end of src\n\tld\t[%o1-4],%g2\tC load first limb\n\tsub\t%g0,%o3,%o5\tC negate shift count\n\tadd\t%o0,%g1,%o0\tC make %o0 point at end of res\n\tadd\t%o2,-1,%o2\n\tandcc\t%o2,4-1,%g4\tC number of limbs in first loop\n\tsrl\t%g2,%o5,%g1\tC compute function result\n\tbe\tL(0)\t\tC if multiple of 4 limbs, skip first loop\n\tst\t%g1,[%sp+80]\n\n\tsub\t%o2,%g4,%o2\tC adjust count for main loop\n\nL(loop0):\n\tld\t[%o1-8],%g3\n\tadd\t%o0,-4,%o0\n\tadd\t%o1,-4,%o1\n\taddcc\t%g4,-1,%g4\n\tsll\t%g2,%o3,%o4\n\tsrl\t%g3,%o5,%g1\n\tmov\t%g3,%g2\n\tor\t%o4,%g1,%o4\n\tbne\tL(loop0)\n\t st\t%o4,[%o0+0]\n\nL(0):\ttst\t%o2\n\tbe\tL(end)\n\t 
nop\n\nL(loop):\n\tld\t[%o1-8],%g3\n\tadd\t%o0,-16,%o0\n\taddcc\t%o2,-4,%o2\n\tsll\t%g2,%o3,%o4\n\tsrl\t%g3,%o5,%g1\n\n\tld\t[%o1-12],%g2\n\tsll\t%g3,%o3,%g4\n\tor\t%o4,%g1,%o4\n\tst\t%o4,[%o0+12]\n\tsrl\t%g2,%o5,%g1\n\n\tld\t[%o1-16],%g3\n\tsll\t%g2,%o3,%o4\n\tor\t%g4,%g1,%g4\n\tst\t%g4,[%o0+8]\n\tsrl\t%g3,%o5,%g1\n\n\tld\t[%o1-20],%g2\n\tsll\t%g3,%o3,%g4\n\tor\t%o4,%g1,%o4\n\tst\t%o4,[%o0+4]\n\tsrl\t%g2,%o5,%g1\n\n\tadd\t%o1,-16,%o1\n\tor\t%g4,%g1,%g4\n\tbne\tL(loop)\n\t st\t%g4,[%o0+0]\n\nL(end):\tsll\t%g2,%o3,%g2\n\tst\t%g2,[%o0-4]\n\tretl\n\tld\t[%sp+80],%o0\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/sparc32/mul_1.asm",
    "content": "dnl  SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store\ndnl  the result in a second limb vector.\n\ndnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\to0\nC s1_ptr\to1\nC size\t\to2\nC s2_limb\to3\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tC Make S1_PTR and RES_PTR point at the end of their blocks\n\tC and put (- 4 x SIZE) in index/loop counter.\n\tsll\t%o2,2,%o2\n\tadd\t%o0,%o2,%o4\tC RES_PTR in o4 since o0 is retval\n\tadd\t%o1,%o2,%o1\n\tsub\t%g0,%o2,%o2\n\n\tcmp\t%o3,0xfff\n\tbgu\tL(large)\n\tnop\n\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tb\tL(0)\n\t 
add\t%o4,-4,%o4\nL(loop0):\n\tst\t%g1,[%o4+%o2]\nL(0):\twr\t%g0,%o3,%y\n\tsra\t%o5,31,%g2\n\tand\t%o3,%g2,%g2\n\tandcc\t%g1,0,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,0,%g1\n\tsra\t%g1,20,%g4\n\tsll\t%g1,12,%g1\n\trd\t%y,%g3\n\tsrl\t%g3,20,%g3\n\tor\t%g1,%g3,%g1\n\n\taddcc\t%g1,%o0,%g1\n\taddx\t%g2,%g4,%o0\tC add sign-compensation and cy to hi limb\n\taddcc\t%o2,4,%o2\tC loop counter\n\tbne,a\tL(loop0)\n\t ld\t[%o1+%o2],%o5\n\n\tretl\n\tst\t%g1,[%o4+%o2]\n\n\nL(large):\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tsra\t%o3,31,%g4\tC g4 = mask of ones iff S2_LIMB < 0\n\tb\tL(1)\n\t add\t%o4,-4,%o4\nL(loop):\n\tst\t%g3,[%o4+%o2]\nL(1):\twr\t%g0,%o5,%y\n\tand\t%o5,%g4,%g2\tC g2 = S1_LIMB iff S2_LIMB < 0, else 0\n\tandcc\t%g0,%g0,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%g0,%g1\n\trd\t%y,%g3\n\taddcc\t%g3,%o0,%g3\n\taddx\t%g2,%g1,%o0\tC add sign-compensation and cy to hi limb\n\taddcc\t%o2,4,%o2\tC loop counter\n\tbne,a\tL(loop)\n\t ld\t[%o1+%o2],%o5\n\n\tretl\n\tst\t%g3,[%o4+%o2]\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/sparc32/rshift.asm",
    "content": "dnl  SPARC mpn_rshift -- Shift a number right.\n\ndnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\t%o0\nC src_ptr\t%o1\nC size\t\t%o2\nC cnt\t\t%o3\n\nASM_START()\nPROLOGUE(mpn_rshift)\n\tld\t[%o1],%g2\tC load first limb\n\tsub\t%g0,%o3,%o5\tC negate shift count\n\tadd\t%o2,-1,%o2\n\tandcc\t%o2,4-1,%g4\tC number of limbs in first loop\n\tsll\t%g2,%o5,%g1\tC compute function result\n\tbe\tL(0)\t\tC if multiple of 4 limbs, skip first loop\n\tst\t%g1,[%sp+80]\n\n\tsub\t%o2,%g4,%o2\tC adjust count for main loop\n\nL(loop0):\n\tld\t[%o1+4],%g3\n\tadd\t%o0,4,%o0\n\tadd\t%o1,4,%o1\n\taddcc\t%g4,-1,%g4\n\tsrl\t%g2,%o3,%o4\n\tsll\t%g3,%o5,%g1\n\tmov\t%g3,%g2\n\tor\t%o4,%g1,%o4\n\tbne\tL(loop0)\n\t st\t%o4,[%o0-4]\n\nL(0):\ttst\t%o2\n\tbe\tL(end)\n\t 
nop\n\nL(loop):\n\tld\t[%o1+4],%g3\n\tadd\t%o0,16,%o0\n\taddcc\t%o2,-4,%o2\n\tsrl\t%g2,%o3,%o4\n\tsll\t%g3,%o5,%g1\n\n\tld\t[%o1+8],%g2\n\tsrl\t%g3,%o3,%g4\n\tor\t%o4,%g1,%o4\n\tst\t%o4,[%o0-16]\n\tsll\t%g2,%o5,%g1\n\n\tld\t[%o1+12],%g3\n\tsrl\t%g2,%o3,%o4\n\tor\t%g4,%g1,%g4\n\tst\t%g4,[%o0-12]\n\tsll\t%g3,%o5,%g1\n\n\tld\t[%o1+16],%g2\n\tsrl\t%g3,%o3,%g4\n\tor\t%o4,%g1,%o4\n\tst\t%o4,[%o0-8]\n\tsll\t%g2,%o5,%g1\n\n\tadd\t%o1,16,%o1\n\tor\t%g4,%g1,%g4\n\tbne\tL(loop)\n\t st\t%g4,[%o0-4]\n\nL(end):\tsrl\t%g2,%o3,%g2\n\tst\t%g2,[%o0-0]\n\tretl\n\tld\t[%sp+80],%o0\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/sparc32/sparc-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for SPARC assembler (32 and 64 bit).\n\n\ndnl  Copyright 2002 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\nchangecom(!)\n\n\ndnl  Usage: REGISTER(reg,attr)\ndnl\ndnl  Give a \".register reg,attr\" directive, if the assembler supports it.\ndnl  HAVE_REGISTER comes from the GMP_ASM_SPARC_REGISTER configure test.\n\ndefine(REGISTER,\nm4_assert_numargs(2)\nm4_assert_defined(`HAVE_REGISTER')\n`ifelse(HAVE_REGISTER,yes,\n`.register `$1',`$2'')')\n\n\ndivert\n"
  },
  {
    "path": "mpn/sparc32/sub_n.asm",
    "content": "dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(res_ptr,%o0)\ndefine(s1_ptr,%o1)\ndefine(s2_ptr,%o2)\ndefine(n,%o3)\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\txor\ts2_ptr,res_ptr,%g1\n\tandcc\t%g1,4,%g0\n\tbne\tL(1)\t\t\tC branch if alignment differs\n\tnop\nC **  V1a  **\n\tandcc\tres_ptr,4,%g0\t\tC res_ptr unaligned? Side effect: cy=0\n\tbe\tL(v1)\t\t\tC if no, branch\n\tnop\nC Add least significant limb separately to align res_ptr and s2_ptr\n\tld\t[s1_ptr],%g4\n\tadd\ts1_ptr,4,s1_ptr\n\tld\t[s2_ptr],%g2\n\tadd\ts2_ptr,4,s2_ptr\n\tadd\tn,-1,n\n\tsubcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\tadd\tres_ptr,4,res_ptr\nL(v1):\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\tcmp\tn,2\t\t\tC if n < 2 ...\n\tbl\tL(end2)\t\t\tC ... 
branch to tail code\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\n\tld\t[s1_ptr+0],%g4\n\taddcc\tn,-10,n\n\tld\t[s1_ptr+4],%g1\n\tldd\t[s2_ptr+0],%g2\n\tblt\tL(fin1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 8 limbs until less than 8 limbs remain\nL(loop1):\n\tsubxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+8],%g4\n\tsubxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+12],%g1\n\tldd\t[s2_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\tsubxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+16],%g4\n\tsubxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+20],%g1\n\tldd\t[s2_ptr+16],%g2\n\tstd\t%o4,[res_ptr+8]\n\tsubxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+24],%g4\n\tsubxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+28],%g1\n\tldd\t[s2_ptr+24],%g2\n\tstd\t%o4,[res_ptr+16]\n\tsubxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+32],%g4\n\tsubxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+36],%g1\n\tldd\t[s2_ptr+32],%g2\n\tstd\t%o4,[res_ptr+24]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tadd\ts1_ptr,32,s1_ptr\n\tadd\ts2_ptr,32,s2_ptr\n\tadd\tres_ptr,32,res_ptr\n\tbge\tL(loop1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\nL(fin1):\n\taddcc\tn,8-2,n\n\tblt\tL(end1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 2 limbs until less than 2 limbs remain\nL(loope1):\n\tsubxcc\t%g4,%g2,%o4\n\tld\t[s1_ptr+8],%g4\n\tsubxcc\t%g1,%g3,%o5\n\tld\t[s1_ptr+12],%g1\n\tldd\t[s2_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-2,n\n\tadd\ts1_ptr,8,s1_ptr\n\tadd\ts2_ptr,8,s2_ptr\n\tadd\tres_ptr,8,res_ptr\n\tbge\tL(loope1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(end1):\n\tsubxcc\t%g4,%g2,%o4\n\tsubxcc\t%g1,%g3,%o5\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\n\tandcc\tn,1,%g0\n\tbe\tL(ret1)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add last limb\n\tld\t[s1_ptr+8],%g4\n\tld\t[s2_ptr+8],%g2\n\tsubxcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr+8]\n\nL(ret1):\n\tretl\n\taddx\t%g0,%g0,%o0\tC return carry-out from most sign. 
limb\n\nL(1):\txor\ts1_ptr,res_ptr,%g1\n\tandcc\t%g1,4,%g0\n\tbne\tL(2)\n\tnop\nC **  V1b  **\n\tandcc\tres_ptr,4,%g0\t\tC res_ptr unaligned? Side effect: cy=0\n\tbe\tL(v1b)\t\t\tC if no, branch\n\tnop\nC Add least significant limb separately to align res_ptr and s1_ptr\n\tld\t[s2_ptr],%g4\n\tadd\ts2_ptr,4,s2_ptr\n\tld\t[s1_ptr],%g2\n\tadd\ts1_ptr,4,s1_ptr\n\tadd\tn,-1,n\n\tsubcc\t%g2,%g4,%o4\n\tst\t%o4,[res_ptr]\n\tadd\tres_ptr,4,res_ptr\nL(v1b):\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\tcmp\tn,2\t\t\tC if n < 2 ...\n\tbl\tL(end2)\t\t\tC ... branch to tail code\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\n\tld\t[s2_ptr+0],%g4\n\taddcc\tn,-10,n\n\tld\t[s2_ptr+4],%g1\n\tldd\t[s1_ptr+0],%g2\n\tblt\tL(fin1b)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 8 limbs until less than 8 limbs remain\nL(loop1b):\n\tsubxcc\t%g2,%g4,%o4\n\tld\t[s2_ptr+8],%g4\n\tsubxcc\t%g3,%g1,%o5\n\tld\t[s2_ptr+12],%g1\n\tldd\t[s1_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\tsubxcc\t%g2,%g4,%o4\n\tld\t[s2_ptr+16],%g4\n\tsubxcc\t%g3,%g1,%o5\n\tld\t[s2_ptr+20],%g1\n\tldd\t[s1_ptr+16],%g2\n\tstd\t%o4,[res_ptr+8]\n\tsubxcc\t%g2,%g4,%o4\n\tld\t[s2_ptr+24],%g4\n\tsubxcc\t%g3,%g1,%o5\n\tld\t[s2_ptr+28],%g1\n\tldd\t[s1_ptr+24],%g2\n\tstd\t%o4,[res_ptr+16]\n\tsubxcc\t%g2,%g4,%o4\n\tld\t[s2_ptr+32],%g4\n\tsubxcc\t%g3,%g1,%o5\n\tld\t[s2_ptr+36],%g1\n\tldd\t[s1_ptr+32],%g2\n\tstd\t%o4,[res_ptr+24]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tadd\ts1_ptr,32,s1_ptr\n\tadd\ts2_ptr,32,s2_ptr\n\tadd\tres_ptr,32,res_ptr\n\tbge\tL(loop1b)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\nL(fin1b):\n\taddcc\tn,8-2,n\n\tblt\tL(end1b)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 2 limbs until less than 2 limbs remain\nL(loope1b):\n\tsubxcc\t%g2,%g4,%o4\n\tld\t[s2_ptr+8],%g4\n\tsubxcc\t%g3,%g1,%o5\n\tld\t[s2_ptr+12],%g1\n\tldd\t[s1_ptr+8],%g2\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in 
register\n\taddcc\tn,-2,n\n\tadd\ts1_ptr,8,s1_ptr\n\tadd\ts2_ptr,8,s2_ptr\n\tadd\tres_ptr,8,res_ptr\n\tbge\tL(loope1b)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(end1b):\n\tsubxcc\t%g2,%g4,%o4\n\tsubxcc\t%g3,%g1,%o5\n\tstd\t%o4,[res_ptr+0]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\n\tandcc\tn,1,%g0\n\tbe\tL(ret1b)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add last limb\n\tld\t[s2_ptr+8],%g4\n\tld\t[s1_ptr+8],%g2\n\tsubxcc\t%g2,%g4,%o4\n\tst\t%o4,[res_ptr+8]\n\nL(ret1b):\n\tretl\n\taddx\t%g0,%g0,%o0\t\tC return carry-out from most sign. limb\n\nC **  V2  **\nC If we come here, the alignment of s1_ptr and res_ptr as well as the\nC alignment of s2_ptr and res_ptr differ.  Since there are only two ways\nC things can be aligned (that we care about) we now know that the alignment\nC of s1_ptr and s2_ptr are the same.\n\nL(2):\tcmp\tn,1\n\tbe\tL(jone)\n\tnop\n\tandcc\ts1_ptr,4,%g0\t\tC s1_ptr unaligned? Side effect: cy=0\n\tbe\tL(v2)\t\t\tC if no, branch\n\tnop\nC Add least significant limb separately to align s1_ptr and s2_ptr\n\tld\t[s1_ptr],%g4\n\tadd\ts1_ptr,4,s1_ptr\n\tld\t[s2_ptr],%g2\n\tadd\ts2_ptr,4,s2_ptr\n\tadd\tn,-1,n\n\tsubcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\tadd\tres_ptr,4,res_ptr\n\nL(v2):\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-8,n\n\tblt\tL(fin2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add blocks of 8 limbs until less than 8 limbs remain\nL(loop2):\n\tldd\t[s1_ptr+0],%g2\n\tldd\t[s2_ptr+0],%o4\n\tsubxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+0]\n\tsubxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+4]\n\tldd\t[s1_ptr+8],%g2\n\tldd\t[s2_ptr+8],%o4\n\tsubxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+8]\n\tsubxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+12]\n\tldd\t[s1_ptr+16],%g2\n\tldd\t[s2_ptr+16],%o4\n\tsubxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+16]\n\tsubxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+20]\n\tldd\t[s1_ptr+24],%g2\n\tldd\t[s2_ptr+24],%o4\n\tsubxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+24]\n\tsubxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+28]\n\taddx\t%g0,%g0,%o4\t\tC save cy 
in register\n\taddcc\tn,-8,n\n\tadd\ts1_ptr,32,s1_ptr\n\tadd\ts2_ptr,32,s2_ptr\n\tadd\tres_ptr,32,res_ptr\n\tbge\tL(loop2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\n\nL(fin2):\n\taddcc\tn,8-2,n\n\tblt\tL(end2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(loope2):\n\tldd\t[s1_ptr+0],%g2\n\tldd\t[s2_ptr+0],%o4\n\tsubxcc\t%g2,%o4,%g2\n\tst\t%g2,[res_ptr+0]\n\tsubxcc\t%g3,%o5,%g3\n\tst\t%g3,[res_ptr+4]\n\taddx\t%g0,%g0,%o4\t\tC save cy in register\n\taddcc\tn,-2,n\n\tadd\ts1_ptr,8,s1_ptr\n\tadd\ts2_ptr,8,s2_ptr\n\tadd\tres_ptr,8,res_ptr\n\tbge\tL(loope2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nL(end2):\n\tandcc\tn,1,%g0\n\tbe\tL(ret2)\n\tsubcc\t%g0,%o4,%g0\t\tC restore cy\nC Add last limb\nL(jone):\n\tld\t[s1_ptr],%g4\n\tld\t[s2_ptr],%g2\n\tsubxcc\t%g4,%g2,%o4\n\tst\t%o4,[res_ptr]\n\nL(ret2):\n\tretl\n\taddx\t%g0,%g0,%o0\t\tC return carry-out from most sign. limb\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/sparc32/submul_1.asm",
    "content": "dnl  SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract\ndnl  the result from a second limb vector.\n\ndnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC res_ptr\to0\nC s1_ptr\to1\nC size\t\to2\nC s2_limb\to3\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tC Make S1_PTR and RES_PTR point at the end of their blocks\n\tC and put (- 4 x SIZE) in index/loop counter.\n\tsll\t%o2,2,%o2\n\tadd\t%o0,%o2,%o4\tC RES_PTR in o4 since o0 is retval\n\tadd\t%o1,%o2,%o1\n\tsub\t%g0,%o2,%o2\n\n\tcmp\t%o3,0xfff\n\tbgu\tL(large)\n\tnop\n\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tb\tL(0)\n\t 
add\t%o4,-4,%o4\nL(loop0):\n\tsubcc\t%o5,%g1,%g1\n\tld\t[%o1+%o2],%o5\n\taddx\t%o0,%g0,%o0\n\tst\t%g1,[%o4+%o2]\nL(0):\twr\t%g0,%o3,%y\n\tsra\t%o5,31,%g2\n\tand\t%o3,%g2,%g2\n\tandcc\t%g1,0,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,%o5,%g1\n\tmulscc\t%g1,0,%g1\n\tsra\t%g1,20,%g4\n\tsll\t%g1,12,%g1\n\trd\t%y,%g3\n\tsrl\t%g3,20,%g3\n\tor\t%g1,%g3,%g1\n\n\taddcc\t%g1,%o0,%g1\n\taddx\t%g2,%g4,%o0\tC add sign-compensation and cy to hi limb\n\taddcc\t%o2,4,%o2\tC loop counter\n\tbne\tL(loop0)\n\t ld\t[%o4+%o2],%o5\n\n\tsubcc\t%o5,%g1,%g1\n\taddx\t%o0,%g0,%o0\n\tretl\n\tst\t%g1,[%o4+%o2]\n\nL(large):\n\tld\t[%o1+%o2],%o5\n\tmov\t0,%o0\n\tsra\t%o3,31,%g4\tC g4 = mask of ones iff S2_LIMB < 0\n\tb\tL(1)\n\t add\t%o4,-4,%o4\nL(loop):\n\tsubcc\t%o5,%g3,%g3\n\tld\t[%o1+%o2],%o5\n\taddx\t%o0,%g0,%o0\n\tst\t%g3,[%o4+%o2]\nL(1):\twr\t%g0,%o5,%y\n\tand\t%o5,%g4,%g2\n\tandcc\t%g0,%g0,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%o3,%g1\n\tmulscc\t%g1,%g0,%g1\n\trd\t%y,%g3\n\taddcc\t%g3,%o0,%g3\n\taddx\t%g2,%g1,%o0\n\taddcc\t%o2,4,%o2\n\tbne\tL(loop)\n\t 
ld\t[%o4+%o2],%o5\n\n\tsubcc\t%o5,%g3,%g3\n\taddx\t%o0,%g0,%o0\n\tretl\n\tst\t%g3,[%o4+%o2]\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/sparc32/udiv.asm",
    "content": "dnl  SPARC v7 __udiv_qrnnd division support, used from longlong.h.\ndnl  This is for v7 CPUs with a floating-point unit.\n\ndnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC rem_ptr\ti0\nC n1\t\ti1\nC n0\t\ti2\nC d\t\ti3\n\nASM_START()\n\nifdef(`PIC',\n`\tTEXT\nL(getpc):\n\tretl\n\tnop')\n\n\tTEXT\n\tALIGN(8)\nL(C0):\t.double\t0r4294967296\nL(C1):\t.double\t0r2147483648\n\nPROLOGUE(mpn_udiv_qrnnd)\n\tsave\t%sp,-104,%sp\n\tst\t%i1,[%fp-8]\n\tld\t[%fp-8],%f10\n\nifdef(`PIC',\n`L(pc):\tcall\tL(getpc)\t\tC put address of this insn in 
%o7\n\tldd\t[%o7+L(C0)-L(pc)],%f8',\n`\tsethi\t%hi(L(C0)),%o7\n\tldd\t[%o7+%lo(L(C0))],%f8')\n\n\tfitod\t%f10,%f4\n\tcmp\t%i1,0\n\tbge\tL(248)\n\tmov\t%i0,%i5\n\tfaddd\t%f4,%f8,%f4\nL(248):\n\tst\t%i2,[%fp-8]\n\tld\t[%fp-8],%f10\n\tfmuld\t%f4,%f8,%f6\n\tcmp\t%i2,0\n\tbge\tL(249)\n\tfitod\t%f10,%f2\n\tfaddd\t%f2,%f8,%f2\nL(249):\n\tst\t%i3,[%fp-8]\n\tfaddd\t%f6,%f2,%f2\n\tld\t[%fp-8],%f10\n\tcmp\t%i3,0\n\tbge\tL(250)\n\tfitod\t%f10,%f4\n\tfaddd\t%f4,%f8,%f4\nL(250):\n\tfdivd\t%f2,%f4,%f2\n\nifdef(`PIC',\n`\tldd\t[%o7+L(C1)-L(pc)],%f4',\n`\tsethi\t%hi(L(C1)),%o7\n\tldd\t[%o7+%lo(L(C1))],%f4')\n\n\tfcmped\t%f2,%f4\n\tnop\n\tfbge,a\tL(251)\n\tfsubd\t%f2,%f4,%f2\n\tfdtoi\t%f2,%f2\n\tst\t%f2,[%fp-8]\n\tb\tL(252)\n\tld\t[%fp-8],%i4\nL(251):\n\tfdtoi\t%f2,%f2\n\tst\t%f2,[%fp-8]\n\tld\t[%fp-8],%i4\n\tsethi\t%hi(-2147483648),%g2\n\txor\t%i4,%g2,%i4\nL(252):\n\twr\t%g0,%i4,%y\n\tsra\t%i3,31,%g2\n\tand\t%i4,%g2,%g2\n\tandcc\t%g0,0,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,%i3,%g1\n\tmulscc\t%g1,0,%g1\n\tadd\t%g1,%g2,%i0\n\trd\t%y,%g3\n\tsubcc\t%i2,%g3,%o7\n\tsubxcc\t%i1,%i0,%g0\n\tbe\tL(253)\n\tcmp\t%o7,%i3\n\n\tadd\t%i4,-1,%i0\n\tadd\t%o7,%i3,%o7\n\tst\t%o7,[%i5]\n\tret\n\trestore\nL(253):\n\tblu\tL(246)\n\tmov\t%i4,%i0\n\tadd\t%i4,1,%i0\n\tsub\t%o7,%i3,%o7\nL(246):\n\tst\t%o7,[%i5]\n\tret\n\trestore
\nEPILOGUE(mpn_udiv_qrnnd)\n"
  },
  {
    "path": "mpn/sparc32/udiv_nfp.asm",
    "content": "dnl  SPARC v7 __udiv_qrnnd division support, used from longlong.h.\ndnl  This is for v7 CPUs without a floating-point unit.\n\ndnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC rem_ptr\to0\nC n1\t\to1\nC n0\t\to2\nC 
d\t\to3\n\nASM_START()\nPROLOGUE(mpn_udiv_qrnnd)\n\ttst\t%o3\n\tbneg\tL(largedivisor)\n\tmov\t8,%g1\n\n\tb\tL(p1)\n\taddxcc\t%o2,%o2,%o2\n\nL(plop):\n\tbcc\tL(n1)\n\taddxcc\t%o2,%o2,%o2\nL(p1):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%o3,%o4\n\tbcc\tL(n2)\n\taddxcc\t%o2,%o2,%o2\nL(p2):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%o3,%o4\n\tbcc\tL(n3)\n\taddxcc\t%o2,%o2,%o2\nL(p3):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%o3,%o4\n\tbcc\tL(n4)\n\taddxcc\t%o2,%o2,%o2\nL(p4):\taddx\t%o1,%o1,%o1\n\taddcc\t%g1,-1,%g1\n\tbne\tL(plop)\n\tsubcc\t%o1,%o3,%o4\n\tbcc\tL(n5)\n\taddxcc\t%o2,%o2,%o2\nL(p5):\tst\t%o1,[%o0]\n\tretl\n\txnor\t%g0,%o2,%o0\n\nL(nlop):\n\tbcc\tL(p1)\n\taddxcc\t%o2,%o2,%o2\nL(n1):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%o3,%o1\n\tbcc\tL(p2)\n\taddxcc\t%o2,%o2,%o2\nL(n2):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%o3,%o1\n\tbcc\tL(p3)\n\taddxcc\t%o2,%o2,%o2\nL(n3):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%o3,%o1\n\tbcc\tL(p4)\n\taddxcc\t%o2,%o2,%o2\nL(n4):\taddx\t%o4,%o4,%o4\n\taddcc\t%g1,-1,%g1\n\tbne\tL(nlop)\n\tsubcc\t%o4,%o3,%o1\n\tbcc\tL(p5)\n\taddxcc\t%o2,%o2,%o2\nL(n5):\tst\t%o4,[%o0]\n\tretl\n\txnor\t%g0,%o2,%o0\n\nL(largedivisor):\n\tand\t%o2,1,%o5\tC %o5 = n0 & 1\n\n\tsrl\t%o2,1,%o2\n\tsll\t%o1,31,%g2\n\tor\t%g2,%o2,%o2\tC %o2 = lo(n1n0 >> 1)\n\tsrl\t%o1,1,%o1\tC %o1 = hi(n1n0 >> 1)\n\n\tand\t%o3,1,%g2\n\tsrl\t%o3,1,%g3\tC %g3 = floor(d / 2)\n\tadd\t%g3,%g2,%g3\tC %g3 = ceil(d / 2)\n\n\tb\tL(Lp1)\n\taddxcc\t%o2,%o2,%o2\n\nL(Lplop):\n\tbcc\tL(Ln1)\n\taddxcc\t%o2,%o2,%o2\nL(Lp1):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%g3,%o4\n\tbcc\tL(Ln2)\n\taddxcc\t%o2,%o2,%o2\nL(Lp2):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%g3,%o4\n\tbcc\tL(Ln3)\n\taddxcc\t%o2,%o2,%o2\nL(Lp3):\taddx\t%o1,%o1,%o1\n\tsubcc\t%o1,%g3,%o4\n\tbcc\tL(Ln4)\n\taddxcc\t%o2,%o2,%o2\nL(Lp4):\taddx\t%o1,%o1,%o1\n\taddcc\t%g1,-1,%g1\n\tbne\tL(Lplop)\n\tsubcc\t%o1,%g3,%o4\n\tbcc\tL(Ln5)\n\taddxcc\t%o2,%o2,%o2\nL(Lp5):\tadd\t%o1,%o1,%o1\tC << 
1\n\ttst\t%g2\n\tbne\tL(oddp)\n\tadd\t%o5,%o1,%o1\n\tst\t%o1,[%o0]\n\tretl\n\txnor\t%g0,%o2,%o0\n\nL(Lnlop):\n\tbcc\tL(Lp1)\n\taddxcc\t%o2,%o2,%o2\nL(Ln1):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%g3,%o1\n\tbcc\tL(Lp2)\n\taddxcc\t%o2,%o2,%o2\nL(Ln2):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%g3,%o1\n\tbcc\tL(Lp3)\n\taddxcc\t%o2,%o2,%o2\nL(Ln3):\taddx\t%o4,%o4,%o4\n\tsubcc\t%o4,%g3,%o1\n\tbcc\tL(Lp4)\n\taddxcc\t%o2,%o2,%o2\nL(Ln4):\taddx\t%o4,%o4,%o4\n\taddcc\t%g1,-1,%g1\n\tbne\tL(Lnlop)\n\tsubcc\t%o4,%g3,%o1\n\tbcc\tL(Lp5)\n\taddxcc\t%o2,%o2,%o2\nL(Ln5):\tadd\t%o4,%o4,%o4\tC << 1\n\ttst\t%g2\n\tbne\tL(oddn)\n\tadd\t%o5,%o4,%o4\n\tst\t%o4,[%o0]\n\tretl\n\txnor\t%g0,%o2,%o0\n\nL(oddp):\n\txnor\t%g0,%o2,%o2\n\tC q' in %o2. r' in %o1\n\taddcc\t%o1,%o2,%o1\n\tbcc\tL(Lp6)\n\taddx\t%o2,0,%o2\n\tsub\t%o1,%o3,%o1\nL(Lp6):\tsubcc\t%o1,%o3,%g0\n\tbcs\tL(Lp7)\n\tsubx\t%o2,-1,%o2\n\tsub\t%o1,%o3,%o1\nL(Lp7):\tst\t%o1,[%o0]\n\tretl\n\tmov\t%o2,%o0\n\nL(oddn):\n\txnor\t%g0,%o2,%o2\n\tC q' in %o2. r' in %o4\n\taddcc\t%o4,%o2,%o4\n\tbcc\tL(Ln6)\n\taddx\t%o2,0,%o2\n\tsub\t%o4,%o3,%o4\nL(Ln6):\tsubcc\t%o4,%o3,%g0\n\tbcs\tL(Ln7)\n\tsubx\t%o2,-1,%o2\n\tsub\t%o4,%o3,%o4\nL(Ln7):\tst\t%o4,[%o0]\n\tretl\n\tmov\t%o2,%o0\nEPILOGUE(mpn_udiv_qrnnd)\n"
  },
  {
    "path": "mpn/sparc32/umul.asm",
    "content": "dnl  SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.\n\ndnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_umul_ppmm)\n\twr\t%g0,%o1,%y\n\tsra\t%o2,31,%g2\tC Don't move this insn\n\tand\t%o1,%g2,%g2\tC Don't move this insn\n\tandcc\t%g0,0,%g1\tC Don't move this 
insn\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,%o2,%g1\n\tmulscc\t%g1,0,%g1\n\trd\t%y,%g3\n\tst\t%g3,[%o0]\n\tretl\n\tadd\t%g1,%g2,%o0\nEPILOGUE(mpn_umul_ppmm)\n"
  },
  {
    "path": "mpn/sparc32/v9/README",
    "content": "Code for SPARC processors implementing version 9 of the SPARC architecture.\nThis code is for systems that don't preserve the full 64-bit contents of\ninteger registers at context switch.  For other systems (such as Solaris 7 or\nlater) use the code in ../../sparc64.\n"
  },
  {
    "path": "mpn/sparc32/v9/add_n.asm.broken",
    "content": "dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store\ndnl  sum in a third limb vector.\n\ndnl  Copyright 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\n\n\ndnl THIS CODE IS BROKEN FOR ONE LIMB \n\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(rp,%o0)\ndefine(s1p,%o1)\ndefine(s2p,%o2)\ndefine(n,%o3)\ndefine(cy,%g1)\n\nC This code uses 64-bit operations on `o' and `g' registers.  It doesn't\nC require that `o' registers' upper 32 bits are preserved by the operating\nC system, but if they are not, they must be zeroed.  
That is indeed what\nC happens at least on Slowaris 2.5 and 2.6.\n\nC On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at\nC about 10 cycles/limb from the Ecache.\n\nASM_START()\nPROLOGUE(mpn_add_n)\n\tlduw\t[s1p+0],%o4\n\tlduw\t[s2p+0],%o5\n\taddcc\tn,-2,n\n\tbl,pn\t%icc,L(end1)\n\tlduw\t[s1p+4],%g2\n\tlduw\t[s2p+4],%g3\n\tbe,pn\t%icc,L(end2)\n\tmov\t0,cy\n\n\t.align\t16\nL(loop):\n\tadd\t%o4,%o5,%g4\n\tadd\trp,8,rp\n\tlduw\t[s1p+8],%o4\n\tfitod\t%f0,%f2\nC ---\n\tadd\tcy,%g4,%g4\n\taddcc\tn,-1,n\n\tlduw\t[s2p+8],%o5\n\tfitod\t%f0,%f2\nC ---\n\tsrlx\t%g4,32,cy\n\tadd\ts2p,8,s2p\n\tstw\t%g4,[rp-8]\n\tbe,pn\t%icc,L(exito)+4\nC ---\n\tadd\t%g2,%g3,%g4\n\taddcc\tn,-1,n\n\tlduw\t[s1p+12],%g2\n\tfitod\t%f0,%f2\nC ---\n\tadd\tcy,%g4,%g4\n\tadd\ts1p,8,s1p\n\tlduw\t[s2p+4],%g3\n\tfitod\t%f0,%f2\nC ---\n\tsrlx\t%g4,32,cy\n\tbne,pt\t%icc,L(loop)\n\tstw\t%g4,[rp-4]\nC ---\nL(exite):\n\tadd\t%o4,%o5,%g4\n\tadd\tcy,%g4,%g4\n\tsrlx\t%g4,32,cy\n\tstw\t%g4,[rp+0]\n\tadd\t%g2,%g3,%g4\n\tadd\tcy,%g4,%g4\n\tstw\t%g4,[rp+4]\n\tretl\n\tsrlx\t%g4,32,%o0\n\nL(exito):\n\tadd\t%g2,%g3,%g4\n\tadd\tcy,%g4,%g4\n\tsrlx\t%g4,32,cy\n\tstw\t%g4,[rp-4]\n\tadd\t%o4,%o5,%g4\n\tadd\tcy,%g4,%g4\n\tstw\t%g4,[rp+0]\n\tretl\n\tsrlx\t%g4,32,%o0\n\nL(end1):\n\tadd\t%o4,%o5,%g4\n\tstw\t%g4,[rp+0]\n\tretl\n\tsrlx\t%g4,32,%o0\n\nL(end2):\n\tadd\t%o4,%o5,%g4\n\tsrlx\t%g4,32,cy\n\tstw\t%g4,[rp+0]\n\tadd\t%g2,%g3,%g4\n\tadd\tcy,%g4,%g4\n\tstw\t%g4,[rp+4]\n\tretl\n\tsrlx\t%g4,32,%o0\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/sparc32/v9/addmul_1.asm",
    "content": "dnl  SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add\ndnl  the result to a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC Algorithm: We use two floating-point multiplies per limb product, with the\nC invariant v operand split into two 16-bit pieces, and the u operand split\nC into 32-bit pieces.  We convert the two 48-bit products and transfer them to\nC the integer unit.\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     6.5\nC UltraSPARC 3:\t      ?\n\nC Possible optimizations:\nC   1. Combine 32-bit memory operations into 64-bit operations.  Since we're\nC      memory bandwidth limited, this could save 1.5 cycles/limb.\nC   2. Unroll the inner loop.  Since we already use alternate temporary areas,\nC      it is very straightforward to unroll, using an exit branch midways.\nC      Unrolling would allow deeper scheduling which could improve speed for L2\nC      cache case.\nC   3. For mpn_mul_1: Use more alternating temp areas.  
The std'es and ldx'es\nC      aren't sufficiently apart-scheduled with just two temp areas.\nC   4. Specialize for particular v values.  If its upper 16 bits are zero, we\nC      could save many operations.\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\ndefine(`FSIZE',224)\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\n\tadd\t%sp, -FSIZE, %sp\n\tsethi\t%hi(0xffff), %g1\n\tsrl\t%o3, 16, %g2\n\tor\t%g1, %lo(0xffff), %g1\n\tand\t%o3, %g1, %g1\n\tstx\t%g1, [%sp+104]\n\tstx\t%g2, [%sp+112]\n\tldd\t[%sp+104], %f6\n\tldd\t[%sp+112], %f8\n\tfxtod\t%f6, %f6\n\tfxtod\t%f8, %f8\n\tld\t[%sp+104], %f10\t\tC zero f10\n\n\tmov\t0, %g3\t\t\tC cy = 0\n\ndefine(`fanop', `fitod %f18, %f0')\tC  A quasi nop running in the FA pipe\n\n\tadd\t%sp, 160, %o5\t\tC point in scratch area\n\tand\t%o5, -32, %o5\t\tC align at 0 (mod 32) in scratch area\n\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_two_or_more\n\tfxtod\t%f10, %f2\n\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L1\n\tadd\t%o0, -16, %o0\n\n\t.align\t16\n.L_two_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_three_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tstd\t%f12, [%o5+8]\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tb\t.L2\n\tadd\t%o0, -12, %o0\n\n\t.align\t16\n.L_three_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, 
%f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_four_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L3\n\tadd\t%o0, -8, %o0\n\n\t.align\t16\n.L_four_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_five_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L4\n\tadd\t%o0, -4, %o0\n\n\t.align\t16\n.L_five_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tbne,pt\t%icc, .Loop\n\tfxtod\t%f10, %f2\n\tb,a\t.L5\n\nC BEGIN MAIN LOOP\n\t.align 16\nC -- 0\n.Loop:\tnop\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\nC -- 1\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tadd\t%o0, 4, %o0\t\tC rp++\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\nC -- 2\n\tnop\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tfanop\nC -- 3\n\tnop\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\nC -- 4\n\tnop\n\tadd\t%g5, %g4, 
%g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\nC -- 5\n\txor\t%o5, 16, %o5\t\tC alternate scratch variables\n\tadd\t%o1, 4, %o1\t\tC up++\n\tstw\t%g4, [%o0-4]\n\tfanop\nC -- 6\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tbne,pt\t%icc, .Loop\n\tfxtod\t%f10, %f2\nC END MAIN LOOP\n\n.L5:\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g4, %g3, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tadd\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+0]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+4], %g5\t\tC read rp[i]\n\n.L4:\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tadd\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+4]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+8], %g5\t\tC read rp[i]\n\n.L3:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tadd\t%g5, %g4, %g4\t\tC p += rp[i]\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+8]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+12], %g5\t\tC read rp[i]\n\n.L2:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tadd\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstw\t%g4, [%o0+12]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+16], %g5\t\tC read rp[i]\n\n.L1:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tadd\t%g3, %g4, 
%g4\t\tC p += cy\n\tadd\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstw\t%g4, [%o0+16]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n\tmov\t%g3, %o0\n\tretl\n\tsub\t%sp, -FSIZE, %sp\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/sparc32/v9/mul_1.asm",
    "content": "dnl  SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and store\ndnl  the result in a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC Algorithm: We use two floating-point multiplies per limb product, with the\nC invariant v operand split into two 16-bit pieces, and the u operand split\nC into 32-bit pieces.  We convert the two 48-bit products and transfer them to\nC the integer unit.\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     6.5\nC UltraSPARC 3:\t      ?\n\nC Possible optimizations:\nC   1. Combine 32-bit memory operations into 64-bit operations.  Since we're\nC      memory bandwidth limited, this could save 1.5 cycles/limb.\nC   2. Unroll the inner loop.  Since we already use alternate temporary areas,\nC      it is very straightforward to unroll, using an exit branch midways.\nC      Unrolling would allow deeper scheduling which could improve speed for L2\nC      cache case.\nC   3. For mpn_mul_1: Use more alternating temp areas.  
The std'es and ldx'es\nC      aren't sufficiently apart-scheduled with just two temp areas.\nC   4. Specialize for particular v values.  If its upper 16 bits are zero, we\nC      could save many operations.\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\ndefine(`FSIZE',224)\n\nASM_START()\nPROLOGUE(mpn_mul_1)\n\tadd\t%sp, -FSIZE, %sp\n\tsethi\t%hi(0xffff), %g1\n\tsrl\t%o3, 16, %g2\n\tor\t%g1, %lo(0xffff), %g1\n\tand\t%o3, %g1, %g1\n\tstx\t%g1, [%sp+104]\n\tstx\t%g2, [%sp+112]\n\tldd\t[%sp+104], %f6\n\tldd\t[%sp+112], %f8\n\tfxtod\t%f6, %f6\n\tfxtod\t%f8, %f8\n\tld\t[%sp+104], %f10\t\tC zero f10\n\n\tmov\t0, %g3\t\t\tC cy = 0\n\ndefine(`fanop', `fitod %f18, %f0')\tC  A quasi nop running in the FA pipe\n\n\tadd\t%sp, 160, %o5\t\tC point in scratch area\n\tand\t%o5, -32, %o5\t\tC align at 0 (mod 32) in scratch area\n\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_two_or_more\n\tfxtod\t%f10, %f2\n\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tb\t.L1\n\tadd\t%o0, -16, %o0\n\n\t.align\t16\n.L_two_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_three_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tstd\t%f12, [%o5+8]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tb\t.L2\n\tadd\t%o0, -12, %o0\n\n\t.align\t16\n.L_three_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, 
%o1\t\tC up++\n\tbne,pt\t%icc, .L_four_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tb\t.L3\n\tadd\t%o0, -8, %o0\n\n\t.align\t16\n.L_four_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_five_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tb\t.L4\n\tadd\t%o0, -4, %o0\n\n\t.align\t16\n.L_five_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .Loop\n\tfxtod\t%f10, %f2\n\tb,a\t.L5\n\nC BEGIN MAIN LOOP\n\t.align 16\nC -- 0\n.Loop:\tnop\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\nC -- 1\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tadd\t%o0, 4, %o0\t\tC rp++\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\nC -- 2\n\tnop\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tfanop\nC -- 3\n\tnop\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\nC -- 4\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tadd\t%o1, 4, %o1\t\tC up++\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\nC -- 5\n\txor\t%o5, 16, %o5\t\tC alternate scratch variables\n\tstw\t%g4, [%o0-4]\n\tbne,pt\t%icc, 
.Loop\n\tfxtod\t%f10, %f2\nC END MAIN LOOP\n\n.L5:\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g4, %g3, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+0]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n.L4:\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tstd\t%f12, [%o5+8]\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+4]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n.L3:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+8]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n.L2:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstw\t%g4, [%o0+12]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n.L1:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstw\t%g4, [%o0+16]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n\tmov\t%g3, %o0\n\tretl\n\tsub\t%sp, -FSIZE, %sp\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/sparc32/v9/sqr_diagonal.asm",
    "content": "dnl  SPARC v9 32-bit mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\n\nC This code uses a very deep software pipeline, due to the need for moving data\nC forth and back between the integer registers and floating-point registers.\nC\nC A VIS variant of this code would make the pipeline less deep, since the\nC masking now done in the integer unit could take place in the floating-point\nC unit using the FAND instruction.  It would be possible to save several cycles\nC too.\nC\nC On UltraSPARC 1 and 2, this code runs at 11 cycles/limb from the Dcache and\nC not much slower from the Ecache.  It would perhaps be possible to shave off\nC one cycle, but not easily.  We cannot do better than 10 cycles/limb with the\nC used instructions, since we have 10 memory operations per limb.  
But a VIS\nC variant could run three cycles faster than the corresponding non-VIS code.\n\nC This is non-pipelined code showing the algorithm:\nC\nC .Loop:\nC\tlduw\t[up+0],%g4\t\tC 00000000hhhhllll\nC\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\nC\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\nC\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\nC\tstx\t%g2,[%fp+80]\nC\tldd\t[%fp+80],%f0\nC\tfitod\t%f0,%f4\t\t\tC hi16\nC\tfitod\t%f1,%f6\t\t\tC lo16\nC\tld\t[up+0],%f9\nC\tfxtod\t%f8,%f2\nC\tfmuld\t%f2,%f4,%f4\nC\tfmuld\t%f2,%f6,%f6\nC\tfdtox\t%f4,%f4\nC\tfdtox\t%f6,%f6\nC\tstd\t%f4,[%fp-24]\nC\tstd\t%f6,[%fp-16]\nC\tldx\t[%fp-24],%g2\nC\tldx\t[%fp-16],%g1\nC\tsllx\t%g2,16,%g2\nC\tadd\t%g2,%g1,%g1\nC\tstw\t%g1,[rp+0]\nC\tsrlx\t%g1,32,%l0\nC\tstw\t%l0,[rp+4]\nC\tadd\tup,4,up\nC\tsubcc\tn,1,n\nC\tbne,pt\t%icc,.Loop\nC\tadd\trp,8,rp\n\ndefine(`fanop',`fitod %f12,%f10')\tdnl  A quasi nop running in the FA pipe\n\nASM_START()\n\n\tTEXT\n\tALIGN(4)\n.Lnoll:\n\t.word\t0\n\nPROLOGUE(mpn_sqr_diagonal)\n\tsave\t%sp,-256,%sp\n\nifdef(`PIC',\n`.Lpc:\trd\t%pc,%o7\n\tld\t[%o7+.Lnoll-.Lpc],%f8',\n`\tsethi\t%hi(.Lnoll),%g1\n\tld\t[%g1+%lo(.Lnoll)],%f8')\n\n\tsethi\t%hi(0xffff0000),%g5\n\tadd\t%i1,-8,%i1\n\n\tlduw\t[%i1+8],%g4\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tbne,pt\t%icc,.L_grt_1\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tstx\t%g2,[%fp+80]\n\tld\t[%i1],%f9\n\tldd\t[%fp+80],%f0\n\tfxtod\t%f8,%f2\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tfmuld\t%f2,%f6,%f6\n\tfdtox\t%f4,%f4\n\tfdtox\t%f6,%f6\n\tstd\t%f4,[%fp-24]\n\tstd\t%f6,[%fp-16]\n\n\tadd\t%fp, 80, %l3\n\tadd\t%fp, -24, %l4\n\tadd\t%fp, 72, %l5\n\tb\t.L1\n\tadd\t%fp, -40, %l6\n\n.L_grt_1:\n\tstx\t%g2,[%fp+80]\n\tlduw\t[%i1+8],%g4\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tor\t%g3,%g4,%g2\t\tC 
0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tbne,pt\t%icc,.L_grt_2\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\n\tstx\t%g2,[%fp+72]\n\tld\t[%i1],%f9\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+80],%f0\n\tfxtod\t%f8,%f2\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tldd\t[%fp+72],%f0\n\tfdtox\t%f4,%f4\n\tfdtox\t%f6,%f6\n\tstd\t%f4,[%fp-24]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%fp-16]\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tfmuld\t%f2,%f6,%f6\n\tfdtox\t%f4,%f4\n\n\tadd\t%fp, 72, %l3\n\tadd\t%fp, -40, %l4\n\tadd\t%fp, 80, %l5\n\tb\t.L2\n\tadd\t%fp, -24, %l6\n\n.L_grt_2:\n\tstx\t%g2,[%fp+72]\n\tlduw\t[%i1+8],%g4\n\tld\t[%i1],%f9\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+80],%f0\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tfxtod\t%f8,%f2\n\tbne,pt\t%icc,.L_grt_3\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\n\tstx\t%g2,[%fp+80]\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+72],%f0\n\tfdtox\t%f4,%f4\n\tfdtox\t%f6,%f6\n\tstd\t%f4,[%fp-24]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%fp-16]\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tld\t[%i1],%f9\n\tadd\t%fp, 80, %l3\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%fp, -24, %l4\n\tldd\t[%fp+80],%f0\n\tadd\t%fp, 72, %l5\n\tfdtox\t%f4,%f4\n\tb\t.L3\n\tadd\t%fp, -40, %l6\n\n.L_grt_3:\n\tstx\t%g2,[%fp+80]\n\tfitod\t%f0,%f4\n\tlduw\t[%i1+8],%g4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+72],%f0\n\tfdtox\t%f4,%f4\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tfdtox\t%f6,%f6\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tstd\t%f4,[%fp-24]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%fp-16]\n\tbne,pt\t%icc,.L_grt_4\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\n\tstx\t%g2,[%fp+72]\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tadd\t%fp, 72, 
%l3\n\tfmuld\t%f2,%f4,%f4\n\tadd\t%fp, -40, %l4\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+80],%f0\n\tadd\t%fp, 80, %l5\n\tfdtox\t%f4,%f4\n\tb\t.L4\n\tadd\t%fp, -24, %l6\n\n.L_grt_4:\n\tstx\t%g2,[%fp+72]\n\tfitod\t%f0,%f4\n\tlduw\t[%i1+8],%g4\n\tfitod\t%f1,%f6\n\tfmuld\t%f2,%f4,%f4\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+80],%f0\n\tfdtox\t%f4,%f4\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tfdtox\t%f6,%f6\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tstd\t%f4,[%fp-40]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%fp-32]\n\tbe,pn\t%icc,.L5\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\n\tb,a\t.Loop\n\n\t.align\t16\nC --- LOOP BEGIN\n.Loop:\tnop\n\tnop\n\tstx\t%g2,[%fp+80]\n\tfitod\t%f0,%f4\nC ---\n\tnop\n\tnop\n\tlduw\t[%i1+8],%g4\n\tfitod\t%f1,%f6\nC ---\n\tnop\n\tnop\n\tldx\t[%fp-24],%g2\t\tC p16\n\tfanop\nC ---\n\tnop\n\tnop\n\tldx\t[%fp-16],%g1\t\tC p0\n\tfmuld\t%f2,%f4,%f4\nC ---\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\nC ---\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 (ADD1)\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+72],%f0\n\tfanop\nC ---\n\tsrlx\t%g1,32,%l0\n\tnop\n\tstw\t%g1,[%i0-8]\n\tfdtox\t%f4,%f4\nC ---\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tnop\n\tstw\t%l0,[%i0-4]\n\tfdtox\t%f6,%f6\nC ---\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tstd\t%f4,[%fp-24]\n\tfxtod\t%f8,%f2\nC ---\n\tstd\t%f6,[%fp-16]\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\tbe,pn\t%icc,.Lend\n\tfanop\nC ---  LOOP MIDDLE\n\tnop\n\tnop\n\tstx\t%g2,[%fp+72]\n\tfitod\t%f0,%f4\nC ---\n\tnop\n\tnop\n\tlduw\t[%i1+8],%g4\n\tfitod\t%f1,%f6\nC ---\n\tnop\n\tnop\n\tldx\t[%fp-40],%g2\t\tC p16\n\tfanop\nC ---\n\tnop\n\tnop\n\tldx\t[%fp-32],%g1\t\tC p0\n\tfmuld\t%f2,%f4,%f4\nC ---\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\nC ---\n\tadd\t%g2,%g1,%g1\t\tC add p16 
to p0 (ADD1)\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%fp+80],%f0\n\tfanop\nC ---\n\tsrlx\t%g1,32,%l0\n\tnop\n\tstw\t%g1,[%i0-8]\n\tfdtox\t%f4,%f4\nC ---\n\tsllx\t%g4,16,%g3\t\tC 0000hhhhllll0000\n\tnop\n\tstw\t%l0,[%i0-4]\n\tfdtox\t%f6,%f6\nC ---\n\tor\t%g3,%g4,%g2\t\tC 0000hhhhXXXXllll\n\tsubcc\t%i2,1,%i2\n\tstd\t%f4,[%fp-40]\n\tfxtod\t%f8,%f2\nC ---\n\tstd\t%f6,[%fp-32]\n\tandn\t%g2,%g5,%g2\t\tC 0000hhhh0000llll\n\tbne,pt\t%icc,.Loop\n\tfanop\nC --- LOOP END\n\n.L5:\tadd\t%fp, 80, %l3\n\tadd\t%fp, -24, %l4\n\tadd\t%fp, 72, %l5\n\tb\t.Ltail\n\tadd\t%fp, -40, %l6\n\n.Lend:\tadd\t%fp, 72, %l3\n\tadd\t%fp, -40, %l4\n\tadd\t%fp, 80, %l5\n\tadd\t%fp, -24, %l6\n.Ltail:\tstx\t%g2,[%l3]\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tldx\t[%l4],%g2\t\tC p16\n\tldx\t[%l4+8],%g1\t\tC p0\n\tfmuld\t%f2,%f4,%f4\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 (ADD1)\n\tadd\t%i1,4,%i1\t\tC s1_ptr++\n\tldd\t[%l5],%f0\n\tsrlx\t%g1,32,%l0\n\tstw\t%g1,[%i0-8]\n\tfdtox\t%f4,%f4\n\tstw\t%l0,[%i0-4]\n.L4:\tfdtox\t%f6,%f6\n\tstd\t%f4,[%l4]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%l4+8]\n\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tldx\t[%l6],%g2\t\tC p16\n\tldx\t[%l6+8],%g1\t\tC p0\n\tfmuld\t%f2,%f4,%f4\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tld\t[%i1],%f9\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 (ADD1)\n\tldd\t[%l3],%f0\n\tsrlx\t%g1,32,%l0\n\tstw\t%g1,[%i0-8]\n\tfdtox\t%f4,%f4\n\tstw\t%l0,[%i0-4]\n.L3:\tfdtox\t%f6,%f6\n\tstd\t%f4,[%l6]\n\tfxtod\t%f8,%f2\n\tstd\t%f6,[%l6+8]\n\n\tfitod\t%f0,%f4\n\tfitod\t%f1,%f6\n\tldx\t[%l4],%g2\t\tC p16\n\tldx\t[%l4+8],%g1\t\tC p0\n\tfmuld\t%f2,%f4,%f4\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tfmuld\t%f2,%f6,%f6\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 
(ADD1)\n\tsrlx\t%g1,32,%l0\n\tstw\t%g1,[%i0-8]\n\tfdtox\t%f4,%f4\n\tstw\t%l0,[%i0-4]\n.L2:\tfdtox\t%f6,%f6\n\tstd\t%f4,[%l4]\n\tstd\t%f6,[%l4+8]\n\n\tldx\t[%l6],%g2\t\tC p16\n\tldx\t[%l6+8],%g1\t\tC p0\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 (ADD1)\n\tsrlx\t%g1,32,%l0\n\tstw\t%g1,[%i0-8]\n\tstw\t%l0,[%i0-4]\n\n.L1:\tldx\t[%l4],%g2\t\tC p16\n\tldx\t[%l4+8],%g1\t\tC p0\n\tsllx\t%g2,16,%g2\t\tC align p16\n\tadd\t%i0,8,%i0\t\tC res_ptr++\n\tadd\t%g2,%g1,%g1\t\tC add p16 to p0 (ADD1)\n\tsrlx\t%g1,32,%l0\n\tstw\t%g1,[%i0-8]\n\tstw\t%l0,[%i0-4]\n\n\tret\n\trestore\t%g0,%g0,%o0\n\nEPILOGUE(mpn_sqr_diagonal)\n"
  },
  {
    "path": "mpn/sparc32/v9/sub_n.asm.broken",
    "content": "dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 2001 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\n\ndnl\tTHIS CODE IS BROKEN FOR ONE LIMB\n\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\ndefine(rp,%o0)\ndefine(s1p,%o1)\ndefine(s2p,%o2)\ndefine(n,%o3)\ndefine(cy,%g1)\n\nC This code uses 64-bit operations on `o' and `g' registers.  It doesn't\nC require that `o' registers' upper 32 bits are preserved by the operating\nC system, but if they are not, they must be zeroed.  
That is indeed what\nC happens at least on Slowaris 2.5 and 2.6.\n\nC On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at\nC about 10 cycles/limb from the Ecache.\n\nASM_START()\nPROLOGUE(mpn_sub_n)\n\tlduw\t[s1p+0],%o4\n\tlduw\t[s2p+0],%o5\n\taddcc\tn,-2,n\n\tbl,pn\t%icc,L(end1)\n\tlduw\t[s1p+4],%g2\n\tlduw\t[s2p+4],%g3\n\tbe,pn\t%icc,L(end2)\n\tmov\t0,cy\n\n\t.align\t16\nL(loop):\n\tsub\t%o4,%o5,%g4\n\tadd\trp,8,rp\n\tlduw\t[s1p+8],%o4\n\tfitod\t%f0,%f2\nC ---\n\tsub\t%g4,cy,%g4\n\taddcc\tn,-1,n\n\tlduw\t[s2p+8],%o5\n\tfitod\t%f0,%f2\nC ---\n\tsrlx\t%g4,63,cy\n\tadd\ts2p,8,s2p\n\tstw\t%g4,[rp-8]\n\tbe,pn\t%icc,L(exito)+4\nC ---\n\tsub\t%g2,%g3,%g4\n\taddcc\tn,-1,n\n\tlduw\t[s1p+12],%g2\n\tfitod\t%f0,%f2\nC ---\n\tsub\t%g4,cy,%g4\n\tadd\ts1p,8,s1p\n\tlduw\t[s2p+4],%g3\n\tfitod\t%f0,%f2\nC ---\n\tsrlx\t%g4,63,cy\n\tbne,pt\t%icc,L(loop)\n\tstw\t%g4,[rp-4]\nC ---\nL(exite):\n\tsub\t%o4,%o5,%g4\n\tsub\t%g4,cy,%g4\n\tsrlx\t%g4,63,cy\n\tstw\t%g4,[rp+0]\n\tsub\t%g2,%g3,%g4\n\tsub\t%g4,cy,%g4\n\tstw\t%g4,[rp+4]\n\tretl\n\tsrlx\t%g4,63,%o0\n\nL(exito):\n\tsub\t%g2,%g3,%g4\n\tsub\t%g4,cy,%g4\n\tsrlx\t%g4,63,cy\n\tstw\t%g4,[rp-4]\n\tsub\t%o4,%o5,%g4\n\tsub\t%g4,cy,%g4\n\tstw\t%g4,[rp+0]\n\tretl\n\tsrlx\t%g4,63,%o0\n\nL(end1):\n\tsub\t%o4,%o5,%g4\n\tstw\t%g4,[rp+0]\n\tretl\n\tsrlx\t%g4,63,%o0\n\nL(end2):\n\tsub\t%o4,%o5,%g4\n\tsrlx\t%g4,63,cy\n\tstw\t%g4,[rp+0]\n\tsub\t%g2,%g3,%g4\n\tsub\t%g4,cy,%g4\n\tstw\t%g4,[rp+4]\n\tretl\n\tsrlx\t%g4,63,%o0\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/sparc32/v9/submul_1.asm",
    "content": "dnl  SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and\ndnl  subtract the result from a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC Algorithm: We use two floating-point multiplies per limb product, with the\nC invariant v operand split into two 16-bit pieces, and the u operand split\nC into 32-bit pieces.  We convert the two 48-bit products and transfer them to\nC the integer unit.\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     6.5\nC UltraSPARC 3:\t      ?\n\nC Possible optimizations:\nC   1. Combine 32-bit memory operations into 64-bit operations.  Since we're\nC      memory bandwidth limited, this could save 1.5 cycles/limb.\nC   2. Unroll the inner loop.  Since we already use alternate temporary areas,\nC      it is very straightforward to unroll, using an exit branch midways.\nC      Unrolling would allow deeper scheduling which could improve speed for L2\nC      cache case.\nC   3. For mpn_mul_1: Use more alternating temp areas.  
The std'es and ldx'es\nC      aren't sufficiently apart-scheduled with just two temp areas.\nC   4. Specialize for particular v values.  If its upper 16 bits are zero, we\nC      could save many operations.\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\ndefine(`FSIZE',224)\n\nASM_START()\nPROLOGUE(mpn_submul_1)\n\tadd\t%sp, -FSIZE, %sp\n\tsethi\t%hi(0xffff), %g1\n\tsrl\t%o3, 16, %g2\n\tor\t%g1, %lo(0xffff), %g1\n\tand\t%o3, %g1, %g1\n\tstx\t%g1, [%sp+104]\n\tstx\t%g2, [%sp+112]\n\tldd\t[%sp+104], %f6\n\tldd\t[%sp+112], %f8\n\tfxtod\t%f6, %f6\n\tfxtod\t%f8, %f8\n\tld\t[%sp+104], %f10\t\tC zero f10\n\n\tmov\t0, %g3\t\t\tC cy = 0\n\ndefine(`fanop', `fitod %f18, %f0')\tC  A quasi nop running in the FA pipe\n\n\tadd\t%sp, 160, %o5\t\tC point in scratch area\n\tand\t%o5, -32, %o5\t\tC align at 0 (mod 32) in scratch area\n\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_two_or_more\n\tfxtod\t%f10, %f2\n\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L1\n\tadd\t%o0, -16, %o0\n\n\t.align\t16\n.L_two_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfmuld\t%f2, %f8, %f16\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_three_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tstd\t%f12, [%o5+8]\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tldx\t[%o5+16], %g2\t\tC p16\n\tldx\t[%o5+24], %g1\t\tC p0\n\tb\t.L2\n\tadd\t%o0, -12, %o0\n\n\t.align\t16\n.L_three_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, 
%f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_four_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tstd\t%f12, [%o5+24]\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L3\n\tadd\t%o0, -8, %o0\n\n\t.align\t16\n.L_four_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tfdtox\t%f4, %f12\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tbne,pt\t%icc, .L_five_or_more\n\tfxtod\t%f10, %f2\n\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tb\t.L4\n\tadd\t%o0, -4, %o0\n\n\t.align\t16\n.L_five_or_more:\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\n\tldx\t[%o5+16], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tldx\t[%o5+24], %g1\t\tC p0\n\tstd\t%f14, [%o5+16]\n\tfmuld\t%f2, %f8, %f16\n\tstd\t%f12, [%o5+24]\n\tfmuld\t%f2, %f6, %f4\n\tadd\t%o1, 4, %o1\t\tC up++\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tbne,pt\t%icc, .Loop\n\tfxtod\t%f10, %f2\n\tb,a\t.L5\n\nC BEGIN MAIN LOOP\n\t.align 16\nC -- 0\n.Loop:\tsub\t%g0, %g3, %g3\n\tsubcc\t%o2, 1, %o2\n\tld\t[%o1], %f11\t\tC read up[i]\n\tfdtox\t%f16, %f14\nC -- 1\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tadd\t%o0, 4, %o0\t\tC rp++\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\nC -- 2\n\tsrl\t%g3, 0, %g3\t\tC zero most significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tfanop\nC -- 3\n\tnop\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, 
[%o5+0]\n\tfmuld\t%f2, %f8, %f16\nC -- 4\n\tnop\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\nC -- 5\n\txor\t%o5, 16, %o5\t\tC alternate scratch variables\n\tadd\t%o1, 4, %o1\t\tC up++\n\tstw\t%g4, [%o0-4]\n\tfanop\nC -- 6\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0], %g5\t\tC read rp[i]\n\tbne,pt\t%icc, .Loop\n\tfxtod\t%f10, %f2\nC END MAIN LOOP\n\n.L5:\tsub\t%g0, %g3, %g3\n\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tsrl\t%g3, 0, %g3\t\tC zero most significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g4, %g3, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tfmuld\t%f2, %f8, %f16\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\tfmuld\t%f2, %f6, %f4\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+0]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+4], %g5\t\tC read rp[i]\n\n\tsub\t%g0, %g3, %g3\n.L4:\tfdtox\t%f16, %f14\n\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tfdtox\t%f4, %f12\n\tsrl\t%g3, 0, %g3\t\tC zero most significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tstd\t%f14, [%o5+0]\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstd\t%f12, [%o5+8]\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+4]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+8], %g5\t\tC read rp[i]\n\n\tsub\t%g0, %g3, %g3\n.L3:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tsrl\t%g3, 0, %g3\t\tC zero most significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\txor\t%o5, 16, %o5\n\tstw\t%g4, [%o0+8]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+12], %g5\t\tC read rp[i]\n\n\tsub\t%g0, %g3, %g3\n.L2:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tldx\t[%o5+0], %g2\t\tC p16\n\tsrl\t%g3, 0, %g3\t\tC zero most 
significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tldx\t[%o5+8], %g1\t\tC p0\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstw\t%g4, [%o0+12]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\tlduw\t[%o0+16], %g5\t\tC read rp[i]\n\n\tsub\t%g0, %g3, %g3\n.L1:\tsllx\t%g2, 16, %g4\t\tC (p16 << 16)\n\tsrl\t%g3, 0, %g3\t\tC zero most significant 32 bits\n\tadd\t%g1, %g4, %g4\t\tC p = p0 + (p16 << 16)\n\tadd\t%g3, %g4, %g4\t\tC p += cy\n\tsub\t%g5, %g4, %g4\t\tC p += rp[i]\n\tstw\t%g4, [%o0+16]\n\tsrlx\t%g4, 32, %g3\t\tC new cy\n\n\tsub\t%g0, %g3, %o0\n\tretl\n\tsub\t%sp, -FSIZE, %sp\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/sparc32/v9/udiv.asm",
    "content": "dnl  SPARC v9 32-bit mpn_udiv_qrnnd - division support for longlong.h.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC INPUT PARAMETERS\nC rem_ptr\to0\nC n1\t\to1\nC n0\t\to2\nC d\t\to3\n\nASM_START()\nPROLOGUE(mpn_udiv_qrnnd)\n\tsllx\t%o1, 32, %g1\t\tC shift upper dividend limb\n\tsrl\t%o2, 0, %g2\t\tC zero extend lower dividend limb\n\tsrl\t%o3, 0, %g3\t\tC zero extend divisor\n\tor\t%g2, %g1, %g1\t\tC assemble 64-bit dividend\n\tudivx\t%g1, %g3, %g1\n\tmulx\t%g1, %g3, %g4\n\tsub\t%g2, %g4, %g2\n\tst\t%g2, [%o0]\t\tC store remainder\n\tretl\n\tmov\t%g1, %o0\t\tC return quotient\nEPILOGUE(mpn_udiv_qrnnd)\n"
  },
  {
    "path": "mpn/sparc64/README",
    "content": "Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\nThis directory contains mpn functions for 64-bit V9 SPARC\n\nRELEVANT OPTIMIZATION ISSUES\n\nNotation:\n  IANY = shift/add/sub/logical/sethi\n  IADDLOG = add/sub/logical/sethi\n  MEM = ld*/st*\n  FA = fadd*/fsub*/f*to*/fmov*\n  FM = fmul*\n\nUltraSPARC can issue four instructions per cycle, with these restrictions:\n* Two IANY instructions, but only one of these may be a shift.  If there is a\n  shift and an IANY instruction, the shift must precede the IANY instruction.\n* One FA.\n* One FM.\n* One branch.\n* One MEM.\n* IANY/IADDLOG/MEM must be insn 1, 2, or 3 in an issue bundle.  
Taken branches\n  should not be in slot 4, since that makes the delay insn come from separate\n  bundle.\n* If two IANY/IADDLOG instructions are to be executed in the same cycle and one\n  of these is setting the condition codes, that instruction must be the second\n  one.\n\nTo summarize, ignoring branches, these are the bundles that can reach the peak\nexecution speed:\n\ninsn1\tiany\tiany\tmem\tiany\tiany\tmem\tiany\tiany\tmem\ninsn2\tiaddlog\tmem\tiany\tmem\tiaddlog\tiany\tmem\tiaddlog\tiany\ninsn3\tmem\tiaddlog\tiaddlog\tfa\tfa\tfa\tfm\tfm\tfm\ninsn4\tfa/fm\tfa/fm\tfa/fm\tfm\tfm\tfm\tfa\tfa\tfa\n\nThe 64-bit integer multiply instruction mulx takes from 5 cycles to 35 cycles,\ndepending on the position of the most significant bit of the first source\noperand.  When used for 32x32->64 multiplication, it needs 20 cycles.\nFurthermore, it stalls the processor while executing.  We stay away from that\ninstruction, and instead use floating-point operations.\n\nFloating-point add and multiply units are fully pipelined.  The latency for\nUltraSPARC-1/2 is 3 cycles and for UltraSPARC-3 it is 4 cycles.\n\nInteger conditional move instructions cannot dual-issue with other integer\ninstructions.  No conditional move can issue 1-5 cycles after a load.  (This\nmight have been fixed for UltraSPARC-3.)\n\nThe UltraSPARC-3 pipeline is very similar to the one of UltraSPARC-1/2, but is\nsomewhat slower.  Branches execute slower, and there may be other new stalls.\nBut integer multiply doesn't stall the entire CPU and also has a much lower\nlatency.  But it's still not pipelined, and thus useless for our needs.\n\nSTATUS\n\n* mpn_lshift, mpn_rshift: The current code runs at 2.0 cycles/limb on\n  UltraSPARC-1/2 and 2.65 on UltraSPARC-3.  For UltraSPARC-1/2, the IEU0\n  functional unit is saturated with shifts.\n\n* mpn_add_n, mpn_sub_n: The current code runs at 4 cycles/limb on\n  UltraSPARC-1/2 and 4.5 cycles/limb on UltraSPARC-3.  
The 4 instruction\n  recurrency is the speed limiter.\n\n* mpn_addmul_1: The current code runs at 14 cycles/limb asymptotically on\n  UltraSPARC-1/2 and 17.5 cycles/limb on UltraSPARC-3.  On UltraSPARC-1/2, the\n  code sustains 4 instructions/cycle.  It might be possible to invent a better\n  way of summing the intermediate 49-bit operands, but it is unlikely that it\n  will save enough instructions to save an entire cycle.\n\n  The load-use of the u operand is not enough scheduled for good L2 cache\n  performance.  The UltraSPARC-1/2 L1 cache is direct mapped, and since we use\n  temporary stack slots that will conflict with the u and r operands, we miss\n  to L2 very often.  The load-use of the std/ldx pairs via the stack are\n  perhaps over-scheduled.\n\n  It would be possible to save two instructions: (1) The mov could be avoided\n  if the std/ldx were less scheduled.  (2) The ldx of the r operand could be\n  split into two ld instructions, saving the shifts/masks.\n\n  It should be possible to reach 14 cycles/limb for UltraSPARC-3 if the fp\n  operations were rescheduled for this processor's 4-cycle latency.\n\n* mpn_mul_1: The current code is a straightforward edit of the mpn_addmul_1\n  code.  It would be possible to shave one or two cycles from it, with some\n  labour.\n\n* mpn_submul_1: Simpleminded code just calling mpn_mul_1 + mpn_sub_n.  This\n  means that it runs at 18 cycles/limb on UltraSPARC-1/2 and 23 cycles/limb on\n  UltraSPARC-3.  It would be possible to either match the mpn_addmul_1\n  performance, or in the worst case use one more instruction group.\n\n* US1/US2 cache conflict resolving.  The direct mapped L1 data cache of US1/US2\n  is a problem for mul_1, addmul_1 (and a prospective submul_1).  We should\n  allocate a larger cache area, and put the stack temp area in a place that\n  doesn't cause cache conflicts.\n"
  },
  {
    "path": "mpn/sparc64/add_n.asm",
    "content": "dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and\ndnl  store sum in a third limb vector.\n\ndnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     4\nC UltraSPARC 3:\t      4.5\n\nC Compute carry-out from the most significant bits of u,v, and r, where\nC r=u+v+carry_in, using logic operations.\n\nC This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  
It has a 4 insn\nC recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.\nC Therefore, it seems futile to try to optimize this any further...\n\nC INPUT PARAMETERS\ndefine(`rp',`%i0')\ndefine(`up',`%i1')\ndefine(`vp',`%i2')\ndefine(`n',`%i3')\n\ndefine(`u0',`%l0')\ndefine(`u1',`%l2')\ndefine(`u2',`%l4')\ndefine(`u3',`%l6')\ndefine(`v0',`%l1')\ndefine(`v1',`%l3')\ndefine(`v2',`%l5')\ndefine(`v3',`%l7')\n\ndefine(`cy',`%i4')\n\ndefine(`fanop',`fitod %f0,%f2')\t\tdnl  A quasi nop running in the FA pipe\ndefine(`fmnop',`fmuld %f0,%f0,%f4')\tdnl  A quasi nop running in the FM pipe\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_add_n)\n\tsave\t%sp,-160,%sp\n\n\tfitod\t%f0,%f0\t\tC make sure f0 contains small, quiet number\n\tsubcc\tn,4,%g0\n\tbl,pn\t%icc,.Loop0\n\tmov\t0,cy\n\n\tldx\t[up+0],u0\n\tldx\t[vp+0],v0\n\tadd\tup,32,up\n\tldx\t[up-24],u1\n\tldx\t[vp+8],v1\n\tadd\tvp,32,vp\n\tldx\t[up-16],u2\n\tldx\t[vp-16],v2\n\tldx\t[up-8],u3\n\tldx\t[vp-8],v3\n\tsubcc\tn,8,n\n\tadd\tu0,v0,%g1\tC main add\n\tadd\t%g1,cy,%g4\tC carry add\n\tor\tu0,v0,%g2\n\tbl,pn\t%icc,.Lend4567\n\tfanop\n\tb,a\t.Loop\n\n\t.align\t16\nC START MAIN LOOP\n.Loop:\tandn\t%g2,%g4,%g2\n\tand\tu0,v0,%g3\n\tldx\t[up+0],u0\n\tfanop\nC --\n\tor\t%g3,%g2,%g2\n\tldx\t[vp+0],v0\n\tadd\tup,32,up\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tadd\tu1,v1,%g1\n\tstx\t%g4,[rp+0]\n\tfanop\nC --\n\tadd\t%g1,cy,%g4\n\tor\tu1,v1,%g2\n\tfmnop\n\tfanop\nC --\n\tandn\t%g2,%g4,%g2\n\tand\tu1,v1,%g3\n\tldx\t[up-24],u1\n\tfanop\nC --\n\tor\t%g3,%g2,%g2\n\tldx\t[vp+8],v1\n\tadd\tvp,32,vp\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tadd\tu2,v2,%g1\n\tstx\t%g4,[rp+8]\n\tfanop\nC --\n\tadd\t%g1,cy,%g4\n\tor\tu2,v2,%g2\n\tfmnop\n\tfanop\nC --\n\tandn\t%g2,%g4,%g2\n\tand\tu2,v2,%g3\n\tldx\t[up-16],u2\n\tfanop\nC --\n\tor\t%g3,%g2,%g2\n\tldx\t[vp-16],v2\n\tadd\trp,32,rp\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tadd\tu3,v3,%g1\n\tstx\t%g4,[rp-16]\n\tfanop\nC 
--\n\tadd\t%g1,cy,%g4\n\tor\tu3,v3,%g2\n\tfmnop\n\tfanop\nC --\n\tandn\t%g2,%g4,%g2\n\tand\tu3,v3,%g3\n\tldx\t[up-8],u3\n\tfanop\nC --\n\tor\t%g3,%g2,%g2\n\tsubcc\tn,4,n\n\tldx\t[vp-8],v3\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tadd\tu0,v0,%g1\n\tstx\t%g4,[rp-8]\n\tfanop\nC --\n\tadd\t%g1,cy,%g4\n\tor\tu0,v0,%g2\n\tbge,pt\t%icc,.Loop\n\tfanop\nC END MAIN LOOP\n.Lend4567:\n\tandn\t%g2,%g4,%g2\n\tand\tu0,v0,%g3\n\tor\t%g3,%g2,%g2\n\tsrlx\t%g2,63,cy\n\tadd\tu1,v1,%g1\n\tstx\t%g4,[rp+0]\n\tadd\t%g1,cy,%g4\n\tor\tu1,v1,%g2\n\tandn\t%g2,%g4,%g2\n\tand\tu1,v1,%g3\n\tor\t%g3,%g2,%g2\n\tsrlx\t%g2,63,cy\n\tadd\tu2,v2,%g1\n\tstx\t%g4,[rp+8]\n\tadd\t%g1,cy,%g4\n\tor\tu2,v2,%g2\n\tandn\t%g2,%g4,%g2\n\tand\tu2,v2,%g3\n\tor\t%g3,%g2,%g2\n\tadd\trp,32,rp\n\tsrlx\t%g2,63,cy\n\tadd\tu3,v3,%g1\n\tstx\t%g4,[rp-16]\n\tadd\t%g1,cy,%g4\n\tor\tu3,v3,%g2\n\tandn\t%g2,%g4,%g2\n\tand\tu3,v3,%g3\n\tor\t%g3,%g2,%g2\n\tsrlx\t%g2,63,cy\n\tstx\t%g4,[rp-8]\n\n\taddcc\tn,4,n\n\tbz,pn\t%icc,.Lret\n\tfanop\n\n.Loop0:\tldx\t[up],u0\n\tadd\tup,8,up\n\tldx\t[vp],v0\n\tadd\tvp,8,vp\n\tadd\trp,8,rp\n\tsubcc\tn,1,n\n\tadd\tu0,v0,%g1\n\tor\tu0,v0,%g2\n\tadd\t%g1,cy,%g4\n\tand\tu0,v0,%g3\n\tandn\t%g2,%g4,%g2\n\tstx\t%g4,[rp-8]\n\tor\t%g3,%g2,%g2\n\tbnz,pt\t%icc,.Loop0\n\tsrlx\t%g2,63,cy\n\n.Lret:\tmov\tcy,%i0\n\tret\n\trestore\nEPILOGUE(mpn_add_n)\n"
  },
  {
    "path": "mpn/sparc64/addmul_1.asm",
    "content": "dnl  SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add\ndnl  the result to a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,\ndnl  Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     14\nC UltraSPARC 3:\t      17.5\n\nC Algorithm: We use eight floating-point multiplies per limb product, with the\nC invariant v operand split into four 16-bit pieces, and the up operand split\nC into 32-bit pieces.  We sum pairs of 48-bit partial products using\nC floating-point add, then convert the four 49-bit product-sums and transfer\nC them to the integer unit.\n\nC Possible optimizations:\nC   0. Rewrite to use algorithm of mpn_addmul_2.\nC   1. Align the stack area where we transfer the four 49-bit product-sums\nC      to a 32-byte boundary.  That would minimize the cache collision.\nC      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would\nC      be to align the area to map to the area immediately before up?)\nC   2. 
Sum the 4 49-bit quantities using 32-bit operations, as in the\nC      develop mpn_addmul_2.  This would save many integer instructions.\nC   3. Unrolling.  Questionable if it is worth the code expansion, given that\nC      it could only save 1 cycle/limb.\nC   4. Specialize for particular v values.  If its upper 32 bits are zero, we\nC      could save many operations, in the FPU (fmuld), but more so in the IEU\nC      since we'll be summing 48-bit quantities, which might be simpler.\nC   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and\nC      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should\nC      not be greater than needed for L2 cache latency, and also not so great\nC      that i16 needs to be copied.\nC   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want\nC      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU\nC      ops.)\n\nC Instruction classification (as per UltraSPARC-1/2 functional units):\nC    8 FM\nC   10 FA\nC   12 MEM\nC   10 ISHIFT + 14 IADDLOG\nC    1 BRANCH\nC   55 insns totally (plus one mov insn that should be optimized out)\n\nC The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e we\nC sustain the peak execution rate of 4 instructions/cycle.\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\n\ndefine(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')\ndefine(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')\ndefine(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')\ndefine(`u00',`%f32') define(`u32', `%f34')\ndefine(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')\ndefine(`cy',`%g1')\ndefine(`rlimb',`%g3')\ndefine(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') 
define(`i48',`%l3')\ndefine(`xffffffff',`%l7')\ndefine(`xffff',`%o0')\n\nPROLOGUE(mpn_addmul_1)\n\nC Initialization.  (1) Split v operand into four 16-bit chunks and store them\nC as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs\nC f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.\n\n\tsave\t%sp, -256, %sp\n\tmov\t-1, %g4\n\tsrlx\t%g4, 48, xffff\t\tC store mask in register `xffff'\n\tand\t%i3, xffff, %g2\n\tstx\t%g2, [%sp+2223+0]\n\tsrlx\t%i3, 16, %g3\n\tand\t%g3, xffff, %g3\n\tstx\t%g3, [%sp+2223+8]\n\tsrlx\t%i3, 32, %g2\n\tand\t%g2, xffff, %g2\n\tstx\t%g2, [%sp+2223+16]\n\tsrlx\t%i3, 48, %g3\n\tstx\t%g3, [%sp+2223+24]\n\tsrlx\t%g4, 32, xffffffff\tC store mask in register `xffffffff'\n\n\tsllx\t%i2, 3, %i2\n\tmov\t0, cy\t\t\tC clear cy\n\tadd\t%i0, %i2, %i0\n\tadd\t%i1, %i2, %i1\n\tneg\t%i2\n\tadd\t%i1, 4, %i5\n\tadd\t%i0, -32, %i4\n\tadd\t%i0, -16, %i0\n\n\tldd\t[%sp+2223+0], v00\n\tldd\t[%sp+2223+8], v16\n\tldd\t[%sp+2223+16], v32\n\tldd\t[%sp+2223+24], v48\n\tld\t[%sp+2223+0],%f2\tC zero f2\n\tld\t[%sp+2223+0],%f4\tC zero f4\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfxtod\tv00, v00\n\tfxtod\tv16, v16\n\tfxtod\tv32, v32\n\tfxtod\tv48, v48\n\nC Start real work.  
(We sneakingly read f3 and f5 above...)\nC The software pipeline is very deep, requiring 4 feed-in stages.\n\n\tfxtod\t%f2, u00\n\tfxtod\t%f4, u32\n\tfmuld\tu00, v00, a00\n\tfmuld\tu00, v16, a16\n\tfmuld\tu00, v32, p32\n\tfmuld\tu32, v00, r32\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_two_or_more\n\tfmuld\tu32, v16, r48\n\n.L_one:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tfdtox\ta32, a32\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tstd\ta32, [%sp+2223+16]\n\tstd\ta48, [%sp+2223+24]\n\tadd\t%i2, 8, %i2\n\n\tfdtox\tr64, a00\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfdtox\tr80, a16\n\tldx\t[%sp+2223+0], i00\n\tldx\t[%sp+2223+8], i16\n\tldx\t[%sp+2223+16], i32\n\tldx\t[%sp+2223+24], i48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tadd\t%i2, 8, %i2\n\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_1\n\tadd\t%i2, 8, %i2\n\n.L_two_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, 
a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tfdtox\ta32, a32\n\tfxtod\t%f2, u00\n\tfxtod\t%f4, u32\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_three_or_more\n\tfmuld\tu32, v16, r48\n\n.L_two:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tldx\t[%sp+2223+8], i16\n\tldx\t[%sp+2223+16], i32\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tstd\ta32, [%sp+2223+16]\n\tstd\ta48, [%sp+2223+24]\n\tadd\t%i2, 8, %i2\n\n\tfdtox\tr64, a00\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfdtox\tr80, a16\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_2\n\tadd\t%i2, 8, 
%i2\n\n.L_three_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, u32\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_four_or_more\n\tfmuld\tu32, v16, r48\n\n.L_three:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st 
ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_3\n\tadd\t%i2, 8, %i2\n\n.L_four_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, u32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .Loop\n\tfmuld\tu32, v16, r48\n\n.L_four:\n\tb,a\t.L_out_4\n\nC BEGIN MAIN LOOP\n\t.align\t16\n.Loop:\nC 00\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\nC 01\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 
0xffff)\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\nC 02\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\nC 03\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\nC 04\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\nC 05\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\nC 06\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, u32\nC 07\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\nC 08\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\nC 09\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\nC 10\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\nC 11\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\nC 12\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\nC 13\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .Loop\n\tfmuld\tu32, v16, r48\nC END MAIN LOOP\n\n.L_out_4:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tfdtox\ta00, a00\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC 
LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tfaddd\tp48, r48, a48\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_3:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tfdtox\tr64, a00\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tldx\t[%i0+%i2], rlimb\tC read rp[i]\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tfdtox\tr80, a16\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, 
%o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_2:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tsrlx\trlimb, 32, %g4\t\tC HI(rlimb)\n\tand\trlimb, xffffffff, %g5\tC LO(rlimb)\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tadd\ti00, %g5, %g5\t\tC i00+ now in g5\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tadd\ti32, %g4, %g4\t\tC i32+ now in g4\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_1:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tor\t%i3, %o5, %o5\n\tstx\t%o5, [%i4+%i2]\n\n\tsllx\ti00, 0, %g2\n\tadd\t%g2, cy, cy\n\tsllx\ti16, 16, %g3\n\tadd\t%g3, cy, cy\n\n\treturn\t%i7+8\n\tmov\tcy, %o0\nEPILOGUE(mpn_addmul_1)\n"
  },
  {
    "path": "mpn/sparc64/addmul_2.asm",
    "content": "dnl  SPARC v9 64-bit mpn_addmul_2 -- Multiply an n limb number with 2-limb\ndnl  number and add the result to a n limb vector.\n\ndnl  Copyright 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC                  cycles/limb\nC UltraSPARC 1&2:      9\nC UltraSPARC 3:       10\n\nC Algorithm: We use 16 floating-point multiplies per limb product, with the\nC 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand\nC split into 32-bit pieces.  We sum four 48-bit partial products using\nC floating-point add, then convert the resulting four 50-bit quantities and\nC transfer them to the integer unit.\n\nC Possible optimizations:\nC   1. Align the stack area where we transfer the four 50-bit product-sums\nC      to a 32-byte boundary.  That would minimize the cache collision.\nC      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would\nC      be to align the area to map to the area immediately before up?)\nC   2. Perform two of the fp->int conversions with integer instructions.  
We\nC      can get almost ten free IEU slots, if we clean up bookkeeping and the\nC      silly carry-limb code.\nC   3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb\nC      code.\n\nC OSP (Overlapping software pipeline) version of mpn_mul_basecase:\nC Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.\nC FI\t= 20\nC L\t=  9 x un * vn\nC WDFI\t= 10 x vn / 2\nC WD\t= 4\n\nC Instruction classification (as per UltraSPARC functional units).\nC Assuming silly carry code is fixed.  Includes bookkeeping.\nC\nC               mpn_addmul_X     mpn_mul_X\nC                1       2       1       2\nC               ==========      ==========\nC      FM        8      16       8      16\nC      FA       10      18      10      18\nC     MEM       12      12      10      10\nC  ISHIFT        6       6       6       6\nC IADDLOG       11      11      10      10\nC  BRANCH        1       1       1       1\nC\nC TOTAL IEU     17      17      16      16\nC TOTAL         48      64      45      61\nC\nC IEU cycles     8.5     8.5     8       8\nC MEM cycles    12      12      10      10\nC ISSUE cycles  12      16      11.25   15.25\nC FPU cycles    10      18      10      18\nC cycles/loop   12      18      12      18\nC cycles/limb   12       9      12       9\n\n\nC INPUT PARAMETERS\nC rp[n + 1]\ti0\nC up[n]\t\ti1\nC n\t\ti2\nC vp[2]\t\ti3\n\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\n\nC Combine registers:\nC u00_hi= u32_hi\nC u00_lo= u32_lo\nC a000  = out000\nC a016  = out016\nC Free: f52 f54\n\n\ndefine(`p000', `%f8')  define(`p016',`%f10')\ndefine(`p032',`%f12')  define(`p048',`%f14')\ndefine(`p064',`%f16')  define(`p080',`%f18')\ndefine(`p096a',`%f20') define(`p112a',`%f22')\ndefine(`p096b',`%f56') define(`p112b',`%f58')\n\ndefine(`out000',`%f0') define(`out016',`%f6')\n\ndefine(`v000',`%f24')  define(`v016',`%f26')\ndefine(`v032',`%f28')  define(`v048',`%f30')\ndefine(`v064',`%f44')  
define(`v080',`%f46')\ndefine(`v096',`%f48')  define(`v112',`%f50')\n\ndefine(`u00',`%f32')   define(`u32', `%f34')\n\ndefine(`a000',`%f36')  define(`a016',`%f38')\ndefine(`a032',`%f40')  define(`a048',`%f42')\ndefine(`a064',`%f60')  define(`a080',`%f62')\n\ndefine(`u00_hi',`%f2') define(`u32_hi',`%f4')\ndefine(`u00_lo',`%f3') define(`u32_lo',`%f5')\n\ndefine(`cy',`%g1')\ndefine(`rlimb',`%g3')\ndefine(`i00',`%l0')    define(`i16',`%l1')\ndefine(`r00',`%l2')    define(`r32',`%l3')\ndefine(`xffffffff',`%l7')\ndefine(`xffff',`%o0')\n\n\nPROLOGUE(mpn_addmul_2)\n\nC Initialization.  (1) Split v operand into eight 16-bit chunks and store them\nC as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs\nC f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.\nC This code could be better scheduled.\n\n\tsave\t%sp, -256, %sp\n\nifdef(`HAVE_VIS',\n`\tmov\t-1, %g4\n\twr\t%g0, 0xD2, %asi\n\tsrlx\t%g4, 32, xffffffff\tC store mask in register `xffffffff'\n\tldda\t[%i3+6] %asi, v000\n\tldda\t[%i3+4] %asi, v016\n\tldda\t[%i3+2] %asi, v032\n\tldda\t[%i3+0] %asi, v048\n\tfxtod\tv000, v000\n\tldda\t[%i3+14] %asi, v064\n\tfxtod\tv016, v016\n\tldda\t[%i3+12] %asi, v080\n\tfxtod\tv032, v032\n\tldda\t[%i3+10] %asi, v096\n\tfxtod\tv048, v048\n\tldda\t[%i3+8] %asi, v112\n\tfxtod\tv064, v064\n\tfxtod\tv080, v080\n\tfxtod\tv096, v096\n\tfxtod\tv112, v112\n\tfzero\tu00_hi\n\tfzero\tu32_hi\n',\n`\tmov\t-1, %g4\n\tldx\t[%i3+0], %l0\t\tC vp[0]\n\tsrlx\t%g4, 48, xffff\t\tC store mask in register `xffff'\n\tldx\t[%i3+8], %l1\t\tC vp[1]\n\n\tand\t%l0, xffff, %g2\n\tstx\t%g2, [%sp+2223+0]\n\tsrlx\t%l0, 16, %g3\n\tand\t%g3, xffff, %g3\n\tstx\t%g3, [%sp+2223+8]\n\tsrlx\t%l0, 32, %g2\n\tand\t%g2, xffff, %g2\n\tstx\t%g2, [%sp+2223+16]\n\tsrlx\t%l0, 48, %g3\n\tstx\t%g3, [%sp+2223+24]\n\tand\t%l1, xffff, %g2\n\tstx\t%g2, [%sp+2223+32]\n\tsrlx\t%l1, 16, %g3\n\tand\t%g3, xffff, %g3\n\tstx\t%g3, [%sp+2223+40]\n\tsrlx\t%l1, 32, %g2\n\tand\t%g2, xffff, 
%g2\n\tstx\t%g2, [%sp+2223+48]\n\tsrlx\t%l1, 48, %g3\n\tstx\t%g3, [%sp+2223+56]\n\n\tsrlx\t%g4, 32, xffffffff\tC store mask in register `xffffffff'\n\n\tldd\t[%sp+2223+0], v000\n\tldd\t[%sp+2223+8], v016\n\tldd\t[%sp+2223+16], v032\n\tldd\t[%sp+2223+24], v048\n\tfxtod\tv000, v000\n\tldd\t[%sp+2223+32], v064\n\tfxtod\tv016, v016\n\tldd\t[%sp+2223+40], v080\n\tfxtod\tv032, v032\n\tldd\t[%sp+2223+48], v096\n\tfxtod\tv048, v048\n\tldd\t[%sp+2223+56], v112\n\tfxtod\tv064, v064\n\tld\t[%sp+2223+0], u00_hi\tC zero u00_hi\n\tfxtod\tv080, v080\n\tld\t[%sp+2223+0], u32_hi\tC zero u32_hi\n\tfxtod\tv096, v096\n\tfxtod\tv112, v112\n')\nC Initialization done.\n\tmov\t0, %g2\n\tmov\t0, rlimb\n\tmov\t0, %g4\n\tadd\t%i0, -8, %i0\t\tC BOOKKEEPING\n\nC Start software pipeline.\n\n\tld\t[%i1+4], u00_lo\t\tC read low 32 bits of up[i]\n\tfxtod\tu00_hi, u00\nC mid\n\tld\t[%i1+0], u32_lo\t\tC read high 32 bits of up[i]\n\tfmuld\tu00, v000, a000\n\tfmuld\tu00, v016, a016\n\tfmuld\tu00, v032, a032\n\tfmuld\tu00, v048, a048\n\tadd\t%i2, -1, %i2\t\tC BOOKKEEPING\n\tfmuld\tu00, v064, p064\n\tadd\t%i1, 8, %i1\t\tC BOOKKEEPING\n\tfxtod\tu32_hi, u32\n\tfmuld\tu00, v080, p080\n\tfmuld\tu00, v096, p096a\n\tbrnz,pt\t%i2, .L_2_or_more\n\t fmuld\tu00, v112, p112a\n\n.L1:\tfdtox\ta000, out000\n\tfmuld\tu32, v000, p000\n\tfdtox\ta016, out016\n\tfmuld\tu32, v016, p016\n\tfmovd\tp064, a064\n\tfmuld\tu32, v032, p032\n\tfmovd\tp080, a080\n\tfmuld\tu32, v048, p048\n\tstd\tout000, [%sp+2223+16]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu32, v064, p064\n\tstd\tout016, [%sp+2223+24]\n\tfxtod\tu00_hi, u00\n\tfaddd\tp016, a048, a016\n\tfmuld\tu32, v080, p080\n\tfaddd\tp032, a064, a032\n\tfmuld\tu32, v096, p096b\n\tfaddd\tp048, a080, a048\n\tfmuld\tu32, v112, p112b\nC mid\n\tfdtox\ta000, out000\n\tfdtox\ta016, out016\n\tfaddd\tp064, p096a, a064\n\tfaddd\tp080, p112a, a080\n\tstd\tout000, [%sp+2223+0]\n\tb\t.L_wd2\n\t std\tout016, [%sp+2223+8]\n\n.L_2_or_more:\n\tld\t[%i1+4], u00_lo\t\tC read low 32 bits of 
up[i]\n\tfdtox\ta000, out000\n\tfmuld\tu32, v000, p000\n\tfdtox\ta016, out016\n\tfmuld\tu32, v016, p016\n\tfmovd\tp064, a064\n\tfmuld\tu32, v032, p032\n\tfmovd\tp080, a080\n\tfmuld\tu32, v048, p048\n\tstd\tout000, [%sp+2223+16]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu32, v064, p064\n\tstd\tout016, [%sp+2223+24]\n\tfxtod\tu00_hi, u00\n\tfaddd\tp016, a048, a016\n\tfmuld\tu32, v080, p080\n\tfaddd\tp032, a064, a032\n\tfmuld\tu32, v096, p096b\n\tfaddd\tp048, a080, a048\n\tfmuld\tu32, v112, p112b\nC mid\n\tld\t[%i1+0], u32_lo\t\tC read high 32 bits of up[i]\n\tfdtox\ta000, out000\n\tfmuld\tu00, v000, p000\n\tfdtox\ta016, out016\n\tfmuld\tu00, v016, p016\n\tfaddd\tp064, p096a, a064\n\tfmuld\tu00, v032, p032\n\tfaddd\tp080, p112a, a080\n\tfmuld\tu00, v048, p048\n\tadd\t%i2, -1, %i2\t\tC BOOKKEEPING\n\tstd\tout000, [%sp+2223+0]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu00, v064, p064\n\tadd\t%i1, 8, %i1\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+8]\n\tfxtod\tu32_hi, u32\n\tfaddd\tp016, a048, a016\n\tfmuld\tu00, v080, p080\n\tfaddd\tp032, a064, a032\n\tfmuld\tu00, v096, p096a\n\tfaddd\tp048, a080, a048\n\tbrnz,pt\t%i2, .L_3_or_more\n\t fmuld\tu00, v112, p112a\n\n\tb\t.Lend\n\t nop\n\nC  64      32       0\nC   .       .       .\nC   .       |__rXXX_|\t32\nC   .      |___cy___|\t34\nC   .  
|_______i00__|\t50\nC  |_______i16__|   .\t50\n\n\nC BEGIN MAIN LOOP\n\t.align\t16\n.L_3_or_more:\n.Loop:\tld\t[%i1+4], u00_lo\t\tC read low 32 bits of up[i]\n\tand\t%g2, xffffffff, %g2\n\tfdtox\ta000, out000\n\tfmuld\tu32, v000, p000\nC\n\tlduw\t[%i0+4+8], r00\t\tC read low 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta016, out016\n\tfmuld\tu32, v016, p016\nC\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+16], i00\n\tfaddd\tp064, p096b, a064\n\tfmuld\tu32, v032, p032\nC\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+24], i16\n\tfaddd\tp080, p112b, a080\n\tfmuld\tu32, v048, p048\nC\n\tnop\n\tstd\tout000, [%sp+2223+16]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu32, v064, p064\nC\n\tadd\ti00, r00, rlimb\n\tadd\t%i0, 8, %i0\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+24]\n\tfxtod\tu00_hi, u00\nC\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tfaddd\tp016, a048, a016\n\tfmuld\tu32, v080, p080\nC\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tfaddd\tp032, a064, a032\n\tfmuld\tu32, v096, p096b\nC\n\tstw\t%l5, [%i0+4]\n\tnop\n\tfaddd\tp048, a080, a048\n\tfmuld\tu32, v112, p112b\nC midloop\n\tld\t[%i1+0], u32_lo\t\tC read high 32 bits of up[i]\n\tand\t%g2, xffffffff, %g2\n\tfdtox\ta000, out000\n\tfmuld\tu00, v000, p000\nC\n\tlduw\t[%i0+0], r32\t\tC read high 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta016, out016\n\tfmuld\tu00, v016, p016\nC\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+0], i00\n\tfaddd\tp064, p096a, a064\n\tfmuld\tu00, v032, p032\nC\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+8], i16\n\tfaddd\tp080, p112a, a080\n\tfmuld\tu00, v048, p048\nC\n\tadd\t%i2, -1, %i2\t\tC BOOKKEEPING\n\tstd\tout000, [%sp+2223+0]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu00, v064, p064\nC\n\tadd\ti00, r32, rlimb\n\tadd\t%i1, 8, %i1\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+8]\n\tfxtod\tu32_hi, u32\nC\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tfaddd\tp016, a048, a016\n\tfmuld\tu00, v080, p080\nC\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tfaddd\tp032, a064, 
a032\n\tfmuld\tu00, v096, p096a\nC\n\tstw\t%l5, [%i0+0]\n\tfaddd\tp048, a080, a048\n\tbrnz,pt\t%i2, .Loop\n\t fmuld\tu00, v112, p112a\nC END MAIN LOOP\n\nC WIND-DOWN PHASE 1\n.Lend:\tand\t%g2, xffffffff, %g2\n\tfdtox\ta000, out000\n\tfmuld\tu32, v000, p000\n\tlduw\t[%i0+4+8], r00\t\tC read low 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta016, out016\n\tfmuld\tu32, v016, p016\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+16], i00\n\tfaddd\tp064, p096b, a064\n\tfmuld\tu32, v032, p032\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+24], i16\n\tfaddd\tp080, p112b, a080\n\tfmuld\tu32, v048, p048\n\tstd\tout000, [%sp+2223+16]\n\tfaddd\tp000, a032, a000\n\tfmuld\tu32, v064, p064\n\tadd\ti00, r00, rlimb\n\tadd\t%i0, 8, %i0\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+24]\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tfaddd\tp016, a048, a016\n\tfmuld\tu32, v080, p080\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tfaddd\tp032, a064, a032\n\tfmuld\tu32, v096, p096b\n\tstw\t%l5, [%i0+4]\n\tfaddd\tp048, a080, a048\n\tfmuld\tu32, v112, p112b\nC mid\n\tand\t%g2, xffffffff, %g2\n\tfdtox\ta000, out000\n\tlduw\t[%i0+0], r32\t\tC read high 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta016, out016\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+0], i00\n\tfaddd\tp064, p096a, a064\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+8], i16\n\tfaddd\tp080, p112a, a080\n\tstd\tout000, [%sp+2223+0]\n\tadd\ti00, r32, rlimb\n\tstd\tout016, [%sp+2223+8]\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tstw\t%l5, [%i0+0]\n\nC WIND-DOWN PHASE 2\n.L_wd2:\tand\t%g2, xffffffff, %g2\n\tfdtox\ta032, out000\n\tlduw\t[%i0+4+8], r00\t\tC read low 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta048, out016\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+16], i00\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+24], i16\n\tstd\tout000, [%sp+2223+16]\n\tadd\ti00, r00, rlimb\n\tadd\t%i0, 8, %i0\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+24]\n\tsllx\ti16, 16, 
%g2\n\tadd\tcy, rlimb, rlimb\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tstw\t%l5, [%i0+4]\nC mid\n\tand\t%g2, xffffffff, %g2\n\tfdtox\ta064, out000\n\tlduw\t[%i0+0], r32\t\tC read high 32 bits of rp[i]\n\tadd\t%g2, rlimb, %l5\n\tfdtox\ta080, out016\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+0], i00\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+8], i16\n\tstd\tout000, [%sp+2223+0]\n\tadd\ti00, r32, rlimb\n\tstd\tout016, [%sp+2223+8]\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tstw\t%l5, [%i0+0]\n\nC WIND-DOWN PHASE 3\n.L_wd3:\tand\t%g2, xffffffff, %g2\n\tfdtox\tp096b, out000\n\tadd\t%g2, rlimb, %l5\n\tfdtox\tp112b, out016\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+16], rlimb\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+24], i16\n\tstd\tout000, [%sp+2223+16]\n\tadd\t%i0, 8, %i0\t\tC BOOKKEEPING\n\tstd\tout016, [%sp+2223+24]\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tstw\t%l5, [%i0+4]\nC mid\n\tand\t%g2, xffffffff, %g2\n\tadd\t%g2, rlimb, %l5\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+0], rlimb\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+8], i16\n\tsllx\ti16, 16, %g2\n\tadd\tcy, rlimb, rlimb\n\tsrlx\ti16, 16, %g4\n\tadd\t%g2, rlimb, %l5\n\tstw\t%l5, [%i0+0]\n\n\tand\t%g2, xffffffff, %g2\n\tadd\t%g2, rlimb, %l5\n\tsrlx\t%l5, 32, cy\n\tldx\t[%sp+2223+16], i00\n\tadd\t%g4, cy, cy\t\tC new cy\n\tldx\t[%sp+2223+24], i16\n\n\tsllx\ti16, 16, %g2\n\tadd\ti00, cy, cy\n\treturn\t%i7+8\n\tadd\t%g2, cy, %o0\nEPILOGUE(mpn_addmul_2)\n"
  },
  {
    "path": "mpn/sparc64/copyd.asm",
    "content": "dnl  SPARC v9 mpn_copyd -- Copy a limb vector, decrementing.\n\ndnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     2\nC UltraSPARC 3:\t      2.5\n\nC INPUT PARAMETERS\nC rptr\t%o0\nC sptr\t%o1\nC 
n\t%o2\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_copyd)\n\tsllx\t%o2,3,%g1\n\tadd\t%g1,%o0,%o0\n\tadd\t%g1,%o1,%o1\n\taddcc\t%o2,-8,%o2\n\tbl,pt\t%icc,L(end01234567)\n\tnop\nL(loop1):\n\tldx\t[%o1-8],%g1\n\tldx\t[%o1-16],%g2\n\tldx\t[%o1-24],%g3\n\tldx\t[%o1-32],%g4\n\tldx\t[%o1-40],%g5\n\tldx\t[%o1-48],%o3\n\tldx\t[%o1-56],%o4\n\tldx\t[%o1-64],%o5\n\tadd\t%o1,-64,%o1\n\tstx\t%g1,[%o0-8]\n\tstx\t%g2,[%o0-16]\n\tstx\t%g3,[%o0-24]\n\tstx\t%g4,[%o0-32]\n\tstx\t%g5,[%o0-40]\n\tstx\t%o3,[%o0-48]\n\tstx\t%o4,[%o0-56]\n\tstx\t%o5,[%o0-64]\n\taddcc\t%o2,-8,%o2\n\tbge,pt\t%icc,L(loop1)\n\tadd\t%o0,-64,%o0\nL(end01234567):\n\taddcc\t%o2,8,%o2\n\tbz,pn\t%icc,L(end)\n\tnop\nL(loop2):\n\tldx\t[%o1-8],%g1\n\tadd\t%o1,-8,%o1\n\taddcc\t%o2,-1,%o2\n\tstx\t%g1,[%o0-8]\n\tbg,pt\t%icc,L(loop2)\n\tadd\t%o0,-8,%o0\nL(end):\tretl\n\tnop\nEPILOGUE(mpn_copyd)\n"
  },
  {
    "path": "mpn/sparc64/copyi.asm",
    "content": "dnl  SPARC v9 mpn_copyi -- Copy a limb vector, incrementing.\n\ndnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     2\nC UltraSPARC 3:\t      2.5\n\nC INPUT PARAMETERS\nC rptr\t%o0\nC sptr\t%o1\nC 
n\t%o2\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_copyi)\n\taddcc\t%o2,-8,%o2\n\tbl,pt\t%icc,L(end01234567)\n\tnop\nL(loop1):\n\tldx\t[%o1+0],%g1\n\tldx\t[%o1+8],%g2\n\tldx\t[%o1+16],%g3\n\tldx\t[%o1+24],%g4\n\tldx\t[%o1+32],%g5\n\tldx\t[%o1+40],%o3\n\tldx\t[%o1+48],%o4\n\tldx\t[%o1+56],%o5\n\tadd\t%o1,64,%o1\n\tstx\t%g1,[%o0+0]\n\tstx\t%g2,[%o0+8]\n\tstx\t%g3,[%o0+16]\n\tstx\t%g4,[%o0+24]\n\tstx\t%g5,[%o0+32]\n\tstx\t%o3,[%o0+40]\n\tstx\t%o4,[%o0+48]\n\tstx\t%o5,[%o0+56]\n\taddcc\t%o2,-8,%o2\n\tbge,pt\t%icc,L(loop1)\n\tadd\t%o0,64,%o0\nL(end01234567):\n\taddcc\t%o2,8,%o2\n\tbz,pn\t%icc,L(end)\n\tnop\nL(loop2):\n\tldx\t[%o1+0],%g1\n\tadd\t%o1,8,%o1\n\taddcc\t%o2,-1,%o2\n\tstx\t%g1,[%o0+0]\n\tbg,pt\t%icc,L(loop2)\n\tadd\t%o0,8,%o0\nL(end):\tretl\n\tnop\nEPILOGUE(mpn_copyi)\n"
  },
  {
    "path": "mpn/sparc64/divexact_1.c",
    "content": "/* UltraSPARC 64 mpn_divexact_1 -- mpn by limb exact division.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"mpn/sparc64/sparc64.h\"\n\n\n/*                 64-bit divisor   32-bit divisor\n                    cycles/limb      cycles/limb\n                     (approx)         (approx)\n   Ultrasparc 2i:      110               70\n*/\n\n\n/* There are two key ideas here to reduce mulx's.  Firstly when the divisor\n   is 32-bits the high of q*d can be calculated without the two 32x32->64\n   cross-products involving the high 32-bits of the divisor, that being zero\n   of course.  Secondly umul_ppmm_lowequal and umul_ppmm_half_lowequal save\n   one mulx (each) knowing the low of q*d is equal to the input limb l.\n\n   For size==1, a simple udivx is used.  
This is faster than calculating an\n   inverse.\n\n   For a 32-bit divisor and small sizes, an attempt was made at a simple\n   udivx loop (two per 64-bit limb), but it turned out to be slower than\n   mul-by-inverse.  At size==2 the inverse is about 260 cycles total\n   compared to a udivx at 291.  Perhaps the latter would suit when size==2\n   but the high 32-bits of the second limb is zero (saving one udivx), but\n   it doesn't seem worth a special case just for that.  */\n\nvoid\nmpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)\n{\n  mp_limb_t  inverse, s, s_next, c, l, ls, q;\n  unsigned   rshift, lshift;\n  mp_limb_t  lshift_mask;\n  mp_limb_t  divisor_h;\n\n  ASSERT (size >= 1);\n  ASSERT (divisor != 0);\n  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));\n  ASSERT_MPN (src, size);\n  ASSERT_LIMB (divisor);\n\n  s = *src++;                 /* src low limb */\n  size--;\n  if (size == 0)\n    {\n      *dst = s / divisor;\n      return;\n    }\n\n  if ((divisor & 1) == 0)\n    {\n      count_trailing_zeros (rshift, divisor);\n      divisor >>= rshift;\n    }\n  else\n    rshift = 0;\n\n  modlimb_invert (inverse, divisor);\n\n  lshift = 64 - rshift;\n\n  /* lshift==64 means no shift, so must mask out other part in this case */\n  lshift_mask = (rshift == 0 ? 
0 : MP_LIMB_T_MAX);\n\n  c = 0;\n  divisor_h = HIGH32 (divisor);\n\n  if (divisor_h == 0)\n    {\n      /* 32-bit divisor */\n      do\n        {\n          s_next = *src++;\n          ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);\n          s = s_next;\n\n          SUBC_LIMB (c, l, ls, c);\n\n          q = l * inverse;\n          *dst++ = q;\n\n          umul_ppmm_half_lowequal (l, q, divisor, l);\n          c += l;\n\n          size--;\n        }\n      while (size != 0);\n\n      ls = s >> rshift;\n      l = ls - c;\n      q = l * inverse;\n      *dst = q;\n    }\n  else\n    {\n      /* 64-bit divisor */\n      mp_limb_t  divisor_l = LOW32 (divisor);\n      do\n        {\n          s_next = *src++;\n          ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);\n          s = s_next;\n\n          SUBC_LIMB (c, l, ls, c);\n\n          q = l * inverse;\n          *dst++ = q;\n\n          umul_ppmm_lowequal (l, q, divisor, divisor_h, divisor_l, l);\n          c += l;\n\n          size--;\n        }\n      while (size != 0);\n\n      ls = s >> rshift;\n      l = ls - c;\n      q = l * inverse;\n      *dst = q;\n    }\n}\n"
  },
  {
    "path": "mpn/sparc64/divrem_1.c",
    "content": "/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"mpn/sparc64/sparc64.h\"\n\n\n/*                   64-bit divisor       32-bit divisor\n                       cycles/limb          cycles/limb\n                        (approx)             (approx)\n                   integer  fraction    integer  fraction\n   Ultrasparc 2i:    160      160          122      96\n*/\n\n\n/* 32-bit divisors are treated in special case code.  This requires 4 mulx\n   per limb instead of 8 in the general case.\n\n   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]\n   addressing, to get the two halves of each limb read in the correct order.\n   This is kept in an adj variable.  Doing that measures about 4 c/l faster\n   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter\n   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well\n   (on gcc 3.2.1 at least).  
The fraction loop doesn't seem affected, but we\n   still use a variable since that ought to work out best.  */\n\nmp_limb_t\nmpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,\n              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)\n{\n  mp_size_t  total_size_limbs;\n  mp_size_t  i;\n\n  ASSERT (xsize_limbs >= 0);\n  ASSERT (size_limbs >= 0);\n  ASSERT (d_limb != 0);\n  /* FIXME: What's the correct overlap rule when xsize!=0? */\n  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,\n                                  ap_limbptr, size_limbs));\n\n  total_size_limbs = size_limbs + xsize_limbs;\n  if (UNLIKELY (total_size_limbs == 0))\n    return 0;\n\n  /* udivx is good for total_size==1, and no need to bother checking\n     limb<divisor, since if that's likely the caller should check */\n  if (UNLIKELY (total_size_limbs == 1))\n    {\n      mp_limb_t  a, q;\n      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);\n      q = a / d_limb;\n      qp_limbptr[0] = q;\n      return a - q*d_limb;\n    }\n\n  if (d_limb <= CNST_LIMB(0xFFFFFFFF))\n    {\n      mp_size_t  size, xsize, total_size, adj;\n      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;\n      mp_limb_t  dinv_limb;\n      const unsigned *ap;\n      int        norm, norm_rshift;\n\n      size = 2 * size_limbs;\n      xsize = 2 * xsize_limbs;\n      total_size = size + xsize;\n\n      ap = (unsigned *) ap_limbptr;\n      qp = (unsigned *) qp_limbptr;\n\n      qp += xsize;\n      r = 0;        /* initial remainder */\n\n      if (LIKELY (size != 0))\n        {\n          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];\n\n          /* If the length of the source is uniformly distributed, then\n             there's a 50% chance of the high 32-bits being zero, which we\n             can skip.  
*/\n          if (n1 == 0)\n            {\n              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];\n              total_size--;\n              size--;\n              ASSERT (size > 0);  /* because always even */\n              qp[size + HALF_ENDIAN_ADJ(1)] = 0;\n            }\n\n          /* Skip a division if high < divisor (high quotient 0).  Testing\n             here before before normalizing will still skip as often as\n             possible.  */\n          if (n1 < d_limb)\n            {\n              r = n1;\n              size--;\n              qp[size + HALF_ENDIAN_ADJ(size)] = 0;\n              total_size--;\n              if (total_size == 0)\n                return r;\n            }\n        }\n\n      count_leading_zeros_32 (norm, d_limb);\n      norm -= 32;\n      d_limb <<= norm;\n      r <<= norm;\n\n      norm_rshift = 32 - norm;\n      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);\n\n      invert_half_limb (dinv_limb, d_limb);\n\n      if (LIKELY (size != 0))\n        {\n          i = size - 1;\n          adj = HALF_ENDIAN_ADJ (i);\n          n1 = ap[i + adj];\n          adj = -adj;\n          r |= ((n1 >> norm_rshift) & norm_rmask);\n          for ( ; i > 0; i--)\n            {\n              n0 = ap[i-1 + adj];\n              adj = -adj;\n              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);\n              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);\n              qp[i + adj] = q;\n              n1 = n0;\n            }\n          nshift = n1 << norm;\n          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);\n          qp[0 + HALF_ENDIAN_ADJ(0)] = q;\n        }\n      qp -= xsize;\n      adj = HALF_ENDIAN_ADJ (0);\n      for (i = xsize-1; i >= 0; i--)\n        {\n          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);\n          adj = -adj;\n          qp[i + adj] = q;\n        }\n\n      return r >> norm;\n    }\n  else\n    {\n      mp_srcptr  ap;\n      mp_ptr     qp;\n      mp_size_t  
size, xsize, total_size;\n      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;\n      int        norm, norm_rshift;\n\n      ap = ap_limbptr;\n      qp = qp_limbptr;\n      size = size_limbs;\n      xsize = xsize_limbs;\n      total_size = total_size_limbs;\n      d = d_limb;\n\n      qp += total_size;   /* above high limb */\n      r = 0;              /* initial remainder */\n\n      if (LIKELY (size != 0))\n        {\n          /* Skip a division if high < divisor (high quotient 0).  Testing\n             here before before normalizing will still skip as often as\n             possible.  */\n          n1 = ap[size-1];\n          if (n1 < d)\n            {\n              r = n1;\n              *--qp = 0;\n              total_size--;\n              if (total_size == 0)\n                return r;\n              size--;\n            }\n        }\n\n      count_leading_zeros (norm, d);\n      d <<= norm;\n      r <<= norm;\n\n      norm_rshift = GMP_LIMB_BITS - norm;\n      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));\n\n      invert_limb (dinv, d);\n\n      if (LIKELY (size != 0))\n        {\n          n1 = ap[size-1];\n          r |= ((n1 >> norm_rshift) & norm_rmask);\n          for (i = size-2; i >= 0; i--)\n            {\n              n0 = ap[i];\n              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);\n              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);\n              *--qp = q;\n              n1 = n0;\n            }\n          nshift = n1 << norm;\n          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);\n          *--qp = q;\n        }\n      for (i = 0; i < xsize; i++)\n        {\n          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);\n          *--qp = q;\n        }\n      return r >> norm;\n    }\n}\n"
  },
  {
    "path": "mpn/sparc64/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__)\n\n#define __CLOBBER_CC : \"cc\"\n#define __AND_CLOBBER_CC , \"cc\"\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\t\t\t\t\t\t\t\t\\\n       \"addcc\t%r4,%5,%1\\n\"\t\t\t\t\t\t\\\n      \"\taddccc\t%r6,%7,%%g0\\n\"\t\t\t\t\t\t\\\n      \"\taddc\t%r2,%3,%0\"\t\t\t\t\t\t\\\n\t  : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t  : \"rJ\" (ah), \"rI\" (bh), \"%rJ\" (al), \"rI\" (bl),\t\t\\\n\t    \"%rJ\" ((al) >> 32), \"rI\" ((bl) >> 32)\t\t\t\\\n\t   __CLOBBER_CC)\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\t\t\t\t\t\t\t\t\\\n       \"subcc\t%r4,%5,%1\\n\"\t\t\t\t\t\t\\\n      \"\tsubccc\t%r6,%7,%%g0\\n\"\t\t\t\t\t\t\\\n      \"\tsubc\t%r2,%3,%0\"\t\t\t\t\t\t\\\n\t  : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t  : \"rJ\" (ah), \"rI\" (bh), \"rJ\" (al), \"rI\" (bl),\t\t\\\n\t    \"rJ\" ((al) >> 32), \"rI\" ((bl) >> 32)\t\t\t\\\n\t   __CLOBBER_CC)\n\n#endif\n\n/* No processor claiming to be SPARC v9 compliant seems to\n   implement the POPC instruction.  Disable pattern for now.  
*/\n#if 0\n#if ! defined(popc_limb) && defined __GNUC__ && defined __sparc_v9__ \n#define popc_limb(result, input)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    DItype __res;\t\t\t\t\t\t\t\\\n    __asm__ (\"popc %1,%0\" : \"=r\" (result) : \"rI\" (input));\t\t\\\n  } while (0)\n#endif\n#endif\n"
  },
  {
    "path": "mpn/sparc64/lshift.asm",
    "content": "dnl  SPARC v9 mpn_lshift\n\ndnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     2\nC UltraSPARC 3:\t      3.25\n\nC INPUT PARAMETERS\ndefine(`rp',`%i0')\ndefine(`up',`%i1')\ndefine(`n',`%i2')\ndefine(`cnt',`%i3')\n\ndefine(`u0',`%l0')\ndefine(`u1',`%l2')\ndefine(`u2',`%l4')\ndefine(`u3',`%l6')\n\ndefine(`tnc',`%i4')\n\ndefine(`fanop',`fitod %f0,%f2')\t\tdnl  A quasi nop running in the FA pipe\ndefine(`fmnop',`fmuld %f0,%f0,%f4')\tdnl  A quasi nop running in the FM pipe\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_lshift)\n\tsave\t%sp,-160,%sp\n\n\tsllx\tn,3,%g1\n\tsub\t%g0,cnt,tnc\t\tC negate shift count\n\tadd\tup,%g1,up\t\tC make %o1 point at end of src\n\tadd\trp,%g1,rp\t\tC make %o0 point at end of res\n\tldx\t[up-8],u3\t\tC load first limb\n\tsubcc\tn,5,n\n\tsrlx\tu3,tnc,%i5\t\tC compute function 
result\n\tsllx\tu3,cnt,%g3\n\tbl,pn\t%icc,.Lend1234\n\tfanop\n\n\tsubcc\tn,4,n\n\tldx\t[up-16],u0\n\tldx\t[up-24],u1\n\tadd\tup,-32,up\n\tldx\t[up-0],u2\n\tldx\t[up-8],u3\n\tsrlx\tu0,tnc,%g2\n\n\tbl,pn\t%icc,.Lend5678\n\tfanop\n\n\tb,a\t.Loop\n\t.align\t16\n.Loop:\n\tsllx\tu0,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tldx\t[up-16],u0\n\tfanop\nC --\n\tsrlx\tu1,tnc,%g2\n\tsubcc\tn,4,n\n\tstx\t%g3,[rp-8]\n\tfanop\nC --\n\tsllx\tu1,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tldx\t[up-24],u1\n\tfanop\nC --\n\tsrlx\tu2,tnc,%g2\n\tstx\t%g1,[rp-16]\n\tadd\tup,-32,up\n\tfanop\nC --\n\tsllx\tu2,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tldx\t[up-0],u2\n\tfanop\nC --\n\tsrlx\tu3,tnc,%g2\n\tstx\t%g3,[rp-24]\n\tadd\trp,-32,rp\n\tfanop\nC --\n\tsllx\tu3,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tldx\t[up-8],u3\n\tfanop\nC --\n\tsrlx\tu0,tnc,%g2\n\tstx\t%g1,[rp-0]\n\tbge,pt\t%icc,.Loop\n\tfanop\nC --\n.Lend5678:\n\tsllx\tu0,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tsrlx\tu1,tnc,%g2\n\tstx\t%g3,[rp-8]\n\tsllx\tu1,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tsrlx\tu2,tnc,%g2\n\tstx\t%g1,[rp-16]\n\tsllx\tu2,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tsrlx\tu3,tnc,%g2\n\tstx\t%g3,[rp-24]\n\tadd\trp,-32,rp\n\tsllx\tu3,cnt,%g3\t\tC carry...\n\tor\t%g1,%g2,%g1\n\tstx\t%g1,[rp-0]\n\n.Lend1234:\n\taddcc\tn,4,n\n\tbz,pn\t%icc,.Lret\n\tfanop\n.Loop0:\n\tadd\trp,-8,rp\n\tsubcc\tn,1,n\n\tldx\t[up-16],u3\n\tadd\tup,-8,up\n\tsrlx\tu3,tnc,%g2\n\tor\t%g3,%g2,%g3\n\tstx\t%g3,[rp]\n\tsllx\tu3,cnt,%g3\n\tbnz,pt\t%icc,.Loop0\n\tfanop\n.Lret:\n\tstx\t%g3,[rp-8]\n\tmov\t%i5,%i0\n\tret\n\trestore\nEPILOGUE(mpn_lshift)\n"
  },
  {
    "path": "mpn/sparc64/mod_1.c",
    "content": "/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.\n\nCopyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"mpn/sparc64/sparc64.h\"\n\n\n/*                 64-bit divisor   32-bit divisor\n                    cycles/limb      cycles/limb\n                     (approx)         (approx)\n   Ultrasparc 2i:      160               120\n*/\n\n\n/* 32-bit divisors are treated in special case code.  This requires 4 mulx\n   per limb instead of 8 in the general case.\n\n   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]\n   addressing, to get the two halves of each limb read in the correct order.\n   This is kept in an adj variable.  Doing that measures about 6 c/l faster\n   than just writing HALF_ENDIAN_ADJ(i) in the loop.  The latter shouldn't\n   be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc\n   3.2.1 at least).\n\n   A simple udivx/umulx loop for the 32-bit case was attempted for small\n   sizes, but at size==2 it was only about the same speed and at size==3 was\n   slower.  
*/\n\nmp_limb_t\nmpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)\n{\n  int        norm, norm_rshift;\n  mp_limb_t  src_high_limb;\n  mp_size_t  i;\n\n  ASSERT (size_limbs >= 0);\n  ASSERT (d_limb != 0);\n\n  if (UNLIKELY (size_limbs == 0))\n    return 0;\n\n  src_high_limb = src_limbptr[size_limbs-1];\n\n  /* udivx is good for size==1, and no need to bother checking limb<divisor,\n     since if that's likely the caller should check */\n  if (UNLIKELY (size_limbs == 1))\n    return src_high_limb % d_limb;\n\n  if (d_limb <= CNST_LIMB(0xFFFFFFFF))\n    {\n      unsigned   *src, n1, n0, r, dummy_q, nshift, norm_rmask;\n      mp_size_t  size, adj;\n      mp_limb_t  dinv_limb;\n\n      size = 2 * size_limbs;    /* halfwords */\n      src = (unsigned *) src_limbptr;\n\n      /* prospective initial remainder, if < d */\n      r = src_high_limb >> 32;\n\n      /* If the length of the source is uniformly distributed, then there's\n         a 50% chance of the high 32-bits being zero, which we can skip.  */\n      if (r == 0)\n        {\n          r = (unsigned) src_high_limb;\n          size--;\n          ASSERT (size > 0);  /* because always even */\n        }\n\n      /* Skip a division if high < divisor.  Having the test here before\n         normalizing will still skip as often as possible.  */\n      if (r < d_limb)\n        {\n          size--;\n          ASSERT (size > 0);  /* because size==1 handled above */\n        }\n      else\n        r = 0;\n\n      count_leading_zeros_32 (norm, d_limb);\n      norm -= 32;\n      d_limb <<= norm;\n\n      norm_rshift = 32 - norm;\n      norm_rmask = (norm == 0 ? 
0 : 0xFFFFFFFF);\n      i = size-1;\n      adj = HALF_ENDIAN_ADJ (i);\n      n1 = src [i + adj];\n      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);\n\n      invert_half_limb (dinv_limb, d_limb);\n      adj = -adj;\n\n      for (i--; i >= 0; i--)\n        {\n          n0 = src [i + adj];\n          adj = -adj;\n          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);\n          udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);\n          n1 = n0;\n        }\n\n      /* same as loop, but without n0 */\n      nshift = n1 << norm;\n      udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);\n\n      ASSERT ((r & ((1 << norm) - 1)) == 0);\n      return r >> norm;\n    }\n  else\n    {\n      mp_srcptr  src;\n      mp_size_t  size;\n      mp_limb_t  n1, n0, r, dinv, dummy_q, nshift, norm_rmask;\n\n      src = src_limbptr;\n      size = size_limbs;\n      r = src_high_limb;  /* initial remainder */\n\n      /* Skip a division if high < divisor.  Having the test here before\n         normalizing will still skip as often as possible.  */\n      if (r < d_limb)\n        {\n          size--;\n          ASSERT (size > 0);  /* because size==1 handled above */\n        }\n      else\n        r = 0;\n\n      count_leading_zeros (norm, d_limb);\n      d_limb <<= norm;\n\n      norm_rshift = GMP_LIMB_BITS - norm;\n      norm_rmask = (norm == 0 ? 
0 : 0xFFFFFFFF);\n\n      src += size;\n      n1 = *--src;\n      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);\n\n      invert_limb (dinv, d_limb);\n\n      for (i = size-2; i >= 0; i--)\n        {\n          n0 = *--src;\n          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);\n          udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);\n          n1 = n0;\n        }\n\n      /* same as loop, but without n0 */\n      nshift = n1 << norm;\n      udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);\n\n      ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);\n      return r >> norm;\n    }\n}\n"
  },
  {
    "path": "mpn/sparc64/modexact_1c_odd.c",
    "content": "/* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"mpn/sparc64/sparc64.h\"\n\n\n/*                 64-bit divisor   32-bit divisor\n                    cycles/limb      cycles/limb\n                     (approx)         (approx)\n   Ultrasparc 2i:       ?                ?\n*/\n\n\n/* This implementation reduces the number of multiplies done, knowing that\n   on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.\n\n   The key idea is to use the fact that the low limb of q*d equals l, this\n   being the whole purpose of the q calculated.  It means there's no need to\n   calculate the lowest 32x32->64 part of the q*d, instead it can be\n   inferred from l and the other three 32x32->64 parts.  
See sparc64.h for\n   details.\n\n   When d is 32-bits, the same applies, but in this case there's only one\n   other 32x32->64 part (ie. HIGH(q)*d).\n\n   The net effect is that for 64-bit divisor each limb is 4 mulx, or for\n   32-bit divisor each is 2 mulx.\n\n   Enhancements:\n\n   No doubt this could be done in assembler, if that helped the scheduling,\n   or perhaps guaranteed good code irrespective of the compiler.\n\n   Alternatives:\n\n   It might be possibly to use floating point.  The loop is dominated by\n   multiply latency, so not sure if floats would improve that.  One\n   possibility would be to take two limbs at a time, with a 128 bit inverse,\n   if there's enough registers, which could effectively use float throughput\n   to reduce total latency across two limbs.  */\n\n#define ASSERT_RETVAL(r)                \\\n  ASSERT (orig_c < d ? r < d : r <= d)\n\nmp_limb_t\nmpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)\n{\n  mp_limb_t  c = orig_c;\n  mp_limb_t  s, l, q, h, inverse;\n\n  ASSERT (size >= 1);\n  ASSERT (d & 1);\n  ASSERT_MPN (src, size);\n  ASSERT_LIMB (d);\n  ASSERT_LIMB (c);\n\n  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */\n  if (size == 1)\n    {\n      s = src[0];\n      if (s > c)\n\t{\n\t  l = s-c;\n\t  h = l % d;\n\t  if (h != 0)\n\t    h = d - h;\n\t}\n      else\n\t{\n\t  l = c-s;\n\t  h = l % d;\n\t}\n      return h;\n    }\n\n  modlimb_invert (inverse, d);\n\n  if (d <= 0xFFFFFFFF)\n    {\n      s = *src++;\n      size--;\n      do\n        {\n          SUBC_LIMB (c, l, s, c);\n          s = *src++;\n          q = l * inverse;\n          umul_ppmm_half_lowequal (h, q, d, l);\n          c += h;\n          size--;\n        }\n      while (size != 0);\n\n      if (s <= d)\n        {\n          /* With high s <= d the final step can be a subtract and addback.\n             If c==0 then the addback will restore to l>=0.  
If c==d then\n             will get l==d if s==0, but that's ok per the function\n             definition.  */\n\n          l = c - s;\n          l += (l > c ? d : 0);\n\n          ASSERT_RETVAL (l);\n          return l;\n        }\n      else\n        {\n          /* Can't skip a divide, just do the loop code once more. */\n          SUBC_LIMB (c, l, s, c);\n          q = l * inverse;\n          umul_ppmm_half_lowequal (h, q, d, l);\n          c += h;\n\n          ASSERT_RETVAL (c);\n          return c;\n        }\n    }\n  else\n    {\n      mp_limb_t  dl = LOW32 (d);\n      mp_limb_t  dh = HIGH32 (d);\n      long i;\n\n      s = *src++;\n      size--;\n      do\n        {\n          SUBC_LIMB (c, l, s, c);\n          s = *src++;\n          q = l * inverse;\n          umul_ppmm_lowequal (h, q, d, dh, dl, l);\n          c += h;\n          size--;\n        }\n      while (size != 0);\n\n      if (s <= d)\n        {\n          /* With high s <= d the final step can be a subtract and addback.\n             If c==0 then the addback will restore to l>=0.  If c==d then\n             will get l==d if s==0, but that's ok per the function\n             definition.  */\n\n          l = c - s;\n          l += (l > c ? d : 0);\n\n          ASSERT_RETVAL (l);\n          return l;\n        }\n      else\n        {\n          /* Can't skip a divide, just do the loop code once more. */\n          SUBC_LIMB (c, l, s, c);\n          q = l * inverse;\n          umul_ppmm_lowequal (h, q, d, dh, dl, l);\n          c += h;\n\n          ASSERT_RETVAL (c);\n          return c;\n        }\n    }\n}\n"
  },
  {
    "path": "mpn/sparc64/mul_1.asm",
    "content": "dnl  SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store\ndnl  the result in a second limb vector.\n\ndnl  Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     14\nC UltraSPARC 3:\t      18.5\n\nC Algorithm: We use eight floating-point multiplies per limb product, with the\nC invariant v operand split into four 16-bit pieces, and the s1 operand split\nC into 32-bit pieces.  We sum pairs of 48-bit partial products using\nC floating-point add, then convert the four 49-bit product-sums and transfer\nC them to the integer unit.\n\nC Possible optimizations:\nC   1. Align the stack area where we transfer the four 49-bit product-sums\nC      to a 32-byte boundary.  That would minimize the cache collision.\nC      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would\nC      be to align the area to map to the area immediately before s1?)\nC   2. Sum the 4 49-bit quantities using 32-bit operations, as in the\nC      develop mpn_addmul_2.  
This would save many integer instructions.\nC   3. Unrolling.  Questionable if it is worth the code expansion, given that\nC      it could only save 1 cycle/limb.\nC   4. Specialize for particular v values.  If its upper 32 bits are zero, we\nC      could save many operations, in the FPU (fmuld), but more so in the IEU\nC      since we'll be summing 48-bit quantities, which might be simpler.\nC   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and\nC      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should\nC      not be greater than needed for L2 cache latency, and also not so great\nC      that i16 needs to be copied.\nC   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want\nC      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU\nC      ops.)\n\nC Instruction classification (as per UltraSPARC-1/2 functional units):\nC    8 FM\nC   10 FA\nC   11 MEM\nC   9 ISHIFT + 10? IADDLOG\nC    1 BRANCH\nC   49 insns totally (plus three mov insns that should be optimized out)\n\nC The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e we\nC sustain 3.79 instructions/cycle.\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\n\ndefine(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')\ndefine(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')\ndefine(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')\ndefine(`u00',`%f32') define(`u32', `%f34')\ndefine(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')\ndefine(`cy',`%g1')\ndefine(`rlimb',`%g3')\ndefine(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')\ndefine(`xffffffff',`%l7')\ndefine(`xffff',`%o0')\n\nPROLOGUE(mpn_mul_1)\n\nC Initialization.  
(1) Split v operand into four 16-bit chunks and store them\nC as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs\nC f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.\n\n\tsave\t%sp, -256, %sp\n\tmov\t-1, %g4\n\tsrlx\t%g4, 48, xffff\t\tC store mask in register `xffff'\n\tand\t%i3, xffff, %g2\n\tstx\t%g2, [%sp+2223+0]\n\tsrlx\t%i3, 16, %g3\n\tand\t%g3, xffff, %g3\n\tstx\t%g3, [%sp+2223+8]\n\tsrlx\t%i3, 32, %g2\n\tand\t%g2, xffff, %g2\n\tstx\t%g2, [%sp+2223+16]\n\tsrlx\t%i3, 48, %g3\n\tstx\t%g3, [%sp+2223+24]\n\tsrlx\t%g4, 32, xffffffff\tC store mask in register `xffffffff'\n\n\tsllx\t%i2, 3, %i2\n\tmov\t0, cy\t\t\tC clear cy\n\tadd\t%i0, %i2, %i0\n\tadd\t%i1, %i2, %i1\n\tneg\t%i2\n\tadd\t%i1, 4, %i5\n\tadd\t%i0, -32, %i4\n\tadd\t%i0, -16, %i0\n\n\tldd\t[%sp+2223+0], v00\n\tldd\t[%sp+2223+8], v16\n\tldd\t[%sp+2223+16], v32\n\tldd\t[%sp+2223+24], v48\n\tld\t[%sp+2223+0],%f2\tC zero f2\n\tld\t[%sp+2223+0],%f4\tC zero f4\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfxtod\tv00, v00\n\tfxtod\tv16, v16\n\tfxtod\tv32, v32\n\tfxtod\tv48, v48\n\nC Start real work.  
(We sneakingly read f3 and f5 above...)\nC The software pipeline is very deep, requiring 4 feed-in stages.\n\n\tfxtod\t%f2, u00\n\tfxtod\t%f4, u32\n\tfmuld\tu00, v00, a00\n\tfmuld\tu00, v16, a16\n\tfmuld\tu00, v32, p32\n\tfmuld\tu32, v00, r32\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_two_or_more\n\tfmuld\tu32, v16, r48\n\n.L_one:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tfdtox\ta32, a32\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tstd\ta32, [%sp+2223+16]\n\tstd\ta48, [%sp+2223+24]\n\tadd\t%i2, 8, %i2\n\n\tfdtox\tr64, a00\n\tfdtox\tr80, a16\n\tldx\t[%sp+2223+0], i00\n\tldx\t[%sp+2223+8], i16\n\tldx\t[%sp+2223+16], i32\n\tldx\t[%sp+2223+24], i48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tadd\t%i2, 8, %i2\n\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_1\n\tadd\t%i2, 8, %i2\n\n.L_two_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tfdtox\ta32, a32\n\tfxtod\t%f2, 
u00\n\tfxtod\t%f4, u32\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_three_or_more\n\tfmuld\tu32, v16, r48\n\n.L_two:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tldx\t[%sp+2223+8], i16\n\tldx\t[%sp+2223+16], i32\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tstd\ta16, [%sp+2223+8]\n\tstd\ta32, [%sp+2223+16]\n\tstd\ta48, [%sp+2223+24]\n\tadd\t%i2, 8, %i2\n\n\tfdtox\tr64, a00\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfdtox\tr80, a16\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_2\n\tadd\t%i2, 8, %i2\n\n.L_three_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, 
a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, u32\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .L_four_or_more\n\tfmuld\tu32, v16, r48\n\n.L_three:\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tb\t.L_out_3\n\tadd\t%i2, 8, %i2\n\n.L_four_or_more:\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, 
a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, u32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .Loop\n\tfmuld\tu32, v16, r48\n\n.L_four:\n\tb,a\t.L_out_4\n\nC BEGIN MAIN LOOP\n\t.align\t16\n.Loop:\nC 00\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tld\t[%i5+%i2], %f3\t\tC read low 32 bits of up[i]\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\nC 01\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tld\t[%i1+%i2], %f5\t\tC read high 32 bits of up[i]\n\tfdtox\ta00, a00\nC 02\n\tfaddd\tp48, r48, a48\nC 03\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\nC 04\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\nC 05\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tfxtod\t%f2, u00\nC 06\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tfxtod\t%f4, 
u32\nC 07\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\nC 08\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tfmuld\tu00, v00, p00\nC 09\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tfmuld\tu00, v16, p16\nC 10\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tfmuld\tu00, v32, p32\nC 11\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tfaddd\tp00, r64, a00\n\tfmuld\tu32, v00, r32\nC 12\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tfaddd\tp16, r80, a16\n\tfmuld\tu00, v48, p48\nC 13\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\taddcc\t%i2, 8, %i2\n\tbnz,pt\t%icc, .Loop\n\tfmuld\tu32, v16, r48\nC END MAIN LOOP\n\n.L_out_4:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tfmuld\tu32, v32, r64\tC FIXME not urgent\n\tfaddd\tp32, r32, a32\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tfdtox\ta00, a00\n\tfaddd\tp48, r48, a48\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfmuld\tu32, v48, r80\tC FIXME not urgent\n\tfdtox\ta16, a16\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tfdtox\ta32, a32\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tfdtox\ta48, a48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 
48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta32, [%sp+2223+16]\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tstd\ta48, [%sp+2223+24]\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_3:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tfdtox\tr64, a00\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tfdtox\tr80, a16\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tldx\t[%sp+2223+16], i32\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tldx\t[%sp+2223+24], i48\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, %l4, %o1\t\tC hi64- in %o1\n\tstd\ta00, [%sp+2223+0]\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tstd\ta16, [%sp+2223+8]\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_2:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tmov\ti00, %g5\t\tC i00+ now in g5\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tldx\t[%sp+2223+0], i00\n\tsrlx\ti16, 48, %l4\t\tC (i16 >> 48)\n\tmov\ti16, %g2\n\tldx\t[%sp+2223+8], i16\n\tsrlx\ti48, 16, %l5\t\tC (i48 >> 16)\n\tmov\ti32, %g4\t\tC i32+ now in g4\n\tsllx\ti48, 32, %l6\t\tC (i48 << 32)\n\tor\t%i3, %o5, %o5\n\tsrlx\t%g4, 32, %o3\t\tC (i32 >> 32)\n\tadd\t%l5, 
%l4, %o1\t\tC hi64- in %o1\n\tsllx\t%g4, 16, %o2\t\tC (i32 << 16)\n\tadd\t%o3, %o1, %o1\t\tC hi64 in %o1   1st ASSIGNMENT\n\tsllx\t%o1, 48, %o3\t\tC (hi64 << 48)\n\tadd\t%g2, %o2, %o2\t\tC mi64- in %o2\n\tadd\t%l6, %o2, %o2\t\tC mi64- in %o2\n\tsub\t%o2, %o3, %o2\t\tC mi64 in %o2   1st ASSIGNMENT\n\tstx\t%o5, [%i4+%i2]\n\tadd\tcy, %g5, %o4\t\tC x = prev(i00) + cy\n\tadd\t%i2, 8, %i2\n.L_out_1:\n\tsrlx\t%o4, 16, %o5\t\tC (x >> 16)\n\tadd\t%o5, %o2, %o2\t\tC mi64 in %o2   2nd ASSIGNMENT\n\tand\t%o4, xffff, %o5\t\tC (x & 0xffff)\n\tsrlx\t%o2, 48, %o7\t\tC (mi64 >> 48)\n\tsllx\t%o2, 16, %i3\t\tC (mi64 << 16)\n\tadd\t%o7, %o1, cy\t\tC new cy\n\tor\t%i3, %o5, %o5\n\tstx\t%o5, [%i4+%i2]\n\n\tsllx\ti00, 0, %g2\n\tadd\t%g2, cy, cy\n\tsllx\ti16, 16, %g3\n\tadd\t%g3, cy, cy\n\n\treturn\t%i7+8\n\tmov\tcy, %o0\nEPILOGUE(mpn_mul_1)\n"
  },
  {
    "path": "mpn/sparc64/rshift.asm",
    "content": "dnl  SPARC v9 mpn_rshift\n\ndnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     2\nC UltraSPARC 3:\t      3.25\n\nC INPUT PARAMETERS\ndefine(`rp',`%i0')\ndefine(`up',`%i1')\ndefine(`n',`%i2')\ndefine(`cnt',`%i3')\n\ndefine(`u0',`%l0')\ndefine(`u1',`%l2')\ndefine(`u2',`%l4')\ndefine(`u3',`%l6')\n\ndefine(`tnc',`%i4')\n\ndefine(`fanop',`fitod %f0,%f2')\t\tdnl  A quasi nop running in the FA pipe\ndefine(`fmnop',`fmuld %f0,%f0,%f4')\tdnl  A quasi nop running in the FM pipe\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_rshift)\n\tsave\t%sp,-160,%sp\n\n\tsub\t%g0,cnt,tnc\t\tC negate shift count\n\tldx\t[up],u3\t\t\tC load first limb\n\tsubcc\tn,5,n\n\tsllx\tu3,tnc,%i5\t\tC compute function 
result\n\tsrlx\tu3,cnt,%g3\n\tbl,pn\t%icc,.Lend1234\n\tfanop\n\n\tsubcc\tn,4,n\n\tldx\t[up+8],u0\n\tldx\t[up+16],u1\n\tadd\tup,32,up\n\tldx\t[up-8],u2\n\tldx\t[up+0],u3\n\tsllx\tu0,tnc,%g2\n\n\tbl,pn\t%icc,.Lend5678\n\tfanop\n\n\tb,a\t.Loop\n\t.align\t16\n.Loop:\n\tsrlx\tu0,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tldx\t[up+8],u0\n\tfanop\nC --\n\tsllx\tu1,tnc,%g2\n\tsubcc\tn,4,n\n\tstx\t%g3,[rp+0]\n\tfanop\nC --\n\tsrlx\tu1,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tldx\t[up+16],u1\n\tfanop\nC --\n\tsllx\tu2,tnc,%g2\n\tstx\t%g1,[rp+8]\n\tadd\tup,32,up\n\tfanop\nC --\n\tsrlx\tu2,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tldx\t[up-8],u2\n\tfanop\nC --\n\tsllx\tu3,tnc,%g2\n\tstx\t%g3,[rp+16]\n\tadd\trp,32,rp\n\tfanop\nC --\n\tsrlx\tu3,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tldx\t[up+0],u3\n\tfanop\nC --\n\tsllx\tu0,tnc,%g2\n\tstx\t%g1,[rp-8]\n\tbge,pt\t%icc,.Loop\n\tfanop\nC --\n.Lend5678:\n\tsrlx\tu0,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tsllx\tu1,tnc,%g2\n\tstx\t%g3,[rp+0]\n\tsrlx\tu1,cnt,%g3\n\tor\t%g1,%g2,%g1\n\tsllx\tu2,tnc,%g2\n\tstx\t%g1,[rp+8]\n\tsrlx\tu2,cnt,%g1\n\tor\t%g3,%g2,%g3\n\tsllx\tu3,tnc,%g2\n\tstx\t%g3,[rp+16]\n\tadd\trp,32,rp\n\tsrlx\tu3,cnt,%g3\t\tC carry...\n\tor\t%g1,%g2,%g1\n\tstx\t%g1,[rp-8]\n\n.Lend1234:\n\taddcc\tn,4,n\n\tbz,pn\t%icc,.Lret\n\tfanop\n.Loop0:\n\tadd\trp,8,rp\n\tsubcc\tn,1,n\n\tldx\t[up+8],u3\n\tadd\tup,8,up\n\tsllx\tu3,tnc,%g2\n\tor\t%g3,%g2,%g3\n\tstx\t%g3,[rp-8]\n\tsrlx\tu3,cnt,%g3\n\tbnz,pt\t%icc,.Loop0\n\tfanop\n.Lret:\n\tstx\t%g3,[rp+0]\n\tmov\t%i5,%i0\n\tret\n\trestore\nEPILOGUE(mpn_rshift)\n"
  },
  {
    "path": "mpn/sparc64/sparc64.h",
    "content": "/* UltraSPARC 64 support macros.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define LOW32(x)   ((x) & 0xFFFFFFFF)\n#define HIGH32(x)  ((x) >> 32)\n\n\n/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].\n   Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the\n   effect of swapping the two halves in this case.  */\n#if HAVE_LIMB_BIG_ENDIAN\n#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */\n#endif\n#if HAVE_LIMB_LITTLE_ENDIAN\n#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */\n#endif\n#ifndef HALF_ENDIAN_ADJ\nError, error, unknown limb endianness;\n#endif\n\n\n/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb\n   of that product is equal to l.  
dh and dl are the 32-bit halves of d.\n\n   |-----high----||----low-----|\n   +------+------+\n   |             |                 ph = qh * dh\n   +------+------+\n          +------+------+\n          |             |          pm1 = ql * dh\n          +------+------+\n          +------+------+\n          |             |          pm2 = qh * dl\n          +------+------+\n                 +------+------+\n                 |             |   pl = ql * dl (not calculated)\n                 +------+------+\n\n   Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)\n   + HIGH(pl) == HIGH(l).  The only thing we need from those product parts\n   is whether they produce a carry into the high.\n\n   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only\n   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >\n   HIGH(l).  pl is never actually calculated.  */\n\n#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \\\n  do {                                          \\\n    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \\\n    ASSERT (dh == HIGH32(d));                   \\\n    ASSERT (dl == LOW32(d));                    \\\n    ASSERT (q*d == l);                          \\\n                                                \\\n    ql = LOW32 (q);                             \\\n    qh = HIGH32 (q);                            \\\n                                                \\\n    pm1 = ql * dh;                              \\\n    pm2 = qh * dl;                              \\\n    ph  = qh * dh;                              \\\n                                                \\\n    pm_l = LOW32 (pm1) + LOW32 (pm2);           \\\n                                                \\\n    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \\\n      + HIGH32 (pm_l) + ((pm_l << 32) > l);     \\\n                                                \\\n    ASSERT_HIGH_PRODUCT (h, q, d);              \\\n  } while (0)\n\n\n/* Set h to the high 
of q*d, assuming the low limb of that product is equal\n   to l, and that d fits in 32-bits.\n\n   |-----high----||----low-----|\n          +------+------+\n          |             |          pm = qh * dl\n          +------+------+\n                 +------+------+\n                 |             |   pl = ql * dl (not calculated)\n                 +------+------+\n\n   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only\n   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no\n   need to calculate pl to determine this.  */\n\n#define umul_ppmm_half_lowequal(h, q, d, l)     \\\n  do {                                          \\\n    mp_limb_t pm;                               \\\n    ASSERT (q*d == l);                          \\\n    ASSERT (HIGH32(d) == 0);                    \\\n                                                \\\n    pm = HIGH32(q) * d;                         \\\n    (h) = HIGH32(pm) + ((pm << 32) > l);        \\\n    ASSERT_HIGH_PRODUCT (h, q, d);              \\\n  } while (0)\n\n\n/* check that h is the high limb of x*y */\n#if WANT_ASSERT\n#define ASSERT_HIGH_PRODUCT(h, x, y)    \\\n  do {                                  \\\n    mp_limb_t  want_h, dummy;           \\\n    umul_ppmm (want_h, dummy, x, y);    \\\n    ASSERT (h == want_h);               \\\n  } while (0)\n#else\n#define ASSERT_HIGH_PRODUCT(h, q, d)    \\\n  do { } while (0)\n#endif\n\n\n/* Count the leading zeros on a limb, but assuming it fits in 32 bits.\n   The count returned will be in the range 32 to 63.\n   This is the 32-bit generic C count_leading_zeros from longlong.h. 
*/\n#define count_leading_zeros_32(count, x)                                      \\\n  do {                                                                        \\\n    mp_limb_t  __xr = (x);                                                    \\\n    unsigned   __a;                                                           \\\n    ASSERT ((x) != 0);                                                        \\\n    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \\\n    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \\\n      : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \\\n                                                                              \\\n    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \\\n  } while (0)\n\n\n/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits\n   32 bits and is normalized (high bit set).  */\n#define invert_half_limb(inv, d)                \\\n  do {                                          \\\n    mp_limb_t  _n;                              \\\n    ASSERT ((d) <= 0xFFFFFFFF);                 \\\n    ASSERT ((d) & 0x80000000);                  \\\n    _n = (((mp_limb_t) -(d)) << 32) - 1;        \\\n    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \\\n  } while (0)\n\n\n/* Divide nh:nl by d, setting q to the quotient and r to the remainder.\n   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,\n   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  
*/\n\n#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \\\n  do {                                                                  \\\n    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \\\n    mp_limb_t  _n, _x;                                                  \\\n    ASSERT (d_limb <= 0xFFFFFFFF);                                      \\\n    ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \\\n    ASSERT (d_limb & 0x80000000);                                       \\\n    ASSERT (nh < d_limb);                                               \\\n    _n10 = (nl);                                                        \\\n    _n2 = (nh);                                                         \\\n    _n1 = (int) _n10 >> 31;                                             \\\n    _nadj = _n10 + (_n1 & d_limb);                                      \\\n    _x = dinv_limb * (_n2 - _n1) + _nadj;                               \\\n    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \\\n    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \\\n    _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \\\n    _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \\\n    ASSERT (_xh == 0 || _xh == ~0);                                     \\\n    _r = _x + (d_limb & _xh);                 /* addback */             \\\n    _q = _xh - _q11n;                         /* q1+1-addback */        \\\n    ASSERT (_r < d_limb);                                               \\\n    ASSERT (d_limb * _q + _r == _n);                                    \\\n    (r) = _r;                                                           \\\n    (q) = _q;                                                           \\\n  } while (0)\n\n\n"
  },
  {
    "path": "mpn/sparc64/sqr_diagonal.asm",
    "content": "dnl  SPARC v9 64-bit mpn_sqr_diagonal.\n\ndnl  Copyright 2001, 2002 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     22\nC UltraSPARC 3:\t      36\n\nC This was generated by the Sun C compiler.  It runs at 22 cycles/limb on the\nC UltraSPARC-1/2, three cycles slower than theoretically possible for optimal\nC code using the same algorithm.  
For 1-3 limbs, a special loop was generated,\nC which causes performance problems in particular for 2 and 3 limbs.\nC Ultimately, this should be replaced by hand-written code in the same software\nC pipeline style as e.g., addmul_1.asm.\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_sqr_diagonal)\n\tsave\t%sp, -240, %sp\n\n\tsethi\t%hi(0x1ffc00), %o0\n\tsethi\t%hi(0x3ffc00), %o1\n\tadd\t%o0, 1023, %o7\n\tcmp\t%i2, 4\n\tadd\t%o1, 1023, %o4\n\tor\t%g0, %i1, %g1\n\tor\t%g0, %i0, %o0\n\tbl,pn\t%xcc, .Lsmall\n\tor\t%g0, 0, %g2\n\n\tldx\t[%i1], %o1\n\tadd\t%i1, 24, %g1\n\tor\t%g0, 3, %g2\n\tsrlx\t%o1, 42, %g3\n\tstx\t%g3, [%sp+2279]\n\tand\t%o1, %o7, %o2\n\tstx\t%o2, [%sp+2263]\n\tsrlx\t%o1, 21, %o1\n\tldd\t[%sp+2279], %f0\n\tand\t%o1, %o7, %o1\n\tstx\t%o1, [%sp+2271]\n\tldx\t[%i1+8], %o2\n\tfxtod\t%f0, %f12\n\tsrlx\t%o2, 21, %o1\n\tand\t%o2, %o7, %g3\n\tldd\t[%sp+2263], %f2\n\tfmuld\t%f12, %f12, %f10\n\tsrlx\t%o2, 42, %o2\n\tldd\t[%sp+2271], %f0\n\tand\t%o1, %o7, %o1\n\tfxtod\t%f2, %f8\n\tstx\t%o2, [%sp+2279]\n\tstx\t%o1, [%sp+2271]\n\tfxtod\t%f0, %f0\n\tstx\t%g3, [%sp+2263]\n\tfdtox\t%f10, %f14\n\tfmuld\t%f12, %f8, %f6\n\tldx\t[%i1+16], %o2\n\tstd\t%f14, [%sp+2255]\n\tfmuld\t%f0, %f0, %f2\n\tfmuld\t%f8, %f8, %f10\n\tsrlx\t%o2, 42, %o1\n\tfaddd\t%f6, %f6, %f6\n\tfmuld\t%f12, %f0, %f12\n\tfmuld\t%f0, %f8, %f8\n\tldd\t[%sp+2279], %f0\n\tldd\t[%sp+2263], %f4\n\tfdtox\t%f10, %f10\n\tstd\t%f10, [%sp+2239]\n\tfaddd\t%f2, %f6, %f6\n\tldd\t[%sp+2271], %f2\n\tfdtox\t%f12, %f12\n\tstd\t%f12, [%sp+2247]\n\tfdtox\t%f8, %f8\n\tstd\t%f8, [%sp+2231]\n\tfdtox\t%f6, %f6\n\tstd\t%f6, [%sp+2223]\n\n.Loop:\tsrlx\t%o2, 21, %g3\n\tstx\t%o1, [%sp+2279]\n\tadd\t%g2, 1, %g2\n\tand\t%g3, %o7, %o1\n\tldx\t[%sp+2255], %g4\n\tcmp\t%g2, %i2\n\tstx\t%o1, [%sp+2271]\n\tadd\t%g1, 8, %g1\n\tadd\t%o0, 16, %o0\n\tldx\t[%sp+2239], %o1\n\tfxtod\t%f0, %f10\n\tfxtod\t%f4, %f14\n\tldx\t[%sp+2231], %i0\n\tldx\t[%sp+2223], %g5\n\tldx\t[%sp+2247], %g3\n\tand\t%o2, %o7, 
%o2\n\tfxtod\t%f2, %f8\n\tfmuld\t%f10, %f10, %f0\n\tstx\t%o2, [%sp+2263]\n\tfmuld\t%f10, %f14, %f6\n\tldx\t[%g1-8], %o2\n\tfmuld\t%f10, %f8, %f12\n\tfdtox\t%f0, %f2\n\tldd\t[%sp+2279], %f0\n\tfmuld\t%f8, %f8, %f4\n\tfaddd\t%f6, %f6, %f6\n\tfmuld\t%f14, %f14, %f10\n\tstd\t%f2, [%sp+2255]\n\tsllx\t%g4, 20, %g4\n\tldd\t[%sp+2271], %f2\n\tfmuld\t%f8, %f14, %f8\n\tsllx\t%i0, 22, %i1\n\tfdtox\t%f12, %f12\n\tstd\t%f12, [%sp+2247]\n\tsllx\t%g5, 42, %i0\n\tadd\t%o1, %i1, %o1\n\tfaddd\t%f4, %f6, %f6\n\tldd\t[%sp+2263], %f4\n\tadd\t%o1, %i0, %o1\n\tadd\t%g3, %g4, %g3\n\tfdtox\t%f10, %f10\n\tstd\t%f10, [%sp+2239]\n\tsrlx\t%o1, 42, %g4\n\tand\t%g5, %o4, %i0\n\tfdtox\t%f8, %f8\n\tstd\t%f8, [%sp+2231]\n\tsrlx\t%g5, 22, %g5\n\tsub\t%g4, %i0, %g4\n\tfdtox\t%f6, %f6\n\tstd\t%f6, [%sp+2223]\n\tsrlx\t%g4, 63, %g4\n\tadd\t%g3, %g5, %g3\n\tadd\t%g3, %g4, %g3\n\tstx\t%o1, [%o0-16]\n\tsrlx\t%o2, 42, %o1\n\tbl,pt\t%xcc, .Loop\n\tstx\t%g3, [%o0-8]\n\n\tstx\t%o1, [%sp+2279]\n\tsrlx\t%o2, 21, %o1\n\tfxtod\t%f0, %f16\n\tldx\t[%sp+2223], %g3\n\tfxtod\t%f4, %f6\n\tand\t%o2, %o7, %o3\n\tstx\t%o3, [%sp+2263]\n\tfxtod\t%f2, %f4\n\tand\t%o1, %o7, %o1\n\tldx\t[%sp+2231], %o2\n\tsllx\t%g3, 42, %g4\n\tfmuld\t%f16, %f16, %f14\n\tstx\t%o1, [%sp+2271]\n\tfmuld\t%f16, %f6, %f8\n\tadd\t%o0, 48, %o0\n\tldx\t[%sp+2239], %o1\n\tsllx\t%o2, 22, %o2\n\tfmuld\t%f4, %f4, %f10\n\tldx\t[%sp+2255], %o3\n\tfdtox\t%f14, %f14\n\tfmuld\t%f4, %f6, %f2\n\tstd\t%f14, [%sp+2255]\n\tfaddd\t%f8, %f8, %f12\n\tadd\t%o1, %o2, %o2\n\tfmuld\t%f16, %f4, %f4\n\tldd\t[%sp+2279], %f0\n\tsllx\t%o3, 20, %g5\n\tadd\t%o2, %g4, %o2\n\tfmuld\t%f6, %f6, %f6\n\tsrlx\t%o2, 42, %o3\n\tand\t%g3, %o4, %g4\n\tsrlx\t%g3, 22, %g3\n\tfaddd\t%f10, %f12, %f16\n\tldd\t[%sp+2271], %f12\n\tldd\t[%sp+2263], %f8\n\tfxtod\t%f0, %f0\n\tsub\t%o3, %g4, %o3\n\tldx\t[%sp+2247], %o1\n\tsrlx\t%o3, 63, %o3\n\tfdtox\t%f2, %f10\n\tfxtod\t%f8, %f8\n\tstd\t%f10, [%sp+2231]\n\tfdtox\t%f6, %f6\n\tstd\t%f6, [%sp+2239]\n\tadd\t%o1, %g5, %o1\n\tfmuld\t%f0, %f0, 
%f2\n\tfdtox\t%f16, %f16\n\tstd\t%f16, [%sp+2223]\n\tadd\t%o1, %g3, %o1\n\tfdtox\t%f4, %f4\n\tstd\t%f4, [%sp+2247]\n\tfmuld\t%f0, %f8, %f10\n\tfxtod\t%f12, %f12\n\tadd\t%o1, %o3, %o1\n\tstx\t%o2, [%o0-48]\n\tfmuld\t%f8, %f8, %f6\n\tstx\t%o1, [%o0-40]\n\tfdtox\t%f2, %f2\n\tldx\t[%sp+2231], %o2\n\tfaddd\t%f10, %f10, %f10\n\tldx\t[%sp+2223], %g3\n\tfmuld\t%f12, %f12, %f4\n\tfdtox\t%f6, %f6\n\tldx\t[%sp+2239], %o1\n\tsllx\t%o2, 22, %o2\n\tfmuld\t%f12, %f8, %f8\n\tsllx\t%g3, 42, %g5\n\tldx\t[%sp+2255], %o3\n\tfmuld\t%f0, %f12, %f0\n\tadd\t%o1, %o2, %o2\n\tfaddd\t%f4, %f10, %f4\n\tldx\t[%sp+2247], %o1\n\tadd\t%o2, %g5, %o2\n\tand\t%g3, %o4, %g4\n\tfdtox\t%f8, %f8\n\tsllx\t%o3, 20, %g5\n\tstd\t%f8, [%sp+2231]\n\tfdtox\t%f0, %f0\n\tsrlx\t%o2, 42, %o3\n\tadd\t%o1, %g5, %o1\n\tfdtox\t%f4, %f4\n\tsrlx\t%g3, 22, %g3\n\tsub\t%o3, %g4, %o3\n\tstd\t%f6, [%sp+2239]\n\tstd\t%f4, [%sp+2223]\n\tsrlx\t%o3, 63, %o3\n\tadd\t%o1, %g3, %o1\n\tstd\t%f2, [%sp+2255]\n\tadd\t%o1, %o3, %o1\n\tstd\t%f0, [%sp+2247]\n\tstx\t%o2, [%o0-32]\n\tstx\t%o1, [%o0-24]\n\tldx\t[%sp+2231], %o2\n\tldx\t[%sp+2223], %o3\n\tldx\t[%sp+2239], %o1\n\tsllx\t%o2, 22, %o2\n\tsllx\t%o3, 42, %g5\n\tldx\t[%sp+2255], %g4\n\tand\t%o3, %o4, %g3\n\tadd\t%o1, %o2, %o2\n\tldx\t[%sp+2247], %o1\n\tadd\t%o2, %g5, %o2\n\tstx\t%o2, [%o0-16]\n\tsllx\t%g4, 20, %g4\n\tsrlx\t%o2, 42, %o2\n\tadd\t%o1, %g4, %o1\n\tsrlx\t%o3, 22, %o3\n\tsub\t%o2, %g3, %o2\n\tsrlx\t%o2, 63, %o2\n\tadd\t%o1, %o3, %o1\n\tadd\t%o1, %o2, %o1\n\tstx\t%o1, [%o0-8]\n\tret\n\trestore\t%g0, %g0, %g0\n.Lsmall:\n\tldx\t[%g1], %o2\n.Loop0:\n\tand\t%o2, %o7, %o1\n\tstx\t%o1, [%sp+2263]\n\tadd\t%g2, 1, %g2\n\tsrlx\t%o2, 21, %o1\n\tadd\t%g1, 8, %g1\n\tsrlx\t%o2, 42, %o2\n\tstx\t%o2, [%sp+2279]\n\tand\t%o1, %o7, %o1\n\tldd\t[%sp+2263], %f0\n\tcmp\t%g2, %i2\n\tstx\t%o1, [%sp+2271]\n\tfxtod\t%f0, %f6\n\tldd\t[%sp+2279], %f0\n\tldd\t[%sp+2271], %f4\n\tfxtod\t%f0, %f2\n\tfmuld\t%f6, %f6, %f0\n\tfxtod\t%f4, %f10\n\tfmuld\t%f2, %f6, %f4\n\tfdtox\t%f0, %f0\n\tstd\t%f0, 
[%sp+2239]\n\tfmuld\t%f10, %f6, %f8\n\tfmuld\t%f10, %f10, %f0\n\tfaddd\t%f4, %f4, %f6\n\tfmuld\t%f2, %f2, %f4\n\tfdtox\t%f8, %f8\n\tstd\t%f8, [%sp+2231]\n\tfmuld\t%f2, %f10, %f2\n\tfaddd\t%f0, %f6, %f0\n\tfdtox\t%f4, %f4\n\tstd\t%f4, [%sp+2255]\n\tfdtox\t%f2, %f2\n\tstd\t%f2, [%sp+2247]\n\tfdtox\t%f0, %f0\n\tstd\t%f0, [%sp+2223]\n\tldx\t[%sp+2239], %o1\n\tldx\t[%sp+2255], %g4\n\tldx\t[%sp+2231], %o2\n\tsllx\t%g4, 20, %g4\n\tldx\t[%sp+2223], %o3\n\tsllx\t%o2, 22, %o2\n\tsllx\t%o3, 42, %g5\n\tadd\t%o1, %o2, %o2\n\tldx\t[%sp+2247], %o1\n\tadd\t%o2, %g5, %o2\n\tstx\t%o2, [%o0]\n\tand\t%o3, %o4, %g3\n\tsrlx\t%o2, 42, %o2\n\tadd\t%o1, %g4, %o1\n\tsrlx\t%o3, 22, %o3\n\tsub\t%o2, %g3, %o2\n\tsrlx\t%o2, 63, %o2\n\tadd\t%o1, %o3, %o1\n\tadd\t%o1, %o2, %o1\n\tstx\t%o1, [%o0+8]\n\tadd\t%o0, 16, %o0\n\tbl,a,pt\t%xcc, .Loop0\n\tldx\t[%g1], %o2\n\tret\n\trestore\t%g0, %g0, %g0\nEPILOGUE(mpn_sqr_diagonal)\n"
  },
  {
    "path": "mpn/sparc64/sub_n.asm",
    "content": "dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and\ndnl  store difference in a third limb vector.\n\ndnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     4\nC UltraSPARC 3:\t      4.5\n\nC Compute carry-out from the most significant bits of u,v, and r, where\nC r=u-v-carry_in, using logic operations.\n\nC This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  
It has a 4 insn\nC recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.\nC Therefore, it seems futile to try to optimize this any further...\n\nC INPUT PARAMETERS\ndefine(`rp',`%i0')\ndefine(`up',`%i1')\ndefine(`vp',`%i2')\ndefine(`n',`%i3')\n\ndefine(`u0',`%l0')\ndefine(`u1',`%l2')\ndefine(`u2',`%l4')\ndefine(`u3',`%l6')\ndefine(`v0',`%l1')\ndefine(`v1',`%l3')\ndefine(`v2',`%l5')\ndefine(`v3',`%l7')\n\ndefine(`cy',`%i4')\n\ndefine(`fanop',`fitod %f0,%f2')\t\tdnl  A quasi nop running in the FA pipe\ndefine(`fmnop',`fmuld %f0,%f0,%f4')\tdnl  A quasi nop running in the FM pipe\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\tREGISTER(%g3,#scratch)\nPROLOGUE(mpn_sub_n)\n\tsave\t%sp,-160,%sp\n\n\tfitod\t%f0,%f0\t\tC make sure f0 contains small, quiet number\n\tsubcc\tn,4,%g0\n\tbl,pn\t%icc,.Loop0\n\tmov\t0,cy\n\n\tldx\t[up+0],u0\n\tldx\t[vp+0],v0\n\tadd\tup,32,up\n\tldx\t[up-24],u1\n\tldx\t[vp+8],v1\n\tadd\tvp,32,vp\n\tldx\t[up-16],u2\n\tldx\t[vp-16],v2\n\tldx\t[up-8],u3\n\tldx\t[vp-8],v3\n\tsubcc\tn,8,n\n\tsub\tu0,v0,%g1\tC main sub\n\tsub\t%g1,cy,%g4\tC carry sub\n\torn\tu0,v0,%g2\n\tbl,pn\t%icc,.Lend4567\n\tfanop\n\tb,a\t.Loop\n\n\t.align\t16\nC START MAIN LOOP\n.Loop:\torn\t%g4,%g2,%g2\n\tandn\tu0,v0,%g3\n\tldx\t[up+0],u0\n\tfanop\nC --\n\tandn\t%g2,%g3,%g2\n\tldx\t[vp+0],v0\n\tadd\tup,32,up\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tsub\tu1,v1,%g1\n\tstx\t%g4,[rp+0]\n\tfanop\nC --\n\tsub\t%g1,cy,%g4\n\torn\tu1,v1,%g2\n\tfmnop\n\tfanop\nC --\n\torn\t%g4,%g2,%g2\n\tandn\tu1,v1,%g3\n\tldx\t[up-24],u1\n\tfanop\nC --\n\tandn\t%g2,%g3,%g2\n\tldx\t[vp+8],v1\n\tadd\tvp,32,vp\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tsub\tu2,v2,%g1\n\tstx\t%g4,[rp+8]\n\tfanop\nC --\n\tsub\t%g1,cy,%g4\n\torn\tu2,v2,%g2\n\tfmnop\n\tfanop\nC --\n\torn\t%g4,%g2,%g2\n\tandn\tu2,v2,%g3\n\tldx\t[up-16],u2\n\tfanop\nC --\n\tandn\t%g2,%g3,%g2\n\tldx\t[vp-16],v2\n\tadd\trp,32,rp\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tsub\tu3,v3,%g1\n\tstx\t%g4,[rp-16]\n\tfanop\nC 
--\n\tsub\t%g1,cy,%g4\n\torn\tu3,v3,%g2\n\tfmnop\n\tfanop\nC --\n\torn\t%g4,%g2,%g2\n\tandn\tu3,v3,%g3\n\tldx\t[up-8],u3\n\tfanop\nC --\n\tandn\t%g2,%g3,%g2\n\tsubcc\tn,4,n\n\tldx\t[vp-8],v3\n\tfanop\nC --\n\tsrlx\t%g2,63,cy\n\tsub\tu0,v0,%g1\n\tstx\t%g4,[rp-8]\n\tfanop\nC --\n\tsub\t%g1,cy,%g4\n\torn\tu0,v0,%g2\n\tbge,pt\t%icc,.Loop\n\tfanop\nC END MAIN LOOP\n.Lend4567:\n\torn\t%g4,%g2,%g2\n\tandn\tu0,v0,%g3\n\tandn\t%g2,%g3,%g2\n\tsrlx\t%g2,63,cy\n\tsub\tu1,v1,%g1\n\tstx\t%g4,[rp+0]\n\tsub\t%g1,cy,%g4\n\torn\tu1,v1,%g2\n\torn\t%g4,%g2,%g2\n\tandn\tu1,v1,%g3\n\tandn\t%g2,%g3,%g2\n\tsrlx\t%g2,63,cy\n\tsub\tu2,v2,%g1\n\tstx\t%g4,[rp+8]\n\tsub\t%g1,cy,%g4\n\torn\tu2,v2,%g2\n\torn\t%g4,%g2,%g2\n\tandn\tu2,v2,%g3\n\tandn\t%g2,%g3,%g2\n\tadd\trp,32,rp\n\tsrlx\t%g2,63,cy\n\tsub\tu3,v3,%g1\n\tstx\t%g4,[rp-16]\n\tsub\t%g1,cy,%g4\n\torn\tu3,v3,%g2\n\torn\t%g4,%g2,%g2\n\tandn\tu3,v3,%g3\n\tandn\t%g2,%g3,%g2\n\tsrlx\t%g2,63,cy\n\tstx\t%g4,[rp-8]\n\n\taddcc\tn,4,n\n\tbz,pn\t%icc,.Lret\n\tfanop\n\n.Loop0:\tldx\t[up],u0\n\tadd\tup,8,up\n\tldx\t[vp],v0\n\tadd\tvp,8,vp\n\tadd\trp,8,rp\n\tsubcc\tn,1,n\n\tsub\tu0,v0,%g1\n\torn\tu0,v0,%g2\n\tsub\t%g1,cy,%g4\n\tandn\tu0,v0,%g3\n\torn\t%g4,%g2,%g2\n\tstx\t%g4,[rp-8]\n\tandn\t%g2,%g3,%g2\n\tbnz,pt\t%icc,.Loop0\n\tsrlx\t%g2,63,cy\n\n.Lret:\tmov\tcy,%i0\n\tret\n\trestore\nEPILOGUE(mpn_sub_n)\n"
  },
  {
    "path": "mpn/sparc64/submul_1.asm",
    "content": "dnl  SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and\ndnl  subtract the result from a second limb vector.\n\ndnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t\t   cycles/limb\nC UltraSPARC 1&2:     18\nC UltraSPARC 3:\t      23\n\nC INPUT PARAMETERS\nC rp\ti0\nC up\ti1\nC n\ti2\nC v\ti3\n\nASM_START()\n\tREGISTER(%g2,#scratch)\n\nPROLOGUE(mpn_submul_1)\n\tsave\t%sp,-176,%sp\n\n\tsllx\t%i2, 3, %g2\n\tor\t%g0, %i1, %o1\n\tadd\t%g2, 15, %o0\n\tor\t%g0, %i2, %o2\n\tand\t%o0, -16, %o0\n\tsub\t%sp, %o0, %sp\n\tadd\t%sp, 2223, %o0\n\tor\t%g0, %o0, %l0\n\tcall\tmpn_mul_1\n\tor\t%g0, %i3, %o3\n\tor\t%g0, %o0, %l1\t\tC preserve carry value from mpn_mul_1\n\tor\t%g0, %i0, %o0\n\tor\t%g0, %i0, %o1\n\tor\t%g0, %l0, %o2\n\tcall\tmpn_sub_n\n\tor\t%g0, %i2, %o3\n\tret\n\trestore\t%l1, %o0, %o0\t\tC sum carry values\nEPILOGUE(mpn_submul_1)\n"
  },
  {
    "path": "mpn/x86_64/add_err1_n.asm",
    "content": "dnl  AMD64 mpn_add_err1_n\n\ndnl  Copyright (C) 2009, David Harvey\n\ndnl  All rights reserved.\n\ndnl  Redistribution and use in source and binary forms, with or without\ndnl  modification, are permitted provided that the following conditions are\ndnl  met:\n\ndnl  1. Redistributions of source code must retain the above copyright notice,\ndnl  this list of conditions and the following disclaimer.\n\ndnl  2. Redistributions in binary form must reproduce the above copyright\ndnl  notice, this list of conditions and the following disclaimer in the\ndnl  documentation and/or other materials provided with the distribution.\n\ndnl  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\ndnl  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\ndnl  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\ndnl  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\ndnl  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\ndnl  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\ndnl  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\ndnl  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\ndnl  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\ndnl  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\ndnl  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC K8,K9:\t 3.166\nC K10:\t\t ?\nC P4:\t\t ?\nC P6-15 (Core2): ?\nC P6-28 (Atom):\t ?\n\nC ret mpn_add_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nC INPUT 
PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`vp',\t`%rdx')\ndefine(`ep',\t`%rcx')\ndefine(`yp',\t`%r8')\ndefine(`n',\t`%r9')\ndefine(`cy_param',\t`8(%rsp)')\ndefine(`el',\t`%rbx')\ndefine(`eh',\t`%rbp')\ndefine(`t0',\t`%r10')\ndefine(`t1',\t`%r11')\ndefine(`w',\t`%r12')\n\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_add_err1_n)\n\tmov\tcy_param, %rax    C cy\n\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\n\txor el, el              C zero el, eh\n\txor eh, eh\n\tlea\t(rp,n,8), rp     C rp += n, up += n, vp += n\n\tlea\t(up,n,8), up\n\tlea\t(vp,n,8), vp\n\n\ttest\t$1, n            C if n is odd goto L(odd)\n\tjnz\tL(odd)\n\nL(even):\t\n\tlea\t-8(yp,n,8), yp   C yp += n - 1\n\tneg\tn                C { n = -n }\n\tjmp\tL(top)\n\n\tALIGN(16)\nL(odd):                           C n is odd, do extra iteration\n\tlea\t-16(yp,n,8), yp     C yp += n - 2\n\tneg\tn                   C { n = -n }\n\tshr\t$1, %rax            C rp[0] = up[0] + vp[0] + (cy&1)\n\tmov\t(up,n,8), w\n\tadc\t(vp,n,8), w\n\tcmovc\t8(yp), el           C if carry el = *yp\n\tmov\tw, (rp,n,8)\n\tsetc\t%al                 C store carry\n\tinc\tn                   C n++\n\tjz\tL(end)              C goto end if we are done\n\n\tALIGN(16)\nL(top):\n       mov     (up,n,8), w     C rp[n] = up[n] + vp[n] + carry\n\tshr     $1, %rax        C { restore carry }\n\tadc     (vp,n,8), w\n\tmov     $0, t1          C initialise t1\n\tmov     w, (rp,n,8)\n\tmov     $0, t0          C initialise t0\n\tmov     8(up,n,8), w    C rp[n+1] = up[n+1] + vp[n+1] + carry\n\tcmovc   (yp), t0        C if carry t0 = yp\n\tadc     8(vp,n,8), w\n\tcmovc   -8(yp), t1      C if next carry t1 = *(yp-1)\n\tsetc    %al             C { save carry }\n\tadd     t0, el          C (eh:el) += carry*yp limb\n\tadc     $0, eh\n\tadd     t1, el          C (eh:el) += next carry*next yp limb\n\tmov     w, 8(rp,n,8)\n\tadc     $0, eh\n\tadd     $2, n           C n += 2\n\tlea     -16(yp), yp     C yp -= 2\n\tjnz     L(top)       
   C if not done goto top\n\nL(end):\t\n\tmov\tel, (ep)         C write out (eh:el)\n\tmov\teh, 8(ep)\n\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/add_err2_n.asm",
    "content": "dnl  AMD64 mpn_add_err2_n\n\ndnl  Copyright (C) 2009, David Harvey\n\ndnl  All rights reserved.\n\ndnl  Redistribution and use in source and binary forms, with or without\ndnl  modification, are permitted provided that the following conditions are\ndnl  met:\n\ndnl  1. Redistributions of source code must retain the above copyright notice,\ndnl  this list of conditions and the following disclaimer.\n\ndnl  2. Redistributions in binary form must reproduce the above copyright\ndnl  notice, this list of conditions and the following disclaimer in the\ndnl  documentation and/or other materials provided with the distribution.\n\ndnl  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\ndnl  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\ndnl  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\ndnl  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\ndnl  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\ndnl  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\ndnl  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\ndnl  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\ndnl  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\ndnl  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\ndnl  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC K8,K9:\t 4.5\nC K10:\t\t ?\nC P4:\t\t ?\nC P6-15 (Core2): ?\nC P6-28 (Atom):\t ?\n\nC\nC mp_limb_t mpn_add_err2_n (* rp,* up, * vp, * ep, * yp1, * yp2, n, cy)\nC\n\nC INPUT PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`vp',\t`%rdx')\ndefine(`ep',\t`%rcx')\ndefine(`yp1',\t`%r8')\ndefine(`yp2',   `%r9')\ndefine(`n_param',     `8(%rsp)')\ndefine(`cy_param',    `16(%rsp)')\n\ndefine(`cy1',   `%r14')\ndefine(`cy2',   `%rax')\n\t\ndefine(`n',     
`%r10')\n\ndefine(`w',     `%rbx')\ndefine(`e1l',\t`%rbp')\ndefine(`e1h',\t`%r11')\ndefine(`e2l',\t`%r12')\ndefine(`e2h',\t`%r13')\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_add_err2_n)\n\tmov\tcy_param, cy2    C cy2\n\tmov\tn_param, n       C n\n\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\n\txor e1l, e1l        C zero e1l, e1h, e2l, e2h\n\txor e1h, e1h\n\txor e2l, e2l\n\txor e2h, e2h\n\n\tsub\typ1, yp2           C yp2 -= yp1\n\n\tlea\t(rp,n,8), rp       C rp += n, up += n, vp += n\n\tlea\t(up,n,8), up\n\tlea\t(vp,n,8), vp\n\n\ttest\t$1, n              C if n is odd goto L(odd)\n\tjnz\tL(odd)\n\n\tlea\t-8(yp1,n,8), yp1   C { yp1 += n - 1 }\n\tneg\tn                  C { n = -n }\n\tjmp\tL(top)\n\n\tALIGN(16)\nL(odd):                          C n is odd, do extra iteration\n\tlea\t-16(yp1,n,8), yp1  C yp1 += n - 2\n\tneg\tn                  C { n = -n }\n\tshr\t$1, cy2            C rp[0] = up[0] + vp[0] + (cy2&1)\n\tmov\t(up,n,8), w\n\tadc\t(vp,n,8), w\n\tcmovc\t8(yp1), e1l        C if carry2 el1 = *(yp1+1)\n\tcmovc\t8(yp1,yp2), e2l    C if carry2 e2l = *(yp2+1)\n\tmov\tw, (rp,n,8)\n\tsbb\tcy2, cy2           C move carry2 into cy2\n\tinc\tn                  C n++\n\tjz\tL(end)             C goto end if we are done\n\t\n\tALIGN(16)\nL(top):\n       mov     (up,n,8), w\n\tshr     $1, cy2         C restore carry2\n\tadc     (vp,n,8), w\n\tmov     w, (rp,n,8)     C rp[n] = up[n] + vp[n] + carry2\n\tsbb     cy1, cy1        C generate mask, preserve CF\n\n\tmov     8(up,n,8), w    C rp[n] = up[n+1] + vp[n+1] + carry1\n\tadc     8(vp,n,8), w\n\tmov     w, 8(rp,n,8)\n\tsbb     cy2, cy2        C generate mask, preserve CF\n\n\tmov     (yp1), w\t   C (e1h:e1l) += cy1 * yp1 limb\n\tand     cy1, w\n\tadd     w, e1l\n\tadc     $0, e1h\n\n\tand     (yp1,yp2), cy1\tC (e2h:e2l) += cy1 * yp2 limb\n\tadd     cy1, e2l\n\tadc     $0, e2h\n\n\tmov     -8(yp1), w\t       C (e1h:e1l) += cy2 * next yp1 limb\n\tand     cy2, w\n\tadd     w, e1l\n\tadc     
$0, e1h\n\n\tmov     -8(yp1,yp2), w\tC (e2h:e2l) += cy2 * next yp2 limb\n\tand     cy2, w\n\tadd     w, e2l\n\tadc     $0, e2h\n\n\tadd     $2, n               C n += 2\n\tlea     -16(yp1), yp1       C yp1 -= 2\n\tjnz     L(top)              C if not done goto top\nL(end):\n\n\tmov\te1l, (ep)            C write out e1l, e1h, e2l, e2h\n\tmov\te1h, 8(ep)\n\tmov\te2l, 16(ep)\n\tmov\te2h, 24(ep)\n\n\tand\t$1, %eax\tC return carry\n\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/add_n.as",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/addlsh1_n.as",
    "content": "\n;  AMD64 mpn_addlsh1_n \n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n;\trax = carry\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_addlsh1_n\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     rdi, [rdi+rcx*8]\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      next\nlp1:\n\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     lp1\nnext:\n\tcmp     rcx, 0\n\tjz      end\n\tpush    rbx\n\talign   16\nlp:\n\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], 
r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     lp\n\tpop     rbx\nend:\n\tadd     rax, r9\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/addlsh_n.asm",
    "content": "dnl  AMD64 mpn_addlsh_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC    carry+(xp,n)= (yp,n) + (zp,n)<<c with carry in ci\nC mp_limb_t\tmpn_addlsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC xp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc)\n\nASM_START()\nPROLOGUE(mpn_addlsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_addlsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tadc\t8(%rsi,%r10,8),%rbp\n\tadc\t16(%rsi,%r10,8),%rbx\n\tadc\t24(%rsi,%r10,8),%r12\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tmov\t%rbp,8(%rdi,%r10,8)\n\tmov\t%rbx,16(%rdi,%r10,8)\n\tmov\t%r12,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/addmul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n;  add the result to a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.com).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.8\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benefit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n\n%include 'yasm_mac.inc'\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,16,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n; \n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_addmul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_addmul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_addmul_1_small_loop\n\t\n\talign\t16\nL_mpn_addmul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,[rdi+r11*8]\n\tadc\trdx,r10\n\tadd\trax,r8\n\tmov\tr8,r10\n\tmov\t[rdi+r11*8],rax\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_addmul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_addmul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_addmul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tadd\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_addmul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_addmul_1_main_loop\n\tshr\ttemp,1\nL_mpn_addmul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_addmul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_addmul_1_main_loop\n\t\n\talign\t16\nL_mpn_addmul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_addmul_1_main_loop\n\nL_mpn_addmul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t; \n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tadd\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t; \n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t; \n\tadd\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/and_n.as",
    "content": "\n;  AMD64 mpn_and_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rcx) = (rsi, rcx) & (rdx, rcx)\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_and_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tand     r11, [rdx]\n\tand     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tand     r9, [rdx-16]\n\tand     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tand     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tand     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tand     r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/andn_n.as",
    "content": "\n;  AMD64 mpn_andn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_andn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tand     r8, [rsi+rcx*8+24]\n\tand     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tand     r10, [rsi+rcx*8+8]\n\tand     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/com_n.as",
    "content": "\n;  core2 mpn_com_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_com_n\n\tsub     rdx, 4\n\tjc      next\n\talign 8\nloop1:\n\tmov     rax, [rsi+rdx*8+24]\n\tmov     rcx, [rsi+rdx*8+16]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+24], rax\n\tmov     [rdi+rdx*8+16], rcx\n\tmov     rax, [rsi+rdx*8+8]\n\tmov     rcx, [rsi+rdx*8]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+8], rax\n\tmov     [rdi+rdx*8], rcx\n\tsub     rdx, 4\n\tjae     loop1\nnext:\n\tadd     rdx, 4\n\tjz      end\n;\tCould still have potential cache-bank conflicts in this tail part\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/copyd.as",
    "content": "\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyd(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx)\n;\t(rdi,rdx)=(rsi,rdx)\n\n\tGLOBAL_FUNC mpn_copyd\n\tlea     rsi, [rsi+rdx*8-8]\n\tlea     rdi, [rdi+rdx*8-8]\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     r9, [rsi-24]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tlea     rdi, [rdi-32]\n\tsub     rdx, 4\n\tmov     [rdi+16], r8\n\tmov     [rdi+8], r9\n\tlea     rsi, [rsi-32]\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tmov     [rdi-16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/copyi.asm",
    "content": "dnl  mpn_copyi\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyi(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyi)\ncmp $0,%rdx\t\t#needed for case n=0\njz endfn\t\t#needed for case n=0\nmov %rdi,%rax\nsub %rsi,%rax\ntest $0xF,%rax\njz aligned\ntest $0xF,%rdi\njz srcisodd\nmov $5,%rcx\nsub %rdx,%rcx\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nmovapd (%rsi,%rcx,8),%xmm1\nmovq %xmm1,(%rdi,%rcx,8)\nadd $8,%rdi\ncmp $1,%rdx\t\t#needed for case n=1\njz endfn\t\t#needed for case n=1\ncmp $0,%rcx\njge skiplpud\nALIGN(16)\nlpud:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc lpud\nskiplpud:\ncmp $2,%rcx\nja case0d\njz case1d\njp case2d\nALIGN(16)\ncase3d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1  \t# top is read past\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd 
%xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1d:\tmovapd 16(%rsi,%rcx,8),%xmm0\t# top read past\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0d:\tmovhpd %xmm1,(%rdi,%rcx,8)\nendfn:\tret\n\n\n\nsrcisodd:\nmov $4,%rcx\nsub %rdx,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\n\tmovapd -8(%rsi,%rcx,8),%xmm1\n\tsub $8,%rsi\ncmp $0,%rcx\njge skiplpus\t\nALIGN(16)\nlpus:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc lpus\nskiplpus:\ncmp $2,%rcx\nja case0s\njz case1s\njp case2s\nALIGN(16)\ncase3s:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1  \t# read past\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2s: movapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1s:\tmovapd 16(%rsi,%rcx,8),%xmm0\t# read past\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0s:\tmovhpd %xmm1,(%rdi,%rcx,8)\n\tret\n\n\nALIGN(16)\naligned:\nsub $4,%rdx\ntest $0xF,%rdi\njz notodda\n\tmov (%rsi),%rax\n\tmov %rax,(%rdi)\n\tsub $1,%rdx\n\tlea 8(%rsi),%rsi\n\tlea 8(%rdi),%rdi\nnotodda:\ncmp $0,%rdx\njl skiplpa\nALIGN(16)\nlpa:\tmovdqa (%rsi),%xmm0\n\tsub $4,%rdx\n\tmovdqa 16(%rsi),%xmm1\n\tlea 32(%rsi),%rsi\n\tmovdqa %xmm0,(%rdi)\n\tlea 32(%rdi),%rdi\n\tmovdqa %xmm1,16-32(%rdi)\n\tjnc lpa\nskiplpa:\ncmp $-2,%rdx\njg casea3\nje casea2\njnp casea0\ncasea1:\tmov (%rsi),%rax\n\tmov %rax,(%rdi)\n\tret\ncasea3:\tmovdqa (%rsi),%xmm0\n\tmov 16(%rsi),%rax\n\tmovdqa %xmm0,(%rdi)\n\tmov %rax,16(%rdi)\ncasea0:\tret\ncasea2:\tmovdqa (%rsi),%xmm0\n\tmovdqa 
%xmm0,(%rdi)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/divexact_byff.as",
    "content": "\n;  mpn_divexact_byff\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_divexact_byff(mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\txor     eax, eax\n\tmov     rcx, rdx\n\tand     rdx, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n\tje      L_skiplp\n; want carry clear here\n\talign   16\nL_lp:\n\tsbb     rax, [rsi]\n\tlea     rdi, [rdi+32]\n\tmov     r8, rax\n\tsbb     rax, [rsi+8]\n\tmov     r9, rax\n\tsbb     rax, [rsi+16]\n\tmov     r10, rax\n\tsbb     rax, [rsi+24]\n\tdec     rcx\n\tmov     [rdi-32], r8\n\tmov     [rdi-24], r9\n\tmov     [rdi-16], r10\n\tmov     [rdi-8], rax\n\tlea     rsi, [rsi+32]\n\tjnz     L_lp\nL_skiplp:\n; dont want to change the carry\n\tinc     rdx\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\nL_end:\n\tsbb     rax, 0\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_1)\nmov $0,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nloop:\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mov %rax,(%rdi,%r9,8)\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz loop\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/ior_n.as",
    "content": "\n;  AMD64 mpn_ior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rcx) = (rsi, rcx)| ( rdx, rcx)\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_ior_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tor      r11, [rdx]\n\tor      r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tor      r9, [rdx-16]\n\tor      r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tor      r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tor      r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tor      r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/iorn_n.as",
    "content": "\n;  AMD64 mpn_iorn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_iorn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\nmov %rbx,-8(%rsp)\nmov %rbp,-16(%rsp)\nmov %r12,-24(%rsp)\nmov %r13,-32(%rsp)\nmov %r14,-40(%rsp)\nmov %r15,-48(%rsp)\nmov %rdx,-56(%rsp)\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt 
$2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 
8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\tmov -56(%rsp),%r8\n\tbt $0,%r8\n\tjnc notodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nl7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l7\n\tmov $3,%rcx\nnotodd:\txor %r8,%r8\n\tshr $1,%rax\n\tadc %r8,%r8\n\tshr $1,%rax\n\tadc $0,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nl1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\txor %r8,%r8\n\tshr $1,%rbx\n\tadc %r8,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tadd %r8,24(%rbp)\nl2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l2\nmov -8(%rsp),%rbx\nmov -16(%rsp),%rbp\nmov -24(%rsp),%r12\nmov -32(%rsp),%r13\nmov -40(%rsp),%r14\nmov -48(%rsp),%r15\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc skipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nl2:\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc l2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nl3:\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc l3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nl1:\tsbbq $0,(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc l1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nl4:\tsbbq $0,(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc l4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc notodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nl7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tadd $1,%rcx\n\tsar $1,%r8\n\tjnz l7\nnotodd:\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/lshift.as",
    "content": "\n;  AMD64 mpn_lshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<rcx\n;\trax = carry\n;\tdecent assemblers understand what movq means, except\n;\tmicrosofts/apple masm (what a surprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm\n\n%include 'yasm_mac.inc'\n\n%define MOVQ movd\n\n\tBITS 64\n\n   GLOBAL_FUNC  mpn_lshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshl     rdx, cl\n\tneg     rcx\n\tshr     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     r11, r8\n\tmov     rax, r9\n\tshl     r8, cl\n\tshl     r9, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     rax, cl\n\tor      r9, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tsub     rax, rcx\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmov     r8, rdx\n\tlea     r9, [rsi+r8*8-16]\n\tmov     r10, r9\n\tand     r9, -16\n\tmovdqa  xmm3, [r9]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tpshufd  xmm3, xmm3, 0x4E\n\tMOVQ    rax, xmm3\n\tcmp     r10, r9\n\tje      aligned\n\tmovq    xmm2, [rsi+r8*8-8]\n\tmovq    xmm4, xmm2\n\tpsrlq   xmm2, xmm1\n\tMOVQ    rax, xmm2\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm3\n\tmovq    [rdi+r8*8-8], xmm4\n\tdec     r8\naligned:\n\tsub     r8, 5\n\tjle     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovdqa  xmm3, [rsi+r8*8-8]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpshufd  xmm3, xmm3, 0x4E\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tsub     r8, 4\n\tjg      loop1\nskiploop:\n\tcmp     r8, -1\n\tje      left2\n\tjg      left3\n\tjp      left1\nleft0:\n;\tmay be easier to bswap xmm5 first , same with other cases\n\tpxor    xmm2, xmm2\n\tpsllq   xmm5, xmm0\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], 
xmm5\n\tret\n\talign   16\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovq    xmm3, [rsi+r8*8]\n\tpshufd  xmm3, xmm3, 0x4E\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tpsllq   xmm5, xmm0\n\tmovhpd  [rdi+r8*8], xmm5\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpxor    xmm3, xmm3\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpsllq   xmm4, xmm0\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/lshift1.as",
    "content": "\n;  AMD64 mpn_lshift1\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<1\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift1\n\txor     rax, rax\n\tmov     r11, rdx\n\tand     r11, 7\n\tinc     r11\n\tshr     rdx, 3\n;\tand clear carry flag\n\tcmp     rdx, 0\n\tjz      next\n\talign   16\nloop1:\n\tmov     rcx, [rsi]\n\tmov     r8, [rsi+8]\n\tmov     r10, [rsi+16]\n\tmov     r9, [rsi+24]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi], rcx\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     [rdi+24], r9\n\tmov     rcx, [rsi+32]\n\tmov     r8, [rsi+40]\n\tmov     r10, [rsi+48]\n\tmov     r9, [rsi+56]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi+32], rcx\n\tmov     [rdi+40], r8\n\tmov     [rdi+48], r10\n\tmov     [rdi+56], r9\n\tlea     rdi, [rdi+64]\n\tdec     rdx\n\tlea     rsi, [rsi+64]\n\tjnz     loop1\nnext:\n\tdec     r11\n\tjz      end\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     rcx, [rsi]\n\tadc     rcx, rcx\n\tmov     [rdi], rcx\n\tdec     r11\n\tjz    
  end\n\tmov     rcx, [rsi+8]\n\tadc     rcx, rcx\n\tmov     [rdi+8], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+16]\n\tadc     rcx, rcx\n\tmov     [rdi+16], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+24]\n\tadc     rcx, rcx\n\tmov     [rdi+24], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+32]\n\tadc     rcx, rcx\n\tmov     [rdi+32], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+40]\n\tadc     rcx, rcx\n\tmov     [rdi+40], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+48]\n\tadc     rcx, rcx\n\tmov     [rdi+48], rcx\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/lshift2.asm",
    "content": "dnl  mpn_lshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift2)\nmov $3,%ecx\nsub %rdx,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nxor %eax,%eax\nxor %edx,%edx\ncmp $0,%rcx\njge skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r8\n\tmov 8(%rsi,%rcx,8),%r9\n\tmov 16(%rsi,%rcx,8),%r10\n\tmov 24(%rsi,%rcx,8),%r11\n\tadd %rax,%rax\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tadc %r10,%r10\n\tadc %r11,%r11\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tadc %r10,%r10\n\tadc %r11,%r11\n\tmov %r11,24(%rdi,%rcx,8)\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov %r9,-24(%rdi,%rcx,8)\n\tmov %r10,-16(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\n\tmov (%rsi,%rcx,8),%r8\n\tmov 8(%rsi,%rcx,8),%r9\n\tmov 16(%rsi,%rcx,8),%r10\n\tadd %rax,%rax\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tadc %r10,%r10\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tadc %r10,%r10\n\tsbb %rdx,%rdx\n\tmov 
%r8,(%rdi,%rcx,8)\n\tmov %r9,8(%rdi,%rcx,8)\n\tmov %r10,16(%rdi,%rcx,8)\n\tlea (%rdx,%rax,2),%rax\n\tneg %rax\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r8\n\tmov 8(%rsi,%rcx,8),%r9\n\tadd %rax,%rax\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\tadc %r8,%r8\n\tadc %r9,%r9\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,8(%rdi,%rcx,8)\n\tlea (%rdx,%rax,2),%rax\n\tneg %rax\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r8\n\tadd %rax,%rax\n\tadc %r8,%r8\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\tadc %r8,%r8\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%rdx,%rax,2),%rax\n\tneg %rax\n\tret\nALIGN(16)\ncase0:\n\tlea (%rdx,%rax,2),%rax\n\tneg %rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nlp:\n\tmov -8(%rsi,%rcx,8),%r10\n\tmul %r8\n\tadd %rax,%r10\n\tmov $0,%r11\n\tadc %rdx,%r11\n\tmov %r13,%rax\n\tmul %r9\n\tadd %r10,%rax\n\tmov %r11,%r13\n\tadc %rdx,%r13\n\tdec %rcx\n\tjnz lp\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n\n\n\n"
  },
  {
    "path": "mpn/x86_64/atom/mod_1_2.asm",
    "content": "dnl  mpn_mod_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,3)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_2)\nC // require rdx >=4\npush %r12\npush %r13\npush %r14\nmov -8(%rsi,%rdx,8),%r14\nmov -16(%rsi,%rdx,8),%r13\nmov -32(%rsi,%rdx,8),%r11\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov %rdx,%rcx\nmov -24(%rsi,%rdx,8),%rax\nsub $6,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmul %r8\n\tmov $0,%r12\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov 0(%rsi,%rcx,8),%r11\n\tmov %r12,%r14\n\tadc %rdx,%r14\n\tmov 8(%rsi,%rcx,8),%rax\n\tsub $2,%rcx\n\tjnc lp\nskiplp:\n\tmul %r8\n\tmov $0,%r12\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncmp $-2,%rcx\nje case0\ncase1:\n\tmov 8(%rsi,%rcx,8),%r11\n\tmov 
$0,%r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncase0:\t\nmov %r8,%rax\nmul %r14\nadd %rax,%r13\nadc $0,%rdx\nmov %r13,(%rdi)\nmov %rdx,8(%rdi)\npop %r14\npop %r13\npop %r12\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/mod_1_3.asm",
    "content": "dnl  mpn_mod_1_3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,4)  contains B^i % divisor\n\nASM_START()\nPROLOGUE(mpn_mod_1_3)\nC // require rdx >=5\npush %r12\npush %r13\npush %r14\npush %r15\nmov -8(%rsi,%rdx,8),%r15\nmov -16(%rsi,%rdx,8),%r14\nmov -32(%rsi,%rdx,8),%rax\nmov -40(%rsi,%rdx,8),%r12\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov 24(%rcx),%r11\nmov %rdx,%rcx\nsub $8,%rcx\njc skiplp\nALIGN(16)\nC // r15 r14 -8() -16()=rax -24()=r12\nlp:\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 0(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 8(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tsub $3,%rcx\n\tjnc lp\nskiplp:\nC // we have loaded up the next two limbs\nC // but because they are out of order we can have to do 3 limbs min\ncmp $-2,%rcx\njl case1\nje 
case2\ncase3:\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 8(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 16(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tC // r15 r14 rax r12\n\tmov $0,%r13\n\tmul %r8\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\ncase2:\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 16(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tC // r15 r14 r12\n\tmov $0,%r13\n\tmov %r8,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\ncase1:\n\tC // one more is 3 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12 \n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tmov %r8,%rax\n\tmul 
%r15\n\tadd %rax,%r14\n\tadc $0,%rdx\n\tmov %r14,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/mul_1.as",
    "content": "\n;  core2 mpn_mul_1\n;  Copyright 2008,2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rdx) = (rsi, rdx)*rcx\n;\trax = carry\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_mul_1\n;\tthis is just an addmul , so we can get rid off stack use\n;\tand simplifiy wind down , and perhaps re-do the OOO order \n\tmov     rax, [rsi]\n\tcmp     rdx, 1\n\tje      one\n\tmov     r11, 5\n\tlea     rsi, [rsi+rdx*8-40]\n\tlea     rdi, [rdi+rdx*8-40]\n\tsub     r11, rdx\n\tmul     rcx\n\tdb      0x26\n\tmov     r8, rax\n\tdb      0x26\n\tmov     rax, [rsi+r11*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tdb      0x26\n\tcmp     r11, 0\n\tdb      0x26\n\tmov     [rsp-8], rbx\n\tdb      0x26\n\tjge     skiploop\n\talign 16\nloop1:\n\tmov     r10, 0\n\tmul     rcx\n\tmov     [rdi+r11*8], r8\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r8, 0\n\tmov     r9, 0\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadd     rbx, rax\n\tdb      0x26\n\tadc     r8, 
rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     rcx\n\tmov     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadd     r8, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     loop1\n\talign 16\nskiploop:\n\tmov     r10d, 0\n\tmul     rcx\n\tmov     [rdi+r11*8], r8\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tjz      next2\n\tja      next3\n\tjp      next1\nnext0:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r8d, 0\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r8, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     rcx\n\tmov     [rdi+r11*8+24], rbx\n\tmov     rbx, [rsp-8]\n\tadd     r8, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r8\n\tmov     rax, rdx\n\tret\n\talign 16\nnext1:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r8d, 0\n\tadc     r8, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tadd     r8, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r8\n\tmov     rbx, [rsp-8]\n\tmov     rax, rdx\n\tret\n\talign 16\none:\n\tmul     rcx\n\tmov     [rdi], rax\n\tmov     rax, rdx\n\tret\n\talign 16\nnext2:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     rax, rbx\n\tmov     rbx, [rsp-8]\n\tret\n\talign 16\nnext3:\n\tmov     rbx, [rsp-8]\n\tmov     [rdi+r11*8+8], r9\n\tmov     rax, r10\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/mul_2.as",
    "content": ";  X86_64 mpn_mul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n; (rdi,rdx+1)=(rsi,rdx)*(rcx,2) return carrylimb\n\n\tGLOBAL_FUNC mpn_mul_2\npush    rbx\nmov     r8, [rcx]\nmov     rcx, [rcx+8]\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\nmov     rbx, 3\nsub     rbx, rdx\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     r8\nmov     r11, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign 16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd  
   rbx, 3\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmov     [rdi+rbx*8], r11\nmul     rcx\nadd     r9, rax\nadc     r10, rdx\ncmp     rbx, 1\nja      case2\nje      case1\ncase0:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n\tpop     rbx\n\tret\nalign 16\ncase1:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     rbx\n\tret\nalign 16\ncase2:\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/atom/nand_n.as",
    "content": "\n;  AMD64 mpn_nand_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_nand_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rsi+rcx*8+24]\n\tmov     r9, [rsi+rcx*8+16]\n\tand     r8, [rdx+rcx*8+24]\n\tand     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rsi+rcx*8+8]\n\tmov     r11, [rsi+rcx*8]\n\tand     r10, [rdx+rcx*8+8]\n\tand     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/nior_n.as",
    "content": "\n;  AMD64 mpn_nior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_nior_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/redc_1.as",
    "content": "\n;  core2 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      
%%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul 
    r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, 
0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tsub     r15, 1\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tsub     r15, 1\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign 16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov     r13, 
[r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign 16\ncase0:\n\tmpn_addmul_1_int 0\n\talign 16\ncase1:\n\tmpn_addmul_1_int 1\n\talign 16\ncase3:\n\tmpn_addmul_1_int 3\n\talign 16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign 16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/rsh1add_n.as",
    "content": "\n;  AMD64 mpn_rsh1add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)+(rdx,rcx))/2 return low bit of sum\n\n\tGLOBAL_FUNC mpn_rsh1add_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, 
[rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/rsh1sub_n.as",
    "content": "\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two compliment\n\n\tGLOBAL_FUNC mpn_rsh1sub_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      
L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/rsh_divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9  / rcx ) >> r8 \nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\nmov %r9,%r10\nmov $1,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\nmov -8(%rsi,%r9,8),%rax\nsub %r10,%rax\nsbb %r8,%r8\nimul %r11,%rax\nMOVQ %rax,%mm4\nmovq %mm4,%mm5\npsrlq %mm0,%mm4\npsllq %mm1,%mm5\npsrlq %mm1,%mm5\nmul %rcx\ncmp $0,%r9\nje one\nadd %r8,%r8\nALIGN(16)\nloop:\n    movq %mm4,%mm2\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    MOVQ %rax,%mm3\n    movq %mm3,%mm4\n    psllq %mm1,%mm3\n    psrlq %mm0,%mm4\n    por %mm3,%mm2\n    movq %mm2,-8(%rdi,%r9,8)\n    mul %rcx\n    add %r8,%r8\n    inc %r9\n    jnz loop\nskiploop:\nmovq %mm4,-8(%rdi,%r9,8)\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\none:\nmovq %mm4,-8(%rdi,%r9,8)\nadd %r8,%r8\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/rshift.as",
    "content": "\n;  AMD64 mpn_rshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)>>rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm \n\n%define MOVQ movd\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n   GLOBAL_FUNC mpn_rshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshr     rdx, cl\n\tneg     rcx\n\tshl     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     rax, r8\n\tmov     r11, r9\n\tshr     r8, cl\n\tshr     r9, cl\n\tneg     rcx\n\tshl     r11, cl\n\tshl     rax, cl\n\tor      r8, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tlea     r9, [rsi+8]\n\tsub     rax, rcx\n\tand     r9, -16\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmovdqa  xmm5, [r9]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tMOVQ    rax, xmm5\n\tcmp     rsi, r9\n\tlea     rsi, [rsi+rdx*8-40]\n\tje      aligned\n\tmovq    xmm2, [r9-8]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tpsrlq   xmm4, xmm0\n\tpor     xmm4, xmm5\n\tmovq    [rdi], xmm4\n\tlea     rdi, [rdi+8]\n\tdec     rdx\n\tMOVQ    rax, xmm2\naligned:\n\tlea     rdi, [rdi+rdx*8-40]\n\tpsrlq   xmm3, xmm0\n\tmov     r8d, 5\n\tsub     r8, rdx\n\tjnc     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovdqa  xmm5, [rsi+r8*8+32]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tadd     r8, 4\n\tjnc     loop1\nskiploop:\n\tcmp     r8, 2\n\tja      left0\n\tjz      left1\n\tjp      left2\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovq    xmm5, 
[rsi+r8*8+32]\n\tmovq    xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8+32], xmm3\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft0:\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/sqr_basecase.as",
    "content": "\n;  core2 mpn_sqr_basecase\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, 2*rdx) = (rsi, rdx)^2\n;\tsame as the addmul for now\n;\tchanges from standard mul\n;\tchange  r8 to r12   and rcx to r13\n;\treemove ret and write last limb\n\n%macro mulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadd     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tmov     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro mulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 
0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     [rdi+r11*8+40], rdx\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13\n;\treemove ret and write last limb\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], 
r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [rdi+r11*8+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+r11*8+32], r12\n\tadc     rdx, 0\n\tadd     r14, 1\n\tmov     [rdi+r11*8+40], rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+r11*8+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], rdx\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, 0\n\tmov     [rdi+r11*8+16], r10\n\tadd     r14, 1\n\tlea     rdi, [rdi+8]\n\tcmp     r14, 4\n\tjnz     theloop\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_sqr_basecase\n\tcmp     rdx, 3\n\tja      fourormore\n\tjz      three\n\tjp      two\none:\n\tmov     rax, [rsi]\n\tmul     rax\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\talign 
16\nfourormore:\n;\tthis code can not handle cases 3,2,1\n\tmov     [rsp-8], r12\n\tmov     [rsp-16], r13\n\tmov     [rsp-24], r14\n\tmov     [rsp-32], rbx\n;\tsave data for later\n\tmov     [rsp-40], rdi\n\tmov     [rsp-48], rsi\n\tmov     [rsp-56], rdx\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tmov     r14d, 6\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     mulskiploop1\n\tmulloop 1\nmulskiploop1:\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n;\tcould save r9 here \n;\tcould update here ie lea 8(rdi),rdi and inc r14 \n\tcmp     r11, 2\n\tje      mcase2\n\tja      mcase3\n\tjp      mcase1\nmcase0:\n\tmulnext0\n\tjmp     case1\n\talign 16\nmcase1:\n\tmulnext1\n\tjmp     case2\n\talign 16\nmcase2:\n\tmulnext2\n\tjmp     case3\n\talign 16\nmcase3:\n\tmulnext3\n;\tjmp case0 just fall thru \n\talign 16\ntheloop:\ncase0:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop0\n\taddmulloop 0\naddmulskiploop0:\n\taddmulnext0\ncase1:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop1\n\taddmulloop 1\naddmulskiploop1:\n\taddmulnext1\ncase2:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop2\n\taddmulloop 2\naddmulskiploop2:\n\taddmulnext2\ncase3:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov  
   r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop3\n\taddmulloop 3\naddmulskiploop3:\n\taddmulnext3\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmul     r13\n\tadd     [rdi+r14*8], rax\n\tadc     rdx, 0\n\tmov     [rdi+r14*8+8], rdx\n;\tnow lsh by 1 and add in the diagonal\n\tmov     rdi, [rsp-40]\n\tmov     rsi, [rsp-48]\n\tmov     rcx, [rsp-56]\n\tmov     r12, [rsp-8]\n\tmov     r13, [rsp-16]\n\txor     rbx, rbx\n\txor     r11, r11\n\tlea     rsi, [rsi+rcx*8]\n\tmov     [rdi], r11\n\tlea     r10, [rdi+rcx*8]\n\tmov     [r10+rcx*8-8], r11\n\tneg     rcx\n\talign 16\ndialoop:\n\tmov     rax, [rsi+rcx*8]\n\tmul     rax\n\tmov     r8, [rdi]\n\tmov     r9, [rdi+8]\n\tadd     rbx, 1\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rbx, rbx\n\tadd     r11, 1\n\tadc     r8, rax\n\tadc     r9, rdx\n\tsbb     r11, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tadd     rcx, 1\n\tlea     rdi, [rdi+16]\n\tjnz     dialoop\n\tmov     rbx, [rsp-32]\n\tmov     r14, [rsp-24]\n\tret\n\talign 16\ntwo:\n\tmov     rax, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     r8, rax\n\tmul     rax\n\tmov     [rdi], rax\n\tmov     rax, r9\n\tmov     [rdi+8], rdx\n\tmul     rax\n\tmov     [rdi+16], rax\n\tmov     rax, r8\n\tmov     r10, rdx\n\tmul     r9\n\tadd     rax, rax\n\tadc     rdx, rdx\n\tadc     r10, 0\n\tadd     [rdi+8], rax\n\tadc     [rdi+16], rdx\n\tadc     r10, 0\n\tmov     [rdi+24], r10\n\tret\n\talign 16\nthree:\n\tmov     r8, [rsi]\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     r11d, 0\n\tmov     [rdi+8], rax\n\tmov     rax, [rsi+16]\n\tmov     r9, rdx\n\tmul     r8\n\tmov     r8, [rsi+8]\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r11, rax\n\tmov     r9d, 0\n\tmov     [rdi+24], r11\n\tadc     r9, rdx\n\tmov     [rdi+32], r9\n\tmov     rcx, -3\n\txor     r10, r10\n\txor     r11, r11\n\tlea     rsi, [rsi+24]\n\tmov     [rdi], r11\n\tmov     [rdi+40], r11\ndialoop1:\n\tmov     rax, 
[rsi+rcx*8]\n\tmul     rax\n\tmov     r8, [rdi]\n\tmov     r9, [rdi+8]\n\tadd     r10, 1\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     r10, r10\n\tadd     r11, 1\n\tadc     r8, rax\n\tadc     r9, rdx\n\tsbb     r11, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tadd     rcx, 1\n\tlea     rdi, [rdi+16]\n\tjnz     dialoop1\n\tnop\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nC\tmpn_store(mp_ptr,mp_size_t,mp_limb_t val)\nC\trax          rdi,      rsi,         rdx\n\nASM_START()\nPROLOGUE(mpn_store)\ncmp $0,%rsi\njz case0\nMOVQ %rdx,%xmm0\nmovddup %xmm0,%xmm0\nlea -16(%rdi),%rdi\ntest $0xF,%rdi\njz notodd\n\tmov %rdx,16(%rdi)\n\tlea 8(%rdi),%rdi\n\tsub $1,%rsi\nnotodd:\nsub $2,%rsi\njc skiplp\nALIGN(16)\nlp:\n\tlea 16(%rdi),%rdi\n\tsub $2,%rsi\n\tmovdqa %xmm0,(%rdi)\n\tjnc lp\nskiplp:\njnp case0\n\tmov %rdx,16(%rdi)\ncase0:\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/sub_n.as",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx)\n;\trax=borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/sublsh1_n.as",
    "content": "\n;  mpn_sublsh1_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_sublsh1_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_sublsh1_n\n\tpush    rbx\n\tlea     rdi, [rdi+rcx*8-56]\n\tlea     rsi, [rsi+rcx*8-56]\n\tlea     rdx, [rdx+rcx*8-56]\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     L_skip\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb    
 r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\nL_skip:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case2:\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case1:\n\tmov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case0:\n\tadd     r10, 1\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/atom/sublsh_n.asm",
    "content": "dnl  AMD64 mpn_sublsh \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t    carry+(xp,n)= (yp,n) - (zp,n)<<c with carry in ci\nC\tmp_limb_t\tmpn_sublsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC\txp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_sublsh_n mpn_sublsh_nc)\n\nASM_START()\nPROLOGUE(mpn_sublsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_sublsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tsbb\t%r8,%r11\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tmov\t8(%rsi,%r10,8),%r13\n\tmov\t16(%rsi,%r10,8),%r14\n\tmov\t24(%rsi,%r10,8),%r15\n\tsbb\t%r8,%r11\n\tsbb\t%rbp,%r13\n\tsbb\t%rbx,%r14\n\tsbb\t%r12,%r15\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tmov\t%r13,8(%rdi,%r10,8)\n\tmov\t%r14,16(%rdi,%r10,8)\n\tmov\t%r15,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/atom/submul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_submul_1 -- Multiply a limb vector with a limb and\n;  subtract the result from a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.6\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benefit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n;\n\n%include 'yasm_mac.inc'\n\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,16,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n;\n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_submul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_submul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_submul_1_small_loop\n\t\n\talign\t16\nL_mpn_submul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,r8\n\tadc\trdx,r10\n\tsub\t[rdi+r11*8],rax\n\tmov\tr8,r10\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_submul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_submul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_submul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tsub\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_submul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_submul_1_main_loop\n\tshr\ttemp,1\nL_mpn_submul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_submul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_submul_1_main_loop\n\t\n\talign\t16\nL_mpn_submul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_submul_1_main_loop\n\nL_mpn_submul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tsub\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\tsub\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/xnor_n.as",
    "content": "\n;  AMD64 mpn_xnor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_xnor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/atom/xor_n.as",
    "content": "\n;  AMD64 mpn_xor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_xor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, r8\n\tor      r9, r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, r10\n\tor      r11, r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/add_err1_n.asm",
    "content": "dnl  mpn_add_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_add_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_add_err1_n)\n\tC // if we rearrange the params we could save some moves\n\tC //(rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n\t\n\tmov 8(%rsp),%r10            C cy\n\tmov %rbp,-16(%rsp)          C save rbp\n\tlea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n\tmov %r12,-24(%rsp)          C save r12\n\tmov %r13,-32(%rsp)          C save r13\n\tlea -24(%rsi,%r9,8),%rsi    C up += n - 3\n\tmov %r14,-40(%rsp)          C save r14\n\tmov %r15,-48(%rsp)          C save r15\n\tlea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n\tmov %rcx,-56(%rsp)\t       C save rcx\n\tmov %rbx,-8(%rsp)           C save rbx\n\tmov $3,%r11                 C i = 3\n\tshl $63,%r10\n\tlea (%r8,%r9,8),%r8         C yp += n\n\tsub %r9,%r11\t              C i = 3 - n\n\tmov $0,%r9                  C t1 = 0\n\tmov $0,%rax                 C 
t2 = 0\n\tmov $0,%rbx                 C t3 = 0\n\tjnc skiplp                  C if done goto skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 += *(vp + i + 0) + (cy & 1)\n\tadc (%rdx,%r11,8),%r12\n\tcmovc -8(%r8),%rax          C if carry1, t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13     C s2 += *(vp + i + 1) + carry1\n\tcmovc -16(%r8),%rbx         C if carry2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14    C s3 += *(vp + i + 2) + carry2\n\tcmovc -24(%r8),%rcx         C if carry3 t4 = *(yp - 3)\n\tadc 24(%rdx,%r11,8),%r15    C s4 += *(vp + i + 3) + carry3\n\tcmovc -32(%r8),%rbp         C if carry4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = carry4\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc  lp                     C not done, goto lp\nskiplp:\n\tcmp $2,%r11                 C cmp(i, 2)\n\tmov -16(%rsp),%rbp          C restore rbp\n\tmov -48(%rsp),%r15          C restore r15\n\tja case0                    C i == 3 goto case0\n\tje case1                    C i == 2 goto case1\n\tjp 
case2                    C i == 1 goto case2\ncase3:\n\tmov (%rsi,%r11,8),%r12         C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13        C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14       C s3 = *(up + i + 2)\n\tshl $1,%r10                    C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12         C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax             C if carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13        C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx            C if carry3 t3 = *(yp - 2)\n\tmov $0,%rcx                    C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14       C s3 += *(vp + i + 3) + carry3\n\tcmovc -24(%r8),%rcx            C if carry4 t4 = *(yp - 3)\n\trcr $1,%r10                    C store carry4 in high bit of cy\n\tadd %rax,%r9                   C t1 += t2\n\tadc $0,%r10                    C accumulate cy\n\tadd %rbx,%r9                   C t1 += t3\n\tadc $0,%r10                    C accumulate cy\n\tadd %rcx,%r9                   C t1 += t4\n\tadc $0,%r10                    C accumulate cy\n\tmov %r12,(%rdi,%r11,8)         C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)        C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)       C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx             C restore rcx\n\tmov %r9,(%rcx)                 C ep[0] = t1\n\tbtr $63,%r10                   C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)               C ep[1] = cy\n\tmov -40(%rsp),%r14             C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13             C restore r13\n\tadc $0,%rax                    C return carry out\n\tmov -24(%rsp),%r12             C restore r12\n\tmov -8(%rsp),%rbx              C restore rbx\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax         C if 
carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13    C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx        C if carry3 t3 = *(yp - 2)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax         C if carry2 t2 = *(yp - 1)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\ncase0:\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/add_n.as",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/addadd_n.as",
    "content": "\n;  AMD64 mpn_addadd_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)+(rdx,r8)+(rcx,r8) return carry+carry\n\n\tGLOBAL_FUNC mpn_addadd_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, [rdx+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8]\n\tadc     r8, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, 
[rdx+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8]\n\tadc     r8, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tadc     r8, [rcx+r9*8+40]\n\tadc     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tadc     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rdx+r9*8+32]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rax, r10\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/addlsh1_n.as",
    "content": "\n;  AMD64 mpn_addlsh1_n \n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n;\trax = carry\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_addlsh1_n\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     rdi, [rdi+rcx*8]\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      next\nlp1:\n\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     lp1\nnext:\n\tcmp     rcx, 0\n\tjz      end\n\tpush    rbx\n\talign   16\nlp:\n\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], 
r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     lp\n\tpop     rbx\nend:\n\tadd     rax, r9\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/addlsh_n.asm",
    "content": "dnl  AMD64 mpn_addlsh_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC    carry+(xp,n)= (yp,n) + (zp,n)<<c with carry in ci\nC mp_limb_t\tmpn_addlsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC xp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc)\n\nASM_START()\nPROLOGUE(mpn_addlsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_addlsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tadc\t8(%rsi,%r10,8),%rbp\n\tadc\t16(%rsi,%r10,8),%rbx\n\tadc\t24(%rsi,%r10,8),%r12\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tmov\t%rbp,8(%rdi,%r10,8)\n\tmov\t%rbx,16(%rdi,%r10,8)\n\tmov\t%r12,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/addmul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n;  add the result to a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.8\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benifit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n\n%include 'yasm_mac.inc'\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,15,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n; \n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_addmul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_addmul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_addmul_1_small_loop\n\t\n\talign\t16\nL_mpn_addmul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,[rdi+r11*8]\n\tadc\trdx,r10\n\tadd\trax,r8\n\tmov\tr8,r10\n\tmov\t[rdi+r11*8],rax\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_addmul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_addmul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_addmul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tadd\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_addmul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_addmul_1_main_loop\n\tshr\ttemp,1\nL_mpn_addmul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_addmul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_addmul_1_main_loop\n\t\n\talign\t16\nL_mpn_addmul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_addmul_1_main_loop\n\nL_mpn_addmul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t; \n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tadd\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t; \n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t; \n\tadd\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/addsub_n.as",
    "content": "\n;  AMD64 mpn_addsub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)+(rdx,r8)-(rcx,r8)  return carry-borrow\n\n\tGLOBAL_FUNC mpn_addsub_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, [rdx+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, 
[rdx+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rdx+r9*8+32]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tsub     rax, r10\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/and_n.asm",
    "content": "dnl  mpn_and_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_and_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_and_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\tand (%rsi),%r8\n\tand 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\tand 16-32(%rsi),%r10\n\tand 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\tand 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\tand 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\tand 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/andn_n.as",
    "content": "\n;  AMD64 mpn_andn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_andn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tand     r8, [rsi+rcx*8+24]\n\tand     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tand     r10, [rsi+rcx*8+8]\n\tand     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/com_n.as",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_com_n\n\tsub     rdx, 4\n\tjc      next\n\talign   8\nloop1:\n\tmov     rax, [rsi+rdx*8+24]\n\tmov     rcx, [rsi+rdx*8+16]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+24], rax\n\tmov     [rdi+rdx*8+16], rcx\n\tmov     rax, [rsi+rdx*8+8]\n\tmov     rcx, [rsi+rdx*8]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+8], rax\n\tmov     [rdi+rdx*8], rcx\n\tsub     rdx, 4\n\tjae     loop1\nnext:\n\tadd     rdx, 4\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tdec     rdx\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tdec     rdx\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/copyd.as",
    "content": "\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyd(mp_ptr rdi ,mp_ptr rsi ,mp_size_t rdx)\n;\t(rdi,rdx)=(rsi,rdx)\n\n\tGLOBAL_FUNC mpn_copyd\n\tlea     rsi, [rsi+rdx*8-8]\n\tlea     rdi, [rdi+rdx*8-8]\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tlea     rdi, [rdi-32]\n\tmov     r9, [rsi-24]\n\tmov     [rdi+32], rax\n\tsub     rdx, 4\n\tmov     [rdi+24], rcx\n\tmov     [rdi+16], r8\n\tlea     rsi, [rsi-32]\n\tmov     [rdi+8], r9\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tmov     [rdi-16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/copyi.as",
    "content": "\n;  mpn_copyi\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyi(mp_ptr rdi ,mp_ptr rsi ,mp_size_t rdx)\n;\t(rdi,rdx)=(rdi,rdx)\n\n\tGLOBAL_FUNC mpn_copyi\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tsub     rdx, 4\n\tmov     r8, [rsi+16]\n\tmov     r9, [rsi+24]\n\tlea     rdi, [rdi+32]\n\tmov     [rdi-32], rax\n\tmov     [rdi-24], rcx\n\tmov     [rdi-16], r8\n\tlea     rsi, [rsi+32]\n\tmov     [rdi-8], r9\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tmov     r8, [rsi+16]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rcx\n\tmov     [rdi+16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/divexact_byff.as",
    "content": "\n;  mpn_divexact_byff\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_divexact_byff(mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\txor     eax, eax\n\tmov     rcx, rdx\n\tand     rdx, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n\tje      L_skiplp\n; want carry clear here\n\talign   16\nL_lp:\n\tsbb     rax, [rsi]\n\tlea     rdi, [rdi+32]\n\tmov     r8, rax\n\tsbb     rax, [rsi+8]\n\tmov     r9, rax\n\tsbb     rax, [rsi+16]\n\tmov     r10, rax\n\tsbb     rax, [rsi+24]\n\tdec     rcx\n\tmov     [rdi-32], r8\n\tmov     [rdi-24], r9\n\tmov     [rdi-16], r10\n\tmov     [rdi-8], rax\n\tlea     rsi, [rsi+32]\n\tjnz     L_lp\nL_skiplp:\n; dont want to change the carry\n\tinc     rdx\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\nL_end:\n\tsbb     rax, 0\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_1)\nmov $0,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nloop:\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mov %rax,(%rdi,%r9,8)\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz loop\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx      rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_2)\nmov $1,%r9\nsub %rdx,%r9\nlea -8(%rdi,%rdx,8),%rdi\nlea -8(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC //r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nmov %r11,%r13\nmov %r12,%r14\n\nmov (%rsi,%r9,8),%r11\nmov 8(%rsi,%r9,8),%r12\nmov $0,%r10\nadd $2,%r9\njc skiplp\nALIGN(16)\nlp:\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\tmov %rax,-16(%rdi,%r9,8)\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov (%rsi,%r9,8),%r11\n\t\tmov 8(%rsi,%r9,8),%r12\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc lp\nskiplp:\nmov %r12,%r8\nmov %r13,%rax\t\nmul %r11\nmov %rax,-16(%rdi,%r9,8)\nimul %r14,%r11\t\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne case0\ncase1:\n\t\tmov (%rsi,%r9,8),%r11\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\tmov %rax,(%rdi,%r9,8)\n\tmul %rcx\n\tadd 
%r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\ncase0:\n\tmov %rdx,-8(%rdi,%r9,8)\t\n\tmov %rcx,%rax\t\n\tmul %rdx\t\n\tcmp %rax,%r8\t\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/divrem_hensel_r_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_r_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t hensel divide (rdi,rsi) / rdx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_r_1)\nmov $0,%r9\nsub %rsi,%r9\nlea (%rdi,%rsi,8),%rdi\n\nmov %rdx,%rcx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nloop:\n    mov (%rdi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz loop\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_hamdist)\nxor %eax,%eax\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nmov $3,%rcx\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\tmov (%rdi,%rcx,8),%r8\n\tmov 8(%rdi,%rcx,8),%r9\n\txor (%rsi,%rcx,8),%r8\n\tmov 16(%rdi,%rcx,8),%r10\n\tpopcnt %r8,%r8\n\txor 8(%rsi,%rcx,8),%r9\n\txor 16(%rsi,%rcx,8),%r10\n\tpopcnt %r9,%r9\n\tmov 24(%rdi,%rcx,8),%r11\n\tadd %r8,%rax\n\tpopcnt %r10,%r10\n\txor 24(%rsi,%rcx,8),%r11\n\tadd %r9,%rax\n\tpopcnt %r11,%r11\n\tadd %r10,%rax\n\tadd %r11,%rax\n\tadd $4,%rcx\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\tmov (%rdi),%r8\n\txor (%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase2:\tmov 8(%rdi),%r8\n\txor 8(%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase1:\tmov 16(%rdi),%r8\n\txor 16(%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/ior_n.asm",
    "content": "dnl  mpn_ior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_ior_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_ior_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\tor (%rsi),%r8\n\tor 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\tor 16-32(%rsi),%r10\n\tor 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\tor 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\tor 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\tor 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/iorn_n.as",
    "content": "\n;  AMD64 mpn_iorn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_iorn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\npush %rdx\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc notodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nl7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l7\n\tmov $3,%rcx\nnotodd:\tand $3,%rax\n\tpopcnt %rax,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nl1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\tand $7,%rbx\n\tpopcnt %rbx,%r8\n\tadd %r8,24(%rbp)\nl2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l2\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc skipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nl2:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nl3:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nl1:\tsbbq $0,(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nl4:\tsbbq $0,(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc notodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nl7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tinc %rcx\n\tsar $1,%r8\n\tjnz l7\nnotodd:\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/lshift.as",
    "content": "\n;  AMD64 mpn_lshift\n; Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<rcx\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift\n\tmov     eax, 64\n\tsub     rax, rcx\n\tmovq    mm0, rcx\n\tsub     rdx, 4\n\tmovq    mm1, rax\n\tmovq    mm5, [rsi+rdx*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjbe     skiploop\n\talign   16\nloop1\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm2, [rsi+rdx*8]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+8], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8-8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8], mm4\n\tpsllq   mm3, mm0\n\tsub     rdx, 4\n\tja      loop1\nskiploop:\n\tcmp     rdx, -1\n\tjl      next\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8+16], mm4\n\tpsllq   mm3, mm0\n\tsub     rdx, 2\nnext:\n\ttest    rdx, 1\n\tjnz     end\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    [rdi+rdx*8+16], mm4\n\temms\n\tret\nend:\n\tmovq    [rdi+rdx*8+24], mm3\n\temms\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/lshift1.as",
    "content": "\n;  AMD64 mpn_lshift1\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<1\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift1\n\txor     rax, rax\n\tmov     r11, rdx\n\tand     r11, 7\n\tinc     r11\n\tshr     rdx, 3\n;\tand clear carry flag\n\tcmp     rdx, 0\n\tjz      next\n\talign   16\nloop1:\n\tmov     rcx, [rsi]\n\tmov     r8, [rsi+8]\n\tmov     r10, [rsi+16]\n\tmov     r9, [rsi+24]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi], rcx\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     [rdi+24], r9\n\tmov     rcx, [rsi+32]\n\tmov     r8, [rsi+40]\n\tmov     r10, [rsi+48]\n\tmov     r9, [rsi+56]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi+32], rcx\n\tmov     [rdi+40], r8\n\tmov     [rdi+48], r10\n\tmov     [rdi+56], r9\n\tlea     rdi, [rdi+64]\n\tdec     rdx\n\tlea     rsi, [rsi+64]\n\tjnz     loop1\nnext:\n\tdec     r11\n\tjz      end\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     rcx, [rsi]\n\tadc     rcx, rcx\n\tmov     [rdi], rcx\n\tdec     r11\n\tjz    
  end\n\tmov     rcx, [rsi+8]\n\tadc     rcx, rcx\n\tmov     [rdi+8], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+16]\n\tadc     rcx, rcx\n\tmov     [rdi+16], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+24]\n\tadc     rcx, rcx\n\tmov     [rdi+24], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+32]\n\tadc     rcx, rcx\n\tmov     [rdi+32], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+40]\n\tadc     rcx, rcx\n\tmov     [rdi+40], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+48]\n\tadc     rcx, rcx\n\tmov     [rdi+48], rcx\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/lshift2.asm",
    "content": "dnl  mpn_lshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift2)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%r11,%r8,4),%r11\n\tmov %r11,24(%rdi,%rcx,8)\n\tshr $62,%r8\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov %r11,%rax\n\tmov 
%r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r10,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/lshiftc.asm",
    "content": "dnl  mpn_lshiftc\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_lshiftc)\nMOVQ %rcx,%mm0\nmov $64,%rax\nsub %rcx,%rax\npcmpeqb %mm6,%mm6\nMOVQ %rax,%mm1\nlea 8(%rsi),%rsi\nlea 8(%rdi),%rdi\nsub $5,%rdx\nmovq 24(%rsi,%rdx,8),%mm5\nmovq %mm5,%mm3\npsrlq %mm1,%mm5\nMOVQ %mm5,%rax\npsllq %mm0,%mm3\njc skiplp\nALIGN(16)\nlp:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tmovq -8(%rsi,%rdx,8),%mm5\n\tsub $4,%rdx\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,40(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,32(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tjnc lp\nskiplp:\ncmp $-2,%rdx\njz case2\njp case1\njs case0\ncase3:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq 
%mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase2:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase1:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase0:\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nlp:\tmov $0,%r11d\n\tmov -8(%rsi,%rcx,8),%r10\n\tmul %r8\n\tadd %rax,%r10\n\tlea (%r13),%rax\n\tadc %rdx,%r11\n\tlea (%r11),%r13\n\tlea (%r9),%r9\n\tlea (%r8),%r8\n\tmul %r9\n\tadd %r10,%rax\n\tadc %rdx,%r13\n\tsub $1,%rcx\n\tjnz lp\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/mul_1.asm",
    "content": "dnl  mpn_mul_1\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_1)\nlea -8(%rsi,%rdx,8),%rsi\nlea -8(%rdi,%rdx,8),%rdi\nmov $1,%r8d\nsub %rdx,%r8\nmov (%rsi,%r8,8),%rax\nmov $0,%r9d\njz skiplp\nALIGN(16)\nlp:\tmul %rcx\n\tadd %r9,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tmov 8(%rsi,%r8,8),%rax\n\tmov $0,%r9d\n\tadc %rdx,%r9\n\tadd $1,%r8\n\tjnc lp\nskiplp:\nmul %rcx\nadd %r9,%rax\nmov %rax,(%rdi,%r8,8)\nmov $0,%eax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/bobcat/nand_n.asm",
    "content": "dnl  mpn_nand_n\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_nand_n)\nlea (%rdx,%rcx,8),%rdx\nlea (%rsi,%rcx,8),%rsi\nlea (%rdi,%rcx,8),%rdi\nneg %rcx\nadd $3,%rcx\njc skip\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\nadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\njc skiplp\nALIGN(16)\nlp:\n\tand -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tand -48(%rsi,%rcx,8),%r9\n\tand -40(%rsi,%rcx,8),%r10\n\tand -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov %r11,-32(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\n\tjnc lp\nskiplp:\n\tand -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tand -48(%rsi,%rcx,8),%r9\n\tand -40(%rsi,%rcx,8),%r10\n\tand -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov 
%r11,-32(%rdi,%rcx,8)\nskip:\ncmp $2,%rcx\njg case0\nje case1\njp case2\ncase3:\tmov -24(%rdx),%r8\n\tand -24(%rsi),%r8\n\tnot %r8\n\tmov %r8,-24(%rdi)\ncase2:\tmov -16(%rdx),%r8\n\tand -16(%rsi),%r8\n\tnot %r8\n\tmov %r8,-16(%rdi)\ncase1:\tmov -8(%rdx),%r8\n\tand -8(%rsi),%r8\n\tnot %r8\n\tmov %r8,-8(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/nior_n.asm",
    "content": "dnl  mpn_nior_n\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_nior_n)\nlea (%rdx,%rcx,8),%rdx\nlea (%rsi,%rcx,8),%rsi\nlea (%rdi,%rcx,8),%rdi\nneg %rcx\nadd $3,%rcx\njc skip\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\nadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\njc skiplp\nALIGN(16)\nlp:\n\tor -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tor -48(%rsi,%rcx,8),%r9\n\tor -40(%rsi,%rcx,8),%r10\n\tor -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov %r11,-32(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\n\tjnc lp\nskiplp:\n\tor -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tor -48(%rsi,%rcx,8),%r9\n\tor -40(%rsi,%rcx,8),%r10\n\tor -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov 
%r11,-32(%rdi,%rcx,8)\nskip:\ncmp $2,%rcx\njg case0\nje case1\njp case2\ncase3:\tmov -24(%rdx),%r8\n\tor -24(%rsi),%r8\n\tnot %r8\n\tmov %r8,-24(%rdi)\ncase2:\tmov -16(%rdx),%r8\n\tor -16(%rsi),%r8\n\tnot %r8\n\tmov %r8,-16(%rdi)\ncase1:\tmov -8(%rdx),%r8\n\tor -8(%rsi),%r8\n\tnot %r8\n\tmov %r8,-8(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/not.asm",
    "content": "dnl  mpn_not\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_not)\nmov $1,%ecx\nlea -8(%rdi,%rsi,8),%rdi\nsub %rsi,%rcx\njnc skiplp\nALIGN(16)\nlp:\tnotq (%rdi,%rcx,8)\n\tnotq 8(%rdi,%rcx,8)\n\tadd $2,%rcx\n\tjnc lp\nskiplp:\njnz case0\ncase1:\tnotq (%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_popcount)\nmov $5,%rcx\nlea -40(%rdi,%rsi,8),%rdi\nxor %eax,%eax\nsub %rsi,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tpopcnt (%rdi,%rcx,8),%r8\n\tpopcnt 8(%rdi,%rcx,8),%r9\n\tpopcnt 16(%rdi,%rcx,8),%r10\n\tpopcnt 24(%rdi,%rcx,8),%rsi\n\tadd %r8,%rax\n\tadd %rsi,%rax\n\tadd %r9,%rax\n\tpopcnt 32(%rdi,%rcx,8),%r8\n\tpopcnt 40(%rdi,%rcx,8),%r9\n\tadd %r8,%rax\n\tadd %r10,%rax\n\tadd %r9,%rax\n\tadd $6,%rcx\n\tjnc lp\nskiplp:\nlea case5(%rip),%rdx\t#// in linux we can do this before the loop\nlea (%rcx,%rcx,8),%rcx\t#// rcx*9\nadd %rcx,%rdx\njmp *%rdx\ncase5:\t#//rcx=0\n\tnop\n\tpopcnt (%rdi),%r8\t#// 5bytes\n\tadd %r8,%rax\t\t#// 3bytes\ncase4:\t#//rcx=1\n\tpopcnt 8(%rdi),%r9\t#// 6bytes\n\tadd %r9,%rax\ncase3:\tpopcnt 16(%rdi),%r10\n\tadd %r10,%rax\ncase2:\tpopcnt 24(%rdi),%rsi\n\tadd %rsi,%rax\ncase1:\tpopcnt 32(%rdi),%r8\n\tadd %r8,%rax\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/redc_1.as",
    "content": "\n;  AMD64 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tVersion 1.0.4\n;\t(rdi,rcx)=(rsi,rcx)+(rdx,rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign   16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc    
 rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign   16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov 
    r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tdec     r15\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     
r13, [r8+r14*8+8]\n\tadc     r12, 0\n\tdec     r15\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tdec     r15\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tdec     r15\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign   16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov  
   r13, [r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign   16\ncase0:\n\tmpn_addmul_1_int 0\n\talign   16\ncase1:\n\tmpn_addmul_1_int 1\n\talign   16\ncase3:\n\tmpn_addmul_1_int 3\n\talign   16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign   16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rsh1add_n.as",
    "content": "\n;  AMD64 mpn_rsh1add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)+(rdx,rcx))/2 return low bit of sum\n\n\tGLOBAL_FUNC mpn_rsh1add_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, 
[rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rsh1sub_n.as",
    "content": "\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two's complement\n\n\tGLOBAL_FUNC mpn_rsh1sub_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rsh_divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9  / rcx ) >> r8 \nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\nmov %r9,%r10\nmov $1,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\nmov -8(%rsi,%r9,8),%rax\nsub %r10,%rax\nsbb %r8,%r8\nimul %r11,%rax\nMOVQ %rax,%mm4\nmovq %mm4,%mm5\npsrlq %mm0,%mm4\npsllq %mm1,%mm5\npsrlq %mm1,%mm5\nmul %rcx\ncmp $0,%r9\nje one\nadd %r8,%r8\nALIGN(16)\nloop:\n    movq %mm4,%mm2\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    MOVQ %rax,%mm3\n    movq %mm3,%mm4\n    psllq %mm1,%mm3\n    psrlq %mm0,%mm4\n    por %mm3,%mm2\n    movq %mm2,-8(%rdi,%r9,8)\n    mul %rcx\n    add %r8,%r8\n    inc %r9\n    jnz loop\nskiploop:\nmovq %mm4,-8(%rdi,%r9,8)\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\none:\nmovq %mm4,-8(%rdi,%r9,8)\nadd %r8,%r8\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rsh_divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9 / rcx ) >> r8    rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1_2 with shifting on the output of the quotient\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\nC\t// 3limb minimum for the mo\nmov %r9,%r10\nmov $2,%r9\nsub %rdx,%r9\nlea -16(%rdi,%rdx,8),%rdi\nlea -16(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC 
//r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nC // for the first limb we can not store (as we have to shift) so we need to\nC // do first limb separately , we could do it as normal as an extention of\nC // the loop , but if we do it as a 1 limb inverse then we can start it\nC // eailer , ie interleave it with the calculation of the 2limb inverse\n\nmov %r11,%r13\nmov %r12,%r14\n\n\nmov (%rsi,%r9,8),%r11\nsub %r10,%r11\nsbb %r10,%r10\n\nimul %r13,%r11\nMOVQ %r11,%mm2\npsrlq %mm0,%mm2\nmov %rcx,%rax\nmul %r11\nmov 8(%rsi,%r9,8),%r11\nmov 16(%rsi,%r9,8),%r12\nadd %r10,%r10\nsbb %rdx,%r11\nsbb $0,%r12\nsbb %r10,%r10\n\n\nadd $2,%r9\njc skiplp\nALIGN(16)\nlp:\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,-16(%rdi,%r9,8)\n\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov 8(%rsi,%r9,8),%r11\n\t\tmov 16(%rsi,%r9,8),%r12\n\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc lp\nskiplp:\nmov %r12,%r8\nmov %r13,%rax\nmul %r11\n\nMOVQ %rax,%mm3\nmovq %mm3,%mm4\npsllq %mm1,%mm3\npsrlq %mm0,%mm4\npor %mm3,%mm2\nmovq %mm2,-16(%rdi,%r9,8)\n\nimul %r14,%r11\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne case0\ncase1:\n\t\tmov 8(%rsi,%r9,8),%r11\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq 
%mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,(%rdi,%r9,8)\n\tmovq %mm4,8(%rdi,%r9,8)\n\n\tmul %rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\ncase0:\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\tmovq %mm2,(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\tcmp %rax,%r8\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rshift.asm",
    "content": "dnl  mpn_rshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_rshift(mp_ptr,mp_ptr,mp_size_t,ul)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_rshift)\nC below really a movq\nmovd %rcx,%mm0\n\nmov $64,%rax\nsub %rcx,%rax\n\nC below really a movq\nmovd %rax,%mm1\n\nmov $4,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\nsub %rdx,%rcx\nmovq (%rsi,%rcx,8),%mm5\nmovq %mm5,%mm3\npsllq %mm1,%mm5\n\nC below really a movq\nmovd %mm5,%rax\n\npsrlq %mm0,%mm3\njge skiplp\nALIGN(16)\nlp:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq 24(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq 32(%rsi,%rcx,8),%mm5\n\tmovq %mm3,16(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,24(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tadd $4,%rcx\n\tjnc lp\nskiplp:\ncmp 
$2,%rcx\nja case0\njz case1\njp case2\ncase3:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq 24(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,16(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm4,24(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase2:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq %mm3,16(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase1:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase0:\n\tmovq %mm3,(%rdi,%rcx,8)\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/rshift2.asm",
    "content": "dnl  mpn_rshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_rshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_rshift2)\nmov %rdx,%rcx\nlea 24(%rsi),%rsi\nlea 24(%rdi),%rdi\nxor %eax,%eax\nxor %edx,%edx\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tmov -16(%rsi,%rcx,8),%r10\n\tmov -24(%rsi,%rcx,8),%r11\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\trcr $1,%r11\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\trcr $1,%r11\n\tmov %r11,-24(%rdi,%rcx,8)\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tsub $4,%rcx\n\tmov %r9,24(%rdi,%rcx,8)\n\tmov %r10,16(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nja case3\nje case2\njp case1\ncase0:\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase3:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tmov -16(%rsi,%rcx,8),%r10\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr 
$1,%r10\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,-8(%rdi,%rcx,8)\n\tmov %r10,-16(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,-8(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r8\n\tadd %rax,%rax\n\trcr $1,%r8\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sqr_basecase.as",
    "content": "\n;  AMD64 mpn_sqr_basecase\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, 2*rdx) = (rsi,rdx)^2\n;\tVersion 1.0.5\n;\tsame as the addmul for now\n;\tchanges from standard mul\n;\tchange  r8 to r12   and rcx to r13\n;\treemove ret and write last limb\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n%macro mulloop 1\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadd     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tmov     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro mulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     
rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro mulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13\n;\treemove ret and write last limb\n%macro addmulloop 1\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     
r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [rdi+r11*8+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+r11*8+32], r12\n\tadc     rdx, 0\n\tinc     r14\n\tmov     [rdi+r11*8+40], rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tadd     [rdi+r11*8+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+r11*8+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r14\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, 0\n\tmov     [rdi+r11*8+16], r10\n\tinc     r14\n\tlea     rdi, [rdi+8]\n\tcmp     r14, 4\n\tjnz     theloop\n%endmacro\n\t\n   GLOBAL_FUNC mpn_sqr_basecase\n\tcmp     rdx, 3\n\tja      fourormore\n\tjz      three\n\tjp      two\none:\n\tmov     rax, [rsi]\n\tmul     rax\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\talign   
16\nfourormore:\n;\tthis code can not handle cases 3,2,1\n\tmov     [rsp-8], r12\n\tmov     [rsp-16], r13\n\tmov     [rsp-24], r14\n\tmov     [rsp-32], rbx\n;\tsave data for later\n\tmov     [rsp-40], rdi\n\tmov     [rsp-48], rsi\n\tmov     [rsp-56], rdx\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tmov     r14d, 6\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     mulskiploop1\n\tmulloop 1\nmulskiploop1:\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n;\tcould save r9 here \n;\tcould update here ie lea 8(rdi),rdi and inc r14 \n\tcmp     r11, 2\n\tje      mcase2\n\tja      mcase3\n\tjp      mcase1\nmcase0:\n\tmulnext0\n\tjmp     case1\n\talign   16\nmcase1:\n\tmulnext1\n\tjmp     case2\n\talign   16\nmcase2:\n\tmulnext2\n\tjmp     case3\n\talign   16\nmcase3:\n\tmulnext3\n;\tjmp case0 just fall thru \n\talign   16\ntheloop:\ncase0:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop0\n\taddmulloop 0\naddmulskiploop0:\n\taddmulnext0\ncase1:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop1\n\taddmulloop 1\naddmulskiploop1:\n\taddmulnext1\ncase2:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop2\n\taddmulloop 2\naddmulskiploop2:\n\taddmulnext2\ncase3:\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, 
[rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n\tjge     addmulskiploop3\n\taddmulloop 3\naddmulskiploop3:\n\taddmulnext3\n;\tonly need to add one more line here\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rsi+r14*8-8]\n\tmul     r13\n\tadd     [rdi+r14*8], rax\n\tadc     rdx, 0\n\tmov     [rdi+r14*8+8], rdx\n;\tnow lsh by 1 and add in the diagonal\n\tmov     rdi, [rsp-40]\n\tmov     rsi, [rsp-48]\n\tmov     rcx, [rsp-56]\n\tmov     r12, [rsp-8]\n\tmov     r13, [rsp-16]\n\txor     rbx, rbx\n\txor     r11, r11\n\tlea     rsi, [rsi+rcx*8]\n\tmov     [rdi], r11\n\tlea     r10, [rdi+rcx*8]\n\tmov     [r10+rcx*8-8], r11\n\tneg     rcx\n\talign   16\ndialoop:\n\tmov     rax, [rsi+rcx*8]\n\tmul     rax\n\tmov     r8, [rdi]\n\tmov     r9, [rdi+8]\n\tadd     rbx, 1\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rbx, rbx\n\tadd     r11, 1\n\tadc     r8, rax\n\tadc     r9, rdx\n\tsbb     r11, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tinc     rcx\n\tlea     rdi, [rdi+16]\n\tjnz     dialoop\n\tmov     rbx, [rsp-32]\n\tmov     r14, [rsp-24]\n\tret\n\talign   16\ntwo:\n\tmov     rax, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     r8, rax\n\tmul     rax\n\tmov     [rdi], rax\n\tmov     rax, r9\n\tmov     [rdi+8], rdx\n\tmul     rax\n\tmov     [rdi+16], rax\n\tmov     rax, r8\n\tmov     r10, rdx\n\tmul     r9\n\tadd     rax, rax\n\tadc     rdx, rdx\n\tadc     r10, 0\n\tadd     [rdi+8], rax\n\tadc     [rdi+16], rdx\n\tadc     r10, 0\n\tmov     [rdi+24], r10\n\tret\n\talign   16\nthree:\n\tmov     r8, [rsi]\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     r11d, 0\n\tmov     [rdi+8], rax\n\tmov     rax, [rsi+16]\n\tmov     r9, rdx\n\tmul     r8\n\tmov     r8, [rsi+8]\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r11, rax\n\tmov     r9d, 0\n\tmov     [rdi+24], r11\n\tadc     r9, rdx\n\tmov     [rdi+32], r9\n\tmov     rcx, -3\n\txor     r10, r10\n\txor     r11, r11\n\tlea     rsi, [rsi+24]\n\tmov     [rdi], 
r11\n\tmov     [rdi+40], r11\ndialoop1:\n\tmov     rax, [rsi+rcx*8]\n\tmul     rax\n\tmov     r8, [rdi]\n\tmov     r9, [rdi+8]\n\tadd     r10, 1\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     r10, r10\n\tadd     r11, 1\n\tadc     r8, rax\n\tadc     r9, rdx\n\tsbb     r11, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tinc     rcx\n\tlea     rdi, [rdi+16]\n\tjnz     dialoop1\n\tnop\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_store)\nmov %rsi,%rcx\nand $7,%rsi\nmov $8,%r8\nsub %rsi,%r8\t\t#// r8=8-n%8\nlea (%r8,%r8,4),%r8\t\nlea lp(%rip),%rax\t#// rax is lpjmp\nadd %r8,%rax\t\t#// rax=5*(8-n%8)+lp\nand $-8,%rcx\nadd $48,%rdi\njmp  *%rax\nALIGN(16)\nlp:\tmov %rdx,8(%rdi,%rcx,8)\t\t#// 5 bytes\t// n%8=0 NOT HERE\n\tmov %rdx, (%rdi,%rcx,8)\t\t#\t\t// n%8=7\t// 5 bytes with nop\n\tnop\n\tmov %rdx, -8(%rdi,%rcx,8)\t#\t\t// n%8=6\n\tmov %rdx,-16(%rdi,%rcx,8)\t#\t\t// n%8=5\n\tmov %rdx,-24(%rdi,%rcx,8)\t#\t\t// n%8=4\n\tmov %rdx,-32(%rdi,%rcx,8)\t#\t\t// n%8=3\n\tmov %rdx,-40(%rdi,%rcx,8)\t#\t\t// n%8=2\n\tmov %rdx,-48(%rdi,%rcx,8)\t#\t\t// n%8=1\n\tnop\t\t\t\t#\t\t// n%8=0\n\tsub $8,%rcx\n\tjnc lp\t\t\t\t#// 2 bytes\nnop\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sub_err1_n.asm",
    "content": "dnl  mpn_sub_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_sub_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_sub_err1_n)\n       C if we rearrange the params we could save some moves\n       C (rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n       \n       mov 8(%rsp),%r10            C cy\n       mov %rbp,-16(%rsp)          C save rbp\n       lea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n       mov %r12,-24(%rsp)          C save r12\n       mov %r13,-32(%rsp)          C save r13\n       lea -24(%rsi,%r9,8),%rsi    C up += n - 3\n       mov %r14,-40(%rsp)          C save r14\n       mov %r15,-48(%rsp)          C save r15\n       lea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n       mov %rcx,-56(%rsp)\t       C save rcx\n       mov %rbx,-8(%rsp)           C save rbx\n       mov $3,%r11                 C i = 3\n       shl $63,%r10                \n       lea (%r8,%r9,8),%r8         C yp += n\n       sub 
%r9,%r11\t              C i = 3 - n\n       mov $0,%r9                  C t1 = 0\n       mov $0,%rax                 C t2 = 0\n       mov $0,%rbx                 C t3 = 0\n       jnc skiplp                  C if done goto skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 -= *(vp + i + 0) + (cy & 1)\n\tsbb (%rdx,%r11,8),%r12      \n\tcmovc -8(%r8),%rax          C if borrow1, t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13     C s2 -= *(vp + i + 1) + borrow1\n\tcmovc -16(%r8),%rbx         C if borrow2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14    C s3 -= *(vp + i + 2) + borrow2\n\tcmovc -24(%r8),%rcx         C if borrow3 t4 = *(yp - 3)\n\tsbb 24(%rdx,%r11,8),%r15    C s4 -= *(vp + i + 3) + borrow3\n\tcmovc -32(%r8),%rbp         C if borrow4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = borrow\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc  lp                     C not done, goto lp\nskiplp:\n       cmp $2,%r11             C cmp(i, 2)\n       mov -16(%rsp),%rbp      C restore rbp\n       mov 
-48(%rsp),%r15      C restore r15\n       ja case0                C i == 3 goto case0 \n       je case1                C i == 2 goto case1\n       jp case2                C i == 1 goto case2\ncase3:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14   C s3 = *(up + i + 2) \n\tshl $1,%r10                C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12     C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax         C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13    C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx        C if borrow3 t3 = *(yp - 2)\n\tmov $0,%rcx                C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14   C s3 -= *(vp + i + 3) + borrow3\n\tcmovc -24(%r8),%rcx        C if borrow4 t4 = *(yp - 3)\n\trcr $1,%r10                C store borrow4 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tadd %rcx,%r9               C t1 += t4\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)   C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return borrow out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13  C s2 = *(up + i + 1)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + 
borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13  C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx      C if borrow3 t3 = *(yp - 2)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tadd %rbx,%r9             C t1 += t3\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)  C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\ncase0:\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sub_n.as",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx)\n;\trax=borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/subadd_n.as",
    "content": "\n;  AMD64 mpn_subadd_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)-(rdx,r8)-(rcx,r8) return borrow+borrow\n\n\tGLOBAL_FUNC mpn_subadd_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rsi+r9*8+16]\n\tmov     r11, [rsi+r9*8+24]\n\tmov     r12, [rsi+r9*8]\n\tmov     r8, [rsi+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8]\n\tsbb     r8, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r11, [rsi+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rsi+r9*8]\n\tmov     r8, 
[rsi+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8]\n\tsbb     r8, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r12, [rsi+r9*8+32]\n\tmov     r8, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tsbb     r8, [rdx+r9*8+40]\n\tsbb     rbp, [rdx+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rsi+r9*8+32]\n\tmov     r8, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tsbb     r8, [rdx+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rsi+r9*8+32]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rax, r10\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sublsh1_n.as",
    "content": "\n;  mpn_sublsh1_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_sublsh1_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_sublsh1_n\n\tpush    rbx\n\tlea     rdi, [rdi+rcx*8-56]\n\tlea     rsi, [rsi+rcx*8-56]\n\tlea     rdx, [rdx+rcx*8-56]\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     L_skip\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb    
 r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\nL_skip:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case2:\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case1:\n\tmov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case0:\n\tadd     r10, 1\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sublsh_n.asm",
    "content": "dnl  AMD64 mpn_sublsh \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t    carry+(xp,n)= (yp,n) - (zp,n)<<c with carry in ci\nC\tmp_limb_t\tmpn_sublsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC\txp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_sublsh_n mpn_sublsh_nc)\n\nASM_START()\nPROLOGUE(mpn_sublsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_sublsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tsbb\t%r8,%r11\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tmov\t8(%rsi,%r10,8),%r13\n\tmov\t16(%rsi,%r10,8),%r14\n\tmov\t24(%rsi,%r10,8),%r15\n\tsbb\t%r8,%r11\n\tsbb\t%rbp,%r13\n\tsbb\t%rbx,%r14\n\tsbb\t%r12,%r15\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tmov\t%r13,8(%rdi,%r10,8)\n\tmov\t%r14,16(%rdi,%r10,8)\n\tmov\t%r15,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bobcat/submul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_submul_1 -- Multiply a limb vector with a limb and\n;  subtract the result from a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.6\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benefit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n;\n\n%include 'yasm_mac.inc'\n\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,16,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n;\n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_submul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_submul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_submul_1_small_loop\n\t\n\talign\t16\nL_mpn_submul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,r8\n\tadc\trdx,r10\n\tsub\t[rdi+r11*8],rax\n\tmov\tr8,r10\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_submul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_submul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_submul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tsub\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_submul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_submul_1_main_loop\n\tshr\ttemp,1\nL_mpn_submul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_submul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_submul_1_main_loop\n\t\n\talign\t16\nL_mpn_submul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_submul_1_main_loop\n\nL_mpn_submul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tsub\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\tsub\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/sumdiff_n.as",
    "content": "\n;  AMD64 mpn_sumdiff_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, r8) = (rdx, r8) + (rcx, r8)  (rsi, r8) = (rdx, r8) - (rcx, r8)\n;\treturn 2*add_carry + sub_borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sumdiff_n\n\tlea     rsi, [rsi+r8*8]\n\tlea     rdx, [rdx+r8*8]\n\tlea     rdi, [rdi+r8*8]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\txor     r9, r9\n\txor     r10, r10\n\ttest    r8, 3\n\tjz      next\nlp1:\n\tmov     rax, [rdx+r8*8]\n\tmov     r11, rax\n\tadd     r9, 1\n\tadc     rax, [rcx+r8*8]\n\tsbb     r9, r9\n\tadd     r10, 1\n\tsbb     r11, [rcx+r8*8]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], rax\n\tmov     [rsi+r8*8], r11\n\tinc     r8\n\ttest    r8, 3\n\tjnz     lp1\nnext:\n\tcmp     r8, 0\n\tjz      skiploop\n\tmov     [rsp-8], rbx\n\tmov     [rsp-16], rbp\n\tmov     [rsp-24], r12\n\tmov     [rsp-32], r13\n\tmov     [rsp-40], r14\n\tmov     [rsp-48], r15\n\talign   16\nloop1:\n\tmov     rax, [rdx+r8*8]\n\tmov     rbx, [rdx+r8*8+8]\n\tmov     rbp, [rdx+r8*8+16]\n\tmov     r12, [rdx+r8*8+24]\n\tmov     r11, rax\n\tmov     r13, rbx\n\tmov     r14, rbp\n\tmov     r15, r12\n\tadd     r9, 
1\n\tadc     rax, [rcx+r8*8]\n\tadc     rbx, [rcx+r8*8+8]\n\tadc     rbp, [rcx+r8*8+16]\n\tadc     r12, [rcx+r8*8+24]\n\tsbb     r9, r9\n\tadd     r10, 1\n\tsbb     r11, [rcx+r8*8]\n\tsbb     r13, [rcx+r8*8+8]\n\tsbb     r14, [rcx+r8*8+16]\n\tsbb     r15, [rcx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], rax\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rbp\n\tmov     [rdi+r8*8+24], r12\n\tmov     [rsi+r8*8], r11\n\tmov     [rsi+r8*8+8], r13\n\tmov     [rsi+r8*8+16], r14\n\tmov     [rsi+r8*8+24], r15\n\tadd     r8, 4\n\tjnz     loop1\n\tmov     rbx, [rsp-8]\n\tmov     rbp, [rsp-16]\n\tmov     r12, [rsp-24]\n\tmov     r13, [rsp-32]\n\tmov     r14, [rsp-40]\n\tmov     r15, [rsp-48]\nskiploop:\n\tlea     rax, [r10+r9*2]\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/xnor_n.as",
    "content": "\n;  AMD64 mpn_xnor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_xnor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/bobcat/xor_n.asm",
    "content": "dnl  mpn_xor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xor_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_xor_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\txor (%rsi),%r8\n\txor 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\txor 16-32(%rsi),%r10\n\txor 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\txor 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\txor 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\txor 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/com_n.asm",
    "content": "dnl  mpn_com_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_com_n(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_com_n)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\npcmpeqb %xmm2,%xmm2\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmovdqu 16(%rsi,%rcx,8),%xmm1\n\tpxor %xmm2,%xmm0\n\tadd $4,%rcx\n\tpxor %xmm2,%xmm1\n\tmovdqu %xmm0,(%rdi)\n\tmovdqu %xmm1,16(%rdi)\n\tlea 32(%rdi),%rdi\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\t\ncase3:\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmov 16(%rsi,%rcx,8),%rax\n\tpxor %xmm2,%xmm0\n\tnot %rax\n\tmovdqu %xmm0,(%rdi)\n\tmov %rax,16(%rdi)\n\tret\ncase2:\tmovdqu (%rsi,%rcx,8),%xmm0\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi)\n\tret\ncase1:\tmov (%rsi,%rcx,8),%rax\n\tnot %rax\n\tmov %rax,(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/copyd.asm",
    "content": "dnl  mpn_copyd\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyd(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyd)\nlea 16(%rsi),%rsi\nlea 16(%rdi),%rdi\nsub $4,%rdx\njc skiplp\nALIGN(16)\nlp:\n\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu -16(%rsi,%rdx,8),%xmm1\n\tsub $4,%rdx\n\tmovdqu %xmm1,-16+32(%rdi,%rdx,8)\n\tmovdqu %xmm0,32(%rdi,%rdx,8)\n\tjnc lp\nskiplp:\ncmp $-2,%rdx\njg case3\nje case2\njnp case0\ncase1:\tmov 8(%rsi,%rdx,8),%rax\n\tmov %rax,8(%rdi,%rdx,8)\ncase0:\tret\ncase3:\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmov -8(%rsi,%rdx,8),%rax\n\tmov %rax,-8(%rdi,%rdx,8)\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\ncase2:\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/copyi.asm",
    "content": "dnl  mpn_copyi\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyi(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyi)\nC // for <20 limbs this is slower than core2/copyi for rev 2257\nC // probaly want to tweek it , that should do most of the work\nC //below small loop is not much help\nC //cmp $10,%rdx\nC //jge large\nC //ALIGN(16)\nC //lp:\tmov (%rsi),%rax\nC //\tmov %rax,(%rdi)\nC //\tlea 8(%rsi),%rsi\nC //\tlea 8(%rdi),%rdi\nC //\tsub $1,%rdx\nC //\tjnz lp\nC //\tret\nC // large:\ncmp $0,%rdx\njz endfn\nmov %rdi,%rax\nsub %rsi,%rax\ntest $0xF,%rax\njz aligned\ntest $0xF,%rdi\njz srcisodd\nmov $5,%rcx\nsub %rdx,%rcx\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nmovapd (%rsi,%rcx,8),%xmm1\nmovq %xmm1,(%rdi,%rcx,8)\nadd $8,%rdi\ncmp $1,%rdx\njz endfn\ncmp $0,%rcx\njge skiplpud\nALIGN(16)\nlpud:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc 
lpud\nskiplpud:\ncmp $2,%rcx\nja case0d\njz case1d\njp case2d\nALIGN(16)\ncase3d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1  \n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0d:\tmovhpd %xmm1,(%rdi,%rcx,8)\nendfn:\tret\nC //////////////////////////\nsrcisodd:\nmov $4,%rcx\nsub %rdx,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\n\tmovapd -8(%rsi,%rcx,8),%xmm1\n\tsub $8,%rsi\ncmp $0,%rcx\njge skiplpus\t\nALIGN(16)\nlpus:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc lpus\nskiplpus:\ncmp $2,%rcx\nja case0s\njz case1s\njp case2s\nALIGN(16)\ncase3s:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2s: movapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1s:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0s:\tmovhpd %xmm1,(%rdi,%rcx,8)\n\tret\nC //////////////////////////\nALIGN(16)\naligned:\nmov $3,%rcx\nsub %rdx,%rcx\ntest $0xF,%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\njz notodda\n\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tadd $1,%rcx\nnotodda:\ncmp $0,%rcx\njge skiplpa\nALIGN(16)\nlpa:\tadd $4,%rcx\n\tmovapd -32(%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,-32(%rdi,%rcx,8)\n\tmovapd 16-32(%rsi,%rcx,8),%xmm1\n\tmovapd 
%xmm1,16-32(%rdi,%rcx,8)\n\tjnc lpa\nskiplpa:\ncmp $2,%rcx\nja casea0\nje casea1\njp casea2\ncasea3:\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tmov 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncasea0:\tret\nALIGN(16)\ncasea2:\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncasea1:\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/lshift.asm",
    "content": "dnl  AMD64 mpn_lshift optimised for CPUs with fast SSE including fast movdqu.\n\ndnl  Contributed to the GNU project by Torbjorn Granlund.\n\ndnl  Copyright 2010-2012 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\n\nC\t     cycles/limb     cycles/limb     cycles/limb    good\nC              aligned\t      unaligned\t      best seen\t   for cpu?\nC AMD K8,K9\t 3\t\t 3\t\t 2.35\t  no, use shl/shr\nC AMD K10\t 1.5-1.8\t 1.5-1.8\t 1.33\t  yes\nC AMD bd1\t 1.7-1.9\t 1.7-1.9\t 1.33\t  yes\nC AMD bobcat\t 3.17\t\t 3.17\t\t\t  yes, bad for n < 20\nC Intel P4\t 4.67\t\t 4.67\t\t 2.7\t  no, slow movdqu\nC Intel core2\t 2.15\t\t 2.15\t\t 1.25\t  no, use shld/shrd\nC Intel NHM\t 1.66\t\t 1.66\t\t 1.25\t  no, use shld/shrd\nC Intel SBR\t 1.3\t\t 1.3\t\t 1.25\t  yes, bad for n = 4-6\nC Intel atom\t11.7\t\t11.7\t\t 4.5\t  no\nC VIA nano\t 5.7\t\t 5.95\t\t 2.0\t  no, slow movdqu\n\nC We try to do as many aligned 16-byte operations as possible.  The top-most\nC and bottom-most writes might need 8-byte operations.\nC\nC This variant rely on fast load movdqu, and uses it even for aligned operands,\nC in order to avoid the need for two separate loops.\nC\nC TODO\nC  * Could 2-limb wind-down code be simplified?\nC  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts\nC    for other affected CPUs.\n\nC INPUT PARAMETERS\ndefine(`rp',  `%rdi')\ndefine(`ap',  `%rsi')\ndefine(`n',   `%rdx')\ndefine(`cnt', `%rcx')\n\nASM_START()\n\tTEXT\n\tALIGN(64)\nPROLOGUE(mpn_lshift)\n\tFUNC_ENTRY(4)\n\tmovd\tR32(%rcx), %xmm4\n\tmov\t$64, R32(%rax)\n\tsub\tR32(%rcx), R32(%rax)\n\tmovd\tR32(%rax), %xmm5\n\n\tneg\tR32(%rcx)\n\tmov\t-8(ap,n,8), %rax\n\tshr\tR8(%rcx), %rax\n\n\tcmp\t$3, n\n\tjle\tL(bc)\n\n\tlea\t(rp,n,8), R32(%rcx)\n\ttest\t$8, R8(%rcx)\n\tjz\tL(rp_aligned)\n\nC Do one initial limb in order to make rp aligned\n\tmovq\t-8(ap,n,8), %xmm0\n\tmovq\t-16(ap,n,8), %xmm1\n\tpsllq\t%xmm4, %xmm0\n\tpsrlq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0, -8(rp,n,8)\n\tdec\tn\n\nL(rp_aligned):\n\tlea\t1(n), %r8d\n\n\tand\t$6, R32(%r8)\n\tjz\tL(ba0)\n\tcmp\t$4, 
R32(%r8)\n\tjz\tL(ba4)\n\tjc\tL(ba2)\nL(ba6):\tadd\t$-4, n\n\tjmp\tL(i56)\nL(ba0):\tadd\t$-6, n\n\tjmp\tL(i70)\nL(ba4):\tadd\t$-2, n\n\tjmp\tL(i34)\nL(ba2):\tadd\t$-8, n\n\tjle\tL(end)\n\n\tALIGN(16)\nL(top):\tmovdqu\t40(ap,n,8), %xmm1\n\tmovdqu\t48(ap,n,8), %xmm0\n\tpsllq\t%xmm4, %xmm0\n\tpsrlq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, 48(rp,n,8)\nL(i70):\n\tmovdqu\t24(ap,n,8), %xmm1\n\tmovdqu\t32(ap,n,8), %xmm0\n\tpsllq\t%xmm4, %xmm0\n\tpsrlq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, 32(rp,n,8)\nL(i56):\n\tmovdqu\t8(ap,n,8), %xmm1\n\tmovdqu\t16(ap,n,8), %xmm0\n\tpsllq\t%xmm4, %xmm0\n\tpsrlq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, 16(rp,n,8)\nL(i34):\n\tmovdqu\t-8(ap,n,8), %xmm1\n\tmovdqu\t(ap,n,8), %xmm0\n\tpsllq\t%xmm4, %xmm0\n\tpsrlq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, (rp,n,8)\n\tsub\t$8, n\n\tjg\tL(top)\n\nL(end):\ttest\t$1, R8(n)\n\tjnz\tL(end8)\n\n\tmovdqu\t(ap), %xmm1\n\tpxor\t%xmm0, %xmm0\n\tpunpcklqdq  %xmm1, %xmm0\n\tpsllq\t%xmm4, %xmm1\n\tpsrlq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, (rp)\n\tFUNC_EXIT()\n\tret\n\nC Basecase\n\tALIGN(16)\nL(bc):\tdec\tR32(n)\n\tjz\tL(end8)\n\n\tmovq\t(ap,n,8), %xmm1\n\tmovq\t-8(ap,n,8), %xmm0\n\tpsllq\t%xmm4, %xmm1\n\tpsrlq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0, (rp,n,8)\n\tsub\t$2, R32(n)\n\tjl\tL(end8)\n\tmovq\t8(ap), %xmm1\n\tmovq\t(ap), %xmm0\n\tpsllq\t%xmm4, %xmm1\n\tpsrlq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0, 8(rp)\n\nL(end8):movq\t(ap), %xmm0\n\tpsllq\t%xmm4, %xmm0\n\tmovq\t%xmm0, (rp)\n\tFUNC_EXIT()\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/mul_basecase.asm",
    "content": "dnl  AMD64 mpn_mul_basecase optimised for AMD Bulldozer and Piledriver.\n\ndnl  Contributed to the GNU project by Torbjörn Granlund.\n\ndnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_1\t\tmul_2\t\tmul_3\t\taddmul_2\nC AMD K8,K9\nC AMD K10\nC AMD bull\t~4.8\t\t~4.55\t\t-\t\t~4.3\nC AMD pile\t~4.6\t\t~4.55\t\t-\t\t~4.55\nC AMD bobcat\nC AMD jaguar\nC Intel P4\nC Intel core\nC Intel NHM\nC Intel SBR\nC Intel IBR\nC Intel HWL\nC Intel BWL\nC Intel atom\nC VIA nano\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\nC TODO\nC  * Merge bull-specific mul_1, if it is not slower the TOOM22 range.\nC    Alternatively, we could tweak the present code (which was loopmixed for a\nC    different CPU).\nC  * Merge faster mul_2, such as the one in the same directory as this file.\nC  * Further micro-optimise.\n\nC When playing with pointers, set this to $2 to fall back to conservative\nC indexing in wind-down code.\ndefine(`I',`$1')\n\n\ndefine(`rp',      `%rdi')\ndefine(`up',      `%rsi')\ndefine(`un_param',`%rdx')\ndefine(`vp',      `%rcx')\ndefine(`vn',      `%r8')\n\ndefine(`un',      `%rbx')\n\ndefine(`w0',\t`%r10')\ndefine(`w1',\t`%r11')\ndefine(`w2',\t`%r12')\ndefine(`w3',\t`%r13')\ndefine(`n',\t`%rbp')\ndefine(`v0',\t`%r9')\n\nABI_SUPPORT(DOS64)\nABI_SUPPORT(STD64)\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_mul_basecase)\n\tFUNC_ENTRY(4)\nIFDOS(`\tmov\t56(%rsp), %r8d\t')\n\tpush\t%rbx\n\tpush\t%rbp\n\tmov\tun_param, un\t\tC free up rdx\n\tneg\tun\n\n\tmov\t(up), %rax\t\tC shared for mul_1 and mul_2\n\tlea\t(up,un_param,8), up\tC point at operand end\n\tlea\t(rp,un_param,8), rp\tC point at rp[un-1]\n\n\tmov\t(vp), v0\t\tC shared for mul_1 and mul_2\n\tmul\tv0\t\t\tC shared for mul_1 and mul_2\n\n\ttest\t$1, R8(vn)\n\tjz\tL(do_mul_2)\n\nL(do_mul_1):\n\ttest\t$1, R8(un)\n\tjnz\tL(m1x1)\n\nL(m1x0):mov\t%rax, w0\t\tC un = 2, 4, 6, 8, ...\n\tmov\t%rdx, w1\n\tmov\t8(up,un,8), %rax\n\ttest\t$2, R8(un)\n\tjnz\tL(m110)\n\nL(m100):lea\t2(un), n\t\tC un = 
4, 8, 12, ...\n\tjmp\tL(m1l0)\n\nL(m110):lea\t(un), n\t\t\tC un = 2, 6, 10, ...\n\tjmp\tL(m1l2)\n\nL(m1x1):mov\t%rax, w1\t\tC un = 1, 3, 5, 7, ...\n\tmov\t%rdx, w0\n\ttest\t$2, R8(un)\n\tjz\tL(m111)\n\nL(m101):lea\t3(un), n\t\tC un = 1, 5, 9, ...\n\ttest\tn, n\n\tjs\tL(m1l1)\n\tmov\t%rax, -8(rp)\n\tmov\t%rdx, (rp)\n\tpop\t%rbp\n\tpop\t%rbx\n\tFUNC_EXIT()\n\tret\n\nL(m111):lea\t1(un), n\t\tC un = 3, 7, 11, ...\n\tmov\t8(up,un,8), %rax\n\tjmp\tL(m1l3)\n\n\tALIGN(16)\nL(m1tp):mov\t%rdx, w0\n\tadd\t%rax, w1\nL(m1l1):mov\t-16(up,n,8), %rax\n\tadc\t$0, w0\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\tw1, -24(rp,n,8)\n\tmov\t-8(up,n,8), %rax\n\tmov\t%rdx, w1\n\tadc\t$0, w1\nL(m1l0):mul\tv0\n\tmov\tw0, -16(rp,n,8)\n\tadd\t%rax, w1\n\tmov\t%rdx, w0\n\tmov\t(up,n,8), %rax\n\tadc\t$0, w0\nL(m1l3):mul\tv0\n\tmov\tw1, -8(rp,n,8)\n\tmov\t%rdx, w1\n\tadd\t%rax, w0\n\tmov\t8(up,n,8), %rax\n\tadc\t$0, w1\nL(m1l2):mul\tv0\n\tmov\tw0, (rp,n,8)\n\tadd\t$4, n\n\tjnc\tL(m1tp)\n\nL(m1ed):add\t%rax, w1\n\tadc\t$0, %rdx\n\tmov\tw1, I(-8(rp),-24(rp,n,8))\n\tmov\t%rdx, I((rp),-16(rp,n,8))\n\n\tdec\tR32(vn)\n\tjz\tL(ret2)\n\n\tlea\t8(vp), vp\n\tlea\t8(rp), rp\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\tjmp\tL(do_addmul)\n\nL(do_mul_2):\ndefine(`v1',\t`%r14')\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\n\tmov\t8(vp), v1\n\n\ttest\t$1, R8(un)\n\tjnz\tL(m2b1)\n\nL(m2b0):lea\t(un), n\n\tmov\t%rax, w2\t\tC 0\n\tmov\t(up,un,8), %rax\n\tmov\t%rdx, w1\t\tC 1\n\tmul\tv1\n\tmov\t%rax, w0\t\tC 1\n\tmov\tw2, (rp,un,8)\t\tC 0\n\tmov\t8(up,un,8), %rax\n\tmov\t%rdx, w2\t\tC 2\n\tjmp\tL(m2l0)\n\nL(m2b1):lea\t1(un), n\n\tmov\t%rax, w0\t\tC 1\n\tmov\t%rdx, w3\t\tC 2\n\tmov\t(up,un,8), %rax\n\tmul\tv1\n\tmov\tw0, (rp,un,8)\t\tC 1\n\tmov\t%rdx, w0\t\tC 3\n\tmov\t%rax, w2\t\tC 0\n\tmov\t8(up,un,8), %rax\n\tjmp\tL(m2l1)\n\n\tALIGN(32)\nL(m2tp):add\t%rax, w2\t\tC 0\n\tmov\t(up,n,8), %rax\n\tadc\t$0, w0\t\t\tC 1\nL(m2l1):mul\tv0\n\tadd\t%rax, w2\t\tC 0\n\tmov\t(up,n,8), %rax\n\tmov\t%rdx, w1\t\tC 1\n\tadc\t$0, w1\t\t\tC 
1\n\tmul\tv1\n\tadd\tw3, w2\t\t\tC 0\n\tadc\t$0, w1\t\t\tC 1\n\tadd\t%rax, w0\t\tC 1\n\tmov\tw2, (rp,n,8)\t\tC 0\n\tmov\t8(up,n,8), %rax\n\tmov\t%rdx, w2\t\tC 2\n\tadc\t$0, w2\t\t\tC 2\nL(m2l0):mul\tv0\n\tadd\t%rax, w0\t\tC 1\n\tmov\t%rdx, w3\t\tC 2\n\tadc\t$0, w3\t\t\tC 2\n\tadd\tw1, w0\t\t\tC 1\n\tadc\t$0, w3\t\t\tC 2\n\tmov\t8(up,n,8), %rax\n\tmul\tv1\n\tadd\t$2, n\n\tmov\tw0, -8(rp,n,8)\t\tC 1\n\tmov\t%rdx, w0\t\tC 3\n\tjnc\tL(m2tp)\n\nL(m2ed):add\t%rax, w2\n\tadc\t$0, %rdx\n\tadd\tw3, w2\n\tadc\t$0, %rdx\n\tmov\tw2, I((rp),(rp,n,8))\n\tmov\t%rdx, I(8(rp),8(rp,n,8))\n\n\tadd\t$-2, R32(vn)\n\tjz\tL(ret5)\n\n\tlea\t16(vp), vp\n\tlea\t16(rp), rp\n\n\nL(do_addmul):\n\tpush\t%r15\n\tpush\tvn\t\t\tC save vn in new stack slot\ndefine(`vn',\t`(%rsp)')\ndefine(`X0',\t`%r14')\ndefine(`X1',\t`%r15')\ndefine(`v1',\t`%r8')\n\nL(outer):\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\n\tmov\t(up,un,8), %rax\n\tmul\tv0\n\n\ttest\t$1, R8(un)\n\tjnz\tL(bx1)\n\nL(bx0):\tmov\t%rax, X1\n\tmov\t(up,un,8), %rax\n\tmov\t%rdx, X0\n\tmul\tv1\n\ttest\t$2, R8(un)\n\tjnz\tL(b10)\n\nL(b00):\tlea\t(un), n\t\t\tC un = 4, 8, 12, ...\n\tmov\t(rp,un,8), w3\n\tmov\t%rax, w0\n\tmov\t8(up,un,8), %rax\n\tmov\t%rdx, w1\n\tjmp\tL(lo0)\n\nL(b10):\tlea\t2(un), n\t\tC un = 2, 6, 10, ...\n\tmov\t(rp,un,8), w1\n\tmov\t%rdx, w3\n\tmov\t%rax, w2\n\tmov\t8(up,un,8), %rax\n\tjmp\tL(lo2)\n\nL(bx1):\tmov\t%rax, X0\n\tmov\t(up,un,8), %rax\n\tmov\t%rdx, X1\n\tmul\tv1\n\ttest\t$2, R8(un)\n\tjz\tL(b11)\n\nL(b01):\tlea\t1(un), n\t\tC un = 1, 5, 9, ...\n\tmov\t(rp,un,8), w2\n\tmov\t%rdx, w0\n\tmov\t%rax, w3\n\tjmp\tL(lo1)\n\nL(b11):\tlea\t-1(un), n\t\tC un = 3, 7, 11, ...\n\tmov\t(rp,un,8), w0\n\tmov\t%rax, w1\n\tmov\t8(up,un,8), %rax\n\tmov\t%rdx, w2\n\tjmp\tL(lo3)\n\n\tALIGN(32)\nL(top):\nL(lo2):\tmul\tv0\n\tadd\tw1, X1\n\tmov\tX1, -16(rp,n,8)\n\tmov\t%rdx, X1\n\tadc\t%rax, X0\n\tadc\t$0, X1\n\tmov\t-8(up,n,8), %rax\n\tmul\tv1\n\tmov\t-8(rp,n,8), w1\n\tmov\t%rdx, w0\n\tadd\tw1, w2\n\tadc\t%rax, w3\n\tadc\t$0, 
w0\nL(lo1):\tmov\t(up,n,8), %rax\n\tmul\tv0\n\tadd\tw2, X0\n\tmov\tX0, -8(rp,n,8)\n\tmov\t%rdx, X0\n\tadc\t%rax, X1\n\tmov\t(up,n,8), %rax\n\tadc\t$0, X0\n\tmov\t(rp,n,8), w2\n\tmul\tv1\n\tadd\tw2, w3\n\tadc\t%rax, w0\n\tmov\t8(up,n,8), %rax\n\tmov\t%rdx, w1\n\tadc\t$0, w1\nL(lo0):\tmul\tv0\n\tadd\tw3, X1\n\tmov\tX1, (rp,n,8)\n\tadc\t%rax, X0\n\tmov\t8(up,n,8), %rax\n\tmov\t%rdx, X1\n\tadc\t$0, X1\n\tmov\t8(rp,n,8), w3\n\tmul\tv1\n\tadd\tw3, w0\n\tadc\t%rax, w1\n\tmov\t16(up,n,8), %rax\n\tmov\t%rdx, w2\n\tadc\t$0, w2\nL(lo3):\tmul\tv0\n\tadd\tw0, X0\n\tmov\tX0, 8(rp,n,8)\n\tmov\t%rdx, X0\n\tadc\t%rax, X1\n\tadc\t$0, X0\n\tmov\t16(up,n,8), %rax\n\tmov\t16(rp,n,8), w0\n\tmul\tv1\n\tmov\t%rdx, w3\n\tadd\tw0, w1\n\tadc\t%rax, w2\n\tadc\t$0, w3\n\tmov\t24(up,n,8), %rax\n\tadd\t$4, n\n\tjnc\tL(top)\n\nL(end):\tmul\tv0\n\tadd\tw1, X1\n\tmov\tX1, I(-16(rp),-16(rp,n,8))\n\tmov\t%rdx, X1\n\tadc\t%rax, X0\n\tadc\t$0, X1\n\tmov\tI(-8(up),-8(up,n,8)), %rax\n\tmul\tv1\n\tmov\tI(-8(rp),-8(rp,n,8)), w1\n\tadd\tw1, w2\n\tadc\t%rax, w3\n\tadc\t$0, %rdx\n\tadd\tw2, X0\n\tadc\t$0, X1\n\tmov\tX0, I(-8(rp),-8(rp,n,8))\n\tadd\tw3, X1\n\tmov\tX1, I((rp),(rp,n,8))\n\tadc\t$0, %rdx\n\tmov\t%rdx, I(8(rp),8(rp,n,8))\n\n\n\taddl\t$-2, vn\n\tlea\t16(vp), vp\n\tlea\t16(rp), rp\n\tjnz\tL(outer)\n\n\tpop\t%rax\t\tC deallocate vn slot\n\tpop\t%r15\nL(ret5):pop\t%r14\n\tpop\t%r13\n\tpop\t%r12\nL(ret2):pop\t%rbp\n\tpop\t%rbx\n\tFUNC_EXIT()\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/bulldozer/rshift.asm",
    "content": "dnl  AMD64 mpn_rshift optimised for CPUs with fast SSE including fast movdqu.\n\ndnl  Contributed to the GNU project by Torbjorn Granlund.\n\ndnl  Copyright 2010-2012 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\n\nC\t     cycles/limb     cycles/limb     cycles/limb    good\nC              aligned\t      unaligned\t      best seen\t   for cpu?\nC AMD K8,K9\t 3\t\t 3\t\t 2.35\t  no, use shl/shr\nC AMD K10\t 1.5-1.8\t 1.5-1.8\t 1.33\t  yes\nC AMD bd1\t 1.7-1.9\t 1.7-1.9\t 1.33\t  yes\nC AMD bobcat\t 3.17\t\t 3.17\t\t\t  yes, bad for n < 20\nC Intel P4\t 4.67\t\t 4.67\t\t 2.7\t  no, slow movdqu\nC Intel core2\t 2.15\t\t 2.15\t\t 1.25\t  no, use shld/shrd\nC Intel NHM\t 1.66\t\t 1.66\t\t 1.25\t  no, use shld/shrd\nC Intel SBR\t 1.3\t\t 1.3\t\t 1.25\t  yes, bad for n = 4-6\nC Intel atom\t11.7\t\t11.7\t\t 4.5\t  no\nC VIA nano\t 5.7\t\t 5.95\t\t 2.0\t  no, slow movdqu\n\nC We try to do as many aligned 16-byte operations as possible.  The top-most\nC and bottom-most writes might need 8-byte operations.\nC\nC This variant rely on fast load movdqu, and uses it even for aligned operands,\nC in order to avoid the need for two separate loops.\nC\nC TODO\nC  * Could 2-limb wind-down code be simplified?\nC  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts\nC    for other affected CPUs.\n\nC INPUT PARAMETERS\ndefine(`rp',  `%rdi')\ndefine(`ap',  `%rsi')\ndefine(`n',   `%rdx')\ndefine(`cnt', `%rcx')\n\nASM_START()\n\tTEXT\n\tALIGN(64)\nPROLOGUE(mpn_rshift)\n\tFUNC_ENTRY(4)\n\tmovd\tR32(%rcx), %xmm4\n\tmov\t$64, R32(%rax)\n\tsub\tR32(%rcx), R32(%rax)\n\tmovd\tR32(%rax), %xmm5\n\n\tneg\tR32(%rcx)\n\tmov\t(ap), %rax\n\tshl\tR8(%rcx), %rax\n\n\tcmp\t$3, n\n\tjle\tL(bc)\n\n\ttest\t$8, R8(rp)\n\tjz\tL(rp_aligned)\n\nC Do one initial limb in order to make rp aligned\n\tmovq\t(ap), %xmm0\n\tmovq\t8(ap), %xmm1\n\tpsrlq\t%xmm4, %xmm0\n\tpsllq\t%xmm5, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0, (rp)\n\tlea\t8(ap), ap\n\tlea\t8(rp), rp\n\tdec\tn\n\nL(rp_aligned):\n\tlea\t1(n), %r8d\n\tlea\t(ap,n,8), ap\n\tlea\t(rp,n,8), rp\n\tneg\tn\n\n\tand\t$6, R32(%r8)\n\tjz\tL(bu0)\n\tcmp\t$4, 
R32(%r8)\n\tjz\tL(bu4)\n\tjc\tL(bu2)\nL(bu6):\tadd\t$4, n\n\tjmp\tL(i56)\nL(bu0):\tadd\t$6, n\n\tjmp\tL(i70)\nL(bu4):\tadd\t$2, n\n\tjmp\tL(i34)\nL(bu2):\tadd\t$8, n\n\tjge\tL(end)\n\n\tALIGN(16)\nL(top):\tmovdqu\t-64(ap,n,8), %xmm1\n\tmovdqu\t-56(ap,n,8), %xmm0\n\tpsllq\t%xmm5, %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, -64(rp,n,8)\nL(i70):\n\tmovdqu\t-48(ap,n,8), %xmm1\n\tmovdqu\t-40(ap,n,8), %xmm0\n\tpsllq\t%xmm5, %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, -48(rp,n,8)\nL(i56):\n\tmovdqu\t-32(ap,n,8), %xmm1\n\tmovdqu\t-24(ap,n,8), %xmm0\n\tpsllq\t%xmm5, %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, -32(rp,n,8)\nL(i34):\n\tmovdqu\t-16(ap,n,8), %xmm1\n\tmovdqu\t-8(ap,n,8), %xmm0\n\tpsllq\t%xmm5, %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, -16(rp,n,8)\n\tadd\t$8, n\n\tjl\tL(top)\n\nL(end):\ttest\t$1, R8(n)\n\tjnz\tL(e1)\n\n\tmovdqu\t-16(ap), %xmm1\n\tmovq\t-8(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpsllq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovdqa\t%xmm0, -16(rp)\n\tFUNC_EXIT()\n\tret\n\nL(e1):\tmovq\t-8(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm0\n\tmovq\t%xmm0, -8(rp)\n\tFUNC_EXIT()\n\tret\n\nC Basecase\n\tALIGN(16)\nL(bc):\tdec\tR32(n)\n\tjnz\t1f\n\tmovq\t(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm0\n\tmovq\t%xmm0, (rp)\n\tFUNC_EXIT()\n\tret\n\n1:\tmovq\t(ap), %xmm1\n\tmovq\t8(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpsllq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0, (rp)\n\tdec\tR32(n)\n\tjnz\t1f\n\tmovq\t8(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm0\n\tmovq\t%xmm0, 8(rp)\n\tFUNC_EXIT()\n\tret\n\n1:\tmovq\t8(ap), %xmm1\n\tmovq\t16(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm1\n\tpsllq\t%xmm5, %xmm0\n\tpor\t%xmm1, %xmm0\n\tmovq\t%xmm0,\t8(rp)\n\tmovq\t16(ap), %xmm0\n\tpsrlq\t%xmm4, %xmm0\n\tmovq\t%xmm0, 16(rp)\n\tFUNC_EXIT()\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/README",
    "content": "This is a patch to solve two problems:\n\n1.  It makes gmp run faster on Intel Core2 CPUs (i.e. Woodcrest, Conroe,\n    and friends) under Linux\n\n2.  It makes gmp work (and run fast) under Mac OS X on Core2 CPU\n    machines (e.g. Mac Pro)\n\nAs an added bonus, it actually gives a little speed up to gmp on AMD64\nmachines as well.\n\n\nTo Install on a 64 bit Intel Mac (e.g. Mac Pro)\n-------------------------------------------------------\n1. Download gmp-4.2.1-core2-port.tar.gz\n\n\n2. Uncompress and untar it.  Let's say that it's in the directory\n~/gmp-4.2.1-core2-port\n\n\n3.  Download GMP version 4.2.1\n\n\n4.  Uncompress and untar GMP.  Let's say that it's in the directory\n~/gmp-4.2.1\n\n\n5.  Change into the gmp-4.2.1-core2-port directory and run the install\nscript (if you want to see what it's doing, just read it... it's a\nvery simple script).\n\n    > cd ~/gmp-4.2.1-core2-port\n    > ./install_gmp_4.2.1_core2_patch.sh ~/gmp-4.2.1\n\n\n6.  Configure gmp for a 64 bit Intel Mac as such:\n\n   > cd ~/gmp-4.2.1\n   > ./configure --build=x86_64-apple-darwin CFLAGS=\"-m64 -fast\"\n\n(You can, of course, add whatever other config options you want.  Be\nsure to use the CFLAGS environmental variable given above on the\ncommand line.  Otherwise, the CFLAGS setting that configure generates\nby default will give you compilation problems.)\n\n7.  Build it!  Execute the following:\n\n   > make\n\n\n8.  Check it!  Execute the following:\n\n   > make check\n\n\n9.  Install it.\n\n   > sudo make install\n\n\n\n\nTo Install on a Linux machine.\n-------------------------------------------------------\n1. Download gmp-4.2.1-core2-port.tar.gz\n\n\n2. Uncompress and untar it.  Let's say that it's in the directory\n~/gmp-4.2.1-core2-port\n\n\n3.  Download GMP version 4.2.1\n\n\n4.  Uncompress and untar GMP.  Let's say that it's in the directory\n~/gmp-4.2.1\n\n\n5.  
Change into the gmp-4.2.1-core2-port directory and run the install\nscript (if you want to see what it's doing, just read it... it's a\nvery simple script).\n\n    > cd ~/gmp-4.2.1-core2-port\n    > ./install_gmp_4.2.1_core2_patch.sh ~/gmp-4.2.1\n\n\n6.  Configure gmp as normal.\n\n   > cd ~/gmp-4.2.1\n   > ./configure\n\n(You can, of course, add whatever other config options you want.)\n\n\n7.  Build it!  Execute the following:\n\n   > make\n\n\n8.  Check it!  Execute the following:\n\n   > make check\n\n\n9.  Install it.\n\n   > sudo make install\n\n\n\n\n\n\nNOTES:\n\n1. Wow!  The GMP code base is really well organized!  It was very easy\nfor me to find out exactly what files needed changing.  Nice work guys!!\n\n2. In amd64call.asm all I changed was to make the addressing relative to\nthe rip register rather than absolute.  The Apple 64bit ABI doesn't support\nabsolute addressing.  Since Linux can use either addressing mode, it\nmakes sense to use position independent code... it's more portable and\nthere's no real performance difference.\n\n3. In add_n and sub_n I re-wrote the code to accomidate the Woodcrest\nnuances.  Mainly, I unrolled the main loop and I got rid of the \"inc\"\ninstruction (which causes a false dependency on the flag register and\nthus stalls the pipeline).  Of course, this also meant that I had to\nsave the carry flag between loop iterations using the \"lahf\" and\n\"sahf\" instructions.  These instructions are available on the Mac Pro\nusing the Apple assembler, but because some early x86_64 CPUs didn't\nsupport those instructins, the GNU assembler doesn't allow those\nmnemonics on 64bit machines (even when the CPU will support it).  So,\nmy assembly code includes some m4 code which calls the shell script\n\"lahf_sahf_test.sh\" which determines if the lahf and sahf instructions\nare available on the CPU.  If so, then it includes some hand assembled\nbytes to get around GNU as limitations.  
Otherwise, it falls back to\nusing \"setc\" and \"bt\" which are slower.\n\n4.  On my 2.66 GHz Mac Pro, I was able to get a GMPbench score of 8263.\n\n5.  You'll notice a Makefile and a bunch of extraneous files.  These are\nused for testing the code outside of the GMP source tree.  The Makefile\nwill produce a file called mpn_test which just runs the routines through\na bunch of speed and correctness tests and compares them against the\noriginal GMP 4.2.1 assembly files.\n\n6.  On Mac OS X I haven't found a nice way yet to build dynamic\nlibraries.  The biggest obstical is that the Apple \"libtool\" and the\nGNU \"libtool\" have incompatible syntax.  My guess is that in the near\nfuture the GNU libtool will support the Apple libtool for creating\ndynamic shared libraries.  For the mean time, I'll be content with\nstatic libraries.  If you find a simple solution please let me know.\n\nJason Worth Martin\nAsst. Prof. of Mathematics\nJames Madison Univ.\nmartinjw@jmu.edu\n"
  },
  {
    "path": "mpn/x86_64/core2/add_n.as",
    "content": "\n;  mpn_add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\trax=mpn_add_n(mp_ptr rdi ,mp_ptr rsi ,mp_ptr rdx ,mp_size_t rcx)\n;\t(rdi,rcx)=(rsi,rcx)+(rdx,rcx)  return rax=carry\n\n\tGLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tneg     rcx\n\tcmp     rcx, 0\n\tjz      L_skiplp\n\talign   16\nL_lp:\n\tmov     r8, [rsi+rcx*8]\n\tmov     r10, [rsi+rcx*8+16]\n\tadc     r8, [rdx+rcx*8]\n\tmov     [rdi+rcx*8], r8\n\tmov     r9, [rsi+rcx*8+8]\n\tadc     r9, [rdx+rcx*8+8]\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rcx, [rcx+4]\n\tmov     r11, [rsi+rcx*8-8]\n\tadc     r10, [rdx+rcx*8-16]\n\tadc     r11, [rdx+rcx*8-8]\n\tmov     [rdi+rcx*8-16], r10\n\tmov     [rdi+rcx*8-8], r11\n\tjrcxz   L_exitlp\n\tjmp     L_lp\nL_exitlp:\n\tsbb     rcx, rcx\nL_skiplp:\n\tcmp     rax, 2\n\tja      L_case3\n\tjz      L_case2\n\tjp      L_case1\nL_case0:\n\tsub     rax, rcx\n\tret\n\talign   16\nL_case1:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tsbb     rax, rax\n\tneg     
rax\n\tret\n\talign   16\nL_case3:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tmov     r10, [rsi+16]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tadc     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tadc     r10, [rdx+16]\n\tmov     [rdi+16], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   16\nL_case2:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tadc     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/addadd_n.asm",
    "content": "dnl  mpn_addadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addadd_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tadc (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tadc 8(%rcx,%r10,8),%rbx\n\tmov 24(%rdx,%r10,8),%r11\n\tmov 16(%rdx,%r10,8),%rbp\n\tadc 16(%rcx,%r10,8),%rbp\n\tadc 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi,%r10,8),%r8\n\tadc 8(%rsi,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tadc 16(%rsi,%r10,8),%rbp\n\tadc 24(%rsi,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tadc (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tadc 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tadc 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%rbx\n\tmov %r8,(%rdi)\n\tadc 16(%rsi),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tadc 8(%rcx),%r8\n\tmov 16(%rdx),%rbx\n\tadc 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tadc 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 16(%rsi),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/addlsh_n.as",
    "content": "\n;  mpn_addlsh_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_addlsh_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t,shift)\n;\trax                 rdi,   rsi,   rdx,      rcx   r8\n\n\tASM_START\n\tGLOBAL_FUNC mpn_addlsh_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     rbx, 4\n\tsub     rbx, rcx\n\tmov     rcx, 64\n\tsub     rcx, r8\n\tmov     r12, 0\n\tmov     rax, 0\n\tmov     r8, [rdx+rbx*8]\n\tcmp     rbx, 0\n\tjge     L_skiplp\n\talign   16\nL_lp:\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r12, r8, cl\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tmov     r8, [rdx+rbx*8+32]\n\tadd     rbx, 4\n\tjnc     L_lp\n\talign   16\nL_skiplp:\n\tcmp     rbx, 2\n\tja      
L_case0\n\tje      L_case1\n\tjp      L_case2\nL_case3:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tshr     r12, cl\n\tsahf\n\tadc     r12, 0\n\tmov     rax, r12\n\tpop     rbx\n\tpop     r12\n\tret\nL_case2:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     r10, [rdx+rbx*8+16]\n\tshrd    r9, r10, cl\n\tshr     r10, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     rax, r10\n\tmov     [rdi+rbx*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\nL_case1:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tshr     r9, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     rax, r9\n\tpop     rbx\n\tpop     r12\n\tret\nL_case0:\n\tshrd    r12, r8, cl\n\tshr     r8, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, 0\n\tmov     rax, r8\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/addmul_1.asm",
    "content": "dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for \"Core 2\".\n\ndnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC AMD K8,K9\t 4\nC AMD K10\t 4\nC AMD bd1\t 5.1\nC AMD bobcat\nC Intel P4\t ?\nC Intel core2\t 4.3-4.5 (fluctuating)\nC Intel NHM\t 5.0\nC Intel SBR\t 4.1\nC Intel atom\t ?\nC VIA nano\t 5.25\n\nC INPUT PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`n',\t`%rdx')\ndefine(`v0',\t`%rcx')\ndefine(`carry_in', `%r8')\n\nMULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c)\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_addmul_1c)\n\tpush\t%rbx\n\tpush\t%rbp\n\tlea\t(%rdx), %rbx\n\tneg\t%rbx\n\n\tmov\t(up), %rax\n\tmov\t(rp), %r10\n\n\tlea\t-16(rp,%rdx,8), rp\n\tlea\t(up,%rdx,8), up\n\tmul\t%rcx\n\tadd\tcarry_in, %rax\n\tadc\t$0, %rdx\n\tjmp\tL(start_nc)\nEPILOGUE()\n\n\tALIGN(16)\nPROLOGUE(mpn_addmul_1)\n\tpush\t%rbx\n\tpush\t%rbp\n\tlea\t(%rdx), %rbx\n\tneg\t%rbx\n\n\tmov\t(up), %rax\n\tmov\t(rp), %r10\n\n\tlea\t-16(rp,%rdx,8), rp\n\tlea\t(up,%rdx,8), up\n\tmul\t%rcx\n\nL(start_nc):\n\tbt\t$0, %ebx\n\tjc\tL(odd)\n\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tlea\t(%rdx), 
%rbp\n\tmul\t%rcx\n\tadd\t$2, %rbx\n\tjns\tL(ln2)\n\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tlea\t(%rdx), %r9\n\tjmp\tL(mid)\n\nL(odd):\tadd\t$1, %rbx\n\tjns\tL(ln1)\n\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tlea\t(%rdx), %r9\n\tmul\t%rcx\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tlea\t(%rdx), %rbp\n\tjmp\tL(le)\n\n\tALIGN(16)\nL(top):\tmul\t%rcx\n\tadd\t%r8, %r10\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tadc\t%r9, %r11\n\tmov\t%r10, -8(rp,%rbx,8)\n\tmov\t(rp,%rbx,8), %r10\n\tlea\t(%rdx), %r9\n\tadc\t$0, %rbp\nL(mid):\tmul\t%rcx\n\tadd\t%r11, %r10\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tadc\t%rbp, %r8\n\tmov\t%r10, (rp,%rbx,8)\n\tmov\t8(rp,%rbx,8), %r10\n\tlea\t(%rdx), %rbp\n\tadc\t$0, %r9\nL(le):\tadd\t$2, %rbx\n\tjs\tL(top)\n\n\tmul\t%rcx\n\tadd\t%r8, %r10\n\tadc\t%r9, %r11\n\tmov\t%r10, -8(rp)\n\tadc\t$0, %rbp\nL(ln2):\tmov\t(rp), %r10\n\tadd\t%r11, %r10\n\tadc\t%rbp, %rax\n\tmov\t%r10, (rp)\n\tadc\t$0, %rdx\nL(ln1):\tmov\t8(rp), %r10\n\tadd\t%rax, %r10\n\tmov\t%r10, 8(rp)\n\tmov    %ebx, %eax\tC zero rax\n\tadc\t%rdx, %rax\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/addsub_n.asm",
    "content": "dnl  mpn_addsub\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addsub_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tsbb (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tsbb 8(%rcx,%r10,8),%rbx\n\tmov 24(%rdx,%r10,8),%r11\n\tmov 16(%rdx,%r10,8),%rbp\n\tsbb 16(%rcx,%r10,8),%rbp\n\tsbb 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi,%r10,8),%r8\n\tadc 8(%rsi,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tadc 16(%rsi,%r10,8),%rbp\n\tadc 24(%rsi,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tsbb (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tsbb 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tsbb 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%rbx\n\tmov %r8,(%rdi)\n\tadc 16(%rsi),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tsbb 8(%rcx),%r8\n\tmov 16(%rdx),%rbx\n\tsbb 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tsbb 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 16(%rsi),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/and_n.as",
    "content": "\n;  core2 mpn_and_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) & (rdx, rcx)\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_and_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign 8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tand     r11, [rdx]\n\tand     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tand     r9, [rdx-16]\n\tand     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tsub     rcx, 1\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tcmp     rax, 0\n\tjz      end\n\tmov     r11, [rsi]\n\tand     r11, [rdx]\n\tmov     [rdi], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+8]\n\tand     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+16]\n\tand     r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/andn_n.as",
    "content": "\n;  core2 mpn_andn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_andn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tand     r8, [rsi+rcx*8+24]\n\tand     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tand     r10, [rsi+rcx*8+8]\n\tand     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/com_n.as",
    "content": "\n;  core2 mpn_com_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_com_n\n\tsub     rdx, 4\n\tjc      next\n\talign 8\nloop1:\n\tmov     rax, [rsi+rdx*8+24]\n\tmov     rcx, [rsi+rdx*8+16]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+24], rax\n\tmov     [rdi+rdx*8+16], rcx\n\tmov     rax, [rsi+rdx*8+8]\n\tmov     rcx, [rsi+rdx*8]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+8], rax\n\tmov     [rdi+rdx*8], rcx\n\tsub     rdx, 4\n\tjae     loop1\nnext:\n\tadd     rdx, 4\n\tjz      end\n;\tCould still have potential cache-bank conflicts in this tail part\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/copyd.as",
    "content": "\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyd(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx)\n;\t(rdi,rdx)=(rsi,rdx)\n\n\tGLOBAL_FUNC mpn_copyd\n\tlea     rsi, [rsi+rdx*8-8]\n\tlea     rdi, [rdi+rdx*8-8]\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     r9, [rsi-24]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tlea     rdi, [rdi-32]\n\tsub     rdx, 4\n\tmov     [rdi+16], r8\n\tmov     [rdi+8], r9\n\tlea     rsi, [rsi-32]\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tmov     [rdi-16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/copyi.asm",
    "content": "dnl  mpn_copyi\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyi(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyi)\nC // for <20 limbs this is slower than core2/copyi for rev 2257\nC // probaly want to tweek it , that should do most of the work\nC //below small loop is not much help\nC //cmp $10,%rdx\nC //jge large\nC //ALIGN(16)\nC //lp:\tmov (%rsi),%rax\nC //\tmov %rax,(%rdi)\nC //\tlea 8(%rsi),%rsi\nC //\tlea 8(%rdi),%rdi\nC //\tsub $1,%rdx\nC //\tjnz lp\nC //\tret\nC // large:\ncmp $0,%rdx\njz L(endfn)\nmov %rdi,%rax\nsub %rsi,%rax\ntest $0xF,%rax\njz L(aligned)\ntest $0xF,%rdi\njz L(srcisodd)\nmov $5,%rcx\nsub %rdx,%rcx\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nmovapd (%rsi,%rcx,8),%xmm1\nmovq %xmm1,(%rdi,%rcx,8)\nadd $8,%rdi\ncmp $1,%rdx\njz L(endfn)\ncmp $0,%rcx\njge L(skiplpud)\nALIGN(16)\nL(lpud):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd 
%xmm0,16-32(%rdi,%rcx,8)\n\tjnc L(lpud)\nL(skiplpud):\ncmp $2,%rcx\nja L(case0d)\njz L(case1d)\njp L(case2d)\nALIGN(16)\nL(case3d):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1  \n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case2d):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case1d):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case0d):\tmovhpd %xmm1,(%rdi,%rcx,8)\nL(endfn):\tret\nC //////////////////////////\nL(srcisodd):\nmov $4,%rcx\nsub %rdx,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\n\tmovapd -8(%rsi,%rcx,8),%xmm1\n\tsub $8,%rsi\ncmp $0,%rcx\njge L(skiplpus)\nALIGN(16)\nL(lpus):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc L(lpus)\nL(skiplpus):\ncmp $2,%rcx\nja L(case0s)\njz L(case1s)\njp L(case2s)\nALIGN(16)\nL(case3s):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case2s): movapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case1s):\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case0s):\tmovhpd %xmm1,(%rdi,%rcx,8)\n\tret\nC //////////////////////////\nALIGN(16)\nL(aligned):\nmov $3,%rcx\nsub %rdx,%rcx\ntest $0xF,%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\njz L(notodda)\n\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tadd $1,%rcx\nL(notodda):\ncmp $0,%rcx\njge L(skiplpa)\nALIGN(16)\nL(lpa):\tadd $4,%rcx\n\tmovapd 
-32(%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,-32(%rdi,%rcx,8)\n\tmovapd 16-32(%rsi,%rcx,8),%xmm1\n\tmovapd %xmm1,16-32(%rdi,%rcx,8)\n\tjnc L(lpa)\nL(skiplpa):\ncmp $2,%rcx\nja L(casea0)\nje L(casea1)\njp L(casea2)\nL(casea3):\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tmov 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\nL(casea0):\tret\nALIGN(16)\nL(casea2):\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(casea1):\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/divexact_byff.as",
    "content": ";  X86_64 mpn_diveby (B-1)/f   where f=1  special case\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n;\t(rdi,rdx)=(rsi,rdx)/rcx where r8=(B-1)/rcx\n;\trax=carry out\n\n;\tspecial case rcx=ffff  r8=1\n\n;\tThe two imul's are only needed if want strict compatibility with\n;\tmpn_divexact_1 when the division is not exact\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\nmov     r10d, 3\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\n; r9 is our carry in\nmov     r9, 0\nmov\tr8, 1\nmov\trcx,0xFFFFFFFFFFFFFFFF\n; imul %r8,%r9 this is needed if we have non-zero carry in\nsub     r10, rdx\njnc     skiploop\nalign 16\nlp:\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+16]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+16], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+24]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+24], r9\n\tsbb     r9, rdx\n\tadd     r10, 4\n\tjnc     lp\nskiploop:\ntest    r10, 2\njnz  
   skip\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tadd     r10, 2\nskip:\ntest    r10, 1\njnz     fin\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\nfin:\nimul    r9, rcx\nmov     rax, r9\nneg     rax\nret\nend\n"
  },
  {
    "path": "mpn/x86_64/core2/divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx      rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_2)\nmov $1,%r9\nsub %rdx,%r9\nlea -8(%rdi,%rdx,8),%rdi\nlea -8(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC //r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nmov %r11,%r13\nmov %r12,%r14\n\nmov (%rsi,%r9,8),%r11\nmov 8(%rsi,%r9,8),%r12\nmov $0,%r10\nadd $2,%r9\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\tmov %rax,-16(%rdi,%r9,8)\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov (%rsi,%r9,8),%r11\n\t\tmov 8(%rsi,%r9,8),%r12\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc L(lp)\nL(skiplp):\nmov %r12,%r8\nmov %r13,%rax\t\nmul %r11\nmov %rax,-16(%rdi,%r9,8)\nimul %r14,%r11\t\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne L(case0)\nL(case1):\n\t\tmov (%rsi,%r9,8),%r11\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\tmov %rax,(%rdi,%r9,8)\n\tmul 
%rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nL(case0):\n\tmov %rdx,-8(%rdi,%r9,8)\t\n\tmov %rcx,%rax\t\n\tmul %rdx\t\n\tcmp %rax,%r8\t\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_hamdist(mp_ptr,mp_ptr,mp_size_t)\nC\trax               rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_hamdist)\npush %r12\npush %r14\npush %rbp\nmov $0x5555555555555555,%r8\nmov $0x3333333333333333,%r9\nmov $0x0f0f0f0f0f0f0f0f,%r10\nmov $0x0101010101010101,%r11\nxor %eax,%eax\nsub $3,%rdx\njc L(skip)\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov 8(%rdi,%rdx,8),%r12\n\txor 8(%rsi,%rdx,8),%r12\n\tmov (%rdi,%rdx,8),%r14\n\txor (%rsi,%rdx,8),%r14\nsub $3,%rdx\njc L(skiplp)\nALIGN(16)\nL(lp):\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\n\tmov %r12,%rbp\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rbp\n\tmov %rbp,%r12\n\tshr $2,%rbp\n\tand %r9,%r12\n\tand %r9,%rbp\n\tadd %r12,%rbp\n\n\tmov %r14,%r12\n\tshr $1,%r14\n\tand %r8,%r14\n\tsub %r14,%r12\n\tmov %r12,%r14\n\tshr $2,%r12\n\tand %r9,%r14\n\tand %r9,%r12\n\tadd %r14,%r12\n\n\tadd %rcx,%rbp\n\tadd %r12,%rbp\t\n\t\tmov 
16(%rdi,%rdx,8),%rcx\n\tmov %rbp,%r14\n\tshr $4,%rbp\n\tand %r10,%r14\n\t\txor 16(%rsi,%rdx,8),%rcx\n\t\tmov 8(%rdi,%rdx,8),%r12\n\t\txor 8(%rsi,%rdx,8),%r12\n\tand %r10,%rbp\n\tadd %rbp,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\t\tmov (%rdi,%rdx,8),%r14\n\t\txor (%rsi,%rdx,8),%r14\n\tsub $3,%rdx\n\tjnc L(lp)\nL(skiplp):\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %r12,%rbp\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rbp\n\tmov %rbp,%r12\n\tshr $2,%rbp\n\tand %r9,%r12\n\tand %r9,%rbp\n\tadd %r12,%rbp\n\t\n\tmov %r14,%r12\n\tshr $1,%r14\n\tand %r8,%r14\n\tsub %r14,%r12\n\tmov %r12,%r14\n\tshr $2,%r12\n\tand %r9,%r14\n\tand %r9,%r12\n\tadd %r14,%r12\n\t\n\tadd %rcx,%rbp\n\tadd %r12,%rbp\t\n\tmov %rbp,%r14\n\tshr $4,%rbp\n\tand %r10,%r14\n\tand %r10,%rbp\n\tadd %rbp,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\nL(skip):\n\tcmp $-2,%rdx\n\tjl L(case0)\n\tjz L(case1)\nL(case2):\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %rcx,%r14\n\tshr $4,%rcx\n\tand %r10,%r14\n\tand %r10,%rcx\n\tadd %rcx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\tdec %rdx\nL(case1):\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %rcx,%r14\n\tshr $4,%rcx\n\tand %r10,%r14\n\tand %r10,%rcx\n\tadd %rcx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\nL(case0):\tpop %rbp\n\tpop %r14\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/ior_n.as",
    "content": "\n;  core2 mpn_ior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) | (rdx, rcx)\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_ior_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign 8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tor      r11, [rdx]\n\tor      r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tor      r9, [rdx-16]\n\tor      r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tsub     rcx, 1\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tcmp     rax, 0\n\tjz      end\n\tmov     r11, [rsi]\n\tor      r11, [rdx]\n\tmov     [rdi], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+8]\n\tor      r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+16]\n\tor      r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/iorn_n.as",
    "content": "\n;  core2 mpn_iorn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n   \n   GLOBAL_FUNC mpn_iorn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nL(lp):\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc L(lp)\ncmp $2,%rcx\njg\tL(case0)\njz\tL(case1)\njp\tL(case2)\nL(case3):\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp L(fin)\nL(case2):\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp L(fin)\nL(case1):\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nL(fin):\tmov $3,%rcx\nL(case0): \t#rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc L(notodd)\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nL(l7):\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc L(l7)\n\tmov $3,%rcx\nL(notodd):\txor %r8,%r8\n\tshr $1,%rax\n\tadc %r8,%r8\n\tshr $1,%rax\n\tadc $0,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nL(l1):\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc L(l1)\n\txor %r8,%r8\n\tshr $1,%rbx\n\tadc %r8,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tadd %r8,24(%rbp)\nL(l2):\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc L(l2)\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nL(lp):\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc L(lp)\ncmp $2,%rcx\njg\tL(case0)\njz\tL(case1)\njp\tL(case2)\nL(case3):\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp L(fin)\nL(case2):\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp L(fin)\nL(case1):\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\nL(fin):\tmov $3,%rcx\nL(case0): \t#rcx=3\n\t#// store L(top) two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc L(skipload)\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nL(skipload):\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nL(l2):\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc L(l2)\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nL(l3):\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l3)\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nL(l1):\tsbbq $0,(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc L(l1)\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nL(l4):\tsbbq $0,(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l4)\n\t#// if L(odd) the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc L(notodd)\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nL(l7):\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tadd $1,%rcx\n\tsar $1,%r8\n\tjnz L(l7)\nL(notodd):\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/lshift.asm",
    "content": "dnl  mpn_lshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tmpn_lshift(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx,mp_limb_t rcx)\nC\trax=carry\n\nASM_START()\nPROLOGUE(mpn_lshift)\nC\t// odd and even n seem to have different runtimes\npush %rbx\nmov %rdx,%rbx\nlea 24(%rsi),%rsi\nlea 24(%rdi),%rdi\nmov -32(%rsi,%rbx,8),%rdx\nxor %rax,%rax\nshld %cl,%rdx,%rax\nsub $5,%rbx\njs L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -24(%rsi,%rbx,8),%r11\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tmov %r11,%rdx\n\tmov -16(%rsi,%rbx,8),%r10\n\tshld %cl,%r9,%r8\n\tshld %cl,%r10,%r9\n\tmov %r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tshld %cl,%r11,%r10\n\tsub $4,%rbx\n\tmov %r10,16(%rdi,%rbx,8)\n\tjns L(lp)\nL(skiplp):\ncmp $-2,%rbx\nja L(case3)\nje L(case2)\njp L(case1)\nL(case0):\n\tshl %cl,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case3):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tmov -16(%rsi,%rbx,8),%r10\n\tshld %cl,%r9,%r8\n\tshld %cl,%r10,%r9\n\tmov 
%r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tshl %cl,%r10\n\tmov %r10,16-32(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tshld %cl,%r9,%r8\n\tshl %cl,%r9\n\tmov %r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%rbx,8),%r8\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tshl %cl,%r8\n\tmov %r8,(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nL(lp):\tmov $0,%r11d\n\tmov -8(%rsi,%rcx,8),%r10\n\tmul %r8\n\tadd %rax,%r10\n\tlea (%r13),%rax\n\tadc %rdx,%r11\n\tlea (%r11),%r13\n\tlea (%r9),%r9\n\tlea (%r8),%r8\n\tmul %r9\n\tadd %r10,%rax\n\tadc %rdx,%r13\n\tsub $1,%rcx\n\tjnz L(lp)\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/mod_1_2.asm",
    "content": "dnl  mpn_mod_1_2\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,3)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_2)\nC // require rdx >=4\npush %r12\npush %r13\npush %r14\nmov -8(%rsi,%rdx,8),%r14\nmov -16(%rsi,%rdx,8),%r13\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov %rdx,%rcx\nsub $6,%rcx\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov 16+8(%rsi,%rcx,8),%rax\n\tmul %r8\n\tadd %rax,%r11\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tlea (%r9),%rax\n\tmul %r13\n\tadd %rax,%r11\n\tlea (%r8),%r8\n\tadc %rdx,%r12\n\tmov %r10,%rax\n\tlea (%r11),%r13\n\tmul %r14\n\tadd %rax,%r13\n\tlea (%r12),%r14\n\tadc %rdx,%r14\n\tsub $2,%rcx\n\tjnc L(lp)\nL(skiplp):\t#// Dont need this extra wind down code now as we are not pipelined anymore\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov 16+8(%rsi,%rcx,8),%rax\n\tmul %r8\n\tmov $0,%r12d\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul 
%r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncmp $-2,%rcx\nje L(case0)\nL(case1):\n\tmov 8(%rsi,%rcx,8),%r11\n\tmov $0,%r12d\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\nL(case0):\t\nmov %r8,%rax\nmul %r14\nadd %rax,%r13\nadc $0,%rdx\nmov %r13,(%rdi)\nmov %rdx,8(%rdi)\npop %r14\npop %r13\npop %r12\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/mod_1_3.asm",
    "content": "dnl  mpn_mod_1_3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,4)  contains B^i % divisor\n\nASM_START()\nPROLOGUE(mpn_mod_1_3)\nC // require rdx >=5\npush %r12\npush %r13\npush %r14\npush %r15\nmov -8(%rsi,%rdx,8),%r15\nmov -16(%rsi,%rdx,8),%r14\nmov -32(%rsi,%rdx,8),%rax\nmov -40(%rsi,%rdx,8),%r12\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov 24(%rcx),%r11\nmov %rdx,%rcx\nsub $8,%rcx\njc L(skiplp)\nALIGN(16)\nC // r15 r14 -8() -16()=rax -24()=r12\nL(lp):\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 0(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 8(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tsub $3,%rcx\n\tjnc L(lp)\nL(skiplp):\nC // we have loaded up the next two limbs\nC // but because they are out of order we can have to do 3 limbs min\ncmp $-2,%rcx\njl 
L(case1)\nje L(case2)\nL(case3):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 8(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 16(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tC // r15 r14 rax r12\n\tmov $0,%r13\n\tmul %r8\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case2):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 16(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tC // r15 r14 r12\n\tmov $0,%r13\n\tmov %r8,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case1):\n\tC // one more is 3 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12 \n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov %r13,%r15\n\tadc 
%rdx,%r15\n\tmov %r8,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tadc $0,%rdx\n\tmov %r14,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/mul_1.asm",
    "content": "dnl  mpn_mul_1\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_1)\nlea -8(%rsi,%rdx,8),%rsi\nlea -8(%rdi,%rdx,8),%rdi\nmov $1,%r8d\nsub %rdx,%r8\nmov (%rsi,%r8,8),%rax\nmov $0,%r9d\njz L(skiplp)\nALIGN(16)\nL(lp):\tmul %rcx\n\tadd %r9,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tmov 8(%rsi,%r8,8),%rax\n\tmov $0,%r9d\n\tadc %rdx,%r9\n\tadd $1,%r8\n\tjnc L(lp)\nL(skiplp):\nmul %rcx\nadd %r9,%rax\nmov %rax,(%rdi,%r8,8)\nmov $0,%eax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/mul_2.as",
    "content": ";  X86_64 mpn_mul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n; (rdi,rdx+1)=(rsi,rdx)*(rcx,2) return carrylimb\n\n\tGLOBAL_FUNC mpn_mul_2\npush    rbx\nmov     r8, [rcx]\nmov     rcx, [rcx+8]\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\nmov     rbx, 3\nsub     rbx, rdx\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     r8\nmov     r11, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign 16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd  
   rbx, 3\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmov     [rdi+rbx*8], r11\nmul     rcx\nadd     r9, rax\nadc     r10, rdx\ncmp     rbx, 1\nja      case2\nje      case1\ncase0:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n\tpop     rbx\n\tret\nalign 16\ncase1:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     rbx\n\tret\nalign 16\ncase2:\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/core2/mullow_n_basecase.asm",
    "content": "dnl  AMD64 mpn_mullo_basecase optimised for Conroe/Wolfdale/Nehalem/Westmere.\n\ndnl  Contributed to the GNU project by Torbjörn Granlund.\n\ndnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_2\t\taddmul_2\nC AMD K8,K9\nC AMD K10\nC AMD bull\nC AMD pile\nC AMD steam\nC AMD bobcat\nC AMD jaguar\nC Intel P4\nC Intel core\t 4.0\t\t4.18-4.25\nC Intel NHM\t 3.75\t\t4.06-4.2\nC Intel SBR\nC Intel IBR\nC Intel HWL\nC Intel BWL\nC Intel atom\nC VIA nano\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjörn Granlund.\n\nC TODO\nC   * Implement proper cor2, replacing current cor0.\nC   * Offset n by 2 in order to avoid the outer loop cmp.  
(And sqr_basecase?)\nC   * Micro-optimise.\n\nC When playing with pointers, set this to $2 to fall back to conservative\nC indexing in wind-down code.\ndefine(`I',`$1')\n\ndefine(`rp',       `%rdi')\ndefine(`up',       `%rsi')\ndefine(`vp_param', `%rdx')\ndefine(`n_param',  `%rcx')\ndefine(`n_param8',  `%cl')\n\ndefine(`v0',       `%r10')\ndefine(`v1',       `%r11')\ndefine(`w0',       `%rbx')\ndefine(`w032',       `%ebx')\ndefine(`w1',       `%rcx')\ndefine(`w132',       `%ecx')\ndefine(`w2',       `%rbp')\ndefine(`w232',       `%ebp')\ndefine(`w3',       `%r12')\ndefine(`w332',       `%r12d')\ndefine(`n',        `%r9')\ndefine(`n32',        `%r9d')\ndefine(`n8',        `%r9b')\ndefine(`i',        `%r13')\ndefine(`vp',       `%r8')\n\ndefine(`X0',       `%r14')\ndefine(`X1',       `%r15')\n\nC rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15\n\ndefine(`ALIGNx', `ALIGN(16)')\n\ndefine(`N', 85)\nifdef(`N',,`define(`N',0)')\ndefine(`MOV', `ifelse(eval(N & $3),0,`mov\t$1, $2',`lea\t($1), $2')')\n\nASM_START()\n\tTEXT\n\tALIGN(32)\nPROLOGUE(mpn_mullow_n_basecase)\n\n\tmov\t(up), %rax\n\tmov\tvp_param, vp\n\n\tcmp\t$4, n_param\n\tjb\tL(lsmall)\n\n\tmov\t(vp_param), v0\n\tpush\t%rbx\n\tlea\t(rp,n_param,8), rp\tC point rp at R[un]\n\tpush\t%rbp\n\tlea\t(up,n_param,8), up\tC point up right after U's end\n\tpush\t%r12\n\tmov\t$0, n32\t\tC FIXME\n\tsub\tn_param, n\n\tpush\t%r13\n\tmul\tv0\n\tmov\t8(vp), v1\n\n\ttest\t$1, n_param8\n\tjnz\tL(lm2x1)\n\nL(lm2x0):test\t$2, n_param8\n\tjnz\tL(lm2b2)\n\nL(lm2b0):lea\t(n), i\n\tmov\t%rax, (rp,n,8)\n\tmov\t%rdx, w1\n\tmov\t(up,n,8), %rax\n\txor\tw232, w232\n\tjmp\tL(lm2e0)\n\nL(lm2b2):lea\t-2(n), i\n\tmov\t%rax, w2\n\tmov\t(up,n,8), %rax\n\tmov\t%rdx, w3\n\txor\tw032, w032\n\tjmp\tL(lm2e2)\n\nL(lm2x1):test\t$2, n_param8\n\tjnz\tL(lm2b3)\n\nL(lm2b1):lea\t1(n), i\n\tmov\t%rax, (rp,n,8)\n\tmov\t(up,n,8), %rax\n\tmov\t%rdx, w0\n\txor\tw132, w132\n\tjmp\tL(lm2e1)\n\nL(lm2b3):lea\t-1(n), i\n\txor\tw332, w332\n\tmov\t%rax, 
w1\n\tmov\t%rdx, w2\n\tmov\t(up,n,8), %rax\n\tjmp\tL(lm2e3)\n\n\tALIGNx\nL(lm2tp):mul\tv0\n\tadd\t%rax, w3\n\tmov\t-8(up,i,8), %rax\n\tmov\tw3, -8(rp,i,8)\n\tadc\t%rdx, w0\n\tadc\t$0, w132\nL(lm2e1):mul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\t$0, w232\n\tmov\t(up,i,8), %rax\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\tw0, (rp,i,8)\n\tadc\t%rdx, w1\n\tmov\t(up,i,8), %rax\n\tadc\t$0, w232\nL(lm2e0):mul\tv1\n\tadd\t%rax, w1\n\tadc\t%rdx, w2\n\tmov\t8(up,i,8), %rax\n\tmul\tv0\n\tmov\t$0, w332\n\tadd\t%rax, w1\n\tadc\t%rdx, w2\n\tadc\t$0, w332\n\tmov\t8(up,i,8), %rax\nL(lm2e3):mul\tv1\n\tadd\t%rax, w2\n\tmov\tw1, 8(rp,i,8)\n\tadc\t%rdx, w3\n\tmov\t$0, w032\n\tmov\t16(up,i,8), %rax\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t16(up,i,8), %rax\n\tadc\t%rdx, w3\n\tadc\t$0, w032\nL(lm2e2):mul\tv1\n\tmov\t$0, w132\t\tC FIXME: dead in last iteration\n\tadd\t%rax, w3\n\tmov\t24(up,i,8), %rax\n\tmov\tw2, 16(rp,i,8)\n\tadc\t%rdx, w0\t\tC FIXME: dead in last iteration\n\tadd\t$4, i\n\tjs\tL(lm2tp)\n\nL(lm2ed):imul\tv0, %rax\n\tadd\tw3, %rax\n\tmov\t%rax, I(-8(rp),-8(rp,i,8))\n\n\tadd\t$2, n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\tcmp\t$-2, n\n\tjge\tL(lcor1)\n\n\tpush\t%r14\n\tpush\t%r15\n\nL(louter):\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\tmov\t(up,n,8), %rax\n\tmul\tv0\n\ttest\t$1, n8\n\tjnz\tL(la1x1)\n\nL(la1x0):mov\t%rax, X1\n\tMOV(\t%rdx, X0, 8)\n\tmov\t(up,n,8), %rax\n\tmul\tv1\n\ttest\t$2, n8\n\tjnz\tL(la110)\n\nL(la100):lea\t(n), i\n\tmov\t(rp,n,8), w3\n\tmov\t%rax, w0\n\tMOV(\t%rdx, w1, 16)\n\tjmp\tL(llo0)\n\nL(la110):lea\t2(n), i\n\tmov\t(rp,n,8), w1\n\tmov\t%rax, w2\n\tmov\t8(up,n,8), %rax\n\tMOV(\t%rdx, w3, 1)\n\tjmp\tL(llo2)\n\nL(la1x1):mov\t%rax, X0\n\tMOV(\t%rdx, X1, 2)\n\tmov\t(up,n,8), %rax\n\tmul\tv1\n\ttest\t$2, n8\n\tjz\tL(la111)\n\nL(la101):lea\t1(n), i\n\tMOV(\t%rdx, w0, 4)\n\tmov\t(rp,n,8), w2\n\tmov\t%rax, w3\n\tjmp\tL(llo1)\n\nL(la111):lea\t-1(n), i\n\tMOV(\t%rdx, w2, 64)\n\tmov\t%rax, w1\n\tmov\t(rp,n,8), w0\n\tmov\t8(up,n,8), 
%rax\n\tjmp\tL(llo3)\n\n\tALIGNx\nL(ltop):\tmul\tv1\n\tadd\tw0, w1\n\tadc\t%rax, w2\n\tmov\t-8(up,i,8), %rax\n\tMOV(\t%rdx, w3, 1)\n\tadc\t$0, w3\nL(llo2):\tmul\tv0\n\tadd\tw1, X1\n\tmov\tX1, -16(rp,i,8)\n\tadc\t%rax, X0\n\tMOV(\t%rdx, X1, 2)\n\tadc\t$0, X1\n\tmov\t-8(up,i,8), %rax\n\tmul\tv1\n\tMOV(\t%rdx, w0, 4)\n\tmov\t-8(rp,i,8), w1\n\tadd\tw1, w2\n\tadc\t%rax, w3\n\tadc\t$0, w0\nL(llo1):\tmov\t(up,i,8), %rax\n\tmul\tv0\n\tadd\tw2, X0\n\tadc\t%rax, X1\n\tmov\tX0, -8(rp,i,8)\n\tMOV(\t%rdx, X0, 8)\n\tadc\t$0, X0\n\tmov\t(up,i,8), %rax\n\tmov\t(rp,i,8), w2\n\tmul\tv1\n\tadd\tw2, w3\n\tadc\t%rax, w0\n\tMOV(\t%rdx, w1, 16)\n\tadc\t$0, w1\nL(llo0):\tmov\t8(up,i,8), %rax\n\tmul\tv0\n\tadd\tw3, X1\n\tmov\tX1, (rp,i,8)\n\tadc\t%rax, X0\n\tMOV(\t%rdx, X1, 32)\n\tmov\t8(rp,i,8), w3\n\tadc\t$0, X1\n\tmov\t8(up,i,8), %rax\n\tmul\tv1\n\tadd\tw3, w0\n\tMOV(\t%rdx, w2, 64)\n\tadc\t%rax, w1\n\tmov\t16(up,i,8), %rax\n\tadc\t$0, w2\nL(llo3):\tmul\tv0\n\tadd\tw0, X0\n\tmov\tX0, 8(rp,i,8)\n\tMOV(\t%rdx, X0, 128)\n\tadc\t%rax, X1\n\tmov\t16(up,i,8), %rax\n\tmov\t16(rp,i,8), w0\n\tadc\t$0, X0\n\tadd\t$4, i\n\tjnc\tL(ltop)\n\nL(lend):\timul\tv1, %rax\n\tadd\tw0, w1\n\tadc\t%rax, w2\n\tmov\tI(-8(up),-8(up,i,8)), %rax\n\timul\tv0, %rax\n\tadd\tw1, X1\n\tmov\tX1, I(-16(rp),-16(rp,i,8))\n\tadc\tX0, %rax\n\tmov\tI(-8(rp),-8(rp,i,8)), w1\n\tadd\tw1, w2\n\tadd\tw2, %rax\n\tmov\t%rax, I(-8(rp),-8(rp,i,8))\n\n\tadd\t$2, n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\tcmp\t$-2, n\n\tjl\tL(louter)\n\n\tpop\t%r15\n\tpop\t%r14\n\n\tjnz\tL(lcor0)\n\nL(lcor1):mov\t(vp), v0\n\tmov\t8(vp), v1\n\tmov\t-16(up), %rax\n\tmul\tv0\t\t\tC u0 x v2\n\tadd\t-16(rp), %rax\t\tC FIXME: rp[0] still available in reg?\n\tadc\t-8(rp), %rdx\t\tC FIXME: rp[1] still available in reg?\n\tmov\t-8(up), %rbx\n\timul\tv0, %rbx\n\tmov\t-16(up), %rcx\n\timul\tv1, %rcx\n\tmov\t%rax, -16(rp)\n\tadd\t%rbx, %rcx\n\tadd\t%rdx, %rcx\n\tmov\t%rcx, -8(rp)\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\n\nL(lcor0):mov\t(vp), 
%r11\n\timul\t-8(up), %r11\n\tadd\t%rax, %r11\n\tmov\t%r11, -8(rp)\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\n\n\tALIGN(16)\nL(lsmall):\n\tcmp\t$2, n_param\n\tjae\tL(lgt1)\nL(ln1):\timul\t(vp_param), %rax\n\tmov\t%rax, (rp)\n\tret\nL(lgt1):\tja\tL(lgt2)\nL(ln2):\tmov\t(vp_param), %r9\n\tmul\t%r9\n\tmov\t%rax, (rp)\n\tmov\t8(up), %rax\n\timul\t%r9, %rax\n\tadd\t%rax, %rdx\n\tmov\t8(vp), %r9\n\tmov\t(up), %rcx\n\timul\t%r9, %rcx\n\tadd\t%rcx, %rdx\n\tmov\t%rdx, 8(rp)\n\tret\nL(lgt2):\nL(ln3):\tmov\t(vp_param), %r9\n\tmul\t%r9\t\tC u0 x v0\n\tmov\t%rax, (rp)\n\tmov\t%rdx, %r10\n\tmov\t8(up), %rax\n\tmul\t%r9\t\tC u1 x v0\n\timul\t16(up), %r9\tC u2 x v0\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r9\n\tmov\t8(vp), %r11\n\tmov\t(up), %rax\n\tmul\t%r11\t\tC u0 x v1\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r9\n\timul\t8(up), %r11\tC u1 x v1\n\tadd\t%r11, %r9\n\tmov\t%r10, 8(rp)\n\tmov\t16(vp), %r10\n\tmov\t(up), %rax\n\timul\t%rax, %r10\tC u0 x v2\n\tadd\t%r10, %r9\n\tmov\t%r9, 16(rp)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/nand_n.as",
    "content": "\n;  core2 mpn_nand_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_nand_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rsi+rcx*8+24]\n\tmov     r9, [rsi+rcx*8+16]\n\tand     r8, [rdx+rcx*8+24]\n\tand     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rsi+rcx*8+8]\n\tmov     r11, [rsi+rcx*8]\n\tand     r10, [rdx+rcx*8+8]\n\tand     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/nior_n.as",
    "content": "\n;  core2 mpn_nior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n    BITS 64\n    \n   GLOBAL_FUNC mpn_nior_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/penryn/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nL(lp):\tlea (%r8),%r8\n\tmov $0,%r11d\n\tmul %r8\n\tmov -8(%rsi,%rcx,8),%r10\n\tadd %rax,%r10\n\tlea (%r13),%rax\n\tadc %rdx,%r11\n\tlea (%r9),%r9\n\tlea (%r11),%r13\n\tmul %r9\n\tadd %r10,%rax\n\tadc %rdx,%r13\n\tsub $1,%rcx\n\tjnz L(lp)\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/penryn/sumdiff_n.asm",
    "content": "dnl  mpn_sumdiff\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_sumdiff_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\npush %r12\nmov $3,%r10d\npush %r13\nlea -24(%rdi,%r8,8),%rdi\npush %r14\nlea -24(%rsi,%r8,8),%rsi\npush %r15\nsub %r8,%r10\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tmov %r8,%r11\n\tadc (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx,%r10,8),%rbx\n\tmov 16(%rdx,%r10,8),%rbp\n\tmov 24(%rdx,%r10,8),%r12\n\tmov %rbp,%r14\n\tmov %r12,%r15\n\tadc 16(%rcx,%r10,8),%rbp\n\tadc 24(%rcx,%r10,8),%r12\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx,%r10,8),%r11\n\tmov %r8,(%rdi,%r10,8)\n\tsbb 8(%rcx,%r10,8),%r13\n\tsbb 16(%rcx,%r10,8),%r14\n\tmov %rbx,8(%rdi,%r10,8)\n\tsbb 24(%rcx,%r10,8),%r15\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %r12,24(%rdi,%r10,8)\n\tmov %r13,8(%rsi,%r10,8)\n\tsetc %r9b\n\tadd $4,%r10\n\tmov %r14,16-32(%rsi,%r10,8)\n\tmov %r15,24-32(%rsi,%r10,8)\n\tmov %r11,-32(%rsi,%r10,8)\n\tjnc 
L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tmov %r8,%r11\n\tadc (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tmov %rbp,%r14\n\tadc 16(%rcx),%rbp\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx),%r11\n\tmov %r8,(%rdi)\n\tsbb 8(%rcx),%r13\n\tsbb 16(%rcx),%r14\n\tmov %rbx,8(%rdi)\n\tmov %rbp,16(%rdi)\n\tmov %r13,8(%rsi)\n\tsetc %r9b\n\tmov %r14,16(%rsi)\n\tmov %r11,(%rsi)\n\t.byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tmov %r8,%r11\n\tadc 8(%rcx),%r8\n\tmov 8+8(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 8+8(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 8(%rcx),%r11\n\tmov %r8,8(%rdi)\n\tsbb 8+8(%rcx),%r13\n\tmov %rbx,8+8(%rdi)\n\tmov %r13,8+8(%rsi)\n\tsetc %r9b\n\tmov %r11,8(%rsi)\n\t.byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tmov %r8,%r11\n\tadc 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 16(%rcx),%r11\n\tmov %r8,16(%rdi)\n\tsetc %r9b\n\tmov %r11,16(%rsi)\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_popcount(mp_ptr,mp_size_t)\nC\trax               rdi,   rsi\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_popcount)\n# could store these constants in mem and retune to get the same speed\nmov $0x5555555555555555,%rax\nMOVQ %rax,%xmm4\nmovddup %xmm4,%xmm4\nmov $0x3333333333333333,%rax\nMOVQ %rax,%xmm5\nmovddup %xmm5,%xmm5\nmov $0x0f0f0f0f0f0f0f0f,%rax\nMOVQ %rax,%xmm6\nmovddup %xmm6,%xmm6\npxor %xmm7,%xmm7\npxor %xmm11,%xmm11\npxor %xmm8,%xmm8\n# this takes care of an odd address by padding with zeros\nbtr $3,%rdi\t\t# rdi is even\nsbb %rax,%rax\t\t# rax =-1 if was L(odd)\nsub %rax,%rsi\nMOVQ %rax,%xmm0\npandn (%rdi),%xmm0\t# first load padded with zero\n# this takes care of odd number of digits by padding with zeros\nbt $0,%rsi\nsbb %rcx,%rcx\nsub %rcx,%rsi\t\t# len is even\nMOVQ %rcx,%xmm2\nshufpd $1,%xmm2,%xmm2\t# swap high/low halfs\npandn -16(%rdi,%rsi,8),%xmm2\t# last load padded with zero\n# so we have an even addr and an even number of digits \n# and we have loaded up first 2 and last 2 
digits\n# by chance the general code handles all cases correctly except for\n# n=0,1, 2evenaddr\ncmp $2,%rsi\njne L(big)\n\t# so just pad out with zeros\n\tadd $2,%rsi\n\tMOVQ %rax,%xmm1\n\tmovddup %xmm1,%xmm1\n\tpand %xmm1,%xmm0\n\tpandn %xmm2,%xmm1\n\tmovdqa %xmm1,%xmm2\nL(big):\nmovdqa %xmm0,%xmm1\nmovdqa %xmm2,%xmm3\nsub $8,%rsi\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tpsrlw $1,%xmm0\n\tpand %xmm4,%xmm0\n\tpsubb %xmm0,%xmm1\n\t\tpsrlw $1,%xmm2\n\tmovdqa %xmm1,%xmm0\n\t\t\t\tpaddq %xmm8,%xmm11\n\tpsrlw $2,%xmm1\n\tpand %xmm5,%xmm0\n\tpand %xmm5,%xmm1\n\tpaddb %xmm0,%xmm1\n\t\tpand %xmm4,%xmm2\n\t\t\t\tsub $4,%rsi\n\t\tpsubb %xmm2,%xmm3\n\t\tmovdqa %xmm3,%xmm2\n\t\tpsrlw $2,%xmm3\n\t\tpand %xmm5,%xmm2\n\t\tpand %xmm5,%xmm3\n\t\tpaddb %xmm2,%xmm3\n\t\t\t\tmovdqa 32-32+64(%rdi,%rsi,8),%xmm0\n\tpaddb %xmm1,%xmm3\n\tmovdqa %xmm3,%xmm8\n\tpsrlw $4,%xmm3\n\tpand %xmm6,%xmm3\n\t\t\t\tmovdqa 32-48+64(%rdi,%rsi,8),%xmm2\n\tpand %xmm6,%xmm8\n\t\t\t\tmovdqa 32-32+64(%rdi,%rsi,8),%xmm1\n\tpaddb %xmm3,%xmm8\n\t\t\t\tmovdqa 32-48+64(%rdi,%rsi,8),%xmm3\n\tpsadbw %xmm7,%xmm8\n\tjnc L(lp)\nL(skiplp):\n\tpsrlw $1,%xmm0\n\tpand %xmm4,%xmm0\n\tpsubb %xmm0,%xmm1\n\t\tpsrlw $1,%xmm2\n\tmovdqa %xmm1,%xmm0\n\t\t\t\tpaddq %xmm8,%xmm11\n\tpsrlw $2,%xmm1\n\tpand %xmm5,%xmm0\n\tpand %xmm5,%xmm1\n\tpaddb %xmm0,%xmm1\n\t\tpand %xmm4,%xmm2\n\t\tpsubb %xmm2,%xmm3\n\t\tmovdqa %xmm3,%xmm2\n\t\tpsrlw $2,%xmm3\n\t\tpand %xmm5,%xmm2\n\t\tpand %xmm5,%xmm3\n\t\tpaddb %xmm2,%xmm3\n\tpaddb %xmm1,%xmm3\n\tmovdqa %xmm3,%xmm8\n\tpsrlw $4,%xmm3\n\tpand %xmm6,%xmm3\n\tpand %xmm6,%xmm8\n\tpaddb %xmm3,%xmm8\n\tpsadbw %xmm7,%xmm8\t\ncmp $-3,%rsi\njl L(nomore)\nL(onemore):\n\tmovdqa -32+64(%rdi,%rsi,8),%xmm2\n\tmovdqa %xmm2,%xmm3\n\t\tpsrlw $1,%xmm2\n\t\t\t\tpaddq %xmm8,%xmm11\n\t\tpand %xmm4,%xmm2\n\t\tpsubb %xmm2,%xmm3\n\t\tmovdqa %xmm3,%xmm2\n\t\tpsrlw $2,%xmm3\n\t\tpand %xmm5,%xmm2\n\t\tpand %xmm5,%xmm3\n\t\tpaddb %xmm2,%xmm3\n\tmovdqa %xmm3,%xmm8\n\tpsrlw $4,%xmm3\n\tpand %xmm6,%xmm3\n\tpand %xmm6,%xmm8\n\tpaddb 
%xmm3,%xmm8\n\tpsadbw %xmm7,%xmm8\nL(nomore):\n\tpaddq %xmm8,%xmm11\nMOVQ %xmm11,%rax\nshufpd $1,%xmm11,%xmm11\nMOVQ %xmm11,%rcx\nadd %rcx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/redc_1.as",
    "content": "\n;  core2 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      
%%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul 
    r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, 
0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tsub     r15, 1\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tsub     r15, 1\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign 16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov     r13, 
[r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign 16\ncase0:\n\tmpn_addmul_1_int 0\n\talign 16\ncase1:\n\tmpn_addmul_1_int 1\n\talign 16\ncase3:\n\tmpn_addmul_1_int 3\n\talign 16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign 16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/rsh1add_n.as",
    "content": "\n;  AMD64 mpn_rsh1add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)+(rdx,rcx))/2 return low bit of sum\n\n\tGLOBAL_FUNC mpn_rsh1add_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, 
[rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/rsh1sub_n.as",
    "content": "\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two compliment\n\n\tGLOBAL_FUNC mpn_rsh1sub_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      
L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/rsh_divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9 / rcx ) >> r8    rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1_2 with shifting on the output of the quotient\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\nC\t// 3limb minimum for the mo\nmov %r9,%r10\nmov $2,%r9\nsub %rdx,%r9\nlea -16(%rdi,%rdx,8),%rdi\nlea -16(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC 
//r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nC // for the first limb we can not store (as we have to shift) so we need to\nC // do first limb separately , we could do it as normal as an extention of\nC // the loop , but if we do it as a 1 limb inverse then we can start it\nC // eailer , ie interleave it with the calculation of the 2limb inverse\n\nmov %r11,%r13\nmov %r12,%r14\n\n\nmov (%rsi,%r9,8),%r11\nsub %r10,%r11\nsbb %r10,%r10\n\nimul %r13,%r11\nMOVQ %r11,%mm2\npsrlq %mm0,%mm2\nmov %rcx,%rax\nmul %r11\nmov 8(%rsi,%r9,8),%r11\nmov 16(%rsi,%r9,8),%r12\nadd %r10,%r10\nsbb %rdx,%r11\nsbb $0,%r12\nsbb %r10,%r10\n\n\nadd $2,%r9\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,-16(%rdi,%r9,8)\n\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov 8(%rsi,%r9,8),%r11\n\t\tmov 16(%rsi,%r9,8),%r12\n\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc L(lp)\nL(skiplp):\nmov %r12,%r8\nmov %r13,%rax\nmul %r11\n\nMOVQ %rax,%mm3\nmovq %mm3,%mm4\npsllq %mm1,%mm3\npsrlq %mm0,%mm4\npor %mm3,%mm2\nmovq %mm2,-16(%rdi,%r9,8)\n\nimul %r14,%r11\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne L(case0)\nL(case1):\n\t\tmov 8(%rsi,%r9,8),%r11\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq 
%mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,(%rdi,%r9,8)\n\tmovq %mm4,8(%rdi,%r9,8)\n\n\tmul %rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nL(case0):\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\tmovq %mm2,(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\tcmp %rax,%r8\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/rshift.asm",
    "content": "dnl  mpn_rshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tmpn_rshift(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx,mp_limb_t rcx)\nC\trax=carry\n\nASM_START()\nPROLOGUE(mpn_rshift)\nC\t//when n=1 mod4 seem to have different runtimes\npush %rbx\nmov $4,%rbx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nsub %rdx,%rbx\nxor %rax,%rax\nmov -8(%rsi,%rbx,8),%rdx\nshrd %cl,%rdx,%rax\ncmp $0,%rbx\njge L(skiplp)\t\nALIGN(16)\nL(lp):\n\tmov (%rsi,%rbx,8),%r8\n\tmov 24(%rsi,%rbx,8),%r11\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tmov 16(%rsi,%rbx,8),%r10\n\tshrd %cl,%r10,%r9\n\tmov %r11,%rdx\n\tmov %r9,8(%rdi,%rbx,8)\n\tshrd %cl,%r11,%r10\n\tadd $4,%rbx\n\tmov %r10,-16(%rdi,%rbx,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%rbx\nja L(case0)\nje L(case1)\njp L(case2)\nL(case3):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tmov 16(%rsi,%rbx,8),%r10\n\tshrd %cl,%r10,%r9\n\tmov %r9,8(%rdi,%rbx,8)\n\tshr %cl,%r10\n\tmov 
%r10,16(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tshr %cl,%r9\n\tmov %r9,8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tshr %cl,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case0):\n\tshr %cl,%rdx\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nC\tmpn_store(mp_ptr,mp_size_t,mp_limb_t val)\nC\trax          rdi,      rsi,         rdx\n\nASM_START()\nPROLOGUE(mpn_store)\ncmp $0,%rsi\njz L(case0)\nMOVQ %rdx,%xmm0\nmovddup %xmm0,%xmm0\nlea -16(%rdi),%rdi\ntest $0xF,%rdi\njz L(notodd)\n\tmov %rdx,16(%rdi)\n\tlea 8(%rdi),%rdi\n\tsub $1,%rsi\nL(notodd):\nsub $2,%rsi\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tlea 16(%rdi),%rdi\n\tsub $2,%rsi\n\tmovdqa %xmm0,(%rdi)\n\tjnc L(lp)\nL(skiplp):\njnp L(case0)\n\tmov %rdx,16(%rdi)\nL(case0):\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/sub_n.as",
    "content": "\n;  mpn_sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_sub_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx) return rax=borrow\n\n\tGLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tneg     rcx\n\tcmp     rcx, 0\n\tjz      L_skiplp\n\talign   16\nL_lp:\n\tmov     r8, [rsi+rcx*8]\n\tmov     r10, [rsi+rcx*8+16]\n\tsbb     r8, [rdx+rcx*8]\n\tmov     [rdi+rcx*8], r8\n\tmov     r9, [rsi+rcx*8+8]\n\tsbb     r9, [rdx+rcx*8+8]\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rcx, [rcx+4]\n\tmov     r11, [rsi+rcx*8-8]\n\tsbb     r10, [rdx+rcx*8-16]\n\tsbb     r11, [rdx+rcx*8-8]\n\tmov     [rdi+rcx*8-16], r10\n\tmov     [rdi+rcx*8-8], r11\n\tjrcxz   L_exitlp\n\tjmp     L_lp\nL_exitlp:\n\tsbb     rcx, rcx\nL_skiplp:\n\tcmp     rax, 2\n\tja      L_case3\n\tjz      L_case2\n\tjp      L_case1\nL_case0:\n\tsub     rax, rcx\n\tret\n\talign   16\nL_case1:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   
16\nL_case3:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tmov     r10, [rsi+16]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tsbb     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     r10, [rdx+16]\n\tmov     [rdi+16], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   16\nL_case2:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tsbb     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/core2/subadd_n.asm",
    "content": "dnl  mpn_subadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_subadd_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rsi,%r10,8),%r8\n\tsbb (%rcx,%r10,8),%r8\n\tmov 8(%rsi,%r10,8),%rbx\n\tsbb 8(%rcx,%r10,8),%rbx\n\tmov 24(%rsi,%r10,8),%r11\n\tmov 16(%rsi,%r10,8),%rbp\n\tsbb 16(%rcx,%r10,8),%rbp\n\tsbb 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rdx,%r10,8),%r8\n\tsbb 8(%rdx,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tsbb 16(%rdx,%r10,8),%rbp\n\tsbb 24(%rdx,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rsi),%r8\n\tsbb (%rcx),%r8\n\tmov 8(%rsi),%rbx\n\tsbb 8(%rcx),%rbx\n\tmov 16(%rsi),%rbp\n\tsbb 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rdx),%r8\n\tsbb 8(%rdx),%rbx\n\tmov %r8,(%rdi)\n\tsbb 16(%rdx),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rsi),%r8\n\tsbb 8(%rcx),%r8\n\tmov 16(%rsi),%rbx\n\tsbb 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 8(%rdx),%r8\n\tsbb 16(%rdx),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rsi),%r8\n\tsbb 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 16(%rdx),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/sublsh1_n.as",
    "content": "\n;  core2 mpn_sublsh1_n \n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1\n;\trax = borrow\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_sublsh1_n\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     rdi, [rdi+rcx*8]\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      next\nlp1:\n\tmov     r10, [rsi+rcx*8]\n\tadd     r9, 1\n\tsbb     r10, [rdx+rcx*8]\n\tsbb     r9, r9\n\tadd     rax, 1\n\tsbb     r10, [rdx+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tadd     rcx, 1\n\ttest    rcx, 3\n\tjnz     lp1\nnext:\n\tcmp     rcx, 0\n\tjz      end\n\tpush    r15\n\tpush    r14\n\tpush    r13\n\tpush    r12\n\tpush    rbx\n\talign 16\nlp:\n\tmov     r10, [rsi+rcx*8]\n\tmov     rbx, [rsi+rcx*8+8]\n\tmov     r11, [rsi+rcx*8+16]\n\tmov     r8, [rsi+rcx*8+24]\n\tmov     r12, [rdx+rcx*8]\n\tmov     r13, [rdx+rcx*8+8]\n\tmov     r14, [rdx+rcx*8+16]\n\tmov     r15, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tsbb     r10, r12\n\tsbb     rbx, r13\n\tsbb     r11, r14\n\tsbb     r8, r15\n\tsbb     r9, r9\n\tadd     rax, 1\n\tsbb     r10, r12\n\tsbb     
rbx, r13\n\tsbb     r11, r14\n\tsbb     r8, r15\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     lp\n\tpop     rbx\n\tpop     r12\n\tpop     r13\n\tpop     r14\n\tpop     r15\nend:\n\tadd     rax, r9\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/submul_1.asm",
    "content": "dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for \"Core 2\".\n\ndnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software\ndnl  Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC AMD K8,K9\t 4\nC AMD K10\t 4\nC AMD bd1\t 5.1\nC AMD bobcat\nC Intel P4\t ?\nC Intel core2\t 4.3-4.5 (fluctuating)\nC Intel NHM\t 5.0\nC Intel SBR\t 4.1\nC Intel atom\t ?\nC VIA nano\t 5.25\n\nC INPUT PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`n',\t`%rdx')\ndefine(`v0',\t`%rcx')\ndefine(`carry_in', `%r8')\n\nMULFUNC_PROLOGUE(mpn_submul_1 mpn_submul_1c)\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_submul_1c)\n\tpush\t%rbx\n\tpush\t%rbp\n\tlea\t(%rdx), %rbx\n\tneg\t%rbx\n\n\tmov\t(up), %rax\n\tmov\t(rp), %r10\n\n\tlea\t-16(rp,%rdx,8), rp\n\tlea\t(up,%rdx,8), up\n\tmul\t%rcx\n\tadd\tcarry_in, %rax\n\tadc\t$0, %rdx\n\tjmp\tL(start_nc)\nEPILOGUE()\n\n\tALIGN(16)\nPROLOGUE(mpn_submul_1)\n\tpush\t%rbx\n\tpush\t%rbp\n\tlea\t(%rdx), %rbx\n\tneg\t%rbx\n\n\tmov\t(up), %rax\n\tmov\t(rp), %r10\n\n\tlea\t-16(rp,%rdx,8), rp\n\tlea\t(up,%rdx,8), up\n\tmul\t%rcx\n\nL(start_nc):\n\tbt\t$0, %ebx\n\tjc\tL(odd)\n\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tlea\t(%rdx), 
%rbp\n\tmul\t%rcx\n\tadd\t$2, %rbx\n\tjns\tL(ln2)\n\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tlea\t(%rdx), %r9\n\tjmp\tL(mid)\n\nL(odd):\tadd\t$1, %rbx\n\tjns\tL(ln1)\n\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tlea\t(%rdx), %r9\n\tmul\t%rcx\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tlea\t(%rdx), %rbp\n\tjmp\tL(le)\n\n\tALIGN(16)\nL(top):\tmul\t%rcx\n\tsub\t%r8, %r10\n\tlea\t(%rax), %r8\n\tmov\t(up,%rbx,8), %rax\n\tadc\t%r9, %r11\n\tmov\t%r10, -8(rp,%rbx,8)\n\tmov\t(rp,%rbx,8), %r10\n\tlea\t(%rdx), %r9\n\tadc\t$0, %rbp\nL(mid):\tmul\t%rcx\n\tsub\t%r11, %r10\n\tlea\t(%rax), %r11\n\tmov\t8(up,%rbx,8), %rax\n\tadc\t%rbp, %r8\n\tmov\t%r10, (rp,%rbx,8)\n\tmov\t8(rp,%rbx,8), %r10\n\tlea\t(%rdx), %rbp\n\tadc\t$0, %r9\nL(le):\tadd\t$2, %rbx\n\tjs\tL(top)\n\n\tmul\t%rcx\n\tsub\t%r8, %r10\n\tadc\t%r9, %r11\n\tmov\t%r10, -8(rp)\n\tadc\t$0, %rbp\nL(ln2):\tmov\t(rp), %r10\n\tsub\t%r11, %r10\n\tadc\t%rbp, %rax\n\tmov\t%r10, (rp)\n\tadc\t$0, %rdx\nL(ln1):\tmov\t8(rp), %r10\n\tsub\t%rax, %r10\n\tmov\t%r10, 8(rp)\n\tmov    %ebx, %eax\tC zero rax\n\tadc\t%rdx, %rax\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/sumdiff_n.asm",
    "content": "dnl  mpn_sumdiff\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_sumdiff_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\npush %r12\nmov $3,%r10d\npush %r13\nlea -24(%rdi,%r8,8),%rdi\npush %r14\nlea -24(%rsi,%r8,8),%rsi\npush %r15\nsub %r8,%r10\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tmov %r8,%r11\n\tadc (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx,%r10,8),%rbx\n\tmov 24(%rdx,%r10,8),%r12\n\tmov 16(%rdx,%r10,8),%rbp\n\tmov %r12,%r15\n\tmov %rbp,%r14\n\tadc 16(%rcx,%r10,8),%rbp\n\tadc 24(%rcx,%r10,8),%r12\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx,%r10,8),%r11\n\tmov %r8,(%rdi,%r10,8)\n\tsbb 8(%rcx,%r10,8),%r13\n\tsbb 16(%rcx,%r10,8),%r14\n\tmov %rbx,8(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tsbb 24(%rcx,%r10,8),%r15\n\tmov %r12,24(%rdi,%r10,8)\n\tsetc %r9b\n\tmov %r14,16(%rsi,%r10,8)\n\tmov %r15,24(%rsi,%r10,8)\n\tadd $4,%r10\n\tmov %r13,8-32(%rsi,%r10,8)\n\tmov %r11,-32(%rsi,%r10,8)\n\tjnc 
L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tmov %r8,%r11\n\tadc (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tmov %rbp,%r14\n\tadc 16(%rcx),%rbp\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx),%r11\n\tmov %r8,(%rdi)\n\tsbb 8(%rcx),%r13\n\tsbb 16(%rcx),%r14\n\tmov %rbx,8(%rdi)\n\tmov %rbp,16(%rdi)\n\tsetc %r9b\n\tmov %r14,16(%rsi)\n\tmov %r13,8(%rsi)\n\tmov %r11,(%rsi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tmov %r8,%r11\n\tadc 8(%rcx),%r8\n\tmov 8+8(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 8+8(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 8(%rcx),%r11\n\tmov %r8,8(%rdi)\n\tsbb 8+8(%rcx),%r13\n\tmov %rbx,8+8(%rdi)\n\tsetc %r9b\n\tmov %r13,8+8(%rsi)\n\tmov %r11,8(%rsi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tmov %r8,%r11\n\tadc 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 16(%rcx),%r11\n\tmov %r8,16(%rdi)\n\tsetc %r9b\n\tmov %r11,16(%rsi)\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/core2/xnor_n.as",
    "content": "\n;  core2 mpn_xnor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_xnor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/core2/xor_n.as",
    "content": "\n;  core2 mpn_xor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_xor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, r8\n\tor      r9, r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, r10\n\tor      r11, r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/divexact_by3c.as",
    "content": "\n;  AMD64 mpn_diveby3\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rdx) = (rsi, rdx)  rcx = carry in\n;\trax = carry out\n;\tNOTE could pass 55555...555 as next param so this would\n;\tbe mpn_divexact_by_ff_over_c , and change imul at end , or\n;\tdrop backwards compatibilty and just dump the two imuls\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n    GLOBAL_FUNC mpn_divexact_by3c\n\tmov     r9d, 3\n\tlea     rsi, [rsi+rdx*8-24]\n\tlea     rdi, [rdi+rdx*8-24]\n\tmov     r8, 0x5555555555555555\n\timul    rcx, r8\n\tsub     r9, rdx\n\tjnc     skiploop\n\talign   16\nloop1:\n\tmov     rax, [rsi+r9*8]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8], rcx\n\tsbb     rcx, rdx\n\tmov     rax, [rsi+r9*8+8]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8+8], rcx\n\tsbb     rcx, rdx\n\tmov     rax, [rsi+r9*8+16]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8+16], rcx\n\tsbb     rcx, rdx\n\tmov     rax, [rsi+r9*8+24]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8+24], rcx\n\tsbb     rcx, rdx\n\tadd     r9, 4\n\tjnc     loop1\nskiploop:\n\ttest    r9, 2\n\tjnz     skip\n\tmov     rax, [rsi+r9*8]\n\tmul     
r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8], rcx\n\tsbb     rcx, rdx\n\tmov     rax, [rsi+r9*8+8]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8+8], rcx\n\tsbb     rcx, rdx\n\tadd     r9, 2\nskip:\n\ttest    r9, 1\n\tjnz     end\n\tmov     rax, [rsi+r9*8]\n\tmul     r8\n\tsub     rcx, rax\n\tmov     [rdi+r9*8], rcx\n\tsbb     rcx, rdx\nend:\n\t; below is the same as   imul    rax, rcx, -3\n\tlea     rax, [rcx+rcx*2]\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/divexact_byfobm1.as",
    "content": ";  X86_64 mpn_diveby (B-1)/f\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n;\t(rdi,rdx)=(rsi,rdx)/rcx where r8=(B-1)/rcx\n;\trax=carry out\n\n;\tThe two imul's are only needed if want strict compatibility with\n;\tmpn_divexact_1 when the division is not exact\n\n\tGLOBAL_FUNC mpn_divexact_byfobm1\n\nmov     r10d, 3\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\n; r9 is our carry in\nmov     r9, 0\n; imul %r8,%r9 this is needed if we have non-zero carry in\nsub     r10, rdx\njnc     skiploop\nalign 16\nlp:\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+16]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+16], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+24]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+24], r9\n\tsbb     r9, rdx\n\tadd     r10, 4\n\tjnc     lp\nskiploop:\ntest    r10, 2\njnz     skip\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], 
r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tadd     r10, 2\nskip:\ntest    r10, 1\njnz     fin\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\nfin:\nimul    r9, rcx\nmov     rax, r9\nneg     rax\nret\nend\n"
  },
  {
    "path": "mpn/x86_64/divrem_2.as",
    "content": "\n;  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.\n\n;  Copyright 2007, 2008 Free Software Foundation, Inc.\n\n;  Copyright Brian Gladman 2010 (Conversion to yasm format)\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n\n;\t\tnorm\tfrac\n; k8\t\t20\t20\n; netburst\t73\t73\n; core2\t37\t37\n; nehalem\t33\t33\n\n; INPUT PARAMETERS\n; qp \trdi\n; fn \trsi\n; np\trdx\n; nn \trcx\n; dp\tr8\n; dinv r9\n\n%include 'yasm_mac.inc'\n    BITS 64\n\tTEXT\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    align  16\nGLOBAL_FUNC mpn_divrem_2\n\n\tpush    r15\n\tlea     rax, [rdx+rcx*8]\n\tpush    r14\n\tpush    r13\n\tmov     r13, rsi\n\tpush    r12\n\tlea     r12, [rax-24]\n\tpush    rbp\n\tmov     rbp, rdi\n\tpush    rbx\n\tmov     r11, [r8+8]\n\tmov     r9, [rax-8]\n\tmov     r8, [r8]\n\tmov     r10, [rax-16]\n\txor     r15d, r15d\n\tcmp     r11, r9\n\tja      L_2\n\tsetb    dl\n\tcmp     r8, r10\n\tsetbe   al\n\tor      dl, al\n\tjne     L_23\nL_2:\n\tlea     rbx, [rcx+r13-3]\n\ttest    rbx, rbx\n\tjs      L_6\n\tmov     rdx, r11\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r11\n\tmov     rdx, r11\n\tmov     rdi, rax\n\timul    rdx, rax\n\tmov     r14, rdx\n\tmul     r8\n\tmov     rcx, rdx\n\tmov     rdx, -1\n\tadd     r14, r8\n\tadc     rdx, 0\n\tadd     r14, rcx\n\tadc     rdx, 0\n\tjs      
L_8\nL_18:\n\tdec     rdi\n\tsub     r14, r11\n\tsbb     rdx, 0\n\tjns     L_18\nL_8:\n\n%ifdef NEW\n\tlea     rbp, [rbp+rbx*8]\n\tmov     rcx, rbx\n\tmov     rbx, r9\n\tmov     r9, rdi\n\tmov     r14, r10\n\tmov     rsi, r11\n\tneg     rsi\n\talign  16\nL_loop:\n\tmov     rax, r9\n\tmul     rbx\n\tadd     rax, r14\n\tmov     r10, rax\n\tadc     rdx, rbx\n\tmov     rdi, rdx\n\timul    rdx, rsi\n\tmov     rax, r8\n\tlea     rbx, [rdx+r14]\n\tmul     rdi\n\txor     r14d, r14d\n\tcmp     r13, rcx\n\tjg      L_19\n\tmov     r14, [r12]\n\tsub     r12, 8\nL_19:\n\tsub\t r14, r8\n\tsbb     rbx, r11\n\tsub     r14, rax\n\tsbb     rbx, rdx\n\tinc     rdi\n\txor     edx, edx\n\tcmp     rbx, r10\n\tmov     rax, r8\n\tadc     rdx, -1\n\tadd     rdi, rdx\n\tand     rax, rdx\n\tand     rdx, r11\n\tadd     r14, rax\n\tadc     rbx, rdx\n\tcmp     rbx, r11\n\tjae     L_fix\nL_bck:\n\tmov\t [rbp], rdi \n\tsub     rbp, 8\n\tdec     rcx\n\tjns     L_loop\n\n\tmov     r10, r14\n\tmov     r9, rbx\n\n%else\n\n\tlea     rbp, [rbp+rbx*8]\n\tmov     rcx, rbx\n\tmov     rax, r9\n\tmov     rsi, r10\n\talign  16\nL_loop:\n\tmov     r14, rax\n\tmul     rdi\n\tmov     r9, r11\n\tadd     rax, rsi\n\tmov     rbx, rax\n\tadc     rdx, r14\n\tlea     r10, [rdx+1]\n\tmov     rax, rdx\n\timul    r9, rdx\n\tsub     rsi, r9\n\txor     r9d, r9d\n\tmul     r8\n\tcmp     r13, rcx\n\tjg      L_13\n\tmov     r9, [r12]\n\tsub     r12, 8\nL_13:\n\tsub\t r9, r8\n\tsbb     rsi, r11\n\tsub     r9, rax\n\tsbb     rsi, rdx\n\tcmp     rsi, rbx\n\tsbb     rax, rax\n\tnot     rax\n\tadd     r10, rax\n\tmov     rbx, r8\n\tand     rbx, rax\n\tand     rax, r11\n\tadd     r9, rbx\n\tadc     rax, rsi\n\tcmp     r11, rax\n\tjbe     L_fix\nL_bck:\n\tmov\t [rbp], r10\n\tsub     rbp, 8\n\tmov     rsi, r9\n\tdec     rcx\n\tjns     L_loop\n\n\tmov     r10, rsi\n\tmov     r9, rax\n\n%endif\n\nL_6:\n\tmov     [r12+8], r10\n\tmov     [r12+16], r9\n\tpop     rbx\n\tpop     rbp\n\tpop     r12\n\tpop     r13\n\tpop     r14\n\tmov     
rax, r15\n\tpop     r15\n\tret\n\nL_23:\n\tinc     r15d\n\tsub     r10, r8\n\tsbb     r9, r11\n\tjmp     L_2\n\n%ifdef NEW\n\nL_fix:\n\tseta\t dl\n\tcmp     r14, r8\n\tsetae   al\n\torb     al, dl\n\tje      L_bck\n\tinc     rdi\n\tsub     r14, r8\n\tsbb     rbx, r11\n\tjmp     L_bck\n\n%else\nL_fix:\n\tjb\t L_88\n\tcmp     r9, r8\n\tjb      L_bck\nL_88:\n\tinc\t r10\n\tsub     r9, r8\n\tsbb     rax, r11\n\tjmp     L_bck\n\n%endif\n\t"
  },
  {
    "path": "mpn/x86_64/divrem_euclidean_qr_1.as",
    "content": ";  x86-64 mpn_divrem_euclidean_qr_1 -- mpn by limb division.\n\n;  Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.\n\n;  Copyright 2010 Brian Gladman (Conversion to yasm format)\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n\n; mp_limb_t\n; mpn_divrem_euclidean_qr_1 (mp_ptr qp, mp_size_t fn,\n;               mp_srcptr np, mp_size_t nn, mp_limb_t d)\n\n; mp_limb_t\n; mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,\n;                      mp_srcptr np, mp_size_t nn, mp_limb_t d,\n;                      mp_limb_t dinv, int shift)\n\n;\t\tnorm\tunorm\tfrac\n; k8\t\t13\t13\t12\n; netburst\t44.2\t44.2\t42.3\n; core2\t25\t24.5\t19.3\n; nehalem\t21.5\t20.7\t18\n; atom\t       42\t52\t37\n\n; INPUT PARAMETERS\n; qp \t%rdi\n; fn \t%rsi\n; np\t%rdx\n; nn \t%rcx\n; d    %r8\n; dinv %r9\t\t \tonly for mpn_preinv_divrem_1\n; shift passed on stack\tonly for mpn_preinv_divrem_1\n\n%include 'yasm_mac.inc'\n\n%define SPECIAL_CODE_FOR_NORMALIZED_DIVISOR\n\n\tTEXT\n\talign  16\nGLOBAL_FUNC mpn_preinv_divrem_1\n\n\txor     eax, eax\n\tpush    r13\n\tpush    r12\n\tpush    rbp\n\tpush    rbx\n\n\tmov     r12, rsi\n\tmov     rbx, rcx\n\tadd     rcx, rsi\n\tmov     rsi, rdx\n\n\tlea     rdi, [rdi+rcx*8-8]\n\n\ttest    r8, r8\n\tjs      L_nent\n\tmov     cl, [rsp+40]\n\tshl     r8, cl\n\tjmp     L_uent\n\n\talign  16\nGLOBAL_FUNC 
mpn_divrem_euclidean_qr_1\n\txor     eax, eax\n\tpush    r13\n\tpush    r12\n\tpush    rbp\n\tpush    rbx\n\n\tmov     r12, rsi\n\tmov     rbx, rcx\n\tadd     rcx, rsi\n\tmov     rsi, rdx\n\tje      L_ret\n\n\tlea     rdi, [rdi+rcx*8-8]\n\txor     ebp, ebp\n\n%ifdef  SPECIAL_CODE_FOR_NORMALIZED_DIVISOR\n\n\ttest    r8, r8\n\tjns     L_unnormalized\n\nL_normalized:\n\ttest    rbx, rbx\n\tje      L_8\n\tmov     rbp, [rsi+rbx*8-8]\n\tdec     rbx\n\tmov     rax, rbp\n\tsub     rbp, r8\n\tcmovb   rbp, rax\n\tsbb     eax, eax\n\tinc     eax\n\tmov     [rdi], rax\n\tlea     rdi, [rdi-8]\nL_8:\n\tmov     rdx, r8\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r8\n\tmov     r9, rax\n\tmov     rax, rbp\n\tjmp     L_nent\n\n\talign  16\nL_nloop:\n\tmov     r10, [rsi+rbx*8]\n\tlea     rbp, [rax+1]\n\tmul     r9\n\tadd     rax, r10\n\tadc     rdx, rbp\n\tmov     rbp, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     r10, rdx\n\tmov     rax, r8\n\tadd     rax, r10\n\tcmp     r10, rbp\n\tcmovb   rax, r10\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     L_nfx\nL_nok:\n\tmov\t [rdi], r13\n       sub     rdi, 8\nL_nent:\n\tdec\t rbx\n\tjns     L_nloop\n\n\txor     ecx, ecx\n\tjmp     L_87\n\nL_nfx:\n\tsub\t rax, r8\n\tinc     r13\n\tjmp     L_nok\n\n%endif\n\nL_unnormalized:\n\ttest    rbx, rbx\n\tje      L_44\n\tmov     rax, [rsi+rbx*8-8]\n\tcmp     rax, r8\n\tjae     L_44\n\tmov     [rdi], rbp\n\tmov     rbp, rax\n\tlea     rdi, [rdi-8]\n\tje      L_ret\n\tdec     rbx\nL_44:\n\tbsr     rcx, r8\n\tnot     ecx\n\tsal     r8, cl\n\tsal     rbp, cl\n\tmov     rdx, r8\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r8\n\ttest    rbx, rbx\n\tmov     r9, rax\n\tmov     rax, rbp\n\tje      L_87\nL_uent:\n\tmov     rbp, [rsi+rbx*8-8]\n\tshr     rax, cl\n\tshld    rax, rbp, cl\n\tsub     rbx, 2\n\tjs      L_ulast\n\n\talign  16\nL_uloop:\n\tnop\n\tmov     r10, [rsi+rbx*8]\n\tlea     r11, [rax+1]\n\tshld    rbp, r10, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, 
rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     L_ufx\nL_uok:\n\tmov\t [rdi], r13\n\tsub     rdi, 8\n\tdec     rbx\n\tmov     rbp, r10\n\tjns     L_uloop\nL_ulast:\n\tlea     r11, [rax+1]\n\tsal     rbp, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     L_93\nL_69:   \n\tmov\t [rdi], r13\n       sub     rdi, 8\n\tjmp     L_87\n\nL_ufx:  \n\tsub\t rax, r8\n\tinc     r13\n\tjmp     L_uok\n\nL_93:   \n\tsub\t rax, r8\n\tinc     r13\n\tjmp     L_69\n\nL_87:   \n\tmov\t rbp, r8\n\tneg     rbp\n\tjmp     L_87b\n\n\talign  16\nL_floop:\n\tlea     r11, [rax+1]\n\tmul     r9\n\tadd     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, rbp\n\tmov     rax, r8\n\tadd     rax, rdx\n\tcmp     rdx, r11\n\tcmovb   rax, rdx\n\tadc     r13, -1\n\tmov     [rdi], r13\n\tsub     rdi, 8\nL_87b:\n\tdec\t r12\n\tjns     L_floop\n\n\tshr     rax, cl\nL_ret:\n\tpop\t rbx\n\tpop     rbp\n\tpop     r12\n\tpop     r13\n\tret\n\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/divrem_euclidean_qr_2.as",
    "content": "\n;  mpn_divrem_euclidean_qr_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tGLOBAL_FUNC mpn_divrem_euclidean_qr_2\n; rax\tMULTEMP\n; rbx\tqn\n; rcx\tdp\n; rdx\tMULTEMP\n; rdi\tqp\n; rsi\txp\n; rbp\ti\n; r8\tl\n; r9\th\n; r10\tt[0],xh\n; r11\t\n; r12\tt1[0]\n; r13\tt1[1]\n; r14\tqf\n; r15\tq,mask,temp\n\tpush    rbx\n\tpush    rbp\n\tpush    r12\n\tpush    r13\n\tpush    r14\n\tpush    r15\n\tmov     rbx, rdx\n\tdec     rbx\n\tmov     r9, 0\n\tmov     rdx, [rcx+8]\n\tnot     rdx\n\tmov     rax, -1\n\tdiv\tqword    [rcx+8]\n\tmov     rbp, rax\n\tmov     r8, [rsi+rbx*8]\n\tdec     rbx\n\tmov     r10, [rsi+rbx*8]\n\tcmp     r8, [rcx+8]\n\tjae     L_j2\n\tmov     rdx, r8\n\tmov     r9, r8\n\tmov     r10, [rsi+rbx*8]\n\tmov     r8, r10\n\tmov     r14, 0\n\tjmp     L_j1\nL_j2:\n\tmov     r14, 1\n\tmov     rdx, r8\n\tsub     rdx, [rcx+8]\n\tsub     r10, [rcx]\n\tsbb     rdx, 0\n\tjnc     L_j3\n\tdec     r14\n\tadd     r10, [rcx]\n\tadc     rdx, [rcx+8]\nL_j3:\n\tmov     r9, rdx\n\tmov     r8, r10\nL_j1:\n\tdec     rbx\n\tmov     rax, r9\n\tjs      L_skiplp\n\talign   16\nL_lp:\n\tcmp     rax, [rcx+8]\n\tje      
L_j4\n\tmov     r9, rax\n\tbt      r8, 63\n\tadc     rax, 0\n\tmov     r15, r8\n\tmov     r13, 0\n\tsar     r15, 63\n\tand     r15, [rcx+8]\n\tadd     r15, r8\n\tmul     rbp\n\tadd     rax, r15\n\tmov     rax, [rcx]\n\tadc     rdx, r9\n\tmov     r10, rdx\n\tmul     rdx\n\tmov     r12, [rcx]\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, [rcx+8]\n\tnot     r10\n\tmul     r10\n\tsub     r9, [rcx+8]\n\tadd     rax, r8\n\tadc     rdx, r9\n\tmov     r9, rdx\n\tand     r9, [rcx+8]\n\t; swapped r9,rax from here\n\tadd     rax, r9\n\tmov     r15, rdx\n\tsub     r15, r10\n\tand     rdx, [rcx]\n\tsub     r12, rdx\n\tsbb     r13, 0\n\tmov     r8, [rsi+rbx*8]\n\tsub     r8, r12\n\tsbb     rax, r13\n\tmov     r10, [rcx]\n\tmov     r11, [rcx+8]\n\tsbb     r9, r9\n\tand     r10, r9\n\tand     r11, r9\n\tadd     r15, r9\n\tadd     r8, r10\n\tadc     rax, r11\n\tadc     r9, 0\n\tand     r10, r9\n\tand     r11, r9\n\tadd     r15, r9\n\tadd     r8, r10\n\tadc     rax, r11\nL_j6:\n\tmov     [rdi+rbx*8], r15\n\tdec     rbx\n\tjns     L_lp\nL_skiplp:\n\tmov     [rsi+8], rax\n\tmov     [rsi], r8\n\tmov     rax, r14\n\tpop     r15\n\tpop     r14\n\tpop     r13\n\tpop     r12\n\tpop     rbp\n\tpop     rbx\n\tret\nL_j4:\n\tmov     r10, [rsi+rbx*8]\n\tmov     r15, -1\n\tmov     rdx, r8\n\tadd     r10, [rcx]\n\tadc     rdx, rax\n\tsbb     rax, rax\n\tsub     rdx, [rcx]\n\tadc     rax, 0\n\tjz      L_j5\n\tdec     r15\n\tadd     r10, [rcx]\n\tadc     rdx, [rcx+8]\nL_j5:\n\tmov     rax, rdx\n\tmov     r8, r10\n\tjmp     L_j6\n\tend\n"
  },
  {
    "path": "mpn/x86_64/fat/add_err1_n.c",
    "content": "/* Fat binary fallback mpn_add_err1_n.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/add_err1_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/add_err2_n.c",
    "content": "/* Fat binary fallback mpn_add_err2_n.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/add_err2_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/add_n.c",
    "content": "/* Fat binary fallback mpn_add_n\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/add_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/addmul_1.c",
    "content": "/* Fat binary fallback mpn_addmul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/addmul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/copyd.c",
    "content": "/* Fat binary fallback mpn_copyd\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\tmpn_copyd(mp_ptr rp,mp_srcptr sp,mp_size_t n)\n{MPN_COPY_DECR(rp,sp,n);return;}\n"
  },
  {
    "path": "mpn/x86_64/fat/copyi.c",
    "content": "/* Fat binary fallback mpn_copyi\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\tmpn_copyi(mp_ptr rp,mp_srcptr sp,mp_size_t n)\n{MPN_COPY_INCR(rp,sp,n);return;}\n"
  },
  {
    "path": "mpn/x86_64/fat/divexact_1.c",
    "content": "/* Fat binary fallback mpn_divexact_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divexact_by3c.c",
    "content": "/* Fat binary fallback mpn_diveby3.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_by3c.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divexact_byfobm1.c",
    "content": "/* Fat binary fallback mpn_divexact_fobm1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_byfobm1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divrem_1.c",
    "content": "/* Fat binary fallback mpn_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divrem_2.c",
    "content": "/* Fat binary fallback mpn_divrem_2.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_2.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divrem_euclidean_qr_1.c",
    "content": "/* Fat binary fallback mpn_divrem_euclidean_qr_1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_euclidean_qr_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/divrem_euclidean_qr_2.c",
    "content": "/* Fat binary fallback mpn_divrem_euclidean_qr_2\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_euclidean_qr_2.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/fat.c",
    "content": "/* x86 fat binary initializers.\n\n   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.\n   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR\n   COMPLETELY IN FUTURE GNU MP RELEASES.\n\nCopyright 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>    /* for printf */\n#include <stdlib.h>   /* for getenv */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Change this to \"#define TRACE(x) x\" for some traces. 
*/\n#define TRACE(x)\n\n/* fat_entry.asm */\nlong __gmpn_cpuid(char dst[12], int id);\n\nstruct cpuvec_t __gmpn_cpuvec = {\n  __MPN(add_err1_n_init),\n  __MPN(add_err2_n_init),\n  __MPN(add_n_init),\n  __MPN(addmul_1_init),\n  __MPN(copyd_init),\n  __MPN(copyi_init),\n  __MPN(divexact_1_init),\n  __MPN(divexact_by3c_init),\n  __MPN(divexact_byfobm1_init),\n  __MPN(divrem_1_init),\n  __MPN(divrem_2_init),\n  __MPN(divrem_euclidean_qr_1_init),\n  __MPN(divrem_euclidean_qr_2_init),\n  __MPN(gcd_1_init),\n  __MPN(lshift_init),\n  __MPN(mod_1_init),\n  __MPN(mod_34lsub1_init),\n  __MPN(modexact_1c_odd_init),\n  __MPN(mul_1_init),\n  __MPN(mul_basecase_init),\n  __MPN(mulmid_basecase_init),\n  __MPN(preinv_divrem_1_init),\n  __MPN(preinv_mod_1_init),\n  __MPN(redc_1_init),\n  __MPN(rshift_init),\n  __MPN(sqr_basecase_init),\n  __MPN(sub_err1_n_init),\n  __MPN(sub_err2_n_init),\n  __MPN(sub_n_init),\n  __MPN(submul_1_init),\n  __MPN(sumdiff_n_init),\n  0\n};\n\n\n/* The following setups start with generic x86, then overwrite with\n   specifics for a chip, and higher versions of that chip.\n\n   The arrangement of the setups here will normally be the same as the $path\n   selections in configure.in for the respective chips.\n\n   This code is reentrant and thread safe.  We always calculate the same\n   decided_cpuvec, so if two copies of the code are running it doesn't\n   matter which completes first, both write the same to __gmpn_cpuvec.\n\n   We need to go via decided_cpuvec because if one thread has completed\n   __gmpn_cpuvec then it may be making use of the threshold values in that\n   vector.  If another thread is still running __gmpn_cpuvec_init then we\n   don't want it to write different values to those fields since some of the\n   asm routines only operate correctly up to their own defined threshold,\n   not an arbitrary value.  
*/\n\n#define CONFIG_GUESS            0\n#define CONFIG_GUESS_32BIT      0\n#define CONFIG_GUESS_64BIT      0\n#define FAT32                   0\n#define FAT64                   1\n#define INFAT                   1\n\n#define CPUSETUP_core2\t\tCPUVEC_SETUP_core2\n#define CPUSETUP_penryn\t\tCPUVEC_SETUP_core2;CPUVEC_SETUP_core2_penryn\n#define CPUSETUP_nehalem\tCPUVEC_SETUP_nehalem\n#define CPUSETUP_westmere\tCPUVEC_SETUP_nehalem;CPUVEC_SETUP_nehalem_westmere\n#define CPUSETUP_sandybridge\tCPUVEC_SETUP_sandybridge\n#define CPUSETUP_ivybridge\tCPUVEC_SETUP_sandybridge;CPUVEC_SETUP_sandybridge_ivybridge\n#define CPUSETUP_haswell\tCPUVEC_SETUP_haswell\n#define CPUSETUP_haswellavx\tCPUVEC_SETUP_haswell;CPUVEC_SETUP_haswell_avx\n#define CPUSETUP_broadwell      CPUVEC_SETUP_haswell;CPUVEC_SETUP_haswell_broadwell\n#define CPUSETUP_skylake        CPUVEC_SETUP_skylake\n#define CPUSETUP_skylakeavx     CPUVEC_SETUP_skylake;CPUVEC_SETUP_skylake_avx\n#define CPUSETUP_atom\t\tCPUVEC_SETUP_atom\n#define CPUSETUP_nano\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k8only\n#define CPUSETUP_netburst\tCPUVEC_SETUP_netburst\n#define CPUSETUP_netburstlahf\tCPUVEC_SETUP_netburst;\n#define CPUSETUP_k8\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k8only\n#define CPUSETUP_k10\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10\n#define CPUSETUP_k102\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_k103\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_bulldozer\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_piledriver\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_bobcat\t\tCPUVEC_SETUP_bobcat\n\n#include \"cpuid.c\"\n\nvoid\n__gmpn_cpuvec_init (void)\n{\n  struct cpuvec_t  decided_cpuvec;\n\n  TRACE (printf (\"__gmpn_cpuvec_init:\\n\"));\n\n  __gmpn_cpu(&decided_cpuvec);\n\n  ASSERT_CPUVEC (decided_cpuvec);\n  CPUVEC_INSTALL (decided_cpuvec);\n\n  /* Set this once the threshold fields are 
ready.\n     Use volatile to prevent it getting moved.  */\n  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;\n}\n"
  },
  {
    "path": "mpn/x86_64/fat/fat_entry.asm",
    "content": "dnl  x86 fat binary entrypoints.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\ndnl  Forcibly disable profiling.\ndnl\ndnl  The entrypoints and inits are small enough not to worry about, the real\ndnl  routines arrived at will have any profiling.  Also, the way the code\ndnl  here ends with a jump means we won't work properly with the\ndnl  \"instrument\" profiling scheme anyway.\n\ndefine(`WANT_PROFILING',no)\n\n\n\tTEXT\n\n\ndnl  Usage: FAT_ENTRY(name, offset)\ndnl\ndnl  Emit a fat binary entrypoint function of the given name.  This is the\ndnl  normal entry for applications, eg. 
__gmpn_add_n.\ndnl\ndnl  The code simply jumps through the function pointer in __gmpn_cpuvec at\ndnl  the given \"offset\" (in bytes).\ndnl\ndnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be\ndnl  fine for all x86s.\ndnl\ndnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to\ndnl  ensure at least the first two instructions don't cross a cache line\ndnl  boundary.\ndnl\ndnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE\ndnl  grepping in configure, stopping that code trying to eval something with\ndnl  $1 in it.\n\ndefine(FAT_ENTRY,\nm4_assert_numargs(2)\n`\tALIGN(ifdef(`PIC',16,8))\n`'PROLOGUE($1)\nifdef(`PIC',\n`\tlea \t_GLOBAL_OFFSET_TABLE_(%rip),%r11\n\tmovq\tGSYM_PREFIX`'__gmpn_cpuvec@GOT(%r11), %r11\n\tjmp\t*m4_empty_if_zero($2)(%r11)\n',`dnl non-PIC\n\tjmp\t*GSYM_PREFIX`'__gmpn_cpuvec+$2\n')\nEPILOGUE()\n')\n\n\ndnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST\ndnl\n\ndefine(`CPUVEC_offset',0)\nforeach(i,\n`FAT_ENTRY(MPN(i),CPUVEC_offset)\ndefine(`CPUVEC_offset',eval(CPUVEC_offset + 8))',\nCPUVEC_FUNCS_LIST)\n\ndnl  Usage: FAT_INIT(name, offset)\ndnl\ndnl  Emit a fat binary initializer function of the given name.  These\ndnl  functions are the initial values for the pointers in __gmpn_cpuvec.\ndnl\ndnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through\ndnl  the __gmpn_cpuvec pointer, at the given \"offset\" (in bytes).\ndnl  __gmpn_cpuvec_init will have stored the address of the selected\ndnl  implementation there.\ndnl\ndnl  Only one of these routines will be executed, and only once, since after\ndnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no\ndnl  need for anything special here, just something small and simple.  To\ndnl  keep code size down, \"fat_init\" is a shared bit of code, arrived at\ndnl  with the offset in %al.  
%al is used since the movb instruction is 2\ndnl  bytes where %eax would be 4.\ndnl\ndnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the\ndnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval\ndnl  something with $1 in it.\n\ndefine(FAT_INIT,\nm4_assert_numargs(2)\n`PROLOGUE($1)\n\tpushq\t%rax\n    pushq   %rbx\n    pushq   %rsi\n    pushq   %rdi\n    pushq   %rdx\n    pushq   %rcx\n    pushq   %r8\n    pushq   %r9\n    pushq   %rbp\n\t\nifdef(`PIC',`\n\tlea\t_GLOBAL_OFFSET_TABLE_(%rip),%rbx\n\tcall\tGSYM_PREFIX`'__gmpn_cpuvec_init@PLT\n\tmovq\tGSYM_PREFIX`'__gmpn_cpuvec@GOT(%rbx), %r11\n\tpopq    %rbp\n    popq    %r9\n    popq    %r8\n    popq    %rcx\n    popq    %rdx\n    popq    %rdi\n    popq    %rsi\n    popq\t%rbx\n    popq\t%rax\n    jmp\t*m4_empty_if_zero($2)(%r11)\n\n',`dnl non-PIC\n\tcall\tGSYM_PREFIX`'__gmpn_cpuvec_init\n\tpopq    %rbp\n    popq    %r9\n    popq    %r8\n    popq    %rcx\n    popq    %rdx\n    popq    %rdi\n    popq    %rsi\n    popq\t%rbx\n\tpopq\t%rax\n\tjmp\t*GSYM_PREFIX`'__gmpn_cpuvec+$2\n')\nEPILOGUE()\n')\n\ndnl  FAT_INIT for each CPUVEC_FUNCS_LIST\ndnl\n\ndefine(`CPUVEC_offset',0)\nforeach(i,\n`FAT_INIT(MPN(i`'_init),CPUVEC_offset)\ndefine(`CPUVEC_offset',eval(CPUVEC_offset + 8))',\nCPUVEC_FUNCS_LIST)\n\n\n\nC long __gmpn_cpuid (char dst[12], int id);\nC\nC This is called only once, so just something simple and compact is fine.\n\ndefframe(PARAM_ID,  8)\ndefframe(PARAM_DST, 4)\ndeflit(`FRAME',0)\n\nPROLOGUE(__gmpn_cpuid)\n\tpushq\t%rbx\t\tFRAME_pushq()\n\tmovq\t%rsi, %rax\n\tcpuid\n\tmovl\t%ebx, (%rdi)\n\tmovl\t%edx, 4(%rdi)\n\tmovl\t%ecx, 8(%rdi)\n\tpopq\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/fat/gcd_1.c",
    "content": "/* Fat binary fallback mpn_gcd_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/gcd_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/lshift.c",
    "content": "/* Fat binary fallback mpn_lshift\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/lshift.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/mod_1.c",
    "content": "/* Fat binary fallback mpn_mod_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mod_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/mod_34lsub1.c",
    "content": "/* Fat binary fallback mpn_mod_34lsub1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mod_34lsub1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/modexact_1c_odd.c",
    "content": "/* Fat binary fallback mpn_modexact_1c_odd.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/modexact_1c_odd.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/mul_1.c",
    "content": "/* Fat binary fallback mpn_mul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/mul_basecase.c",
    "content": "/* Fat binary fallback mpn_mul_basecase\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mul_basecase.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/mulmid_basecase.c",
    "content": "/* Fat binary fallback mpn_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mulmid_basecase.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/preinv_divrem_1.c",
    "content": "/* Fat binary fallback mpn_pre_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/preinv_divrem_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/preinv_mod_1.c",
    "content": "/* Fat binary fallback mpn_pre_mod_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/preinv_mod_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/redc_1.c",
    "content": "/* Fat binary fallback mpn_redc_1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/redc_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/rshift.c",
    "content": "/* Fat binary fallback mpn_rshift\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/rshift.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/sqr_basecase.c",
    "content": "/* Fat binary fallback mpn_sqr_basecase\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sqr_basecase.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/sub_err1_n.c",
    "content": "/* Fat binary fallback mpn_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sub_err1_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/sub_err2_n.c",
    "content": "/* Fat binary fallback mpn_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sub_err2_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/sub_n.c",
    "content": "/* Fat binary fallback mpn_sub_n\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sub_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/submul_1.c",
    "content": "/* Fat binary fallback mpn_submul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/submul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64/fat/sumdiff_n.c",
    "content": "/* Fat binary fallback mpn_sumdiff_n\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sumdiff_n.c\"\n"
  },
  {
    "path": "mpn/x86_64/haswell/add_n.as",
    "content": "\n;  AMD64 mpn_add_n, mpn_add_nc\n;  Copyright 2008, 2016 Jason Moxham and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define CarryIn\n    %define LIMB1   rax\n    %define LIMB2   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define CarryIn r8\n    %define LIMB1   rax\n    %define LIMB2   r8  ; may reuse CarryIn\n    %define SizeRest r9\n%endif\n\n%define ADCSBB adc\n\n    BITS    64\n\n\talign   16\n   GLOBAL_FUNC mpn_add_nc\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n        neg\tCarryIn\t\t; Set CF iff CarryIn != 0\n\tinc     Size\n\tdec     Size\t\t; Set ZF without affecting CF\n\tjnz     loop1\n        jmp     therest ;ajs:notshortform\n\n\talign   16\n   GLOBAL_FUNC mpn_add_n\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n\tcmp     Size, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n        jmp     therest ;ajs:notshortform\n\n\talign 
  16\nloop1:\n\tmov     LIMB1, [Inp1P]\n\tmov     LIMB2, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tADCSBB  LIMB2, [Inp2P+8]\n\tmov     LIMB1, [Inp1P+16]\n\tmov     [SumP+8], LIMB2\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     LIMB2, [Inp1P+24]\n\tmov     [SumP+16], LIMB1\n\tmov     LIMB1, [Inp1P+32]\n\tADCSBB  LIMB2, [Inp2P+24]\n\tmov     [SumP+24], LIMB2\n\tADCSBB  LIMB1, [Inp2P+32]\n\tmov     [SumP+32], LIMB1\n\tmov     LIMB2, [Inp1P+40]\n\tADCSBB  LIMB2, [Inp2P+40]\n\tmov     [SumP+40], LIMB2\n\tmov     LIMB1, [Inp1P+48]\n\tmov     LIMB2, [Inp1P+56]\n\tlea     Inp1P, [Inp1P+64]\n\tADCSBB  LIMB1, [Inp2P+48]\n\tADCSBB  LIMB2, [Inp2P+56]\n\tlea     Inp2P, [Inp2P+64]\n\tmov     [SumP+48], LIMB1\n\tmov     [SumP+56], LIMB2\n\tlea     SumP, [SumP+64]\n\tdec     Size\n\tjnz     loop1\n\tinc     SizeRest\n\tdec     SizeRest\n\tjz      end\ntherest:\n\tmov     LIMB1, [Inp1P]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tdec     SizeRest\n\tjz      end\n\tmov     LIMB1, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P+8]\n\tmov     [SumP+8], LIMB1\n\tdec     SizeRest\n\tjz      end\n\tmov     LIMB1, [Inp1P+16]\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     [SumP+16], LIMB1\n\tdec     SizeRest\n        jz      end\n\tmov     LIMB1, [Inp1P+24]\n\tADCSBB  LIMB1, [Inp2P+24]\n\tmov     [SumP+24], LIMB1\n\tdec     SizeRest\n        jz      end\n        lea     Inp1P, [Inp1P+32]\n        lea     Inp2P, [Inp2P+32]\n        lea     SumP, [SumP+32]\n        jmp     therest\nend:\n\tmov     eax, 0\n\tadc     eax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/haswell/addlsh1_n.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t addlsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, adds this to Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and add by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 1 cycle in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n; ============================================================================\n\n%define ADDSUB add\n%define ADCSBB adc\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op3     RCX\n    %define Op2     RDX\n    %define Op1     R8\n    %define Size    R9\n  %ifdef USE_PREFETCH\n    %define Offs    RBP         ; SAVE!\n  %endif\n\n    %define Limb0   RBX         ; SAVE!\n    %define Limb1   RDI         ; SAVE!\n    %define Limb2   RSI         ; SAVE!\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12         ; SAVE!\n    %define Limb6   R13         ; SAVE!\n    %define Limb7   R14         ; SAVE!\n    %define Limb8   R15         ; SAVE!\n\n    %define SaveRBX XMM0        ; use available scratch XMM to\n    %define SaveRSI XMM1        ; save as many regs as possible\n    %define SaveRDI XMM2\n    %define SaveR12 XMM3\n    %define SaveR13 XMM4\n    %define SaveR14 XMM5\n\n%else\n\n    %define Op3     RDI\n    %define Op2     RSI\n    %define Op1     RDX\n    %define Size    RCX\n  %ifdef USE_PREFETCH\n    %define Offs    RBP         ; SAVE!\n  %endif\n\n    %define Limb0   RBX         ; SAVE!\n    %define Limb1   R8\n    %define Limb2   R9\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12         ; SAVE!\n    %define Limb6   R13         ; SAVE!\n    %define Limb7   R14         ; SAVE!\n    %define Limb8   R15 
        ; SAVE!\n\n    %define SaveRBX XMM0        ; use available scratch XMM to save all regs\n    %define SaveR12 XMM1\n    %define SaveR13 XMM2\n    %define SaveR14 XMM3\n    %define SaveR15 XMM4\n  %ifdef USE_PREFETCH\n    %define SaveRBP XMM5\n  %endif\n\n%endif\n\n\n%macro ACCUMULATE 1\n\n    ADCSBB  Limb%1, [Op2 + 8 * %1]\n    mov     [Op3 + 8 * %1], Limb%1\n%endmacro\n\n\n    align   32\n\nGLOBAL_FUNC  mpn_addlsh1_n\n\n%ifdef USE_WIN64\n  %ifdef USE_PREFETCH\n    sub     RSP, 16\n    mov     [RSP+8], R15\n    mov     [RSP], RBP\n  %else\n    sub     RSP, 8\n    mov     [RSP], R15\n  %endif\n    movq    SaveRBX, RBX\n    movq    SaveRSI, RSI\n    movq    SaveRDI, RDI\n    movq    SaveR12, R12\n    movq    SaveR13, R13\n    movq    SaveR14, R14\n%else\n  %ifdef USE_PREFETCH\n    movq    SaveRBP, RBP\n  %endif\n    movq    SaveRBX, RBX\n    movq    SaveR12, R12\n    movq    SaveR13, R13\n    movq    SaveR14, R14\n    movq    SaveR15, R15\n%endif\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & addition with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n\n    ADDSUB  Limb0, [Op2]\n    mov     [Op3], Limb0\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 
63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; add Op2 to oct-limb and store in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n\n  .Exit:\n\n%ifdef USE_WIN64\n\n    movq    SaveR14, R14\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRDI, RDI\n    movq    SaveRSI, RSI\n    movq    SaveRBX, RBX\n  %ifdef USE_PREFETCH\n    mov     
[RSP], RBP\n    mov     [RSP+8], R15\n    add     RSP, 16\n  %else\n    mov     [RSP], R15\n    add     RSP, 8\n  %endif\n\n%else\n\n    movq    R15, SaveR15\n    movq    R14, SaveR14\n    movq    R13, SaveR13\n    movq    R12, SaveR12\n    movq    RBX, SaveRBX\n  %ifdef USE_PREFETCH\n    movq    RBP, SaveRBP\n  %endif\n\n%endif\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/addmul_1.as",
    "content": ";  AMD64 mpn_addmul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define RP      rcx\n    %define S1P     rbp\n    %define Size    r8\n    %define Sizeb   r8b\n    %define Limb    r9\n\n    %define Tmp0    r12\n    %define Tmp1    r13\n    %define Tmp2    rax\n    %define Tmp3    rbx\n    %define Tmp4    rsi\n    %define Tmp5    rdi\n    %define Tmp6    r10\n    %define Tmp7    r11\n    %define Tmp8    r9\n%else\n    %define RP      rdi\n    %define S1P     rsi\n    %define Size    rbp\n    %define Sizeb   bpl\n    %define Limb    rcx\n\n    %define Tmp0    r12\n    %define Tmp1    r13\n    %define Tmp2    rax\n    %define Tmp3    rbx\n    %define Tmp4    r8\n    %define Tmp5    r9\n    %define Tmp6    r10\n    %define Tmp7    r11\n    %define Tmp8    rcx\n%endif\n\n%define ADDSUB add\n%define ADCSBB adc\n\nalign 16\n\nGLOBAL_FUNC mpn_addmul_1\n\n\tpush \trbx\n\tpush \trbp\n\tpush \tr12\n\tpush \tr13\n\n\tmov \trbp, rdx ; mulx requires one input in rdx\n\tmov \trdx, Limb\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc 
\tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend ;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\tpop \tr13\n\tpop \tr12\n\tpop \trbp\n\tpop \trbx\n\n\tret\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/com_n.as",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\nBITS 64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n    %define FFFF    RSI\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n    %define FFFF    R8\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n%define QFFFF   YMM4\n%define DFFFF   XMM4\n\n    align   32\n\nGLOBAL_FUNC mpn_com_n\n    mov     RAX, Size1\n    or      RAX, RAX\n    je      .Exit      ;ajs:notshortform\n                       ; size=0 =>\n\n    ; Set a GPR to 0xFF...FF\n    mov     FFFF, -1\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .A32\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit      ;ajs:notshortform\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .A32:\n\n    test    Op2, 16\n    je      .AVX\n\n    mov    
 Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .AVX:\n\n    ; Set an AVX2 reg to 0xFF...FF\n    movq    DFFFF, FFFF\n    vbroadcastsd QFFFF, DFFFF\n\n    mov     Offs, 128\n    jmp     .AVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .AVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqu QLimb2, [Op1+64]\n    vpxor   QLimb2, QLimb2, QFFFF\n    vmovdqu QLimb3, [Op1+96]\n    vpxor   QLimb3, QLimb3, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .AVXCheck:\n\n    sub     Size1, 16\n    jnc     .AVXLoop\n\n    add     Size1, 16\n    je      .Exit ;ajs:notshortform\n                  ; AVX copied operand fully =>\n\n    ; copy remaining max. 
15 limb\n    test    Size1, 8\n    je      .Four\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .Four:\n\n    test    Size1, 4\n    je      .Two\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .Two:\n\n    test    Size1, 2\n    je      .One\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    pxor    DLimb0, DLimb0, DFFFF\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .One:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/copyd.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n\n; mpn_copyd(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving high to low in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$-L3$ if destination is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - startup overhead of \"rep movsq\" with negative direction is 200 cycles!!!\n;   - negative 
direction is unfavourable compared to positive \"rep movsq\" and\n;     to AVX.\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nGLOBAL_FUNC mpn_copyd\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; Size=0 =>\n\n    lea     Op1, [Op1+8*Size1-8]\n    lea     Op2, [Op2+8*Size1-8]\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    jne     .lCpyDecA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 8\n    sub     Op2, 8\n\n  .lCpyDecA32:\n\n    test    Op2, 16\n    jnz     .lCpyDecAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyDecAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyDecAVXLoop:\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqu QLimb2, [Op1-88]\n    vmovdqu QLimb3, [Op1-120]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n    vmovdqa [Op2-88], QLimb2\n    vmovdqa [Op2-120], QLimb3\n\n    sub     Op1, Offs\n    sub     Op2, Offs\n\n  .lCpyDecAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyDecAVXLoop\n\n    add     Size1, 16\n   
 je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyDecFour\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lCpyDecFour:\n\n    test    Size1, 4\n    je      .lCpyDecTwo\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqa [Op2-24], QLimb0\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lCpyDecTwo:\n\n    test    Size1, 2\n    je      .lCpyDecOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n%else\n    movdqu  DLimb0, [Op1-8]\n    movdqa  [Op2-8], DLimb0\n%endif\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/copyi.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mpn_copyi(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving low to high in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$ (30%), L2$ (10%) if dest. 
is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - the break-even between AVX and \"rep movsq\" is around 10,000 limb\n; - the prologue & epilogue can still be optimized!\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nGLOBAL_FUNC mpn_copyi\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; size=0 =>\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .lCpyIncA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .lCpyIncA32:\n\n    test    Op2, 16\n    je      .lCpyIncAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyIncAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyIncAVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqu QLimb2, [Op1+64]\n    vmovdqu QLimb3, [Op1+96]\n    vmovdqa [Op2], 
QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .lCpyIncAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyIncAVXLoop\n\n    add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyIncFour\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lCpyIncFour:\n\n    test    Size1, 4\n    je      .lCpyIncTwo\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lCpyIncTwo:\n\n    test    Size1, 2\n    je      .lCpyIncOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/lshift.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux     RAX        RDI         RSI            RDX              RCX\n; Win7      RAX        RCX         RDX            R8               R9\n;\n; Description:\n; The function shifts Op1 left by n bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bits of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( Op1[ Size1-1..0 ]:ShlIn ) << 1\n; - Op1[ 0 ] >> 63\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few 
percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 31.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       R9\n    %define Limb1       R10\n    %define Limb2       R11\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShlDL0      XMM2    ; Attn: this must match ShlQL0 definition\n%define ShrDL0      XMM3    ; Attn: this must match ShrQL0 definition\n%define ShlDLCnt    XMM6    ; Attn: this must match ShlQlCnt definition\n%define ShrDLCnt    XMM7    ; Attn: this must match ShrQlCnt definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShlQL0      YMM2\n%define ShrQL0      YMM3\n%define ShlQL1      YMM4\n%define ShrQL1      YMM5\n%define ShlQLCnt    YMM6\n%define ShrQLCnt    YMM7\n\n    align   32\nGLOBAL_FUNC mpn_lshift\n\n    xor     EAX, EAX\n    sub     Size1, 1\n    jc      .Exit               ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, CL\n\n    or      Size1, Size1\n    je      .lShlEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n  %ifdef OFFS_REG\n    mov     Offs, -512\n  %endif\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShlEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    
test    Op2, 8\n    jne     .lShlEquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  .lShlEquA16:\n\n    test    Op2, 16\n    jne     .lShlEquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShlEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShrQL0, ShrQL0, 10010011b\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShlEquAVXCheck\n\n    ; main loop (prefetching enabled; unloaded cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in LD3$\n    align   16\n  .lShlEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpermq    ShrQL1, ShrQL1, 10010011b\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 00000011b\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vpermq    ShrQL0, ShrQL0, 10010011b\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 00000011b\n    vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 
64\n\n  .lShlEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShlEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shld    Limb2, Limb1, CL\n%if 1\n    vmovq   ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShlEquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShlEquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, CL\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShlEquTwo:\n\n    test    Size1, 2\n    je      .lShlEquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShlEquOne:\n\n    test    Size1, 1\n    je      .lShlEquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShlEquPost:\n\n    shl    Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/lshift1.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 left by one bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bit of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 
21.02.2016 by jn\n; - includes cache prefetching\n\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%endif\n\n    align   32\n\nGLOBAL_FUNC mpn_lshift1\n\n    xor     EAX, EAX\n    sub      Size1, 1\n    jc      .Exit               ;ajs:notshortform ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, 1\n\n    or      Size1, Size1\n    je      .lShl1EquPost       ;ajs:notshortform ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShl1EquFour       ;ajs:notshortform ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShl1EquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  .lShl1EquA16:\n\n    test 
   Op2, 16\n    jne     .lShl1EquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShl1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlq  ShrQL0, QLimb0, 63\n    vpermq  ShrQL0, ShrQL0, 147\t\t; 0b10010011\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShl1EquAVXCheck\n\n    ; main loop requires on entry:\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShl1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllq    ShlQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlq    ShrQL1, QLimb1, 63\n    vpermq    ShrQL1, ShrQL1, 147\t; 0b10010011\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 3\t; 0b00000011\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllq    ShlQL1, QLimb1, 1\n    vpsrlq    ShrQL0, QLimb0, 63\n    vpermq    ShrQL0, ShrQL0, 147\t; 0b10010011\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 3\t; 0b00000011\n    vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShl1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShl1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shr     Limb2, 63\n%if 1\n    vmovq ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n\n    ; Insert value of Limb2 into the 0-th qword of ShrDL0\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllq  ShlQL0, QLimb0, 1\n    vpor    
ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShl1EquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShl1EquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, 1\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShl1EquTwo:\n\n    test    Size1, 2\n    je      .lShl1EquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShl1EquOne:\n\n    test    Size1, 1\n    je      .lShl1EquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShl1EquPost:\n\n    shl     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/mul_1.asm",
    "content": "dnl  AMD64 mpn_mul_1 using mulx optimised for Intel Haswell.\n\ndnl  Contributed to the GNU project by Torbjörn Granlund.\n\ndnl  Copyright 2012, 2013 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\tbest\nC AMD K8,K9\tn/a\nC AMD K10\tn/a\nC AMD bd1\tn/a\nC AMD bd2\t ?\nC AMD bobcat\tn/a\nC AMD jaguar\t ?\nC Intel P4\tn/a\nC Intel PNR\tn/a\nC Intel NHM\tn/a\nC Intel SBR\tn/a\nC Intel IBR\tn/a\nC Intel HWL\t 1.57\t\tthis\nC Intel BWL\t ?\nC Intel atom\tn/a\nC VIA nano\tn/a\n\nC The loop of this code is the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\ndefine(`rp',      `%rdi')   C rcx\ndefine(`up',      `%rsi')   C rdx\ndefine(`n_param', `%rdx')   C r8\ndefine(`v0_param',`%rcx')   C r9\n\ndefine(`n',       `%rbp')\ndefine(`v0',      `%rdx')\n\n\n\n\nASM_START()\n\tTEXT\n\tALIGN(32)\nPROLOGUE(mpn_mul_1)\n\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\n\tmov\tn_param, n\n\tshr\t$2, n\n\n\ttest\t$1, R8(n_param)\n\tjnz\tL(bx1)\n\nL(bx0):\ttest\t$2, R8(n_param)\n\tmov\tv0_param, v0\n\tjnz\tL(b10)\n\nL(b00):\tmulx\t(up), %r9, %r8\n\tmulx\t8(up), %r11, %r10\n\tmulx\t16(up), %rcx, %r12\n\tlea\t-32(rp), rp\n\tjmp\tL(lo0)\n\nL(b10):\tmulx\t(up), %rcx, %r12\n\tmulx\t8(up), %rbx, %rax\n\tlea\t-16(rp), rp\n\ttest\tn, n\n\tjz\tL(cj2)\n\tmulx\t16(up), %r9, %r8\n\tlea\t16(up), up\n\tjmp\tL(lo2)\n\nL(bx1):\ttest\t$2, R8(n_param)\n\tmov\tv0_param, v0\n\tjnz\tL(b11)\n\nL(b01):\tmulx\t(up), %rbx, %rax\n\tlea\t-24(rp), rp\n\ttest\tn, n\n\tjz\tL(cj1)\n\tmulx\t8(up), %r9, %r8\n\tlea\t8(up), up\n\tjmp\tL(lo1)\n\nL(b11):\tmulx\t(up), %r11, %r10\n\tmulx\t8(up), %rcx, %r12\n\tmulx\t16(up), %rbx, %rax\n\tlea\t-8(rp), rp\n\ttest\tn, n\n\tjz\tL(cj3)\n\tlea\t24(up), up\n\tjmp\tL(lo3)\n\n\tALIGN(32)\nL(top):\tlea\t32(rp), rp\n\tmov\t%r9, (rp)\n\tadc\t%r8, %r11\nL(lo3):\tmulx\t(up), %r9, %r8\n\tmov\t%r11, 8(rp)\n\tadc\t%r10, %rcx\nL(lo2):\tmov\t%rcx, 16(rp)\n\tadc\t%r12, %rbx\nL(lo1):\tmulx\t8(up), %r11, %r10\n\tadc\t%rax, %r9\n\tmulx\t16(up), %rcx, %r12\n\tmov\t%rbx, 24(rp)\nL(lo0):\tmulx\t24(up), %rbx, %rax\n\tlea\t32(up), 
up\n\tdec\tn\n\tjnz\tL(top)\n\nL(end):\tlea\t32(rp), rp\n\tmov\t%r9, (rp)\n\tadc\t%r8, %r11\nL(cj3):\tmov\t%r11, 8(rp)\n\tadc\t%r10, %rcx\nL(cj2):\tmov\t%rcx, 16(rp)\n\tadc\t%r12, %rbx\nL(cj1):\tmov\t%rbx, 24(rp)\n\tadc\t$0, %rax\n\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/mul_basecase.as",
    "content": ";  AMD64 mpn_mul_basecase optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n\n;  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n%else\n%define rp       rdi\n%define up       rsi\n%define un_param rdx\n%define vp       rcx\n\n%define un       rbx\n%define un8      bl\n\n%define w0       r10\n%define w1       r11\n%define w2       r12\n%define w3       r13\n%define n        rbp\n%endif\n\nBITS    64\n\nalign 16\n\nGLOBAL_FUNC mpn_mul_basecase\n\tpush \trbx\n\tpush \trbp\n\tpush \tr12\n\tpush \tr13\n\tpush \tr14\n\tmov \tun, rdx\n\tneg \tun\n\n\tmov \tn, rdx\n\tsar \tn, 2\n\n\ttest \tr8b, 1\n\tjz \t.Ldo_mul_2\n\tmov \trdx, [vp]\n\n.Ldo_mul_1:\n\ttest \tun8, 1\n\tjnz \t.Lm1x1\n\n.Lm1x0:\n\ttest \tun8, 2\n\tjnz \t.Lm110\n\n.Lm100:\n\tmulx \tw2, r14, [up]\n\tmulx \tw3, w1, [up+8]\n\tlea \trp, [rp-24]\n\tjmp \t.Lm1l0\n\n.Lm110:\n\tmulx \tr9, w3, [up]\n\tmulx \tr14, w1, [up+8]\n\tlea \trp, [rp-8]\n\ttest \tn, n\n\tjz \t.Lcj2\n\tmulx \tw2, w0, [up+16]\n\tlea \tup, [up+16]\n\tjmp \t.Lm1l2\n\n.Lm1x1:\n\ttest \tun8, 2\n\tjz \t.Lm111\n\n.Lm101:\n\tmulx \tr14, r9, [up]\n\tlea \trp, [rp-16]\n\ttest \tn, n\n\tjz \t.Lcj1\n\tmulx \tw2, w0, [up+8]\n\tlea \tup, [up+8]\n\tjmp \t.Lm1l1\n\n.Lm111:\n\tmulx \tw3, w2, [up]\n\tmulx \tr9, w0, [up+8]\n\tmulx \tr14, w1, [up+16]\n\tlea \tup, [up+24]\n\ttest \tn, n\n\tjnz \t.Lgt3\n\tadd \tw3, w0\n\tjmp \t.Lcj3\n\n.Lgt3:\n\tadd \tw3, w0\n\tjmp \t.Lm1l3\n\n\n\talign 32\n.Lm1tp:\n\tlea \trp, [rp+32]\n.Lm1l3:\n\tmov \t[rp], w2\n\tmulx \tw2, w0, [up]\n.Lm1l2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lm1l1:\n\tadc \tr14, w0\n\tmov \t[rp+16], r9\n\tmulx \tw3, w1, [up+8]\n.Lm1l0:\n\tmov \t[rp+24], r14\n\tmulx \tr9, w0, [up+16]\n\tadc \tw2, w1\n\tmulx \tr14, w1, [up+24]\n\tadc \tw3, w0\n\tlea \tup, [up+32]\n\tdec \tn\n\tjnz \t.Lm1tp\n\n.Lm1ed:\n\tlea \trp, [rp+32]\n.Lcj3:\n\tmov \t[rp], w2\n.Lcj2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lcj1:\n\tmov \t[rp+16], r9\n\tadc \tr14, 0\n\tmov \t[rp+24], r14\n\n\tdec \tr8d\n\tjz \t.Lret5\n\n\tlea \tvp, [vp+8]\n\tlea \trp, [rp+32]\n\tjmp 
\t.Ldo_addmul\n\n.Ldo_mul_2:\n\tmov \tr9, [vp]\n\tmov \tr14, [vp+8]\n\tlea \tn, [un]\n\tsar \tn, 2\n\ttest \tun8, 1\n\tjnz \t.Lm2x1\n\n.Lm2x0:\n\txor \tw0, w0\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw1, w2, r9\n\tjz \t.Lm2l0\n\n.Lm210:\n\tlea \trp, [rp-16]\n\tlea \tup, [up-16]\n\tjmp \t.Lm2l2\n\n.Lm2x1:\n\txor \tw2, w2\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw3, w0, r9\n\tjz \t.Lm211\n\n.Lm201:\n\tlea \trp, [rp-24]\n\tlea \tup, [up+8]\n\tjmp \t.Lm2l1\n\n.Lm211:\n\tlea \trp, [rp-8]\n\tlea \tup, [up-8]\n\tjmp \t.Lm2l3\n\n\n\talign 16\n.Lm2tp:\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l0:\n\tmov \t[rp], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+8]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n.Lm2l3:\n\tmov \t[rp+8], w0\n\tadc \tw3, 0\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up+16]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l2:\n\tmov \t[rp+16], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+24]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n\tlea \tup, [up+32]\n.Lm2l1:\n\tmov \t[rp+24], w0\n\tadc \tw3, 0\n\tinc \tn\n\tlea \trp, [rp+32]\n\tjnz \t.Lm2tp\n\n.Lm2ed:\n\tmulx \trax, rdx, r14\n\tadd \tw2, rdx\n\tadc \trax, 0\n\tadd \tw2, w3\n\tmov \t[rp], w2\n\tadc \trax, 0\n\tmov \t[rp+8], rax\n\tadd \tr8d, -2\n\tjz \t.Lret5\n\tlea \tvp, [vp+16]\n\tlea \trp, [rp+16]\n\n.Ldo_addmul:\n\tpush \tr15\n\tpush \tr8\n\tlea \trp, [un*8+rp]\n\tlea \tup, [un*8+up]\n\n.Louter:\n\tmov \tr9, [vp]\n\tmov \tr8, [vp+8]\n\tlea \tn, [un+2]\n\tsar \tn, 2\n\tmov \trdx, [up]\n\ttest \tun8, 1\n\tjnz \t.Lbx1\n\n.Lbx0:\n\tmov \tr14, [rp]\n\tmov \tr15, [rp+8]\n\tmulx \tw1, rax, r9\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tmov \t[rp], r14\n\tadd \tr15, rax\n\tadc 
\tw2, 0\n\tmov \trdx, [up+8]\n\ttest \tun8, 2\n\tjnz \t.Lb10\n\n.Lb00:\n\tlea \tup, [up+16]\n\tlea \trp, [rp+16]\n\tjmp \t.Llo0\n\n.Lb10:\n\tmov \tr14, [rp+16]\n\tlea \tup, [up+32]\n\tmulx \tw3, rax, r9\n\tjmp \t.Llo2\n\n.Lbx1:\n\tmov \tr15, [rp]\n\tmov \tr14, [rp+8]\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tmov \trdx, [up+8]\n\tmov \t[rp], r15\n\tmulx \tw1, rax, r9\n\ttest \tun8, 2\n\tjz \t.Lb11\n\n.Lb01:\n\tmov \tr15, [rp+16]\n\tlea \trp, [rp+24]\n\tlea \tup, [up+24]\n\tjmp \t.Llo1\n\n.Lb11:\n\tlea \trp, [rp+8]\n\tlea \tup, [up+8]\n\tjmp \t.Llo3\n\n\n\talign 16\n.Ltop:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n.Llo2:\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tlea \trp, [rp+32]\n\tadd \tr15, w1\n\tmov \trdx, [up-16]\n\tmov \t[rp-24], r15\n\tadc \tw3, 0\n\tadd \tr14, w2\n\tmov \tr15, [rp-8]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo1:\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tadd \tr14, w3\n\tmov \t[rp-16], r14\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tadd \tr15, w0\n\tmov \trdx, [up-8]\n\tadc \tw2, 0\n.Llo0:\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmov \tr14, [rp]\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tadd \tr15, w1\n\tmov \t[rp-8], r15\n\tadc \tw3, 0\n\tmov \trdx, [up]\n\tadd \tr14, w2\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo3:\n\tadd \tr14, rax\n\tadc \tw1, 0\n\tmulx \tw2, rax, r8\n\tadd \tr14, w3\n\tmov \tr15, [rp+8]\n\tmov \t[rp], r14\n\tmov \tr14, [rp+16]\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\tlea \tup, [up+32]\n\tinc \tn\n\tjnz \t.Ltop\n\n.Lend:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \trax, rdx, r8\n\tadd \tr15, w1\n\tmov \t[rp+8], r15\n\tadc \tw3, 0\n\tadd \trdx, w2\n\tadc \trax, 0\n\tadd \trdx, w3\n\tmov \t[rp+16], rdx\n\tadc \trax, 0\n\tmov \t[rp+24], rax\n\n\tadd \tDWORD 
[rsp], -2\n\tlea \tvp, [vp+16]\n\tlea \tup, [up+un*8-16]\n\tlea \trp, [rp+un*8+32]\n\tjnz \t.Louter\n\n\tpop \trax\n\tpop \tr15\n.Lret5:\n\tpop \tr14\n.Lret4:\n\tpop \tr13\n.Lret3:\n\tpop \tr12\n.Lret2:\n\tpop \trbp\n\tpop \trbx\n\n\tret\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/rshift.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux     RAX        RDI         RSI            RDX              RCX\n; Win7      RAX        RCX         RDX            R8               R9\n;\n; Description:\n; The function shifts Op1 right by Shift bits, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bits of\n; Op1. 
The function operates increasing in memory supporting in place shifts.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( ShrIn:Op1[ Size1-1..0 ] ) >> Shift\n; - Op1[ 0 ] << ( 64-Shift )\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benchmarked on 30.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       R9\n    %define Limb1       R10\n    %define Limb2       R11\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n%define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n%define ShrDLCnt    XMM6    ; Attn: this must match ShrQlCnt definition\n%define ShlDLCnt    XMM7    ; Attn: this must match ShlQlCnt definition\n\n%define QLimb0      YMM0\n%define QLimb1      
YMM1\n%define ShrQL0      YMM2\n%define ShlQL0      YMM3\n%define ShrQL1      YMM4\n%define ShlQL1      YMM5\n%define ShrQLCnt    YMM6\n%define ShlQLCnt    YMM7\n\n    align   32\nGLOBAL_FUNC mpn_rshift\n\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     Limb1, [Op1]\n    shrd    RAX, Limb1, CL\n\n    sub     Size1, 1\n    je      .lShrEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n    mov     Offs, 512\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShrEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShrEquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShrEquAlign16:\n\n    test    Op2, 16\n    je      .lShrEquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShrEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShrEquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in LD3$\n    align   16\n  .lShrEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlvq 
  ShrQL0, QLimb0, ShrQLCnt\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShrEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShrEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shrd    Limb2, Limb1, CL\n%if 1\n    vmovq   ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShrEquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShrEquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShrEquTwo:\n\n    test    Size1, 2\n    je      .lShrEquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShrEquOne:\n\n    test    Size1, 1\n    je      .lShrEquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n    ; store most significant limb considering shift-in part\n  .lShrEquPost:\n\n    shr     Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/rshift1.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 right by one bit, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bit of Op1.\n; The function operates increasing in memory supporting in place shifts.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - AVX based version implemented, tested & 
benched on 21.02.2016 by jn\n; - includes cache prefetching\n\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%endif\n\n    align   32\n\nGLOBAL_FUNC mpn_rshift1\n\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     RAX, [Op1]\n    mov     Limb1, RAX\n    shl     RAX, 63\n\n    sub     Size1, 1\n    je      .lShr1EquPost       ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShr1EquFour       ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShr1EquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShr1EquAlign16:\n\n    test    Op2, 16\n    je      .lShr1EquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 1\n    mov     
[Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShr1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllq  ShlQL0, QLimb0, 63\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShr1EquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShr1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlq    ShrQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllq    ShlQL1, QLimb1, 63\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlq    ShrQL1, QLimb1, 1\n    vpsllq    ShlQL0, QLimb0, 63\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShr1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShr1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shl     Limb2, 63\n%if 1\n    vmovq ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlq  ShrQL0, QLimb0, 1\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShr1EquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShr1EquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShr1EquTwo:\n\n    test    Size1, 2\n    je      .lShr1EquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShr1EquOne:\n\n    test    Size1, 1\n    je      .lShr1EquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n  .lShr1EquPost:\n\n    shr     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/sqr_basecase.asm",
    "content": "dnl  AMD64 mpn_sqr_basecase optimised for Intel Haswell.\n\ndnl  Contributed to the GNU project by Torbjörn Granlund.\n\ndnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_2\t\taddmul_2\tsqr_diag_addlsh1\nC AMD K8,K9\tn/a\t\tn/a\t\t\tn/a\nC AMD K10\tn/a\t\tn/a\t\t\tn/a\nC AMD bull\tn/a\t\tn/a\t\t\tn/a\nC AMD pile\tn/a\t\tn/a\t\t\tn/a\nC AMD steam\t ?\t\t ?\t\t\t ?\nC AMD bobcat\tn/a\t\tn/a\t\t\tn/a\nC AMD jaguar\t ?\t\t ?\t\t\t ?\nC Intel P4\tn/a\t\tn/a\t\t\tn/a\nC Intel core\tn/a\t\tn/a\t\t\tn/a\nC Intel NHM\tn/a\t\tn/a\t\t\tn/a\nC Intel SBR\tn/a\t\tn/a\t\t\tn/a\nC Intel IBR\tn/a\t\tn/a\t\t\tn/a\nC Intel HWL\t 1.86\t\t 2.15\t\t\t~2.5\nC Intel BWL\t ?\t\t ?\t\t\t ?\nC Intel atom\tn/a\t\tn/a\t\t\tn/a\nC VIA nano\tn/a\t\tn/a\t\t\tn/a\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjörn Granlund, except\nC that the sqr_diag_addlsh1 loop was manually written.\n\nC TODO\nC  * Replace current unoptimised sqr_diag_addlsh1 loop; 1.75 c/l might be\nC    possible.\nC  * Consider splitting outer loop into 2, one for n = 1 (mod 2) and one for\nC    n = 0 (mod 2).  These loops could fall into specific \"corner\" code.\nC  * Consider splitting outer loop into 4.\nC  * Streamline pointer updates.\nC  * Perhaps suppress a few more xor insns in feed-in code.\nC  * Make sure we write no dead registers in feed-in code.\nC  * We might use 32-bit size ops, since n >= 2^32 is non-terminating.  
Watch\nC    out for negative sizes being zero-extended, though.\nC  * Provide straight-line code for n = 4; then look for simplifications in\nC    main code.\n\ndefine(`rp',\t  `%rdi')\ndefine(`up',\t  `%rsi')\ndefine(`un_param',`%rdx')\n\n\nASM_START()\n\tTEXT\n\tALIGN(32)\nPROLOGUE(mpn_sqr_basecase)\n\n\n\tcmp\t$2, un_param\n\tjae\tL(gt1)\n\n\tmov\t(up), %rdx\n\tmulx\t%rdx, %rax, %rdx\n\tmov\t%rax, (rp)\n\tmov\t%rdx, 8(rp)\n\t\n\tret\n\nL(gt1):\tjne\tL(gt2)\n\n\tmov\t(up), %rdx\n\tmov\t8(up), %rcx\n\tmulx\t%rcx, %r9, %r10\t\tC v0 * v1\tW 1 2\n\tmulx\t%rdx, %rax, %r8\t\tC v0 * v0\tW 0 1\n\tmov\t%rcx, %rdx\n\tmulx\t%rdx, %r11, %rdx\tC v1 * v1\tW 2 3\n\tadd\t%r9, %r9\t\tC\t\tW 1\n\tadc\t%r10, %r10\t\tC\t\tW 2\n\tadc\t$0, %rdx\t\tC\t\tW 3\n\tadd\t%r9, %r8\t\tC W 1\n\tadc\t%r11, %r10\t\tC W 2\n\tadc\t$0, %rdx\t\tC W 3\n\tmov\t%rax, (rp)\n\tmov\t%r8, 8(rp)\n\tmov\t%r10, 16(rp)\n\tmov\t%rdx, 24(rp)\n\t\n\tret\n\nL(gt2):\tcmp\t$4, un_param\n\tjae\tL(gt3)\ndefine(`v0', `%r8')\ndefine(`v1', `%r9')\ndefine(`w0', `%r10')\ndefine(`w2', `%r11')\n\n\tmov\t(up), v0\n\tmov\t8(up), %rdx\n\tmov\t%rdx, v1\n\tmulx\tv0, w2, %rax\n\tmov\t16(up), %rdx\n\tmulx\tv0, w0, %rcx\n\tmov\tw2, %r8\n\tadd\t%rax, w0\n\tadc\t$0, %rcx\n\tmulx\tv1, %rdx, %rax\n\tadd\t%rcx, %rdx\n\tmov\t%rdx, 24(rp)\n\tadc\t$0, %rax\n\tmov\t%rax, 32(rp)\n\txor\tR32(%rcx), R32(%rcx)\n\tmov\t(up), %rdx\n\tmulx\t%rdx, %rax, w2\n\tmov\t%rax, (rp)\n\tadd\t%r8, %r8\n\tadc\tw0, w0\n\tsetc\tR8(%rcx)\n\tmov\t8(up), %rdx\n\tmulx\t%rdx, %rax, %rdx\n\tadd\tw2, %r8\n\tadc\t%rax, w0\n\tmov\t%r8, 8(rp)\n\tmov\tw0, 16(rp)\n\tmov\t24(rp), %r8\n\tmov\t32(rp), w0\n\tlea\t(%rdx,%rcx), w2\n\tadc\t%r8, %r8\n\tadc\tw0, w0\n\tsetc\tR8(%rcx)\n\tmov\t16(up), %rdx\n\tmulx\t%rdx, %rax, %rdx\n\tadd\tw2, %r8\n\tadc\t%rax, w0\n\tmov\t%r8, 24(rp)\n\tmov\tw0, 32(rp)\n\tadc\t%rcx, %rdx\n\tmov\t%rdx, 40(rp)\n\t\n\tret\n\nL(gt3):\n\ndefine(`v0', `%r8')\ndefine(`v1', `%r9')\ndefine(`w0', `%r10')\ndefine(`w1', `%r11')\ndefine(`w2', `%rbx')\ndefine(`w3', 
`%rbp')\ndefine(`un', `%r12')\ndefine(`n',  `%rcx')\n\ndefine(`X0', `%r13')\ndefine(`X1', `%r14')\n\nL(do_mul_2):\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\tmov\t$0, R32(un)\n\tsub\tun_param, un\t\tC free up rdx\n\tpush\tun\n\tmov\t(up), v0\n\tmov\t8(up), %rdx\n\tlea\t2(un), n\n\tsar\t$2, n\t\t\tC FIXME: suppress, change loop?\n\tinc\tun\t\t\tC decrement |un|\n\tmov\t%rdx, v1\n\n\ttest\t$1, R8(un)\n\tjnz\tL(mx1)\n\nL(mx0):\tmulx\tv0, w2, w1\n\tmov\t16(up), %rdx\n\tmov\tw2, 8(rp)\n\txor\tw2, w2\n\tmulx\tv0, w0, w3\n\ttest\t$2, R8(un)\n\tjz\tL(m00)\n\nL(m10):\tlea\t-8(rp), rp\n\tlea\t-8(up), up\n\tjmp\tL(mlo2)\n\nL(m00):\tlea\t8(up), up\n\tlea\t8(rp), rp\n\tjmp\tL(mlo0)\n\nL(mx1):\tmulx\tv0, w0, w3\n\tmov\t16(up), %rdx\n\tmov\tw0, 8(rp)\n\txor\tw0, w0\n\tmulx\tv0, w2, w1\n\ttest\t$2, R8(un)\n\tjz\tL(mlo3)\n\nL(m01):\tlea\t16(rp), rp\n\tlea\t16(up), up\n\tjmp\tL(mlo1)\n\n\tALIGN(32)\nL(mtop):\tmulx\tv1, %rax, w0\n\tadd\t%rax, w2\t\tC 0\n\tmov\t(up), %rdx\n\tmulx\tv0, %rax, w1\n\tadc\t$0, w0\t\t\tC 1\n\tadd\t%rax, w2\t\tC 0\nL(mlo1):\tadc\t$0, w1\t\t\tC 1\n\tadd\tw3, w2\t\t\tC 0\n\tmov\tw2, (rp)\t\tC 0\n\tadc\t$0, w1\t\t\tC 1\n\tmulx\tv1, %rax, w2\n\tadd\t%rax, w0\t\tC 1\n\tmov\t8(up), %rdx\n\tadc\t$0, w2\t\t\tC 2\n\tmulx\tv0, %rax, w3\n\tadd\t%rax, w0\t\tC 1\n\tadc\t$0, w3\t\t\tC 2\nL(mlo0):\tadd\tw1, w0\t\t\tC 1\n\tmov\tw0, 8(rp)\t\tC 1\n\tadc\t$0, w3\t\t\tC 2\n\tmulx\tv1, %rax, w0\n\tadd\t%rax, w2\t\tC 2\n\tmov\t16(up), %rdx\n\tmulx\tv0, %rax, w1\n\tadc\t$0, w0\t\t\tC 3\n\tadd\t%rax, w2\t\tC 2\n\tadc\t$0, w1\t\t\tC 3\nL(mlo3):\tadd\tw3, w2\t\t\tC 2\n\tmov\tw2, 16(rp)\t\tC 2\n\tadc\t$0, w1\t\t\tC 3\n\tmulx\tv1, %rax, w2\n\tadd\t%rax, w0\t\tC 3\n\tmov\t24(up), %rdx\n\tadc\t$0, w2\t\t\tC 4\n\tmulx\tv0, %rax, w3\n\tadd\t%rax, w0\t\tC 3\n\tadc\t$0, w3\t\t\tC 4\nL(mlo2):\tadd\tw1, w0\t\t\tC 3\n\tlea\t32(up), up\n\tmov\tw0, 24(rp)\t\tC 3\n\tadc\t$0, w3\t\t\tC 4\n\tinc\tn\n\tlea\t32(rp), rp\n\tjnz\tL(mtop)\n\nL(mend):\tmulx\tv1, %rdx, 
%rax\n\tadd\t%rdx, w2\n\tadc\t$0, %rax\n\tadd\tw3, w2\n\tmov\tw2, (rp)\n\tadc\t$0, %rax\n\tmov\t%rax, 8(rp)\n\n\tlea\t16(up), up\n\tlea\t-16(rp), rp\n\nL(do_addmul_2):\nL(outer):\n\tlea\t(up,un,8), up\t\tC put back up to 2 positions above last time\n\tlea\t48(rp,un,8), rp\t\tC put back rp to 4 positions above last time\n\n\tmov\t-8(up), v0\t\tC shared between addmul_2 and corner\n\n\tadd\t$2, un\t\t\tC decrease |un|\n\tcmp\t$-2, un\n\tjge\tL(corner)\n\n\tmov\t(up), v1\n\n\tlea\t1(un), n\n\tsar\t$2, n\t\t\tC FIXME: suppress, change loop?\n\n\tmov\tv1, %rdx\n\ttest\t$1, R8(un)\n\tjnz\tL(bx1)\n\nL(bx0):\tmov\t(rp), X0\n\tmov\t8(rp), X1\n\tmulx\tv0, %rax, w1\n\tadd\t%rax, X0\n\tadc\t$0, w1\n\tmov\tX0, (rp)\n\txor\tw2, w2\n\ttest\t$2, R8(un)\n\tjnz\tL(b10)\n\nL(b00):\tmov\t8(up), %rdx\n\tlea\t16(rp), rp\n\tlea\t16(up), up\n\tjmp\tL(lo0)\n\nL(b10):\tmov\t8(up), %rdx\n\tmov\t16(rp), X0\n\tlea\t32(up), up\n\tinc\tn\n\tmulx\tv0, %rax, w3\n\tjz\tL(ex)\n\tjmp\tL(lo2)\n\nL(bx1):\tmov\t(rp), X1\n\tmov\t8(rp), X0\n\tmulx\tv0, %rax, w3\n\tmov\t8(up), %rdx\n\tadd\t%rax, X1\n\tadc\t$0, w3\n\txor\tw0, w0\n\tmov\tX1, (rp)\n\tmulx\tv0, %rax, w1\n\ttest\t$2, R8(un)\n\tjz\tL(b11)\n\nL(b01):\tmov\t16(rp), X1\n\tlea\t24(rp), rp\n\tlea\t24(up), up\n\tjmp\tL(lo1)\n\nL(b11):\tlea\t8(rp), rp\n\tlea\t8(up), up\n\tjmp\tL(lo3)\n\n\tALIGN(32)\nL(top):\tmulx\tv0, %rax, w3\n\tadd\tw0, X1\n\tadc\t$0, w2\nL(lo2):\tadd\t%rax, X1\n\tadc\t$0, w3\n\tmulx\tv1, %rax, w0\n\tadd\t%rax, X0\n\tadc\t$0, w0\n\tlea\t32(rp), rp\n\tadd\tw1, X1\n\tmov\t-16(up), %rdx\n\tmov\tX1, -24(rp)\n\tadc\t$0, w3\n\tadd\tw2, X0\n\tmov\t-8(rp), X1\n\tmulx\tv0, %rax, w1\n\tadc\t$0, w0\nL(lo1):\tadd\t%rax, X0\n\tmulx\tv1, %rax, w2\n\tadc\t$0, w1\n\tadd\tw3, X0\n\tmov\tX0, -16(rp)\n\tadc\t$0, w1\n\tadd\t%rax, X1\n\tadc\t$0, w2\n\tadd\tw0, X1\n\tmov\t-8(up), %rdx\n\tadc\t$0, w2\nL(lo0):\tmulx\tv0, %rax, w3\n\tadd\t%rax, X1\n\tadc\t$0, w3\n\tmov\t(rp), X0\n\tmulx\tv1, %rax, w0\n\tadd\t%rax, X0\n\tadc\t$0, w0\n\tadd\tw1, X1\n\tmov\tX1, 
-8(rp)\n\tadc\t$0, w3\n\tmov\t(up), %rdx\n\tadd\tw2, X0\n\tmulx\tv0, %rax, w1\n\tadc\t$0, w0\nL(lo3):\tadd\t%rax, X0\n\tadc\t$0, w1\n\tmulx\tv1, %rax, w2\n\tadd\tw3, X0\n\tmov\t8(rp), X1\n\tmov\tX0, (rp)\n\tmov\t16(rp), X0\n\tadc\t$0, w1\n\tadd\t%rax, X1\n\tadc\t$0, w2\n\tmov\t8(up), %rdx\n\tlea\t32(up), up\n\tinc\tn\n\tjnz\tL(top)\n\nL(end):\tmulx\tv0, %rax, w3\n\tadd\tw0, X1\n\tadc\t$0, w2\nL(ex):\tadd\t%rax, X1\n\tadc\t$0, w3\n\tmulx\tv1, %rdx, %rax\n\tadd\tw1, X1\n\tmov\tX1, 8(rp)\n\tadc\t$0, w3\n\tadd\tw2, %rdx\n\tadc\t$0, %rax\n\tadd\t%rdx, w3\n\tmov\tw3, 16(rp)\n\tadc\t$0, %rax\n\tmov\t%rax, 24(rp)\n\n\tjmp\tL(outer)\t\tC loop until a small corner remains\n\nL(corner):\n\tpop\tun\n\tmov\t(up), %rdx\n\tjg\tL(small_corner)\n\n\tmov\t%rdx, v1\n\tmov\t(rp), X0\n\tmov\t%rax, X1\t\tC Tricky rax reuse of last iteration\n\tmulx\tv0, %rax, w1\n\tadd\t%rax, X0\n\tadc\t$0, w1\n\tmov\tX0, (rp)\n\tmov\t8(up), %rdx\n\tmulx\tv0, %rax, w3\n\tadd\t%rax, X1\n\tadc\t$0, w3\n\tmulx\tv1, %rdx, %rax\n\tadd\tw1, X1\n\tmov\tX1, 8(rp)\n\tadc\t$0, w3\n\tadd\tw3, %rdx\n\tmov\t%rdx, 16(rp)\n\tadc\t$0, %rax\n\tmov\t%rax, 24(rp)\n\tlea\t32(rp), rp\n\tlea\t16(up), up\n\tjmp\tL(com)\n\nL(small_corner):\n\tmulx\tv0, X1, w3\n\tadd\t%rax, X1\t\tC Tricky rax reuse of last iteration\n\tadc\t$0, w3\n\tmov\tX1, (rp)\n\tmov\tw3, 8(rp)\n\tlea\t16(rp), rp\n\tlea\t8(up), up\n\nL(com):\n\nL(sqr_diag_addlsh1):\n\tlea\t8(up,un,8), up\t\tC put back up at its very beginning\n\tlea\t(rp,un,8), rp\n\tlea\t(rp,un,8), rp\t\tC put back rp at its very beginning\n\tinc\tun\n\n\tmov\t-8(up), %rdx\n\txor\tR32(%rbx), R32(%rbx)\tC clear CF as side effect\n\tmulx\t%rdx, %rax, %r10\n\tmov\t%rax, 8(rp)\n\tmov\t16(rp), %r8\n\tmov\t24(rp), %r9\n\tjmp\tL(dm)\n\n\tALIGN(16)\nL(dtop):\tmov\t32(rp), %r8\n\tmov\t40(rp), %r9\n\tlea\t16(rp), rp\n\tlea\t(%rdx,%rbx), %r10\nL(dm):\tadc\t%r8, %r8\n\tadc\t%r9, %r9\n\tsetc\tR8(%rbx)\n\tmov\t(up), %rdx\n\tlea\t8(up), up\n\tmulx\t%rdx, %rax, %rdx\n\tadd\t%r10, %r8\n\tadc\t%rax, 
%r9\n\tmov\t%r8, 16(rp)\n\tmov\t%r9, 24(rp)\n\tinc\tun\n\tjnz\tL(dtop)\n\nL(dend):\tadc\t%rbx, %rdx\n\tmov\t%rdx, 32(rp)\n\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\t\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/haswell/avx/submul_1.as",
    "content": ";  AMD64 mpn_submul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define RP      rcx\n    %define S1P     rbp\n    %define Size    r8\n    %define Sizeb   r8b\n    %define Limb    r9\n\n    %define Tmp0    r12\n    %define Tmp1    r13\n    %define Tmp2    rax\n    %define Tmp3    rbx\n    %define Tmp4    rsi\n    %define Tmp5    rdi\n    %define Tmp6    r10\n    %define Tmp7    r11\n    %define Tmp8    r9\n%else\n    %define RP      rdi\n    %define S1P     rsi\n    %define Size    rbp\n    %define Sizeb   bpl\n    %define Limb    rcx\n\n    %define Tmp0    r12\n    %define Tmp1    r13\n    %define Tmp2    rax\n    %define Tmp3    rbx\n    %define Tmp4    r8\n    %define Tmp5    r9\n    %define Tmp6    r10\n    %define Tmp7    r11\n    %define Tmp8    rcx\n%endif\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\nalign 16\n\nGLOBAL_FUNC mpn_submul_1\n\n\tpush \trbx\n\tpush \trbp\n\tpush \tr12\n\tpush \tr13\n\n\tmov \trbp, rdx ; mulx requires one input in rdx\n\tmov \trdx, Limb\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc 
\tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend ;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\tpop \tr13\n\tpop \tr12\n\tpop \trbp\n\tpop \trbx\n\n\tret\n"
  },
  {
    "path": "mpn/x86_64/haswell/hgcd2.c",
    "content": "/* hgcd2.c\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2012 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if GMP_NAIL_BITS == 0\n\n/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return\n   the remainder. 
*/\n\nstatic inline unsigned long\nlzcnt(const unsigned long a)\n{\n  unsigned long t;\n  __asm__(\"lzcnt %1, %0\"\n          : \"=r\" (t)\n          : \"r\" (a)\n          : \"cc\");\n  return t;\n}\n\nstatic inline unsigned long\nshld(const unsigned long hi, const unsigned long lo, const unsigned char i)\n{\n  unsigned long r;\n  __asm__ (\n    \"shldq %3, %1, %0\\n\"\n    : \"=rm\" (r)\n    : \"r\" (lo), \"0\" (hi), \"cJ\" (i) /* i can be in %cl or a literal constant < 64 */\n    : \"cc\");\n return r;\n}\n\nstatic inline unsigned long\nshrd(const unsigned long hi, const unsigned long lo, const unsigned char i)\n{\n  unsigned long r;\n  __asm__ (\n    \"shrdq %3, %2, %0\\n\"\n    : \"=rm\" (r)\n    : \"0\" (lo), \"r\" (hi), \"cJ\" (i) /* i can be in %cl or a literal constant < 64 */\n    : \"cc\");\n return r;\n}\n\n/* if(s1>=s2){r=s1-s2;c++;}\n   r and s1 can be the same variable, but no other alias is allowed */\n#define TRY_SUB(r, s1, s2, c) \\\ndo { \\\n    mp_limb_t t = s1; \\\n    __asm__( \\\n    \"sub     %3, %1\\n\\t\" /* t -= s2; */ \\\n    \"cmovnc  %1, %0\\n\\t\" /* if (s1 >= s2) r = s1;  */ \\\n    \"sbb     $-1, %2\\n\\t\" /* if (t >= s) c++; */ \\\n    : \"+&r\" (r), \"+&r\" (t), \"+rm\" (c) \\\n    : \"rm\" (s2) \\\n    : \"cc\"); \\\n} while(0)\n\n#define TRY_SUB2(rhi, rlo, shi, slo, c) \\\ndo { \\\n    const mp_limb_t thi = rhi, tlo = rlo; \\\n    __asm__( \\\n    \"sub     %6, %1\\n\\t\" /* r -= s; */ \\\n    \"sbb     %5, %0\\n\\t\" \\\n    \"cmovc   %4, %1\\n\\t\" /* if (t < s) r = t;  */ \\\n    \"cmovc   %3, %0\\n\\t\" \\\n    \"sbb     $-1, %2\\n\\t\" /* if (t >= s) c++; */ \\\n    : \"+&r\" (rhi), \"+&r\" (rlo), \"+r\" (c) \\\n    : \"rm\" (thi), \"rm\" (tlo), \"rm\" (shi), \"rm\" (slo) \\\n    : \"cc\"); \\\n} while(0)\n\nstatic inline mp_limb_t\ncmp2(const mp_limb_t ahi, const mp_limb_t alo, const mp_limb_t bhi, const mp_limb_t blo)\n{\n  mp_limb_t r = ahi;\n  __asm__(\n  \"cmpq %3, %1\\n\\t\"\n  \"sbbq %2, %0\\n\\t\"\n  \"sbbq %0, 
%0\\n\\t\"\n  : \"+&r\" (r)\n  : \"r\" (alo), \"rm\" (bhi), \"rm\" (blo)\n  : \"cc\");\n  return r;\n}\n\n/* Single-limb division optimized for small quotients. */\nstatic inline mp_limb_t\ndiv1 (mp_ptr rp,\n      mp_limb_t n0,\n      mp_limb_t d0)\n{\n  mp_limb_t q = 0;\n  // const mp_limb_t correct_q = n0 / d0, correct_r = n0 % d0;\n\n  int cnt = lzcnt(d0) - lzcnt(n0);\n  d0 <<= cnt; /* (d0+1)/2 <= n0 <= 2*d0 - 1 */\n\n  q = 0;\n  TRY_SUB(n0, n0, d0, q);\n  while (cnt)\n    {\n      q <<= 1;\n      d0 >>= 1;\n      TRY_SUB(n0, n0, d0, q);\n      cnt--;\n    }\n\n  // ASSERT_ALWAYS(n0 == correct_r);\n  // ASSERT_ALWAYS(q != correct_q);\n  *rp = n0;\n  return q;\n}\n\n/* Two-limb division optimized for small quotients.  */\nstatic inline mp_limb_t\ndiv2 (mp_ptr rp,\n      mp_limb_t nh, mp_limb_t nl,\n      mp_limb_t dh, mp_limb_t dl)\n{\n  mp_limb_t q = 0;\n\n  int cnt = lzcnt(dh) - lzcnt(nh);\n  dh = shld(dh, dl, cnt);\n  dl <<= cnt;\n\n  TRY_SUB2(nh, nl, dh, dl, q);\n\n  while (cnt)\n    {\n      q <<= 1;\n      dl = shrd(dh, dl, 1);\n      dh = dh >> 1;\n      TRY_SUB2(nh, nl, dh, dl, q);\n      cnt--;\n    }\n\n  rp[0] = nl;\n  rp[1] = nh;\n\n  return q;\n}\n\n#else /* GMP_NAIL_BITS != 0 */\n/* Check all functions for nail support. */\n/* hgcd2 should be defined to take inputs including nail bits, and\n   produce a matrix with elements also including nail bits. This is\n   necessary, for the matrix elements to be useful with mpn_mul_1,\n   mpn_addmul_1 and friends. */\n#error Not implemented\n#endif /* GMP_NAIL_BITS != 0 */\n\n/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs\n   matrix M. Returns 1 if we make progress, i.e. can perform at least\n   one subtraction. Otherwise returns zero. */\n\n/* FIXME: Possible optimizations:\n\n   The div2 function starts with checking the most significant bit of\n   the numerator. 
We can maintained normalized operands here, call\n   hgcd with normalized operands only, which should make the code\n   simpler and possibly faster.\n\n   Experiment with table lookups on the most significant bits.\n\n   This function is also a candidate for assembler implementation.\n*/\nint\nmpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,\n\t   struct hgcd_matrix1 *M)\n{\n  mp_limb_t u00, u01, u10, u11;\n\n  if (ah < 2 || bh < 2)\n    return 0;\n\n  u00 = u11 = 1;\n  if (cmp2(ah, al, bh, bl) == 0)\n    {\n      sub_ddmmss (ah, al, ah, al, bh, bl);\n      if (ah < 2)\n\treturn 0;\n\n      u01 = 1;\n      u10 = 0;\n    }\n  else\n    {\n      sub_ddmmss (bh, bl, bh, bl, ah, al);\n      if (bh < 2)\n\treturn 0;\n\n      u10 = 1;\n      u01 = 0;\n    }\n\n  if (ah < bh)\n    goto subtract_a;\n\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n      if (ah == bh)\n\tgoto done;\n\n      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = shld(ah, al, GMP_LIMB_BITS / 2);\n\t  bh = shld(bh, bl, GMP_LIMB_BITS / 2);\n\n\t  break;\n\t}\n\n      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0\n\t 1), affecting the second column of M. */\n      {\n        mp_limb_t r[2];\n        mp_limb_t q = div2 (r, ah, al, bh, bl);\n        al = r[0]; ah = r[1];\n        q -= (ah < 2) ? 1 : 0;\n        u01 += q * u00;\n        u11 += q * u10;\n        if (ah < 2)\n          {\n            goto done;\n          }\n      }\n    subtract_a:\n      ASSERT (bh >= ah);\n      if (ah == bh)\n\tgoto done;\n\n      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))\n\t{\n\t  ah = shld(ah, al, GMP_LIMB_BITS / 2);\n\t  bh = shld(bh, bl, GMP_LIMB_BITS / 2);\n\n\t  goto subtract_a1;\n\t}\n\n      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q\n\t 1), affecting the first column of M. */\n      {\n        mp_limb_t r[2];\n        mp_limb_t q = div2 (r, bh, bl, ah, al);\n        bl = r[0]; bh = r[1];\n        q -= (bh < 2) ? 
1 : 0;\n        u00 += q * u01;\n        u10 += q * u11;\n        if (bh < 2)\n          {\n            goto done;\n          }\n      }\n    }\n\n  /* NOTE: Since we discard the least significant half limb, we don't\n     get a truly maximal M (corresponding to |a - b| <\n     2^{GMP_LIMB_BITS +1}). */\n  /* Single precision loop */\n  for (;;)\n    {\n      ASSERT (ah >= bh);\n\n      {\n        mp_limb_t q = div1 (&ah, ah, bh);\n        if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n          {\n            q--;\n            u01 += q * u00;\n            u11 += q * u10;\n            break;\n          }\n        u01 += q * u00;\n        u11 += q * u10;\n      }\n    subtract_a1:\n      ASSERT (bh >= ah);\n\n      {\n        mp_limb_t q = div1 (&bh, bh, ah);\n        if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))\n          {\n            q--;\n            u00 += q * u01;\n            u10 += q * u11;\n            break;\n          }\n        u00 += q * u01;\n        u10 += q * u11;\n      }\n    }\n\n done:\n  M->u[0][0] = u00; M->u[0][1] = u01;\n  M->u[1][0] = u10; M->u[1][1] = u11;\n\n  return 1;\n}\n"
  },
  {
    "path": "mpn/x86_64/haswell/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\npush %rdx\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nL(p):\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\tadc %rbx,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc L(p)\ncmp $2,%rcx\njg\tL(case0)\njz\tL(case1)\njp\tL(case2)\nL(case3):\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc %rbx,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp L(fin)\nL(case2):\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 
%rbx,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp L(fin)\nL(case1):\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc %rbx,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\tadc %rax,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc %rbx,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nL(fin):\tmov $3,%rcx\nL(case0): #rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc L(notodd)\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\tadc %r10,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nL(l7):\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc L(l7)\n\tmov $3,%rcx\nL(notodd):and $3,%rax\n\tpopcnt %rax,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nL(l1):\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc L(l1)\n\tand $7,%rbx\n\tpopcnt %rbx,%r8\n\tadd %r8,24(%rbp)\nL(l2):\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc L(l2)\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/haswell/karasub.as",
    "content": ";  mpn_karasub\n\n;  Copyright 2011,2012 The Code Cavern\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; Let n2 = floor(n/2), n3 = n - n2, i.e., either n3 = n2 or n3 = n2 + 1.\n; The sizes of the blocks are: {rp, 2*n2} = xl.yl, {rp + 2*n2, 2*n3} = xh.yh,\n; {tp, 2*n3} = (xh - xl).(yh - yl).\n; We arrange sizes so that A, B, C, and E are of length n2, and D and F\n; are of length 2*n3-n2. I.e.,\n; A = {rp, n2}, B = {rp + n2, n2}, C = {rp + 2*n2, n2}\n; D = {rp + 3*n2, 2*n3-n2}, E = {tp, n2}, F = {tp + n2, 2*n3-n2}.\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and and, D the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define TP rsi\n%define RP rdi\n\n%define A_P rdi\n%define B_P rbx\n%define C_P rcx\n%define D_P rdx\n%define E_P rsi\n%define F_P rbp\n\nGLOBAL_FUNC mpn_karasub\n; requires n>=8\npush rbp\npush rbx\npush r12\npush r13\npush r14\npush r15\npush rdx\n; n is rdx and put it on the stack\nand rdx, -2\t\t\t; rdx = 2*n2\nshl rdx, 2\t\t\t; rdx = 8*n2\nlea B_P, [RP + rdx]\nlea C_P, [RP + rdx*2]\nlea F_P, [E_P + rdx]\nlea rdx, [rdx*2 + rdx]\nlea D_P, [RP + rdx]\nmov rax, B_P\nsub rax, 3*8\nmov [rsp-8], rax\t\t\t; for testing end of main loop\n; eax contains the carrys\nxor eax, eax\nmov r11, rax\nalign 16\n.Lp:\tbt r11, 3\n\tmov r8, [B_P]\t\t; r8 = B[i]\n\tadc r8, [C_P]\t\t; r8 = B[i] + C[i]\n\tmov r9, [B_P + 8]\t; r9 = B[i+1]\n\tadc r9, [C_P + 8]\t; r9 = B[i+1] + C[i+1]\n\tmov r10, [B_P + 16]\t; r10 = B[i+2]\n\tadc r10, [C_P + 16]\t; r10 = B[i+2] + C[i+2]\n\tmov r11, [B_P + 24]\t; r11 = B[i+3]\n\tadc r11, [C_P + 24]\t; r11 = B[i+3] + C[i+3]\n\tmov r12, rax\n\tadc eax, eax\n\n\tbt r12, 3\n\tmov r12, r8\t\t; r12 = B[i] + C[i]\n\tmov r13, r9\t\t; r13 = B[i+1] + C[i+1]\n\tmov r14, r10\t\t; r14 = B[i+2] + C[i+2]\n\tmov r15, r11\t\t; r15 = B[i+3] + C[i+3]\n\tadc r8, [A_P]\t\t; r8 = B[i] + C[i] + A[i]\n\tadc r9, [A_P + 8]\t; r9 = B[i+1] + C[i+1] + A[i+1]\n\tadc r10, [A_P + 16]\t; r10 = B[i+2] + C[i+2] + A[i+2]\n\tadc r11, [A_P + 24]\t; r11 = B[i+3] + C[i+3] + A[i+3]\n\tadc eax, eax\n\n\tbt eax, 4\t; FIXME: can we break the dependency chain here?\n\tsbb r8, [E_P]\t\t; r8 = B[i] + C[i] + A[i] - E[i]\n\tsbb r9, [E_P + 8]\t; r9 = B[i+1] + C[i+1] + A[i+1] - E[i+1]\n\tsbb r10, [E_P + 16]\t; r10 = B[i+2] + C[i+2] + A[i+2] - E[i+2]\n\tsbb r11, [E_P + 24]\t; r11 = B[i+3] + C[i+3] + A[i+3] - E[i+3]\n\tmov [B_P], r8\t\t; B[i] = B[i] + C[i] + A[i] - E[i]\n\tmov [B_P + 8], r9\t; B[i+1] = B[i+1] + C[i+1] + A[i+1] - E[i+1]\n\tmov [B_P + 16], r10\t; B[i+2] = B[i+2] + C[i+2] + A[i+2] - E[i+2]\n\tmov [B_P + 24], r11\t; B[i+3] = B[i+3] + C[i+3] + A[i+3] - 
E[i+3]\n\tmov r11, rax\n\tadc eax, eax\n\n\tbt r11, 3\n\tadc r12, [D_P]\t\t; r12 = B[i] + C[i] + D[i]\n\tadc r13, [D_P + 8]\t; r13 = B[i+1] + C[i+1] + D[i+1]\n\tadc r14, [D_P + 16]\t; r14 = B[i+2] + C[i+2] + D[i+2]\n\tadc r15, [D_P + 24]\t; r15 = B[i+3] + C[i+3] + D[i+3]\n\tmov r11, rax\n\tadc eax, eax\n\n\tbt r11, 3\n\tsbb r12, [F_P]\t\t; r12 = B[i] + C[i] + D[i] - F[i]\n\tsbb r13, [F_P + 8]\t; r13 = B[i+1] + C[i+1] + D[i+1] - F[i+1]\n\tsbb r14, [F_P + 16]\t; r14 = B[i+2] + C[i+2] + D[i+2] - F[i+2]\n\tsbb r15, [F_P + 24]\t; r15 = B[i+3] + C[i+3] + D[i+3] - F[i+3]\n\tmov r11, rax\n\tadc eax, eax\n\n\tmov [C_P], r12\t\t; C[i] = B[i] + C[i] + D[i] - F[i]\n\tmov [C_P + 8], r13\t; C[i+1] = B[i+1] + C[i+1] + D[i+1] - F[i+1]\n\tmov [C_P + 16], r14\t; C[i+2] = B[i+2] + C[i+2] + D[i+2] - F[i+2]\n\tmov [C_P + 24], r15\t; C[i+3] = B[i+3] + C[i+3] + D[i+3] - F[i+3]\n\tlea A_P, [A_P + 4*8]\n\tlea B_P, [B_P + 4*8]\n\tlea C_P, [C_P + 4*8]\n\tlea D_P, [D_P + 4*8]\n\tlea E_P, [E_P + 4*8]\n\tlea F_P, [F_P + 4*8]\n\tmov r9, A_P\n\tsub r9, [rsp-8]\t\t; r9 = A_P - (orig_B_P - 3*8) = A_P - orig_B_P + 24\n\tjc .Lp\t\t\t; If A_P < orig_B_P - 3*8, then do another 4 words\n\n; Bits of eax contain carries of:\n; 0\t\t1\t\t2\t3\t4\n; (B+C+D)-F\t(B+C)+D\t(B+C+A)-E\t(B+C)+A\tB+C\n\n\tjz .Lcase3\t\t; If A_P = orig_B_P - 3*8, then do remaining 3 words\n\t\t\t\t; Difference is 8, 16, or 24, corresponding to 2, 1, or 0 words left to do, resp.\n\tcmp r9, 16\n\tjb\t.Lcase2\t\t; r9 = 8 : 2 words\n\tje\t.Lcase1\t\t; r9 = 16 : 1 word\n\tjmp\t.Lcase0\t\t; r9 = 24 : 0 words\n.Lcase3:\n\tbt r11, 3\n\tmov r8, [B_P]\n\tadc r8, [C_P]\n\tmov r9, [B_P + 8]\n\tadc r9, [C_P + 8]\n\tmov r10, [B_P + 16]\n\tadc r10, [C_P + 16]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tmov r12, r8\n\tmov r13, r9\n\tmov r14, r10\n\tadc r8, [A_P]\n\tadc r9, [A_P + 8]\n\tadc r10, [A_P + 16]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tsbb r8, [E_P]\n\tsbb r9, [E_P + 8]\n\tsbb r10, [E_P + 16]\n\tmov [B_P], r8\n\tmov [B_P + 8], 
r9\n\tmov [B_P + 16], r10\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tadc r12, [D_P]\n\tadc r13, [D_P + 8]\n\tadc r14, [D_P + 16]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tsbb r12, [F_P]\n\tsbb r13, [F_P + 8]\n\tsbb r14, [F_P + 16]\n\tadc eax, eax\n\tmov [C_P], r12\n\tmov [C_P + 8], r13\n\tmov [C_P + 16], r14\n\tlea A_P, [A_P + 3*8]\n\tlea B_P, [B_P + 3*8]\n\tlea C_P, [C_P + 3*8]\n\tlea D_P, [D_P + 3*8]\n\tlea E_P, [E_P + 3*8]\n\tlea F_P, [F_P + 3*8]\n\tjmp .Lfin\n.Lcase2:\n\tbt r11, 3\n\tmov r8, [B_P]\n\tadc r8, [C_P]\n\tmov r9, [B_P + 8]\n\tadc r9, [C_P + 8]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tmov r12, r8\n\tmov r13, r9\n\tadc r8, [A_P]\n\tadc r9, [A_P + 8]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tsbb r8, [E_P]\n\tsbb r9, [E_P + 8]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tadc r12, [D_P]\n\tadc r13, [D_P + 8]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tmov [B_P], r8\n\tmov [B_P + 8], r9\n\tsbb r12, [F_P]\n\tsbb r13, [F_P + 8]\n\tadc eax, eax\n\tmov [C_P], r12\n\tmov [C_P + 8], r13\n\tlea A_P, [A_P + 2*8]\n\tlea B_P, [B_P + 2*8]\n\tlea C_P, [C_P + 2*8]\n\tlea D_P, [D_P + 2*8]\n\tlea E_P, [E_P + 2*8]\n\tlea F_P, [F_P + 2*8]\n\tjmp .Lfin\n.Lcase1:\n\tbt r11, 3\n\tmov r8, [B_P]\n\tadc r8, [C_P]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tmov r12, r8\n\tadc r8, [A_P]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tsbb r8, [E_P]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tadc r12, [D_P]\n\tmov r11, rax\n\tadc eax, eax\n\tbt r11, 3\n\tmov [B_P], r8\n\tsbb r12, [F_P]\n\tadc eax, eax\n\tmov [C_P], r12\n\tlea A_P, [A_P + 1*8]\n\tlea B_P, [B_P + 1*8]\n\tlea C_P, [C_P + 1*8]\n\tlea D_P, [D_P + 1*8]\n\tlea E_P, [E_P + 1*8]\n\tlea F_P, [F_P + 1*8]\n.Lfin:\n\t; Now A_P, ..., F_P point at A[n2]=B[0], B[n2]=C[0], C[n2]=D[0],\n        ; D[n2], E[n2]=F[0], F[n2], resp.\n.Lcase0:\n\t; store top two words of D as carrys could change them\n\tpop r15\n\tbt r15, 0\n\tjnc .Lskipload\n\tmov r12, [D_P]\t\t; load D[n2]\n        mov r13, [D_P + 
8]\t; load D[n2 + 1]\n\t; the two carrys from 2nd to 3rd\n.Lskipload:\n\txor r8, r8\n\tmov r11, r8\t\t; r11 is constant 0 now\n\tbt eax, 4\t\t; carry of B+C\n\tadc r8, r8\n\tmov r9, r8\n\tbt eax, 3\t\t; carry of (B+C)+A\n\tlea r10, [B_P + 8]\n\tadc [B_P], r8\t\t; B_P points at B[n2] = C[0]\n.L2:\tadc qword [r10], r11\n\tlea r10, [r10 + 8]\n\tjc .L2\n\t; the two carrys from 3rd to 4th\n\tlea r10, [C_P + 8]\n\tbt eax, 1\t\t; carry of (B+C)+D\n\tadc [C_P], r9\n.L3:\tadc qword [r10], r11\n\tlea r10, [r10 + 8]\n\tjc .L3\n\t; now the borrow from 2nd to 3rd\n\tmov r10, B_P\n\tbt eax, 2\t\t; borrow of (B+C+A)-E\n.L1:\tsbb qword [r10], r11\n\tlea r10, [r10 + 8]\n\tjc .L1\n\t; borrow from 3rd to 4th\n\tbt eax, 0\t\t; borrow of (B+C+D)-F\n\tmov r10, C_P\n.L4:\tsbb qword [r10], r11\n\tlea r10, [r10 + 8]\n\tjc .L4\n\n\t; if odd then do next two\n\tbt r15, 0\n\tjnc .Lnotodd\n\n\tsub r12, [F_P]\t\t; r12 contains D[n2]\n\tsbb r13, [F_P + 8]\t; r13 contains D[n2+1]\n\tsbb rax, rax\n\tadd [C_P], r12\n\tadc [C_P + 8], r13\n\tadc rax, 0\t\t; rax is -1, 0, or 1\n.L7:\tadd [C_P + 16], rax\n\tadc rax, 0\n\tlea C_P, [C_P + 8]\n\tsar rax, 1\n\tjnz .L7\n.Lnotodd:\npop r15\npop r14\npop r13\npop r12\npop rbx\npop rbp\nret\n"
  },
  {
    "path": "mpn/x86_64/haswell/nsumdiff_n.as",
    "content": "; ============================================================================\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n; mp_limb_t mpn_nsumdiff_n(mp_ptr Op3, mp_ptr Op4, mp_srcptr Op1, mp_srcptr Op2, mp_size_t Size)\n; Linux     RAX           RDI         RSI         RDX            RCX            R8\n; Win7      RAX           RCX         RDX         R8             R9             Stack\n;\n; Description:\n; The function computes -(Op2+Op1) and stores the result in Op3 while at the\n; same time subtracting Op2 from Op1 with result in Op4. The final carries from\n; addition and subtraction are handed back as a combined mp_limb_t. There is a\n; gain in execution speed compared to separate addition and subtraction by\n; reducing memory access. 
The factor depends on the size of the operands (the\n; cache hierarchy in which the operands can be handled).\n;\n; Equivalent to, assuming no overlap:\n; cy1 = mpn_add_n(r1, s1, s2, n);\n; cy2 = mpn_neg_n(r1, r1, n); /* cy2 = [{r1,n} != 0] */\n; cy3 = mpn_sub_n(r2, s1, s2, n);\n; return 2*(cy1 + cy2) + cy3;\n; ============================================================================\n\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n\n    %define Op3     RCX\n    %define Op4     RDX\n    %define Op1     R8\n    %define Op2     R9\n    %define Size    RBX             ; SAVE!\n\n    %define Limb0   RBP             ; SAVE!\n    %define Limb1   RSI             ; SAVE!\n    %define Limb2   RDI             ; SAVE!\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12             ; SAVE!\n    %define Limb6   R13             ; SAVE!\n    %define Limb7   R14             ; SAVE!\n    %define Limb8   R15             ; SAVE!\n\n  %ifdef USE_PREFETCH\n    %define Offs    PREFETCH_STRIDE ; no more regs avail. => fallback to const\n  %endif\n\n    %define SaveRBX XMM0            ; use scratch XMM for fast save & restore\n    %define SaveRBP XMM1            ; R14 and R15 handled via stack\n    %define SaveRSI XMM2\n    %define SaveRDI XMM3\n    %define SaveR12 XMM4\n    %define SaveR13 XMM5\n\n%else\n\n    %define Op3     RDI\n    %define Op4     RSI\n    %define Op1     RDX\n    %define Op2     RCX\n    %define Size    R8\n\n    %define Limb0   RBP             ; SAVE!\n    %define Limb1   RBX             ; SAVE!\n    %define Limb2   R9\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12             ; SAVE!\n    %define Limb6   R13             ; SAVE!\n    %define Limb7   R14             ; SAVE!\n    %define Limb8   R15             ; SAVE!\n\n  %ifdef USE_PREFETCH\n    %define Offs    PREFETCH_STRIDE ; no more regs avail. 
=> fallback to const\n  %endif\n\n    %define SaveRBX XMM0            ; use scratch XMM for fast save & restore\n    %define SaveRBP XMM1\n    %define SaveR12 XMM2\n    %define SaveR13 XMM3\n    %define SaveR14 XMM4\n    %define SaveR15 XMM5\n\n%endif\n\n%define SaveAC  setc    AL\n%define LoadAC  shr     AL, 1\n\n%define SaveSC  sbb     AH, AH\n%define LoadSC  add     AH, AH\n\nBITS 64\n\nalign   32\n\n   GLOBAL_FUNC mpn_nsumdiff_n\n\n  %ifdef USE_WIN64\n    sub     RSP, 16\n    mov     [RSP+8], R15\n    mov     [RSP], R14\n\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRDI, RDI\n    movq    SaveRSI, RSI\n    movq    SaveRBP, RBP\n    movq    SaveRBX, RBX\n  %else\n    movq    SaveR15, R15\n    movq    SaveR14, R14\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRBP, RBP\n    movq    SaveRBX, RBX\n  %endif\n\n    xor     EAX, EAX            ; clear add & sub carry\n\n; First we handle any words whose sum = 0\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    add     Limb2, Limb5\n    SaveAC\n    neg\t    Limb2\n    jz      .zero_sum ; ajs:notshortform\n\n; Then we handle the first word whose sum !=0. 
The NOT of this sum needs to\n; be incremented, which produces no carry (NOT(x) + 1 = NEG(x))\n.not_zero:\n    mov     [Op3], Limb2\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    dec     Size\n    mov     [Op4], Limb1\n    add     Op4, 8\n\n    shr     Size, 1\n    jnc     .n_two\n\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    LoadAC\n    mov     Limb2, Limb1\n    adc     Limb2, Limb5\n    SaveAC\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    not     Limb2\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     [Op3-8], Limb2\n    mov     [Op4], Limb1\n    add     Op4, 8\n\n  .n_two:\n\n    shr     Size, 1\n    jnc     .n_four\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    mov     Limb5, [Op2]\n    mov     Limb6, [Op2+8]\n    LoadAC\n    mov     Limb3, Limb1\n    mov     Limb4, Limb2\n    adc     Limb3, Limb5\n    not     Limb3\n    adc     Limb4, Limb6\n    SaveAC\n    LoadSC\n    not     Limb4\n    sbb     Limb1, Limb5\n    sbb     Limb2, Limb6\n    SaveSC\n    mov     [Op3], Limb3\n    mov     [Op3+8], Limb4\n    mov     [Op4], Limb1\n    mov     [Op4+8], Limb2\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    add     Op4, 16\n\n\n  .n_four:\n\n    shr     Size, 1\n    jnc     .n_loop_pre ;ajs:notshortform\n\n    LoadAC\n\n    ; slight change of scheme here - avoid too many\n    ; memory to reg or reg to memory moves in a row\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    not     Limb0\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    not     Limb0\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    not     Limb0\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, 
[Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    not     Limb0\n    mov     [Op3+24], Limb0\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    SaveSC\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    add     Op4, 32\n \n    test   Size, Size\n  .n_loop_pre:\t\t; If we jump here, ZF=1 iff Size=0\n    jz     .n_post      ;ajs:notshortform\n    LoadAC              ; set carry for addition\n\n    ; main loop - values below are best case - up to 50% fluctuation possible!\n    ; - 3.50      cycles per limb in LD1$\n    ; - 3.50      cycles per limb in LD2$\n    ; - 5.10-5.50 cycles per limb in LD3$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; add the first quad-limb\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    not     Limb0\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    not     Limb0\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    not     Limb0\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    not     Limb0\n    mov     [Op3+24], Limb0\n    lea     Op3, [Op3 + 64]\n\n    SaveAC              ; memorize add-carry\n    LoadSC              ; set carry for subtraction\n\n    sbb     Limb1, Limb5        ; now sub the first quad-limb\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n   
 mov     Limb1, [Op1+32]     ; sub the second quad-limb\n    mov     Limb5, [Op2+32]\n    mov     Limb0, Limb1\n    sbb     Limb0, Limb5\n    mov     [Op4+32], Limb0\n    mov     Limb2, [Op1+40]\n    mov     Limb6, [Op2+40]\n    mov     Limb0, Limb2\n    sbb     Limb0, Limb6\n    mov     [Op4+40], Limb0\n    mov     Limb3, [Op1+48]\n    mov     Limb7, [Op2+48]\n    mov     Limb0, Limb3\n    sbb     Limb0, Limb7\n    mov     [Op4+48], Limb0\n    mov     Limb4, [Op1+56]\n    mov     Limb8, [Op2+56]\n    mov     Limb0, Limb4\n    sbb     Limb0, Limb8\n    mov     [Op4+56], Limb0\n    lea     Op4, [Op4 + 64]\n\n    SaveSC                      ; memorize sub-carry\n    LoadAC                      ; set carry for addition\n\n    adc     Limb1, Limb5        ; add the second quad-limb\n    not     Limb1\n    mov     [Op3+32-64], Limb1\n    adc     Limb2, Limb6\n    not     Limb2\n    mov     [Op3+40-64], Limb2\n    adc     Limb3, Limb7\n    not     Limb3\n    mov     [Op3+48-64], Limb3\n    adc     Limb4, Limb8\n    not     Limb4\n    mov     [Op3+56-64], Limb4\n\n    lea     Op1, [Op1 + 64]\n    lea     Op2, [Op2 + 64]\n\n\n    dec     Size\n    jnz     .n_loop     ;ajs:notshortform\n\n    SaveAC                      ; memorize add-carry\n    ; hand back carries\n  .n_post:\n\t\t\t\t; AL = cy1, AH = -cy3. 
cy2 = 1 here, as\n                                ; there were non-zero words in the sum\n    inc     al\t\t\t; AL = cy1 + cy2 = cy1 + 1, AH = -cy3\n.all_zero:\n    LoadSC\t\t\t; AL = cy1 + cy2, CY = cy3\n    adc     AL, AL\t\t; AL = 2*(cy1 + cy2) + cy3\n    movsx   EAX, AL\n\n  .Exit:\n\n  %ifdef USE_WIN64\n    movq    RBX, SaveRBX\n    movq    RBP, SaveRBP\n    movq    RSI, SaveRSI\n    movq    RDI, SaveRDI\n    movq    R12, SaveR12\n    movq    R13, SaveR13\n\n    mov     R14, [RSP]\n    mov     R15, [RSP+8]\n    add     RSP, 16\n  %else\n    movq    R15, SaveR15\n    movq    R14, SaveR14\n    movq    R13, SaveR13\n    movq    R12, SaveR12\n    movq    RBP, SaveRBP\n    movq    RBX, SaveRBX\n  %endif\n\n    ret\n.end:\n\n.zero_sum:\n    mov     [Op3], Limb2\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    mov     [Op4], Limb1\n    dec     Size\n    jz      .all_zero\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    add     Op4, 8\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    LoadAC\n    adc     Limb2, Limb5\n    SaveAC\n    neg     Limb2\n    jz      .zero_sum\n    jmp     .not_zero\n"
  },
  {
    "path": "mpn/x86_64/haswell/sub_n.as",
    "content": "\n;  AMD64 mpn_sub_n, mpn_sub_nc\n;  Copyright 2008, 2016 Jason Moxham and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define CarryIn\n    %define LIMB1   rax\n    %define LIMB2   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define CarryIn r8\n    %define LIMB1   rax\n    %define LIMB2   r8  ; may reuse CarryIn\n    %define SizeRest r9\n%endif\n\n%define ADCSBB sbb\n\n    BITS    64\n\n\talign   16\n   GLOBAL_FUNC mpn_sub_nc\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n        neg\tCarryIn\t\t; Set CF iff CarryIn != 0\n\tinc     Size\n\tdec     Size\t\t; Set ZF without affecting CF\n\tjnz     loop1\n        jmp     therest ;ajs:notshortform\n\n\talign   16\n   GLOBAL_FUNC mpn_sub_n\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n\tcmp     Size, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n        jmp     therest ;ajs:notshortform\n\n\talign 
  16\nloop1:\n\tmov     LIMB1, [Inp1P]\n\tmov     LIMB2, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tADCSBB  LIMB2, [Inp2P+8]\n\tmov     LIMB1, [Inp1P+16]\n\tmov     [SumP+8], LIMB2\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     LIMB2, [Inp1P+24]\n\tmov     [SumP+16], LIMB1\n\tmov     LIMB1, [Inp1P+32]\n\tADCSBB  LIMB2, [Inp2P+24]\n\tmov     [SumP+24], LIMB2\n\tADCSBB  LIMB1, [Inp2P+32]\n\tmov     [SumP+32], LIMB1\n\tmov     LIMB2, [Inp1P+40]\n\tADCSBB  LIMB2, [Inp2P+40]\n\tmov     [SumP+40], LIMB2\n\tmov     LIMB1, [Inp1P+48]\n\tmov     LIMB2, [Inp1P+56]\n\tlea     Inp1P, [Inp1P+64]\n\tADCSBB  LIMB1, [Inp2P+48]\n\tADCSBB  LIMB2, [Inp2P+56]\n\tlea     Inp2P, [Inp2P+64]\n\tmov     [SumP+48], LIMB1\n\tmov     [SumP+56], LIMB2\n\tlea     SumP, [SumP+64]\n\tdec     Size\n\tjnz     loop1\n\tinc     SizeRest\n\tdec     SizeRest\n\tjz      end\ntherest:\n\tmov     LIMB1, [Inp1P]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tdec     SizeRest\n\tjz      end\n\tmov     LIMB1, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P+8]\n\tmov     [SumP+8], LIMB1\n\tdec     SizeRest\n\tjz      end\n\tmov     LIMB1, [Inp1P+16]\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     [SumP+16], LIMB1\n\tdec     SizeRest\n        jz      end\n\tmov     LIMB1, [Inp1P+24]\n\tADCSBB  LIMB1, [Inp2P+24]\n\tmov     [SumP+24], LIMB1\n\tdec     SizeRest\n        jz      end\n        lea     Inp1P, [Inp1P+32]\n        lea     Inp2P, [Inp2P+32]\n        lea     SumP, [SumP+32]\n        jmp     therest\nend:\n\tmov     eax, 0\n\tadc     eax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/haswell/sublsh1_n.as",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t sublsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, subtracts it from Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and subtract by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 0.7 cycles in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1\n; ============================================================================\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op3     RCX\n    %define Op2     RDX\n    %define Op1     R8\n    %define Size    R9\n  %ifdef USE_PREFETCH\n    %define Offs    RBP         ; SAVE!\n  %endif\n\n    %define Limb0   RBX         ; SAVE!\n    %define Limb1   RDI         ; SAVE!\n    %define Limb2   RSI         ; SAVE!\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12         ; SAVE!\n    %define Limb6   R13         ; SAVE!\n    %define Limb7   R14         ; SAVE!\n    %define Limb8   R15         ; SAVE!\n\n    %define SaveRBX XMM0        ; use available scratch XMM to\n    %define SaveRSI XMM1        ; save as many regs as possible\n    %define SaveRDI XMM2\n    %define SaveR12 XMM3\n    %define SaveR13 XMM4\n    %define SaveR14 XMM5\n\n%else\n\n    %define Op3     RDI\n    %define Op2     RSI\n    %define Op1     RDX\n    %define Size    RCX\n  %ifdef USE_PREFETCH\n    %define Offs    RBP         ; SAVE!\n  %endif\n\n    %define Limb0   RBX         ; SAVE!\n    %define Limb1   R8\n    %define Limb2   R9\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12         ; SAVE!\n    %define Limb6   R13         ; SAVE!\n    %define Limb7   R14         ; SAVE!\n    %define Limb8   
R15         ; SAVE!\n\n    %define SaveRBX XMM0        ; use available scratch XMM to save all regs\n    %define SaveR12 XMM1\n    %define SaveR13 XMM2\n    %define SaveR14 XMM3\n    %define SaveR15 XMM4\n  %ifdef USE_PREFETCH\n    %define SaveRBP XMM5\n  %endif\n\n%endif\n\n\n%macro ACCUMULATE 1\n    mov     rax, [Op2 + 8 * %1]\n    ADCSBB  rax, Limb%1\n    mov     [Op3 + 8 * %1], rax\n%endmacro\n\n\n    align   32\n\nGLOBAL_FUNC  mpn_sublsh1_n\n\n%ifdef USE_WIN64\n  %ifdef USE_PREFETCH\n    sub     RSP, 16\n    mov     [RSP+8], R15\n    mov     [RSP], RBP\n  %else\n    sub     RSP, 8\n    mov     [RSP], R15\n  %endif\n    movq    SaveRBX, RBX\n    movq    SaveRSI, RSI\n    movq    SaveRDI, RDI\n    movq    SaveR12, R12\n    movq    SaveR13, R13\n    movq    SaveR14, R14\n%else\n  %ifdef USE_PREFETCH\n    movq    SaveRBP, RBP\n  %endif\n    movq    SaveRBX, RBX\n    movq    SaveR12, R12\n    movq    SaveR13, R13\n    movq    SaveR14, R14\n    movq    SaveR15, R15\n%endif\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & subtraction with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n    mov     rax, [Op2]\n    ADDSUB  rax, Limb0\n    mov     [Op3], rax\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    
Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; sub shifted Op1 from Op2 with result in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n\n  .Exit:\n\n%ifdef USE_WIN64\n\n    movq    SaveR14, R14\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRDI, RDI\n    movq    SaveRSI, RSI\n    movq   
 SaveRBX, RBX\n  %ifdef USE_PREFETCH\n    mov     [RSP], RBP\n    mov     [RSP+8], R15\n    add     RSP, 16\n  %else\n    mov     [RSP], R15\n    add     RSP, 8\n  %endif\n\n%else\n\n    movq    R15, SaveR15\n    movq    R14, SaveR14\n    movq    R13, SaveR13\n    movq    R12, SaveR12\n    movq    RBX, SaveRBX\n  %ifdef USE_PREFETCH\n    movq    RBP, SaveRBP\n  %endif\n\n%endif\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/haswell/sumdiff_n.as",
    "content": "; ============================================================================\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n; mp_limb_t mpn_sumdiff_n(mp_ptr Op3, mp_ptr Op4, mp_srcptr Op1, mp_srcptr Op2, mp_size_t Size)\n; Linux     RAX           RDI         RSI         RDX            RCX            R8\n; Win7      RAX           RCX         RDX         R8             R9             Stack\n;\n; Description:\n; The function adds Op2 to Op1 and stores the result in Op3 while at the same\n; time subtracting Op2 from Op1 with result in Op4. The final carries from\n; addition and subtraction are handed back as a combined mp_limb_t. There is a\n; gain in execution speed compared to separate addition and subtraction by\n; reducing memory access. 
The factor depends on the size of the operands (the\n; cache hierarchy in which the operands can be handled).\n;\n; Comments:\n; - asm version implemented, tested & benched on 10.06.2015 by jn\n; - On Nehalem per limb saving is 0.5 cycle in LD1$, LD2$ and LD3$\n; - includes prefetching\n; - includes XMM save & restore\n; ============================================================================\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n\n    %define Op3     RCX\n    %define Op4     RDX\n    %define Op1     R8\n    %define Op2     R9\n    %define Size    RBX             ; SAVE!\n\n    %define Limb0   RBP             ; SAVE!\n    %define Limb1   RSI             ; SAVE!\n    %define Limb2   RDI             ; SAVE!\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12             ; SAVE!\n    %define Limb6   R13             ; SAVE!\n    %define Limb7   R14             ; SAVE!\n    %define Limb8   R15             ; SAVE!\n\n  %ifdef USE_PREFETCH\n    %define Offs    PREFETCH_STRIDE ; no more regs avail. => fallback to const\n  %endif\n\n    %define SaveRBX XMM0            ; use scratch XMM for fast save & restore\n    %define SaveRBP XMM1            ; R14 and R15 handled via stack\n    %define SaveRSI XMM2\n    %define SaveRDI XMM3\n    %define SaveR12 XMM4\n    %define SaveR13 XMM5\n\n%else\n\n    %define Op3     RDI\n    %define Op4     RSI\n    %define Op1     RDX\n    %define Op2     RCX\n    %define Size    R8\n\n    %define Limb0   RBP             ; SAVE!\n    %define Limb1   RBX             ; SAVE!\n    %define Limb2   R9\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12             ; SAVE!\n    %define Limb6   R13             ; SAVE!\n    %define Limb7   R14             ; SAVE!\n    %define Limb8   R15             ; SAVE!\n\n  %ifdef USE_PREFETCH\n    %define Offs    PREFETCH_STRIDE ; no more regs avail. 
=> fallback to const\n  %endif\n\n    %define SaveRBX XMM0            ; use scratch XMM for fast save & restore\n    %define SaveRBP XMM1\n    %define SaveR12 XMM2\n    %define SaveR13 XMM3\n    %define SaveR14 XMM4\n    %define SaveR15 XMM5\n\n%endif\n\n%define SaveAC  setc    AL\n%define LoadAC  shr     AL, 1\n\n%define SaveSC  sbb     AH, AH\n%define LoadSC  add     AH, AH\n\nBITS 64\n\nalign   32\n\n   GLOBAL_FUNC mpn_sumdiff_n\n\n  %ifdef USE_WIN64\n    sub     RSP, 16\n    mov     [RSP+8], R15\n    mov     [RSP], R14\n\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRDI, RDI\n    movq    SaveRSI, RSI\n    movq    SaveRBP, RBP\n    movq    SaveRBX, RBX\n  %else\n    movq    SaveR15, R15\n    movq    SaveR14, R14\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRBP, RBP\n    movq    SaveRBX, RBX\n  %endif\n\n    xor     EAX, EAX            ; clear add & sub carry\n\n    shr     Size, 1\n    jnc     .sumdiff_n_two\n\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    add     Limb2, Limb5\n    mov     [Op3], Limb2\n\n    SaveAC\n\n    sub     Limb1, Limb5\n    mov     [Op4], Limb1\n\n    SaveSC\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    add     Op4, 8\n\n  .sumdiff_n_two:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    mov     Limb5, [Op2]\n    mov     Limb6, [Op2+8]\n\n    LoadAC\n\n    mov     Limb3, Limb1\n    adc     Limb3, Limb5\n    mov     [Op3], Limb3\n    mov     Limb4, Limb2\n    adc     Limb4, Limb6\n    mov     [Op3+8], Limb4\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n\n    SaveSC\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    add     Op4, 16\n\n  .sumdiff_n_four:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_loop_pre ;ajs:notshortform\n\n    LoadAC\n\n    ; slight change of 
scheme here - avoid too many\n    ; memory to reg or reg to memory moves in a row\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    SaveSC\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    add     Op4, 32\n \n    test   Size, Size\n  .sumdiff_n_loop_pre:\t\t; If we jump here, ZF=1 iff Size=0\n    jz     .sumdiff_n_post      ;ajs:notshortform\n    LoadAC              ; set carry for addition\n\n    ; main loop - values below are best case - up to 50% fluctuation possible!\n    ; - 3.50      cycles per limb in LD1$\n    ; - 3.50      cycles per limb in LD2$\n    ; - 5.10-5.50 cycles per limb in LD3$\n    align   16\n  .sumdiff_n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; add the first quad-limb\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    
mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n    lea     Op3, [Op3 + 64]\n\n    SaveAC              ; memorize add-carry\n    LoadSC              ; set carry for subtraction\n\n    sbb     Limb1, Limb5        ; now sub the first quad-limb\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    mov     Limb1, [Op1+32]     ; sub the second quad-limb\n    mov     Limb5, [Op2+32]\n    mov     Limb0, Limb1\n    sbb     Limb0, Limb5\n    mov     [Op4+32], Limb0\n    mov     Limb2, [Op1+40]\n    mov     Limb6, [Op2+40]\n    mov     Limb0, Limb2\n    sbb     Limb0, Limb6\n    mov     [Op4+40], Limb0\n    mov     Limb3, [Op1+48]\n    mov     Limb7, [Op2+48]\n    mov     Limb0, Limb3\n    sbb     Limb0, Limb7\n    mov     [Op4+48], Limb0\n    mov     Limb4, [Op1+56]\n    mov     Limb8, [Op2+56]\n    mov     Limb0, Limb4\n    sbb     Limb0, Limb8\n    mov     [Op4+56], Limb0\n    lea     Op4, [Op4 + 64]\n\n    SaveSC                      ; memorize sub-carry\n    LoadAC                      ; set carry for addition\n\n    adc     Limb1, Limb5        ; add the second quad-limb\n    mov     [Op3+32-64], Limb1\n    adc     Limb2, Limb6\n    mov     [Op3+40-64], Limb2\n    adc     Limb3, Limb7\n    mov     [Op3+48-64], Limb3\n    adc     Limb4, Limb8\n    mov     [Op3+56-64], Limb4\n\n    lea     Op1, [Op1 + 64]\n    lea     Op2, [Op2 + 64]\n\n    dec     Size\n    jnz     .sumdiff_n_loop     ;ajs:notshortform\n\n    SaveAC                      ; memorize add-carry\n    ; hand back carries\n  .sumdiff_n_post:\n\t\t\t\t; AL = add_carry, AH = -sub_carry\n    LoadSC\t\t\t; AL = add_carry, CY = sub_carry\n    adc     AL, AL\t\t; AL = 2*add_carry + sub_carry\n    movsx   RAX, AL\n\n  .Exit:\n\n  %ifdef USE_WIN64\n    movq    RBX, SaveRBX\n    movq    RBP, SaveRBP\n    movq    RSI, SaveRSI\n    movq    RDI, 
SaveRDI\n    movq    R12, SaveR12\n    movq    R13, SaveR13\n\n    mov     R14, [RSP]\n    mov     R15, [RSP+8]\n    add     RSP, 16\n  %else\n    movq    R15, SaveR15\n    movq    R14, SaveR14\n    movq    R13, SaveR13\n    movq    R12, SaveR12\n    movq    RBP, SaveRBP\n    movq    RBX, SaveRBX\n  %endif\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64/k8/add_err1_n.asm",
    "content": "dnl  mpn_add_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_add_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_add_err1_n)\n\tC // if we rearrange the params we could save some moves\n\tC //(rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n\t\n\tmov 8(%rsp),%r10            C cy\n\tmov %rbp,-16(%rsp)          C save rbp\n\tlea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n\tmov %r12,-24(%rsp)          C save r12\n\tmov %r13,-32(%rsp)          C save r13\n\tlea -24(%rsi,%r9,8),%rsi    C up += n - 3\n\tmov %r14,-40(%rsp)          C save r14\n\tmov %r15,-48(%rsp)          C save r15\n\tlea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n\tmov %rcx,-56(%rsp)\t       C save rcx\n\tmov %rbx,-8(%rsp)           C save rbx\n\tmov $3,%r11                 C i = 3\n\tshl $63,%r10\n\tlea (%r8,%r9,8),%r8         C yp += n\n\tsub %r9,%r11\t              C i = 3 - n\n\tmov $0,%r9                  C t1 = 0\n\tmov $0,%rax                 C 
t2 = 0\n\tmov $0,%rbx                 C t3 = 0\n\tjnc skiplp                  C if done goto skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 += *(vp + i + 0) + (cy & 1)\n\tadc (%rdx,%r11,8),%r12\n\tcmovc -8(%r8),%rax          C if carry1, t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13     C s2 += *(vp + i + 1) + carry1\n\tcmovc -16(%r8),%rbx         C if carry2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14    C s3 += *(vp + i + 2) + carry2\n\tcmovc -24(%r8),%rcx         C if carry3 t4 = *(yp - 3)\n\tadc 24(%rdx,%r11,8),%r15    C s4 += *(vp + i + 3) + carry3\n\tcmovc -32(%r8),%rbp         C if carry4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = carry4\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc  lp                     C not done, goto lp\nskiplp:\n\tcmp $2,%r11                 C cmp(i, 2)\n\tmov -16(%rsp),%rbp          C restore rbp\n\tmov -48(%rsp),%r15          C restore r15\n\tja case0                    C i == 3 goto case0\n\tje case1                    C i == 2 goto case1\n\tjp 
case2                    C i == 1 goto case2\ncase3:\n\tmov (%rsi,%r11,8),%r12         C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13        C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14       C s3 = *(up + i + 2)\n\tshl $1,%r10                    C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12         C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax             C if carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13        C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx            C if carry3 t3 = *(yp - 2)\n\tmov $0,%rcx                    C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14       C s3 += *(vp + i + 3) + carry3\n\tcmovc -24(%r8),%rcx            C if carry4 t4 = *(yp - 3)\n\trcr $1,%r10                    C store carry4 in high bit of cy\n\tadd %rax,%r9                   C t1 += t2\n\tadc $0,%r10                    C accumulate cy\n\tadd %rbx,%r9                   C t1 += t3\n\tadc $0,%r10                    C accumulate cy\n\tadd %rcx,%r9                   C t1 += t4\n\tadc $0,%r10                    C accumulate cy\n\tmov %r12,(%rdi,%r11,8)         C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)        C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)       C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx             C restore rcx\n\tmov %r9,(%rcx)                 C ep[0] = t1\n\tbtr $63,%r10                   C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)               C ep[1] = cy\n\tmov -40(%rsp),%r14             C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13             C restore r13\n\tadc $0,%rax                    C return carry out\n\tmov -24(%rsp),%r12             C restore r12\n\tmov -8(%rsp),%rbx              C restore rbx\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax         C if 
carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13    C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx        C if carry3 t3 = *(yp - 2)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax         C if carry2 t2 = *(yp - 1)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\ncase0:\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/add_n.as",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/addadd_n.as",
    "content": "\n;  AMD64 mpn_addadd_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)+(rdx,r8)+(rcx,r8) return carry+carry\n\n\tGLOBAL_FUNC mpn_addadd_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, [rdx+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8]\n\tadc     r8, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, 
[rdx+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8]\n\tadc     r8, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tadc     r8, [rcx+r9*8+40]\n\tadc     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tadc     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rdx+r9*8+32]\n\tadd     rax, 1\n\tadc     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rax, r10\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/addlsh1_n.as",
    "content": "\n;  AMD64 mpn_addlsh1_n \n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n;\trax = carry\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_addlsh1_n\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     rdi, [rdi+rcx*8]\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      next\nlp1:\n\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     lp1\nnext:\n\tcmp     rcx, 0\n\tjz      end\n\tpush    rbx\n\talign   16\nlp:\n\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], 
r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     lp\n\tpop     rbx\nend:\n\tadd     rax, r9\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/addlsh_n.asm",
    "content": "dnl  AMD64 mpn_addlsh_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC    carry+(xp,n)= (yp,n) + (zp,n)<<c with carry in ci\nC mp_limb_t\tmpn_addlsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC xp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc)\n\nASM_START()\nPROLOGUE(mpn_addlsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_addlsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tadc\t(%rsi,%r10,8),%r8\n\tadc\t8(%rsi,%r10,8),%rbp\n\tadc\t16(%rsi,%r10,8),%rbx\n\tadc\t24(%rsi,%r10,8),%r12\n\tsbb\t%rax,%rax\n\tmov\t%r8,(%rdi,%r10,8)\n\tmov\t%rbp,8(%rdi,%r10,8)\n\tmov\t%rbx,16(%rdi,%r10,8)\n\tmov\t%r12,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/addmul_1.asm",
    "content": "dnl  mpn_addmul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tadd %r8,(%rdi,%r11,8)\n\tadc %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tadd %r9,8(%rdi,%r11,8)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tadd %r10,16(%rdi,%r11,8)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tadd %r12,24(%rdi,%r11,8)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nadd %r8,(%rdi,%r11,8)\nadc %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tadd %r9,8(%rdi)\n\tadc 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tadd %r10,16(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tadd %r12,24(%rdi)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd %r8,32(%rdi)\n\tadc $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tadd %r9,16(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tadd %r10,24(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tadd %r12,32(%rdi)\n\tadc $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tadd %r9,24(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tadd %r10,32(%rdi)\n\tadc $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tadd %rax,(%rdi)\n\tadc $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tadd %r9,32(%rdi)\n\tadc $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/addsub_n.as",
    "content": "\n;  AMD64 mpn_addsub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)+(rdx,r8)-(rcx,r8)  return carry-borrow\n\n\tGLOBAL_FUNC mpn_addsub_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, [rdx+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rdx+r9*8]\n\tmov     r8, 
[rdx+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8]\n\tadc     r8, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rdx+r9*8+32]\n\tmov     r8, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tadc     r8, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rdx+r9*8+32]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r12, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tadc     rax, 0\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tsub     rax, r10\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/and_n.asm",
    "content": "dnl  mpn_and_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_and_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_and_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\tand (%rsi),%r8\n\tand 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\tand 16-32(%rsi),%r10\n\tand 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\tand 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\tand 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\tand 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/andn_n.as",
    "content": "\n;  AMD64 mpn_andn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_andn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tand     r8, [rsi+rcx*8+24]\n\tand     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tand     r10, [rsi+rcx*8+8]\n\tand     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/com_n.as",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_com_n\n\tsub     rdx, 4\n\tjc      next\n\talign   8\nloop1:\n\tmov     rax, [rsi+rdx*8+24]\n\tmov     rcx, [rsi+rdx*8+16]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+24], rax\n\tmov     [rdi+rdx*8+16], rcx\n\tmov     rax, [rsi+rdx*8+8]\n\tmov     rcx, [rsi+rdx*8]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+8], rax\n\tmov     [rdi+rdx*8], rcx\n\tsub     rdx, 4\n\tjae     loop1\nnext:\n\tadd     rdx, 4\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tdec     rdx\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tdec     rdx\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/copyd.as",
    "content": "\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyd(mp_ptr rdi ,mp_ptr rsi ,mp_size_t rdx)\n;\t(rdi,rdx)=(rsi,rdx)\n\n\tGLOBAL_FUNC mpn_copyd\n\tlea     rsi, [rsi+rdx*8-8]\n\tlea     rdi, [rdi+rdx*8-8]\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tlea     rdi, [rdi-32]\n\tmov     r9, [rsi-24]\n\tmov     [rdi+32], rax\n\tsub     rdx, 4\n\tmov     [rdi+24], rcx\n\tmov     [rdi+16], r8\n\tlea     rsi, [rsi-32]\n\tmov     [rdi+8], r9\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi-8]\n\tmov     r8, [rsi-16]\n\tmov     [rdi], rax\n\tmov     [rdi-8], rcx\n\tmov     [rdi-16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/copyi.as",
    "content": "\n;  mpn_copyi\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tmpn_copyi(mp_ptr rdi ,mp_ptr rsi ,mp_size_t rdx)\n;\t(rdi,rdx)=(rdi,rdx)\n\n\tGLOBAL_FUNC mpn_copyi\n\tsub     rdx, 4\n\tjl      L_skiplp\n\talign   16\nL_lp:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tsub     rdx, 4\n\tmov     r8, [rsi+16]\n\tmov     r9, [rsi+24]\n\tlea     rdi, [rdi+32]\n\tmov     [rdi-32], rax\n\tmov     [rdi-24], rcx\n\tmov     [rdi-16], r8\n\tlea     rsi, [rsi+32]\n\tmov     [rdi-8], r9\n\tjns     L_lp\nL_skiplp:\n\tadd     rdx, 2\n\tjz      L_case2\n\tjns     L_case3\n\tjp      L_case1\nL_case0:\n\tret\n\talign   16\nL_case1:\n\tmov     rax, [rsi]\n\tmov     [rdi], rax\n\tret\n\talign   16\nL_case2:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rcx\n\tret\n\talign   16\nL_case3:\n\tmov     rax, [rsi]\n\tmov     rcx, [rsi+8]\n\tmov     r8, [rsi+16]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rcx\n\tmov     [rdi+16], r8\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/divexact_byff.as",
    "content": "\n;  mpn_divexact_byff\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_divexact_byff(mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\txor     eax, eax\n\tmov     rcx, rdx\n\tand     rdx, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n\tje      L_skiplp\n; want carry clear here\n\talign   16\nL_lp:\n\tsbb     rax, [rsi]\n\tlea     rdi, [rdi+32]\n\tmov     r8, rax\n\tsbb     rax, [rsi+8]\n\tmov     r9, rax\n\tsbb     rax, [rsi+16]\n\tmov     r10, rax\n\tsbb     rax, [rsi+24]\n\tdec     rcx\n\tmov     [rdi-32], r8\n\tmov     [rdi-24], r9\n\tmov     [rdi-16], r10\n\tmov     [rdi-8], rax\n\tlea     rsi, [rsi+32]\n\tjnz     L_lp\nL_skiplp:\n; dont want to change the carry\n\tinc     rdx\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\nL_end:\n\tsbb     rax, 0\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_1)\nmov $0,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nloop:\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mov %rax,(%rdi,%r9,8)\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz loop\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx      rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_2)\nmov $1,%r9\nsub %rdx,%r9\nlea -8(%rdi,%rdx,8),%rdi\nlea -8(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC //r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nmov %r11,%r13\nmov %r12,%r14\n\nmov (%rsi,%r9,8),%r11\nmov 8(%rsi,%r9,8),%r12\nmov $0,%r10\nadd $2,%r9\njc skiplp\nALIGN(16)\nlp:\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\tmov %rax,-16(%rdi,%r9,8)\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov (%rsi,%r9,8),%r11\n\t\tmov 8(%rsi,%r9,8),%r12\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc lp\nskiplp:\nmov %r12,%r8\nmov %r13,%rax\t\nmul %r11\nmov %rax,-16(%rdi,%r9,8)\nimul %r14,%r11\t\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne case0\ncase1:\n\t\tmov (%rsi,%r9,8),%r11\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\tmov %rax,(%rdi,%r9,8)\n\tmul %rcx\n\tadd 
%r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\ncase0:\n\tmov %rdx,-8(%rdi,%r9,8)\t\n\tmov %rcx,%rax\t\n\tmul %rdx\t\n\tcmp %rax,%r8\t\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/divrem_hensel_r_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_r_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t hensel divide (rdi,rsi) / rdx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_r_1)\nmov $0,%r9\nsub %rsi,%r9\nlea (%rdi,%rsi,8),%rdi\n\nmov %rdx,%rcx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nloop:\n    mov (%rdi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz loop\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/double.asm",
    "content": "dnl  mpn_double\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_double)\nmov %rsi,%rax\nshr $2,%rsi\nand $3,%eax\njz t1\nshlq $1,(%rdi)\nlea 8(%rdi),%rdi\ndec %rax\njz t1\nrclq $1,(%rdi)\nlea 8(%rdi),%rdi\ndec %rax\njz t1\nrclq $1,(%rdi)\nlea 8(%rdi),%rdi\ndec %rax\nt1:\nsbb %rdx,%rdx\ncmp $0,%rsi\njz skiplp\nadd %rdx,%rdx\n.align 16\nlp:\n  rclq $1,(%rdi)\n  nop\n  rclq $1,8(%rdi)\n  rclq $1,16(%rdi)\n  rclq $1,24(%rdi)\n  nop\n  dec %rsi\n  lea 32(%rdi),%rdi\n  jnz lp\nsbb %rdx,%rdx\nskiplp:\nsub %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/half.asm",
    "content": "dnl  mpn_half\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_half)\nmov %rsi,%rax\nlea -8(%rdi,%rsi,8),%rdi\nshr $2,%rsi\nand $3,%eax\njz t1\nshrq $1,(%rdi)\nlea -8(%rdi),%rdi\ndec %rax\njz t1\nrcrq $1,(%rdi)\nlea -8(%rdi),%rdi\ndec %rax\njz t1\nrcrq $1,(%rdi)\nlea -8(%rdi),%rdi\ndec %rax\nt1:\nsbb %rdx,%rdx\ncmp $0,%rsi\njz skiplp\nadd %rdx,%rdx\n.align 16\nlp:\n  rcrq $1,(%rdi)\n  nop\n  rcrq $1,-8(%rdi)\n  rcrq $1,-16(%rdi)\n  rcrq $1,-24(%rdi)\n  nop\n  dec %rsi\n  lea -32(%rdi),%rdi\n  jnz lp\nsbb %rdx,%rdx\nskiplp:\nsub %rdx,%rax\nshl $63,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_hamdist(mp_ptr,mp_ptr,mp_size_t)\nC\trax               rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_hamdist)\npush %r12\npush %r14\npush %rbp\nmov $0x5555555555555555,%r8\nmov $0x3333333333333333,%r9\nmov $0x0f0f0f0f0f0f0f0f,%r10\nmov $0x0101010101010101,%r11\nxor %eax,%eax\nsub $3,%rdx\njc skip\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov 8(%rdi,%rdx,8),%r12\n\txor 8(%rsi,%rdx,8),%r12\n\tmov (%rdi,%rdx,8),%r14\n\txor (%rsi,%rdx,8),%r14\nsub $3,%rdx\njc skiplp\nALIGN(16)\nlp:\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\n\tmov %r12,%rbp\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rbp\n\tmov %rbp,%r12\n\tshr $2,%rbp\n\tand %r9,%r12\n\tand %r9,%rbp\n\tadd %r12,%rbp\n\n\tmov %r14,%r12\n\tshr $1,%r14\n\tand %r8,%r14\n\tsub %r14,%r12\n\tmov %r12,%r14\n\tshr $2,%r12\n\tand %r9,%r14\n\tand %r9,%r12\n\tadd %r14,%r12\n\n\tadd %rcx,%rbp\n\tadd %r12,%rbp\t\n\t\tmov 16(%rdi,%rdx,8),%rcx\n\tmov 
%rbp,%r14\n\tshr $4,%rbp\n\tand %r10,%r14\n\t\txor 16(%rsi,%rdx,8),%rcx\n\t\tmov 8(%rdi,%rdx,8),%r12\n\t\txor 8(%rsi,%rdx,8),%r12\n\tand %r10,%rbp\n\tadd %rbp,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\t\tmov (%rdi,%rdx,8),%r14\n\t\txor (%rsi,%rdx,8),%r14\n\tsub $3,%rdx\n\tjnc lp\nskiplp:\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %r12,%rbp\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rbp\n\tmov %rbp,%r12\n\tshr $2,%rbp\n\tand %r9,%r12\n\tand %r9,%rbp\n\tadd %r12,%rbp\n\t\n\tmov %r14,%r12\n\tshr $1,%r14\n\tand %r8,%r14\n\tsub %r14,%r12\n\tmov %r12,%r14\n\tshr $2,%r12\n\tand %r9,%r14\n\tand %r9,%r12\n\tadd %r14,%r12\n\t\n\tadd %rcx,%rbp\n\tadd %r12,%rbp\t\n\tmov %rbp,%r14\n\tshr $4,%rbp\n\tand %r10,%r14\n\tand %r10,%rbp\n\tadd %rbp,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\nskip:\n\tcmp $-2,%rdx\n\tjl case0\n\tjz case1\ncase2:\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %rcx,%r14\n\tshr $4,%rcx\n\tand %r10,%r14\n\tand %r10,%rcx\n\tadd %rcx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\tdec %rdx\ncase1:\n\tmov 16(%rdi,%rdx,8),%rcx\n\txor 16(%rsi,%rdx,8),%rcx\n\tmov %rcx,%rbp\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rbp\n\tmov %rbp,%rcx\n\tshr $2,%rbp\n\tand %r9,%rcx\n\tand %r9,%rbp\n\tadd %rbp,%rcx\n\t\n\tmov %rcx,%r14\n\tshr $4,%rcx\n\tand %r10,%r14\n\tand %r10,%rcx\n\tadd %rcx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\ncase0:\tpop %rbp\n\tpop %r14\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/ior_n.asm",
    "content": "dnl  mpn_ior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_ior_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_ior_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\tor (%rsi),%r8\n\tor 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\tor 16-32(%rsi),%r10\n\tor 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\tor 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\tor 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\tor 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/iorn_n.as",
    "content": "\n;  AMD64 mpn_iorn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_iorn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_hamdist)\nxor %eax,%eax\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nmov $3,%rcx\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\tmov (%rdi,%rcx,8),%r8\n\tmov 8(%rdi,%rcx,8),%r9\n\txor (%rsi,%rcx,8),%r8\n\tmov 16(%rdi,%rcx,8),%r10\n\tpopcnt %r8,%r8\n\txor 8(%rsi,%rcx,8),%r9\n\txor 16(%rsi,%rcx,8),%r10\n\tpopcnt %r9,%r9\n\tmov 24(%rdi,%rcx,8),%r11\n\tadd %r8,%rax\n\tpopcnt %r10,%r10\n\txor 24(%rsi,%rcx,8),%r11\n\tadd %r9,%rax\n\tpopcnt %r11,%r11\n\tadd %r10,%rax\n\tadd %r11,%rax\n\tadd $4,%rcx\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\tmov (%rdi),%r8\n\txor (%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase2:\tmov 8(%rdi),%r8\n\txor 8(%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase1:\tmov 16(%rdi),%r8\n\txor 16(%rsi),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\npush %rdx\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc notodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nl7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l7\n\tmov $3,%rcx\nnotodd:\tand $3,%rax\n\tpopcnt %rax,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nl1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\tand $7,%rbx\n\tpopcnt %rbx,%r8\n\tadd %r8,24(%rbp)\nl2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l2\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc skipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nl2:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nl3:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nl1:\tsbbq $0,(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nl4:\tsbbq $0,(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc notodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nl7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tinc %rcx\n\tsar $1,%r8\n\tjnz l7\nnotodd:\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/lshift.as",
    "content": "\n;  AMD64 mpn_lshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm\n\n%include 'yasm_mac.inc'\n\n%define MOVQ movd\n\n\tBITS 64\n\n   GLOBAL_FUNC  mpn_lshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshl     rdx, cl\n\tneg     rcx\n\tshr     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     r11, r8\n\tmov     rax, r9\n\tshl     r8, cl\n\tshl     r9, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     rax, cl\n\tor      r9, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tsub     rax, rcx\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmov     r8, rdx\n\tlea     r9, [rsi+r8*8-16]\n\tmov     r10, r9\n\tand     r9, -9\n\tmovdqa  xmm3, [r9]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tpshufd  xmm3, xmm3, 0x4E\n\tMOVQ    rax, xmm3\n\tcmp     r10, r9\n\tje      aligned\n\tmovq    xmm2, [rsi+r8*8-8]\n\tmovq    xmm4, xmm2\n\tpsrlq   xmm2, xmm1\n\tMOVQ    rax, xmm2\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm3\n\tmovq    [rdi+r8*8-8], xmm4\n\tdec     r8\naligned:\n\tsub     r8, 5\n\tjle     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovdqa  xmm3, [rsi+r8*8-8]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpshufd  xmm3, xmm3, 0x4E\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tsub     r8, 4\n\tjg      loop1\nskiploop:\n\tcmp     r8, -1\n\tje      left2\n\tjg      left3\n\tjp      left1\nleft0:\n;\tmay be easier to bswap xmm5 first , same with other cases\n\tpxor    xmm2, xmm2\n\tpsllq   xmm5, xmm0\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], 
xmm5\n\tret\n\talign   16\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovq    xmm3, [rsi+r8*8]\n\tpshufd  xmm3, xmm3, 0x4E\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tpsllq   xmm5, xmm0\n\tmovhpd  [rdi+r8*8], xmm5\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpxor    xmm3, xmm3\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpsllq   xmm4, xmm0\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_popcount)\nxor %eax,%eax\npopcnt -8(%rdi,%rsi,8),%r9\nsub $3,%rsi\njc skiplp\nALIGN(16)\nlp:\tpopcnt 8(%rdi,%rsi,8),%r8\n\tadd %r9,%rax\n\tadd %r8,%rax\n\tpopcnt (%rdi,%rsi,8),%r9\n\tsub $2,%rsi\n\tjnc lp\nskiplp:\njnp case0\ncase1:\tpopcnt (%rdi),%r8\n\tadd %r8,%rax\ncase0:\tadd %r9,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k10/rshift.as",
    "content": "\n;  AMD64 mpn_rshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)>>rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm \n\n%define MOVQ movd\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n   GLOBAL_FUNC mpn_rshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshr     rdx, cl\n\tneg     rcx\n\tshl     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     rax, r8\n\tmov     r11, r9\n\tshr     r8, cl\n\tshr     r9, cl\n\tneg     rcx\n\tshl     r11, cl\n\tshl     rax, cl\n\tor      r8, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tlea     r9, [rsi+8]\n\tsub     rax, rcx\n\tand     r9, -16\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmovdqa  xmm5, [r9]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tMOVQ    rax, xmm5\n\tcmp     rsi, r9\n\tlea     rsi, [rsi+rdx*8-40]\n\tje      aligned\n\tmovq    xmm2, [r9-8]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tpsrlq   xmm4, xmm0\n\tpor     xmm4, xmm5\n\tmovq    [rdi], xmm4\n\tlea     rdi, [rdi+8]\n\tdec     rdx\n\tMOVQ    rax, xmm2\naligned:\n\tlea     rdi, [rdi+rdx*8-40]\n\tpsrlq   xmm3, xmm0\n\tmov     r8d, 5\n\tsub     r8, rdx\n\tjnc     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovdqa  xmm5, [rsi+r8*8+32]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tadd     r8, 4\n\tjnc     loop1\nskiploop:\n\tcmp     r8, 2\n\tja      left0\n\tjz      left1\n\tjp      left2\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovq    xmm5, 
[rsi+r8*8+32]\n\tmovq    xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8+32], xmm3\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft0:\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/lshift2.asm",
    "content": "dnl  mpn_lshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift2)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%r11,%r8,4),%r11\n\tmov %r11,24(%rdi,%rcx,8)\n\tshr $62,%r8\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov %r11,%rax\n\tmov 
%r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r10,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/lshift3.asm",
    "content": "dnl  mpn_lshift3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift3(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift3)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,8),%r8\n\tshr $61,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,8),%r9\n\tshr $61,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,8),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $61,%r11\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%r11,%r8,8),%r11\n\tmov %r11,24(%rdi,%rcx,8)\n\tshr $61,%r8\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,8),%r8\n\tshr $61,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,8),%r9\n\tshr $61,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,8),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $61,%r11\n\tmov %r11,%rax\n\tmov 
%r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,8),%r8\n\tshr $61,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,8),%r9\n\tshr $61,%r10\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r10,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,8),%r8\n\tshr $61,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/lshift4.asm",
    "content": "dnl  mpn_lshift4\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift4(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift4)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nmov $0,%rax\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,2),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $60,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,2),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $60,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,2),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $60,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%rax,%r8,2),%rdx\n\tlea (%r11,%rdx,8),%r11\n\tshr $60,%r8\n\tmov %r11,24(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,2),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $60,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,2),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $60,%r10\n\tmov 
16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,2),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $60,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r11,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,2),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $60,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,2),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $60,%r10\n\tmov %r10,%rax\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,2),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $60,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/lshift5.asm",
    "content": "dnl  mpn_lshift5\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift5(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift5)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nmov $0,%rax\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,4),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $59,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,4),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $59,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,4),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $59,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%rax,%r8,4),%rdx\n\tlea (%r11,%rdx,8),%r11\n\tshr $59,%r8\n\tmov %r11,24(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,4),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $59,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,4),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $59,%r10\n\tmov 
16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,4),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $59,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r11,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,4),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $59,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,4),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $59,%r10\n\tmov %r10,%rax\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,4),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $59,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/lshift6.asm",
    "content": "dnl  mpn_lshift6\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift6(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift6)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nmov $0,%rax\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,8),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $58,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,8),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $58,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,8),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $58,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%rax,%r8,8),%rdx\n\tlea (%r11,%rdx,8),%r11\n\tshr $58,%r8\n\tmov %r11,24(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\ncase3:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,8),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $58,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,8),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $58,%r10\n\tmov 
16(%rsi,%rcx,8),%r11\n\tlea (%rax,%r11,8),%rdx\n\tlea (%r10,%rdx,8),%r10\n\tshr $58,%r11\n\tmov %r10,16(%rdi,%rcx,8)\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r11,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,8),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $58,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%rax,%r10,8),%rdx\n\tlea (%r9,%rdx,8),%r9\n\tshr $58,%r10\n\tmov %r10,%rax\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%rax,%r9,8),%rdx\n\tlea (%r8,%rdx,8),%r8\n\tshr $58,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\ncase0:\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/k8only/rshift2.asm",
    "content": "dnl  mpn_rshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_rshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_rshift2)\nmov %rdx,%rcx\nlea 24(%rsi),%rsi\nlea 24(%rdi),%rdi\nxor %eax,%eax\nxor %edx,%edx\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tmov -16(%rsi,%rcx,8),%r10\n\tmov -24(%rsi,%rcx,8),%r11\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\trcr $1,%r11\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\trcr $1,%r11\n\tmov %r11,-24(%rdi,%rcx,8)\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tsub $4,%rcx\n\tmov %r9,24(%rdi,%rcx,8)\n\tmov %r10,16(%rdi,%rcx,8)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nja case3\nje case2\njp case1\ncase0:\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase3:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tmov -16(%rsi,%rcx,8),%r10\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr $1,%r10\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\trcr 
$1,%r10\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,-8(%rdi,%rcx,8)\n\tmov %r10,-16(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%rcx,8),%r8\n\tmov -8(%rsi,%rcx,8),%r9\n\tadd %rax,%rax\n\trcr $1,%r8\n\trcr $1,%r9\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\trcr $1,%r9\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,-8(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%rcx,8),%r8\n\tadd %rax,%rax\n\trcr $1,%r8\n\tsbb %rax,%rax\n\tadd %rdx,%rdx\n\trcr $1,%r8\n\tsbb %rdx,%rdx\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%rax,%rdx,2),%rax\n\tneg %rax\n\tshl $62,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\nmov %rbx,-8(%rsp)\nmov %rbp,-16(%rsp)\nmov %r12,-24(%rsp)\nmov %r13,-32(%rsp)\nmov %r14,-40(%rsp)\nmov %r15,-48(%rsp)\nmov %rdx,-56(%rsp)\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt 
$2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 
8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\tmov -56(%rsp),%r8\n\tbt $0,%r8\n\tjnc notodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nl7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l7\n\tmov $3,%rcx\nnotodd:\txor %r8,%r8\n\tshr $1,%rax\n\tadc %r8,%r8\n\tshr $1,%rax\n\tadc $0,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nl1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\txor %r8,%r8\n\tshr $1,%rbx\n\tadc %r8,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tadd %r8,24(%rbp)\nl2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l2\nmov -8(%rsp),%rbx\nmov -16(%rsp),%rbp\nmov -24(%rsp),%r12\nmov -32(%rsp),%r13\nmov -40(%rsp),%r14\nmov -48(%rsp),%r15\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\nmov %rbx,-8(%rsp)\nmov %rbp,-16(%rsp)\nmov %r12,-24(%rsp)\nmov %r13,-32(%rsp)\nmov %r14,-40(%rsp)\nmov %r15,-48(%rsp)\nmov %rdx,-56(%rsp)\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt 
$2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 
8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tmov -56(%rsp),%r15\n\tbt $0,%r15\n\tjnc skipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nl2:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nl3:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nl1:\tsbbq $0,(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nl4:\tsbbq $0,(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc notodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nl7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tinc %rcx\n\tsar $1,%r8\n\tjnz l7\nnotodd:\t\nmov -8(%rsp),%rbx\nmov -16(%rsp),%rbp\nmov -24(%rsp),%r12\nmov -32(%rsp),%r13\nmov -40(%rsp),%r14\nmov -48(%rsp),%r15\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/lshift.as",
    "content": "\n;  AMD64 mpn_lshift\n; Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<rcx\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift\n\tmov     eax, 64\n\tsub     rax, rcx\n\tmovq    mm0, rcx\n\tsub     rdx, 4\n\tmovq    mm1, rax\n\tmovq    mm5, [rsi+rdx*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjbe     skiploop\n\talign   16\nloop1\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm2, [rsi+rdx*8]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+8], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8-8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8], mm4\n\tpsllq   mm3, mm0\n\tsub     rdx, 4\n\tja      loop1\nskiploop:\n\tcmp     rdx, -1\n\tjl      next\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rsi+rdx*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rdi+rdx*8+16], mm4\n\tpsllq   mm3, mm0\n\tsub     rdx, 2\nnext:\n\ttest    rdx, 1\n\tjnz     end\n\tmovq    mm2, [rsi+rdx*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rdi+rdx*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    [rdi+rdx*8+16], mm4\n\temms\n\tret\nend:\n\tmovq    [rdi+rdx*8+24], mm3\n\temms\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/lshift1.as",
    "content": "\n;  AMD64 mpn_lshift1\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<1\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift1\n\txor     rax, rax\n\tmov     r11, rdx\n\tand     r11, 7\n\tinc     r11\n\tshr     rdx, 3\n;\tand clear carry flag\n\tcmp     rdx, 0\n\tjz      next\n\talign   16\nloop1:\n\tmov     rcx, [rsi]\n\tmov     r8, [rsi+8]\n\tmov     r10, [rsi+16]\n\tmov     r9, [rsi+24]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi], rcx\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     [rdi+24], r9\n\tmov     rcx, [rsi+32]\n\tmov     r8, [rsi+40]\n\tmov     r10, [rsi+48]\n\tmov     r9, [rsi+56]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi+32], rcx\n\tmov     [rdi+40], r8\n\tmov     [rdi+48], r10\n\tmov     [rdi+56], r9\n\tlea     rdi, [rdi+64]\n\tdec     rdx\n\tlea     rsi, [rsi+64]\n\tjnz     loop1\nnext:\n\tdec     r11\n\tjz      end\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     rcx, [rsi]\n\tadc     rcx, rcx\n\tmov     [rdi], rcx\n\tdec     r11\n\tjz    
  end\n\tmov     rcx, [rsi+8]\n\tadc     rcx, rcx\n\tmov     [rdi+8], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+16]\n\tadc     rcx, rcx\n\tmov     [rdi+16], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+24]\n\tadc     rcx, rcx\n\tmov     [rdi+24], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+32]\n\tadc     rcx, rcx\n\tmov     [rdi+32], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+40]\n\tadc     rcx, rcx\n\tmov     [rdi+40], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+48]\n\tadc     rcx, rcx\n\tmov     [rdi+48], rcx\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/lshiftc.asm",
    "content": "dnl  mpn_lshiftc\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_lshiftc)\nMOVQ %rcx,%mm0\nmov $64,%rax\nsub %rcx,%rax\npcmpeqb %mm6,%mm6\nMOVQ %rax,%mm1\nlea 8(%rsi),%rsi\nlea 8(%rdi),%rdi\nsub $5,%rdx\nmovq 24(%rsi,%rdx,8),%mm5\nmovq %mm5,%mm3\npsrlq %mm1,%mm5\nMOVQ %mm5,%rax\npsllq %mm0,%mm3\njc skiplp\nALIGN(16)\nlp:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tmovq -8(%rsi,%rdx,8),%mm5\n\tsub $4,%rdx\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,40(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,32(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tjnc lp\nskiplp:\ncmp $-2,%rdx\njz case2\njp case1\njs case0\ncase3:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq 
%mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase2:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase1:\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\ncase0:\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nlp:\n\tmov -8(%rsi,%rcx,8),%r10\n\tmul %r8\n\tadd %rax,%r10\n\tmov $0,%r11\n\tadc %rdx,%r11\n\tmov %r13,%rax\n\tmul %r9\n\tadd %r10,%rax\n\tmov %r11,%r13\n\tadc %rdx,%r13\n\tdec %rcx\n\tjnz lp\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n\n\n\n"
  },
  {
    "path": "mpn/x86_64/k8/mod_1_2.asm",
    "content": "dnl  mpn_mod_1_2\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,3)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_2)\nC // require rdx >=4\npush %r12\npush %r13\npush %r14\nmov -8(%rsi,%rdx,8),%r14\nmov -16(%rsi,%rdx,8),%r13\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov %rdx,%rcx\nmov -24(%rsi,%rdx,8),%rax\nmul %r8\nmov -32(%rsi,%rcx,8),%r11\nxor %r12,%r12\nsub $6,%rcx\njc skiplp\nALIGN(16)\nlp:\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov 8(%rsi,%rcx,8),%rax\n\tmov %r12,%r14\n\tadc %rdx,%r14\n\tmul %r8\n\tmov $0,%r12d\n\tmov 0(%rsi,%rcx,8),%r11\n\tsub $2,%rcx\n\tjnc lp\nskiplp:\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncmp $-2,%rcx\nje case0\ncase1:\n\tmov 8(%rsi,%rcx,8),%r11\n\txor 
%r12,%r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncase0:\t\nmov %r8,%rax\nmul %r14\nadd %rax,%r13\nadc $0,%rdx\nmov %r13,(%rdi)\nmov %rdx,8(%rdi)\npop %r14\npop %r13\npop %r12\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/mod_1_3.asm",
    "content": "dnl  mpn_mod_1_3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,4)  contains B^i % divisor\n\nASM_START()\nPROLOGUE(mpn_mod_1_3)\nC // require rdx >=5\npush %r12\npush %r13\npush %r14\npush %r15\nmov -8(%rsi,%rdx,8),%r15\nmov -16(%rsi,%rdx,8),%r14\nmov -32(%rsi,%rdx,8),%rax\nmov -40(%rsi,%rdx,8),%r12\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov 24(%rcx),%r11\nmov %rdx,%rcx\nsub $8,%rcx\njc skiplp\nALIGN(16)\nC // r15 r14 -8() -16()=rax -24()=r12\nlp:\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 0(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 8(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tsub $3,%rcx\n\tjnc lp\nskiplp:\nC // we have loaded up the next two limbs\nC // but because they are out of order we can have to do 3 limbs min\ncmp $-2,%rcx\njl case1\nje 
case2\ncase3:\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 8(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 16(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tC // r15 r14 rax r12\n\tmov $0,%r13\n\tmul %r8\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\ncase2:\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 16(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tC // r15 r14 r12\n\tmov $0,%r13\n\tmov %r8,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\ncase1:\n\tC // one more is 3 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12 \n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tmov %r8,%rax\n\tmul 
%r15\n\tadd %rax,%r14\n\tadc $0,%rdx\n\tmov %r14,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/mul_1.asm",
    "content": "dnl  mpn_mul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tmov %r8,(%rdi,%r11,8)\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tmov %r9,8(%rdi,%r11,8)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tmov %r10,16(%rdi,%r11,8)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tmov %r12,24(%rdi,%r11,8)\n\tadd %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nmov %r8,(%rdi,%r11,8)\nadd %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tmov %r9,8(%rdi)\n\tadd 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tmov %r10,16(%rdi)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tmov %r12,24(%rdi)\n\tadd %rax,%r8\n\tadc %rdx,%r9\n\tmov %r8,32(%rdi)\n\t#add $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tmov %r9,16(%rdi)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tmov %r10,24(%rdi)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov %r12,32(%rdi)\n\t#add $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tmov %r9,24(%rdi)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov %r10,32(%rdi)\n\t#add $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tmov %rax,(%rdi)\n\t#add $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tmov %r9,32(%rdi)\n\t#add $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/mul_2.as",
    "content": ";  X86_64 mpn_mul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n; (rdi,rdx+1)=(rsi,rdx)*(rcx,2) return carrylimb\n\n\tGLOBAL_FUNC mpn_mul_2\npush    rbx\nmov     r8, [rcx]\nmov     rcx, [rcx+8]\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\nmov     rbx, 3\nsub     rbx, rdx\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     r8\nmov     r11, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign 16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd  
   rbx, 3\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmov     [rdi+rbx*8], r11\nmul     rcx\nadd     r9, rax\nadc     r10, rdx\ncmp     rbx, 1\nja      case2\nje      case1\ncase0:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n\tpop     rbx\n\tret\nalign 16\ncase1:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     rbx\n\tret\nalign 16\ncase2:\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/k8/nand_n.asm",
    "content": "dnl  mpn_nand_n\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_nand_n)\nlea (%rdx,%rcx,8),%rdx\nlea (%rsi,%rcx,8),%rsi\nlea (%rdi,%rcx,8),%rdi\nneg %rcx\nadd $3,%rcx\njc skip\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\nadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\njc skiplp\nALIGN(16)\nlp:\n\tand -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tand -48(%rsi,%rcx,8),%r9\n\tand -40(%rsi,%rcx,8),%r10\n\tand -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov %r11,-32(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\n\tjnc lp\nskiplp:\n\tand -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tand -48(%rsi,%rcx,8),%r9\n\tand -40(%rsi,%rcx,8),%r10\n\tand -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov 
%r11,-32(%rdi,%rcx,8)\nskip:\ncmp $2,%rcx\njg case0\nje case1\njp case2\ncase3:\tmov -24(%rdx),%r8\n\tand -24(%rsi),%r8\n\tnot %r8\n\tmov %r8,-24(%rdi)\ncase2:\tmov -16(%rdx),%r8\n\tand -16(%rsi),%r8\n\tnot %r8\n\tmov %r8,-16(%rdi)\ncase1:\tmov -8(%rdx),%r8\n\tand -8(%rsi),%r8\n\tnot %r8\n\tmov %r8,-8(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/nior_n.asm",
    "content": "dnl  mpn_nior_n\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_nior_n)\nlea (%rdx,%rcx,8),%rdx\nlea (%rsi,%rcx,8),%rsi\nlea (%rdi,%rcx,8),%rdi\nneg %rcx\nadd $3,%rcx\njc skip\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\nadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\njc skiplp\nALIGN(16)\nlp:\n\tor -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tor -48(%rsi,%rcx,8),%r9\n\tor -40(%rsi,%rcx,8),%r10\n\tor -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov -24(%rdx,%rcx,8),%r8\n\tmov -16(%rdx,%rcx,8),%r9\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov %r11,-32(%rdi,%rcx,8)\n\tadd $4,%rcx\n\tmov -40(%rdx,%rcx,8),%r10\n\tmov -32(%rdx,%rcx,8),%r11\n\tjnc lp\nskiplp:\n\tor -56(%rsi,%rcx,8),%r8\n\tnot %r8\n\tor -48(%rsi,%rcx,8),%r9\n\tor -40(%rsi,%rcx,8),%r10\n\tor -32(%rsi,%rcx,8),%r11\n\tmov %r8,-56(%rdi,%rcx,8)\n\tnot %r9\n\tnot %r10\n\tmov %r9,-48(%rdi,%rcx,8)\n\tnot %r11\n\tmov %r10,-40(%rdi,%rcx,8)\n\tmov 
%r11,-32(%rdi,%rcx,8)\nskip:\ncmp $2,%rcx\njg case0\nje case1\njp case2\ncase3:\tmov -24(%rdx),%r8\n\tor -24(%rsi),%r8\n\tnot %r8\n\tmov %r8,-24(%rdi)\ncase2:\tmov -16(%rdx),%r8\n\tor -16(%rsi),%r8\n\tnot %r8\n\tmov %r8,-16(%rdi)\ncase1:\tmov -8(%rdx),%r8\n\tor -8(%rsi),%r8\n\tnot %r8\n\tmov %r8,-8(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/not.asm",
    "content": "dnl  mpn_not\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_not)\nmov $1,%ecx\nlea -8(%rdi,%rsi,8),%rdi\nsub %rsi,%rcx\njnc skiplp\nALIGN(16)\nlp:\tnotq (%rdi,%rcx,8)\n\tnotq 8(%rdi,%rcx,8)\n\tadd $2,%rcx\n\tjnc lp\nskiplp:\njnz case0\ncase1:\tnotq (%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_popcount(mp_ptr,mp_size_t)\nC\trax               rdi,   rsi\n\nASM_START()\nPROLOGUE(mpn_popcount)\npush %r12\npush %r14\npush %r15\nmov $0x5555555555555555,%r8\nmov $0x3333333333333333,%r9\nmov $0x0f0f0f0f0f0f0f0f,%r10\nmov $0x0101010101010101,%r11\nxor %rax,%rax\nsub $3,%rsi\njc skip\n\tmov 16(%rdi,%rsi,8),%rcx\n\tmov 8(%rdi,%rsi,8),%r12\n\tmov (%rdi,%rsi,8),%r14\nsub $3,%rsi\njc skiplp\nALIGN(16)\nlp:\n\tmov %rcx,%rdx\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rdx\n\tmov %rdx,%rcx\n\tshr $2,%rdx\n\tand %r9,%rcx\n\tand %r9,%rdx\n\tadd %rcx,%rdx\n\t\n\tmov %r12,%rcx\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rcx\n\tmov %rcx,%r12\n\tshr $2,%rcx\n\tand %r9,%r12\n\tand %r9,%rcx\n\tadd %rcx,%r12\n\t\n\tmov %r14,%r15\n\tshr $1,%r14\n\tand %r8,%r14\n\t\tmov 16(%rdi,%rsi,8),%rcx\n\tsub %r14,%r15\n\tmov %r15,%r14\n\tshr $2,%r15\n\tand %r9,%r14\n\tand %r9,%r15\n\tadd %r14,%r15\n\t\n\tadd %rdx,%r12\n\tadd %r15,%r12\n\t\n\tmov %r12,%r14\n\tshr $4,%r12\n\tand %r10,%r14\n\tand %r10,%r12\n\tadd %r12,%r14\n\timul 
%r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\t\tmov 8(%rdi,%rsi,8),%r12\n\tsub $3,%rsi\n\t\tmov 24-0(%rdi,%rsi,8),%r14\n\tjnc lp\nskiplp:\n\tmov %rcx,%rdx\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rdx\n\tmov %rdx,%rcx\n\tshr $2,%rdx\n\tand %r9,%rcx\n\tand %r9,%rdx\n\tadd %rcx,%rdx\n\t\n\tmov %r12,%rcx\n\tshr $1,%r12\n\tand %r8,%r12\n\tsub %r12,%rcx\n\tmov %rcx,%r12\n\tshr $2,%rcx\n\tand %r9,%r12\n\tand %r9,%rcx\n\tadd %rcx,%r12\n\t\n\tmov %r14,%r15\n\tshr $1,%r14\n\tand %r8,%r14\n\tsub %r14,%r15\n\tmov %r15,%r14\n\tshr $2,%r15\n\tand %r9,%r14\n\tand %r9,%r15\n\tadd %r14,%r15\n\t\n\tadd %rdx,%r12\n\tadd %r15,%r12\n\t\n\tmov %r12,%r14\n\tshr $4,%r12\n\tand %r10,%r14\n\tand %r10,%r12\n\tadd %r12,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\nskip:\tcmp $-2,%rsi\n\tjl case0\n\tjz case1\ncase2:\n\tmov 16(%rdi,%rsi,8),%rcx\n\n\tmov %rcx,%rdx\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rdx\n\tmov %rdx,%rcx\n\tshr $2,%rdx\n\tand %r9,%rcx\n\tand %r9,%rdx\n\tadd %rcx,%rdx\n\t\n\tmov %rdx,%r14\n\tshr $4,%rdx\n\tand %r10,%r14\n\tand %r10,%rdx\n\tadd %rdx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\n\tdec %rsi\ncase1:\tmov 16(%rdi,%rsi,8),%rcx\n\n\tmov %rcx,%rdx\n\tshr $1,%rcx\n\tand %r8,%rcx\n\tsub %rcx,%rdx\n\tmov %rdx,%rcx\n\tshr $2,%rdx\n\tand %r9,%rcx\n\tand %r9,%rdx\n\tadd %rcx,%rdx\n\t\n\tmov %rdx,%r14\n\tshr $4,%rdx\n\tand %r10,%r14\n\tand %r10,%rdx\n\tadd %rdx,%r14\n\timul %r11,%r14\n\tshr $56,%r14\n\tadd %r14,%rax\ncase0:\tpop %r15\n\tpop %r14\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/redc_1.as",
    "content": "\n;  AMD64 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tVersion 1.0.4\n;\t(rdi,rcx)=(rsi,rcx)+(rdx,rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign   16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc    
 rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign   16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov 
    r12, 0\n\tmov     r9, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tdec     r15\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     
r13, [r8+r14*8+8]\n\tadc     r12, 0\n\tdec     r15\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tdec     r15\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tdec     r15\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign   16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov  
   r13, [r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign   16\ncase0:\n\tmpn_addmul_1_int 0\n\talign   16\ncase1:\n\tmpn_addmul_1_int 1\n\talign   16\ncase3:\n\tmpn_addmul_1_int 3\n\talign   16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign   16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/rsh1add_n.as",
    "content": "\n;  AMD64 mpn_rsh1add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)+(rdx,rcx))/2 return low bit of sum\n\n\tGLOBAL_FUNC mpn_rsh1add_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, 
[rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/rsh1sub_n.as",
    "content": "\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two compliment\n\n\tGLOBAL_FUNC mpn_rsh1sub_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      
L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/rsh_divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9  / rcx ) >> r8 \nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\nmov %r9,%r10\nmov $1,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\nmov -8(%rsi,%r9,8),%rax\nsub %r10,%rax\nsbb %r8,%r8\nimul %r11,%rax\nMOVQ %rax,%mm4\nmovq %mm4,%mm5\npsrlq %mm0,%mm4\npsllq %mm1,%mm5\npsrlq %mm1,%mm5\nmul %rcx\ncmp $0,%r9\nje one\nadd %r8,%r8\nALIGN(16)\nloop:\n    movq %mm4,%mm2\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    MOVQ %rax,%mm3\n    movq %mm3,%mm4\n    psllq %mm1,%mm3\n    psrlq %mm0,%mm4\n    por %mm3,%mm2\n    movq %mm2,-8(%rdi,%r9,8)\n    mul %rcx\n    add %r8,%r8\n    inc %r9\n    jnz loop\nskiploop:\nmovq %mm4,-8(%rdi,%r9,8)\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\none:\nmovq %mm4,-8(%rdi,%r9,8)\nadd %r8,%r8\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/rsh_divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9 / rcx ) >> r8    rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1_2 with shifting on the output of the quotient\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\nC\t// 3limb minimum for the mo\nmov %r9,%r10\nmov $2,%r9\nsub %rdx,%r9\nlea -16(%rdi,%rdx,8),%rdi\nlea -16(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC 
//r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nC // for the first limb we can not store (as we have to shift) so we need to\nC // do first limb separately , we could do it as normal as an extention of\nC // the loop , but if we do it as a 1 limb inverse then we can start it\nC // eailer , ie interleave it with the calculation of the 2limb inverse\n\nmov %r11,%r13\nmov %r12,%r14\n\n\nmov (%rsi,%r9,8),%r11\nsub %r10,%r11\nsbb %r10,%r10\n\nimul %r13,%r11\nMOVQ %r11,%mm2\npsrlq %mm0,%mm2\nmov %rcx,%rax\nmul %r11\nmov 8(%rsi,%r9,8),%r11\nmov 16(%rsi,%r9,8),%r12\nadd %r10,%r10\nsbb %rdx,%r11\nsbb $0,%r12\nsbb %r10,%r10\n\n\nadd $2,%r9\njc skiplp\nALIGN(16)\nlp:\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,-16(%rdi,%r9,8)\n\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov 8(%rsi,%r9,8),%r11\n\t\tmov 16(%rsi,%r9,8),%r12\n\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc lp\nskiplp:\nmov %r12,%r8\nmov %r13,%rax\nmul %r11\n\nMOVQ %rax,%mm3\nmovq %mm3,%mm4\npsllq %mm1,%mm3\npsrlq %mm0,%mm4\npor %mm3,%mm2\nmovq %mm2,-16(%rdi,%r9,8)\n\nimul %r14,%r11\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne case0\ncase1:\n\t\tmov 8(%rsi,%r9,8),%r11\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq 
%mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,(%rdi,%r9,8)\n\tmovq %mm4,8(%rdi,%r9,8)\n\n\tmul %rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\ncase0:\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\tmovq %mm2,(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\tcmp %rax,%r8\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/rshift.asm",
    "content": "dnl  mpn_rshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_rshift(mp_ptr,mp_ptr,mp_size_t,ul)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_rshift)\nC below really a movq\nmovd %rcx,%mm0\n\nmov $64,%rax\nsub %rcx,%rax\n\nC below really a movq\nmovd %rax,%mm1\n\nmov $4,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\nsub %rdx,%rcx\nmovq (%rsi,%rcx,8),%mm5\nmovq %mm5,%mm3\npsllq %mm1,%mm5\n\nC below really a movq\nmovd %mm5,%rax\n\npsrlq %mm0,%mm3\njge skiplp\nALIGN(16)\nlp:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq 24(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq 32(%rsi,%rcx,8),%mm5\n\tmovq %mm3,16(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,24(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tadd $4,%rcx\n\tjnc lp\nskiplp:\ncmp 
$2,%rcx\nja case0\njz case1\njp case2\ncase3:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq 24(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,16(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm4,24(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase2:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq 16(%rsi,%rcx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsllq %mm1,%mm5\n\tpor %mm5,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm3\n\tmovq %mm3,16(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase1:\n\tmovq 8(%rsi,%rcx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsllq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq %mm3,(%rdi,%rcx,8)\n\tpsrlq %mm0,%mm4\n\tmovq %mm4,8(%rdi,%rcx,8)\n\temms\n\tret\nALIGN(16)\ncase0:\n\tmovq %mm3,(%rdi,%rcx,8)\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/rshift1.as",
    "content": "\n;  AMD64 mpn_rshift1\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)>>1\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_rshift1\n\txor     rax, rax\n\tlea     rsi, [rsi+rdx*8-8]\n\tlea     rdi, [rdi+rdx*8-8]\n\tmov     r11, rdx\n\tand     r11, 7\n\tinc     r11\n\tshr     rdx, 3\n;\tand clear carry flag\n\tcmp     rdx, 0\n\tjz      next\n\talign   16\nloop1:\n\tmov     rcx, [rsi]\n\tmov     r8, [rsi-8]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-24]\n\trcr     rcx, 1\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tmov     [rdi], rcx\n\tmov     [rdi-8], r8\n\tmov     [rdi-16], r9\n\tmov     [rdi-24], r10\n\tmov     rcx, [rsi-32]\n\tmov     r8, [rsi-40]\n\tmov     r9, [rsi-48]\n\tmov     r10, [rsi-56]\n\trcr     rcx, 1\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tmov     [rdi-32], rcx\n\tmov     [rdi-40], r8\n\tmov     [rdi-48], r9\n\tmov     [rdi-56], r10\n\tlea     rsi, [rsi-64]\n\tdec     rdx\n\tlea     rdi, [rdi-64]\n\tjnz     loop1\nnext:\n\tdec     r11\n\tjz      end\n;\tCould suffer cache-bank conflicts in this tail part\n\tmov     rcx, [rsi]\n\trcr     rcx, 
1\n\tmov     [rdi], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-8]\n\trcr     rcx, 1\n\tmov     [rdi-8], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-16]\n\trcr     rcx, 1\n\tmov     [rdi-16], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-24]\n\trcr     rcx, 1\n\tmov     [rdi-24], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-32]\n\trcr     rcx, 1\n\tmov     [rdi-32], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-40]\n\trcr     rcx, 1\n\tmov     [rdi-40], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi-48]\n\trcr     rcx, 1\n\tmov     [rdi-48], rcx\nend:\n\trcr     rax, 1\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_store)\nmov %rsi,%rcx\nand $7,%rsi\nmov $8,%r8\nsub %rsi,%r8\t\t#// r8=8-n%8\nlea (%r8,%r8,4),%r8\t\nlea lp(%rip),%rax\t#// rax is lpjmp\nadd %r8,%rax\t\t#// rax=5*(8-n%8)+lp\nand $-8,%rcx\nadd $48,%rdi\njmp  *%rax\nALIGN(16)\nlp:\tmov %rdx,8(%rdi,%rcx,8)\t\t#// 5 bytes\t// n%8=0 NOT HERE\n\tmov %rdx, (%rdi,%rcx,8)\t\t#\t\t// n%8=7\t// 5 bytes with nop\n\tnop\n\tmov %rdx, -8(%rdi,%rcx,8)\t#\t\t// n%8=6\n\tmov %rdx,-16(%rdi,%rcx,8)\t#\t\t// n%8=5\n\tmov %rdx,-24(%rdi,%rcx,8)\t#\t\t// n%8=4\n\tmov %rdx,-32(%rdi,%rcx,8)\t#\t\t// n%8=3\n\tmov %rdx,-40(%rdi,%rcx,8)\t#\t\t// n%8=2\n\tmov %rdx,-48(%rdi,%rcx,8)\t#\t\t// n%8=1\n\tnop\t\t\t\t#\t\t// n%8=0\n\tsub $8,%rcx\n\tjnc lp\t\t\t\t#// 2 bytes\nnop\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/sub_err1_n.asm",
    "content": "dnl  mpn_sub_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_sub_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_sub_err1_n)\n       C if we rearrange the params we could save some moves\n       C (rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n       \n       mov 8(%rsp),%r10            C cy\n       mov %rbp,-16(%rsp)          C save rbp\n       lea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n       mov %r12,-24(%rsp)          C save r12\n       mov %r13,-32(%rsp)          C save r13\n       lea -24(%rsi,%r9,8),%rsi    C up += n - 3\n       mov %r14,-40(%rsp)          C save r14\n       mov %r15,-48(%rsp)          C save r15\n       lea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n       mov %rcx,-56(%rsp)\t       C save rcx\n       mov %rbx,-8(%rsp)           C save rbx\n       mov $3,%r11                 C i = 3\n       shl $63,%r10                \n       lea (%r8,%r9,8),%r8         C yp += n\n       sub 
%r9,%r11\t              C i = 3 - n\n       mov $0,%r9                  C t1 = 0\n       mov $0,%rax                 C t2 = 0\n       mov $0,%rbx                 C t3 = 0\n       jnc skiplp                  C if done goto skiplp\nALIGN(16)\nlp:\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 -= *(vp + i + 0) + (cy & 1)\n\tsbb (%rdx,%r11,8),%r12      \n\tcmovc -8(%r8),%rax          C if borrow1, t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13     C s2 -= *(vp + i + 1) + borrow1\n\tcmovc -16(%r8),%rbx         C if borrow2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14    C s3 -= *(vp + i + 2) + borrow2\n\tcmovc -24(%r8),%rcx         C if borrow3 t4 = *(yp - 3)\n\tsbb 24(%rdx,%r11,8),%r15    C s4 -= *(vp + i + 3) + borrow3\n\tcmovc -32(%r8),%rbp         C if borrow4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = borrow\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc  lp                     C not done, goto lp\nskiplp:\n       cmp $2,%r11             C cmp(i, 2)\n       mov -16(%rsp),%rbp      C restore rbp\n       mov 
-48(%rsp),%r15      C restore r15\n       ja case0                C i == 3 goto case0 \n       je case1                C i == 2 goto case1\n       jp case2                C i == 1 goto case2\ncase3:\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14   C s3 = *(up + i + 2) \n\tshl $1,%r10                C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12     C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax         C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13    C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx        C if borrow3 t3 = *(yp - 2)\n\tmov $0,%rcx                C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14   C s3 -= *(vp + i + 3) + borrow3\n\tcmovc -24(%r8),%rcx        C if borrow4 t4 = *(yp - 3)\n\trcr $1,%r10                C store borrow4 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tadd %rcx,%r9               C t1 += t4\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)   C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return borrow out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\ncase2:\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13  C s2 = *(up + i + 1)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + 
borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13  C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx      C if borrow3 t3 = *(yp - 2)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tadd %rbx,%r9             C t1 += t3\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)  C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nALIGN(16)\ncase1:\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\ncase0:\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/sub_n.as",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx)\n;\trax=borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/subadd_n.as",
    "content": "\n;  AMD64 mpn_subadd_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,r8)=(rsi,r8)-(rdx,r8)-(rcx,r8) return borrow+borrow\n\n\tGLOBAL_FUNC mpn_subadd_n\n\tlea     rdx, [rdx+r8*8-56]\n\tlea     rcx, [rcx+r8*8-56]\n\tlea     rsi, [rsi+r8*8-56]\n\tlea     rdi, [rdi+r8*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, r8\n\tpush    r12\n\tpush    rbp\n\tjge     L_skip\n\tadd     r9, 4\n\tmov     rbp, [rsi+r9*8+16]\n\tmov     r11, [rsi+r9*8+24]\n\tmov     r12, [rsi+r9*8]\n\tmov     r8, [rsi+r9*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8]\n\tsbb     r8, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r11, [rsi+r9*8+56]\n\tadd     r9, 4\n\tmov     r12, [rsi+r9*8]\n\tmov     r8, 
[rsi+r9*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8]\n\tsbb     r8, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8]\n\tsbb     r8, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r12\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], r8\n\tmov     [rdi+r9*8+16], rbp\nL_skip:\n\tcmp     r9, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r12, [rsi+r9*8+32]\n\tmov     r8, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tsbb     r8, [rdx+r9*8+40]\n\tsbb     rbp, [rdx+r9*8+48]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tmov     r12, [rsi+r9*8+32]\n\tmov     r8, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     r8, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tsbb     r8, [rdx+r9*8+40]\n\tmov     [rdi+r9*8+32], r12\n\tmov     [rdi+r9*8+40], r8\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tmov     r12, [rsi+r9*8+32]\n\tadd     rax, 1\n\tsbb     r12, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r12, [rdx+r9*8+32]\n\tmov     [rdi+r9*8+32], r12\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rax, r10\n\tneg     rax\n\tpop     rbp\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/sublsh1_n.as",
    "content": "\n;  mpn_sublsh1_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_sublsh1_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_sublsh1_n\n\tpush    rbx\n\tlea     rdi, [rdi+rcx*8-56]\n\tlea     rsi, [rsi+rcx*8-56]\n\tlea     rdx, [rdx+rcx*8-56]\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     L_skip\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      L_skiplp\n\talign   16\nL_lp:\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     L_lp\nL_skiplp:\n\tadd     rax, 1\n\tsbb    
 r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\nL_skip:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case2:\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case1:\n\tmov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\talign   16\nL_case0:\n\tadd     r10, 1\n\tsbb     rax, 0\n\tneg     rax\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/k8/sublsh_n.asm",
    "content": "dnl  AMD64 mpn_sublsh \n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t    carry+(xp,n)= (yp,n) - (zp,n)<<c with carry in ci\nC\tmp_limb_t\tmpn_sublsh_nc(mp_ptr xp, mp_srcptr yp,mp_srcptr zp,mp_size_t n,unsigned int c,mp_limb_t ci)\nC\txp in rdi\typ in rsi\tzp in rdx\tn  in rcx\tc  in r8\tci in r9\n\nMULFUNC_PROLOGUE(mpn_sublsh_n mpn_sublsh_nc)\n\nASM_START()\nPROLOGUE(mpn_sublsh_n)\nxor 
%r9,%r9\nEPILOGUE()\nPROLOGUE(mpn_sublsh_nc)\nmov\t%rcx,%r10\nlea\t(%rdi,%r10,8),%rdi\nlea\t(%rsi,%r10,8),%rsi\nlea\t(%rdx,%r10,8),%rdx\nmov\t%r8,%rcx\nneg\t%rcx\nshr\t%cl,%r9\nneg\t%r10\nxor\t%rax,%rax\ntest\t$3,%r10\njz\tnext\nlp:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t%r8,%r11\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tor\t%r9,%r8\n\tmov\t%r11,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tsbb\t%r8,%r11\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tinc\t%r10\n\ttest\t$3,%r10\n\tjnz\tlp\nnext:\ncmp\t$0,%r10\njz\tend\npush\t%rbx\npush\t%rbp\npush\t%r12\npush\t%r13\npush\t%r14\npush\t%r15\nALIGN(16)\nloop:\n\tmov\t(%rdx,%r10,8),%r8\n\tmov\t8(%rdx,%r10,8),%rbp\n\tmov\t16(%rdx,%r10,8),%rbx\n\tmov\t24(%rdx,%r10,8),%r12\n\tmov\t%r8,%r11\n\tmov\t%rbp,%r13\n\tmov\t%rbx,%r14\n\tmov\t%r12,%r15\n\tneg\t%rcx\n\tshl\t%cl,%r8\n\tshl\t%cl,%rbp\n\tshl\t%cl,%rbx\n\tshl\t%cl,%r12\n\tneg\t%rcx\n\tshr\t%cl,%r11\n\tshr\t%cl,%r13\n\tshr\t%cl,%r14\n\tshr\t%cl,%r15\n\tor\t%r9,%r8\n\tor\t%r11,%rbp\n\tor\t%r13,%rbx\n\tor\t%r14,%r12\n\tmov\t%r15,%r9\n\tadd\t$1,%rax\n\tmov\t(%rsi,%r10,8),%r11\n\tmov\t8(%rsi,%r10,8),%r13\n\tmov\t16(%rsi,%r10,8),%r14\n\tmov\t24(%rsi,%r10,8),%r15\n\tsbb\t%r8,%r11\n\tsbb\t%rbp,%r13\n\tsbb\t%rbx,%r14\n\tsbb\t%r12,%r15\n\tsbb\t%rax,%rax\n\tmov\t%r11,(%rdi,%r10,8)\n\tmov\t%r13,8(%rdi,%r10,8)\n\tmov\t%r14,16(%rdi,%r10,8)\n\tmov\t%r15,24(%rdi,%r10,8)\n\tadd\t$4,%r10\n\tjnz\tloop\npop\t%r15\npop\t%r14\npop\t%r13\npop\t%r12\npop\t%rbp\npop\t%rbx\nend:\nneg\t%rax\nadd\t%r9,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/submul_1.asm",
    "content": "dnl  mpn_submul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_submul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tsub %r8,(%rdi,%r11,8)\n\tadc %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tsub %r9,8(%rdi,%r11,8)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tsub %r10,16(%rdi,%r11,8)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tsub %r12,24(%rdi,%r11,8)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nsub %r8,(%rdi,%r11,8)\nadc %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tsub %r9,8(%rdi)\n\tadc 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tsub %r10,16(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tsub %r12,24(%rdi)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tsub %r8,32(%rdi)\n\tadc $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tsub %r9,16(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tsub %r10,24(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tsub %r12,32(%rdi)\n\tadc $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tsub %r9,24(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tsub %r10,32(%rdi)\n\tadc $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tsub %rax,(%rdi)\n\tadc $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tsub %r9,32(%rdi)\n\tadc $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/k8/sumdiff_n.as",
    "content": "\n;  AMD64 mpn_sumdiff_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi, r8) = (rdx, r8) + (rcx, r8)  (rsi, r8) = (rdx, r8) - (rcx, r8)\n;\treturn 2*add_carry + sub_borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sumdiff_n\n\tlea     rsi, [rsi+r8*8]\n\tlea     rdx, [rdx+r8*8]\n\tlea     rdi, [rdi+r8*8]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\txor     r9, r9\n\txor     r10, r10\n\ttest    r8, 3\n\tjz      next\nlp1:\n\tmov     rax, [rdx+r8*8]\n\tmov     r11, rax\n\tadd     r9, 1\n\tadc     rax, [rcx+r8*8]\n\tsbb     r9, r9\n\tadd     r10, 1\n\tsbb     r11, [rcx+r8*8]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], rax\n\tmov     [rsi+r8*8], r11\n\tinc     r8\n\ttest    r8, 3\n\tjnz     lp1\nnext:\n\tcmp     r8, 0\n\tjz      skiploop\n\tmov     [rsp-8], rbx\n\tmov     [rsp-16], rbp\n\tmov     [rsp-24], r12\n\tmov     [rsp-32], r13\n\tmov     [rsp-40], r14\n\tmov     [rsp-48], r15\n\talign   16\nloop1:\n\tmov     rax, [rdx+r8*8]\n\tmov     rbx, [rdx+r8*8+8]\n\tmov     rbp, [rdx+r8*8+16]\n\tmov     r12, [rdx+r8*8+24]\n\tmov     r11, rax\n\tmov     r13, rbx\n\tmov     r14, rbp\n\tmov     r15, r12\n\tadd     r9, 
1\n\tadc     rax, [rcx+r8*8]\n\tadc     rbx, [rcx+r8*8+8]\n\tadc     rbp, [rcx+r8*8+16]\n\tadc     r12, [rcx+r8*8+24]\n\tsbb     r9, r9\n\tadd     r10, 1\n\tsbb     r11, [rcx+r8*8]\n\tsbb     r13, [rcx+r8*8+8]\n\tsbb     r14, [rcx+r8*8+16]\n\tsbb     r15, [rcx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], rax\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rbp\n\tmov     [rdi+r8*8+24], r12\n\tmov     [rsi+r8*8], r11\n\tmov     [rsi+r8*8+8], r13\n\tmov     [rsi+r8*8+16], r14\n\tmov     [rsi+r8*8+24], r15\n\tadd     r8, 4\n\tjnz     loop1\n\tmov     rbx, [rsp-8]\n\tmov     rbp, [rsp-16]\n\tmov     r12, [rsp-24]\n\tmov     r13, [rsp-32]\n\tmov     r14, [rsp-40]\n\tmov     r15, [rsp-48]\nskiploop:\n\tlea     rax, [r10+r9*2]\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/xnor_n.as",
    "content": "\n;  AMD64 mpn_xnor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_xnor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign   16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tdec     rcx\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/k8/xor_n.asm",
    "content": "dnl  mpn_xor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xor_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\nC\trax              rdi,   rsi,    rdx,  rcx\n\nASM_START()\nPROLOGUE(mpn_xor_n)\nsub $4,%rcx\njc skiplp\nALIGN(16)\nlp:\n\tmov (%rdx),%r8\n\tmov 8(%rdx),%r9\n\txor (%rsi),%r8\n\txor 8(%rsi),%r9\n\tlea 32(%rsi),%rsi\n\tmov 16(%rdx),%r10\n\tmov 24(%rdx),%r11\n\tlea 32(%rdi),%rdi\n\txor 16-32(%rsi),%r10\n\txor 24-32(%rsi),%r11\n\tsub $4,%rcx\n\tmov %r8,-32(%rdi)\n\tmov %r9,8-32(%rdi)\n\tmov %r10,16-32(%rdi)\n\tlea 32(%rdx),%rdx\n\tmov %r11,24-32(%rdi)\n\tjnc lp\nskiplp:\ncmp $-2,%rcx\nje case2\njp case1\njl case0\ncase3:\tmov 8(%rdx,%rcx,8),%rax\n\txor 8(%rsi,%rcx,8),%rax\n\tmov %rax,8(%rdi,%rcx,8)\ncase2:\tmov 16(%rdx,%rcx,8),%rax\n\txor 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncase1:\tmov 24(%rdx,%rcx,8),%rax\n\txor 24(%rsi,%rcx,8),%rax\n\tmov %rax,24(%rdi,%rcx,8)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nCopyright 2013 William Hart\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined (__GNUC__) || defined(__INTEL_COMPILER)\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"addq %5,%q1\\n\\tadcq %3,%q0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),\t\t\\\n\t     \"%1\" ((UDItype)(al)), \"rme\" ((UDItype)(bl)))\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"subq %5,%q1\\n\\tsbbq %3,%q0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\" ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),\t\t\\\n\t     \"1\" ((UDItype)(al)), \"rme\" ((UDItype)(bl)))\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"mulq %3\"\t\t\t\t\t\t\t\\\n\t   : \"=a\" (w0), \"=d\" (w1)\t\t\t\t\t\\\n\t   : \"%0\" ((UDItype)(u)), \"rm\" ((UDItype)(v)))\n#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding \"=d\" */\\\n  __asm__ (\"divq %4\"\t\t     /* stringification in K&R C */\t\\\n\t   : \"=a\" (q), \"=d\" (r)\t\t\t\t\t\t\\\n\t   : \"0\" ((UDItype)(n0)), \"1\" ((UDItype)(n1)), \"rm\" 
((UDItype)(dx)))\n#define add_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"addq %8,%q2\\n\\tadcq %6,%q1\\n\\tadcq %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),  \\\n         \"1\"  ((UDItype)(am)), \"rme\" ((UDItype)(bm)),  \\\n         \"2\"  ((UDItype)(al)), \"rme\" ((UDItype)(bl)))  \n#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"subq %8,%q2\\n\\tsbbq %6,%q1\\n\\tsbbq %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),  \\\n         \"1\"  ((UDItype)(am)), \"rme\" ((UDItype)(bm)),  \\\n         \"2\"  ((UDItype)(al)), \"rme\" ((UDItype)(bl)))  \n\n/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */\n#define count_leading_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UDItype __cbtmp;\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsrq %1,%0\" : \"=r\" (__cbtmp) : \"rm\" ((UDItype)(x)));\t\\\n    (count) = __cbtmp ^ 63;\t\t\t\t\t\t\\\n  } while (0)\n/* bsfq destination must be a 64-bit register, \"%q0\" forces this in case\n   count is only an int. */\n#define count_trailing_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsfq %1,%q0\" : \"=r\" (count) : \"rm\" ((UDItype)(x)));\t\\\n  } while (0)\n\n#endif\n\n#if !defined(BSWAP_LIMB) && defined (__GNUC__) \n#define BSWAP_LIMB(dst, src)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    __asm__ (\"bswap %q0\" : \"=r\" (dst) : \"0\" (src));\t\t\t\\\n  } while (0)\n#endif\n"
  },
  {
    "path": "mpn/x86_64/modexact_1c_odd.as",
    "content": ";  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software\n;  Foundation, Inc.\n;\n;  Copyright 2008 William Hart\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;               cycles/limb\n; Hammer:         10\n; Prescott/Nocona:   33\n\n; mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,\n;                               mp_limb_t divisor);\n; mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,\n;                                mp_limb_t divisor, mp_limb_t carry);\n;\n; The dependent chain in the main loop is\n;\n;                            cycles\n; sub   rax, r8 1\n; imul  rax, r9 4\n; mul   rsi     5\n;            ----\n; total        10\n;\n; The movq load from src seems to need to be scheduled back before the jz to\n; achieve this speed, out-of-order execution apparently can't completely\n; hide the latency otherwise.\n;\n; The l=src[i]-cbit step is rotated back too, since that allows us to avoid\n; it for the first iteration (where there's no cbit).\n;\n; The code alignment used (32-byte) for the loop also seems necessary.\n; Without that the non-PIC case has adcq crossing the 0x60 offset,\n; apparently making it run at 11 
cycles instead of 10.\n;\n; Not done:\n;\n; divq for size==1 was measured at about 79 cycles, compared to the inverse\n; at about 25 cycles (both including function call overheads), so that's not\n; used.\n;\n; Enhancements:\n;\n; For PIC, we shouldn't really need the GOT fetch for modlimb_invert_table,\n; it'll be in rodata or text in libmpir.so and can be accessed directly %rip\n; relative.  This would be for small model only (something we don't\n; presently detect, but which is all that gcc 3.3.3 supports), since 8-byte\n; PC-relative relocations are apparently not available.  Some rough\n; experiments with binutils 2.13 looked worrylingly like it might come out\n; with an unwanted text segment relocation though, even with \".protected\".\n\n;  AMD64 mpn_modexact_1_odd -- exact division style remainder.\n;\n; mp_limb_t mpn_modexact_1_odd (\n;  mp_srcptr src,           rdi\n;  mp_size_t size,          rsi\n;  mp_limb_t divisor        rdx\n; );\n; mp_limb_t mpn_modexact_1c_odd (\n;  mp_srcptr src,           rdi\n;  mp_size_t size,          rsi\n;  mp_limb_t divisor,       rdx\n;  mp_limb_t carry          rcx\n; );\n;\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n    align   32\n\n    G_EXTERN __gmp_modlimb_invert_table\n\nGLOBAL_FUNC mpn_modexact_1_odd\n    mov      ecx, 0       ; carry\n\nGLOBAL_FUNC mpn_modexact_1c_odd\n    mov      r8, rdx\n    shr      edx, 1\n\n%ifdef GSYM_PREFIX\n%define mod_table ___gmp_modlimb_invert_table\n%else\n%define mod_table __gmp_modlimb_invert_table\n%endif\n\n    ; first use Newton's iteration to invert the divisor limb (d) using \n    ; f(x) = 1/x - d  and x[i+1] = x[i] - f(x[i]) / f'(x[i]) to give\n    ; the iteration formula: x[i+1] = x[i] * (2 - d * x[i])\n \n%ifdef PIC\n    mov      r9, [mod_table wrt rip wrt ..gotpcrel]\n%else\n    lea      r9, [mod_table wrt rip]\n%endif\n\n    and      edx, 127\n    mov      r10, rcx\n\n    movzx    edx, byte [rdx+r9] ; inv -> rdx (8-bit approx)\n    \n    mov      rax, [rdi]         ; 
first limb of numerator\n    lea      r11, [rdi+rsi*8]   ; pointer to top of src\n    mov      rdi, r8            ; save divisor\n\n    lea      ecx, [rdx+rdx] \n    imul     rdx, rdx\n\n    neg      rsi                ; limb offset from top of src\n \n    imul     edx, edi\n\n    sub      ecx, edx           ; inv -> rcx (16-bit approx)\n    \n    lea      edx, [rcx+rcx]\n    imul     ecx, ecx\n\n    imul     ecx, edi\n\n    sub      edx, ecx           ; inv -> rdx (32-bit approx)\n    xor      ecx, ecx\n\n    lea      r9, [rdx+rdx]\n    imul     rdx, rdx\n\n    imul     rdx, r8\n\n    sub      r9, rdx            ; inv -> r9 (64-bit approx)\n    mov      rdx, r10           ; initial carry -> rdx\n\n    inc      rsi                ; adjust limb offset\n    jz       label1\n    \n    align    16\nlabel0:                         ; now multiply through by inverse in loop\n    sub      rax, rdx\n    \n    adc      rcx, 0\n    imul     rax, r9\n\n    mul      r8\n\n    mov      rax, [r11+rsi*8]\n    sub      rax, rcx\n    setc     cl\n\n    inc      rsi\n    jnz      label0\n\nlabel1:                         ; do final multiply\n    sub      rax, rdx\n   \n    adc      rcx, 0\n    imul     rax, r9\n\n    mul      r8\n    lea      rax, [rcx+rdx]     ; return remainder\n    ret\n"
  },
  {
    "path": "mpn/x86_64/mullow_n_basecase.asm",
    "content": "dnl  AMD64 mpn_mullo_basecase.\n\ndnl  Contributed to the GNU project by Torbjorn Granlund.\n\ndnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\nC NOTES\nC   * There is a major stupidity in that we call mpn_mul_1 initially, for a\nC     large trip count.  
Instead, we should start with mul_2 for any operand\nC     size congruence class.\nC   * Stop iterating addmul_2 earlier, falling into straight-line triangle code\nC     for the last 2-3 iterations.\nC   * Perhaps implement n=4 special code.\nC   * The reload of the outer loop jump address hurts branch prediction.\nC   * The addmul_2 loop ends with an MUL whose high part is not used upon loop\nC     exit.\n\nC INPUT PARAMETERS\ndefine(`rp',\t   `%rdi')\ndefine(`up',\t   `%rsi')\ndefine(`vp_param', `%rdx')\ndefine(`n',\t   `%rcx')\n\ndefine(`vp',\t`%r11')\ndefine(`outer_addr', `%r8')\ndefine(`j',\t`%r9')\ndefine(`v0',\t`%r13')\ndefine(`v1',\t`%r14')\ndefine(`w0',\t`%rbx')\ndefine(`w032',\t`%ebx')\ndefine(`w1',\t`%r15')\ndefine(`w132',\t`%r15d')\ndefine(`w2',\t`%rbp')\ndefine(`w232',\t`%ebp')\ndefine(`w3',\t`%r10')\ndefine(`w332',\t`%r10d')\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_mullow_n_basecase)\n\tcmp\t$4, n\n\tjge\tL(lgen)\n\tmov\t(up), %rax\t\tC u0\n\tmov\t(vp_param), %r8\t\tC v0\n\n\tlea\tL(ltab)(%rip), %r9\nifdef(`PIC',\n`\tmovslq\t(%r9,%rcx,4), %r10\n\tadd\t%r10, %r9\n\tjmp\t*%r9\n',`\n\tjmp\t*(%r9,n,8)\n')\n\tJUMPTABSECT\n\tALIGN(8)\nL(ltab):\tJMPENT(\tL(ltab), L(ltab))\t\t\tC not allowed\n\tJMPENT(\tL(l1), L(ltab))\t\t\tC 1\n\tJMPENT(\tL(l2), L(ltab))\t\t\tC 2\n\tJMPENT(\tL(l3), L(ltab))\t\t\tC 3\ndnl\tJMPENT(\tL(l0m4), L(ltab))\t\tC 4\ndnl\tJMPENT(\tL(l1m4), L(ltab))\t\tC 5\ndnl\tJMPENT(\tL(l2m4), L(ltab))\t\tC 6\ndnl\tJMPENT(\tL(l3m4), L(ltab))\t\tC 7\ndnl\tJMPENT(\tL(l0m4), L(ltab))\t\tC 8\ndnl\tJMPENT(\tL(l1m4), L(ltab))\t\tC 9\ndnl\tJMPENT(\tL(l2m4), L(ltab))\t\tC 10\ndnl\tJMPENT(\tL(l3m4), L(ltab))\t\tC 11\n\tTEXT\n\nL(l1):\timul\t%r8, %rax\n\tmov\t%rax, (rp)\n\tret\n\nL(l2):\tmov\t8(vp_param), %r11\n\timul\t%rax, %r11\t\tC u0 x v1\n\tmul\t%r8\t\t\tC u0 x v0\n\tmov\t%rax, (rp)\n\timul\t8(up), %r8\t\tC u1 x v0\n\tlea\t(%r11, %rdx), %rax\n\tadd\t%r8, %rax\n\tmov\t%rax, 8(rp)\n\tret\n\nL(l3):\tmov\t8(vp_param), %r9\tC v1\n\tmov\t16(vp_param), 
%r11\n\tmul\t%r8\t\t\tC u0 x v0 -> <r1,r0>\n\tmov\t%rax, (rp)\t\tC r0\n\tmov\t(up), %rax\t\tC u0\n\tmov\t%rdx, %rcx\t\tC r1\n\tmul\t%r9\t\t\tC u0 x v1 -> <r2,r1>\n\timul\t8(up), %r9\t\tC u1 x v1 -> r2\n\tmov\t16(up), %r10\n\timul\t%r8, %r10\t\tC u2 x v0 -> r2\n\tadd\t%rax, %rcx\n\tadc\t%rdx, %r9\n\tadd\t%r10, %r9\n\tmov\t8(up), %rax\t\tC u1\n\tmul\t%r8\t\t\tC u1 x v0 -> <r2,r1>\n\tadd\t%rax, %rcx\n\tadc\t%rdx, %r9\n\tmov\t%r11, %rax\n\timul\t(up), %rax\t\tC u0 x v2 -> r2\n\tadd\t%rax, %r9\n\tmov\t%rcx, 8(rp)\n\tmov\t%r9, 16(rp)\n\tret\n\nL(l0m4):\nL(l1m4):\nL(l2m4):\nL(l3m4):\nL(lgen): push\t%rbx\n\tpush\t%rbp\n\tpush\t%r13\n\tpush\t%r14\n\tpush\t%r15\n\n\tmov\t(up), %rax\n\tmov\t(vp_param), v0\n\tmov\tvp_param, vp\n\n\tlea\t(rp,n,8), rp\n\tlea\t(up,n,8), up\n\tneg\tn\n\n\tmul\tv0\n\n\ttest\t$1, R8(n)\n\tjz\tL(lmul_2)\n\nL(lmul_1):\n\tlea\t-8(rp), rp\n\tlea\t-8(up), up\n\ttest\t$2, R8(n)\n\tjnz\tL(lmul_1_prologue_3)\n\nL(lmul_1_prologue_2):\t\tC n = 7, 11, 15, ...\n\tlea\t-1(n), j\n\tlea\tL(laddmul_outer_1)(%rip), outer_addr\n\tmov\t%rax, w0\n\tmov\t%rdx, w1\n\txor\tw232, w232\n\txor\tw332, w332\n\tmov\t16(up,n,8), %rax\n\tjmp\tL(lmul_1_entry_2)\n\nL(lmul_1_prologue_3):\t\tC n = 5, 9, 13, ...\n\tlea\t1(n), j\n\tlea\tL(laddmul_outer_3)(%rip), outer_addr\n\tmov\t%rax, w2\n\tmov\t%rdx, w3\n\txor\tw032, w032\n\tjmp\tL(lmul_1_entry_0)\n\n\tALIGN(16)\nL(lmul_1_top):\n\tmov\tw0, -16(rp,j,8)\n\tadd\t%rax, w1\n\tmov\t(up,j,8), %rax\n\tadc\t%rdx, w2\n\txor\tw032, w032\n\tmul\tv0\n\tmov\tw1, -8(rp,j,8)\n\tadd\t%rax, w2\n\tadc\t%rdx, w3\nL(lmul_1_entry_0):\n\tmov\t8(up,j,8), %rax\n\tmul\tv0\n\tmov\tw2, (rp,j,8)\n\tadd\t%rax, w3\n\tadc\t%rdx, w0\n\tmov\t16(up,j,8), %rax\n\tmul\tv0\n\tmov\tw3, 8(rp,j,8)\n\txor\tw232, w232\tC zero\n\tmov\tw2, w3\t\t\tC zero\n\tadd\t%rax, w0\n\tmov\t24(up,j,8), %rax\n\tmov\tw2, w1\t\t\tC zero\n\tadc\t%rdx, w1\nL(lmul_1_entry_2):\n\tmul\tv0\n\tadd\t$4, j\n\tjs\tL(lmul_1_top)\n\n\tmov\tw0, -16(rp)\n\tadd\t%rax, w1\n\tmov\tw1, -8(rp)\n\tadc\t%rdx, 
w2\n\n\timul\t(up), v0\n\tadd\tv0, w2\n\tmov\tw2, (rp)\n\n\tadd\t$1, n\n\tjz\tL(lret)\n\n\tmov\t8(vp), v0\n\tmov\t16(vp), v1\n\n\tlea\t16(up), up\n\tlea\t8(vp), vp\n\tlea\t24(rp), rp\n\n\tjmp\t*outer_addr\n\n\nL(lmul_2):\n\tmov\t8(vp), v1\n\ttest\t$2, R8(n)\n\tjz\tL(lmul_2_prologue_3)\n\n\tALIGN(16)\nL(lmul_2_prologue_1):\n\tlea\t0(n), j\n\tmov\t%rax, w3\n\tmov\t%rdx, w0\n\txor\tw132, w132\n\tmov\t(up,n,8), %rax\n\tlea\tL(laddmul_outer_3)(%rip), outer_addr\n\tjmp\tL(lmul_2_entry_1)\n\n\tALIGN(16)\nL(lmul_2_prologue_3):\n\tlea\t2(n), j\n\tmov\t$0, w332\n\tmov\t%rax, w1\n\tmov\t(up,n,8), %rax\n\tmov\t%rdx, w2\n\tlea\tL(laddmul_outer_1)(%rip), outer_addr\n\tjmp\tL(lmul_2_entry_3)\n\n\tALIGN(16)\nL(lmul_2_top):\n\tmov\t-32(up,j,8), %rax\n\tmul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\t-24(up,j,8), %rax\n\txor\tw232, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t-24(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\tmul\tv1\n\tadd\t%rax, w1\n\tmov\tw0, -24(rp,j,8)\n\tadc\t%rdx, w2\n\tmov\t-16(up,j,8), %rax\n\tmul\tv0\n\tmov\t$0, w332\n\tadd\t%rax, w1\n\tadc\t%rdx, w2\n\tmov\t-16(up,j,8), %rax\n\tadc\t$0, w332\nL(lmul_2_entry_3):\n\tmov\t$0, w032\n\tmov\tw1, -16(rp,j,8)\n\tmul\tv1\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tadc\tw132, w032\n\tmul\tv1\n\tadd\t%rax, w3\n\tmov\tw2, -8(rp,j,8)\n\tadc\t%rdx, w0\n\tmov\t(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tadc\t%rdx, w0\n\tadc\t$0, w132\nL(lmul_2_entry_1):\n\tadd\t$4, j\n\tmov\tw3, -32(rp,j,8)\n\tjs\tL(lmul_2_top)\n\n\timul\t-16(up), v1\n\tadd\tv1, w0\n\timul\t-8(up), v0\n\tadd\tv0, w0\n\tmov\tw0, -8(rp)\n\n\tadd\t$2, n\n\tjz\tL(lret)\n\n\tmov\t16(vp), v0\n\tmov\t24(vp), v1\n\n\tlea\t16(vp), vp\n\tlea\t16(rp), rp\n\n\tjmp\t*outer_addr\n\n\nL(laddmul_outer_1):\n\tlea\t-2(n), j\n\tmov\t-16(up,n,8), %rax\n\tmul\tv0\n\tmov\t%rax, w3\n\tmov\t-16(up,n,8), %rax\n\tmov\t%rdx, w0\n\txor\tw132, 
w132\n\tlea\tL(laddmul_outer_3)(%rip), outer_addr\n\tjmp\tL(laddmul_entry_1)\n\nL(laddmul_outer_3):\n\tlea\t0(n), j\n\tmov\t-16(up,n,8), %rax\n\txor\tw332, w332\n\tmul\tv0\n\tmov\t%rax, w1\n\tmov\t-16(up,n,8), %rax\n\tmov\t%rdx, w2\n\tlea\tL(laddmul_outer_1)(%rip), outer_addr\n\tjmp\tL(laddmul_entry_3)\n\n\tALIGN(16)\nL(laddmul_top):\n\tadd\tw3, -32(rp,j,8)\n\tadc\t%rax, w0\n\tmov\t-24(up,j,8), %rax\n\tadc\t%rdx, w1\n\txor\tw232, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t-24(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\tw232, w232\n\tmul\tv1\n\txor\tw332, w332\n\tadd\tw0, -24(rp,j,8)\n\tadc\t%rax, w1\n\tmov\t-16(up,j,8), %rax\n\tadc\t%rdx, w2\n\tmul\tv0\n\tadd\t%rax, w1\n\tmov\t-16(up,j,8), %rax\n\tadc\t%rdx, w2\n\tadc\t$0, w332\nL(laddmul_entry_3):\n\tmul\tv1\n\tadd\tw1, -16(rp,j,8)\n\tadc\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmul\tv0\n\txor\tw032, w032\n\tadd\t%rax, w2\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmov\t-8(up,j,8), %rax\n\tadc\tw132, w032\n\tmul\tv1\n\tadd\tw2, -8(rp,j,8)\n\tadc\t%rax, w3\n\tadc\t%rdx, w0\n\tmov\t(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t(up,j,8), %rax\n\tadc\t%rdx, w0\n\tadc\t$0, w132\nL(laddmul_entry_1):\n\tmul\tv1\n\tadd\t$4, j\n\tjs\tL(laddmul_top)\n\n\tadd\tw3, -32(rp)\n\tadc\t%rax, w0\n\n\timul\t-24(up), v0\n\tadd\tv0, w0\n\tadd\tw0, -24(rp)\n\n\tadd\t$2, n\n\tjns\tL(lret)\n\n\tlea\t16(vp), vp\n\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\n\tlea\t-16(up), up\n\n\tjmp\t*outer_addr\n\nL(lret):\tpop\t%r15\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/mulmid_basecase.asm",
    "content": "dnl  AMD64 mpn_mulmid_basecase\n\ndnl  Based on mul_basecase.asm from GMP 4.3.1, modifications are copyright\ndnl  (C) 2009, David Harvey. The original mul_basecase.asm was released under\ndnl  LGPLv3+, license terms reproduced below. These modifications are hereby\ndnl  released under the same terms.\n\ndnl  ========= Original license terms:\n\ndnl  Contributed to the GNU project by Torbjorn Granlund and David Harvey.\n\ndnl  Copyright 2008 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  
If not, see http://www.gnu.org/licenses/.\n\ndnl  ========= end license terms\n\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC K8,K9:\t 2.375  (2.5 when un - vn is \"small\")\nC K10:\t\t ?\nC P4:\t\t ?\nC P6-15:\t ?\n\nC INPUT PARAMETERS\ndefine(`rp',      `%rdi')\ndefine(`up',      `%rsi')\ndefine(`un_param',`%rdx')\ndefine(`vp_param',`%rcx')\ndefine(`vn',      `%r8')\ndefine(`vn32',    `%r8d')\n\ndefine(`v0', `%r12')\ndefine(`v1', `%r9')\n\ndefine(`w0', `%rbx')\ndefine(`w1', `%rcx')\ndefine(`w2', `%rbp')\ndefine(`w3', `%r10')\ndefine(`w032', `%ebx')\ndefine(`w132', `%ecx')\ndefine(`w232', `%ebp')\ndefine(`w332', `%r10d')\n\ndefine(`n',  `%r11')\ndefine(`outer_addr', `%r14')\ndefine(`un',  `%r13')\ndefine(`un32',`%r13d')\ndefine(`vp',  `%r15')\n\ndefine(`vp_inner', `%r10')\n\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_mulmid_basecase)\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\tpush\t%r15\n\n\tmov\tvp_param, vp\n\n\tC use un for row length (= un_param - vn + 1)\n\tlea\t1(un_param), un\n\tsub\tvn, un\n\n\tlea\t(rp,un,8), rp\n\n\tcmp\t$4, un\t\tC FIXME: needs tuning\n\tjc\tL(diagonal)\n\n\tlea\t(up,un_param,8), up\n\n\ttest\t$1, vn\n\tjz\tL(mul_2)\n\nC ===========================================================\nC     mul_1 for vp[0] if vn is odd\n\nL(mul_1):\t\n\tmov\tun32, w032\n\n\tneg\tun\n\tmov\t(up,un,8), %rax\n\tmov\t(vp), v0\n\tmul\tv0\n\n\tand\t$-4, un\t\tC round down to multiple of 4\n\tmov\tun, n\n\n\tand\t$3, w032\n\tjz\tL(mul_1_prologue_0)\n\tcmp\t$2, w032\n\tjc\tL(mul_1_prologue_1)\n\tjz\tL(mul_1_prologue_2)\n\nL(mul_1_prologue_3):\n\tmov\t%rax, w3\n\tmov\t%rdx, w0\n\tlea\tL(addmul_prologue_3)(%rip), outer_addr\n\tjmp\tL(mul_1_entry_3)\n\t\n\tALIGN(16)\nL(mul_1_prologue_0):\n\tmov\t%rax, w2\n\tmov\t%rdx, w3\t\tC note already w0 == 0\n\tlea\tL(addmul_prologue_0)(%rip), outer_addr\n\tjmp\tL(mul_1_entry_0)\n\n\tALIGN(16)\nL(mul_1_prologue_1):\n\tadd\t$4, n\n\tmov\t%rax, w1\n\tmov\t%rdx, w2\n\tmov\t$0, 
w332\n\tmov\t(up,n,8), %rax\n\tlea\tL(addmul_prologue_1)(%rip), outer_addr\n\tjmp\tL(mul_1_entry_1)\n\n\tALIGN(16)\nL(mul_1_prologue_2):\n\tmov\t%rax, w0\n\tmov\t%rdx, w1\n\tmov\t24(up,n,8), %rax\n\tmov\t$0, w232\n\tmov\t$0, w332\n\tlea\tL(addmul_prologue_2)(%rip), outer_addr\n\tjmp\tL(mul_1_entry_2)\n\n\t\n\tC this loop is 10 c/loop = 2.5 c/l on K8\n\n\tALIGN(16)\nL(mul_1_top):\n\tmov\tw0, -16(rp,n,8)\n\tadd\t%rax, w1\n\tmov\t(up,n,8), %rax\n\tadc\t%rdx, w2\nL(mul_1_entry_1):\n\tmov\t$0, w032\n\tmul\tv0\n\tmov\tw1, -8(rp,n,8)\n\tadd\t%rax, w2\n\tadc\t%rdx, w3\nL(mul_1_entry_0):\n\tmov\t8(up,n,8), %rax\n\tmul\tv0\n\tmov\tw2, (rp,n,8)\n\tadd\t%rax, w3\n\tadc\t%rdx, w0\nL(mul_1_entry_3):\n\tmov\t16(up,n,8), %rax\n\tmul\tv0\n\tmov\tw3, 8(rp,n,8)\n\tmov\t$0, w232\t\tC zero\n\tmov\tw2, w3\t\t\tC zero\n\tadd\t%rax, w0\n\tmov\t24(up,n,8), %rax\n\tmov\tw2, w1\t\t\tC zero\n\tadc\t%rdx, w1\nL(mul_1_entry_2):\n\tmul\tv0\n\tadd\t$4, n\n\tjs\tL(mul_1_top)\n\n\tmov\tw0, -16(rp)\n\tadd\t%rax, w1\n\tmov\tw1, -8(rp)\n\tmov\tw2, 8(rp)\t\tC zero last limb of output\n\tadc\t%rdx, w2\n\tmov\tw2, (rp)\n\n\tdec\tvn\n\tjz\tL(ret)\n\n\tlea\t-8(up), up\n\tlea\t8(vp), vp\n\t\n\tmov\tun, n\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\t\n\tjmp\t*outer_addr\n\nC ===========================================================\nC     mul_2 for vp[0], vp[1] if vn is even\n\n\tALIGN(16)\nL(mul_2):\n\tmov\tun32, w032\n\n\tneg\tun\n\tmov\t-8(up,un,8), %rax\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\tmul\tv1\n\n\tand\t$-4, un\t\tC round down to multiple of 4\n\tmov\tun, n\n\n\tand\t$3, w032\n\tjz\tL(mul_2_prologue_0)\n\tcmp\t$2, w032\n\tjc\tL(mul_2_prologue_1)\n\tjz\tL(mul_2_prologue_2)\n\nL(mul_2_prologue_3):\n\tmov\t%rax, w1\n\tmov\t%rdx, w2\n\tlea\tL(addmul_prologue_3)(%rip), outer_addr\n\tjmp\tL(mul_2_entry_3)\n\t\n\tALIGN(16)\nL(mul_2_prologue_0):\n\tmov\t%rax, w0\n\tmov\t%rdx, w1\n\tlea\tL(addmul_prologue_0)(%rip), outer_addr\n\tjmp\tL(mul_2_entry_0)\n\n\tALIGN(16)\nL(mul_2_prologue_1):\t\n\tmov\t%rax, 
w3\n\tmov\t%rdx, w0\n\tmov\t$0, w132\n\tlea\tL(addmul_prologue_1)(%rip), outer_addr\n\tjmp\tL(mul_2_entry_1)\n\t\n\tALIGN(16)\nL(mul_2_prologue_2):\n\tmov\t%rax, w2\n\tmov\t%rdx, w3\n\tmov\t$0, w032\n\tmov\t16(up,n,8), %rax\n\tlea\tL(addmul_prologue_2)(%rip), outer_addr\n\tjmp\tL(mul_2_entry_2)\n\t\n\n\tC this loop is 18 c/loop = 2.25 c/l on K8\n\n\tALIGN(16)\nL(mul_2_top):\t\n\tmov     -8(up,n,8), %rax\n\tmul     v1\n\tadd     %rax, w0\n\tadc     %rdx, w1\nL(mul_2_entry_0):\t\n\tmov     $0, w232\n\tmov     (up,n,8), %rax\n\tmul     v0\n\tadd     %rax, w0\n\tmov     (up,n,8), %rax\n\tadc     %rdx, w1\n\tadc     $0, w232\n\tmul     v1\n\tadd     %rax, w1\n\tmov     w0, (rp,n,8)\n\tadc     %rdx, w2\nL(mul_2_entry_3):\t\n\tmov     8(up,n,8), %rax\n\tmul     v0\n\tmov     $0, w332\n\tadd     %rax, w1\n\tadc     %rdx, w2\n\tmov     $0, w032\n\tadc     $0, w332\n\tmov     8(up,n,8), %rax\n\tmov     w1, 8(rp,n,8)\n\tmul     v1\n\tadd     %rax, w2\n\tmov     16(up,n,8), %rax\n\tadc     %rdx, w3\nL(mul_2_entry_2):\t\n\tmov     $0, w132\n\tmul     v0\n\tadd     %rax, w2\n\tmov     16(up,n,8), %rax\n\tadc     %rdx, w3\n\tadc     $0, w032\n\tmul     v1\n\tadd     %rax, w3\n\tmov     w2, 16(rp,n,8)\n\tadc     %rdx, w0\nL(mul_2_entry_1):\t\n\tmov     24(up,n,8), %rax\n\tmul     v0\n\tadd     %rax, w3\n\tadc     %rdx, w0\n\tadc     $0, w132\n\tadd     $4, n\n\tmov     w3, -8(rp,n,8)\n\tjnz     L(mul_2_top)\n\n\tmov\tw0, (rp)\n\tmov\tw1, 8(rp)\n\n\tsub\t$2, vn\n\tjz\tL(ret)\n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\n\tmov\tun, n\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\n\tjmp\t*outer_addr\n\t\t\t\nC ===========================================================\nC     addmul_2 for remaining vp's\n\n\tALIGN(16)\nL(addmul_prologue_0):\n\tmov\t-8(up,n,8), %rax\n\tmul\tv1\n\tmov\t%rax, w1\n\tmov\t%rdx, w2\n\tmov\t$0, w332\n\tjmp\tL(addmul_entry_0)\n\t\n\tALIGN(16)\nL(addmul_prologue_1):\n\tmov\t16(up,n,8), %rax\n\tmul\tv1\n\tmov\t%rax, w0\n\tmov\t%rdx, w1\n\tmov\t$0, 
w232\n\tmov\t24(up,n,8), %rax\n\tjmp\tL(addmul_entry_1)\n\t\n\tALIGN(16)\nL(addmul_prologue_2):\n\tmov\t8(up,n,8), %rax\n\tmul\tv1\n\tmov\t%rax, w3\n\tmov\t%rdx, w0\n\tmov\t$0, w132\n\tjmp\tL(addmul_entry_2)\n\n\tALIGN(16)\nL(addmul_prologue_3):\n\tmov\t(up,n,8), %rax\n\tmul\tv1\n\tmov\t%rax, w2\n\tmov\t%rdx, w3\n\tmov\t$0, w032\n\tmov\t$0, w132\n\tjmp\tL(addmul_entry_3)\n\n\tC this loop is 19 c/loop = 2.375 c/l on K8\n\n\tALIGN(16)\nL(addmul_top):\n\tmov\t$0, w332\n\tadd\t%rax, w0\n\tmov\t-8(up,n,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\tmul\tv1\n\tadd\tw0, -8(rp,n,8)\n\tadc\t%rax, w1\n\tadc\t%rdx, w2\nL(addmul_entry_0):\n\tmov\t(up,n,8), %rax\n\tmul\tv0\n\tadd\t%rax, w1\n\tmov\t(up,n,8), %rax\n\tadc\t%rdx, w2\n\tadc\t$0, w332\n\tmul\tv1\n\tadd\tw1, (rp,n,8)\n\tmov\t$0, w132\n\tadc\t%rax, w2\n\tmov\t$0, w032\n\tadc\t%rdx, w3\nL(addmul_entry_3):\n\tmov\t8(up,n,8), %rax\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t8(up,n,8), %rax\n\tadc\t%rdx, w3\n\tadc\t$0, w032\n\tmul\tv1\n\tadd\tw2, 8(rp,n,8)\n\tadc\t%rax, w3\n\tadc\t%rdx, w0\nL(addmul_entry_2):\n\tmov\t16(up,n,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t16(up,n,8), %rax\n\tadc\t%rdx, w0\n\tadc\t$0, w132\n\tmul\tv1\n\tadd\tw3, 16(rp,n,8)\n\tnop\t\t\tC don't ask...\n\tadc\t%rax, w0\n\tmov\t$0, w232\n\tmov\t24(up,n,8), %rax\n\tadc\t%rdx, w1\nL(addmul_entry_1):\n\tmul\tv0\n\tadd\t$4, n\n\tjnz\tL(addmul_top)\n\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\n\tadd\tw0, -8(rp)\n\tadc\tw1, (rp)\n\tadc\tw2, 8(rp)\n\n\tsub\t$2, vn\n\tjz\tL(ret)\n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\n\tmov\tun, n\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\n\tjmp\t*outer_addr\n\nC ===========================================================\nC     accumulate along diagonals if un - vn is small\n\n\tALIGN(16)\nL(diagonal):\n\txor\tw032, w032\n\txor\tw132, w132\n\txor\tw232, w232\n\n\tneg\tun\n\t\n\tmov\tvn32, %eax\n\tand\t$3, %eax\n\tjz\tL(diag_prologue_0)\n\tcmp\t$2, 
%eax\n\tjc\tL(diag_prologue_1)\n\tjz\tL(diag_prologue_2)\n\nL(diag_prologue_3):\n\tlea\t-8(vp), vp\n\tmov\tvp, vp_inner\n\tadd\t$1, vn\n\tmov\tvn, n\n\tlea\tL(diag_entry_3)(%rip), outer_addr\n\tjmp\tL(diag_entry_3)\n\nL(diag_prologue_0):\n\tmov\tvp, vp_inner\n\tmov\tvn, n\n\tlea\t0(%rip), outer_addr\n\tmov     -8(up,n,8), %rax\n\tjmp\tL(diag_entry_0)\n\nL(diag_prologue_1):\n\tlea\t8(vp), vp\n\tmov\tvp, vp_inner\n\tadd\t$3, vn\n\tmov\tvn, n\n\tlea\t0(%rip), outer_addr\n\tmov     -8(vp_inner), %rax\n\tjmp\tL(diag_entry_1)\n\nL(diag_prologue_2):\n\tlea\t-16(vp), vp\n\tmov\tvp, vp_inner\n\tadd\t$2, vn\n\tmov\tvn, n\n\tlea\t0(%rip), outer_addr\n\tmov\t16(vp_inner), %rax\n\tjmp\tL(diag_entry_2)\t\n\n\t\n\tC this loop is 10 c/loop = 2.5 c/l on K8\n\n\tALIGN(16)\t\nL(diag_top):\n\tadd     %rax, w0\n\tadc     %rdx, w1\n\tmov     -8(up,n,8), %rax\n\tadc     $0, w2\nL(diag_entry_0):\n\tmulq    (vp_inner)\n\tadd     %rax, w0\n\tadc     %rdx, w1\n\tadc     $0, w2\nL(diag_entry_3):\n\tmov     -16(up,n,8), %rax\n\tmulq    8(vp_inner)\n\tadd     %rax, w0\n\tmov     16(vp_inner), %rax\n\tadc     %rdx, w1\n\tadc     $0, w2\nL(diag_entry_2):\n\tmulq    -24(up,n,8)\n\tadd     %rax, w0\n\tmov     24(vp_inner), %rax\n\tadc     %rdx, w1\n\tlea     32(vp_inner), vp_inner\n\tadc     $0, w2\nL(diag_entry_1):\n\tmulq    -32(up,n,8)\n\tsub     $4, n\n\tjnz\tL(diag_top)\n\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tadc\t$0, w2\n\t\n\tmov\tw0, (rp,un,8)\n\n\tinc\tun\n\tjz\tL(diag_end)\n\n\tmov\tvn, n\n\tmov\tvp, vp_inner\n\n\tlea\t8(up), up\n\tmov\tw1, w0\n\tmov\tw2, w1\n\txor\tw232, w232\n\n\tjmp\t*outer_addr\n\nL(diag_end):\n\tmov\tw1, (rp)\n\tmov\tw2, 8(rp)\n\t\nL(ret):\tpop\t%r15\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\n\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/add_err1_n.asm",
    "content": "dnl  mpn_add_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_add_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_add_err1_n)\n\tC // if we rearrange the params we could save some moves\n\tC //(rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n\t\n\tmov 8(%rsp),%r10            C cy\n\tmov %rbp,-16(%rsp)          C save rbp\n\tlea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n\tmov %r12,-24(%rsp)          C save r12\n\tmov %r13,-32(%rsp)          C save r13\n\tlea -24(%rsi,%r9,8),%rsi    C up += n - 3\n\tmov %r14,-40(%rsp)          C save r14\n\tmov %r15,-48(%rsp)          C save r15\n\tlea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n\tmov %rcx,-56(%rsp)\t       C save rcx\n\tmov %rbx,-8(%rsp)           C save rbx\n\tmov $3,%r11                 C i = 3\n\tshl $63,%r10\n\tlea (%r8,%r9,8),%r8         C yp += n\n\tsub %r9,%r11\t              C i = 3 - n\n\tmov $0,%r9                  C t1 = 0\n\tmov $0,%rax                 C 
t2 = 0\n\tmov $0,%rbx                 C t3 = 0\n\tjnc L(skiplp)                  C if done goto skiplp\nALIGN(16)\nL(lp):\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 += *(vp + i + 0) + (cy & 1)\n\tadc (%rdx,%r11,8),%r12\n\tcmovc -8(%r8),%rax          C if carry1, t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13     C s2 += *(vp + i + 1) + carry1\n\tcmovc -16(%r8),%rbx         C if carry2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14    C s3 += *(vp + i + 2) + carry2\n\tcmovc -24(%r8),%rcx         C if carry3 t4 = *(yp - 3)\n\tadc 24(%rdx,%r11,8),%r15    C s4 += *(vp + i + 3) + carry3\n\tcmovc -32(%r8),%rbp         C if carry4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = carry4\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc  L(lp)                     C not done, goto lp\nL(skiplp):\n\tcmp $2,%r11                 C cmp(i, 2)\n\tmov -16(%rsp),%rbp          C restore rbp\n\tmov -48(%rsp),%r15          C restore r15\n\tja L(case0)                    C i == 3 goto L(case0)\n\tje L(case1)                    C i == 2 
goto L(case1)\n\tjp L(case2)                    C i == 1 goto L(case2)\nL(case3):\n\tmov (%rsi,%r11,8),%r12         C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13        C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14       C s3 = *(up + i + 2)\n\tshl $1,%r10                    C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12         C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax             C if carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13        C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx            C if carry3 t3 = *(yp - 2)\n\tmov $0,%rcx                    C t4 = 0\n\tadc 16(%rdx,%r11,8),%r14       C s3 += *(vp + i + 3) + carry3\n\tcmovc -24(%r8),%rcx            C if carry4 t4 = *(yp - 3)\n\trcr $1,%r10                    C store carry4 in high bit of cy\n\tadd %rax,%r9                   C t1 += t2\n\tadc $0,%r10                    C accumulate cy\n\tadd %rbx,%r9                   C t1 += t3\n\tadc $0,%r10                    C accumulate cy\n\tadd %rcx,%r9                   C t1 += t4\n\tadc $0,%r10                    C accumulate cy\n\tmov %r12,(%rdi,%r11,8)         C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)        C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)       C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx             C restore rcx\n\tmov %r9,(%rcx)                 C ep[0] = t1\n\tbtr $63,%r10                   C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)               C ep[1] = cy\n\tmov -40(%rsp),%r14             C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13             C restore r13\n\tadc $0,%rax                    C return carry out\n\tmov -24(%rsp),%r12             C restore r12\n\tmov -8(%rsp),%rbx              C restore rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + 
carry1\n\tcmovc -8(%r8),%rax         C if carry2 t2 = *(yp - 1)\n\tadc 8(%rdx,%r11,8),%r13    C s2 += *(vp + i + 1) + carry2\n\tcmovc -16(%r8),%rbx        C if carry3 t3 = *(yp - 2)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tshl $1,%r10                C restore carry1 from high bit of t1\n\tadc (%rdx,%r11,8),%r12     C s1 += *(vp + i + 0) + carry1\n\tcmovc -8(%r8),%rax         C if carry2 t2 = *(yp - 1)\n\trcr $1,%r10                C store carry3 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\nL(case0):\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve carry out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return carry out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/add_n.as",
    "content": "\n;  mpn_add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\trax=mpn_add_n(mp_ptr rdi ,mp_ptr rsi ,mp_ptr rdx ,mp_size_t rcx)\n;\t(rdi,rcx)=(rsi,rcx)+(rdx,rcx)  return rax=carry\n\n\tGLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tneg     rcx\n\tcmp     rcx, 0\n\tjz      L_skiplp\n\talign   16\nL_lp:\n\tmov     r8, [rsi+rcx*8]\n\tmov     r10, [rsi+rcx*8+16]\n\tadc     r8, [rdx+rcx*8]\n\tmov     [rdi+rcx*8], r8\n\tmov     r9, [rsi+rcx*8+8]\n\tadc     r9, [rdx+rcx*8+8]\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rcx, [rcx+4]\n\tmov     r11, [rsi+rcx*8-8]\n\tadc     r10, [rdx+rcx*8-16]\n\tadc     r11, [rdx+rcx*8-8]\n\tmov     [rdi+rcx*8-16], r10\n\tmov     [rdi+rcx*8-8], r11\n\tjrcxz   L_exitlp\n\tjmp     L_lp\nL_exitlp:\n\tsbb     rcx, rcx\nL_skiplp:\n\tcmp     rax, 2\n\tja      L_case3\n\tjz      L_case2\n\tjp      L_case1\nL_case0:\n\tsub     rax, rcx\n\tret\n\talign   16\nL_case1:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tsbb     rax, rax\n\tneg     
rax\n\tret\n\talign   16\nL_case3:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tmov     r10, [rsi+16]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tadc     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tadc     r10, [rdx+16]\n\tmov     [rdi+16], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   16\nL_case2:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tadc     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tadc     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/addadd_n.asm",
    "content": "dnl  mpn_addadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addadd_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tadc (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tadc 8(%rcx,%r10,8),%rbx\n\tmov 24(%rdx,%r10,8),%r11\n\tmov 16(%rdx,%r10,8),%rbp\n\tadc 16(%rcx,%r10,8),%rbp\n\tadc 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\n\tadd $255,%r9b\n\tadc (%rsi,%r10,8),%r8\n\tadc 8(%rsi,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tadc 16(%rsi,%r10,8),%rbp\n\tadc 24(%rsi,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tadc (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tadc 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tadc 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%rbx\n\tmov %r8,(%rdi)\n\tadc 16(%rsi),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tadc 8(%rcx),%r8\n\tmov 16(%rdx),%rbx\n\tadc 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tadc 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 16(%rsi),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/addlsh_n.as",
    "content": "\n;  mpn_addlsh_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_addlsh_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t,shift)\n;\trax                 rdi,   rsi,   rdx,      rcx   r8\n\n\tASM_START\n\tGLOBAL_FUNC mpn_addlsh_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     rbx, 4\n\tsub     rbx, rcx\n\tmov     rcx, 64\n\tsub     rcx, r8\n\tmov     r12, 0\n\tmov     rax, 0\n\tmov     r8, [rdx+rbx*8]\n\tcmp     rbx, 0\n\tjge     L_skiplp\n\talign   16\nL_lp:\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r12, r8, cl\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tmov     r8, [rdx+rbx*8+32]\n\tadd     rbx, 4\n\tjnc     L_lp\n\talign   16\nL_skiplp:\n\tcmp     rbx, 2\n\tja      
L_case0\n\tje      L_case1\n\tjp      L_case2\nL_case3:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tshr     r12, cl\n\tsahf\n\tadc     r12, 0\n\tmov     rax, r12\n\tpop     rbx\n\tpop     r12\n\tret\nL_case2:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     r10, [rdx+rbx*8+16]\n\tshrd    r9, r10, cl\n\tshr     r10, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     rax, r10\n\tmov     [rdi+rbx*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\nL_case1:\n\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tshr     r9, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     rax, r9\n\tpop     rbx\n\tpop     r12\n\tret\nL_case0:\n\tshrd    r12, r8, cl\n\tshr     r8, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, 0\n\tmov     rax, r8\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/addmul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n;  add the result to a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.8\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benifit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n\n%include 'yasm_mac.inc'\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,15,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n; \n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_addmul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_addmul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_addmul_1_small_loop\n\t\n\talign\t16\nL_mpn_addmul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,[rdi+r11*8]\n\tadc\trdx,r10\n\tadd\trax,r8\n\tmov\tr8,r10\n\tmov\t[rdi+r11*8],rax\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_addmul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_addmul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_addmul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tadd\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_addmul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_addmul_1_main_loop\n\tshr\ttemp,1\nL_mpn_addmul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_addmul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_addmul_1_main_loop\n\t\n\talign\t16\nL_mpn_addmul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_addmul_1_main_loop\n\nL_mpn_addmul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t; \n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tadd\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t; \n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t; \n\tadd\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/nehalem/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/addsub_n.asm",
    "content": "dnl  mpn_addsub\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addsub_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tsbb (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tsbb 8(%rcx,%r10,8),%rbx\n\tmov 24(%rdx,%r10,8),%r11\n\tmov 16(%rdx,%r10,8),%rbp\n\tsbb 16(%rcx,%r10,8),%rbp\n\tsbb 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi,%r10,8),%r8\n\tadc 8(%rsi,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tadc 16(%rsi,%r10,8),%rbp\n\tadc 24(%rsi,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tsbb (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tsbb 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tsbb 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%rbx\n\tmov %r8,(%rdi)\n\tadc 16(%rsi),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tsbb 8(%rcx),%r8\n\tmov 16(%rdx),%rbx\n\tsbb 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tsbb 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tadc 16(%rsi),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tsbb $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/and_n.asm",
    "content": "dnl  mpn_and_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_and_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_and_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpand %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tand %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/andn_n.asm",
    "content": "dnl  mpn_andn_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_andn_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_andn_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpandn %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tand %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tnot 
%rax\n\tand %rcx,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/com_n.asm",
    "content": "dnl  mpn_com_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_com_n(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_com_n)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\npcmpeqb %xmm2,%xmm2\nsub %rdx,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmovdqu 16(%rsi,%rcx,8),%xmm1\n\tpxor %xmm2,%xmm0\n\tadd $4,%rcx\n\tpxor %xmm2,%xmm1\n\tmovdqu %xmm0,(%rdi)\n\tmovdqu %xmm1,16(%rdi)\n\tlea 32(%rdi),%rdi\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%rcx\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmov 16(%rsi,%rcx,8),%rax\n\tpxor %xmm2,%xmm0\n\tnot %rax\n\tmovdqu %xmm0,(%rdi)\n\tmov %rax,16(%rdi)\n\tret\nL(case2):\tmovdqu (%rsi,%rcx,8),%xmm0\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi)\n\tret\nL(case1):\tmov (%rsi,%rcx,8),%rax\n\tnot %rax\n\tmov %rax,(%rdi)\nL(case0):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/copyd.asm",
    "content": "dnl  mpn_copyd\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyd(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyd)\nlea 16(%rsi),%rsi\nlea 16(%rdi),%rdi\nsub $4,%rdx\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu -16(%rsi,%rdx,8),%xmm1\n\tsub $4,%rdx\n\tmovdqu %xmm1,-16+32(%rdi,%rdx,8)\n\tmovdqu %xmm0,32(%rdi,%rdx,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $-2,%rdx\njg L(case3)\nje L(case2)\njnp L(case0)\nL(case1):\tmov 8(%rsi,%rdx,8),%rax\n\tmov %rax,8(%rdi,%rdx,8)\nL(case0):\tret\nL(case3):\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmov -8(%rsi,%rdx,8),%rax\n\tmov %rax,-8(%rdi,%rdx,8)\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\nL(case2):\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/copyi.asm",
    "content": "dnl  mpn_copyi\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyi(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyi)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nsub %rdx,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmovdqu 16(%rsi,%rcx,8),%xmm1\n\tadd $4,%rcx\n\tmovdqu %xmm1,16-32(%rdi,%rcx,8)\n\tmovdqu %xmm0,-32(%rdi,%rcx,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%rcx\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmov 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\n\tmovdqu %xmm0,(%rdi,%rcx,8)\n\tret\nL(case2):\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmovdqu %xmm0,(%rdi,%rcx,8)\n\tret\nL(case1):\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\nL(case0):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/divexact_byff.as",
    "content": ";  X86_64 mpn_diveby (B-1)/f   where f=1  special case\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n;\t(rdi,rdx)=(rsi,rdx)/rcx where r8=(B-1)/rcx\n;\trax=carry out\n\n;\tspecial case rcx=ffff  r8=1\n\n;\tThe two imul's are only needed if want strict compatibility with\n;\tmpn_divexact_1 when the division is not exact\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\nmov     r10d, 3\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\n; r9 is our carry in\nmov     r9, 0\nmov\tr8, 1\nmov\trcx,0xFFFFFFFFFFFFFFFF\n; imul %r8,%r9 this is needed if we have non-zero carry in\nsub     r10, rdx\njnc     skiploop\nalign 16\nlp:\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+16]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+16], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+24]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+24], r9\n\tsbb     r9, rdx\n\tadd     r10, 4\n\tjnc     lp\nskiploop:\ntest    r10, 2\njnz  
   skip\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\n\tmov     rax, [rsi+r10*8+8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8+8], r9\n\tsbb     r9, rdx\n\tadd     r10, 2\nskip:\ntest    r10, 1\njnz     fin\n\tmov     rax, [rsi+r10*8]\n\tmul     r8\n\tsub     r9, rax\n\tmov     [rdi+r10*8], r9\n\tsbb     r9, rdx\nfin:\nimul    r9, rcx\nmov     rax, r9\nneg     rax\nret\nend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx      rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_2)\nmov $1,%r9\nsub %rdx,%r9\nlea -8(%rdi,%rdx,8),%rdi\nlea -8(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC //r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nmov %r11,%r13\nmov %r12,%r14\n\nmov (%rsi,%r9,8),%r11\nmov 8(%rsi,%r9,8),%r12\nmov $0,%r10\nadd $2,%r9\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\tmov %rax,-16(%rdi,%r9,8)\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov (%rsi,%r9,8),%r11\n\t\tmov 8(%rsi,%r9,8),%r12\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc L(lp)\nL(skiplp):\nmov %r12,%r8\nmov %r13,%rax\t\nmul %r11\nmov %rax,-16(%rdi,%r9,8)\nimul %r14,%r11\t\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne L(case0)\nL(case1):\n\t\tmov (%rsi,%r9,8),%r11\n\tmov %rdx,-8(%rdi,%r9,8)\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\tmov %rax,(%rdi,%r9,8)\n\tmul 
%rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nL(case0):\n\tmov %rdx,-8(%rdi,%r9,8)\t\n\tmov %rcx,%rax\t\n\tmul %rdx\t\n\tcmp %rax,%r8\t\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_hamdist)\nmov $1,%rcx\nlea -8(%rsi,%rdx,8),%rsi\nlea -8(%rdi,%rdx,8),%rdi\nxor %eax,%eax\nsub %rdx,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rdi,%rcx,8),%r8                                                                                                                                                               \n\txor (%rsi,%rcx,8),%r8                                                                                                                                                               \n\tpopcnt %r8,%r8                                                                                                                                                                      \n\tmov 8(%rdi,%rcx,8),%r9                                                                                                                                                              \n\txor 8(%rsi,%rcx,8),%r9                                                                                                           
                                                   \n\tpopcnt %r9,%r9                                                                                                                                                                      \n\tadd %r8,%rax                                                                                                                                                                        \n\tadd %r9,%rax                                                                                                                                                                        \n\tadd $2,%rcx\n\tjnc L(lp)\nL(skiplp):\njne L(fin)\n\tmov (%rdi,%rcx,8),%r8 \n\txor (%rsi,%rcx,8),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax \nL(fin):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/ior_n.asm",
    "content": "dnl  mpn_ior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_ior_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_ior_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpor %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tor %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/iorn_n.asm",
    "content": "dnl  mpn_iorn_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_iorn_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nC\tiorn=  x | ~y  =   ~(~x & y)  = nandn (swap arg)\n\nASM_START()\nPROLOGUE(mpn_iorn_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rsi,%r8,8),%xmm0\n\tmovdqu 16(%rsi,%r8,8),%xmm1\n\tmovdqu (%rdx,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rdx,%r8,8),%xmm3\n\tpandn %xmm3,%xmm1\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rsi,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rdx,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tnot %rax\n\tor %rcx,%rax\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rsi,%r8,8),%xmm0\n\tmovdqu 
(%rdx,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tnot %rax\n\tor %rcx,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\npush %rdx\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nL(lp):\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc L(lp)\ncmp $2,%rcx\njg\tL(case0)\njz\tL(case1)\njp\tL(case2)\nL(case3):\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp L(fin)\nL(case2):\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp L(fin)\nL(case1):\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\nL(fin):\tmov $3,%rcx\nL(case0): \t#rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc L(notodd)\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nL(l7):\tadcq $0,24(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l7)\n\tmov $3,%rcx\nL(notodd):\tand $3,%rax\n\tpopcnt %rax,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nL(l1):\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc L(l1)\n\tand $7,%rbx\n\tpopcnt %rbx,%r8\n\tadd %r8,24(%rbp)\nL(l2):\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l2)\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nL(lp):\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc L(lp)\ncmp $2,%rcx\njg\tL(case0)\njz\tL(case1)\njp\tL(case2)\nL(case3):\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp L(fin)\nL(case2):\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp L(fin)\nL(case1):\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\nL(fin):\tmov $3,%rcx\nL(case0): \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc L(skipload)\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nL(skipload):\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nL(l2):\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc L(l2)\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nL(l3):\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l3)\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nL(l1):\tsbbq $0,(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc L(l1)\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nL(l4):\tsbbq $0,(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc L(l4)\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc L(notodd)\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nL(l7):\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tadd $1,%rcx\n\tsar $1,%r8\n\tjnz L(l7)\nL(notodd):\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/lshift.asm",
    "content": "dnl  mpn_lshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tmpn_lshift(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx,mp_limb_t rcx)\nC\trax=carry\n\nASM_START()\nPROLOGUE(mpn_lshift)\nC\t// odd and even n seem to have different runtimes\npush %rbx\nmov %rdx,%rbx\nlea 24(%rsi),%rsi\nlea 24(%rdi),%rdi\nmov -32(%rsi,%rbx,8),%rdx\nxor %rax,%rax\nshld %cl,%rdx,%rax\nsub $5,%rbx\njs L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -24(%rsi,%rbx,8),%r11\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tmov %r11,%rdx\n\tmov -16(%rsi,%rbx,8),%r10\n\tshld %cl,%r9,%r8\n\tshld %cl,%r10,%r9\n\tmov %r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tshld %cl,%r11,%r10\n\tsub $4,%rbx\n\tmov %r10,16(%rdi,%rbx,8)\n\tjns L(lp)\nL(skiplp):\ncmp $-2,%rbx\nja L(case3)\nje L(case2)\njp L(case1)\nL(case0):\n\tshl %cl,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case3):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tmov -16(%rsi,%rbx,8),%r10\n\tshld %cl,%r9,%r8\n\tshld %cl,%r10,%r9\n\tmov 
%r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tshl %cl,%r10\n\tmov %r10,16-32(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%rbx,8),%r8\n\tmov -8(%rsi,%rbx,8),%r9\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tshld %cl,%r9,%r8\n\tshl %cl,%r9\n\tmov %r8,(%rdi,%rbx,8)\n\tmov %r9,-8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%rbx,8),%r8\n\tshld %cl,%r8,%rdx\n\tmov %rdx,8(%rdi,%rbx,8)\n\tshl %cl,%r8\n\tmov %r8,(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\n#// 3 is the min size\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\n\txor %r11,%r11\n\tmov -24(%rsi,%rcx,8),%r10\n\tlea (%r8),%r8\n\tsub $3,%rcx\n\tlea (%r9),%r9\n\tjz L(skiplp)\nALIGN(16)\nL(lp):\tmul %r8\n\tadd %rax,%r10\n\tadc %rdx,%r11\n\tlea (%r13),%rax\n\tlea (%r11),%r13\n\tmul %r9\n\tadd %r10,%rax\n\tadc %rdx,%r13\n\txor %r11,%r11\n\tmov -8(%rsi,%rcx,8),%r10\n\tlea (%r8),%r8\n\tdec %rcx\n\tlea (%r9),%r9\n\tjnz L(lp)\nL(skiplp):\t\n\tmul %r8\n\tadd %rax,%r10\n\tadc %rdx,%r11\n\tlea (%r13),%rax\n\tlea (%r11),%r13\n\tmul %r9\n\tadd %r10,%rax\n\tadc %rdx,%r13\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mod_1_2.asm",
    "content": "dnl  mpn_mod_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,3)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_2)\nC // require rdx >=4\npush %r12\npush %r13\npush %r14\nmov -8(%rsi,%rdx,8),%r14\nmov -16(%rsi,%rdx,8),%r13\nmov -32(%rsi,%rdx,8),%r11\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov %rdx,%rcx\nmov -24(%rsi,%rdx,8),%rax\nsub $6,%rcx\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmul %r8\n\tmov $0,%r12\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov 0(%rsi,%rcx,8),%r11\n\tmov %r12,%r14\n\tadc %rdx,%r14\n\tmov 8(%rsi,%rcx,8),%rax\n\tsub $2,%rcx\n\tjnc L(lp)\nL(skiplp):\n\tmul %r8\n\tmov $0,%r12\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncmp $-2,%rcx\nje L(case0)\nL(case1):\n\tmov 
8(%rsi,%rcx,8),%r11\n\tmov $0,%r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\nL(case0):\t\nmov %r8,%rax\nmul %r14\nadd %rax,%r13\nadc $0,%rdx\nmov %r13,(%rdi)\nmov %rdx,8(%rdi)\npop %r14\npop %r13\npop %r12\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mod_1_3.asm",
    "content": "dnl  mpn_mod_1_3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,4)  contains B^i % divisor\n\nASM_START()\nPROLOGUE(mpn_mod_1_3)\nC // require rdx >=5\npush %r12\npush %r13\npush %r14\npush %r15\nmov -8(%rsi,%rdx,8),%r15\nmov -16(%rsi,%rdx,8),%r14\nmov -32(%rsi,%rdx,8),%rax\nmov -40(%rsi,%rdx,8),%r12\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov 24(%rcx),%r11\nmov %rdx,%rcx\nsub $8,%rcx\njc L(skiplp)\nALIGN(16)\nC // r15 r14 -8() -16()=rax -24()=r12\nL(lp):\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 0(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 8(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tsub $3,%rcx\n\tjnc L(lp)\nL(skiplp):\nC // we have loaded up the next two limbs\nC // but because they are out of order we can have to do 3 limbs min\ncmp $-2,%rcx\njl 
L(case1)\nje L(case2)\nL(case3):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 8(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 16(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tC // r15 r14 rax r12\n\tmov $0,%r13\n\tmul %r8\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case2):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 16(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tC // r15 r14 r12\n\tmov $0,%r13\n\tmov %r8,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case1):\n\tC // one more is 3 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12 \n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov %r13,%r15\n\tadc 
%rdx,%r15\n\tmov %r8,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tadc $0,%rdx\n\tmov %r14,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mul_1.asm",
    "content": "dnl  mpn_mul_1\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_1)\nmov $3,%r8\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nsub %rdx,%r8\nmov $0,%r9d\nmov $0,%r11d\nmov 24-24(%rsi,%r8,8),%rax\njnc L(skiplp)\nALIGN(16)\nL(lp):\tmul %rcx\n\tadd %rax,%r9\n\tmov %r9,(%rdi,%r8,8)\n\tmov $0,%r10d\n\tmov 8(%rsi,%r8,8),%rax\n\tadc %rdx,%r10\n\tmov $0,%r9d\n\tmul %rcx\n\tadd %rax,%r10\n\tmov $0,%r11d\n\tmov 16(%rsi,%r8,8),%rax\n\tmov %r10,8(%rdi,%r8,8)\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r11\n\tmov 24(%rsi,%r8,8),%rax\n\tmov %r11,16(%rdi,%r8,8)\n\tadc %rdx,%r9\n\tadd $3,%r8\n\tjnc L(lp)\nL(skiplp):\ncmp $1,%r8\nja L(case0)\nje L(case1)\nL(case2):\tmul %rcx\n\tadd %rax,%r9\n\tmov %r9,(%rdi)\n\tmov $0,%r10d\n\tmov 8(%rsi),%rax\n\tadc %rdx,%r10\n\tmov $0,%r9d\n\tmul %rcx\n\tadd %rax,%r10\n\tmov $0,%r11d\n\tmov 16(%rsi),%rax\n\tmov %r10,8(%rdi)\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r11\n\tmov %r11,16(%rdi)\n\tadc %rdx,%r9\n\tmov %r9,%rax\n\tret\nL(case1):\tmul %rcx\n\tadd %rax,%r9\n\tmov %r9,8(%rdi)\n\tmov $0,%r10d\n\tmov 16(%rsi),%rax\n\tadc 
%rdx,%r10\n\tmov $0,%r9d\n\tmul %rcx\n\tadd %rax,%r10\n\tmov $0,%r11d\n\tmov %r10,16(%rdi)\n\tadc %rdx,%r11\n\tmov %r11,%rax\n\tret\nL(case0):\tmul %rcx\n\tadd %rax,%r9\n\tmov %r9,16(%rdi)\n\tmov $0,%r10d\n\tadc %rdx,%r10\n\tmov %r10,%rax\n\tret\t\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mul_2.asm",
    "content": "dnl  mpn_mul_2\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_2)\npush %rbx\nmov 8(%rcx),%r8\nmov (%rcx),%rcx\nmov $3,%rbx\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nsub %rdx,%rbx\n\t#mul %r8\n\t#add %rax,%r9\n\t#adc %rdx,%r10\n\t#mov -16(%rsi,%rbx,8),%rax\n\t\tmov $0,%r11\n\t#mul %rcx\n\t#add %rax,%r9\n\t#adc %rdx,%r10\n\t#adc $0,%r11\n\t#mov -16(%rsi,%rbx,8),%rax\n\t#mul %r8\n\t#add %rax,%r10\n\t#mov -8(%rsi,%rbx,8),%rax\n\t#adc %rdx,%r11\n\t#mul %rcx\n\t#add %rax,%r10\n\t#mov %r9,-16(%rdi,%rbx,8)\n\t#adc %rdx,%r11\n\t\tmov $0,%r9\n\t#mov -8(%rsi,%rbx,8),%rax\n\t#adc $0,%r9\n\t#mul %r8\n\t#add %rax,%r11\n\t#mov %r10,-8(%rdi,%rbx,8)\n\tmov (%rsi,%rbx,8),%rax\n\tmov $0,%r10\n\t#adc %rdx,%r9\n\tmul %rcx\n\tadd %rax,%r11\n\tmov (%rsi,%rbx,8),%rax\n\tmov %r11,(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\t#adc $0,%r10\n\t#add $3,%rbx\ncmp $0,%rbx\njge L(skiplp)\nALIGN(16)\nL(lp):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul 
%r8\n\tadd %rax,%r10\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tadc %rdx,%r11\n\tmov $0,%r9\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc $0,%r9\n\tmul %r8\n\tadd %rax,%r11\n\tmov %r10,16(%rdi,%rbx,8)\n\tmov 24(%rsi,%rbx,8),%rax\n\tmov $0,%r10\n\tadc %rdx,%r9\n\tmul %rcx\n\tadd %rax,%r11\n\tmov 24(%rsi,%rbx,8),%rax\n\tmov %r11,24(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\tadc $0,%r10\n\tadd $3,%rbx\n\tjnc L(lp)\nL(skiplp):\ncmp $1,%rbx\nja L(case0)\nje L(case1)\nL(case2):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul %r8\n\tadd %rax,%r10\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tadc %rdx,%r11\n\tmov $0,%r9\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc $0,%r9\n\tmul %r8\n\tadd %rax,%r11\n\tmov %r10,16(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\tmov %r11,24(%rdi,%rbx,8)\n\tmov %r9,%rax\n\tpop %rbx\n\tret\nL(case1):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul %r8\n\tadd %rax,%r10\n\tadc %rdx,%r11\n\tmov %r9,8(%rdi,%rbx,8)\n\tmov %r10,16(%rdi,%rbx,8)\n\tmov %r11,%rax\n\tpop %rbx\n\tret\nL(case0):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tmov %r10,%rax\n\tpop %rbx\n\tret\nEPILOGUE()\n\n\n\n"
  },
  {
    "path": "mpn/x86_64/nehalem/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/nehalem/nand_n.asm",
    "content": "dnl  mpn_nand_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_nand_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_nand_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpand %xmm3,%xmm1\n\tpand %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tpand %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/nior_n.asm",
    "content": "dnl  mpn_nior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_nior_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_nior_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpor %xmm3,%xmm1\n\tpor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tpor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_popcount)\nmov $5,%rcx\nlea -40(%rdi,%rsi,8),%rdi\nxor %eax,%eax\nsub %rsi,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tpopcnt (%rdi,%rcx,8),%r8\n\tpopcnt 8(%rdi,%rcx,8),%r9\n\tpopcnt 16(%rdi,%rcx,8),%r10\n\tpopcnt 24(%rdi,%rcx,8),%rsi\n\tadd %r8,%rax\n\tadd %rsi,%rax\n\tadd %r9,%rax\n\tpopcnt 32(%rdi,%rcx,8),%r8\n\tpopcnt 40(%rdi,%rcx,8),%r9\n\tadd %r8,%rax\n\tadd %r10,%rax\n\tadd %r9,%rax\n\tadd $6,%rcx\n\tjnc L(lp)\nL(skiplp):\nlea L(case5)(%rip),%rdx\t#// in linux we can do this before the loop\nlea (%rcx,%rcx,8),%rcx\t#// rcx*9\nadd %rcx,%rdx\njmp *%rdx\nL(case5):\t#//rcx=0\n\tnop\n\tpopcnt (%rdi),%r8\t#// 5bytes\n\tadd %r8,%rax\t\t#// 3bytes\nL(case4):\t#//rcx=1\n\tpopcnt 8(%rdi),%r9\t#// 6bytes\n\tadd %r9,%rax\nL(case3):\tpopcnt 16(%rdi),%r10\n\tadd %r10,%rax\nL(case2):\tpopcnt 24(%rdi),%rsi\n\tadd %rsi,%rax\nL(case1):\tpopcnt 32(%rdi),%r8\n\tadd %r8,%rax\nL(case0):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/redc_1.as",
    "content": "\n;  core2 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      
%%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul 
    r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, 
0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tsub     r15, 1\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tsub     r15, 1\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign 16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov     r13, 
[r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign 16\ncase0:\n\tmpn_addmul_1_int 0\n\talign 16\ncase1:\n\tmpn_addmul_1_int 1\n\talign 16\ncase3:\n\tmpn_addmul_1_int 3\n\talign 16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign 16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/nehalem/rsh1add_n.as",
    "content": "\n;  AMD64 mpn_rsh1add_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)+(rdx,rcx))/2 return low bit of sum\n\n\tGLOBAL_FUNC mpn_rsh1add_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, 
[rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/rsh1sub_n.as",
    "content": "\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two compliment\n\n\tGLOBAL_FUNC mpn_rsh1sub_n\n\tlea     rdi, [rdi+rcx*8-32]\n\tlea     rsi, [rsi+rcx*8-32]\n\tlea     rdx, [rdx+rcx*8-32]\n\tpush    r12\n\tpush    rbx\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     L_skiplp\nL_lp:\n\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     L_lp\nL_skiplp:\n\tcmp     r8, 2\n\tja      L_case0\n\tjz      
L_case1\n\tjp      L_case2\nL_case3:\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case2:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case1:\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tpop     rbx\n\tpop     r12\n\tret\n\talign   16\nL_case0:\n\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tpop     rbx\n\tpop     r12\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/rsh_divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9 / rcx ) >> r8    rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1_2 with shifting on the output of the quotient\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\nC\t// 3limb minimum for the mo\nmov %r9,%r10\nmov $2,%r9\nsub %rdx,%r9\nlea -16(%rdi,%rdx,8),%rdi\nlea -16(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC 
//r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nC // for the first limb we can not store (as we have to shift) so we need to\nC // do first limb separately , we could do it as normal as an extention of\nC // the loop , but if we do it as a 1 limb inverse then we can start it\nC // eailer , ie interleave it with the calculation of the 2limb inverse\n\nmov %r11,%r13\nmov %r12,%r14\n\n\nmov (%rsi,%r9,8),%r11\nsub %r10,%r11\nsbb %r10,%r10\n\nimul %r13,%r11\nMOVQ %r11,%mm2\npsrlq %mm0,%mm2\nmov %rcx,%rax\nmul %r11\nmov 8(%rsi,%r9,8),%r11\nmov 16(%rsi,%r9,8),%r12\nadd %r10,%r10\nsbb %rdx,%r11\nsbb $0,%r12\nsbb %r10,%r10\n\n\nadd $2,%r9\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,-16(%rdi,%r9,8)\n\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov 8(%rsi,%r9,8),%r11\n\t\tmov 16(%rsi,%r9,8),%r12\n\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc L(lp)\nL(skiplp):\nmov %r12,%r8\nmov %r13,%rax\nmul %r11\n\nMOVQ %rax,%mm3\nmovq %mm3,%mm4\npsllq %mm1,%mm3\npsrlq %mm0,%mm4\npor %mm3,%mm2\nmovq %mm2,-16(%rdi,%r9,8)\n\nimul %r14,%r11\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne L(case0)\nL(case1):\n\t\tmov 8(%rsi,%r9,8),%r11\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq 
%mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,(%rdi,%r9,8)\n\tmovq %mm4,8(%rdi,%r9,8)\n\n\tmul %rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nL(case0):\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\tmovq %mm2,(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\tcmp %rax,%r8\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/rshift.asm",
    "content": "dnl  mpn_rshift\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tmpn_rshift(mp_ptr rdi,mp_ptr rsi,mp_size_t rdx,mp_limb_t rcx)\nC\trax=carry\n\nASM_START()\nPROLOGUE(mpn_rshift)\nC\t//when n=1 mod4 seem to have different runtimes\npush %rbx\nmov $4,%rbx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nsub %rdx,%rbx\nxor %rax,%rax\nmov -8(%rsi,%rbx,8),%rdx\nshrd %cl,%rdx,%rax\ncmp $0,%rbx\njge L(skiplp)\t\nALIGN(16)\nL(lp):\n\tmov (%rsi,%rbx,8),%r8\n\tmov 24(%rsi,%rbx,8),%r11\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tmov 16(%rsi,%rbx,8),%r10\n\tshrd %cl,%r10,%r9\n\tmov %r11,%rdx\n\tmov %r9,8(%rdi,%rbx,8)\n\tshrd %cl,%r11,%r10\n\tadd $4,%rbx\n\tmov %r10,-16(%rdi,%rbx,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%rbx\nja L(case0)\nje L(case1)\njp L(case2)\nL(case3):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tmov 16(%rsi,%rbx,8),%r10\n\tshrd %cl,%r10,%r9\n\tmov %r9,8(%rdi,%rbx,8)\n\tshr %cl,%r10\n\tmov 
%r10,16(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tmov 8(%rsi,%rbx,8),%r9\n\tshrd %cl,%r9,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tshr %cl,%r9\n\tmov %r9,8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%rbx,8),%r8\n\tshrd %cl,%r8,%rdx\n\tshr %cl,%r8\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tmov %r8,(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nALIGN(16)\nL(case0):\n\tshr %cl,%rdx\n\tmov %rdx,-8(%rdi,%rbx,8)\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nC\tmpn_store(mp_ptr,mp_size_t,mp_limb_t)\nC\trax          rdi,   rsi,    rdx\n\nASM_START()\nPROLOGUE(mpn_store)\nlea -32(%rdi),%rdi\ncmp $0,%rsi\njz L(case0)\nMOVQ %rdx,%xmm0\nmovddup %xmm0,%xmm0\ntest $0xF,%rdi\njz L(notodd)\n\tmov %rdx,32(%rdi)\n\tlea 8(%rdi),%rdi\n\tsub $1,%rsi\nL(notodd):\nsub $4,%rsi\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tlea 32(%rdi),%rdi\n\tsub $4,%rsi\n\tmovdqa %xmm0,(%rdi)\n\tmovdqa %xmm0,16(%rdi)\n\tjnc L(lp)\nL(skiplp):\ncmp $-2,%rsi\nja L(case3)\njz L(case2)\njp L(case1)\nL(case0):\nret\nL(case3):// rsi=-1\n\tmovdqa %xmm0,32(%rdi)\nL(case1):\n\tmov %rdx,56(%rdi,%rsi,8)\n\tret\nL(case2):// rsi=-2\n\tmovdqa %xmm0,32(%rdi)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/sub_err1_n.asm",
    "content": "dnl  mpn_sub_err1_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC ret mpn_sub_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nASM_START()\nPROLOGUE(mpn_sub_err1_n)\n       C if we rearrange the params we could save some moves\n       C (rdi,r9)=(rsi,r9)+(rdx,r9)  sum=carry*(r8)\n       \n       mov 8(%rsp),%r10            C cy\n       mov %rbp,-16(%rsp)          C save rbp\n       lea -24(%rdi,%r9,8),%rdi    C rp += n - 3\n       mov %r12,-24(%rsp)          C save r12\n       mov %r13,-32(%rsp)          C save r13\n       lea -24(%rsi,%r9,8),%rsi    C up += n - 3\n       mov %r14,-40(%rsp)          C save r14\n       mov %r15,-48(%rsp)          C save r15\n       lea -24(%rdx,%r9,8),%rdx    C vp += n - 3\n       mov %rcx,-56(%rsp)\t       C save rcx\n       mov %rbx,-8(%rsp)           C save rbx\n       mov $3,%r11                 C i = 3\n       shl $63,%r10                \n       lea (%r8,%r9,8),%r8         C yp += n\n       sub 
%r9,%r11\t              C i = 3 - n\n       mov $0,%r9                  C t1 = 0\n       mov $0,%rax                 C t2 = 0\n       mov $0,%rbx                 C t3 = 0\n       jnc L(skiplp)                  C if done goto L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rsi,%r11,8),%r12      C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13     C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14    C s3 = *(up + i + 2)\n\tmov 24(%rsi,%r11,8),%r15    C s4 = *(up + i + 3)\n\tmov $0,%rbp                 C t5 = 0\n\tshl $1,%r10                 C s1 -= *(vp + i + 0) + (cy & 1)\n\tsbb (%rdx,%r11,8),%r12      \n\tcmovc -8(%r8),%rax          C if borrow1, t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13     C s2 -= *(vp + i + 1) + borrow1\n\tcmovc -16(%r8),%rbx         C if borrow2 t3 = *(yp - 2)\n\tmov $0,%rcx                 C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14    C s3 -= *(vp + i + 2) + borrow2\n\tcmovc -24(%r8),%rcx         C if borrow3 t4 = *(yp - 3)\n\tsbb 24(%rdx,%r11,8),%r15    C s4 -= *(vp + i + 3) + borrow3\n\tcmovc -32(%r8),%rbp         C if borrow4 t5 = *(yp - 4)\n\trcr $1,%r10                 C high bit of cy = borrow\n\tadd %rax,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rbx,%r9                C t1 += t2\n\tadc $0,%r10                 C accumulate cy\n\tadd %rcx,%r9                C t1 += t4\n\tmov $0,%rax                 C t2 = 0\n\tadc $0,%r10                 C accumulate cy\n\tlea -32(%r8),%r8            C yp -= 4\n\tadd %rbp,%r9                C t1 += t5\n\tadc $0,%r10                 C accumulate cy\n\tmov %r12,(%rdi,%r11,8)      C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)     C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)    C *(rp + i + 2) = s3\n\tmov %r15,24(%rdi,%r11,8)    C *(rp + i + 3) = s4\n\tmov $0,%rbx                 C t3 = 0\n\tadd $4,%r11                 C i += 4\n\tjnc L(lp)                     C not done, goto lp\nL(skiplp):\n       cmp $2,%r11             C cmp(i, 2)\n       mov -16(%rsp),%rbp      C restore 
rbp\n       mov -48(%rsp),%r15      C restore r15\n       ja L(case0)                C i == 3 goto L(case0) \n       je L(case1)                C i == 2 goto L(case1)\n       jp L(case2)                C i == 1 goto L(case2)\nL(case3):\n\tmov (%rsi,%r11,8),%r12     C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13    C s2 = *(up + i + 1)\n\tmov 16(%rsi,%r11,8),%r14   C s3 = *(up + i + 2) \n\tshl $1,%r10                C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12     C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax         C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13    C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx        C if borrow3 t3 = *(yp - 2)\n\tmov $0,%rcx                C t4 = 0\n\tsbb 16(%rdx,%r11,8),%r14   C s3 -= *(vp + i + 3) + borrow3\n\tcmovc -24(%r8),%rcx        C if borrow4 t4 = *(yp - 3)\n\trcr $1,%r10                C store borrow4 in high bit of cy\n\tadd %rax,%r9               C t1 += t2\n\tadc $0,%r10                C accumulate cy\n\tadd %rbx,%r9               C t1 += t3\n\tadc $0,%r10                C accumulate cy\n\tadd %rcx,%r9               C t1 += t4\n\tadc $0,%r10                C accumulate cy\n\tmov %r12,(%rdi,%r11,8)     C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)    C *(rp + i + 1) = s2\n\tmov %r14,16(%rdi,%r11,8)   C *(rp + i + 2) = s3\n\tmov -56(%rsp),%rcx         C restore rcx\n\tmov %r9,(%rcx)             C ep[0] = t1\n\tbtr $63,%r10               C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)           C ep[1] = cy\n\tmov -40(%rsp),%r14         C restore r14\n\tmov $0,%rax                \n\tmov -32(%rsp),%r13         C restore r13\n\tadc $0,%rax                C return borrow out\n\tmov -24(%rsp),%r12         C restore r12\n\tmov -8(%rsp),%rbx          C restore rbx\n\tret\nALIGN(16)\nL(case2):\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tmov 8(%rsi,%r11,8),%r13  C s2 = *(up + i + 1)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb 
(%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\tsbb 8(%rdx,%r11,8),%r13  C s2 -= *(vp + i + 1) + borrow2\n\tcmovc -16(%r8),%rbx      C if borrow3 t3 = *(yp - 2)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tadd %rbx,%r9             C t1 += t3\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\n\tmov %r13,8(%rdi,%r11,8)  C *(rp + i + 1) = s2\n\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nALIGN(16)\nL(case1):\n\tmov (%rsi,%r11,8),%r12   C s1 = *(up + i + 0)\n\tshl $1,%r10              C restore borrow1 from high bit of t1\n\tsbb (%rdx,%r11,8),%r12   C s1 -= *(vp + i + 0) + borrow1\n\tcmovc -8(%r8),%rax       C if borrow2 t2 = *(yp - 1)\n\trcr $1,%r10              C store borrow3 in high bit of cy\n\tadd %rax,%r9             C t1 += t2\n\tadc $0,%r10              C accumulate cy\n\tmov %r12,(%rdi,%r11,8)   C *(rp + i + 0) = s1\nL(case0):\tmov -56(%rsp),%rcx       C restore rcx\n\tmov %r9,(%rcx)           C ep[0] = t1\n\tbtr $63,%r10             C retrieve borrow out and reset bit of cy\n\tmov %r10,8(%rcx)         C ep[1] = cy\n\tmov -40(%rsp),%r14       C restore r14\n\tmov $0,%rax\n\tmov -32(%rsp),%r13       C restore r13\n\tadc $0,%rax              C return borrow out\n\tmov -24(%rsp),%r12       C restore r12\n\tmov -8(%rsp),%rbx        C restore rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/sub_n.as",
    "content": "\n;  mpn_sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_sub_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx) return rax=borrow\n\n\tGLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tneg     rcx\n\tcmp     rcx, 0\n\tjz      L_skiplp\n\talign   16\nL_lp:\n\tmov     r8, [rsi+rcx*8]\n\tmov     r10, [rsi+rcx*8+16]\n\tsbb     r8, [rdx+rcx*8]\n\tmov     [rdi+rcx*8], r8\n\tmov     r9, [rsi+rcx*8+8]\n\tsbb     r9, [rdx+rcx*8+8]\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rcx, [rcx+4]\n\tmov     r11, [rsi+rcx*8-8]\n\tsbb     r10, [rdx+rcx*8-16]\n\tsbb     r11, [rdx+rcx*8-8]\n\tmov     [rdi+rcx*8-16], r10\n\tmov     [rdi+rcx*8-8], r11\n\tjrcxz   L_exitlp\n\tjmp     L_lp\nL_exitlp:\n\tsbb     rcx, rcx\nL_skiplp:\n\tcmp     rax, 2\n\tja      L_case3\n\tjz      L_case2\n\tjp      L_case1\nL_case0:\n\tsub     rax, rcx\n\tret\n\talign   16\nL_case1:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   
16\nL_case3:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tmov     r10, [rsi+16]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tsbb     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     r10, [rdx+16]\n\tmov     [rdi+16], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\talign   16\nL_case2:\n\tadd     rcx, rcx\n\tmov     r8, [rsi]\n\tsbb     r8, [rdx]\n\tmov     [rdi], r8\n\tmov     r9, [rsi+8]\n\tsbb     r9, [rdx+8]\n\tmov     [rdi+8], r9\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/nehalem/subadd_n.asm",
    "content": "dnl  mpn_subadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_subadd_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\nmov $3,%r10d\nsub %r8,%r10\nlea -24(%rdi,%r8,8),%rdi\nlea -24(%rsi,%r8,8),%rsi\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):    .byte 0x9e      # sahf\t\n\tmov (%rsi,%r10,8),%r8\n\tsbb (%rcx,%r10,8),%r8\n\tmov 8(%rsi,%r10,8),%rbx\n\tsbb 8(%rcx,%r10,8),%rbx\n\tmov 24(%rsi,%r10,8),%r11\n\tmov 16(%rsi,%r10,8),%rbp\n\tsbb 16(%rcx,%r10,8),%rbp\n\tsbb 24(%rcx,%r10,8),%r11\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rdx,%r10,8),%r8\n\tsbb 8(%rdx,%r10,8),%rbx\n\tmov %r8,(%rdi,%r10,8)\n\tsbb 16(%rdx,%r10,8),%rbp\n\tsbb 24(%rdx,%r10,8),%r11\n\tsetc %r9b\n\tmov %r11,24(%rdi,%r10,8)\n\tmov %rbp,16(%rdi,%r10,8)\n\tmov %rbx,8(%rdi,%r10,8)\n\tadd $4,%r10\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rsi),%r8\n\tsbb (%rcx),%r8\n\tmov 8(%rsi),%rbx\n\tsbb 8(%rcx),%rbx\n\tmov 16(%rsi),%rbp\n\tsbb 16(%rcx),%rbp\n        
.byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rdx),%r8\n\tsbb 8(%rdx),%rbx\n\tmov %r8,(%rdi)\n\tsbb 16(%rdx),%rbp\n\tsetc %r9b\n\tmov %rbp,16(%rdi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rsi),%r8\n\tsbb 8(%rcx),%r8\n\tmov 16(%rsi),%rbx\n\tsbb 16(%rcx),%rbx\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 8(%rdx),%r8\n\tsbb 16(%rdx),%rbx\n\tmov %r8,8(%rdi)\n\tsetc %r9b\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rsi),%r8\n\tsbb 16(%rcx),%r8\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb 16(%rdx),%r8\n\tmov %r8,16(%rdi)\n\tsetc %r9b\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%eax\n\tadc $0,%rax\n\tadd $255,%r9b\n\tadc $0,%rax\t\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/sublsh1_n.as",
    "content": "\n;  core2 mpn_sublsh1_n \n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1\n;\trax = borrow\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_sublsh1_n\n\tlea     rsi, [rsi+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     rdi, [rdi+rcx*8]\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      next\nlp1:\n\tmov     r10, [rsi+rcx*8]\n\tadd     r9, 1\n\tsbb     r10, [rdx+rcx*8]\n\tsbb     r9, r9\n\tadd     rax, 1\n\tsbb     r10, [rdx+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tadd     rcx, 1\n\ttest    rcx, 3\n\tjnz     lp1\nnext:\n\tcmp     rcx, 0\n\tjz      end\n\tpush    r15\n\tpush    r14\n\tpush    r13\n\tpush    r12\n\tpush    rbx\n\talign 16\nlp:\n\tmov     r10, [rsi+rcx*8]\n\tmov     rbx, [rsi+rcx*8+8]\n\tmov     r11, [rsi+rcx*8+16]\n\tmov     r8, [rsi+rcx*8+24]\n\tmov     r12, [rdx+rcx*8]\n\tmov     r13, [rdx+rcx*8+8]\n\tmov     r14, [rdx+rcx*8+16]\n\tmov     r15, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tsbb     r10, r12\n\tsbb     rbx, r13\n\tsbb     r11, r14\n\tsbb     r8, r15\n\tsbb     r9, r9\n\tadd     rax, 1\n\tsbb     r10, r12\n\tsbb     
rbx, r13\n\tsbb     r11, r14\n\tsbb     r8, r15\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     lp\n\tpop     rbx\n\tpop     r12\n\tpop     r13\n\tpop     r14\n\tpop     r15\nend:\n\tadd     rax, r9\n\tneg     rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/nehalem/submul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_submul_1 -- Multiply a limb vector with a limb and\n;  subtract the result from a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.6\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benifit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n;\n\n%include 'yasm_mac.inc'\n\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,15,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n;\n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_submul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_submul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_submul_1_small_loop\n\t\n\talign\t16\nL_mpn_submul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,r8\n\tadc\trdx,r10\n\tsub\t[rdi+r11*8],rax\n\tmov\tr8,r10\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_submul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_submul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_submul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tsub\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_submul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_submul_1_main_loop\n\tshr\ttemp,1\nL_mpn_submul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_submul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_submul_1_main_loop\n\t\n\talign\t16\nL_mpn_submul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_submul_1_main_loop\n\nL_mpn_submul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tsub\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\tsub\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/nehalem/sumdiff_n.asm",
    "content": "dnl  mpn_sumdiff\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_sumdiff_n)\npush %rbx\nxor %r9,%r9\npush %rbp\nxor %rax,%rax\npush %r12\nmov $3,%r10d\npush %r13\nlea -24(%rdi,%r8,8),%rdi\npush %r14\nlea -24(%rsi,%r8,8),%rsi\npush %r15\nsub %r8,%r10\nlea -24(%rdx,%r8,8),%rdx\nlea -24(%rcx,%r8,8),%rcx\njnc L(skiplp)\n.align 16\nL(lp):     .byte 0x9e      # sahf\t\n\tmov (%rdx,%r10,8),%r8\n\tmov 24(%rdx,%r10,8),%r12\n\tmov %r8,%r11\n\tadc (%rcx,%r10,8),%r8\n\tmov 8(%rdx,%r10,8),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx,%r10,8),%rbx\n\tmov 16(%rdx,%r10,8),%rbp\n\tmov %rbp,%r14\n\tadc 16(%rcx,%r10,8),%rbp\n\tmov %r12,%r15\n\tadc 24(%rcx,%r10,8),%r12\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx,%r10,8),%r11\n\tmov %r11,(%rsi,%r10,8)\n\tsbb 8(%rcx,%r10,8),%r13\n\tsbb 16(%rcx,%r10,8),%r14\n\tsbb 24(%rcx,%r10,8),%r15\n\tsetc %r9b\n\tadd $4,%r10\n\tmov %r8,-32(%rdi,%r10,8)\n\tmov %rbp,16-32(%rdi,%r10,8)\n\tmov %r13,8-32(%rsi,%r10,8)\n\tmov %r15,24-32(%rsi,%r10,8)\n\tmov %r12,24-32(%rdi,%r10,8)\n\tmov %r14,16-32(%rsi,%r10,8)\n\tmov 
%rbx,8-32(%rdi,%r10,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r10\njg L(case0)\nje L(case1)\njp L(case2)\nL(case3):  .byte 0x9e      # sahf\t\n\tmov (%rdx),%r8\n\tmov %r8,%r11\n\tadc (%rcx),%r8\n\tmov 8(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 8(%rcx),%rbx\n\tmov 16(%rdx),%rbp\n\tmov %rbp,%r14\n\tadc 16(%rcx),%rbp\n        .byte 0x9f      # lahf\t\n\tadd $255,%r9b\n\tsbb (%rcx),%r11\n\tmov %r11,(%rsi)\n\tsbb 8(%rcx),%r13\n\tsbb 16(%rcx),%r14\n\tsetc %r9b\n\tmov %r8,(%rdi)\n\tmov %rbp,16(%rdi)\n\tmov %r13,8(%rsi)\n\tmov %r14,16(%rsi)\n\tmov %rbx,8(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case2):  .byte 0x9e      # sahf\t\n\tmov 8(%rdx),%r8\n\tmov %r8,%r11\n\tadc 8(%rcx),%r8\n\tmov 16(%rdx),%rbx\n\tmov %rbx,%r13\n\tadc 16(%rcx),%rbx\n        .byte 0x9f      # lahf\n\tadd $255,%r9b\n\tsbb 8(%rcx),%r11\n\tmov %r11,8(%rsi)\n\tsbb 16(%rcx),%r13\n\tsetc %r9b\n\tmov %r8,8(%rdi)\n\tmov %r13,16(%rsi)\n\tmov %rbx,16(%rdi)\n        .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nL(case1):  .byte 0x9e      # sahf\t\n\tmov 16(%rdx),%r8\n\tmov %r8,%r11\n\tadc 16(%rcx),%r8\n        .byte 0x9f      # lahf\n\tadd $255,%r9b\n\tsbb 16(%rcx),%r11\n\tmov %r11,16(%rsi)\n\tsetc %r9b\n\tmov %r8,16(%rdi)\nL(case0):  .byte 0x9e      # sahf\t\n\tmov $0,%rax\n\tadc $0,%rax\n\tadd $255,%r9b\n\trcl $1,%rax\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbp\n\tpop %rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/xnor_n.asm",
    "content": "dnl  mpn_xnor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xnor_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_xnor_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpxor %xmm3,%xmm1\n\tpxor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tpxor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/nehalem/xor_n.asm",
    "content": "dnl  mpn_xor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xor_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_xor_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpxor %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\txor %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/netburst/add_n.as",
    "content": ";  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  Copyright 2005, 2006 Pierrick Gaudry\n;\n;  Copyright 2008 Brian Gladman, William Hart\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit\n;  compiler and the YASM assembler.\n\n;  AMD64 mpn_add_n/mpn_sub_n -- mpn add or subtract.\n;\n;  Calling interface:\n;\n;  mp_limb_t __gmpn_<op>_n(    <op> = add OR sub\n;     mp_ptr dst,              rdi\n;     mp_srcptr src1,          rsi\n;     mp_srcptr src2,          rdx\n;     mp_size_t  len           rcx\n;  )\n;\n;  mp_limb_t __gmpn_<op>_nc(   <op> = add OR sub\n;     mp_ptr dst,              rdi\n;     mp_srcptr src1,          rsi\n;     mp_srcptr src2,          rdx\n;     mp_size_t len,           rcx\n;     mp_limb_t carry           r8 \n;  )\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n\n%include 'yasm_mac.inc'\n\n%define dst       rdi   ; destination pointer\n%define sr1       rsi   ; source 1 pointer\n%define sr2       rdx   ; source 2 pointer\n%define len       rcx   ; number of limbs\n%define lend      ecx   ; number of limbs\n%define cy         r8   ; carry value\n\n%define r_jmp     r10   ; temporary for jump table entry\n%define r_cnt     r11   ; temporary for loop count\n\n%define UNROLL_LOG2         4\n%define UNROLL_COUNT        (1 << UNROLL_LOG2)\n%define UNROLL_MASK         (UNROLL_COUNT - 1)\n%define UNROLL_BYTES        (8 * UNROLL_COUNT)\n%define UNROLL_THRESHOLD    8\n\n%if UNROLL_BYTES >= 256\n%error unroll count is too large\n%elif UNROLL_BYTES >= 128\n%define off 128\n%else\n%define off 0\n%endif\n\n%macro  mac_sub  3\n\n;LOBAL_FUNC mpn_add_nc\n;    mov     rax,cy\n;    jmp     %%0\nGLOBAL_FUNC mpn_add_n\n    xor     rax,rax\n%%0:\n    movsxd  len,lend\n    cmp     len,UNROLL_THRESHOLD\n    jae     %%2\n    lea     sr1,[sr1+len*8]\n    lea     sr2,[sr2+len*8]\n    lea     dst,[dst+len*8]\n    neg     len\n    shr     rax,1\n%%1:\n    mov     rax,[sr1+len*8]\n    mov     r10,[sr2+len*8]\n    %1      rax,r10\n    mov     [dst+len*8],rax\n    inc     len\n    jnz     %%1\n    mov     rax,dword 0\n    setc    al\n    ret\n%%2:\n    mov     r_cnt,1\n    and     r_cnt,len\n    push    r_cnt\n    and     len,-2\n    mov     r_cnt,len\n    dec     r_cnt\n    shr     r_cnt,UNROLL_LOG2\n    neg     len\n    and     len,UNROLL_MASK\n    lea     r_jmp,[len*4]\n    neg     len\n    lea     sr1,[sr1+len*8+off]\n    lea     sr2,[sr2+len*8+off]\n    lea     dst,[dst+len*8+off]\n    shr     rax,1\n    lea     r_jmp,[r_jmp+r_jmp*2]\n\n%ifdef PIC\n    call    .pic_calc\n.unroll_here:\n..@unroll_here1:\n\n%else\n    lea     rax,[rel %%3]\n%endif\n\n    lea     r_jmp,[r_jmp+rax]\n    jmp     r_jmp\n\n%ifdef PIC\n\n.pic_calc:\n\n\tmov     rax, ..@unroll_entry1 - 
..@unroll_here1\n\tadd     rax, [rsp]\n\tret\n\n%endif\n\n    align 32\n\n.unroll_entry1:\n..@unroll_entry1:\n%%3:\n\n%define CHUNK_COUNT  2\n%assign i 0\n\n%rep  UNROLL_COUNT / CHUNK_COUNT\n%assign  disp0 8 * i * CHUNK_COUNT - off\n\n    mov     r_jmp,[byte sr1+disp0]      ; len and r_jmp registers\n    mov     len,[byte sr1+disp0+8]      ; now not needed\n    %1      r_jmp,[byte sr2+disp0]\n    mov     [byte dst+disp0],r_jmp\n    %1      len,[byte sr2+disp0+8]\n    mov     [byte dst+disp0+8],len\n\n%assign i i + 1\n%endrep\n\n%if UNROLL_BYTES > 64\n    lea     sr1,[byte sr1+127]\n    inc     sr1\n%else\n    lea     sr1,[byte sr1+UNROLL_BYTES]\n%endif\n    dec     r_cnt\n    lea     sr2,[sr2+UNROLL_BYTES]\n    lea     dst,[dst+UNROLL_BYTES]\n    jns     %%3\n\n    pop     rax\n    dec     rax\n    js      %%5\n    mov     len,[sr1-off]\n    %1      len,[sr2-off]\n    mov     [dst-off],len\n%%5:mov     rax,dword 0\n    setc    al\n    ret\n\n%endmacro\n\n    BITS    64\n\n    mac_sub adc,mpn_add_n,mpn_add_nc\n"
  },
  {
    "path": "mpn/x86_64/netburst/addmul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n;  add the result to a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.8\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benifit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n\n%include 'yasm_mac.inc'\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,15,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n; \n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_addmul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_addmul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_addmul_1_small_loop\n\t\n\talign\t16\nL_mpn_addmul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,[rdi+r11*8]\n\tadc\trdx,r10\n\tadd\trax,r8\n\tmov\tr8,r10\n\tmov\t[rdi+r11*8],rax\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_addmul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_addmul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_addmul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tadd\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_addmul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_addmul_1_main_loop\n\tshr\ttemp,1\nL_mpn_addmul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_addmul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_addmul_1_main_loop\n\t\n\talign\t16\nL_mpn_addmul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tadd\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_addmul_1_main_loop\n\nL_mpn_addmul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t; \n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tadd\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tadd\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t; \n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t; \n\tadd\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/netburst/and_n.as",
    "content": "\n;  core2 mpn_and_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) & (rdx, rcx)\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_and_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign 8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tand     r11, [rdx]\n\tand     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tand     r9, [rdx-16]\n\tand     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tsub     rcx, 1\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tcmp     rax, 0\n\tjz      end\n\tmov     r11, [rsi]\n\tand     r11, [rdx]\n\tmov     [rdi], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+8]\n\tand     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+16]\n\tand     r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/andn_n.as",
    "content": "\n;  core2 mpn_andn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_andn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tand     r8, [rsi+rcx*8+24]\n\tand     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tand     r10, [rsi+rcx*8+8]\n\tand     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tand     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/com_n.as",
    "content": "\n;  core2 mpn_com_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_com_n\n\tsub     rdx, 4\n\tjc      next\n\talign 8\nloop1:\n\tmov     rax, [rsi+rdx*8+24]\n\tmov     rcx, [rsi+rdx*8+16]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+24], rax\n\tmov     [rdi+rdx*8+16], rcx\n\tmov     rax, [rsi+rdx*8+8]\n\tmov     rcx, [rsi+rdx*8]\n\tnot     rax\n\tnot     rcx\n\tmov     [rdi+rdx*8+8], rax\n\tmov     [rdi+rdx*8], rcx\n\tsub     rdx, 4\n\tjae     loop1\nnext:\n\tadd     rdx, 4\n\tjz      end\n;\tCould still have potential cache-bank conflicts in this tail part\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\n\tsub     rdx, 1\n\tjz      end\n\tmov     rax, [rsi+rdx*8-8]\n\tnot     rax\n\tmov     [rdi+rdx*8-8], rax\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/divexact_byff.as",
    "content": "\n;  core2 mpn_divexact_byff\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rdx) = (rsi, rdx)/0xFFFFFFFFFFFFFFFF\n;\trax = \"remainder\"\n;\twhere (rsi, rdx) = (rdi, rdx)*(B - 1) -rax*B^rdx    and 0 <= rax < B - 1      B = 0xFFFFFFFFFFFFFFFF\n\n    BITS 64\n\n   GLOBAL_FUNC mpn_divexact_byff\n;\tthis is good but suffers from alignment slowdown\n;\twe dont seem to have much freedom to re-arrange the instructions to avoid\n;\tit , I suppose we could detect alignment at the start and have different\n;\troutines for different alignments\n\txor     eax, eax\n\tmov     rcx, rdx\n\tand     rcx, 3\n\tshr     rdx, 2\n\tcmp     rdx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rcx\n\tjz      end1\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rcx\n\tjz      end1\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\n\tdec     rcx\nend1:\n\tsbb     rax, 0\n\tret\n\talign 16\nloop1:\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\n\tsbb     rax, [rsi+24]\n\tmov     
[rdi+24], rax\n\tlea     rsi, [rsi+32]\n\tdec     rdx\n\tlea     rdi, [rdi+32]\n\tjnz     loop1\n\tinc     rcx\n\tdec     rcx\n\tjz      end\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rcx\n\tjz      end\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rcx\n\tjz      end\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\n\tdec     rcx\nend:\n\tsbb     rax, 0\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/ior_n.as",
    "content": "\n;  core2 mpn_ior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) | (rdx, rcx)\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_ior_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tjz      skiploop\n\talign 8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tor      r11, [rdx]\n\tor      r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tor      r9, [rdx-16]\n\tor      r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tsub     rcx, 1\n\tmov     [rdi-8], r10\n\tjnz     loop1\nskiploop:\n\tcmp     rax, 0\n\tjz      end\n\tmov     r11, [rsi]\n\tor      r11, [rdx]\n\tmov     [rdi], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+8]\n\tor      r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tsub     rax, 1\n\tjz      end\n\tmov     r11, [rsi+16]\n\tor      r11, [rdx+16]\n\tmov     [rdi+16], r11\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/iorn_n.as",
    "content": "\n;  core2 mpn_iorn_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n   \n   GLOBAL_FUNC mpn_iorn_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tor      r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\nmov %rbx,-8(%rsp)\nmov %rbp,-16(%rsp)\nmov %r12,-24(%rsp)\nmov %r13,-32(%rsp)\nmov %r14,-40(%rsp)\nmov %r15,-48(%rsp)\nmov %rdx,-56(%rsp)\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt 
$2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 
8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\tmov -56(%rsp),%r8\n\tbt $0,%r8\n\tjnc notodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\nl7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l7\n\tmov $3,%rcx\nnotodd:\txor %r8,%r8\n\tshr $1,%rax\n\tadc %r8,%r8\n\tshr $1,%rax\n\tadc $0,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\nl1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc l1\n\txor %r8,%r8\n\tshr $1,%rbx\n\tadc %r8,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tshr $1,%rbx\n\tadc $0,%r8\n\tadd %r8,24(%rbp)\nl2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc l2\nmov -8(%rsp),%rbx\nmov -16(%rsp),%rbp\nmov -24(%rsp),%r12\nmov -32(%rsp),%r13\nmov -40(%rsp),%r14\nmov -48(%rsp),%r15\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/netburst/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\nlp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc lp\ncmp $2,%rcx\njg\tcase0\njz\tcase1\njp\tcase2\ncase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp fin\ncase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov %r12,8(%rbp)\n\tmov 
%r13,16(%rbp)\n\tjmp fin\ncase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\nfin:\tmov $3,%rcx\ncase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc skipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\nskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\nl2:\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc l2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\nl3:\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc l3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\nl1:\tsbbq $0,(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc l1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\nl4:\tsbbq $0,(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc l4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc notodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\nl7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tadd $1,%rcx\n\tsar $1,%r8\n\tjnz l7\nnotodd:\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/netburst/lshift.as",
    "content": ";  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  Copyright 2008 Brian Gladman, William Hart\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n;  Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit\n;  compiler and the YASM assembler.\n\n;  AMD64 mpn_lshift -- mpn left shift\n;\n;  Calling interface:\n;\n; mp_limb_t mpn_lshift(\n;     mp_ptr dst,       rdi\n;     mp_srcptr src,    rsi\n;     mp_size_t size,   rdx\n;     unsigned shift    rcx\n; )\n\n%include 'yasm_mac.inc'\n\n%define src    rsi\n%define dst    rdi\n%define s_len  rdx\n%define r_tmpd ecx\n\n    BITS    64\n\nGLOBAL_FUNC mpn_lshift\n    movq    mm7, [src+s_len*8-8]   ; put top source chunk in mm7\n    movd    mm1, r_tmpd            ; put shift value in mm1\n    mov     eax, 64                \n    sub     eax, r_tmpd            \n    movd    mm0, eax               ; put 64 - shift value in mm0\n    movq    mm3, mm7               ; save original source chunk in mm3\n    psrlq   mm7, mm0               ; shift \n    movd    rax, mm7               ; put part shifted out top in rax to be returned\n    sub     s_len, 2\n    jl      label1\n\n    align   4\nlabel0:  \n    
movq    mm6, [src+s_len*8]     ; put next source chunk in mm6\n    movq    mm2, mm6               ; copy into mm2\n    psrlq   mm6, mm0               ; shift mm6 right\n    psllq   mm3, mm1               ; ...and mm3 left\n    por     mm3, mm6               ; and combine\n    movq    [dst+s_len*8+8], mm3   ; store result\n    je      label2                \n    movq    mm7, [src+s_len*8-8]   ; next source chunk\n    movq    mm3, mm7               ; copy it\n    psrlq   mm7, mm0               ; shift right\n    psllq   mm2, mm1               ; ...and left\n    por     mm2, mm7               ; and combine\n    movq    [dst+s_len*8], mm2     ; and store result\n    sub     s_len, 2\n    jge     label0\nlabel1:  \n    movq    mm2, mm3\nlabel2:  \n    psllq   mm2, mm1               ; final shift\n    movq    [dst], mm2             ; and store\n    emms\n    ret\n"
  },
  {
    "path": "mpn/x86_64/netburst/mul_1.as",
    "content": "\n;  core2 mpn_mul_1\n;  Copyright 2008,2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rdx) = (rsi, rdx)*rcx\n;\trax = carry\n\t\n    BITS 64\n    \n   GLOBAL_FUNC mpn_mul_1\n;\tthis is just an addmul , so we can get rid off stack use\n;\tand simplifiy wind down , and perhaps re-do the OOO order \n\tmov     rax, [rsi]\n\tcmp     rdx, 1\n\tje      one\n\tmov     r11, 5\n\tlea     rsi, [rsi+rdx*8-40]\n\tlea     rdi, [rdi+rdx*8-40]\n\tsub     r11, rdx\n\tmul     rcx\n\tdb      0x26\n\tmov     r8, rax\n\tdb      0x26\n\tmov     rax, [rsi+r11*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tdb      0x26\n\tcmp     r11, 0\n\tdb      0x26\n\tmov     [rsp-8], rbx\n\tdb      0x26\n\tjge     skiploop\n\talign 16\nloop1:\n\tmov     r10, 0\n\tmul     rcx\n\tmov     [rdi+r11*8], r8\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r8, 0\n\tmov     r9, 0\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tdb      0x26\n\tadd     rbx, rax\n\tdb      0x26\n\tadc     r8, 
rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     rcx\n\tmov     [rdi+r11*8+24], rbx\n\tdb      0x26\n\tadd     r8, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     loop1\n\talign 16\nskiploop:\n\tmov     r10d, 0\n\tmul     rcx\n\tmov     [rdi+r11*8], r8\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tjz      next2\n\tja      next3\n\tjp      next1\nnext0:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r8d, 0\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r8, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     rcx\n\tmov     [rdi+r11*8+24], rbx\n\tmov     rbx, [rsp-8]\n\tadd     r8, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r8\n\tmov     rax, rdx\n\tret\n\talign 16\nnext1:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r8d, 0\n\tadc     r8, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     rcx\n\tmov     [rdi+r11*8+16], r10\n\tadd     r8, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r8\n\tmov     rbx, [rsp-8]\n\tmov     rax, rdx\n\tret\n\talign 16\none:\n\tmul     rcx\n\tmov     [rdi], rax\n\tmov     rax, rdx\n\tret\n\talign 16\nnext2:\n\tmov     rax, [rsi+r11*8+16]\n\tmul     rcx\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     rax, rbx\n\tmov     rbx, [rsp-8]\n\tret\n\talign 16\nnext3:\n\tmov     rbx, [rsp-8]\n\tmov     [rdi+r11*8+8], r9\n\tmov     rax, r10\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/mul_2.as",
    "content": ";  X86_64 mpn_mul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n; (rdi,rdx+1)=(rsi,rdx)*(rcx,2) return carrylimb\n\n\tGLOBAL_FUNC mpn_mul_2\npush    rbx\nmov     r8, [rcx]\nmov     rcx, [rcx+8]\nlea     rsi, [rsi+rdx*8-24]\nlea     rdi, [rdi+rdx*8-24]\nmov     rbx, 3\nsub     rbx, rdx\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     r8\nmov     r11, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign 16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd  
   rbx, 3\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmov     [rdi+rbx*8], r11\nmul     rcx\nadd     r9, rax\nadc     r10, rdx\ncmp     rbx, 1\nja      case2\nje      case1\ncase0:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n\tpop     rbx\n\tret\nalign 16\ncase1:\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     rbx\n\tret\nalign 16\ncase2:\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     rbx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/netburst/nand_n.as",
    "content": "\n;  core2 mpn_nand_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_nand_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rsi+rcx*8+24]\n\tmov     r9, [rsi+rcx*8+16]\n\tand     r8, [rdx+rcx*8+24]\n\tand     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rsi+rcx*8+8]\n\tmov     r11, [rsi+rcx*8]\n\tand     r10, [rdx+rcx*8+8]\n\tand     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rsi+rcx*8-8]\n\tand     r8, [rdx+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/nior_n.as",
    "content": "\n;  core2 mpn_nior_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n    BITS 64\n    \n   GLOBAL_FUNC mpn_nior_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, [rsi+rcx*8+24]\n\tor      r9, [rsi+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, [rsi+rcx*8+8]\n\tor      r11, [rsi+rcx*8]\n\tnot     r10\n\tnot     r11\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, [rsi+rcx*8-8]\n\tnot     r8\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/redc_1.as",
    "content": "\n;  core2 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      
%%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul 
    r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, 
0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tsub     r15, 1\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tsub     r15, 1\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign 16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov     r13, 
[r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign 16\ncase0:\n\tmpn_addmul_1_int 0\n\talign 16\ncase1:\n\tmpn_addmul_1_int 1\n\talign 16\ncase3:\n\tmpn_addmul_1_int 3\n\talign 16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign 16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/rshift.as",
    "content": "\n;  core2 mpn_rshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rdx) = (rsi,rdx)>>rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm\n\n%define MOVQ movd\n\n\tBITS 64\n\n   GLOBAL_FUNC mpn_rshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshr     rdx, cl\n\tneg     rcx\n\tshl     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     rax, r8\n\tmov     r11, r9\n\tshr     r8, cl\n\tshr     r9, cl\n\tneg     rcx\n\tshl     r11, cl\n\tshl     rax, cl\n\tor      r8, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tlea     r9, [rsi+8]\n\tsub     rax, rcx\n\tand     r9, -16\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmovdqa  xmm5, [r9]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tMOVQ    rax, xmm5\n\tcmp     rsi, r9\n\tlea     rsi, [rsi+rdx*8-40]\n\tje      aligned\n\tmovq    xmm2, [r9-8]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tpsrlq   xmm4, xmm0\n\tpor     xmm4, xmm5\n\tmovq    [rdi], xmm4\n\tlea     rdi, [rdi+8]\n\tsub     rdx, 1\n\tMOVQ    rax, xmm2\naligned:\n\tlea     rdi, [rdi+rdx*8-40]\n\tpsrlq   xmm3, xmm0\n\tmov     r8d, 5\n\tsub     r8, rdx\n\tjnc     skiploop\n\talign 16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovdqa  xmm5, [rsi+r8*8+32]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tadd     r8, 4\n\tjnc     loop1\nskiploop:\n\tcmp     r8, 2\n\tja      left0\n\tjz      left1\n\tjp      left2\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovq    xmm5, [rsi+r8*8+32]\n\tmovq    xmm3, 
xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8+32], xmm3\n\tret\n\talign 16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tret\n\talign 16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tret\n\talign 16\nleft0:\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/sub_n.as",
    "content": ";  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  Copyright 2005, 2006 Pierrick Gaudry\n;\n;  Copyright 2008 Brian Gladman, William Hart\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit\n;  compiler and the YASM assembler.\n\n;  AMD64 mpn_add_n/mpn_sub_n -- mpn add or subtract.\n;\n;  Calling interface:\n;\n;  mp_limb_t __gmpn_<op>_n(    <op> = add OR sub\n;     mp_ptr dst,              rdi\n;     mp_srcptr src1,          rsi\n;     mp_srcptr src2,          rdx\n;     mp_size_t  len           rcx\n;  )\n;\n;  mp_limb_t __gmpn_<op>_nc(   <op> = add OR sub\n;     mp_ptr dst,              rdi\n;     mp_srcptr src1,          rsi\n;     mp_srcptr src2,          rdx\n;     mp_size_t len,           rcx\n;     mp_limb_t carry           r8 \n;  )\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n\n%include 'yasm_mac.inc'\n\n%define dst       rdi   ; destination pointer\n%define sr1       rsi   ; source 1 pointer\n%define sr2       rdx   ; source 2 pointer\n%define len       rcx   ; number of limbs\n%define lend      ecx   ; number of limbs\n%define cy         r8   ; carry value\n\n%define r_jmp     r10   ; temporary for jump table entry\n%define r_cnt     r11   ; temporary for loop count\n\n%define UNROLL_LOG2         4\n%define UNROLL_COUNT        (1 << UNROLL_LOG2)\n%define UNROLL_MASK         (UNROLL_COUNT - 1)\n%define UNROLL_BYTES        (8 * UNROLL_COUNT)\n%define UNROLL_THRESHOLD    8\n\n%if UNROLL_BYTES >= 256\n%error unroll count is too large\n%elif UNROLL_BYTES >= 128\n%define off 128\n%else\n%define off 0\n%endif\n\n%macro  mac_sub  3\n\n;GLOBAL_FUNC mpn_sub_nc\n;    mov     rax,cy\n;    jmp     %%0\nGLOBAL_FUNC mpn_sub_n\n    xor     rax,rax\n%%0:\n    movsxd  len,lend\n    cmp     len,UNROLL_THRESHOLD\n    jae     %%2\n    lea     sr1,[sr1+len*8]\n    lea     sr2,[sr2+len*8]\n    lea     dst,[dst+len*8]\n    neg     len\n    shr     rax,1\n%%1:\n    mov     rax,[sr1+len*8]\n    mov     r10,[sr2+len*8]\n    %1      rax,r10\n    mov     [dst+len*8],rax\n    inc     len\n    jnz     %%1\n    mov     rax,dword 0\n    setc    al\n    ret\n%%2:\n    mov     r_cnt,1\n    and     r_cnt,len\n    push    r_cnt\n    and     len,-2\n    mov     r_cnt,len\n    dec     r_cnt\n    shr     r_cnt,UNROLL_LOG2\n    neg     len\n    and     len,UNROLL_MASK\n    lea     r_jmp,[len*4]\n    neg     len\n    lea     sr1,[sr1+len*8+off]\n    lea     sr2,[sr2+len*8+off]\n    lea     dst,[dst+len*8+off]\n    shr     rax,1\n    lea     r_jmp,[r_jmp+r_jmp*2]\n\n%ifdef PIC\n    call    .pic_calc\n.unroll_here:\n..@unroll_here1:\n\n%else\n    lea     rax,[rel %%3]\n%endif\n\n    lea     r_jmp,[r_jmp+rax]\n    jmp     r_jmp\n\n%ifdef PIC\n\n.pic_calc:\n\n\tmov     rax, ..@unroll_entry1 - 
..@unroll_here1\n\tadd     rax, [rsp]\n\tret\n\n%endif\n\n    align 32\n\n.unroll_entry1:\n..@unroll_entry1:\n%%3:\n\n%define CHUNK_COUNT  2\n%assign i 0\n\n%rep  UNROLL_COUNT / CHUNK_COUNT\n%assign  disp0 8 * i * CHUNK_COUNT - off\n\n    mov     r_jmp,[byte sr1+disp0]      ; len and r_jmp registers\n    mov     len,[byte sr1+disp0+8]      ; now not needed\n    %1      r_jmp,[byte sr2+disp0]\n    mov     [byte dst+disp0],r_jmp\n    %1      len,[byte sr2+disp0+8]\n    mov     [byte dst+disp0+8],len\n\n%assign i i + 1\n%endrep\n\n%if UNROLL_BYTES > 64\n    lea     sr1,[byte sr1+127]\n    inc     sr1\n%else\n    lea     sr1,[byte sr1+UNROLL_BYTES]\n%endif\n    dec     r_cnt\n    lea     sr2,[sr2+UNROLL_BYTES]\n    lea     dst,[dst+UNROLL_BYTES]\n    jns     %%3\n\n    pop     rax\n    dec     rax\n    js      %%5\n    mov     len,[sr1-off]\n    %1      len,[sr2-off]\n    mov     [dst-off],len\n%%5:\n    mov     rax,dword 0\n    setc    al\n    ret\n\n%endmacro\n\n    BITS    64\n\n    mac_sub sbb,mpn_sub_n,mpn_sub_nc\n"
  },
  {
    "path": "mpn/x86_64/netburst/submul_1.as",
    "content": "; **************************************************************************\n;  Intel64 mpn_submul_1 -- Multiply a limb vector with a limb and\n;  subtract the result from a second limb vector.\n;\n;  Copyright (C) 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  This program is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2 of the License, or\n;  (at your option) any later version.\n;\n;  This program is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n;  GNU General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with this program; if not, write to the Free Software Foundation,\n;  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n;\n; **************************************************************************\n;\n;\n; CREDITS\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.com).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; *********************************************************************\n\n\n; With a 4-way unroll the code has\n;\n;         \tcycles/limb\n; Hammer:           4.6\n; Woodcrest:        4.6\n;\n; With increased unrolling, it appears to converge to 4 cycles/limb\n; on Intel Core 2 machines.  I believe that this is optimal, however\n; it requires such absurd unrolling that it becomes unusable for all\n; but the largest inputs.  A 4-way unroll seems like a good balance\n; to me because then commonly used input sizes (e.g. 1024bit Public\n; keys) still benefit from the speed up.\n\n;\n; This is just a check to see if we are in my code testing sandbox\n; or if we are actually in the GMP source tree\n;\n\n%include 'yasm_mac.inc'\n\n\n\n; *********************************************************************\n; *********************************************************************\n;\n; Here are the important macro parameters for the code\n;\n;      BpL is Bytes per Limb (8 since this is 64bit code)\n;\n;\tUNROLL_SIZE is a power of 2 for which we unroll the code.\n;                  possible values are 2,4,8,16,..., 256.  A reasonable\n;                  value is probably 4.  If really large inputs\n;                  are expected, then 16 is probably good.  Larger\n;                  values are really only useful for flashy\n;                  benchmarks and testing asymptotic behavior.\n;\n;      THRESHOLD is the minimum number of limbs needed before we bother\n;                using the complicated loop.  
A reasonable value is\n;                2*UNROLL_SIZE + 6\n;\n; *********************************************************************\n; *********************************************************************\n%define\tBpL\t8\n%define\tUNROLL_SIZE\t4\n%define\tUNROLL_MASK\tUNROLL_SIZE-1\n%define\tTHRESHOLD\t2*UNROLL_SIZE+6\n\n; Here is a convenient Macro for addressing\n; memory.  Entries of the form\n;\n;      ADDR(ptr,index,displacement)\n;\n; get converted to\n;\n;      [displacement*BpL + ptr + index*BpL]\n;\n%define\tADDR(a,b,c)\t[c*BpL+a+b*BpL]\n\n\n; Register\tUsage\n; --------\t-----\n; rax\t\tlow word from mul\n; rbx*\n; rcx\t\ts2limb\n; rdx\t\thigh word from mul\n; rsi\t\ts1p\n; rdi\t\trp\n; rbp*\t\tBase Pointer\n; rsp*\t\tStack Pointer\n; r8\t\tA_x\n; r9\t\tA_y\n; r10\t\tA_z\n; r11\t\tB_x\n; r12*\t\tB_y\n; r13*\t\tB_z\n; r14*\t\ttemp\n; r15*\t\tindex\n;\n; * indicates that the register must be\n; preserved for the caller.\n%define\ts2limb\trcx\n%define\ts1p\trsi\n%define\trp\trdi\n%define\tA_x\tr8\n%define\tA_y\tr9\n%define\tA_z\tr10\n%define\tB_x\tr11\n%define\tB_y\tr12\n%define\tB_z\tr13\n%define\ttemp\tr14\n%define\tindex\tr15\n\n\t\n; INPUT PARAMETERS\n; rp\t\trdi\n; s1p\t\trsi\n; n\t\trdx\n; s2limb\trcx\n\tBITS\t64\nGLOBAL_FUNC mpn_submul_1\n\t\t\t\t\t; Compare the limb count\n\t\t\t\t\t; with the threshold value.\n\t\t\t\t\t; If the limb count is small\n\t\t\t\t\t; we just use the small loop,\n\t\t\t\t\t; otherwise we jump to the\n\t\t\t\t\t; more complicated loop.\n\tcmp\trdx,THRESHOLD\n\tjge\tL_mpn_submul_1_main_loop_prep\n\tmov\tr11,rdx\n\tlea\trsi,[rsi+rdx*8]\n\tlea\trdi,[rdi+rdx*8]\n\tneg\tr11\n\txor\tr8, r8\n\txor\tr10, 
r10\n\tjmp\tL_mpn_submul_1_small_loop\n\t\n\talign\t16\nL_mpn_submul_1_small_loop:\n\tmov\trax,[rsi+r11*8]\n\tmul\trcx\n\tadd\trax,r8\n\tadc\trdx,r10\n\tsub\t[rdi+r11*8],rax\n\tmov\tr8,r10\n\tadc\tr8,rdx\n\tinc\tr11\n\tjne\tL_mpn_submul_1_small_loop\n\n\tmov\trax,r8\n\tret\n\nL_mpn_submul_1_main_loop_prep:\n\tpush\tr15\n\tpush\tr14\n\tpush\tr13\n\tpush\tr12\n\t\t\t\t; If n is even, we need to do three\n\t\t\t\t; pre-multiplies, if n is odd we only\n\t\t\t\t; need to do two.\n\tmov\ttemp,rdx\n\tmov\tindex,0\n\tmov\tA_x,0\n\tmov\tA_y,0\n\tand\trdx,1\n\tjnz\tL_mpn_submul_1_odd_n\n\n\t\t\t\t\t; Case n is even\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tsub\tADDR(rp,index,0),rax\n\tadc\tA_x,rdx\n\tadd\tindex,1\n\t\t\t\t\t; At this point\n\t\t\t\t\t;  temp = n (even)\n\t\t\t\t\t; index = 1\n\nL_mpn_submul_1_odd_n:\n\t\t\t\t\t; Now\n\t\t\t\t\t; temp = n\n\t\t\t\t\t; index = 1 if n even\n\t\t\t\t\t;       = 0 if n odd\n\t\t\t\t\t;\n\tmov\trax,ADDR(s1p,index,0)\n\tmul\ts2limb\n\tmov\tA_z,ADDR(rp,index,0)\n\tadd\tA_x,rax\n\tadc\tA_y,rdx\n\n\tmov\trax,ADDR(s1p,index,1)\n\tmul\ts2limb\n\tmov\tB_z,ADDR(rp,index,1)\n\tmov\tB_x,rax\n\tmov\tB_y,rdx\n\tmov\trax,ADDR(s1p,index,2)\n\n\tadd\tindex,3\n\tlea\ts1p,ADDR(s1p,temp,-1)\n\tlea\trp,ADDR(rp,temp,-1)\n\tneg\ttemp\n\tadd\tindex,temp\n\t\t\t\t; At this point:\n\t\t\t\t; s1p   = address of last s1limb\n\t\t\t\t; rp    = address of last rplimb\n\t\t\t\t; temp  = -n\n\t\t\t\t; index = 4 - n if n even\n\t\t\t\t;       = 3 - n if n odd\n\t\t\t\t;\n\t\t\t\t; So, index is a (negative) even\n\t\t\t\t; number.\n\t\t\t\t;\n\t\t\t\t; *****************************************\n\t\t\t\t; ATTENTION:\n\t\t\t\t;\n\t\t\t\t; From here on, I will use array\n\t\t\t\t; indexing notation in the comments\n\t\t\t\t; because it is convenient.  
So, I\n\t\t\t\t; will pretend that index is positive\n\t\t\t\t; because then a comment like\n\t\t\t\t;      B_z = rp[index-1]\n\t\t\t\t; is easier to read.\n\t\t\t\t; However, keep in mind that index is\n\t\t\t\t; actually a negative number indexing\n\t\t\t\t; back from the end of the array.\n\t\t\t\t; This is a common trick to remove one\n\t\t\t\t; compare operation from the main loop.\n\t\t\t\t; *****************************************\n\n\t\t\t\t;\n\t\t\t\t; Now we enter a spin-up loop the\n\t\t\t\t; will make sure that the index is\n\t\t\t\t; a multiple of UNROLL_SIZE before\n\t\t\t\t; going to our main unrolled loop.\n\tmov\ttemp,index\n\tneg\ttemp\n\tand\ttemp,UNROLL_MASK\n\tjz\tL_mpn_submul_1_main_loop\n\tshr\ttemp,1\nL_mpn_submul_1_main_loop_spin_up:\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,1)\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index+1]\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,2)\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,-1),B_z\n\tmov\tB_z,ADDR(rp,index,1)\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n\n\tadd\tindex,2\n\tsub\ttemp,1\n\tjnz\tL_mpn_submul_1_main_loop_spin_up\n\t\n\tjmp\tL_mpn_submul_1_main_loop\n\t\n\talign\t16\nL_mpn_submul_1_main_loop:\n\t\t\t\t; The code here is really the same\n\t\t\t\t; logic as the spin-up 
loop.  It's\n\t\t\t\t; just been unrolled.\n%assign\tunroll_index 0\n%rep\tUNROLL_SIZE/2\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tlea\tA_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+1))\n\tadc\tB_x,A_y\n\tmov\tADDR(rp,index,(2*unroll_index-2)),A_z\n\tmov\tA_z,ADDR(rp,index,(2*unroll_index))\n\tadc\tB_y,0\n\tlea\tA_y,[rdx]\n\n\tmul\ts2limb\n\tsub\tB_z,B_x\n\tlea\tB_x,[rax]\n\tmov\trax,ADDR(s1p,index,(2*unroll_index+2))\n\tadc\tA_x,B_y\n\tmov\tADDR(rp,index,(2*unroll_index-1)),B_z\n\tmov\tB_z,ADDR(rp,index,(2*unroll_index+1))\n\tadc\tA_y,0\n\tlea\tB_y,[rdx]\n%assign\tunroll_index\tunroll_index+1\n%endrep\n\n\n\tadd\tindex,UNROLL_SIZE\n\tjnz\tL_mpn_submul_1_main_loop\n\nL_mpn_submul_1_finish:\t\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index-2] + carry_in\n\t\t\t\t; A_y = high_mul[index-2] + CF\n\t\t\t\t; A_z = rp[index-2]\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1]\n\t\t\t\t; B_y = high_mul[index-1]\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; rax = s1p[index]\n\tmul\ts2limb\n\tsub\tA_z,A_x\n\tmov\tA_x,rax\n\tmov\tADDR(rp,index,-2),A_z\n\tmov\tA_z,ADDR(rp,index,0)\n\tadc\tB_x,A_y\n\tadc\tB_y,0\n\tmov\tA_y,rdx\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; B_x = low_mul[index-1] + carry_in\n\t\t\t\t; B_y = high_mul[index-1] + CF\n\t\t\t\t; B_z = rp[index-1]\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index]\n\t\t\t\t; A_y = high_mul[index]\n\t\t\t\t; A_z = rp[index]\n\tsub\tB_z,B_x\n\tmov\tADDR(rp,index,-1),B_z\n\tadc\tA_x,B_y\n\tadc\tA_y,0\n\t\t\t\t; At this point we should have:\n\t\t\t\t;\n\t\t\t\t; index = n-1\n\t\t\t\t;\n\t\t\t\t; A_x = low_mul[index] + carry_in\n\t\t\t\t; A_y = high_mul[index] + CF\n\t\t\t\t; A_z = rp[index]\n\t\t\t\t;\n\tsub\tA_z,A_x\n\tmov\tADDR(rp,index,0),A_z\n\tadc\tA_y,0\n\n\tmov\trax,A_y\n\tpop\tr12\n\tpop\tr13\n\tpop\tr14\n\tpop\tr15\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/xnor_n.as",
    "content": "\n;  core2 mpn_xnor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_xnor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tnot     r8\n\tnot     r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tnot     r10\n\tnot     r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tnot     r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/netburst/xor_n.as",
    "content": "\n;  core2 mpn_xor_n\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_xor_n\n\tsub     rcx, 4\n\tjb      skiploop\n\talign 16\nloop1:\n\tmov     r8, [rdx+rcx*8+24]\n\tmov     r9, [rdx+rcx*8+16]\n\tor      r8, r8\n\tor      r9, r9\n\txor     r8, [rsi+rcx*8+24]\n\txor     r9, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8+24], r8\n\tmov     [rdi+rcx*8+16], r9\n\tmov     r10, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8]\n\tor      r10, r10\n\tor      r11, r11\n\txor     r10, [rsi+rcx*8+8]\n\txor     r11, [rsi+rcx*8]\n\tmov     [rdi+rcx*8+8], r10\n\tmov     [rdi+rcx*8], r11\n\tsub     rcx, 4\n\tjnc     loop1\nskiploop:\n\tadd     rcx, 4\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\n\tsub     rcx, 1\n\tjz      end\n\tmov     r8, [rdx+rcx*8-8]\n\tor      r8, r8\n\txor     r8, [rsi+rcx*8-8]\n\tmov     [rdi+rcx*8-8], r8\nend:\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/add_n.as",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_add_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec    
 rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/addmul_1.asm",
    "content": "dnl  mpn_addmul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_addmul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tadd %r8,(%rdi,%r11,8)\n\tadc %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tadd %r9,8(%rdi,%r11,8)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tadd %r10,16(%rdi,%r11,8)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tadd %r12,24(%rdi,%r11,8)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nadd %r8,(%rdi,%r11,8)\nadc %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tadd %r9,8(%rdi)\n\tadc 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tadd %r10,16(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tadd %r12,24(%rdi)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd %r8,32(%rdi)\n\tadc $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tadd %r9,16(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tadd %r10,24(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tadd %r12,32(%rdi)\n\tadc $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tadd %r9,24(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tadd %r10,32(%rdi)\n\tadc $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tadd %rax,(%rdi)\n\tadc $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tadd %r9,32(%rdi)\n\tadc $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/addmul_2.as",
    "content": ";  k8 mpn_addmul_2\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include \"yasm_mac.inc\"\n\n\tGLOBAL_FUNC mpn_addmul_2\n\n; (rdi,rdx+1) = (rdi,rdx) + (rsi,rdx)*(rcx,2) return carrylimb\n\npush    rbx\npush    r12\nmov     r8, [rcx+8]\nmov     rcx, [rcx]\nmov     rbx, 4\nsub     rbx, rdx\nlea     rsi, [rsi+rdx*8-32]\nlea     rdi, [rdi+rdx*8-32]\nmov     r10, 0\nmov     rax, [rsi+rbx*8]\nmul     rcx\nmov     r12, rax\nmov     r9, rdx\ncmp     rbx, 0\njge     skiplp\nalign   16\nlp:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, 
r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     lp\nskiplp:\nmov     rax, [rsi+rbx*8]\nmul     r8\ncmp     rbx, 2\nja      case0\njz      case1\njp      case2\ncase3:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase2:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n\tpop     r12\n\tpop    
 rbx\n\tret\nalign   16\ncase1:\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n\tpop     r12\n\tpop     rbx\n\tret\nalign   16\ncase0:\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n\tpop     r12\n\tpop     rbx\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/and_n.asm",
    "content": "dnl  mpn_and_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_and_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_and_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpand %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tand %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/andn_n.asm",
    "content": "dnl  mpn_andn_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_andn_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_andn_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpandn %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tand %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tnot 
%rax\n\tand %rcx,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/com_n.asm",
    "content": "dnl  mpn_com_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_com_n(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_com_n)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\npcmpeqb %xmm2,%xmm2\nsub %rdx,%rcx\njnc skiplp\nALIGN(16)\nlp:\n\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmovdqu 16(%rsi,%rcx,8),%xmm1\n\tpxor %xmm2,%xmm0\n\tadd $4,%rcx\n\tpxor %xmm2,%xmm1\n\tmovdqu %xmm0,(%rdi)\n\tmovdqu %xmm1,16(%rdi)\n\tlea 32(%rdi),%rdi\n\tjnc lp\nskiplp:\ncmp $2,%rcx\nja case0\nje case1\njp case2\t\ncase3:\tmovdqu (%rsi,%rcx,8),%xmm0\n\tmov 16(%rsi,%rcx,8),%rax\n\tpxor %xmm2,%xmm0\n\tnot %rax\n\tmovdqu %xmm0,(%rdi)\n\tmov %rax,16(%rdi)\n\tret\ncase2:\tmovdqu (%rsi,%rcx,8),%xmm0\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi)\n\tret\ncase1:\tmov (%rsi,%rcx,8),%rax\n\tnot %rax\n\tmov %rax,(%rdi)\ncase0:\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/copyd.asm",
    "content": "dnl  mpn_copyd\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyd(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyd)\nlea 16(%rsi),%rsi\nlea 16(%rdi),%rdi\nsub $4,%rdx\njc skiplp\nALIGN(16)\nlp:\n\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu -16(%rsi,%rdx,8),%xmm1\n\tsub $4,%rdx\n\tmovdqu %xmm1,-16+32(%rdi,%rdx,8)\n\tmovdqu %xmm0,32(%rdi,%rdx,8)\n\tjnc lp\nskiplp:\ncmp $-2,%rdx\njg case3\nje case2\njnp case0\ncase1:\tmov 8(%rsi,%rdx,8),%rax\n\tmov %rax,8(%rdi,%rdx,8)\ncase0:\tret\ncase3:\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmov -8(%rsi,%rdx,8),%rax\n\tmov %rax,-8(%rdi,%rdx,8)\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\ncase2:\tmovdqu (%rsi,%rdx,8),%xmm0\n\tmovdqu %xmm0,(%rdi,%rdx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/copyi.asm",
    "content": "dnl  mpn_copyi\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_copyi(mp_ptr,mp_ptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_copyi)\nC // for <20 limbs this is slower than core2/copyi for rev 2257\nC // probaly want to tweek it , that should do most of the work\nC //below small loop is not much help\nC //cmp $10,%rdx\nC //jge large\nC //ALIGN(16)\nC //lp:\tmov (%rsi),%rax\nC //\tmov %rax,(%rdi)\nC //\tlea 8(%rsi),%rsi\nC //\tlea 8(%rdi),%rdi\nC //\tsub $1,%rdx\nC //\tjnz lp\nC //\tret\nC // large:\ncmp $0,%rdx\njz endfn\nmov %rdi,%rax\nsub %rsi,%rax\ntest $0xF,%rax\njz aligned\ntest $0xF,%rdi\njz srcisodd\nmov $5,%rcx\nsub %rdx,%rcx\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nmovapd (%rsi,%rcx,8),%xmm1\nmovq %xmm1,(%rdi,%rcx,8)\nadd $8,%rdi\ncmp $1,%rdx\njz endfn\ncmp $0,%rcx\njge skiplpud\nALIGN(16)\nlpud:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc 
lpud\nskiplpud:\ncmp $2,%rcx\nja case0d\njz case1d\njp case2d\nALIGN(16)\ncase3d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1  \n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1d:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0d:\tmovhpd %xmm1,(%rdi,%rcx,8)\nendfn:\tret\nC //////////////////////////\nsrcisodd:\nmov $4,%rcx\nsub %rdx,%rcx\nlea -32(%rsi,%rdx,8),%rsi\nlea -32(%rdi,%rdx,8),%rdi\n\tmovapd -8(%rsi,%rcx,8),%xmm1\n\tsub $8,%rsi\ncmp $0,%rcx\njge skiplpus\t\nALIGN(16)\nlpus:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tadd $4,%rcx\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,-32(%rdi,%rcx,8)\n\tmovapd 32-32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16-32(%rdi,%rcx,8)\n\tjnc lpus\nskiplpus:\ncmp $2,%rcx\nja case0s\njz case1s\njp case2s\nALIGN(16)\ncase3s:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovapd 32(%rsi,%rcx,8),%xmm1\n\tshufpd $1,%xmm1,%xmm0\n\tmovapd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase2s: movapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tmovhpd %xmm0,16(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase1s:\tmovapd 16(%rsi,%rcx,8),%xmm0\n\tshufpd $1,%xmm0,%xmm1\n\tmovapd %xmm1,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncase0s:\tmovhpd %xmm1,(%rdi,%rcx,8)\n\tret\nC //////////////////////////\nALIGN(16)\naligned:\nmov $3,%rcx\nsub %rdx,%rcx\ntest $0xF,%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\njz notodda\n\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tadd $1,%rcx\nnotodda:\ncmp $0,%rcx\njge skiplpa\nALIGN(16)\nlpa:\tadd $4,%rcx\n\tmovapd -32(%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,-32(%rdi,%rcx,8)\n\tmovapd 16-32(%rsi,%rcx,8),%xmm1\n\tmovapd 
%xmm1,16-32(%rdi,%rcx,8)\n\tjnc lpa\nskiplpa:\ncmp $2,%rcx\nja casea0\nje casea1\njp casea2\ncasea3:\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tmov 16(%rsi,%rcx,8),%rax\n\tmov %rax,16(%rdi,%rcx,8)\ncasea0:\tret\nALIGN(16)\ncasea2:\tmovapd (%rsi,%rcx,8),%xmm0\n\tmovapd %xmm0,(%rdi,%rcx,8)\n\tret\nALIGN(16)\ncasea1:\tmov (%rsi,%rcx,8),%rax\n\tmov %rax,(%rdi,%rcx,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/divexact_byff.as",
    "content": "\n;  mpn_divexact_byff\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\tret mpn_divexact_byff(mp_ptr,mp_ptr,mp_size_t)\n\n\tGLOBAL_FUNC mpn_divexact_byff\n\txor     eax, eax\n\tmov     rcx, rdx\n\tand     rdx, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n\tje      L_skiplp\n; want carry clear here\n\talign   16\nL_lp:\n\tsbb     rax, [rsi]\n\tlea     rdi, [rdi+32]\n\tmov     r8, rax\n\tsbb     rax, [rsi+8]\n\tmov     r9, rax\n\tsbb     rax, [rsi+16]\n\tmov     r10, rax\n\tsbb     rax, [rsi+24]\n\tdec     rcx\n\tmov     [rdi-32], r8\n\tmov     [rdi-24], r9\n\tmov     [rdi-16], r10\n\tmov     [rdi-8], rax\n\tlea     rsi, [rsi+32]\n\tjnz     L_lp\nL_skiplp:\n; dont want to change the carry\n\tinc     rdx\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi]\n\tmov     [rdi], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+8]\n\tmov     [rdi+8], rax\n\tdec     rdx\n\tjz      L_end\n\tsbb     rax, [rsi+16]\n\tmov     [rdi+16], rax\nL_end:\n\tsbb     rax, 0\n\tret\n\tend\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=(rsi,rdx) / rcx\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\nC\tThis is same as the shifting version but with  no shifting\n\nASM_START()\nPROLOGUE(mpn_divrem_hensel_qr_1_1)\nmov $0,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nC //clear carry\nxor %rdx,%rdx\nALIGN(16)\nL(loop):\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    mov %rax,(%rdi,%r9,8)\n    mul %rcx\n    add $1,%r8\n    inc %r9\n    jnz L(loop)\nmov $0,%rax\nadc %rdx,%rax\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/hamdist.asm",
    "content": "dnl  mpn_hamdist\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_hamdist)\nmov $1,%rcx\nlea -8(%rsi,%rdx,8),%rsi\nlea -8(%rdi,%rdx,8),%rdi\nxor %eax,%eax\nsub %rdx,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rdi,%rcx,8),%r8                                                                                                                                                               \n\txor (%rsi,%rcx,8),%r8                                                                                                                                                               \n\tpopcnt %r8,%r8                                                                                                                                                                      \n\tmov 8(%rdi,%rcx,8),%r9                                                                                                                                                              \n\txor 8(%rsi,%rcx,8),%r9                                                                                                           
                                                   \n\tpopcnt %r9,%r9                                                                                                                                                                      \n\tadd %r8,%rax                                                                                                                                                                        \n\tadd %r9,%rax                                                                                                                                                                        \n\tadd $2,%rcx\n\tjnc L(lp)\nL(skiplp):\njne L(fin)\n\tmov (%rdi,%rcx,8),%r8 \n\txor (%rsi,%rcx,8),%r8\n\tpopcnt %r8,%r8\n\tadd %r8,%rax \nL(fin):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/ior_n.asm",
    "content": "dnl  mpn_ior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_ior_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_ior_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpor %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tor %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/iorn_n.asm",
    "content": "dnl  mpn_iorn_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_iorn_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nC\tiorn=  x | ~y  =   ~(~x & y)  = nandn (swap arg)\n\nASM_START()\nPROLOGUE(mpn_iorn_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rsi,%r8,8),%xmm0\n\tmovdqu 16(%rsi,%r8,8),%xmm1\n\tmovdqu (%rdx,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rdx,%r8,8),%xmm3\n\tpandn %xmm3,%xmm1\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rsi,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rdx,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tnot %rax\n\tor %rcx,%rax\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rsi,%r8,8),%xmm0\n\tmovdqu 
(%rdx,%r8,8),%xmm2\n\tpandn %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tnot %rax\n\tor %rcx,%rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/karaadd.asm",
    "content": "dnl  mpn_karaadd\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karaadd)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\npush %rdx\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\n.Lp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi,%rcx,8),%r8\n\tadc 8(%rsi,%rcx,8),%r9\n\tadc 16(%rsi,%rcx,8),%r10\n\tadc 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\tadc 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc .Lp\ncmp $2,%rcx\njg\t.Lcase0\njz\t.Lcase1\njp\t.Lcase2\n.Lcase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rsi),%r8\n\tadc 8(%rsi),%r9\n\tadc 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\tadc 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp .Lfin\n.Lcase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rsi),%r8\n\tadc 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\tadc 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp .Lfin\n.Lcase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tadc (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tinc %rdx\n\tmov %r12,(%rbp,%rcx,8)\n.Lfin:\tmov $3,%rcx\n.Lcase0: #rcx=3\n\tpop %r8\n\tbt $0,%r8\n\tjnc .Lnotodd\n\txor %r10,%r10\n\tmov (%rbp,%rdx,8),%r8\n\tmov 8(%rbp,%rdx,8),%r9\n\tadd (%rsi,%rdx,8),%r8\n\tadc 8(%rsi,%rdx,8),%r9\n\trcl $1,%r10\n\tadd %r8,24(%rbp)\n\tadc %r9,32(%rbp)\n\tadc %r10,40(%rbp)\n.L7:\tadcq $0,24(%rbp,%rcx,8)\n\tinc %rcx\n\tjc .L7\n\tmov $3,%rcx\n.Lnotodd:and $3,%rax\n\tpopcnt %rax,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadc %r8,(%rdi,%rdx,8)\n.L1:\tadcq $0,8(%rdi,%rdx,8)\n\tinc %rdx\n\tjc .L1\n\tand $7,%rbx\n\tpopcnt %rbx,%r8\n\tadd %r8,24(%rbp)\n.L2:\tadcq $0,8(%rbp,%rcx,8)\n\tinc %rcx\n\tjc .L2\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/karasub.asm",
    "content": "dnl  mpn_karasub\n\ndnl  Copyright 2011,2012 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_karasub)\n# requires n>=8\npush %rbx\npush %rbp\npush %r12\npush %r13\npush %r14\npush %r15\npush %rdx\n#rp is rdi\n#tp is rsi\n#n is rdx and put it on the stack\nshr $1,%rdx\n#n2 is rdx\nlea (%rdx,%rdx,1),%rcx\n# 2*n2 is rcx\n# L is rdi\n# H is rbp\n# tp is rsi\nlea (%rdi,%rcx,8),%rbp\nxor %rax,%rax\nxor %rbx,%rbx\n# rax rbx are the carrys\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rbp,%rdx,8),%rbp\nmov $3,%ecx\nsub %rdx,%rcx\nmov $3,%edx\n.align 16\n.Lp:\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp,%rcx,8),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp,%rcx,8),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp,%rcx,8),%r10\n\tmov 24(%rdi,%rdx,8),%r11\n\tadc 24(%rbp,%rcx,8),%r11\n\trcl $1,%rbx\n\tbt $1,%rax\n\tmov %r11,%r15\n\tadc (%rdi,%rcx,8),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi,%rcx,8),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi,%rcx,8),%r10\n\tadc 24(%rdi,%rcx,8),%r11\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 
16(%rbp,%rdx,8),%r14\n\tadc 24(%rbp,%rdx,8),%r15\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi,%rcx,8),%r8\n\tsbb 8(%rsi,%rcx,8),%r9\n\tsbb 16(%rsi,%rcx,8),%r10\n\tsbb 24(%rsi,%rcx,8),%r11\n\tmov %r10,16(%rdi,%rdx,8)\n\tmov %r11,24(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\tsbb 24(%rsi,%rdx,8),%r15\n\trcl $1,%rbx\n\tadd $4,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n\tmov %r13,8(%rbp,%rcx,8)\n\tmov %r14,16(%rbp,%rcx,8)\n\tmov %r15,24(%rbp,%rcx,8)\n\tadd $4,%rcx\n\tjnc .Lp\ncmp $2,%rcx\njg\t.Lcase0\njz\t.Lcase1\njp\t.Lcase2\n.Lcase3:\t#rcx=0\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc (%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 8(%rbp),%r9\n\tmov 16(%rdi,%rdx,8),%r10\n\tadc 16(%rbp),%r10\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc (%rdi),%r8\n\tmov %r9,%r13\n\tadc 8(%rdi),%r9\n\tmov %r10,%r14\n\tadc 16(%rdi),%r10\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\tadc 16(%rbp,%rdx,8),%r14\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb (%rsi),%r8\n\tsbb 8(%rsi),%r9\n\tsbb 16(%rsi),%r10\n\tmov %r10,16(%rdi,%rdx,8)\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\tsbb 16(%rsi,%rdx,8),%r14\n\trcl $1,%rbx\n\tadd $3,%rdx\n\tmov %r12,(%rbp)\n\tmov %r13,8(%rbp)\n\tmov %r14,16(%rbp)\n\tjmp .Lfin\n.Lcase2:\t#rcx=1\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 8(%rbp),%r8\n\tmov %r8,%r12\n\tmov 8(%rdi,%rdx,8),%r9\n\tadc 16(%rbp),%r9\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 8(%rdi),%r8\n\tmov %r9,%r13\n\tadc 16(%rdi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\tadc 8(%rbp,%rdx,8),%r13\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 8(%rsi),%r8\n\tsbb 16(%rsi),%r9\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tmov %r9,8(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%rbx\n\tadd $2,%rdx\n\tmov 
%r12,8(%rbp)\n\tmov %r13,16(%rbp)\n\tjmp .Lfin\n.Lcase1:\t#rcx=2\n\tbt $2,%rbx\n\tmov (%rdi,%rdx,8),%r8\n\tadc 16(%rbp),%r8\n\tmov %r8,%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tadc 16(%rdi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tadc (%rbp,%rdx,8),%r12\n\trcl $1,%rbx\n\tbt $1,%rax\n\tsbb 16(%rsi),%r8\n\trcl $1,%rax\n\tbt $2,%rbx\n\tmov %r8,(%rdi,%rdx,8)\n\tsbb (%rsi,%rdx,8),%r12\n\trcl $1,%rbx\n\tadd $1,%rdx\n\tmov %r12,(%rbp,%rcx,8)\n.Lfin:\tmov $3,%rcx\n.Lcase0: \t#rcx=3\n\t#// store top two words of H as carrys could change them\n\tpop %r15\n\tbt $0,%r15\n\tjnc .Lskipload\n\tmov (%rbp,%rdx,8),%r12\n        mov 8(%rbp,%rdx,8),%r13\n\t#// the two carrys from 2nd to 3rd\n.Lskipload:\tmov %rdx,%r11\n\txor %r8,%r8\n\tbt $1,%rax\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rdi,%rdx,8)\n.L2:\tadcq $0,8(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc .L2\n\t# //the two carrys from 3rd to 4th\n\txor %r8,%r8\n\tbt $1,%rbx\n\tadc %r8,%r8\n\tbt $2,%rbx\n\tadc $0,%r8\n\tadd %r8,(%rbp,%rcx,8)\n.L3:\tadcq $0,8(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc .L3\n\t#// now the borrow from 2nd to 3rd\n\tmov %r11,%rdx\n\tbt $0,%rax\n.L1:\tsbbq $0,(%rdi,%rdx,8)\n\tlea 1(%rdx),%rdx\n\tjc .L1\n\t#// borrow from 3rd to 4th\n\tmov $3,%rcx\n\tbt $0,%rbx\n.L4:\tsbbq $0,(%rbp,%rcx,8)\n\tlea 1(%rcx),%rcx\n\tjc .L4\n\t#// if odd the do next two\n\tmov $3,%rcx\n\tmov %r11,%rdx\n\tbt $0,%r15\n\tjnc .Lnotodd\n\txor %r10,%r10\n\tsub (%rsi,%rdx,8),%r12\n\tsbb 8(%rsi,%rdx,8),%r13\n\trcl $1,%r10\n\tadd %r12,24(%rbp)\n\tadc %r13,32(%rbp)\n\tmov $0,%r8\n\tadc %r8,%r8\n\tbt $0,%r10\n\tsbb $0,%r8\n.L7:\tadd %r8,16(%rbp,%rcx,8)\n\tadc $0,%r8\n\tadd $1,%rcx\n\tsar $1,%r8\n\tjnz .L7\n.Lnotodd:\t\npop %r15\npop %r14\npop %r13\npop %r12\npop %rbp\npop %rbx\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/lshift.as",
    "content": "\n;  AMD64 mpn_lshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm\n\n%include 'yasm_mac.inc'\n\n%define MOVQ movd\n\n\tBITS 64\n\n   GLOBAL_FUNC  mpn_lshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshl     rdx, cl\n\tneg     rcx\n\tshr     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     r11, r8\n\tmov     rax, r9\n\tshl     r8, cl\n\tshl     r9, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     rax, cl\n\tor      r9, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tsub     rax, rcx\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmov     r8, rdx\n\tlea     r9, [rsi+r8*8-16]\n\tmov     r10, r9\n\tand     r9, -9\n\tmovdqa  xmm3, [r9]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tpshufd  xmm3, xmm3, 0x4E\n\tMOVQ    rax, xmm3\n\tcmp     r10, r9\n\tje      aligned\n\tmovq    xmm2, [rsi+r8*8-8]\n\tmovq    xmm4, xmm2\n\tpsrlq   xmm2, xmm1\n\tMOVQ    rax, xmm2\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm3\n\tmovq    [rdi+r8*8-8], xmm4\n\tdec     r8\naligned:\n\tsub     r8, 5\n\tjle     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovdqa  xmm3, [rsi+r8*8-8]\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpshufd  xmm3, xmm3, 0x4E\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tsub     r8, 4\n\tjg      loop1\nskiploop:\n\tcmp     r8, -1\n\tje      left2\n\tjg      left3\n\tjp      left1\nleft0:\n;\tmay be easier to bswap xmm5 first , same with other cases\n\tpxor    xmm2, xmm2\n\tpsllq   xmm5, xmm0\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], 
xmm5\n\tret\n\talign   16\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tmovq    xmm3, [rsi+r8*8]\n\tpshufd  xmm3, xmm3, 0x4E\n\tmovdqa  xmm5, xmm3\n\tpsrlq   xmm3, xmm1\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tpsllq   xmm5, xmm0\n\tmovhpd  [rdi+r8*8], xmm5\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+8]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpxor    xmm3, xmm3\n\tmovhlps xmm2, xmm3\n\tpsllq   xmm4, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+8], xmm4\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tpshufd  xmm2, xmm2, 0x4E\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm5, xmm0\n\tpsrlq   xmm2, xmm1\n\tmovhlps xmm3, xmm2\n\tpor     xmm5, xmm3\n\tmovq    [rdi+r8*8+24], xmm5\n\tmovhpd  [rdi+r8*8+32], xmm5\n\tpsllq   xmm4, xmm0\n\tmovhpd  [rdi+r8*8+16], xmm4\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/lshift1.as",
    "content": "\n;  AMD64 mpn_lshift1\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)<<1\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_lshift1\n\txor     rax, rax\n\tmov     r11, rdx\n\tand     r11, 7\n\tinc     r11\n\tshr     rdx, 3\n;\tand clear carry flag\n\tcmp     rdx, 0\n\tjz      next\n\talign   16\nloop1:\n\tmov     rcx, [rsi]\n\tmov     r8, [rsi+8]\n\tmov     r10, [rsi+16]\n\tmov     r9, [rsi+24]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi], rcx\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     [rdi+24], r9\n\tmov     rcx, [rsi+32]\n\tmov     r8, [rsi+40]\n\tmov     r10, [rsi+48]\n\tmov     r9, [rsi+56]\n\tadc     rcx, rcx\n\tadc     r8, r8\n\tadc     r10, r10\n\tadc     r9, r9\n\tmov     [rdi+32], rcx\n\tmov     [rdi+40], r8\n\tmov     [rdi+48], r10\n\tmov     [rdi+56], r9\n\tlea     rdi, [rdi+64]\n\tdec     rdx\n\tlea     rsi, [rsi+64]\n\tjnz     loop1\nnext:\n\tdec     r11\n\tjz      end\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     rcx, [rsi]\n\tadc     rcx, rcx\n\tmov     [rdi], rcx\n\tdec     r11\n\tjz    
  end\n\tmov     rcx, [rsi+8]\n\tadc     rcx, rcx\n\tmov     [rdi+8], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+16]\n\tadc     rcx, rcx\n\tmov     [rdi+16], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+24]\n\tadc     rcx, rcx\n\tmov     [rdi+24], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+32]\n\tadc     rcx, rcx\n\tmov     [rdi+32], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+40]\n\tadc     rcx, rcx\n\tmov     [rdi+40], rcx\n\tdec     r11\n\tjz      end\n\tmov     rcx, [rsi+48]\n\tadc     rcx, rcx\n\tmov     [rdi+48], rcx\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/lshift2.asm",
    "content": "dnl  mpn_lshift2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_lshift2(mp_ptr,mp_ptr,mp_size_t)\nC\trax                 rdi,   rsi,      rdx\n\nASM_START()\nPROLOGUE(mpn_lshift2)\nmov $3,%rcx\nlea -24(%rsi,%rdx,8),%rsi\nlea -24(%rdi,%rdx,8),%rdi\nmov $0,%r8\nsub %rdx,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov 24(%rsi,%rcx,8),%r8\n\tlea (%r11,%r8,4),%r11\n\tmov %r11,24(%rdi,%rcx,8)\n\tshr $62,%r8\n\tadd $4,%rcx\n\tmov %r9,8-32(%rdi,%rcx,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%rcx\nja L(case0)\nje L(case1)\njp L(case2)\nL(case3):\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov 16(%rsi,%rcx,8),%r11\n\tmov %r8,(%rdi,%rcx,8)\n\tlea (%r10,%r11,4),%r10\n\tmov %r10,16(%rdi,%rcx,8)\n\tshr $62,%r11\n\tmov %r11,%rax\n\tmov 
%r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case2):\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov 8(%rsi,%rcx,8),%r10\n\tlea (%r9,%r10,4),%r9\n\tshr $62,%r10\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r10,%rax\n\tmov %r9,8(%rdi,%rcx,8)\n\tret\nALIGN(16)\nL(case1):\tmov (%rsi,%rcx,8),%r9\n\tlea (%r8,%r9,4),%r8\n\tshr $62,%r9\n\tmov %r8,(%rdi,%rcx,8)\n\tmov %r9,%rax\n\tret\nALIGN(16)\nL(case0):\tmov %r8,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/lshiftc.asm",
    "content": "dnl  mpn_lshiftc\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_lshiftc)\nMOVQ %rcx,%mm0\nmov $64,%rax\nsub %rcx,%rax\npcmpeqb %mm6,%mm6\nMOVQ %rax,%mm1\nlea 8(%rsi),%rsi\nlea 8(%rdi),%rdi\nsub $5,%rdx\nmovq 24(%rsi,%rdx,8),%mm5\nmovq %mm5,%mm3\npsrlq %mm1,%mm5\nMOVQ %mm5,%rax\npsllq %mm0,%mm3\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tmovq -8(%rsi,%rdx,8),%mm5\n\tsub $4,%rdx\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,40(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,32(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tjnc L(lp)\nL(skiplp):\ncmp $-2,%rdx\njz L(case2)\njp L(case1)\njs L(case0)\nL(case3):\n\tmovq 
16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tmovq (%rsi,%rdx,8),%mm2\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\nL(case2):\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tmovq 8(%rsi,%rdx,8),%mm5\n\tmovq %mm5,%mm3\n\tpsrlq %mm1,%mm5\n\tpor %mm5,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\tpsllq %mm0,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,8(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\nL(case1):\n\tmovq 16(%rsi,%rdx,8),%mm2\n\tmovq %mm2,%mm4\n\tpsrlq %mm1,%mm2\n\tpor %mm2,%mm3\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\tpsllq %mm0,%mm4\n\tpxor %mm6,%mm4\n\tmovq %mm4,16(%rdi,%rdx,8)\n\temms\n\tret\nALIGN(16)\nL(case0):\n\tpxor %mm6,%mm3\n\tmovq %mm3,24(%rdi,%rdx,8)\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mod_1_1.asm",
    "content": "dnl  mpn_mod_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,2)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_1)\npush %r13\nmov -8(%rsi,%rdx,8),%r13\nmov -16(%rsi,%rdx,8),%rax\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov %rdx,%rcx\nsub $2,%rcx\nALIGN(16)\nL(lp):\n\tmov -8(%rsi,%rcx,8),%r10\n\tmul %r8\n\tadd %rax,%r10\n\tmov $0,%r11\n\tadc %rdx,%r11\n\tmov %r13,%rax\n\tmul %r9\n\tadd %r10,%rax\n\tmov %r11,%r13\n\tadc %rdx,%r13\n\tdec %rcx\n\tjnz L(lp)\nC // r13,rax\nmov %rax,(%rdi)\nmov %r8,%rax\nmul %r13\nadd %rax,(%rdi)\nadc $0,%rdx\nmov %rdx,8(%rdi)\npop %r13\nret\nEPILOGUE()\n\n\n\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mod_1_2.asm",
    "content": "dnl  mpn_mod_1_2\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,3)  contains B^i % divisor\n\n\nASM_START()\nPROLOGUE(mpn_mod_1_2)\nC // require rdx >=4\npush %r12\npush %r13\npush %r14\nmov -8(%rsi,%rdx,8),%r14\nmov -16(%rsi,%rdx,8),%r13\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov %rdx,%rcx\nmov -24(%rsi,%rdx,8),%rax\nmul %r8\nmov -32(%rsi,%rcx,8),%r11\nxor %r12,%r12\nsub $6,%rcx\njc L(skiplp)\nALIGN(16)\nL(lp):\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov 8(%rsi,%rcx,8),%rax\n\tmov %r12,%r14\n\tadc %rdx,%r14\n\tmul %r8\n\tmov $0,%r12d\n\tmov 0(%rsi,%rcx,8),%r11\n\tsub $2,%rcx\n\tjnc L(lp)\nL(skiplp):\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r9,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r10,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\ncmp $-2,%rcx\nje L(case0)\nL(case1):\n\tmov 
8(%rsi,%rcx,8),%r11\n\txor %r12,%r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r11\n\tadc %rdx,%r12\n\tmov %r11,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r13\n\tmov %r12,%r14\n\tadc %rdx,%r14\nL(case0):\t\nmov %r8,%rax\nmul %r14\nadd %rax,%r13\nadc $0,%rdx\nmov %r13,(%rdi)\nmov %rdx,8(%rdi)\npop %r14\npop %r13\npop %r12\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mod_1_3.asm",
    "content": "dnl  mpn_mod_1_3\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\nC\twhere (rcx,4)  contains B^i % divisor\n\nASM_START()\nPROLOGUE(mpn_mod_1_3)\nC // require rdx >=5\npush %r12\npush %r13\npush %r14\npush %r15\nmov -8(%rsi,%rdx,8),%r15\nmov -16(%rsi,%rdx,8),%r14\nmov -32(%rsi,%rdx,8),%rax\nmov -40(%rsi,%rdx,8),%r12\nmov (%rcx),%r8\nmov 8(%rcx),%r9\nmov 16(%rcx),%r10\nmov 24(%rcx),%r11\nmov %rdx,%rcx\nsub $8,%rcx\njc L(skiplp)\nALIGN(16)\nC // r15 r14 -8() -16()=rax -24()=r12\nL(lp):\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 0(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 8(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tsub $3,%rcx\n\tjnc L(lp)\nL(skiplp):\nC // we have loaded up the next two limbs\nC // but because they are out of order we can have to do 3 limbs min\ncmp $-2,%rcx\njl 
L(case1)\nje L(case2)\nL(case3):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 8(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tmov 16(%rsi,%rcx,8),%rax\n\tadc %rdx,%r15\n\tC // r15 r14 rax r12\n\tmov $0,%r13\n\tmul %r8\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case2):\n\tC //two more limbs is 4 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12\n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov 16(%rsi,%rcx,8),%r12\n\tmov %r13,%r15\n\tadc %rdx,%r15\n\tC // r15 r14 r12\n\tmov $0,%r13\n\tmov %r8,%rax\n\tmul %r14\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r9,%rax\n\tmul %r15\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tC // r13 r12\n\tmov %r8,%rax\n\tmul %r13\n\tadd %rax,%r12\n\tadc $0,%rdx\n\tmov %r12,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nALIGN(16)\nL(case1):\n\tC // one more is 3 limbs\n\tC // r15 r14 40() 8+24()=rax 0+24()=r12 \n\tmul %r8\n\tadd %rax,%r12\n\tmov 40(%rsi,%rcx,8),%rax\n\tmov $0,%r13\n\tadc %rdx,%r13\n\tmul %r9\n\tadd %rax,%r12\n\tnop\n\tadc %rdx,%r13\n\tmov %r10,%rax\n\tmul %r14\t\n\tadd %rax,%r12\n\tadc %rdx,%r13\n\tmov %r12,%r14\n\tmov %r11,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tmov %r13,%r15\n\tadc 
%rdx,%r15\n\tmov %r8,%rax\n\tmul %r15\n\tadd %rax,%r14\n\tadc $0,%rdx\n\tmov %r14,(%rdi)\n\tmov %rdx,8(%rdi)\n\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mul_1.asm",
    "content": "dnl  mpn_mul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tmov %r8,(%rdi,%r11,8)\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tmov %r9,8(%rdi,%r11,8)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tmov %r10,16(%rdi,%r11,8)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tmov %r12,24(%rdi,%r11,8)\n\tadd %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nmov %r8,(%rdi,%r11,8)\nadd %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tmov %r9,8(%rdi)\n\tadd 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tmov %r10,16(%rdi)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tmov %r12,24(%rdi)\n\tadd %rax,%r8\n\tadc %rdx,%r9\n\tmov %r8,32(%rdi)\n\t#add $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tmov %r9,16(%rdi)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tmov %r10,24(%rdi)\n\tadd %rax,%r12\n\tadc %rdx,%r8\n\tmov %r12,32(%rdi)\n\t#add $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tmov %r9,24(%rdi)\n\tadd %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov %r10,32(%rdi)\n\t#add $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tmov %rax,(%rdi)\n\t#add $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tmov %r9,32(%rdi)\n\t#add $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mul_2.asm",
    "content": "dnl  mpn_mul_2\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_mul_2)\npush %rbx\nmov 8(%rcx),%r8\nmov (%rcx),%rcx\nmov $3,%rbx\nlea -24(%rdi,%rdx,8),%rdi\nlea -24(%rsi,%rdx,8),%rsi\nsub %rdx,%rbx\n\t#mul %r8\n\t#add %rax,%r9\n\t#adc %rdx,%r10\n\t#mov -16(%rsi,%rbx,8),%rax\n\t\tmov $0,%r11\n\t#mul %rcx\n\t#add %rax,%r9\n\t#adc %rdx,%r10\n\t#adc $0,%r11\n\t#mov -16(%rsi,%rbx,8),%rax\n\t#mul %r8\n\t#add %rax,%r10\n\t#mov -8(%rsi,%rbx,8),%rax\n\t#adc %rdx,%r11\n\t#mul %rcx\n\t#add %rax,%r10\n\t#mov %r9,-16(%rdi,%rbx,8)\n\t#adc %rdx,%r11\n\t\tmov $0,%r9\n\t#mov -8(%rsi,%rbx,8),%rax\n\t#adc $0,%r9\n\t#mul %r8\n\t#add %rax,%r11\n\t#mov %r10,-8(%rdi,%rbx,8)\n\tmov (%rsi,%rbx,8),%rax\n\tmov $0,%r10\n\t#adc %rdx,%r9\n\tmul %rcx\n\tadd %rax,%r11\n\tmov (%rsi,%rbx,8),%rax\n\tmov %r11,(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\t#adc $0,%r10\n\t#add $3,%rbx\ncmp $0,%rbx\njge L(skiplp)\nALIGN(16)\nL(lp):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul 
%r8\n\tadd %rax,%r10\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tadc %rdx,%r11\n\tmov $0,%r9\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc $0,%r9\n\tmul %r8\n\tadd %rax,%r11\n\tmov %r10,16(%rdi,%rbx,8)\n\tmov 24(%rsi,%rbx,8),%rax\n\tmov $0,%r10\n\tadc %rdx,%r9\n\tmul %rcx\n\tadd %rax,%r11\n\tmov 24(%rsi,%rbx,8),%rax\n\tmov %r11,24(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\tadc $0,%r10\n\tadd $3,%rbx\n\tjnc L(lp)\nL(skiplp):\ncmp $1,%rbx\nja L(case0)\nje L(case1)\nL(case2):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul %r8\n\tadd %rax,%r10\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc %rdx,%r11\n\tmul %rcx\n\tadd %rax,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tadc %rdx,%r11\n\tmov $0,%r9\n\tmov 16(%rsi,%rbx,8),%rax\n\tadc $0,%r9\n\tmul %r8\n\tadd %rax,%r11\n\tmov %r10,16(%rdi,%rbx,8)\n\tadc %rdx,%r9\n\tmov %r11,24(%rdi,%rbx,8)\n\tmov %r9,%rax\n\tpop %rbx\n\tret\nL(case1):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov 8(%rsi,%rbx,8),%rax\n\tmov $0,%r11\n\tmul %rcx\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tadc $0,%r11\n\tmov 8(%rsi,%rbx,8),%rax\n\tmul %r8\n\tadd %rax,%r10\n\tadc %rdx,%r11\n\tmov %r9,8(%rdi,%rbx,8)\n\tmov %r10,16(%rdi,%rbx,8)\n\tmov %r11,%rax\n\tpop %rbx\n\tret\nL(case0):\tmul %r8\n\tadd %rax,%r9\n\tadc %rdx,%r10\n\tmov %r9,8(%rdi,%rbx,8)\n\tmov %r10,%rax\n\tpop %rbx\n\tret\nEPILOGUE()\n\n\n\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mul_basecase.as",
    "content": "\n;  AMD64 mpn_mul_basecase\n\n;  Copyright 2008,2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n; C\t(rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n; C Version 1.0.7\n\n\n%macro addmul2lp 1\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc   
  r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+8], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 
2\n\tmov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi-16], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi-8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+8], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n\talign   16\n%%1:\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi+rbx*8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     
[rdi+rbx*8+24], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r10, 0\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n\tmov     rbx, r14\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+24], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadd     r15, 2\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], r9\n\tlea     rdi, [rdi+16]\n\tmov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r10, 0\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n\tmov     rax, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+24]\n\tmov     r11, 0\n\tadc     r10, rdx\n\tmul     rcx\n\tmov     [rdi], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadd     r15, 2\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+24], r11\n\tmov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r10, 0\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n\tmov     rax, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-8], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+24]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+24]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi+8], 
r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r11\n\tmov     [rdi+24], r12\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n\tmov     rcx, [r13+r15*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tmov     r8, [r13+r15*8+8]\n\tmov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n\tmov     rax, [rsi]\n\tlea     rdi, [rdi+16]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tmov     [rdi-16], r12\n\tadd     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+8]\n\tadc     r11, 0\n\tmul     r8\n\tmov     [rdi-8], r9\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tmov     [rdi], r10\n\tmov     r9, 0\n\tadd     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tmov     [rdi+8], r11\n\tadd     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+16], r12\n\tmov     [rdi+24], r9\n\tadd     r15, 2\n\tmov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n\talign   16\n%%1:\n\tmov     r10, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tmov     [rdi+rbx*8], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul     r8\n\tmov     [rdi+rbx*8+8], r10\n\tdb      0x26\n\tadd     r11, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+rbx*8+24]\n\tmul     r8\n\tmov     [rdi+rbx*8+16], r11\n\tdb      0x26\n\tadd     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     rbx, 4\n\tmov     rax, [rsi+rbx*8]\n\tjnc     %%1\n%endmacro\n\n; rbx is 0\n%macro 
mulnext0 0\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tmov     [rdi], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmov     r12d, 0\n\tmul     r8\n\tmov     [rdi+8], r10\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r11\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n\tmov     rax, [rsi+16]\n\tmul     r8\n\tmov     [rdi+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+24], r12\n\tmov     [rdi+32], rdx\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro mulnext2 0\n\tmov     rax, [rsi+24]\n\tmul     r8\n\tmov     [rdi+16], r9\n\tadd     r10, rax\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tmov     [rdi+24], r10\n\tmov     [rdi+32], r11\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n\tmov     [rdi+24], r9\n\tmov     [rdi+32], r10\n\tinc     r15\n\tlea     rdi, [rdi+8]\n\tmov     rax, [rsi+r14*8]\n\tmov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n\tjz      %%2\n\talign   16\n%%1:\n\taddmul2pro%1\n\taddmul2lp %1\n\taddmul2epi%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tmov     r15, [rsp-40]\n\tret\n%endmacro\n\n%macro oldmulnext0 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     
r13\n\tmov     [rdi+r11*8+24], rbx\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+32], r12\n\tmov     rax, [rsi+r14*8]\n\tmov     [rdi+r11*8+40], rdx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     r12d, 0\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmul     r13\n\tmov     [rdi+r11*8+16], r10\n\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rdi+r11*8+24], r12\n\tmov     [rdi+r11*8+32], rdx\n\tinc     r8\n\tlea     rdi, [rdi+8]\n\tmov     r11, r14\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     [rdi+r11*8+8], r9\n\tadd     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     [rdi+r11*8+16], r10\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n\tmov     [rdi+r11*8+8], r9\n\tmov     [rdi+r11*8+16], r10\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tlea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+16]\n\tmul     r13\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+24]\n\tmov     r12d, 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+32]\n\tmul     r13\n\tadd     [rdi+24], rbx\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r12\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n\tmov     r13, [rcx+r8*8]\n\tmul     r13\n\tmov     
r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r13\n\tlea     rdi, [rdi+8]\n\tadd     [rdi+8], r9\n\tadc     r10, rax\n\tmov     r12d, 0\n\tmov     rax, [rsi+32]\n\tadc     r12, rdx\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+16], r10\n\tadc     r12, rax\n\tadc     rdx, 0\n\tadd     [rdi+24], r12\n\tadc     rdx, 0\n\tmov     [rdi+32], rdx\n\tinc     r8\n\tmov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n\tmov     r13, [rcx+r8*8]\n\tlea     rdi, [rdi+8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [rdi+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tmov     ebx, 0\n\tadd     [rdi+r11*8+8], r9\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r14*8]\n\tadd     [rdi+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [rdi+r11*8+24], rbx\n\tinc     r8\n\tmov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n\tmov     r13, [rcx+r8*8]\n\tdb      0x26\n\tmul     r13\n\tdb      0x26\n\tmov     r12, rax\n\tdb      0x26\n\tlea     rdi, [rdi+8]\n\tdb      0x26\n\tmov     r9, rdx\n\tmov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n\tmov     r11, r14\n\tmul     r13\n\tadd     [rdi+24], r12\n\tadc     r9, rax\n\tadc     rdx, 0\n\tadd     [rdi+32], r9\n\tmov     rax, [rsi+r14*8]\n\tadc     rdx, 0\n\tinc     r8\n\tmov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n\toldmulnext%1\n\tjz      %%2\n\talign   16\n%%1:\n\toldaddmulpro%1\n\toldaddmulnext%1\n\tjnz     %%1\n%%2:\n\tmov     r13, [rsp-8]\n\tmov     r14, [rsp-16]\n\tmov     rbx, [rsp-24]\n\tmov     r12, [rsp-32]\n\tret\n%endmacro\n\n\tASM_START\n\tGLOBAL_FUNC mpn_mul_basecase\n; the current mul does not handle case 
one \n\tcmp     rdx, 4\n\tjg      L_fiveormore\n\tcmp     rdx, 1\n\tje      L_one\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     r14, 5\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-40]\n\tlea     rcx, [rcx+r8*8]\n\tneg     r8\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rax, [rsi+r14*8]\n\tmov     r13, [rcx+r8*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tmov     r10d, 0\n\tmul     r13\n\tmov     [rdi+r11*8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     r11, 2\n\tja      L_oldcase3\n\tjz      L_oldcase2\n\tjp      L_oldcase1\nL_oldcase0:\n\toldmpn_muladdmul_1_int 0\nL_oldcase1:\n\toldmpn_muladdmul_1_int 1\nL_oldcase2:\n\toldmpn_muladdmul_1_int 2\nL_oldcase3:\n\toldmpn_muladdmul_1_int 3\n\talign   16\nL_fiveormore:\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\tmov     [rsp-8], r13\n\tmov     [rsp-16], r14\n\tmov     [rsp-24], rbx\n\tmov     [rsp-32], r12\n\tmov     [rsp-40], r15\n\tmov     r14, 4\n\tsub     r14, rdx\n\tlea     rdi, [rdi+rdx*8-32]\n\tlea     rsi, [rsi+rdx*8-32]\n\tmov     r13, rcx\n\tmov     r15, r8\n\tlea     r13, [r13+r15*8]\n\tneg     r15\n\tmov     rbx, r14\n\tmov     rax, [rsi+r14*8]\n\tbt      r15, 0\n\tjnc     L_even\nL_odd:\n\tinc     rbx\n\tmov     r8, [r13+r15*8]\n\tmul     r8\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     L_mulskiploop\n\tmul1lp \nL_mulskiploop:\n\tmov     r10d, 0\n\tmul     r8\n\tmov     [rdi+rbx*8-8], r12\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 2\n\tja      L_mul1case3\n\tjz      L_mul1case2\n\tjp      L_mul1case1\nL_mul1case0:\n\tmulnext0\n\tjmp     L_case0\nL_mul1case1:\n\tmulnext1\n\tjmp     L_case3\nL_mul1case2:\n\tmulnext2\n\tjmp     L_case2\nL_mul1case3:\n\tmulnext3\n\tjmp     L_case1\nL_even:\n\t; as all the mul2pro? 
are the same\n\tmul2pro0\n\tmul2lp \n\tcmp     rbx, 2\n\tja      L_mul2case0\n\tjz      L_mul2case1\n\tjp      L_mul2case2\nL_mul2case3:\n\tmul2epi3\nL_case3:\n\tmpn_addmul_2_int 3\nL_mul2case2:\n\tmul2epi2\nL_case2:\n\tmpn_addmul_2_int 2\nL_mul2case1:\n\tmul2epi1\nL_case1:\n\tmpn_addmul_2_int 1\nL_mul2case0:\n\tmul2epi0\nL_case0:\n\tmpn_addmul_2_int 0\n\talign   16\nL_one:\n\tmov     rax, [rsi]\n\tmul\tqword [rcx]\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tret\n\tend\n\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/mullow_n_basecase.asm",
    "content": "dnl  AMD64 mpn_mullo_basecase optimised for Intel Sandy bridge and Ivy bridge.\n\ndnl  Contributed to the GNU project by Torbjörn Granlund.\n\ndnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_2\t\taddmul_2\nC AMD K8,K9\nC AMD K10\nC AMD bull\nC AMD pile\nC AMD steam\nC AMD bobcat\nC AMD jaguar\nC Intel P4\nC Intel core\nC Intel NHM\nC Intel SBR\t 2.5\t\t 2.95\nC Intel IBR\t 2.3\t\t 2.68\nC Intel HWL\t 2.0\t\t 2.5\nC Intel BWL\nC Intel atom\nC VIA nano\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjörn Granlund.\n\nC TODO\nC   * Implement proper cor2, replacing current cor0.\nC   * Offset n by 2 in order to avoid the outer loop cmp.  
(And sqr_basecase?)\nC   * Micro-optimise.\n\nC When playing with pointers, set this to $2 to fall back to conservative\nC indexing in wind-down code.\ndefine(`I',`$1')\n\ndefine(`rp',       `%rdi')\ndefine(`up',       `%rsi')\ndefine(`vp_param', `%rdx')\ndefine(`n',        `%rcx')\ndefine(`n8',        `%cl')\n\ndefine(`vp',       `%r8')\ndefine(`X0',       `%r14')\ndefine(`X1',       `%r15')\n\ndefine(`w0',       `%r10')\ndefine(`w1',       `%r11')\ndefine(`w2',       `%r12')\ndefine(`w3',       `%r13')\ndefine(`i',        `%rbp')\ndefine(`v0',       `%r9')\ndefine(`v1',       `%rbx')\n\nC rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15\n\nASM_START()\n\tTEXT\n\tALIGN(32)\nPROLOGUE(mpn_mullow_n_basecase)\n\tmov\t(up), %rax\n\tmov\tvp_param, vp\n\n\tcmp\t$4, n\n\tjb\tL(lsmall)\n\n\tmov\t(vp_param), v0\n\tpush\t%rbx\n\tlea\t(rp,n,8), rp\t\tC point rp at R[un]\n\tpush\t%rbp\n\tlea\t(up,n,8), up\t\tC point up right after U's end\n\tpush\t%r12\n\tneg\tn\n\tpush\t%r13\n\tmul\tv0\n\tmov\t8(vp), v1\n\n\ttest\t$1, n8\n\tjnz\tL(lm2b1)\n\nL(lm2b0): lea\t(n), i\n\txor\tw0, w0\n\tmov\t%rax, w2\n\tmov\t%rdx, w1\n\tjmp\tL(lm2l0)\n\nL(lm2b1): lea\t1(n), i\n\txor\tw1, w1\n\txor\tw2, w2\n\tmov\t%rax, w0\n\tmov\t%rdx, w3\n\tjmp\tL(lm2l1)\n\n\tALIGN(32)\nL(lm2tp):mul\tv0\n\tadd\t%rax, w0\n\tmov\t%rdx, w3\n\tadc\t$0, w3\nL(lm2l1):mov\t-8(up,i,8), %rax\n\tmul\tv1\n\tadd\tw1, w0\n\tadc\t$0, w3\n\tadd\t%rax, w2\n\tmov\tw0, -8(rp,i,8)\n\tmov\t%rdx, w0\n\tadc\t$0, w0\n\tmov\t(up,i,8), %rax\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t%rdx, w1\n\tadc\t$0, w1\n\tadd\tw3, w2\nL(lm2l0):mov\t(up,i,8), %rax\n\tadc\t$0, w1\n\tmul\tv1\n\tmov\tw2, (rp,i,8)\n\tadd\t%rax, w0\n\tmov\t%rdx, w2\t\tC FIXME: dead in last iteration\n\tmov\t8(up,i,8), %rax\n\tadc\t$0, w2\t\t\tC FIXME: dead in last iteration\n\tadd\t$2, i\n\tjnc\tL(lm2tp)\n\nL(lm2ed):imul\tv0, %rax\n\tadd\tw0, %rax\n\tadd\tw1, %rax\n\tmov\t%rax, I(-8(rp),-8(rp,i,8))\n\n\tadd\t$2, n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\tcmp\t$-2, 
n\n\tjge\tL(lcor1)\n\n\tpush\t%r14\n\tpush\t%r15\n\nL(louter):\n\tmov\t(vp), v0\n\tmov\t8(vp), v1\n\tmov\t(up,n,8), %rax\n\tmul\tv0\n\ttest\t$1, n8\n\tjnz\tL(la1x1)\n\nL(la1x0):mov\t(rp,n,8), X1\n\txor\tw2, w2\n\txor\tw1, w1\n\ttest\t$2, n8\n\tjnz\tL(la110)\n\nL(la100):lea\t1(n), i\n\tjmp\tL(llo0)\n\nL(la110):lea\t3(n), i\n\tmov\t%rdx, w3\n\tadd\t%rax, X1\n\tmov\t(up,n,8), %rax\n\tmov\t8(rp,n,8), X0\n\tadc\t$0, w3\n\tjmp\tL(llo2)\n\nL(la1x1):mov\t(rp,n,8), X0\n\txor\tw0, w0\n\tmov\t%rdx, w1\n\ttest\t$2, n8\n\tjz\tL(la111)\n\nL(la101):lea\t2(n), i\n\tadd\t%rax, X0\n\tadc\t$0, w1\n\tmov\t(up,n,8), %rax\n\tmul\tv1\n\tmov\t8(rp,n,8), X1\n\tjmp\tL(llo1)\n\nL(la111):lea\t(n), i\n\txor\tw3, w3\n\tjmp\tL(llo3)\n\n\tALIGN(32)\nL(ltop):\nL(llo2):\tmul\tv1\n\tmov\t%rdx, w0\n\tadd\t%rax, X0\n\tadc\t$0, w0\n\tadd\tw1, X1\n\tadc\t$0, w3\n\tadd\tw2, X0\n\tadc\t$0, w0\n\tmov\t-16(up,i,8), %rax\n\tmul\tv0\n\tadd\t%rax, X0\n\tmov\t%rdx, w1\n\tadc\t$0, w1\n\tmov\t-16(up,i,8), %rax\n\tmul\tv1\n\tmov\tX1, -24(rp,i,8)\n\tmov\t-8(rp,i,8), X1\n\tadd\tw3, X0\n\tadc\t$0, w1\nL(llo1):\tmov\t%rdx, w2\n\tmov\tX0, -16(rp,i,8)\n\tadd\t%rax, X1\n\tadc\t$0, w2\n\tmov\t-8(up,i,8), %rax\n\tadd\tw0, X1\n\tadc\t$0, w2\n\tmul\tv0\nL(llo0):\tadd\t%rax, X1\n\tmov\t%rdx, w3\n\tadc\t$0, w3\n\tmov\t-8(up,i,8), %rax\n\tmul\tv1\n\tadd\tw1, X1\n\tmov\t(rp,i,8), X0\n\tadc\t$0, w3\n\tmov\t%rdx, w0\n\tadd\t%rax, X0\n\tadc\t$0, w0\n\tmov\t(up,i,8), %rax\n\tmul\tv0\n\tadd\tw2, X0\n\tmov\tX1, -8(rp,i,8)\n\tmov\t%rdx, w1\n\tadc\t$0, w0\nL(llo3):\tadd\t%rax, X0\n\tadc\t$0, w1\n\tmov\t(up,i,8), %rax\n\tadd\tw3, X0\n\tadc\t$0, w1\n\tmul\tv1\n\tmov\t8(rp,i,8), X1\n\tadd\t%rax, X1\n\tmov\t%rdx, w2\n\tadc\t$0, w2\n\tmov\t8(up,i,8), %rax\n\tmov\tX0, (rp,i,8)\n\tmul\tv0\n\tadd\tw0, X1\n\tmov\t%rdx, w3\n\tadc\t$0, w2\n\tadd\t%rax, X1\n\tmov\t8(up,i,8), %rax\n\tmov\t16(rp,i,8), X0\n\tadc\t$0, w3\n\tadd\t$4, i\n\tjnc\tL(ltop)\n\nL(lend):\timul\tv1, %rax\n\tadd\t%rax, X0\n\tadd\tw1, X1\n\tadc\t$0, w3\n\tadd\tw2, 
X0\n\tmov\tI(-8(up),-16(up,i,8)), %rax\n\timul\tv0, %rax\n\tadd\tX0, %rax\n\tmov\tX1, I(-16(rp),-24(rp,i,8))\n\tadd\tw3, %rax\n\tmov\t%rax, I(-8(rp),-16(rp,i,8))\n\n\tadd\t$2, n\n\tlea\t16(vp), vp\n\tlea\t-16(up), up\n\tcmp\t$-2, n\n\tjl\tL(louter)\n\n\tpop\t%r15\n\tpop\t%r14\n\n\tjnz\tL(lcor0)\n\nL(lcor1):mov\t(vp), v0\n\tmov\t8(vp), v1\n\tmov\t-16(up), %rax\n\tmul\tv0\t\t\tC u0 x v2\n\tadd\t-16(rp), %rax\t\tC FIXME: rp[0] still available in reg?\n\tadc\t-8(rp), %rdx\t\tC FIXME: rp[1] still available in reg?\n\tmov\t-8(up), %r10\n\timul\tv0, %r10\n\tmov\t-16(up), %r11\n\timul\tv1, %r11\n\tmov\t%rax, -16(rp)\n\tadd\t%r10, %r11\n\tadd\t%rdx, %r11\n\tmov\t%r11, -8(rp)\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\n\nL(lcor0):mov\t(vp), %r11\n\timul\t-8(up), %r11\n\tadd\t%rax, %r11\n\tmov\t%r11, -8(rp)\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\n\n\tALIGN(16)\nL(lsmall):\n\tcmp\t$2, n\n\tjae\tL(lgt1)\nL(ln1):\timul\t(vp_param), %rax\n\tmov\t%rax, (rp)\n\tret\nL(lgt1):\tja\tL(lgt2)\nL(ln2):\tmov\t(vp_param), %r9\n\tmul\t%r9\n\tmov\t%rax, (rp)\n\tmov\t8(up), %rax\n\timul\t%r9, %rax\n\tadd\t%rax, %rdx\n\tmov\t8(vp), %r9\n\tmov\t(up), %rcx\n\timul\t%r9, %rcx\n\tadd\t%rcx, %rdx\n\tmov\t%rdx, 8(rp)\n\tret\nL(lgt2):\nL(ln3):\tmov\t(vp_param), %r9\n\tmul\t%r9\t\tC u0 x v0\n\tmov\t%rax, (rp)\n\tmov\t%rdx, %r10\n\tmov\t8(up), %rax\n\tmul\t%r9\t\tC u1 x v0\n\timul\t16(up), %r9\tC u2 x v0\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r9\n\tmov\t8(vp), %r11\n\tmov\t(up), %rax\n\tmul\t%r11\t\tC u0 x v1\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r9\n\timul\t8(up), %r11\tC u1 x v1\n\tadd\t%r11, %r9\n\tmov\t%r10, 8(rp)\n\tmov\t16(vp), %r10\n\tmov\t(up), %rax\n\timul\t%rax, %r10\tC u0 x v2\n\tadd\t%r10, %r9\n\tmov\t%r9, 16(rp)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/nand_n.asm",
    "content": "dnl  mpn_nand_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_nand_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_nand_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpand %xmm3,%xmm1\n\tpand %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tpand %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpand %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tand %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/nior_n.asm",
    "content": "dnl  mpn_nior_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_nior_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_nior_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpor %xmm3,%xmm1\n\tpor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tpor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpor %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\tor %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/popcount.asm",
    "content": "dnl  mpn_popcount\n\ndnl  Copyright 2010 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_popcount)\nmov $5,%rcx\nlea -40(%rdi,%rsi,8),%rdi\nxor %eax,%eax\nsub %rsi,%rcx\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tpopcnt (%rdi,%rcx,8),%r8\n\tpopcnt 8(%rdi,%rcx,8),%r9\n\tpopcnt 16(%rdi,%rcx,8),%r10\n\tpopcnt 24(%rdi,%rcx,8),%rsi\n\tadd %r8,%rax\n\tadd %rsi,%rax\n\tadd %r9,%rax\n\tpopcnt 32(%rdi,%rcx,8),%r8\n\tpopcnt 40(%rdi,%rcx,8),%r9\n\tadd %r8,%rax\n\tadd %r10,%rax\n\tadd %r9,%rax\n\tadd $6,%rcx\n\tjnc L(lp)\nL(skiplp):\nlea L(case5)(%rip),%rdx\t#// in linux we can do this before the loop\nlea (%rcx,%rcx,8),%rcx\t#// rcx*9\nadd %rcx,%rdx\njmp *%rdx\nL(case5):\t#//rcx=0\n\tnop\n\tpopcnt (%rdi),%r8\t#// 5bytes\n\tadd %r8,%rax\t\t#// 3bytes\nL(case4):\t#//rcx=1\n\tpopcnt 8(%rdi),%r9\t#// 6bytes\n\tadd %r9,%rax\nL(case3):\tpopcnt 16(%rdi),%r10\n\tadd %r10,%rax\nL(case2):\tpopcnt 24(%rdi),%rsi\n\tadd %rsi,%rax\nL(case1):\tpopcnt 32(%rdi),%r8\n\tadd %r8,%rax\nL(case0):\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/redc_1.as",
    "content": "\n;  core2 mpn_redc_1\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%include 'yasm_mac.inc'\n\n;\t(rdi, rcx) = (rsi, rcx) + (rdx, rcx)   with the carry flag set for the carry\n;\tthis is the usual mpn_add_n with the final dec rax;adc rax,rax;ret  removed \n;\tand a jump where we have two rets\n\n%macro mpn_add 0\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tadd     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tadc     r11, [rdx]\n\tadc     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tadc     r9, [rdx-16]\n\tadc     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      
%%2\n\tmov     r11, [rsi]\n\tadc     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tadc     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tadc     r11, [rdx+16]\n\tmov     [rdi+16], r11\n%%2:\n%endmacro\n\n;\t(rbx, rbp) = (rsi, rbp) - (rdx, rbp)\n%macro mpn_sub 0\n\tmov     rax, rbp\n\tand     rax, 3\n\tshr     rbp, 2\n\tcmp     rbp, 0\n;\tcarry flag is clear here\n\tjnz     %%1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n\tjmp     %%2\n\talign 16\n%%1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rbx], r11\n\tmov     [rbx+8], r8\n\tlea     rbx, [rbx+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rbx-16], r9\n\tdec     rbp\n\tmov     [rbx-8], r10\n\tjnz     %%1\n\tinc     rax\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rbx], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rbx+8], r11\n\tdec     rax\n\tjz      %%2\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rbx+16], r11\n%%2:\n%endmacro\n\n;\tchanges from standard addmul\n;\tchange  r8 to r12   and rcx to r13   and rdi to r8\n;\treemove ret and write last limb but to beginning\n%macro addmulloop 1\n\talign 16\n%%1:\n\tmov     r10, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tdb      0x26\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12, 0\n\tmov     r9, 0\n\tmul 
    r13\n\tadd     [r8+r11*8+16], r10\n\tdb      0x26\n\tadc     rbx, rax\n\tdb      0x26\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tdb      0x26\n\tadc     r12, rax\n\tdb      0x26\n\tadc     r9, rdx\n\tadd     r11, 4\n\tmov     rax, [rsi+r11*8+8]\n\tjnc     %%1\n%endmacro\n\n%macro addmulpropro0 0\n\timul    r13, rcx\n\tlea     r8, [r8-8]\n%endmacro\n\n%macro addmulpro0 0\n\tmov     r11, r14\n\tlea     r8, [r8+8]\n\tmov     rax, [rsi+r14*8]\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext0 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmov     r9d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tmov     rax, [rsi+r11*8+32]\n\tmul     r13\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, rax\n\tadc     r9, rdx\n\timul    r13, rcx\n\tadd     [r8+r11*8+32], r12\n\tadc     r9, 0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r9\n%endmacro\n\n%macro addmulpropro1 0\n%endmacro\n\n%macro addmulpro1 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext1 0\n\tmov     r10d, 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     r12d, 0\n\tmul     r13\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, rax\n\tadc     r12, rdx\n\tadd     [r8+r11*8+24], rbx\n\tmov     r13, [r8+r14*8+8]\n\tadc     r12, 
0\n\tsub     r15, 1\n\tmov     [r8+r14*8], r12\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro2 0\n%endmacro\n\n%macro addmulpro2 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext2 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+r11*8+16]\n\tmul     r13\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, rax\n\tmov     ebx, 0\n\tadc     rbx, rdx\n\tmov     r13, [r8+r14*8+8]\n\tadd     [r8+r11*8+16], r10\n\tadc     rbx, 0\n\tmov     [r8+r14*8], rbx\n\tsub     r15, 1\n\tlea     r8, [r8+8]\n%endmacro\n\n%macro addmulpropro3 0\n%endmacro\n\n%macro addmulpro3 0\n\timul    r13, rcx\n\tmov     rax, [rsi+r14*8]\n\tmov     r11, r14\n\tmul     r13\n\tmov     r12, rax\n\tmov     rax, [rsi+r14*8+8]\n\tmov     r9, rdx\n\tcmp     r14, 0\n%endmacro\n\n%macro addmulnext3 0\n\tmul     r13\n\tadd     [r8+r11*8], r12\n\tadc     r9, rax\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tadd     [r8+r11*8+8], r9\n\tadc     r10, 0\n\tmov     r13, [r8+r14*8+8]\n\tmov     [r8+r14*8], r10\n\tlea     r8, [r8+8]\n\tsub     r15, 1\n%endmacro\n\n;\tchange r8 to r12\n;\twrite top limb ax straight to mem dont return  (NOTE we WRITE NOT ADD)\n%macro mpn_addmul_1_int 1\n\taddmulpropro%1\n\talign 16\n%%1:\n\taddmulpro%1\n\tjge     %%2\n\taddmulloop %1\n%%2:\n\taddmulnext%1\n\tjnz     %%1\n\tjmp     end\n%endmacro\n\n\tBITS 64\n    \n   GLOBAL_FUNC mpn_redc_1\n\tmov r9,r8\n\tmov r8,rsi\n\tmov rsi,rdx\n\tmov rdx,rcx\n\tmov rcx,r9\n\n\tcmp     rdx, 1\n\tje      one\n\tpush    r13\n\tpush    r14\n\tpush    rbx\n\tpush    r12\n\tpush    r15\n\tpush    rbp\n\tmov     r14, 5\n\tsub     r14, rdx\n;\tstore copys\n\tpush    rsi\n\tpush    r8\n\tlea     r8, [r8+rdx*8-40]\n\tlea     rsi, [rsi+rdx*8-40]\n\tmov     rbp, rdx\n\tmov     r15, rdx\n\tmov     rax, r14\n\tand     rax, 3\n\tmov     r13, 
[r8+r14*8]\n\tje      case0\n\tjp      case3\n\tcmp     rax, 1\n\tje      case1\ncase2:\n\tmpn_addmul_1_int 2\n\talign 16\ncase0:\n\tmpn_addmul_1_int 0\n\talign 16\ncase1:\n\tmpn_addmul_1_int 1\n\talign 16\ncase3:\n\tmpn_addmul_1_int 3\n\talign 16\nend:\n\tmov     rcx, rbp\n\tpop     rdx\n\tlea     rsi, [rdx+rbp*8]\n\tmov     rbx, rdi\n\tmpn_add\n;\tmpnadd(rdi,rsi,rdx,rcx)\n\tpop     rdx\n\tjnc     skip\n\tmov     rsi, rbx\n\tmpn_sub\n;\tmpn_sub_n(rbx,rsi,rdx,rbp) we can certainly improve this sub\nskip:\n\tpop     rbp\n\tpop     r15\n\tpop     r12\n\tpop     rbx\n\tpop     r14\n\tpop     r13\n\tret\n\talign 16\none:\n\tmov     r9, [r8]\n\tmov     r11, [rsi]\n\timul    rcx, r9\n\tmov     rax, rcx\n\tmul     r11\n\tadd     rax, r9\n;\trax is zero here\n\tadc     rdx, [r8+8]\n\tcmovnc  r11, rax\n\tsub     rdx, r11\n\tmov     [rdi], rdx\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/rsh_divrem_hensel_qr_1_1.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9  / rcx ) >> r8 \nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1 with shifting on the output of the quotient\nC\tOn k8/k10 the shifting comes for free so no need to have different\nC\tfn for that. 
And on K8/K10 this runs at 10c/l which is optimal\nC\tThis function \"replaces\" divexact_1 and modexact_1_odd\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\nmov %r9,%r10\nmov $1,%r9\nsub %rdx,%r9\nlea (%rdi,%rdx,8),%rdi\nlea (%rsi,%rdx,8),%rsi\n\nmov %rcx,%rdx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\nmov -8(%rsi,%r9,8),%rax\nsub %r10,%rax\nsbb %r8,%r8\nimul %r11,%rax\nMOVQ %rax,%mm4\nmovq %mm4,%mm5\npsrlq %mm0,%mm4\npsllq %mm1,%mm5\npsrlq %mm1,%mm5\nmul %rcx\ncmp $0,%r9\nje L(one)\nadd %r8,%r8\nALIGN(16)\nL(loop):\n    movq %mm4,%mm2\n    mov (%rsi,%r9,8),%rax\n    sbb %rdx,%rax\n    sbb %r8,%r8\n    imul %r11,%rax\n    MOVQ %rax,%mm3\n    movq %mm3,%mm4\n    psllq %mm1,%mm3\n    psrlq %mm0,%mm4\n    por %mm3,%mm2\n    movq %mm2,-8(%rdi,%r9,8)\n    mul %rcx\n    add %r8,%r8\n    inc %r9\n    jnz L(loop)\nL(skiploop):\nmovq %mm4,-8(%rdi,%r9,8)\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\nL(one):\nmovq %mm4,-8(%rdi,%r9,8)\nadd %r8,%r8\nmov $0,%rax\nadc %rdx,%rax\nemms\nret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/rsh_divrem_hensel_qr_1_2.asm",
    "content": "dnl  X86_64 mpn_rsh_divrem_hensel_qr_1_2\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\t(rdi,rdx)=( (rsi,rdx)-r9 / rcx ) >> r8    rdx>=1\nC\trax=hensel remainder from div \n\nC\tThis is divrem_hensel_1_2 with shifting on the output of the quotient\n\ndefine(`MOVQ',`movd')\n\nASM_START()\nPROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\nC\t// 3limb minimum for the mo\nmov %r9,%r10\nmov $2,%r9\nsub %rdx,%r9\nlea -16(%rdi,%rdx,8),%rdi\nlea -16(%rsi,%rdx,8),%rsi\n\npush %r12\npush %r13\npush %r14\n\nmov %rcx,%rdx\t\nC // rdx is 3 bit inverse\n\nmov $64,%rax\nsub %r8,%rax\nMOVQ %r8,%mm0\nMOVQ %rax,%mm1\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 4 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC //rdx has 8 bits\n\nmov %rdx,%rax\nimul %ecx,%edx\nmov $2,%r11\nsub %rdx,%r11\nimul %eax,%r11d\t\nC //r11 has 16 bits\n\nmov %r11,%rax\nimul %ecx,%r11d \nmov $2,%rdx\nsub %r11,%rdx\t\t\nimul %eax,%edx\t\nC // rdx has 32 bits\n\nmov %rdx,%rax\nimul %rcx,%rdx\nmov $2,%r11\nsub %rdx,%r11\nimul %rax,%r11\t\nC 
//r11 has 64 bits\n\nmov %r11,%rax\nmov %r11,%r12\nmul %rcx\nneg %rdx\nimul %rdx,%r12\t\nC // r12,r11 has 128 bits\n\nC // for the first limb we can not store (as we have to shift) so we need to\nC // do first limb separately , we could do it as normal as an extention of\nC // the loop , but if we do it as a 1 limb inverse then we can start it\nC // eailer , ie interleave it with the calculation of the 2limb inverse\n\nmov %r11,%r13\nmov %r12,%r14\n\n\nmov (%rsi,%r9,8),%r11\nsub %r10,%r11\nsbb %r10,%r10\n\nimul %r13,%r11\nMOVQ %r11,%mm2\npsrlq %mm0,%mm2\nmov %rcx,%rax\nmul %r11\nmov 8(%rsi,%r9,8),%r11\nmov 16(%rsi,%r9,8),%r12\nadd %r10,%r10\nsbb %rdx,%r11\nsbb $0,%r12\nsbb %r10,%r10\n\n\nadd $2,%r9\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tmov %r12,%r8\n\tmov %r13,%rax\n\tmul %r11\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,-16(%rdi,%r9,8)\n\n\timul %r14,%r11\n\timul %r13,%r12\n\tadd %r11,%rdx\n\tadd %r12,%rdx\n\t\tmov 8(%rsi,%r9,8),%r11\n\t\tmov 16(%rsi,%r9,8),%r12\n\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb $0,%r12\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r12\n\t\tsbb $0,%r10\n\tadd $2,%r9\n\tjnc L(lp)\nL(skiplp):\nmov %r12,%r8\nmov %r13,%rax\nmul %r11\n\nMOVQ %rax,%mm3\nmovq %mm3,%mm4\npsllq %mm1,%mm3\npsrlq %mm0,%mm4\npor %mm3,%mm2\nmovq %mm2,-16(%rdi,%r9,8)\n\nimul %r14,%r11\nimul %r13,%r12\nadd %r11,%rdx\nadd %r12,%rdx\ncmp $0,%r9\njne L(case0)\nL(case1):\n\tmov 8(%rsi,%r9,8),%r11\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\t\tadd %r10,%r10\n\t\tsbb $0,%r11\n\t\tsbb %r10,%r10\n\tcmp %rax,%r8\n\t\tsbb %rdx,%r11\n\t\tsbb $0,%r10\n\tmov %r11,%rax\n\timul %r13,%rax\n\n\tMOVQ %rax,%mm3\n\tmovq %mm3,%mm4\n\tpsllq 
%mm1,%mm3\n\tpsrlq %mm0,%mm4\n\tpor %mm3,%mm2\n\tmovq %mm2,(%rdi,%r9,8)\n\tmovq %mm4,8(%rdi,%r9,8)\n\n\tmul %rcx\n\tadd %r10,%r10\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nL(case0):\n\tMOVQ %rdx,%mm3\n\tmovq %mm3,%mm2\n\tpsllq %mm1,%mm3\n\tpsrlq %mm0,%mm2\n\tpor %mm3,%mm4\n\tmovq %mm4,-8(%rdi,%r9,8)\n\tmovq %mm2,(%rdi,%r9,8)\n\n\tmov %rcx,%rax\n\tmul %rdx\n\tcmp %rax,%r8\n\tmov $0,%rax\n\tadc %rdx,%rax\n\tsub %r10,%rax\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\temms\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/rshift.as",
    "content": "\n;  AMD64 mpn_rshift\n; Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = (rsi,rdx)>>rcx\n;\trax = carry\n;\tdecent assmeblers understand what movq means ,except\n;\tmicrosofts/apple masm (what a suprise there) so for the broken old masm\n;\tassembler.  
Needed for movq reg64,mediareg and movq mediareg,reg64\n;\tonly , where mediareg is xmm or mm \n\n%define MOVQ movd\n\n%include 'yasm_mac.inc'\n\n\tBITS 64\n   GLOBAL_FUNC mpn_rshift\n\tcmp     rdx, 2\n\tja      threeormore\n\tjz      two\none:\n\tmov     rdx, [rsi]\n\tmov     rax, rdx\n\tshr     rdx, cl\n\tneg     rcx\n\tshl     rax, cl\n\tmov     [rdi], rdx\n\tret\ntwo:\n\tmov     r8, [rsi]\n\tmov     r9, [rsi+8]\n\tmov     rax, r8\n\tmov     r11, r9\n\tshr     r8, cl\n\tshr     r9, cl\n\tneg     rcx\n\tshl     r11, cl\n\tshl     rax, cl\n\tor      r8, r11\n\tmov     [rdi], r8\n\tmov     [rdi+8], r9\n\tret\nthreeormore:\n\tmov     eax, 64\n\tlea     r9, [rsi+8]\n\tsub     rax, rcx\n\tand     r9, -16\n\tMOVQ    xmm0, rcx\n\tMOVQ    xmm1, rax\n\tmovdqa  xmm5, [r9]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tMOVQ    rax, xmm5\n\tcmp     rsi, r9\n\tlea     rsi, [rsi+rdx*8-40]\n\tje      aligned\n\tmovq    xmm2, [r9-8]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tpsrlq   xmm4, xmm0\n\tpor     xmm4, xmm5\n\tmovq    [rdi], xmm4\n\tlea     rdi, [rdi+8]\n\tdec     rdx\n\tMOVQ    rax, xmm2\naligned:\n\tlea     rdi, [rdi+rdx*8-40]\n\tpsrlq   xmm3, xmm0\n\tmov     r8d, 5\n\tsub     r8, rdx\n\tjnc     skiploop\n\talign   16\nloop1:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovdqa  xmm5, [rsi+r8*8+32]\n\tmovdqa  xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tadd     r8, 4\n\tjnc     loop1\nskiploop:\n\tcmp     r8, 2\n\tja      left0\n\tjz      left1\n\tjp      left2\nleft3:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tmovq    xmm5, 
[rsi+r8*8+32]\n\tmovq    xmm3, xmm5\n\tpsllq   xmm5, xmm1\n\tshufpd  xmm2, xmm5, 1\n\tpsrlq   xmm3, xmm0\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8+32], xmm3\n\tret\n\talign   16\nleft2:\n\tmovdqa  xmm2, [rsi+r8*8+16]\n\tmovdqa  xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tmovhpd  [rdi+r8*8+24], xmm4\n\tret\n\talign   16\nleft1:\n\tmovq    xmm2, [rsi+r8*8+16]\n\tmovq    xmm4, xmm2\n\tpsllq   xmm2, xmm1\n\tshufpd  xmm5, xmm2, 1\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tpsrlq   xmm4, xmm0\n\tpsrldq  xmm2, 8\n\tpor     xmm4, xmm2\n\tmovq    [rdi+r8*8+16], xmm4\n\tret\n\talign   16\nleft0:\n\tpsrldq  xmm5, 8\n\tpor     xmm3, xmm5\n\tmovq    [rdi+r8*8], xmm3\n\tmovhpd  [rdi+r8*8+8], xmm3\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/store.asm",
    "content": "dnl  mpn_store\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndefine(`MOVQ',`movd')\n\nC\tmpn_store(mp_ptr,mp_size_t,mp_limb_t)\nC\trax          rdi,   rsi,    rdx\n\nASM_START()\nPROLOGUE(mpn_store)\nlea -32(%rdi),%rdi\ncmp $0,%rsi\njz L(case0)\nMOVQ %rdx,%xmm0\nmovddup %xmm0,%xmm0\ntest $0xF,%rdi\njz L(notodd)\n\tmov %rdx,32(%rdi)\n\tlea 8(%rdi),%rdi\n\tsub $1,%rsi\nL(notodd):\nsub $4,%rsi\njc L(skiplp)\nALIGN(16)\nL(lp):\n\tlea 32(%rdi),%rdi\n\tsub $4,%rsi\n\tmovdqa %xmm0,(%rdi)\n\tmovdqa %xmm0,16(%rdi)\n\tjnc L(lp)\nL(skiplp):\ncmp $-2,%rsi\nja L(case3)\njz L(case2)\njp L(case1)\nL(case0):\nret\nL(case3):// rsi=-1\n\tmovdqa %xmm0,32(%rdi)\nL(case1):\n\tmov %rdx,56(%rdi,%rsi,8)\n\tret\nL(case2):// rsi=-2\n\tmovdqa %xmm0,32(%rdi)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/sub_n.as",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2008 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx)=(rsi,rcx)-(rdx,rcx)\n;\trax=borrow\n\t\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n   GLOBAL_FUNC mpn_sub_n\n\tmov     rax, rcx\n\tand     rax, 3\n\tshr     rcx, 2\n\tcmp     rcx, 0\n;\tcarry flag is clear here\n\tjnz     loop1\n\tmov     r11, [rsi]\n\tsub     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end1\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend1:\n\tadc     rax, rax\n\tret\n\talign   8\nloop1:\n\tmov     r11, [rsi]\n\tmov     r8, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tsbb     r11, [rdx]\n\tsbb     r8, [rdx+8]\n\tlea     rdx, [rdx+32]\n\tmov     [rdi], r11\n\tmov     [rdi+8], r8\n\tlea     rdi, [rdi+32]\n\tmov     r9, [rsi-16]\n\tmov     r10, [rsi-8]\n\tsbb     r9, [rdx-16]\n\tsbb     r10, [rdx-8]\n\tmov     [rdi-16], r9\n\tdec     rcx\n\tmov     [rdi-8], r10\n\tjnz     loop1\n\tinc     rax\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi]\n\tsbb     r11, [rdx]\n\tmov     [rdi], r11\n\tdec     
rax\n\tjz      end\n\tmov     r11, [rsi+8]\n\tsbb     r11, [rdx+8]\n\tmov     [rdi+8], r11\n\tdec     rax\n\tjz      end\n\tmov     r11, [rsi+16]\n\tsbb     r11, [rdx+16]\n\tmov     [rdi+16], r11\n\tdec     rax\nend:\n\tadc     rax, rax\n\tret\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/submul_1.asm",
    "content": "dnl  mpn_submul_1\n\ndnl  Copyright 2011 The Code Cavern\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nASM_START()\nPROLOGUE(mpn_submul_1)\nmov (%rsi),%rax\ncmp $1,%rdx\nje one\t\t\nmov $5,%r11\nlea -40(%rsi,%rdx,8),%rsi\nlea -40(%rdi,%rdx,8),%rdi\nsub %rdx,%r11\nmul %rcx\n.byte 0x26\nmov %rax,%r8\n.byte 0x26\nmov 8(%rsi,%r11,8),%rax\n.byte 0x26\nmov %rdx,%r9\n.byte 0x26\ncmp $0,%r11\n.byte 0x26\nmov %r12,-8(%rsp)\n.byte 0x26\njge skiplp\nlp:\txor %r10,%r10\n\tmul %rcx\n\tsub %r8,(%rdi,%r11,8)\n\tadc %rax,%r9\n\tadc %rdx,%r10\n\tmov 16(%rsi,%r11,8),%rax\n\tmul %rcx\n\tsub %r9,8(%rdi,%r11,8)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi,%r11,8),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tsub %r10,16(%rdi,%r11,8)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi,%r11,8),%rax\n \tmul %rcx\n\tsub %r12,24(%rdi,%r11,8)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tadd $4,%r11\n\tmov 8(%rsi,%r11,8),%rax\n\tjnc lp\nskiplp:\nxor %r10,%r10\nmul %rcx\nsub %r8,(%rdi,%r11,8)\nadc %rax,%r9\nadc %rdx,%r10\ncmp $2,%r11\nja case0\njz case1\njp case2\ncase3:\tmov 16(%rsi),%rax\n\tmul %rcx\n\tsub %r9,8(%rdi)\n\tadc 
%rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 24(%rsi),%rax\n\txor %r8,%r8\n\txor %r9,%r9\n\tmul %rcx\n\tsub %r10,16(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tmov 32(%rsi),%rax\n \tmul %rcx\n\tsub %r12,24(%rdi)\n\tadc %rax,%r8\n\tadc %rdx,%r9\n\tsub %r8,32(%rdi)\n\tadc $0,%r9\n\tmov -8(%rsp),%r12\n\tmov %r9,%rax\n\tret\ncase2:\tmov 24(%rsi),%rax\n\tmul %rcx\n\tsub %r9,16(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tmov 32(%rsi),%rax\n\txor %r8,%r8\n\t#xor %r9,%r9\n\tmul %rcx\n\tsub %r10,24(%rdi)\n\tadc %rax,%r12\n\tadc %rdx,%r8\n\tsub %r12,32(%rdi)\n\tadc $0,%r8\n\tmov -8(%rsp),%r12\n\tmov %r8,%rax\n\tret\nALIGN(16)\ncase1:\tmov 32(%rsi),%rax\n\tmul %rcx\n\tsub %r9,24(%rdi)\n\tadc %rax,%r10\n\tmov $0,%r12d\n\tadc %rdx,%r12\n\tsub %r10,32(%rdi)\n\tadc $0,%r12\n\tmov %r12,%rax\n\tmov -8(%rsp),%r12\n\tret\none:\tmul %rcx\n\tsub %rax,(%rdi)\n\tadc $0,%rdx\n\tmov %rdx,%rax\n\tret\ncase0:\tsub %r9,32(%rdi)\n\tadc $0,%r10\n\tmov -8(%rsp),%r12\n\tmov %r10,%rax\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/xnor_n.asm",
    "content": "dnl  mpn_xnor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xnor_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_xnor_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\npcmpeqb %xmm4,%xmm4\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tadd $4,%r8\n\tmovdqu 16-32(%rsi,%r8,8),%xmm3\n\tpxor %xmm3,%xmm1\n\tpxor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,-32(%rdi,%r8,8)\n\tpxor %xmm4,%xmm1\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tmov 16(%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tpxor %xmm2,%xmm0\n\tpxor %xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tnot %rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tpxor 
%xmm4,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\t\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tnot %rax\n\tmov %rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sandybridge/xor_n.asm",
    "content": "dnl  mpn_xor_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\nC\tret mpn_xor_n(mp_ptr,mp_srcptr,mp_srcptr,mp_size_t)\nC\trax             rdi,   rsi,      rdx,     rcx\n\nASM_START()\nPROLOGUE(mpn_xor_n)\nmov $3,%r8\nlea -24(%rsi,%rcx,8),%rsi\nlea -24(%rdx,%rcx,8),%rdx\nlea -24(%rdi,%rcx,8),%rdi\nsub %rcx,%r8\njnc L(skiplp)\nALIGN(16)\nL(lp):\n\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu 16(%rdx,%r8,8),%xmm1\n\tmovdqu 16(%rsi,%r8,8),%xmm3\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tpxor %xmm3,%xmm1\n\tadd $4,%r8\n\tmovdqu %xmm1,16-32(%rdi,%r8,8)\n\tjnc L(lp)\nL(skiplp):\ncmp $2,%r8\nja L(case0)\nje L(case1)\njp L(case2)\t\nL(case3):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmov 16(%rdx,%r8,8),%rax\n\tmov 16(%rsi,%r8,8),%rcx\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\txor %rcx,%rax\n\tmov %rax,16(%rdi,%r8,8)\nL(case0):\tret\nL(case2):\tmovdqu (%rdx,%r8,8),%xmm0\n\tmovdqu (%rsi,%r8,8),%xmm2\n\tpxor %xmm2,%xmm0\n\tmovdqu %xmm0,(%rdi,%r8,8)\n\tret\nL(case1):\tmov (%rdx,%r8,8),%rax\n\tmov (%rsi,%r8,8),%rcx\n\txor %rcx,%rax\n\tmov 
%rax,(%rdi,%r8,8)\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/skylake/add_n.as",
    "content": ";  AMD64 mpn_add_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB0   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define SizeRest r11\n    %define LIMB0   rax\n%endif\n\n%define ADDSUB add\n%define ADCSBB adc\n\n\talign   32\n\tBITS    64\n\nGLOBAL_FUNC mpn_add_n\n\n\tmov     SizeRest, Size\n\tand\tSizeRest, 7\n\tshr     Size, 3\n\tclc\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tmov     LIMB0, [Inp1P]        ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P]        ;2\t2\tp06 p23\t\t1\n\tmov     [SumP], LIMB0         ;1\t2\tp237 p4\t3\t1\n\tvpblendd YMM0, YMM0, YMM0, 0  ; This one is black magic. 
Beware.\n\tmov     LIMB0, [Inp1P+8]      ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+8]      ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+8], LIMB0       ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+16]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+16]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+16], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+24]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+24]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+24], LIMB0      ;1\t2\tp237 p4\t3\t1\n\n\tmov     LIMB0, [Inp1P+32]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+32]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+32], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+40]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+40]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+40], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+48]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+48]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+48], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+56]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+56]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+56], LIMB0      ;1\t2\tp237 p4\t3\t1\n\n\tlea     Inp1P, [Inp1P+64]     ;1\t1\tp15\t1\t0.5\n\tlea     Inp2P, [Inp2P+64]     ;1\t1\tp15\t1\t0.5\n\tlea     SumP, [SumP+64]       ;1\t1\tp15\t1\t0.5\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tmov\tLIMB0, [Inp1P]\n\tADCSBB\tLIMB0, [Inp2P]\n\tmov\t[SumP], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+8]\n\tADCSBB\tLIMB0, [Inp2P+8]\n\tmov\t[SumP+8], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+16]\n\tADCSBB\tLIMB0, [Inp2P+16]\n\tmov\t[SumP+16], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+24]\n\tADCSBB\tLIMB0, [Inp2P+24]\n\tmov\t[SumP+24], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, 0\n\tsetc al\n\tret"
  },
  {
    "path": "mpn/x86_64/skylake/avx/add_err1_n.as",
    "content": ";  AMD64 mpn_add_err1_n\n;  Copyright 2017 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)+CyIn\n;\trax = carry\n;\t(rcx,2) = rev(r8,rcx) \\dot (carry,rcx) where carry is the sequence\n;       of carries from the addition of (rsi,rcx)+(rdx,rcx)\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB0   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define EP      rcx\n    %define YP      r8\n    %define Size    r9\n    %define CyIn    [rsp+8]\n    %define LIMB0   rax\n    %define E0      r12\n    %define E1      r13\n    %define Zero    r14\n    %define Dummy   rbx\n%endif\n\n\talign   32\n\tBITS    64\n\n%macro  DO_LIMB 1\n\tmov     LIMB0, [Inp1P + %1*8]\n\tadc     LIMB0, [Inp2P + %1*8]\n\tmov     [SumP + %1*8], LIMB0\n\tmov\tLIMB0, [YP - %1*8]\n\tcmovnc\tLIMB0, Zero\n\tinc\tDummy\t\t\t; OF = 0\n\tadox\tE0, LIMB0\n\tadox\tE1, Zero\n%endmacro\n\nGLOBAL_FUNC mpn_add_err1_n\n\n\tmov\tLIMB0, CyIn\n\tpush\tr12\n\tpush\tr13\n\tpush\tr14\n\tpush\trbx\n\tmov\tr11, rcx\n    %define EP  
    r11\n    %define SizeRest rcx\t; Need it in rcx for jrcxz\n\tmov     SizeRest, Size\n\tlea\tYP, [YP + Size*8 - 8]\n\tand\tSizeRest, 7\n\txor\tZero, Zero\n\tmov\tE0, Zero\n\tmov\tE1, Zero\n\tshr     Size, 3\n\tbt\tLIMB0, 0\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tDO_LIMB 0\n\tDO_LIMB 1\n\tDO_LIMB 2\n\tDO_LIMB 3\n\tDO_LIMB 4\n\tDO_LIMB 5\n\tDO_LIMB 6\n\tDO_LIMB 7\n\n\tlea     Inp1P, [Inp1P+64]\n\tlea     Inp2P, [Inp2P+64]\n\tlea     SumP, [SumP+64]\n\tlea     YP, [YP-64]\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tDO_LIMB 0\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 1\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 2\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 3\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tlea\tYP, [YP-32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, Zero\n\tsetc al\n\tmov\t[EP], E0\n\tmov\t[EP+8], E1\n\tpop\trbx\n\tpop\tr14\n\tpop\tr13\n\tpop\tr12\n\tret\n"
  },
  {
    "path": "mpn/x86_64/skylake/avx/addmul_1.asm",
    "content": "dnl  AMD64 mpn_addmul_1 optimised for Intel Broadwell.\n\ndnl  Copyright 2015 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC AMD K8,K9\tn/a\nC AMD K10\tn/a\nC AMD bull\tn/a\nC AMD pile\tn/a\nC AMD steam\tn/a\nC AMD excavator\t ?\nC AMD bobcat\tn/a\nC AMD jaguar\tn/a\nC Intel P4\tn/a\nC Intel core2\tn/a\nC Intel NHM\tn/a\nC Intel SBR\tn/a\nC Intel IBR\tn/a\nC Intel HWL\tn/a\nC Intel BWL   1.8-1.9\nC Intel atom\tn/a\nC Intel SLM\tn/a\nC VIA nano\tn/a\n\nC The loop of this code is the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\nC TODO\nC  * Put an initial mulx before switching, targeting some free registers.\nC  * Tune feed-in code.\nC  * Trim nop execution after L(f2).\nC  * Port to DOS64, not forgetting nop execution.\n\ndefine(`rp',      `%rdi')   C rcx\ndefine(`up',      `%rsi')   C rdx\ndefine(`n_param', `%rdx')   C r8\ndefine(`v0_param',`%rcx')   C r9\n\ndefine(`n',       `%rcx')\n\ndnl ABI_SUPPORT(DOS64)\nABI_SUPPORT(STD64)\n\ndnl IFDOS(`\tdefine(`up', ``%rsi'')\t') dnl\ndnl IFDOS(`\tdefine(`rp', ``%rcx'')\t') dnl\ndnl IFDOS(`\tdefine(`vl', ``%r9'')\t') dnl\ndnl IFDOS(`\tdefine(`r9', ``rdi'')\t') dnl\ndnl IFDOS(`\tdefine(`n',  ``%r8'')\t') dnl\ndnl IFDOS(`\tdefine(`r8', ``r11'')\t') dnl\n\nASM_START()\n\tTEXT\n\tALIGN(32)\nPROLOGUE(mpn_addmul_1)\n\n\tmov\tv0_param, %r10\n\tmov\tn_param, n\n\tmov\tR32(n_param), R32(%r8)\n\tshr\t$3, n\n\tand\t$7, R32(%r8)\t\tC clear OF, CF as side-effect\n\tmov\t%r10, %rdx\n\tlea\tL(tab)(%rip), %r10\nifdef(`PIC',\n`\tmovslq\t(%r10,%r8,4), %r8\n\tlea\t(%r8, %r10), %r10\n\tjmp\t*%r10\n',`\n\tjmp\t*(%r10,%r8,8)\n')\n\tJUMPTABSECT\n\tALIGN(8)\nL(tab):\tJMPENT(\tL(f0), L(tab))\n\tJMPENT(\tL(f1), L(tab))\n\tJMPENT(\tL(f2), L(tab))\n\tJMPENT(\tL(f3), L(tab))\n\tJMPENT(\tL(f4), L(tab))\n\tJMPENT(\tL(f5), L(tab))\n\tJMPENT(\tL(f6), L(tab))\n\tJMPENT(\tL(f7), L(tab))\n\tTEXT\n\nL(f0):\tmulx(\t(up), %r10, %r8)\n\tlea\t-8(up), up\n\tlea\t-8(rp), rp\n\tlea\t-1(n), 
n\n\tjmp\tL(b0)\n\nL(f3):\tmulx(\t(up), %r9, %rax)\n\tlea\t16(up), up\n\tlea\t-48(rp), rp\n\tjmp\tL(b3)\n\nL(f4):\tmulx(\t(up), %r10, %r8)\n\tlea\t24(up), up\n\tlea\t-40(rp), rp\n\tjmp\tL(b4)\n\nL(f5):\tmulx(\t(up), %r9, %rax)\n\tlea\t32(up), up\n\tlea\t-32(rp), rp\n\tjmp\tL(b5)\n\nL(f6):\tmulx(\t(up), %r10, %r8)\n\tlea\t40(up), up\n\tlea\t-24(rp), rp\n\tjmp\tL(b6)\n\nL(f1):\tmulx(\t(up), %r9, %rax)\n\tjrcxz\tL(1)\n\tjmp\tL(b1)\nL(1):\tadd\t(rp), %r9\n\tmov\t%r9, (rp)\n\tadc\t%rcx, %rax\t\tC relies on rcx = 0\n\tret\n\nL(end):\tadox(\t(rp), %r9)\n\tmov\t%r9, (rp)\n\tadox(\t%rcx, %rax)\t\tC relies on rcx = 0\n\tadc\t%rcx, %rax\t\tC relies on rcx = 0\n\tret\n\nifdef(`PIC',\n`\tnop;nop;nop;nop',\n`\tnop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop')\n\nL(f2):\tmulx(\t(up), %r10, %r8)\n\tlea\t8(up), up\n\tlea\t8(rp), rp\n\tmulx(\t(up), %r9, %rax)\n\n\tALIGN(32)\nL(top):\tadox(\t-8,(rp), %r10)\n\tadcx(\t%r8, %r9)\n\tmov\t%r10, -8(rp)\n\tjrcxz\tL(end)\nL(b1):\tmulx(\t8,(up), %r10, %r8)\n\tadox(\t(rp), %r9)\n\tlea\t-1(n), n\n\tmov\t%r9, (rp)\n\tadcx(\t%rax, %r10)\nL(b0):\tmulx(\t16,(up), %r9, %rax)\n\tadcx(\t%r8, %r9)\n\tadox(\t8,(rp), %r10)\n\tmov\t%r10, 8(rp)\nL(b7):\tmulx(\t24,(up), %r10, %r8)\n\tlea\t64(up), up\n\tadcx(\t%rax, %r10)\n\tadox(\t16,(rp), %r9)\n\tmov\t%r9, 16(rp)\nL(b6):\tmulx(\t-32,(up), %r9, %rax)\n\tadox(\t24,(rp), %r10)\n\tadcx(\t%r8, %r9)\n\tmov\t%r10, 24(rp)\nL(b5):\tmulx(\t-24,(up), %r10, %r8)\n\tadcx(\t%rax, %r10)\n\tadox(\t32,(rp), %r9)\n\tmov\t%r9, 32(rp)\nL(b4):\tmulx(\t-16,(up), %r9, %rax)\n\tadox(\t40,(rp), %r10)\n\tadcx(\t%r8, %r9)\n\tmov\t%r10, 40(rp)\nL(b3):\tadox(\t48,(rp), %r9)\n\tmulx(\t-8,(up), %r10, %r8)\n\tmov\t%r9, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\t%rax, %r10)\n\tmulx(\t(up), %r9, %rax)\n\tjmp\tL(top)\n\nL(f7):\tmulx(\t(up), %r9, %rax)\n\tlea\t-16(up), up\n\tlea\t-16(rp), rp\n\tjmp\tL(b7)\nEPILOGUE()\nASM_END()\n"
  },
  {
    "path": "mpn/x86_64/skylake/avx/mul_1.as",
    "content": ";  AMD64 mpn_mul_1\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = rcx*(rsi,rdx)\n;\trax = high word of product\n\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n; the following register allocation scheme is valid for Linux\n\n    %define RP      RDI\n    %define S1P     RSI\n    %define Size    RDX\n    %define S2      RCX\n\n    %define MulLo0  R8\n    %define MulHi0  R9\n    %define MulLo1  R10\n    %define MulHi1  R11\n    %define MulLo2  R12         ; SAVE!\n    %define MulHi2  R13         ; SAVE!\n    %define MulLo3  R14         ; SAVE!\n    %define MulHi3  RBX         ; SAVE!\n\n    %define SaveRBX XMM0\n    %define SaveR12 XMM1\n    %define SaveR13 XMM2\n    %define SaveR14 XMM3\n\n    align   32\nGLOBAL_FUNC mpn_mul_1\n\n    ; this is how I save registers under Linux\n    movq    SaveR14, R14\n    movq    SaveR13, R13\n    movq    SaveR12, R12\n    movq    SaveRBX, RBX\n\n    xchg    rcx, rdx\t\t; S2 must be in rdx, Size in rcx is fine\n    %define Size    RCX\n    %define S2      RDX\n    xor     MulHi3, MulHi3\n\n    mov     RAX, Size           ; may be increased by 1 at the end\n    sub     Size, 
4\n    jc      .Post               ; separate handling of remaining max. 3 limb =>\n\n    ; prepare a quadlimb for main-loop entry\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    mulx    MulHi3, MulLo3, [S1P+24]\n    add     S1P, 32\n    add     MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    jmp     .Check              ; enter main loop =>\n\n    ; main loop (unloaded operands)\n    ; - 1.25      cycles per limb in L1D$\n    ; - 1.25      cycles per limb in L2D$\n    ; - 1.60-1.72 cycles per limb in L3D$\n    align   32\n  .Loop:\n\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     [RP+24], MulLo3\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    mov     [RP+32], MulLo0\n    adc     MulLo1, MulHi0\n    mov     [RP+40], MulLo1\n    adc     MulLo2, MulHi1\n    mov     [RP+48], MulLo2\n    mulx    MulHi3, MulLo3, [S1P+24]\n    mulx    MulHi0, MulLo0, [S1P+32]\n    mulx    MulHi1, MulLo1, [S1P+40]\n    adc     MulLo3, MulHi2\t; no carry-out here\n    adc     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    mulx    MulHi2, MulLo2, [S1P+48]\n    adc     MulLo2, MulHi1\n    mov     [RP+56], MulLo3\n    mulx    MulHi3, MulLo3, [S1P+56]\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    add     S1P, 64\n    add     RP, 64\n\n  .Check:\n\n    sub     Size, 8\n    jnc     .Loop\n\n    ; core loop roll-out 8 can generate dangling quad-limb\n    test    Size, 4\n    je      .Store              ; no dangling quad-limb =>\n\n    mov     [RP], MulLo0\n    mulx    MulHi0, MulLo0, [S1P]\n    mov     [RP+8], MulLo1\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mov     [RP+16], MulLo2\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    mov     [RP+24], MulLo3\n    mulx    MulHi3, MulLo3, [S1P+24]\n    adc     
MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    add     S1P, 32\n    add     RP, 32\n\n    ; store remaining quad-limb from main loop\n  .Store:\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     [RP+24], MulLo3\n    add     RP, 32\n\n    ; handle final 0-3 single limb of S1P\n  .Post:\n\n    and     Size, 3\n    je      .Post0\n\n    cmp     Size, 2\n    ja      .Post3\n    je      .Post2\n\n  .Post1:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    add     MulLo0, MulHi3\n    adc     MulHi0, 0\n    mov     [RP], MulLo0\n    mov     rax, MulHi0\n    jmp     .Exit\n\n  .Post2:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    add     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    adc     MulHi1, 0\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     rax, MulHi1\n    jmp     .Exit\n\n  .Post3:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulHi2, 0\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     rax, MulHi2\n    jmp     .Exit\n\n  .Post0:\n\n    mov     rax, MulHi3\n\n  .Exit:\n\n    ; restore registers the Linux way\n    movq    RBX, SaveRBX\n    movq    R12, SaveR12\n    movq    R13, SaveR13\n    movq    R14, SaveR14\n\n    ret\n\n"
  },
  {
    "path": "mpn/x86_64/skylake/avx/mul_basecase.asm",
    "content": "dnl  AMD64 mpn_mul_basecase optimised for Intel Broadwell.\n\ndnl  Copyright 2015 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  
If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_1\t\taddmul_1\nC AMD K8,K9\tn/a\t\tn/a\nC AMD K10\tn/a\t\tn/a\nC AMD bull\tn/a\t\tn/a\nC AMD pile\tn/a\t\tn/a\nC AMD steam\tn/a\t\tn/a\nC AMD excavator\t ?\t\t ?\nC AMD bobcat\tn/a\t\tn/a\nC AMD jaguar\tn/a\t\tn/a\nC Intel P4\tn/a\t\tn/a\nC Intel core2\tn/a\t\tn/a\nC Intel NHM\tn/a\t\tn/a\nC Intel SBR\tn/a\t\tn/a\nC Intel IBR\tn/a\t\tn/a\nC Intel HWL\t 1.68\t\tn/a\nC Intel BWL\t 1.69\t      1.8-1.9\nC Intel atom\tn/a\t\tn/a\nC Intel SLM\tn/a\t\tn/a\nC VIA nano\tn/a\t\tn/a\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\nC TODO\nC  * Do overlapped software pipelining.\nC  * When changing this, make sure the code which falls into the inner loops\nC    does not execute too many no-ops (for both PIC and non-PIC).\n\ndefine(`rp',      `%rdi')\ndefine(`up',      `%rsi')\ndefine(`un_param',`%rdx')\ndefine(`vp_param',`%rcx')\ndefine(`vn',      `%r8')\n\ndefine(`n',       `%rcx')\ndefine(`n_save',  `%rbp')\ndefine(`vp',      `%r14')\ndefine(`unneg',   `%rbx')\ndefine(`v0',      `%rdx')\ndefine(`jaddr',   `%rax')\n\ndefine(`w0',\t`%r12')\ndefine(`w1',\t`%r9')\ndefine(`w2',\t`%r10')\ndefine(`w3',\t`%r11')\n\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_mul_basecase)\n\n\tcmp\t$2, un_param\n\tja\tL(gen)\n\tmov\t(vp_param), %rdx\n\tmulx\t(up), %rax, %r9\t\tC 0 1\n\tje\tL(s2x)\n\nL(s11):\tmov\t%rax, (rp)\n\tmov\t%r9, 8(rp)\n\t\n\tret\n\nL(s2x):\tcmp\t$2, vn\n\tmulx\t8(up), %r8, %r10\tC 1 2\n\tje\tL(s22)\n\nL(s21):\tadd\t%r8, %r9\n\tadc\t$0, %r10\n\tmov\t%rax, (rp)\n\tmov\t%r9, 8(rp)\n\tmov\t%r10, 16(rp)\n\t\n\tret\n\nL(s22):\tadd\t%r8, %r9\t\tC 1\n\tadc\t$0, %r10\t\tC 2\n\tmov\t8(vp_param), %rdx\n\tmov\t%rax, (rp)\n\tmulx\t(up), %r8, %r11\t\tC 1 2\n\tmulx\t8(up), %rax, %rdx\tC 2 3\n\tadd\t%r11, %rax\t\tC 2\n\tadc\t$0, %rdx\t\tC 3\n\tadd\t%r8, %r9\t\tC 1\n\tadc\t%rax, 
%r10\t\tC 2\n\tadc\t$0, %rdx\t\tC 3\n\tmov\t%r9, 8(rp)\n\tmov\t%r10, 16(rp)\n\tmov\t%rdx, 24(rp)\n\t\n\tret\n\n\tALIGN(16)\nL(gen):\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\tpush\t%r14\n\n\tmov\tvp_param, vp\n\tlea\t1(un_param), unneg\n\tmov\tun_param, n_save\n\tmov\tR32(un_param), R32(%rax)\n\tand\t$-8, unneg\n\tshr\t$3, n_save\t\tC loop count\n\tneg\tunneg\n\tand\t$7, R32(%rax)\t\tC clear CF for adc as side-effect\n\t\t\t\t\tC note that rax lives very long\n\tmov\tn_save, n\n\tmov\t(vp), v0\n\tlea\t8(vp), vp\n\n\tlea\tL(mtab)(%rip), %r10\nifdef(`PIC',\n`\tmovslq\t(%r10,%rax,4), %r11\n\tlea\t(%r11, %r10), %r10\n\tjmp\t*%r10\n',`\n\tjmp\t*(%r10,%rax,8)\n')\n\nL(mf0):\tmulx\t(up), w2, w3\n\tlea\t56(up), up\n\tlea\t-8(rp), rp\n\tjmp\tL(mb0)\n\nL(mf3):\tmulx\t(up), w0, w1\n\tlea\t16(up), up\n\tlea\t16(rp), rp\n\tinc\tn\n\tjmp\tL(mb3)\n\nL(mf4):\tmulx\t(up), w2, w3\n\tlea\t24(up), up\n\tlea\t24(rp), rp\n\tinc\tn\n\tjmp\tL(mb4)\n\nL(mf5):\tmulx\t(up), w0, w1\n\tlea\t32(up), up\n\tlea\t32(rp), rp\n\tinc\tn\n\tjmp\tL(mb5)\n\nL(mf6):\tmulx\t(up), w2, w3\n\tlea\t40(up), up\n\tlea\t40(rp), rp\n\tinc\tn\n\tjmp\tL(mb6)\n\nL(mf7):\tmulx\t(up), w0, w1\n\tlea\t48(up), up\n\tlea\t48(rp), rp\n\tinc\tn\n\tjmp\tL(mb7)\n\nL(mf1):\tmulx\t(up), w0, w1\n\tjmp\tL(mb1)\n\nL(mf2):\tmulx\t(up), w2, w3\n\tlea\t8(up), up\n\tlea\t8(rp), rp\n\tmulx\t(up), w0, w1\n\n\tALIGN(16)\nL(m1top):\n\tmov\tw2, -8(rp)\n\tadc\tw3, w0\nL(mb1):\tmulx\t8(up), w2, w3\n\tadc\tw1, w2\n\tlea\t64(up), up\n\tmov\tw0, (rp)\nL(mb0):\tmov\tw2, 8(rp)\n\tmulx\t-48(up), w0, w1\n\tlea\t64(rp), rp\n\tadc\tw3, w0\nL(mb7):\tmulx\t-40(up), w2, w3\n\tmov\tw0, -48(rp)\n\tadc\tw1, w2\nL(mb6):\tmov\tw2, -40(rp)\n\tmulx\t-32(up), w0, w1\n\tadc\tw3, w0\nL(mb5):\tmulx\t-24(up), w2, w3\n\tmov\tw0, -32(rp)\n\tadc\tw1, w2\nL(mb4):\tmulx\t-16(up), w0, w1\n\tmov\tw2, -24(rp)\n\tadc\tw3, w0\nL(mb3):\tmulx\t-8(up), w2, w3\n\tadc\tw1, w2\n\tmov\tw0, -16(rp)\n\tdec\tn\n\tmulx\t(up), w0, w1\n\tjnz\tL(m1top)\n\nL(m1end):\n\tmov\tw2, 
-8(rp)\n\tadc\tw3, w0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\n\n\tdec\tvn\n\tjz\tL(done)\n\n\tlea\tL(atab)(%rip), %r10\nifdef(`PIC',\n`\tmovslq\t(%r10,%rax,4), %rax\n\tlea\t(%rax, %r10), jaddr\n',`\n\tmov\t(%r10,%rax,8), jaddr\n')\n\nL(outer):\n\tlea\t(up,unneg,8), up\n\tmov\tn_save, n\n\tmov\t(vp), v0\n\tlea\t8(vp), vp\n\tjmp\t*jaddr\n\nL(f0):\tmulx\t8(up), w2, w3\n\tlea\t8(rp,unneg,8), rp\n\tlea\t-1(n), n\n\tjmp\tL(b0)\n\nL(f3):\tmulx\t-16(up), w0, w1\n\tlea\t-56(rp,unneg,8), rp\n\tjmp\tL(b3)\n\nL(f4):\tmulx\t-24(up), w2, w3\n\tlea\t-56(rp,unneg,8), rp\n\tjmp\tL(b4)\n\nL(f5):\tmulx\t-32(up), w0, w1\n\tlea\t-56(rp,unneg,8), rp\n\tjmp\tL(b5)\n\nL(f6):\tmulx\t-40(up), w2, w3\n\tlea\t-56(rp,unneg,8), rp\n\tjmp\tL(b6)\n\nL(f7):\tmulx\t16(up), w0, w1\n\tlea\t8(rp,unneg,8), rp\n\tjmp\tL(b7)\n\nL(f1):\tmulx\t(up), w0, w1\n\tlea\t8(rp,unneg,8), rp\n\tjmp\tL(b1)\n\nL(am1end):\n\tadox\t(rp), w0\n\tadox\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\n\n\tdec\tvn\t\t\tC clear CF and OF as side-effect\n\tjnz\tL(outer)\nL(done):\n\tpop\t%r14\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\t\n\tret\n\nL(f2):\n\tmulx\t-8(up), w2, w3\n\tlea\t8(rp,unneg,8), rp\n\tmulx\t(up), w0, w1\n\n\tALIGN(16)\nL(am1top):\n\tadox\t-8(rp), w2\n\tadcx\tw3, w0\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(am1end)\nL(b1):\tmulx\t8(up), w2, w3\n\tadox\t(rp), w0\n\tlea\t-1(n), n\n\tmov\tw0, (rp)\n\tadcx\tw1, w2\nL(b0):\tmulx\t16(up), w0, w1\n\tadcx\tw3, w0\n\tadox\t8(rp), w2\n\tmov\tw2, 8(rp)\nL(b7):\tmulx\t24(up), w2, w3\n\tlea\t64(up), up\n\tadcx\tw1, w2\n\tadox\t16(rp), w0\n\tmov\tw0, 16(rp)\nL(b6):\tmulx\t-32(up), w0, w1\n\tadox\t24(rp), w2\n\tadcx\tw3, w0\n\tmov\tw2, 24(rp)\nL(b5):\tmulx\t-24(up), w2, w3\n\tadcx\tw1, w2\n\tadox\t32(rp), w0\n\tmov\tw0, 32(rp)\nL(b4):\tmulx\t-16(up), w0, w1\n\tadox\t40(rp), w2\n\tadcx\tw3, w0\n\tmov\tw2, 40(rp)\nL(b3):\tadox\t48(rp), w0\n\tmulx\t-8(up), w2, w3\n\tmov\tw0, 
48(rp)\n\tlea\t64(rp), rp\n\tadcx\tw1, w2\n\tmulx\t(up), w0, w1\n\tjmp\tL(am1top)\n\n\tJUMPTABSECT\n\tALIGN(8)\nL(mtab):JMPENT(\tL(mf0), L(mtab))\n\tJMPENT(\tL(mf1), L(mtab))\n\tJMPENT(\tL(mf2), L(mtab))\n\tJMPENT(\tL(mf3), L(mtab))\n\tJMPENT(\tL(mf4), L(mtab))\n\tJMPENT(\tL(mf5), L(mtab))\n\tJMPENT(\tL(mf6), L(mtab))\n\tJMPENT(\tL(mf7), L(mtab))\nL(atab):JMPENT(\tL(f0), L(atab))\n\tJMPENT(\tL(f1), L(atab))\n\tJMPENT(\tL(f2), L(atab))\n\tJMPENT(\tL(f3), L(atab))\n\tJMPENT(\tL(f4), L(atab))\n\tJMPENT(\tL(f5), L(atab))\n\tJMPENT(\tL(f6), L(atab))\n\tJMPENT(\tL(f7), L(atab))\n\tTEXT\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/skylake/avx/sqr_basecase.asm",
    "content": "dnl  AMD64 mpn_sqr_basecase optimised for Intel Broadwell.\n\ndnl  Copyright 2015 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of either:\ndnl\ndnl    * the GNU Lesser General Public License as published by the Free\ndnl      Software Foundation; either version 3 of the License, or (at your\ndnl      option) any later version.\ndnl\ndnl  or\ndnl\ndnl    * the GNU General Public License as published by the Free Software\ndnl      Foundation; either version 2 of the License, or (at your option) any\ndnl      later version.\ndnl\ndnl  or both in parallel, as here.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\ndnl  for more details.\ndnl\ndnl  You should have received copies of the GNU General Public License and the\ndnl  GNU Lesser General Public License along with the GNU MP Library.  If not,\ndnl  see https://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC cycles/limb\tmul_1\t\taddmul_1\nC AMD K8,K9\tn/a\t\tn/a\nC AMD K10\tn/a\t\tn/a\nC AMD bull\tn/a\t\tn/a\nC AMD pile\tn/a\t\tn/a\nC AMD steam\tn/a\t\tn/a\nC AMD excavator\t ?\t\t ?\nC AMD bobcat\tn/a\t\tn/a\nC AMD jaguar\tn/a\t\tn/a\nC Intel P4\tn/a\t\tn/a\nC Intel core2\tn/a\t\tn/a\nC Intel NHM\tn/a\t\tn/a\nC Intel SBR\tn/a\t\tn/a\nC Intel IBR\tn/a\t\tn/a\nC Intel HWL\t 1.68\t\tn/a\nC Intel BWL\t 1.69\t      1.8-1.9\nC Intel atom\tn/a\t\tn/a\nC Intel SLM\tn/a\t\tn/a\nC VIA nano\tn/a\t\tn/a\n\nC The inner loops of this code are the result of running a code generation and\nC optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\nC TODO\nC  * We have 8 addmul_1 loops which fall into each other.  
The idea is to save\nC    on switching code, since a circularly updated computed goto target will\nC    hardly allow correct branch prediction.  On 2nd thought, we now might make\nC    each of the 8 loop branches be poorly predicted since they will be\nC    executed fewer times for each time.  With just one addmul_1 loop, the loop\nC    count will change only once each 8th time!\nC  * Replace sqr_diag_addlsh1 code (from haswell) with adx-aware code.  We have\nC    3 variants below, but the haswell code turns out to be fastest.\nC  * Do overlapped software pipelining.\nC  * When changing this, make sure the code which falls into the inner loops\nC    does not execute too many no-ops (for both PIC and non-PIC).\n\ndefine(`rp',      `%rdi')\ndefine(`up',      `%rsi')\ndefine(`un_param',`%rdx')\n\ndefine(`n',       `%rcx')\ndefine(`un_save', `%rbx')\ndefine(`u0',      `%rdx')\n\ndefine(`w0',\t`%r8')\ndefine(`w1',\t`%r9')\ndefine(`w2',\t`%r10')\ndefine(`w3',\t`%r11')\n\nABI_SUPPORT(DOS64)\nABI_SUPPORT(STD64)\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_sqr_basecase)\n\tFUNC_ENTRY(3)\n\n\tcmp\t$2, un_param\n\tjae\tL(gt1)\n\n\tmov\t(up), %rdx\n\tmulx(\t%rdx, %rax, %rdx)\n\tmov\t%rax, (rp)\n\tmov\t%rdx, 8(rp)\n\tFUNC_EXIT()\n\tret\n\nL(gt1):\tjne\tL(gt2)\n\n\tmov\t(up), %rdx\n\tmov\t8(up), %rcx\n\tmulx(\t%rcx, %r9, %r10)\tC v0 * v1\tW 1 2\n\tmulx(\t%rdx, %rax, %r8)\tC v0 * v0\tW 0 1\n\tmov\t%rcx, %rdx\n\tmulx(\t%rdx, %r11, %rdx)\tC v1 * v1\tW 2 3\n\tadd\t%r9, %r9\t\tC\t\tW 1\n\tadc\t%r10, %r10\t\tC\t\tW 2\n\tadc\t$0, %rdx\t\tC\t\tW 3\n\tadd\t%r9, %r8\t\tC W 1\n\tadc\t%r11, %r10\t\tC W 2\n\tadc\t$0, %rdx\t\tC W 3\n\tmov\t%rax, (rp)\n\tmov\t%r8, 8(rp)\n\tmov\t%r10, 16(rp)\n\tmov\t%rdx, 24(rp)\n\tFUNC_EXIT()\n\tret\n\nL(gt2):\tcmp\t$4, un_param\n\tjae\tL(gt3)\n\n\tpush\t%rbx\n\tmov\t(up), %rdx\n\tmulx(\t8,(up), w2, w3)\n\tmulx(\t16,(up), w0, w1)\n\tadd\tw3, w0\n\tmov\t8(up), %rdx\n\tmulx(\t16,(up), %rax, w3)\n\tadc\t%rax, w1\n\tadc\t$0, w3\n\ttest\tR32(%rbx), 
R32(%rbx)\n\tmov\t(up), %rdx\n\tmulx(\t%rdx, %rbx, %rcx)\n\tmov\t%rbx, (rp)\n\tmov\t8(up), %rdx\n\tmulx(\t%rdx, %rax, %rbx)\n\tmov\t16(up), %rdx\n\tmulx(\t%rdx, %rsi, %rdx)\n\tadcx(\tw2, w2)\n\tadcx(\tw0, w0)\n\tadcx(\tw1, w1)\n\tadcx(\tw3, w3)\n\tadox(\tw2, %rcx)\n\tadox(\tw0, %rax)\n\tadox(\tw1, %rbx)\n\tadox(\tw3, %rsi)\n\tmov\t$0, R32(%r8)\n\tadox(\t%r8, %rdx)\n\tadcx(\t%r8, %rdx)\n\tmov\t%rcx, 8(rp)\n\tmov\t%rax, 16(rp)\n\tmov\t%rbx, 24(rp)\n\tmov\t%rsi, 32(rp)\n\tmov\t%rdx, 40(rp)\n\tpop\t%rbx\n\tFUNC_EXIT()\n\tret\n\nL(gt3):\tpush\t%rbx\n\n\tpush\trp\n\tpush\tup\n\tpush\tun_param\n\n\tlea\t-3(un_param), R32(un_save)\n\tlea\t5(un_param), n\n\tmov\tR32(un_param), R32(%rax)\n\tand\t$-8, R32(un_save)\n\tshr\t$3, R32(n)\t\tC count for mul_1 loop\n\tneg\tun_save\t\t\tC 8*count and offert for addmul_1 loops\n\tand\t$7, R32(%rax)\t\tC clear CF for adc as side-effect\n\n\tmov\t(up), u0\n\n\tlea\tL(mtab)(%rip), %r10\nifdef(`PIC',\n`\tmovslq\t(%r10,%rax,4), %r8\n\tlea\t(%r8, %r10), %r10\n\tjmp\t*%r10\n',`\n\tjmp\t*(%r10,%rax,8)\n')\n\nL(mf0):\tmulx(\t8,(up), w2, w3)\n\tlea\t64(up), up\nC\tlea\t(rp), rp\n\tjmp\tL(mb0)\n\nL(mf3):\tmulx(\t8,(up), w0, w1)\n\tlea\t24(up), up\n\tlea\t24(rp), rp\n\tjmp\tL(mb3)\n\nL(mf4):\tmulx(\t8,(up), w2, w3)\n\tlea\t32(up), up\n\tlea\t32(rp), rp\n\tjmp\tL(mb4)\n\nL(mf5):\tmulx(\t8,(up), w0, w1)\n\tlea\t40(up), up\n\tlea\t40(rp), rp\n\tjmp\tL(mb5)\n\nL(mf6):\tmulx(\t8,(up), w2, w3)\n\tlea\t48(up), up\n\tlea\t48(rp), rp\n\tjmp\tL(mb6)\n\nL(mf7):\tmulx(\t8,(up), w0, w1)\n\tlea\t56(up), up\n\tlea\t56(rp), rp\n\tjmp\tL(mb7)\n\nL(mf1):\tmulx(\t8,(up), w0, w1)\n\tlea\t8(up), up\n\tlea\t8(rp), rp\n\tjmp\tL(mb1)\n\nL(mf2):\tmulx(\t8,(up), w2, w3)\n\tlea\t16(up), up\n\tlea\t16(rp), rp\n\tdec\tR32(n)\n\tmulx(\t(up), w0, w1)\n\n\tALIGN(16)\nL(top):\tmov\tw2, -8(rp)\n\tadc\tw3, w0\nL(mb1):\tmulx(\t8,(up), w2, w3)\n\tadc\tw1, w2\n\tlea\t64(up), up\n\tmov\tw0, (rp)\nL(mb0):\tmov\tw2, 8(rp)\n\tmulx(\t-48,(up), w0, w1)\n\tlea\t64(rp), rp\n\tadc\tw3, 
w0\nL(mb7):\tmulx(\t-40,(up), w2, w3)\n\tmov\tw0, -48(rp)\n\tadc\tw1, w2\nL(mb6):\tmov\tw2, -40(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadc\tw3, w0\nL(mb5):\tmulx(\t-24,(up), w2, w3)\n\tmov\tw0, -32(rp)\n\tadc\tw1, w2\nL(mb4):\tmulx(\t-16,(up), w0, w1)\n\tmov\tw2, -24(rp)\n\tadc\tw3, w0\nL(mb3):\tmulx(\t-8,(up), w2, w3)\n\tadc\tw1, w2\n\tmov\tw0, -16(rp)\n\tdec\tR32(n)\n\tmulx(\t(up), w0, w1)\n\tjnz\tL(top)\n\nL(end):\tmov\tw2, -8(rp)\n\tadc\tw3, w0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\n\tmov\tw1, 8(rp)\n\n\tlea\tL(atab)(%rip), %r10\nifdef(`PIC',\n`\tmovslq\t(%r10,%rax,4), %r11\n\tlea\t(%r11, %r10), %r11\n\tjmp\t*%r11\n',`\n\tjmp\t*(%r10,%rax,8)\n')\n\nL(ed0):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f7):\tlea\t-64(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tmov\t8(up), u0\n\tmulx(\t16,(up), w0, w1)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b7)\n\n\tALIGN(16)\nL(tp0):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed0)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\nL(b0):\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp0)\n\nL(ed1):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f0):\tlea\t-64(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tmov\t(up), 
u0\n\tmulx(\t8,(up), w2, w3)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b0)\n\n\tALIGN(16)\nL(tp1):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed1)\nL(b1):\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp1)\n\nL(ed2):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f1):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tlea\t8(un_save), un_save\n\tmov\t-8(up), u0\n\tmulx(\t(up), w0, w1)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b1)\n\n\tALIGN(16)\nL(tp2):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed2)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, 
w1)\n\tjmp\tL(tp2)\n\nL(ed3):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f2):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tjz\tL(corner2)\n\tmov\t-16(up), u0\n\tmulx(\t-8,(up), w2, w3)\n\tlea\t8(rp,un_save,8), rp\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp2)\n\n\tALIGN(16)\nL(tp3):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed3)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\nL(b3):\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp3)\n\nL(ed4):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f3):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tjz\tL(corner3)\n\tmov\t-24(up), u0\n\tmulx(\t-16,(up), w0, w1)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b3)\n\n\tALIGN(16)\nL(tp4):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed4)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, 
w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\nL(b4):\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp4)\n\nL(ed5):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f4):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tmov\t-32(up), u0\n\tmulx(\t-24,(up), w2, w3)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b4)\n\n\tALIGN(16)\nL(tp5):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed5)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\nL(b5):\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp5)\n\nL(ed6):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f5):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tmov\t-40(up), u0\n\tmulx(\t-32,(up), w0, w1)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b5)\n\n\tALIGN(16)\nL(tp6):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed6)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 
8(rp)\n\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\nL(b6):\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp6)\n\nL(ed7):\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tmov\tw0, (rp)\n\tadc\t%rcx, w1\t\tC relies on rcx = 0\n\tmov\tw1, 8(rp)\nL(f6):\tlea\t(up,un_save,8), up\n\tor\tR32(un_save), R32(n)\n\tmov\t-48(up), u0\n\tmulx(\t-40,(up), w2, w3)\n\tlea\t-56(rp,un_save,8), rp\n\tjmp\tL(b6)\n\n\tALIGN(16)\nL(tp7):\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tjrcxz\tL(ed7)\n\tmulx(\t8,(up), w2, w3)\n\tadox(\t(rp), w0)\n\tlea\t8(n), R32(n)\n\tmov\tw0, (rp)\n\tadcx(\tw1, w2)\n\tmulx(\t16,(up), w0, w1)\n\tadcx(\tw3, w0)\n\tadox(\t8,(rp), w2)\n\tmov\tw2, 8(rp)\nL(b7):\tmulx(\t24,(up), w2, w3)\n\tlea\t64(up), up\n\tadcx(\tw1, w2)\n\tadox(\t16,(rp), w0)\n\tmov\tw0, 16(rp)\n\tmulx(\t-32,(up), w0, w1)\n\tadox(\t24,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 24(rp)\n\tmulx(\t-24,(up), w2, w3)\n\tadcx(\tw1, w2)\n\tadox(\t32,(rp), w0)\n\tmov\tw0, 32(rp)\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t40,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, 40(rp)\n\tadox(\t48,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, 48(rp)\n\tlea\t64(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tjmp\tL(tp7)\n\nL(corner3):\n\tmov\t-24(up), u0\n\tmulx(\t-16,(up), w0, w1)\n\tadox(\t-8,(rp), w0)\n\tmulx(\t-8,(up), w2, w3)\n\tmov\tw0, -8(rp)\n\tlea\t8(rp), rp\n\tadcx(\tw1, w2)\n\tmulx(\t(up), w0, w1)\n\tadox(\t-8,(rp), w2)\n\tadcx(\tw3, w0)\n\tmov\tw2, -8(rp)\n\tadox(\t(rp), w0)\n\tadox(\t%rcx, w1)\t\tC relies on rcx = 0\n\tadcx(\t%rcx, w1)\t\tC relies on rcx = 
0\nL(corner2):\n\tmov\t-16(up), u0\n\tmulx(\t-8,(up), w2, w3)\n\tmulx(\t(up), %rax, %rbx)\n\tadox(\tw0, w2)\n\tadcx(\tw3, %rax)\n\tmov\tw2, (rp)\n\tadox(\tw1, %rax)\n\tadox(\t%rcx, %rbx)\t\tC relies on rcx = 0\n\tmov\t%rax, 8(rp)\n\tadc\t%rcx, %rbx\t\tC relies on rcx = 0\n\tmov\t-8(up), %rdx\n\tmulx(\t(up), %rax, %rdx)\n\tadd\t%rbx, %rax\n\tmov\t%rax, 16(rp)\n\tadc\t%rcx, %rdx\t\tC relies on rcx = 0\n\tmov\t%rdx, 24(rp)\n\nL(sqr_diag_addlsh1):\n\tpop\tn\n\tpop\tup\n\tpop\trp\n\nifdef(`SDA_VARIANT',,`define(`SDA_VARIANT', 2)')\n\nifelse(SDA_VARIANT,1,`\n\tlea\t(n,n), %rax\n\tmovq\t$0, -8(rp,%rax,8)\t\tC FIXME\n\ttest\tR32(%rax), R32(%rax)\n\tmov\t(up), %rdx\n\tlea\t8(up), up\n\tmulx(\t%rdx, %r8, %rdx)\n\tjmp\tL(dm)\n\n\tALIGN(16)\nL(dtop):mov\t8(rp), %r9\n\tadcx(\t%r9, %r9)\n\tadox(\t%rdx, %r9)\n\tmov\t%r9, 8(rp)\n\tlea\t16(rp), rp\n\tjrcxz\tL(dend)\n\tmov\t(up), %rdx\n\tmulx(\t%rdx, %rax, %rdx)\n\tlea\t8(up), up\n\tmov\t(rp), %r8\n\tadcx(\t%r8, %r8)\n\tadox(\t%rax, %r8)\nL(dm):\tmov\t%r8, (rp)\n\tlea\t-1(n), n\n\tjmp\tL(dtop)\nL(dend):\n')\n\nifelse(SDA_VARIANT,2,`\n\tdec\tR32(n)\n\tmov\t(up), %rdx\n\txor\tR32(%rbx), R32(%rbx)\tC clear CF as side effect\n\tmulx(\t%rdx, %rax, %r10)\n\tmov\t%rax, (rp)\n\tmov\t8(rp), %r8\n\tmov\t16(rp), %r9\n\tjmp\tL(dm)\n\n\tALIGN(16)\nL(dtop):mov\t24(rp), %r8\n\tmov\t32(rp), %r9\n\tlea\t16(rp), rp\n\tlea\t(%rdx,%rbx), %r10\nL(dm):\tadc\t%r8, %r8\n\tadc\t%r9, %r9\n\tsetc\tR8(%rbx)\n\tmov\t8(up), %rdx\n\tlea\t8(up), up\n\tmulx(\t%rdx, %rax, %rdx)\n\tadd\t%r10, %r8\n\tadc\t%rax, %r9\n\tmov\t%r8, 8(rp)\n\tmov\t%r9, 16(rp)\n\tdec\tR32(n)\n\tjnz\tL(dtop)\n\nL(dend):adc\t%rbx, %rdx\n\tmov\t%rdx, 24(rp)\n')\n\nifelse(SDA_VARIANT,3,`\n\tdec\tR32(n)\n\tmov\t(up), %rdx\n\ttest\tR32(%rbx), R32(%rbx)\tC clear CF and OF\n\tmulx(\t%rdx, %rax, %r10)\n\tmov\t%rax, (rp)\n\tmov\t8(rp), %r8\n\tmov\t16(rp), %r9\n\tjmp\tL(dm)\n\n\tALIGN(16)\nL(dtop):jrcxz\tL(dend)\n\tmov\t24(rp), %r8\n\tmov\t32(rp), %r9\n\tlea\t16(rp), rp\nL(dm):\tadcx(\t%r8, 
%r8)\n\tadcx(\t%r9, %r9)\n\tmov\t8(up), %rdx\n\tlea\t8(up), up\n\tadox(\t%r10, %r8)\n\tmulx(\t%rdx, %rax, %r10)\n\tadox(\t%rax, %r9)\n\tmov\t%r8, 8(rp)\n\tmov\t%r9, 16(rp)\n\tlea\t-1(n), R32(n)\n\tjmp\tL(dtop)\n\nL(dend):adcx(\t%rcx, %r10)\n\tadox(\t%rcx, %r10)\n\tmov\t%r10, 24(rp)\n')\n\n\tpop\t%rbx\n\tFUNC_EXIT()\n\tret\n\n\tJUMPTABSECT\n\tALIGN(8)\nL(mtab):JMPENT(\tL(mf7), L(mtab))\n\tJMPENT(\tL(mf0), L(mtab))\n\tJMPENT(\tL(mf1), L(mtab))\n\tJMPENT(\tL(mf2), L(mtab))\n\tJMPENT(\tL(mf3), L(mtab))\n\tJMPENT(\tL(mf4), L(mtab))\n\tJMPENT(\tL(mf5), L(mtab))\n\tJMPENT(\tL(mf6), L(mtab))\nL(atab):JMPENT(\tL(f6), L(atab))\n\tJMPENT(\tL(f7), L(atab))\n\tJMPENT(\tL(f0), L(atab))\n\tJMPENT(\tL(f1), L(atab))\n\tJMPENT(\tL(f2), L(atab))\n\tJMPENT(\tL(f3), L(atab))\n\tJMPENT(\tL(f4), L(atab))\n\tJMPENT(\tL(f5), L(atab))\n\tTEXT\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/skylake/avx/sub_err1_n.as",
    "content": ";  AMD64 mpn_sub_err1_n\n;  Copyright 2017 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)-(rdx,rcx)-BwIn\n;\trax = borrow\n;\t(rcx,2) = rev(r8,rcx) \\dot (borrow,rcx) where borrow is the sequence\n;       of borrows from the subtraction of (rsi,rcx)-(rdx,rcx)\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB0   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define EP      rcx\n    %define YP      r8\n    %define Size    r9\n    %define BwIn    [rsp+8]\n    %define LIMB0   rax\n    %define E0      r12\n    %define E1      r13\n    %define Zero    r14\n    %define Dummy   rbx\n%endif\n\n\talign   32\n\tBITS    64\n\n%macro  DO_LIMB 1\n\tmov     LIMB0, [Inp1P + %1*8]\n\tsbb     LIMB0, [Inp2P + %1*8]\n\tmov     [SumP + %1*8], LIMB0\n\tmov\tLIMB0, [YP - %1*8]\n\tcmovnc\tLIMB0, Zero\n\tinc\tDummy\t\t\t; OF = 0\n\tadox\tE0, LIMB0\n\tadox\tE1, Zero\n%endmacro\n\nGLOBAL_FUNC mpn_sub_err1_n\n\n\tmov\tLIMB0, BwIn\n\tpush\tr12\n\tpush\tr13\n\tpush\tr14\n\tpush\trbx\n\tmov\tr11, 
rcx\n    %define EP      r11\n    %define SizeRest rcx\t; Need it in rcx for jrcxz\n\tmov     SizeRest, Size\n\tlea\tYP, [YP + Size*8 - 8]\n\tand\tSizeRest, 7\n\txor\tZero, Zero\n\tmov\tE0, Zero\n\tmov\tE1, Zero\n\tshr     Size, 3\n\tbt\tLIMB0, 0\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tDO_LIMB 0\n\tDO_LIMB 1\n\tDO_LIMB 2\n\tDO_LIMB 3\n\tDO_LIMB 4\n\tDO_LIMB 5\n\tDO_LIMB 6\n\tDO_LIMB 7\n\n\tlea     Inp1P, [Inp1P+64]\n\tlea     Inp2P, [Inp2P+64]\n\tlea     SumP, [SumP+64]\n\tlea     YP, [YP-64]\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tDO_LIMB 0\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 1\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 2\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 3\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tlea\tYP, [YP-32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, Zero\n\tsetc al\n\tmov\t[EP], E0\n\tmov\t[EP+8], E1\n\tpop\trbx\n\tpop\tr14\n\tpop\tr13\n\tpop\tr12\n\tret\n"
  },
  {
    "path": "mpn/x86_64/skylake/sub_n.as",
    "content": ";  AMD64 mpn_sub_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)-(rdx,rcx)\n;\trax = borrow\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB0   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define SizeRest r11\n    %define LIMB0   rax\n%endif\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\n\talign   32\n\tBITS    64\n\nGLOBAL_FUNC mpn_sub_n\n\n\tmov     SizeRest, Size\n\tand\tSizeRest, 7\n\tshr     Size, 3\n\tclc\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tmov     LIMB0, [Inp1P]        ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P]        ;2\t2\tp06 p23\t\t1\n\tmov     [SumP], LIMB0         ;1\t2\tp237 p4\t3\t1\n\tvpblendd YMM0, YMM0, YMM0, 0  ; This one is black magic. 
Beware.\n\tmov     LIMB0, [Inp1P+8]      ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+8]      ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+8], LIMB0       ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+16]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+16]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+16], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+24]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+24]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+24], LIMB0      ;1\t2\tp237 p4\t3\t1\n\n\tmov     LIMB0, [Inp1P+32]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+32]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+32], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+40]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+40]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+40], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+48]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+48]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+48], LIMB0      ;1\t2\tp237 p4\t3\t1\n\tmov     LIMB0, [Inp1P+56]     ;1\t1\tp23\t2\t0.5\n\tADCSBB  LIMB0, [Inp2P+56]     ;2\t2\tp06 p23\t\t1\n\tmov     [SumP+56], LIMB0      ;1\t2\tp237 p4\t3\t1\n\n\tlea     Inp1P, [Inp1P+64]     ;1\t1\tp15\t1\t0.5\n\tlea     Inp2P, [Inp2P+64]     ;1\t1\tp15\t1\t0.5\n\tlea     SumP, [SumP+64]       ;1\t1\tp15\t1\t0.5\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tmov\tLIMB0, [Inp1P]\n\tADCSBB\tLIMB0, [Inp2P]\n\tmov\t[SumP], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+8]\n\tADCSBB\tLIMB0, [Inp2P+8]\n\tmov\t[SumP+8], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+16]\n\tADCSBB\tLIMB0, [Inp2P+16]\n\tmov\t[SumP+16], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tmov\tLIMB0, [Inp1P+24]\n\tADCSBB\tLIMB0, [Inp2P+24]\n\tmov\t[SumP+24], LIMB0\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, 0\n\tsetc al\n\tret"
  },
  {
    "path": "mpn/x86_64/sqr_basecase.asm",
    "content": "dnl  AMD64 mpn_sqr_basecase.\n\ndnl  Contributed to the GNU project by Torbjorn Granlund.\n\ndnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.\n\ndnl  This file is part of the GNU MP Library.\n\ndnl  The GNU MP Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 3 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The GNU MP Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ninclude(`../config.m4')\n\nC The inner loops of this code are the result of running a code generation and\nC optimization tool suite written by David Harvey and Torbjorn Granlund.\n\nC NOTES\nC   * There is a major stupidity in that we call mpn_mul_1 initially, for a\nC     large trip count.  Instead, we should follow the generic/sqr_basecase.c\nC     code which uses addmul_2s from the start, conditionally leaving a 1x1\nC     multiply to the end.  (In assembly code, one would stop invoking\nC     addmul_2s loops when perhaps 3x2s respectively a 2x2s remains.)\nC   * Another stupidity is in the sqr_diag_addlsh1 code.  It does not need to\nC     save/restore carry, instead it can propagate into the high product word.\nC   * Align more labels, should shave off a few cycles.\nC   * We can safely use 32-bit size operations, since operands with (2^32)\nC     limbs will lead to non-termination in practice.\nC   * The jump table could probably be optimized, at least for non-pic.\nC   * The special code for n <= 4 was quickly written.  
It is probably too\nC     large and unnecessarily slow.\nC   * Consider combining small cases code so that the n=k-1 code jumps into the\nC     middle of the n=k code.\nC   * Avoid saving registers for small cases code.\nC   * Needed variables:\nC    n   r11  input size\nC    i   r8   work left, initially n\nC    j   r9   inner loop count\nC        r15  unused\nC    v0  r13\nC    v1  r14\nC    rp  rdi\nC    up  rsi\nC    w0  rbx\nC    w1  rcx\nC    w2  rbp\nC    w3  r10\nC    tp  r12\nC    lo  rax\nC    hi  rdx\nC        rsp\n\nC INPUT PARAMETERS\ndefine(`rp',\t  `%rdi')\ndefine(`up',\t  `%rsi')\ndefine(`n_param', `%rdx')\ndefine(`n_param32', `%edx')\n\ndefine(`n',\t`%r11')\ndefine(`n32',\t`%r11d')\ndefine(`tp',\t`%r12')\ndefine(`i',\t`%r8')\ndefine(`i32',\t`%r8d')\ndefine(`j',\t`%r9')\ndefine(`j8',\t`%r9b')\ndefine(`j32',\t`%r9d')\ndefine(`v0',\t`%r13')\ndefine(`v1',\t`%r14')\ndefine(`w0',\t`%rbx')\ndefine(`w032',\t`%ebx')\ndefine(`w1',\t`%rcx')\ndefine(`w132',\t`%ecx')\ndefine(`w2',\t`%rbp')\ndefine(`w232',\t`%ebp')\ndefine(`w3',\t`%r10')\ndefine(`w332',\t`%r10d')\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_sqr_basecase)\n\tmov\tn_param32, %ecx\n\tmov\tn_param32, n32\t\tC free original n register (rdx)\n\n\tadd\t$-40, %rsp\n\n\tand\t$3, %ecx\n\tcmp\t$4, n_param32\n\tlea\t4(%rcx), %r8\n\n\tmov\t%rbx, 32(%rsp)\n\tmov\t%rbp, 24(%rsp)\n\tmov\t%r12, 16(%rsp)\n\tmov\t%r13, 8(%rsp)\n\tmov\t%r14, (%rsp)\n\n\tcmovg\t%r8, %rcx\n\n\tlea\tL(tab)(%rip), %rax\nifdef(`PIC',\n`\tmovslq\t(%rax,%rcx,4), %r10\n\tadd\t%r10, %rax\n\tjmp\t*%rax\n',`\n\tjmp\t*(%rax,%rcx,8)\n')\n\tJUMPTABSECT\n\tALIGN(8)\nL(tab):\tJMPENT(\tL(l4), L(tab))\n\tJMPENT(\tL(l1), L(tab))\n\tJMPENT(\tL(l2), L(tab))\n\tJMPENT(\tL(l3), L(tab))\n\tJMPENT(\tL(l0m4), L(tab))\n\tJMPENT(\tL(l1m4), L(tab))\n\tJMPENT(\tL(l2m4), L(tab))\n\tJMPENT(\tL(l3m4), L(tab))\n\tTEXT\n\nL(l1):\tmov\t(up), %rax\n\tmul\t%rax\n\tadd\t$40, %rsp\n\tmov\t%rax, (rp)\n\tmov\t%rdx, 8(rp)\n\tret\n\nL(l2):\tmov\t(up), %rax\n\tmov\t%rax, 
%r8\n\tmul\t%rax\n\tmov\t8(up), %r11\n\tmov\t%rax, (rp)\n\tmov\t%r11, %rax\n\tmov\t%rdx, %r9\n\tmul\t%rax\n\tadd\t$40, %rsp\n\tmov\t%rax, %r10\n\tmov\t%r11, %rax\n\tmov\t%rdx, %r11\n\tmul\t%r8\n\txor\t%r8, %r8\n\tadd\t%rax, %r9\n\tadc\t%rdx, %r10\n\tadc\t%r8, %r11\n\tadd\t%rax, %r9\n\tmov\t%r9, 8(rp)\n\tadc\t%rdx, %r10\n\tmov\t%r10, 16(rp)\n\tadc\t%r8, %r11\n\tmov\t%r11, 24(rp)\n\tret\n\nL(l3):\tmov\t(up), %rax\n\tmov\t%rax, %r10\n\tmul\t%rax\n\tmov\t8(up), %r11\n\tmov\t%rax, (rp)\n\tmov\t%r11, %rax\n\tmov\t%rdx, 8(rp)\n\tmul\t%rax\n\tmov\t16(up), %rcx\n\tmov\t%rax, 16(rp)\n\tmov\t%rcx, %rax\n\tmov\t%rdx, 24(rp)\n\tmul\t%rax\n\tmov\t%rax, 32(rp)\n\tmov\t%rdx, 40(rp)\n\n\tmov\t%r11, %rax\n\tmul\t%r10\n\tmov\t%rax, %r8\n\tmov\t%rcx, %rax\n\tmov\t%rdx, %r9\n\tmul\t%r10\n\txor\t%r10, %r10\n\tadd\t%rax, %r9\n\tmov\t%r11, %rax\n\tmov\t%r10, %r11\n\tadc\t%rdx, %r10\n\n\tmul\t%rcx\n\tadd\t$40, %rsp\n\tadd\t%rax, %r10\n\tadc\t%r11, %rdx\n\tadd\t%r8, %r8\n\tadc\t%r9, %r9\n\tadc\t%r10, %r10\n\tadc\t%rdx, %rdx\n\tadc\t%r11, %r11\n\tadd\t%r8, 8(rp)\n\tadc\t%r9, 16(rp)\n\tadc\t%r10, 24(rp)\n\tadc\t%rdx, 32(rp)\n\tadc\t%r11, 40(rp)\n\tret\n\nL(l4):\tmov\t(up), %rax\n\tmov\t%rax, %r11\n\tmul\t%rax\n\tmov\t8(up), %rbx\n\tmov\t%rax, (rp)\n\tmov\t%rbx, %rax\n\tmov\t%rdx, 8(rp)\n\tmul\t%rax\n\tmov\t%rax, 16(rp)\n\tmov\t%rdx, 24(rp)\n\tmov\t16(up), %rax\n\tmul\t%rax\n\tmov\t%rax, 32(rp)\n\tmov\t%rdx, 40(rp)\n\tmov\t24(up), %rax\n\tmul\t%rax\n\tmov\t%rax, 48(rp)\n\tmov\t%rbx, %rax\n\tmov\t%rdx, 56(rp)\n\n\tmul\t%r11\n\tadd\t$32, %rsp\n\tmov\t%rax, %r8\n\tmov\t%rdx, %r9\n\tmov\t16(up), %rax\n\tmul\t%r11\n\txor\t%r10, %r10\n\tadd\t%rax, %r9\n\tadc\t%rdx, %r10\n\tmov\t24(up), %rax\n\tmul\t%r11\n\txor\t%r11, %r11\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r11\n\tmov\t16(up), %rax\n\tmul\t%rbx\n\txor\t%rcx, %rcx\n\tadd\t%rax, %r10\n\tadc\t%rdx, %r11\n\tadc\t$0, %rcx\n\tmov\t24(up), %rax\n\tmul\t%rbx\n\tpop\t%rbx\n\tadd\t%rax, %r11\n\tadc\t%rdx, %rcx\n\tmov\t16(up), %rdx\n\tmov\t24(up), 
%rax\n\tmul\t%rdx\n\tadd\t%rax, %rcx\n\tadc\t$0, %rdx\n\n\tadd\t%r8, %r8\n\tadc\t%r9, %r9\n\tadc\t%r10, %r10\n\tadc\t%r11, %r11\n\tadc\t%rcx, %rcx\n\tmov\t$0, %eax\n\tadc\t%rdx, %rdx\n\n\tadc\t%rax, %rax\n\tadd\t%r8, 8(rp)\n\tadc\t%r9, 16(rp)\n\tadc\t%r10, 24(rp)\n\tadc\t%r11, 32(rp)\n\tadc\t%rcx, 40(rp)\n\tadc\t%rdx, 48(rp)\n\tadc\t%rax, 56(rp)\n\tret\n\n\nL(l0m4):\n\tlea\t-16(rp,n,8), tp\t\tC point tp in middle of result operand\n\tmov\t(up), v0\n\tmov\t8(up), %rax\n\tlea\t(up,n,8), up\t\tC point up at end of input operand\n\n\tlea\t-4(n), i\nC Function mpn_mul_1_m3(tp, up - i, i, up[-i - 1])\n\txor\tj32, j32\n\tsub\tn, j\n\n\tmul\tv0\n\txor\tw232, w232\n\tmov\t%rax, w0\n\tmov\t16(up,j,8), %rax\n\tmov\t%rdx, w3\n\tjmp\tL(lL3)\n\n\tALIGN(16)\nL(mul_1_m3_top):\n\tadd\t%rax, w2\n\tmov\tw3, (tp,j,8)\n\tmov\t(up,j,8), %rax\n\tadc\t%rdx, w1\n\txor\tw032, w032\n\tmul\tv0\n\txor\tw332, w332\n\tmov\tw2, 8(tp,j,8)\n\tadd\t%rax, w1\n\tadc\t%rdx, w0\n\tmov\t8(up,j,8), %rax\n\tmov\tw1, 16(tp,j,8)\n\txor\tw232, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t16(up,j,8), %rax\n\tadc\t%rdx, w3\nL(lL3):\txor\tw132, w132\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t24(up,j,8), %rax\n\tadc\t%rdx, w2\n\tmov\tw0, 24(tp,j,8)\n\tmul\tv0\n\tadd\t$4, j\n\tjs\tL(mul_1_m3_top)\n\n\tadd\t%rax, w2\n\tmov\tw3, (tp)\n\tadc\t%rdx, w1\n\tmov\tw2, 8(tp)\n\tmov\tw1, 16(tp)\n\n\tlea\teval(2*8)(tp), tp\tC tp += 2\n\tlea\t-8(up), up\n\tjmp\tL(dowhile)\n\n\nL(l1m4):\n\tlea\t8(rp,n,8), tp\t\tC point tp in middle of result operand\n\tmov\t(up), v0\t\tC u0\n\tmov\t8(up), %rax\t\tC u1\n\tlea\t8(up,n,8), up\t\tC point up at end of input operand\n\n\tlea\t-3(n), i\nC Function mpn_mul_2s_m0(tp, up - i, i, up - i - 1)\n\tlea\t-3(n), j\n\tneg\tj\n\n\tmov\t%rax, v1\t\tC u1\n\tmul\tv0\t\t\tC u0 * u1\n\tmov\t%rdx, w1\n\txor\tw232, w232\n\tmov\t%rax, 8(rp)\n\tjmp\tL(lm0)\n\n\tALIGN(16)\nL(mul_2_m0_top):\n\tmul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\t-24(up,j,8), %rax\n\tmov\t$0, w232\n\tmul\tv0\n\tadd\t%rax, 
w0\n\tmov\t-24(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\tmul\tv1\t\t\tC v1 * u0\n\tadd\t%rax, w1\n\tmov\tw0, -24(tp,j,8)\n\tadc\t%rdx, w2\nL(lm0):\tmov\t-16(up,j,8), %rax\tC u2, u6 ...\n\tmul\tv0\t\t\tC u0 * u2\n\tmov\t$0, w332\n\tadd\t%rax, w1\n\tadc\t%rdx, w2\n\tmov\t-16(up,j,8), %rax\n\tadc\t$0, w332\n\tmov\t$0, w032\n\tmov\tw1, -16(tp,j,8)\n\tmul\tv1\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tadc\t$0, w032\n\tmul\tv1\n\tadd\t%rax, w3\n\tmov\tw2, -8(tp,j,8)\n\tadc\t%rdx, w0\nL(lm2x):\tmov\t(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tadc\t%rdx, w0\n\tadc\t$0, w132\n\tadd\t$4, j\n\tmov\t-32(up,j,8), %rax\n\tmov\tw3, -32(tp,j,8)\n\tjs\tL(mul_2_m0_top)\n\n\tmul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\tw0, -8(tp)\n\tmov\tw1, (tp)\n\n\tlea\t-16(up), up\n\tlea\teval(3*8-24)(tp), tp\tC tp += 3\n\tjmp\tL(dowhile_end)\n\n\nL(l2m4):\n\tlea\t-16(rp,n,8), tp\t\tC point tp in middle of result operand\n\tmov\t(up), v0\n\tmov\t8(up), %rax\n\tlea\t(up,n,8), up\t\tC point up at end of input operand\n\n\tlea\t-4(n), i\nC Function mpn_mul_1_m1(tp, up - (i - 1), i - 1, up[-i])\n\tlea\t-2(n), j\n\tneg\tj\n\n\tmul\tv0\n\tmov\t%rax, w2\n\tmov\t(up,j,8), %rax\n\tmov\t%rdx, w1\n\tjmp\tL(lL1)\n\n\tALIGN(16)\nL(mul_1_m1_top):\n\tadd\t%rax, w2\n\tmov\tw3, (tp,j,8)\n\tmov\t(up,j,8), %rax\n\tadc\t%rdx, w1\nL(lL1):\txor\tw032, w032\n\tmul\tv0\n\txor\tw332, w332\n\tmov\tw2, 8(tp,j,8)\n\tadd\t%rax, w1\n\tadc\t%rdx, w0\n\tmov\t8(up,j,8), %rax\n\tmov\tw1, 16(tp,j,8)\n\txor\tw232, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t16(up,j,8), %rax\n\tadc\t%rdx, w3\n\txor\tw132, w132\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t24(up,j,8), %rax\n\tadc\t%rdx, w2\n\tmov\tw0, 24(tp,j,8)\n\tmul\tv0\n\tadd\t$4, j\n\tjs\tL(mul_1_m1_top)\n\n\tadd\t%rax, w2\n\tmov\tw3, (tp)\n\tadc\t%rdx, w1\n\tmov\tw2, 8(tp)\n\tmov\tw1, 16(tp)\n\n\tlea\teval(2*8)(tp), tp\tC tp += 2\n\tlea\t-8(up), 
up\n\tjmp\tL(dowhile_mid)\n\n\nL(l3m4):\n\tlea\t8(rp,n,8), tp\t\tC point tp in middle of result operand\n\tmov\t(up), v0\t\tC u0\n\tmov\t8(up), %rax\t\tC u1\n\tlea\t8(up,n,8), up\t\tC point up at end of input operand\n\n\tlea\t-5(n), i\nC Function mpn_mul_2s_m2(tp, up - i + 1, i - 1, up - i)\n\tlea\t-1(n), j\n\tneg\tj\n\n\tmov\t%rax, v1\t\tC u1\n\tmul\tv0\t\t\tC u0 * u1\n\tmov\t%rdx, w3\n\txor\tw032, w032\n\txor\tw132, w132\n\tmov\t%rax, 8(rp)\n\tjmp\tL(lm2)\n\n\tALIGN(16)\nL(mul_2_m2_top):\n\tmul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\t-24(up,j,8), %rax\n\tmov\t$0, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t-24(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\tmul\tv1\t\t\tC v1 * u0\n\tadd\t%rax, w1\n\tmov\tw0, -24(tp,j,8)\n\tadc\t%rdx, w2\n\tmov\t-16(up,j,8), %rax\n\tmul\tv0\n\tmov\t$0, w332\n\tadd\t%rax, w1\n\tadc\t%rdx, w2\n\tmov\t-16(up,j,8), %rax\n\tadc\t$0, w332\n\tmov\t$0, w032\n\tmov\tw1, -16(tp,j,8)\n\tmul\tv1\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmul\tv0\n\tadd\t%rax, w2\n\tmov\t-8(up,j,8), %rax\n\tadc\t%rdx, w3\n\tadc\t$0, w032\n\tmul\tv1\n\tadd\t%rax, w3\n\tmov\tw2, -8(tp,j,8)\n\tadc\t%rdx, w0\nL(lm2):\tmov\t(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tadc\t%rdx, w0\n\tadc\t$0, w132\n\tadd\t$4, j\n\tmov\t-32(up,j,8), %rax\n\tmov\tw3, -32(tp,j,8)\n\tjs\tL(mul_2_m2_top)\n\n\tmul\tv1\n\tadd\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\tw0, -8(tp)\n\tmov\tw1, (tp)\n\n\tlea\t-16(up), up\n\tjmp\tL(dowhile_mid)\n\nL(dowhile):\nC Function mpn_addmul_2s_m2(tp, up - (i - 1), i - 1, up - i)\n\tlea\t4(i), j\n\tneg\tj\n\n\tmov\t16(up,j,8), v0\n\tmov\t24(up,j,8), v1\n\tmov\t24(up,j,8), %rax\n\tmul\tv0\n\txor\tw332, w332\n\tadd\t%rax, 24(tp,j,8)\n\tadc\t%rdx, w3\n\txor\tw032, w032\n\txor\tw132, w132\n\tjmp\tL(lam2)\n\n\tALIGN(16)\nL(addmul_2_m2_top):\n\tadd\tw3, (tp,j,8)\n\tadc\t%rax, w0\n\tmov\t8(up,j,8), %rax\n\tadc\t%rdx, w1\n\tmov\t$0, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t8(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, 
w232\n\tmul\tv1\t\t\t\tC v1 * u0\n\tadd\tw0, 8(tp,j,8)\n\tadc\t%rax, w1\n\tadc\t%rdx, w2\n\tmov\t16(up,j,8), %rax\n\tmov\t$0, w332\n\tmul\tv0\t\t\t\tC v0 * u1\n\tadd\t%rax, w1\n\tmov\t16(up,j,8), %rax\n\tadc\t%rdx, w2\n\tadc\t$0, w332\n\tmul\tv1\t\t\t\tC v1 * u1\n\tadd\tw1, 16(tp,j,8)\n\tadc\t%rax, w2\n\tmov\t24(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmul\tv0\n\tmov\t$0, w032\n\tadd\t%rax, w2\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmov\t24(up,j,8), %rax\n\tadc\t$0, w032\n\tmul\tv1\n\tadd\tw2, 24(tp,j,8)\n\tadc\t%rax, w3\n\tadc\t%rdx, w0\nL(lam2):\n        mov\t32(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t32(up,j,8), %rax\n\tadc\t%rdx, w0\n\tadc\t$0, w132\n\tmul\tv1\n\tadd\t$4, j\n\tjs\tL(addmul_2_m2_top)\n\n\tadd\tw3, (tp)\n\tadc\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\tw0, 8(tp)\n\tmov\tw1, 16(tp)\n\n\tlea\teval(2*8)(tp), tp\tC tp += 2\n\n\tadd\t$-2, i32\t\tC i -= 2\n\nL(dowhile_mid):\nC Function mpn_addmul_2s_m0(tp, up - (i - 1), i - 1, up - i)\n\tlea\t2(i), j\n\tneg\tj\n\n\tmov\t(up,j,8), v0\n\tmov\t8(up,j,8), v1\n\tmov\t8(up,j,8), %rax\n\tmul\tv0\n\txor\tw132, w132\n\tadd\t%rax, 8(tp,j,8)\n\tadc\t%rdx, w1\n\txor\tw232, w232\n\tjmp\tL(l20)\n\n\tALIGN(16)\nL(addmul_2_m0_top):\n\tadd\tw3, (tp,j,8)\n\tadc\t%rax, w0\n\tmov\t8(up,j,8), %rax\n\tadc\t%rdx, w1\n\tmov\t$0, w232\n\tmul\tv0\n\tadd\t%rax, w0\n\tmov\t8(up,j,8), %rax\n\tadc\t%rdx, w1\n\tadc\t$0, w232\n\tmul\tv1\t\t\t\tC v1 * u0\n\tadd\tw0, 8(tp,j,8)\n\tadc\t%rax, w1\n\tadc\t%rdx, w2\nL(l20):\tmov\t16(up,j,8), %rax\n\tmov\t$0, w332\n\tmul\tv0\t\t\t\tC v0 * u1\n\tadd\t%rax, w1\n\tmov\t16(up,j,8), %rax\n\tadc\t%rdx, w2\n\tadc\t$0, w332\n\tmul\tv1\t\t\t\tC v1 * u1\n\tadd\tw1, 16(tp,j,8)\n\tadc\t%rax, w2\n\tmov\t24(up,j,8), %rax\n\tadc\t%rdx, w3\n\tmul\tv0\n\tmov\t$0, w032\n\tadd\t%rax, w2\n\tadc\t%rdx, w3\n\tmov\t$0, w132\n\tmov\t24(up,j,8), %rax\n\tadc\t$0, w032\n\tmul\tv1\n\tadd\tw2, 24(tp,j,8)\n\tadc\t%rax, w3\n\tadc\t%rdx, w0\n\tmov\t32(up,j,8), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t32(up,j,8), %rax\n\tadc\t%rdx, 
w0\n\tadc\t$0, w132\n\tmul\tv1\n\tadd\t$4, j\n\tjs\tL(addmul_2_m0_top)\n\n\tadd\tw3, (tp)\n\tadc\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\tw0, 8(tp)\n\tmov\tw1, 16(tp)\n\n\tlea\teval(2*8)(tp), tp\tC tp += 2\nL(dowhile_end):\n\n\tadd\t$-2, i32\t\tC i -= 2\n\tjne\tL(dowhile)\n\nC Function mpn_addmul_2s_2\n\tmov\t-16(up), v0\n\tmov\t-8(up), v1\n\tmov\t-8(up), %rax\n\tmul\tv0\n\txor\tw332, w332\n\tadd\t%rax, -8(tp)\n\tadc\t%rdx, w3\n\txor\tw032, w032\n\txor\tw132, w132\n\tmov\t(up), %rax\n\tmul\tv0\n\tadd\t%rax, w3\n\tmov\t(up), %rax\n\tadc\t%rdx, w0\n\tmul\tv1\n\tadd\tw3, (tp)\n\tadc\t%rax, w0\n\tadc\t%rdx, w1\n\tmov\tw0, 8(tp)\n\tmov\tw1, 16(tp)\n\nC Function mpn_sqr_diag_addlsh1\n\tlea\t-4(n,n), j\n\n\tmov\t8(rp), %r11\n\tlea\t-8(up), up\n\tlea\t(rp,j,8), rp\n\tneg\tj\n\tmov\t(up,j,4), %rax\n\tmul\t%rax\n\ttest\t$2, j8\n\tjnz\tL(odd)\n\nL(evn):\tadd\t%r11, %r11\n\tsbb\t%ebx, %ebx\t\tC save CF\n\tadd\t%rdx, %r11\n\tmov\t%rax, (rp,j,8)\n\tjmp\tL(ld0)\n\nL(odd):\tadd\t%r11, %r11\n\tsbb\t%ebp, %ebp\t\tC save CF\n\tadd\t%rdx, %r11\n\tmov\t%rax, (rp,j,8)\n\tlea\t-2(j), j\n\tjmp\tL(ld1)\n\n\tALIGN(16)\nL(top):\tmov\t(up,j,4), %rax\n\tmul\t%rax\n\tadd\t%ebp, %ebp\t\tC restore carry\n\tadc\t%rax, %r10\n\tadc\t%rdx, %r11\n\tmov\t%r10, (rp,j,8)\nL(ld0):\tmov\t%r11, 8(rp,j,8)\n\tmov\t16(rp,j,8), %r10\n\tadc\t%r10, %r10\n\tmov\t24(rp,j,8), %r11\n\tadc\t%r11, %r11\n\tnop\n\tsbb\t%ebp, %ebp\t\tC save CF\n\tmov\t8(up,j,4), %rax\n\tmul\t%rax\n\tadd\t%ebx, %ebx\t\tC restore carry\n\tadc\t%rax, %r10\n\tadc\t%rdx, %r11\n\tmov\t%r10, 16(rp,j,8)\nL(ld1):\tmov\t%r11, 24(rp,j,8)\n\tmov\t32(rp,j,8), %r10\n\tadc\t%r10, %r10\n\tmov\t40(rp,j,8), %r11\n\tadc\t%r11, %r11\n\tsbb\t%ebx, %ebx\t\tC save CF\n\tadd\t$4, j\n\tjs\tL(top)\n\n\tmov\t(up), %rax\n\tmul\t%rax\n\tadd\t%ebp, %ebp\t\tC restore carry\n\tadc\t%rax, %r10\n\tadc\t%rdx, %r11\n\tmov\t%r10, (rp)\n\tmov\t%r11, 8(rp)\n\tmov\t16(rp), %r10\n\tadc\t%r10, %r10\n\tsbb\t%ebp, %ebp\t\tC save CF\n\tneg\t%ebp\n\tmov\t8(up), 
%rax\n\tmul\t%rax\n\tadd\t%ebx, %ebx\t\tC restore carry\n\tadc\t%rax, %r10\n\tadc\t%rbp, %rdx\n\tmov\t%r10, 16(rp)\n\tmov\t%rdx, 24(rp)\n\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sub_err1_n.asm",
    "content": "dnl  AMD64 mpn_sub_err1_n\n\ndnl  Copyright (C) 2009, David Harvey\n\ndnl  All rights reserved.\n\ndnl  Redistribution and use in source and binary forms, with or without\ndnl  modification, are permitted provided that the following conditions are\ndnl  met:\n\ndnl  1. Redistributions of source code must retain the above copyright notice,\ndnl  this list of conditions and the following disclaimer.\n\ndnl  2. Redistributions in binary form must reproduce the above copyright\ndnl  notice, this list of conditions and the following disclaimer in the\ndnl  documentation and/or other materials provided with the distribution.\n\ndnl  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\ndnl  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\ndnl  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\ndnl  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\ndnl  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\ndnl  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\ndnl  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\ndnl  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\ndnl  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\ndnl  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\ndnl  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC K8,K9:\t 3.166\nC K10:\t\t ?\nC P4:\t\t ?\nC P6-15 (Core2): ?\nC P6-28 (Atom):\t ?\n\nC ret mpn_sub_err1(mp_ptr rp,mp_ptr up,mp_ptr vp,mp_ptr ep,mp_ptr_t yp,mp_size_t n,mp_limb_t cy)\nC rax                    rdi,      rsi,      rdx,      rcx,         r8           r9       8(rsp)=>r10\n\nC INPUT 
PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`vp',\t`%rdx')\ndefine(`ep',\t`%rcx')\ndefine(`yp',\t`%r8')\ndefine(`n',\t`%r9')\ndefine(`cy_param',\t`8(%rsp)')\ndefine(`el',\t`%rbx')\ndefine(`eh',\t`%rbp')\ndefine(`t0',\t`%r10')\ndefine(`t1',\t`%r11')\ndefine(`w',\t`%r12')\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_sub_err1_n)\n\tmov\tcy_param, %rax  C cy\n\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\n\txor    el, el          C zero el, eh\n\txor    eh, eh\n\tlea\t(rp,n,8), rp    C rp += n, up += n, vp += n\n\tlea\t(up,n,8), up\n\tlea\t(vp,n,8), vp\n\n\ttest\t$1, n           C if n is odd goto L(odd)\n\tjnz\tL(odd)\n\nL(even):\t\n\tlea\t-8(yp,n,8), yp  C yp += n - 1\n\tneg\tn               C { n = -n }\n\tjmp\tL(top)\n\n\tALIGN(16)\nL(odd):                         C n is odd, do extra iteration\n\tlea\t-16(yp,n,8), yp   C yp += n - 2\n\tneg\tn                 C { n = -n }\n\tshr\t$1, %rax          C rp[0] = up[0] - vp[0] - (cy&1)\n\tmov\t(up,n,8), w       \n\tsbb\t(vp,n,8), w\n\tcmovc\t8(yp), el         C if borrow el = *yp\n\tmov\tw, (rp,n,8)\n\tsetc\t%al               C store borrow\n\tinc\tn                 C n++\n\tjz\tL(end)            C goto end if we are done\n\n\tALIGN(16)\nL(top):\n       mov     (up,n,8), w     C rp[n] = up[n] - vp[n] - borrow\n\tshr     $1, %rax        C { restore borrow }\n\tsbb     (vp,n,8), w\n\tmov     $0, t1          C initialise t1\n\tmov     w, (rp,n,8)\n\tmov     $0, t0          C initialise t0\n\tmov     8(up,n,8), w    C rp[n+1] = up[n+1] - vp[n+1] - borrow\n\tcmovc   (yp), t0        C if borrow t0 = yp\n\tsbb     8(vp,n,8), w\n\tcmovc   -8(yp), t1      C if next borrow t1 = *(yp-1)\n\tsetc    %al             C { save borrow }\n\tadd     t0, el          C (eh:el) += borrow*yp limb\n\tadc     $0, eh\n\tadd     t1, el          C (eh:el) += next borrow*next yp limb\n\tmov     w, 8(rp,n,8)\n\tadc     $0, eh\n\tadd     $2, n           C n += 2\n\tlea     -16(yp), yp     C yp -= 2\n\tjnz     L(top)          C 
if not done goto top\n\nL(end):\t\n\tmov\tel, (ep)         C write out (eh:el)\n\tmov\teh, 8(ep)\n\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/sub_err2_n.asm",
    "content": "dnl  AMD64 mpn_sub_err2_n\n\ndnl  Copyright (C) 2009, David Harvey\n\ndnl  All rights reserved.\n\ndnl  Redistribution and use in source and binary forms, with or without\ndnl  modification, are permitted provided that the following conditions are\ndnl  met:\n\ndnl  1. Redistributions of source code must retain the above copyright notice,\ndnl  this list of conditions and the following disclaimer.\n\ndnl  2. Redistributions in binary form must reproduce the above copyright\ndnl  notice, this list of conditions and the following disclaimer in the\ndnl  documentation and/or other materials provided with the distribution.\n\ndnl  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\ndnl  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\ndnl  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\ndnl  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\ndnl  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\ndnl  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\ndnl  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\ndnl  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\ndnl  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\ndnl  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\ndnl  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\ninclude(`../config.m4')\n\nC\t     cycles/limb\nC K8,K9:\t 4.5\nC K10:\t\t ?\nC P4:\t\t ?\nC P6-15 (Core2): ?\nC P6-28 (Atom):\t ?\n\nC\nC mp_limb_t mpn_sub_err2_n (* rp,* up, * vp, * ep, * yp1, * yp2, n, cy)\nC\n\nC INPUT PARAMETERS\ndefine(`rp',\t`%rdi')\ndefine(`up',\t`%rsi')\ndefine(`vp',\t`%rdx')\ndefine(`ep',\t`%rcx')\ndefine(`yp1',\t`%r8')\ndefine(`yp2',   `%r9')\ndefine(`n_param',     `8(%rsp)')\ndefine(`cy_param',    `16(%rsp)')\n\ndefine(`cy1',   `%r14')\ndefine(`cy2',   `%rax')\n\t\ndefine(`n',     
`%r10')\n\ndefine(`w',     `%rbx')\ndefine(`e1l',\t`%rbp')\ndefine(`e1h',\t`%r11')\ndefine(`e2l',\t`%r12')\ndefine(`e2h',\t`%r13')\n\nASM_START()\n\tTEXT\n\tALIGN(16)\nPROLOGUE(mpn_sub_err2_n)\n\tmov\tcy_param, cy2  C cy2\n\tmov\tn_param, n     C n\n\n\tpush\t%rbx\n\tpush\t%rbp\n\tpush\t%r12\n\tpush\t%r13\n\tpush\t%r14\n\n\txor e1l, e1l       C zero e1l, elh, e2l, e2h\n\txor e1h, e1h\n\txor e2l, e2l\n\txor e2h, e2h\n\n\tsub\typ1, yp2        C yp2 -= yp1\n\n\tlea\t(rp,n,8), rp    C rp += n, up += n, vp += n\n\tlea\t(up,n,8), up\n\tlea\t(vp,n,8), vp\n\n\ttest\t$1, n           C if n is odd goto L(odd)\n\tjnz\tL(odd)\n\n\tlea\t-8(yp1,n,8), yp1  C { yp1 += n - 1 }\n\tneg\tn                 C { n = -n }\n\tjmp\tL(top)\n\n\tALIGN(16)\nL(odd):                           C n is odd, do extra iteration\n\tlea\t-16(yp1,n,8), yp1   C yp1 += n - 2\n\tneg\tn                   C { n = -n }\n\tshr\t$1, cy2             C rp[0] = up[0] - vp[0] - (cy2&1)\n\tmov\t(up,n,8), w         \n\tsbb\t(vp,n,8), w         \n\tcmovc\t8(yp1), e1l         C if borrow2 el1 = *(yp1+1) \n\tcmovc\t8(yp1,yp2), e2l     C if borrow2 e2l = *(yp2+1)\n\tmov\tw, (rp,n,8)         \n\tsbb\tcy2, cy2            C move borrow2 into cy2\n\tinc\tn                   C n++\n\tjz\tL(end)              C goto end if we are done\n\t\n\tALIGN(16)\nL(top):\n       mov     (up,n,8), w\n\tshr     $1, cy2         C restore borrow2\n\tsbb     (vp,n,8), w\n\tmov     w, (rp,n,8)     C rp[n] = up[n] - vp[n] - borrow2\n\tsbb     cy1, cy1        C generate mask, preserve CF\n\n\tmov     8(up,n,8), w    C rp[n] = up[n+1] - vp[n+1] - borrow1\n\tsbb     8(vp,n,8), w\n\tmov     w, 8(rp,n,8)\n\tsbb     cy2, cy2        C generate mask, preserve CF\n\n\tmov     (yp1), w\t   C (e1h:e1l) += cy1 * yp1 limb\n\tand     cy1, w\n\tadd     w, e1l\n\tadc     $0, e1h\n\n\tand     (yp1,yp2), cy1  C (e2h:e2l) += cy1 * yp2 limb\n\tadd     cy1, e2l\n\tadc     $0, e2h\n\n\tmov     -8(yp1), w\t   C (e1h:e1l) += cy2 * next yp1 limb\n\tand     cy2, w\n\tadd 
    w, e1l\n\tadc     $0, e1h\n\n\tmov     -8(yp1,yp2), w  C (e2h:e2l) += cy2 * next yp2 limb\n\tand     cy2, w\n\tadd     w, e2l\n\tadc     $0, e2h\n\n\tadd     $2, n           C n += 2\n\tlea     -16(yp1), yp1   C yp1 -= 2\n\tjnz     L(top)          C if not done goto top\nL(end):\n\n\tmov\te1l, (ep)        C write out e1l, e1h, e2l, e2h\n\tmov\te1h, 8(ep)\n\tmov\te2l, 16(ep)\n\tmov\te2h, 24(ep)\n\n\tand\t$1, %eax\t   C return carry\n\n\tpop\t%r14\n\tpop\t%r13\n\tpop\t%r12\n\tpop\t%rbp\n\tpop\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64/x86_64-defs.m4",
    "content": "divert(-1)\n\ndnl  m4 macros for x86-64 assembler.\n\ndnl WARNING: only a few of the macros have been ported from x86 to x86-64\n\ndnl  This file is just an adaptation of similar file in the x86 directory.\ndnl  Adapted by P. Gaudry in April 2005.\ndnl  Here is the copyright of the original x86 version:\n\ndnl  Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.\ndnl \ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 3 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\ndnl  Usage: ret_internal\ndnl\ndnl  A plain \"ret\", without any __cyg_profile_func_exit call.  
This can be\ndnl  used for a return which is internal to some function, such as when\ndnl  getting %eip for PIC.\n\ndefine(ret_internal,\nm4_assert_numargs(-1)\n``ret'')\n\ndnl  Usage: CPUVEC_FUNCS_LIST\ndnl\ndnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the\ndnl  order they appear in that structure.\n\ndefine(CPUVEC_FUNCS_LIST,\n``add_err1_n',\n`add_err2_n',\n`add_n',\n`addmul_1',\n`copyd',\n`copyi',\n`divexact_1',\n`divexact_by3c',\n`divexact_byfobm1',\n`divrem_1',\n`divrem_2',\n`divrem_euclidean_qr_1',\n`divrem_euclidean_qr_2',\n`gcd_1',\n`lshift',\n`mod_1',\n`mod_34lsub1',\n`modexact_1c_odd',\n`mul_1',\n`mul_basecase',\n`mulmid_basecase',\n`preinv_divrem_1',\n`preinv_mod_1',\n`redc_1',\n`rshift',\n`sqr_basecase',\n`sub_err1_n',\n`sub_err2_n',\n`sub_n',\n`submul_1',\n`sumdiff_n'')\n\ndnl  Notes:\ndnl\ndnl  m4 isn't perfect for processing BSD style x86 assembler code, the main\ndnl  problems are,\ndnl\ndnl  1. Doing define(foo,123) and then using foo in an addressing mode like\ndnl     foo(%ebx) expands as a macro rather than a constant.  This is worked\ndnl     around by using deflit() from asm-defs.m4, instead of define().\ndnl\ndnl  2. Immediates in macro definitions need a space or `' to stop the $\ndnl     looking like a macro parameter.  For example,\ndnl\ndnl  \t        define(foo, `mov $ 123, %eax')\ndnl\ndnl     This is only a problem in macro definitions, not in ordinary text,\ndnl     and not in macro parameters like text passed to forloop() or ifdef().\n\n\ndeflit(BYTES_PER_MP_LIMB, 8)\n\n\ndnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  
We\ndnl  undefine PIC since we don't need to be position independent in this\ndnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.\n\nifdef(`DLL_EXPORT',`undefine(`PIC')')\n\n\ndnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)\ndnl\ndnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,\ndnl  since different alignments are wanted in various circumstances.  So for\ndnl  instance,\ndnl\ndnl                  TEXT\ndnl                  ALIGN(16)\ndnl          PROLOGUE(mpn_add_n)\ndnl          ...\ndnl          EPILOGUE()\n\ndefine(`PROLOGUE_cpu',\nm4_assert_numargs(1)\n\t`GLOBL\t$1\n\tTYPE($1,`function')\n$1:\nifelse(WANT_PROFILING,`no',,`call_mcount\n')')\n\n\ndnl  Usage: call_mcount\ndnl\ndnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of\ndnl  the assembler routines use %ebp this way, so it's done only for the\ndnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount\ndnl  gets the current function from (%esp) and the parent from 4(%ebp).\ndnl\ndnl  For `prof' style profiling gcc generates mcount calls without setting\ndnl  up %ebp, and the same is done here.\n\ndefine(`call_mcount',\nm4_assert_numargs(-1)\nm4_assert_defined(`WANT_PROFILING')\nm4_assert_defined(`MCOUNT_PIC_REG')\nm4_assert_defined(`MCOUNT_NONPIC_REG')\nm4_assert_defined(`MCOUNT_PIC_CALL')\nm4_assert_defined(`MCOUNT_NONPIC_CALL')\n`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,\n`\tDATA\n\tALIGN(4)\nL(mcount_data_`'mcount_data_counter):\n\tW32\t0\n\tTEXT\n')dnl\nifelse(WANT_PROFILING,`gprof',\n`\tpushl\t%ebp\n\tmovl\t%esp, %ebp\n')dnl\nifdef(`PIC',\n`\tpushl\t%ebx\n\tmcount_movl_GOT_ebx\nifelse(MCOUNT_PIC_REG,,,\n`\tleal\tL(mcount_data_`'mcount_data_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')\nMCOUNT_PIC_CALL\n\tpopl\t%ebx\n',`dnl non-PIC\nifelse(MCOUNT_NONPIC_REG,,,\n`\tmovl\t`$'L(mcount_data_`'mcount_data_counter), 
MCOUNT_NONPIC_REG\n')dnl\nMCOUNT_NONPIC_CALL\n')dnl\nifelse(WANT_PROFILING,`gprof',\n`\tpopl\t%ebp\n')\ndefine(`mcount_data_counter',eval(mcount_data_counter+1))')\n\ndefine(mcount_data_counter,1)\n\ndnl  Called: mcount_movl_GOT_ebx\ndnl  Label H is \"here\", the %eip obtained from the call.  C is the called\ndnl  subroutine.  J is the jump across that subroutine.  A fetch and \"ret\"\ndnl  is always done so calls and returns are balanced for the benefit of the\ndnl  various x86s that have return stack branch prediction.\ndefine(mcount_movl_GOT_ebx,\nm4_assert_numargs(-1)\n`\tcall\tL(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter)\nL(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter):\n\tjmp\tL(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter)\nL(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter):\n\tmovl\t(%esp), %ebx\n\tret\nL(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter):\n\taddl\t$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter)], %ebx\ndefine(`mcount_movl_GOT_ebx_counter',incr(mcount_movl_GOT_ebx_counter))')\n\ndefine(mcount_movl_GOT_ebx_counter,1)\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Various x86 macros.\ndnl\n\n\ndnl  Usage: ALIGN_OFFSET(bytes,offset)\ndnl\ndnl  Align to `offset' away from a multiple of `bytes'.\ndnl\ndnl  This is useful for testing, for example align to something very strict\ndnl  and see what effect offsets from it have, \"ALIGN_OFFSET(256,32)\".\ndnl\ndnl  Generally you wouldn't execute across the padding, but it's done with\ndnl  nop's so it'll work.\n\ndefine(ALIGN_OFFSET,\nm4_assert_numargs(2)\n`ALIGN($1)\nforloop(`i',1,$2,`\tnop\n')')\n\n\ndnl  Usage: defframe(name,offset)\ndnl\ndnl  Make a definition like the following with which to access a parameter\ndnl  or variable on the stack.\ndnl\ndnl         define(name,`FRAME+offset(%esp)')\ndnl\ndnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one\ndnl  byte if FRAME+offset 
is zero, by putting (%esp) rather than 0(%esp).\ndnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the\ndnl  zero offset is wanted.\ndnl\ndnl  The new macro also gets a check that when it's used FRAME is actually\ndnl  defined, and that the final %esp offset isn't negative, which would\ndnl  mean an attempt to access something below the current %esp.\ndnl\ndnl  deflit() is used rather than a plain define(), so the new macro won't\ndnl  delete any following parenthesized expression.  name(%edi) will come\ndnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should\ndnl  provoke an error, which is better than silently giving just 16(%esp).\ndnl\ndnl  See README for more on the suggested way to access the stack frame.\n\ndefine(defframe,\nm4_assert_numargs(2)\n`deflit(`$1',\nm4_assert_defined(`FRAME')\n`defframe_check_notbelow(`$1',$2,FRAME)dnl\ndefframe_empty_if_zero(FRAME+($2))(%rsp)')')\n\ndnl  Called: defframe_empty_if_zero(expression)\ndefine(defframe_empty_if_zero,\nm4_assert_numargs(1)\n`ifelse(defframe_empty_if_zero_disabled,1,\n`eval($1)',\n`m4_empty_if_zero($1)')')\n\ndnl  Called: defframe_check_notbelow(`name',offset,FRAME)\ndefine(defframe_check_notbelow,\nm4_assert_numargs(3)\n`ifelse(eval(($3)+($2)<0),1,\n`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes\n')')')\n\n\ndnl  Usage: FRAME_pushl()\ndnl         FRAME_popl()\ndnl         FRAME_addl_esp(n)\ndnl         FRAME_subl_esp(n)\ndnl\ndnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl\ndnl  %esp of n bytes.\ndnl\ndnl  Using these macros is completely optional.  
Sometimes it makes more\ndnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's\ndnl  jumps and different sequences of FRAME values need to be used in\ndnl  different places.\n\ndefine(FRAME_pushq,\nm4_assert_numargs(0)\nm4_assert_defined(`FRAME')\n`deflit(`FRAME',eval(FRAME+8))')\n\ndefine(FRAME_popq,\nm4_assert_numargs(0)\nm4_assert_defined(`FRAME')\n`deflit(`FRAME',eval(FRAME-8))')\n\ndefine(FRAME_addq_rsp,\nm4_assert_numargs(1)\nm4_assert_defined(`FRAME')\n`deflit(`FRAME',eval(FRAME-($1)))')\n\ndefine(FRAME_subq_rsp,\nm4_assert_numargs(1)\nm4_assert_defined(`FRAME')\n`deflit(`FRAME',eval(FRAME+($1)))')\n\n\ndnl  Usage: defframe_pushl(name)\ndnl\ndnl  Do a combination FRAME_pushl() and a defframe() to name the stack\ndnl  location just pushed.  This should come after a pushl instruction.\ndnl  Putting it on the same line works and avoids lengthening the code.  For\ndnl  example,\ndnl\ndnl         pushl   %eax     defframe_pushl(VAR_COUNTER)\ndnl\ndnl  Notice the defframe() is done with an unquoted -FRAME thus giving its\ndnl  current value without tracking future changes.\n\ndefine(defframe_pushq,\nm4_assert_numargs(1)\n`FRAME_pushl()defframe(`$1',-FRAME)')\n\n\ndnl  --------------------------------------------------------------------------\ndnl  Assembler instruction macros.\ndnl\n\n\ndnl  Usage: x86_lookup(target, key,value, key,value, ...)\ndnl         x86_lookup_p(target, key,value, key,value, ...)\ndnl\ndnl  Look for `target' among the `key' parameters.\ndnl\ndnl  x86_lookup expands to the corresponding `value', or generates an error\ndnl  if `target' isn't found.\ndnl\ndnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.\n\ndefine(x86_lookup,\nm4_assert_numargs_range(1,999)\n`ifelse(eval($#<3),1,\n`m4_error(`unrecognised part of x86 instruction: $1\n')',\n`ifelse(`$1',`$2', `$3',\n`x86_lookup(`$1',shift(shift(shift($@))))')')')\n\ndefine(x86_lookup_p,\nm4_assert_numargs_range(1,999)\n`ifelse(eval($#<3),1, 
`0',\n`ifelse(`$1',`$2',    `1',\n`x86_lookup_p(`$1',shift(shift(shift($@))))')')')\n\n\ndnl  Usage: x86_opcode_reg32(reg)\ndnl         x86_opcode_reg32_p(reg)\ndnl\ndnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given\ndnl  32-bit register, eg. `%ebp' turns into 5.\ndnl\ndnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0\ndnl  if not.\n\ndefine(x86_opcode_reg32,\nm4_assert_numargs(1)\n`x86_lookup(`$1',x86_opcode_reg32_list)')\n\ndefine(x86_opcode_reg32_p,\nm4_assert_onearg()\n`x86_lookup_p(`$1',x86_opcode_reg32_list)')\n\ndefine(x86_opcode_reg32_list,\n``%eax',0,\n`%ecx',1,\n`%edx',2,\n`%ebx',3,\n`%esp',4,\n`%ebp',5,\n`%esi',6,\n`%edi',7')\n\n\ndnl  Usage: x86_opcode_tttn(cond)\ndnl\ndnl  Expand to the 4-bit \"tttn\" field value for the given x86 branch\ndnl  condition (like `c', `ae', etc).\n\ndefine(x86_opcode_tttn,\nm4_assert_numargs(1)\n`x86_lookup(`$1',x86_opcode_ttn_list)')\n\ndefine(x86_opcode_tttn_list,\n``o',  0,\n`no',  1,\n`b',   2, `c',  2, `nae',2,\n`nb',  3, `nc', 3, `ae', 3,\n`e',   4, `z',  4,\n`ne',  5, `nz', 5,\n`be',  6, `na', 6,\n`nbe', 7, `a',  7,\n`s',   8,\n`ns',  9,\n`p',  10, `pe', 10, `npo',10,\n`np', 11, `npe',11, `po', 11,\n`l',  12, `nge',12,\n`nl', 13, `ge', 13,\n`le', 14, `ng', 14,\n`nle',15, `g',  15')\n\n\ndnl  Usage: x86_opcode_regmmx(reg)\ndnl\ndnl  Validate the given mmx register, and return its number, 0 to 7.\n\ndefine(x86_opcode_regmmx,\nm4_assert_numargs(1)\n`x86_lookup(`$1',x86_opcode_regmmx_list)')\n\ndefine(x86_opcode_regmmx_list,\n``%mm0',0,\n`%mm1',1,\n`%mm2',2,\n`%mm3',3,\n`%mm4',4,\n`%mm5',5,\n`%mm6',6,\n`%mm7',7')\n\ndnl  Usage: Zdisp(inst,op,op,op)\ndnl\ndnl  Generate explicit .byte sequences if necessary to force a byte-sized\ndnl  zero displacement on an instruction.  
For example,\ndnl\ndnl         Zdisp(  movl,   0,(%esi), %eax)\ndnl\ndnl  expands to\ndnl\ndnl                 .byte   139,70,0  C movl 0(%esi), %eax\ndnl\ndnl  If the displacement given isn't 0, then normal assembler code is\ndnl  generated.  For example,\ndnl\ndnl         Zdisp(  movl,   4,(%esi), %eax)\ndnl\ndnl  expands to\ndnl\ndnl                 movl    4(%esi), %eax\ndnl\ndnl  This means a single Zdisp() form can be used with an expression for the\ndnl  displacement, and .byte will be used only if necessary.  The\ndnl  displacement argument is eval()ed.\ndnl\ndnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is\ndnl  implemented with a table of instructions and encodings.  A new entry is\ndnl  needed for any different operation or registers.  The table is split\ndnl  into separate macros to avoid overflowing BSD m4 macro expansion space.\n\ndefine(Zdisp,\nm4_assert_numargs(4)\n`define(`Zdisp_found',0)dnl\nZdisp_1($@)dnl\nZdisp_2($@)dnl\nZdisp_3($@)dnl\nZdisp_4($@)dnl\nZdisp_5($@)dnl\nifelse(Zdisp_found,0,\n`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4\n')')')\n\ndefine(Zdisp_1,`dnl\nZdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl\nZdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl\nZdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl\nZdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl\nZdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl\nZdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl\nZdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl\nZdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl\nZdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl\nZdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl\nZdisp_match( movzbl, 0,(%ecx,%edi), %edi, 
`0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl\n')\ndefine(Zdisp_2,`dnl\nZdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl\nZdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl\nZdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl\nZdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl\nZdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl\nZdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl\nZdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl\nZdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl\n')\ndefine(Zdisp_3,`dnl\nZdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl\nZdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl\nZdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl\nZdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl\nZdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl\nZdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl\nZdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl\nZdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl\nZdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl\nZdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl\n')\ndefine(Zdisp_4,`dnl\nZdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl\nZdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl\nZdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl\nZdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl\nZdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl\nZdisp_match( movd, %mm2, 0,(%ecx,%eax,4), 
`0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl\nZdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl\n')\ndefine(Zdisp_5,`dnl\nZdisp_match( movq, 0,(%rsi), %rax, `0x48,0x8b,0x46,0x00', $@)`'dnl\nZdisp_match( addq, %rcx, 0, (%rdi), `0x48,0x01,0x4f,0x00', $@)`'dnl\nZdisp_match( addq, %rbx, 0, (%rdi), `0x48,0x01,0x5f,0x00', $@)`'dnl\nZdisp_match( subq, %rcx, 0, (%rdi), `0x48,0x29,0x4f,0x00', $@)`'dnl\nZdisp_match( movq, 0,(%rbx), %rsi, `0x48,0x8b,0x73,0x00', $@)`'dnl\nZdisp_match( movq, %rsi, 0,(%rdi), `0x48,0x89,0x77,0x00', $@)`'dnl\nZdisp_match( adcq, 0, (%rdx), %rsi, `0x48,0x13,0x72,0x00', $@)`'dnl\nZdisp_match( sbbq, 0, (%rdx), %rsi, `0x48,0x1b,0x72,0x00', $@)`'dnl\n')\n\n\ndefine(Zdisp_match,\nm4_assert_numargs(9)\n`ifelse(eval(m4_stringequal_p(`$1',`$6')\n\t&& m4_stringequal_p(`$2',0)\n\t&& m4_stringequal_p(`$3',`$8')\n\t&& m4_stringequal_p(`$4',`$9')),1,\n`define(`Zdisp_found',1)dnl\nifelse(eval(`$7'),0,\n`\t.byte\t$5  C `$1 0$3, $4'',\n`\t$6\t$7$8, $9')',\n\n`ifelse(eval(m4_stringequal_p(`$1',`$6')\n\t&& m4_stringequal_p(`$2',`$7')\n\t&& m4_stringequal_p(`$3',0)\n\t&& m4_stringequal_p(`$4',`$9')),1,\n`define(`Zdisp_found',1)dnl\nifelse(eval(`$8'),0,\n`\t.byte\t$5  C `$1 $2, 0$4'',\n`\t$6\t$7, $8$9')')')')\n\n\ndnl  Usage: shldl(count,src,dst)\ndnl         shrdl(count,src,dst)\ndnl         shldw(count,src,dst)\ndnl         shrdw(count,src,dst)\ndnl\ndnl  Generate a double-shift instruction, possibly omitting a %cl count\ndnl  parameter if that's what the assembler requires, as indicated by\ndnl  WANT_SHLDL_CL in config.m4.  For example,\ndnl\ndnl         shldl(  %cl, %eax, %ebx)\ndnl\ndnl  turns into either\ndnl\ndnl         shldl   %cl, %eax, %ebx\ndnl  or\ndnl         shldl   %eax, %ebx\ndnl\ndnl  Immediate counts are always passed through unchanged.  
For example,\ndnl\ndnl         shrdl(  $2, %esi, %edi)\ndnl  becomes\ndnl         shrdl   $2, %esi, %edi\ndnl\ndnl\ndnl  If you forget to use the macro form \"shldl( ...)\" and instead write\ndnl  just a plain \"shldl ...\", an error results.  This ensures the necessary\ndnl  variant treatment of %cl isn't accidentally bypassed.\n\ndefine(define_shd_instruction,\nm4_assert_numargs(1)\n`define($1,\nm4_instruction_wrapper()\nm4_assert_numargs(3)\n`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl\nm4_doublequote($`'2),m4_doublequote($`'3)))')\n\ndnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc\ndefine_shd_instruction(shldl)\ndefine_shd_instruction(shrdl)\ndefine_shd_instruction(shldw)\ndefine_shd_instruction(shrdw)\n\ndnl  Called: shd_instruction(op,count,src,dst)\ndefine(shd_instruction,\nm4_assert_numargs(4)\nm4_assert_defined(`WANT_SHLDL_CL')\n`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,\n``$1'\t`$3', `$4'',\n``$1'\t`$2', `$3', `$4'')')\n\n\ndnl  Usage: ASSERT([cond][,instructions])\ndnl\ndnl  If WANT_ASSERT is 1, output the given instructions and expect the given\ndnl  flags condition to then be satisfied.  For example,\ndnl\ndnl         ASSERT(ne, `cmpl %eax, %ebx')\ndnl\ndnl  The instructions can be omitted to just assert a flags condition with\ndnl  no extra calculation.  For example,\ndnl\ndnl         ASSERT(nc)\ndnl\ndnl  When `instructions' is not empty, a pushf/popf is added to preserve the\ndnl  flags, but the instructions themselves must preserve any registers that\ndnl  matter.  FRAME is adjusted for the push and pop, so the instructions\ndnl  given can use defframe() stack variables.\ndnl\ndnl  The condition can be omitted to just output the given instructions when\ndnl  assertion checking is wanted.  
In this case the pushf/popf is omitted.\ndnl  For example,\ndnl\ndnl         ASSERT(, `movl %eax, VAR_KEEPVAL')\n\ndefine(ASSERT,\nm4_assert_numargs_range(1,2)\n`ifelse(WANT_ASSERT,1,\n`ifelse(`$1',,\n\t`$2',\n\t`C ASSERT\nifelse(`$2',,,`\tpushf\tifdef(`FRAME',`FRAME_pushl()')')\n\t$2\n\tj`$1'\tL(ASSERT_ok`'ASSERT_counter)\n\tud2\tC assertion failed\nL(ASSERT_ok`'ASSERT_counter):\nifelse(`$2',,,`\tpopf\tifdef(`FRAME',`FRAME_popl()')')\ndefine(`ASSERT_counter',incr(ASSERT_counter))')')')\n\ndefine(ASSERT_counter,1)\n\ndefine(`R32',\n        `ifelse($1,`%rax',`%eax',\n                $1,`%rbx',`%ebx',\n                $1,`%rcx',`%ecx',\n                $1,`%rdx',`%edx',\n                $1,`%rsi',`%esi',\n                $1,`%rdi',`%edi',\n                $1,`%rbp',`%ebp',\n                $1,`%r8',`%r8d',\n                $1,`%r9',`%r9d',\n                $1,`%r10',`%r10d',\n                $1,`%r11',`%r11d',\n                $1,`%r12',`%r12d',\n                $1,`%r13',`%r13d',\n                $1,`%r14',`%r14d',\n                $1,`%r15',`%r15d')')\n\ndefine(`R8',\n        `ifelse($1,`%rax',`%al',\n                $1,`%rbx',`%bl',\n                $1,`%rcx',`%cl',\n                $1,`%rdx',`%dl',\n                $1,`%rsi',`%sil',\n                $1,`%rdi',`%dil',\n                $1,`%rbp',`%bpl',\n                $1,`%r8',`%r8b',\n                $1,`%r9',`%r9b',\n                $1,`%r10',`%r10b',\n                $1,`%r11',`%r11b',\n                $1,`%r12',`%r12b',\n                $1,`%r13',`%r13b',\n                $1,`%r14',`%r14b',\n                $1,`%r15',`%r15b')')\n\ndefine(`JUMPTABSECT', `.section .data.rel.ro.local,\"aw\",@progbits')\n\n\ndnl  Usage: JMPENT(targlabel,tablabel)\n\ndefine(`JMPENT',`dnl\nifdef(`PIC',\n        `.long  $1-$2'\n,\n        `.quad  $1'\n)')\n\ndnl  Usage: movl_text_address(label,register)\ndnl\ndnl  Get the address of a text segment label, using either a plain movl or a\ndnl  position-independent calculation, as 
necessary.  For example,\ndnl\ndnl         movl_code_address(L(foo),%eax)\ndnl\ndnl  This macro is only meant for use in ASSERT()s or when testing, since\ndnl  the PIC sequence it generates will want to be done with a ret balancing\ndnl  the call on CPUs with return address branch predition.\ndnl\ndnl  The addl generated here has a backward reference to the label, and so\ndnl  won't suffer from the two forwards references bug in old gas (described\ndnl  in mpn/x86/README).\n\ndefine(movl_text_address,\nm4_assert_numargs(2)\n`ifdef(`PIC',\n\t`call\tL(movl_text_address_`'movl_text_address_counter)\nL(movl_text_address_`'movl_text_address_counter):\n\tpopl\t$2\tC %eip\n\taddl\t`$'$1-L(movl_text_address_`'movl_text_address_counter), $2\ndefine(`movl_text_address_counter',incr(movl_text_address_counter))',\n\t`movl\t`$'$1, $2')')\n\ndefine(movl_text_address_counter,1)\n\n\ndnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)\ndnl\ndnl  Expand to either \"notl `reg'\" or \"xorl $GMP_NUMB_BITS,`reg'\" as\ndnl  appropriate for nails in use or not.\n\ndefine(notl_or_xorl_GMP_NUMB_MASK,\nm4_assert_numargs(1)\n`ifelse(GMP_NAIL_BITS,0,\n`notl\t`$1'',\n`xorl\t$GMP_NUMB_MASK, `$1'')')\n\ndefine(`mulx', `ifelse(`$#',3,``mulx' $1,$2,$3',`$#',4,``mulx' $1$2,$3,$4',``mulx'')')\ndefine(`adcx', `ifelse(`$#',2,``adcx' $1,$2',`$#',3,``adcx' $1$2,$3',``adcx'')')\ndefine(`adox', `ifelse(`$#',2,``adox' $1,$2',`$#',3,``adox' $1$2,$3',``adox'')')\ndefine(`ABI_SUPPORT', `')\ndefine(`FUNC_ENTRY', `')\ndefine(`FUNC_EXIT', `')\ndefine(`IFDOS', `')\n\ndivert`'dnl\n"
  },
  {
    "path": "mpn/x86_64w/add_err1_n.asm",
    "content": "; PROLOGUE(mpn_add_err1_n)\n\n;  Copyright (C) 2009, David Harvey\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n;  mp_limb_t mpn_add_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_sub_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                         rdi     rsi     rdx     rcx       r8        r9     8(rsp)\n;  rax                         rcx     rdx      r8      r9 [rsp+40]  [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12\n\n%macro fun 2\n\txalign  16\n    FRAME_PROC %1, 0, reg_save_list\n 
   mov     rax, qword [rsp+stack_use+48]\n    lea     rdi, [rcx+rax*8]\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    mov     rcx, r9\n    mov     r9, rax\n    mov     r8, [rsp+stack_use+40]\n    mov     rax, [rsp+stack_use+56]\n    \n\txor     rbx, rbx\n\txor     rbp, rbp\n\ttest    r9, 1\n\tjnz     %%2\n%%1:lea     r8, [r8+r9*8-8]\n\tneg     r9\n\tjmp     %%3\n\n\txalign  16\n%%2:lea     r8, [r8+r9*8-16]\n\tneg     r9\n\tshr     rax, 1\n\tmov     r12, [rsi+r9*8]\n\t%2      r12, [rdx+r9*8]\n\tcmovc   rbx, [r8+8]\n\tmov     [rdi+r9*8], r12\n\tsetc    al\n\tinc     r9\n\tjz      %%4\n\n\txalign  16\n%%3:mov     r12, [rsi+r9*8]\n\tshr     rax, 1\n\t%2      r12, [rdx+r9*8]\n\tmov     r11, 0\n\tmov     [rdi+r9*8], r12\n\tmov     r10, 0\n\tmov     r12, [rsi+r9*8+8]\n\tcmovc   r10, [r8]\n\t%2      r12, [rdx+r9*8+8]\n\tcmovc   r11, [r8-8]\n\tsetc    al\n\tadd     rbx, r10\n\tadc     rbp, 0\n\tadd     rbx, r11\n\tmov     [rdi+r9*8+8], r12\n\tadc     rbp, 0\n\tadd     r9, 2\n\tlea     r8, [r8-16]\n\tjnz     %%3\n%%4:mov     [rcx], rbx\n\tmov     [rcx+8], rbp\n    END_PROC reg_save_list\n%endmacro\n\n\n    CPU  Athlon64\n    BITS 64\n;   global __gmpn_add_err1_n\n\n    fun mpn_add_err1_n, adc\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/add_err2_n.asm",
    "content": "; PROLOGUE(mpn_add_err2_n)\n;\n;  AMD64 mpn_add_err2_n, mpn_sub_err2_n\n;\n;  Copyright (C) 2009, David Harvey\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n;  mp_limb_t mpn_add_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);\n;  mp_limb_t mpn_sub_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);\n;  rax                          rdi     rsi     rdx     rcx       r8      r9     8(rsp)    16(rsp)\n;  rax                          rcx     rdx      r8      r9 [rsp+40] [rsp+48]  [rsp+56]   [rsp+64]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   
rbx, rsi, rdi, rbp, r12, r13, r14\n\n%macro fun 2\n\txalign  16\n    FRAME_PROC %1, 0, reg_save_list\n    mov     rax, qword [rsp+stack_use+56]\n    lea     rdi, [rcx+rax*8]\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    mov     rcx, r9\n    mov     r10, rax\n    mov     r8, [rsp+stack_use+40]\n    mov     r9, [rsp+stack_use+48]\n    mov     rax, [rsp+stack_use+64]\n    \n\txor     rbp, rbp\n\txor     r11, r11\n\txor     r12, r12\n\txor     r13, r13\n\tsub     r9, r8\n\ttest    r10, 1\n\tjnz     %%1\n\tlea     r8, [r8+r10*8-8]\n\tneg     r10\n\tjmp     %%2\n\n\txalign  16\n%%1:lea     r8, [r8+r10*8-16]\n\tneg     r10\n\tshr     rax, 1\n\tmov     rbx, [rsi+r10*8]\n\t%2      rbx, [rdx+r10*8]\n\tcmovc   rbp, [r8+8]\n\tcmovc   r12, [r8+r9+8]\n\tmov     [rdi+r10*8], rbx\n\tsbb     rax, rax\n\tinc     r10\n\tjz      %%3\n\n\txalign  16\n%%2:mov     rbx, [rsi+r10*8]\n\tshr     rax, 1\n\t%2      rbx, [rdx+r10*8]\n\tmov     [rdi+r10*8], rbx\n\tsbb     r14, r14\n\tmov     rbx, [rsi+r10*8+8]\n\t%2      rbx, [rdx+r10*8+8]\n\tmov     [rdi+r10*8+8], rbx\n\tsbb     rax, rax\n    mov     rbx, [r8]\n\tand     rbx, r14\n\tadd     rbp, rbx\n\tadc     r11, 0\n    and     r14, [r8+r9]\n\tadd     r12, r14\n\tadc     r13, 0\n    mov     rbx, [r8-8]\n\tand     rbx, rax\n\tadd     rbp, rbx\n\tadc     r11, 0\n    mov     rbx, [r8+r9-8]\n\tand     rbx, rax\n\tadd     r12, rbx\n\tadc     r13, 0\n\tadd     r10, 2\n\tlea     r8, [r8-16]\n\tjnz     %%2\n%%3:mov     [rcx], rbp\n\tmov     [rcx+8], r11\n\tmov     [rcx+16], r12\n\tmov     [rcx+24], r13\n\tand     eax, 1\n    END_PROC reg_save_list\n%endmacro\n\n    CPU  Athlon64\n    BITS 64\n;\t\tglobal __gmpn_add_err2_n\n\n    fun mpn_add_err2_n, adc\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n\tjmp     .7\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, 
rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_add_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_add_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    adc     r10, [r8]\n    adc     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    adc     r10, [r8-16]\n    adc     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/addlsh1_n.asm",
    "content": "; PROLOGUE(mpn_addlsh1_n)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addlsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tlea     rdx, [r8+rax*8]\n    mov     rcx, rax\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      .2\n.1:\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     .1\n.2:\tcmp     rcx, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, 
r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     .3\n.4:\tadd     rax, r9\n\tneg     rax\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/addlsh_n.asm",
    "content": "; PROLOGUE(mpn_addlsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_addlsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_addlsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_addlsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_addlsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\n    jmp     entry\n    \n    xalign 16\nentry:\n\tFRAME_PROC ?mpn_addlsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     
rcx\n\tshr     r11, cl\n\tor      r8, r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tadc     rbp, [rsi+r10*8+8]\n\tadc     rbx, [rsi+r10*8+16]\n\tadc     r12, [rsi+r10*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tmov     [rdi+r10*8+8], rbp\n\tmov     [rdi+r10*8+16], rbx\n\tmov     [rdi+r10*8+24], r12\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principal issues that I have had to address are the differences\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, 
BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_addmul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_addmul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_atom_addmul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n    EXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    add      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], a_z\n    adc     rdx, 0\n    add      b_z, rax\n    mov     rax, 0\n    
mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.5: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    add      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    add      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.6: mov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     
%2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, add\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, add\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    add      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    add      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.2 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     
r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_and_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    and     r10, [r8]\n    and     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    and     T3, [r8-16]\n    and     T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    and     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    and     r10, 
[r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    and     r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_andn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1:\tmov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    and     r10, [rdx+r9*8+24]\n    and     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    and     T3, [rdx+r9*8+8]\n    and     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n   
 mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                     rdi     rsi       rdx\n;                     rcx     rdx        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_com_n\n    mov     rax, r8\n    sub     rax, 4\n    jc      .2\n\n    xalign  8\n.1: mov     r8, [rdx+rax*8+24]\n    mov     r9, [rdx+rax*8+16]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+24], r8\n    mov     [rcx+rax*8+16], r9\n    mov     r8, [rdx+rax*8+8]\n    mov     r9, [rdx+rax*8]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+8], r8\n    mov     [rcx+rax*8], r9\n    sub     rax, 4\n    jae     .1\n.2: add     rax, 4\n    jz      .3\n\n; Could still have potential cache-bank conflicts in this tail part\n\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, 
[rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyd(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     rcx, [rcx+r8*8-8]\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tmov     r11, [rdx-24]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tlea     rcx, [rcx-32]\n\tsub     r8, 4\n\tmov     [rcx+16], r10\n\tmov     [rcx+8], r11\n\tlea     rdx, [rdx-32]\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tmov     [rcx-16], 
r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; dont want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_iorn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], 
r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 
1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     xor     r8, r8\n        shr     rax, 1\n        adc     r8, r8\n        shr     rax, 1\n        adc     r8, 0\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        xor     r8, r8\n        shr     rbx, 1\n      
  adc     r8, r8\n        shr     rbx, 1\n        adc     r8, 0\n        shr     rbx, 1\n        adc     r8, 0\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/atom/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quater block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrrow from second to third quarter quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrrow from third to fourth quarter quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/atom/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift1\n    mov     rax, r8\n\tand     r8, 7\n\tinc     r8\n\tmov     [rsp+0x18], r8\n\tshr     rax, 3\n\tcmp     rax, 0\n\tjz      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx], r8\n\tmov     [rcx+8], r9\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], r11\n\tmov     r8, [rdx+32]\n\tmov     r9, [rdx+40]\n\tmov     r10, [rdx+48]\n\tmov     r11, [rdx+56]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx+32], r8\n\tmov     [rcx+40], r9\n\tmov     [rcx+48], r10\n\tmov     [rcx+56], r11\n\tlea     rcx, [rcx+64]\n\tdec     rax\n\tlea     rdx, [rdx+64]\n\tjnz     .1\n.2:\tmov     
rax, [rsp+0x18]\n\tdec     rax\n\tjz      .3\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     r8, [rdx]\n\tadc     r8, r8\n\tmov     [rcx], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+8]\n\tadc     r8, r8\n\tmov     [rcx+8], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+16]\n\tadc     r8, r8\n\tmov     [rcx+16], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+24]\n\tadc     r8, r8\n\tmov     [rcx+24], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+32]\n\tadc     r8, r8\n\tmov     [rcx+32], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+40]\n\tadc     r8, r8\n\tmov     [rcx+40], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+48]\n\tadc     r8, r8\n\tmov     [rcx+48], r8\n.3:\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n    FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n    \n\tmov     r13, [rsi+rdx*8-8]\n\tmov     rax, [rsi+rdx*8-16]\n\tmov     r8, [r9]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tsub     rdi, 2\n\t\n\txalign  16\n.1:\tmov     r10, [rsi+rdi*8-8]\n\tmul     r8\n\tadd     r10, rax\n\tmov     r11, 0\n\tadc     r11, rdx\n\tmov     rax, r13\n\tmul     r9\n\tadd     rax, r10\n\tmov     r13, r11\n\tadc     r13, rdx\n\tdec     rdi\n\tjnz     .1\n\n\tmov     [rcx], rax\n\tmov     rax, r8\n\tmul     r13\n\tadd     [rcx], rax\n\tadc     rdx, 0\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n\n\tmov     r14, [rsi+rdx*8-8]\n\tmov     r13, [rsi+rdx*8-16]\n\tmov     r11, [rsi+rdx*8-32]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tmov     rax, [rsi+rdx*8-24]\n\tsub     rdi, 6\n\tjc      .2\n\t\n\txalign  16\n.1:\tmul     r8\n\tmov     r12, 0\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, r9\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r10\n\tmul     r14\n\tadd     r13, rax\n\tmov     r11, [rsi+rdi*8+0]\n\tmov     r14, r12\n\tadc     r14, rdx\n\tmov     rax, [rsi+rdi*8+8]\n\tsub     rdi, 2\n\tjnc     .1\n.2:\tmul     
r8\n\tmov     r12, 0\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, r9\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r10\n\tmul     r14\n\tadd     r13, rax\n\tmov     r14, r12\n\tadc     r14, rdx\n\tcmp     rdi, -2\n\tje      .4\n.3:\tmov     r11, [rsi+rdi*8+8]\n\tmov     r12, 0\n\tmov     rax, r8\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r9\n\tmul     r14\n\tadd     r13, rax\n\tmov     r14, r12\n\tadc     r14, rdx\n.4:\tmov     rax, r8\n\tmul     r14\n\tadd     r13, rax\n\tadc     rdx, 0\n\tmov     [rcx], r13\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/atom/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n    \n    end\n    \n"
  },
  {
    "path": "mpn/x86_64w/atom/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n; Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_mul_1c\n    mov     r11, [rsp+0x28]\n    jmp     start\n\n    LEAF_PROC mpn_mul_1\n    xor     r11, r11\n    jmp     start\n\n    xalign  16\nstart:\n    FRAME_PROC ?mpn_atom_mul, 0, rbx\n    mov     rax, r8\n    mov     r8d, 3\n    lea     r10, [rdx+rax*8-24]\n    sub     r8, rax\n    lea     rcx, [rcx+rax*8-24]\n    jc      .1\n    jmp     .2\n\n    xalign  16\n.1: mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    mov     rax, [r10+r8*8+8]\n    adc     rbx, rdx\n    mul     r9\n    mov     r11d, 0\n    add     rbx, rax\n    mov     rax, [r10+r8*8+16]\n    adc     r11, rdx\n  
  mul     r9\n    mov     [rcx+r8*8+8], rbx\n    add     r11, rax\n    mov     ebx, 0\n    mov     [rcx+r8*8+16], r11\n    mov     rax, [r10+r8*8+24]\n    mov     r11d, 0\n    adc     rbx, rdx\n    mul     r9\n    add     rbx, rax\n    mov     [rcx+r8*8+24], rbx\n    adc     r11, rdx\n    add     r8, 4\n    jnc     .1\n.2:\ttest    r8, 2\n    jnz     .3\n    mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    mov     rax, [r10+r8*8+8]\n    adc     rbx, rdx\n    mul     r9\n    mov     r11d, 0\n    add     rbx, rax\n    adc     r11, rdx\n    add     r8, 2\n    mov     [rcx+r8*8-8], rbx\n.3: test    r8, 1\n    mov     rax, r11\n    jnz     .4\n    mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    adc     rbx, rdx\n    mov     rax, rbx\n.4: END_PROC rbx\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n    BITS 64\n\n\tFRAME_PROC mpn_mul_2, 0, reg_save_list\n\tmov     rax, r8\n\t\n\tmov     r8, [r9]\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, [r9+8]\n\tmov     rbx, 3\n\tsub     rbx, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tmov     r11, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, 
[rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 3\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 1\n\tja      .5\n\tje      .4\n.3:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:\n    mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 
2\n%macro mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov    
 r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      fiveormore\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_atom_mbc1, 5, frame\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      .2\n.1:\toldmpn_muladdmul_1_int 0\n    jmp     .5\n.2:\toldmpn_muladdmul_1_int 1\n    jmp     .5\n.3:\toldmpn_muladdmul_1_int 2\n    jmp     .5\n.4:\toldmpn_muladdmul_1_int 3\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\nfiveormore:\n    WIN64_GCC_PROC mpn_atom_mbc2, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .12\n.6:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .7\n    mul1lp\n.7:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .11\n    jz      .10\n    jp      .9\n.8:\tmulnext0\n    jmp     .20\n.9:\tmulnext1\n    jmp     .14\n.10:mulnext2\n    jmp     .16\n.11:mulnext3\n    jmp     .18\n     ; as all the mul2pro? 
are the same\n.12:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .19\n    jz      .17\n    jp      .15\n.13:mul2epi3\n.14:mpn_addmul_2_int 3\n    WIN64_GCC_EXIT frame\n\n.15:mul2epi2\n.16:mpn_addmul_2_int 2\n    WIN64_GCC_EXIT frame\n\n.17:mul2epi1\n.18:mpn_addmul_2_int 1\n    WIN64_GCC_EXIT frame\n\n.19:mul2epi0\n.20:mpn_addmul_2_int 0\n\n    xalign  16\n.21:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nand_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [rdx+r9*8+24]\n    mov     r11, [rdx+r9*8+16]\n    and     r10, [r8+r9*8+24]\n    and     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [rdx+r9*8+8]\n    mov     T4, [rdx+r9*8]\n    and     T3, [r8+r9*8+8]\n    and     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nior_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1:\tmov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_atom_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/rsh1add_n.asm",
    "content": "; PROLOGUE(mpn_rsh1add_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rsh1add_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1add_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov 
    [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/rsh1sub_n.asm",
    "content": "; PROLOGUE(mpn_rsh1sub_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  return bottom bit of difference\n;  subtraction treated as two's complement\n;\n;  mp_limb_t mpn_rsh1sub_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1sub_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      
r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/sqr_basecase.asm",
    "content": "; PROLOGUE(mpn_sqr_basecase)\n\n;  Version 1.0L5\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sqr_basecase(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx\n;  rax                           rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    CPU  Core2\n    BITS 64\n\n%macro mulloop 0\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    db      0x26\n    add     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, 
[rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro mulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     [rdi+r11*8+40], rdx\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext1 0\n\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext2 0\n\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext3 0\n\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulloop 0\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    db      0x26\n   
 adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [rdi+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [rdi+r11*8+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+r11*8+32], r12\n    adc     rdx, 0\n    add     r14, 1              ; ***\n    mov     [rdi+r11*8+40], rdx\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+r11*8+24], r12\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], rdx\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    add     r14, 1              ; ***\n 
   lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [rdi+r11*8+8], r9\n    adc     r10, 0\n    mov     [rdi+r11*8+16], r10\n    add     r14, 1              ; ***\n    lea     rdi, [rdi+8]\n    cmp     r14, 4\n\n%endmacro\n\n    LEAF_PROC mpn_sqr_basecase\n    cmp     r8, 3\n    ja      fourormore\n    jz      three\n    jp      two\n    mov     rax, [rdx]\n    mul     rax\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n\n    xalign  16\nfourormore:\n    FRAME_PROC ?mpn_atom_sqr_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n\n    mov     [rsp+stack_use+8], rdi\n    mov     [rsp+stack_use+16], rsi\n    mov     [rsp+stack_use+24], rdx\n\n    mov     r13, [rsi]\n    mov     rax, [rsi+8]\n    mov     r14d, 6\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .1\n    mulloop\n.1: mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    je      .4\n    ja      .5\n    jp      .3\n.2:\tmulnext0\n    jmp     .8\n\n    xalign  16\n.3:\tmulnext1\n    jmp     .10\n\n    xalign  16\n.4:\tmulnext2\n    jmp     .12\n\n    xalign  16\n.5:\tmulnext3\n\n    xalign  16\n.6:\tmov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .7\n    addmulloop\n.7:\taddmulnext0\n.8:\tmov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .9\n    
addmulloop\n.9:\taddmulnext1\n.10:mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .11\n    addmulloop\n.11:addmulnext2\n.12:mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .13\n    addmulloop\n.13:addmulnext3\n    jnz     .6\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mul     r13\n    add     [rdi+r14*8], rax\n    adc     rdx, 0\n    mov     [rdi+r14*8+8], rdx\n    mov     rdi, [rsp+stack_use+8]\n    mov     rsi, [rsp+stack_use+16]\n    mov     rcx, [rsp+stack_use+24]\n    xor     rbx, rbx\n    xor     r11, r11\n    lea     rsi, [rsi+rcx*8]\n    mov     [rdi], r11\n    lea     r10, [rdi+rcx*8]\n    mov     [r10+rcx*8-8], r11\n    neg     rcx\n\n    xalign  16\n.14:mov     rax, [rsi+rcx*8]\n    mul     rax\n    mov     r8, [rdi]\n    mov     r9, [rdi+8]\n    add     rbx, 1\n    adc     r8, r8\n    adc     r9, r9\n    sbb     rbx, rbx\n    add     r11, 1\n    adc     r8, rax\n    adc     r9, rdx\n    sbb     r11, r11\n    mov     [rdi], r8\n    mov     [rdi+8], r9\n    add     rcx, 1          ; ***\n    lea     rdi, [rdi+16]\n    jnz     .14\n    END_PROC reg_save_list\n\n    xalign  16\ntwo:mov     rax, [rdx]\n    mov     r9, [rdx+8]\n    mov     r8, rax\n    mul     rax\n    mov     [rcx], rax\n    mov     rax, r9\n    mov     [rcx+8], rdx\n    mul     rax\n    mov     [rcx+16], rax\n    mov     rax, r8\n    mov     r10, rdx\n    mul     r9\n    add     rax, rax\n    adc     rdx, rdx\n    adc     r10, 0\n    add     [rcx+8], rax\n    adc     [rcx+16], rdx\n    adc     r10, 0\n    mov     [rcx+24], r10\n    ret\n\n    xalign  16\nthree:\n    FRAME_PROC ?mpn_atom_sqr_2, 0, rsi, rdi\n    mov     rdi, rcx\n    mov     rsi, rdx\n\n    mov    
 r8, [rsi]\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     r11d, 0\n    mov     [rcx+8], rax\n    mov     rax, [rsi+16]\n    mov     r9, rdx\n    mul     r8\n    mov     r8, [rsi+8]\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    mul     r8\n    mov     [rcx+16], r9\n    add     r11, rax\n    mov     r9d, 0\n    mov     [rcx+24], r11\n    adc     r9, rdx\n    mov     [rcx+32], r9\n    mov     rdi, -3\n    xor     r10, r10\n    xor     r11, r11\n    lea     rsi, [rsi+24]\n    mov     [rcx], r11\n    mov     [rcx+40], r11\n\n.1: mov     rax, [rsi+rdi*8]\n    mul     rax\n    mov     r8, [rcx]\n    mov     r9, [rcx+8]\n    add     r10, 1\n    adc     r8, r8\n    adc     r9, r9\n    sbb     r10, r10\n    add     r11, 1\n    adc     r8, rax\n    adc     r9, rdx\n    sbb     r11, r11\n    mov     [rcx], r8\n    mov     [rcx+8], r9\n    add     rdi, 1      ; ***\n    lea     rcx, [rcx+16]\n    jnz     .1\n    END_PROC rsi, rdi\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  Copyright 2009 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_store\n\n\tcmp     rdx, 0\n\tjz      .4\n\tmovq    xmm0, r8\n\tmovddup xmm0, xmm0\n\tlea     rcx, [rcx-16]\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+16], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 2\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+16]\n\tsub     rdx, 2\n\tmovdqa  [rcx], xmm0\n\tjnc     .2\n.3:\tjnp     .4\n\tmov     [rcx+16], r8\n.4:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_sub_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_sub_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    sbb     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2:\tmov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    sbb     r10, [r8]\n    sbb     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    sbb     r10, [r8-16]\n    sbb     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    sbb      r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/sublsh1_n.asm",
    "content": "; PROLOGUE(mpn_sublsh1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx  \n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n    mov     rax, r9\n\n    lea     rdi, [rcx+rax*8-56]\n    lea     rsi, [rdx+rax*8-56]\n    lea     rdx, [ r8+rax*8-56]\n    mov     rcx, rax\n\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     .3\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      .2\n\t\n\txalign   16\n.1: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, 
[rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     .1\n.2: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n.3: cmp     r8, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4: mov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.5: mov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.6: mov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.7: add     r10, 1\n.8: sbb     rax, 0\n\tneg     rax\n\tEND_PROC reg_save_list\n\n\tend\n\t"
  },
  {
    "path": "mpn/x86_64w/atom/sublsh_n.asm",
    "content": "; PROLOGUE(mpn_sublsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_sublsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  mp_limb_t mpn_sublsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_sublsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_sublsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\nentry:\n\tFRAME_PROC ?mpn_sublsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\t\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     rcx\n\tshr     r11, cl\n\tor      r8, r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tsbb     r11, r8\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tmov     r13, [rsi+r10*8+8]\n\tmov     r14, [rsi+r10*8+16]\n\tmov     r15, [rsi+r10*8+24]\n\tsbb     r11, r8\n\tsbb     r13, rbp\n\tsbb     r14, rbx\n\tsbb     r15, r12\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tmov     [rdi+r10*8+8], r13\n\tmov     [rdi+r10*8+16], r14\n\tmov     [rdi+r10*8+24], r15\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n; add the result to a second limb vector.\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n;\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << 
UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    LEAF_PROC mpn_submul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_submul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_atom_submul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n\tEXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.4:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    sub      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], 
a_z\n    adc     rdx, 0\n    sub      b_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.5:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    sub      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    sub      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.6:\tmov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte 
rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, sub\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, sub\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    sub      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    sub      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/atom/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xnor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/add_err1_n.asm",
    "content": "; PROLOGUE(mpn_add_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_add_err1(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_add_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    mov     r9, qword [rsp+stack_use+48]\n\tmov     r10, [rsp+stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\t\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\n\txalign  16\n.1:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tadc     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n    \n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tjmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_add_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_add_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    adc     r10, [r8]\n    adc     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    adc     r10, [r8-16]\n    adc     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/addadd_n.asm",
    "content": "; PROLOGUE(mpn_addadd_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addadd_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tadc     r8, [rcx+r9*8]\n\tadc     rbx, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, 
[rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tadc     r8, [rcx+r9*8]\n\tadc     rbx, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rdx+r9*8+48]\n\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tadc     rbx, [rcx+r9*8+40]\n\tadc     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tadc     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rdx+r9*8+32]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tadd     rax, r10\n.8:\tneg     rax\n    END_PROC 
reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/addlsh1_n.asm",
    "content": "; PROLOGUE(mpn_addlsh1_n)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addlsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tlea     rdx, [r8+rax*8]\n    mov     rcx, rax\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      .2\n.1:\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     .1\n.2:\tcmp     rcx, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, 
r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     .3\n.4:\tadd     rax, r9\n\tneg     rax\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/addlsh_n.asm",
    "content": "; PROLOGUE(mpn_addlsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_addlsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_addlsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_addlsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_addlsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\n    jmp     entry\n    \n    xalign 16\nentry:\n\tFRAME_PROC ?mpn_addlsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     
rcx\n\tshr     r11, cl\n\tor      r8, r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tadc     rbp, [rsi+r10*8+8]\n\tadc     rbx, [rsi+r10*8+16]\n\tadc     r12, [rsi+r10*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tmov     [rdi+r10*8+8], rbp\n\tmov     [rdi+r10*8+16], rbx\n\tmov     [rdi+r10*8+24], r12\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, 
BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_addmul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_addmul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_bobcat_addmul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n    EXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    add      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], a_z\n    adc     rdx, 0\n    add      b_z, rax\n    mov     rax, 0\n    
mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.5: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    add      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    add      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.6: mov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     
%2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, add\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, add\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    add      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    add      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/addsub_n.asm",
    "content": "; PROLOGUE(mpn_addsub_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addsub_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, 
[rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rdx+r9*8+48]\n\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rdx+r9*8+32]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tsub     rax, r10\n.8:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_and_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    and     r10, [r8]\n    and     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    and     T3, [r8-16]\n    and     T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    and     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    and     r10, 
[r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    and     r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_andn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1:\tmov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    and     r10, [rdx+r9*8+24]\n    and     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    and     T3, [rdx+r9*8+8]\n    and     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n   
 mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                     rdi     rsi       rdx\n;                     rcx     rdx        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n    LEAF_PROC mpn_com_n\n    mov     rax, r8\n    sub     rax, 4\n    jc      .2\n\n    xalign  8\n.1: mov     r8, [rdx+rax*8+24]\n    mov     r9, [rdx+rax*8+16]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+24], r8\n    mov     [rcx+rax*8+16], r9\n    mov     r8, [rdx+rax*8+8]\n    mov     r9, [rdx+rax*8]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+8], r8\n    mov     [rcx+rax*8], r9\n    sub     rax, 4\n    jae     .1\n.2: add     rax, 4\n    jz      .3\n\n; Could still have potential cache-bank conflicts in this tail part\n\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    dec     rax\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    dec     rax\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    
not     r8\n    mov     [rcx+rax*8-8], r8\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyd(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n;\tmpn_copyd(mp_ptr rcx ,mp_ptr rdx ,mp_size_t r8)\n;\t(rcx,r8)=(rdx,r8)\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     rcx, [rcx+r8*8-8]\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tlea     rcx, [rcx-32]\n\tmov     r11, [rdx-24]\n\tmov     [rcx+32], rax\n\tsub     r8, 4\n\tmov     [rcx+24], r9\n\tmov     [rcx+16], r10\n\tlea     rdx, [rdx-32]\n\tmov     [rcx+8], r11\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, 
[rdx-16]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tmov     [rcx-16], r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n;\tmpn_copyi(mp_ptr rcx ,mp_ptr rdx ,mp_size_t r8)\n;\t(rcx,r8)=(rdx,r8)\n\n\tLEAF_PROC mpn_copyi\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tsub     r8, 4\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tlea     rcx, [rcx+32]\n\tmov     [rcx-32], rax\n\tmov     [rcx-24], r9\n\tmov     [rcx-16], r10\n\tlea     rdx, [rdx+32]\n\tmov     [rcx-8], r11\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     [rcx], rax\n\tmov     [rcx+8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     [rcx], rax\n\tmov     [rcx+8], r9\n\tmov     
[rcx+16], r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; dont want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                   rdi     rsi        rdx        rcx\n;  rax                                   rcx     rdx         r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-8]\n\tlea     rsi, [rdx+rax*8-8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov    
 r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     r10, 0\n\tadd     r9, 2\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/divrem_hensel_r_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_r_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_r_1(mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                 rdi        rsi        rdx\n;  rax                                 rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n\tLEAF_PROC mpn_divrem_hensel_r_1\n    movsxd  rax, edx\n\tmov     rdx, r8\n\tlea     r10, [rcx+rax*8]\n\tmov     r9, 0\n\tsub     r9, rax\n\n    mov     rcx, rdx    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [r10+r9*8]\n\tsbb     
rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tret\n\t\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    FRAME_PROC mpn_hamdist, 0, rdi\n\txor     eax, eax\n\tlea     rdi, [rcx+r8*8-24]\n\tlea     rdx, [rdx+r8*8-24]\n\tmov     rcx, 3\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r8, [rdi+rcx*8]\n\tmov     r9, [rdi+rcx*8+8]\n\txor     r8, [rdx+rcx*8]\n\tmov     r10, [rdi+rcx*8+16]\n\tpopcnt  r8, r8\n\txor     r9, [rdx+rcx*8+8]\n\txor     r10, [rdx+rcx*8+16]\n\tpopcnt  r9, r9\n\tmov     r11, [rdi+rcx*8+24]\n\tadd     rax, r8\n\tpopcnt  r10, r10\n\txor     r11, [rdx+rcx*8+24]\n\tadd     rax, r9\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 4\n\tjnc     .0\n.1:\n\tcmp     rcx, 2\n\tja      .5\n\tje      .4\n\tjp      .3\n.2:\tmov     r8, [rdi]\n\txor     r8, [rdx]\n\tpopcnt  r8, r8\n\tadd     rax, r8\n.3:\tmov     r8, [rdi+8]\n\txor     r8, [rdx+8]\n\tpopcnt  
r8, r8\n\tadd     rax, r8\n.4:\tmov     r8, [rdi+16]\n\txor     r8, [rdx+16]\n\tpopcnt  r8, r8\n\tadd     rax, r8\n.5: END_PROC rdi\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_ior_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    or      r10, [r8]\n    or      r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    or      T3, [r8-16]\n    or      T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    or      r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    or      r10, 
[r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    or      r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_iorn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     
[rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, 
[rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        
jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quater block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11:    mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13:    mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift\n    mov     r9d, r9d\n    mov     eax, 64\n    sub     rax, r9\n    movq    mm0, r9\n    sub     r8, 4\n    movq    mm1, rax\n    movq    mm5, [rdx+r8*8+24]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    movq    rax, mm5\n    psllq   mm3, mm0\n    jbe     .2\n\n    xalign  16\n.1:\tmovq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    mm5, [rdx+r8*8+8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8+16], mm4\n    psllq   mm3, mm0\n    movq    mm2, [rdx+r8*8]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+8], mm3\n    psllq   mm4, 
mm0\n    movq    mm5, [rdx+r8*8-8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8], mm4\n    psllq   mm3, mm0\n    sub     r8, 4\n    ja      .1\n\n; r8 is 0,-1,-2,-3 here , so we have 3+r8 limbs to do\n.2: cmp     r8, -1\n    jl      .3\n    movq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    mm5, [rdx+r8*8+8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8+16], mm4\n    psllq   mm3, mm0\n    sub     r8, 2\n.3: test    r8, 1\n    jnz     .4\n    movq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    [rcx+r8*8+16], mm4\n    emms\n    ret\n.4: movq    [rcx+r8*8+24], mm3\n    emms\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift1\n    mov     rax, r8\n\tand     r8, 7\n\tinc     r8\n\tmov     [rsp+0x18], r8\n\tshr     rax, 3\n\tcmp     rax, 0\n\tjz      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx], r8\n\tmov     [rcx+8], r9\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], r11\n\tmov     r8, [rdx+32]\n\tmov     r9, [rdx+40]\n\tmov     r10, [rdx+48]\n\tmov     r11, [rdx+56]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx+32], r8\n\tmov     [rcx+40], r9\n\tmov     [rcx+48], r10\n\tmov     [rcx+56], r11\n\tlea     rcx, [rcx+64]\n\tdec     rax\n\tlea     rdx, [rdx+64]\n\tjnz     .1\n.2:\tmov     
rax, [rsp+0x18]\n\tdec     rax\n\tjz      .3\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     r8, [rdx]\n\tadc     r8, r8\n\tmov     [rcx], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+8]\n\tadc     r8, r8\n\tmov     [rcx+8], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+16]\n\tadc     r8, r8\n\tmov     [rcx+16], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+24]\n\tadc     r8, r8\n\tmov     [rcx+24], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+32]\n\tadc     r8, r8\n\tmov     [rcx+32], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+40]\n\tadc     r8, r8\n\tmov     [rcx+40], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+48]\n\tadc     r8, r8\n\tmov     [rcx+48], r8\n.3:\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/lshift2.asm",
    "content": "; PROLOGUE(mpn_lshift2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\txor     eax, eax\n\txor     edx, edx\n\tcmp     rcx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tmov     r10, [rsi+rcx*8+16]\n\tmov     r11, [rsi+rcx*8+24]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rdi+rcx*8+24], r11\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8-24], r9\n\tmov     [rdi+rcx*8-16], r10\n\tjnc     
.1\n.2:\tcmp     rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tmov     r10, [rsi+rcx*8+16]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n\tmov     [rdi+rcx*8+16], r10\n\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+rcx*8]\n\tadd     rax, rax\n\tadc     r8, r8\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n.6:\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/lshiftc.asm",
    "content": "; PROLOGUE(mpn_lshiftc)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void lshiftc(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;                  rdi     rsi        rdx      rcx\n;                  rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshiftc\n\tmov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tpcmpeqb mm6, mm6\n\tmovq    mm1, rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 5\n\tmovq    mm5, [rdx+r8*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjc      .2\n\t\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tmovq    mm5, [rdx+r8*8-8]\n\tsub     r8, 4\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+40], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+32], mm4\n\tpsllq   mm3, mm0\n\tjnc     .1\n.2: cmp     r8, -2\n\tjz      .4\n\tjp      .5\n\tjs      .6\n.3:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8], mm4\n\temms\n\tret\n\n\txalign  16\n.4:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\temms\n\tret\n\n\txalign  16\n.5:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\temms\n\tret\n\n\txalign  16\n.6:\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\temms\n\tret\n\t\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n        FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r13, [rsi+rdx*8-8]\n        mov     rax, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        sub     rcx, 2\n        \n        align   16  \n.1:     mov     r11d, 0\n        mov     r10, [rsi+rcx*8-8]\n        mul     r8\n        add     r10, rax\n        lea     rax, [r13]\n        adc     r11, rdx\n        lea     r13, [r11]\n        lea     r9, [r9]\n        lea     r8, [r8]\n        mul     r9\n        add     rax, r10\n        adc     r13, rdx\n        sub     rcx, 1\n        jnz     .1\n        mov     [rdi], rax\n        mov     
rax, r8\n        mul     r13\n        add     [rdi], rax\n        adc     rdx, 0\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_mul_1c\n    mov     r11, [rsp+0x28]\n    jmp     start\n    LEAF_PROC mpn_mul_1\n    xor     r11, r11\n\nstart:\n    mov     rax, r8\n\tlea     r10, [rdx+rax*8-8]\n\tlea     rcx, [rcx+rax*8-8]\n\tmov     r8d, 1\n\tsub     r8, rax\n\tmov     rax, [r10+r8*8]\n\tjz      .1\n\n\txalign  16\n.0:\tmul     r9\n\tadd     rax, r11\n\tmov     [rcx+r8*8], rax\n\tmov     rax, [r10+r8*8+8]\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tadd     r8, 1\n\tjnc     .0\n.1: mul     r9\n\tadd     rax, r11\n\tmov     [rcx+r8*8], rax\n\tmov     eax, 0\n\tadc     rax, rdx\n\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:\n    mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 
2\n%macro mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov    
 r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      fiveormore\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_bobcat_mbc1, 5, frame\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      .2\n.1:\toldmpn_muladdmul_1_int 0\n    jmp     .5\n.2:\toldmpn_muladdmul_1_int 1\n    jmp     .5\n.3:\toldmpn_muladdmul_1_int 2\n    jmp     .5\n.4:\toldmpn_muladdmul_1_int 3\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\nfiveormore:\n    WIN64_GCC_PROC mpn_bobcat_mbc2, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .12\n.6:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .7\n    mul1lp\n.7:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .11\n    jz      .10\n    jp      .9\n.8:\tmulnext0\n    jmp     .20\n.9:\tmulnext1\n    jmp     .14\n.10:mulnext2\n    jmp     .16\n.11:mulnext3\n    jmp     .18\n     ; as all the mul2pro? 
are the same\n.12:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .19\n    jz      .17\n    jp      .15\n.13:mul2epi3\n.14:mpn_addmul_2_int 3\n    WIN64_GCC_EXIT frame\n\n.15:mul2epi2\n.16:mpn_addmul_2_int 2\n    WIN64_GCC_EXIT frame\n\n.17:mul2epi1\n.18:mpn_addmul_2_int 1\n    WIN64_GCC_EXIT frame\n\n.19:mul2epi0\n.20:mpn_addmul_2_int 0\n\n    xalign  16\n.21:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n   CPU  Athlon64\n    BITS 64\n\n %define reg_save_list rbx, rsi, rdi\n\n    FRAME_PROC mpn_nand_n, 0, reg_save_list\n\tlea     rdi, [rcx+r9*8]\n\tlea     rsi, [rdx+r9*8]\n\tlea     rdx, [r8+r9*8]\n    mov     rcx, r9\n\tneg     rcx\n\tadd     rcx, 3\n\tjc      .2\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjc      .1\n\txalign  16\n.0:\n\tand     r8, [rsi+rcx*8-56]\n\tnot     r8\n\tand     r9, [rsi+rcx*8-48]\n\tand     r10, [rsi+rcx*8-40]\n\tand     r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     
[rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjnc     .0\n.1:\n\tand     r8, [rsi+rcx*8-56]\n\tnot     r8\n\tand     r9, [rsi+rcx*8-48]\n\tand     r10, [rsi+rcx*8-40]\n\tand     r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     [rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n.2:\n\tcmp     rcx, 2\n\tjg      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rdx-24]\n\tand     r8, [rsi-24]\n\tnot     r8\n\tmov     [rdi-24], r8\n.4:\tmov     r8, [rdx-16]\n\tand     r8, [rsi-16]\n\tnot     r8\n\tmov     [rdi-16], r8\n.5:\tmov     r8, [rdx-8]\n\tand     r8, [rsi-8]\n\tnot     r8\n\tmov     [rdi-8], r8\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n %define reg_save_list rbx, rsi, rdi\n\n    FRAME_PROC mpn_nior_n, 0, reg_save_list\n\tlea     rdi, [rcx+r9*8]\n\tlea     rsi, [rdx+r9*8]\n\tlea     rdx, [r8+r9*8]\n    mov     rcx, r9\n\tneg     rcx\n\tadd     rcx, 3\n\tjc      .2\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjc      .1\n\txalign  16\n.0:\n\tor      r8, [rsi+rcx*8-56]\n\tnot     r8\n\tor      r9, [rsi+rcx*8-48]\n\tor      r10, [rsi+rcx*8-40]\n\tor      r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     
[rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjnc     .0\n.1:\n\tor      r8, [rsi+rcx*8-56]\n\tnot     r8\n\tor      r9, [rsi+rcx*8-48]\n\tor      r10, [rsi+rcx*8-40]\n\tor      r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     [rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n.2:\n\tcmp     rcx, 2\n\tjg      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rdx-24]\n\tor      r8, [rsi-24]\n\tnot     r8\n\tmov     [rdi-24], r8\n.4:\tmov     r8, [rdx-16]\n\tor      r8, [rsi-16]\n\tnot     r8\n\tmov     [rdi-16], r8\n.5:\tmov     r8, [rdx-8]\n\tor      r8, [rsi-8]\n\tnot     r8\n\tmov     [rdi-8], r8\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/not.asm",
    "content": "; PROLOGUE(mpn_not)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_not(mp_ptr, mp_size_t)\n;                  rdi        rsi\n;                  rcx        rdx\n\n%include \"yasm_mac.inc\"\n\n        CPU  Athlon64\n        BITS 64\n        LEAF_PROC mpn_not\n        mov     eax, 1\n        lea     rcx, [rcx+rdx*8-8]\n        sub     rax, rdx\n        jnc     .2\n\n        align   16\n.1:     not     qword[rcx+rax*8]\n        not     qword[rcx+rax*8+8]\n        add     rax, 2\n        jnc     .1\n.2:     jnz     .4\n.3:     not     qword[rcx+rax*8]\n.4:     ret     \n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\teax                        rcx,      rdx\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_popcount\n\tmov     r8, 5\n\tlea     rcx, [rcx+rdx*8-40]\n\txor     eax, eax\n\tsub     r8, rdx\n\tjnc     .1\n\txalign  16\n.0:\n\tpopcnt  r9, [rcx+r8*8]\n\tpopcnt  r10, [rcx+r8*8+8]\n\tpopcnt  r11, [rcx+r8*8+16]\n\tpopcnt  rdx, [rcx+r8*8+24]\n\tadd     rax, r9\n\tadd     rax, rdx\n\tadd     rax, r10\n\tpopcnt  r9, [rcx+r8*8+32]\n\tpopcnt  r10, [rcx+r8*8+40]\n\tadd     rax, r9\n\tadd     rax, r11\n\tadd     rax, r10\n\tadd     r8, 6\n\tjnc     .0\n.1:\n\tlea     rdx, [rel .2]\n\tlea     r8, [r8+r8*8]\n\tadd     rdx, r8\n\tjmp     rdx\n.2:\n\tnop\n\tpopcnt  r9, [rcx]\n\tadd     rax, r9\n.3:\n\tpopcnt  r10, [rcx+8]\n\tadd     rax, r10\n.4:\tpopcnt  r11, [rcx+16]\n\tadd     rax, r11\n.5:\tpopcnt  rdx, [rcx+24]\n\tadd     rax, rdx\n.6:\tpopcnt  r9, [rcx+32]\n\tadd     rax, r9\n.7:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     
r9, [rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    dec     r15\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov    
 rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    dec     r15\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    dec     r15\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    dec     r15\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp     r9, 1\n    je      one\n    FRAME_PROC ?mpn_bobcat_redc_1, 0, 
reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/rsh1add_n.asm",
    "content": "; PROLOGUE(mpn_rsh1add_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rsh1add_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1add_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov 
    [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/rsh1sub_n.asm",
    "content": "; PROLOGUE(mpn_rsh1sub_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  return bottom bit of difference\n;  subtraction treated as two compliment\n;\n;  mp_limb_t mpn_rsh1sub_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1sub_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      
r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    mov     r8d, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extention of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // eailer , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store low 
quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.1.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_rshift\n    mov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tmovq    mm1, rax\n    mov     rax, r8\n\tmov     r8, 4\n\tlea     rdx, [rdx+rax*8-32]\n\tlea     rcx, [rcx+rax*8-32]\n\tsub     r8, rax\n\n\tmovq    mm5, [rdx+r8*8]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tmovq    rax, mm5\n\tpsrlq   mm3, mm0\n\tjge     .2\n\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    mm2, [rdx+r8*8+24]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm5, [rdx+r8*8+32]\n\tmovq    [rcx+r8*8+16], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+24], mm4\n\tpsrlq   mm3, mm0\n\tadd     r8, 4\n\tjnc     .1\n\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n\n.3:\tmovq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    mm2, [rdx+r8*8+24]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8+16], mm3\n\tpsrlq   mm4, mm0\n\tmovq    [rcx+r8*8+24], mm4\n\temms\n\tret\n\n\txalign  16\n.4: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    [rcx+r8*8+16], mm3\n\temms\n\tret\n\n\txalign  16\n.5: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    [rcx+r8*8+8], mm4\n\temms\n\tret\n\n\txalign  16\n.6: movq    [rcx+r8*8], mm3\n\temms\n\tret\n\tend\n\t"
  },
  {
    "path": "mpn/x86_64w/bobcat/rshift2.asm",
    "content": "; PROLOGUE(mpn_rshift2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift2(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rshift2, 0, reg_save_list\n\tlea     rsi, [rdx+24]\n\tlea     rdi, [rcx+24]\n    mov     rcx, r8\n\txor     eax, eax\n\txor     edx, edx\n\tsub     rcx, 4\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tmov     r10, [rsi+rcx*8-16]\n\tmov     r11, [rsi+rcx*8-24]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\trcr     r11, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\trcr     r11, 1\n\tmov     [rdi+rcx*8-24], r11\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tsub     rcx, 4\n\tmov     [rdi+rcx*8+24], r9\n\tmov     [rdi+rcx*8+16], r10\n\tjnc     .1\n.2:\tcmp     rcx, -2\n\tja      .4\n\tje      .5\n\tjp      
.6\n.3:\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tmov     r10, [rsi+rcx*8-16]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8-8], r9\n\tmov     [rdi+rcx*8-16], r10\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8-8], r9\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rsi+rcx*8]\n\tadd     rax, rax\n\trcr     r8, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n.7:\tEND_PROC reg_save_list\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/sqr_basecase.asm",
    "content": "; PROLOGUE(mpn_sqr_basecase)\n\n;  Version 1.0L5\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sqr_basecase(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx\n;  rax                           rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    CPU  Athlon64\n    BITS 64\n\n%macro mulloop 0\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    db      0x26\n    add     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, 
[rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro mulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     [rdi+r11*8+40], rdx\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext1 0\n\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext2 0\n\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro mulnext3 0\n\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulloop 0\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, 
[rsi+r11*8+32]\n    mul     r13\n    add     [rdi+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [rdi+r11*8+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+r11*8+32], r12\n    adc     rdx, 0\n    inc     r14\n    mov     [rdi+r11*8+40], rdx\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    add     [rdi+r11*8+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+r11*8+24], r12\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], rdx\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r14\n    lea     rdi, [rdi+8]\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    mov   
  r10d, 0\n    adc     r10, rdx\n    add     [rdi+r11*8+8], r9\n    adc     r10, 0\n    mov     [rdi+r11*8+16], r10\n    inc     r14\n    lea     rdi, [rdi+8]\n    cmp     r14, 4\n\n%endmacro\n\n    LEAF_PROC mpn_sqr_basecase\n    cmp     r8d, 3\n    ja      fourormore\n    jz      three\n    jp      two\n    mov     rax, [rdx]\n    mul     rax\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n\n    xalign  16\nfourormore:\n    FRAME_PROC ?mpn_bobcat_sqr_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n\n    mov     [rsp+stack_use+8], rdi\n    mov     [rsp+stack_use+16], rsi\n    mov     [rsp+stack_use+24], rdx\n\n    mov     r13, [rsi]\n    mov     rax, [rsi+8]\n    mov     r14d, 6\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .1\n    mulloop\n.1: mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    je      .4\n    ja      .5\n    jp      .3\n.2:\tmulnext0\n    jmp     .8\n\n    xalign  16\n.3:\tmulnext1\n    jmp     .10\n\n    xalign  16\n.4:\tmulnext2\n    jmp     .12\n\n    xalign  16\n.5:\tmulnext3\n\n    xalign  16\n.6:\tmov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .7\n    addmulloop\n.7:\taddmulnext0\n.8:\tmov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .9\n    addmulloop\n.9:\taddmulnext1\n.10:mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, 
[rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .11\n    addmulloop\n.11:addmulnext2\n.12:mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n    jge     .13\n    addmulloop\n.13:addmulnext3\n    jnz     .6\n\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rsi+r14*8-8]\n    mul     r13\n    add     [rdi+r14*8], rax\n    adc     rdx, 0\n    mov     [rdi+r14*8+8], rdx\n\n    mov     rdi, [rsp+stack_use+8]\n    mov     rsi, [rsp+stack_use+16]\n    mov     rcx, [rsp+stack_use+24]\n    xor     rbx, rbx\n    xor     r11, r11\n    lea     rsi, [rsi+rcx*8]\n    mov     [rdi], r11\n    lea     r10, [rdi+rcx*8]\n    mov     [r10+rcx*8-8], r11\n    neg     rcx\n\n    xalign  16\n.14:mov     rax, [rsi+rcx*8]\n    mul     rax\n    mov     r8, [rdi]\n    mov     r9, [rdi+8]\n    add     rbx, 1\n    adc     r8, r8\n    adc     r9, r9\n    sbb     rbx, rbx\n    add     r11, 1\n    adc     r8, rax\n    adc     r9, rdx\n    sbb     r11, r11\n    mov     [rdi], r8\n    mov     [rdi+8], r9\n    inc     rcx\n    lea     rdi, [rdi+16]\n    jnz     .14\n    END_PROC reg_save_list\n\n    xalign  16\ntwo:mov     rax, [rdx]\n    mov     r9, [rdx+8]\n    mov     r8, rax\n    mul     rax\n    mov     [rcx], rax\n    mov     rax, r9\n    mov     [rcx+8], rdx\n    mul     rax\n    mov     [rcx+16], rax\n    mov     rax, r8\n    mov     r10, rdx\n    mul     r9\n    add     rax, rax\n    adc     rdx, rdx\n    adc     r10, 0\n    add     [rcx+8], rax\n    adc     [rcx+16], rdx\n    adc     r10, 0\n    mov     [rcx+24], r10\n    ret\n\n    align   16\nthree:\n    FRAME_PROC ?mpn_bobcat_sqr_2, 0, rsi, rdi\n    mov     rdi, rcx\n    mov     rsi, rdx\n\n    mov     r8, [rsi]\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     r11d, 0\n    mov     [rcx+8], rax\n    mov     rax, [rsi+16]\n    mov     r9, rdx\n    mul     r8\n    mov     r8, 
[rsi+8]\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    mul     r8\n    mov     [rcx+16], r9\n    add     r11, rax\n    mov     r9d, 0\n    mov     [rcx+24], r11\n    adc     r9, rdx\n    mov     [rcx+32], r9\n    mov     rdi, -3\n    xor     r10, r10\n    xor     r11, r11\n    lea     rsi, [rsi+24]\n    mov     [rcx], r11\n    mov     [rcx+40], r11\n.1: mov     rax, [rsi+rdi*8]\n    mul     rax\n    mov     r8, [rcx]\n    mov     r9, [rcx+8]\n    add     r10, 1\n    adc     r8, r8\n    adc     r9, r9\n    sbb     r10, r10\n    add     r11, 1\n    adc     r8, rax\n    adc     r9, rdx\n    sbb     r11, r11\n    mov     [rcx], r8\n    mov     [rcx+8], r9\n    inc     rdi\n    lea     rcx, [rcx+16]\n    jnz     .1\n    END_PROC rsi, rdi\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/store.asm",
    "content": "; PROLOGUE(mpn_store)\n;  Copyright 2009 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;\tmpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;   r10          rdi        rsi        rdx\n;\tr10          rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n\tLEAF_PROC mpn_store\n\tmov     rax, rdx\n\tand     rax, 7\n\tmov     r9, 8\n\tsub     r9, rax\n    mov     rax, r8\n\tlea     r9, [r9+r9*4]\n\tlea     r10, [rel .0]\n\tadd     r10, r9\n\tand     rdx, -8\n\tadd     rcx, 48\n\tjmp     r10\n\txalign  16\n.0:\tmov     [rcx+rdx*8+8], rax\n\tmov     [byte rcx+rdx*8+0], rax\n\tmov     [rcx+rdx*8-8], rax\n\tmov     [rcx+rdx*8-16], rax\n\tmov     [rcx+rdx*8-24], rax\n\tmov     [rcx+rdx*8-32], rax\n\tmov     [rcx+rdx*8-40], rax\n\tmov     [rcx+rdx*8-48], rax\n\tnop\n\tsub     rdx, 8\n\tjnc     .0\n\tnop\n\tret\n\n    end"
  },
  {
    "path": "mpn/x86_64w/bobcat/sub_err1_n.asm",
    "content": "; PROLOGUE(mpn_sub_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sub_err1(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_sub_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    mov     r9, qword [rsp+stack_use+48]\n\tmov     r10, [rsp+stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\t\n\txalign  16\n.1: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tsbb     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n\n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n    jmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_sub_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_sub_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    sbb     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2:\tmov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    sbb     r10, [r8]\n    sbb     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    sbb     r10, [r8-16]\n    sbb     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    sbb      r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/subadd_n.asm",
    "content": "; PROLOGUE(mpn_subadd_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx          r8\n;  rax                       rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_subadd_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rsi+r9*8+16]\n\tmov     r11, [rsi+r9*8+24]\n\tmov     r8, [rsi+r9*8]\n\tmov     rbx, [rsi+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8]\n\tsbb     rbx, [rdx+r9*8+8]\n\tsbb     
rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r11, [rsi+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rsi+r9*8]\n\tmov     rbx, [rsi+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8]\n\tsbb     rbx, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rsi+r9*8+48]\n\tmov     r8, [rsi+r9*8+32]\n\tmov     rbx, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tsbb     rbx, [rdx+r9*8+40]\n\tsbb     rbp, [rdx+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+r9*8+32]\n\tmov     rbx, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tsbb     rbx, [rdx+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rsi+r9*8+32]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tadd     rax, r10\n\tneg     rax\n.8:\tEND_PROC 
reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/sublsh1_n.asm",
    "content": "; PROLOGUE(mpn_sublsh1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx  \n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n    mov     rax, r9\n\n    lea     rdi, [rcx+rax*8-56]\n    lea     rsi, [rdx+rax*8-56]\n    lea     rdx, [ r8+rax*8-56]\n    mov     rcx, rax\n\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     .3\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      .2\n\t\n\txalign   16\n.1: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, 
[rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     .1\n.2: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n.3: cmp     r8, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4: mov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.5: mov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.6: mov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.7: add     r10, 1\n.8: sbb     rax, 0\n\tneg     rax\n\tEND_PROC reg_save_list\n\n\tend\n\t"
  },
  {
    "path": "mpn/x86_64w/bobcat/sublsh_n.asm",
    "content": "; PROLOGUE(mpn_sublsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_sublsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_sublsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_sublsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_sublsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\nentry:\n\tFRAME_PROC ?mpn_sublsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\t\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     rcx\n\tshr     r11, cl\n\tor      r8, 
r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tsbb     r11, r8\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tmov     r13, [rsi+r10*8+8]\n\tmov     r14, [rsi+r10*8+16]\n\tmov     r15, [rsi+r10*8+24]\n\tsbb     r11, r8\n\tsbb     r13, rbp\n\tsbb     r14, rbx\n\tsbb     r15, r12\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tmov     [rdi+r10*8+8], r13\n\tmov     [rdi+r10*8+16], r14\n\tmov     [rdi+r10*8+24], r15\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n; add the result to a second limb vector.\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n;\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << 
UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    LEAF_PROC mpn_submul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_submul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_bobcat_submul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n\tEXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.4:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    sub      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], 
a_z\n    adc     rdx, 0\n    sub      b_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.5:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    sub      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    sub      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.6:\tmov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte 
rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, sub\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, sub\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    sub      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    sub      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/sumdiff_n.asm",
    "content": "; PROLOGUE(mpn_sumdiff_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;       rcx[r10] = r8[r10] + r9[r10]\n;       rdx[r10] = r8[r10] - r9[r10]\n;\n;  return 2 * add_carry + sub_borrow\n;\n;  mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                        rdi     rsi     rdx     rcx          r8\n;  rax                        rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rsi, rdi, r12, r13, r14, r15, rbx, rbp\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n    mov     r10, qword [rsp+stack_use+40]\n\n    lea     rdx, [rdx+r10*8]\n    lea     r8, [r8+r10*8]\n    lea     rcx, [rcx+r10*8]\n    lea     r9, [r9+r10*8]\n    neg     r10\n    xor     r12, r12\n    xor     r11, r11\n    test    r10, 3\n    jz      .2\n.1: mov     rax, [r8+r10*8]\n    mov     r13, rax\n    add     r12, 1\n    adc     rax, [r9+r10*8]\n    sbb     r12, r12\n    add     r11, 1\n    sbb     r13, [r9+r10*8]\n    sbb     r11, r11\n    mov     [rcx+r10*8], rax\n    mov     
[rdx+r10*8], r13\n    inc     r10\n    test    r10, 3\n    jnz     .1\n.2: cmp     r10, 0\n    jz      .4\n\n    xalign  16\n.3: mov     rax, [r8+r10*8]\n    mov     rsi, [r8+r10*8+8]\n    mov     rdi, [r8+r10*8+16]\n    mov     rbp, [r8+r10*8+24]\n    mov     r13, rax\n    mov     r14, rsi\n    mov     r15, rdi\n    mov     rbx, rbp\n    add     r12, 1\n    adc     rax, [r9+r10*8]\n    adc     rsi, [r9+r10*8+8]\n    adc     rdi, [r9+r10*8+16]\n    adc     rbp, [r9+r10*8+24]\n    sbb     r12, r12\n    add     r11, 1\n    sbb     r13, [r9+r10*8]\n    sbb     r14, [r9+r10*8+8]\n    sbb     r15, [r9+r10*8+16]\n    sbb     rbx, [r9+r10*8+24]\n    sbb     r11, r11\n    mov     [rcx+r10*8], rax\n    mov     [rcx+r10*8+8], rsi\n    mov     [rcx+r10*8+16], rdi\n    mov     [rcx+r10*8+24], rbp\n    mov     [rdx+r10*8], r13\n    mov     [rdx+r10*8+8], r14\n    mov     [rdx+r10*8+16], r15\n    mov     [rdx+r10*8+24], rbx\n    add     r10, 4\n    jnz     .3\n.4: lea     rax, [r11+r12*2]\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xnor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/bobcat/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, r10\n    or      r11, r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, T3\n    or      T4, T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n  
  jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/addsub_n.asm",
    "content": ";  AMD64 mpn_addsub_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)+(rdx,r8)-(rcx,r8)\n;   rax = summed carry and borrow in range [ -1..1 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_addsub_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills      ; clears carry & borrow\n\n    jmp     .Check\n\n    align   16\n.Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    adc     Limb0, [Src2P]\n    adc     Limb1, [Src2P+8]\n    adc     Limb2, [Src2P+16]\n    adc     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n.Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    adc     Limb0, [Src2P]\n    
setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    adc     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    adc     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) ok from benchmark figures\n.Exit:\n\n    sub     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/and_n.asm",
    "content": ";  AVX mpn_and_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) and (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.68 0.34-0.94 (depending on alignment)\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_and_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 0, 4, 8 & 12 immediately\n\n\t; the code density in the core loop is low - 5.18 byte per instruction\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    add     Src1P, 128\n    add     Src2P, 128\n    add     ResP, 128\n\n  .Check:\n\n    add     Size, 4\n  
  jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/andn_n.asm",
    "content": ";  AVX mpn_andn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) and (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_andn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpandn  QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpandn  QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpandn  QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpandn  QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpandn  QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpandn  QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpandn  QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    andn    Limb0, Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    andn    Limb0, Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    andn    Limb0, Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/ior_n.asm",
    "content": ";  AVX mpn_ior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) ior (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_ior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/iorn_n.asm",
    "content": ";  AVX mpn_iorn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) ior (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_iorn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    not     Limb0\n    or      Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    not     Limb0\n    or      Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    not     Limb0\n    or      Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/nand_n.asm",
    "content": ";  AVX mpn_nand_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not( (rsi,rcx) and (rdx,rcx) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nand_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/nior_n.asm",
    "content": ";  AVX mpn_nior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not( (rsi,rcx) or (rdx,rcx) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/subadd_n.asm",
    "content": ";  AMD64 mpn_subadd_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)-(rdx,r8)-(rcx,r8)\n;   rax = summed borrow in range [ 0..2 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_subadd_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills\n\n    jmp     .Check\n\n    align   16\n  .Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    sbb     Limb0, [Src2P]\n    sbb     Limb1, [Src2P+8]\n    sbb     Limb2, [Src2P+16]\n    sbb     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n  .Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n  .Post:\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    sbb     Limb0, [Src2P]\n    setc    Carry\n 
   shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    sbb     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    sbb     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) is ok\n.Exit:\n\n    add     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/xnor_n.asm",
    "content": ";  AVX mpn_xnor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xnor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    not     Limb0\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    not     Limb0\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    not     Limb0\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/broadwell/avx/xor_n.asm",
    "content": ";  AVX mpn_xor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/bulldozer/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  mpn_com_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_com_n\n\tmov     r9, 3\n\tlea     rdx, [rdx+r8*8-24]\n\tpcmpeqb xmm2, xmm2\n\tsub     r9, r8\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r9*8]\n\tmovdqu  xmm1, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tadd     r9, 4\n\tpxor    xmm1, xmm2\n\tmovdqu  [rcx], xmm0\n\tmovdqu  [rcx+16], xmm1\n\tlea     rcx, [rcx+32]\n\tjnc     .1\n.2:\tcmp     r9, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmovdqu  xmm0, [rdx+r9*8]\n\tmov     rax, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tnot     rax\n\tmovdqu  [rcx], xmm0\n\tmov     [rcx+16], rax\n\tret\n\n.4:\tmovdqu  xmm0, [rdx+r9*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx], xmm0\n\tret\n\n.5:\tmov     rax, [rdx+r9*8]\n\tnot     rax\n\tmov     [rcx], rax\n.6:\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bulldozer/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_copyd(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+16]\n\tlea     rcx, [rcx+16]\n\tsub     r8, 4\n\tjc      .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  xmm1, [rdx+r8*8-16]\n\tsub     r8, 4\n\tmovdqu  [rcx+r8*8-16+32], xmm1\n\tmovdqu  [rcx+r8*8+32], xmm0\n\tjnc     .1\n.2:\tcmp     r8, -2\n\tjg      .5\n\tje      .6\n\tjnp     .4\n.3:\tmov     rax, [rdx+r8*8+8]\n\tmov     [rcx+r8*8+8], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r8*8]\n\tmov     rax, [rdx+r8*8-8]\n\tmov     [rcx+r8*8-8], rax\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\n.6:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bulldozer/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%define SMALL_LOOP  1\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_copyi\n    cmp     r8, 0\n\tjz      .9\n%if SMALL_LOOP <> 0\n\tcmp     r8, 10\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 1\n\tjnz     .1\n\tret\n%endif\n\n.2:\tmov     rax, rcx\n\tsub     rax, rdx\n\ttest    rax, 0xF\n\tjz      .17\n\ttest    rcx, 0xF\n\tjz      .10\n\tmov     r9, 5\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-40]\n\tlea     rcx, [rcx+r8*8-40]\n\tmovapd  xmm1, [rdx+r9*8]\n\tmovq    [rcx+r9*8], xmm1\n\tadd     rcx, 8\n%if SMALL_LOOP = 0\n\tcmp     r8, 1\n\tjz      .9\n%endif\n\tcmp     r9, 0\n\tjge     .4\n\n\txalign  16\n.3:\tmovapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, 
xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .3\n.4:\tcmp     r9, 2\n\tja      .8\n\tjz      .7\n\tjp      .6\n\n\txalign  16\n.5:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.6:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.7:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.8:\tmovhpd  [rcx+r9*8], xmm1\n.9:\tret\n\n.10:mov     r9, 4\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-32]\n\tlea     rcx, [rcx+r8*8-32]\n\tmovapd  xmm1, [rdx+r9*8-8]\n\tsub     rdx, 8\n\tcmp     r9, 0\n\tjge     .12\n\n\txalign  16\n.11:movapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .11\n.12:cmp     r9, 2\n\tja      .16\n\tjz      .15\n\tjp      .14\n\n\txalign  16\n.13:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.14:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\t\n\txalign  16\n.15:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.16:movhpd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.17:mov     r9, 3\n\tsub     r9, r8\n\ttest    rcx, 0xF\n\tlea     rdx, [rdx+r8*8-24]\n\tlea     rcx, [rcx+r8*8-24]\n\tjz      .18\n\tmov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tadd     r9, 1\n.18:cmp     r9, 0\n\tjge     .20\n\n\txalign  16\n.19:add     r9, 4\n\tmovapd  xmm0, [rdx+r9*8-32]\n\tmovapd  [rcx+r9*8-32], xmm0\n\tmovapd  xmm1, [rdx+r9*8-16]\n\tmovapd  [rcx+r9*8-16], 
xmm1\n\tjnc     .19\n.20:cmp     r9, 2\n\tja      .22\n\tje      .24\n\tjp      .23\n\n.21:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tmov     rax, [rdx+r9*8+16]\n\tmov     [rcx+r9*8+16], rax\n.22:ret\n\n\txalign  16\n.23:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tret\n\n\txalign  16\n.24:mov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/bulldozer/lshift.asm",
    "content": ";  AMD64 mpn_lshift optimised for CPUs with fast SSE including fast movdqu.\n\n;  Contributed to the GNU project by Torbjorn Granlund.\n\n;  Copyright 2010-2012 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  If not,\n;  see https://www.gnu.org/licenses/.\n\n;\t     cycles/limb     cycles/limb     cycles/limb    good\n;              aligned\t      unaligned\t      best seen\t   for cpu?\n; AMD K8,K9\t 3\t\t 3\t\t 2.35\t  no, use shl/shr\n; AMD K10\t 1.5-1.8\t 1.5-1.8\t 1.33\t  yes\n; AMD bd1\t 1.7-1.9\t 1.7-1.9\t 1.33\t  yes\n; AMD bobcat\t 3.17\t\t 3.17\t\t\t  yes, bad for n < 20\n; Intel P4\t 4.67\t\t 4.67\t\t 2.7\t  no, slow movdqu\n; Intel core2\t 2.15\t\t 2.15\t\t 1.25\t  no, use shld/shrd\n; Intel NHM\t 1.66\t\t 1.66\t\t 1.25\t  no, use shld/shrd\n; Intel SBR\t 1.3\t\t 1.3\t\t 1.25\t  yes, bad for n = 4-6\n; Intel atom\t11.7\t\t11.7\t\t 4.5\t  no\n; VIA nano\t 5.7\t\t 5.95\t\t 2.0\t  no, slow movdqu\n\n; We try to do as many aligned 16-byte operations as possible.  
The top-most\n; and bottom-most writes might need 8-byte operations.\n;\n; This variant rely on fast load movdqu, and uses it even for aligned operands,\n; in order to avoid the need for two separate loops.\n;\n; TODO\n;  * Could 2-limb wind-down code be simplified?\n;  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts\n;    for other affected CPUs.\n\n;  mp_limb_t mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include 'yasm_mac.inc'\n\n\tTEXT\n\talign\t64\n\tLEAF_PROC mpn_lshift\n    mov     r10, rcx\n    mov     r11, rdx\n    mov     rdx, r8\n    mov     ecx, r9d\n\n\tmovd    xmm4, ecx\n\tmov     eax, 64\n\tsub     eax, ecx\n\tmovd    xmm5, eax\n\tneg     ecx\n\tmov     rax, [r11+rdx*8-8]\n\tshr     rax, cl\n\tcmp     rdx, 3\n\tjle     .10\n\tlea     ecx, [r10+rdx*8]\n\ttest    cl, 8\n\tjz      .0\n\tmovq    xmm0, [r11+rdx*8-8]\n\tmovq    xmm1, [r11+rdx*8-16]\n\tpsllq   xmm0, xmm4\n\tpsrlq   xmm1, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10+rdx*8-8], xmm0\n\tdec     rdx\n.0: lea     r8d, [rdx+1]\n\tand     r8d, 6\n\tjz      .2\n\tcmp     r8d, 4\n\tjz      .3\n\tjc      .4\n.1:\tadd     rdx, -4\n\tjmp     .7\n.2:\tadd     rdx, -6\n\tjmp     .6\n.3:\tadd     rdx, -2\n\tjmp     .8\n.4:\tadd     rdx, -8\n\tjle     .9\n\talign\t16\n.5:\tmovdqu  xmm1, [r11+rdx*8+40]\n\tmovdqu  xmm0, [r11+rdx*8+48]\n\tpsllq   xmm0, xmm4\n\tpsrlq   xmm1, xmm5\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8+48], xmm0\n.6: movdqu  xmm1, [r11+rdx*8+24]\n\tmovdqu  xmm0, [r11+rdx*8+32]\n\tpsllq   xmm0, xmm4\n\tpsrlq   xmm1, xmm5\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8+32], xmm0\n.7: movdqu  xmm1, [r11+rdx*8+8]\n\tmovdqu  xmm0, [r11+rdx*8+16]\n\tpsllq   xmm0, xmm4\n\tpsrlq   xmm1, xmm5\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8+16], xmm0\n.8: movdqu  xmm1, [r11+rdx*8-8]\n\tmovdqu  xmm0, [r11+rdx*8]\n\tpsllq   xmm0, xmm4\n\tpsrlq   xmm1, xmm5\n\tpor 
    xmm0, xmm1\n\tmovdqa  [r10+rdx*8], xmm0\n\tsub     rdx, 8\n\tjg      .5\n.9:\ttest    dl, 1\n\tjnz     .11\n\tmovdqu  xmm1, [r11]\n\tpxor    xmm0, xmm0\n\tpunpcklqdq xmm0, xmm1\n\tpsllq   xmm1, xmm4\n\tpsrlq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovdqa  [r10], xmm0\n\tret\n\talign\t16\n.10:dec     edx\n\tjz      .11\n\tmovq    xmm1, [r11+rdx*8]\n\tmovq    xmm0, [r11+rdx*8-8]\n\tpsllq   xmm1, xmm4\n\tpsrlq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10+rdx*8], xmm0\n\tsub     edx, 2\n\tjl      .11\n\tmovq    xmm1, [r11+8]\n\tmovq    xmm0, [r11]\n\tpsllq   xmm1, xmm4\n\tpsrlq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10+8], xmm0\n.11:movq    xmm0, [r11]\n\tpsllq   xmm0, xmm4\n\tmovq    [r10], xmm0\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/bulldozer/mul_basecase.asm",
    "content": ";  AMD64 mpn_mul_basecase optimised for AMD Bulldozer and Piledriver.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n\n;  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n\n; cycles/limb\tmul_1\t\tmul_2\t\tmul_3\t\taddmul_2\n; AMD K8,K9\n; AMD K10\n; AMD bull\t~4.8\t\t~4.55\t\t-\t\t~4.3\n; AMD pile\t~4.6\t\t~4.55\t\t-\t\t~4.55\n; AMD bobcat\n; AMD jaguar\n; Intel P4\n; Intel core\n; Intel NHM\n; Intel SBR\n; Intel IBR\n; Intel HWL\n; Intel BWL\n; Intel atom\n; VIA nano\n\n; The inner loops of this code are the result of running a code generation and\n; optimisation tool suite written by David Harvey and Torbjorn Granlund.\n\n; TODO\n;  * Merge bull-specific mul_1, if it is not slower the TOOM22 range.\n;    Alternatively, we could tweak the present code (which was loopmixed for a\n;    different CPU).\n;  * Merge faster mul_2, such as the one in the same directory as this file.\n;  * Further micro-optimise.\n\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r13, r14, r15\n\n\tTEXT\n\talign\t16\n\tFRAME_PROC mpn_mul_basecase, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\n\tmov     rbx, rdx\n\tneg     rbx\n\tmov     rax, [rsi]\n\tlea     rsi, [rsi+rdx*8]\n\tlea     rdi, [rdi+rdx*8]\n\tmov     r9, [rcx]\n\tmul     r9\n\ttest    r8b, 1\n\tjz      .13\n.0: test    bl, 1\n\tjnz     .4\n.1:\tmov     r10, rax\n\tmov     r11, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\ttest    bl, 2\n\tjnz     .3\n.2:\tlea     rbp, [rbx+2]\n\tjmp     .9\n.3:\tlea     rbp, [rbx]\n\tjmp     .11\n.4:\tmov     r11, rax\n\tmov     r10, rdx\n\ttest    bl, 2\n\tjz      .6\n.5:\tlea     rbp, [rbx+3]\n\ttest    rbp, rbp\n\tjs      .8\n\tmov     [rdi-8], rax\n\tmov     [rdi], rdx\n    EXIT_PROC reg_save_list\n\n.6:\tlea     rbp, [rbx+1]\n\tmov     rax, [rsi+rbx*8+8]\n\tjmp     
.10\n\talign\t16\n.7:\tmov     r10, rdx\n\tadd     r11, rax\n.8:\tmov     rax, [rsi+rbp*8-16]\n\tadc     r10, 0\n\tmul     r9\n\tadd     r10, rax\n\tmov     [rdi+rbp*8-24], r11\n\tmov     rax, [rsi+rbp*8-8]\n\tmov     r11, rdx\n\tadc     r11, 0\n.9:\tmul     r9\n\tmov     [rdi+rbp*8-16], r10\n\tadd     r11, rax\n\tmov     r10, rdx\n\tmov     rax, [rsi+rbp*8]\n\tadc     r10, 0\n.10:mul     r9\n\tmov     [rdi+rbp*8-8], r11\n\tmov     r11, rdx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbp*8+8]\n\tadc     r11, 0\n.11:mul     r9\n\tmov     [rdi+rbp*8], r10\n\tadd     rbp, 4\n\tjnc     .7\n.12:add     r11, rax\n\tadc     rdx, 0\n\tmov     [rdi-8], r11\n\tmov     [rdi], rdx\n\tdec     r8d\n\tjz      .35\n\tlea     rcx, [rcx+8]\n\tlea     rdi, [rdi+8]\n\tjmp     .20\n.13:mov     r14, [rcx+8]\n\ttest    bl, 1\n\tjnz     .15\n.14:lea     rbp, [rbx]\n\tmov     r12, rax\n\tmov     rax, [rsi+rbx*8]\n\tmov     r11, rdx\n\tmul     r14\n\tmov     r10, rax\n\tmov     [rdi+rbx*8], r12\n\tmov     rax, [rsi+rbx*8+8]\n\tmov     r12, rdx\n\tjmp     .18\n.15:lea     rbp, [rbx+1]\n\tmov     r10, rax\n\tmov     r13, rdx\n\tmov     rax, [rsi+rbx*8]\n\tmul     r14\n\tmov     [rdi+rbx*8], r10\n\tmov     r10, rdx\n\tmov     r12, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tjmp     .17\n\talign\t32\n.16:add     r12, rax\n\tmov     rax, [rsi+rbp*8]\n\tadc     r10, 0\n.17:mul     r9\n\tadd     r12, rax\n\tmov     rax, [rsi+rbp*8]\n\tmov     r11, rdx\n\tadc     r11, 0\n\tmul     r14\n\tadd     r12, r13\n\tadc     r11, 0\n\tadd     r10, rax\n\tmov     [rdi+rbp*8], r12\n\tmov     rax, [rsi+rbp*8+8]\n\tmov     r12, rdx\n\tadc     r12, 0\n.18:mul     r9\n\tadd     r10, rax\n\tmov     r13, rdx\n\tadc     r13, 0\n\tadd     r10, r11\n\tadc     r13, 0\n\tmov     rax, [rsi+rbp*8+8]\n\tmul     r14\n\tadd     rbp, 2\n\tmov     [rdi+rbp*8-8], r10\n\tmov     r10, rdx\n\tjnc     .16\n.19:add     r12, rax\n\tadc     rdx, 0\n\tadd     r12, r13\n\tadc     rdx, 0\n\tmov     [rdi], r12\n\tmov     [rdi+8], rdx\n\tadd     r8d, 
-2\n\tjz      .34\n\tlea     rcx, [rcx+16]\n\tlea     rdi, [rdi+16]\n.20:\n\tmov    [rsp+stack_use+8], r8\n.21:mov     r9, [rcx]\n\tmov     r8, [rcx+8]\n\tmov     rax, [rsi+rbx*8]\n\tmul     r9\n\ttest    bl, 1\n\tjnz     .25\n.22:mov     r15, rax\n\tmov     rax, [rsi+rbx*8]\n\tmov     r14, rdx\n\tmul     r8\n\ttest    bl, 2\n\tjnz     .24\n.23:lea     rbp, [rbx]\n\tmov     r13, [rdi+rbx*8]\n\tmov     r10, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tmov     r11, rdx\n\tjmp     .31\n.24:lea     rbp, [rbx+2]\n\tmov     r11, [rdi+rbx*8]\n\tmov     r13, rdx\n\tmov     r12, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tjmp     .29\n.25:mov     r14, rax\n\tmov     rax, [rsi+rbx*8]\n\tmov     r15, rdx\n\tmul     r8\n\ttest    bl, 2\n\tjz      .27\n.26:lea     rbp, [rbx+1]\n\tmov     r12, [rdi+rbx*8]\n\tmov     r10, rdx\n\tmov     r13, rax\n\tjmp     .30\n.27:lea     rbp, [rbx-1]\n\tmov     r10, [rdi+rbx*8]\n\tmov     r11, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tmov     r12, rdx\n\tjmp     .32\n\talign\t32\n.28:\n.29:mul     r9\n\tadd     r15, r11\n\tmov     [rdi+rbp*8-16], r15\n\tmov     r15, rdx\n\tadc     r14, rax\n\tadc     r15, 0\n\tmov     rax, [rsi+rbp*8-8]\n\tmul     r8\n\tmov     r11, [rdi+rbp*8-8]\n\tmov     r10, rdx\n\tadd     r12, r11\n\tadc     r13, rax\n\tadc     r10, 0\n.30:mov     rax, [rsi+rbp*8]\n\tmul     r9\n\tadd     r14, r12\n\tmov     [rdi+rbp*8-8], r14\n\tmov     r14, rdx\n\tadc     r15, rax\n\tmov     rax, [rsi+rbp*8]\n\tadc     r14, 0\n\tmov     r12, [rdi+rbp*8]\n\tmul     r8\n\tadd     r13, r12\n\tadc     r10, rax\n\tmov     rax, [rsi+rbp*8+8]\n\tmov     r11, rdx\n\tadc     r11, 0\n.31:mul     r9\n\tadd     r15, r13\n\tmov     [rdi+rbp*8], r15\n\tadc     r14, rax\n\tmov     rax, [rsi+rbp*8+8]\n\tmov     r15, rdx\n\tadc     r15, 0\n\tmov     r13, [rdi+rbp*8+8]\n\tmul     r8\n\tadd     r10, r13\n\tadc     r11, rax\n\tmov     rax, [rsi+rbp*8+16]\n\tmov     r12, rdx\n\tadc     r12, 0\n.32:mul     r9\n\tadd     r14, r10\n\tmov     [rdi+rbp*8+8], r14\n\tmov     r14, 
rdx\n\tadc     r15, rax\n\tadc     r14, 0\n\tmov     rax, [rsi+rbp*8+16]\n\tmov     r10, [rdi+rbp*8+16]\n\tmul     r8\n\tmov     r13, rdx\n\tadd     r11, r10\n\tadc     r12, rax\n\tadc     r13, 0\n\tmov     rax, [rsi+rbp*8+24]\n\tadd     rbp, 4\n\tjnc     .28\n.33:mul     r9\n\tadd     r15, r11\n\tmov     [rdi-16], r15\n\tmov     r15, rdx\n\tadc     r14, rax\n\tadc     r15, 0\n\tmov     rax, [rsi-8]\n\tmul     r8\n\tmov     r11, [rdi-8]\n\tadd     r12, r11\n\tadc     r13, rax\n\tadc     rdx, 0\n\tadd     r14, r12\n\tadc     r15, 0\n\tmov     [rdi-8], r14\n\tadd     r15, r13\n\tmov     [rdi], r15\n\tadc     rdx, 0\n\tmov     [rdi+8], rdx\n\tadd     qword [rsp+stack_use+8], -2\n\tlea     rcx, [rcx+16]\n\tlea     rdi, [rdi+16]\n\tjnz     .21\n    mov     rax, [rsp+stack_use+8]\n.34:\n.35:\n    END_PROC reg_save_list"
  },
  {
    "path": "mpn/x86_64w/bulldozer/rshift.asm",
    "content": ";  AMD64 mpn_rshift optimised for CPUs with fast SSE including fast movdqu.\n\n;  Contributed to the GNU project by Torbjorn Granlund.\n\n;  Copyright 2010-2012 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  If not,\n;  see https://www.gnu.org/licenses/.\n\n;\t     cycles/limb     cycles/limb     cycles/limb    good\n;              aligned\t      unaligned\t      best seen\t   for cpu?\n; AMD K8,K9\t 3\t\t 3\t\t 2.35\t  no, use shl/shr\n; AMD K10\t 1.5-1.8\t 1.5-1.8\t 1.33\t  yes\n; AMD bd1\t 1.7-1.9\t 1.7-1.9\t 1.33\t  yes\n; AMD bobcat\t 3.17\t\t 3.17\t\t\t  yes, bad for n < 20\n; Intel P4\t 4.67\t\t 4.67\t\t 2.7\t  no, slow movdqu\n; Intel core2\t 2.15\t\t 2.15\t\t 1.25\t  no, use shld/shrd\n; Intel NHM\t 1.66\t\t 1.66\t\t 1.25\t  no, use shld/shrd\n; Intel SBR\t 1.3\t\t 1.3\t\t 1.25\t  yes, bad for n = 4-6\n; Intel atom\t11.7\t\t11.7\t\t 4.5\t  no\n; VIA nano\t 5.7\t\t 5.95\t\t 2.0\t  no, slow movdqu\n\n; We try to do as many aligned 16-byte operations as possible.  
The top-most\n; and bottom-most writes might need 8-byte operations.\n;\n; This variant relies on fast load movdqu, and uses it even for aligned operands,\n; in order to avoid the need for two separate loops.\n;\n; TODO\n;  * Could 2-limb wind-down code be simplified?\n;  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts\n;    for other affected CPUs.\n\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include 'yasm_mac.inc'\n\n\tTEXT\n\talign\t64\n\tLEAF_PROC mpn_rshift\n    mov     r10, rcx\n    mov     r11, rdx\n    mov     rdx, r8\n    mov     ecx, r9d\n\tmovd    xmm4, ecx\n\tmov     eax, 64\n\tsub     eax, ecx\n\tmovd    xmm5, eax\n\tneg     ecx\n\tmov     rax, [r11]\n\tshl     rax, cl\n\tcmp     rdx, 3\n\tjle     .11\n\ttest    r10b, 8\n\tjz      .0\n\tmovq    xmm0, [r11]\n\tmovq    xmm1, [r11+8]\n\tpsrlq   xmm0, xmm4\n\tpsllq   xmm1, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10], xmm0\n\tlea     r11, [r11+8]\n\tlea     r10, [r10+8]\n\tdec     rdx\n.0: lea     r8d, [rdx+1]\n\tlea     r11, [r11+rdx*8]\n\tlea     r10, [r10+rdx*8]\n\tneg     rdx\n\tand     r8d, 6\n\tjz      .2\n\tcmp     r8d, 4\n\tjz      .3\n\tjc      .4\n.1:\tadd     rdx, 4\n\tjmp     .7\n.2:\tadd     rdx, 6\n\tjmp     .6\n.3:\tadd     rdx, 2\n\tjmp     .8\n.4:\tadd     rdx, 8\n\tjge     .9\n\talign\t16\n.5:\tmovdqu  xmm1, [r11+rdx*8-64]\n\tmovdqu  xmm0, [r11+rdx*8-56]\n\tpsllq   xmm0, xmm5\n\tpsrlq   xmm1, xmm4\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8-64], xmm0\n.6: movdqu  xmm1, [r11+rdx*8-48]\n\tmovdqu  xmm0, [r11+rdx*8-40]\n\tpsllq   xmm0, xmm5\n\tpsrlq   xmm1, xmm4\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8-48], xmm0\n.7: movdqu  xmm1, [r11+rdx*8-32]\n\tmovdqu  xmm0, [r11+rdx*8-24]\n\tpsllq   xmm0, xmm5\n\tpsrlq   xmm1, xmm4\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8-32], xmm0\n.8: movdqu  xmm1, [r11+rdx*8-16]\n\tmovdqu  xmm0, 
[r11+rdx*8-8]\n\tpsllq   xmm0, xmm5\n\tpsrlq   xmm1, xmm4\n\tpor     xmm0, xmm1\n\tmovdqa  [r10+rdx*8-16], xmm0\n\tadd     rdx, 8\n\tjl      .5\n.9:\ttest    dl, 1\n\tjnz     .10\n\tmovdqu  xmm1, [r11-16]\n\tmovq    xmm0, [r11-8]\n\tpsrlq   xmm1, xmm4\n\tpsllq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovdqa  [r10-16], xmm0\n\tret\n.10:movq    xmm0, [r11-8]\n\tpsrlq   xmm0, xmm4\n\tmovq    [r10-8], xmm0\n\tret\n\talign\t16\n.11:dec     edx\n\tjnz     .12\n\tmovq    xmm0, [r11]\n\tpsrlq   xmm0, xmm4\n\tmovq    [r10], xmm0\n\tret\n.12:movq    xmm1, [r11]\n\tmovq    xmm0, [r11+8]\n\tpsrlq   xmm1, xmm4\n\tpsllq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10], xmm0\n\tdec     edx\n\tjnz     .13\n\tmovq    xmm0, [r11+8]\n\tpsrlq   xmm0, xmm4\n\tmovq    [r10+8], xmm0\n\tret\n.13:movq    xmm1, [r11+8]\n\tmovq    xmm0, [r11+16]\n\tpsrlq   xmm1, xmm4\n\tpsllq   xmm0, xmm5\n\tpor     xmm0, xmm1\n\tmovq    [r10+8], xmm0\n\tmovq    xmm0, [r11+16]\n\tpsrlq   xmm0, xmm4\n\tmovq    [r10+16], xmm0\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/core2/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define TR2 r10\n%define TR4 r10\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_add_nc\n    mov     r10, [rsp+0x28]\n    jmp     mpn_add_entry\n\n\tLEAF_PROC mpn_add_n\n\txor     r10, r10\n\nmpn_add_entry:\n\tmov     rax, r9\n\tmov     r9, rcx\n\tmov     rcx, rax\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     r9, [r9+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     r8, [r8+rcx*8]\n\tneg     rcx\n\tlea     rcx, [r10+rcx*2]\n\tsar     rcx, 1\n\tjz      .2\n\n\txalign  16\n.1:\tmov     r10, [rdx+rcx*8]\n\tmov     r11, [rdx+rcx*8+16]\n\tadc     r10, [r8+rcx*8]\n\tmov     [r9+rcx*8], r10\n\tmov     TR2, [rdx+rcx*8+8]\n\tadc     TR2, [r8+rcx*8+8]\n\tmov     [r9+rcx*8+8], 
TR2\n\tlea     rcx, [rcx+4]\n\tmov     TR4, [rdx+rcx*8-8]\n\tadc     r11, [r8+rcx*8-16]\n\tadc     TR4, [r8+rcx*8-8]\n\tmov     [r9+rcx*8-16], r11\n\tmov     [r9+rcx*8-8], TR4\n\tjrcxz   .2\n\tjmp     .1\n.2:\tsbb     rcx, rcx\n.3:\tcmp     rax, 2\n\tja      .6\n\tjz      .7\n\tjp      .5\n.4:\tsub     rax, rcx\n\tret\n\t\n\txalign  16\n.5:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.6:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tmov     r11, [rdx+16]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tadc     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tadc     r11, [r8+16]\n\tmov     [r9+16], r11\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.7:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tadc     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/addadd_n.asm",
    "content": "; PROLOGUE(mpn_addadd_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_addadd_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        adc     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        adc     rbx, [rcx+r10*8+8]\n        mov     r11, [rdx+r10*8+24]\n        mov     rbp, [rdx+r10*8+16]\n        adc     rbp, [rcx+r10*8+16]\n        adc     r11, [rcx+r10*8+24]\n       
 lahf    \n        add     r9b, 255\n        adc     r8, [rsi+r10*8]\n        adc     rbx, [rsi+r10*8+8]\n        mov     [rdi+r10*8], r8\n        adc     rbp, [rsi+r10*8+16]\n        adc     r11, [rsi+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        adc     r8, [rcx]\n        mov     rbx, [rdx+8]\n        adc     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        adc     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi]\n        adc     rbx, [rsi+8]\n        mov     [rdi], r8\n        adc     rbp, [rsi+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        adc     r8, [rcx+8]\n        mov     rbx, [rdx+16]\n        adc     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+8]\n        adc     rbx, [rsi+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        adc     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end"
  },
  {
    "path": "mpn/x86_64w/core2/addlsh1_n.asm",
    "content": "; PROLOGUE(mpn_addlsh1_n)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addlsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Core2\n    BITS 64\n\n    FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n\n    lea     rdx, [rdx+r9*8]\n    lea     r8, [r8+r9*8]\n    lea     rcx, [rcx+r9*8]\n    neg     r9\n    xor     rsi, rsi\n    xor     rax, rax\n    test    r9, 3\n    jz      .2\n.1:\tmov     rdi, [r8+r9*8]\n    add     rsi, 1\n    adc     rdi, rdi\n    sbb     rsi, rsi\n    add     rax, 1\n    adc     rdi, [rdx+r9*8]\n    sbb     rax, rax\n    mov     [rcx+r9*8], rdi\n    add     r9, 1           ; ***\n    test    r9, 3\n    jnz     .1\n.2: cmp     r9, 0\n    jz      .4\n\n    xalign  16\n.3: mov     rdi, [r8+r9*8]\n    mov     rbx, [r8+r9*8+8]\n    mov     r10, [r8+r9*8+16]\n    mov     r11, [r8+r9*8+24]\n    add     rsi, 1\n    adc     rdi, rdi\n    adc     rbx, rbx\n    adc  
   r10, r10\n    adc     r11, r11\n    sbb     rsi, rsi\n    add     rax, 1\n    adc     rdi, [rdx+r9*8]\n    adc     rbx, [rdx+r9*8+8]\n    adc     r10, [rdx+r9*8+16]\n    adc     r11, [rdx+r9*8+24]\n    sbb     rax, rax\n    mov     [rcx+r9*8], rdi\n    mov     [rcx+r9*8+8], rbx\n    mov     [rcx+r9*8+16], r10\n    mov     [rcx+r9*8+24], r11\n    add     r9, 4\n    jnz     .3\n.4: add     rax, rsi\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/addlsh_n.asm",
    "content": "; PROLOGUE(mpn_addlsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_addlsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  mp_limb_t mpn_addlsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list   rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addlsh_n, 0, reg_save_list\n\tmov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\tmov     r8d, dword [rsp+stack_use+40]\n\n\tmov     rbx, 4\n\tsub     rbx, rcx\n\tmov     rcx, 64\n\tsub     rcx, r8\n\tmov     r12, 0\n\tmov     rax, 0\n\tmov     r8, [rdx+rbx*8]\n\tcmp     rbx, 0\n\tjge     .2\n\n\txalign  16\n.1:\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r12, r8, cl\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, 
cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tmov     r8, [rdx+rbx*8+32]\n\tadd     rbx, 4\n\tjnc     .1\n\n\txalign  16\n.2:\tcmp     rbx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tshr     r12, cl\n\tsahf\n\tadc     r12, 0\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\n.4:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     r10, [rdx+rbx*8+16]\n\tshrd    r9, r10, cl\n\tshr     r10, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     rax, r10\n\tmov     [rdi+rbx*8+16], r9\n    EXIT_PROC reg_save_list\n\n.5:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tshr     r9, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     rax, r9\n    EXIT_PROC reg_save_list\n\n.6:\tshrd    r12, r8, cl\n\tshr     r8, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, 0\n\tmov     rax, r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/addmul_1.asm",
    "content": ";  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for \"Core 2\".\n\n;  Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software\n;  Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\n; mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                       rdi        rsi        rdx        rcx\n;  rax                       rcx        rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n\tTEXT\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_addmul_1c, 4\n\tlea     rbx, [rdx]\n\tneg     rbx\n\tmov     rax, [rsi]\n\tmov     r10, [rdi]\n\tlea     rdi, [rdi+rdx*8-16]\n\tlea     rsi, [rsi+rdx*8]\n\tmul     rcx\n\tadd     rax, r8\n\tadc     rdx, 0\n\tbt      ebx, 0\n\tjc      .1\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tmul     rcx\n\tadd     rbx, 2\n\tjns     .5\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tjmp     .3\n.1:\tadd     rbx, 1\n\tjns     .6\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tmul     rcx\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tjmp     .4\n\n\txalign  16\n.2:\tmul     rcx\n\tadd     r10, r8\n\tlea     r8, [rax]\n\tmov     rax, 
[rsi+rbx*8]\n\tadc     r11, r9\n\tmov     [rdi+rbx*8-8], r10\n\tmov     r10, [rdi+rbx*8]\n\tlea     r9, [rdx]\n\tadc     rbp, 0\n.3:\tmul     rcx\n\tadd     r10, r11\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r8, rbp\n\tmov     [rdi+rbx*8], r10\n\tmov     r10, [rdi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tadc     r9, 0\n.4:\tadd     rbx, 2\n\tjs      .2\n\tmul     rcx\n\tadd     r10, r8\n\tadc     r11, r9\n\tmov     [rdi-8], r10\n\tadc     rbp, 0\n.5:\tmov     r10, [rdi]\n\tadd     r10, r11\n\tadc     rax, rbp\n\tmov     [rdi], r10\n\tadc     rdx, 0\n.6:\tmov     r10, [rdi+8]\n\tadd     r10, rax\n\tmov     [rdi+8], r10\n\tmov     eax, ebx\n\tadc     rax, rdx\n\tWIN64_GCC_END\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_addmul_1, 4\n\tlea     rbx, [rdx]\n\tneg     rbx\n\tmov     rax, [rsi]\n\tmov     r10, [rdi]\n\tlea     rdi, [rdi+rdx*8-16]\n\tlea     rsi, [rsi+rdx*8]\n\tmul     rcx\n\tbt      ebx, 0\n\tjc      .1\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tmul     rcx\n\tadd     rbx, 2\n\tjns     .5\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tjmp     .3\n.1:\tadd     rbx, 1\n\tjns     .6\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tmul     rcx\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tjmp     .4\n\n\txalign  16\n.2:\tmul     rcx\n\tadd     r10, r8\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tadc     r11, r9\n\tmov     [rdi+rbx*8-8], r10\n\tmov     r10, [rdi+rbx*8]\n\tlea     r9, [rdx]\n\tadc     rbp, 0\n.3:\tmul     rcx\n\tadd     r10, r11\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r8, rbp\n\tmov     [rdi+rbx*8], r10\n\tmov     r10, [rdi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tadc     r9, 0\n.4:\tadd     rbx, 2\n\tjs      .2\n\tmul     rcx\n\tadd     r10, r8\n\tadc     r11, r9\n\tmov     [rdi-8], r10\n\tadc     rbp, 0\n.5:\tmov     r10, [rdi]\n\tadd     r10, r11\n\tadc     rax, rbp\n\tmov     [rdi], r10\n\tadc     
rdx, 0\n.6:\tmov     r10, [rdi+8]\n\tadd     r10, rax\n\tmov     [rdi+8], r10\n\tmov     eax, ebx\n\tadc     rax, rdx\n\tWIN64_GCC_END\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/addsub_n.asm",
    "content": "; PROLOGUE(mpn_addsub_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_addsub_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        sbb     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        sbb     rbx, [rcx+r10*8+8]\n        mov     r11, [rdx+r10*8+24]\n        mov     rbp, [rdx+r10*8+16]\n        sbb     rbp, [rcx+r10*8+16]\n        sbb     r11, [rcx+r10*8+24]\n       
 lahf    \n        add     r9b, 255\n        adc     r8, [rsi+r10*8]\n        adc     rbx, [rsi+r10*8+8]\n        mov     [rdi+r10*8], r8\n        adc     rbp, [rsi+r10*8+16]\n        adc     r11, [rsi+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     \n        cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        sbb     r8, [rcx]\n        mov     rbx, [rdx+8]\n        sbb     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        sbb     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi]\n        adc     rbx, [rsi+8]\n        mov     [rdi], r8\n        adc     rbp, [rsi+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        sbb     r8, [rcx+8]\n        mov     rbx, [rdx+16]\n        sbb     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+8]\n        adc     rbx, [rsi+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        sbb     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end"
  },
  {
    "path": "mpn/x86_64w/core2/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_and_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    and     r10, [r8]\n    and     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    and     T3, [r8-16]\n    and     T4, [r8-8]\n    mov     [rcx-16], T3\n    sub     r9, 1       ; ***\n    mov     [rcx-8], T4\n    jnz     .1\n.2: cmp     rax, 0      ; **\n    jz      .3\n    mov     r10, [rdx]\n    and     r10, [r8]\n    mov     [rcx], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, 
[rdx+8]\n    and     r10, [r8+8]\n    mov     [rcx+8], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+16]\n    and     r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_andn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    and     r10, [rdx+r9*8+24]\n    and     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    and     T3, [rdx+r9*8+8]\n    and     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                     rdi     rsi       rdx\n;                     rcx     rdx        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_com_n\n    mov     rax, r8\n    sub     rax, 4\n    jc      .2\n\n    xalign  8\n.1: mov     r8, [rdx+rax*8+24]\n    mov     r9, [rdx+rax*8+16]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+24], r8\n    mov     [rcx+rax*8+16], r9\n    mov     r8, [rdx+rax*8+8]\n    mov     r9, [rdx+rax*8]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+8], r8\n    mov     [rcx+rax*8], r9\n    sub     rax, 4\n    jae     .1\n.2: add     rax, 4\n    jz      .3\n\n; Could still have potential cache-bank conflicts in this tail part\n\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, 
[rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyd(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     rcx, [rcx+r8*8-8]\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tmov     r11, [rdx-24]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tlea     rcx, [rcx-32]\n\tsub     r8, 4\n\tmov     [rcx+16], r10\n\tmov     [rcx+8], r11\n\tlea     rdx, [rdx-32]\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tmov     [rcx-16], 
r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%define SMALL_LOOP  1\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_copyi\n    cmp     r8, 0\n\tjz      .9\n%if SMALL_LOOP <> 0\n\tcmp     r8, 10\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 1\n\tjnz     .1\n\tret\n%endif\n\n.2:\tmov     rax, rcx\n\tsub     rax, rdx\n\ttest    rax, 0xF\n\tjz      .17\n\ttest    rcx, 0xF\n\tjz      .10\n\tmov     r9, 5\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-40]\n\tlea     rcx, [rcx+r8*8-40]\n\tmovapd  xmm1, [rdx+r9*8]\n\tmovq    [rcx+r9*8], xmm1\n\tadd     rcx, 8\n%if SMALL_LOOP = 0\n\tcmp     r8, 1\n\tjz      .9\n%endif\n\tcmp     r9, 0\n\tjge     .4\n\n\txalign  16\n.3:\tmovapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, 
xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .3\n.4:\tcmp     r9, 2\n\tja      .8\n\tjz      .7\n\tjp      .6\n\n\txalign  16\n.5:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.6:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.7:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.8:\tmovhpd  [rcx+r9*8], xmm1\n.9:\tret\n\n.10:mov     r9, 4\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-32]\n\tlea     rcx, [rcx+r8*8-32]\n\tmovapd  xmm1, [rdx+r9*8-8]\n\tsub     rdx, 8\n\tcmp     r9, 0\n\tjge     .12\n\n\txalign  16\n.11:movapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .11\n.12:cmp     r9, 2\n\tja      .16\n\tjz      .15\n\tjp      .14\n\n\txalign  16\n.13:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.14:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\t\n\txalign  16\n.15:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.16:movhpd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.17:mov     r9, 3\n\tsub     r9, r8\n\ttest    rcx, 0xF\n\tlea     rdx, [rdx+r8*8-24]\n\tlea     rcx, [rcx+r8*8-24]\n\tjz      .18\n\tmov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tadd     r9, 1\n.18:cmp     r9, 0\n\tjge     .20\n\n\txalign  16\n.19:add     r9, 4\n\tmovapd  xmm0, [rdx+r9*8-32]\n\tmovapd  [rcx+r9*8-32], xmm0\n\tmovapd  xmm1, [rdx+r9*8-16]\n\tmovapd  [rcx+r9*8-16], 
xmm1\n\tjnc     .19\n.20:cmp     r9, 2\n\tja      .22\n\tje      .24\n\tjp      .23\n\n.21:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tmov     rax, [rdx+r9*8+16]\n\tmov     [rcx+r9*8+16], rax\n.22:ret\n\n\txalign  16\n.23:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tret\n\n\txalign  16\n.24:mov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\n\txor     eax, eax\n\tmov     r9, r8\n\tand     r9, 3\n\tshr     r8, 2\n\tcmp     r8, 0\n;\tcarry flag is clear here\n\tjnz     .2\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r9\n\tjz      .1\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r9\n\tjz      .1\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n\tdec     r9\n.1:\tsbb     rax, 0\n\tret\n\t\n\txalign  16\n.2:\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n\tsbb     rax, [rdx+24]\n\tmov     [rcx+24], rax\n\tlea     rdx, [rdx+32]\n\tdec     r8\n\tlea     rcx, [rcx+32]\n\tjnz     .2\n\tinc     r9\n\tdec     r9\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], 
rax\n\tdec     r9\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r9\n\tjz      .3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n\tdec     r9\n.3:\tsbb     rax, 0\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                   rdi     rsi        rdx        rcx\n;  rax                                   rcx     rdx         r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-8]\n\tlea     rsi, [rdx+rax*8-8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov    
 r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     r10, 0\n\tadd     r9, 2\n\tjc      .2\n\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  mpn_hamdist\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list   rsi, rdi, rbp, r12, r14\n\n\tFRAME_PROC mpn_hamdist, 0, reg_save_list\n\tmov\trdi, rcx\n\tmov\trsi, rdx\n\tmov   \trdx, r8\n\n\tmov     r8, 0x5555555555555555\n\tmov     r9, 0x3333333333333333\n\tmov     r10, 0x0f0f0f0f0f0f0f0f\n\tmov     r11, 0x0101010101010101\n\txor     eax, eax\n\tsub     rdx, 3\n\tjc      .2\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     r12, [rdi+rdx*8+8]\n\txor     r12, [rsi+rdx*8+8]\n\tmov     r14, [rdi+rdx*8]\n\txor     r14, [rsi+rdx*8]\n\tsub     rdx, 3\n\tjc      .1\n\txalign  16\n.0:\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     rbp, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rbp, r12\n\tmov     r12, rbp\n\tshr 
    rbp, 2\n\tand     r12, r9\n\tand     rbp, r9\n\tadd     rbp, r12\n\tmov     r12, r14\n\tshr     r14, 1\n\tand     r14, r8\n\tsub     r12, r14\n\tmov     r14, r12\n\tshr     r12, 2\n\tand     r14, r9\n\tand     r12, r9\n\tadd     r12, r14\n\tadd     rbp, rcx\n\tadd     rbp, r12\n\tmov     rcx, [rdi+rdx*8+16]\n\tmov     r14, rbp\n\tshr     rbp, 4\n\tand     r14, r10\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     r12, [rdi+rdx*8+8]\n\txor     r12, [rsi+rdx*8+8]\n\tand     rbp, r10\n\tadd     r14, rbp\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tmov     r14, [rdi+rdx*8]\n\txor     r14, [rsi+rdx*8]\n\tsub     rdx, 3\n\tjnc     .0\n.1:\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     rbp, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rbp, r12\n\tmov     r12, rbp\n\tshr     rbp, 2\n\tand     r12, r9\n\tand     rbp, r9\n\tadd     rbp, r12\n\tmov     r12, r14\n\tshr     r14, 1\n\tand     r14, r8\n\tsub     r12, r14\n\tmov     r14, r12\n\tshr     r12, 2\n\tand     r14, r9\n\tand     r12, r9\n\tadd     r12, r14\n\tadd     rbp, rcx\n\tadd     rbp, r12\n\tmov     r14, rbp\n\tshr     rbp, 4\n\tand     r14, r10\n\tand     rbp, r10\n\tadd     r14, rbp\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.2:\n\tcmp     rdx, -2\n\tjl      .5\n\tjz      .4\n.3:\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     r14, rcx\n\tshr     rcx, 4\n\tand     r14, r10\n\tand     rcx, r10\n\tadd     r14, rcx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tdec     rdx\n.4:\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, 
rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     r14, rcx\n\tshr     rcx, 4\n\tand     r14, r10\n\tand     rcx, r10\n\tadd     r14, rcx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.5:\tEND_PROC reg_save_list\n\n    \tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_ior_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    or      r10, [r8]\n    or      r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    or      T3, [r8-16]\n    or      T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: cmp     rax, 0      ; ***\n    jz      .3\n    mov     r10, [rdx]\n    or      r10, [r8]\n    mov     [rcx], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+8]\n    or    
  r10, [r8+8]\n    mov     [rcx+8], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+16]\n    or      r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_iorn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], 
r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 
1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     xor     r8, r8\n        shr     rax, 1\n        adc     r8, r8\n        shr     rax, 1\n        adc     r8, 0\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        xor     r8, r8\n        shr     rbx, 1\n      
  adc     r8, r8\n        shr     rbx, 1\n        adc     r8, 0\n        shr     rbx, 1\n        adc     r8, 0\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/core2/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quater block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrrow from second to third quarter quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrrow from third to fourth quarter quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/core2/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift, 0, reg_save_list\n; odd and even n seem to have different runtimes\n    mov     rbx, r8\n    lea     rsi, [rdx+24]\n    lea     rdi, [rcx+24]\n    mov     ecx, r9d\n\n    mov     rdx, [rsi+rbx*8-32]\n    xor     rax, rax\n    shld    rax, rdx, cl\n    sub     rbx, 5\n    js      .2\n\n    xalign  16\n.1:\tmov     r8, [rsi+rbx*8]\n    mov     r11, [rsi+rbx*8-24]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    mov     rdx, r11\n    mov     r10, [rsi+rbx*8-16]\n    shld    r8, r9, cl\n    shld    r9, r10, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    shld    r10, r11, cl\n    sub     rbx, 4\n    mov     
[rdi+rbx*8+16], r10\n    jns     .1\n.2:\tcmp     rbx, -2\n    ja      .4\n    je      .5\n    jp      .6\n; ALIGN(16)\n.3:\tshl     rdx, cl\n    mov     [rdi+rbx*8+8], rdx\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.4:\tmov     r8, [rsi+rbx*8]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    mov     r10, [rsi+rbx*8-16]\n    shld    r8, r9, cl\n    shld    r9, r10, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    shl     r10, cl\n    mov     [rdi+rbx*8-16], r10\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.5:\tmov     r8, [rsi+rbx*8]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    shld    r8, r9, cl\n    shl     r9, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.6:\tmov     r8, [rsi+rbx*8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    shl     r8, cl\n    mov     [rdi+rbx*8], r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx        r8d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list r12, r13\n\n    BITS 64\n\n    FRAME_PROC mpn_lshift1, 0, reg_save_list\n    xor     rax, rax\n    mov     r9, r8\n    and     r9, 7\n    inc     r9\n    shr     r8, 3\n; and clear carry flag\n    cmp     r8, 0\n    jz      .2\n\n    alignb  16, nop\n.1: \n\tmov     r10, [rdx]\n    mov     r11, [rdx+8]\n    mov     r12, [rdx+16]\n    mov     r13, [rdx+24]\n    adc     r10, r10\n    adc     r11, r11\n    adc     r12, r12\n    adc     r13, r13\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    mov     [rcx+16], r12\n    mov     [rcx+24], r13\n    mov     r10, [rdx+32]\n    mov     r11, [rdx+40]\n    mov     r12, [rdx+48]\n    mov     r13, [rdx+56]\n    adc     r10, r10\n    adc     r11, r11\n    adc     r12, r12\n    adc     r13, r13\n    mov     [rcx+32], r10\n    mov     [rcx+40], r11\n    mov     
[rcx+48], r12\n    mov     [rcx+56], r13\n    lea     rcx, [rcx+64]\n    dec     r8\n    lea     rdx, [rdx+64]\n    jnz     .1\n.2:\n    dec     r9\n    jz      .3\n; Could still have cache-bank conflicts in this tail part\n    mov     r10, [rdx]\n    adc     r10, r10\n    mov     [rcx], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+8]\n    adc     r10, r10\n    mov     [rcx+8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+16]\n    adc     r10, r10\n    mov     [rcx+16], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+24]\n    adc     r10, r10\n    mov     [rcx+24], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+32]\n    adc     r10, r10\n    mov     [rcx+32], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+40]\n    adc     r10, r10\n    mov     [rcx+40], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx+48]\n    adc     r10, r10\n    mov     [rcx+48], r10\n.3:\n    adc     rax, rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n        FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r13, [rsi+rdx*8-8]\n        mov     rax, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        sub     rcx, 2\n        \n        align   16  \n.1:     mov     r11d, 0\n        mov     r10, [rsi+rcx*8-8]\n        mul     r8\n        add     r10, rax\n        lea     rax, [r13]\n        adc     r11, rdx\n        lea     r13, [r11]\n        lea     r9, [r9]\n        lea     r8, [r8]\n        mul     r9\n        add     rax, r10\n        adc     r13, rdx\n        sub     rcx, 1\n        jnz     .1\n        mov     [rdi], rax\n        mov     
rax, r8\n        mul     r13\n        add     [rdi], rax\n        adc     rdx, 0\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n        FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r14, [rsi+rdx*8-8]\n        mov     r13, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r10, [r9+16]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        sub     rcx, 6\n        jc      .2\n\n        align   16\n.1:     \n        mov     r11, [rsi+rcx*8+16]\n        mov     rax, [rsi+rcx*8+16+8]\n        mul     r8\n        add     r11, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        lea     rax, [r9]\n        mul     r13\n        add     r11, rax\n        lea     r8, [r8]\n        adc     r12, rdx\n        mov     rax, r10\n        lea     
r13, [r11]\n        mul     r14\n        add     r13, rax\n        lea     r14, [r12]\n        adc     r14, rdx\n        sub     rcx, 2\n        jnc     .1\n.2:     mov     r11, [rsi+rcx*8+16]\n        mov     rax, [rsi+rcx*8+16+8]\n        mul     r8\n        mov     r12d, 0\n        add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n        cmp     rcx, -2\n        je      .4\n.3:     mov     r11, [rsi+rcx*8+8]\n        mov     r12d, 0\n        mov     rax, r8\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r9\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n.4:     mov     rax, r8\n        mul     r14\n        add     r13, rax\n        adc     rdx, 0\n        mov     [rdi], r13\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_mul_1c\n    mov     r11, [rsp+0x28]\n    jmp     start\n    LEAF_PROC mpn_mul_1\n    xor     r11, r11\n\nstart:\n    mov     rax, r8\n\tlea     r10, [rdx+rax*8-8]\n\tlea     rcx, [rcx+rax*8-8]\n\tmov     r8d, 1\n\tsub     r8, rax\n\tmov     rax, [r10+r8*8]\n\tjz      .1\n\n\txalign  16\n.0:\tmul     r9\n\tadd     rax, r11\n\tmov     [rcx+r8*8], rax\n\tmov     rax, [r10+r8*8+8]\n\tmov     r11d, 0\n\tadc     r11, rdx\n\tadd     r8, 1\n\tjnc     .0\n.1: mul     r9\n\tadd     rax, r11\n\tmov     [rcx+r8*8], rax\n\tmov     eax, 0\n\tadc     rax, rdx\n\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n    BITS 64\n\n\tFRAME_PROC mpn_mul_2, 0, reg_save_list\n\tmov     rax, r8\n\t\n\tmov     r8, [r9]\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, [r9+8]\n\tmov     rbx, 3\n\tsub     rbx, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tmov     r11, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, 
[rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 3\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 1\n\tja      .5\n\tje      .4\n.3:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.6: END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro 
mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, 
r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro 
oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:\n    oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      __gmpn_core2_mbc2\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_core2_mbc1, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      
.2\n.1:\toldmpn_muladdmul_1_int 0\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.2:\toldmpn_muladdmul_1_int 1\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.3:\toldmpn_muladdmul_1_int 2\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.4:\toldmpn_muladdmul_1_int 3\n\n    xalign  16\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\n.6:\tWIN64_GCC_PROC mpn_core2_mbc2, 5, frame\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .13\n.7:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .8\n    mul1lp\n.8:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .12\n    jz      .11\n    jp      .10\n.9:\tmulnext0\n    jmp     .21\n.10:mulnext1\n    jmp     .15\n.11:mulnext2\n    jmp     .17\n.12:mulnext3\n    jmp     .19\n    ; as all the mul2pro? are the same\n.13:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .20\n    jz      .18\n    jp      .16\n.14:mul2epi3\n.15:mpn_addmul_2_int 3\n    jmp     .22\n.16:mul2epi2\n.17:mpn_addmul_2_int 2\n    jmp     .22\n.18:mul2epi1\n.19:mpn_addmul_2_int 1\n    jmp     .22\n.20:mul2epi0\n.21:mpn_addmul_2_int 0\n\n    xalign  16\n.22:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/mullow_n_basecase.asm",
    "content": ";  AMD64 mpn_mullow_n_basecase\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;void mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n;                                  rdi           rsi           rdx          rcx\n;                                  rcx           rdx            r8           r9\n\n%include 'yasm_mac.inc'\n\n\ttext\n\talign\t32\n\tLEAF_PROC mpn_mullow_n_basecase\n\tcmp     r9, 3\n\tje      asm_sym(?mpn_mullow1)\n\tja      asm_sym(?mpn_mullow2)\n\tmov     rax, [rdx]\n\tcmp     r9, 2\n\tjae     .1\n\n\timul    rax, [r8]\n\tmov     [rcx], rax\n\tret\n\n.1: mov     r10, rdx\n    mov     r9, [r8]\n\tmul     r9\n\tmov     [rcx], rax\n\tmov     rax, [r10+8]\n\timul    rax, r9\n\tadd     rdx, rax\n\tmov     r9, [r8+8]\n\tmov     r11, [r10]\n\timul    r11, r9\n\tadd     rdx, r11\n\tmov     [rcx+8], rdx\n\tret\n\n%define reg_save_list rsi, rdi\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\tmov     rax, [rsi]\n    mov     r9, [r8]\n\tmul     r9\n\tmov     [rdi], rax\n\tmov     r10, rdx\n\tmov     rax, [rsi+8]\n\tmul     r9\n\timul    r9, [rsi+16]\n\tadd     r10, rax\n\tadc     r9, rdx\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi]\n\tmul     r11\n\tadd     r10, rax\n\tadc     r9, rdx\n\timul    r11, [rsi+8]\n\tadd     r9, r11\n\tmov     [rdi+8], r10\n\tmov     r10, [r8+16]\n\tmov     rax, [rsi]\n\timul    r10, rax\n\tadd     r9, r10\n\tmov     [rdi+16], r9\n    END_PROC reg_save_list\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r13, r14, r15\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow2, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\tmov     rax, [rsi]\n\tmov     r10, [r8]\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tmov     r9d, 0\n\tsub     r9, rcx\n\tmul     r10\n\tmov     r11, [r8+8]\n\ttest    cl, 1\n\tjnz     .4\n.1:\ttest    cl, 2\n\tjnz     .3\n.2:\tlea     r13, [r9]\n\tmov     [rdi+r9*8], rax\n\tmov     rcx, rdx\n\tmov     
rax, [rsi+r9*8]\n\txor     ebp, ebp\n\tjmp     .9\n.3:\tlea     r13, [r9-2]\n\tmov     rbp, rax\n\tmov     rax, [rsi+r9*8]\n\tmov     r12, rdx\n\txor     ebx, ebx\n\tjmp     .11\n.4:\ttest    cl, 2\n\tjnz     .6\n.5:\tlea     r13, [r9+1]\n\tmov     [rdi+r9*8], rax\n\tmov     rax, [rsi+r9*8]\n\tmov     rbx, rdx\n\txor     ecx, ecx\n\tjmp     .8\n.6:\tlea     r13, [r9-1]\n\txor     r12d, r12d\n\tmov     rcx, rax\n\tmov     rbp, rdx\n\tmov     rax, [rsi+r9*8]\n\tjmp     .10\n\n\talign\t16\n.7:\tmul     r10\n\tadd     r12, rax\n\tmov     rax, [rsi+r13*8-8]\n\tmov     [rdi+r13*8-8], r12\n\tadc     rbx, rdx\n\tadc     ecx, 0\n.8:\tmul     r11\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tmov     ebp, 0\n\tmov     rax, [rsi+r13*8]\n\tmul     r10\n\tadd     rbx, rax\n\tmov     [rdi+r13*8], rbx\n\tadc     rcx, rdx\n\tmov     rax, [rsi+r13*8]\n\tadc     ebp, 0\n.9:\tmul     r11\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r13*8+8]\n\tmul     r10\n\tmov     r12d, 0\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tadc     r12d, 0\n\tmov     rax, [rsi+r13*8+8]\n.10:mul     r11\n\tadd     rbp, rax\n\tmov     [rdi+r13*8+8], rcx\n\tadc     r12, rdx\n\tmov     ebx, 0\n\tmov     rax, [rsi+r13*8+16]\n\tmul     r10\n\tadd     rbp, rax\n\tmov     rax, [rsi+r13*8+16]\n\tadc     r12, rdx\n\tadc     ebx, 0\n.11:mul     r11\n\tmov     ecx, 0\n\tadd     r12, rax\n\tmov     rax, [rsi+r13*8+24]\n\tmov     [rdi+r13*8+16], rbp\n\tadc     rbx, rdx\n\tadd     r13, 4\n\tjs      .7\n.12:imul    rax, r10\n\tadd     rax, r12\n\tmov     [rdi-8], rax\n\tadd     r9, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     r9, -2\n\tjge     .26\n.13:mov     r10, [r8]\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi+r9*8]\n\tmul     r10\n\ttest    r9b, 1\n\tjnz     .17\n.14:mov     r15, rax\n\tmov     r14, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r11\n\ttest    r9b, 2\n\tjnz     .16\n.15:lea     r13, [r9]\n\tmov     r12, [rdi+r9*8]\n\tmov     rbx, rax\n\tlea     rcx, [rdx]\n\tjmp     .23\n.16:lea   
  r13, [r9+2]\n\tmov     rcx, [rdi+r9*8]\n\tmov     rbp, rax\n\tmov     rax, [rsi+r9*8+8]\n\tlea     r12, [rdx]\n\tjmp     .21\n.17:mov     r14, rax\n\tmov     r15, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r11\n\ttest    r9b, 2\n\tjz      .19\n.18:lea     r13, [r9+1]\n\tlea     rbx, [rdx]\n\tmov     rbp, [rdi+r9*8]\n\tmov     r12, rax\n\tjmp     .22\n.19:lea     r13, [r9-1]\n\tlea     rbp, [rdx]\n\tmov     rcx, rax\n\tmov     rbx, [rdi+r9*8]\n\tmov     rax, [rsi+r9*8+8]\n\tjmp     .24\n\t\n    align\t16\n.20:mul     r11\n\tadd     rcx, rbx\n\tadc     rbp, rax\n\tmov     rax, [rsi+r13*8-8]\n\tlea     r12, [rdx]\n\tadc     r12, 0\n.21:mul     r10\n\tadd     r15, rcx\n\tmov     [rdi+r13*8-16], r15\n\tadc     r14, rax\n\tmov     r15, rdx\n\tadc     r15, 0\n\tmov     rax, [rsi+r13*8-8]\n\tmul     r11\n\tlea     rbx, [rdx]\n\tmov     rcx, [rdi+r13*8-8]\n\tadd     rbp, rcx\n\tadc     r12, rax\n\tadc     rbx, 0\n.22:mov     rax, [rsi+r13*8]\n\tmul     r10\n\tadd     r14, rbp\n\tadc     r15, rax\n\tmov     [rdi+r13*8-8], r14\n\tmov     r14, rdx\n\tadc     r14, 0\n\tmov     rax, [rsi+r13*8]\n\tmov     rbp, [rdi+r13*8]\n\tmul     r11\n\tadd     r12, rbp\n\tadc     rbx, rax\n\tlea     rcx, [rdx]\n\tadc     rcx, 0\n.23:mov     rax, [rsi+r13*8+8]\n\tmul     r10\n\tadd     r15, r12\n\tmov     [rdi+r13*8], r15\n\tadc     r14, rax\n\tmov     r15, rdx\n\tmov     r12, [rdi+r13*8+8]\n\tadc     r15, 0\n\tmov     rax, [rsi+r13*8+8]\n\tmul     r11\n\tadd     rbx, r12\n\tlea     rbp, [rdx]\n\tadc     rcx, rax\n\tmov     rax, [rsi+r13*8+16]\n\tadc     rbp, 0\n.24:mul     r10\n\tadd     r14, rbx\n\tmov     [rdi+r13*8+8], r14\n\tmov     r14, rdx\n\tadc     r15, rax\n\tmov     rax, [rsi+r13*8+16]\n\tmov     rbx, [rdi+r13*8+16]\n\tadc     r14, 0\n\tadd     r13, 4\n\tjnc     .20\n.25:imul    rax, r11\n\tadd     rcx, rbx\n\tadc     rbp, rax\n\tmov     rax, [rsi-8]\n\timul    rax, r10\n\tadd     r15, rcx\n\tmov     [rdi-16], r15\n\tadc     rax, r14\n\tmov     rcx, [rdi-8]\n\tadd     rbp, 
rcx\n\tadd     rax, rbp\n\tmov     [rdi-8], rax\n\tadd     r9, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     r9, -2\n\tjl      .13\n\tjnz     .27\n.26:mov     r10, [r8]\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi-16]\n\tmul     r10\n\tadd     rax, [rdi-16]\n\tadc     rdx, [rdi-8]\n\tmov     rbx, [rsi-8]\n\timul    rbx, r10\n\tmov     rcx, [rsi-16]\n\timul    rcx, r11\n\tmov     [rdi-16], rax\n\tadd     rcx, rbx\n\tadd     rcx, rdx\n\tmov     [rdi-8], rcx\n    EXIT_PROC reg_save_list\n\n.27:mov     r11, [r8]\n\timul    r11, [rsi-8]\n\tadd     r11, rax\n\tmov     [rdi-8], r11\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/core2/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nand_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [rdx+r9*8+24]\n    mov     r11, [rdx+r9*8+16]\n    and     r10, [r8+r9*8+24]\n    and     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [rdx+r9*8+8]\n    mov     T4, [rdx+r9*8]\n    and     T3, [r8+r9*8+8]\n    and     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nior_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/penryn/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n        CPU  Core2\n        BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n        FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     r13, [rsi+rdx*8-8]\n        mov     rax, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        sub     rcx, 2\n        align   16\n.1:     lea     r8, [r8]\n        mov     r11d, 0\n        mul     r8\n        mov     r10, [rsi+rcx*8-8]\n        add     r10, rax\n        lea     rax, [r13]\n        adc     r11, rdx\n        lea     r9, [r9]\n        lea     r13, [r11]\n        mul     r9\n        add     rax, r10\n        adc     r13, rdx\n        sub     rcx, 1\n        jnz     .1\n        mov     [rdi], rax\n        mov     rax, 
r8\n        mul     r13\n        add     [rdi], rax\n        adc     rdx, 0\n        mov     [rdi+8], rdx\n     \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/penryn/sumdiff_n.asm",
    "content": "; PROLOGUE(mpn_sumdiff_n)\n;\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;        \n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;        \n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                        rdi     rsi     rdx     rcx          r8\n;  rax                        rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n        CPU  Core2\n        BITS 64\n\n        FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        sub     r10, rbx\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        mov     r11, r8\n        adc     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+r10*8+8]\n        mov     rbp, [rdx+r10*8+16]\n        mov     
r12, [rdx+r10*8+24]\n        mov     r14, rbp\n        mov     r15, r12\n        adc     rbp, [rcx+r10*8+16]\n        adc     r12, [rcx+r10*8+24]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+r10*8]\n        mov     [rdi+r10*8], r8\n        sbb     r13, [rcx+r10*8+8]\n        sbb     r14, [rcx+r10*8+16]\n        mov     [rdi+r10*8+8], rbx\n        sbb     r15, [rcx+r10*8+24]\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+24], r12\n        mov     [rsi+r10*8+8], r13\n        setc    r9b\n        add     r10, 4\n        mov     [rsi+r10*8+16-32], r14\n        mov     [rsi+r10*8+24-32], r15\n        mov     [rsi+r10*8-32], r11\n        jnc     .1\n.2:     \n        cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        mov     r11, r8\n        adc     r8, [rcx]\n        mov     rbx, [rdx+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        mov     r14, rbp\n        adc     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx]\n        mov     [rdi], r8\n        sbb     r13, [rcx+8]\n        sbb     r14, [rcx+16]\n        mov     [rdi+8], rbx\n        mov     [rdi+16], rbp\n        mov     [rsi+8], r13\n        setc    r9b\n        mov     [rsi+16], r14\n        mov     [rsi], r11\n        sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        mov     r11, r8\n        adc     r8, [rcx+8]\n        mov     rbx, [rdx+8+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+8+8]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+8]\n        mov     [rdi+8], r8\n        sbb     r13, [rcx+8+8]\n        mov     [rdi+8+8], rbx\n        mov     [rsi+8+8], r13\n        setc    r9b\n        mov     [rsi+8], r11\n        sahf    
\n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        mov     r11, r8\n        adc     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+16]\n        mov     [rdi+16], r8\n        setc    r9b\n        mov     [rsi+16], r11\n.6:     sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/core2/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  mpn_popcount\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                       rdi,      rsi\n;\trax                       rcx,      rdx\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    global  __gmpn_popcount\n    \n%ifdef DLL\n    export  __gmpn_popcount\n%endif\n\n    PROC_FRAME __gmpn_popcount\n    alloc_stack 0x48\n    save_xmm128 xmm6, 0x00\n    save_xmm128 xmm7, 0x10\n    save_xmm128 xmm8, 0x20\n    save_xmm128 xmm9, 0x30\n    END_PROLOGUE\n\n\tmov     rax, 0x5555555555555555\n\tmovq    xmm4, rax\n\tmovddup xmm4, xmm4\n\tmov     rax, 0x3333333333333333\n\tmovq    xmm5, rax\n\tmovddup xmm5, xmm5\n\tmov     rax, 0x0f0f0f0f0f0f0f0f\n\tmovq    xmm6, rax\n\tmovddup xmm6, xmm6\n\tpxor    xmm7, xmm7\n\tpxor    xmm9, xmm9\n\tpxor    xmm8, xmm8\n\tbtr     rcx, 3\n\tsbb     rax, rax\n\tsub     rdx, rax\n\tmovq    xmm0, rax\n\tpandn   xmm0, [rcx]\n\tbt      rdx, 0\n\tsbb     r8, r8\n\tsub     rdx, r8\n\tmovq    xmm2, r8\n\tshufpd  xmm2, xmm2, 1\n\tpandn   xmm2, [rcx+rdx*8-16]\n\tcmp     rdx, 2\n\tjne     .0\n\tadd     rdx, 2\n\tmovq    
xmm1, rax\n\tmovddup xmm1, xmm1\n\tpand    xmm0, xmm1\n\tpandn   xmm1, xmm2\n\tmovdqa  xmm2, xmm1\n.0: movdqa  xmm1, xmm0\n\tmovdqa  xmm3, xmm2\n\tsub     rdx, 8\n\tjc      .2\n\t\n    xalign  16\n.1: psrlw   xmm0, 1\n\tpand    xmm0, xmm4\n\tpsubb   xmm1, xmm0\n\tpsrlw   xmm2, 1\n\tmovdqa  xmm0, xmm1\n\tpaddq   xmm9, xmm8\n\tpsrlw   xmm1, 2\n\tpand    xmm0, xmm5\n\tpand    xmm1, xmm5\n\tpaddb   xmm1, xmm0\n\tpand    xmm2, xmm4\n\tsub     rdx, 4\n\tpsubb   xmm3, xmm2\n\tmovdqa  xmm2, xmm3\n\tpsrlw   xmm3, 2\n\tpand    xmm2, xmm5\n\tpand    xmm3, xmm5\n\tpaddb   xmm3, xmm2\n\tmovdqa  xmm0, [rcx+rdx*8+32-32+64]\n\tpaddb   xmm3, xmm1\n\tmovdqa  xmm8, xmm3\n\tpsrlw   xmm3, 4\n\tpand    xmm3, xmm6\n\tmovdqa  xmm2, [rcx+rdx*8+32-48+64]\n\tpand    xmm8, xmm6\n\tmovdqa  xmm1, [rcx+rdx*8+32-32+64]\n\tpaddb   xmm8, xmm3\n\tmovdqa  xmm3, [rcx+rdx*8+32-48+64]\n\tpsadbw  xmm8, xmm7\n\tjnc     .1\n.2: psrlw   xmm0, 1\n\tpand    xmm0, xmm4\n\tpsubb   xmm1, xmm0\n\tpsrlw   xmm2, 1\n\tmovdqa  xmm0, xmm1\n\tpaddq   xmm9, xmm8\n\tpsrlw   xmm1, 2\n\tpand    xmm0, xmm5\n\tpand    xmm1, xmm5\n\tpaddb   xmm1, xmm0\n\tpand    xmm2, xmm4\n\tpsubb   xmm3, xmm2\n\tmovdqa  xmm2, xmm3\n\tpsrlw   xmm3, 2\n\tpand    xmm2, xmm5\n\tpand    xmm3, xmm5\n\tpaddb   xmm3, xmm2\n\tpaddb   xmm3, xmm1\n\tmovdqa  xmm8, xmm3\n\tpsrlw   xmm3, 4\n\tpand    xmm3, xmm6\n\tpand    xmm8, xmm6\n\tpaddb   xmm8, xmm3\n\tpsadbw  xmm8, xmm7\n\tcmp     rdx, -3\n\tjl      .4\n.3: movdqa  xmm2, [rcx+rdx*8-32+64]\n\tmovdqa  xmm3, xmm2\n\tpsrlw   xmm2, 1\n\tpaddq   xmm9, xmm8\n\tpand    xmm2, xmm4\n\tpsubb   xmm3, xmm2\n\tmovdqa  xmm2, xmm3\n\tpsrlw   xmm3, 2\n\tpand    xmm2, xmm5\n\tpand    xmm3, xmm5\n\tpaddb   xmm3, xmm2\n\tmovdqa  xmm8, xmm3\n\tpsrlw   xmm3, 4\n\tpand    xmm3, xmm6\n\tpand    xmm8, xmm6\n\tpaddb   xmm8, xmm3\n\tpsadbw  xmm8, xmm7\n.4:     paddq   xmm9, xmm8\n\tmovq    rax, xmm9\n\tshufpd  xmm9, xmm9, 1\n\tmovq    r8, xmm9\n\tadd     rax, r8\n.5: movdqa  xmm6, [rsp+0x00]\n    movdqa  xmm7, [rsp+0x10]\n   
 movdqa  xmm8, [rsp+0x20]\n    movdqa  xmm9, [rsp+0x30]\n    add     rsp, 0x48\n    ret\n    ENDPROC_FRAME\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_core2_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/rsh1add_n.asm",
    "content": "; PROLOGUE(mpn_rsh1add_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rsh1add_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1add_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov 
    [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/rsh1sub_n.asm",
    "content": "; PROLOGUE(mpn_rsh1sub_n)\n\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two's complement\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1sub_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], 
rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extension of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // earlier , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store 
low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rshift, 0, reg_save_list\n; when n=1 mod4 seem to have different runtimes\n    mov     rax, r8\n\tmov     rbx, 4\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, r9d\n\tsub     rbx, rax\n\n\txor     rax, rax\n\tmov     rdx, [rsi+rbx*8-8]\n\tshrd    rax, rdx, cl\n\tcmp     rbx, 0\n\tjge     .2\n\n\txalign  16\n.1:\tmov     r8, [rsi+rbx*8]\n\tmov     r11, [rsi+rbx*8+24]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tmov     r10, [rsi+rbx*8+16]\n\tshrd    r9, r10, cl\n\tmov     rdx, r11\n\tmov     [rdi+rbx*8+8], r9\n\tshrd    r10, r11, cl\n\tadd     rbx, 4\n\tmov     
[rdi+rbx*8-16], r10\n\tjnc     .1\n.2:\tcmp     rbx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tmov     r10, [rsi+rbx*8+16]\n\tshrd    r9, r10, cl\n\tmov     [rdi+rbx*8+8], r9\n\tshr     r10, cl\n\tmov     [rdi+rbx*8+16], r10\n\tEXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tshr     r9, cl\n\tmov     [rdi+rbx*8+8], r9\n\tEXIT_PROC reg_save_list\n\n\txalign  16\n.5:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tshr     r8, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tEXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tshr     rdx, cl\n\tmov     [rdi+rbx*8-8], rdx\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/rshift1.asm",
    "content": "; PROLOGUE(mpn_rshift1)\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t)\n;  rax                     rdi     rsi        rdx\n;  rax                     rcx     rdx        r8d\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n%define reg_save_list r12, r13\n\n    FRAME_PROC mpn_rshift1, 0, reg_save_list\n    mov     r8d, r8d\n\n    xor     rax, rax\n    lea     rdx, [rdx+r8*8-8]\n    lea     rcx, [rcx+r8*8-8]\n    mov     r9, r8\n    and     r9, 7\n    inc     r9\n    shr     r8, 3\n; and clear carry flag\n    cmp     r8, 0\n    jz      .2\n\n    alignb  16, nop\n.1: \n\tmov     r10, [rdx]\n    mov     r11, [rdx-8]\n    mov     r12, [rdx-16]\n    mov     r13, [rdx-24]\n    rcr     r10, 1\n    rcr     r11, 1\n    rcr     r12, 1\n    rcr     r13, 1\n    mov     [rcx], r10\n    mov     [rcx-8], r11\n    mov     [rcx-16], r12\n    mov     [rcx-24], r13\n    mov     r10, [rdx-32]\n    mov     r11, [rdx-40]\n    mov     r12, [rdx-48]\n    mov     r13, [rdx-56]\n    rcr     r10, 1\n    rcr     r11, 1\n    rcr     r12, 1\n    rcr 
    r13, 1\n    mov     [rcx-32], r10\n    mov     [rcx-40], r11\n    mov     [rcx-48], r12\n    mov     [rcx-56], r13\n    lea     rdx, [rdx-64]\n    dec     r8\n    lea     rcx, [rcx-64]\n    jnz     .1\n.2: \n\tdec     r9\n    jz      .3\n; Could suffer cache-bank conflicts in this tail part\n    mov     r10, [rdx]\n    rcr     r10, 1\n    mov     [rcx], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-8]\n    rcr     r10, 1\n    mov     [rcx-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-16]\n    rcr     r10, 1\n    mov     [rcx-16], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-24]\n    rcr     r10, 1\n    mov     [rcx-24], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-32]\n    rcr     r10, 1\n    mov     [rcx-32], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-40]\n    rcr     r10, 1\n    mov     [rcx-40], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-48]\n    rcr     r10, 1\n    mov     [rcx-48], r10\n.3: \n\trcr     rax, 1\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  Copyright 2009 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_store\n\n\tcmp     rdx, 0\n\tjz      .4\n\tmovq    xmm0, r8\n\tmovddup xmm0, xmm0\n\tlea     rcx, [rcx-16]\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+16], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 2\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+16]\n\tsub     rdx, 2\n\tmovdqa  [rcx], xmm0\n\tjnc     .2\n.3:\tjnp     .4\n\tmov     [rcx+16], r8\n.4:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define TR2 r10\n%define TR4 r10\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_sub_nc\n    mov     r10, [rsp+0x28]\n    jmp     mpn_sub_entry\n\n\tLEAF_PROC mpn_sub_n\n\txor     r10, r10\n\nmpn_sub_entry:\n\tmov     rax, r9\n\tmov     r9, rcx\n\tmov     rcx, rax\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     r9, [r9+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     r8, [r8+rcx*8]\n\tneg     rcx\n\tlea     rcx, [r10+rcx*2]\n\tsar     rcx, 1\n\tjz      .2\n\n\txalign  16\n.1:\tmov     r10, [rdx+rcx*8]\n\tmov     r11, [rdx+rcx*8+16]\n\tsbb     r10, [r8+rcx*8]\n\tmov     [r9+rcx*8], r10\n\tmov     TR2, [rdx+rcx*8+8]\n\tsbb     TR2, [r8+rcx*8+8]\n\tmov     [r9+rcx*8+8], 
TR2\n\tlea     rcx, [rcx+4]\n\tmov     TR4, [rdx+rcx*8-8]\n\tsbb     r11, [r8+rcx*8-16]\n\tsbb     TR4, [r8+rcx*8-8]\n\tmov     [r9+rcx*8-16], r11\n\tmov     [r9+rcx*8-8], TR4\n\tjrcxz   .2\n\tjmp     .1\n.2:\tsbb     rcx, rcx\n.3:\tcmp     rax, 2\n\tja      .6\n\tjz      .7\n\tjp      .5\n.4:\tsub     rax, rcx\n\tret\n\t\n\txalign  16\n.5:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.6:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tmov     r11, [rdx+16]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tsbb     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     r11, [r8+16]\n\tmov     [r9+16], r11\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.7:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tsbb     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/subadd_n.asm",
    "content": "; PROLOGUE(mpn_subadd_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx          r8\n;  rax                       rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_subadd_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rsi+r10*8]\n        sbb     r8, [rcx+r10*8]\n        mov     rbx, [rsi+r10*8+8]\n        sbb     rbx, [rcx+r10*8+8]\n        mov     r11, [rsi+r10*8+24]\n        mov     rbp, [rsi+r10*8+16]\n        sbb     rbp, [rcx+r10*8+16]\n        sbb     r11, [rcx+r10*8+24]\n    
    lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+r10*8]\n        sbb     rbx, [rdx+r10*8+8]\n        mov     [rdi+r10*8], r8\n        sbb     rbp, [rdx+r10*8+16]\n        sbb     r11, [rdx+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rsi]\n        sbb     r8, [rcx]\n        mov     rbx, [rsi+8]\n        sbb     rbx, [rcx+8]\n        mov     rbp, [rsi+16]\n        sbb     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx]\n        sbb     rbx, [rdx+8]\n        mov     [rdi], r8\n        sbb     rbp, [rdx+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rsi+8]\n        sbb     r8, [rcx+8]\n        mov     rbx, [rsi+16]\n        sbb     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+8]\n        sbb     rbx, [rdx+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rsi+16]\n        sbb     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end\n  "
  },
  {
    "path": "mpn/x86_64w/core2/sublsh1_n.asm",
    "content": "; PROLOGUE(mpn_sublsh1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n    mov     rax, r9\n\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    lea     rdi, [rcx+rax*8]\n    mov     rcx, rax\n\n    neg     rcx\n    xor     r9, r9\n    xor     rax, rax\n    test    rcx, 3\n    jz      .2\n.1: mov     r10, [rsi+rcx*8]\n    add     r9, 1\n    sbb     r10, [rdx+rcx*8]\n    sbb     r9, r9\n    add     rax, 1\n    sbb     r10, [rdx+rcx*8]\n    sbb     rax, rax\n    mov     [rdi+rcx*8], r10\n    add     rcx, 1              ; ***\n    test    rcx, 3\n    jnz     .1\n.2: cmp     rcx, 0\n    jz      .4\n\n    xalign  16\n.3: mov     r10, [rsi+rcx*8]\n    mov     rbx, [rsi+rcx*8+8]\n    mov     r11, [rsi+rcx*8+16]\n    mov     
r8, [rsi+rcx*8+24]\n    mov     r12, [rdx+rcx*8]\n    mov     r13, [rdx+rcx*8+8]\n    mov     r14, [rdx+rcx*8+16]\n    mov     r15, [rdx+rcx*8+24]\n    add     r9, 1\n    sbb     r10, r12\n    sbb     rbx, r13\n    sbb     r11, r14\n    sbb     r8, r15\n    sbb     r9, r9\n    add     rax, 1\n    sbb     r10, r12\n    sbb     rbx, r13\n    sbb     r11, r14\n    sbb     r8, r15\n    sbb     rax, rax\n    mov     [rdi+rcx*8], r10\n    mov     [rdi+rcx*8+8], rbx\n    mov     [rdi+rcx*8+16], r11\n    mov     [rdi+rcx*8+24], r8\n    add     rcx, 4\n    jnz     .3\n.4: add     rax, r9\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/submul_1.asm",
    "content": ";  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for \"Core 2\".\n\n;  Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software\n;  Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\n; mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                       rdi        rsi        rdx        rcx\n;  rax                       rcx        rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n\tTEXT\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_submul_1c, 4\n\tlea     rbx, [rdx]\n\tneg     rbx\n\tmov     rax, [rsi]\n\tmov     r10, [rdi]\n\tlea     rdi, [rdi+rdx*8-16]\n\tlea     rsi, [rsi+rdx*8]\n\tmul     rcx\n\tadd     rax, r8\n\tadc     rdx, 0\n\tbt      ebx, 0\n\tjc      .1\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tmul     rcx\n\tadd     rbx, 2\n\tjns     .5\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tjmp     .3\n.1:\tadd     rbx, 1\n\tjns     .6\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tmul     rcx\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tjmp     .4\n\n\txalign  16\n.2:\tmul     rcx\n\tsub     r10, r8\n\tlea     r8, [rax]\n\tmov     rax, 
[rsi+rbx*8]\n\tadc     r11, r9\n\tmov     [rdi+rbx*8-8], r10\n\tmov     r10, [rdi+rbx*8]\n\tlea     r9, [rdx]\n\tadc     rbp, 0\n.3:\tmul     rcx\n\tsub     r10, r11\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r8, rbp\n\tmov     [rdi+rbx*8], r10\n\tmov     r10, [rdi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tadc     r9, 0\n.4:\tadd     rbx, 2\n\tjs      .2\n\tmul     rcx\n\tsub     r10, r8\n\tadc     r11, r9\n\tmov     [rdi-8], r10\n\tadc     rbp, 0\n.5:\tmov     r10, [rdi]\n\tsub     r10, r11\n\tadc     rax, rbp\n\tmov     [rdi], r10\n\tadc     rdx, 0\n.6:\tmov     r10, [rdi+8]\n\tsub     r10, rax\n\tmov     [rdi+8], r10\n\tmov     eax, ebx\n\tadc     rax, rdx\n\tWIN64_GCC_END\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_submul_1, 4\n\tlea     rbx, [rdx]\n\tneg     rbx\n\tmov     rax, [rsi]\n\tmov     r10, [rdi]\n\tlea     rdi, [rdi+rdx*8-16]\n\tlea     rsi, [rsi+rdx*8]\n\tmul     rcx\n\tbt      ebx, 0\n\tjc      .1\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tmul     rcx\n\tadd     rbx, 2\n\tjns     .5\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tjmp     .3\n.1:\tadd     rbx, 1\n\tjns     .6\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tlea     r9, [rdx]\n\tmul     rcx\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tjmp     .4\n\n\txalign  16\n.2:\tmul     rcx\n\tsub     r10, r8\n\tlea     r8, [rax]\n\tmov     rax, [rsi+rbx*8]\n\tadc     r11, r9\n\tmov     [rdi+rbx*8-8], r10\n\tmov     r10, [rdi+rbx*8]\n\tlea     r9, [rdx]\n\tadc     rbp, 0\n.3:\tmul     rcx\n\tsub     r10, r11\n\tlea     r11, [rax]\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r8, rbp\n\tmov     [rdi+rbx*8], r10\n\tmov     r10, [rdi+rbx*8+8]\n\tlea     rbp, [rdx]\n\tadc     r9, 0\n.4:\tadd     rbx, 2\n\tjs      .2\n\tmul     rcx\n\tsub     r10, r8\n\tadc     r11, r9\n\tmov     [rdi-8], r10\n\tadc     rbp, 0\n.5:\tmov     r10, [rdi]\n\tsub     r10, r11\n\tadc     rax, rbp\n\tmov     [rdi], r10\n\tadc     
rdx, 0\n.6:\tmov     r10, [rdi+8]\n\tsub     r10, rax\n\tmov     [rdi+8], r10\n\tmov     eax, ebx\n\tadc     rax, rdx\n\tWIN64_GCC_END\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/core2/sumdiff_n.asm",
    "content": "; PROLOGUE(mpn_sumdiff_n)\n;\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;        \n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;        \n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                        rdi     rsi     rdx     rcx          r8\n;  rax                        rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        sub     r10, rbx\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        mov     r11, r8\n        adc     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+r10*8+8]\n        mov     r12, [rdx+r10*8+24]\n        mov   
  rbp, [rdx+r10*8+16]\n        mov     r15, r12\n        mov     r14, rbp\n        adc     rbp, [rcx+r10*8+16]\n        adc     r12, [rcx+r10*8+24]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+r10*8]\n        mov     [rdi+r10*8], r8\n        sbb     r13, [rcx+r10*8+8]\n        sbb     r14, [rcx+r10*8+16]\n        mov     [rdi+r10*8+8], rbx\n        mov     [rdi+r10*8+16], rbp\n        sbb     r15, [rcx+r10*8+24]\n        mov     [rdi+r10*8+24], r12\n        setc    r9b\n        mov     [rsi+r10*8+16], r14\n        mov     [rsi+r10*8+24], r15\n        add     r10, 4\n        mov     [rsi+r10*8+8-32], r13\n        mov     [rsi+r10*8-32], r11\n        jnc     .1\n.2:     \n        cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        mov     r11, r8\n        adc     r8, [rcx]\n        mov     rbx, [rdx+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        mov     r14, rbp\n        adc     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx]\n        mov     [rdi], r8\n        sbb     r13, [rcx+8]\n        sbb     r14, [rcx+16]\n        mov     [rdi+8], rbx\n        mov     [rdi+16], rbp\n        setc    r9b\n        mov     [rsi+16], r14\n        mov     [rsi+8], r13\n        mov     [rsi], r11\n        sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        mov     r11, r8\n        adc     r8, [rcx+8]\n        mov     rbx, [rdx+8+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+8+8]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+8]\n        mov     [rdi+8], r8\n        sbb     r13, [rcx+8+8]\n        mov     [rdi+8+8], rbx\n        setc    r9b\n        mov     [rsi+8+8], r13\n        mov     [rsi+8], r11\n        sahf    
\n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        mov     r11, r8\n        adc     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+16]\n        mov     [rdi+16], r8\n        setc    r9b\n        mov     [rsi+16], r11\n.6:     sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/core2/udiv_qrnnd.asm",
    "content": "; PROLOGUE(mpn_udiv_qrnnd)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_udiv_qrnnd(mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t)\n;  rax                         rdi        rsi        rdx        rcx\n;  rax                         rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_udiv_qrnnd\n    mov     rax,r8\n    div     r9\n    mov     [rcx],rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/umul_ppmm.asm",
    "content": "; PROLOGUE(mpn_umul_ppmm)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_umul_ppmm(mp_ptr, mp_limb_t, mp_limb_t)\n;  rax                        rdi        rsi        rdx\n;  rax                        rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_umul_ppmm\n    mov     rax,rdx\n    mul     r8\n    mov     [rcx],rax\n    mov     rax,rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xnor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz  
    .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/core2/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, r10\n    or      r11, r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, T3\n    or      T4, T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1    
   ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/divexact_1.asm",
    "content": "; PROLOGUE(mpn_divexact_1)\n\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; since the inverse takes a while to setup,plain division is used for small\n; Multiplying works out faster for size>=3 when the divisor is odd or size>=4\n; when the divisor is even.\n;\n;  void mpn_divexact_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;                         rdi     rsi        rdx        rcx\n;                         rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi\n\n    BITS 64\n\n    extern  __gmp_modlimb_invert_table\n\n    LEAF_PROC mpn_divexact_1\n    mov     r10, rdx\n    mov     rax, r9\n    and     rax, byte 1\n    add     rax, r8\n    cmp     rax, byte 4\n    jae     .2\n    xor     rdx,rdx\n.1: mov     rax, [r10+r8*8-8]\n    div     r9\n    mov     [rcx+r8*8-8], rax\n    sub     r8, 1\n    jnz     .1\n    ret                     ; avoid single byte return\n\n.2:\tFRAME_PROC ?mpn_divexact, 0, reg_save_list\n    mov     rsi, rdx        ; src pointer\n    mov     rdi, rcx        ; dst pointer\n    bsf     
rcx, r9         ; remove powers of two\n    shr     r9, cl\n    mov     rax, r9\n    shr     rax, 1\n    and     rax, 127\n    lea     rdx, [rel __gmp_modlimb_invert_table]\n    movzx   rax, byte [rdx+rax]\n\n; If f(x) = 0, then x[n+1] = x[n] - f(x) / f'(x) is Newton's iteration for a\n; root. With f(x) = 1/x - v we obtain x[n + 1] = 2 * x[n] - v * x[n] * x[n]\n; as an iteration for x = 1 / v.  This provides quadratic convergence so\n; that the number of bits of precision doubles on each iteration.  The\n; iteration starts with 8-bit precision.\n\n    lea     edx, [rax+rax]\n    imul    eax, eax\n    imul    eax, r9d\n    sub     edx, eax            ; inv -> rdx (16-bit approx)\n\n    lea     eax, [rdx+rdx]\n    imul    edx, edx\n    imul    edx, r9d\n    sub     eax, edx            ; inv -> rdx (32-bit approx)\n\n    lea     rdx, [rax+rax]\n    imul    rax, rax\n    imul    rax, r9\n    sub     rdx, rax            ; inv -> rdx (64-bit approx)\n\n    lea     rsi, [rsi+r8*8]\n    lea     rdi, [rdi+r8*8]\n    neg     r8\n\n    mov     r10, rdx            ; inverse multiplier -> r10\n    xor     r11, r11\n    mov     rax, [rsi+r8*8]\n    or      rcx, rcx\n    mov     rdx, [rsi+r8*8+8]\n    jz      .4                  ; if divisor is odd\n    shrd    rax, rdx, cl\n    add     r8, 1\n    jmp     .6\n\n    xalign  16\n.3: mul     r9                  ; divisor is odd\n    mov     rax, [rsi+r8*8]\n    sub     rdx, r11\n    sub     rax, rdx\n    sbb     r11, r11\n.4: imul    rax, r10\n    mov     [rdi+r8*8], rax\n    add     r8, 1\n    jnz     .3\n    jmp     .7\n\n    xalign  16\n.5: mul     r9                  ; divisor is even\n    sub     rdx, r11\n    mov     rax, [rsi+r8*8-8]\n    mov     r11, [rsi+r8*8]\n    shrd    rax, r11, cl\n    sub     rax, rdx\n    sbb     r11, r11\n.6: imul    rax, r10\n    mov     [rdi+r8*8-8],rax\n    add     r8, 1\n    jnz     .5\n    mul     r9\n    mov     rax, [rsi-8]\n    sub     rdx, r11\n    shr     rax, cl\n    sub     rax, rdx\n    
imul    rax, r10\n    mov     [rdi-8], rax\n.7: END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/divexact_by3c.asm",
    "content": "; PROLOGUE(mpn_divexact_by3c)\n\n; Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham and Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_by3c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t  mpn_divexact_by3(mp_ptr, mp_ptr, mp_size_t)\n;  rax                            rdi     rsi        rdx        rcx\n;  rax                            rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n%define MLT1 0x5555555555555555\n\n    xalign  16\n    LEAF_PROC mpn_divexact_by3c\n    mov     rax, r8\n    mov     r8, MLT1\n    imul    r9, r8\n    jmp     entry\n\n    xalign  16\n    LEAF_PROC mpn_divexact_by3\n    mov     rax, r8\n    mov     r8, MLT1\n    xor     r9, r9\n\nentry:\n    lea     r10, [rdx+rax*8-24]\n    lea     r11, [rcx+rax*8-24]\n    mov     ecx, 3\n    sub     rcx, rax\n    jnc     .2\n\n    xalign  16\n.1: mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+16]\n    mul     r8\n    sub    
 r9, rax\n    mov     [r11+rcx*8+16], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+24]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+24], r9\n    sbb     r9, rdx\n    add     rcx, 4\n    jnc     .1\n.2: test    rcx, 2\n    jnz     .3\n    mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+8], r9\n    sbb     r9, rdx\n    add     rcx, 2\n.3: test    rcx, 1\n    jnz     .4\n    mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n.4:\tlea     rax, [r9+r9*2]\n    neg     rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/divexact_byfobm1.asm",
    "content": "; PROLOGUE(mpn_divexact_byfobm1)\n\n;  Copyright 2008 Jason Moxham and Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  rcx[r8] = rdx[r8] / r9     where [rsp+40] must be set to (B - 1) / r9 on entry \n;\n;  mp_limb_t mpn_divexact_byfobm1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                             rdi     rsi        rdx        rcx         r8  \n;  rax                             rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define CARRY_OUT\n\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byfobm1\n    mov     rax, r8\n    mov     r8, [rsp+40]\n    \n%ifdef CARRY_OUT\n    mov     [rsp+32], r9\n%endif\n\n    lea     r10, [rdx+rax*8-24]\n    lea     r11, [rcx+rax*8-24]\n    mov     ecx, 3\n\n%ifdef CARRY_IN\n\tmov\t\tr9, [rsp+48]    ; r9 is the carry in\n    imul    r9, r8          ; this is needed if we have non-zero carry in\n%else\n    mov     r9, 0\n%endif\n\n    sub     rcx, rax\n    jnc     .2\n\n    xalign  16\n.1: mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+8]\n    mul     r8\n    sub     r9, rax\n    mov   
  [r11+rcx*8+8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+16]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+16], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+24]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+24], r9\n    sbb     r9, rdx\n    add     rcx, 4\n    jnc     .1\n.2: test    rcx, 2\n    jnz     .3\n    mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n    mov     rax, [r10+rcx*8+8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8+8], r9\n    sbb     r9, rdx\n    add     rcx, 2\n.3: test    rcx, 1\n    jnz     .4\n    mov     rax, [r10+rcx*8]\n    mul     r8\n    sub     r9, rax\n    mov     [r11+rcx*8], r9\n    sbb     r9, rdx\n.4: \n\n%ifdef CARRY_OUT\n    imul    r9, [rsp+32]\n%endif\n\n    mov     rax, r9\n    neg     rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/divrem_2.asm",
    "content": "; PROLOGUE(mpn_divrem_2)\n\n;  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.\n\n;  Copyright 2007, 2008 Free Software Foundation, Inc.\n\n;  Copyright Brian Gladman 2010 (Conversion to yasm format)\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n\n;  mp_limb_t mpn_divrem_2(mp_ptr, mp_size_t, mp_ptr, mp_size_t,  mp_ptr)\n;  rax                       rdi        rsi     rdx        rcx       r8\n;  rax                       rcx        rdx      r8         r9 [rsp+40]\n\n%include 'yasm_mac.inc'\n\n    BITS 64\n\tTEXT\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\n\tWIN64_GCC_PROC mpn_divrem_2, 5, frame\n\tlea     rax, [rdx+rcx*8]\n\tmov     r13, rsi\n\tlea     r12, [rax-24]\n\tmov     rbp, rdi\n\tmov     r11, [r8+8]\n\tmov     r9, [rax-8]\n\tmov     r8, [r8]\n\tmov     r10, [rax-16]\n\txor     r15d, r15d\n\tcmp     r11, r9\n\tja      .1\n\tsetb    dl\n\tcmp     r8, r10\n\tsetbe   al\n\tor      dl, al\n\tjne     .10\n.1: lea     rbx, [rcx+r13-3]\n\ttest    rbx, rbx\n\tjs      .14\n\tmov     rdx, r11\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r11\n\tmov     rdx, r11\n\tmov     rdi, rax\n\timul    rdx, rax\n\tmov     r14, rdx\n\tmul     r8\n\tmov     rcx, rdx\n\tmov     rdx, -1\n\tadd     r14, r8\n\tadc     rdx, 0\n\tadd     r14, rcx\n\tadc     rdx, 0\n\tjs      .3\n.2: dec     
rdi\n\tsub     r14, r11\n\tsbb     rdx, 0\n\tjns     .2\n.3:\n\n%ifdef  NEW\n\n\tlea     rbp, [rbp+rbx*8]\n\tmov     rcx, rbx\n\tmov     rbx, r9\n\tmov     r9, rdi\n\tmov     r14, r10\n\tmov     rsi, r11\n\tneg     rsi\n\t\n\txalign  16\n.4: mov     rax, r9\n\tmul     rbx\n\tadd     rax, r14\n\tmov     r10, rax\n\tadc     rdx, rbx\n\tmov     rdi, rdx\n\timul    rdx, rsi\n\tmov     rax, r8\n\tlea     rbx, [rdx+r14]\n\tmul     rdi\n\txor     r14d, r14d\n\tcmp     r13, rcx\n\tjg      .5\n\tmov     r14, [r12]\n\tsub     r12, 8\n.5: sub     r14, r8\n\tsbb     rbx, r11\n\tsub     r14, rax\n\tsbb     rbx, rdx\n\tinc     rdi\n\txor     edx, edx\n\tcmp     rbx, r10\n\tmov     rax, r8\n\tadc     rdx, -1\n\tadd     rdi, rdx\n\tand     rax, rdx\n\tand     rdx, r11\n\tadd     r14, rax\n\tadc     rbx, rdx\n\tcmp     rbx, r11\n\tjae     .11\n.6:\tmov     [rbp], rdi\n\tsub     rbp, 8\n\tdec     rcx\n\tjns     .4\n\n\tmov     r10, r14\n\tmov     r9, rbx\n\n%else\n\n\tlea     rbp, [rbp+rbx*8]\n\tmov     rcx, rbx\n\tmov     rax, r9\n\tmov     rsi, r10\n\t\n\txalign  16\n.7: mov     r14, rax\n\tmul     rdi\n\tmov     r9, r11\n\tadd     rax, rsi\n\tmov     rbx, rax\n\tadc     rdx, r14\n\tlea     r10, [rdx+1]\n\tmov     rax, rdx\n\timul    r9, rdx\n\tsub     rsi, r9\n\txor     r9d, r9d\n\tmul     r8\n\tcmp     r13, rcx\n\tjg      .8\n\tmov     r9, [r12]\n\tsub     r12, 8\n.8: sub     r9, r8\n\tsbb     rsi, r11\n\tsub     r9, rax\n\tsbb     rsi, rdx\n\tcmp     rsi, rbx\n\tsbb     rax, rax\n\tnot     rax\n\tadd     r10, rax\n\tmov     rbx, r8\n\tand     rbx, rax\n\tand     rax, r11\n\tadd     r9, rbx\n\tadc     rax, rsi\n\tcmp     r11, rax\n\tjbe     .12\n.9: mov     [rbp], r10\n\tsub     rbp, 8\n\tmov     rsi, r9\n\tdec     rcx\n\tjns     .7\n\n\tmov     r10, rsi\n\tmov     r9, rax\n\n%endif\n\n    jmp     .14\n\n.10:inc     r15d\n\tsub     r10, r8\n\tsbb     r9, r11\n\tjmp     .1\n\n%ifdef  NEW\n\n.11:seta    dl\n\tcmp     r14, r8\n\tsetae   al\n\torb     al, dl\n\tje      .6\n\tinc     
rdi\n\tsub     r14, r8\n\tsbb     rbx, r11\n\tjmp     .6\n\n%else\n\n.12:jb      .13\n\tcmp     r9, r8\n\tjb      .9\n.13:inc     r10\n\tsub     r9, r8\n\tsbb     rax, r11\n\tjmp     .9\n\t\n%endif\n\n.14:mov     [r12+8], r10\n\tmov     [r12+16], r9\n\tmov     rax, r15\n\tWIN64_GCC_END\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/divrem_euclidean_qr_1.asm",
    "content": "; PROLOGUE(mpn_divrem_euclidean_qr_1)\n\n;  x86-64 mpn_divrem_euclidean_qr_1 -- mpn by limb division.\n\n;  Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.\n\n;  Copyright Brian Gladman 2010 (Conversion to yasm format)\n\n;  This file is part of the GNU MP Library.\n\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n\n;\tmp_limb_t mpn_divrem_euclidean_qr_1(mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_limb_t)\n;   rax                                    rdi        rsi     rdx        rcx         r8\n;   rax                                    rcx        rdx      r8         r9   [rsp+40]\n\n;\tmp_limb_t mpn_preinv_divrem_1(mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t,     int)\n;   rax                              rdi        rsi     rdx        rcx         r8         r9   8(rsp)\n;   rax                              rcx        rdx      r8         r9   [rsp+40]   [rsp+48] [rsp+56]\n\n%include 'yasm_mac.inc'\n\n    BITS 64\n\tTEXT\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13\n\n%define SPECIAL_CODE_FOR_NORMALIZED_DIVISOR\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_divrem_euclidean_qr_1, 5, frame\n\txor     eax, eax\n\tmov     r12, rsi\n\tmov     rbx, rcx\n\tadd     rcx, rsi\n\tmov     rsi, rdx\n\tje      .17\n\n\tlea     rdi, [rdi+rcx*8-8]\n\txor     ebp, ebp\n\n%ifdef  SPECIAL_CODE_FOR_NORMALIZED_DIVISOR\n\n\ttest    r8, r8\n\tjns     .6\n\n\ttest    rbx, rbx\n\tje   
   .1\n\tmov     rbp, [rsi+rbx*8-8]\n\tdec     rbx\n\tmov     rax, rbp\n\tsub     rbp, r8\n\tcmovb   rbp, rax\n\tsbb     eax, eax\n\tinc     eax\n\tmov     [rdi], rax\n\tlea     rdi, [rdi-8]\n.1: mov     rdx, r8\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r8\n\tmov     r9, rax\n\tmov     rax, rbp\n\tjmp     .4\n\n\txalign  16\n.2: mov     r10, [rsi+rbx*8]\n\tlea     rbp, [rax+1]\n\tmul     r9\n\tadd     rax, r10\n\tadc     rdx, rbp\n\tmov     rbp, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     r10, rdx\n\tmov     rax, r8\n\tadd     rax, r10\n\tcmp     r10, rbp\n\tcmovb   rax, r10\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .5\n.3: mov     [rdi], r13\n\tsub     rdi, 8\n.4: dec     rbx\n\tjns     .2\n\txor     ecx, ecx\n\tjmp     .14\n.5: sub     rax, r8\n\tinc     r13\n\tjmp     .3\n\t\n%endif\n\n.6: test    rbx, rbx\n\tje      .7\n\tmov     rax, [rsi+rbx*8-8]\n\tcmp     rax, r8\n\tjae     .7\n\tmov     [rdi], rbp\n\tmov     rbp, rax\n\tlea     rdi, [rdi-8]\n\tje      .17\n\tdec     rbx\n.7: bsr     rcx, r8\n\tnot     ecx\n\tsal     r8, cl\n\tsal     rbp, cl\n\tmov     rdx, r8\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r8\n\ttest    rbx, rbx\n\tmov     r9, rax\n\tmov     rax, rbp\n\tje      .14\n\tmov     rbp, [rsi+rbx*8-8]\n\tshr     rax, cl\n\tshld    rax, rbp, cl\n\tsub     rbx, 2\n\tjs      .10\n\n\txalign  16\n.8: nop\n\tmov     r10, [rsi+rbx*8]\n\tlea     r11, [rax+1]\n\tshld    rbp, r10, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .12\n.9: mov     [rdi], r13\n\tsub     rdi, 8\n\tdec     rbx\n\tmov     rbp, r10\n\tjns     .8\n.10:lea     r11, [rax+1]\n\tsal     rbp, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     
rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .13\n.11:mov     [rdi], r13\n\tsub     rdi, 8\n\tjmp     .14\n.12:sub     rax, r8\n\tinc     r13\n\tjmp     .9\n.13:sub     rax, r8\n\tinc     r13\n\tjmp     .11\n.14:mov     rbp, r8\n\tneg     rbp\n\tjmp     .16\n\n\txalign  16\n.15:lea     r11, [rax+1]\n\tmul     r9\n\tadd     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, rbp\n\tmov     rax, r8\n\tadd     rax, rdx\n\tcmp     rdx, r11\n\tcmovb   rax, rdx\n\tadc     r13, -1\n\tmov     [rdi], r13\n\tsub     rdi, 8\n.16:dec     r12\n\tjns     .15\n\tshr     rax, cl\n.17:WIN64_GCC_END\n\n%ifndef EXCLUDE_PREINV\n\n\txalign  16\n\tWIN64_GCC_PROC mpn_preinv_divrem_1, 7, frame\n\txor     eax, eax\n\tmov     r12, rsi\n\tmov     rbx, rcx\n\tadd     rcx, rsi\n\tmov     rsi, rdx\n\tlea     rdi, [rdi+rcx*8-8]\n\n\ttest    r8, r8\n\tjs      .3\n\tmov     cl, [rsp+stack_use+0x38]\n\tshl     r8, cl\n\tjmp     .7\n\t\n%ifdef  SPECIAL_CODE_FOR_NORMALIZED_DIVISOR\n\n\txalign  16\n.1: mov     r10, [rsi+rbx*8]\n\tlea     rbp, [rax+1]\n\tmul     r9\n\tadd     rax, r10\n\tadc     rdx, rbp\n\tmov     rbp, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     r10, rdx\n\tmov     rax, r8\n\tadd     rax, r10\n\tcmp     r10, rbp\n\tcmovb   rax, r10\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .4\n.2: mov     [rdi], r13\n\tsub     rdi, 8\n.3: dec     rbx\n\tjns     .1\n\txor     ecx, ecx\n\tjmp     .14\n.4: sub     rax, r8\n\tinc     r13\n\tjmp     .2\n\t\n%endif\n\n.5: test    rbx, rbx\n\tje      .6\n\tmov     rax, [rsi+rbx*8-8]\n\tcmp     rax, r8\n\tjae     .6\n\tmov     [rdi], rbp\n\tmov     rbp, rax\n\tlea     rdi, [rdi-8]\n\tje      .17\n\tdec     rbx\n.6: bsr     rcx, r8\n\tnot     ecx\n\tsal     r8, cl\n\tsal     rbp, cl\n\tmov     rdx, r8\n\tmov     rax, -1\n\tnot     rdx\n\tdiv     r8\n\ttest    rbx, rbx\n\tmov     r9, rax\n\tmov     rax, rbp\n\tje      .14\n.7: mov     rbp, [rsi+rbx*8-8]\n\tshr     rax, cl\n\tshld 
   rax, rbp, cl\n\tsub     rbx, 2\n\tjs      .10\n\n\txalign  16\n.8: nop\n\tmov     r10, [rsi+rbx*8]\n\tlea     r11, [rax+1]\n\tshld    rbp, r10, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .12\n.9: mov     [rdi], r13\n\tsub     rdi, 8\n\tdec     rbx\n\tmov     rbp, r10\n\tjns     .8\n.10:lea     r11, [rax+1]\n\tsal     rbp, cl\n\tmul     r9\n\tadd     rax, rbp\n\tadc     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, r8\n\tsub     rbp, rdx\n\tmov     rax, r8\n\tadd     rax, rbp\n\tcmp     rbp, r11\n\tcmovb   rax, rbp\n\tadc     r13, -1\n\tcmp     rax, r8\n\tjae     .13\n.11:mov     [rdi], r13\n\tsub     rdi, 8\n\tjmp     .14\n.12:sub     rax, r8\n\tinc     r13\n\tjmp     .9\n.13:sub     rax, r8\n\tinc     r13\n\tjmp     .11\n.14:mov     rbp, r8\n\tneg     rbp\n\tjmp     .16\n\n\txalign  16\n.15:lea     r11, [rax+1]\n\tmul     r9\n\tadd     rdx, r11\n\tmov     r11, rax\n\tmov     r13, rdx\n\timul    rdx, rbp\n\tmov     rax, r8\n\tadd     rax, rdx\n\tcmp     rdx, r11\n\tcmovb   rax, rdx\n\tadc     r13, -1\n\tmov     [rdi], r13\n\tsub     rdi, 8\n.16:dec     r12\n\tjns     .15\n\tshr     rax, cl\n.17:WIN64_GCC_END\n\n%endif\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/divrem_euclidean_qr_2.asm",
    "content": "; PROLOGUE(mpn_divrem_euclidean_qr_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divrem_euclidean_qr_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    BITS 64\n\n\tFRAME_PROC mpn_divrem_euclidean_qr_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rdx, r8\n    mov     rcx, r9\n\n\tmov     rbx, rdx\n\tdec     rbx\n\tmov     r9, 0\n\tmov     rdx, [rcx+8]\n\tnot     rdx\n\tmov     rax, -1\n\tdiv     qword[rcx+8]\n\tmov     rbp, rax\n\tmov     r8, [rsi+rbx*8]\n\tdec     rbx\n\tmov     r10, [rsi+rbx*8]\n\tcmp     r8, [rcx+8]\n\tjae     .1\n\tmov     rdx, r8\n\tmov     r9, r8\n\tmov     r10, [rsi+rbx*8]\n\tmov     r8, r10\n\tmov     r14, 0\n\tjmp     .3\n.1: mov     r14, 1\n\tmov     rdx, r8\n\tsub     rdx, [rcx+8]\n\tsub     r10, [rcx]\n\tsbb     rdx, 0\n\tjnc     .2\n\tdec     r14\n\tadd     r10, 
[rcx]\n\tadc     rdx, [rcx+8]\n.2: mov     r9, rdx\n\tmov     r8, r10\n.3: dec     rbx\n\tmov     rax, r9\n\tjs      .8\n\n\txalign  16\n.4: cmp     rax, [rcx+8]\n\tjne     .6\n\tmov     r10, [rsi+rbx*8]\n\tmov     r15, -1\n\tmov     rdx, r8\n\tadd     r10, [rcx]\n\tadc     rdx, rax\n\tsbb     rax, rax\n\tsub     rdx, [rcx]\n\tadc     rax, 0\n\tjz      .5\n\tdec     r15\n\tadd     r10, [rcx]\n\tadc     rdx, [rcx+8]\n.5: mov     rax, rdx\n\tmov     r8, r10\n\tjmp     .7\n.6:\tmov     r9, rax\n\tbt      r8, 63\n\tadc     rax, 0\n\tmov     r15, r8\n\tmov     r13, 0\n\tsar     r15, 63\n\tand     r15, [rcx+8]\n\tadd     r15, r8\n\tmul     rbp\n\tadd     rax, r15\n\tmov     rax, [rcx]\n\tadc     rdx, r9\n\tmov     r10, rdx\n\tmul     rdx\n\tmov     r12, [rcx]\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, [rcx+8]\n\tnot     r10\n\tmul     r10\n\tsub     r9, [rcx+8]\n\tadd     rax, r8\n\tadc     rdx, r9\n\tmov     r9, rdx\n\tand     r9, [rcx+8]\n\t; swapped r9,rax from here\n\tadd     rax, r9\n\tmov     r15, rdx\n\tsub     r15, r10\n\tand     rdx, [rcx]\n\tsub     r12, rdx\n\tsbb     r13, 0\n\tmov     r8, [rsi+rbx*8]\n\tsub     r8, r12\n\tsbb     rax, r13\n\tmov     r10, [rcx]\n\tmov     r11, [rcx+8]\n\tsbb     r9, r9\n\tand     r10, r9\n\tand     r11, r9\n\tadd     r15, r9\n\tadd     r8, r10\n\tadc     rax, r11\n\tadc     r9, 0\n\tand     r10, r9\n\tand     r11, r9\n\tadd     r15, r9\n\tadd     r8, r10\n\tadc     rax, r11\n.7: mov     [rdi+rbx*8], r15\n\tdec     rbx\n\tjns     .4\n.8: mov     [rsi+8], rax\n\tmov     [rsi], r8\n\tmov     rax, r14\n    END_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/fat/add_n.c",
    "content": "/* Fat binary fallback mpn_add_n\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/add_n.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/addmul_1.c",
    "content": "/* Fat binary fallback mpn_addmul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/addmul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/copyd.c",
    "content": "/* Fat binary fallback mpn_copyd\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\tmpn_copyd(mp_ptr rp,mp_srcptr sp,mp_size_t n)\n{MPN_COPY_DECR(rp,sp,n);return;}\n"
  },
  {
    "path": "mpn/x86_64w/fat/copyi.c",
    "content": "/* Fat binary fallback mpn_copyi\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\tmpn_copyi(mp_ptr rp,mp_srcptr sp,mp_size_t n)\n{MPN_COPY_INCR(rp,sp,n);return;}\n"
  },
  {
    "path": "mpn/x86_64w/fat/divexact_1.c",
    "content": "/* Fat binary fallback mpn_divexact_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divexact_by3c.c",
    "content": "/* Fat binary fallback mpn_diveby3.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_by3c.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divexact_byfobm1.c",
    "content": "/* Fat binary fallback mpn_divexact_fobm1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divexact_byfobm1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divrem_1.c",
    "content": "/* Fat binary fallback mpn_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divrem_2.c",
    "content": "/* Fat binary fallback mpn_divrem_2.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_2.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divrem_euclidean_qr_1.c",
    "content": "/* Fat binary fallback mpn_divrem_euclidean_qr_1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_euclidean_qr_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/divrem_euclidean_qr_2.c",
    "content": "/* Fat binary fallback mpn_divrem_euclidean_qr_2\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/divrem_euclidean_qr_2.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/fat.c",
    "content": "/* x86 fat binary initializers.\n\n   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.\n   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR\n   COMPLETELY IN FUTURE GNU MP RELEASES.\n\nCopyright 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>    /* for printf */\n#include <stdlib.h>   /* for getenv */\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* Change this to \"#define TRACE(x) x\" for some traces. 
*/\n#define TRACE(x)\n\n/* fat_entry.asm */\nlong __gmpn_cpuid(char dst[12], int id);\n\nstruct cpuvec_t __gmpn_cpuvec = {\n  __MPN(add_err1_n_init),\n  __MPN(add_err2_n_init),\n  __MPN(add_n_init),\n  __MPN(addmul_1_init),\n  __MPN(copyd_init),\n  __MPN(copyi_init),\n  __MPN(divexact_1_init),\n  __MPN(divexact_by3c_init),\n  __MPN(divexact_byfobm1_init),\n  __MPN(divrem_1_init),\n  __MPN(divrem_2_init),\n  __MPN(divrem_euclidean_qr_1_init),\n  __MPN(divrem_euclidean_qr_2_init),\n  __MPN(gcd_1_init),\n  __MPN(lshift_init),\n  __MPN(mod_1_init),\n  __MPN(mod_34lsub1_init),\n  __MPN(modexact_1c_odd_init),\n  __MPN(mul_1_init),\n  __MPN(mul_basecase_init),\n  __MPN(mulmid_basecase_init),\n  __MPN(preinv_divrem_1_init),\n  __MPN(preinv_mod_1_init),\n  __MPN(redc_1_init),\n  __MPN(rshift_init),\n  __MPN(sqr_basecase_init),\n  __MPN(sub_err1_n_init),\n  __MPN(sub_err2_n_init),\n  __MPN(sub_n_init),\n  __MPN(submul_1_init),\n  __MPN(sumdiff_n_init),\n  0\n};\n\n\n/* The following setups start with generic x86, then overwrite with\n   specifics for a chip, and higher versions of that chip.\n\n   The arrangement of the setups here will normally be the same as the $path\n   selections in configure.in for the respective chips.\n\n   This code is reentrant and thread safe.  We always calculate the same\n   decided_cpuvec, so if two copies of the code are running it doesn't\n   matter which completes first, both write the same to __gmpn_cpuvec.\n\n   We need to go via decided_cpuvec because if one thread has completed\n   __gmpn_cpuvec then it may be making use of the threshold values in that\n   vector.  If another thread is still running __gmpn_cpuvec_init then we\n   don't want it to write different values to those fields since some of the\n   asm routines only operate correctly up to their own defined threshold,\n   not an arbitrary value.  
*/\n\n#define CONFIG_GUESS            0\n#define CONFIG_GUESS_32BIT      0\n#define CONFIG_GUESS_64BIT      0\n#define FAT32                   0\n#define FAT64                   1\n#define INFAT                   1\n\n#define CPUSETUP_core2\t\tCPUVEC_SETUP_core2\n#define CPUSETUP_penryn\t\tCPUVEC_SETUP_core2;CPUVEC_SETUP_core2_penryn\n#define CPUSETUP_nehalem\tCPUVEC_SETUP_nehalem\n#define CPUSETUP_westmere\tCPUVEC_SETUP_nehalem;CPUVEC_SETUP_nehalem_westmere\n#define CPUSETUP_sandybridge\tCPUVEC_SETUP_sandybridge\n#define CPUSETUP_ivybridge\tCPUVEC_SETUP_sandybridge\n#define CPUSETUP_haswell        CPUVEC_SETUP_haswell\n#define CPUSETUP_haswellavx        CPUVEC_SETUP_haswell;CPUVEC_SETUP_haswell_avx\n#define CPUSETUP_broadwell      CPUVEC_SETUP_haswell;CPUVEC_SETUP_haswell_broadwell\n#define CPUSETUP_skylake        CPUVEC_SETUP_skylake\n#define CPUSETUP_skylakeavx     CPUVEC_SETUP_skylake;CPUVEC_SETUP_skylake_avx\n#define CPUSETUP_atom\t\tCPUVEC_SETUP_atom\n#define CPUSETUP_nano\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k8only\n#define CPUSETUP_netburst\tCPUVEC_SETUP_netburst\n#define CPUSETUP_netburstlahf\tCPUVEC_SETUP_netburst;\n#define CPUSETUP_k8\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k8only\n#define CPUSETUP_k10\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10\n#define CPUSETUP_k102\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_k103\t\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_bulldozer\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_piledriver\tCPUVEC_SETUP_k8;CPUVEC_SETUP_k8_k10;CPUVEC_SETUP_k8_k10_k102\n#define CPUSETUP_bobcat\t\tCPUVEC_SETUP_bobcat\n\n#include \"cpuid.c\"\n\nvoid\n__gmpn_cpuvec_init (void)\n{\n  struct cpuvec_t  decided_cpuvec;\n\n  TRACE (printf (\"__gmpn_cpuvec_init:\\n\"));\n\n  __gmpn_cpu(&decided_cpuvec);\n\n  ASSERT_CPUVEC (decided_cpuvec);\n  CPUVEC_INSTALL (decided_cpuvec);\n\n  /* Set this once the threshold fields are ready.\n     Use volatile to 
prevent it getting moved.  */\n  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;\n}\n"
  },
  {
    "path": "mpn/x86_64w/fat/fat_entry.asm",
    "content": "dnl  x86 fat binary entrypoints.\n\ndnl  Copyright 2003 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\ndnl  Forcibly disable profiling.\ndnl\ndnl  The entrypoints and inits are small enough not to worry about, the real\ndnl  routines arrived at will have any profiling.  Also, the way the code\ndnl  here ends with a jump means we won't work properly with the\ndnl  \"instrument\" profiling scheme anyway.\n\ndefine(`WANT_PROFILING',no)\n\n\n\tTEXT\n\n\ndnl  Usage: FAT_ENTRY(name, offset)\ndnl\ndnl  Emit a fat binary entrypoint function of the given name.  This is the\ndnl  normal entry for applications, eg. 
__gmpn_add_n.\ndnl\ndnl  The code simply jumps through the function pointer in __gmpn_cpuvec at\ndnl  the given \"offset\" (in bytes).\ndnl\ndnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be\ndnl  fine for all x86s.\ndnl\ndnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to\ndnl  ensure at least the first two instructions don't cross a cache line\ndnl  boundary.\ndnl\ndnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE\ndnl  grepping in configure, stopping that code trying to eval something with\ndnl  $1 in it.\n\ndefine(FAT_ENTRY,\nm4_assert_numargs(2)\n`\tALIGN(ifdef(`PIC',16,8))\n`'PROLOGUE($1)\nifdef(`PIC',\n`\tlea\t_GLOBAL_OFFSET_TABLE_(%rip),%r11\n\tmovq\tGSYM_PREFIX`'__gmpn_cpuvec@GOT(%r11), %r11\n\tjmp\t*m4_empty_if_zero($2)(%r11)\n',`dnl non-PIC\n\tjmp\t*GSYM_PREFIX`'__gmpn_cpuvec+$2\n')\nEPILOGUE()\n')\n\n\ndnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST\ndnl\n\ndefine(`CPUVEC_offset',0)\nforeach(i,\n`FAT_ENTRY(MPN(i),CPUVEC_offset)\ndefine(`CPUVEC_offset',eval(CPUVEC_offset + 8))',\nCPUVEC_FUNCS_LIST)\n\n\ndnl  Usage: FAT_INIT(name, offset)\ndnl\ndnl  Emit a fat binary initializer function of the given name.  These\ndnl  functions are the initial values for the pointers in __gmpn_cpuvec.\ndnl\ndnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through\ndnl  the __gmpn_cpuvec pointer, at the given \"offset\" (in bytes).\ndnl  __gmpn_cpuvec_init will have stored the address of the selected\ndnl  implementation there.\ndnl\ndnl  Only one of these routines will be executed, and only once, since after\ndnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no\ndnl  need for anything special here, just something small and simple.  To\ndnl  keep code size down, \"fat_init\" is a shared bit of code, arrived at\ndnl  with the offset in %al.  
%al is used since the movb instruction is 2\ndnl  bytes where %eax would be 4.\ndnl\ndnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the\ndnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval\ndnl  something with $1 in it.\n\ndefine(FAT_INIT,\nm4_assert_numargs(2)\n`PROLOGUE($1)\n\tpushq\t%rax\n    pushq   %rbx\n    pushq   %rsi\n    pushq   %rdi\n    pushq   %rdx\n    pushq   %rcx\n    pushq   %r8\n    pushq   %r9\n    pushq   %rbp\n\t\nifdef(`PIC',`\n\tlea\t_GLOBAL_OFFSET_TABLE_(%rip),%rbx\n\tcall\tGSYM_PREFIX`'__gmpn_cpuvec_init@PLT\n\tmovq\tGSYM_PREFIX`'__gmpn_cpuvec@GOT(%rbx), %r11\n\tpopq    %rbp\n    popq    %r9\n    popq    %r8\n    popq    %rcx\n    popq    %rdx\n    popq    %rdi\n    popq    %rsi\n    popq\t%rbx\n    popq\t%rax\n    jmp\t*m4_empty_if_zero($2)(%r11)\n\n',`dnl non-PIC\n\tcall\tGSYM_PREFIX`'__gmpn_cpuvec_init\n\tpopq    %rbp\n    popq    %r9\n    popq    %r8\n    popq    %rcx\n    popq    %rdx\n    popq    %rdi\n    popq    %rsi\n    popq\t%rbx\n\tpopq\t%rax\n\tjmp\t*GSYM_PREFIX`'__gmpn_cpuvec+$2\n')\nEPILOGUE()\n')\n\n\ndnl  FAT_INIT for each CPUVEC_FUNCS_LIST\ndnl\n\ndefine(`CPUVEC_offset',0)\nforeach(i,\n`FAT_INIT(MPN(i`'_init),CPUVEC_offset)\ndefine(`CPUVEC_offset',eval(CPUVEC_offset + 8))',\nCPUVEC_FUNCS_LIST)\n\n\n\nC long __gmpn_cpuid (char dst[12], int id);\nC\nC This is called only once, so just something simple and compact is fine.\n\ndefframe(PARAM_ID,  8)\ndefframe(PARAM_DST, 4)\ndeflit(`FRAME',0)\n\nPROLOGUE(__gmpn_cpuid)\n\tpushq\t%rbx\t\tFRAME_pushq()\n\tmovq\t%rdx, %rax\n\tmovq\t%rcx, %r8\n\tcpuid\n\tmovl\t%ebx, (%r8)\n\tmovl\t%edx, 4(%r8)\n\tmovl\t%ecx, 8(%r8)\n\tpopq\t%rbx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "mpn/x86_64w/fat/gcd_1.c",
    "content": "/* Fat binary fallback mpn_gcd_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/gcd_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/lshift.c",
    "content": "/* Fat binary fallback mpn_lshift\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/lshift.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/mod_1.c",
    "content": "/* Fat binary fallback mpn_mod_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mod_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/mod_34lsub1.c",
    "content": "/* Fat binary fallback mpn_mod_34lsub1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mod_34lsub1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/modexact_1c_odd.c",
    "content": "/* Fat binary fallback mpn_modexact_1c_odd.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/modexact_1c_odd.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/mul_1.c",
    "content": "/* Fat binary fallback mpn_mul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/mul_basecase.c",
    "content": "/* Fat binary fallback mpn_mul_basecase\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/mul_basecase.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/preinv_divrem_1.c",
    "content": "/* Fat binary fallback mpn_pre_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/preinv_divrem_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/preinv_mod_1.c",
    "content": "/* Fat binary fallback mpn_pre_mod_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/preinv_mod_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/redc_1.c",
    "content": "/* Fat binary fallback mpn_redc_1\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/redc_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/rshift.c",
    "content": "/* Fat binary fallback mpn_rshift\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/rshift.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/sqr_basecase.c",
    "content": "/* Fat binary fallback mpn_sqr_basecase\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sqr_basecase.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/sub_n.c",
    "content": "/* Fat binary fallback mpn_sub_n\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sub_n.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/submul_1.c",
    "content": "/* Fat binary fallback mpn_submul_1\n\nCopyright 2011 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/submul_1.c\"\n"
  },
  {
    "path": "mpn/x86_64w/fat/sumdiff_n.c",
    "content": "/* Fat binary fallback mpn_sumdiff_n\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include \"mpn/generic/sumdiff_n.c\"\n"
  },
  {
    "path": "mpn/x86_64w/haswell/add_n.asm",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2008, 2016 Jason Moxham and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB1   rax\n    %define LIMB2   r10\n    %define SizeRest r11\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define LIMB1   rax\n    %define LIMB2   r9\n    %define SizeRest r10\n%endif\n\n%define ADCSBB adc\n\n    BITS    64\n\n    xalign  8\n    LEAF_PROC mpn_add_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_add_n\n    xor     r10, r10\nentry:\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n    lea     Size, [r10 + 2*Size]\n    sar     Size, 1\n\tjnz     .loop1\n    jmp     .rest\n\n\talign   16\n.loop1:\n\tmov     LIMB1, [Inp1P]\n\tmov     LIMB2, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tADCSBB  LIMB2, [Inp2P+8]\n\tmov     LIMB1, [Inp1P+16]\n\tmov     [SumP+8], LIMB2\n\tADCSBB  
LIMB1, [Inp2P+16]\n\tmov     LIMB2, [Inp1P+24]\n\tmov     [SumP+16], LIMB1\n\tmov     LIMB1, [Inp1P+32]\n\tADCSBB  LIMB2, [Inp2P+24]\n\tmov     [SumP+24], LIMB2\n\tADCSBB  LIMB1, [Inp2P+32]\n\tmov     [SumP+32], LIMB1\n\tmov     LIMB2, [Inp1P+40]\n\tADCSBB  LIMB2, [Inp2P+40]\n\tmov     [SumP+40], LIMB2\n\tmov     LIMB1, [Inp1P+48]\n\tmov     LIMB2, [Inp1P+56]\n\tlea     Inp1P, [Inp1P+64]\n\tADCSBB  LIMB1, [Inp2P+48]\n\tADCSBB  LIMB2, [Inp2P+56]\n\tlea     Inp2P, [Inp2P+64]\n\tmov     [SumP+48], LIMB1\n\tmov     [SumP+56], LIMB2\n\tlea     SumP, [SumP+64]\n\tdec     Size\n\tjnz     .loop1\n\tinc     SizeRest\n\tdec     SizeRest\n\tjz      .end\n.rest:\n\tmov     LIMB1, [Inp1P]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tdec     SizeRest\n\tjz      .end\n\tmov     LIMB1, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P+8]\n\tmov     [SumP+8], LIMB1\n\tdec     SizeRest\n\tjz      .end\n\tmov     LIMB1, [Inp1P+16]\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     [SumP+16], LIMB1\n\tdec     SizeRest\n    jz      .end\n\tmov     LIMB1, [Inp1P+24]\n\tADCSBB  LIMB1, [Inp2P+24]\n\tmov     [SumP+24], LIMB1\n\tdec     SizeRest\n    jz      .end\n    lea     Inp1P, [Inp1P+32]\n    lea     Inp2P, [Inp2P+32]\n    lea     SumP, [SumP+32]\n    jmp     .rest\n.end:\n\tmov     eax, 0\n\tadc     eax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/addlsh1_n.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t addlsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, adds this to Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and add by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 1 cycle in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n; ============================================================================\n\n%define USE_WIN64\n\n%define ADDSUB add\n%define ADCSBB adc\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n%define Op3     RCX\n%define Op2     RDX\n%define Op1     R8\n%define Size    R9\n\n%define Limb0   RBX\n%define Limb1   RDI\n%define Limb2   RSI\n\n%define Limb3   R10\n%define Limb4   R11\n%define Limb5   R12\n%define Limb6   R13\n%define Limb7   R14\n%define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    RBP\n%endif\n\n%macro ACCUMULATE 1\n\n    ADCSBB  Limb%1, [Op2 + 8 * %1]\n    mov     [Op3 + 8 * %1], Limb%1\n%endmacro\n\n    align   32\n\n  FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & addition with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n\n    ADDSUB  Limb0, [Op2]\n    mov     [Op3], Limb0\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd   
 Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; add Op2 to oct-limb and store in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - 
hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/addmul_1.asm",
    "content": ";  AMD64 mpn_addmul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB add\n%define ADCSBB adc\n\nalign 16\n\nFRAME_PROC mpn_addmul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\t\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.2 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  mpn_and_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_and_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpand    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  mpn_andn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n    \n\tLEAF_PROC mpn_andn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpandn   xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  
xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/addmul_1.asm",
    "content": ";  AMD64 mpn_addmul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB add\n%define ADCSBB adc\n\nalign 16\n\nFRAME_PROC mpn_addmul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\t\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/addsub_n.asm",
    "content": ";  AMD64 mpn_addsub_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)+(rdx,r8)-(rcx,r8)\n;   rax = summed carry and borrow in range [ -1..1 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_addsub_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills      ; clears carry & borrow\n\n    jmp     .Check\n\n    align   16\n.Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    adc     Limb0, [Src2P]\n    adc     Limb1, [Src2P+8]\n    adc     Limb2, [Src2P+16]\n    adc     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n.Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    adc     Limb0, [Src2P]\n    
setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    adc     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    adc     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) ok from benchmark figures\n.Exit:\n\n    sub     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/and_n.asm",
    "content": ";  AVX mpn_and_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) and (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.68 0.34-0.94 (depending on alignment)\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_and_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 0, 4, 8 & 12 immediately\n\n\t; the code density in the core loop is low - 5.18 byte per instruction\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    add     Src1P, 128\n    add     Src2P, 128\n    add     ResP, 128\n\n  .Check:\n\n    add     Size, 4\n  
  jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     SizeD, 2            ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     CountD, 2           ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/andn_n.asm",
    "content": ";  AVX mpn_andn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) and (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_andn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpandn  QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpandn  QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpandn  QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpandn  QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpandn  QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpandn  QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpandn  QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    andn    Limb0, Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    andn    Limb0, Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    andn    Limb0, Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/com_n.asm",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%define USE_WIN64\n\nBITS 64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n    %define FFFF    R11\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n    %define FFFF    R8\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n%define QFFFF   YMM4\n%define DFFFF   XMM4\n\n    align   32\n\nLEAF_PROC mpn_com_n\n    mov     RAX, Size1\n    or      RAX, RAX\n    je      .Exit      ;ajs:notshortform\n                       ; size=0 =>\n\n    ; Set a GPR to 0xFF...FF\n    mov     FFFF, -1\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .A32\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit      ;ajs:notshortform\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .A32:\n\n    test    Op2, 16\n    je      
.AVX\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .AVX:\n\n    ; Set an AVX2 reg to 0xFF...FF\n    movq    DFFFF, FFFF\n    vbroadcastsd QFFFF, DFFFF\n\n    mov     Offs, 128\n    jmp     .AVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .AVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqu QLimb2, [Op1+64]\n    vpxor   QLimb2, QLimb2, QFFFF\n    vmovdqu QLimb3, [Op1+96]\n    vpxor   QLimb3, QLimb3, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .AVXCheck:\n\n    sub     Size1, 16\n    jnc     .AVXLoop\n\n    add     Size1, 16\n    je      .Exit ;ajs:notshortform\n                  ; AVX copied operand fully =>\n\n    ; copy remaining max. 
15 limb\n    test    Size1, 8\n    je      .Four\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .Four:\n\n    test    Size1, 4\n    je      .Two\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .Two:\n\n    test    Size1, 2\n    je      .One\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    pxor    DLimb0, DFFFF\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .One:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/copyd.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n\n; mpn_copyd(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving high to low in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$-L3$ if destination is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - startup overhead of \"rep movsq\" with negative direction is 200 cycles!!!\n;   - negative 
direction is unfavourable compared to positive \"rep movsq\" and\n;     to AVX.\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyd\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; Size=0 =>\n\n    lea     Op1, [Op1+8*Size1-8]\n    lea     Op2, [Op2+8*Size1-8]\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    jne     .lCpyDecA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 8\n    sub     Op2, 8\n\n  .lCpyDecA32:\n\n    test    Op2, 16\n    jnz     .lCpyDecAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyDecAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyDecAVXLoop:\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqu QLimb2, [Op1-88]\n    vmovdqu QLimb3, [Op1-120]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n    vmovdqa [Op2-88], QLimb2\n    vmovdqa [Op2-120], QLimb3\n\n    sub     Op1, Offs\n    sub     Op2, Offs\n\n  .lCpyDecAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyDecAVXLoop\n\n    
add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyDecFour\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lCpyDecFour:\n\n    test    Size1, 4\n    je      .lCpyDecTwo\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqa [Op2-24], QLimb0\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lCpyDecTwo:\n\n    test    Size1, 2\n    je      .lCpyDecOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n%else\n    movdqu  DLimb0, [Op1-8]\n    movdqa  [Op2-8], DLimb0\n%endif\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/copyi.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mpn_copyi(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving low to high in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$ (30%), L2$ (10%) if dest. 
is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - the break-even between AVX and \"rep movsq\" is around 10,000 limb\n; - the prologue & epilogue can still be optimized!\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyi\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; size=0 =>\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .lCpyIncA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .lCpyIncA32:\n\n    test    Op2, 16\n    je      .lCpyIncAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyIncAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyIncAVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqu QLimb2, [Op1+64]\n    vmovdqu QLimb3, 
[Op1+96]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .lCpyIncAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyIncAVXLoop\n\n    add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyIncFour\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lCpyIncFour:\n\n    test    Size1, 4\n    je      .lCpyIncTwo\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lCpyIncTwo:\n\n    test    Size1, 2\n    je      .lCpyIncOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/ior_n.asm",
    "content": ";  AVX mpn_ior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) ior (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_ior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/iorn_n.asm",
    "content": ";  AVX mpn_iorn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) ior (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_iorn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    not     Limb0\n    or      Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    not     Limb0\n    or      Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    not     Limb0\n    or      Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/lshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux     RAX        RDI         RSI            RDX              RCX\n; Win7      RAX        RCX         RDX            R8               R9\n;\n; Description:\n; The function shifts Op1 left by n bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bits of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( Op1[ Size1-1..0 ]:ShlIn ) << 1\n; - Op1[ 0 ] >> 63\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few 
percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 31.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShlDL0      XMM2    ; Attn: this must match ShlQL0 definition\n%define ShrDL0      XMM3    ; Attn: this must match ShrQL0 definition\n%define ShlDLCnt    XMM6    ; Attn: this must match ShlQlCnt definition\n%define ShrDLCnt    XMM7    ; Attn: this must match ShrQlCnt definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShlQL0      YMM2\n%define ShrQL0      YMM3\n%define ShlQL1      YMM4\n%define ShrQL1      YMM5\n%define ShlQLCnt    YMM6\n%define ShrQLCnt    YMM7\n\n    align   32\nFRAME_PROC  mpn_lshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    sub     Size1, 1\n    jc      .Exit               ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, CL\n\n    or      Size1, Size1\n    je      .lShlEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n  %ifdef OFFS_REG\n    mov     Offs, -512\n 
 %endif\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShlEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShlEquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  .lShlEquA16:\n\n    test    Op2, 16\n    jne     .lShlEquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShlEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShrQL0, ShrQL0, 10010011b\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShlEquAVXCheck\n\n    ; main loop (prefetching enabled; unloaded cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in LD3$\n    align   16\n  .lShlEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpermq    ShrQL1, ShrQL1, 10010011b\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 00000011b\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vpermq    ShrQL0, ShrQL0, 10010011b\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 00000011b\n    
vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShlEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShlEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shld    Limb2, Limb1, CL\n%if 1\n    vmovq   ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShlEquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShlEquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, CL\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShlEquTwo:\n\n    test    Size1, 2\n    je      .lShlEquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShlEquOne:\n\n    test    Size1, 1\n    je      .lShlEquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShlEquPost:\n\n    shl    Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/lshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 left by one bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bit of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 
21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_lshift1\n\n    xor     EAX, EAX\n    sub      Size1, 1\n    jc      .Exit               ;ajs:notshortform ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, 1\n\n    or      Size1, Size1\n    je      .lShl1EquPost       ;ajs:notshortform ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShl1EquFour       ;ajs:notshortform ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShl1EquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  
.lShl1EquA16:\n\n    test    Op2, 16\n    jne     .lShl1EquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShl1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlq  ShrQL0, QLimb0, 63\n    vpermq  ShrQL0, ShrQL0, 147\t\t; 0b10010011\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShl1EquAVXCheck\n\n    ; main loop requires on entry:\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShl1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllq    ShlQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlq    ShrQL1, QLimb1, 63\n    vpermq    ShrQL1, ShrQL1, 147\t; 0b10010011\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 3\t; 0b00000011\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllq    ShlQL1, QLimb1, 1\n    vpsrlq    ShrQL0, QLimb0, 63\n    vpermq    ShrQL0, ShrQL0, 147\t; 0b10010011\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 3\t; 0b00000011\n    vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShl1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShl1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shr     Limb2, 63\n%if 1\n    vmovq ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n\n    ; Insert value of Limb2 into the 0-th qword of ShrDL0\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllq  
ShlQL0, QLimb0, 1\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShl1EquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShl1EquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, 1\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShl1EquTwo:\n\n    test    Size1, 2\n    je      .lShl1EquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShl1EquOne:\n\n    test    Size1, 1\n    je      .lShl1EquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShl1EquPost:\n\n    shl     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        LEAF_PROC mpn_mul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jne     .1\n        mul     r9\n        mov     [rcx], rax\n        mov     rax, rdx\n        ret\n\n.1:     FRAME_PROC ?mpn_sandybridge_mul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n       
 mov     [rsp-8], r12\n        db      0x26\n        jge     .2\n.1:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        mov     [rdi+r11*8+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+r11*8+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        mov     [rdi+r11*8+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .1\n.2:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .5\n        jz      .4\n        jp      .3\n        mov     rax, [rsi+16]\n        mul     rcx\n        mov     [rdi+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        mov     [rdi+32], r8\n        mov     rax, r9\n        EXIT_PROC   reg_save_list\n\n.3:     mov     rax, [rsi+24]\n        mul     rcx\n        mov     [rdi+16], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        mov     [rdi+24], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     [rdi+32], r12\n        mov     rax, r8\n        EXIT_PROC   reg_save_list\n\n        align   16\n.4:  
   mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     [rdi+32], r10\n        mov     rax, r12\n        EXIT_PROC   reg_save_list\n\n.5:     mov     [rdi+32], r9\n        mov     rax, r10\n        END_PROC   reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/mul_basecase.asm",
    "content": ";  AMD64 mpn_mul_basecase optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n\n;  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13, r14, r15 \n\n%define rp       rdi\n%define up       rsi\n%define un_param rdx\n%define vp       rcx\n\n%define un       rbx\n%define un8      bl\n\n%define w0       r10\n%define w1       r11\n%define w2       r12\n%define w3       r13\n%define n        rbp\n\nBITS    64\n\nalign 16\n\nFRAME_PROC mpn_mul_basecase, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\t\n    mov \tun, rdx\n\tneg \tun\n\tmov \tn, rdx\n\tsar \tn, 2\n\n\ttest \tr8b, 1\n\tjz \t.Ldo_mul_2\n\tmov \trdx, [vp]\n\n.Ldo_mul_1:\n\ttest \tun8, 1\n\tjnz \t.Lm1x1\n\n.Lm1x0:\n\ttest \tun8, 2\n\tjnz \t.Lm110\n\n.Lm100:\n\tmulx \tw2, r14, [up]\n\tmulx \tw3, w1, [up+8]\n\tlea \trp, [rp-24]\n\tjmp \t.Lm1l0\n\n.Lm110:\n\tmulx \tr9, w3, [up]\n\tmulx \tr14, w1, [up+8]\n\tlea \trp, [rp-8]\n\ttest \tn, n\n\tjz \t.Lcj2\n\tmulx \tw2, w0, [up+16]\n\tlea \tup, [up+16]\n\tjmp \t.Lm1l2\n\n.Lm1x1:\n\ttest \tun8, 2\n\tjz \t.Lm111\n\n.Lm101:\n\tmulx \tr14, r9, [up]\n\tlea \trp, [rp-16]\n\ttest \tn, n\n\tjz \t.Lcj1\n\tmulx \tw2, w0, [up+8]\n\tlea \tup, [up+8]\n\tjmp \t.Lm1l1\n\n.Lm111:\n\tmulx \tw3, w2, [up]\n\tmulx \tr9, w0, [up+8]\n\tmulx \tr14, w1, [up+16]\n\tlea \tup, [up+24]\n\ttest \tn, n\n\tjnz \t.Lgt3\n\tadd \tw3, w0\n\tjmp \t.Lcj3\n\n.Lgt3:\n\tadd \tw3, w0\n\tjmp \t.Lm1l3\n\n\n\talign 32\n.Lm1tp:\n\tlea \trp, [rp+32]\n.Lm1l3:\n\tmov \t[rp], w2\n\tmulx \tw2, w0, [up]\n.Lm1l2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lm1l1:\n\tadc \tr14, w0\n\tmov \t[rp+16], r9\n\tmulx \tw3, w1, [up+8]\n.Lm1l0:\n\tmov \t[rp+24], r14\n\tmulx \tr9, w0, [up+16]\n\tadc \tw2, w1\n\tmulx \tr14, 
w1, [up+24]\n\tadc \tw3, w0\n\tlea \tup, [up+32]\n\tdec \tn\n\tjnz \t.Lm1tp\n\n.Lm1ed:\n\tlea \trp, [rp+32]\n.Lcj3:\n\tmov \t[rp], w2\n.Lcj2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lcj1:\n\tmov \t[rp+16], r9\n\tadc \tr14, 0\n\tmov \t[rp+24], r14\n\n\tdec \tr8d\n\tjz \t.Lret5\n\n\tlea \tvp, [vp+8]\n\tlea \trp, [rp+32]\n\tjmp \t.Ldo_addmul\n\n.Ldo_mul_2:\n\tmov \tr9, [vp]\n\tmov \tr14, [vp+8]\n\tlea \tn, [un]\n\tsar \tn, 2\n\ttest \tun8, 1\n\tjnz \t.Lm2x1\n\n.Lm2x0:\n\txor \tw0, w0\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw1, w2, r9\n\tjz \t.Lm2l0\n\n.Lm210:\n\tlea \trp, [rp-16]\n\tlea \tup, [up-16]\n\tjmp \t.Lm2l2\n\n.Lm2x1:\n\txor \tw2, w2\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw3, w0, r9\n\tjz \t.Lm211\n\n.Lm201:\n\tlea \trp, [rp-24]\n\tlea \tup, [up+8]\n\tjmp \t.Lm2l1\n\n.Lm211:\n\tlea \trp, [rp-8]\n\tlea \tup, [up-8]\n\tjmp \t.Lm2l3\n\n\n\talign 16\n.Lm2tp:\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l0:\n\tmov \t[rp], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+8]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n.Lm2l3:\n\tmov \t[rp+8], w0\n\tadc \tw3, 0\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up+16]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l2:\n\tmov \t[rp+16], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+24]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n\tlea \tup, [up+32]\n.Lm2l1:\n\tmov \t[rp+24], w0\n\tadc \tw3, 0\n\tinc \tn\n\tlea \trp, [rp+32]\n\tjnz \t.Lm2tp\n\n.Lm2ed:\n\tmulx \trax, rdx, r14\n\tadd \tw2, rdx\n\tadc \trax, 0\n\tadd \tw2, w3\n\tmov \t[rp], w2\n\tadc \trax, 0\n\tmov \t[rp+8], rax\n\tadd \tr8d, -2\n\tjz \t.Lret5\n\tlea \tvp, [vp+16]\n\tlea \trp, [rp+16]\n\n.Ldo_addmul:\n\tmov     [rsp+stack_use+8], r8\n\tlea \trp, 
[un*8+rp]\n\tlea \tup, [un*8+up]\n\n.Louter:\n\tmov \tr9, [vp]\n\tmov \tr8, [vp+8]\n\tlea \tn, [un+2]\n\tsar \tn, 2\n\tmov \trdx, [up]\n\ttest \tun8, 1\n\tjnz \t.Lbx1\n\n.Lbx0:\n\tmov \tr14, [rp]\n\tmov \tr15, [rp+8]\n\tmulx \tw1, rax, r9\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tmov \t[rp], r14\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\ttest \tun8, 2\n\tjnz \t.Lb10\n\n.Lb00:\n\tlea \tup, [up+16]\n\tlea \trp, [rp+16]\n\tjmp \t.Llo0\n\n.Lb10:\n\tmov \tr14, [rp+16]\n\tlea \tup, [up+32]\n\tmulx \tw3, rax, r9\n\tjmp \t.Llo2\n\n.Lbx1:\n\tmov \tr15, [rp]\n\tmov \tr14, [rp+8]\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tmov \trdx, [up+8]\n\tmov \t[rp], r15\n\tmulx \tw1, rax, r9\n\ttest \tun8, 2\n\tjz \t.Lb11\n\n.Lb01:\n\tmov \tr15, [rp+16]\n\tlea \trp, [rp+24]\n\tlea \tup, [up+24]\n\tjmp \t.Llo1\n\n.Lb11:\n\tlea \trp, [rp+8]\n\tlea \tup, [up+8]\n\tjmp \t.Llo3\n\n\n\talign 16\n.Ltop:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n.Llo2:\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tlea \trp, [rp+32]\n\tadd \tr15, w1\n\tmov \trdx, [up-16]\n\tmov \t[rp-24], r15\n\tadc \tw3, 0\n\tadd \tr14, w2\n\tmov \tr15, [rp-8]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo1:\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tadd \tr14, w3\n\tmov \t[rp-16], r14\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tadd \tr15, w0\n\tmov \trdx, [up-8]\n\tadc \tw2, 0\n.Llo0:\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmov \tr14, [rp]\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tadd \tr15, w1\n\tmov \t[rp-8], r15\n\tadc \tw3, 0\n\tmov \trdx, [up]\n\tadd \tr14, w2\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo3:\n\tadd \tr14, rax\n\tadc \tw1, 0\n\tmulx \tw2, rax, r8\n\tadd \tr14, w3\n\tmov \tr15, [rp+8]\n\tmov \t[rp], r14\n\tmov \tr14, [rp+16]\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\tlea \tup, 
[up+32]\n\tinc \tn\n\tjnz \t.Ltop\n\n.Lend:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \trax, rdx, r8\n\tadd \tr15, w1\n\tmov \t[rp+8], r15\n\tadc \tw3, 0\n\tadd \trdx, w2\n\tadc \trax, 0\n\tadd \trdx, w3\n\tmov \t[rp+16], rdx\n\tadc \trax, 0\n\tmov \t[rp+24], rax\n\n\tadd \tDWORD [rsp+stack_use+8], -2\n\tlea \tvp, [vp+16]\n\tlea \tup, [up+un*8-16]\n\tlea \trp, [rp+un*8+32]\n\tjnz \t.Louter\n\n\tmov \trax, [rsp+stack_use+8]\n.Lret5:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/nand_n.asm",
    "content": ";  AVX mpn_nand_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rcx,r9) = not( (rdx,r9) and (r8,r9) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte)\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to always \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function prologue - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Windows 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nand_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/nior_n.asm",
    "content": ";  AVX mpn_nior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rcx,r9) = not( (rdx,r9) or (r8,r9) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte)\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to always \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/rshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux       RAX         RDI            RSI            RDX                 RCX\n; Windows x64 RAX         RCX            RDX             R8                  R9\n;\n; Description:\n; The function shifts Op1 right by Shift bits, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bits of\n; Op1. 
The function operates increasing in memory supporting in place shifts.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( ShrIn:Op1[ Size1-1..0 ] ) >> Shift\n; - Op1[ 0 ] << ( 64-Shift )\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benchmarked on 30.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n%define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n%define ShrDLCnt    XMM6    ; Attn: this must match ShrQlCnt definition\n%define ShlDLCnt    XMM7    ; Attn: this must match ShlQlCnt 
definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShrQL0      YMM2\n%define ShlQL0      YMM3\n%define ShrQL1      YMM4\n%define ShlQL1      YMM5\n%define ShrQLCnt    YMM6\n%define ShlQLCnt    YMM7\n\n    align   32\n\nFRAME_PROC mpn_rshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     Limb1, [Op1]\n    shrd    RAX, Limb1, CL\n\n    sub     Size1, 1\n    je      .lShrEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n    mov     Offs, 512\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShrEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShrEquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShrEquAlign16:\n\n    test    Op2, 16\n    je      .lShrEquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShrEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShrEquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in 
LD3$\n    align   16\n  .lShrEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShrEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShrEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shrd    Limb2, Limb1, CL\n%if 1\n    vmovq   ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShrEquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShrEquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShrEquTwo:\n\n    test    Size1, 2\n    je      .lShrEquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShrEquOne:\n\n    test    Size1, 1\n    je      .lShrEquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n    ; store most significant limb considering shift-in part\n  .lShrEquPost:\n\n    shr     Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/rshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 right by one bit, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bit of Op1.\n; The function operates increasing in memory supporting in place shifts.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - AVX based version implemented, tested & 
benched on 21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_rshift1\n\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     RAX, [Op1]\n    mov     Limb1, RAX\n    shl     RAX, 63\n\n    sub     Size1, 1\n    je      .lShr1EquPost       ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShr1EquFour       ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShr1EquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShr1EquAlign16:\n\n    test    Op2, 16\n    je      .lShr1EquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 
1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShr1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllq  ShlQL0, QLimb0, 63\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShr1EquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShr1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlq    ShrQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllq    ShlQL1, QLimb1, 63\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlq    ShrQL1, QLimb1, 1\n    vpsllq    ShlQL0, QLimb0, 63\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShr1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShr1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shl     Limb2, 63\n%if 1\n    vmovq ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlq  ShrQL0, QLimb0, 1\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShr1EquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShr1EquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShr1EquTwo:\n\n    test    Size1, 2\n    je      .lShr1EquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShr1EquOne:\n\n    test    Size1, 1\n    je      .lShr1EquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n  .lShr1EquPost:\n\n    shr     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/sqr_basecase.asm",
    "content": "\n; void mpn_sqr_basecase(mp_ptr, mp_srcptr, mp_size_t)\n; Linux                    rdi        rsi        rdx\n; Win64                    rcx        rdx         r8\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14\n    \n    text\n\txalign  32\n\tLEAF_PROC mpn_sqr_basecase \n\tcmp     r8, 2\n\tjae     .0\n\tmov     rdx, [rdx]\n\tmulx    rdx, rax, rdx\n\tmov     [rcx], rax\n\tmov     [rcx+8], rdx\n\tret\n.0:\tjne     .1\n\tmov     r11, [rdx+8]\n\tmov     rdx, [rdx]\n\tmulx    r10, r9, r11\n\tmulx    r8, rax, rdx\n\tmov     rdx, r11\n\tmulx    rdx, r11, rdx\n\tadd     r9, r9\n\tadc     r10, r10\n\tadc     rdx, 0\n\tadd     r8, r9\n\tadc     r10, r11\n\tadc     rdx, 0\n\tmov     [rcx], rax\n\tmov     [rcx+8], r8\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], rdx\n\tret\n\n\txalign  32\n.1:\tFRAME_PROC mpn_sqr_basec1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8  \n    cmp     rdx, 4\n\tjae     .2\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tmov     r9, rdx\n\tmulx    rax, r11, r8\n\tmov     rdx, [rsi+16]\n\tmulx    rcx, r10, r8\n\tmov     r8, r11\n\tadd     r10, rax\n\tadc     rcx, 0\n\tmulx    rax, rdx, r9\n\tadd     rdx, rcx\n\tmov     [rdi+24], rdx\n\tadc     rax, 0\n\tmov     [rdi+32], rax\n\txor     rcx, rcx\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, rdx\n\tmov     [rdi], rax\n\tadd     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     r8, [rdi+24]\n\tmov     r10, [rdi+32]\n\tlea     r11, [rdx+rcx]\n\tadc     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+16]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+24], r8\n\tmov     [rdi+32], r10\n\tadc     rdx, rcx\n\tmov     [rdi+40], rdx\n\tEXIT_PROC reg_save_list\n.2:\n.3:\tmov     r12, 0\n\tsub     r12, rdx\n\tmov     [rsp+stack_use+8], 
r12\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tlea     rcx, [r12+2]\n\tsar     rcx, 2\n\tinc     r12\n\tmov     r9, rdx\n\ttest    r12b, 1\n\tjnz     .7\n.4:\tmulx    r11, rbx, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], rbx\n\txor     rbx, rbx\n\tmulx    rbp, r10, r8\n\ttest    r12b, 2\n\tjz      .6\n.5:\tlea     rdi, [rdi-8]\n\tlea     rsi, [rsi-8]\n\tjmp     .13\n.6:\tlea     rsi, [rsi+8]\n\tlea     rdi, [rdi+8]\n\tjmp     .11\n.7:\tmulx    rbp, r10, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], r10\n\txor     r10, r10\n\tmulx    r11, rbx, r8\n\ttest    r12b, 2\n\tjz      .12\n.8:\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .10\n\txalign  32\n.9:\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n.10:adc     r11, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+8]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.11:add     r10, r11\n\tmov     [rdi+8], r10\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi+16]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n\tadc     r11, 0\n.12:add     rbx, rbp\n\tmov     [rdi+16], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+24]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.13:add     r10, r11\n\tlea     rsi, [rsi+32]\n\tmov     [rdi+24], r10\n\tadc     rbp, 0\n\tinc     rcx\n\tlea     rdi, [rdi+32]\n\tjnz     .9\n.14:mulx    rax, rdx, r9\n\tadd     rbx, rdx\n\tadc     rax, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     rax, 0\n\tmov     [rdi+8], rax\n\tlea     rsi, [rsi+16]\n\tlea     rdi, [rdi-16]\n.15:\n.16:\n\tlea     rsi, [rsi+r12*8]\n\tlea     rdi, [rdi+r12*8+48]\n\tmov     r8, [rsi-8]\n\tadd     r12, 2\n\tcmp     r12, -2\n\tjge     .30\n\tmov     r9, [rsi]\n\tlea     rcx, 
[r12+1]\n\tsar     rcx, 2\n\tmov     rdx, r9\n\ttest    r12b, 1\n\tjnz     .20\n.17:mov     r13, [rdi]\n\tmov     r14, [rdi+8]\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\txor     rbx, rbx\n\ttest    r12b, 2\n\tjnz     .19\n.18:mov     rdx, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .26\n.19:mov     rdx, [rsi+8]\n\tmov     r13, [rdi+16]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tmulx    rbp, rax, r8\n\tjz      .29\n\tjmp     .24\n.20:mov     r14, [rdi]\n\tmov     r13, [rdi+8]\n\tmulx    rbp, rax, r8\n\tmov     rdx, [rsi+8]\n\tadd     r14, rax\n\tadc     rbp, 0\n\txor     r10, r10\n\tmov     [rdi], r14\n\tmulx    r11, rax, r8\n\ttest    r12b, 2\n\tjz      .22\n.21:mov     r14, [rdi+16]\n\tlea     rdi, [rdi+24]\n\tlea     rsi, [rsi+24]\n\tjmp     .25\n.22:lea     rdi, [rdi+8]\n\tlea     rsi, [rsi+8]\n\tjmp     .27\n\txalign  32\n.23:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.24:add     r14, rax\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tlea     rdi, [rdi+32]\n\tadd     r14, r11\n\tmov     rdx, [rsi-16]\n\tmov     [rdi-24], r14\n\tadc     rbp, 0\n\tadd     r13, rbx\n\tmov     r14, [rdi-8]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.25:add     r13, rax\n\tmulx    rbx, rax, r9\n\tadc     r11, 0\n\tadd     r13, rbp\n\tmov     [rdi-16], r13\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     rbx, 0\n\tadd     r14, r10\n\tmov     rdx, [rsi-8]\n\tadc     rbx, 0\n.26:mulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     r13, [rdi]\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tadd     r14, r11\n\tmov     [rdi-8], r14\n\tadc     rbp, 0\n\tmov     rdx, [rsi]\n\tadd     r13, rbx\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.27:add     r13, rax\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r13, rbp\n\tmov     r14, [rdi+8]\n\tmov     [rdi], r13\n\tmov     r13, [rdi+16]\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     
rbx, 0\n\tmov     rdx, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tjnz     .23\n.28:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.29:add     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbx\n\tadc     rax, 0\n\tadd     rbp, rdx\n\tmov     [rdi+16], rbp\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tjmp     .16\n.30:mov     r12, [rsp+stack_use+8]\n\tmov     rdx, [rsi]\n\tjg      .31\n\tmov     r9, rdx\n\tmov     r13, [rdi]\n\tmov     r14, rax\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\tmov     rdx, [rsi+8]\n\tmulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbp\n\tmov     [rdi+16], rdx\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tlea     rdi, [rdi+32]\n\tlea     rsi, [rsi+16]\n\tjmp     .32\n.31:mulx    rbp, r14, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     [rdi], r14\n\tmov     [rdi+8], rbp\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+8]\n.32:\n.33:lea     rsi, [rsi+r12*8+8]\n\tlea     rdi, [rdi+r12*8]\n\tlea     rdi, [rdi+r12*8]\n\tinc     r12\n\tmov     rdx, [rsi-8]\n\txor     rbx, rbx\n\tmulx    r10, rax, rdx\n\tmov     [rdi+8], rax\n\tmov     r8, [rdi+16]\n\tmov     r9, [rdi+24]\n\tjmp     .35\n\txalign  16\n.34:mov     r8, [rdi+32]\n\tmov     r9, [rdi+40]\n\tlea     rdi, [rdi+16]\n\tlea     r10, [rdx+rbx]\n.35:adc     r8, r8\n\tadc     r9, r9\n\tsetc    bl\n\tmov     rdx, [rsi]\n\tlea     rsi, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r10\n\tadc     r9, rax\n\tmov     [rdi+16], r8\n\tmov     [rdi+24], r9\n\tinc     r12\n\tjnz     .34\n.36:adc     rdx, rbx\n\tmov     [rdi+32], rdx\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/subadd_n.asm",
    "content": ";  AMD64 mpn_subadd_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)-(rdx,r8)-(rcx,r8)\n;   rax = summed borrow in range [ 0..2 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_subadd_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills\n\n    jmp     .Check\n\n    align   16\n  .Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    sbb     Limb0, [Src2P]\n    sbb     Limb1, [Src2P+8]\n    sbb     Limb2, [Src2P+16]\n    sbb     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n  .Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n  .Post:\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    sbb     Limb0, [Src2P]\n    setc    Carry\n 
   shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    sbb     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    sbb     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) is ok\n.Exit:\n\n    add     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/submul_1.asm",
    "content": ";  AMD64 mpn_submul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\nalign 16\n\nFRAME_PROC mpn_submul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/xnor_n.asm",
    "content": ";  AVX mpn_xnor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function prologue - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xnor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    not     Limb0\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    not     Limb0\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    not     Limb0\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/avx/xor_n.asm",
    "content": ";  AVX mpn_xor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function prologue - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/com_n.asm",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%define USE_WIN64\n\nBITS 64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n    %define FFFF    R11\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n    %define FFFF    R8\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n%define QFFFF   YMM4\n%define DFFFF   XMM4\n\n    align   32\n\nLEAF_PROC mpn_com_n\n    mov     RAX, Size1\n    or      RAX, RAX\n    je      .Exit      ;ajs:notshortform\n                       ; size=0 =>\n\n    ; Set a GPR to 0xFF...FF\n    mov     FFFF, -1\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .A32\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit      ;ajs:notshortform\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .A32:\n\n    test    Op2, 16\n    je      
.AVX\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .AVX:\n\n    ; Set an AVX2 reg to 0xFF...FF\n    movq    DFFFF, FFFF\n    vbroadcastsd QFFFF, DFFFF\n\n    mov     Offs, 128\n    jmp     .AVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .AVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqu QLimb2, [Op1+64]\n    vpxor   QLimb2, QLimb2, QFFFF\n    vmovdqu QLimb3, [Op1+96]\n    vpxor   QLimb3, QLimb3, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .AVXCheck:\n\n    sub     Size1, 16\n    jnc     .AVXLoop\n\n    add     Size1, 16\n    je      .Exit ;ajs:notshortform\n                  ; AVX copied operand fully =>\n\n    ; copy remaining max. 
15 limb\n    test    Size1, 8\n    je      .Four\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .Four:\n\n    test    Size1, 4\n    je      .Two\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .Two:\n\n    test    Size1, 2\n    je      .One\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    pxor    DLimb0, DLimb0, DFFFF\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .One:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/copyd.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n\n; mpn_copyd(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving high to low in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$-L3$ if destination is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - startup overhead of \"rep movsq\" with negative direction is 200 cycles!!!\n;   - negative 
direction is unfavourable compared to positive \"rep movsq\" and\n;     to AVX.\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyd\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; Size=0 =>\n\n    lea     Op1, [Op1+8*Size1-8]\n    lea     Op2, [Op2+8*Size1-8]\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    jne     .lCpyDecA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 8\n    sub     Op2, 8\n\n  .lCpyDecA32:\n\n    test    Op2, 16\n    jnz     .lCpyDecAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyDecAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyDecAVXLoop:\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqu QLimb2, [Op1-88]\n    vmovdqu QLimb3, [Op1-120]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n    vmovdqa [Op2-88], QLimb2\n    vmovdqa [Op2-120], QLimb3\n\n    sub     Op1, Offs\n    sub     Op2, Offs\n\n  .lCpyDecAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyDecAVXLoop\n\n    
add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyDecFour\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lCpyDecFour:\n\n    test    Size1, 4\n    je      .lCpyDecTwo\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqa [Op2-24], QLimb0\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lCpyDecTwo:\n\n    test    Size1, 2\n    je      .lCpyDecOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n%else\n    movdqu  DLimb0, [Op1-8]\n    movdqa  [Op2-8], DLimb0\n%endif\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/copyi.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mpn_copyi(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving low to high in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$ (30%), L2$ (10%) if dest. 
is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - the break-even between AVX and \"rep movsq\" is around 10,000 limb\n; - the prologue & epilogue can still be optimized!\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyi\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; size=0 =>\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .lCpyIncA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .lCpyIncA32:\n\n    test    Op2, 16\n    je      .lCpyIncAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyIncAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyIncAVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqu QLimb2, [Op1+64]\n    vmovdqu QLimb3, 
[Op1+96]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .lCpyIncAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyIncAVXLoop\n\n    add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyIncFour\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lCpyIncFour:\n\n    test    Size1, 4\n    je      .lCpyIncTwo\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lCpyIncTwo:\n\n    test    Size1, 2\n    je      .lCpyIncOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; don't want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  AMD64 mpn_hamdist\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_hamdist\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     r9, [rcx+r8*8-8]\n\tmov     rcx, 1\n\txor     eax, eax\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tmov     r11, [r9+rcx*8+8]\n\txor     r11, [rdx+rcx*8+8]\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 2\n\tjnc     .0\n.1: jne     .2\n\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tadd     rax, r10\n.2:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  mpn_ior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_ior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpor     xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tor      rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  mpn_iorn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_iorn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm1, [rdx+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [r8+r10*8+16-32]\n\tpandn   xmm1, xmm3\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [rdx+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tnot     rax\n\tor      rax, r9\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], 
xmm0\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm2, [r8+r10*8]\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\nlp:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        adc     rbx, rbx\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov 
    [rdi+rdx*8+24], r11\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        adc     rbx, rbx\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     lp\n        cmp     rcx, 2\n        jg      case0\n        jz      case1\n        jp      case2\ncase3:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     rbx, rbx\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     fin\ncase2:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        adc     rbx, rbx\n        bt     
 rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     rbx, rbx\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     fin\ncase1:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        adc     rbx, rbx\n        inc     rdx\n        mov     [rbp+rcx*8], r12\nfin:    mov     rcx, 3\ncase0:  \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     notodd\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        adc     r10, r10\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\nl7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      l7\n        mov     rcx, 3\nnotodd: and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\nl1:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      l1\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\nl2:     adc   
  qword[rbp+rcx*8+8], 0\n        inc     rcx\n        jc      l2\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        adc     rbx, rbx\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        adc     rbx, rbx\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        
mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        adc     rbx, rbx\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     rbx, rbx\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        adc     rbx, rbx\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 
2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     rbx, rbx\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        adc     rbx, rbx\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        adc     rbx, rbx\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     rbx, rbx\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        adc     rax, rax\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        adc     rbx, rbx\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        adc     rbx, rbx\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks 
into the fourth quarter block by simply waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/lshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux     RAX        RDI         RSI            RDX              RCX\n; Win7      RAX        RCX         RDX            R8               R9\n;\n; Description:\n; The function shifts Op1 left by n bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bits of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( Op1[ Size1-1..0 ]:ShlIn ) << 1\n; - Op1[ 0 ] >> 63\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few 
percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 31.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShlDL0      XMM2    ; Attn: this must match ShlQL0 definition\n%define ShrDL0      XMM3    ; Attn: this must match ShrQL0 definition\n%define ShlDLCnt    XMM6    ; Attn: this must match ShlQlCnt definition\n%define ShrDLCnt    XMM7    ; Attn: this must match ShrQlCnt definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShlQL0      YMM2\n%define ShrQL0      YMM3\n%define ShlQL1      YMM4\n%define ShrQL1      YMM5\n%define ShlQLCnt    YMM6\n%define ShrQLCnt    YMM7\n\n    align   32\nFRAME_PROC  mpn_lshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    sub     Size1, 1\n    jc      .Exit               ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, CL\n\n    or      Size1, Size1\n    je      .lShlEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n  %ifdef OFFS_REG\n    mov     Offs, -512\n 
 %endif\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShlEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShlEquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  .lShlEquA16:\n\n    test    Op2, 16\n    jne     .lShlEquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShlEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShrQL0, ShrQL0, 10010011b\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShlEquAVXCheck\n\n    ; main loop (prefetching enabled; unloaded cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in LD3$\n    align   16\n  .lShlEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpermq    ShrQL1, ShrQL1, 10010011b\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 00000011b\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vpermq    ShrQL0, ShrQL0, 10010011b\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 00000011b\n    
vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShlEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShlEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shld    Limb2, Limb1, CL\n%if 1\n    vmovq   ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShlEquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShlEquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, CL\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShlEquTwo:\n\n    test    Size1, 2\n    je      .lShlEquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShlEquOne:\n\n    test    Size1, 1\n    je      .lShlEquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShlEquPost:\n\n    shl    Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:"
  },
  {
    "path": "mpn/x86_64w/haswell/lshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 left by one bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bit of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 
21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_lshift1\n\n    xor     EAX, EAX\n    sub      Size1, 1\n    jc      .Exit               ;ajs:notshortform ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, 1\n\n    or      Size1, Size1\n    je      .lShl1EquPost       ;ajs:notshortform ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShl1EquFour       ;ajs:notshortform ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShl1EquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  
.lShl1EquA16:\n\n    test    Op2, 16\n    jne     .lShl1EquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShl1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlq  ShrQL0, QLimb0, 63\n    vpermq  ShrQL0, ShrQL0, 147\t\t; 0b10010011\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShl1EquAVXCheck\n\n    ; main loop requires on entry:\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShl1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllq    ShlQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlq    ShrQL1, QLimb1, 63\n    vpermq    ShrQL1, ShrQL1, 147\t; 0b10010011\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 3\t; 0b00000011\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllq    ShlQL1, QLimb1, 1\n    vpsrlq    ShrQL0, QLimb0, 63\n    vpermq    ShrQL0, ShrQL0, 147\t; 0b10010011\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 3\t; 0b00000011\n    vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShl1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShl1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shr     Limb2, 63\n%if 1\n    vmovq ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n\n    ; Insert value of Limb2 into the 0-th qword of ShrDL0\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllq  
ShlQL0, QLimb0, 1\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShl1EquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShl1EquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, 1\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShl1EquTwo:\n\n    test    Size1, 2\n    je      .lShl1EquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShl1EquOne:\n\n    test    Size1, 1\n    je      .lShl1EquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShl1EquPost:\n\n    shl     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/lshiftc.asm",
    "content": "; PROLOGUE(mpn_lshiftc)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void lshiftc(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;                  rdi     rsi        rdx      rcx\n;                  rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshiftc\n\tmov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tpcmpeqb mm6, mm6\n\tmovq    mm1, rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 5\n\tmovq    mm5, [rdx+r8*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjc      .2\n\t\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tmovq    mm5, [rdx+r8*8-8]\n\tsub     r8, 4\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+40], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+32], mm4\n\tpsllq   mm3, mm0\n\tjnc     .1\n.2: cmp     r8, -2\n\tjz      .4\n\tjp      .5\n\tjs      .6\n.3:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8], mm4\n\temms\n\tret\n\n\txalign  16\n.4:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\temms\n\tret\n\n\txalign  16\n.5:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\temms\n\tret\n\n\txalign  16\n.6:\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\temms\n\tret\n\t\n"
  },
  {
    "path": "mpn/x86_64w/haswell/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n    FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n    \n\tmov     r13, [rsi+rdx*8-8]\n\tmov     rax, [rsi+rdx*8-16]\n\tmov     r8, [r9]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tsub     rdi, 2\n\t\n\txalign  16\n.1:\tmov     r10, [rsi+rdi*8-8]\n\tmul     r8\n\tadd     r10, rax\n\tmov     r11, 0\n\tadc     r11, rdx\n\tmov     rax, r13\n\tmul     r9\n\tadd     rax, r10\n\tmov     r13, r11\n\tadc     r13, rdx\n\tdec     rdi\n\tjnz     .1\n\n\tmov     [rcx], rax\n\tmov     rax, r8\n\tmul     r13\n\tadd     [rcx], rax\n\tadc     rdx, 0\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n        FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r14, [rsi+rdx*8-8]\n        mov     r13, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r10, [r9+16]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        mov     rax, [rsi+rdx*8-24]\n        mul     r8\n        mov     r11, [rsi+rcx*8-32]\n        xor     r12, r12\n        sub     rcx, 6\n        jc      .2\n    \n        align   16\n.1:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     
r14\n        add     r13, rax\n        mov     rax, [rsi+rcx*8+8]\n        mov     r14, r12\n        adc     r14, rdx\n        mul     r8\n        mov     r12d, 0\n        mov     r11, [rsi+rcx*8+0]\n        sub     rcx, 2\n        jnc     .1\n.2:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n        cmp     rcx, -2\n        je      .4\n.3:     mov     r11, [rsi+rcx*8+8]\n        xor     r12, r12\n        mov     rax, r8\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r9\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n.4:     mov     rax, r8\n        mul     r14\n        add     r13, rax\n        adc     rdx, 0\n        mov     [rdi], r13\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n    \n    end\n    \n"
  },
  {
    "path": "mpn/x86_64w/haswell/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        LEAF_PROC mpn_mul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jne     .1\n        mul     r9\n        mov     [rcx], rax\n        mov     rax, rdx\n        ret\n\n.1:     FRAME_PROC ?mpn_sandybridge_mul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n       
 mov     [rsp-8], r12\n        db      0x26\n        jge     .2\n.1:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        mov     [rdi+r11*8+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+r11*8+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        mov     [rdi+r11*8+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .1\n.2:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .5\n        jz      .4\n        jp      .3\n        mov     rax, [rsi+16]\n        mul     rcx\n        mov     [rdi+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        mov     [rdi+32], r8\n        mov     rax, r9\n        EXIT_PROC   reg_save_list\n\n.3:     mov     rax, [rsi+24]\n        mul     rcx\n        mov     [rdi+16], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        mov     [rdi+24], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     [rdi+32], r12\n        mov     rax, r8\n        EXIT_PROC   reg_save_list\n\n        align   16\n.4:  
   mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     [rdi+32], r10\n        mov     rax, r12\n        EXIT_PROC   reg_save_list\n\n.5:     mov     [rdi+32], r9\n        mov     rax, r10\n        END_PROC   reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2010 Jason Moxham\n;\n;  Windows Conversion Copyright 2010 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n        CPU  nehalem\n        BITS 64\n\n    \tFRAME_PROC mpn_mul_2, 0, reg_save_list\n        mov     rbx, 3\n        lea     rdi, [rcx+r8*8-24]\n        lea     rsi, [rdx+r8*8-24]\n        sub     rbx, r8\n        mov     r8, [r9+8]\n        mov     rcx, [r9]\n\n        mov     r11, 0\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8]\n        mov     r10, 0\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8]\n        mov     [rdi+rbx*8], r11\n        adc     r9, rdx\n        cmp     rbx, 0\n        jge     .2\n\n        align   16\n.1:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n  
      adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        mov     rax, [rsi+rbx*8+24]\n        mov     r10, 0\n        adc     r9, rdx\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8+24]\n        mov     [rdi+rbx*8+24], r11\n        adc     r9, rdx\n        adc     r10, 0\n        add     rbx, 3\n        jnc     .1\n.2:     cmp     rbx, 1\n        ja      .5\n        je      .4\n.3:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        adc     r9, rdx\n        mov     [rdi+rbx*8+24], r11\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n\n.4:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     
[rdi+rbx*8+16], r10\n        mov     rax, r11\n        EXIT_PROC reg_save_list\n\n.5:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     rax, r10\n        END_PROC reg_save_list\n        \n        end    \n        \n"
  },
  {
    "path": "mpn/x86_64w/haswell/mul_basecase.asm",
    "content": ";  AMD64 mpn_mul_basecase optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n\n;  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13, r14, r15 \n\n%define rp       rdi\n%define up       rsi\n%define un_param rdx\n%define vp       rcx\n\n%define un       rbx\n%define un8      bl\n\n%define w0       r10\n%define w1       r11\n%define w2       r12\n%define w3       r13\n%define n        rbp\n\nBITS    64\n\nalign 16\n\nFRAME_PROC mpn_mul_basecase, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\t\n    mov \tun, rdx\n\tneg \tun\n\tmov \tn, rdx\n\tsar \tn, 2\n\n\ttest \tr8b, 1\n\tjz \t.Ldo_mul_2\n\tmov \trdx, [vp]\n\n.Ldo_mul_1:\n\ttest \tun8, 1\n\tjnz \t.Lm1x1\n\n.Lm1x0:\n\ttest \tun8, 2\n\tjnz \t.Lm110\n\n.Lm100:\n\tmulx \tw2, r14, [up]\n\tmulx \tw3, w1, [up+8]\n\tlea \trp, [rp-24]\n\tjmp \t.Lm1l0\n\n.Lm110:\n\tmulx \tr9, w3, [up]\n\tmulx \tr14, w1, [up+8]\n\tlea \trp, [rp-8]\n\ttest \tn, n\n\tjz \t.Lcj2\n\tmulx \tw2, w0, [up+16]\n\tlea \tup, [up+16]\n\tjmp \t.Lm1l2\n\n.Lm1x1:\n\ttest \tun8, 2\n\tjz \t.Lm111\n\n.Lm101:\n\tmulx \tr14, r9, [up]\n\tlea \trp, [rp-16]\n\ttest \tn, n\n\tjz \t.Lcj1\n\tmulx \tw2, w0, [up+8]\n\tlea \tup, [up+8]\n\tjmp \t.Lm1l1\n\n.Lm111:\n\tmulx \tw3, w2, [up]\n\tmulx \tr9, w0, [up+8]\n\tmulx \tr14, w1, [up+16]\n\tlea \tup, [up+24]\n\ttest \tn, n\n\tjnz \t.Lgt3\n\tadd \tw3, w0\n\tjmp \t.Lcj3\n\n.Lgt3:\n\tadd \tw3, w0\n\tjmp \t.Lm1l3\n\n\n\talign 32\n.Lm1tp:\n\tlea \trp, [rp+32]\n.Lm1l3:\n\tmov \t[rp], w2\n\tmulx \tw2, w0, [up]\n.Lm1l2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lm1l1:\n\tadc \tr14, w0\n\tmov \t[rp+16], r9\n\tmulx \tw3, w1, [up+8]\n.Lm1l0:\n\tmov \t[rp+24], r14\n\tmulx \tr9, w0, [up+16]\n\tadc \tw2, w1\n\tmulx \tr14, 
w1, [up+24]\n\tadc \tw3, w0\n\tlea \tup, [up+32]\n\tdec \tn\n\tjnz \t.Lm1tp\n\n.Lm1ed:\n\tlea \trp, [rp+32]\n.Lcj3:\n\tmov \t[rp], w2\n.Lcj2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lcj1:\n\tmov \t[rp+16], r9\n\tadc \tr14, 0\n\tmov \t[rp+24], r14\n\n\tdec \tr8d\n\tjz \t.Lret5\n\n\tlea \tvp, [vp+8]\n\tlea \trp, [rp+32]\n\tjmp \t.Ldo_addmul\n\n.Ldo_mul_2:\n\tmov \tr9, [vp]\n\tmov \tr14, [vp+8]\n\tlea \tn, [un]\n\tsar \tn, 2\n\ttest \tun8, 1\n\tjnz \t.Lm2x1\n\n.Lm2x0:\n\txor \tw0, w0\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw1, w2, r9\n\tjz \t.Lm2l0\n\n.Lm210:\n\tlea \trp, [rp-16]\n\tlea \tup, [up-16]\n\tjmp \t.Lm2l2\n\n.Lm2x1:\n\txor \tw2, w2\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw3, w0, r9\n\tjz \t.Lm211\n\n.Lm201:\n\tlea \trp, [rp-24]\n\tlea \tup, [up+8]\n\tjmp \t.Lm2l1\n\n.Lm211:\n\tlea \trp, [rp-8]\n\tlea \tup, [up-8]\n\tjmp \t.Lm2l3\n\n\n\talign 16\n.Lm2tp:\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l0:\n\tmov \t[rp], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+8]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n.Lm2l3:\n\tmov \t[rp+8], w0\n\tadc \tw3, 0\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up+16]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l2:\n\tmov \t[rp+16], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+24]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n\tlea \tup, [up+32]\n.Lm2l1:\n\tmov \t[rp+24], w0\n\tadc \tw3, 0\n\tinc \tn\n\tlea \trp, [rp+32]\n\tjnz \t.Lm2tp\n\n.Lm2ed:\n\tmulx \trax, rdx, r14\n\tadd \tw2, rdx\n\tadc \trax, 0\n\tadd \tw2, w3\n\tmov \t[rp], w2\n\tadc \trax, 0\n\tmov \t[rp+8], rax\n\tadd \tr8d, -2\n\tjz \t.Lret5\n\tlea \tvp, [vp+16]\n\tlea \trp, [rp+16]\n\n.Ldo_addmul:\n\tmov     [rsp+stack_use+8], r8\n\tlea \trp, 
[un*8+rp]\n\tlea \tup, [un*8+up]\n\n.Louter:\n\tmov \tr9, [vp]\n\tmov \tr8, [vp+8]\n\tlea \tn, [un+2]\n\tsar \tn, 2\n\tmov \trdx, [up]\n\ttest \tun8, 1\n\tjnz \t.Lbx1\n\n.Lbx0:\n\tmov \tr14, [rp]\n\tmov \tr15, [rp+8]\n\tmulx \tw1, rax, r9\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tmov \t[rp], r14\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\ttest \tun8, 2\n\tjnz \t.Lb10\n\n.Lb00:\n\tlea \tup, [up+16]\n\tlea \trp, [rp+16]\n\tjmp \t.Llo0\n\n.Lb10:\n\tmov \tr14, [rp+16]\n\tlea \tup, [up+32]\n\tmulx \tw3, rax, r9\n\tjmp \t.Llo2\n\n.Lbx1:\n\tmov \tr15, [rp]\n\tmov \tr14, [rp+8]\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tmov \trdx, [up+8]\n\tmov \t[rp], r15\n\tmulx \tw1, rax, r9\n\ttest \tun8, 2\n\tjz \t.Lb11\n\n.Lb01:\n\tmov \tr15, [rp+16]\n\tlea \trp, [rp+24]\n\tlea \tup, [up+24]\n\tjmp \t.Llo1\n\n.Lb11:\n\tlea \trp, [rp+8]\n\tlea \tup, [up+8]\n\tjmp \t.Llo3\n\n\n\talign 16\n.Ltop:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n.Llo2:\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tlea \trp, [rp+32]\n\tadd \tr15, w1\n\tmov \trdx, [up-16]\n\tmov \t[rp-24], r15\n\tadc \tw3, 0\n\tadd \tr14, w2\n\tmov \tr15, [rp-8]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo1:\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tadd \tr14, w3\n\tmov \t[rp-16], r14\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tadd \tr15, w0\n\tmov \trdx, [up-8]\n\tadc \tw2, 0\n.Llo0:\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmov \tr14, [rp]\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tadd \tr15, w1\n\tmov \t[rp-8], r15\n\tadc \tw3, 0\n\tmov \trdx, [up]\n\tadd \tr14, w2\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo3:\n\tadd \tr14, rax\n\tadc \tw1, 0\n\tmulx \tw2, rax, r8\n\tadd \tr14, w3\n\tmov \tr15, [rp+8]\n\tmov \t[rp], r14\n\tmov \tr14, [rp+16]\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\tlea \tup, 
[up+32]\n\tinc \tn\n\tjnz \t.Ltop\n\n.Lend:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \trax, rdx, r8\n\tadd \tr15, w1\n\tmov \t[rp+8], r15\n\tadc \tw3, 0\n\tadd \trdx, w2\n\tadc \trax, 0\n\tadd \trdx, w3\n\tmov \t[rp+16], rdx\n\tadc \trax, 0\n\tmov \t[rp+24], rax\n\n\tadd \tDWORD [rsp+stack_use+8], -2\n\tlea \tvp, [vp+16]\n\tlea \tup, [up+un*8-16]\n\tlea \trp, [rp+un*8+32]\n\tjnz \t.Louter\n\n\tmov \trax, [rsp+stack_use+8]\n.Lret5:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  mpn_nand_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nand_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpand    xmm1, xmm3\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tand     rax, r9\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n\n;  mpn_nior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpor     xmm1, xmm3\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tor      rax, r9\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/nsumdiff_n.asm",
    "content": "; ============================================================================\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n; mp_limb_t mpn_nsumdiff_n(mp_ptr Op3, mp_ptr Op4, mp_srcptr Op1, mp_srcptr Op2, mp_size_t Size)\n; Linux     RAX           RDI         RSI         RDX            RCX            R8\n; Win7      RAX           RCX         RDX         R8             R9             Stack\n;\n; Description:\n; The function computes -(Op2+Op1) and stores the result in Op3 while at the\n; same time subtracting Op2 from Op1 with result in Op4. The final carries from\n; addition and subtraction are handed back as a combined mp_limb_t. There is a\n; gain in execution speed compared to separate addition and subtraction by\n; reducing memory access. 
The factor depends on the size of the operands (the\n; cache hierarchy in which the operands can be handled).\n;\n; Equivalent to, assuming no overlap:\n; cy1 = mpn_add_n(r1, s1, s2, n);\n; cy2 = mpn_neg_n(r1, r1, n); /* cy2 = [{r1,n} != 0] */\n; cy3 = mpn_sub_n(r2, s1, s2, n);\n; return 2*(cy1 + cy2) + cy3;\n; ============================================================================\n\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r13, r14, r15\n\n%define Op3     RDI\n%define Op4     RSI\n%define Op1     RDX\n%define Op2     RCX\n%define Size    R8\n\n%define Limb0   RBP\n%define Limb1   RBX\n%define Limb2   R9\n%define Limb3   R10\n%define Limb4   R11\n%define Limb5   R12\n%define Limb6   R13\n%define Limb7   R14\n%define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    PREFETCH_STRIDE ; no more regs avail. => fallback to const\n%endif\n\n%define SaveAC  setc    AL\n%define LoadAC  shr     AL, 1\n\n%define SaveSC  sbb     AH, AH\n%define LoadSC  add     AH, AH\n\n    BITS 64\n\n    align   32\n\n    FRAME_PROC mpn_nsumdiff_n, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\n    xor     EAX, EAX            ; clear add & sub carry\n\n; First we handle any words whose sum = 0\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    add     Limb2, Limb5\n    SaveAC\n    neg\t    Limb2\n    jz      .zero_sum ; ajs:notshortform\n\n; Then we handle the first word whose sum !=0. 
The NOT of this sum needs to\n; be incremented, which produces no carry (NOT(x) + 1 = NEG(x))\n.not_zero:\n    mov     [Op3], Limb2\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    dec     Size\n    mov     [Op4], Limb1\n    add     Op4, 8\n\n    shr     Size, 1\n    jnc     .n_two\n\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    LoadAC\n    mov     Limb2, Limb1\n    adc     Limb2, Limb5\n    SaveAC\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    not     Limb2\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     [Op3-8], Limb2\n    mov     [Op4], Limb1\n    add     Op4, 8\n\n  .n_two:\n\n    shr     Size, 1\n    jnc     .n_four\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    mov     Limb5, [Op2]\n    mov     Limb6, [Op2+8]\n    LoadAC\n    mov     Limb3, Limb1\n    mov     Limb4, Limb2\n    adc     Limb3, Limb5\n    not     Limb3\n    adc     Limb4, Limb6\n    SaveAC\n    LoadSC\n    not     Limb4\n    sbb     Limb1, Limb5\n    sbb     Limb2, Limb6\n    SaveSC\n    mov     [Op3], Limb3\n    mov     [Op3+8], Limb4\n    mov     [Op4], Limb1\n    mov     [Op4+8], Limb2\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    add     Op4, 16\n\n\n  .n_four:\n\n    shr     Size, 1\n    jnc     .n_loop_pre ;ajs:notshortform\n\n    LoadAC\n\n    ; slight change of scheme here - avoid too many\n    ; memory to reg or reg to memory moves in a row\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    not     Limb0\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    not     Limb0\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    not     Limb0\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, 
[Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    not     Limb0\n    mov     [Op3+24], Limb0\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    SaveSC\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    add     Op4, 32\n \n    test   Size, Size\n  .n_loop_pre:\t\t; If we jump here, ZF=1 iff Size=0\n    jz     .n_post      ;ajs:notshortform\n    LoadAC              ; set carry for addition\n\n    ; main loop - values below are best case - up to 50% fluctuation possible!\n    ; - 3.50      cycles per limb in LD1$\n    ; - 3.50      cycles per limb in LD2$\n    ; - 5.10-5.50 cycles per limb in LD3$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; add the first quad-limb\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    not     Limb0\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    not     Limb0\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    not     Limb0\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    not     Limb0\n    mov     [Op3+24], Limb0\n    lea     Op3, [Op3 + 64]\n\n    SaveAC              ; memorize add-carry\n    LoadSC              ; set carry for subtraction\n\n    sbb     Limb1, Limb5        ; now sub the first quad-limb\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n   
 mov     Limb1, [Op1+32]     ; sub the second quad-limb\n    mov     Limb5, [Op2+32]\n    mov     Limb0, Limb1\n    sbb     Limb0, Limb5\n    mov     [Op4+32], Limb0\n    mov     Limb2, [Op1+40]\n    mov     Limb6, [Op2+40]\n    mov     Limb0, Limb2\n    sbb     Limb0, Limb6\n    mov     [Op4+40], Limb0\n    mov     Limb3, [Op1+48]\n    mov     Limb7, [Op2+48]\n    mov     Limb0, Limb3\n    sbb     Limb0, Limb7\n    mov     [Op4+48], Limb0\n    mov     Limb4, [Op1+56]\n    mov     Limb8, [Op2+56]\n    mov     Limb0, Limb4\n    sbb     Limb0, Limb8\n    mov     [Op4+56], Limb0\n    lea     Op4, [Op4 + 64]\n\n    SaveSC                      ; memorize sub-carry\n    LoadAC                      ; set carry for addition\n\n    adc     Limb1, Limb5        ; add the second quad-limb\n    not     Limb1\n    mov     [Op3+32-64], Limb1\n    adc     Limb2, Limb6\n    not     Limb2\n    mov     [Op3+40-64], Limb2\n    adc     Limb3, Limb7\n    not     Limb3\n    mov     [Op3+48-64], Limb3\n    adc     Limb4, Limb8\n    not     Limb4\n    mov     [Op3+56-64], Limb4\n\n    lea     Op1, [Op1 + 64]\n    lea     Op2, [Op2 + 64]\n\n\n    dec     Size\n    jnz     .n_loop     ;ajs:notshortform\n\n    SaveAC                      ; memorize add-carry\n    ; hand back carries\n  .n_post:\n\t\t\t\t; AL = cy1, AH = -cy3. 
cy2 = 1 here, as\n                                ; there were non-zero words in the sum\n    inc     al\t\t\t; AL = cy1 + cy2 = cy1 + 1, AH = -cy3\n.all_zero:\n    LoadSC\t\t\t; AL = cy1 + cy2, CY = cy3\n    adc     AL, AL\t\t; AL = 2*(cy1 + cy2) + cy3\n    movsx   EAX, AL\n\n  .Exit:\n  END_PROC reg_save_list\n.end:\n\n.zero_sum:\n    mov     [Op3], Limb2\n    LoadSC\n    sbb     Limb1, Limb5\n    SaveSC\n    mov     [Op4], Limb1\n    dec     Size\n    jz      .all_zero\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    add     Op4, 8\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    LoadAC\n    adc     Limb2, Limb5\n    SaveAC\n    neg     Limb2\n    jz      .zero_sum\n    jmp     .not_zero\n"
  },
  {
    "path": "mpn/x86_64w/haswell/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\teax                        rcx,      rdx\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_popcount\n\tmov     r8, 5\n\tlea     rcx, [rcx+rdx*8-40]\n\txor     eax, eax\n\tsub     r8, rdx\n\tjnc     .1\n\txalign  16\n.0:\n\tpopcnt  r9, [rcx+r8*8]\n\tpopcnt  r10, [rcx+r8*8+8]\n\tpopcnt  r11, [rcx+r8*8+16]\n\tpopcnt  rdx, [rcx+r8*8+24]\n\tadd     rax, r9\n\tadd     rax, rdx\n\tadd     rax, r10\n\tpopcnt  r9, [rcx+r8*8+32]\n\tpopcnt  r10, [rcx+r8*8+40]\n\tadd     rax, r9\n\tadd     rax, r11\n\tadd     rax, r10\n\tadd     r8, 6\n\tjnc     .0\n.1:\n\tlea     rdx, [rel .2]\n\tlea     r8, [r8+r8*8]\n\tadd     rdx, r8\n\tjmp     rdx\n.2:\n\tnop\n\tpopcnt  r9, [rcx]\n\tadd     rax, r9\n.3:\n\tpopcnt  r10, [rcx+8]\n\tadd     rax, r10\n.4:\tpopcnt  r11, [rcx+16]\n\tadd     rax, r11\n.5:\tpopcnt  rdx, [rcx+24]\n\tadd     rax, rdx\n.6:\tpopcnt  r9, [rcx+32]\n\tadd     rax, r9\n.7:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_sandybridge_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    mov     r8d, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extension of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // earlier , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store low 
quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/haswell/rshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux       RAX         RDI            RSI            RDX                 RCX\n; Windows x64 RAX         RCX            RDX             R8                  R9\n;\n; Description:\n; The function shifts Op1 right by Shift bits, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bits of\n; Op1. 
The function operates increasing in memory supporting in place shifts.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( ShrIn:Op1[ Size1-1..0 ] ) >> Shift\n; - Op1[ 0 ] << ( 64-Shift )\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Win64 support only (USE_WIN64 is defined below)!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benchmarked on 30.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n%define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n%define ShrDLCnt    XMM6    ; Attn: this must match ShrQlCnt definition\n%define ShlDLCnt    XMM7    ; Attn: this must match ShlQlCnt 
definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShrQL0      YMM2\n%define ShlQL0      YMM3\n%define ShrQL1      YMM4\n%define ShlQL1      YMM5\n%define ShrQLCnt    YMM6\n%define ShlQLCnt    YMM7\n\n    align   32\n\nFRAME_PROC mpn_rshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     Limb1, [Op1]\n    shrd    RAX, Limb1, CL\n\n    sub     Size1, 1\n    je      .lShrEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n    mov     Offs, 512\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShrEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShrEquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShrEquAlign16:\n\n    test    Op2, 16\n    je      .lShrEquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShrEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShrEquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in 
LD3$\n    align   16\n  .lShrEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShrEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShrEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shrd    Limb2, Limb1, CL\n%if 1\n    vmovq   ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShrEquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShrEquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShrEquTwo:\n\n    test    Size1, 2\n    je      .lShrEquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShrEquOne:\n\n    test    Size1, 1\n    je      .lShrEquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n    ; store most significant limb considering shift-in part\n  .lShrEquPost:\n\n    shr     Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/rshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 right by one bit, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bit of Op1.\n; The function operates increasing in memory supporting in place shifts.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - AVX based version implemented, tested & 
benched on 21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_rshift1\n\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     RAX, [Op1]\n    mov     Limb1, RAX\n    shl     RAX, 63\n\n    sub     Size1, 1\n    je      .lShr1EquPost       ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShr1EquFour       ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShr1EquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShr1EquAlign16:\n\n    test    Op2, 16\n    je      .lShr1EquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 
1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShr1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllq  ShlQL0, QLimb0, 63\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShr1EquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShr1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlq    ShrQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllq    ShlQL1, QLimb1, 63\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlq    ShrQL1, QLimb1, 1\n    vpsllq    ShlQL0, QLimb0, 63\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShr1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShr1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shl     Limb2, 63\n%if 1\n    vmovq ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlq  ShrQL0, QLimb0, 1\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShr1EquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShr1EquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShr1EquTwo:\n\n    test    Size1, 2\n    je      .lShr1EquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShr1EquOne:\n\n    test    Size1, 1\n    je      .lShr1EquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n  .lShr1EquPost:\n\n    shr     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/haswell/sqr_basecase.asm",
    "content": "\n; void mpn_sqr_basecase(mp_ptr, mp_srcptr, mp_size_t)\n; Linux                    rdi        rsi        rdx\n; Win64                    rcx        rdx         r8\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14\n    \n    text\n\txalign  32\n\tLEAF_PROC mpn_sqr_basecase \n\tcmp     r8, 2\n\tjae     .0\n\tmov     rdx, [rdx]\n\tmulx    rdx, rax, rdx\n\tmov     [rcx], rax\n\tmov     [rcx+8], rdx\n\tret\n.0:\tjne     .1\n\tmov     r11, [rdx+8]\n\tmov     rdx, [rdx]\n\tmulx    r10, r9, r11\n\tmulx    r8, rax, rdx\n\tmov     rdx, r11\n\tmulx    rdx, r11, rdx\n\tadd     r9, r9\n\tadc     r10, r10\n\tadc     rdx, 0\n\tadd     r8, r9\n\tadc     r10, r11\n\tadc     rdx, 0\n\tmov     [rcx], rax\n\tmov     [rcx+8], r8\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], rdx\n\tret\n\n\txalign  32\n.1:\tFRAME_PROC mpn_sqr_basec1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8  \n    cmp     rdx, 4\n\tjae     .2\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tmov     r9, rdx\n\tmulx    rax, r11, r8\n\tmov     rdx, [rsi+16]\n\tmulx    rcx, r10, r8\n\tmov     r8, r11\n\tadd     r10, rax\n\tadc     rcx, 0\n\tmulx    rax, rdx, r9\n\tadd     rdx, rcx\n\tmov     [rdi+24], rdx\n\tadc     rax, 0\n\tmov     [rdi+32], rax\n\txor     rcx, rcx\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, rdx\n\tmov     [rdi], rax\n\tadd     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     r8, [rdi+24]\n\tmov     r10, [rdi+32]\n\tlea     r11, [rdx+rcx]\n\tadc     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+16]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+24], r8\n\tmov     [rdi+32], r10\n\tadc     rdx, rcx\n\tmov     [rdi+40], rdx\n\tEXIT_PROC reg_save_list\n.2:\n.3:\tmov     r12, 0\n\tsub     r12, rdx\n\tmov     [rsp+stack_use+8], 
r12\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tlea     rcx, [r12+2]\n\tsar     rcx, 2\n\tinc     r12\n\tmov     r9, rdx\n\ttest    r12b, 1\n\tjnz     .7\n.4:\tmulx    r11, rbx, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], rbx\n\txor     rbx, rbx\n\tmulx    rbp, r10, r8\n\ttest    r12b, 2\n\tjz      .6\n.5:\tlea     rdi, [rdi-8]\n\tlea     rsi, [rsi-8]\n\tjmp     .13\n.6:\tlea     rsi, [rsi+8]\n\tlea     rdi, [rdi+8]\n\tjmp     .11\n.7:\tmulx    rbp, r10, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], r10\n\txor     r10, r10\n\tmulx    r11, rbx, r8\n\ttest    r12b, 2\n\tjz      .12\n.8:\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .10\n\txalign  32\n.9:\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n.10:adc     r11, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+8]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.11:add     r10, r11\n\tmov     [rdi+8], r10\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi+16]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n\tadc     r11, 0\n.12:add     rbx, rbp\n\tmov     [rdi+16], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+24]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.13:add     r10, r11\n\tlea     rsi, [rsi+32]\n\tmov     [rdi+24], r10\n\tadc     rbp, 0\n\tinc     rcx\n\tlea     rdi, [rdi+32]\n\tjnz     .9\n.14:mulx    rax, rdx, r9\n\tadd     rbx, rdx\n\tadc     rax, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     rax, 0\n\tmov     [rdi+8], rax\n\tlea     rsi, [rsi+16]\n\tlea     rdi, [rdi-16]\n.15:\n.16:\n\tlea     rsi, [rsi+r12*8]\n\tlea     rdi, [rdi+r12*8+48]\n\tmov     r8, [rsi-8]\n\tadd     r12, 2\n\tcmp     r12, -2\n\tjge     .30\n\tmov     r9, [rsi]\n\tlea     rcx, 
[r12+1]\n\tsar     rcx, 2\n\tmov     rdx, r9\n\ttest    r12b, 1\n\tjnz     .20\n.17:mov     r13, [rdi]\n\tmov     r14, [rdi+8]\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\txor     rbx, rbx\n\ttest    r12b, 2\n\tjnz     .19\n.18:mov     rdx, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .26\n.19:mov     rdx, [rsi+8]\n\tmov     r13, [rdi+16]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tmulx    rbp, rax, r8\n\tjz      .29\n\tjmp     .24\n.20:mov     r14, [rdi]\n\tmov     r13, [rdi+8]\n\tmulx    rbp, rax, r8\n\tmov     rdx, [rsi+8]\n\tadd     r14, rax\n\tadc     rbp, 0\n\txor     r10, r10\n\tmov     [rdi], r14\n\tmulx    r11, rax, r8\n\ttest    r12b, 2\n\tjz      .22\n.21:mov     r14, [rdi+16]\n\tlea     rdi, [rdi+24]\n\tlea     rsi, [rsi+24]\n\tjmp     .25\n.22:lea     rdi, [rdi+8]\n\tlea     rsi, [rsi+8]\n\tjmp     .27\n\txalign  32\n.23:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.24:add     r14, rax\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tlea     rdi, [rdi+32]\n\tadd     r14, r11\n\tmov     rdx, [rsi-16]\n\tmov     [rdi-24], r14\n\tadc     rbp, 0\n\tadd     r13, rbx\n\tmov     r14, [rdi-8]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.25:add     r13, rax\n\tmulx    rbx, rax, r9\n\tadc     r11, 0\n\tadd     r13, rbp\n\tmov     [rdi-16], r13\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     rbx, 0\n\tadd     r14, r10\n\tmov     rdx, [rsi-8]\n\tadc     rbx, 0\n.26:mulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     r13, [rdi]\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tadd     r14, r11\n\tmov     [rdi-8], r14\n\tadc     rbp, 0\n\tmov     rdx, [rsi]\n\tadd     r13, rbx\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.27:add     r13, rax\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r13, rbp\n\tmov     r14, [rdi+8]\n\tmov     [rdi], r13\n\tmov     r13, [rdi+16]\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     
rbx, 0\n\tmov     rdx, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tjnz     .23\n.28:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.29:add     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbx\n\tadc     rax, 0\n\tadd     rbp, rdx\n\tmov     [rdi+16], rbp\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tjmp     .16\n.30:mov     r12, [rsp+stack_use+8]\n\tmov     rdx, [rsi]\n\tjg      .31\n\tmov     r9, rdx\n\tmov     r13, [rdi]\n\tmov     r14, rax\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\tmov     rdx, [rsi+8]\n\tmulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbp\n\tmov     [rdi+16], rdx\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tlea     rdi, [rdi+32]\n\tlea     rsi, [rsi+16]\n\tjmp     .32\n.31:mulx    rbp, r14, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     [rdi], r14\n\tmov     [rdi+8], rbp\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+8]\n.32:\n.33:lea     rsi, [rsi+r12*8+8]\n\tlea     rdi, [rdi+r12*8]\n\tlea     rdi, [rdi+r12*8]\n\tinc     r12\n\tmov     rdx, [rsi-8]\n\txor     rbx, rbx\n\tmulx    r10, rax, rdx\n\tmov     [rdi+8], rax\n\tmov     r8, [rdi+16]\n\tmov     r9, [rdi+24]\n\tjmp     .35\n\txalign  16\n.34:mov     r8, [rdi+32]\n\tmov     r9, [rdi+40]\n\tlea     rdi, [rdi+16]\n\tlea     r10, [rdx+rbx]\n.35:adc     r8, r8\n\tadc     r9, r9\n\tsetc    bl\n\tmov     rdx, [rsi]\n\tlea     rsi, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r10\n\tadc     r9, rax\n\tmov     [rdi+16], r8\n\tmov     [rdi+24], r9\n\tinc     r12\n\tjnz     .34\n.36:adc     rdx, rbx\n\tmov     [rdi+32], rdx\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  mpn_store\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n%define\tMOVQ\tmovd\n\n\tLEAF_PROC mpn_store\n\tlea     rcx, [rcx-32]\n\tcmp     rdx, 0\n\tjz      .4\n\tMOVQ    xmm0, r8\n\tmovddup xmm0, xmm0\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+32], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 4\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+32]\n\tsub     rdx, 4\n\tmovdqa  [rcx], xmm0\n\tmovdqa  [rcx+16], xmm0\n\tjnc     .2\n.3:\tcmp     rdx, -2\n\tja      .5\n\tjz      .7\n\tjp      .6\n.4:\tret\n\n.5:\tmovdqa  [rcx+32], xmm0\n.6:\tmov     [rcx+rdx*8+56], r8\n\tret\n\n.7:\tmovdqa  [rcx+32], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/sub_n.asm",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2008, 2016 Jason Moxham and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define LIMB1   rax\n    %define LIMB2   r10\n    %define SizeRest r11\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define LIMB1   rax\n    %define LIMB2   r9\n    %define SizeRest r10\n%endif\n\n%define ADCSBB sbb\n\n    BITS    64\n\n    xalign  8\n    LEAF_PROC mpn_sub_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_sub_n\n    xor     r10, r10\nentry:\n\tmov     SizeRest, Size\n\tand     SizeRest, 7\n\tshr     Size, 3\n    lea     Size, [r10 + 2*Size]\n    sar     Size, 1\n\tjnz     .loop1\n    jmp     .rest\n\n\talign   16\n.loop1:\n\tmov     LIMB1, [Inp1P]\n\tmov     LIMB2, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tADCSBB  LIMB2, [Inp2P+8]\n\tmov     LIMB1, [Inp1P+16]\n\tmov     [SumP+8], LIMB2\n\tADCSBB  
LIMB1, [Inp2P+16]\n\tmov     LIMB2, [Inp1P+24]\n\tmov     [SumP+16], LIMB1\n\tmov     LIMB1, [Inp1P+32]\n\tADCSBB  LIMB2, [Inp2P+24]\n\tmov     [SumP+24], LIMB2\n\tADCSBB  LIMB1, [Inp2P+32]\n\tmov     [SumP+32], LIMB1\n\tmov     LIMB2, [Inp1P+40]\n\tADCSBB  LIMB2, [Inp2P+40]\n\tmov     [SumP+40], LIMB2\n\tmov     LIMB1, [Inp1P+48]\n\tmov     LIMB2, [Inp1P+56]\n\tlea     Inp1P, [Inp1P+64]\n\tADCSBB  LIMB1, [Inp2P+48]\n\tADCSBB  LIMB2, [Inp2P+56]\n\tlea     Inp2P, [Inp2P+64]\n\tmov     [SumP+48], LIMB1\n\tmov     [SumP+56], LIMB2\n\tlea     SumP, [SumP+64]\n\tdec     Size\n\tjnz     .loop1\n\tinc     SizeRest\n\tdec     SizeRest\n\tjz      .end\n.rest:\n\tmov     LIMB1, [Inp1P]\n\tADCSBB  LIMB1, [Inp2P]\n\tmov     [SumP], LIMB1\n\tdec     SizeRest\n\tjz      .end\n\tmov     LIMB1, [Inp1P+8]\n\tADCSBB  LIMB1, [Inp2P+8]\n\tmov     [SumP+8], LIMB1\n\tdec     SizeRest\n\tjz      .end\n\tmov     LIMB1, [Inp1P+16]\n\tADCSBB  LIMB1, [Inp2P+16]\n\tmov     [SumP+16], LIMB1\n\tdec     SizeRest\n    jz      .end\n\tmov     LIMB1, [Inp1P+24]\n\tADCSBB  LIMB1, [Inp2P+24]\n\tmov     [SumP+24], LIMB1\n\tdec     SizeRest\n    jz      .end\n    lea     Inp1P, [Inp1P+32]\n    lea     Inp2P, [Inp2P+32]\n    lea     SumP, [SumP+32]\n    jmp     .rest\n.end:\n\tmov     eax, 0\n\tadc     eax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/haswell/sublsh1_n.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t sublsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, subtracts it from Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and subtract by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 0.7 cycles in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1\n; ============================================================================\n\n%define USE_WIN64\n\n%define ADDSUB add\n%define ADCSBB adc\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n%define Op3     RCX\n%define Op2     RDX\n%define Op1     R8\n%define Size    R9\n\n%define Limb0   RBX\n%define Limb1   RDI\n%define Limb2   RSI\n\n%define Limb3   R10\n%define Limb4   R11\n%define Limb5   R12\n%define Limb6   R13\n%define Limb7   R14\n%define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    RBP\n%endif\n\n\n%macro ACCUMULATE 1\n    mov     rax, [Op2 + 8 * %1]\n    ADCSBB  rax, Limb%1\n    mov     [Op3 + 8 * %1], rax\n%endmacro\n\n\n    align   32\n\n  FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & subtraction with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n    mov     rax, [Op2]\n    ADDSUB  rax, Limb0\n    mov     [Op3], rax\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, 
[Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; sub shifted Op1 from Op2 with result in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     
Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/submul_1.asm",
    "content": ";  AMD64 mpn_submul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\nalign 16\n\nFRAME_PROC mpn_submul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/sumdiff_n.asm",
    "content": "; ============================================================================\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n; mp_limb_t mpn_sumdiff_n(mp_ptr Op3, mp_ptr Op4, mp_srcptr Op1, mp_srcptr Op2, mp_size_t Size)\n; Linux     RAX           RDI         RSI         RDX            RCX            R8\n; Win7      RAX           RCX         RDX         R8             R9             Stack\n;\n; Description:\n; The function adds Op2 to Op1 and stores the result in Op3 while at the same\n; time subtracting Op2 from Op1 with result in Op4. The final carries from\n; addition and subtraction are handed back as a combined mp_limb_t. There is a\n; gain in execution speed compared to separate addition and subtraction by\n; reducing memory access. 
The factor depends on the size of the operands (the\n; cache hierarchy in which the operands can be handled).\n;\n; Comments:\n; - asm version implemented, tested & benched on 10.06.2015 by jn\n; - On Nehalem per limb saving is 0.5 cycle in LD1$, LD2$ and LD3$\n; - includes prefetching\n; - includes XMM save & restore\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n    %define Op3     RCX\n    %define Op4     RDX\n    %define Op1     R8\n    %define Op2     R9\n    %define Size    RBX\n    %define Limb1   RSI\n    %define Limb2   RDI\n%else\n    %define Op3     RDI\n    %define Op4     RSI\n    %define Op1     RDX\n    %define Op2     RCX\n    %define Size    R8\n    %define Limb1   RBX\n    %define Limb2   R9\n%endif\n\n    %define Limb0   RBP\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12\n    %define Limb6   R13\n    %define Limb7   R14\n    %define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    PREFETCH_STRIDE\n%endif\n\n%define SaveAC  setc    AL\n%define LoadAC  shr     AL, 1\n\n%define SaveSC  sbb     AH, AH\n%define LoadSC  add     AH, AH\n\nBITS 64\n\nalign   32\n\n   FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n    mov     Size, [rsp+stack_use+40]\n\n    xor     EAX, EAX            ; clear add & sub carry\n\n    shr     Size, 1\n    jnc     .sumdiff_n_two\n\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    add     Limb2, Limb5\n    mov     [Op3], Limb2\n\n    SaveAC\n\n    sub     Limb1, Limb5\n    mov     [Op4], Limb1\n\n    SaveSC\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    add     Op4, 8\n\n  .sumdiff_n_two:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    mov     Limb5, [Op2]\n    mov     Limb6, [Op2+8]\n\n    LoadAC\n\n    mov  
   Limb3, Limb1\n    adc     Limb3, Limb5\n    mov     [Op3], Limb3\n    mov     Limb4, Limb2\n    adc     Limb4, Limb6\n    mov     [Op3+8], Limb4\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n\n    SaveSC\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    add     Op4, 16\n\n  .sumdiff_n_four:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_loop_pre ;ajs:notshortform\n\n    LoadAC\n\n    ; slight change of scheme here - avoid too many\n    ; memory to reg or reg to memory moves in a row\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    SaveSC\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    add     Op4, 32\n \n    test   Size, Size\n  .sumdiff_n_loop_pre:\t\t; If we jump here, ZF=1 iff Size=0\n    jz     .sumdiff_n_post      ;ajs:notshortform\n    LoadAC              ; set carry for addition\n\n    ; main loop - values below are best case - up to 50% fluctuation possible!\n    ; - 3.50      cycles per limb in LD1$\n    ; - 3.50      cycles per limb in LD2$\n    ; - 5.10-5.50 cycles per limb in LD3$\n    align   16\n  .sumdiff_n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta 
[Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; add the first quad-limb\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n    lea     Op3, [Op3 + 64]\n\n    SaveAC              ; memorize add-carry\n    LoadSC              ; set carry for subtraction\n\n    sbb     Limb1, Limb5        ; now sub the first quad-limb\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    mov     Limb1, [Op1+32]     ; sub the second quad-limb\n    mov     Limb5, [Op2+32]\n    mov     Limb0, Limb1\n    sbb     Limb0, Limb5\n    mov     [Op4+32], Limb0\n    mov     Limb2, [Op1+40]\n    mov     Limb6, [Op2+40]\n    mov     Limb0, Limb2\n    sbb     Limb0, Limb6\n    mov     [Op4+40], Limb0\n    mov     Limb3, [Op1+48]\n    mov     Limb7, [Op2+48]\n    mov     Limb0, Limb3\n    sbb     Limb0, Limb7\n    mov     [Op4+48], Limb0\n    mov     Limb4, [Op1+56]\n    mov     Limb8, [Op2+56]\n    mov     Limb0, Limb4\n    sbb     Limb0, Limb8\n    mov     [Op4+56], Limb0\n    lea     Op4, [Op4 + 64]\n\n    SaveSC                      ; memorize sub-carry\n    LoadAC                      ; set carry for addition\n\n    adc     Limb1, Limb5        ; add the second quad-limb\n    mov     [Op3+32-64], Limb1\n    adc     Limb2, Limb6\n    mov     [Op3+40-64], Limb2\n    adc     Limb3, Limb7\n    mov     [Op3+48-64], Limb3\n    adc     Limb4, Limb8\n    mov     [Op3+56-64], Limb4\n\n    lea     
Op1, [Op1 + 64]\n    lea     Op2, [Op2 + 64]\n\n    dec     Size\n    jnz     .sumdiff_n_loop     ;ajs:notshortform\n\n    SaveAC                      ; memorize add-carry\n    ; hand back carries\n  .sumdiff_n_post:\n\t\t\t\t; AL = add_carry, AH = -sub_carry\n    LoadSC\t\t\t; AL = add_carry, CY = sub_carry\n    adc     AL, AL\t\t; AL = 2*add_carry + sub_carry\n    movsx   RAX, AL\n\n   END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/haswell/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  mpn_xnor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xnor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpxor    xmm1, xmm3\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\txor     rax, r9\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/haswell/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  mpn_xor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpxor    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\txor     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/add_err1_n.asm",
    "content": "; PROLOGUE(mpn_add_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_add_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_add_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    mov     r9, qword [rsp+stack_use+48]\n\tmov     r10, [rsp+stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\t\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\n\txalign  16\n.1:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tadc     
r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tadc     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n    \n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tjmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_add_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_add_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    adc     r10, [r8]\n    adc     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    adc     r10, [r8-16]\n    adc     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/addadd_n.asm",
    "content": "; PROLOGUE(mpn_addadd_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addadd_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tadc     r8, [rcx+r9*8]\n\tadc     rbx, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, 
[rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tadc     r8, [rcx+r9*8]\n\tadc     rbx, [rcx+r9*8+8]\n\tadc     rbp, [rcx+r9*8+16]\n\tadc     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rdx+r9*8+48]\n\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tadc     rbx, [rcx+r9*8+40]\n\tadc     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tadc     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rdx+r9*8+32]\n\tadd     rax, 1\n\tadc     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tsbb     rax, 0\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tadd     rax, r10\n.8:\tneg     rax\n    END_PROC 
reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/addlsh1_n.asm",
    "content": "; PROLOGUE(mpn_addlsh1_n)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addlsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tlea     rdx, [r8+rax*8]\n    mov     rcx, rax\n\tneg     rcx\n\txor     r9, r9\n\txor     rax, rax\n\ttest    rcx, 3\n\tjz      .2\n.1:\tmov     r10, [rdx+rcx*8]\n\tadd     r9, 1\n\tadc     r10, r10\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tinc     rcx\n\ttest    rcx, 3\n\tjnz     .1\n.2:\tcmp     rcx, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r10, [rdx+rcx*8]\n\tmov     rbx, [rdx+rcx*8+8]\n\tmov     r11, [rdx+rcx*8+16]\n\tmov     r8, [rdx+rcx*8+24]\n\tadd     r9, 1\n\tadc     r10, r10\n\tadc     rbx, rbx\n\tadc     r11, 
r11\n\tadc     r8, r8\n\tsbb     r9, r9\n\tadd     rax, 1\n\tadc     r10, [rsi+rcx*8]\n\tadc     rbx, [rsi+rcx*8+8]\n\tadc     r11, [rsi+rcx*8+16]\n\tadc     r8, [rsi+rcx*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+rcx*8], r10\n\tmov     [rdi+rcx*8+8], rbx\n\tmov     [rdi+rcx*8+16], r11\n\tmov     [rdi+rcx*8+24], r8\n\tadd     rcx, 4\n\tjnz     .3\n.4:\tadd     rax, r9\n\tneg     rax\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/addlsh_n.asm",
    "content": "; PROLOGUE(mpn_addlsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_addlsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_addlsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_addlsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_addlsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\n    jmp     entry\n    \n    xalign 16\nentry:\n\tFRAME_PROC ?mpn_addlsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     
rcx\n\tshr     r11, cl\n\tor      r8, r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\t\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tadc     r8, [rsi+r10*8]\n\tadc     rbp, [rsi+r10*8+8]\n\tadc     rbx, [rsi+r10*8+16]\n\tadc     r12, [rsi+r10*8+24]\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r8\n\tmov     [rdi+r10*8+8], rbp\n\tmov     [rdi+r10*8+16], rbx\n\tmov     [rdi+r10*8+24], r12\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n        \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        xalign 16\n\t    LEAF_PROC mpn_inclsh_n\n\t    mov     r10, rcx\n\t    mov     ecx, r9d\n\t    mov     r9, 1\n\t    shl     r9, cl\n        mov     rcx, r10\n\n        xalign 16\n        LEAF_PROC mpn_addmul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jnz     .1\n        mul     r9\n        add     [rcx], rax\n        adc     rdx, 0\n        mov     rax, rdx\n        ret\n\n        xalign   16\n.1:\t    FRAME_PROC ?mpn_k8_addmul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        
db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n        mov     [rsp-8], r12\n        db      0x26\n        jge     .3\n.2:     xor     r10, r10\n        mul     rcx\n        add     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        add     [rdi+r11*8+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        add     [rdi+r11*8+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        add     [rdi+r11*8+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .2\n.3      xor     r10, r10\n        mul     rcx\n        add     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .7\n        jz      .6\n        jp      .5\n.4:     mov     rax, [rsi+16]\n        mul     rcx\n        add     [rdi+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        add     [rdi+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        add     [rdi+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     [rdi+32], r8\n        adc     r9, 0\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n.5:     mov     rax, [rsi+24]\n        mul     rcx\n        add     [rdi+16], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n       
 xor     r8, r8\n        mul     rcx\n        add     [rdi+24], r10\n        adc     r12, rax\n        adc     r8, rdx\n        add     [rdi+32], r12\n        adc     r8, 0\n        mov     rax, r8\n        EXIT_PROC reg_save_list\n\n        align   16\n.6:     mov     rax, [rsi+32]\n        mul     rcx\n        add     [rdi+24], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        add     [rdi+32], r10\n        adc     r12, 0\n        mov     rax, r12\n        EXIT_PROC reg_save_list\n\n.7:     add     [rdi+32], r9\n        adc     r10, 0\n        mov     rax, r10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     
r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/addsub_n.asm",
    "content": "; PROLOGUE(mpn_addsub_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_addsub_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rdx+r9*8+16]\n\tmov     r11, [rdx+r9*8+24]\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, 
[rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rdx+r9*8+48]\n\tmov     r11, [rdx+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rdx+r9*8]\n\tmov     rbx, [rdx+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8]\n\tadc     rbx, [rsi+r9*8+8]\n\tadc     rbp, [rsi+r9*8+16]\n\tadc     r11, [rsi+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rdx+r9*8+48]\n\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tadc     rbp, [rsi+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rdx+r9*8+32]\n\tmov     rbx, [rdx+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tadc     rbx, [rsi+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rdx+r9*8+32]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tadc     r8, [rsi+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tadc     rax, 0\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tsub     rax, r10\n.8:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_and_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    and     r10, [r8]\n    and     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    and     T3, [r8-16]\n    and     T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    and     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    and     r10, 
[r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    and     r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_andn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1:\tmov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    and     r10, [rdx+r9*8+24]\n    and     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    and     T3, [rdx+r9*8+8]\n    and     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n   
 mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                     rdi     rsi       rdx\n;                     rcx     rdx        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n    LEAF_PROC mpn_com_n\n    mov     rax, r8\n    sub     rax, 4\n    jc      .2\n\n    xalign  8\n.1: mov     r8, [rdx+rax*8+24]\n    mov     r9, [rdx+rax*8+16]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+24], r8\n    mov     [rcx+rax*8+16], r9\n    mov     r8, [rdx+rax*8+8]\n    mov     r9, [rdx+rax*8]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+8], r8\n    mov     [rcx+rax*8], r9\n    sub     rax, 4\n    jae     .1\n.2: add     rax, 4\n    jz      .3\n\n; Could still have potential cache-bank conflicts in this tail part\n\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    dec     rax\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    dec     rax\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    
not     r8\n    mov     [rcx+rax*8-8], r8\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyd(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n;\tmpn_copyd(mp_ptr rcx ,mp_ptr rdx ,mp_size_t r8)\n;\t(rcx,r8)=(rdx,r8)\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     rcx, [rcx+r8*8-8]\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, [rdx-16]\n\tlea     rcx, [rcx-32]\n\tmov     r11, [rdx-24]\n\tmov     [rcx+32], rax\n\tsub     r8, 4\n\tmov     [rcx+24], r9\n\tmov     [rcx+16], r10\n\tlea     rdx, [rdx-32]\n\tmov     [rcx+8], r11\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx-8]\n\tmov     r10, 
[rdx-16]\n\tmov     [rcx], rax\n\tmov     [rcx-8], r9\n\tmov     [rcx-16], r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n;\tmpn_copyi(mp_ptr rcx ,mp_ptr rdx ,mp_size_t r8)\n;\t(rcx,r8)=(rdx,r8)\n\n\tLEAF_PROC mpn_copyi\n\tsub     r8, 4\n\tjl      .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tsub     r8, 4\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tlea     rcx, [rcx+32]\n\tmov     [rcx-32], rax\n\tmov     [rcx-24], r9\n\tmov     [rcx-16], r10\n\tlea     rdx, [rdx+32]\n\tmov     [rcx-8], r11\n\tjns     .1\n.2:\tadd     r8, 2\n\tjz      .5\n\tjns     .6\n\tjp      .4\n.3:\tret\n\t\n\txalign  16\n.4:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tret\n\t\n\txalign  16\n.5:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     [rcx], rax\n\tmov     [rcx+8], r9\n\tret\n\t\n\txalign  16\n.6:\tmov     rax, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     [rcx], rax\n\tmov     [rcx+8], r9\n\tmov     
[rcx+16], r10\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; dont want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                   rdi     rsi        rdx        rcx\n;  rax                                   rcx     rdx         r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-8]\n\tlea     rsi, [rdx+rax*8-8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov    
 r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     r10, 0\n\tadd     r9, 2\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/divrem_hensel_r_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_r_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_r_1(mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                 rdi        rsi        rdx\n;  rax                                 rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n\tLEAF_PROC mpn_divrem_hensel_r_1\n    movsxd  rax, edx\n\tmov     rdx, r8\n\tlea     r10, [rcx+rax*8]\n\tmov     r9, 0\n\tsub     r9, rax\n\n    mov     rcx, rdx    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [r10+r9*8]\n\tsbb     
rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tret\n\t\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/double.asm",
    "content": "; PROLOGUE(mpn_double)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_double(mp_ptr, mp_size_t)\n;                          rdi        rsi\n;                          rcx        rdx\n\n%include \"yasm_mac.inc\"\n\n        CPU  Athlon64\n        BITS 64\n\n    \tLEAF_PROC mpn_double\n\n        mov     rax, rdx\n        shr     rdx, 2\n        and     eax, 3\n        jz      .1\n        shl     qword [rcx], 1\n        lea     rcx, [rcx+8]\n        dec     rax\n        jz      .1\n        rcl     qword [rcx], 1\n        lea     rcx, [rcx+8]\n        dec     rax\n        jz      .1\n        rcl     qword [rcx], 1\n        lea     rcx, [rcx+8]\n        dec     rax\n.1:     sbb     r8, r8\n        cmp     rdx, 0\n        jz      .3\n        add     r8, r8\n        \n        xalign   16\n.2:     rcl     qword [rcx], 1\n        nop     \n        rcl     qword [rcx+8], 1\n        rcl     qword [rcx+16], 1\n        rcl     qword [rcx+24], 1\n        nop     \n        dec     rdx\n        lea     rcx, [rcx+32]\n        jnz     .2\n        sbb     r8, r8\n.3:     \n        sub     
rax, r8\n        ret     \n\n        end\n\n"
  },
  {
    "path": "mpn/x86_64w/k8/half.asm",
    "content": "; PROLOGUE(mpn_half)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_half(mp_ptr, mp_size_t)\n;                        rdi        rsi\n;                        rcx        rdx\n\n%include \"yasm_mac.inc\"\n\n        CPU  Athlon64\n        BITS 64\n\n    \tLEAF_PROC mpn_half\n\n        mov     rax, rdx\n        lea     rcx, [rcx+rdx*8-8]\n        shr     rdx, 2\n        and     eax, 3\n        jz      .1\n        shr     qword [rcx], 1\n        lea     rcx, [rcx-8]\n        dec     rax\n        jz      .1\n        rcr     qword [rcx], 1\n        lea     rcx, [rcx-8]\n        dec     rax\n        jz      .1\n        rcr     qword [rcx], 1\n        lea     rcx, [rcx-8]\n        dec     rax\n.1:     sbb     r8, r8\n        cmp     rdx, 0\n        jz      .3\n        add     r8, r8\n        \n        xalign   16\n.2:     rcr     qword [rcx], 1\n        nop     \n        rcr     qword [rcx-8], 1\n        rcr     qword [rcx-16], 1\n        rcr     qword [rcx-24], 1\n        nop     \n        dec     rdx\n        lea     rcx, [rcx-32]\n        jnz     .2\n        sbb     r8, 
r8\n.3:     sub     rax, r8\n        shl     rax, 63\n        ret     \n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  mpn_hamdist\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list   rsi, rdi, rbp, r12, r14\n\n\tFRAME_PROC mpn_hamdist, 0, reg_save_list\n\tmov\trdi, rcx\n\tmov\trsi, rdx\n\tmov   \trdx, r8\n\n\tmov     r8, 0x5555555555555555\n\tmov     r9, 0x3333333333333333\n\tmov     r10, 0x0f0f0f0f0f0f0f0f\n\tmov     r11, 0x0101010101010101\n\txor     eax, eax\n\tsub     rdx, 3\n\tjc      .2\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     r12, [rdi+rdx*8+8]\n\txor     r12, [rsi+rdx*8+8]\n\tmov     r14, [rdi+rdx*8]\n\txor     r14, [rsi+rdx*8]\n\tsub     rdx, 3\n\tjc      .1\n\txalign  16\n.0:\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     rbp, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rbp, r12\n\tmov     r12, rbp\n\tshr 
    rbp, 2\n\tand     r12, r9\n\tand     rbp, r9\n\tadd     rbp, r12\n\tmov     r12, r14\n\tshr     r14, 1\n\tand     r14, r8\n\tsub     r12, r14\n\tmov     r14, r12\n\tshr     r12, 2\n\tand     r14, r9\n\tand     r12, r9\n\tadd     r12, r14\n\tadd     rbp, rcx\n\tadd     rbp, r12\n\tmov     rcx, [rdi+rdx*8+16]\n\tmov     r14, rbp\n\tshr     rbp, 4\n\tand     r14, r10\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     r12, [rdi+rdx*8+8]\n\txor     r12, [rsi+rdx*8+8]\n\tand     rbp, r10\n\tadd     r14, rbp\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tmov     r14, [rdi+rdx*8]\n\txor     r14, [rsi+rdx*8]\n\tsub     rdx, 3\n\tjnc     .0\n.1:\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     rbp, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rbp, r12\n\tmov     r12, rbp\n\tshr     rbp, 2\n\tand     r12, r9\n\tand     rbp, r9\n\tadd     rbp, r12\n\tmov     r12, r14\n\tshr     r14, 1\n\tand     r14, r8\n\tsub     r12, r14\n\tmov     r14, r12\n\tshr     r12, 2\n\tand     r14, r9\n\tand     r12, r9\n\tadd     r12, r14\n\tadd     rbp, rcx\n\tadd     rbp, r12\n\tmov     r14, rbp\n\tshr     rbp, 4\n\tand     r14, r10\n\tand     rbp, r10\n\tadd     r14, rbp\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.2:\n\tcmp     rdx, -2\n\tjl      .5\n\tjz      .4\n.3:\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     r14, rcx\n\tshr     rcx, 4\n\tand     r14, r10\n\tand     rcx, r10\n\tadd     r14, rcx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tdec     rdx\n.4:\n\tmov     rcx, [rdi+rdx*8+16]\n\txor     rcx, [rsi+rdx*8+16]\n\tmov     rbp, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rbp, rcx\n\tmov     rcx, 
rbp\n\tshr     rbp, 2\n\tand     rcx, r9\n\tand     rbp, r9\n\tadd     rcx, rbp\n\tmov     r14, rcx\n\tshr     rcx, 4\n\tand     r14, r10\n\tand     rcx, r10\n\tadd     r14, rcx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.5:\tEND_PROC reg_save_list\n\n    \tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_ior_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    or      r10, [r8]\n    or      r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    or      T3, [r8-16]\n    or      T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    or      r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    or      r10, 
[r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    or      r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_iorn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rdx,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    FRAME_PROC mpn_hamdist, 0, rdi\n\txor     eax, eax\n\tlea     rdi, [rcx+r8*8-24]\n\tlea     rdx, [rdx+r8*8-24]\n\tmov     rcx, 3\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r8, [rdi+rcx*8]\n\tmov     r9, [rdi+rcx*8+8]\n\txor     r8, [rdx+rcx*8]\n\tmov     r10, [rdi+rcx*8+16]\n\tpopcnt  r8, r8\n\txor     r9, [rdx+rcx*8+8]\n\txor     r10, [rdx+rcx*8+16]\n\tpopcnt  r9, r9\n\tmov     r11, [rdi+rcx*8+24]\n\tadd     rax, r8\n\tpopcnt  r10, r10\n\txor     r11, [rdx+rcx*8+24]\n\tadd     rax, r9\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 4\n\tjnc     .0\n.1:\n\tcmp     rcx, 2\n\tja      .5\n\tje      .4\n\tjp      .3\n.2:\tmov     r8, [rdi]\n\txor     r8, [rdx]\n\tpopcnt  r8, r8\n\tadd     rax, r8\n.3:\tmov     r8, [rdi+8]\n\txor     r8, [rdx+8]\n\tpopcnt  
r8, r8\n\tadd     rax, r8\n.4:\tmov     r8, [rdi+16]\n\txor     r8, [rdx+16]\n\tpopcnt  r8, r8\n\tadd     rax, r8\n.5: END_PROC rdi\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     
[rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, 
[rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        
jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karatsuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd then do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accumulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quarter block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_lshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shl     rdx, cl\n    neg     rcx\n    shr     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     r11, r8\n    mov     rax, r9\n    shl     r8, cl\n    shl     r9, cl\n    neg     rcx\n    shr     r11, cl\n    shr     rax, cl\n    or      r9, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     eax, 64\n    sub     rax, rcx\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    lea     r9, [rdx+r8*8-16]\n    mov     r11, r9\n    and     r9, -16\n    movdqa  xmm3, [r9]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, 
xmm1\n    pshufd  xmm3, xmm3, 0x4e\n    movq    rax, xmm3\n    cmp     r11, r9\n    je      .4\n    movq    xmm2, [rdx+r8*8-8]\n    movq    xmm4, xmm2\n    psrlq   xmm2, xmm1\n    movq    rax, xmm2\n    psllq   xmm4, xmm0\n    por     xmm4, xmm3\n    movq    [r10+r8*8-8], xmm4\n    dec     r8\n.4:\tsub     r8, 5\n    jle     .6\n\n    xalign  16\n.5: movdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movdqa  xmm3, [rdx+r8*8-8]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    pshufd  xmm3, xmm3, 0x4e\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    sub     r8, 4\n    jg      .5\n.6: cmp     r8, -1\n    je      .9\n    jg      .8\n    jp      .10\n.7:\tpxor    xmm2, xmm2\n    psllq   xmm5, xmm0\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movq    xmm3, [rdx+r8*8]\n    pshufd  xmm3, xmm3, 0x4e\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    psllq   xmm5, xmm0\n    movhpd  [r10+r8*8], xmm5\n    ret\n\n    xalign  16\n.9:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    pxor    xmm3, xmm3\n    movhlps 
xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:movq    xmm2, [rdx+r8*8+16]\n    pshufd  xmm2, xmm2, 0x4e\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    psllq   xmm4, xmm0\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\trax                        rcx,      rdx\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_popcount\n\txor     eax, eax\n\tpopcnt  r9, [rcx+rdx*8-8]\n\tsub     rdx, 3\n\tjc      .1\n\txalign  16\n.0:\tpopcnt  r8, [rcx+rdx*8+8]\n\tadd     rax, r9\n\tadd     rax, r8\n\tpopcnt  r9, [rcx+rdx*8]\n\tsub     rdx, 2\n\tjnc     .0\n.1: jnp     .3\n.2:\tpopcnt  r8, [rcx]\n\tadd     rax, r8\n.3:\tadd     rax, r9\n\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_rshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shr     rdx, cl\n    neg     rcx\n    shl     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     rax, r8\n    mov     r11, r9\n    shr     r8, cl\n    shr     r9, cl\n    neg     rcx\n    shl     r11, cl\n    shl     rax, cl\n    or      r8, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     r11, rdx\n    mov     rdx, r8\n\n    mov     eax, 64\n    lea     r9, [r11+8]\n    sub     rax, rcx\n    and     r9, -16\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    movdqa  xmm5, [r9]\n    movdqa  xmm3, xmm5\n   
 psllq   xmm5, xmm1\n    movq    rax, xmm5\n    cmp     r11, r9\n    lea     r11, [r11+rdx*8-40]\n    je      .4\n    movq    xmm2, [r9-8]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    psrlq   xmm4, xmm0\n    por     xmm4, xmm5\n    movq    [r10], xmm4\n    lea     r10, [r10+8]\n    dec     rdx\n    movq    rax, xmm2\n.4: lea     r10, [r10+rdx*8-40]\n    psrlq   xmm3, xmm0\n    mov     r8d, 5\n    sub     r8, rdx\n    jnc     .6\n\n    xalign  16\n.5: movdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movdqa  xmm5, [r11+r8*8+32]\n    movdqa  xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    add     r8, 4\n    jnc     .5\n.6: cmp     r8, 2\n    ja      .10\n    jz      .9\n    jp      .8\n.7:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movq    xmm5, [r11+r8*8+32]\n    movq    xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8+32], xmm3\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    ret\n\n    xalign  16\n.9:\tmovq    xmm2, [r11+r8*8+16]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  
xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/k10/store.asm",
    "content": "; PROLOGUE(mpn_store)\n;  Copyright 2009 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;\tmpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;   r10          rdi        rsi        rdx\n;\tr10          rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n\tLEAF_PROC mpn_store\n\tmov     rax, rdx\n\tand     rax, 7\n\tmov     r9, 8\n\tsub     r9, rax\n    mov     rax, r8\n\tlea     r9, [r9+r9*4]\n\tlea     r10, [rel .0]\n\tadd     r10, r9\n\tand     rdx, -8\n\tadd     rcx, 48\n\tjmp     r10\n\txalign  16\n.0:\tmov     [rcx+rdx*8+8], rax\n\tmov     [byte rcx+rdx*8+0], rax\n\tmov     [rcx+rdx*8-8], rax\n\tmov     [rcx+rdx*8-16], rax\n\tmov     [rcx+rdx*8-24], rax\n\tmov     [rcx+rdx*8-32], rax\n\tmov     [rcx+rdx*8-40], rax\n\tmov     [rcx+rdx*8-48], rax\n\tnop\n\tsub     rdx, 8\n\tjnc     .0\n\tnop\n\tret\n\n    end"
  },
  {
    "path": "mpn/x86_64w/k8/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], 
r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 
1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     xor     r8, r8\n        shr     rax, 1\n        adc     r8, r8\n        shr     rax, 1\n        adc     r8, 0\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        xor     r8, r8\n        shr     rbx, 1\n      
  adc     r8, r8\n        shr     rbx, 1\n        adc     r8, 0\n        shr     rbx, 1\n        adc     r8, 0\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karatsuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd then do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accumulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quarter block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11:    mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13:    mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift\n    mov     r9d, r9d\n    mov     eax, 64\n    sub     rax, r9\n    movq    mm0, r9\n    sub     r8, 4\n    movq    mm1, rax\n    movq    mm5, [rdx+r8*8+24]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    movq    rax, mm5\n    psllq   mm3, mm0\n    jbe     .2\n\n    xalign  16\n.1:\tmovq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    mm5, [rdx+r8*8+8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8+16], mm4\n    psllq   mm3, mm0\n    movq    mm2, [rdx+r8*8]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+8], mm3\n    psllq   mm4, 
mm0\n    movq    mm5, [rdx+r8*8-8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8], mm4\n    psllq   mm3, mm0\n    sub     r8, 4\n    ja      .1\n\n; r8 is 0,-1,-2,-3 here , so we have 3+r8 limbs to do\n.2: cmp     r8, -1\n    jl      .3\n    movq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    mm5, [rdx+r8*8+8]\n    movq    mm3, mm5\n    psrlq   mm5, mm1\n    por     mm4, mm5\n    movq    [rcx+r8*8+16], mm4\n    psllq   mm3, mm0\n    sub     r8, 2\n.3: test    r8, 1\n    jnz     .4\n    movq    mm2, [rdx+r8*8+16]\n    movq    mm4, mm2\n    psrlq   mm2, mm1\n    por     mm3, mm2\n    movq    [rcx+r8*8+24], mm3\n    psllq   mm4, mm0\n    movq    [rcx+r8*8+16], mm4\n    emms\n    ret\n.4: movq    [rcx+r8*8+24], mm3\n    emms\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift1\n    mov     rax, r8\n\tand     r8, 7\n\tinc     r8\n\tmov     [rsp+0x18], r8\n\tshr     rax, 3\n\tcmp     rax, 0\n\tjz      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx], r8\n\tmov     [rcx+8], r9\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], r11\n\tmov     r8, [rdx+32]\n\tmov     r9, [rdx+40]\n\tmov     r10, [rdx+48]\n\tmov     r11, [rdx+56]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx+32], r8\n\tmov     [rcx+40], r9\n\tmov     [rcx+48], r10\n\tmov     [rcx+56], r11\n\tlea     rcx, [rcx+64]\n\tdec     rax\n\tlea     rdx, [rdx+64]\n\tjnz     .1\n.2:\tmov     
rax, [rsp+0x18]\n\tdec     rax\n\tjz      .3\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     r8, [rdx]\n\tadc     r8, r8\n\tmov     [rcx], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+8]\n\tadc     r8, r8\n\tmov     [rcx+8], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+16]\n\tadc     r8, r8\n\tmov     [rcx+16], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+24]\n\tadc     r8, r8\n\tmov     [rcx+24], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+32]\n\tadc     r8, r8\n\tmov     [rcx+32], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+40]\n\tadc     r8, r8\n\tmov     [rcx+40], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+48]\n\tadc     r8, r8\n\tmov     [rcx+48], r8\n.3:\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift2.asm",
    "content": "; PROLOGUE(mpn_lshift2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift2(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\txor     eax, eax\n\txor     edx, edx\n\tcmp     rcx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tmov     r10, [rsi+rcx*8+16]\n\tmov     r11, [rsi+rcx*8+24]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rdi+rcx*8+24], r11\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8-24], r9\n\tmov     [rdi+rcx*8-16], r10\n\tjnc     
.1\n.2:\tcmp     rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tmov     r10, [rsi+rcx*8+16]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n\tmov     [rdi+rcx*8+16], r10\n\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8+8]\n\tadd     rax, rax\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tadc     r9, r9\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+rcx*8]\n\tadd     rax, rax\n\tadc     r8, r8\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\tadc     r8, r8\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n.6:\tlea     rax, [rdx+rax*2]\n\tneg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift3.asm",
    "content": "; PROLOGUE(mpn_lshift3)\n\n;  mpn_lshift3\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t mpn_lshift3(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift3, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\tmov     r8, 0\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmov     r9, [rsi+rcx*8]\n\tlea     r8, [r8+r9*8]\n\tshr     r9, 61\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     r9, [r9+r10*8]\n\tshr     r10, 61\n\tmov     r11, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8], r8\n\tlea     r10, [r10+r11*8]\n\tmov     [rdi+rcx*8+16], r10\n\tshr     r11, 61\n\tmov     r8, [rsi+rcx*8+24]\n\tlea     r11, [r11+r8*8]\n\tmov     [rdi+rcx*8+24], r11\n\tshr     r8, 61\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8+8-32], r9\n\tjnc     .1\n.2:\tcmp     rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r9, [rsi+rcx*8]\n\tlea     r8, [r8+r9*8]\n\tshr     r9, 
61\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     r9, [r9+r10*8]\n\tshr     r10, 61\n\tmov     r11, [rsi+rcx*8+16]\n\tmov     [rdi+rcx*8], r8\n\tlea     r10, [r10+r11*8]\n\tmov     [rdi+rcx*8+16], r10\n\tshr     r11, 61\n\tmov     rax, r11\n\tmov     [rdi+rcx*8+8], r9\n\tjmp\t\t.7\n\t\n\txalign  16\n.4:\tmov     r9, [rsi+rcx*8]\n\tlea     r8, [r8+r9*8]\n\tshr     r9, 61\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     r9, [r9+r10*8]\n\tshr     r10, 61\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r10\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r9, [rsi+rcx*8]\n\tlea     r8, [r8+r9*8]\n\tshr     r9, 61\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tmov     rax, r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift4.asm",
    "content": "; PROLOGUE(mpn_lshift4)\n\n;  mpn_lshift4\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t mpn_lshift4(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift4, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\tmov     r8, 0\n\tmov     rax, 0\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*2]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 60\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*2]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 60\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*2]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 60\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     r8, [rsi+rcx*8+24]\n\tlea     rdx, [rax+r8*2]\n\tlea     r11, [r11+rdx*8]\n\tshr     r8, 60\n\tmov     [rdi+rcx*8+24], r11\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8+8-32], r9\n\tjnc     .1\n.2:\tcmp 
    rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*2]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 60\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*2]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 60\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*2]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 60\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r11\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*2]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 60\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*2]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 60\n\tmov     rax, r10\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.5:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*2]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 60\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tmov     rax, r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift5.asm",
    "content": "; PROLOGUE(mpn_lshift5)\n\n;  mpn_lshift5\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t mpn_lshift5(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift5, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\tmov     r8, 0\n\tmov     rax, 0\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*4]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 59\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*4]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 59\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*4]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 59\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     r8, [rsi+rcx*8+24]\n\tlea     rdx, [rax+r8*4]\n\tlea     r11, [r11+rdx*8]\n\tshr     r8, 59\n\tmov     [rdi+rcx*8+24], r11\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8+8-32], r9\n\tjnc     .1\n.2:\tcmp 
    rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*4]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 59\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*4]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 59\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*4]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 59\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r11\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*4]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 59\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*4]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 59\n\tmov     rax, r10\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.5:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*4]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 59\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tmov     rax, r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshift6.asm",
    "content": "; PROLOGUE(mpn_lshift6)\n\n;  mpn_lshift6\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t mpn_lshift6(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift6, 0, reg_save_list\n    mov     rax, r8\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     ecx, 3\n\tsub     rcx, rax\n\tmov     r8, 0\n\tmov     rax, 0\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*8]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 58\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*8]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 58\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*8]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 58\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     r8, [rsi+rcx*8+24]\n\tlea     rdx, [rax+r8*8]\n\tlea     r11, [r11+rdx*8]\n\tshr     r8, 58\n\tmov     [rdi+rcx*8+24], r11\n\tadd     rcx, 4\n\tmov     [rdi+rcx*8+8-32], r9\n\tjnc     .1\n.2:\tcmp 
    rcx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*8]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 58\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*8]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 58\n\tmov     r11, [rsi+rcx*8+16]\n\tlea     rdx, [rax+r11*8]\n\tlea     r10, [r10+rdx*8]\n\tshr     r11, 58\n\tmov     [rdi+rcx*8+16], r10\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r11\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*8]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 58\n\tmov     r10, [rsi+rcx*8+8]\n\tlea     rdx, [rax+r10*8]\n\tlea     r9, [r9+rdx*8]\n\tshr     r10, 58\n\tmov     rax, r10\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8+8], r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.5:\tmov     r9, [rsi+rcx*8]\n\tlea     rdx, [rax+r9*8]\n\tlea     r8, [r8+rdx*8]\n\tshr     r9, 58\n\tmov     [rdi+rcx*8], r8\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tmov     rax, r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/lshiftc.asm",
    "content": "; PROLOGUE(mpn_lshiftc)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void lshiftc(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;                  rdi     rsi        rdx      rcx\n;                  rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshiftc\n\tmov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tpcmpeqb mm6, mm6\n\tmovq    mm1, rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 5\n\tmovq    mm5, [rdx+r8*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjc      .2\n\t\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tmovq    mm5, [rdx+r8*8-8]\n\tsub     r8, 4\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+40], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+32], mm4\n\tpsllq   mm3, mm0\n\tjnc     .1\n.2: cmp     r8, -2\n\tjz      .4\n\tjp      .5\n\tjs      .6\n.3:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8], mm4\n\temms\n\tret\n\n\txalign  16\n.4:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\temms\n\tret\n\n\txalign  16\n.5:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\temms\n\tret\n\n\txalign  16\n.6:\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\temms\n\tret\n\t\n"
  },
  {
    "path": "mpn/x86_64w/k8/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n    FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n    \n\tmov     r13, [rsi+rdx*8-8]\n\tmov     rax, [rsi+rdx*8-16]\n\tmov     r8, [r9]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tsub     rdi, 2\n\t\n\txalign  16\n.1:\tmov     r10, [rsi+rdi*8-8]\n\tmul     r8\n\tadd     r10, rax\n\tmov     r11, 0\n\tadc     r11, rdx\n\tmov     rax, r13\n\tmul     r9\n\tadd     rax, r10\n\tmov     r13, r11\n\tadc     r13, rdx\n\tdec     rdi\n\tjnz     .1\n\n\tmov     [rcx], rax\n\tmov     rax, r8\n\tmul     r13\n\tadd     [rcx], rax\n\tadc     rdx, 0\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n        FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r14, [rsi+rdx*8-8]\n        mov     r13, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r10, [r9+16]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        mov     rax, [rsi+rdx*8-24]\n        mul     r8\n        mov     r11, [rsi+rcx*8-32]\n        xor     r12, r12\n        sub     rcx, 6\n        jc      .2\n    \n        align   16\n.1:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     
r14\n        add     r13, rax\n        mov     rax, [rsi+rcx*8+8]\n        mov     r14, r12\n        adc     r14, rdx\n        mul     r8\n        mov     r12d, 0\n        mov     r11, [rsi+rcx*8+0]\n        sub     rcx, 2\n        jnc     .1\n.2:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n        cmp     rcx, -2\n        je      .4\n.3:     mov     r11, [rsi+rcx*8+8]\n        xor     r12, r12\n        mov     rax, r8\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r9\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n.4:     mov     rax, r8\n        mul     r14\n        add     r13, rax\n        adc     rdx, 0\n        mov     [rdi], r13\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n    \n    end\n    \n"
  },
  {
    "path": "mpn/x86_64w/k8/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        LEAF_PROC mpn_mul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jne     .1\n        mul     r9\n        mov     [rcx], rax\n        mov     rax, rdx\n        ret\n\n.1:     FRAME_PROC ?mpn_k8_mul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n        mov     
[rsp-8], r12\n        db      0x26\n        jge     .2\n.1:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        mov     [rdi+r11*8+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+r11*8+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        mov     [rdi+r11*8+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .1\n.2:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .5\n        jz      .4\n        jp      .3\n        mov     rax, [rsi+16]\n        mul     rcx\n        mov     [rdi+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        mov     [rdi+32], r8\n        mov     rax, r9\n        EXIT_PROC   reg_save_list\n\n.3:     mov     rax, [rsi+24]\n        mul     rcx\n        mov     [rdi+16], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        mov     [rdi+24], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     [rdi+32], r12\n        mov     rax, r8\n        EXIT_PROC   reg_save_list\n\n        align   16\n.4:     mov   
  rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     [rdi+32], r10\n        mov     rax, r12\n        EXIT_PROC   reg_save_list\n\n.5:     mov     [rdi+32], r9\n        mov     rax, r10\n        END_PROC   reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n    BITS 64\n\n\tFRAME_PROC mpn_mul_2, 0, reg_save_list\n\tmov     rax, r8\n\t\n\tmov     r8, [r9]\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, [r9+8]\n\tmov     rbx, 3\n\tsub     rbx, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tmov     r11, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, 
[rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 3\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 1\n\tja      .5\n\tje      .4\n.3:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:\n    mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 
2\n%macro mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov    
 r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      fiveormore\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_k8_mbc1, 5, frame\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n 
   mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      .2\n.1:\toldmpn_muladdmul_1_int 0\n    jmp     .5\n.2:\toldmpn_muladdmul_1_int 1\n    jmp     .5\n.3:\toldmpn_muladdmul_1_int 2\n    jmp     .5\n.4:\toldmpn_muladdmul_1_int 3\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\nfiveormore:\n    WIN64_GCC_PROC mpn_k8_mbc2, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .12\n.6:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .7\n    mul1lp\n.7:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .11\n    jz      .10\n    jp      .9\n.8:\tmulnext0\n    jmp     .20\n.9:\tmulnext1\n    jmp     .14\n.10:mulnext2\n    jmp     .16\n.11:mulnext3\n    jmp     .18\n     ; as all the mul2pro? are the same\n.12:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .19\n    jz      .17\n    jp      .15\n.13:mul2epi3\n.14:mpn_addmul_2_int 3\n    WIN64_GCC_EXIT frame\n\n.15:mul2epi2\n.16:mpn_addmul_2_int 2\n    WIN64_GCC_EXIT frame\n\n.17:mul2epi1\n.18:mpn_addmul_2_int 1\n    WIN64_GCC_EXIT frame\n\n.19:mul2epi0\n.20:mpn_addmul_2_int 0\n\n    xalign  16\n.21:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n   CPU  Athlon64\n    BITS 64\n\n %define reg_save_list rbx, rsi, rdi\n\n    FRAME_PROC mpn_nand_n, 0, reg_save_list\n\tlea     rdi, [rcx+r9*8]\n\tlea     rsi, [rdx+r9*8]\n\tlea     rdx, [r8+r9*8]\n    mov     rcx, r9\n\tneg     rcx\n\tadd     rcx, 3\n\tjc      .2\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjc      .1\n\txalign  16\n.0:\n\tand     r8, [rsi+rcx*8-56]\n\tnot     r8\n\tand     r9, [rsi+rcx*8-48]\n\tand     r10, [rsi+rcx*8-40]\n\tand     r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     
[rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjnc     .0\n.1:\n\tand     r8, [rsi+rcx*8-56]\n\tnot     r8\n\tand     r9, [rsi+rcx*8-48]\n\tand     r10, [rsi+rcx*8-40]\n\tand     r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     [rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n.2:\n\tcmp     rcx, 2\n\tjg      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rdx-24]\n\tand     r8, [rsi-24]\n\tnot     r8\n\tmov     [rdi-24], r8\n.4:\tmov     r8, [rdx-16]\n\tand     r8, [rsi-16]\n\tnot     r8\n\tmov     [rdi-16], r8\n.5:\tmov     r8, [rdx-8]\n\tand     r8, [rsi-8]\n\tnot     r8\n\tmov     [rdi-8], r8\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n %define reg_save_list rbx, rsi, rdi\n\n    FRAME_PROC mpn_nior_n, 0, reg_save_list\n\tlea     rdi, [rcx+r9*8]\n\tlea     rsi, [rdx+r9*8]\n\tlea     rdx, [r8+r9*8]\n    mov     rcx, r9\n\tneg     rcx\n\tadd     rcx, 3\n\tjc      .2\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjc      .1\n\txalign  16\n.0:\n\tor      r8, [rsi+rcx*8-56]\n\tnot     r8\n\tor      r9, [rsi+rcx*8-48]\n\tor      r10, [rsi+rcx*8-40]\n\tor      r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     r8, [rdx+rcx*8-24]\n\tmov     r9, [rdx+rcx*8-16]\n\tmov     
[rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n\tadd     rcx, 4\n\tmov     r10, [rdx+rcx*8-40]\n\tmov     r11, [rdx+rcx*8-32]\n\tjnc     .0\n.1:\n\tor      r8, [rsi+rcx*8-56]\n\tnot     r8\n\tor      r9, [rsi+rcx*8-48]\n\tor      r10, [rsi+rcx*8-40]\n\tor      r11, [rsi+rcx*8-32]\n\tmov     [rdi+rcx*8-56], r8\n\tnot     r9\n\tnot     r10\n\tmov     [rdi+rcx*8-48], r9\n\tnot     r11\n\tmov     [rdi+rcx*8-40], r10\n\tmov     [rdi+rcx*8-32], r11\n.2:\n\tcmp     rcx, 2\n\tjg      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rdx-24]\n\tor      r8, [rsi-24]\n\tnot     r8\n\tmov     [rdi-24], r8\n.4:\tmov     r8, [rdx-16]\n\tor      r8, [rsi-16]\n\tnot     r8\n\tmov     [rdi-16], r8\n.5:\tmov     r8, [rdx-8]\n\tor      r8, [rsi-8]\n\tnot     r8\n\tmov     [rdi-8], r8\n.6:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/not.asm",
    "content": "; PROLOGUE(mpn_not)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_not(mp_ptr, mp_size_t)\n;                  rdi        rsi\n;                  rcx        rdx\n\n%include \"yasm_mac.inc\"\n\n        CPU  Athlon64\n        BITS 64\n        LEAF_PROC mpn_not\n        mov     eax, 1\n        lea     rcx, [rcx+rdx*8-8]\n        sub     rax, rdx\n        jnc     .2\n\n        align   16\n.1:     not     qword[rcx+rax*8]\n        not     qword[rcx+rax*8+8]\n        add     rax, 2\n        jnc     .1\n.2:     jnz     .4\n.3:     not     qword[rcx+rax*8]\n.4:     ret     \n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  mpn_popcount\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\trax                        rcx,      rdx\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list   rsi, rdi, r12, r14, r15\n\n\tFRAME_PROC mpn_popcount, 0, reg_save_list\n\tmov\trdi, rcx\n\tmov   \trsi, rdx\n\n\tmov     r8, 0x5555555555555555\n\tmov     r9, 0x3333333333333333\n\tmov     r10, 0x0f0f0f0f0f0f0f0f\n\tmov     r11, 0x0101010101010101\n\txor     rax, rax\n\tsub     rsi, 3\n\tjc      .2\n\tmov     rcx, [rdi+rsi*8+16]\n\tmov     r12, [rdi+rsi*8+8]\n\tmov     r14, [rdi+rsi*8]\n\tsub     rsi, 3\n\tjc      .1\n\txalign  16\n.0:\n\tmov     rdx, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rdx, rcx\n\tmov     rcx, rdx\n\tshr     rdx, 2\n\tand     rcx, r9\n\tand     rdx, r9\n\tadd     rdx, rcx\n\tmov     rcx, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rcx, r12\n\tmov     r12, rcx\n\tshr     rcx, 2\n\tand     r12, r9\n\tand     rcx, r9\n\tadd     r12, rcx\n\tmov     r15, r14\n\tshr     r14, 1\n\tand     
r14, r8\n\tmov     rcx, [rdi+rsi*8+16]\n\tsub     r15, r14\n\tmov     r14, r15\n\tshr     r15, 2\n\tand     r14, r9\n\tand     r15, r9\n\tadd     r15, r14\n\tadd     r12, rdx\n\tadd     r12, r15\n\tmov     r14, r12\n\tshr     r12, 4\n\tand     r14, r10\n\tand     r12, r10\n\tadd     r14, r12\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tmov     r12, [rdi+rsi*8+8]\n\tsub     rsi, 3\n\tmov     r14, [rdi+rsi*8+24-0]\n\tjnc     .0\n.1:\n\tmov     rdx, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rdx, rcx\n\tmov     rcx, rdx\n\tshr     rdx, 2\n\tand     rcx, r9\n\tand     rdx, r9\n\tadd     rdx, rcx\n\tmov     rcx, r12\n\tshr     r12, 1\n\tand     r12, r8\n\tsub     rcx, r12\n\tmov     r12, rcx\n\tshr     rcx, 2\n\tand     r12, r9\n\tand     rcx, r9\n\tadd     r12, rcx\n\tmov     r15, r14\n\tshr     r14, 1\n\tand     r14, r8\n\tsub     r15, r14\n\tmov     r14, r15\n\tshr     r15, 2\n\tand     r14, r9\n\tand     r15, r9\n\tadd     r15, r14\n\tadd     r12, rdx\n\tadd     r12, r15\n\tmov     r14, r12\n\tshr     r12, 4\n\tand     r14, r10\n\tand     r12, r10\n\tadd     r14, r12\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.2:\tcmp     rsi, -2\n\tjl      .5\n\tjz      .4\n.3:\n\tmov     rcx, [rdi+rsi*8+16]\n\tmov     rdx, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rdx, rcx\n\tmov     rcx, rdx\n\tshr     rdx, 2\n\tand     rcx, r9\n\tand     rdx, r9\n\tadd     rdx, rcx\n\tmov     r14, rdx\n\tshr     rdx, 4\n\tand     r14, r10\n\tand     rdx, r10\n\tadd     r14, rdx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n\tdec     rsi\n.4:\tmov     rcx, [rdi+rsi*8+16]\n\tmov     rdx, rcx\n\tshr     rcx, 1\n\tand     rcx, r8\n\tsub     rdx, rcx\n\tmov     rcx, rdx\n\tshr     rdx, 2\n\tand     rcx, r9\n\tand     rdx, r9\n\tadd     rdx, rcx\n\tmov     r14, rdx\n\tshr     rdx, 4\n\tand     r14, r10\n\tand     rdx, r10\n\tadd     r14, rdx\n\timul    r14, r11\n\tshr     r14, 56\n\tadd     rax, r14\n.5:\tEND_PROC 
reg_save_list\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     
r9, [rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    dec     r15\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov    
 rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    dec     r15\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    dec     r15\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    dec     r15\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp     r9, 1\n    je      one\n    FRAME_PROC ?mpn_k8_redc_1, 0, reg_save_list\n 
   mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rsh1add_n.asm",
    "content": "; PROLOGUE(mpn_rsh1add_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rsh1add_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1add_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov 
    [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rsh1sub_n.asm",
    "content": "; PROLOGUE(mpn_rsh1sub_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  return bottom bit of difference\n;  subtraction treated as two compliment\n;\n;  mp_limb_t mpn_rsh1sub_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1sub_n, 0, reg_save_list\n    mov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      
r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    mov     r8d, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extention of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // eailer , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store low 
quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Verdxon 1.1.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_rshift\n    mov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tmovq    mm1, rax\n    mov     rax, r8\n\tmov     r8, 4\n\tlea     rdx, [rdx+rax*8-32]\n\tlea     rcx, [rcx+rax*8-32]\n\tsub     r8, rax\n\n\tmovq    mm5, [rdx+r8*8]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tmovq    rax, mm5\n\tpsrlq   mm3, mm0\n\tjge     .2\n\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    mm2, [rdx+r8*8+24]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq   
 mm5, [rdx+r8*8+32]\n\tmovq    [rcx+r8*8+16], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+24], mm4\n\tpsrlq   mm3, mm0\n\tadd     r8, 4\n\tjnc     .1\n\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n\n.3:\tmovq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    mm2, [rdx+r8*8+24]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8+16], mm3\n\tpsrlq   mm4, mm0\n\tmovq    [rcx+r8*8+24], mm4\n\temms\n\tret\n\n\txalign  16\n.4: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+16]\n\tmovq    mm3, mm5\n\tpsllq   mm5, mm1\n\tpor     mm4, mm5\n\tmovq    [rcx+r8*8+8], mm4\n\tpsrlq   mm3, mm0\n\tmovq    [rcx+r8*8+16], mm3\n\temms\n\tret\n\n\txalign  16\n.5: movq    mm2, [rdx+r8*8+8]\n\tmovq    mm4, mm2\n\tpsllq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    [rcx+r8*8], mm3\n\tpsrlq   mm4, mm0\n\tmovq    [rcx+r8*8+8], mm4\n\temms\n\tret\n\n\txalign  16\n.6: movq    [rcx+r8*8], mm3\n\temms\n\tret\n\tend\n\t"
  },
  {
    "path": "mpn/x86_64w/k8/rshift1.asm",
    "content": "; PROLOGUE(mpn_rshift1)\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t)\n;  rax                     rdi     rsi        rdx\n;  rax                     rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list r12, r13\n\n    FRAME_PROC mpn_rshift1, 0, reg_save_list\n\n    xor     rax, rax\n    lea     rdx, [rdx+r8*8-8]\n    lea     rcx, [rcx+r8*8-8]\n    mov     r9, r8\n    and     r9, 7\n    inc     r9\n    shr     r8, 3\n; and clear carry flag\n    cmp     r8, 0\n    jz      .2\n\n    xalign  16\n.1:\tmov     r10, [rdx]\n    mov     r11, [rdx-8]\n    mov     r12, [rdx-16]\n    mov     r13, [rdx-24]\n    rcr     r10, 1\n    rcr     r11, 1\n    rcr     r12, 1\n    rcr     r13, 1\n    mov     [rcx], r10\n    mov     [rcx-8], r11\n    mov     [rcx-16], r12\n    mov     [rcx-24], r13\n    mov     r10, [rdx-32]\n    mov     r11, [rdx-40]\n    mov     r12, [rdx-48]\n    mov     r13, [rdx-56]\n    rcr     r10, 1\n    rcr     r11, 1\n    rcr     r12, 1\n    rcr     r13, 
1\n    mov     [rcx-32], r10\n    mov     [rcx-40], r11\n    mov     [rcx-48], r12\n    mov     [rcx-56], r13\n    lea     rdx, [rdx-64]\n    dec     r8\n    lea     rcx, [rcx-64]\n    jnz     .1\n.2: dec     r9\n    jz      .3\n; Could suffer cache-bank conflicts in this tail part\n    mov     r10, [rdx]\n    rcr     r10, 1\n    mov     [rcx], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-8]\n    rcr     r10, 1\n    mov     [rcx-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-16]\n    rcr     r10, 1\n    mov     [rcx-16], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-24]\n    rcr     r10, 1\n    mov     [rcx-24], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-32]\n    rcr     r10, 1\n    mov     [rcx-32], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-40]\n    rcr     r10, 1\n    mov     [rcx-40], r10\n    dec     r9\n    jz      .3\n    mov     r10, [rdx-48]\n    rcr     r10, 1\n    mov     [rcx-48], r10\n.3: rcr     rax, 1\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/rshift2.asm",
    "content": "; PROLOGUE(mpn_rshift2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift2(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rshift2, 0, reg_save_list\n\tlea     rsi, [rdx+24]\n\tlea     rdi, [rcx+24]\n    mov     rcx, r8\n\txor     eax, eax\n\txor     edx, edx\n\tsub     rcx, 4\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tmov     r10, [rsi+rcx*8-16]\n\tmov     r11, [rsi+rcx*8-24]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\trcr     r11, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\trcr     r11, 1\n\tmov     [rdi+rcx*8-24], r11\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tsub     rcx, 4\n\tmov     [rdi+rcx*8+24], r9\n\tmov     [rdi+rcx*8+16], r10\n\tjnc     .1\n.2:\tcmp     rcx, -2\n\tja      .4\n\tje      .5\n\tjp      
.6\n.3:\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tmov     r10, [rsi+rcx*8-16]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\trcr     r10, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8-8], r9\n\tmov     [rdi+rcx*8-16], r10\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+rcx*8]\n\tmov     r9, [rsi+rcx*8-8]\n\tadd     rax, rax\n\trcr     r8, 1\n\trcr     r9, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\trcr     r9, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tmov     [rdi+rcx*8-8], r9\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rsi+rcx*8]\n\tadd     rax, rax\n\trcr     r8, 1\n\tsbb     rax, rax\n\tadd     rdx, rdx\n\trcr     r8, 1\n\tsbb     rdx, rdx\n\tmov     [rdi+rcx*8], r8\n\tlea     rax, [rax+rdx*2]\n\tneg     rax\n\tshl     rax, 62\n.7:\tEND_PROC reg_save_list\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/k8/store.asm",
    "content": "; PROLOGUE(mpn_store)\n;  Copyright 2009 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;\tmpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;   r10          rdi        rsi        rdx\n;\tr10          rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n\tLEAF_PROC mpn_store\n\tmov     rax, rdx\n\tand     rax, 7\n\tmov     r9, 8\n\tsub     r9, rax\n    mov     rax, r8\n\tlea     r9, [r9+r9*4]\n\tlea     r10, [rel .0]\n\tadd     r10, r9\n\tand     rdx, -8\n\tadd     rcx, 48\n\tjmp     r10\n\txalign  16\n.0:\tmov     [rcx+rdx*8+8], rax\n\tmov     [byte rcx+rdx*8+0], rax\n\tmov     [rcx+rdx*8-8], rax\n\tmov     [rcx+rdx*8-16], rax\n\tmov     [rcx+rdx*8-24], rax\n\tmov     [rcx+rdx*8-32], rax\n\tmov     [rcx+rdx*8-40], rax\n\tmov     [rcx+rdx*8-48], rax\n\tnop\n\tsub     rdx, 8\n\tjnc     .0\n\tnop\n\tret\n\n    end"
  },
  {
    "path": "mpn/x86_64w/k8/sub_err1_n.asm",
    "content": "; PROLOGUE(mpn_sub_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sub_err1(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_sub_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    mov     r9, qword [rsp+stack_use+48]\n\tmov     r10, [rsp++stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\t\n\txalign  16\n.1: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tsbb     
r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tsbb     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n\n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n    jmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_sub_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_sub_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    sbb     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2:\tmov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    sbb     r10, [r8]\n    sbb     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    sbb     r10, [r8-16]\n    sbb     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    sbb      r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/subadd_n.asm",
    "content": "; PROLOGUE(mpn_subadd_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx          r8\n;  rax                       rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_subadd_n, 0, reg_save_list\n    mov     rbx, qword [rsp+stack_use+40]\n\tlea     rdi, [rcx+rbx*8-56]\n\tlea     rsi, [rdx+rbx*8-56]\n\tlea     rdx, [r8+rbx*8-56]\n\tlea     rcx, [r9+rbx*8-56]\n\tmov     r9, 3\n\txor     rax, rax\n\txor     r10, r10\n\tsub     r9, rbx\n\tjge     .3\n\tadd     r9, 4\n\tmov     rbp, [rsi+r9*8+16]\n\tmov     r11, [rsi+r9*8+24]\n\tmov     r8, [rsi+r9*8]\n\tmov     rbx, [rsi+r9*8+8]\n\tjc      .2\n\t\n\txalign  16\n.1:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8]\n\tsbb     rbx, [rdx+r9*8+8]\n\tsbb     
rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n\tmov     rbp, [rsi+r9*8+48]\n\tmov     r11, [rsi+r9*8+56]\n\tadd     r9, 4\n\tmov     r8, [rsi+r9*8]\n\tmov     rbx, [rsi+r9*8+8]\n\tjnc     .1\n.2:\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8]\n\tsbb     rbx, [rcx+r9*8+8]\n\tsbb     rbp, [rcx+r9*8+16]\n\tsbb     r11, [rcx+r9*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8]\n\tsbb     rbx, [rdx+r9*8+8]\n\tsbb     rbp, [rdx+r9*8+16]\n\tsbb     r11, [rdx+r9*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r9*8], r8\n\tmov     [rdi+r9*8+24], r11\n\tmov     [rdi+r9*8+8], rbx\n\tmov     [rdi+r9*8+16], rbp\n.3:\tcmp     r9, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4:\tmov     rbp, [rsi+r9*8+48]\n\tmov     r8, [rsi+r9*8+32]\n\tmov     rbx, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rbp, [rcx+r9*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tsbb     rbx, [rdx+r9*8+40]\n\tsbb     rbp, [rdx+r9*8+48]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tmov     [rdi+r9*8+48], rbp\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     r8, [rsi+r9*8+32]\n\tmov     rbx, [rsi+r9*8+40]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rbx, [rcx+r9*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tsbb     rbx, [rdx+r9*8+40]\n\tmov     [rdi+r9*8+32], r8\n\tmov     [rdi+r9*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tmov     r8, [rsi+r9*8+32]\n\tadd     rax, 1\n\tsbb     r8, [rcx+r9*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r8, [rdx+r9*8+32]\n\tmov     [rdi+r9*8+32], r8\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign  16\n.7:\tadd     rax, r10\n\tneg     rax\n.8:\tEND_PROC 
reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/sublsh1_n.asm",
    "content": "; PROLOGUE(mpn_sublsh1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx  \n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n    mov     rax, r9\n\n    lea     rdi, [rcx+rax*8-56]\n    lea     rsi, [rdx+rax*8-56]\n    lea     rdx, [ r8+rax*8-56]\n    mov     rcx, rax\n\n\txor     rax, rax\n\txor     r10, r10\n\tmov     r8, 3\n\tsub     r8, rcx\n\tjge     .3\n\tadd     r8, 4\n\tmov     r11, [rsi+r8*8+24]\n\tmov     rcx, [rsi+r8*8+16]\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjc      .2\n\t\n\txalign   16\n.1: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, 
[rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n\tmov     r11, [rsi+r8*8+56]\n\tmov     rcx, [rsi+r8*8+48]\n\tadd     r8, 4\n\tmov     r9, [rsi+r8*8]\n\tmov     rbx, [rsi+r8*8+8]\n\tjnc     .1\n.2: add     rax, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8]\n\tsbb     rbx, [rdx+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+16]\n\tsbb     r11, [rdx+r8*8+24]\n\tsbb     r10, r10\n\tmov     [rdi+r8*8], r9\n\tmov     [rdi+r8*8+8], rbx\n\tmov     [rdi+r8*8+16], rcx\n\tmov     [rdi+r8*8+24], r11\n.3: cmp     r8, 2\n\tja      .7\n\tjz      .6\n\tjp      .5\n.4: mov     rcx, [rsi+r8*8+48]\n\tmov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rcx, [rdx+r8*8+48]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tmov     [rdi+r8*8+48], rcx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.5: mov     r9, [rsi+r8*8+32]\n\tmov     rbx, [rsi+r8*8+40]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rbx, [rdx+r8*8+40]\n\tmov     [rdi+r8*8+32], r9\n\tmov     [rdi+r8*8+40], rbx\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.6: mov     r9, [rsi+r8*8+32]\n\tadd     rax, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tsbb     rax, rax\n\tadd     r10, 1\n\tsbb     r9, [rdx+r8*8+32]\n\tmov     [rdi+r8*8+32], r9\n\tsbb     rax, 0\n\tneg     rax\n\tEXIT_PROC reg_save_list\n\t\n\txalign   16\n.7: add     r10, 1\n.8: sbb     rax, 0\n\tneg     rax\n\tEND_PROC reg_save_list\n\n\tend\n\t"
  },
  {
    "path": "mpn/x86_64w/k8/sublsh_n.asm",
    "content": "; PROLOGUE(mpn_sublsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_sublsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_sublsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n\tLEAF_PROC mpn_sublsh_n\n\tmov     r10, r9\n\txor     r9, r9\n    jmp     entry\n    \n\tLEAF_PROC mpn_sublsh_nc\n\tmov     r10, r9\n\tmov     r9, [rsp+48]\nentry:\n\tFRAME_PROC ?mpn_sublsh, 0, reg_save_list\n\tlea     rdi, [rcx+r10*8]\n\tlea     rsi, [rdx+r10*8]\n\tlea     rdx, [r8+r10*8]\n\tmov     ecx, dword [rsp+stack_use+40]\n\t\n\tneg     rcx\n\tshr     r9, cl\n\tneg     r10\n\txor     rax, rax\n\ttest    r10, 3\n\tjz      .2\n.1:\tmov     r8, [rdx+r10*8]\n\tmov     r11, r8\n\tneg     rcx\n\tshl     r8, cl\n\tneg     rcx\n\tshr     r11, cl\n\tor      r8, 
r9\n\tmov     r9, r11\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tsbb     r11, r8\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tinc     r10\n\ttest    r10, 3\n\tjnz     .1\n.2:\tcmp     r10, 0\n\tjz      .4\n\n\txalign  16\n.3:\tmov     r8, [rdx+r10*8]\n\tmov     rbp, [rdx+r10*8+8]\n\tmov     rbx, [rdx+r10*8+16]\n\tmov     r12, [rdx+r10*8+24]\n\tmov     r11, r8\n\tmov     r13, rbp\n\tmov     r14, rbx\n\tmov     r15, r12\n\tneg     rcx\n\tshl     r8, cl\n\tshl     rbp, cl\n\tshl     rbx, cl\n\tshl     r12, cl\n\tneg     rcx\n\tshr     r11, cl\n\tshr     r13, cl\n\tshr     r14, cl\n\tshr     r15, cl\n\tor      r8, r9\n\tor      rbp, r11\n\tor      rbx, r13\n\tor      r12, r14\n\tmov     r9, r15\n\tadd     rax, 1\n\tmov     r11, [rsi+r10*8]\n\tmov     r13, [rsi+r10*8+8]\n\tmov     r14, [rsi+r10*8+16]\n\tmov     r15, [rsi+r10*8+24]\n\tsbb     r11, r8\n\tsbb     r13, rbp\n\tsbb     r14, rbx\n\tsbb     r15, r12\n\tsbb     rax, rax\n\tmov     [rdi+r10*8], r11\n\tmov     [rdi+r10*8+8], r13\n\tmov     [rdi+r10*8+16], r14\n\tmov     [rdi+r10*8+24], r15\n\tadd     r10, 4\n\tjnz     .3\n.4:\tneg     rax\n\tadd     rax, r9\n    END_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n        \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        xalign 16\n\t    LEAF_PROC mpn_declsh_n\n\t    mov     r10, rcx\n\t    mov     ecx, r9d\n\t    mov     r9, 1\n\t    shl     r9, cl\n        mov     rcx, r10\n    \n        xalign 16\n        LEAF_PROC mpn_submul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jnz     .1\n        mul     r9\n        sub     [rcx], rax\n        adc     rdx, 0\n        mov     rax, rdx\n        ret\n\n        xalign   16\n.1:\t    FRAME_PROC ?mpn_k8_submul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n    
    db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n        mov     [rsp-8], r12\n        db      0x26\n        jge     .3\n.2:     xor     r10, r10\n        mul     rcx\n        sub     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        sub     [rdi+r11*8+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        sub     [rdi+r11*8+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        sub     [rdi+r11*8+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .2\n.3:     xor     r10, r10\n        mul     rcx\n        sub     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .7\n        jz      .6\n        jp      .5\n.4:     mov     rax, [rsi+16]\n        mul     rcx\n        sub     [rdi+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        sub     [rdi+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        sub     [rdi+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        sub     [rdi+32], r8\n        adc     r9, 0\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n.5:     mov     rax, [rsi+24]\n        mul     rcx\n        sub     [rdi+16], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n   
     xor     r8, r8\n        mul     rcx\n        sub     [rdi+24], r10\n        adc     r12, rax\n        adc     r8, rdx\n        sub     [rdi+32], r12\n        adc     r8, 0\n        mov     rax, r8\n        EXIT_PROC reg_save_list\n\n        align   16\n.6:     mov     rax, [rsi+32]\n        mul     rcx\n        sub     [rdi+24], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        sub     [rdi+32], r10\n        adc     r12, 0\n        mov     rax, r12\n        EXIT_PROC reg_save_list\n.7:     sub     [rdi+32], r9\n        adc     r10, 0\n        mov     rax, r10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/k8/sumdiff_n.asm",
    "content": "; PROLOGUE(mpn_sumdiff_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;       rcx[r10] = r8[r10] + r9[r10]\n;       rdx[r10] = r8[r10] - r9[r10]\n;\n;  return 2 * add_carry + sub_borrow\n;\n;  mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                        rdi     rsi     rdx     rcx          r8\n;  rax                        rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rsi, rdi, r12, r13, r14, r15, rbx, rbp\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n    mov     r10, qword [rsp+stack_use+40]\n\n    lea     rdx, [rdx+r10*8]\n    lea     r8, [r8+r10*8]\n    lea     rcx, [rcx+r10*8]\n    lea     r9, [r9+r10*8]\n    neg     r10\n    xor     r12, r12\n    xor     r11, r11\n    test    r10, 3\n    jz      .2\n.1: mov     rax, [r8+r10*8]\n    mov     r13, rax\n    add     r12, 1\n    adc     rax, [r9+r10*8]\n    sbb     r12, r12\n    add     r11, 1\n    sbb     r13, [r9+r10*8]\n    sbb     r11, r11\n    mov     [rcx+r10*8], rax\n    mov     
[rdx+r10*8], r13\n    inc     r10\n    test    r10, 3\n    jnz     .1\n.2: cmp     r10, 0\n    jz      .4\n\n    xalign  16\n.3: mov     rax, [r8+r10*8]\n    mov     rsi, [r8+r10*8+8]\n    mov     rdi, [r8+r10*8+16]\n    mov     rbp, [r8+r10*8+24]\n    mov     r13, rax\n    mov     r14, rsi\n    mov     r15, rdi\n    mov     rbx, rbp\n    add     r12, 1\n    adc     rax, [r9+r10*8]\n    adc     rsi, [r9+r10*8+8]\n    adc     rdi, [r9+r10*8+16]\n    adc     rbp, [r9+r10*8+24]\n    sbb     r12, r12\n    add     r11, 1\n    sbb     r13, [r9+r10*8]\n    sbb     r14, [r9+r10*8+8]\n    sbb     r15, [r9+r10*8+16]\n    sbb     rbx, [r9+r10*8+24]\n    sbb     r11, r11\n    mov     [rcx+r10*8], rax\n    mov     [rcx+r10*8+8], rsi\n    mov     [rcx+r10*8+16], rdi\n    mov     [rcx+r10*8+24], rbp\n    mov     [rdx+r10*8], r13\n    mov     [rdx+r10*8+8], r14\n    mov     [rdx+r10*8+16], r15\n    mov     [rdx+r10*8+24], rbx\n    add     r10, 4\n    jnz     .3\n.4: lea     rax, [r11+r12*2]\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/udiv_qrnnd.asm",
    "content": "; PROLOGUE(mpn_udiv_qrnnd)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_udiv_qrnnd(mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t)\n;  rax                         rdi        rsi        rdx        rcx\n;  rax                         rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_udiv_qrnnd\n    mov     rax,r8\n    div     r9\n    mov     [rcx],rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/umul_ppmm.asm",
    "content": "; PROLOGUE(mpn_umul_ppmm)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_umul_ppmm(mp_ptr, mp_limb_t, mp_limb_t)\n;  rax                        rdi        rsi        rdx\n;  rax                        rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_umul_ppmm\n    mov     rax,rdx\n    mul     r8\n    mov     [rcx],rax\n    mov     rax,rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xnor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    
mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/k8/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, r10\n    or      r11, r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, T3\n    or      T4, T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n  
  jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    dec     r9\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nCopyright 2013 William Hart\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined(__ICL)\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"addq %5,%q1\\n\\tadcq %3,%q0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),\t\t\\\n\t     \"%1\" ((UDItype)(al)), \"rme\" ((UDItype)(bl)))\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"subq %5,%q1\\n\\tsbbq %3,%q0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\" ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),\t\t\\\n\t     \"1\" ((UDItype)(al)), \"rme\" ((UDItype)(bl)))\n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"mulq %3\"\t\t\t\t\t\t\t\\\n\t   : \"=a\" (w0), \"=d\" (w1)\t\t\t\t\t\\\n\t   : \"%0\" ((UDItype)(u)), \"rm\" ((UDItype)(v)))\n#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding \"=d\" */\\\n  __asm__ (\"divq %4\"\t\t     /* stringification in K&R C */\t\\\n\t   : \"=a\" (q), \"=d\" (r)\t\t\t\t\t\t\\\n\t   : \"0\" ((UDItype)(n0)), \"1\" ((UDItype)(n1)), \"rm\" ((UDItype)(dx)))\n#define 
add_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"addq %8,%q2\\n\\tadcq %6,%q1\\n\\tadcq %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),  \\\n         \"1\"  ((UDItype)(am)), \"rme\" ((UDItype)(bm)),  \\\n         \"2\"  ((UDItype)(al)), \"rme\" ((UDItype)(bl)))  \n#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"subq %8,%q2\\n\\tsbbq %6,%q1\\n\\tsbbq %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((UDItype)(ah)), \"rme\" ((UDItype)(bh)),  \\\n         \"1\"  ((UDItype)(am)), \"rme\" ((UDItype)(bm)),  \\\n         \"2\"  ((UDItype)(al)), \"rme\" ((UDItype)(bl)))  \n\n/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */\n#define count_leading_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    UDItype __cbtmp;\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsrq %1,%0\" : \"=r\" (__cbtmp) : \"rm\" ((UDItype)(x)));\t\\\n    (count) = __cbtmp ^ 63;\t\t\t\t\t\t\\\n  } while (0)\n/* bsfq destination must be a 64-bit register, \"%q0\" forces this in case\n   count is only an int. 
*/\n#define count_trailing_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsfq %1,%q0\" : \"=r\" (count) : \"rm\" ((UDItype)(x)));\t\\\n  } while (0)\n\n#endif\n\n#if !defined(BSWAP_LIMB) && defined (__GNUC__) \n#define BSWAP_LIMB(dst, src)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    __asm__ (\"bswap %q0\" : \"=r\" (dst) : \"0\" (src));\t\t\t\\\n  } while (0)\n#endif\n\n#if defined( _MSC_VER )\n#include <intrin.h>\n\n#if !defined( COUNT_LEADING_ZEROS_NEED_CLZ_TAB )\n#  define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#endif\n\n#if defined( _WIN64 )\n\n#if !defined(count_leading_zeros)\n#   pragma intrinsic(_BitScanReverse64)\n#   define count_leading_zeros(c,x)\t    \\\n    do  { unsigned long _z;\t            \\\n          ASSERT ((x) != 0);            \\\n          _BitScanReverse64(&_z, (x));  \\\n          c = 63 - _z;                  \\\n        } while (0)\n#endif\n\n#if !defined(count_trailing_zeros)\n#   pragma intrinsic(_BitScanForward64)\n#   define count_trailing_zeros(c,x)\t\\\n    do  {  unsigned long _z;\t\t \t\\\n           ASSERT ((x) != 0);           \\\n           _BitScanForward64(&_z, (x));\t\\\n\t\t   c = _z;\t\t\t\t\t\t\\\n         } while (0)\n#endif\n\n#if !defined(umul_ppmm)\n#    pragma intrinsic(_umul128)\n#    define umul_ppmm(xh, xl, m0, m1)       \\\n     do  {                                  \\\n           xl = _umul128( (m0), (m1), &xh); \\\n         } while (0)\n#endif\n\n#if !defined( BSWAP_LIMB )\n#  pragma intrinsic(_byteswap_uint64)\n#    define BSWAP_LIMB(dst, src)  dst = _byteswap_uint64(src)\n#endif\n\n#endif    /* _WIN64 */\n\n#if 0\t/* trial re-definition of C versions */\n#define add_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n   do { \\\n      UWtype _cy; \\\n      (sl) = (al) + (bl); \\\n\t  _cy = (sl) < (bl) ? 1 : 0; \\\n\t  (sm) = (am) + (bm) + _cy; \\\n\t  _cy  = (sm) < (bm) ? 1 : (sm) > (bm) ? 
0 : _cy; \\\n\t  (sh) = (ah) + (bh) + _cy; \\\n   } while (0)\n\n#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n   do { \\\n      UWtype _bw; \\\n      (sl) = (al) - (bl); \\\n\t  _bw = (sl) > (al) ? 1 : 0; \\\n\t  (sm) = (am) - (bm) - _bw; \\\n\t  _bw = (sm) > (am) ? 1 : (sm) < (am) ? 0 : _bw; \\\n\t  (sh) = (ah) - (bh) - _bw; \\\n   } while (0)\n#endif\n\n#endif      /* _MSC_VER */\n"
  },
  {
    "path": "mpn/x86_64w/modexact_1c_odd.asm",
    "content": "; PROLOGUE(mpn_modexact_1c_odd)\n\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_modexact_1_odd(mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_modexact_1c_odd(mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                              rdi        rsi        rdx        rcx\n;  rax                              rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi\n\n    BITS 64\n    \n    extern  __gmp_modlimb_invert_table\n\n    LEAF_PROC mpn_modexact_1_odd\n    mov     r9, 0               ; carry\n\n    FRAME_PROC mpn_modexact_1c_odd, 0, reg_save_list\n    \n    ; first use Newton's iteration to invert the divisor limb (d) using \n    ; f(x) = 1/x - d  and x[i+1] = x[i] - f(x[i]) / f'(x[i]) to give\n    ; the iteration formula: x[i+1] = x[i] * (2 - d * x[i])\n    \n    mov     rsi, rdx\n    mov     rdx, r8    \n    shr     edx, 1              ; div / 2\n    lea     r10, [rel __gmp_modlimb_invert_table]\n    and     edx, 127\n    movzx   edx, byte [rdx+r10] ; inv -> rdx (8-bit approx)\n\n    
    mov     rax, [rcx]          ; first limb of numerator\n    lea     r11, [rcx+rsi*8]    ; pointer to top of src\n    mov     rdi, r8             ; save divisor\n\n    lea     ecx, [rdx+rdx]\n    imul    edx, edx\n    neg     rsi                 ; limb offset from top of src\n    imul    edx, edi\n    sub     ecx, edx            ; inv -> rcx (16-bit approx)\n\n    lea     edx, [rcx+rcx]\n    imul    ecx, ecx\n    imul    ecx, edi\n    sub     edx, ecx            ; inv -> rdx (32-bit approx)\n    xor     ecx, ecx\n\n    lea     r10, [rdx+rdx]\n    imul    rdx, rdx\n    imul    rdx, r8\n    sub     r10, rdx            ; inv -> r10 (64-bit approx)\n\n    mov     rdx, r9             ; initial carry -> rdx\n    add     rsi, 1              ; adjust limb offset\n    jz      .2\n\n    mov     r9, r11\n    lea     rsi,[r11+rsi*8]\n    \n    xalign    16\n.1: sub     rax, rdx\n    adc     rcx, 0\n    imul    rax, r10\n    mul     r8\n    lodsq\n    sub     rax, rcx\n    setc    cl\n    cmp     r9, rsi\n    jne     .1\n.2: sub     rax, rdx\n    adc     rcx, 0\n    imul    rax, r10\n    mul     r8\n    lea     rax, [rcx+rdx]\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n    BITS 64\n\n\tFRAME_PROC mpn_mul_2, 0, reg_save_list\n\tmov     rax, r8\n\t\n\tmov     r8, [r9]\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, [r9+8]\n\tmov     rbx, 3\n\tsub     rbx, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tmov     r11, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, 
[rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 3\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 1\n\tja      .5\n\tje      .4\n.3:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.6: END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/mullow_n_basecase.asm",
    "content": ";  AMD64 mpn_mullow_n_basecase\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;void mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n;                                  rdi           rsi           rdx          rcx\n;                                  rcx           rdx            r8           r9\n\n%include 'yasm_mac.inc'\n\n\tTEXT\n\n\tLEAF_PROC mpn_mullow_n_basecase\n    cmp     r9, 3\n    je      asm_sym(?mpn_mullow1)\n    jg      asm_sym(?mpn_mullow2)\n\tmov     rax, [rdx]\n    mov     r11, [r8+8]\n\tmov     r8, [r8]\n    cmp     r9, 1\n    jg      .3\n\t\n.2:\timul    rax, r8\n\tmov     [rcx], rax\n\tret\n\t\n.3:\tmov     r9, [rdx+8]\n    imul    r11, rax\n\tmul     r8\n\tmov     [rcx], rax\n\timul    r8, r9\n\tlea     rax, [r11+rdx]\n\tadd     rax, r8\n\tmov     [rcx+8], rax\n    ret\n\n%define reg_save_list rsi, rdi\n\t\n\tFRAME_PROC ?mpn_mullow1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov\t    rax, [rsi]\n\tmov\t    r8, [rdx]\n    mov     r9, [rdx+8]\n\tmov     r11, [rdx+16]\n\tmul     r8\n\tmov     [rdi], rax\n\tmov     rax, [rsi]\n\tmov     rcx, rdx\n\tmul     r9\n\timul    r9, [rsi+8]\n\tmov     r10, [rsi+16]\n\timul    r10, r8\n\tadd     rcx, rax\n\tadc     r9, rdx\n\tadd     r9, r10\n\tmov     rax, [rsi+8]\n\tmul     r8\n\tadd     rcx, rax\n\tadc     r9, rdx\n\tmov     rax, r11\n\timul    rax, [rsi]\n\tadd     r9, rax\n\tmov     [rdi+8], rcx\n\tmov     [rdi+16], r9\n\tEND_PROC reg_save_list\n\n%define reg_save_list rsi, rdi, rbx, rbp, r13, r14, r15\n\n\talign  16\n\tFRAME_PROC ?mpn_mullow2, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n.5:\tmov     rax, [rsi]\n\tmov     r13, [rdx]\n\tmov     r11, rdx\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tneg     rcx\n\tmul     r13\n\ttest    cl, 1\n\tjz      .12\n.6: lea     rdi, [rdi-8]\n\tlea     rsi, [rsi-8]\n\ttest    cl, 2\n\tjnz     .8\n.7: lea     r9, [rcx-1]\n\tlea     
r8, [rel .18]\n\tmov     rbx, rax\n\tmov     r15, rdx\n\txor     ebp, ebp\n\txor     r10d, r10d\n\tmov     rax, [rsi+rcx*8+16]\n\tjmp     .11\n\n.8: lea     r9, [rcx+1]\n\tlea     r8, [rel .19]\n\tmov     rbp, rax\n\tmov     r10, rdx\n\txor     ebx, ebx\n\tjmp     .10\n\n\talign  16\n.9: mov     [rdi+r9*8-16], rbx\n\tadd     r15, rax\n\tmov     rax, [rsi+r9*8]\n\tadc     rbp, rdx\n\txor     ebx, ebx\n\tmul     r13\n\tmov     [rdi+r9*8-8], r15\n\tadd     rbp, rax\n\tadc     r10, rdx\n.10:mov     rax, [rsi+r9*8+8]\n\tmul     r13\n\tmov     [rdi+r9*8], rbp\n\tadd     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8+16]\n\tmul     r13\n\tmov     [rdi+r9*8+8], r10\n\txor     ebp, ebp\n\tmov     r10, rbp\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8+24]\n\tmov     r15, rbp\n\tadc     r15, rdx\n.11:mul     r13\n\tadd     r9, 4\n\tjs      .9\n\tmov     [rdi-16], rbx\n\tadd     r15, rax\n\tmov     [rdi-8], r15\n\tadc     rbp, rdx\n\timul    r13, [rsi]\n\tadd     rbp, r13\n\tmov     [rdi], rbp\n\tadd     rcx, 1\n\tjz      .23\n\tmov     r13, [r11+8]\n\tmov     r14, [r11+16]\n\tlea     rsi, [rsi+16]\n\tlea     r11, [r11+8]\n\tlea     rdi, [rdi+24]\n\tjmp     r8\n\n.12:mov     r14, [r11+8]\n\ttest    cl, 2\n\tjz      .14\n\n\talign  16\n.13:lea     r9, [rcx+0]\n\tmov     r10, rax\n\tmov     rbx, rdx\n\txor     r15d, r15d\n\tmov     rax, [rsi+rcx*8]\n\tlea     r8, [rel .19]\n\tjmp     .17\n\t\n\talign  16\n.14:lea     r9, [rcx+2]\n\tmov     r10d, 0\n\tmov     r15, rax\n\tmov     rax, [rsi+rcx*8]\n\tmov     rbp, rdx\n\tlea     r8, [rel .18]\n\tjmp     .16\n\t\n\talign  16\n.15:mov     rax, [rsi+r9*8-32]\n\tmul     r14\n\tadd     rbx, rax\n\tadc     r15, rdx\n\tmov     rax, [rsi+r9*8-24]\n\txor     ebp, ebp\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8-24]\n\tadc     r15, rdx\n\tadc     ebp, 0\n\tmul     r14\n\tadd     r15, rax\n\tmov     [rdi+r9*8-24], rbx\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8-16]\n\tmul     r13\n\tmov     r10d, 0\n\tadd     r15, 
rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8-16]\n\tadc     r10d, 0\n.16:mov     ebx, 0\n\tmov     [rdi+r9*8-16], r15\n\tmul     r14\n\tadd     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tmov     r15d, 0\n\tmul     r13\n\tadd     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tadc     ebx, r15d\n\tmul     r14\n\tadd     r10, rax\n\tmov     [rdi+r9*8-8], rbp\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r13\n\tadd     r10, rax\n\tadc     rbx, rdx\n\tadc     r15d, 0\n.17:add     r9, 4\n\tmov     [rdi+r9*8-32], r10\n\tjs      .15\n\timul    r14, [rsi-16]\n\tadd     rbx, r14\n\timul    r13, [rsi-8]\n\tadd     rbx, r13\n\tmov     [rdi-8], rbx\n\tadd     rcx, 2\n\tjz      .23\n\tmov     r13, [r11+16]\n\tmov     r14, [r11+24]\n\tlea     r11, [r11+16]\n\tlea     rdi, [rdi+16]\n\tjmp     r8\n.18:lea     r9, [rcx-2]\n\tmov     rax, [rsi+rcx*8-16]\n\tmul     r13\n\tmov     r10, rax\n\tmov     rax, [rsi+rcx*8-16]\n\tmov     rbx, rdx\n\txor     r15d, r15d\n\tlea     r8, [rel .19]\n\tjmp     .22\n.19:lea     r9, [rcx+0]\n\tmov     rax, [rsi+rcx*8-16]\n\txor     r10d, r10d\n\tmul     r13\n\tmov     r15, rax\n\tmov     rax, [rsi+rcx*8-16]\n\tmov     rbp, rdx\n\tlea     r8, [rel .18]\n\tjmp     .21\n\t\n\talign  16\n.20:add     [rdi+r9*8-32], r10\n\tadc     rbx, rax\n\tmov     rax, [rsi+r9*8-24]\n\tadc     r15, rdx\n\txor     ebp, ebp\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8-24]\n\tadc     r15, rdx\n\tadc     ebp, ebp\n\tmul     r14\n\txor     r10d, r10d\n\tadd     [rdi+r9*8-24], rbx\n\tadc     r15, rax\n\tmov     rax, [rsi+r9*8-16]\n\tadc     rbp, rdx\n\tmul     r13\n\tadd     r15, rax\n\tmov     rax, [rsi+r9*8-16]\n\tadc     rbp, rdx\n\tadc     r10d, 0\n.21:mul     r14\n\tadd     [rdi+r9*8-16], r15\n\tadc     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tmul     r13\n\txor     ebx, ebx\n\tadd     rbp, rax\n\tadc     r10, rdx\n\tmov     r15d, 0\n\tmov     rax, [rsi+r9*8-8]\n\tadc     ebx, r15d\n\tmul 
    r14\n\tadd     [rdi+r9*8-8], rbp\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi+r9*8]\n\tadc     rbx, rdx\n\tadc     r15d, 0\n.22:mul     r14\n\tadd     r9, 4\n\tjs      .20\n\tadd     [rdi-32], r10\n\tadc     rbx, rax\n\timul    r13, [rsi-24]\n\tadd     rbx, r13\n\tadd     [rdi-24], rbx\n\tadd     rcx, 2\n\tjns     .23\n\tlea     r11, [r11+16]\n\tmov     r13, [r11]\n\tmov     r14, [r11+8]\n\tlea     rsi, [rsi-16]\n\tjmp     r8\n.23:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/mulmid_basecase.asm",
    "content": "; PROLOGUE(mpn_mulmid_basecase)\n;\n;  AMD64 mpn_mulmid_basecase\n;\n;  Based on mul_basecase.asm from GMP 4.3.1, modifications are copyright\n;  (C) 2009, David Harvey. The original mul_basecase.asm was released under\n;  LGPLv3+, license terms reproduced below. These modifications are hereby\n;  released under the same terms.\n;\n;  Windows Conversion Copyright 2010 Dr B R Gladman\n;\n;  Contributed to the GNU project by Torbjorn Granlund and David Harvey.\n;\n;  Copyright 2008 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 3 of the License, or (at\n;  your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  //www.gnu.org/licenses/.\n;\n; void mpn_mulmid_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi        rdx     rcx         r8\n;  rax                        rcx     rdx         r8      r9   [rsp+40]\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n%include 'yasm_mac.inc'\n\n        BITS 64\n\tTEXT\n\t\n\txalign  16\n\tWIN64_GCC_PROC mpn_mulmid_basecase, 5, frame\n\tmov     r15, rcx\n\n\tlea     r13, [rdx+1]\n\tsub     r13, r8\n\tlea     rdi, [rdi+r13*8]\n\tcmp     r13, 4\n\tjc      .29\n\tlea     rsi, [rsi+rdx*8]\n\ttest    r8, 1\n\tjz      .10\n\n.0:\tmov     ebx, r13d\n\tneg     r13\n\tmov     rax, [rsi+r13*8]\n\tmov     r12, [r15]\n\tmul     r12\n\n\tand     r13, -4\n\tmov     r11, r13\n\n\tand     ebx, 3\n\tjz      .2\n\tcmp     ebx, 2\n\tjc      .3\n\tjz      .4\n\n.1:\tmov     r10, rax\n\tmov     rbx, rdx\n\tlea     r14, [rel .23]\n\tjmp     .8\n\n\txalign  16\n.2:\tmov     rbp, rax\n\tmov     r10, rdx\n\tlea     r14, [rel .20]\n\tjmp     .7\n\n\txalign  16\n.3:\tadd     r11, 4\n\tmov     rcx, rax\n\tmov     rbp, rdx\n\tmov     r10d, 0\n\tmov     rax, [rsi+r11*8]\n\tlea     r14, [rel .21]\n\tjmp     .6\n\n\txalign  16\n.4:\tmov     rbx, rax\n\tmov     rcx, rdx\n\tmov     rax, [rsi+r11*8+24]\n\tmov     ebp, 0\n\tmov     r10d, 0\n\tlea     r14, [rel .22]\n\tjmp     .9\n\n\txalign  16\n.5:\tmov     [rdi+r11*8-16], rbx\n\tadd     rcx, rax\n\tmov     rax, [rsi+r11*8]\n\tadc     rbp, rdx\n.6:\tmov     ebx, 0\n\tmul     r12\n\tmov     [rdi+r11*8-8], rcx\n\tadd     rbp, rax\n\tadc     r10, rdx\n.7:\tmov     rax, [rsi+r11*8+8]\n\tmul     r12\n\tmov     [rdi+r11*8], rbp\n\tadd     r10, rax\n\tadc     rbx, rdx\n.8:\tmov     rax, [rsi+r11*8+16]\n\tmul     r12\n\tmov     [rdi+r11*8+8], r10\n\tmov     ebp, 0\n\tmov     r10, rbp\n\tadd     rbx, rax\n\tmov     
rax, [rsi+r11*8+24]\n\tmov     rcx, rbp\n\tadc     rcx, rdx\n.9:\tmul     r12\n\tadd     r11, 4\n\tjs      .5\n\tmov     [rdi-16], rbx\n\tadd     rcx, rax\n\tmov     [rdi-8], rcx\n\tmov     [rdi+8], rbp\n\tadc     rbp, rdx\n\tmov     [rdi], rbp\n\tdec     r8\n\tjz      .40\n\tlea     rsi, [rsi-8]\n\tlea     r15, [r15+8]\n\tmov     r11, r13\n\tmov     r12, [r15]\n\tmov     r9, [r15+8]\n\tjmp     r14\n\n\txalign  16\n.10:mov     ebx, r13d\n\n\tneg     r13\n\tmov     rax, [rsi+r13*8-8]\n\tmov     r12, [r15]\n\tmov     r9, [r15+8]\n\tmul     r9\n\tand     r13, -4\n\tmov     r11, r13\n\tand     ebx, 3\n\tjz      .12\n\tcmp     ebx, 2\n\tjc      .13\n\tjz      .14\n.11:mov     rcx, rax\n\tmov     rbp, rdx\n\tlea     r14, [rel .23]\n\tjmp     .17\n\n\txalign  16\n.12:mov     rbx, rax\n\tmov     rcx, rdx\n\tlea     r14, [rel .20]\n\tjmp     .16\n\n\txalign  16\n.13:mov     r10, rax\n\tmov     rbx, rdx\n\tmov     ecx, 0\n\tlea     r14, [rel .21]\n\tjmp     .19\n\n\txalign  16\n.14:mov     rbp, rax\n\tmov     r10, rdx\n\tmov     ebx, 0\n\tmov     rax, [rsi+r11*8+16]\n\tlea     r14, [rel .22]\n\tjmp     .18\n\n\txalign  16\n.15:mov     rax, [rsi+r11*8-8]\n\tmul     r9\n\tadd     rbx, rax\n\tadc     rcx, rdx\n.16:mov     ebp, 0\n\tmov     rax, [rsi+r11*8]\n\tmul     r12\n\tadd     rbx, rax\n\tmov     rax, [rsi+r11*8]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r9\n\tadd     rcx, rax\n\tmov     [rdi+r11*8], rbx\n\tadc     rbp, rdx\n.17:mov     rax, [rsi+r11*8+8]\n\tmul     r12\n\tmov     r10d, 0\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tmov     ebx, 0\n\tadc     r10d, 0\n\tmov     rax, [rsi+r11*8+8]\n\tmov     [rdi+r11*8+8], rcx\n\tmul     r9\n\tadd     rbp, rax\n\tmov     rax, [rsi+r11*8+16]\n\tadc     r10, rdx\n.18:mov     ecx, 0\n\tmul     r12\n\tadd     rbp, rax\n\tmov     rax, [rsi+r11*8+16]\n\tadc     r10, rdx\n\tadc     ebx, 0\n\tmul     r9\n\tadd     r10, rax\n\tmov     [rdi+r11*8+16], rbp\n\tadc     rbx, rdx\n.19:mov     rax, [rsi+r11*8+24]\n\tmul     r12\n\tadd  
   r10, rax\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tadd     r11, 4\n\tmov     [rdi+r11*8-8], r10\n\tjnz     .15\n\tmov     [rdi], rbx\n\tmov     [rdi+8], rcx\n\tsub     r8, 2\n\tjz      .40\n\tlea     r15, [r15+16]\n\tlea     rsi, [rsi-16]\n\tmov     r11, r13\n\tmov     r12, [r15]\n\tmov     r9, [r15+8]\n\tjmp     r14\n\n\txalign  16\n.20:mov     rax, [rsi+r11*8-8]\n\tmul     r9\n\tmov     rcx, rax\n\tmov     rbp, rdx\n\tmov     r10d, 0\n\tjmp     .25\n\n\txalign  16\n.21:mov     rax, [rsi+r11*8+16]\n\tmul     r9\n\tmov     rbx, rax\n\tmov     rcx, rdx\n\tmov     ebp, 0\n\tmov     rax, [rsi+r11*8+24]\n\tjmp     .28\n\n\txalign  16\n.22:mov     rax, [rsi+r11*8+8]\n\tmul     r9\n\tmov     r10, rax\n\tmov     rbx, rdx\n\tmov     ecx, 0\n\tjmp     .27\n\n\txalign  16\n.23:mov     rax, [rsi+r11*8]\n\tmul     r9\n\tmov     rbp, rax\n\tmov     r10, rdx\n\tmov     ebx, 0\n\tmov     ecx, 0\n\tjmp     .26\n\t\n\txalign  16\n.24:mov     r10d, 0\n\tadd     rbx, rax\n\tmov     rax, [rsi+r11*8-8]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r9\n\tadd     [rdi+r11*8-8], rbx\n\tadc     rcx, rax\n\tadc     rbp, rdx\n.25:mov     rax, [rsi+r11*8]\n\tmul     r12\n\tadd     rcx, rax\n\tmov     rax, [rsi+r11*8]\n\tadc     rbp, rdx\n\tadc     r10d, 0\n\tmul     r9\n\tadd     [rdi+r11*8], rcx\n\tmov     ecx, 0\n\tadc     rbp, rax\n\tmov     ebx, 0\n\tadc     r10, rdx\n.26:mov     rax, [rsi+r11*8+8]\n\tmul     r12\n\tadd     rbp, rax\n\tmov     rax, [rsi+r11*8+8]\n\tadc     r10, rdx\n\tadc     ebx, 0\n\tmul     r9\n\tadd     [rdi+r11*8+8], rbp\n\tadc     r10, rax\n\tadc     rbx, rdx\n.27:mov     rax, [rsi+r11*8+16]\n\tmul     r12\n\tadd     r10, rax\n\tmov     rax, [rsi+r11*8+16]\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tmul     r9\n\tadd     [rdi+r11*8+16], r10\n\tnop ; < not translated >\n\tadc     rbx, rax\n\tmov     ebp, 0\n\tmov     rax, [rsi+r11*8+24]\n\tadc     rcx, rdx\n.28:mul     r12\n\tadd     r11, 4\n\tjnz     .24\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tadc     ebp, 
0\n\tadd     [rdi-8], rbx\n\tadc     [rdi], rcx\n\tadc     [rdi+8], rbp\n\tsub     r8, 2\n\tjz      .40\n\tlea     r15, [r15+16]\n\tlea     rsi, [rsi-16]\n\tmov     r11, r13\n\tmov     r12, [r15]\n\tmov     r9, [r15+8]\n\tjmp     r14\n\n\txalign  16\n.29:xor     ebx, ebx\n\txor     ecx, ecx\n\txor     ebp, ebp\n\tneg     r13\n\tmov     eax, r8d\n\tand     eax, 3\n\tjz      .31\n\tcmp     eax, 2\n\tjc      .32\n\tjz      .33\n.30:lea     r15, [r15-8]\n\tmov     r10, r15\n\tadd     r8, 1\n\tmov     r11, r8\n\tlea     r14, [rel .36]\n\tjmp     .36\n.31:mov     r10, r15\n\tmov     r11, r8\n\tlea     r14, [rip+0]\n\tmov     rax, [rsi+r11*8-8]\n\tjmp     .35\n.32:lea     r15, [r15+8]\n\tmov     r10, r15\n\tadd     r8, 3\n\tmov     r11, r8\n\tlea     r14, [rip+0]\n\tmov     rax, [r10-8]\n\tjmp     .38\n.33:lea     r15, [r15-16]\n\tmov     r10, r15\n\tadd     r8, 2\n\tmov     r11, r8\n\tlea     r14, [rip+0]\n\tmov     rax, [r10+16]\n\tjmp     .37\n\n\txalign  16\n.34:add     rbx, rax\n\tadc     rcx, rdx\n\tmov     rax, [rsi+r11*8-8]\n\tadc     rbp, 0\n.35:mul     qword [r10]\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tadc     rbp, 0\n.36:mov     rax, [rsi+r11*8-16]\n\tmul     qword [r10+8]\n\tadd     rbx, rax\n\tmov     rax, [r10+16]\n\tadc     rcx, rdx\n\tadc     rbp, 0\n.37:mul     qword [rsi+r11*8-24]\n\tadd     rbx, rax\n\tmov     rax, [r10+24]\n\tadc     rcx, rdx\n\tlea     r10, [r10+32]\n\tadc     rbp, 0\n.38:mul     qword [rsi+r11*8-32]\n\tsub     r11, 4\n\tjnz     .34\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tadc     rbp, 0\n\tmov     [rdi+r13*8], rbx\n\tinc     r13\n\tjz      .39\n\tmov     r11, r8\n\tmov     r10, r15\n\tlea     rsi, [rsi+8]\n\tmov     rbx, rcx\n\tmov     rcx, rbp\n\txor     ebp, ebp\n\tjmp     r14\n.39:mov     [rdi], rcx\n\tmov     [rdi+8], rbp\n.40:WIN64_GCC_END\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/add_err1_n.asm",
    "content": "; PROLOGUE(mpn_add_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_add_err1(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_add_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    movsxd  r9, dword [rsp+stack_use+48]\n\tmov     r10, [rsp++stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\t\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\n\txalign  16\n.1:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tadc     
r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tadc     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tadc     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n    \n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tadc     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tjmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tadc     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define TR2 r10\n%define TR4 r10\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_add_nc\n    mov     r10, [rsp+0x28]\n    jmp     mpn_add_entry\n\n\tLEAF_PROC mpn_add_n\n\txor     r10, r10\n\nmpn_add_entry:\n\tmov     rax, r9\n\tmov     r9, rcx\n\tmov     rcx, rax\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     r9, [r9+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     r8, [r8+rcx*8]\n\tneg     rcx\n\tlea     rcx, [r10+rcx*2]\n\tsar     rcx, 1\n\tjz      .2\n\n\txalign  16\n.1:\tmov     r10, [rdx+rcx*8]\n\tmov     r11, [rdx+rcx*8+16]\n\tadc     r10, [r8+rcx*8]\n\tmov     [r9+rcx*8], r10\n\tmov     TR2, [rdx+rcx*8+8]\n\tadc     TR2, [r8+rcx*8+8]\n\tmov     [r9+rcx*8+8], 
TR2\n\tlea     rcx, [rcx+4]\n\tmov     TR4, [rdx+rcx*8-8]\n\tadc     r11, [r8+rcx*8-16]\n\tadc     TR4, [r8+rcx*8-8]\n\tmov     [r9+rcx*8-16], r11\n\tmov     [r9+rcx*8-8], TR4\n\tjrcxz   .2\n\tjmp     .1\n.2:\tsbb     rcx, rcx\n.3:\tcmp     rax, 2\n\tja      .6\n\tjz      .7\n\tjp      .5\n.4:\tsub     rax, rcx\n\tret\n\t\n\txalign  16\n.5:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.6:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tmov     r11, [rdx+16]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tadc     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tadc     r11, [r8+16]\n\tmov     [r9+16], r11\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.7:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tadc     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tadc     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/addadd_n.asm",
    "content": "; PROLOGUE(mpn_addadd_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_addadd_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        adc     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        adc     rbx, [rcx+r10*8+8]\n        mov     r11, [rdx+r10*8+24]\n        mov     rbp, [rdx+r10*8+16]\n        adc     rbp, [rcx+r10*8+16]\n        adc     r11, [rcx+r10*8+24]\n       
 lahf    \n        add     r9b, 255\n        adc     r8, [rsi+r10*8]\n        adc     rbx, [rsi+r10*8+8]\n        mov     [rdi+r10*8], r8\n        adc     rbp, [rsi+r10*8+16]\n        adc     r11, [rsi+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        adc     r8, [rcx]\n        mov     rbx, [rdx+8]\n        adc     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        adc     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi]\n        adc     rbx, [rsi+8]\n        mov     [rdi], r8\n        adc     rbp, [rsi+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        adc     r8, [rcx+8]\n        mov     rbx, [rdx+16]\n        adc     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+8]\n        adc     rbx, [rsi+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        adc     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end"
  },
  {
    "path": "mpn/x86_64w/nehalem/addlsh1_n.asm",
    "content": "; PROLOGUE(mpn_addlsh1_n)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addlsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Core2\n    BITS 64\n\n    FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n    lea     rdx, [rdx+r9*8]\n    lea     r8, [r8+r9*8]\n    lea     rcx, [rcx+r9*8]\n    neg     r9\n    xor     rsi, rsi\n    xor     rax, rax\n    test    r9, 3\n    jz      .2\n.1:\tmov     rdi, [r8+r9*8]\n    add     rsi, 1\n    adc     rdi, rdi\n    sbb     rsi, rsi\n    add     rax, 1\n    adc     rdi, [rdx+r9*8]\n    sbb     rax, rax\n    mov     [rcx+r9*8], rdi\n    add     r9, 1           ; ***\n    test    r9, 3\n    jnz     .1\n.2: cmp     r9, 0\n    jz      .4\n\n    xalign  16\n.3: mov     rdi, [r8+r9*8]\n    mov     rbx, [r8+r9*8+8]\n    mov     r10, [r8+r9*8+16]\n    mov     r11, [r8+r9*8+24]\n    add     rsi, 1\n    adc     rdi, rdi\n    adc     rbx, rbx\n    adc    
 r10, r10\n    adc     r11, r11\n    sbb     rsi, rsi\n    add     rax, 1\n    adc     rdi, [rdx+r9*8]\n    adc     rbx, [rdx+r9*8+8]\n    adc     r10, [rdx+r9*8+16]\n    adc     r11, [rdx+r9*8+24]\n    sbb     rax, rax\n    mov     [rcx+r9*8], rdi\n    mov     [rcx+r9*8+8], rbx\n    mov     [rcx+r9*8+16], r10\n    mov     [rcx+r9*8+24], r11\n    add     r9, 4\n    jnz     .3\n.4: add     rax, rsi\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/addlsh_n.asm",
    "content": "; PROLOGUE(mpn_addlsh_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_addlsh_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint, mp_limb_t)\n;  mp_limb_t mpn_addlsh_nc(mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                        rdi     rsi     rdx        rcx       r8         r9\n;  rax                        rcx     rdx      r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list   rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addlsh_n, 0, reg_save_list\n\tmov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\tmov     r8d, dword [rsp+stack_use+40]\n\n\tmov     rbx, 4\n\tsub     rbx, rcx\n\tmov     rcx, 64\n\tsub     rcx, r8\n\tmov     r12, 0\n\tmov     rax, 0\n\tmov     r8, [rdx+rbx*8]\n\tcmp     rbx, 0\n\tjge     .2\n\n\txalign  16\n.1:\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r12, r8, cl\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, 
cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tmov     r8, [rdx+rbx*8+32]\n\tadd     rbx, 4\n\tjnc     .1\n\n\txalign  16\n.2:\tcmp     rbx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tmov     r10, [rdx+rbx*8+16]\n\tmov     r11, [rdx+rbx*8+24]\n\tshrd    r8, r9, cl\n\tshrd    r9, r10, cl\n\tshrd    r10, r11, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     r12, r11\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     r10, [rsi+rbx*8+24]\n\tmov     [rdi+rbx*8+24], r10\n\tmov     [rdi+rbx*8+16], r9\n\tlahf\n\tshr     r12, cl\n\tsahf\n\tadc     r12, 0\n\tmov     rax, r12\n\tEXIT_PROC reg_save_list\n\n.4:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     r10, [rdx+rbx*8+16]\n\tshrd    r9, r10, cl\n\tshr     r10, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     r9, [rsi+rbx*8+16]\n\tadc     rax, r10\n\tmov     [rdi+rbx*8+16], r9\n\tEXIT_PROC reg_save_list\n\n.5:\tshrd    r12, r8, cl\n\tmov     r9, [rdx+rbx*8+8]\n\tshrd    r8, r9, cl\n\tshr     r9, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, [rsi+rbx*8+8]\n\tmov     rax, 0\n\tmov     [rdi+rbx*8+8], r8\n\tadc     rax, r9\n\tEXIT_PROC reg_save_list\n\n.6:\tshrd    r12, r8, cl\n\tshr     r8, cl\n\tsahf\n\tadc     r12, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r12\n\tadc     r8, 0\n\tmov     rax, r8\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, 
BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_addmul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_addmul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_nehalem_addmul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n    EXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    add      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], a_z\n    adc     rdx, 0\n    add      b_z, rax\n    mov     rax, 0\n   
 mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.5:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    add      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    add      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.6:\tmov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea  
   %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, add\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, add\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    add      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    add      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.2 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/addsub_n.asm",
    "content": "; PROLOGUE(mpn_addsub_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_addsub_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        sbb     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        sbb     rbx, [rcx+r10*8+8]\n        mov     r11, [rdx+r10*8+24]\n        mov     rbp, [rdx+r10*8+16]\n        sbb     rbp, [rcx+r10*8+16]\n        sbb     r11, [rcx+r10*8+24]\n       
 lahf    \n        add     r9b, 255\n        adc     r8, [rsi+r10*8]\n        adc     rbx, [rsi+r10*8+8]\n        mov     [rdi+r10*8], r8\n        adc     rbp, [rsi+r10*8+16]\n        adc     r11, [rsi+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     \n        cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        sbb     r8, [rcx]\n        mov     rbx, [rdx+8]\n        sbb     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        sbb     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi]\n        adc     rbx, [rsi+8]\n        mov     [rdi], r8\n        adc     rbp, [rsi+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        sbb     r8, [rcx+8]\n        mov     rbx, [rdx+16]\n        sbb     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+8]\n        adc     rbx, [rsi+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        sbb     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        adc     r8, [rsi+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        sbb     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end"
  },
  {
    "path": "mpn/x86_64w/nehalem/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  mpn_and_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_and_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpand    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  mpn_andn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n    \n\tLEAF_PROC mpn_andn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpandn   xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  
xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  mpn_com_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_com_n\n\tmov     r9, 3\n\tlea     rdx, [rdx+r8*8-24]\n\tpcmpeqb xmm2, xmm2\n\tsub     r9, r8\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r9*8]\n\tmovdqu  xmm1, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tadd     r9, 4\n\tpxor    xmm1, xmm2\n\tmovdqu  [rcx], xmm0\n\tmovdqu  [rcx+16], xmm1\n\tlea     rcx, [rcx+32]\n\tjnc     .1\n.2:\tcmp     r9, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmovdqu  xmm0, [rdx+r9*8]\n\tmov     rax, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tnot     rax\n\tmovdqu  [rcx], xmm0\n\tmov     [rcx+16], rax\n\tret\n\n.4:\tmovdqu  xmm0, [rdx+r9*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx], xmm0\n\tret\n\n.5:\tmov     rax, [rdx+r9*8]\n\tnot     rax\n\tmov     [rcx], rax\n.6:\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+16]\n\tlea     rcx, [rcx+16]\n\tsub     r8, 4\n\tjc      .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  xmm1, [rdx+r8*8-16]\n\tsub     r8, 4\n\tmovdqu  [rcx+r8*8-16+32], xmm1\n\tmovdqu  [rcx+r8*8+32], xmm0\n\tjnc     .1\n.2:\tcmp     r8, -2\n\tjg      .5\n\tje      .6\n\tjnp     .4\n.3:\tmov     rax, [rdx+r8*8+8]\n\tmov     [rcx+r8*8+8], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r8*8]\n\tmov     rax, [rdx+r8*8-8]\n\tmov     [rcx+r8*8-8], rax\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\n.6:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  mpn_copyi\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_copyi\n\tmov     r9, 3\n\tlea     rdx, [rdx+r8*8-24]\n\tlea     rcx, [rcx+r8*8-24]\n\tsub     r9, r8\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r9*8]\n\tmovdqu  xmm1, [rdx+r9*8+16]\n\tadd     r9, 4\n\tmovdqu  [rcx+r9*8+16-32], xmm1\n\tmovdqu  [rcx+r9*8-32], xmm0\n\tjnc     .1\n.2:\tcmp     r9, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmovdqu  xmm0, [rdx+r9*8]\n\tmov     rax, [rdx+r9*8+16]\n\tmov     [rcx+r9*8+16], rax\n\tmovdqu  [rcx+r9*8], xmm0\n\tret\n\n.4:\tmovdqu  xmm0, [rdx+r9*8]\n\tmovdqu  [rcx+r9*8], xmm0\n\tret\n\n.5:\tmov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n.6:\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conver11on Copyright 2009 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8\n\n%include \"yasm_mac.inc\"\n\n        CPU  nehalem\n        BITS 64\n\n        LEAF_PROC mpn_divexact_byff\n        mov     r11, rdx\n        mov     r10d, 3\n        lea     r11, [r11+r8*8-24]\n        lea     rcx, [rcx+r8*8-24]\n        sub     r10, r8\n        mov     r8, 0\n        mov\t    r9, 1\n        jnc     .2\n        \n        align   16\n.1:     mov     rax, [r11+r10*8]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8], r8\n\t    sbb     r8, rdx\n\t    mov     rax, [r11+r10*8+8]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8+8], r8\n\t    sbb     r8, rdx\n\t    mov     rax, [r11+r10*8+16]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8+16], r8\n\t    sbb     r8, rdx\n\t    mov     rax, [r11+r10*8+24]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     
[rcx+r10*8+24], r8\n\t    sbb     r8, rdx\n\t    add     r10, 4\n\t    jnc     .1\n.2:     test    r10, 2\n        jnz     .3\n\t    mov     rax, [r11+r10*8]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8], r8\n\t    sbb     r8, rdx\n\t    mov     rax, [r11+r10*8+8]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8+8], r8\n\t    sbb     r8, rdx\n\t    add     r10, 2\n.3:     test    r10, 1\n        jnz     .4\n\t    mov     rax, [r11+r10*8]\n\t    mul     r9\n\t    sub     r8, rax\n\t    mov     [rcx+r10*8], r8\n\t    sbb     r8, rdx\n.4:     mov\t    rcx,0xFFFFFFFFFFFFFFFF\n        imul    r8, rcx\n        mov     rax, r8\n        neg     rax\n        ret\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  This file is part of the MPIR Library.\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                   rdi     rsi        rdx        rcx\n;  rax                                   rcx     rdx         r8         r9\n\t\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-8]\n\tlea     rsi, [rdx+rax*8-8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov    
 r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     r10, 0\n\tadd     r9, 2\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8]\n\tmov     r12, [rsi+r9*8+8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\tmov     [rdi+r9*8-16], rax\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8]\n\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     [rdi+r9*8-8], rdx\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\tEND_PROC reg_save_list\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  AMD64 mpn_hamdist\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_hamdist\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     r9, [rcx+r8*8-8]\n\tmov     rcx, 1\n\txor     eax, eax\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tmov     r11, [r9+rcx*8+8]\n\txor     r11, [rdx+rcx*8+8]\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 2\n\tjnc     .0\n.1: jne     .2\n\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tadd     rax, r10\n.2:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  mpn_ior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_ior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpor     xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tor      rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  mpn_iorn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_iorn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm1, [rdx+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [r8+r10*8+16-32]\n\tpandn   xmm1, xmm3\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [rdx+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tnot     rax\n\tor      rax, r9\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], 
xmm0\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm2, [r8+r10*8]\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     
[rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, 
[rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add     rdx, 1\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        lea     rcx, [rcx + 1]\n        jc      .7\n        mov     rcx, 3\n.8:     and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        lea     rdx, [rdx + 1]\n        jc      .9\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n    
    lea     rcx, [rcx + 1]\n        jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    lea     %1, [%1 + 1]\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n      
  mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc  
   r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd then do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accumulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quarter block by 
simply waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_lshift, 0, reg_save_list\n; odd and even n seem to have different runtimes\n    mov     rbx, r8\n    lea     rsi, [rdx+24]\n    lea     rdi, [rcx+24]\n    mov     ecx, r9d\n\n    mov     rdx, [rsi+rbx*8-32]\n    xor     rax, rax\n    shld    rax, rdx, cl\n    sub     rbx, 5\n    js      .2\n    \n\txalign  16\n.1:\tmov     r8, [rsi+rbx*8]\n    mov     r11, [rsi+rbx*8-24]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    mov     rdx, r11\n    mov     r10, [rsi+rbx*8-16]\n    shld    r8, r9, cl\n    shld    r9, r10, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    shld    r10, r11, cl\n    sub     rbx, 4\n    mov     
[rdi+rbx*8+16], r10\n    jns     .1\n.2:\tcmp     rbx, -2\n    ja      .4\n    je      .5\n    jp      .6\n; ALIGN(16)\n.3:\tshl     rdx, cl\n    mov     [rdi+rbx*8+8], rdx\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.4:\tmov     r8, [rsi+rbx*8]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    mov     r10, [rsi+rbx*8-16]\n    shld    r8, r9, cl\n    shld    r9, r10, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    shl     r10, cl\n    mov     [rdi+rbx*8-16], r10\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.5:\tmov     r8, [rsi+rbx*8]\n    mov     r9, [rsi+rbx*8-8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    shld    r8, r9, cl\n    shl     r9, cl\n    mov     [rdi+rbx*8], r8\n    mov     [rdi+rbx*8-8], r9\n    EXIT_PROC reg_save_list\n\n    xalign  16\n.6:\tmov     r8, [rsi+rbx*8]\n    shld    rdx, r8, cl\n    mov     [rdi+rbx*8+8], rdx\n    shl     r8, cl\n    mov     [rdi+rbx*8], r8\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n        FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rcx, r8\n\n        mov     r13, [rsi+rcx*8-8]\n        mov     rax, [rsi+rcx*8-16]\n        mov     r8, [r9]\n        mov     r9, [r9+8]\n        xor     r11, r11\n        mov     r10, [rsi+rcx*8-24]\n        sub     rcx, 3\n        jz      .2\n\n        align   16\n.1:     mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        lea     rax, [r13]\n        lea     r13, [r11]\n        mul     r9\n        add     rax, r10\n        adc     r13, rdx\n        xor     r11, r11\n        mov     r10, [rsi+rcx*8-8]\n        lea     r8, [r8]\n        dec     rcx\n        lea     r9, [r9]\n        jnz     
.1\n.2:     mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        lea     rax, [r13]\n        lea     r13, [r11]\n        mul     r9\n        add     rax, r10\n        adc     r13, rdx\n        mov     [rdi], rax\n        mov     rax, r8\n        mul     r13\n        add     [rdi], rax\n        adc     rdx, 0\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n    FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n    mov     rsi, rdx\n\tmov     rdi, r8\n\n\tmov     r14, [rsi+rdi*8-8]\n\tmov     r13, [rsi+rdi*8-16]\n\tmov     r11, [rsi+rdi*8-32]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r9, [r9+8]\n\tmov     rax, [rsi+rdi*8-24]\n\tsub     rdi, 6\n\tjc      .2\n\t\n\txalign  16\n.1:\tmul     r8\n\tmov     r12, 0\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     rax, r9\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r10\n\tmul     r14\n\tadd     r13, rax\n\tmov     r11, [rsi+rdi*8+0]\n\tmov     r14, r12\n\tadc     r14, rdx\n\tmov     rax, [rsi+rdi*8+8]\n\tsub     rdi, 2\n\tjnc     .1\n.2:\tmul     r8\n\tmov     r12, 0\n\tadd   
  r11, rax\n\tadc     r12, rdx\n\tmov     rax, r9\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r10\n\tmul     r14\n\tadd     r13, rax\n\tmov     r14, r12\n\tadc     r14, rdx\n\tcmp     rdi, -2\n\tje      .4\n.3:\tmov     r11, [rsi+rdi*8+8]\n\tmov     r12, 0\n\tmov     rax, r8\n\tmul     r13\n\tadd     r11, rax\n\tadc     r12, rdx\n\tmov     r13, r11\n\tmov     rax, r9\n\tmul     r14\n\tadd     r13, rax\n\tmov     r14, r12\n\tadc     r14, rdx\n.4:\tmov     rax, r8\n\tmul     r14\n\tadd     r13, rax\n\tadc     rdx, 0\n\tmov     [rcx], r13\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n\tmov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd  
   r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 40() 
8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n;  Copyright 2010 Jason Moxham\n;\n;  Windows Conversion Copyright 2010 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  nehalem\n    BITS 64\n\n%define reg_save_list   rsi, rdi\n\n    LEAF_PROC mpn_mul_1c\n    mov     r11, [rsp+0x28]\n    jmp     start\n\n    LEAF_PROC mpn_mul_1\n    xor     r11, r11\n\n    xalign  16\nstart:\n    FRAME_PROC ?mpn_nehalem_mul, 0, reg_save_list\n    mov     rax, r8\n\tmov     r8, 3\n\tlea     rdi, [rcx+rax*8-24]\n\tlea     rsi, [rdx+rax*8-24]\n    mov     rcx, r9\n\tsub     r8, rax\n\tmov     r9d, 0\n\tmov     rax, [rsi+r8*8+24-24]\n\tjnc     .1\n\n\txalign  16\n.0:\tmul     rcx\n\tadd     r11, rax\n\tmov     [rdi+r8*8], r11\n\tmov     r10d, 0\n\tmov     rax, [rsi+r8*8+8]\n\tadc     r10, rdx\n\tmov     r11d, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     r9d, 0\n\tmov     rax, 
[rsi+r8*8+16]\n\tmov     [rdi+r8*8+8], r10\n\tadc     r9, rdx\n\tmul     rcx\n\tadd     r9, rax\n\tmov     rax, [rsi+r8*8+24]\n\tmov     [rdi+r8*8+16], r9\n\tadc     r11, rdx\n\tadd     r8, 3\n\tjnc     .0\n.1:\n\tcmp     r8, 1\n\tja      .4\n\tje      .3\n.2:\tmul     rcx\n\tadd     r11, rax\n\tmov     [rdi], r11\n\tmov     r10d, 0\n\tmov     rax, [rsi+8]\n\tadc     r10, rdx\n\tmov     r11d, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     r9d, 0\n\tmov     rax, [rsi+16]\n\tmov     [rdi+8], r10\n\tadc     r9, rdx\n\tmul     rcx\n\tadd     r9, rax\n\tmov     [rdi+16], r9\n\tadc     r11, rdx\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n.3:\tmul     rcx\n\tadd     r11, rax\n\tmov     [rdi+8], r11\n\tmov     r10d, 0\n\tmov     rax, [rsi+16]\n\tadc     r10, rdx\n\tmov     r11d, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     r9d, 0\n\tmov     [rdi+16], r10\n\tadc     r9, rdx\n\tmov     rax, r9\n\tEXIT_PROC reg_save_list\n\n.4:\tmul     rcx\n\tadd     r11, rax\n\tmov     [rdi+16], r11\n\tmov     r10d, 0\n\tadc     r10, rdx\n\tmov     rax, r10\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2010 Jason Moxham\n;\n;  Windows Conversion Copyright 2010 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n        CPU  nehalem\n        BITS 64\n\n    \tFRAME_PROC mpn_mul_2, 0, reg_save_list\n        mov     rbx, 3\n        lea     rdi, [rcx+r8*8-24]\n        lea     rsi, [rdx+r8*8-24]\n        sub     rbx, r8\n        mov     r8, [r9+8]\n        mov     rcx, [r9]\n\n        mov     r11, 0\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8]\n        mov     r10, 0\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8]\n        mov     [rdi+rbx*8], r11\n        adc     r9, rdx\n        cmp     rbx, 0\n        jge     .2\n\n        align   16\n.1:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n  
      adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        mov     rax, [rsi+rbx*8+24]\n        mov     r10, 0\n        adc     r9, rdx\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8+24]\n        mov     [rdi+rbx*8+24], r11\n        adc     r9, rdx\n        adc     r10, 0\n        add     rbx, 3\n        jnc     .1\n.2:     cmp     rbx, 1\n        ja      .5\n        je      .4\n.3:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        adc     r9, rdx\n        mov     [rdi+rbx*8+24], r11\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n\n.4:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     
[rdi+rbx*8+16], r10\n        mov     rax, r11\n        EXIT_PROC reg_save_list\n\n.5:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     rax, r10\n        END_PROC reg_save_list\n        \n        end    \n        \n"
  },
  {
    "path": "mpn/x86_64w/nehalem/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro 
mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, 
r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro 
oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:\n    oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      __gmpn_nehalem_mbc2\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_nehalem_mbc1, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      
.2\n.1:\toldmpn_muladdmul_1_int 0\n\tWIN64_GCC_EXIT frame\n\n    xalign  16\n.2:\toldmpn_muladdmul_1_int 1\n\tWIN64_GCC_EXIT frame\n\n    xalign  16\n.3:\toldmpn_muladdmul_1_int 2\n\tWIN64_GCC_EXIT frame\n\n    xalign  16\n.4:\toldmpn_muladdmul_1_int 3\n\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\n.6:\tWIN64_GCC_PROC mpn_nehalem_mbc2, 5, frame\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .13\n.7:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .8\n    mul1lp\n.8:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .12\n    jz      .11\n    jp      .10\n.9:\tmulnext0\n    jmp     .21\n\n.10:mulnext1\n    jmp     .15\n\n.11:mulnext2\n    jmp     .17\n\n.12:mulnext3\n    jmp     .19\n\n    ; as all the mul2pro? are the same\n.13:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .20\n    jz      .18\n    jp      .16\n.14:mul2epi3\n.15:mpn_addmul_2_int 3\n    WIN64_GCC_EXIT frame\n\n.16:mul2epi2\n.17:mpn_addmul_2_int 2\n    WIN64_GCC_EXIT frame\n.18:mul2epi1\n.19:mpn_addmul_2_int 1\n    WIN64_GCC_EXIT frame\n\n.20:mul2epi0\n.21:mpn_addmul_2_int 0\n    WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  mpn_nand_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nand_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpand    xmm1, xmm3\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tand     rax, r9\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n\n;  mpn_nior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpor     xmm1, xmm3\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tor      rax, r9\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\teax                        rcx,      rdx\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_popcount\n\tmov     r8, 5\n\tlea     rcx, [rcx+rdx*8-40]\n\txor     eax, eax\n\tsub     r8, rdx\n\tjnc     .1\n\txalign  16\n.0:\n\tpopcnt  r9, [rcx+r8*8]\n\tpopcnt  r10, [rcx+r8*8+8]\n\tpopcnt  r11, [rcx+r8*8+16]\n\tpopcnt  rdx, [rcx+r8*8+24]\n\tadd     rax, r9\n\tadd     rax, rdx\n\tadd     rax, r10\n\tpopcnt  r9, [rcx+r8*8+32]\n\tpopcnt  r10, [rcx+r8*8+40]\n\tadd     rax, r9\n\tadd     rax, r11\n\tadd     rax, r10\n\tadd     r8, 6\n\tjnc     .0\n.1:\n\tlea     rdx, [rel .2]\n\tlea     r8, [r8+r8*8]\n\tadd     rdx, r8\n\tjmp     rdx\n.2:\n\tnop\n\tpopcnt  r9, [rcx]\n\tadd     rax, r9\n.3:\n\tpopcnt  r10, [rcx+8]\n\tadd     rax, r10\n.4:\tpopcnt  r11, [rcx+16]\n\tadd     rax, r11\n.5:\tpopcnt  rdx, [rcx+24]\n\tadd     rax, rdx\n.6:\tpopcnt  r9, [rcx+32]\n\tadd     rax, r9\n.7:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_nehalem_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:mpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/rsh1add_n.asm",
    "content": "; PROLOGUE(mpn_rsh1add_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rsh1add_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1add_n, 0, reg_save_list\n\tmov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tadd     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\tadc     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov   
  [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\tadc     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tadc     rcx, [rdx+r8*8+8]\n\tadc     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tadc     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/rsh1sub_n.asm",
    "content": "; PROLOGUE(mpn_rsh1sub_n)\n\n;  AMD64 mpn_rsh1sub_n\n\n;  Copyright 2009 Jason Moxham\n\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;\t(rdi,rcx)=((rsi,rcx)-(rdx,rcx))/2\n; return bottom bit of difference\n; subtraction treated as two's complement\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rsh1sub_n, 0, reg_save_list\n\tmov     rax, r9\n\tlea     rdi, [rcx+rax*8-32]\n\tlea     rsi, [rdx+rax*8-32]\n\tlea     rdx, [r8+rax*8-32]\n\tmov     rcx, rax\n\n\tmov     r8, 4\n\tsub     r8, rcx\n\tmov     r12, [rsi+r8*8]\n\tsub     r12, [rdx+r8*8]\n\tsbb     rbx, rbx\n\tmov     rax, r12\n\tand     rax, 1\n\tcmp     r8, 0\n\tjge     .2\n.1:\tmov     r11, [rsi+r8*8+32]\n\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\tsbb     r11, [rdx+r8*8+32]\n\tsbb     rbx, rbx\n\tbt      r11, 0\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n\tmov     r12, r11\n\tadd     r8, 4\n\tjnc     .1\n.2:\tcmp     r8, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3:\tmov     r10, [rsi+r8*8+24]\n\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\tsbb     r10, [rdx+r8*8+24]\n\trcr     r10, 1\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n\tmov     [rdi+r8*8+24], r10\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.4:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tmov     r9, [rsi+r8*8+16]\n\tsbb     rcx, [rdx+r8*8+8]\n\tsbb     r9, [rdx+r8*8+16]\n\trcr     r9, 1\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n\tmov     [rdi+r8*8+16], r9\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tadd     rbx, 1\n\tmov     rcx, [rsi+r8*8+8]\n\tsbb     rcx, [rdx+r8*8+8]\n\trcr     rcx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n\tmov     [rdi+r8*8+8], rcx\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.6:\tadd     rbx, 1\n\trcr     r12, 1\n\tmov     [rdi+r8*8], r12\n.7:\tEND_PROC reg_save_list\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n\tmov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extention of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // eailer , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\t\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store 
low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi     rsi        rdx      rcx\n;  rax                     rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi\n\n    CPU  Athlon64\n    BITS 64\n\n    FRAME_PROC mpn_rshift, 0, reg_save_list\n; when n=1 mod4 seem to have different runtimes\n    \tmov     rax, r8\n\tmov     rbx, 4\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, r9\n\tsub     rbx, rax\n\n\txor     rax, rax\n\tmov     rdx, [rsi+rbx*8-8]\n\tshrd    rax, rdx, cl\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     r8, [rsi+rbx*8]\n\tmov     r11, [rsi+rbx*8+24]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tmov     r10, [rsi+rbx*8+16]\n\tshrd    r9, r10, cl\n\tmov     rdx, r11\n\tmov     [rdi+rbx*8+8], r9\n\tshrd    r10, r11, cl\n\tadd     rbx, 4\n\tmov     [rdi+rbx*8-16], r10\n\tjnc     .1\n.2:\tcmp     rbx, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tmov     r10, [rsi+rbx*8+16]\n\tshrd    r9, r10, cl\n\tmov     [rdi+rbx*8+8], r9\n\tshr     r10, cl\n\tmov     [rdi+rbx*8+16], r10\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tmov     r9, [rsi+rbx*8+8]\n\tshrd    r8, r9, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n\tshr     r9, cl\n\tmov     [rdi+rbx*8+8], r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.5:\tmov     r8, [rsi+rbx*8]\n\tshrd    rdx, r8, cl\n\tshr     r8, cl\n\tmov     [rdi+rbx*8-8], rdx\n\tmov     [rdi+rbx*8], r8\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6:\tshr     rdx, cl\n\tmov     [rdi+rbx*8-8], rdx\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  mpn_store\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n%define\tMOVQ\tmovd\n\n\tLEAF_PROC mpn_store\n\tlea     rcx, [rcx-32]\n\tcmp     rdx, 0\n\tjz      .4\n\tMOVQ    xmm0, r8\n\tmovddup xmm0, xmm0\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+32], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 4\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+32]\n\tsub     rdx, 4\n\tmovdqa  [rcx], xmm0\n\tmovdqa  [rcx+16], xmm0\n\tjnc     .2\n.3:\tcmp     rdx, -2\n\tja      .5\n\tjz      .7\n\tjp      .6\n.4:\tret\n\n.5:\tmovdqa  [rcx+32], xmm0\n.6:\tmov     [rcx+rdx*8+56], r8\n\tret\n\n.7:\tmovdqa  [rcx+32], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/sub_err1_n.asm",
    "content": "; PROLOGUE(mpn_sub_err1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sub_err1(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi     rdx     rcx       r8         r9     8(rsp)\n;  rax                       rcx     rdx      r8      r9 [rsp+40]   [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    FRAME_PROC mpn_sub_err1_n, 0, reg_save_list\n\tmov     [rsp+stack_use+32], r9\n    movsxd  r9, dword [rsp+stack_use+48]\n\tmov     r10, [rsp+stack_use+56]\n\tlea     rdi, [rcx+r9*8-24]\n\tlea     rsi, [rdx+r9*8-24]\n\tlea     rdx, [r8+r9*8-24]\n\tmov     r8, [rsp+stack_use+40]\n\n\tmov     r11, 3\n\tshl     r10, 63\n\tlea     r8, [r8+r9*8]\n\tsub     r11, r9\n\tmov     r9, 0\n\tmov     rax, 0\n\tmov     rbx, 0\n\tjnc     .2\n\t\n\txalign  16\n.1: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tmov     r15, [rsi+r11*8+24]\n\tmov     rbp, 0\n\tshl     r10, 1\n\tsbb     
r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\tsbb     r15, [rdx+r11*8+24]\n\tcmovc   rbp, [r8-32]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tmov     rax, 0\n\tadc     r10, 0\n\tlea     r8, [r8-32]\n\tadd     r9, rbp\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n\tmov     [rdi+r11*8+24], r15\n\tmov     rbx, 0\n\tadd     r11, 4\n\tjnc     .1\n.2: cmp     r11, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tmov     r14, [rsi+r11*8+16]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\tmov     rcx, 0\n\tsbb     r14, [rdx+r11*8+16]\n\tcmovc   rcx, [r8-24]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tadd     r9, rcx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n\tmov     [rdi+r11*8+16], r14\n    jmp     .6\n\n\txalign  16\n.4: mov     r12, [rsi+r11*8]\n\tmov     r13, [rsi+r11*8+8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\tsbb     r13, [rdx+r11*8+8]\n\tcmovc   rbx, [r8-16]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tadd     r9, rbx\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n\tmov     [rdi+r11*8+8], r13\n    jmp     .6\n\n\txalign  16\n.5: mov     r12, [rsi+r11*8]\n\tshl     r10, 1\n\tsbb     r12, [rdx+r11*8]\n\tcmovc   rax, [r8-8]\n\trcr     r10, 1\n\tadd     r9, rax\n\tadc     r10, 0\n\tmov     [rdi+r11*8], r12\n.6:\tmov     rcx, [rsp+stack_use+32]\n\tmov     [rcx], r9\n\tbtr     r10, 63\n\tmov     [rcx+8], r10\n    mov     rax, 0\n\tadc     rax, 0\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define TR2 r10\n%define TR4 r10\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_sub_nc\n    mov     r10, [rsp+0x28]\n    jmp     mpn_sub_entry\n\n\tLEAF_PROC mpn_sub_n\n\txor     r10, r10\n\nmpn_sub_entry:\n\tmov     rax, r9\n\tmov     r9, rcx\n\tmov     rcx, rax\n\tand     rax, 3\n\tsub     rcx, rax\n\tlea     r9, [r9+rcx*8]\n\tlea     rdx, [rdx+rcx*8]\n\tlea     r8, [r8+rcx*8]\n\tneg     rcx\n\tlea     rcx, [r10+rcx*2]\n\tsar     rcx, 1\n\tjz      .2\n\n\txalign  16\n.1:\tmov     r10, [rdx+rcx*8]\n\tmov     r11, [rdx+rcx*8+16]\n\tsbb     r10, [r8+rcx*8]\n\tmov     [r9+rcx*8], r10\n\tmov     TR2, [rdx+rcx*8+8]\n\tsbb     TR2, [r8+rcx*8+8]\n\tmov     [r9+rcx*8+8], 
TR2\n\tlea     rcx, [rcx+4]\n\tmov     TR4, [rdx+rcx*8-8]\n\tsbb     r11, [r8+rcx*8-16]\n\tsbb     TR4, [r8+rcx*8-8]\n\tmov     [r9+rcx*8-16], r11\n\tmov     [r9+rcx*8-8], TR4\n\tjrcxz   .2\n\tjmp     .1\n.2:\tsbb     rcx, rcx\n.3:\tcmp     rax, 2\n\tja      .6\n\tjz      .7\n\tjp      .5\n.4:\tsub     rax, rcx\n\tret\n\t\n\txalign  16\n.5:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.6:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tmov     r11, [rdx+16]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tsbb     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     r11, [r8+16]\n\tmov     [r9+16], r11\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\t\n\txalign  16\n.7:\tadd     rcx, rcx\n\tmov     r10, [rdx]\n\tsbb     r10, [r8]\n\tmov     [r9], r10\n\tmov     TR2, [rdx+8]\n\tsbb     TR2, [r8+8]\n\tmov     [r9+8], TR2\n\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/subadd_n.asm",
    "content": "; PROLOGUE(mpn_subadd_n)\n;        \n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx          r8\n;  rax                       rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_subadd_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        sub     r10, rbx\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rsi+r10*8]\n        sbb     r8, [rcx+r10*8]\n        mov     rbx, [rsi+r10*8+8]\n        sbb     rbx, [rcx+r10*8+8]\n        mov     r11, [rsi+r10*8+24]\n        mov     rbp, [rsi+r10*8+16]\n        sbb     rbp, [rcx+r10*8+16]\n        sbb     r11, [rcx+r10*8+24]\n    
    lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+r10*8]\n        sbb     rbx, [rdx+r10*8+8]\n        mov     [rdi+r10*8], r8\n        sbb     rbp, [rdx+r10*8+16]\n        sbb     r11, [rdx+r10*8+24]\n        setc    r9b\n        mov     [rdi+r10*8+24], r11\n        mov     [rdi+r10*8+16], rbp\n        mov     [rdi+r10*8+8], rbx\n        add     r10, 4\n        jnc     .1\n.2:     cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rsi]\n        sbb     r8, [rcx]\n        mov     rbx, [rsi+8]\n        sbb     rbx, [rcx+8]\n        mov     rbp, [rsi+16]\n        sbb     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx]\n        sbb     rbx, [rdx+8]\n        mov     [rdi], r8\n        sbb     rbp, [rdx+16]\n        setc    r9b\n        mov     [rdi+16], rbp\n        mov     [rdi+8], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rsi+8]\n        sbb     r8, [rcx+8]\n        mov     rbx, [rsi+16]\n        sbb     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+8]\n        sbb     rbx, [rdx+16]\n        mov     [rdi+8], r8\n        setc    r9b\n        mov     [rdi+16], rbx\n        sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rsi+16]\n        sbb     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r8, [rdx+16]\n        mov     [rdi+16], r8\n        setc    r9b\n.6:     sahf    \n        mov     eax, 0\n        adc     rax, 0\n        add     r9b, 255\n        adc     rax, 0\n        END_PROC reg_save_list\n\n        end\n  "
  },
  {
    "path": "mpn/x86_64w/nehalem/sublsh1_n.asm",
    "content": "; PROLOGUE(mpn_sublsh1_n)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                        rdi     rsi     rdx        rcx\n;  rax                        rcx     rdx      r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rbx, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n    mov     rax, r9\n\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    lea     rdi, [rcx+rax*8]\n    mov     rcx, rax\n\n    neg     rcx\n    xor     r9, r9\n    xor     rax, rax\n    test    rcx, 3\n    jz      .2\n.1: mov     r10, [rsi+rcx*8]\n    add     r9, 1\n    sbb     r10, [rdx+rcx*8]\n    sbb     r9, r9\n    add     rax, 1\n    sbb     r10, [rdx+rcx*8]\n    sbb     rax, rax\n    mov     [rdi+rcx*8], r10\n    add     rcx, 1              ; ***\n    test    rcx, 3\n    jnz     .1\n.2: cmp     rcx, 0\n    jz      .4\n\n    xalign  16\n.3: mov     r10, [rsi+rcx*8]\n    mov     rbx, [rsi+rcx*8+8]\n    mov     r11, [rsi+rcx*8+16]\n    mov     
r8, [rsi+rcx*8+24]\n    mov     r12, [rdx+rcx*8]\n    mov     r13, [rdx+rcx*8+8]\n    mov     r14, [rdx+rcx*8+16]\n    mov     r15, [rdx+rcx*8+24]\n    add     r9, 1\n    sbb     r10, r12\n    sbb     rbx, r13\n    sbb     r11, r14\n    sbb     r8, r15\n    sbb     r9, r9\n    add     rax, 1\n    sbb     r10, r12\n    sbb     rbx, r13\n    sbb     r11, r14\n    sbb     r8, r15\n    sbb     rax, rax\n    mov     [rdi+rcx*8], r10\n    mov     [rdi+rcx*8+8], rbx\n    mov     [rdi+rcx*8+16], r11\n    mov     [rdi+rcx*8+24], r8\n    add     rcx, 4\n    jnz     .3\n.4: add     rax, r9\n    neg     rax\n    END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n; add the result to a second limb vector.\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n;\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << 
UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    LEAF_PROC mpn_submul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_submul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_nehalem_submul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n    EXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    sub      a_z, a_x\n    adc     rax, a_y\n    mov     
[rp+index*8], a_z\n    adc     rdx, 0\n    sub      b_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.5:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    sub      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    sub      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.6:\tmov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     
[byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, sub\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, sub\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    sub      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    sub      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/sumdiff_n.asm",
    "content": "; PROLOGUE(mpn_sumdiff_n)\n;\n;  Copyright 2011 The Code Cavern\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;        \n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;        \n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)\n;  rax                        rdi     rsi     rdx     rcx          r8\n;  rax                        rcx     rdx      r8      r9    [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n        CPU  Nehalem\n        BITS 64\n\n        FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n        mov     rbx, [rsp+stack_use+40]\n        xor     rax, rax\n        mov     r10d, 3\n        lea     rdi, [rcx+rbx*8-24]\n        lea     rsi, [rdx+rbx*8-24]\n        sub     r10, rbx\n        lea     rdx, [r8+rbx*8-24]\n        lea     rcx, [r9+rbx*8-24]\n        mov     r9, rax\n        jnc     .2\n\n        align   16\n.1:     sahf    \n        mov     r8, [rdx+r10*8]\n        mov     r12, [rdx+r10*8+24]\n        mov     r11, r8\n        adc     r8, [rcx+r10*8]\n        mov     rbx, [rdx+r10*8+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+r10*8+8]\n        mov   
  rbp, [rdx+r10*8+16]\n        mov     r14, rbp\n        adc     rbp, [rcx+r10*8+16]\n        mov     r15, r12\n        adc     r12, [rcx+r10*8+24]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+r10*8]\n        mov     [rsi+r10*8], r11\n        sbb     r13, [rcx+r10*8+8]\n        sbb     r14, [rcx+r10*8+16]\n        sbb     r15, [rcx+r10*8+24]\n        setc    r9b\n        add     r10, 4\n        mov     [rdi+r10*8-32], r8\n        mov     [rdi+r10*8+16-32], rbp\n        mov     [rsi+r10*8+8-32], r13\n        mov     [rsi+r10*8+24-32], r15\n        mov     [rdi+r10*8+24-32], r12\n        mov     [rsi+r10*8+16-32], r14\n        mov     [rdi+r10*8+8-32], rbx\n        jnc     .1\n.2:     cmp     r10, 2\n        jg      .6\n        je      .5\n        jp      .4\n.3:     sahf    \n        mov     r8, [rdx]\n        mov     r11, r8\n        adc     r8, [rcx]\n        mov     rbx, [rdx+8]\n        mov     r13, rbx\n        adc     rbx, [rcx+8]\n        mov     rbp, [rdx+16]\n        mov     r14, rbp\n        adc     rbp, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx]\n        mov     [rsi], r11\n        sbb     r13, [rcx+8]\n        sbb     r14, [rcx+16]\n        setc    r9b\n        mov     [rdi], r8\n        mov     [rdi+16], rbp\n        mov     [rsi+8], r13\n        mov     [rsi+16], r14\n        mov     [rdi+8], rbx\n        sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.4:     sahf    \n        mov     r8, [rdx+8]\n        mov     r11, r8\n        adc     r8, [rcx+8]\n        mov     rbx, [rdx+16]\n        mov     r13, rbx\n        adc     rbx, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+8]\n        mov     [rsi+8], r11\n        sbb     r13, [rcx+16]\n        setc    r9b\n        mov     [rdi+8], r8\n        mov     [rsi+16], r13\n        mov     [rdi+16], rbx\n        sahf    
\n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        EXIT_PROC reg_save_list\n\n.5:     sahf    \n        mov     r8, [rdx+16]\n        mov     r11, r8\n        adc     r8, [rcx+16]\n        lahf    \n        add     r9b, 255\n        sbb     r11, [rcx+16]\n        mov     [rsi+16], r11\n        setc    r9b\n        mov     [rdi+16], r8\n.6:     sahf    \n        mov     rax, 0\n        adc     rax, 0\n        add     r9b, 255\n        rcl     rax, 1\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  mpn_xnor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xnor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpxor    xmm1, xmm3\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\txor     rax, r9\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/nehalem/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  mpn_xor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpxor    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\txor     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/netburst/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, 
BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_addmul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_addmul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_netburst_addmul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n    EXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.4: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    add      a_z, a_x\n    adc     rax, a_y\n    mov     [rp+index*8], a_z\n    adc     rdx, 0\n    add      b_z, rax\n    mov     rax, 0\n   
 mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.5: mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    add      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    add      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n    EXIT_PROC reg_save_list\n\n.6: mov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     [byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea    
 %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, add\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, add\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    add      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    add      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    add      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_and_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    and     r10, [r8]\n    and     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    and     T3, [r8-16]\n    and     T4, [r8-8]\n    mov     [rcx-16], T3\n    sub     r9, 1       ; ***\n    mov     [rcx-8], T4\n    jnz     .1\n.2: cmp     rax, 0      ; **\n    jz      .3\n    mov     r10, [rdx]\n    and     r10, [r8]\n    mov     [rcx], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, 
[rdx+8]\n    and     r10, [r8+8]\n    mov     [rcx+8], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+16]\n    and     r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_andn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    and     r10, [rdx+r9*8+24]\n    and     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    and     T3, [rdx+r9*8+8]\n    and     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    and     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                     rdi     rsi       rdx\n;                     rcx     rdx        r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_com_n\n    mov     rax, r8\n    sub     rax, 4\n    jc      .2\n\n    xalign  8\n.1: mov     r8, [rdx+rax*8+24]\n    mov     r9, [rdx+rax*8+16]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+24], r8\n    mov     [rcx+rax*8+16], r9\n    mov     r8, [rdx+rax*8+8]\n    mov     r9, [rdx+rax*8]\n    not     r8\n    not     r9\n    mov     [rcx+rax*8+8], r8\n    mov     [rcx+rax*8], r9\n    sub     rax, 4\n    jae     .1\n.2: add     rax, 4\n    jz      .3\n\n; Could still have potential cache-bank conflicts in this tail part\n\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, [rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n    sub     rax, 1\n    jz      .3\n    mov     r8, 
[rdx+rax*8-8]\n    not     r8\n    mov     [rcx+rax*8-8], r8\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_ior_n\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    jz      .2\n\n    xalign  8\n.1: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    or      r10, [r8]\n    or      r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     T3, [rdx-16]\n    mov     T4, [rdx-8]\n    or      T3, [r8-16]\n    or      T4, [r8-8]\n    mov     [rcx-16], T3\n    dec     r9\n    mov     [rcx-8], T4\n    jnz     .1\n.2: cmp     rax, 0      ; ***\n    jz      .3\n    mov     r10, [rdx]\n    or      r10, [r8]\n    mov     [rcx], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+8]\n    or    
  r10, [r8+8]\n    mov     [rcx+8], r10\n    sub     rax, 1      ; ***\n    jz      .3\n    mov     r10, [rdx+16]\n    or      r10, [r8+16]\n    mov     [rcx+16], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_iorn_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    or      r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    CPU  Athlon64\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], 
r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .6\n        jz      .4\n        jp      .3\n.2:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n.3:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 
1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n.4:     \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\n.5:     mov     rcx, 3\n.6:     \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     .8\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\n.7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      .7\n        mov     rcx, 3\n.8:     xor     r8, r8\n        shr     rax, 1\n        adc     r8, r8\n        shr     rax, 1\n        adc     r8, 0\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\n.9:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      .9\n        xor     r8, r8\n        shr     rbx, 1\n      
  adc     r8, r8\n        shr     rbx, 1\n        adc     r8, 0\n        shr     rbx, 1\n        adc     r8, 0\n        add     [rbp+24], r8\n.10:    adc     qword[rbp+rcx*8+8], 0\n        inc     rcx\n        jc      .10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quater block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrrow from second to third quarter quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrrow from third to fourth quarter quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n; Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n    LEAF_PROC mpn_mul_1c\n    mov     r11, [rsp+0x28]\n    jmp     start\n\n    LEAF_PROC mpn_mul_1\n    xor     r11, r11\n    jmp     start\n\n    xalign  16\nstart:\n    FRAME_PROC ?mpn_netburst_mul, 0, rbx\n    mov     rax, r8\n    mov     r8d, 3\n    lea     r10, [rdx+rax*8-24]\n    sub     r8, rax\n    lea     rcx, [rcx+rax*8-24]\n    jc      .1\n    jmp     .2\n\n    xalign  16\n.1: mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    mov     rax, [r10+r8*8+8]\n    adc     rbx, rdx\n    mul     r9\n    mov     r11d, 0\n    add     rbx, rax\n    mov     rax, [r10+r8*8+16]\n    adc     r11, 
rdx\n    mul     r9\n    mov     [rcx+r8*8+8], rbx\n    add     r11, rax\n    mov     ebx, 0\n    mov     [rcx+r8*8+16], r11\n    mov     rax, [r10+r8*8+24]\n    mov     r11d, 0\n    adc     rbx, rdx\n    mul     r9\n    add     rbx, rax\n    mov     [rcx+r8*8+24], rbx\n    adc     r11, rdx\n    add     r8, 4\n    jnc     .1\n.2:\ttest    r8, 2\n    jnz     .3\n    mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    mov     rax, [r10+r8*8+8]\n    adc     rbx, rdx\n    mul     r9\n    mov     r11d, 0\n    add     rbx, rax\n    adc     r11, rdx\n    add     r8, 2\n    mov     [rcx+r8*8-8], rbx\n.3: test    r8, 1\n    mov     rax, r11\n    jnz     .4\n    mov     rax, [r10+r8*8]\n    mov     ebx, 0\n    mul     r9\n    add     r11, rax\n    mov     [rcx+r8*8], r11\n    adc     rbx, rdx\n    mov     rax, rbx\n.4: END_PROC rbx\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n    BITS 64\n\n\tFRAME_PROC mpn_mul_2, 0, reg_save_list\n\tmov     rax, r8\n\t\n\tmov     r8, [r9]\n\tlea     rsi, [rdx+rax*8-24]\n\tlea     rdi, [rcx+rax*8-24]\n\tmov     rcx, [r9+8]\n\tmov     rbx, 3\n\tsub     rbx, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tmov     r11, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, 
[rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     r10, 0\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r9, rdx\n\tmul     r8\n\tadd     r11, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 3\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmov     [rdi+rbx*8], r11\n\tmul     rcx\n\tadd     r9, rax\n\tadc     r10, rdx\n\tcmp     rbx, 1\n\tja      .5\n\tje      .4\n.3:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     r8\n\tmov     r9, 0\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tadc     r9, 0\n\tmul     rcx\n\tadd     r11, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4:\tmov     r11, 0\n\tmov     rax, [rsi+rbx*8+8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tadc     r11, 0\n\tmul     rcx\n\tadd     r10, rax\n\tmov     [rdi+rbx*8+8], r9\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5:\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.6: END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 2\n%macro 
mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, 
r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro 
oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:\n    oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      __gmpn_netburst_mbc2\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_netburst_mbc1, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      
.2\n.1:\toldmpn_muladdmul_1_int 0\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.2:\toldmpn_muladdmul_1_int 1\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.3:\toldmpn_muladdmul_1_int 2\n    WIN64_GCC_EXIT frame\n\n    xalign  16\n.4:\toldmpn_muladdmul_1_int 3\n\n    xalign  16\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\n.6:\tWIN64_GCC_PROC mpn_netburst_mbc2, 5, frame\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .13\n.7:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .8\n    mul1lp\n.8:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .12\n    jz      .11\n    jp      .10\n.9:\tmulnext0\n    jmp     .21\n.10:mulnext1\n    jmp     .15\n.11:mulnext2\n    jmp     .17\n.12:mulnext3\n    jmp     .19\n    ; as all the mul2pro? are the same\n.13:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .20\n    jz      .18\n    jp      .16\n.14:mul2epi3\n.15:mpn_addmul_2_int 3\n    jmp     .22\n.16:mul2epi2\n.17:mpn_addmul_2_int 2\n    jmp     .22\n.18:mul2epi1\n.19:mpn_addmul_2_int 1\n    jmp     .22\n.20:mul2epi0\n.21:mpn_addmul_2_int 0\n\n    xalign  16\n.22:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n\tend\n"
  },
  {
    "path": "mpn/x86_64w/netburst/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nand_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [rdx+r9*8+24]\n    mov     r11, [rdx+r9*8+16]\n    and     r10, [r8+r9*8+24]\n    and     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [rdx+r9*8+8]\n    mov     T4, [rdx+r9*8]\n    and     T3, [r8+r9*8+8]\n    and     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [rdx+r9*8-8]\n    and     r10, [r8+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_nior_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, [rdx+r9*8+24]\n    or      r11, [rdx+r9*8+16]\n    not     r10\n    not     r11\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, [rdx+r9*8+8]\n    or      T4, [rdx+r9*8]\n    not     T3\n    not     T4\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    
jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1           ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, [rdx+r9*8-8]\n    not     r10\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_netburst_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n\n;  Copyright 2006  Jason Worth Martin <jason.worth.martin@gmail.com>\n;\n;  Copyright 2008, 2009 Brian Gladman\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n; CREDITS\n;\n; The code used here is derived from that provided by ct35z at:\n;\n;    http://www.geocities.jp/ct35z/gmp-core2-en.html\n;\n; This code is based largely on Pierrick Gaudry's excellent assembly\n; support for the AMD64 architecture.  (Note that Intel64 and AMD64,\n; while using the same instruction set, have very different\n; microarchitectures.  So, this code performs very poorly on AMD64\n; machines even though it is near-optimal on Intel64.)\n;\n; Roger Golliver works for Intel and provided insightful improvements\n; particularly in using the \"lea\" instruction to perform additions\n; and register-to-register moves.\n;\n; Jason Worth Martin's excellent assembly support for the Intel64\n; architecture has been used where appropriate.\n;\n; Eric Bainville has a brilliant exposition of optimizing arithmetic for\n; AMD64 (http://www.bealto.it).  I adapted many of the ideas he\n; describes to Intel64.\n;\n; Agner Fog is a demigod in the x86 world.  
If you are reading assembly\n; code files and you haven't heard of Agner Fog, then take a minute to\n; look over his software optimization manuals (http://www.agner.org/).\n; They are superb.\n;\n; Adapted for use with VC++ and YASM using a special mode in which NASM\n; preprocessing is used with AT&T assembler syntax. I am very grateful\n; for the support that Peter Johnson (one of the authors of YASM) has\n; provided in getting this special YASM mode working.  Without his\n; support this port would have been a great deal more difficult.\n;\n; The principle issues that I have had to address is the difference\n; between GCC and MSVC in their register saving and parameter passing\n; conventions.  Registers that have to be preserved across function\n; calls are:\n;\n; GCC:             rbx, rbp, r12..r15\n; MSVC:  rsi, rdi, rbx, rbp, r12..r15 xmm6..xmm15\n;\n; Parameter passing conventions for non floating point parameters:\n;\n;   function(   GCC     MSVC\n;       p1,     rdi      rcx\n;       p2,     rsi      rdx\n;       p3,     rdx       r8\n;       p4,     rcx       r9\n;       p5,      r8 [rsp+40]\n;       p6,      r9 [rsp+48]\n;\n; Care must be taken with 32-bit values in 64-bit register or on the\n; stack because the upper 32-bits of such parameters are undefined.\n;\n;       Brian Gladman\n;\n; Intel64 mpn_addmul_1 -- Multiply a limb vector with a limb and\n; add the result to a second limb vector.\n;\n; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and\n; return the carry or borrow from the top of the result\n;\n; BPL is bytes per limb, which is 8 in the 64-bit code here\n\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n;\n\n%define BPL                 8\n%define UNROLL_EXPONENT     4\n%define UNROLL_SIZE         (1 << 
UNROLL_EXPONENT)\n%define UNROLL_MASK         (UNROLL_SIZE - 1)\n%define ADDR(p,i,d)         (d*BPL)(p, i, BPL)\n\n; Register  Usage\n; --------  -----\n; rax    low word from mul\n; rbx\n; rcx    s2limb\n; rdx    high word from mul\n; rsi    s1p\n; rdi    rp\n; rbp    Base Pointer\n; rsp    Stack Pointer\n; r8     A_x\n; r9     A_y\n; r10    A_z\n; r11    B_x\n; r12    B_y\n; r13    B_z\n; r14    temp\n; r15    index\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list       rsi, rdi, r12, r13, r14, r15\n\n%define s2limb  rcx\n%define s1p     rsi\n%define rp      rdi\n%define a_x      r8\n%define a_y      r9\n%define a_z     r10\n%define b_x     r11\n%define b_y     r12\n%define b_z     r13\n%define temp    r14\n%define index   r15\n\n    LEAF_PROC mpn_submul_1\n    xor     a_z, a_z\n    jmp     entry\n\n    LEAF_PROC mpn_submul_1c\n    mov     a_z, [rsp+0x28]\nentry:\n    FRAME_PROC ?mpn_netburst_submul, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    xor     rdx, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    lea     s1p, [s1p+rdx*8]\n    lea     rp, [rp+rdx*8]\n    xor     index, index\n    sub     index, rdx\n    cmp     rdx, 4\n    jge     .6\n    lea     rax, [rel .1]\n    add     rax, [rax+rdx*8]\n    jmp     rax\n\n    xalign  8\n.1:\tdq      .2 - .1\n    dq      .3 - .1\n    dq      .4 - .1\n    dq      .5 - .1\n.2:\tmov     rax, a_z\n\tEXIT_PROC reg_save_list\n\n.3:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.4:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    sub      a_z, a_x\n    adc     rax, a_y\n    mov     
[rp+index*8], a_z\n    adc     rdx, 0\n    sub      b_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+8], b_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.5:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+16]\n    mul     s2limb\n    sub      a_z, a_x\n    adc     b_x, a_y\n    mov     [rp+index*8], a_z\n    mov     a_z, [rp+index*8+16]\n    adc     b_y, 0\n    sub      b_z, b_x\n    adc     rax, b_y\n    mov     [rp+index*8+8], b_z\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8+16], a_z\n    adc     rax, rdx\n\tEXIT_PROC reg_save_list\n\n.6:\tmov     temp, rdx\n    test    rdx, 1\n    jz      .7\n    mov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     a_z, [rp+index*8]\n    mov     a_x, rax\n    mov     a_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     b_z, [rp+index*8+8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    jmp     .8\n\n.7:\tmov     rax, [s1p+index*8]\n    mul     s2limb\n    add     rax, a_z\n    adc     rdx, 0\n    mov     b_z, [rp+index*8]\n    mov     b_x, rax\n    mov     b_y, rdx\n    mov     rax, [s1p+index*8+8]\n    mul     s2limb\n    mov     a_z, [rp+index*8+8]\n    mov     a_x, rax\n    mov     a_y, rdx\n.8:\tsub     temp, 4\n    and     temp, UNROLL_MASK\n    inc     temp\n    mov     rax, (.10 - .9) >> UNROLL_EXPONENT\n    mul     temp\n    lea     rdx, [rel .10]\n    sub     rdx, rax\n    mov     rax, [s1p+index*8+16]\n    lea     index, [index+temp+3-UNROLL_SIZE]\n    jmp     rdx\n\n%macro seq_1 7\n    mul     s2limb\n    %7      %3, %1\n    lea     %1, [rax]\n    mov     rax, [byte s1p+index*8+8*%6]\n    adc     %4, %2\n    mov     
[byte rp+index*8+8*(%6-3)], %3\n    mov     %3, [byte rp+index*8+8*(%6-1)]\n    lea     %2, [rdx]\n    adc     %5, 0\n%endmacro\n\n   xalign 16\n.9:\n%assign i 0\n%rep    16\n    %if (i & 1)\n        seq_1   b_x, b_y, b_z, a_x, a_y, i, sub\n    %else\n        seq_1   a_x, a_y, a_z, b_x, b_y, i, sub\n    %endif\n%assign i i + 1\n%endrep\n.10:add     index, UNROLL_SIZE\n    jnz     .9\n.11:mul     s2limb\n    sub      a_z, a_x\n    mov     [rp+index*8-24], a_z\n    mov     a_z, [rp+index*8-8]\n    adc     b_x, a_y\n    adc     b_y, 0\n    sub      b_z, b_x\n    mov     [rp+index*8-16], b_z\n    adc     rax, b_y\n    adc     rdx, 0\n    sub      a_z, rax\n    mov     rax, 0\n    mov     [rp+index*8-8], a_z\n    adc     rax, rdx\n.12:END_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rdi        rsi        rdx        rcx\n;                     rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xnor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    not     r10\n    not     r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    not     T3\n    not     T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz  
    .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    not     r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/netburst/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  Version 1.0.4\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rdi        rsi        rdx        rcx\n;                    rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n%define T3      r10\n%define T4      r11\n\n    LEAF_PROC mpn_xor_n\n    sub     r9, 4\n    jb      .2\n\n    xalign  16\n.1: mov     r10, [r8+r9*8+24]\n    mov     r11, [r8+r9*8+16]\n    or      r10, r10\n    or      r11, r11\n    xor     r10, [rdx+r9*8+24]\n    xor     r11, [rdx+r9*8+16]\n    mov     [rcx+r9*8+24], r10\n    mov     [rcx+r9*8+16], r11\n    mov     T3, [r8+r9*8+8]\n    mov     T4, [r8+r9*8]\n    or      T3, T3\n    or      T4, T4\n    xor     T3, [rdx+r9*8+8]\n    xor     T4, [rdx+r9*8]\n    mov     [rcx+r9*8+8], T3\n    mov     [rcx+r9*8], T4\n    sub     r9, 4\n    jnc     .1\n.2: add     r9, 4\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1    
   ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n    sub     r9, 1       ; ***\n    jz      .3\n    mov     r10, [r8+r9*8-8]\n    or      r10, r10\n    xor     r10, [rdx+r9*8-8]\n    mov     [rcx+r9*8-8], r10\n.3: ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/add_n.asm",
    "content": "; PROLOGUE(mpn_add_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_add_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_add_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2: mov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    adc     r10, [r8]\n    adc     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    adc     r10, [r8-16]\n    adc     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    adc     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    adc     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    adc     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/addmul_1.asm",
    "content": "; PROLOGUE(mpn_addmul_1)\n        \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_inclsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        xalign 16\n        LEAF_PROC mpn_addmul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jnz     .1\n        mul     r9\n        add     [rcx], rax\n        adc     rdx, 0\n        mov     rax, rdx\n        ret\n\n        xalign   16\n.1:\t    FRAME_PROC ?mpn_sandybridge_addmul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db     
 0x26\n        cmp     r11, 0\n        db      0x26\n        mov     [rsp-8], r12\n        db      0x26\n        jge     .3\n.2:     xor     r10, r10\n        mul     rcx\n        add     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        add     [rdi+r11*8+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        add     [rdi+r11*8+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        add     [rdi+r11*8+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .2\n.3      xor     r10, r10\n        mul     rcx\n        add     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .7\n        jz      .6\n        jp      .5\n.4:     mov     rax, [rsi+16]\n        mul     rcx\n        add     [rdi+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        add     [rdi+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        add     [rdi+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     [rdi+32], r8\n        adc     r9, 0\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n.5:     mov     rax, [rsi+24]\n        mul     rcx\n        add     [rdi+16], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        add     [rdi+24], r10\n        adc     r12, rax\n        adc     r8, rdx\n        add     [rdi+32], r12\n        
adc     r8, 0\n        mov     rax, r8\n        EXIT_PROC reg_save_list\n\n        align   16\n.6:     mov     rax, [rsi+32]\n        mul     rcx\n        add     [rdi+24], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        add     [rdi+32], r10\n        adc     r12, 0\n        mov     rax, r12\n        EXIT_PROC reg_save_list\n\n.7:     add     [rdi+32], r9\n        adc     r10, 0\n        mov     rax, r10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.2 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  mpn_and_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_and_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpand    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  mpn_andn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n    \n\tLEAF_PROC mpn_andn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpandn   xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  
xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/com_n.asm",
    "content": "; PROLOGUE(mpn_com_n)\n\n;  mpn_com_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_com_n(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_com_n\n\tmov     r9, 3\n\tlea     rdx, [rdx+r8*8-24]\n\tpcmpeqb xmm2, xmm2\n\tsub     r9, r8\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r9*8]\n\tmovdqu  xmm1, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tadd     r9, 4\n\tpxor    xmm1, xmm2\n\tmovdqu  [rcx], xmm0\n\tmovdqu  [rcx+16], xmm1\n\tlea     rcx, [rcx+32]\n\tjnc     .1\n.2:\tcmp     r9, 2\n\tja      .6\n\tje      .5\n\tjp      .4\n.3:\tmovdqu  xmm0, [rdx+r9*8]\n\tmov     rax, [rdx+r9*8+16]\n\tpxor    xmm0, xmm2\n\tnot     rax\n\tmovdqu  [rcx], xmm0\n\tmov     [rcx+16], rax\n\tret\n\n.4:\tmovdqu  xmm0, [rdx+r9*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx], xmm0\n\tret\n\n.5:\tmov     rax, [rdx+r9*8]\n\tnot     rax\n\tmov     [rcx], rax\n.6:\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/copyd.asm",
    "content": "; PROLOGUE(mpn_copyd)\n\n;  mpn_copyd\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi,    rsi,       rdx\n;                    rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_copyd\n\tlea     rdx, [rdx+16]\n\tlea     rcx, [rcx+16]\n\tsub     r8, 4\n\tjc      .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  xmm1, [rdx+r8*8-16]\n\tsub     r8, 4\n\tmovdqu  [rcx+r8*8-16+32], xmm1\n\tmovdqu  [rcx+r8*8+32], xmm0\n\tjnc     .1\n.2:\tcmp     r8, -2\n\tjg      .5\n\tje      .6\n\tjnp     .4\n.3:\tmov     rax, [rdx+r8*8+8]\n\tmov     [rcx+r8*8+8], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r8*8]\n\tmov     rax, [rdx+r8*8-8]\n\tmov     [rcx+r8*8-8], rax\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\n.6:\tmovdqu  xmm0, [rdx+r8*8]\n\tmovdqu  [rcx+r8*8], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/copyi.asm",
    "content": "; PROLOGUE(mpn_copyi)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_copyi(mp_ptr, mp_ptr, mp_size_t)\n;                    rdi     rsi        rdx\n;                    rcx     rdx         r8\n\n%define SMALL_LOOP  1\n\n%include \"yasm_mac.inc\"\n\n    CPU  Core2\n    BITS 64\n\n\tLEAF_PROC mpn_copyi\n    cmp     r8, 0\n\tjz      .9\n%if SMALL_LOOP <> 0\n\tcmp     r8, 10\n\tjge     .2\n\t\n\txalign  16\n.1:\tmov     rax, [rdx]\n\tmov     [rcx], rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 1\n\tjnz     .1\n\tret\n%endif\n\n.2:\tmov     rax, rcx\n\tsub     rax, rdx\n\ttest    rax, 0xF\n\tjz      .17\n\ttest    rcx, 0xF\n\tjz      .10\n\tmov     r9, 5\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-40]\n\tlea     rcx, [rcx+r8*8-40]\n\tmovapd  xmm1, [rdx+r9*8]\n\tmovq    [rcx+r9*8], xmm1\n\tadd     rcx, 8\n%if SMALL_LOOP = 0\n\tcmp     r8, 1\n\tjz      .9\n%endif\n\tcmp     r9, 0\n\tjge     .4\n\n\txalign  16\n.3:\tmovapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, 
xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .3\n.4:\tcmp     r9, 2\n\tja      .8\n\tjz      .7\n\tjp      .6\n\n\txalign  16\n.5:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.6:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.7:\tmovapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.8:\tmovhpd  [rcx+r9*8], xmm1\n.9:\tret\n\n.10:mov     r9, 4\n\tsub     r9, r8\n\tlea     rdx, [rdx+r8*8-32]\n\tlea     rcx, [rcx+r8*8-32]\n\tmovapd  xmm1, [rdx+r9*8-8]\n\tsub     rdx, 8\n\tcmp     r9, 0\n\tjge     .12\n\n\txalign  16\n.11:movapd  xmm0, [rdx+r9*8+16]\n\tadd     r9, 4\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8-32], xmm1\n\tmovapd  xmm1, [rdx+r9*8+0]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8-16], xmm0\n\tjnc     .11\n.12:cmp     r9, 2\n\tja      .16\n\tjz      .15\n\tjp      .14\n\n\txalign  16\n.13:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovapd  xmm1, [rdx+r9*8+32]\n\tshufpd  xmm0, xmm1, 1\n\tmovapd  [rcx+r9*8+16], xmm0\n\tret\n\n\txalign  16\n.14:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tmovhpd  [rcx+r9*8+16], xmm0\n\tret\n\t\n\txalign  16\n.15:movapd  xmm0, [rdx+r9*8+16]\n\tshufpd  xmm1, xmm0, 1\n\tmovapd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.16:movhpd  [rcx+r9*8], xmm1\n\tret\n\n\txalign  16\n.17:mov     r9, 3\n\tsub     r9, r8\n\ttest    rcx, 0xF\n\tlea     rdx, [rdx+r8*8-24]\n\tlea     rcx, [rcx+r8*8-24]\n\tjz      .18\n\tmov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tadd     r9, 1\n.18:cmp     r9, 0\n\tjge     .20\n\n\txalign  16\n.19:add     r9, 4\n\tmovapd  xmm0, [rdx+r9*8-32]\n\tmovapd  [rcx+r9*8-32], xmm0\n\tmovapd  xmm1, [rdx+r9*8-16]\n\tmovapd  [rcx+r9*8-16], 
xmm1\n\tjnc     .19\n.20:cmp     r9, 2\n\tja      .22\n\tje      .24\n\tjp      .23\n\n.21:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tmov     rax, [rdx+r9*8+16]\n\tmov     [rcx+r9*8+16], rax\n.22:ret\n\n\txalign  16\n.23:movapd  xmm0, [rdx+r9*8]\n\tmovapd  [rcx+r9*8], xmm0\n\tret\n\n\txalign  16\n.24:mov     rax, [rdx+r9*8]\n\tmov     [rcx+r9*8], rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; dont want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  AMD64 mpn_hamdist\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_hamdist\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     r9, [rcx+r8*8-8]\n\tmov     rcx, 1\n\txor     eax, eax\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tmov     r11, [r9+rcx*8+8]\n\txor     r11, [rdx+rcx*8+8]\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 2\n\tjnc     .0\n.1: jne     .2\n\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tadd     rax, r10\n.2:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  mpn_ior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_ior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpor     xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tor      rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  mpn_iorn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_iorn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm1, [rdx+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [r8+r10*8+16-32]\n\tpandn   xmm1, xmm3\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [rdx+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tnot     rax\n\tor      rax, r9\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm2, [r8+r10*8]\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\nlp:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     
[rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     lp\n        cmp     rcx, 2\n        jg      case0\n        jz      case1\n        jp      case2\ncase3:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     fin\ncase2:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        
adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     fin\ncase1:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\nfin:    mov     rcx, 3\ncase0:  \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     notodd\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\nl7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      l7\n        mov     rcx, 3\nnotodd: and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\nl1:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      l1\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\nl2:     adc     qword[rbp+rcx*8+8], 0\n        inc 
    rcx\n        jc      l2\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd then do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accumulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quarter block by simply waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrow from second to third quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrow from third to fourth quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_lshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shl     rdx, cl\n    neg     rcx\n    shr     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     r11, r8\n    mov     rax, r9\n    shl     r8, cl\n    shl     r9, cl\n    neg     rcx\n    shr     r11, cl\n    shr     rax, cl\n    or      r9, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     eax, 64\n    sub     rax, rcx\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    lea     r9, [rdx+r8*8-16]\n    mov     r11, r9\n    and     r9, -16\n    movdqa  xmm3, [r9]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, 
xmm1\n    pshufd  xmm3, xmm3, 0x4e\n    movq    rax, xmm3\n    cmp     r11, r9\n    je      .4\n    movq    xmm2, [rdx+r8*8-8]\n    movq    xmm4, xmm2\n    psrlq   xmm2, xmm1\n    movq    rax, xmm2\n    psllq   xmm4, xmm0\n    por     xmm4, xmm3\n    movq    [r10+r8*8-8], xmm4\n    dec     r8\n.4:\tsub     r8, 5\n    jle     .6\n\n    xalign  16\n.5: movdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movdqa  xmm3, [rdx+r8*8-8]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    pshufd  xmm3, xmm3, 0x4e\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    sub     r8, 4\n    jg      .5\n.6: cmp     r8, -1\n    je      .9\n    jg      .8\n    jp      .10\n.7:\tpxor    xmm2, xmm2\n    psllq   xmm5, xmm0\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movq    xmm3, [rdx+r8*8]\n    pshufd  xmm3, xmm3, 0x4e\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    psllq   xmm5, xmm0\n    movhpd  [r10+r8*8], xmm5\n    ret\n\n    xalign  16\n.9:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    pxor    xmm3, xmm3\n    movhlps 
xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:movq    xmm2, [rdx+r8*8+16]\n    pshufd  xmm2, xmm2, 0x4e\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    psllq   xmm4, xmm0\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift1\n    mov     rax, r8\n\tand     r8, 7\n\tinc     r8\n\tmov     [rsp+0x18], r8\n\tshr     rax, 3\n\tcmp     rax, 0\n\tjz      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx], r8\n\tmov     [rcx+8], r9\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], r11\n\tmov     r8, [rdx+32]\n\tmov     r9, [rdx+40]\n\tmov     r10, [rdx+48]\n\tmov     r11, [rdx+56]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx+32], r8\n\tmov     [rcx+40], r9\n\tmov     [rcx+48], r10\n\tmov     [rcx+56], r11\n\tlea     rcx, [rcx+64]\n\tdec     rax\n\tlea     rdx, [rdx+64]\n\tjnz     .1\n.2:\tmov     
rax, [rsp+0x18]\n\tdec     rax\n\tjz      .3\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     r8, [rdx]\n\tadc     r8, r8\n\tmov     [rcx], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+8]\n\tadc     r8, r8\n\tmov     [rcx+8], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+16]\n\tadc     r8, r8\n\tmov     [rcx+16], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+24]\n\tadc     r8, r8\n\tmov     [rcx+24], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+32]\n\tadc     r8, r8\n\tmov     [rcx+32], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+40]\n\tadc     r8, r8\n\tmov     [rcx+40], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+48]\n\tadc     r8, r8\n\tmov     [rcx+48], r8\n.3:\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/lshiftc.asm",
    "content": "; PROLOGUE(mpn_lshiftc)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void lshiftc(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;                  rdi     rsi        rdx      rcx\n;                  rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshiftc\n\tmov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tpcmpeqb mm6, mm6\n\tmovq    mm1, rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 5\n\tmovq    mm5, [rdx+r8*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjc      .2\n\t\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tmovq    mm5, [rdx+r8*8-8]\n\tsub     r8, 4\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+40], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+32], mm4\n\tpsllq   mm3, mm0\n\tjnc     .1\n.2: cmp     r8, -2\n\tjz      .4\n\tjp      .5\n\tjs      .6\n.3:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8], mm4\n\temms\n\tret\n\n\txalign  16\n.4:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\temms\n\tret\n\n\txalign  16\n.5:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\temms\n\tret\n\n\txalign  16\n.6:\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\temms\n\tret\n\t\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n    FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n    \n\tmov     r13, [rsi+rdx*8-8]\n\tmov     rax, [rsi+rdx*8-16]\n\tmov     r8, [r9]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tsub     rdi, 2\n\t\n\txalign  16\n.1:\tmov     r10, [rsi+rdi*8-8]\n\tmul     r8\n\tadd     r10, rax\n\tmov     r11, 0\n\tadc     r11, rdx\n\tmov     rax, r13\n\tmul     r9\n\tadd     rax, r10\n\tmov     r13, r11\n\tadc     r13, rdx\n\tdec     rdi\n\tjnz     .1\n\n\tmov     [rcx], rax\n\tmov     rax, r8\n\tmul     r13\n\tadd     [rcx], rax\n\tadc     rdx, 0\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n        FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r14, [rsi+rdx*8-8]\n        mov     r13, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r10, [r9+16]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        mov     rax, [rsi+rdx*8-24]\n        mul     r8\n        mov     r11, [rsi+rcx*8-32]\n        xor     r12, r12\n        sub     rcx, 6\n        jc      .2\n    \n        align   16\n.1:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     
r14\n        add     r13, rax\n        mov     rax, [rsi+rcx*8+8]\n        mov     r14, r12\n        adc     r14, rdx\n        mul     r8\n        mov     r12d, 0\n        mov     r11, [rsi+rcx*8+0]\n        sub     rcx, 2\n        jnc     .1\n.2:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n        cmp     rcx, -2\n        je      .4\n.3:     mov     r11, [rsi+rcx*8+8]\n        xor     r12, r12\n        mov     rax, r8\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r9\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n.4:     mov     rax, r8\n        mul     r14\n        add     r13, rax\n        adc     rdx, 0\n        mov     [rdi], r13\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n    \n    end\n    \n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        LEAF_PROC mpn_mul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jne     .1\n        mul     r9\n        mov     [rcx], rax\n        mov     rax, rdx\n        ret\n\n.1:     FRAME_PROC ?mpn_sandybridge_mul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n       
 mov     [rsp-8], r12\n        db      0x26\n        jge     .2\n.1:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        mov     [rdi+r11*8+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+r11*8+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        mov     [rdi+r11*8+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .1\n.2:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .5\n        jz      .4\n        jp      .3\n        mov     rax, [rsi+16]\n        mul     rcx\n        mov     [rdi+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        mov     [rdi+32], r8\n        mov     rax, r9\n        EXIT_PROC   reg_save_list\n\n.3:     mov     rax, [rsi+24]\n        mul     rcx\n        mov     [rdi+16], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        mov     [rdi+24], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     [rdi+32], r12\n        mov     rax, r8\n        EXIT_PROC   reg_save_list\n\n        align   16\n.4:  
   mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     [rdi+32], r10\n        mov     rax, r12\n        EXIT_PROC   reg_save_list\n\n.5:     mov     [rdi+32], r9\n        mov     rax, r10\n        END_PROC   reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2010 Jason Moxham\n;\n;  Windows Conversion Copyright 2010 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n        CPU  nehalem\n        BITS 64\n\n    \tFRAME_PROC mpn_mul_2, 0, reg_save_list\n        mov     rbx, 3\n        lea     rdi, [rcx+r8*8-24]\n        lea     rsi, [rdx+r8*8-24]\n        sub     rbx, r8\n        mov     r8, [r9+8]\n        mov     rcx, [r9]\n\n        mov     r11, 0\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8]\n        mov     r10, 0\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8]\n        mov     [rdi+rbx*8], r11\n        adc     r9, rdx\n        cmp     rbx, 0\n        jge     .2\n\n        align   16\n.1:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n  
      adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        mov     rax, [rsi+rbx*8+24]\n        mov     r10, 0\n        adc     r9, rdx\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8+24]\n        mov     [rdi+rbx*8+24], r11\n        adc     r9, rdx\n        adc     r10, 0\n        add     rbx, 3\n        jnc     .1\n.2:     cmp     rbx, 1\n        ja      .5\n        je      .4\n.3:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        adc     r9, rdx\n        mov     [rdi+rbx*8+24], r11\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n\n.4:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     
[rdi+rbx*8+16], r10\n        mov     rax, r11\n        EXIT_PROC reg_save_list\n\n.5:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     rax, r10\n        END_PROC reg_save_list\n        \n        end    \n        \n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mul_basecase.asm",
    "content": "; PROLOGUE(mpn_mul_basecase)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%include \"yasm_mac.inc\"\n\n%macro addmul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi+rbx*8], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi+rbx*8+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, 
r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+rbx*8+24], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro addmul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro addmul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    add     [rdi+8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro addmul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro addmul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-8], r12\n    adc     r9, rax\n    mov     r12, 
0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    add     [rdi], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi+8], r10\n    adc     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro addmul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro addmul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    add     [rdi-16], r12\n    adc     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    add     [rdi-8], r9\n    adc     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    add     [rdi], r10\n    mov     r9, 0\n    adc     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    add     [rdi+8], r11\n    adc     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2lp 0\n    xalign  16\n%%1:mov     rax, [rsi+rbx*8]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+rbx*8+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi+rbx*8], r12\n    add     r9, rax\n    mov     
r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+rbx*8+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+rbx*8+16], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+rbx*8+24]\n    adc     r12, rdx\n    adc     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+24], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+rbx*8+32]\n    mul     rcx\n    add     r12, rax\n    adc     r9, rdx\n    adc     r10, 0\n    add     rbx, 4\n    jnc     %%1\n%endmacro\n\n%macro mul2pro0 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r10, 0\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi0 0\n    mov     rbx, r14\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+24], r12\n    add     r9, rax\n    adc     r10, rdx\n    add     r15, 2\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], r9\n    lea     rdi, [rdi+16]\n    mov     [rdi+24], r10\n%endmacro\n\n%macro mul2pro1 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r10, 0\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi1 0\n    mov     rax, [rsi+16]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+24]\n    mov     r11, 0\n    adc     r10, rdx\n    mul     rcx\n    mov     [rdi], r12\n    add     r9, rax\n    adc     r10, rdx\n    adc     r11, 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    adc     r11, rdx\n    add     r15, 2\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    mov     
[rdi+24], r11\n    mov     [rdi+16], r10\n%endmacro\n\n%macro mul2pro2 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r10, 0\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n%endmacro\n\n%macro mul2epi2 0\n    mov     rax, [rsi+8]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+16]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-8], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+24]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+24]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r11\n    mov     [rdi+24], r12\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul2pro3 0\n    mov     rcx, [r13+r15*8]\n    mul     rcx\n    mov     r12, rax\n    mov     r9, rdx\n    mov     r8, [r13+r15*8+8]\n    mov     r10, 0\n%endmacro\n\n%macro mul2epi3 0\n    mov     rax, [rsi]\n    lea     rdi, [rdi+16]\n    mul     r8\n    add     r9, rax\n    mov     rax, [rsi+8]\n    adc     r10, rdx\n    mov     r11, 0\n    mul     rcx\n    mov     [rdi-16], r12\n    add     r9, rax\n    mov     r12, 0\n    adc     r10, rdx\n    mov     rax, [rsi+8]\n    adc     r11, 0\n    mul     r8\n    mov     [rdi-8], r9\n    add     r10, rax\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mul     rcx\n    add     r10, rax\n    mov     rax, [rsi+16]\n    adc     r11, rdx\n    adc     r12, 0\n    mul     r8\n    mov     [rdi], r10\n    mov     r9, 0\n    add     r11, rax\n    mov     r10, 0\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    mov     r15, r15\n    mul     rcx\n    add     r11, rax\n    mov     rax, [rsi+24]\n    adc     r12, rdx\n    adc     r9, 
0\n    mul     r8\n    mov     [rdi+8], r11\n    add     r12, rax\n    adc     r9, rdx\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+16], r12\n    mov     [rdi+24], r9\n    add     r15, 2\n    mov     rbx, r14\n%endmacro\n\n%macro mul1lp 0\n    xalign  16\n%%1:\n    mov     r10, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+rbx*8+8]\n    mul     r8\n    mov     [rdi+rbx*8], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+rbx*8+16]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r8\n    mov     [rdi+rbx*8+8], r10\n    db      0x26\n    add     r11, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+rbx*8+24]\n    mul     r8\n    mov     [rdi+rbx*8+16], r11\n    db      0x26\n    add     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     rbx, 4\n    mov     rax, [rsi+rbx*8]\n    jnc     %%1\n%endmacro\n\n; rbx is 0\n%macro mulnext0 0\n    mov     rax, [rsi+8]\n    mul     r8\n    mov     [rdi], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     rax, [rsi+16]\n    mov     r12d, 0\n    mul     r8\n    mov     [rdi+8], r10\n    add     r11, rax\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r11\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 1\n%macro mulnext1 0\n    mov     rax, [rsi+16]\n    mul     r8\n    mov     [rdi+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+24], r12\n    mov     [rdi+32], rdx\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n; rbx is 
2\n%macro mulnext2 0\n    mov     rax, [rsi+24]\n    mul     r8\n    mov     [rdi+16], r9\n    add     r10, rax\n    mov     r11d, 0\n    adc     r11, rdx\n    mov     [rdi+24], r10\n    mov     [rdi+32], r11\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n; rbx is 3\n%macro mulnext3 0\n    mov     [rdi+24], r9\n    mov     [rdi+32], r10\n    inc     r15\n    lea     rdi, [rdi+8]\n    mov     rax, [rsi+r14*8]\n    mov     rbx, r14\n%endmacro\n\n%macro mpn_addmul_2_int 1\n    jz      %%2\n    xalign  16\n%%1:addmul2pro%1\n    addmul2lp\n    addmul2epi%1\n    jnz     %%1\n%%2:\n%endmacro\n\n%macro oldmulnext0 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    mov     [rdi+r11*8+24], rbx\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+32], r12\n    mov     rax, [rsi+r14*8]\n    mov     [rdi+r11*8+40], rdx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldmulnext1 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     r12d, 0\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+24]\n    mul     r13\n    mov     [rdi+r11*8+16], r10\n    add     r12, rax\n    adc     rdx, 0\n    mov     [rdi+r11*8+24], r12\n    mov     [rdi+r11*8+32], rdx\n    inc     r8\n    lea     rdi, [rdi+8]\n    mov     r11, r14\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldmulnext2 0\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     [rdi+r11*8+8], r9\n    add     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     [rdi+r11*8+16], r10\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov    
 r11, r14\n%endmacro\n\n%macro oldmulnext3 0\n    mov     [rdi+r11*8+8], r9\n    mov     [rdi+r11*8+16], r10\n    inc     r8\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro0 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    db      0x26\n    mov     r9, rdx\n    lea     rdi, [rdi+8]\n%endmacro\n\n%macro oldaddmulnext0 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+16]\n    mul     r13\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+24]\n    mov     r12d, 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+32]\n    mul     r13\n    add     [rdi+24], rbx\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+32], r12\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldaddmulpro1 0\n    mov     r13, [rcx+r8*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n%endmacro\n\n%macro oldaddmulnext1 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+24]\n    mul     r13\n    lea     rdi, [rdi+8]\n    add     [rdi+8], r9\n    adc     r10, rax\n    mov     r12d, 0\n    mov     rax, [rsi+32]\n    adc     r12, rdx\n    mov     r11, r14\n    mul     r13\n    add     [rdi+16], r10\n    adc     r12, rax\n    adc     rdx, 0\n    add     [rdi+24], r12\n    adc     rdx, 0\n    mov     [rdi+32], rdx\n    inc     r8\n    mov     rax, [rsi+r14*8]\n%endmacro\n\n%macro oldaddmulpro2 0\n    mov     r13, [rcx+r8*8]\n    lea     rdi, [rdi+8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, 
rdx\n%endmacro\n\n%macro oldaddmulnext2 0\n    mov     r10d, 0\n    mul     r13\n    add     [rdi+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    mov     ebx, 0\n    add     [rdi+r11*8+8], r9\n    adc     r10, rax\n    adc     rbx, rdx\n    mov     rax, [rsi+r14*8]\n    add     [rdi+r11*8+16], r10\n    adc     rbx, 0\n    mov     [rdi+r11*8+24], rbx\n    inc     r8\n    mov     r11, r14\n%endmacro\n\n%macro oldaddmulpro3 0\n    mov     r13, [rcx+r8*8]\n    db      0x26\n    mul     r13\n    db      0x26\n    mov     r12, rax\n    db      0x26\n    lea     rdi, [rdi+8]\n    db      0x26\n    mov     r9, rdx\n    mov     rax, [rsi+r14*8+8]\n%endmacro\n\n%macro oldaddmulnext3 0\n    mov     r11, r14\n    mul     r13\n    add     [rdi+24], r12\n    adc     r9, rax\n    adc     rdx, 0\n    add     [rdi+32], r9\n    mov     rax, [rsi+r14*8]\n    adc     rdx, 0\n    inc     r8\n    mov     [rdi+40], rdx\n%endmacro\n\n%macro oldmpn_muladdmul_1_int 1\n    oldmulnext%1\n    jz      %%2\n    xalign  16\n%%1:oldaddmulpro%1\n    oldaddmulnext%1\n    jnz     %%1\n%%2:\n%endmacro\n\n    CPU  Core2\n    BITS 64\n\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40] \n\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14\n\n    LEAF_PROC mpn_mul_basecase\n    ; the current mul does not handle case one\n    cmp     r8d, 4\n    jg      fiveormore\n    cmp     r8d, 1\n    je      one\n\n    WIN64_GCC_PROC mpn_sandybridge_mbc1, 5, frame\n\n    mov     r14, 5\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-40]\n    lea     rcx, [rcx+r8*8]\n    neg     r8\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rax, [rsi+r14*8]\n    mov     r13, [rcx+r8*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     
r9, rdx\n    mov     r10d, 0\n    mul     r13\n    mov     [rdi+r11*8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     r11, 2\n    ja      .4\n    jz      .3\n    jp      .2\n.1:\toldmpn_muladdmul_1_int 0\n    jmp     .5\n.2:\toldmpn_muladdmul_1_int 1\n    jmp     .5\n.3:\toldmpn_muladdmul_1_int 2\n    jmp     .5\n.4:\toldmpn_muladdmul_1_int 3\n.5:\tWIN64_GCC_END frame\n\n; rdx >= 5  as we dont have an inner jump\n; (rdi,rdx+r8)=(rsi,rdx)*(rcx,r8)\n\n%undef  reg_save_list\n%define reg_save_list   rbx, rsi, rdi, r12, r13, r14, r15\n\n    xalign  16\nfiveormore:\n    WIN64_GCC_PROC mpn_sandybridge_mbc2, 5, frame\n    movsxd  rdx, edx\n    movsxd  r8, r8d\n\n    mov     r14, 4\n    sub     r14, rdx\n    lea     rdi, [rdi+rdx*8-32]\n    lea     rsi, [rsi+rdx*8-32]\n    mov     r13, rcx\n    mov     r15, r8\n    lea     r13, [r13+r15*8]\n    neg     r15\n    mov     rbx, r14\n    mov     rax, [rsi+r14*8]\n    bt      r15, 0\n    jnc     .12\n.6:\tinc     rbx\n    mov     r8, [r13+r15*8]\n    mul     r8\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     rbx, 0\n    jge     .7\n    mul1lp\n.7:\tmov     r10d, 0\n    mul     r8\n    mov     [rdi+rbx*8-8], r12\n    add     r9, rax\n    adc     r10, rdx\n    cmp     rbx, 2\n    ja      .11\n    jz      .10\n    jp      .9\n.8:\tmulnext0\n    jmp     .20\n.9:\tmulnext1\n    jmp     .14\n.10:mulnext2\n    jmp     .16\n.11:mulnext3\n    jmp     .18\n     ; as all the mul2pro? 
are the same\n.12:mul2pro0\n    mul2lp\n    cmp     rbx, 2\n    ja      .19\n    jz      .17\n    jp      .15\n.13:mul2epi3\n.14:mpn_addmul_2_int 3\n    WIN64_GCC_EXIT frame\n\n.15:mul2epi2\n.16:mpn_addmul_2_int 2\n    WIN64_GCC_EXIT frame\n\n.17:mul2epi1\n.18:mpn_addmul_2_int 1\n    WIN64_GCC_EXIT frame\n\n.19:mul2epi0\n.20:mpn_addmul_2_int 0\n\n    xalign  16\n.21:WIN64_GCC_END frame\n\n    xalign  16\none:mov     rax, [rdx]\n    mul     qword [r9]\n    mov     [rcx], rax\n    mov     [rcx+8], rdx\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/mullow_n_basecase.asm",
    "content": ";  AMD64 mpn_mullow_n_basecase\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;void mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n;                                  rdi           rsi           rdx          rcx\n;                                  rcx           rdx            r8           r9\n\n%include 'yasm_mac.inc'\n\n\ttext\n\talign\t32\n\tLEAF_PROC mpn_mullow_n_basecase\n\tcmp     r9, 3\n\tje      asm_sym(?mpn_mullow1)\n\tja      asm_sym(?mpn_mullow2)\n    mov     r10, rdx\n\tmov     rax, [rdx]\n\n\talign\t16\n    cmp     r9, 2\n\tjae     .1\n    imul    rax, [r8]\n\tmov     [rcx], rax\n\tret\n\n.1: mov     r9, [r8]\n\tmul     r9\n\tmov     [rcx], rax\n\tmov     rax, [r10+8]\n\timul    rax, r9\n\tadd     rdx, rax\n\tmov     r9, [r8+8]\n\tmov     r11, [r10]\n\timul    r11, r9\n\tadd     rdx, r11\n\tmov     [rcx+8], rdx\n\tret\n\n%define reg_save_list rsi, rdi\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\tmov     rax, [rsi]\n    mov     r9, [r8]\n\tmul     r9\n\tmov     [rdi], rax\n\tmov     r10, rdx\n\tmov     rax, [rsi+8]\n\tmul     r9\n\timul    r9, [rsi+16]\n\tadd     r10, rax\n\tadc     r9, rdx\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi]\n\tmul     r11\n\tadd     r10, rax\n\tadc     r9, rdx\n\timul    r11, [rsi+8]\n\tadd     r9, r11\n\tmov     [rdi+8], r10\n\tmov     r10, [r8+16]\n\tmov     rax, [rsi]\n\timul    r10, rax\n\tadd     r9, r10\n\tmov     [rdi+16], r9\n    END_PROC reg_save_list\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r13, r14, r15\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow2, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\n\tmov     rax, [rsi]\n\tmov     r9, [r8]\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tneg     rcx\n\tmul     r9\n\tmov     rbx, [r8+8]\n\ttest    cl, 1\n\tjnz     .2\n.1:\tlea     rbp, [rcx]\n\txor     r10, r10\n\tmov     r12, rax\n\tmov     r11, rdx\n\tjmp     .5\n.2:\tlea     rbp, 
[rcx+1]\n\txor     r11, r11\n\txor     r12, r12\n\tmov     r10, rax\n\tmov     r13, rdx\n\tjmp     .4\n\talign\t32\n.3:\tmul     r9\n\tadd     r10, rax\n\tmov     r13, rdx\n\tadc     r13, 0\n.4:\tmov     rax, [rsi+rbp*8-8]\n\tmul     rbx\n\tadd     r10, r11\n\tadc     r13, 0\n\tadd     r12, rax\n\tmov     [rdi+rbp*8-8], r10\n\tmov     r10, rdx\n\tadc     r10, 0\n\tmov     rax, [rsi+rbp*8]\n\tmul     r9\n\tadd     r12, rax\n\tmov     r11, rdx\n\tadc     r11, 0\n\tadd     r12, r13\n.5:\tmov     rax, [rsi+rbp*8]\n\tadc     r11, 0\n\tmul     rbx\n\tmov     [rdi+rbp*8], r12\n\tadd     r10, rax\n\tmov     r12, rdx\n\tmov     rax, [rsi+rbp*8+8]\n\tadc     r12, 0\n\tadd     rbp, 2\n\tjnc     .3\n.6:\timul    rax, r9\n\tadd     rax, r10\n\tadd     rax, r11\n\tmov     [rdi-8], rax\n\tadd     rcx, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     rcx, -2\n\tjge     .19\n.7:\tmov     r9, [r8]\n\tmov     rbx, [r8+8]\n\tmov     rax, [rsi+rcx*8]\n\tmul     r9\n\ttest    cl, 1\n\tjnz     .11\n.8:\tmov     r15, [rdi+rcx*8]\n\txor     r12, r12\n\txor     r11, r11\n\ttest    cl, 2\n\tjnz     .10\n.9:\tlea     rbp, [rcx+1]\n\tjmp     .16\n.10:lea     rbp, [rcx+3]\n\tmov     r13, rdx\n\tadd     r15, rax\n\tmov     rax, [rsi+rcx*8]\n\tmov     r14, [rdi+rcx*8+8]\n\tadc     r13, 0\n\tjmp     .14\n.11:mov     r14, [rdi+rcx*8]\n\txor     r10, r10\n\tmov     r11, rdx\n\ttest    cl, 2\n\tjz      .13\n.12:lea     rbp, [rcx+2]\n\tadd     r14, rax\n\tadc     r11, 0\n\tmov     rax, [rsi+rcx*8]\n\tmul     rbx\n\tmov     r15, [rdi+rcx*8+8]\n\tjmp     .15\n.13:lea     rbp, [rcx]\n\txor     r13, r13\n\tjmp     .17\n\t\n    align\t32\n.14:mul     rbx\n\tmov     r10, rdx\n\tadd     r14, rax\n\tadc     r10, 0\n\tadd     r15, r11\n\tadc     r13, 0\n\tadd     r14, r12\n\tadc     r10, 0\n\tmov     rax, [rsi+rbp*8-16]\n\tmul     r9\n\tadd     r14, rax\n\tmov     r11, rdx\n\tadc     r11, 0\n\tmov     rax, [rsi+rbp*8-16]\n\tmul     rbx\n\tmov     [rdi+rbp*8-24], r15\n\tmov     r15, 
[rdi+rbp*8-8]\n\tadd     r14, r13\n\tadc     r11, 0\n.15:mov     r12, rdx\n\tmov     [rdi+rbp*8-16], r14\n\tadd     r15, rax\n\tadc     r12, 0\n\tmov     rax, [rsi+rbp*8-8]\n\tadd     r15, r10\n\tadc     r12, 0\n\tmul     r9\n.16:add     r15, rax\n\tmov     r13, rdx\n\tadc     r13, 0\n\tmov     rax, [rsi+rbp*8-8]\n\tmul     rbx\n\tadd     r15, r11\n\tmov     r14, [rdi+rbp*8]\n\tadc     r13, 0\n\tmov     r10, rdx\n\tadd     r14, rax\n\tadc     r10, 0\n\tmov     rax, [rsi+rbp*8]\n\tmul     r9\n\tadd     r14, r12\n\tmov     [rdi+rbp*8-8], r15\n\tmov     r11, rdx\n\tadc     r10, 0\n.17:add     r14, rax\n\tadc     r11, 0\n\tmov     rax, [rsi+rbp*8]\n\tadd     r14, r13\n\tadc     r11, 0\n\tmul     rbx\n\tmov     r15, [rdi+rbp*8+8]\n\tadd     r15, rax\n\tmov     r12, rdx\n\tadc     r12, 0\n\tmov     rax, [rsi+rbp*8+8]\n\tmov     [rdi+rbp*8], r14\n\tmul     r9\n\tadd     r15, r10\n\tmov     r13, rdx\n\tadc     r12, 0\n\tadd     r15, rax\n\tmov     rax, [rsi+rbp*8+8]\n\tmov     r14, [rdi+rbp*8+16]\n\tadc     r13, 0\n\tadd     rbp, 4\n\tjnc     .14\n.18:imul    rax, rbx\n\tadd     r14, rax\n\tadd     r15, r11\n\tadc     r13, 0\n\tadd     r14, r12\n\tmov     rax, [rsi-8]\n\timul    rax, r9\n\tadd     rax, r14\n\tmov     [rdi-16], r15\n\tadd     rax, r13\n\tmov     [rdi-8], rax\n\tadd     rcx, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     rcx, -2\n\tjl      .7\n\tjnz     .20\n.19:mov     r9, [r8]\n\tmov     rbx, [r8+8]\n\tmov     rax, [rsi-16]\n\tmul     r9\n\tadd     rax, [rdi-16]\n\tadc     rdx, [rdi-8]\n\tmov     r10, [rsi-8]\n\timul    r10, r9\n\tmov     r11, [rsi-16]\n\timul    r11, rbx\n\tmov     [rdi-16], rax\n\tadd     r11, r10\n\tadd     r11, rdx\n\tmov     [rdi-8], r11\n    EXIT_PROC reg_save_list\n\n.20:mov     r11, [r8]\n\timul    r11, [rsi-8]\n\tadd     r11, rax\n\tmov     [rdi-8], r11\n    END_PROC reg_save_list\n "
  },
  {
    "path": "mpn/x86_64w/sandybridge/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  mpn_nand_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nand_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpand    xmm1, xmm3\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tand     rax, r9\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n\n;  mpn_nior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpor     xmm1, xmm3\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tor      rax, r9\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\teax                        rcx,      rdx\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_popcount\n\tmov     r8, 5\n\tlea     rcx, [rcx+rdx*8-40]\n\txor     eax, eax\n\tsub     r8, rdx\n\tjnc     .1\n\txalign  16\n.0:\n\tpopcnt  r9, [rcx+r8*8]\n\tpopcnt  r10, [rcx+r8*8+8]\n\tpopcnt  r11, [rcx+r8*8+16]\n\tpopcnt  rdx, [rcx+r8*8+24]\n\tadd     rax, r9\n\tadd     rax, rdx\n\tadd     rax, r10\n\tpopcnt  r9, [rcx+r8*8+32]\n\tpopcnt  r10, [rcx+r8*8+40]\n\tadd     rax, r9\n\tadd     rax, r11\n\tadd     rax, r10\n\tadd     r8, 6\n\tjnc     .0\n.1:\n\tlea     rdx, [rel .2]\n\tlea     r8, [r8+r8*8]\n\tadd     rdx, r8\n\tjmp     rdx\n.2:\n\tnop\n\tpopcnt  r9, [rcx]\n\tadd     rax, r9\n.3:\n\tpopcnt  r10, [rcx+8]\n\tadd     rax, r10\n.4:\tpopcnt  r11, [rcx+16]\n\tadd     rax, r11\n.5:\tpopcnt  rdx, [rcx+24]\n\tadd     rax, rdx\n.6:\tpopcnt  r9, [rcx+32]\n\tadd     rax, r9\n.7:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_sandybridge_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    mov     r8d, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extension of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // earlier , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store low 
quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_rshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shr     rdx, cl\n    neg     rcx\n    shl     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     rax, r8\n    mov     r11, r9\n    shr     r8, cl\n    shr     r9, cl\n    neg     rcx\n    shl     r11, cl\n    shl     rax, cl\n    or      r8, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     r11, rdx\n    mov     rdx, r8\n\n    mov     eax, 64\n    lea     r9, [r11+8]\n    sub     rax, rcx\n    and     r9, -16\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    movdqa  xmm5, [r9]\n    movdqa  xmm3, xmm5\n   
 psllq   xmm5, xmm1\n    movq    rax, xmm5\n    cmp     r11, r9\n    lea     r11, [r11+rdx*8-40]\n    je      .4\n    movq    xmm2, [r9-8]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    psrlq   xmm4, xmm0\n    por     xmm4, xmm5\n    movq    [r10], xmm4\n    lea     r10, [r10+8]\n    dec     rdx\n    movq    rax, xmm2\n.4: lea     r10, [r10+rdx*8-40]\n    psrlq   xmm3, xmm0\n    mov     r8d, 5\n    sub     r8, rdx\n    jnc     .6\n\n    xalign  16\n.5: movdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movdqa  xmm5, [r11+r8*8+32]\n    movdqa  xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    add     r8, 4\n    jnc     .5\n.6: cmp     r8, 2\n    ja      .10\n    jz      .9\n    jp      .8\n.7:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movq    xmm5, [r11+r8*8+32]\n    movq    xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8+32], xmm3\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    ret\n\n    xalign  16\n.9:\tmovq    xmm2, [r11+r8*8+16]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  
xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  mpn_store\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n%define\tMOVQ\tmovd\n\n\tLEAF_PROC mpn_store\n\tlea     rcx, [rcx-32]\n\tcmp     rdx, 0\n\tjz      .4\n\tMOVQ    xmm0, r8\n\tmovddup xmm0, xmm0\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+32], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 4\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+32]\n\tsub     rdx, 4\n\tmovdqa  [rcx], xmm0\n\tmovdqa  [rcx+16], xmm0\n\tjnc     .2\n.3:\tcmp     rdx, -2\n\tja      .5\n\tjz      .7\n\tjp      .6\n.4:\tret\n\n.5:\tmovdqa  [rcx+32], xmm0\n.6:\tmov     [rcx+rdx*8+56], r8\n\tret\n\n.7:\tmovdqa  [rcx+32], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/sub_n.asm",
    "content": "; PROLOGUE(mpn_sub_n)\n\n;  Version 1.0.3.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  Calculate src1[size] plus(minus) src2[size] and store the result in\n;  dst[size].  The return value is the carry bit from the top of the result\n;  (1 or 0).  The _nc version accepts 1 or 0 for an initial carry into the\n;  low limb of the calculation.  
Note values other than 1 or 0 here will\n;  lead to garbage results.\n;\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    xalign  8\n    LEAF_PROC mpn_sub_nc\n    mov     r10,[rsp+40]\n    jmp     entry\n\n    xalign  8\n    LEAF_PROC mpn_sub_n\n    xor     r10, r10\nentry:\n    mov     rax, r9\n    and     rax, 3\n    shr     r9, 2\n    lea     r9,[r10+r9*2]\n\tsar     r9, 1\n    jnz     .2\n\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+8]\n    sbb     r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .1\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.1: adc     rax, rax\n    ret\n\n    xalign  8\n.2:\tmov     r10, [rdx]\n    mov     r11, [rdx+8]\n    lea     rdx, [rdx+32]\n    sbb     r10, [r8]\n    sbb     r11, [r8+8]\n    lea     r8, [r8+32]\n    mov     [rcx], r10\n    mov     [rcx+8], r11\n    lea     rcx, [rcx+32]\n    mov     r10, [rdx-16]\n    mov     r11, [rdx-8]\n    sbb     r10, [r8-16]\n    sbb     r11, [r8-8]\n    mov     [rcx-16], r10\n    dec     r9\n    mov     [rcx-8], r11\n    jnz     .2\n\n    inc     rax\n    dec     rax\n    jz      .3\n    mov     r10, [rdx]\n    sbb     r10, [r8]\n    mov     [rcx], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+8]\n    sbb      r10, [r8+8]\n    mov     [rcx+8], r10\n    dec     rax\n    jz      .3\n    mov     r10, [rdx+16]\n    sbb     r10, [r8+16]\n    mov     [rcx+16], r10\n    dec     rax\n.3: adc     rax, rax\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/submul_1.asm",
    "content": "; PROLOGUE(mpn_submul_1)\n        \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_declsh_n(mp_ptr, mp_ptr, mp_size_t,   mp_uint)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n  \n        xalign 16\n        LEAF_PROC mpn_submul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jnz     .1\n        mul     r9\n        sub     [rcx], rax\n        adc     rdx, 0\n        mov     rax, rdx\n        ret\n\n        xalign   16\n.1:\t    FRAME_PROC ?mpn_sandybridge_submul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db 
     0x26\n        cmp     r11, 0\n        db      0x26\n        mov     [rsp-8], r12\n        db      0x26\n        jge     .3\n.2:     xor     r10, r10\n        mul     rcx\n        sub     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        sub     [rdi+r11*8+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        sub     [rdi+r11*8+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        sub     [rdi+r11*8+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .2\n.3:     xor     r10, r10\n        mul     rcx\n        sub     [rdi+r11*8], r8\n        adc     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .7\n        jz      .6\n        jp      .5\n.4:     mov     rax, [rsi+16]\n        mul     rcx\n        sub     [rdi+8], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        sub     [rdi+16], r10\n        adc     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        sub     [rdi+24], r12\n        adc     r8, rax\n        adc     r9, rdx\n        sub     [rdi+32], r8\n        adc     r9, 0\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n.5:     mov     rax, [rsi+24]\n        mul     rcx\n        sub     [rdi+16], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        sub     [rdi+24], r10\n        adc     r12, rax\n        adc     r8, rdx\n        sub     [rdi+32], r12\n    
    adc     r8, 0\n        mov     rax, r8\n        EXIT_PROC reg_save_list\n\n        align   16\n.6:     mov     rax, [rsi+32]\n        mul     rcx\n        sub     [rdi+24], r9\n        adc     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        sub     [rdi+32], r10\n        adc     r12, 0\n        mov     rax, r12\n        EXIT_PROC reg_save_list\n.7:     sub     [rdi+32], r9\n        adc     r10, 0\n        mov     rax, r10\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  mpn_xnor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xnor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpxor    xmm1, xmm3\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\txor     rax, r9\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sandybridge/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  mpn_xor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpxor    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\txor     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/add_n.asm",
    "content": "\n;  AMD64 mpn_add_n\n;  Copyright 2016 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)\n;\trax = carry\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define SizeRest r11\n    %define LIMB1   rax\n    %define LIMB2   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define SizeRest r11\n    %define LIMB1   rax\n    %define LIMB2   r8\n%endif\n\n%define ADDSUB add\n%define ADCSBB adc\n\n; Skylake has problems sustaining 2 read and 1 write per clock cycle.\n; It sometimes gets into a \"mode\" (for the lack of a better word) where\n; it does not fully utilize port 7, causing store uops to compete with\n; the reads for ports 2,3. We try to alleviate the problem by turning\n; some of the 64-bit writes into 128-bit writes, reducing the number of\n; write instructions. 
Unfortunately, SSE2/AVX2 do not have particularly\n; good instructions for assembling an SSE2 128-bit word from two GPR\n; 64-bit words, so the instruction count is greatly inflated.\n\n%macro  STORE 1\n\tmov\t[SumP %1], LIMB1\n\tmov\t[SumP %1 + 8], LIMB2\n%endmacro\n\n%macro  SSESTORE 1\n\tmovq\txmm0, LIMB1\n\tmovq\txmm1, LIMB2\n\tvpermilpd xmm1, xmm1, 0\n\tpblendw xmm0, xmm1, 0xf0\n\tmovaps\t[SumP %1], xmm0\n%endmacro\n\n\n    BITS    64\n\n    LEAF_PROC mpn_add_n\n; Make dest 16-bytes aligned\n\ttest\tSumP, 8\n\tjz\t.aligned\n\tdec\tSize\n\tmov\tSizeRest, Size\n\tand\tSizeRest, 7\n\tshr\tSize, 3\n; Unaligned and Size > 8: do one limb separately, then the normal loop\n\tjnz\t.unaligned\n; Unaligned and Size <= 8: do all with .rest loop\n\tinc\tSizeRest\n\tclc\n\tjmp\t.rest ;ajs:notshortform\n\n.aligned:\n\tmov\tSizeRest, Size\n\tand\tSizeRest, 7\n\tshr\tSize, 3\n\tclc\n\tjz\t.rest ;ajs:notshortform\n\tjmp\t.loop1\n\n.unaligned:\n\tmov\tLIMB1, [Inp1P]\n\tADDSUB\tLIMB1, [Inp2P]\n\tmov\t[SumP], LIMB1\n\tlea\tInp1P, [Inp1P+8]\n\tlea\tInp2P, [Inp2P+8]\n\tlea\tSumP, [SumP+8]\n\n\talign   16\n.loop1:\n\tmov\tLIMB1, [Inp1P]\n\tmov\tLIMB2, [Inp1P+8]\n\tADCSBB\tLIMB1, [Inp2P]\n\tADCSBB\tLIMB2, [Inp2P+8]\n\tSSESTORE +0\n\tmov\tLIMB1, [Inp1P+16]\n\tmov\tLIMB2, [Inp1P+24]\n\tADCSBB\tLIMB1, [Inp2P+16]\n\tADCSBB\tLIMB2, [Inp2P+24]\n\tSTORE +16\n\tmov\tLIMB1, [Inp1P+32]\n\tmov\tLIMB2, [Inp1P+40]\n\tADCSBB\tLIMB1, [Inp2P+32]\n\tADCSBB\tLIMB2, [Inp2P+40]\n\tSTORE +32\n\tmov\tLIMB1, [Inp1P+48]\n\tmov\tLIMB2, [Inp1P+56]\n\tADCSBB\tLIMB1, [Inp2P+48]\n\tADCSBB\tLIMB2, [Inp2P+56]\n\tSTORE +48\n\tlea\tInp1P, [Inp1P+64]\n\tlea\tInp2P, [Inp2P+64]\n\tlea\tSumP, [SumP+64]\n\tdec\tSize\n\tjnz\t.loop1\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.end\n.rest:\n\tmov\tLIMB1, [Inp1P]\n\tADCSBB\tLIMB1, [Inp2P]\n\tmov\t[SumP], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, [Inp1P+8]\n\tADCSBB\tLIMB1, [Inp2P+8]\n\tmov\t[SumP+8], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, 
[Inp1P+16]\n\tADCSBB\tLIMB1, [Inp2P+16]\n\tmov\t[SumP+16], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, [Inp1P+24]\n\tADCSBB\tLIMB1, [Inp2P+24]\n\tmov\t[SumP+24], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tjmp\t.rest\n.end:\n\tmov\teax, 0\n\tadc\teax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/addlsh1_n.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t addlsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, adds this to Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and add by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 1 cycle in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1\n; ============================================================================\n\n%define USE_WIN64\n\n%define ADDSUB add\n%define ADCSBB adc\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n%define Op3     RCX\n%define Op2     RDX\n%define Op1     R8\n%define Size    R9\n\n%define Limb0   RBX\n%define Limb1   RDI\n%define Limb2   RSI\n\n%define Limb3   R10\n%define Limb4   R11\n%define Limb5   R12\n%define Limb6   R13\n%define Limb7   R14\n%define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    RBP\n%endif\n\n%macro ACCUMULATE 1\n\n    ADCSBB  Limb%1, [Op2 + 8 * %1]\n    mov     [Op3 + 8 * %1], Limb%1\n%endmacro\n\n    align   32\n\n  FRAME_PROC mpn_addlsh1_n, 0, reg_save_list\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & addition with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n\n    ADDSUB  Limb0, [Op2]\n    mov     [Op3], Limb0\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd   
 Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; add Op2 to oct-limb and store in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - 
hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/addmul_1.asm",
    "content": ";  AMD64 mpn_addmul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB add\n%define ADCSBB adc\n\nalign 16\n\nFRAME_PROC mpn_addmul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\t\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/addmul_2.asm",
    "content": "; PROLOGUE(mpn_addmul_2)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.2 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_addmul_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rbx, rsi, rdi, r12\n\n\tFRAME_PROC mpn_addmul_2, 0, reg_save_list\n\tmov     rdi, rcx\n\tmov     rsi, rdx\n\tmov     rax, r8\n\t\n\tmov     rcx, [r9]\n\tmov     r8, [r9+8]\n\tmov     rbx, 4\n\tsub     rbx, rax\n\tlea     rsi, [rsi+rax*8-32]\n\tlea     rdi, [rdi+rax*8-32]\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8]\n\tmul     rcx\n\tmov     r12, rax\n\tmov     r9, rdx\n\tcmp     rbx, 0\n\tjge     .2\n\t\n\txalign  16\n.1: mov     rax, [rsi+rbx*8]\n\tmul     r8\n\tadd     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, 
rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     rax, [rsi+rbx*8+32]\n\tmul     rcx\n\tadd     r12, rax\n\tadc     r9, rdx\n\tadc     r10, 0\n\tadd     rbx, 4\n\tjnc     .1\n.2:\tmov     rax, [rsi+rbx*8]\n\tmul     r8\n\tcmp     rbx, 2\n\tja      .6\n\tjz      .5\n\tjp      .4\n.3: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tmov     r9, 0\n\tadc     r11, rax\n\t; padding\n\tmov     r10, 0\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\t; padding\n\tmov     r15, r15\n\tmul     rcx\n\tadd     r11, rax\n\tmov     rax, [rsi+rbx*8+24]\n\tadc     r12, rdx\n\tadc     r9, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+24], r11\n\tadc     r12, rax\n\tadc     r9, rdx\n\tmov     [rdi+rbx*8+32], r12\n\tmov     rax, r9\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.4: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc   
  r11, rdx\n\tmov     rax, [rsi+rbx*8+16]\n\tmul     rcx\n\tadd     r10, rax\n\tmov     rax, [rsi+rbx*8+16]\n\tadc     r11, rdx\n\tadc     r12, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+16], r10\n\tadc     r11, rax\n\tadc     r12, rdx\n\tmov     [rdi+rbx*8+24], r11\n\tmov     rax, r12\n    EXIT_PROC reg_save_list\n\t\n\txalign  16\n.5: add     r9, rax\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r10, rdx\n\tmov     r11, 0\n\tmul     rcx\n\tadd     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tmov     r12, 0\n\tadc     r10, rdx\n\tmov     rax, [rsi+rbx*8+8]\n\tadc     r11, 0\n\tmul     r8\n\tadd     [rdi+rbx*8+8], r9\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+rbx*8+16], r10\n\tmov     rax, r11\n    EXIT_PROC reg_save_list\n\n\txalign  16\n.6: add     [rdi+rbx*8], r12\n\tadc     r9, rax\n\tadc     r10, rdx\n\tmov     [rdi+rbx*8+8], r9\n\tmov     rax, r10\n.7:\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/and_n.asm",
    "content": "; PROLOGUE(mpn_and_n)\n\n;  mpn_and_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_and_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_and_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpand    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/andn_n.asm",
    "content": "; PROLOGUE(mpn_andn_n)\n\n;  mpn_andn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_andn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n    \n\tLEAF_PROC mpn_andn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpandn   xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  
xmm2, [rdx+r10*8]\n\tpandn   xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tand     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/add_err1_n.asm",
    "content": ";  AMD64 mpn_add_err1_n\n;  Copyright 2017 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)+(rdx,rcx)+CyIn\n;\trax = carry\n;\t(rcx,2) = rev(r8,rcx) \\dot (carry,rcx) where carry is the sequence\n;       of carries from the addition of (rsi,rcx)+(rdx,rcx)\n\n;  mp_limb_t mpn_add_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_sub_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                         rdi     rsi     rdx     rcx       r8        r9     8(rsp)\n;  rax                         rcx     rdx      r8      r9 [rsp+40]  [rsp+48]   [rsp+56]\n\n%include 'yasm_mac.inc'\n\n%define SumP    rdi\n%define Inp1P   rsi\n%define Inp2P   rdx\n%define EP      r11\n%define SizeRest rcx\n%define YP      r8\n%define Size    r9\n%define CyIn    [rsp+8]\n%define LIMB0   rax\n%define E0      r12\n%define E1      r13\n%define Zero    r14\n%define Dummy   rbx\n\n%define reg_save_list rsi, rdi, rbx, r12, r13, r14\n\n\talign   32\n\tBITS    64\n\n%macro  DO_LIMB 1\n\tmov     LIMB0, [Inp1P + %1*8]\n\tadc     LIMB0, [Inp2P + %1*8]\n\tmov     [SumP + %1*8], LIMB0\n\tmov\t    LIMB0, [YP - 
%1*8]\n\tcmovnc\tLIMB0, Zero\n\tinc\t    Dummy\t\t\t; OF = 0\n\tadox\tE0, LIMB0\n\tadox\tE1, Zero\n%endmacro\n\nFRAME_PROC mpn_add_err1_n, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     r11, r9\n    mov     r8, [rsp + stack_use + 40]\n    mov     r9, [rsp + stack_use + 48]\n    mov     LIMB0, [rsp + stack_use + 56]\n\n\tmov     SizeRest, Size\n\tlea\tYP, [YP + Size*8 - 8]\n\tand\tSizeRest, 7\n\txor\tZero, Zero\n\tmov\tE0, Zero\n\tmov\tE1, Zero\n\tshr     Size, 3\n\tbt\tLIMB0, 0\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tDO_LIMB 0\n\tDO_LIMB 1\n\tDO_LIMB 2\n\tDO_LIMB 3\n\tDO_LIMB 4\n\tDO_LIMB 5\n\tDO_LIMB 6\n\tDO_LIMB 7\n\n\tlea     Inp1P, [Inp1P+64]\n\tlea     Inp2P, [Inp2P+64]\n\tlea     SumP, [SumP+64]\n\tlea     YP, [YP-64]\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tDO_LIMB 0\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 1\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 2\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 3\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tlea\tYP, [YP-32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, Zero\n\tsetc al\n\tmov\t[EP], E0\n\tmov\t[EP+8], E1\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/add_n.asm",
    "content": "\n;  Copyright 2016, 2017 Jens Nurmann\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t  mpn_add_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_add_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define Op1     RDX\n%define Op2     R8\n%define Size    R9\n%define Op3     RCX\n\n%define CarryD  EAX\n%define CarryB  AL\n\n%define Limb0   R10\n%define SizeB   R9B         ; check if this fits to code alignment!\n%define Count   R11\n\n    align   32\n\n\tLEAF_PROC mpn_add_nc\n    mov     CarryD, [rsp+40]\n    mov     Count, Size\n    shr     Count, 3\n    inc     Count\n    vpor    YMM0, YMM0, YMM0    ; see comment in main loop below\n    jmp     One\n\n    align   32\n\n\tLEAF_PROC mpn_add_n\n    xor     CarryD, CarryD\n    mov     Count, Size\n    shr     Count, 3\n    inc     Count\n    vpor    YMM0, YMM0, YMM0    ; see comment in main loop below\n\n    ; unrolling the loop from small to high gives better timings\n    ; when considering all sizes 1-100 limb\n 
 One:\n    test    SizeB, 1\n    je      .Two\n    shr     CarryB, 1\n    mov     Limb0, [Op1]        ; I am using implicit code alignment through-\n    adc     Limb0, [Op2]        ; out the following to get all branch targets\n    mov     [Op3], Limb0        ; on 16 byte alignments - check this if non-\n    setc    CarryB              ; Linux register allocation is used!\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n\n  .Two:\n\n    test    SizeB, 2\n    je      .Four\n\n    shr     CarryB, 1\n    mov     Limb0, [dword Op1]\n    adc     Limb0, [Op2]\n    mov     [Op3], Limb0\n    mov     Limb0, [Op1+8]\n    adc     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    setc    CarryB\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n\n  .Four:\n\n    test    SizeB, 4\n    bt      CarryD, 0\n    je      .Check\n\n    mov     Limb0, [Op1]\n    adc     Limb0, [Op2]\n    mov     [Op3], Limb0\n    mov     Limb0, [Op1+8]\n    adc     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    mov     Limb0, [Op1+16]\n    adc     Limb0, [Op2+16]\n    mov     [Op3+16], Limb0\n    mov     Limb0, [Op1+24]\n    adc     Limb0, [Op2+24]\n    mov     [Op3+24], Limb0\n\n    lea     Op1, [Op1+32]\n    lea     Op2, [Op2+32]\n    lea     Op3, [Op3+32]\n    jmp     .Check\n\n    ; main loop:\n    ; - 1.03-1.05 cycles per limb in L1D$\n    ; - 1.13-1.15 cycles per limb in L2D$\n    ; - 1.50-1.75 cycles per limb in L3D$\n    align   16\n  .Loop:\n\n    mov     Limb0, [Op1]\n    adc     Limb0, [Op2]\n    mov     [Op3], Limb0\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write always ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    YMM0, YMM0, YMM0\n\n    mov     Limb0, [dword Op1+8]\n    adc     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    mov     Limb0, [Op1+16]\n    adc     Limb0, [Op2+16]\n    mov     [Op3+16], Limb0\n    
mov     Limb0, [Op1+24]\n    adc     Limb0, [Op2+24]\n    mov     [Op3+24], Limb0\n    mov     Limb0, [Op1+32]\n    adc     Limb0, [Op2+32]\n    mov     [Op3+32], Limb0\n    mov     Limb0, [Op1+40]\n    adc     Limb0, [Op2+40]\n    mov     [Op3+40], Limb0\n    mov     Limb0, [Op1+48]\n    adc     Limb0, [Op2+48]\n    mov     [Op3+48], Limb0\n    mov     Limb0, [Op1+56]\n    adc     Limb0, [Op2+56]\n    mov     [Op3+56], Limb0\n\n    lea     Op1, [Op1+64]\n    lea     Op2, [Op2+64]\n    lea     Op3, [Op3+64]\n\n  .Check:\n\n    dec     Count\n    jne     .Loop\n\n  .Exit:\n\n    setc    CarryB              ; move total carry to RAX\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/addmul_1.asm",
    "content": ";  AMD64 mpn_addmul_1 optimised for Intel Broadwell.\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;  mp_limb_t mpn_addmul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list  rsi, rdi\n\n    TEXT\n    align   32\n    FRAME_PROC mpn_addmul_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n\n    mov     r10, rcx\n    mov     rcx, rdx\n    mov     r8, rdx\n    shr     rcx, 3\n    and     r8, 7\n    mov     rdx, r10\n    lea     r10, [rel .1]\n    movsxd  r8, dword [r10+r8*4]\n    lea     r10, [r8+r10]\n    jmp     r10\n\n    align   8\n.1: dd      .2 - .1\n    dd      .7 - .1\n    dd     .10 - .1\n    dd      .3 - .1\n    dd      .4 - .1\n    dd      .5 - .1\n    dd      .6 - .1\n    dd     .19 - .1\n\n.2: mulx    r8, r10, [rsi]\n    lea     rsi, [rsi-8]\n    lea     rdi, [rdi-8]\n    lea     rcx, [rcx-1]\n    jmp     .13\n.3: mulx    rax, r9, [rsi]\n    lea     rsi, [rsi+16]\n    lea     rdi, [rdi-48]\n    jmp     .18\n.4: mulx    r8, r10, [rsi]\n    lea     rsi, [rsi+24]\n    lea     rdi, [rdi-40]\n    jmp     .17\n.5: mulx    rax, r9, [rsi]\n    lea     rsi, [rsi+32]\n    lea     rdi, [rdi-32]\n    jmp     .16\n.6: mulx    r8, r10, [rsi]\n    lea     rsi, [rsi+40]\n    lea     rdi, [rdi-24]\n    jmp     .15\n.7: mulx    rax, r9, [rsi]\n    jrcxz   .8\n    jmp     .12\n.8: add     r9, [rdi]\n    mov     [rdi], r9\n    adc     rax, rcx\n    EXIT_PROC reg_save_list\n\n.9: adox    r9, [rdi]\n    mov     [rdi], r9\n    adox    rax, rcx\n    adc     rax, rcx\n    END_PROC reg_save_list\n\n    nop\n    nop\n    nop\n    nop\n.10:mulx    r8, r10, [rsi]\n    lea     rsi, [rsi+8]\n    lea     rdi, [rdi+8]\n    mulx    rax, r9, [rsi]\n\n    align   32\n.11:adox    r10, [rdi-8]\n    adcx    r9, r8\n    mov     [rdi-8], r10\n    jrcxz   .9\n.12:mulx    r8, r10, [rsi+8]\n    adox    r9, [rdi]\n    lea  
   rcx, [rcx-1]\n    mov     [rdi], r9\n    adcx    r10, rax\n.13:mulx    rax, r9, [rsi+16]\n    adcx    r9, r8\n    adox    r10, [rdi+8]\n    mov     [rdi+8], r10\n.14:mulx    r8, r10, [rsi+24]\n    lea     rsi, [rsi+64]\n    adcx    r10, rax\n    adox    r9, [rdi+16]\n    mov     [rdi+16], r9\n.15:mulx    rax, r9, [rsi-32]\n    adox    r10, [rdi+24]\n    adcx    r9, r8\n    mov     [rdi+24], r10\n.16:mulx    r8, r10, [rsi-24]\n    adcx    r10, rax\n    adox    r9, [rdi+32]\n    mov     [rdi+32], r9\n.17:mulx    rax, r9, [rsi-16]\n    adox    r10, [rdi+40]\n    adcx    r9, r8\n    mov     [rdi+40], r10\n.18:adox    r9, [rdi+48]\n    mulx    r8, r10, [rsi-8]\n    mov     [rdi+48], r9\n    lea     rdi, [rdi+64]\n    adcx    r10, rax\n    mulx    rax, r9, [rsi]\n    jmp     .11\n.19:mulx    rax, r9, [rsi]\n    lea     rsi, [rsi-16]\n    lea     rdi, [rdi-16]\n    jmp     .14\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/addsub_n.asm",
    "content": ";  AMD64 mpn_addsub_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)+(rdx,r8)-(rcx,r8)\n;   rax = summed carry and borrow in range [ -1..1 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_addsub_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_addsub_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills      ; clears carry & borrow\n\n    jmp     .Check\n\n    align   16\n.Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    adc     Limb0, [Src2P]\n    adc     Limb1, [Src2P+8]\n    adc     Limb2, [Src2P+16]\n    adc     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n.Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    adc     Limb0, [Src2P]\n    
setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    adc     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    adc     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) ok from benchmark figures\n.Exit:\n\n    sub     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/and_n.asm",
    "content": ";  AVX mpn_and_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) and (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.68 0.34-0.94 (depending on alignment)\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define SizeB   R9B\n%define Count   RAX\n%define CountD  EAX\n%define CountB   AL\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_and_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 0, 4, 8 & 12 immediately\n\n\t; the code density in the core loop is low - 5.18 byte per instruction\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    add     Src1P, 128\n    add     Src2P, 128\n    add     ResP, 
128\n\n  .Check:\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     SizeB, 2            ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     CountB, 2           ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/andn_n.asm",
    "content": ";  AVX mpn_andn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) and not (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_andn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src2P]\n    vpandn  QLimb0, QLimb0, [Src1P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src2P+32]\n    vpandn  QLimb0, QLimb0, [Src1P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src2P+64]\n    vpandn  QLimb0, QLimb0, [Src1P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src2P+96]\n    vpandn  QLimb0, QLimb0, [Src1P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src2P, [Src2P+Limb0]\n    lea     Src1P, [Src1P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src2P+64]\n    vpandn  QLimb0, QLimb0, [Src1P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src2P+32]\n    vpandn  QLimb0, QLimb0, [Src1P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src2P]\n    vpandn  QLimb0, QLimb0, [Src1P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src2P, Limb0\n    add     Src1P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src2P+16]\n    andn    Limb0, Limb0, [Src1P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src2P+8]\n    andn    Limb0, Limb0, [Src1P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src2P]\n    andn    Limb0, Limb0, [Src1P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/ior_n.asm",
    "content": ";  AVX mpn_ior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) ior (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_ior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/iorn_n.asm",
    "content": ";  AVX mpn_iorn_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) and not (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_iorn_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src2P]\n    vpor    QLimb0, QLimb0, [Src1P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src2P+32]\n    vpor    QLimb0, QLimb0, [Src1P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src2P+64]\n    vpor    QLimb0, QLimb0, [Src1P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src2P+96]\n    vpor    QLimb0, QLimb0, [Src1P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src2P, [Src2P+Limb0]\n    lea     Src1P, [Src1P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src2P+64]\n    vpor    QLimb0, QLimb0, [Src1P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src2P+32]\n    vpor    QLimb0, QLimb0, [Src1P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src2P]\n    vpor    QLimb0, QLimb0, [Src1P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src2P, Limb0\n    add     Src1P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src2P+16]\n    not     Limb0\n    or      Limb0, [Src1P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src2P+8]\n    not     Limb0\n    or      Limb0, [Src1P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src2P]\n    not     Limb0\n    or      Limb0, [Src1P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/lshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux     RAX        RDI         RSI            RDX              RCX\n; Win7      RAX        RCX         RDX            R8               R9\n;\n; Description:\n; The function shifts Op1 left by n bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bits of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( Op1[ Size1-1..0 ]:ShlIn ) << 1\n; - Op1[ 0 ] >> 63\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few 
percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 31.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShlDL0      XMM2    ; Attn: this must match ShlQL0 definition\n%define ShrDL0      XMM3    ; Attn: this must match ShrQL0 definition\n%define ShlDLCnt    XMM6    ; Attn: this must match ShlQlCnt definition\n%define ShrDLCnt    XMM7    ; Attn: this must match ShrQlCnt definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShlQL0      YMM2\n%define ShrQL0      YMM3\n%define ShlQL1      YMM4\n%define ShrQL1      YMM5\n%define ShlQLCnt    YMM6\n%define ShrQLCnt    YMM7\n\n    align   32\nFRAME_PROC  mpn_lshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    sub     Size1, 1\n    jc      .Exit               ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, CL\n\n    or      Size1, Size1\n    je      .lShlEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n  %ifdef OFFS_REG\n    mov     Offs, -512\n 
 %endif\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShlEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShlEquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  .lShlEquA16:\n\n    test    Op2, 16\n    jne     .lShlEquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShlEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShrQL0, ShrQL0, 10010011b\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShlEquAVXCheck\n\n    ; main loop (prefetching enabled; unloaded cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in LD3$\n    align   16\n  .lShlEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpermq    ShrQL1, ShrQL1, 10010011b\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 00000011b\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vpermq    ShrQL0, ShrQL0, 10010011b\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 00000011b\n    
vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShlEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShlEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shld    Limb2, Limb1, CL\n%if 1\n    vmovq   ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShlEquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShlEquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, CL\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShlEquTwo:\n\n    test    Size1, 2\n    je      .lShlEquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, CL\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShlEquOne:\n\n    test    Size1, 1\n    je      .lShlEquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShlEquPost:\n\n    shl    Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/lshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_lshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 left by one bit, stores the result in Op2 (non-\n; destructive shl) and hands back the shifted-out most significant bit of Op1.\n; The function operates decreasing in memory supporting in-place operation.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benched on 
21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        -512    ; used direct def. to stay in Win scratch regs\n\n    %define ShlDL0      XMM2    ; ATTN: this must match ShlQL0 definition\n    %define ShrDL0      XMM3    ; ATTN: this must match ShrQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShlQL0      YMM2\n    %define ShrQL0      YMM3\n    %define ShlQL1      YMM4\n    %define ShrQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_lshift1\n\n    xor     EAX, EAX\n    sub      Size1, 1\n    jc      .Exit               ;ajs:notshortform ; Size1=0 =>\n\n    lea     Op1, [Op1+8*Size1]\n    lea     Op2, [Op2+8*Size1]\n\n    mov     Limb1, [Op1]\n    shld    RAX, Limb1, 1\n\n    or      Size1, Size1\n    je      .lShl1EquPost       ;ajs:notshortform ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShl1EquFour       ;ajs:notshortform ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    jne     .lShl1EquA16\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op1, 8\n    sub     Op2, 8\n    sub     Size1, 1\n\n  
.lShl1EquA16:\n\n    test    Op2, 16\n    jne     .lShl1EquAVX\n\n    mov     Limb2, [Op1-8]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-16]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n    sub     Size1, 2\n\n  .lShl1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1-24]\n    vpsrlq  ShrQL0, QLimb0, 63\n    vpermq  ShrQL0, ShrQL0, 147\t\t; 0b10010011\n\n    sub     Op1, 32\n    sub     Size1, 4\n    jmp     .lShl1EquAVXCheck\n\n    ; main loop requires on entry:\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShl1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1-24]\n    vpsllq    ShlQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1-56]\n    vpsrlq    ShrQL1, QLimb1, 63\n    vpermq    ShrQL1, ShrQL1, 147\t; 0b10010011\n    vpblendd  ShrQL0, ShrQL0, ShrQL1, 3\t; 0b00000011\n    vpor      ShlQL0, ShlQL0, ShrQL0\n    vpsllq    ShlQL1, QLimb1, 1\n    vpsrlq    ShrQL0, QLimb0, 63\n    vpermq    ShrQL0, ShrQL0, 147\t; 0b10010011\n    vpblendd  ShrQL1, ShrQL1, ShrQL0, 3\t; 0b00000011\n    vmovdqa   [Op2-24], ShlQL0\n    vpor      ShlQL1, ShlQL1, ShrQL1\n    vmovdqa   [Op2-56], ShlQL1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lShl1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShl1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shr     Limb2, 63\n%if 1\n    vmovq ShlDL0, Limb2\n    vpblendd ShrQL0, ShrQL0, ShlQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n\n    ; Insert value of Limb2 into the 0-th qword of ShrDL0\n    pinsrq  ShrDL0, Limb2, 0        ; SSE4.1\n%endif\n    vpsllq  
ShlQL0, QLimb0, 1\n    vpor    ShlQL0, ShlQL0, ShrQL0\n    vmovdqa [Op2-24], ShlQL0\n\n    sub     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 7 limbs with SHLD mnemonic\n  .lShl1EquFour:\n\n    sub     Op1, 8\n    test    Size1, 4\n    je      .lShl1EquTwo\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n    mov     Limb2, [Op1-16]\n    shld    Limb1, Limb2, 1\n    mov     [Op2-16], Limb1\n    mov     Limb1, [Op1-24]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-24], Limb2\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lShl1EquTwo:\n\n    test    Size1, 2\n    je      .lShl1EquOne\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1-8]\n    shld    Limb2, Limb1, 1\n    mov     [Op2-8], Limb2\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lShl1EquOne:\n\n    test    Size1, 1\n    je      .lShl1EquPost\n\n    mov     Limb2, [Op1]\n    shld    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    sub     Op2, 8\n\n  .lShl1EquPost:\n\n    shl     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/mul_1.asm",
    "content": ";  AMD64 mpn_mul_1\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rdx) = rcx*(rsi,rdx)\n;\trax = high word of product\n\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n\n%include 'yasm_mac.inc'\n\n    BITS    64\n\n; the following register allocation scheme is valid for Linux\n\n    %define RP      RDI\n    %define S1P     RSI\n    %define Size    RCX\n    %define S2      RDX\n\n    %define MulLo0  R8\n    %define MulHi0  R9\n    %define MulLo1  R10\n    %define MulHi1  R11\n    %define MulLo2  R12\n    %define MulHi2  R13\n    %define MulLo3  R14\n    %define MulHi3  RBX\n\n%define reg_save_list rsi, rdi, rbx, r12, r13, r14\n\n    align   32\nFRAME_PROC mpn_mul_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r8\n    mov     rdx, r9\n    mov     r8, [rsp + stack_use + 40]\n\n    xor     MulHi3, MulHi3\n\n    mov     RAX, Size           ; may be 
increased by 1 at the end\n    sub     Size, 4\n    jc      .Post               ; separate handling of remaining max. 3 limb =>\n\n    ; prepare a quadlimb for main-loop entry\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    mulx    MulHi3, MulLo3, [S1P+24]\n    add     S1P, 32\n    add     MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    jmp     .Check              ; enter main loop =>\n\n    ; main loop (unloaded operands)\n    ; - 1.25      cycles per limb in L1D$\n    ; - 1.25      cycles per limb in L2D$\n    ; - 1.60-1.72 cycles per limb in L3D$\n    align   32\n  .Loop:\n\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     [RP+24], MulLo3\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    mov     [RP+32], MulLo0\n    adc     MulLo1, MulHi0\n    mov     [RP+40], MulLo1\n    adc     MulLo2, MulHi1\n    mov     [RP+48], MulLo2\n    mulx    MulHi3, MulLo3, [S1P+24]\n    mulx    MulHi0, MulLo0, [S1P+32]\n    mulx    MulHi1, MulLo1, [S1P+40]\n    adc     MulLo3, MulHi2\t; no carry-out here\n    adc     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    mulx    MulHi2, MulLo2, [S1P+48]\n    adc     MulLo2, MulHi1\n    mov     [RP+56], MulLo3\n    mulx    MulHi3, MulLo3, [S1P+56]\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    add     S1P, 64\n    add     RP, 64\n\n  .Check:\n\n    sub     Size, 8\n    jnc     .Loop\n\n    ; core loop roll-out 8 can generate dangling quad-limb\n    test    Size, 4\n    je      .Store              ; no dangling quad-limb =>\n\n    mov     [RP], MulLo0\n    mulx    MulHi0, MulLo0, [S1P]\n    mov     [RP+8], MulLo1\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mov     [RP+16], MulLo2\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    mov     [RP+24], MulLo3\n    
mulx    MulHi3, MulLo3, [S1P+24]\n    adc     MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulLo3, MulHi2\n    adc     MulHi3, 0\n\n    add     S1P, 32\n    add     RP, 32\n\n    ; store remaining quad-limb from main loop\n  .Store:\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     [RP+24], MulLo3\n    add     RP, 32\n\n    ; handle final 0-3 single limb of S1P\n  .Post:\n\n    and     Size, 3\n    je      .Post0\n\n    cmp     Size, 2\n    ja      .Post3\n    je      .Post2\n\n  .Post1:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    add     MulLo0, MulHi3\n    adc     MulHi0, 0\n    mov     [RP], MulLo0\n    mov     rax, MulHi0\n    jmp     .Exit\n\n  .Post2:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    add     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    adc     MulHi1, 0\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     rax, MulHi1\n    jmp     .Exit\n\n  .Post3:\n\n    mulx    MulHi0, MulLo0, [S1P]\n    mulx    MulHi1, MulLo1, [S1P+8]\n    mulx    MulHi2, MulLo2, [S1P+16]\n    add     MulLo0, MulHi3\n    adc     MulLo1, MulHi0\n    adc     MulLo2, MulHi1\n    adc     MulHi2, 0\n    mov     [RP], MulLo0\n    mov     [RP+8], MulLo1\n    mov     [RP+16], MulLo2\n    mov     rax, MulHi2\n    jmp     .Exit\n\n  .Post0:\n\n    mov     rax, MulHi3\n\n  .Exit:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/mul_basecase.asm",
    "content": ";  AMD64 mpn_mul_basecase optimised for Intel Broadwell.\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r14\n\n    BITS 64\n\talign\t16\n\n\tLEAF_PROC mpn_mul_basecase\n\tcmp     r8, 2\n\tja      .4\n    mov     r8, rdx\n\tmov     rdx, [r9]\n\tmulx    r11, rax, [r8]\n    mov     [rcx], rax\n\tje      .1\n.0:\tmov     [rcx+8], r11\n\tret\n.1:\tcmp     qword [rsp+40], 2\n\tmulx    r10, rax, [r8+8]\n\tje      .3\n.2:\tadd     r11, rax\n\tadc     r10, 0\n\tmov     [rcx+8], r11\n\tmov     [rcx+16], r10\n\tret\n.3:\tadd     r11, rax\n\tadc     r10, 0\n\tmov     rdx, [r9+8]\n    mov     rax, r8\n\tmulx    r9, r8, [rax]\n\tmulx    rdx, rax, [rax+8]\n\tadd     rax, r9\n\tadc     rdx, 0\n\tadd     r11, r8\n\tadc     r10, rax\n\tadc     rdx, 0\n\tmov     [rcx+8], r11\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], rdx\n\tret\n\n\talign\t16\n.4:\n    FRAME_PROC mpn_mul_bc, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\n\tmov     r14, rcx\n\tlea     rbx, [rdx+1]\n\tmov     rbp, rdx\n\tmov     eax, edx\n\tand     rbx, -8\n\tshr     rbp, 3\n\tneg     rbx\n\tand     eax, 7\n\tmov     rcx, rbp\n\tmov     rdx, [r14]\n\tlea     r14, [r14+8]\n\tlea     r10, [rel .41]\n\tmovsxd  r11, dword [r10+rax*4]\n\tlea     r10, [r11+r10]\n\tjmp     r10\n.5:\tmulx    r11, r10, [rsi]\n\tlea     rsi, [rsi+56]\n\tlea     rdi, [rdi-8]\n\tjmp     .15\n.6:\tmulx    r9, r12, [rsi]\n\tlea     rsi, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tinc     rcx\n\tjmp     .20\n.7:\tmulx    r11, r10, [rsi]\n\tlea     rsi, [rsi+24]\n\tlea     rdi, [rdi+24]\n\tinc     rcx\n\tjmp     .19\n.8:\tmulx    r9, r12, [rsi]\n\tlea     rsi, [rsi+32]\n\tlea     rdi, [rdi+32]\n\tinc     rcx\n\tjmp 
    .18\n.9:\tmulx    r11, r10, [rsi]\n\tlea     rsi, [rsi+40]\n\tlea     rdi, [rdi+40]\n\tinc     rcx\n\tjmp     .17\n.10:mulx    r9, r12, [rsi]\n\tlea     rsi, [rsi+48]\n\tlea     rdi, [rdi+48]\n\tinc     rcx\n\tjmp     .16\n.11:mulx    r9, r12, [rsi]\n\tjmp     .14\n.12:mulx    r11, r10, [rsi]\n\tlea     rsi, [rsi+8]\n\tlea     rdi, [rdi+8]\n\tmulx    r9, r12, [rsi]\n\t\n\talign\t16\n.13:mov     [rdi-8], r10\n\tadc     r12, r11\n.14:mulx    r11, r10, [rsi+8]\n\tadc     r10, r9\n\tlea     rsi, [rsi+64]\n\tmov     [rdi], r12\n.15:mov     [rdi+8], r10\n\tmulx    r9, r12, [rsi-48]\n\tlea     rdi, [rdi+64]\n\tadc     r12, r11\n.16:mulx    r11, r10, [rsi-40]\n\tmov     [rdi-48], r12\n\tadc     r10, r9\n.17:mov     [rdi-40], r10\n\tmulx    r9, r12, [rsi-32]\n\tadc     r12, r11\n.18:mulx    r11, r10, [rsi-24]\n\tmov     [rdi-32], r12\n\tadc     r10, r9\n.19:mulx    r9, r12, [rsi-16]\n\tmov     [rdi-24], r10\n\tadc     r12, r11\n.20:mulx    r11, r10, [rsi-8]\n\tadc     r10, r9\n\tmov     [rdi-16], r12\n\tdec     rcx\n\tmulx    r9, r12, [rsi]\n\tjnz     .13\n.21:mov     [rdi-8], r10\n\tadc     r12, r11\n\tmov     [rdi], r12\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n\tdec     r8\n\tjz      .31\n\tlea     r10, [rel .42]\n\tmovsxd  rax, dword [r10+rax*4]\n\tlea     rax, [rax+r10]\n.22:lea     rsi, [rsi+rbx*8]\n\tmov     rcx, rbp\n\tmov     rdx, [r14]\n\tlea     r14, [r14+8]\n\tjmp     rax\n.23:mulx    r11, r10, [rsi+8]\n\tlea     rdi, [rdi+rbx*8+8]\n\tlea     rcx, [rcx-1]\n\tjmp     .35\n.24:mulx    r9, r12, [rsi-16]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .40\n.25:mulx    r11, r10, [rsi-24]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .39\n.26:mulx    r9, r12, [rsi-32]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .38\n.27:mulx    r11, r10, [rsi-40]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .37\n.28:mulx    r9, r12, [rsi+16]\n\tlea     rdi, [rdi+rbx*8+8]\n\tjmp     .36\n.29:mulx    r9, r12, [rsi]\n\tlea     rdi, [rdi+rbx*8+8]\n\tjmp     .34\n.30:adox    r12, [rdi]\n\tadox    
r9, rcx\n\tmov     [rdi], r12\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n\tdec     r8\n\tjnz     .22\n.31:\n    END_PROC reg_save_list\n\n.32:mulx    r11, r10, [rsi-8]\n\tlea     rdi, [rdi+rbx*8+8]\n\tmulx    r9, r12, [rsi]\n\n\talign\t16\n.33:adox    r10, [rdi-8]\n\tadcx    r12, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .30\n.34:mulx    r11, r10, [rsi+8]\n\tadox    r12, [rdi]\n\tlea     rcx, [rcx-1]\n\tmov     [rdi], r12\n\tadcx    r10, r9\n.35:mulx    r9, r12, [rsi+16]\n\tadcx    r12, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n.36:mulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r12, [rdi+16]\n\tmov     [rdi+16], r12\n.37:mulx    r9, r12, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r12, r11\n\tmov     [rdi+24], r10\n.38:mulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r12, [rdi+32]\n\tmov     [rdi+32], r12\n.39:mulx    r9, r12, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r12, r11\n\tmov     [rdi+40], r10\n.40:adox    r12, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r12\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r12, [rsi]\n\tjmp     .33\n\n\talign\t8\n.41:\n\tdd      .5 - .41\n\tdd     .11 - .41\n\tdd     .12 - .41\n\tdd      .6 - .41\n\tdd      .7 - .41\n\tdd      .8 - .41\n\tdd      .9 - .41\n\tdd     .10 - .41\n.42:\n\tdd     .23 - .42\n\tdd     .29 - .42\n\tdd     .32 - .42\n\tdd     .24 - .42\n\tdd     .25 - .42\n\tdd     .26 - .42\n\tdd     .27 - .42\n\tdd     .28 - .42\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/nand_n.asm",
    "content": ";  AVX mpn_nand_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not( (rsi,rcx) and (rdx,rcx) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function prologue - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nand_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpand   QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpand   QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpand   QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpand   QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    and     Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    and     Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    and     Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/nior_n.asm",
    "content": ";  AVX mpn_nior_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not( (rsi,rcx) or (rdx,rcx) )\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function prologue - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.34-0.35 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define QLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_nior_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpor    QLimb0, QLimb0, [Src2P+96]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+96], 
QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpor    QLimb0, QLimb0, [Src2P+64]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpor    QLimb0, QLimb0, [Src2P+32]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpor    QLimb0, QLimb0, [Src2P]\n    vpxor   QLimb0, QLimb0, QLimb1\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    or      Limb0, [Src2P+16]\n    not     Limb0\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    or      Limb0, [Src2P+8]\n    not     Limb0\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    or      Limb0, [Src2P]\n    not     Limb0\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/rshift.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1, unsigned int Shift)\n; Linux       RAX         RDI            RSI            RDX                 RCX\n; Windows x64 RAX         RCX            RDX             R8                  R9\n;\n; Description:\n; The function shifts Op1 right by Shift bits, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bits of\n; Op1. 
The function operates increasing in memory supporting in place shifts.\n;\n; Result:\n; - Op2[ Size1-1..0 ] := ( ShrIn:Op1[ Size1-1..0 ] ) >> Shift\n; - Op1[ 0 ] << ( 64-Shift )\n;\n; Caveats:\n; - caller must ensure that Shift is in [ 1..63 ]!\n; - currently Linux64 support only!\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - implemented, tested and benchmarked on 30.03.2016 by jn\n; - includes prefetching\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2         R11\n    %define Op1         RDX\n    %define Size1       R8\n    %define Shift       RCX\n    %define Limb1       R9\n    %define Limb2       R10\n  %ifdef USE_PREFETCH\n    %define Offs        -512    ; No caller-saves regs left, use immediate\n  %endif\n    %define reg_save_list XMM, 6, 7\n%else\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Shift       RCX\n    %define Limb1       R8\n    %define Limb2       R9\n  %ifdef USE_PREFETCH\n    %define OFFS_REG 1\n    %define Offs        R10\n  %endif\n%endif\n\n%define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n%define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n%define ShrDLCnt    XMM6    ; Attn: this must match ShrQlCnt definition\n%define ShlDLCnt    XMM7    ; Attn: this must match ShlQlCnt 
definition\n\n%define QLimb0      YMM0\n%define QLimb1      YMM1\n%define ShrQL0      YMM2\n%define ShlQL0      YMM3\n%define ShrQL1      YMM4\n%define ShlQL1      YMM5\n%define ShrQLCnt    YMM6\n%define ShlQLCnt    YMM7\n\n    align   32\n\nFRAME_PROC mpn_rshift, 0, reg_save_list\n%ifdef USE_WIN64\n    mov     r11, rcx\n\tmov     rcx, r9\n%endif\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     Limb1, [Op1]\n    shrd    RAX, Limb1, CL\n\n    sub     Size1, 1\n    je      .lShrEquPost        ; Size1=1 =>\n\n  %ifdef USE_PREFETCH\n    mov     Offs, 512\n  %endif\n\n    cmp     Size1, 8\n    jc      .lShrEquFour        ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShrEquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShrEquAlign16:\n\n    test    Op2, 16\n    je      .lShrEquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShrEquAVX:\n\n    ; initialize AVX shift counter\n    vmovq   ShrDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vmovq   ShlDLCnt, RCX\n    neg     RCX\n    and     RCX, 63             ; must do, as AVX shifts set result=0 if Shift>63!\n    vpbroadcastq ShrQLCnt, ShrDLCnt\n    vpbroadcastq ShlQLCnt, ShlDLCnt\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllvq ShlQL0, QLimb0, ShlQLCnt\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShrEquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.70 cycles per limb in LD2$\n    ; - 0.70-0.90 cycles per limb in 
LD3$\n    align   16\n  .lShrEquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlvq   ShrQL0, QLimb0, ShrQLCnt\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllvq   ShlQL1, QLimb1, ShlQLCnt\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlvq   ShrQL1, QLimb1, ShrQLCnt\n    vpsllvq   ShlQL0, QLimb0, ShlQLCnt\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShrEquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShrEquAVXLoop\n\n    mov     Limb1, [Op1]\n    xor     Limb2, Limb2\n    shrd    Limb2, Limb1, CL\n%if 1\n    vmovq   ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlvq ShrQL0, QLimb0, ShrQLCnt\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShrEquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShrEquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShrEquTwo:\n\n    test    Size1, 2\n    je      .lShrEquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, CL\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShrEquOne:\n\n    test    Size1, 1\n    je      .lShrEquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, CL\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n    ; store most significant limb considering shift-in part\n  .lShrEquPost:\n\n    shr     Limb1, CL\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    vzeroupper\nEND_PROC reg_save_list\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/rshift1.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t mpn_rshift1(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1 )\n; Linux     RAX         RDI         RSI            RDX\n; Win7      RAX         RCX         RDX            R8\n;\n; Description:\n; The function shifts Op1 right by one bit, stores the result in Op2 (non-\n; destructive shr) and hands back the shifted-out least significant bit of Op1.\n; The function operates increasing in memory supporting in place shifts.\n;\n; Caveats:\n; - the AVX version uses mnemonics only available on Haswell, Broadwell and\n;   Skylake cores\n; - the behaviour of cache prefetching in combination with AVX shifting seems\n;   somewhat erratic\n;    - slight (a few clock cycles) degradation for 1/2 LD1$ sizes\n;    - slight (a few percent) improvement for full LD1$ sizes\n;    - substantial (>10%) improvement for 1/2 LD2$ sizes\n;    - slight (a few percent) improvement for full LD2$ sizes\n;    - slight (a few percent) degradation for 1/2 LD3$ sizes\n;    - substantial (around 10%) degradation for full LD3$ sizes\n;\n; Comments:\n; - AVX based version implemented, tested & 
benched on 21.02.2016 by jn\n; - includes cache prefetching\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n\n    %define Op2         RCX\n    %define Op1         RDX\n    %define Size1       R8\n    %define Limb1       R9\n    %define Limb2       R10\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%else\n\n    %define Op2         RDI\n    %define Op1         RSI\n    %define Size1       RDX\n    %define Limb1       R8\n    %define Limb2       R9\n    %define Offs        512     ; used direct def. to stay in Win scratch regs\n\n    %define ShrDL0      XMM2    ; Attn: this must match ShrQL0 definition\n    %define ShlDL0      XMM3    ; Attn: this must match ShlQL0 definition\n\n    %define QLimb0      YMM0\n    %define QLimb1      YMM1\n    %define ShrQL0      YMM2\n    %define ShlQL0      YMM3\n    %define ShrQL1      YMM4\n    %define ShlQL1      YMM5\n\n%endif\n\n    align   32\n\nLEAF_PROC mpn_rshift1\n\n    xor     EAX, EAX\n    or      Size1, Size1\n    je      .Exit\n\n    mov     RAX, [Op1]\n    mov     Limb1, RAX\n    shl     RAX, 63\n\n    sub     Size1, 1\n    je      .lShr1EquPost       ; Size1=1 =>\n\n    cmp     Size1, 8\n    jc      .lShr1EquFour       ; AVX inefficient =>\n\n    ; first align Op2 to 32 bytes\n    test    Op2, 8\n    je      .lShr1EquAlign16\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op1, 8\n    add     Op2, 8\n    sub     Size1, 1\n\n  .lShr1EquAlign16:\n\n    test    Op2, 16\n    je      .lShr1EquAVX\n\n    mov     Limb2, [Op1+8]\n    shrd    Limb1, Limb2, 
1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+16]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n    sub     Size1, 2\n\n  .lShr1EquAVX:\n\n    ; pre-fetch first quad-limb\n    vmovdqu QLimb0, [Op1]\n    vpsllq  ShlQL0, QLimb0, 63\n\n    add     Op1, 32\n    sub     Size1, 4\n    jmp     .lShr1EquAVXCheck\n\n    ; main loop (prefetching enabled, unloaded data cache)\n    ; - 0.60      cycles per limb in LD1$\n    ; - 0.60-0.75 cycles per limb in LD2$\n    ; - 0.75-1.00 cycles per limb in LD3$\n    align   16\n  .lShr1EquAVXLoop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n  %endif\n\n    vmovdqu   QLimb1, [Op1]\n    vpsrlq    ShrQL0, QLimb0, 1\n    vmovdqu   QLimb0, [Op1+32]\n    vpsllq    ShlQL1, QLimb1, 63\n    vpblendd  ShlQL0, ShlQL0, ShlQL1, 00000011b\n    vpermq    ShlQL0, ShlQL0, 00111001b\n    vpor      ShrQL0, ShrQL0, ShlQL0\n    vpsrlq    ShrQL1, QLimb1, 1\n    vpsllq    ShlQL0, QLimb0, 63\n    vpblendd  ShlQL1, ShlQL1, ShlQL0, 00000011b\n    vpermq    ShlQL1, ShlQL1, 00111001b\n    vmovdqa   [Op2], ShrQL0\n    vpor      ShrQL1, ShrQL1, ShlQL1\n    vmovdqa   [Op2+32], ShrQL1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lShr1EquAVXCheck:\n\n    sub     Size1, 8\n    jnc     .lShr1EquAVXLoop\n\n    mov     Limb2, [Op1]\n    mov     Limb1, Limb2\n    shl     Limb2, 63\n%if 1\n    vmovq ShrDL0, Limb2\n    vpblendd ShlQL0, ShlQL0, ShrQL0, 3\n%else\n    ; I am mixing in a single SSE4.1 instruction into otherwise pure AVX2\n    ; this is generating stalls on Haswell & Broadwell architecture (Agner Fog)\n    ; but it is only executed once and there is no AVX2 based alternative\n    pinsrq  ShlDL0, Limb2, 0            ; SSE4.1\n%endif\n    vpsrlq  ShrQL0, QLimb0, 1\n    vpermq  ShlQL0, ShlQL0, 00111001b\n    vpor    ShrQL0, ShrQL0, ShlQL0\n    vmovdqa [Op2], ShrQL0\n\n    add     Op2, 32\n    add     Size1, 8\n\n    ; shift remaining max. 
7 limbs with SHRD mnemonic\n  .lShr1EquFour:\n\n    add     Op1, 8\n    test    Size1, 4\n    je      .lShr1EquTwo\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n    mov     Limb2, [Op1+16]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2+16], Limb1\n    mov     Limb1, [Op1+24]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+24], Limb2\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lShr1EquTwo:\n\n    test    Size1, 2\n    je      .lShr1EquOne\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, [Op1+8]\n    shrd    Limb2, Limb1, 1\n    mov     [Op2+8], Limb2\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lShr1EquOne:\n\n    test    Size1, 1\n    je      .lShr1EquPost\n\n    mov     Limb2, [Op1]\n    shrd    Limb1, Limb2, 1\n    mov     [Op2], Limb1\n    mov     Limb1, Limb2\n\n    add     Op2, 8\n\n  .lShr1EquPost:\n\n    shr     Limb1, 1\n    mov     [Op2], Limb1\n\n  .Exit:\n\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/sqr_basecase.asm",
    "content": ";  AMD64 mpn_sqr_basecase optimised for Intel Broadwell.\n\n;  Copyright 2015 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n; void mpn_sqr_basecase(mp_ptr, mp_srcptr, mp_size_t)\n; Linux                    rdi        rsi        rdx\n; Win64                    rcx        rdx         r8\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, rbx\n\n\tTEXT\n\talign\t16\n    LEAF_PROC mpn_sqr_basecase\n\tcmp     r8, 2\n\tjae     .1\n\tmov     rdx, [rdx]\n\tmulx    rdx, rax, rdx\n\tmov     [rcx], rax\n\tmov     [rcx+8], rdx\n\tret\n.1:\tjne     .2\n\tmov     r11, [rdx+8]\n\tmov     rdx, [rdx]\n\tmulx    r10, r9, r11\n\tmulx    r8, rax, rdx\n\tmov     rdx, r11\n\tmulx    rdx, r11, rdx\n\tadd     r9, r9\n\tadc     r10, r10\n\tadc     rdx, 0\n\tadd     r8, r9\n\tadc     r10, r11\n\tadc     rdx, 0\n\tmov     [rcx], rax\n\tmov     [rcx+8], r8\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], rdx\n\tret\n.2:\t\n    FRAME_PROC ?mpn_sqb, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n\n    cmp     rdx, 4\n\tjae     .3\n\tmov     rdx, [rsi]\n\tmulx    r11, r10, [rsi+8]\n\tmulx    r9, r8, [rsi+16]\n\tadd     r8, r11\n\tmov     rdx, [rsi+8]\n\tmulx    r11, rax, [rsi+16]\n\tadc     r9, rax\n\tadc     r11, 0\n\ttest    ebx, ebx\n\tmov     rdx, [rsi]\n\tmulx    rcx, rbx, rdx\n\tmov     [rdi], rbx\n\tmov     rdx, [rsi+8]\n\tmulx    rbx, rax, rdx\n\tmov     rdx, [rsi+16]\n\tmulx    rdx, rsi, rdx\n\tadcx    r10, r10\n\tadcx    r8, r8\n\tadcx    r9, r9\n\tadcx    r11, r11\n\tadox    rcx, r10\n\tadox    rax, r8\n\tadox    rbx, r9\n\tadox    rsi, r11\n\tmov     r8d, 0\n\tadox    rdx, r8\n\tadcx    rdx, r8\n\tmov     [rdi+8], rcx\n\tmov     [rdi+16], rax\n\tmov     [rdi+24], rbx\n\tmov     [rdi+32], rsi\n\tmov     [rdi+40], rdx\n\tEXIT_PROC reg_save_list\n\n.3:\tmov     [rsp+stack_use+8], rdi\n    mov     [rsp+stack_use+16], rsi\n    mov     [rsp+stack_use+24], rdx\n\tlea     ebx, [rdx-3]\n\tlea     rcx, [rdx+5]\n\tmov     eax, edx\n\tand     ebx, -8\n\tshr     ecx, 3\n\tneg     rbx\n\tand     eax, 7\n\tmov     rdx, 
[rsi]\n\tlea     r10, [rel .58]\n\tmovsxd  r8, dword [r10+rax*4]\n\tlea     r10, [r8+r10]\n\tjmp     r10\n.4:\tmulx    r11, r10, [rsi+8]\n\tlea     rsi, [rsi+64]\n\tjmp     .14\n.5:\tmulx    r9, r8, [rsi+8]\n\tlea     rsi, [rsi+24]\n\tlea     rdi, [rdi+24]\n\tjmp     .19\n.6:\tmulx    r11, r10, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tlea     rdi, [rdi+32]\n\tjmp     .18\n.7:\tmulx    r9, r8, [rsi+8]\n\tlea     rsi, [rsi+40]\n\tlea     rdi, [rdi+40]\n\tjmp     .17\n.8:\tmulx    r11, r10, [rsi+8]\n\tlea     rsi, [rsi+48]\n\tlea     rdi, [rdi+48]\n\tjmp     .16\n.9:\tmulx    r9, r8, [rsi+8]\n\tlea     rsi, [rsi+56]\n\tlea     rdi, [rdi+56]\n\tjmp     .15\n.10:mulx    r9, r8, [rsi+8]\n\tlea     rsi, [rsi+8]\n\tlea     rdi, [rdi+8]\n\tjmp     .13\n.11:mulx    r11, r10, [rsi+8]\n\tlea     rsi, [rsi+16]\n\tlea     rdi, [rdi+16]\n\tdec     ecx\n\tmulx    r9, r8, [rsi]\n\talign\t16\n.12:mov     [rdi-8], r10\n\tadc     r8, r11\n.13:mulx    r11, r10, [rsi+8]\n\tadc     r10, r9\n\tlea     rsi, [rsi+64]\n\tmov     [rdi], r8\n.14:mov     [rdi+8], r10\n\tmulx    r9, r8, [rsi-48]\n\tlea     rdi, [rdi+64]\n\tadc     r8, r11\n.15:mulx    r11, r10, [rsi-40]\n\tmov     [rdi-48], r8\n\tadc     r10, r9\n.16:mov     [rdi-40], r10\n\tmulx    r9, r8, [rsi-32]\n\tadc     r8, r11\n.17:mulx    r11, r10, [rsi-24]\n\tmov     [rdi-32], r8\n\tadc     r10, r9\n.18:mulx    r9, r8, [rsi-16]\n\tmov     [rdi-24], r10\n\tadc     r8, r11\n.19:mulx    r11, r10, [rsi-8]\n\tadc     r10, r9\n\tmov     [rdi-16], r8\n\tdec     ecx\n\tmulx    r9, r8, [rsi]\n\tjnz     .12\n.20:mov     [rdi-8], r10\n\tadc     r8, r11\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n\tlea     r10, [rel .59]\n\tmovsxd  r11, dword [r10+rax*4]\n\tlea     r11, [r11+r10]\n\tjmp     r11\n.21:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.22:lea     rsi, [rsi+rbx*8-64]\n\tor      ecx, ebx\n\tmov     rdx, [rsi+8]\n\tmulx    r9, r8, [rsi+16]\n\tlea     rdi, 
[rdi+rbx*8-56]\n\tjmp     .51\n\talign\t16\n.23:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .21\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n.24:mulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .23\n.25:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.26:lea     rsi, [rsi+rbx*8-64]\n\tor      ecx, ebx\n\tmov     rdx, [rsi]\n\tmulx    r11, r10, [rsi+8]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .24\n\talign\t16\n.27:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .25\n.28:mulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx   
 r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .27\n.29:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.30:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tlea     rbx, [rbx+8]\n\tmov     rdx, [rsi-8]\n\tmulx    r9, r8, [rsi]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .28\n\talign\t16\n.31:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .29\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .31\n.32:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.33:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tjz      .53\n\tmov     rdx, [rsi-16]\n\tmulx    r11, r10, [rsi-8]\n\tlea     rdi, [rdi+rbx*8+8]\n\tmulx    r9, r8, [rsi]\n\tjmp     .31\n\talign\t16\n.34:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .32\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, 
r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n.35:adox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .34\n.36:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.37:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tjz      .52\n\tmov     rdx, [rsi-24]\n\tmulx    r9, r8, [rsi-16]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .35\n\talign\t16\n.38:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .36\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n.39:mulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .38\n.40:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.41:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tmov     rdx, [rsi-32]\n\tmulx    r11, r10, [rsi-24]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .39\n\talign\t16\n.42:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .40\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    
r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n.43:mulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .42\n.44:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.45:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tmov     rdx, [rsi-40]\n\tmulx    r9, r8, [rsi-32]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .43\n\talign\t16\n.46:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .44\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n\tmulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n.47:mulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .46\n.48:adox    r8, [rdi]\n\tadox    r9, rcx\n\tmov     [rdi], r8\n\tadc     r9, rcx\n\tmov     [rdi+8], r9\n.49:lea     rsi, [rsi+rbx*8]\n\tor      ecx, ebx\n\tmov     rdx, 
[rsi-48]\n\tmulx    r11, r10, [rsi-40]\n\tlea     rdi, [rdi+rbx*8-56]\n\tjmp     .47\n\talign\t16\n.50:adox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tjrcxz   .48\n\tmulx    r11, r10, [rsi+8]\n\tadox    r8, [rdi]\n\tlea     ecx, [rcx+8]\n\tmov     [rdi], r8\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi+16]\n\tadcx    r8, r11\n\tadox    r10, [rdi+8]\n\tmov     [rdi+8], r10\n.51:mulx    r11, r10, [rsi+24]\n\tlea     rsi, [rsi+64]\n\tadcx    r10, r9\n\tadox    r8, [rdi+16]\n\tmov     [rdi+16], r8\n\tmulx    r9, r8, [rsi-32]\n\tadox    r10, [rdi+24]\n\tadcx    r8, r11\n\tmov     [rdi+24], r10\n\tmulx    r11, r10, [rsi-24]\n\tadcx    r10, r9\n\tadox    r8, [rdi+32]\n\tmov     [rdi+32], r8\n\tmulx    r9, r8, [rsi-16]\n\tadox    r10, [rdi+40]\n\tadcx    r8, r11\n\tmov     [rdi+40], r10\n\tadox    r8, [rdi+48]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi+48], r8\n\tlea     rdi, [rdi+64]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tjmp     .50\n.52:\n\tmov     rdx, [rsi-24]\n\tmulx    r9, r8, [rsi-16]\n\tadox    r8, [rdi-8]\n\tmulx    r11, r10, [rsi-8]\n\tmov     [rdi-8], r8\n\tlea     rdi, [rdi+8]\n\tadcx    r10, r9\n\tmulx    r9, r8, [rsi]\n\tadox    r10, [rdi-8]\n\tadcx    r8, r11\n\tmov     [rdi-8], r10\n\tadox    r8, [rdi]\n\tadox    r9, rcx\n\tadcx    r9, rcx\n.53:\n\tmov     rdx, [rsi-16]\n\tmulx    r11, r10, [rsi-8]\n\tmulx    rbx, rax, [rsi]\n\tadox    r10, r8\n\tadcx    rax, r11\n\tmov     [rdi], r10\n\tadox    rax, r9\n\tadox    rbx, rcx\n\tmov     [rdi+8], rax\n\tadc     rbx, rcx\n\tmov     rdx, [rsi-8]\n\tmulx    rdx, rax, [rsi]\n\tadd     rax, rbx\n\tmov     [rdi+16], rax\n\tadc     rdx, rcx\n\tmov     [rdi+24], rdx\n.54:\n    mov     rdi, [rsp+stack_use+8]\n    mov     rsi, [rsp+stack_use+16]\n    mov     rcx, [rsp+stack_use+24]\n\tdec     ecx\n\tmov     rdx, [rsi]\n\txor     ebx, ebx\n\tmulx    r10, rax, rdx\n\tmov     [rdi], rax\n\tmov     r8, [rdi+8]\n\tmov     r9, [rdi+16]\n\tjmp     .56\n\talign\t16\n.55:mov     r8, [rdi+24]\n\tmov     
r9, [rdi+32]\n\tlea     rdi, [rdi+16]\n\tlea     r10, [rdx+rbx]\n.56:adc     r8, r8\n\tadc     r9, r9\n\tsetc    bl\n\tmov     rdx, [rsi+8]\n\tlea     rsi, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r10\n\tadc     r9, rax\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r9\n\tdec     ecx\n\tjnz     .55\n.57:adc     rdx, rbx\n\tmov     [rdi+24], rdx\n    END_PROC reg_save_list\n\n\talign\t8\n.58:\n\tdd     .9 - .58\n\tdd     .4 - .58\n\tdd     .10 - .58\n\tdd     .11 - .58\n\tdd     .5 - .58\n\tdd     .6 - .58\n\tdd     .7 - .58\n\tdd     .8 - .58\n.59:\n\tdd     .49 - .59\n\tdd     .22 - .59\n\tdd     .26 - .59\n\tdd     .30 - .59\n\tdd     .33 - .59\n\tdd     .37 - .59\n\tdd     .41 - .59\n\tdd     .45 - .59\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/sub_err1_n.asm",
    "content": ";  AMD64 mpn_sub_err1_n\n;  Copyright 2017 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)-(rdx,rcx)-BwIn\n;\trax = borrow\n;\t(rcx,2) = rev(r8,rcx) \\dot (borrow,rcx) where borrow is the sequence\n;       of borrows from the subtraction of (rsi,rcx)-(rdx,rcx)\n\n;  mp_limb_t mpn_add_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_sub_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                         rdi     rsi     rdx     rcx       r8        r9     8(rsp)\n;  rax                         rcx     rdx      r8      r9 [rsp+40]  [rsp+48]   [rsp+56]\n\n%include 'yasm_mac.inc'\n\n%define SumP    rdi\n%define Inp1P   rsi\n%define Inp2P   rdx\n%define EP      r11\n%define SizeRest rcx\n%define YP      r8\n%define Size    r9\n%define LIMB0   rax\n%define E0      r12\n%define E1      r13\n%define Zero    r14\n%define Dummy   rbx\n\n%define reg_save_list rsi, rdi, rbx, r12, r13, r14\n\n\talign   32\n\tBITS    64\n\n%macro  DO_LIMB 1\n\tmov     LIMB0, [Inp1P + %1*8]\n\tsbb     LIMB0, [Inp2P + %1*8]\n\tmov     [SumP + %1*8], LIMB0\n\tmov\t    LIMB0, [YP - 
%1*8]\n\tcmovnc\tLIMB0, Zero\n\tinc\t    Dummy\t\t\t; OF = 0\n\tadox\tE0, LIMB0\n\tadox\tE1, Zero\n%endmacro\n\nFRAME_PROC mpn_sub_err1_n, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     r11, r9\n    mov     r8, [rsp + stack_use + 40]\n    mov     r9, [rsp + stack_use + 48]\n    mov     LIMB0, [rsp + stack_use + 56]\n\n\tmov     SizeRest, Size\n\tlea\t    YP, [YP + Size*8 - 8]\n\tand\t    SizeRest, 7\n\txor\t    Zero, Zero\n\tmov\t    E0, Zero\n\tmov\t    E1, Zero\n\tshr     Size, 3\n\tbt\t    LIMB0, 0\n\tjz      .testrest\n\n\talign   16\n.loop:\n\tDO_LIMB 0\n\tDO_LIMB 1\n\tDO_LIMB 2\n\tDO_LIMB 3\n\tDO_LIMB 4\n\tDO_LIMB 5\n\tDO_LIMB 6\n\tDO_LIMB 7\n\n\tlea     Inp1P, [Inp1P+64]\n\tlea     Inp2P, [Inp2P+64]\n\tlea     SumP, [SumP+64]\n\tlea     YP, [YP-64]\n\n\tdec     Size\n\tjne     .loop\n\n.testrest:\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.exit\n\n.rest:\n\tDO_LIMB 0\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 1\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 2\n\tdec\tSizeRest\n\tjz\t.exit\n\tDO_LIMB 3\n\tdec\tSizeRest\n\tjz\t.exit\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tlea\tYP, [YP-32]\n\tjmp\t.rest\n\n.exit:\n\tmov rax, Zero\n\tsetc al\n\tmov\t[EP], E0\n\tmov\t[EP+8], E1\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/sub_n.asm",
    "content": "\n;  Copyright 2016, 2017 Jens Nurmann\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  mp_limb_t  mpn_sub_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;  mp_limb_t mpn_sub_nc(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                     rdi        rsi        rdx        rcx         r8\n;  rax                     rcx        rdx         r8         r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define Op1     RDX\n%define Op2     R8\n%define Size    R9\n%define Op3     RCX\n\n%define BorrowD EAX\n%define BorrowB AL\n\n%define Limb0   R10\n%define SizeB   R9B         ; check if this fits to code alignment!\n%define Count   R11\n\n    align   32\n\tLEAF_PROC mpn_sub_nc\n    mov     BorrowD, [rsp+40]\n    mov     Count, Size\n    shr     Count, 3\n    inc     Count\n    vpor    YMM0, YMM0, YMM0    ; see comment in main loop below\n    jmp     One\n\n    align   32\n\tLEAF_PROC mpn_sub_n\n    xor     BorrowD, BorrowD\n    mov     Count, Size\n    shr     Count, 3\n    inc     Count\n    vpor    YMM0, YMM0, YMM0    ; see comment in main loop below\n\n    ; unrolling the loop from small to high gives better timings\n    ; when considering all sizes 1-100 limb\n  
One:\n    test    SizeB, 1\n    je      .Two\n    shr     BorrowB, 1\n    mov     Limb0, [Op1]        ; I am using implicit code alignment through-\n    sbb     Limb0, [Op2]        ; out the following to get all branch targets\n    mov     [Op3], Limb0        ; on 16 byte alignments - check this if non-\n    setc    BorrowB             ; Linux register allocation is used!\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n\n  .Two:\n\n    test    SizeB, 2\n    je      .Four\n\n    shr     BorrowB, 1\n    mov     Limb0, [dword Op1]\n    sbb     Limb0, [Op2]\n    mov     [Op3], Limb0\n    mov     Limb0, [Op1+8]\n    sbb     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    setc    BorrowB\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n\n  .Four:\n\n    test    SizeB, 4\n    bt      BorrowD, 0\n    je      .Check\n\n    mov     Limb0, [Op1]\n    sbb     Limb0, [Op2]\n    mov     [Op3], Limb0\n    mov     Limb0, [Op1+8]\n    sbb     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    mov     Limb0, [Op1+16]\n    sbb     Limb0, [Op2+16]\n    mov     [Op3+16], Limb0\n    mov     Limb0, [Op1+24]\n    sbb     Limb0, [Op2+24]\n    mov     [Op3+24], Limb0\n\n    lea     Op1, [Op1+32]\n    lea     Op2, [Op2+32]\n    lea     Op3, [Op3+32]\n    jmp     .Check\n\n    ; main loop:\n    ; - 1.03-1.05 cycles per limb in L1D$\n    ; - 1.13-1.15 cycles per limb in L2D$\n    ; - 1.50-1.75 cycles per limb in L3D$\n    align   16\n  .Loop:\n\n    mov     Limb0, [Op1]\n    sbb     Limb0, [Op2]\n    mov     [Op3], Limb0\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write always ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    YMM0, YMM0, YMM0\n\n    mov     Limb0, [dword Op1+8]\n    sbb     Limb0, [Op2+8]\n    mov     [Op3+8], Limb0\n    mov     Limb0, [Op1+16]\n    sbb     Limb0, [Op2+16]\n    mov     [Op3+16], Limb0\n   
 mov     Limb0, [Op1+24]\n    sbb     Limb0, [Op2+24]\n    mov     [Op3+24], Limb0\n    mov     Limb0, [Op1+32]\n    sbb     Limb0, [Op2+32]\n    mov     [Op3+32], Limb0\n    mov     Limb0, [Op1+40]\n    sbb     Limb0, [Op2+40]\n    mov     [Op3+40], Limb0\n    mov     Limb0, [Op1+48]\n    sbb     Limb0, [Op2+48]\n    mov     [Op3+48], Limb0\n    mov     Limb0, [Op1+56]\n    sbb     Limb0, [Op2+56]\n    mov     [Op3+56], Limb0\n\n    lea     Op1, [Op1+64]\n    lea     Op2, [Op2+64]\n    lea     Op3, [Op3+64]\n\n  .Check:\n\n    dec     Count\n    jne     .Loop\n\n  .Exit:\n\n    setc    BorrowB             ; move total borrow to RAX\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/subadd_n.asm",
    "content": ";  AMD64 mpn_subadd_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,r8) = (rsi,r8)-(rdx,r8)-(rcx,r8)\n;   rax = summed borrow in range [ 0..2 ]\n\n; the main loop has been enhanced with the MPIR SuperOptimizer\n; the gain was roughly 4% execution speed for operands in LD1$\n;\n;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)\n;  rax                       rdi     rsi     rdx     rcx         r8\n;  rax                       rcx     rdx      r8      r9   [rsp+40]\n\n; cycles per limb with all operands in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       
???\n;   Skylake         1.6-1.7   1.7-1.85\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rsi, rdi, r12\n; definition according to Linux 64 bit ABI\n\n%define ResP    rdi\n%define Src1P   rsi\n%define Src2P   rdx\n%define Src3P   rcx\n%define Size    r8\n\n%define Spills  eax\n%define Carry   al\n%define Borrow  ah\n\n%define Limb0   r9\n%define Limb1   r10\n%define Limb2   r11\n%define Limb3   r12\n\n    align   32\n    BITS    64\n\n    FRAME_PROC mpn_subadd_n, 0, reg_save_list\n\tmov\t\trdi, rcx\n\tmov\t\trsi, rdx\n\tmov\t\trdx, r8\n \tmov\t\trcx, r9\n\tmov\t\tr8, [rsp+stack_use+40]\n\n    sub     Src3P, 32\n    sub     ResP, 32\n\n    xor     Spills, Spills\n\n    jmp     .Check\n\n    align   16\n  .Loop:\n\n    ; do not delete!\n    ; this seemingly unreasoned AVX instruction optimizes the allocation of\n    ; read/write operations to ports 2, 3 & 7 (write allways ending up\n    ; on port 7) which allows a sustained 2r1w execution per cycle\n    vpor    ymm0, ymm0, ymm0\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    mov     Limb1, [Src1P+8]\n    mov     Limb2, [Src1P+16]\n    mov     Limb3, [Src1P+24]\n    lea     Src3P, [Src3P+32]\n    lea     ResP, [ResP+32]\n    sbb     Limb0, [Src2P]\n    sbb     Limb1, [Src2P+8]\n    sbb     Limb2, [Src2P+16]\n    sbb     Limb3, [Src2P+24]\n    setc    Carry\n\n    lea     Src2P, [Src2P+32]\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    sbb     Limb1, [Src3P+8]\n    lea     Src1P, [Src1P+32]\n    mov     [ResP], Limb0\n    sbb     Limb2, [Src3P+16]\n    mov     [ResP+8], Limb1\n    mov     [ResP+16], Limb2\n    sbb     Limb3, [Src3P+24]\n    setc    Borrow\n    mov     [ResP+24], Limb3\n\n    ; label @ $a (mod $10) seems ok from benchmark figures\n  .Check:\n\n    sub     Size, 4\n    jnc     .Loop\n\n  .Post:\n\n    add     Src3P, 32\n    add     ResP, 32\n\n    add     Size, 4\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P]\n    sbb     Limb0, [Src2P]\n    setc    Carry\n 
   shr     Borrow, 1\n    sbb     Limb0, [Src3P]\n    setc    Borrow\n    mov     [ResP], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+8]\n    sbb     Limb0, [Src2P+8]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+8]\n    setc    Borrow\n    mov     [ResP+8], Limb0\n    dec     Size\n    je      .Exit\n\n    shr     Carry, 1\n    mov     Limb0, [Src1P+16]\n    sbb     Limb0, [Src2P+16]\n    setc    Carry\n    shr     Borrow, 1\n    sbb     Limb0, [Src3P+16]\n    mov     [ResP+16], Limb0\n    setc    Borrow\n\n    ; label @ $2 (mod $10) is ok\n.Exit:\n\n    add     Carry, Borrow\n    movsx   rax, Carry\n\tEND_PROC reg_save_list\n\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/xnor_n.asm",
    "content": ";  AVX mpn_xnor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = not(rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n%define\tQLimb1 YMM1\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xnor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    vpcmpeqq QLimb1, QLimb1, QLimb1\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vpxor   QLimb0, QLimb1, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add  
   Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vpxor   QLimb0, QLimb1, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    not     Limb0\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    not     Limb0\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    not     Limb0\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/avx/xor_n.asm",
    "content": ";  AVX mpn_xor_n\n;\n;  Copyright 2017 Jens Nurmann\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;   (rdi,rcx) = (rsi,rcx) xor (rdx,rcx)\n\n; There is no initial pointer alignment lead in code below. The argument\n; why not is based on some statistical reasoning and measurement points.\n; All statements below strictly refer to the Intel i6xxx (Skylake) \n; microarchitecture.\n\n; The function is intended to be used with arbitrary pointer alignment on\n; entry. That is there are 8 possible cases to consider:\n\n; - A: 1 x all pointers mis-aligned (mod 32 byte)\n; - B: 3 x one pointer aligned (mod 32 byte))\n; - C: 3 x two pointers aligned (mod 32 byte)\n; - D: 1 x all pointers aligned (mod 32 byte)\n\n; All sub cases under B show equivalent performance, as do all sub cases of\n; C. B is 7% faster than A, C is 11% faster than A and D is 39% faster than A.\n\n; To do a proper alignment would require a complex decision tree to allways \n; advance the alignment situation in the best possible manner - e.g. pointer\n; 1 is off by 8 while pointer 2 & 3 are off by 16. 
To do the alignment\n; requires some arith and at least one branch in the function proloque - a\n; reasonable impact for small sized operands. And all this for a small gain\n; (around 6% all summed up) in the average case.\n\n; In a specific application scenario this might be the wrong choice.\n\n; The execution speed of VMOVDQU is equivalent to VMOVDQA in case of aligned\n; pointers. This may be different for earlier generations of Intel core \n; architectures like Broadwell, Haswell, ...\n\n; cycles per limb with all operands aligned and in:\n\n;                   LD1$      LD2$\n;   Haswell         ???       ???\n;   Broadwell       ???       ???\n;   Skylake         0.29-0.31 0.39-0.40\n\n%include 'yasm_mac.inc'\n\n; definition according to Linux 64 bit ABI\n\n%define ResP    RCX\n%define Src1P   RDX\n%define Src2P    R8\n%define Size     R9\n%define SizeD   R9D\n%define Count   RAX\n%define CountD  EAX\n%define Limb0   R10\n%define Limb0D R10D\n%define QLimb0 YMM0\n\n    align   32\n    BITS    64\n\nLEAF_PROC   mpn_xor_n\n\n    mov     CountD, 3\n    mov     Limb0, Size\n    sub     Count, Size\n    jnc     .PostGPR            ; dispatch size 0-3 immediately\n\n    mov     SizeD, 3\n    shr     Limb0, 2\n    or      Count, -4\n    sub     Size, Limb0\n    jnc     .PostAVX            ; dispatch size 4, 8 & 12 immediately\n\n    mov     Limb0D, 128\n\n  .Loop:\n\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n    vmovdqu QLimb0, [Src1P+96]\n    vpxor   QLimb0, QLimb0, [Src2P+96]\n    vmovdqu [ResP+96], QLimb0\n\n    lea     Src1P, [Src1P+Limb0]\n    lea     Src2P, [Src2P+Limb0]\n    lea     ResP, [ResP+Limb0]\n\n    add     Size, 4\n    jnc     .Loop\n\n  .PostAVX:\n\n    mov     Limb0D, 0           ; to allow 
pointer correction on exit\n    cmp     Size, 2             ; fastest way to dispatch values 0-3\n    ja      .PostAVX0\n    je      .PostAVX1\n    jp      .PostAVX2\n\n  .PostAVX3:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+64]\n    vpxor   QLimb0, QLimb0, [Src2P+64]\n    vmovdqu [ResP+64], QLimb0\n\n  .PostAVX2:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P+32]\n    vpxor   QLimb0, QLimb0, [Src2P+32]\n    vmovdqu [ResP+32], QLimb0\n\n  .PostAVX1:\n\n    add     Limb0, 32\n    vmovdqu QLimb0, [Src1P]\n    vpxor   QLimb0, QLimb0, [Src2P]\n    vmovdqu [ResP], QLimb0\n\n  .PostAVX0:\n\n    add     Src1P, Limb0\n    add     Src2P, Limb0\n    add     ResP, Limb0\n    add     Count, 4\n\n  .PostGPR:\n\n    cmp     Count, 2            ; fastest way to dispatch values 0-3\n    ja      .Exit\n    je      .PostGPR1\n    jp      .PostGPR2\n\n  .PostGPR3:\n\n    mov     Limb0, [Src1P+16]\n    xor     Limb0, [Src2P+16]\n    mov     [ResP+16], Limb0\n\n  .PostGPR2:\n\n    mov     Limb0, [Src1P+8]\n    xor     Limb0, [Src2P+8]\n    mov     [ResP+8], Limb0\n\n  .PostGPR1:\n\n    mov     Limb0, [Src1P]\n    xor     Limb0, [Src2P]\n    mov     [ResP], Limb0\n\n  .Exit:\n\n    ret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/com_n.asm",
    "content": "\n;  AMD64 mpn_com_n\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n%define USE_WIN64\n\nBITS 64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n    %define FFFF    R11\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n    %define FFFF    R8\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n%define QFFFF   YMM4\n%define DFFFF   XMM4\n\n    align   32\n\nLEAF_PROC mpn_com_n\n    mov     RAX, Size1\n    or      RAX, RAX\n    je      .Exit      ;ajs:notshortform\n                       ; size=0 =>\n\n    ; Set a GPR to 0xFF...FF\n    mov     FFFF, -1\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .A32\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit      ;ajs:notshortform\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .A32:\n\n    test    Op2, 16\n    je      
.AVX\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit       ;ajs:notshortform\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .AVX:\n\n    ; Set an AVX2 reg to 0xFF...FF\n    movq    DFFFF, FFFF\n    vbroadcastsd QFFFF, DFFFF\n\n    mov     Offs, 128\n    jmp     .AVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .AVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqu QLimb2, [Op1+64]\n    vpxor   QLimb2, QLimb2, QFFFF\n    vmovdqu QLimb3, [Op1+96]\n    vpxor   QLimb3, QLimb3, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .AVXCheck:\n\n    sub     Size1, 16\n    jnc     .AVXLoop\n\n    add     Size1, 16\n    je      .Exit ;ajs:notshortform\n                  ; AVX copied operand fully =>\n\n    ; copy remaining max. 
15 limb\n    test    Size1, 8\n    je      .Four\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqu QLimb1, [Op1+32]\n    vpxor   QLimb1, QLimb1, QFFFF\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .Four:\n\n    test    Size1, 4\n    je      .Two\n\n    vmovdqu QLimb0, [Op1]\n    vpxor   QLimb0, QLimb0, QFFFF\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .Two:\n\n    test    Size1, 2\n    je      .One\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    xor     Limb, FFFF\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    pxor    DLimb0, DLimb0, DFFFF\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .One:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    xor     Limb, FFFF\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/copyd.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n\n; mpn_copyd(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving high to low in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$-L3$ if destination is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - startup overhead of \"rep movsq\" with negative direction is 200 cycles!!!\n;   - negative 
direction is unfavourable compared to positive \"rep movsq\" and\n;     to AVX.\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyd\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; Size=0 =>\n\n    lea     Op1, [Op1+8*Size1-8]\n    lea     Op2, [Op2+8*Size1-8]\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    jne     .lCpyDecA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 8\n    sub     Op2, 8\n\n  .lCpyDecA32:\n\n    test    Op2, 16\n    jnz     .lCpyDecAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n    dec     Size1\n    je      .Exit\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyDecAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyDecAVXLoop:\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqu QLimb2, [Op1-88]\n    vmovdqu QLimb3, [Op1-120]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n    vmovdqa [Op2-88], QLimb2\n    vmovdqa [Op2-120], QLimb3\n\n    sub     Op1, Offs\n    sub     Op2, Offs\n\n  .lCpyDecAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyDecAVXLoop\n\n    
add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyDecFour\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqu QLimb1, [Op1-56]\n    vmovdqa [Op2-24], QLimb0\n    vmovdqa [Op2-56], QLimb1\n\n    sub     Op1, 64\n    sub     Op2, 64\n\n  .lCpyDecFour:\n\n    test    Size1, 4\n    je      .lCpyDecTwo\n\n    vmovdqu QLimb0, [Op1-24]\n    vmovdqa [Op2-24], QLimb0\n\n    sub     Op1, 32\n    sub     Op2, 32\n\n  .lCpyDecTwo:\n\n    test    Size1, 2\n    je      .lCpyDecOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1-8]\n    mov     [Op2-8], Limb\n%else\n    movdqu  DLimb0, [Op1-8]\n    movdqa  [Op2-8], DLimb0\n%endif\n\n    sub     Op1, 16\n    sub     Op2, 16\n\n  .lCpyDecOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/copyi.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mpn_copyi(mp_ptr Op2, mp_srcptr Op1, mp_size_t Size1)\n; Linux     RDI         RSI            RDX\n; Win7      RCX         RDX            R8\n;\n; Description:\n; The function copies a given number of limb from source to destination (while\n; moving low to high in memory) and hands back the size (in limb) of the\n; destination.\n;\n; Result:\n; - Op2[ 0..size-1 ] = Op1[ 0..size-1 ]\n; - number of copied limb: range [ 0..max tCounter ]\n;\n; Caveats:\n; - if size 0 is given the content of the destination will remain untouched!\n; - if Op1=Op2 no copy is done!\n;\n; Comments:\n; - AVX-based version implemented, tested & benched on 05.01.2016 by jn\n; - did some experiments with AVX based version with following results\n;   - AVX can be faster in L1$ (30%), L2$ (10%) if dest. 
is aligned on 32 byte\n;   - AVX is generally faster on small sized operands (<=100 limb) due too\n;     start-up overhead of \"rep movsq\" - however this could also be achieved by\n;     simple copy loop\n;   - the break-even between AVX and \"rep movsq\" is around 10,000 limb\n; - the prologue & epilogue can still be optimized!\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%ifdef USE_WIN64\n    %define Op2     RCX\n    %define Op1     RDX\n    %define Size1   R8\n    %define Limb    R9\n    %define Offs    R10\n%else\n    %define Op2     RDI\n    %define Op1     RSI\n    %define Size1   RDX\n    %define Limb    RCX\n    %define Offs    R10\n%endif\n\n%define DLimb0  XMM0\n%define QLimb0  YMM0\n%define QLimb1  YMM1\n%define QLimb2  YMM2\n%define QLimb3  YMM3\n\n    align   32\n\nLEAF_PROC   mpn_copyi\n    mov     RAX, Size1\n    cmp     Op1, Op2\n    je      .Exit               ; no copy required =>\n\n    or      RAX, RAX\n    je      .Exit               ; size=0 =>\n\n    ; align the destination (Op2) to 32 byte\n    test    Op2, 8\n    je      .lCpyIncA32\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 8\n    add     Op2, 8\n\n  .lCpyIncA32:\n\n    test    Op2, 16\n    je      .lCpyIncAVX\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    dec     Size1\n    je      .Exit\n\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n    dec     Size1\n    je      .Exit\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncAVX:\n\n    mov     Offs, 128\n    jmp     .lCpyIncAVXCheck\n\n    ; main loop (prefetching disabled; unloaded cache)\n    ; - lCpyInc is slightly slower than lCpyDec through all cache levels?!\n    ; - 0.30      cycles / limb in L1$\n    ; - 0.60      cycles / limb in L2$\n    ; - 0.70-0.90 cycles / limb in L3$\n    align   16\n  .lCpyIncAVXLoop:\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqu QLimb2, [Op1+64]\n    vmovdqu QLimb3, 
[Op1+96]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n    vmovdqa [Op2+64], QLimb2\n    vmovdqa [Op2+96], QLimb3\n\n    add     Op1, Offs\n    add     Op2, Offs\n\n  .lCpyIncAVXCheck:\n\n    sub     Size1, 16\n    jnc     .lCpyIncAVXLoop\n\n    add     Size1, 16\n    je      .Exit               ; AVX copied operand fully =>\n\n    ; copy remaining max. 15 limb\n    test    Size1, 8\n    je      .lCpyIncFour\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqu QLimb1, [Op1+32]\n    vmovdqa [Op2], QLimb0\n    vmovdqa [Op2+32], QLimb1\n\n    add     Op1, 64\n    add     Op2, 64\n\n  .lCpyIncFour:\n\n    test    Size1, 4\n    je      .lCpyIncTwo\n\n    vmovdqu QLimb0, [Op1]\n    vmovdqa [Op2], QLimb0\n\n    add     Op1, 32\n    add     Op2, 32\n\n  .lCpyIncTwo:\n\n    test    Size1, 2\n    je      .lCpyIncOne\n\n%if 1\n    ; Avoid SSE2 instruction due to stall on Haswell\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n    mov     Limb, [Op1+8]\n    mov     [Op2+8], Limb\n%else\n    movdqu  DLimb0, [Op1]\n    movdqa  [Op2], DLimb0\n%endif\n\n    add     Op1, 16\n    add     Op2, 16\n\n  .lCpyIncOne:\n\n    test    Size1, 1\n    je      .Exit\n\n    mov     Limb, [Op1]\n    mov     [Op2], Limb\n\n  .Exit:\n\n    vzeroupper\n    ret\n.end:\n"
  },
  {
    "path": "mpn/x86_64w/skylake/divexact_byff.asm",
    "content": "; PROLOGUE(mpn_divexact_byff)\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_divexact_byff(mp_ptr, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi         rdx\n;  rax                           rcx     rdx          r8 \n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_divexact_byff\n\tmov     rax, 3\n\tand     rax, r8\n\tmov     [rsp+24], rax\n\txor     eax, eax\n\tshr     r8, 2\n\tcmp     r8, 0\n\tje      .2\n; want carry clear here\n\txalign  16\n.1:\tsbb     rax, [rdx]\n\tlea     rcx, [rcx+32]\n\tmov     r9, rax\n\tsbb     rax, [rdx+8]\n\tmov     r10, rax\n\tsbb     rax, [rdx+16]\n\tmov     r11, rax\n\tsbb     rax, [rdx+24]\n\tdec     r8\n\tmov     [rcx-32], r9\n\tmov     [rcx-24], r10\n\tmov     [rcx-16], r11\n\tmov     [rcx-8], rax\n\tlea     rdx, [rdx+32]\n\tjnz     .1\n.2:\tmov     r8, [rsp+24]\n; dont want to change the carry\n\tinc     r8\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx]\n\tmov     [rcx], rax\n\tdec     r8\n\tjz      .3\n\tsbb     rax, [rdx+8]\n\tmov     [rcx+8], rax\n\tdec     r8\n\tjz      
.3\n\tsbb     rax, [rdx+16]\n\tmov     [rcx+16], rax\n.3:\tsbb     rax, 0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                                    rdi     rsi        rdx        rcx\n;  rax                                    rcx     rdx         r8         r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n    FRAME_PROC mpn_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n\tmov     rcx, r9\n\tmov     r9, 0\n\tsub     r9, rax\n\t\n\tmov     rdx, rcx\n\t\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 
2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\txor     rdx, rdx\n\n\txalign  16\n.1:\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmov     [rdi+r9*8], rax\n\tmul     rcx\n\tadd     r8, 1\n\tinc     r9\n\tjnz     .1\n\tmov     rax, 0\n\tadc     rax, rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/hamdist.asm",
    "content": "; PROLOGUE(mpn_hamdist)\n\n;  AMD64 mpn_hamdist\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmp_limb_t mpn_hamdist(mp_ptr, mp_ptr, mp_size_t)\n;\trax                      rdi,    rsi,       rdx\n;\trax                      rcx,    rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_hamdist\n\tlea     rdx, [rdx+r8*8-8]\n\tlea     r9, [rcx+r8*8-8]\n\tmov     rcx, 1\n\txor     eax, eax\n\tsub     rcx, r8\n\tjnc     .1\n\txalign  16\n.0:\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tmov     r11, [r9+rcx*8+8]\n\txor     r11, [rdx+rcx*8+8]\n\tpopcnt  r11, r11\n\tadd     rax, r10\n\tadd     rax, r11\n\tadd     rcx, 2\n\tjnc     .0\n.1: jne     .2\n\tmov     r10, [r9+rcx*8]\n\txor     r10, [rdx+rcx*8]\n\tpopcnt  r10, r10\n\tadd     rax, r10\n.2:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/ior_n.asm",
    "content": "; PROLOGUE(mpn_ior_n)\n\n;  mpn_ior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_ior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_ior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpor     xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tor      rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/iorn_n.asm",
    "content": "; PROLOGUE(mpn_iorn_n)\n\n;  mpn_iorn_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_iorn_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_iorn_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm1, [rdx+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [r8+r10*8+16-32]\n\tpandn   xmm1, xmm3\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [rdx+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [r8+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tnot     rax\n\tor      rax, r9\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], 
xmm0\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [rdx+r10*8]\n\tmovdqu  xmm2, [r8+r10*8]\n\tpandn   xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tnot     rax\n\tor      rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/karaadd.asm",
    "content": "; PROLOGUE(mpn_karaadd)\n;  mpn_karaadd  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  
\n;\n;  void mpn_karaadd(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n    BITS 64\n    TEXT\n\n;   requires n >= 8  \n        FRAME_PROC mpn_karaadd, 1, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n;rp is rdi  \n;tp is rsi  \n;n is rdx and put it on the stack  \n        shr     rdx, 1\n;n2 is rdx  \n        lea     rcx, [rdx+rdx*1]\n; 2*n2 is rcx  \n; L is rdi  \n; H is rbp  \n; tp is rsi  \n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n; rax rbx are the carrys  \n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n        align   16\nlp:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+rcx*8]\n        adc     r9, [rsi+rcx*8+8]\n        adc     r10, [rsi+rcx*8+16]\n        adc     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     
[rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        adc     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     lp\n        cmp     rcx, 2\n        jg      case0\n        jz      case1\n        jp      case2\ncase3:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi]\n        adc     r9, [rsi+8]\n        adc     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        adc     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     fin\ncase2:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        
adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+8]\n        adc     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        adc     r12, [rsi+rdx*8]\n        adc     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     fin\ncase1:  \n        bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        adc     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        inc     rdx\n        mov     [rbp+rcx*8], r12\nfin:    mov     rcx, 3\ncase0:  \n        mov     r8, [rsp]\n        bt      r8, 0\n        jnc     notodd\n        xor     r10, r10\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        add     r8, [rsi+rdx*8]\n        adc     r9, [rsi+rdx*8+8]\n        rcl     r10, 1\n        add     [rbp+24], r8\n        adc     [rbp+32], r9\n        adc     [rbp+40], r10\nl7:     adc     qword[rbp+rcx*8+24], 0\n        inc     rcx\n        jc      l7\n        mov     rcx, 3\nnotodd: and     rax, 3\n        popcnt  r8, rax\n        bt      rbx, 2\n        adc     r8, 0\n        adc     [rdi+rdx*8], r8\nl1:     adc     qword[rdi+rdx*8+8], 0\n        inc     rdx\n        jc      l1\n        and     rbx, 7\n        popcnt  r8, rbx\n        add     [rbp+24], r8\nl2:     adc     qword[rbp+rcx*8+8], 0\n        inc 
    rcx\n        jc      l2\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/karasub.asm",
    "content": "; PROLOGUE(mpn_karasub)\n;  mpn_karasub  \n;       \n;  Copyright 2011 The Code Cavern  \n;\n;  Copyright 2012 Brian Gladman\n;       \n;  This file is part of the MPIR Library.  \n;       \n;  The MPIR Library is free software; you can redistribute it and/or modify  \n;  it under the terms of the GNU Lesser General Public License as published  \n;  by the Free Software Foundation; either version 2.1 of the License, or (at  \n;  your option) any later version.  \n;        \n;  The MPIR Library is distributed in the hope that it will be useful, but  \n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  \n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  \n;  License for more details.  \n;        \n;  You should have received a copy of the GNU Lesser General Public License  \n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write  \n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,  \n;  Boston, MA 02110-1301, USA.  \n;\n;  void mpn_karasub(mp_ptr, mp_ptr, mp_size_t)\n;  rax                 rdi     rsi        rdx\n;  rax                 rcx     rdx         r8\n;\n;  Karasuba Multiplication - split x and y into two equal length halves so\n;  that x = xh.B + xl and y = yh.B + yl. Then their product is:\n;\n;  x.y = xh.yh.B^2 + (xh.yl + xl.yh).B + xl.yl\n;      = xh.yh.B^2 + (xh.yh + xl.yl - {xh - xl}.{yh - yl}).B + xl.yl\n;\n; If the length of the elements is m (about n / 2), the output length is 4 * m \n; as illustrated below.  
The middle two blocks involve three additions and one \n; subtraction: \n; \n;       -------------------- rp\n;       |                  |-->\n;       |   A:xl.yl[lo]    |   |\n;       |                  |   |      (xh - xl).(yh - yl)\n;       --------------------   |      -------------------- tp\n;  <--  |                  |<--<  <-- |                  |\n; |     |   B:xl.yl[hi]    |   |      |     E:[lo]       |\n; |     |                  |   |      |                  |\n; |     --------------------   |      --------------------\n; >-->  |                  |-->   <-- |                  |\n; |\\___ |   C:xh.yh[lo]    | ____/    |     F:[hi]       |\n; |     |                  |          |                  |\n; |     --------------------          --------------------\n;  <--  |                  |   \n;       |   D:xh.yh[hi]    |\n;       |                  |\n;       --------------------\n;\n; To avoid overwriting B before it is used, we need to do two operations\n; in parallel:\n;\n; (1)   B = B + C + A - E = (B + C) + A - E\n; (2)   C = C + B + D - F = (B + C) + D - F\n;\n; The final carry from (1) has to be propagated into C and D, and the final\n; carry from (2) has to be propagated into D. When the number of input limbs\n; is odd, some extra operations have to be undertaken. 
\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14, r15\n\n%macro add_one 1\n    inc     %1\n%endmacro\n\n    BITS 64\n    TEXT\n        \n;       requires n >= 8  \n        FRAME_PROC mpn_karasub, 2, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n        mov     [rsp], rdx\n        mov     [rsp+8], rdi\n\n;       rp is rdi, tp is rsi, L is rdi, H is rbp, tp is rsi\n;       carries/borrows in rax, rbx\n   \n        shr     rdx, 1\n        lea     rcx, [rdx+rdx*1]\n        lea     rbp, [rdi+rcx*8]\n        xor     rax, rax\n        xor     rbx, rbx\n        lea     rdi, [rdi+rdx*8-24]\n        lea     rsi, [rsi+rdx*8-24]\n        lea     rbp, [rbp+rdx*8-24]\n        mov     ecx, 3\n        sub     rcx, rdx\n        mov     edx, 3\n\n        align   16\n.1:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+rcx*8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+rcx*8+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+rcx*8+16]\n        mov     r11, [rdi+rdx*8+24]\n        adc     r11, [rbp+rcx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        mov     r15, r11\n        adc     r8, [rdi+rcx*8]\n        mov     r13, r9\n        adc     r9, [rdi+rcx*8+8]\n        mov     r14, r10\n        adc     r10, [rdi+rcx*8+16]\n        adc     r11, [rdi+rcx*8+24]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        adc     r15, [rbp+rdx*8+24]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+rcx*8]\n        sbb     r9, [rsi+rcx*8+8]\n        sbb     r10, [rsi+rcx*8+16]\n        sbb     r11, [rsi+rcx*8+24]\n        mov     [rdi+rdx*8+16], r10\n        mov     [rdi+rdx*8+24], r11\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     
[rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        sbb     r15, [rsi+rdx*8+24]\n        rcl     rbx, 1\n        add     rdx, 4\n        mov     [rbp+rcx*8], r12\n        mov     [rbp+rcx*8+8], r13\n        mov     [rbp+rcx*8+16], r14\n        mov     [rbp+rcx*8+24], r15\n        add     rcx, 4\n        jnc     .1\n        cmp     rcx, 2\n        jg      .5\n        jz      .4\n        jp      .3\n\n.2:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+8]\n        mov     r10, [rdi+rdx*8+16]\n        adc     r10, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi]\n        mov     r13, r9\n        adc     r9, [rdi+8]\n        mov     r14, r10\n        adc     r10, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        adc     r14, [rbp+rdx*8+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi]\n        sbb     r9, [rsi+8]\n        sbb     r10, [rsi+16]\n        mov     [rdi+rdx*8+16], r10\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        sbb     r14, [rsi+rdx*8+16]\n        rcl     rbx, 1\n        add     rdx, 3\n        mov     [rbp], r12\n        mov     [rbp+8], r13\n        mov     [rbp+16], r14\n        jmp     .5\n\n.3:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+8]\n        mov     r12, r8\n        mov     r9, [rdi+rdx*8+8]\n        adc     r9, [rbp+16]\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+8]\n        mov     r13, r9\n        adc     r9, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, 
[rbp+rdx*8]\n        adc     r13, [rbp+rdx*8+8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+8]\n        sbb     r9, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        mov     [rdi+rdx*8+8], r9\n        sbb     r12, [rsi+rdx*8]\n        sbb     r13, [rsi+rdx*8+8]\n        rcl     rbx, 1\n        add     rdx, 2\n        mov     [rbp+8], r12\n        mov     [rbp+16], r13\n        jmp     .5\n\n.4:     bt      rbx, 2\n        mov     r8, [rdi+rdx*8]\n        adc     r8, [rbp+16]\n        mov     r12, r8\n        rcl     rbx, 1\n        bt      rax, 1\n        adc     r8, [rdi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        adc     r12, [rbp+rdx*8]\n        rcl     rbx, 1\n        bt      rax, 1\n        sbb     r8, [rsi+16]\n        rcl     rax, 1\n        bt      rbx, 2\n        mov     [rdi+rdx*8], r8\n        sbb     r12, [rsi+rdx*8]\n        rcl     rbx, 1\n        add_one rdx\n        mov     [rbp+rcx*8], r12\n\n;       move low half rbx carry into rax\n.5:     rcr     rax, 3\n        bt      rbx, 2\n        rcl     rax, 3\n        mov     r8, [rsp]\n        mov     rcx, rsi\n        mov     rsi,[rsp+8]\n        lea     r9, [r8+r8]\n        lea     rsi, [rsi+r9*8]\n        lea     r11, [rbp+24]\n        sub     r11, rsi\n        sar     r11, 3\n        bt      r8, 0\n        jnc     .9\n\n;       if odd the do next two  \n        add     r11, 2\n        mov     r8, [rbp+rdx*8]\n        mov     r9, [rbp+rdx*8+8]\n        rcr     rbx, 2\n        adc     r8,0\n        adc     r9, 0\n        rcl     rbx, 1\n        sbb     r8, [rcx+rdx*8]\n        sbb     r9, [rcx+rdx*8+8]\n        rcr     rbx, 2\n        adc     [rbp+24], r8\n        adc     [rbp+32], r9\n        rcl     rbx, 3\n\n; Now add in any accummulated carries and/or borrows\n;\n; NOTE: We can't propagate individual borrows or carries from the second\n; and third quarter blocks into the fourth quater block by simply 
waiting\n; for carry (or borrow) propagation to end.  This is because a carry into\n; the fourth quarter block when it contains only maximum integers or a \n; borrow when it contains all zero integers will incorrectly propagate\n; beyond the end of the top quarter block.\n\n.9:     lea     rdx, [rdi+rdx*8]\n        sub     rdx, rsi\n        sar     rdx, 3\n\n; carries/borrrow from second to third quarter quarter block\n;   rax{2} is the carry in (B + C)\n;   rax{1} is the carry in (B + C) + A\n;   rax{0} is the borrow in (B + C + A) - E\n\n        mov     rcx, rdx\n        bt      rax, 0\n.10:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .11\n        jc      .10\n\n.11     mov     rcx, rdx\n        bt      rax, 1\n.12:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .13\n        jc      .12\n\n.13     mov     rcx, rdx\n        bt      rax, 2\n.14:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .15\n        jc      .14\n\n; carries/borrrow from third to fourth quarter quarter block\n;   rbx{2} is the carry in (B + C)\n;   rbx{1} is the carry in (B + C) + D\n;   rbx{0} is the borrow in (B + C + D) - F\n\n.15:    mov     rcx, r11\n        bt      rbx, 0\n.16:    sbb     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .17\n        jc      .16\n\n.17:    mov     rcx, r11\n        bt      rbx, 1\n.18:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .19\n        jc      .18\n\n.19:    mov     rcx, r11\n        bt      rbx, 2\n.20:    adc     qword[rsi+rcx*8], 0\n        add_one rcx\n        jrcxz   .21\n        jc      .20\n.21:\n        END_PROC reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/lshift.asm",
    "content": "; PROLOGUE(mpn_lshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_lshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_lshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shl     rdx, cl\n    neg     rcx\n    shr     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     r11, r8\n    mov     rax, r9\n    shl     r8, cl\n    shl     r9, cl\n    neg     rcx\n    shr     r11, cl\n    shr     rax, cl\n    or      r9, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     eax, 64\n    sub     rax, rcx\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    lea     r9, [rdx+r8*8-16]\n    mov     r11, r9\n    and     r9, -16\n    movdqa  xmm3, [r9]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, 
xmm1\n    pshufd  xmm3, xmm3, 0x4e\n    movq    rax, xmm3\n    cmp     r11, r9\n    je      .4\n    movq    xmm2, [rdx+r8*8-8]\n    movq    xmm4, xmm2\n    psrlq   xmm2, xmm1\n    movq    rax, xmm2\n    psllq   xmm4, xmm0\n    por     xmm4, xmm3\n    movq    [r10+r8*8-8], xmm4\n    dec     r8\n.4:\tsub     r8, 5\n    jle     .6\n\n    xalign  16\n.5: movdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movdqa  xmm3, [rdx+r8*8-8]\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    pshufd  xmm3, xmm3, 0x4e\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    sub     r8, 4\n    jg      .5\n.6: cmp     r8, -1\n    je      .9\n    jg      .8\n    jp      .10\n.7:\tpxor    xmm2, xmm2\n    psllq   xmm5, xmm0\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    movq    xmm3, [rdx+r8*8]\n    pshufd  xmm3, xmm3, 0x4e\n    movdqa  xmm5, xmm3\n    psrlq   xmm3, xmm1\n    movhlps xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    psllq   xmm5, xmm0\n    movhpd  [r10+r8*8], xmm5\n    ret\n\n    xalign  16\n.9:\tmovdqa  xmm2, [rdx+r8*8+8]\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    pshufd  xmm2, xmm2, 0x4e\n    movhpd  [r10+r8*8+32], xmm5\n    pxor    xmm3, xmm3\n    movhlps 
xmm2, xmm3\n    psllq   xmm4, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+8], xmm4\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:movq    xmm2, [rdx+r8*8+16]\n    pshufd  xmm2, xmm2, 0x4e\n    movdqa  xmm4, xmm2\n    psllq   xmm5, xmm0\n    psrlq   xmm2, xmm1\n    movhlps xmm3, xmm2\n    por     xmm5, xmm3\n    movq    [r10+r8*8+24], xmm5\n    movhpd  [r10+r8*8+32], xmm5\n    psllq   xmm4, xmm0\n    movhpd  [r10+r8*8+16], xmm4\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/lshift1.asm",
    "content": "; PROLOGUE(mpn_lshift1)\n\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)\n;  rax                      rdi     rsi        rdx\n;  rax                      rcx     rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshift1\n    mov     rax, r8\n\tand     r8, 7\n\tinc     r8\n\tmov     [rsp+0x18], r8\n\tshr     rax, 3\n\tcmp     rax, 0\n\tjz      .2\n\t\n\txalign  16\n.1:\tmov     r8, [rdx]\n\tmov     r9, [rdx+8]\n\tmov     r10, [rdx+16]\n\tmov     r11, [rdx+24]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx], r8\n\tmov     [rcx+8], r9\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], r11\n\tmov     r8, [rdx+32]\n\tmov     r9, [rdx+40]\n\tmov     r10, [rdx+48]\n\tmov     r11, [rdx+56]\n\tadc     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tmov     [rcx+32], r8\n\tmov     [rcx+40], r9\n\tmov     [rcx+48], r10\n\tmov     [rcx+56], r11\n\tlea     rcx, [rcx+64]\n\tdec     rax\n\tlea     rdx, [rdx+64]\n\tjnz     .1\n.2:\tmov     
rax, [rsp+0x18]\n\tdec     rax\n\tjz      .3\n;\tCould still have cache-bank conflicts in this tail part\n\tmov     r8, [rdx]\n\tadc     r8, r8\n\tmov     [rcx], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+8]\n\tadc     r8, r8\n\tmov     [rcx+8], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+16]\n\tadc     r8, r8\n\tmov     [rcx+16], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+24]\n\tadc     r8, r8\n\tmov     [rcx+24], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+32]\n\tadc     r8, r8\n\tmov     [rcx+32], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+40]\n\tadc     r8, r8\n\tmov     [rcx+40], r8\n\tdec     rax\n\tjz      .3\n\tmov     r8, [rdx+48]\n\tadc     r8, r8\n\tmov     [rcx+48], r8\n.3:\tsbb     rax, rax\n\tneg     rax\n\tret\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/lshiftc.asm",
    "content": "; PROLOGUE(mpn_lshiftc)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void lshiftc(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;                  rdi     rsi        rdx      rcx\n;                  rcx     rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n    LEAF_PROC mpn_lshiftc\n\tmov     r9d, r9d\n\tmovq    mm0, r9\n\tmov     rax, 64\n\tsub     rax, r9\n\tpcmpeqb mm6, mm6\n\tmovq    mm1, rax\n\tlea     rdx, [rdx+8]\n\tlea     rcx, [rcx+8]\n\tsub     r8, 5\n\tmovq    mm5, [rdx+r8*8+24]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tmovq    rax, mm5\n\tpsllq   mm3, mm0\n\tjc      .2\n\t\n\txalign  16\n.1: movq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tmovq    mm5, [rdx+r8*8-8]\n\tsub     r8, 4\n\tpsrlq   mm2, mm1\n\tpor     mm3, 
mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+40], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+32], mm4\n\tpsllq   mm3, mm0\n\tjnc     .1\n.2: cmp     r8, -2\n\tjz      .4\n\tjp      .5\n\tjs      .6\n.3:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tmovq    mm2, [rdx+r8*8]\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8], mm4\n\temms\n\tret\n\n\txalign  16\n.4:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tmovq    mm5, [rdx+r8*8+8]\n\tmovq    mm3, mm5\n\tpsrlq   mm5, mm1\n\tpor     mm4, mm5\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\tpsllq   mm3, mm0\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+8], mm3\n\temms\n\tret\n\n\txalign  16\n.5:\tmovq    mm2, [rdx+r8*8+16]\n\tmovq    mm4, mm2\n\tpsrlq   mm2, mm1\n\tpor     mm3, mm2\n\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\tpsllq   mm4, mm0\n\tpxor    mm4, mm6\n\tmovq    [rcx+r8*8+16], mm4\n\temms\n\tret\n\n\txalign  16\n.6:\tpxor    mm3, mm6\n\tmovq    [rcx+r8*8+24], mm3\n\temms\n\tret\n\t\n"
  },
  {
    "path": "mpn/x86_64w/skylake/mod_1_1.asm",
    "content": "; PROLOGUE(mpn_mod_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_1(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r13\n\n    FRAME_PROC mpn_mod_1_1, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdx, r8\n    \n\tmov     r13, [rsi+rdx*8-8]\n\tmov     rax, [rsi+rdx*8-16]\n\tmov     r8, [r9]\n\tmov     r9, [r9+8]\n\tmov     rdi, rdx\n\tsub     rdi, 2\n\t\n\txalign  16\n.1:\tmov     r10, [rsi+rdi*8-8]\n\tmul     r8\n\tadd     r10, rax\n\tmov     r11, 0\n\tadc     r11, rdx\n\tmov     rax, r13\n\tmul     r9\n\tadd     rax, r10\n\tmov     r13, r11\n\tadc     r13, rdx\n\tdec     rdi\n\tjnz     .1\n\n\tmov     [rcx], rax\n\tmov     rax, r8\n\tmul     r13\n\tadd     [rcx], rax\n\tadc     rdx, 0\n\tmov     [rcx+8], rdx\n\tEND_PROC reg_save_list\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/mod_1_2.asm",
    "content": "; PROLOGUE(mpn_mod_1_2)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_2(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n        FRAME_PROC mpn_mod_1_2, 0, reg_save_list\n        mov     rdi, rcx\n        mov     rsi, rdx\n        mov     rdx, r8\n\n        mov     r14, [rsi+rdx*8-8]\n        mov     r13, [rsi+rdx*8-16]\n        mov     r8, [r9]\n        mov     r10, [r9+16]\n        mov     r9, [r9+8]\n        mov     rcx, rdx\n        mov     rax, [rsi+rdx*8-24]\n        mul     r8\n        mov     r11, [rsi+rcx*8-32]\n        xor     r12, r12\n        sub     rcx, 6\n        jc      .2\n    \n        align   16\n.1:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     
r14\n        add     r13, rax\n        mov     rax, [rsi+rcx*8+8]\n        mov     r14, r12\n        adc     r14, rdx\n        mul     r8\n        mov     r12d, 0\n        mov     r11, [rsi+rcx*8+0]\n        sub     rcx, 2\n        jnc     .1\n.2:     add     r11, rax\n        adc     r12, rdx\n        mov     rax, r9\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r10\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n        cmp     rcx, -2\n        je      .4\n.3:     mov     r11, [rsi+rcx*8+8]\n        xor     r12, r12\n        mov     rax, r8\n        mul     r13\n        add     r11, rax\n        adc     r12, rdx\n        mov     r13, r11\n        mov     rax, r9\n        mul     r14\n        add     r13, rax\n        mov     r14, r12\n        adc     r14, rdx\n.4:     mov     rax, r8\n        mul     r14\n        add     r13, rax\n        adc     rdx, 0\n        mov     [rdi], r13\n        mov     [rdi+8], rdx\n    \tEND_PROC reg_save_list\n\t\n\t    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/mod_1_3.asm",
    "content": "; PROLOGUE(mpn_mod_1_3)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mod_1_3(mp_ptr, mp_ptr, mp_size_t, mp_ptr)\n;  rax                       rdi     rsi        rdx     rcx\n;  rax                       rcx     rdx         r8      r9\n\n;\t(rdi,2)= not fully reduced remainder of (rsi,rdx) / divisor , and top limb <d\n;\twhere (rcx,4)  contains B^i % divisor\n\n%include 'yasm_mac.inc'\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14, r15\n\n    FRAME_PROC mpn_mod_1_3, 0, reg_save_list\n    mov     rsi, rdx\n    mov     rdi, r8\n\tmov     r15, [rsi+rdi*8-8]\n\tmov     r14, [rsi+rdi*8-16]\n\tmov     rax, [rsi+rdi*8-32]\n\tmov     r12, [rsi+rdi*8-40]\n\tmov     r8, [r9]\n\tmov     r10, [r9+16]\n\tmov     r11, [r9+24]\n\tmov     r9, [r9+8]\n\tsub     rdi, 8\n\tjc      .2\n\t\n; // r15 r14 -8() -16()=rax -24()=r12\n\txalign  16\n.1:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     
r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+0]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+8]\n\tadc     r15, rdx\n\tsub     rdi, 3\n\tjnc     .1\n\n; // we have loaded up the next two limbs\n; // but because they are out of order we can have to do 3 limbs min\n.2:\tcmp     rdi, -2\n\tjl      .5\n\tje      .4\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n.3:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+8]\n\tmov     r15, r13\n\tmov     rax, [rsi+rdi*8+16]\n\tadc     r15, rdx\n\t; // r15 r14 rax r12\n\tmov     r13, 0\n\tmul     r8\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\n\t; //two more limbs is 4 limbs\n\t; // r15 r14 40() 8+24()=rax 0+24()=r12\n\txalign  16\n.4:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     r14, r12\n\tmov     rax, r11\n\tmul     r15\n\tadd     r14, rax\n\tmov     r12, [rsi+rdi*8+16]\n\tmov     r15, r13\n\tadc     r15, rdx\n\t; // r15 r14 r12\n\tmov     r13, 0\n\tmov     rax, r8\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r9\n\tmul     r15\n\tadd     r12, rax\n\tadc     r13, rdx\n\t; // r13 r12\n\tmov     rax, r8\n\tmul     r13\n\tjmp     .6\n\t\n\t; // one more is 3 limbs\n\t; // r15 r14 
40() 8+24()=rax 0+24()=r12 \n\txalign  16\n.5:\tmul     r8\n\tadd     r12, rax\n\tmov     rax, [rsi+rdi*8+40]\n\tmov     r13, 0\n\tadc     r13, rdx\n\tmul     r9\n\tadd     r12, rax\n\tnop\n\tadc     r13, rdx\n\tmov     rax, r10\n\tmul     r14\n\tadd     r12, rax\n\tadc     r13, rdx\n\tmov     rax, r11\n\tmul     r15\n\tadd     r12, rax\n\tmov     r15, r13\n\tadc     r15, rdx\n\tmov     rax, r8\n\tmul     r15\n.6:\tadd     r12, rax\n\tadc     rdx, 0\n\tmov     [rcx], r12\n\tmov     [rcx+8], rdx\n    END_PROC reg_save_list\n    \n    end\n    \n"
  },
  {
    "path": "mpn/x86_64w/skylake/mul_1.asm",
    "content": "; PROLOGUE(mpn_mul_1)\n\n;  Copyright 2011 The Code Cavern  \n;\n;  Windows Conversion Copyright 2011 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_mul_1c(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx         r8\n;  rax                     rcx     rdx         r8         r9   [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12\n\n        LEAF_PROC mpn_mul_1\n        mov     rax, [rdx]\n        cmp     r8, 1\n        jne     .1\n        mul     r9\n        mov     [rcx], rax\n        mov     rax, rdx\n        ret\n\n.1:     FRAME_PROC ?mpn_sandybridge_mul, 0, reg_save_list\n        mov     r11, 5\n        lea     rsi, [rdx+r8*8-40]\n        lea     rdi, [rcx+r8*8-40]\n        mov     rcx, r9\n        sub     r11, r8\n        mul     rcx\n        db      0x26\n        mov     r8, rax\n        db      0x26\n        mov     rax, [rsi+r11*8+8]\n        db      0x26\n        mov     r9, rdx\n        db      0x26\n        cmp     r11, 0\n        db      0x26\n       
 mov     [rsp-8], r12\n        db      0x26\n        jge     .2\n.1:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+r11*8+16]\n        mul     rcx\n        mov     [rdi+r11*8+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+r11*8+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+r11*8+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+r11*8+32]\n        mul     rcx\n        mov     [rdi+r11*8+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        add     r11, 4\n        mov     rax, [rsi+r11*8+8]\n        jnc     .1\n.2:     xor     r10, r10\n        mul     rcx\n        mov     [rdi+r11*8], r8\n        add     r9, rax\n        adc     r10, rdx\n        cmp     r11, 2\n        ja      .5\n        jz      .4\n        jp      .3\n        mov     rax, [rsi+16]\n        mul     rcx\n        mov     [rdi+8], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+24]\n        xor     r8, r8\n        xor     r9, r9\n        mul     rcx\n        mov     [rdi+16], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r12\n        add     r8, rax\n        adc     r9, rdx\n        mov     [rdi+32], r8\n        mov     rax, r9\n        EXIT_PROC   reg_save_list\n\n.3:     mov     rax, [rsi+24]\n        mul     rcx\n        mov     [rdi+16], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     rax, [rsi+32]\n        xor     r8, r8\n        mul     rcx\n        mov     [rdi+24], r10\n        add     r12, rax\n        adc     r8, rdx\n        mov     [rdi+32], r12\n        mov     rax, r8\n        EXIT_PROC   reg_save_list\n\n        align   16\n.4:  
   mov     rax, [rsi+32]\n        mul     rcx\n        mov     [rdi+24], r9\n        add     r10, rax\n        mov     r12d, 0\n        adc     r12, rdx\n        mov     [rdi+32], r10\n        mov     rax, r12\n        EXIT_PROC   reg_save_list\n\n.5:     mov     [rdi+32], r9\n        mov     rax, r10\n        END_PROC   reg_save_list\n\n        end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/mul_2.asm",
    "content": "; PROLOGUE(mpn_mul_2)\n;  X86_64 mpn_mul_2\n;\n;  Copyright 2010 Jason Moxham\n;\n;  Windows Conversion Copyright 2010 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_mul_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                     rdi     rsi        rdx        rcx\n;  rax                     rcx     rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list rsi, rdi, rbx\n\n        CPU  nehalem\n        BITS 64\n\n    \tFRAME_PROC mpn_mul_2, 0, reg_save_list\n        mov     rbx, 3\n        lea     rdi, [rcx+r8*8-24]\n        lea     rsi, [rdx+r8*8-24]\n        sub     rbx, r8\n        mov     r8, [r9+8]\n        mov     rcx, [r9]\n\n        mov     r11, 0\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8]\n        mov     r10, 0\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8]\n        mov     [rdi+rbx*8], r11\n        adc     r9, rdx\n        cmp     rbx, 0\n        jge     .2\n\n        align   16\n.1:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n  
      adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        mov     rax, [rsi+rbx*8+24]\n        mov     r10, 0\n        adc     r9, rdx\n        mul     rcx\n        add     r11, rax\n        mov     rax, [rsi+rbx*8+24]\n        mov     [rdi+rbx*8+24], r11\n        adc     r9, rdx\n        adc     r10, 0\n        add     rbx, 3\n        jnc     .1\n.2:     cmp     rbx, 1\n        ja      .5\n        je      .4\n.3:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        mov     rax, [rsi+rbx*8+16]\n        adc     r11, rdx\n        mul     rcx\n        add     r10, rax\n        mov     [rdi+rbx*8+8], r9\n        adc     r11, rdx\n        mov     r9, 0\n        mov     rax, [rsi+rbx*8+16]\n        adc     r9, 0\n        mul     r8\n        add     r11, rax\n        mov     [rdi+rbx*8+16], r10\n        adc     r9, rdx\n        mov     [rdi+rbx*8+24], r11\n        mov     rax, r9\n        EXIT_PROC reg_save_list\n\n.4:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     rax, [rsi+rbx*8+8]\n        mov     r11, 0\n        mul     rcx\n        add     r9, rax\n        adc     r10, rdx\n        adc     r11, 0\n        mov     rax, [rsi+rbx*8+8]\n        mul     r8\n        add     r10, rax\n        adc     r11, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     
[rdi+rbx*8+16], r10\n        mov     rax, r11\n        EXIT_PROC reg_save_list\n\n.5:     mul     r8\n        add     r9, rax\n        adc     r10, rdx\n        mov     [rdi+rbx*8+8], r9\n        mov     rax, r10\n        END_PROC reg_save_list\n        \n        end    \n        \n"
  },
  {
    "path": "mpn/x86_64w/skylake/mul_basecase.asm",
    "content": ";  AMD64 mpn_mul_basecase optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n\n;  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;  mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)\n;  rax                           rdi     rsi        rdx     rcx         r8\n;  rax                           rcx     rdx         r8      r9   [rsp+40]\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13, r14, r15 \n\n%define rp       rdi\n%define up       rsi\n%define un_param rdx\n%define vp       rcx\n\n%define un       rbx\n%define un8      bl\n\n%define w0       r10\n%define w1       r11\n%define w2       r12\n%define w3       r13\n%define n        rbp\n\nBITS    64\n\nalign 16\n\nFRAME_PROC mpn_mul_basecase, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8\n    mov     rcx, r9\n    mov     r8, [rsp+stack_use+40]\n\t\n    mov \tun, rdx\n\tneg \tun\n\tmov \tn, rdx\n\tsar \tn, 2\n\n\ttest \tr8b, 1\n\tjz \t.Ldo_mul_2\n\tmov \trdx, [vp]\n\n.Ldo_mul_1:\n\ttest \tun8, 1\n\tjnz \t.Lm1x1\n\n.Lm1x0:\n\ttest \tun8, 2\n\tjnz \t.Lm110\n\n.Lm100:\n\tmulx \tw2, r14, [up]\n\tmulx \tw3, w1, [up+8]\n\tlea \trp, [rp-24]\n\tjmp \t.Lm1l0\n\n.Lm110:\n\tmulx \tr9, w3, [up]\n\tmulx \tr14, w1, [up+8]\n\tlea \trp, [rp-8]\n\ttest \tn, n\n\tjz \t.Lcj2\n\tmulx \tw2, w0, [up+16]\n\tlea \tup, [up+16]\n\tjmp \t.Lm1l2\n\n.Lm1x1:\n\ttest \tun8, 2\n\tjz \t.Lm111\n\n.Lm101:\n\tmulx \tr14, r9, [up]\n\tlea \trp, [rp-16]\n\ttest \tn, n\n\tjz \t.Lcj1\n\tmulx \tw2, w0, [up+8]\n\tlea \tup, [up+8]\n\tjmp \t.Lm1l1\n\n.Lm111:\n\tmulx \tw3, w2, [up]\n\tmulx \tr9, w0, [up+8]\n\tmulx \tr14, w1, [up+16]\n\tlea \tup, [up+24]\n\ttest \tn, n\n\tjnz \t.Lgt3\n\tadd \tw3, w0\n\tjmp \t.Lcj3\n\n.Lgt3:\n\tadd \tw3, w0\n\tjmp \t.Lm1l3\n\n\n\talign 32\n.Lm1tp:\n\tlea \trp, [rp+32]\n.Lm1l3:\n\tmov \t[rp], w2\n\tmulx \tw2, w0, [up]\n.Lm1l2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lm1l1:\n\tadc \tr14, w0\n\tmov \t[rp+16], r9\n\tmulx \tw3, w1, [up+8]\n.Lm1l0:\n\tmov \t[rp+24], r14\n\tmulx \tr9, w0, [up+16]\n\tadc \tw2, w1\n\tmulx \tr14, 
w1, [up+24]\n\tadc \tw3, w0\n\tlea \tup, [up+32]\n\tdec \tn\n\tjnz \t.Lm1tp\n\n.Lm1ed:\n\tlea \trp, [rp+32]\n.Lcj3:\n\tmov \t[rp], w2\n.Lcj2:\n\tmov \t[rp+8], w3\n\tadc \tr9, w1\n.Lcj1:\n\tmov \t[rp+16], r9\n\tadc \tr14, 0\n\tmov \t[rp+24], r14\n\n\tdec \tr8d\n\tjz \t.Lret5\n\n\tlea \tvp, [vp+8]\n\tlea \trp, [rp+32]\n\tjmp \t.Ldo_addmul\n\n.Ldo_mul_2:\n\tmov \tr9, [vp]\n\tmov \tr14, [vp+8]\n\tlea \tn, [un]\n\tsar \tn, 2\n\ttest \tun8, 1\n\tjnz \t.Lm2x1\n\n.Lm2x0:\n\txor \tw0, w0\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw1, w2, r9\n\tjz \t.Lm2l0\n\n.Lm210:\n\tlea \trp, [rp-16]\n\tlea \tup, [up-16]\n\tjmp \t.Lm2l2\n\n.Lm2x1:\n\txor \tw2, w2\n\ttest \tun8, 2\n\tmov \trdx, [up]\n\tmulx \tw3, w0, r9\n\tjz \t.Lm211\n\n.Lm201:\n\tlea \trp, [rp-24]\n\tlea \tup, [up+8]\n\tjmp \t.Lm2l1\n\n.Lm211:\n\tlea \trp, [rp-8]\n\tlea \tup, [up-8]\n\tjmp \t.Lm2l3\n\n\n\talign 16\n.Lm2tp:\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l0:\n\tmov \t[rp], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+8]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n.Lm2l3:\n\tmov \t[rp+8], w0\n\tadc \tw3, 0\n\tmulx \tw0, rax, r14\n\tadd \tw2, rax\n\tmov \trdx, [up+16]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n\tadd \tw2, rax\n\tadc \tw1, 0\n\tadd \tw2, w3\n.Lm2l2:\n\tmov \t[rp+16], w2\n\tadc \tw1, 0\n\tmulx \tw2, rax, r14\n\tadd \tw0, rax\n\tmov \trdx, [up+24]\n\tadc \tw2, 0\n\tmulx \tw3, rax, r9\n\tadd \tw0, rax\n\tadc \tw3, 0\n\tadd \tw0, w1\n\tlea \tup, [up+32]\n.Lm2l1:\n\tmov \t[rp+24], w0\n\tadc \tw3, 0\n\tinc \tn\n\tlea \trp, [rp+32]\n\tjnz \t.Lm2tp\n\n.Lm2ed:\n\tmulx \trax, rdx, r14\n\tadd \tw2, rdx\n\tadc \trax, 0\n\tadd \tw2, w3\n\tmov \t[rp], w2\n\tadc \trax, 0\n\tmov \t[rp+8], rax\n\tadd \tr8d, -2\n\tjz \t.Lret5\n\tlea \tvp, [vp+16]\n\tlea \trp, [rp+16]\n\n.Ldo_addmul:\n\tmov     [rsp+stack_use+8], r8\n\tlea \trp, 
[un*8+rp]\n\tlea \tup, [un*8+up]\n\n.Louter:\n\tmov \tr9, [vp]\n\tmov \tr8, [vp+8]\n\tlea \tn, [un+2]\n\tsar \tn, 2\n\tmov \trdx, [up]\n\ttest \tun8, 1\n\tjnz \t.Lbx1\n\n.Lbx0:\n\tmov \tr14, [rp]\n\tmov \tr15, [rp+8]\n\tmulx \tw1, rax, r9\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tmov \t[rp], r14\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\ttest \tun8, 2\n\tjnz \t.Lb10\n\n.Lb00:\n\tlea \tup, [up+16]\n\tlea \trp, [rp+16]\n\tjmp \t.Llo0\n\n.Lb10:\n\tmov \tr14, [rp+16]\n\tlea \tup, [up+32]\n\tmulx \tw3, rax, r9\n\tjmp \t.Llo2\n\n.Lbx1:\n\tmov \tr15, [rp]\n\tmov \tr14, [rp+8]\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tmov \trdx, [up+8]\n\tmov \t[rp], r15\n\tmulx \tw1, rax, r9\n\ttest \tun8, 2\n\tjz \t.Lb11\n\n.Lb01:\n\tmov \tr15, [rp+16]\n\tlea \trp, [rp+24]\n\tlea \tup, [up+24]\n\tjmp \t.Llo1\n\n.Lb11:\n\tlea \trp, [rp+8]\n\tlea \tup, [up+8]\n\tjmp \t.Llo3\n\n\n\talign 16\n.Ltop:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n.Llo2:\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tlea \trp, [rp+32]\n\tadd \tr15, w1\n\tmov \trdx, [up-16]\n\tmov \t[rp-24], r15\n\tadc \tw3, 0\n\tadd \tr14, w2\n\tmov \tr15, [rp-8]\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo1:\n\tadd \tr14, rax\n\tmulx \tw2, rax, r8\n\tadc \tw1, 0\n\tadd \tr14, w3\n\tmov \t[rp-16], r14\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tadd \tr15, w0\n\tmov \trdx, [up-8]\n\tadc \tw2, 0\n.Llo0:\n\tmulx \tw3, rax, r9\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmov \tr14, [rp]\n\tmulx \tw0, rax, r8\n\tadd \tr14, rax\n\tadc \tw0, 0\n\tadd \tr15, w1\n\tmov \t[rp-8], r15\n\tadc \tw3, 0\n\tmov \trdx, [up]\n\tadd \tr14, w2\n\tmulx \tw1, rax, r9\n\tadc \tw0, 0\n.Llo3:\n\tadd \tr14, rax\n\tadc \tw1, 0\n\tmulx \tw2, rax, r8\n\tadd \tr14, w3\n\tmov \tr15, [rp+8]\n\tmov \t[rp], r14\n\tmov \tr14, [rp+16]\n\tadc \tw1, 0\n\tadd \tr15, rax\n\tadc \tw2, 0\n\tmov \trdx, [up+8]\n\tlea \tup, 
[up+32]\n\tinc \tn\n\tjnz \t.Ltop\n\n.Lend:\n\tmulx \tw3, rax, r9\n\tadd \tr15, w0\n\tadc \tw2, 0\n\tadd \tr15, rax\n\tadc \tw3, 0\n\tmulx \trax, rdx, r8\n\tadd \tr15, w1\n\tmov \t[rp+8], r15\n\tadc \tw3, 0\n\tadd \trdx, w2\n\tadc \trax, 0\n\tadd \trdx, w3\n\tmov \t[rp+16], rdx\n\tadc \trax, 0\n\tmov \t[rp+24], rax\n\n\tadd \tDWORD [rsp+stack_use+8], -2\n\tlea \tvp, [vp+16]\n\tlea \tup, [up+un*8-16]\n\tlea \trp, [rp+un*8+32]\n\tjnz \t.Louter\n\n\tmov \trax, [rsp+stack_use+8]\n.Lret5:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/mullow_n_basecase.asm",
    "content": ";  AMD64 mpn_mullow_n_basecase\n;\n;  Copyright 2015 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;\n;void mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n;                                  rdi           rsi           rdx          rcx\n;                                  rcx           rdx            r8           r9\n\n%include 'yasm_mac.inc'\n\n\ttext\n\talign\t32\n\tLEAF_PROC mpn_mullow_n_basecase\n\tcmp     r9, 3\n\tje      asm_sym(?mpn_mullow1)\n\tja      asm_sym(?mpn_mullow2)\n\tmov     rax, [rdx]\n\tcmp     r9, 2\n\tjae     .1\n\n\timul    rax, [r8]\n\tmov     [rcx], rax\n\tret\n\n.1: mov     r10, rdx\n    mov     r9, [r8]\n\tmul     r9\n\tmov     [rcx], rax\n\tmov     rax, [r10+8]\n\timul    rax, r9\n\tadd     rdx, rax\n\tmov     r9, [r8+8]\n\tmov     r11, [r10]\n\timul    r11, r9\n\tadd     rdx, r11\n\tmov     [rcx+8], rdx\n\tret\n\n%define reg_save_list rsi, rdi\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\tmov     rax, [rsi]\n    mov     r9, [r8]\n\tmul     r9\n\tmov     [rdi], rax\n\tmov     r10, rdx\n\tmov     rax, [rsi+8]\n\tmul     r9\n\timul    r9, [rsi+16]\n\tadd     r10, rax\n\tadc     r9, rdx\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi]\n\tmul     r11\n\tadd     r10, rax\n\tadc     r9, rdx\n\timul    r11, [rsi+8]\n\tadd     r9, r11\n\tmov     [rdi+8], r10\n\tmov     r10, [r8+16]\n\tmov     rax, [rsi]\n\timul    r10, rax\n\tadd     r9, r10\n\tmov     [rdi+16], r9\n    END_PROC reg_save_list\n\n%define reg_save_list rsi, rdi, rbx, rbp, r12, r13, r14, r15\n\n\talign\t32\n\tFRAME_PROC ?mpn_mullow2, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rcx, r9\n\tmov     rax, [rsi]\n\tmov     r10, [r8]\n\tlea     rdi, [rdi+rcx*8]\n\tlea     rsi, [rsi+rcx*8]\n\tmov     r9d, 0\n\tsub     r9, rcx\n\tmul     r10\n\tmov     r11, [r8+8]\n\ttest    cl, 1\n\tjnz     .4\n.1:\ttest    cl, 2\n\tjnz     .3\n.2:\tlea     r13, [r9]\n\tmov     [rdi+r9*8], rax\n\tmov     rcx, rdx\n\tmov     
rax, [rsi+r9*8]\n\txor     ebp, ebp\n\tjmp     .9\n.3:\tlea     r13, [r9-2]\n\tmov     rbp, rax\n\tmov     rax, [rsi+r9*8]\n\tmov     r12, rdx\n\txor     ebx, ebx\n\tjmp     .11\n.4:\ttest    cl, 2\n\tjnz     .6\n.5:\tlea     r13, [r9+1]\n\tmov     [rdi+r9*8], rax\n\tmov     rax, [rsi+r9*8]\n\tmov     rbx, rdx\n\txor     ecx, ecx\n\tjmp     .8\n.6:\tlea     r13, [r9-1]\n\txor     r12d, r12d\n\tmov     rcx, rax\n\tmov     rbp, rdx\n\tmov     rax, [rsi+r9*8]\n\tjmp     .10\n\n\talign\t16\n.7:\tmul     r10\n\tadd     r12, rax\n\tmov     rax, [rsi+r13*8-8]\n\tmov     [rdi+r13*8-8], r12\n\tadc     rbx, rdx\n\tadc     ecx, 0\n.8:\tmul     r11\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tmov     ebp, 0\n\tmov     rax, [rsi+r13*8]\n\tmul     r10\n\tadd     rbx, rax\n\tmov     [rdi+r13*8], rbx\n\tadc     rcx, rdx\n\tmov     rax, [rsi+r13*8]\n\tadc     ebp, 0\n.9:\tmul     r11\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r13*8+8]\n\tmul     r10\n\tmov     r12d, 0\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tadc     r12d, 0\n\tmov     rax, [rsi+r13*8+8]\n.10:mul     r11\n\tadd     rbp, rax\n\tmov     [rdi+r13*8+8], rcx\n\tadc     r12, rdx\n\tmov     ebx, 0\n\tmov     rax, [rsi+r13*8+16]\n\tmul     r10\n\tadd     rbp, rax\n\tmov     rax, [rsi+r13*8+16]\n\tadc     r12, rdx\n\tadc     ebx, 0\n.11:mul     r11\n\tmov     ecx, 0\n\tadd     r12, rax\n\tmov     rax, [rsi+r13*8+24]\n\tmov     [rdi+r13*8+16], rbp\n\tadc     rbx, rdx\n\tadd     r13, 4\n\tjs      .7\n.12:imul    rax, r10\n\tadd     rax, r12\n\tmov     [rdi-8], rax\n\tadd     r9, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     r9, -2\n\tjge     .26\n.13:mov     r10, [r8]\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi+r9*8]\n\tmul     r10\n\ttest    r9b, 1\n\tjnz     .17\n.14:mov     r15, rax\n\tmov     r14, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r11\n\ttest    r9b, 2\n\tjnz     .16\n.15:lea     r13, [r9]\n\tmov     r12, [rdi+r9*8]\n\tmov     rbx, rax\n\tlea     rcx, [rdx]\n\tjmp     .23\n.16:lea   
  r13, [r9+2]\n\tmov     rcx, [rdi+r9*8]\n\tmov     rbp, rax\n\tmov     rax, [rsi+r9*8+8]\n\tlea     r12, [rdx]\n\tjmp     .21\n.17:mov     r14, rax\n\tmov     r15, rdx\n\tmov     rax, [rsi+r9*8]\n\tmul     r11\n\ttest    r9b, 2\n\tjz      .19\n.18:lea     r13, [r9+1]\n\tlea     rbx, [rdx]\n\tmov     rbp, [rdi+r9*8]\n\tmov     r12, rax\n\tjmp     .22\n.19:lea     r13, [r9-1]\n\tlea     rbp, [rdx]\n\tmov     rcx, rax\n\tmov     rbx, [rdi+r9*8]\n\tmov     rax, [rsi+r9*8+8]\n\tjmp     .24\n\t\n    align\t16\n.20:mul     r11\n\tadd     rcx, rbx\n\tadc     rbp, rax\n\tmov     rax, [rsi+r13*8-8]\n\tlea     r12, [rdx]\n\tadc     r12, 0\n.21:mul     r10\n\tadd     r15, rcx\n\tmov     [rdi+r13*8-16], r15\n\tadc     r14, rax\n\tmov     r15, rdx\n\tadc     r15, 0\n\tmov     rax, [rsi+r13*8-8]\n\tmul     r11\n\tlea     rbx, [rdx]\n\tmov     rcx, [rdi+r13*8-8]\n\tadd     rbp, rcx\n\tadc     r12, rax\n\tadc     rbx, 0\n.22:mov     rax, [rsi+r13*8]\n\tmul     r10\n\tadd     r14, rbp\n\tadc     r15, rax\n\tmov     [rdi+r13*8-8], r14\n\tmov     r14, rdx\n\tadc     r14, 0\n\tmov     rax, [rsi+r13*8]\n\tmov     rbp, [rdi+r13*8]\n\tmul     r11\n\tadd     r12, rbp\n\tadc     rbx, rax\n\tlea     rcx, [rdx]\n\tadc     rcx, 0\n.23:mov     rax, [rsi+r13*8+8]\n\tmul     r10\n\tadd     r15, r12\n\tmov     [rdi+r13*8], r15\n\tadc     r14, rax\n\tmov     r15, rdx\n\tmov     r12, [rdi+r13*8+8]\n\tadc     r15, 0\n\tmov     rax, [rsi+r13*8+8]\n\tmul     r11\n\tadd     rbx, r12\n\tlea     rbp, [rdx]\n\tadc     rcx, rax\n\tmov     rax, [rsi+r13*8+16]\n\tadc     rbp, 0\n.24:mul     r10\n\tadd     r14, rbx\n\tmov     [rdi+r13*8+8], r14\n\tmov     r14, rdx\n\tadc     r15, rax\n\tmov     rax, [rsi+r13*8+16]\n\tmov     rbx, [rdi+r13*8+16]\n\tadc     r14, 0\n\tadd     r13, 4\n\tjnc     .20\n.25:imul    rax, r11\n\tadd     rcx, rbx\n\tadc     rbp, rax\n\tmov     rax, [rsi-8]\n\timul    rax, r10\n\tadd     r15, rcx\n\tmov     [rdi-16], r15\n\tadc     rax, r14\n\tmov     rcx, [rdi-8]\n\tadd     rbp, 
rcx\n\tadd     rax, rbp\n\tmov     [rdi-8], rax\n\tadd     r9, 2\n\tlea     r8, [r8+16]\n\tlea     rsi, [rsi-16]\n\tcmp     r9, -2\n\tjl      .13\n\tjnz     .27\n.26:mov     r10, [r8]\n\tmov     r11, [r8+8]\n\tmov     rax, [rsi-16]\n\tmul     r10\n\tadd     rax, [rdi-16]\n\tadc     rdx, [rdi-8]\n\tmov     rbx, [rsi-8]\n\timul    rbx, r10\n\tmov     rcx, [rsi-16]\n\timul    rcx, r11\n\tmov     [rdi-16], rax\n\tadd     rcx, rbx\n\tadd     rcx, rdx\n\tmov     [rdi-8], rcx\n    EXIT_PROC reg_save_list\n\n.27:mov     r11, [r8]\n\timul    r11, [rsi-8]\n\tadd     r11, rax\n\tmov     [rdi-8], r11\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/nand_n.asm",
    "content": "; PROLOGUE(mpn_nand_n)\n\n;  mpn_nand_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nand_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nand_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpand    xmm1, xmm3\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tand     rax, r9\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpand    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tand     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/nior_n.asm",
    "content": "; PROLOGUE(mpn_nior_n)\n\n;  mpn_nior_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_nior_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_nior_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpor     xmm1, xmm3\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\tor      rax, r9\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpor     xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\tor      rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/popcount.asm",
    "content": "; PROLOGUE(mpn_popcount)\n\n;  AMD64 mpn_popcount\n;  Copyright 2009 Jason Moxham\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\tmpn_limb_t mpn_popcount(mp_ptr,mp_size_t)\n;\trax                        rdi,      rsi\n;\teax                        rcx,      rdx\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_popcount\n\tmov     r8, 5\n\tlea     rcx, [rcx+rdx*8-40]\n\txor     eax, eax\n\tsub     r8, rdx\n\tjnc     .1\n\txalign  16\n.0:\n\tpopcnt  r9, [rcx+r8*8]\n\tpopcnt  r10, [rcx+r8*8+8]\n\tpopcnt  r11, [rcx+r8*8+16]\n\tpopcnt  rdx, [rcx+r8*8+24]\n\tadd     rax, r9\n\tadd     rax, rdx\n\tadd     rax, r10\n\tpopcnt  r9, [rcx+r8*8+32]\n\tpopcnt  r10, [rcx+r8*8+40]\n\tadd     rax, r9\n\tadd     rax, r11\n\tadd     rax, r10\n\tadd     r8, 6\n\tjnc     .0\n.1:\n\tlea     rdx, [rel .2]\n\tlea     r8, [r8+r8*8]\n\tadd     rdx, r8\n\tjmp     rdx\n.2:\n\tnop\n\tpopcnt  r9, [rcx]\n\tadd     rax, r9\n.3:\n\tpopcnt  r10, [rcx+8]\n\tadd     rax, r10\n.4:\tpopcnt  r11, [rcx+16]\n\tadd     rax, r11\n.5:\tpopcnt  rdx, [rcx+24]\n\tadd     rax, rdx\n.6:\tpopcnt  r9, [rcx+32]\n\tadd     rax, r9\n.7:\tret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/redc_1.asm",
    "content": "; PROLOGUE(mpn_redc_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  void mpn_redc_1(mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)\n;  rax                rdi     rsi        rdx        rcx        r8\n;  rax                rcx     rdx         r8         r9  [rsp+40]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15\n\n    CPU  Core2\n    BITS 64\n\n%macro mpn_add 0\n\n    mov     rax, rcx\n    and     rax, 3\n    shr     rcx, 2\n    cmp     rcx, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    add     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n    jmp     %%2\n\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    adc     r11, [rdx]\n    adc     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rdi], r11\n    mov     [rdi+8], r8\n    lea     rdi, [rdi+32]\n    mov     r9, 
[rsi-16]\n    mov     r10, [rsi-8]\n    adc     r9, [rdx-16]\n    adc     r10, [rdx-8]\n    mov     [rdi-16], r9\n    dec     rcx\n    mov     [rdi-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    adc     r11, [rdx]\n    mov     [rdi], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    adc     r11, [rdx+8]\n    mov     [rdi+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    adc     r11, [rdx+16]\n    mov     [rdi+16], r11\n%%2:\n\n%endmacro\n\n%macro mpn_sub 0\n\n    mov     rax, rbp\n    and     rax, 3\n    shr     rbp, 2\n    cmp     rbp, 0\n    jnz     %%1\n    mov     r11, [rsi]\n    sub     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n    jmp     %%2\n    xalign  16\n%%1:mov     r11, [rsi]\n    mov     r8, [rsi+8]\n    lea     rsi, [rsi+32]\n    sbb     r11, [rdx]\n    sbb     r8, [rdx+8]\n    lea     rdx, [rdx+32]\n    mov     [rbx], r11\n    mov     [rbx+8], r8\n    lea     rbx, [rbx+32]\n    mov     r9, [rsi-16]\n    mov     r10, [rsi-8]\n    sbb     r9, [rdx-16]\n    sbb     r10, [rdx-8]\n    mov     [rbx-16], r9\n    dec     rbp\n    mov     [rbx-8], r10\n    jnz     %%1\n    inc     rax\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi]\n    sbb     r11, [rdx]\n    mov     [rbx], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+8]\n    sbb     r11, [rdx+8]\n    mov     [rbx+8], r11\n    dec     rax\n    jz      %%2\n    mov     r11, [rsi+16]\n    sbb     r11, [rdx+16]\n    mov     [rbx+16], r11\n%%2:\n\n%endmacro\n\n%macro addmulloop 1\n\n    xalign  16\n%%1:mov     r10, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    db      0x26\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     
r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12, 0\n    mov     r9, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    db      0x26\n    adc     rbx, rax\n    db      0x26\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    db      0x26\n    adc     r12, rax\n    db      0x26\n    adc     r9, rdx\n    add     r11, 4\n    mov     rax, [rsi+r11*8+8]\n    jnc     %%1\n\n%endmacro\n\n%macro addmulpropro0 0\n\n    imul    r13, rcx\n    lea     r8, [r8-8]\n\n%endmacro\n\n%macro addmulpro0 0\n\n    mov     r11, r14\n    lea     r8, [r8+8]\n    mov     rax, [rsi+r14*8]\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext0 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mov     r9d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    mov     rax, [rsi+r11*8+32]\n    mul     r13\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, rax\n    adc     r9, rdx\n    imul    r13, rcx\n    add     [r8+r11*8+32], r12\n    adc     r9, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r9\n\n%endmacro\n\n%macro addmulpropro1 0\n\n%endmacro\n\n%macro addmulpro1 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext1 0\n\n    mov     r10d, 0\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    adc     
r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     rax, [rsi+r11*8+24]\n    mov     r12d, 0\n    mul     r13\n    add     [r8+r11*8+16], r10\n    adc     rbx, rax\n    adc     r12, rdx\n    add     [r8+r11*8+24], rbx\n    mov     r13, [r8+r14*8+8]\n    adc     r12, 0\n    sub     r15, 1          ; ***\n    mov     [r8+r14*8], r12\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro2 0\n\n%endmacro\n\n%macro addmulpro2 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext2 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    mov     rax, [rsi+r11*8+16]\n    mul     r13\n    add     [r8+r11*8+8], r9\n    adc     r10, rax\n    mov     ebx, 0\n    adc     rbx, rdx\n    mov     r13, [r8+r14*8+8]\n    add     [r8+r11*8+16], r10\n    adc     rbx, 0\n    mov     [r8+r14*8], rbx\n    sub     r15, 1          ; ***\n    lea     r8, [r8+8]\n\n%endmacro\n\n%macro addmulpropro3 0\n\n%endmacro\n\n%macro addmulpro3 0\n\n    imul    r13, rcx\n    mov     rax, [rsi+r14*8]\n    mov     r11, r14\n    mul     r13\n    mov     r12, rax\n    mov     rax, [rsi+r14*8+8]\n    mov     r9, rdx\n    cmp     r14, 0\n\n%endmacro\n\n%macro addmulnext3 0\n\n    mul     r13\n    add     [r8+r11*8], r12\n    adc     r9, rax\n    mov     r10d, 0\n    adc     r10, rdx\n    add     [r8+r11*8+8], r9\n    adc     r10, 0\n    mov     r13, [r8+r14*8+8]\n    mov     [r8+r14*8], r10\n    lea     r8, [r8+8]\n    sub     r15, 1          ; ***\n\n%endmacro\n\n%macro mpn_addmul_1_int 1\n\n    addmulpropro%1\n    xalign  16\n%%1:addmulpro%1\n    jge     %%2\n    addmulloop %1\n%%2:addmulnext%1\n    jnz     %%1\n\n%endmacro\n\n    LEAF_PROC mpn_redc_1\n    cmp    
 r9, 1\n    je      one\n    FRAME_PROC ?mpn_sandybridge_redc_1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, r8\n    mov     r8, rdx\n    mov     rdx, r9\n    mov     rcx, [rsp+stack_use+0x28]\n    mov     [rsp+stack_use+0x28], r8\n\n    mov     r14, 5\n    sub     r14, rdx\n\n    mov     [rsp+stack_use+0x10], rsi\n    mov     r8, [rsp+stack_use+0x28]\n\n    lea     r8, [r8+rdx*8-40]\n    lea     rsi, [rsi+rdx*8-40]\n    mov     rbp, rdx\n    mov     r15, rdx\n    mov     rax, r14\n    and     rax, 3\n    mov     r13, [r8+r14*8]\n    je      .2\n    jp      .4\n    cmp     rax, 1\n    je      .3\n.1:\tmpn_addmul_1_int 2\n    jmp     .5\n\n    xalign  16\n.2:\tmpn_addmul_1_int 0\n    jmp     .5\n\n    xalign  16\n.3:\tmpn_addmul_1_int 1\n    jmp     .5\n\n    xalign  16\n.4:\tmpn_addmul_1_int 3\n\n    xalign  16\n.5:\tmov     rcx, rbp\n    mov     rdx, [rsp+stack_use+0x28]\n    lea     rsi, [rdx+rbp*8]\n    mov     rbx, rdi\n    mpn_add\n    mov     rdx, [rsp+stack_use+0x10]\n    jnc     .6\n    mov     rsi, rbx\n    mpn_sub\n.6:\tEND_PROC reg_save_list\n\n    xalign  16\none:mov     r9, rdx\n    mov     r11, [r8]\n    mov     r8, [rsp+0x28]\n\n    mov     r10, [r9]\n    imul    r8, r10\n    mov     rax, r8\n    mul     r11\n    add     rax, r10\n    adc     rdx, [r9+8]\n    cmovnc  r11, rax\n    sub     rdx, r11\n    mov     [rcx], rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/rsh_divrem_hensel_qr_1_1.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_1)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_int, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_1, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8]\n\tlea     rsi, [rdx+rax*8]\n    mov     rcx, r9\n\tmov     rdx, r9\n\tmov     r9, 1\n\tsub     r9, rax\n    movsxd  r8, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n    \n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, 
eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\tmov     rax, [rsi+r9*8-8]\n\tsub     rax, r10\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm4, rax\n\tmovq    mm5, mm4\n\tpsrlq   mm4, mm0\n\tpsllq   mm5, mm1\n\tpsrlq   mm5, mm1\n\tmul     rcx\n\tcmp     r9, 0\n\tje      .3\n\tadd     r8, r8\n\t\n\txalign  16\n.1:\tmovq    mm2, mm4\n\tmov     rax, [rsi+r9*8]\n\tsbb     rax, rdx\n\tsbb     r8, r8\n\timul    rax, r11\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-8], mm2\n\tmul     rcx\n\tadd     r8, r8\n\tinc     r9\n\tjnz     .1\n.2:\tmovq    [rdi+r9*8-8], mm4\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n.3:\tmovq    [rdi+r9*8-8], mm4\n\tadd     r8, r8\n\tmov     rax, 0\n\tadc     rax, rdx\n.4:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/rsh_divrem_hensel_qr_1_2.asm",
    "content": "; PROLOGUE(mpn_rsh_divrem_hensel_qr_1_2)\n\n;  Copyright 2009 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rsh_divrem_hensel_qr_1_2(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_uint, mp_limb_t)\n;  rax                                        rdi     rsi        rdx        rcx       r8         r9\n;  rax                                        rcx     rdx         r8         r9 [rsp+40]   [rsp+48]\n\n%include \"yasm_mac.inc\"\n\n    CPU  Athlon64\n    BITS 64\n\n%define reg_save_list rsi, rdi, r12, r13, r14\n\n\tFRAME_PROC mpn_rsh_divrem_hensel_qr_1_2, 0, reg_save_list\n    mov     rax, r8\n\tlea     rdi, [rcx+rax*8-16]\n\tlea     rsi, [rdx+rax*8-16]\n    mov     rcx, r9\n    mov     rdx, r9\n\tmov     r9, 2\n\tsub     r9, rax    \n    mov     r8d, dword [rsp+stack_use+40]\n    mov     r10, qword [rsp+stack_use+48]\n\n\tmov     rax, 64\n\tsub     rax, r8\n\tmovq    mm0, r8\n\tmovq    mm1, rax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, 
eax\n\n\tmov     rax, rdx\n\timul    edx, ecx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11d, eax\n\n\tmov     rax, r11\n\timul    r11d, ecx\n\tmov     rdx, 2\n\tsub     rdx, r11\n\timul    edx, eax\n\n\tmov     rax, rdx\n\timul    rdx, rcx\n\tmov     r11, 2\n\tsub     r11, rdx\n\timul    r11, rax\n\n\tmov     rax, r11\n\tmov     r12, r11\n\tmul     rcx\n\tneg     rdx\n\timul    r12, rdx\n\n; // for the first limb we can not store (as we have to shift) so we need to\n; // do first limb separately , we could do it as normal as an extention of\n; // the loop , but if we do it as a 1 limb inverse then we can start it\n; // eailer , ie interleave it with the calculation of the 2limb inverse\n\n\tmov     r13, r11\n\tmov     r14, r12\n\n\tmov     r11, [rsi+r9*8]\n\tsub     r11, r10\n\tsbb     r10, r10\n\n\timul    r11, r13\n\tmovq    mm2, r11\n\tpsrlq   mm2, mm0\n\tmov     rax, rcx\n\tmul     r11\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\tadd     r10, r10\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, r10\n\n; mov $0,%r10\n\tadd     r9, 2\n\tjc      .2\n\txalign  16\n.1:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n\t; mov %rax,-16(%rdi,%r9,8)\t#// store low quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tmov     r11, [rsi+r9*8+8]\n\tmov     r12, [rsi+r9*8+16]\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r12, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r12, 0\n\tsbb     r10, 0\n\tadd     r9, 2\n\tjnc     .1\n.2:\tmov     r8, r12\n\tmov     rax, r13\n\tmul     r11\n\n; mov %rax,-16(%rdi,%r9,8)\t#// store low 
quotient\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8-16], mm2\n\n\timul    r11, r14\n\timul    r12, r13\n\tadd     rdx, r11\n\tadd     rdx, r12\n\tcmp     r9, 0\n\tjne     .4\n.3:\tmov     r11, [rsi+r9*8+8]\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\n\tmov     rax, rcx\n\tmul     rdx\n\tadd     r10, r10\n\tsbb     r11, 0\n\tsbb     r10, r10\n\tcmp     r8, rax\n\tsbb     r11, rdx\n\tsbb     r10, 0\n\tmov     rax, r11\n\timul    rax, r13\n\t; mov %rax,(%rdi,%r9,8)\n\tmovq    mm3, rax\n\tmovq    mm4, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm4, mm0\n\tpor     mm2, mm3\n\tmovq    [rdi+r9*8], mm2\n\tmovq    [rdi+r9*8+8], mm4\n\n\tmul     rcx\n\tadd     r10, r10\n\tmov     rax, 0\n\tadc     rax, rdx\n\temms\n\tEXIT_PROC reg_save_list\n\n\t; mov %rdx,-8(%rdi,%r9,8)\t#// store high quotient\n.4:\tmovq    mm3, rdx\n\tmovq    mm2, mm3\n\tpsllq   mm3, mm1\n\tpsrlq   mm2, mm0\n\tpor     mm4, mm3\n\tmovq    [rdi+r9*8-8], mm4\n\tmovq    [rdi+r9*8], mm2\n\n\tmov     rax, rcx\n\tmul     rdx\n\tcmp     r8, rax\n\tmov     rax, 0\n\tadc     rax, rdx\n\tsub     rax, r10\n.5:\temms\n\tEND_PROC reg_save_list\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/rshift.asm",
    "content": "; PROLOGUE(mpn_rshift)\n\n;  Version 1.0.4.\n;\n;  Copyright 2008 Jason Moxham\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t  mpn_rshift(mp_ptr, mp_ptr, mp_size_t, mp_uint)\n;  rax                     rdi      rsi        rdx      rcx\n;  rax                     rcx      rdx         r8      r9d\n\n%include \"yasm_mac.inc\"\n\n    CPU  SSE4.2\n    BITS 64\n\n    LEAF_PROC mpn_rshift\n    mov     r10, rcx\n    mov     ecx, r9d\n    cmp     r8, 2\n    ja      .3\n    jz      .2\n.1:\tmov     rdx, [rdx]\n    mov     rax, rdx\n    shr     rdx, cl\n    neg     rcx\n    shl     rax, cl\n    mov     [r10], rdx\n    ret\n\n.2:\tmov     r8, [rdx]\n    mov     r9, [rdx+8]\n    mov     rax, r8\n    mov     r11, r9\n    shr     r8, cl\n    shr     r9, cl\n    neg     rcx\n    shl     r11, cl\n    shl     rax, cl\n    or      r8, r11\n    mov     [r10], r8\n    mov     [r10+8], r9\n    ret\n\n.3:\tmov     r11, rdx\n    mov     rdx, r8\n\n    mov     eax, 64\n    lea     r9, [r11+8]\n    sub     rax, rcx\n    and     r9, -16\n    movq    xmm0, rcx\n    movq    xmm1, rax\n    movdqa  xmm5, [r9]\n    movdqa  xmm3, xmm5\n   
 psllq   xmm5, xmm1\n    movq    rax, xmm5\n    cmp     r11, r9\n    lea     r11, [r11+rdx*8-40]\n    je      .4\n    movq    xmm2, [r9-8]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    psrlq   xmm4, xmm0\n    por     xmm4, xmm5\n    movq    [r10], xmm4\n    lea     r10, [r10+8]\n    dec     rdx\n    movq    rax, xmm2\n.4: lea     r10, [r10+rdx*8-40]\n    psrlq   xmm3, xmm0\n    mov     r8d, 5\n    sub     r8, rdx\n    jnc     .6\n\n    xalign  16\n.5: movdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movdqa  xmm5, [r11+r8*8+32]\n    movdqa  xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    add     r8, 4\n    jnc     .5\n.6: cmp     r8, 2\n    ja      .10\n    jz      .9\n    jp      .8\n.7:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    movq    xmm5, [r11+r8*8+32]\n    movq    xmm3, xmm5\n    psllq   xmm5, xmm1\n    shufpd  xmm2, xmm5, 1\n    psrlq   xmm3, xmm0\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8+32], xmm3\n    ret\n\n    xalign  16\n.8:\tmovdqa  xmm2, [r11+r8*8+16]\n    movdqa  xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    movhpd  [r10+r8*8+24], xmm4\n    ret\n\n    xalign  16\n.9:\tmovq    xmm2, [r11+r8*8+16]\n    movq    xmm4, xmm2\n    psllq   xmm2, xmm1\n    shufpd  
xmm5, xmm2, 1\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    psrlq   xmm4, xmm0\n    psrldq  xmm2, 8\n    por     xmm4, xmm2\n    movq    [r10+r8*8+16], xmm4\n    ret\n\n    xalign  16\n.10:psrldq  xmm5, 8\n    por     xmm3, xmm5\n    movq    [r10+r8*8], xmm3\n    movhpd  [r10+r8*8+8], xmm3\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/skylake/sqr_basecase.asm",
    "content": "\n; void mpn_sqr_basecase(mp_ptr, mp_srcptr, mp_size_t)\n; Linux                    rdi        rsi        rdx\n; Win64                    rcx        rdx         r8\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14\n    \n    text\n\txalign  32\n\tLEAF_PROC mpn_sqr_basecase \n\tcmp     r8, 2\n\tjae     .0\n\tmov     rdx, [rdx]\n\tmulx    rdx, rax, rdx\n\tmov     [rcx], rax\n\tmov     [rcx+8], rdx\n\tret\n.0:\tjne     .1\n\tmov     r11, [rdx+8]\n\tmov     rdx, [rdx]\n\tmulx    r10, r9, r11\n\tmulx    r8, rax, rdx\n\tmov     rdx, r11\n\tmulx    rdx, r11, rdx\n\tadd     r9, r9\n\tadc     r10, r10\n\tadc     rdx, 0\n\tadd     r8, r9\n\tadc     r10, r11\n\tadc     rdx, 0\n\tmov     [rcx], rax\n\tmov     [rcx+8], r8\n\tmov     [rcx+16], r10\n\tmov     [rcx+24], rdx\n\tret\n\n\txalign  32\n.1:\tFRAME_PROC mpn_sqr_basec1, 0, reg_save_list\n    mov     rdi, rcx\n    mov     rsi, rdx\n    mov     rdx, r8  \n    cmp     rdx, 4\n\tjae     .2\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tmov     r9, rdx\n\tmulx    rax, r11, r8\n\tmov     rdx, [rsi+16]\n\tmulx    rcx, r10, r8\n\tmov     r8, r11\n\tadd     r10, rax\n\tadc     rcx, 0\n\tmulx    rax, rdx, r9\n\tadd     rdx, rcx\n\tmov     [rdi+24], rdx\n\tadc     rax, 0\n\tmov     [rdi+32], rax\n\txor     rcx, rcx\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, rdx\n\tmov     [rdi], rax\n\tadd     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+8], r8\n\tmov     [rdi+16], r10\n\tmov     r8, [rdi+24]\n\tmov     r10, [rdi+32]\n\tlea     r11, [rdx+rcx]\n\tadc     r8, r8\n\tadc     r10, r10\n\tsetc    cl\n\tmov     rdx, [rsi+16]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r11\n\tadc     r10, rax\n\tmov     [rdi+24], r8\n\tmov     [rdi+32], r10\n\tadc     rdx, rcx\n\tmov     [rdi+40], rdx\n\tEXIT_PROC reg_save_list\n.2:\n.3:\tmov     r12, 0\n\tsub     r12, rdx\n\tmov     [rsp+stack_use+8], 
r12\n\tmov     r8, [rsi]\n\tmov     rdx, [rsi+8]\n\tlea     rcx, [r12+2]\n\tsar     rcx, 2\n\tinc     r12\n\tmov     r9, rdx\n\ttest    r12b, 1\n\tjnz     .7\n.4:\tmulx    r11, rbx, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], rbx\n\txor     rbx, rbx\n\tmulx    rbp, r10, r8\n\ttest    r12b, 2\n\tjz      .6\n.5:\tlea     rdi, [rdi-8]\n\tlea     rsi, [rsi-8]\n\tjmp     .13\n.6:\tlea     rsi, [rsi+8]\n\tlea     rdi, [rdi+8]\n\tjmp     .11\n.7:\tmulx    rbp, r10, r8\n\tmov     rdx, [rsi+16]\n\tmov     [rdi+8], r10\n\txor     r10, r10\n\tmulx    r11, rbx, r8\n\ttest    r12b, 2\n\tjz      .12\n.8:\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .10\n\txalign  32\n.9:\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n.10:adc     r11, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+8]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.11:add     r10, r11\n\tmov     [rdi+8], r10\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     rbx, rax\n\tmov     rdx, [rsi+16]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n\tadd     rbx, rax\n\tadc     r11, 0\n.12:add     rbx, rbp\n\tmov     [rdi+16], rbx\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r10, rax\n\tmov     rdx, [rsi+24]\n\tadc     rbx, 0\n\tmulx    rbp, rax, r8\n\tadd     r10, rax\n\tadc     rbp, 0\n.13:add     r10, r11\n\tlea     rsi, [rsi+32]\n\tmov     [rdi+24], r10\n\tadc     rbp, 0\n\tinc     rcx\n\tlea     rdi, [rdi+32]\n\tjnz     .9\n.14:mulx    rax, rdx, r9\n\tadd     rbx, rdx\n\tadc     rax, 0\n\tadd     rbx, rbp\n\tmov     [rdi], rbx\n\tadc     rax, 0\n\tmov     [rdi+8], rax\n\tlea     rsi, [rsi+16]\n\tlea     rdi, [rdi-16]\n.15:\n.16:\n\tlea     rsi, [rsi+r12*8]\n\tlea     rdi, [rdi+r12*8+48]\n\tmov     r8, [rsi-8]\n\tadd     r12, 2\n\tcmp     r12, -2\n\tjge     .30\n\tmov     r9, [rsi]\n\tlea     rcx, 
[r12+1]\n\tsar     rcx, 2\n\tmov     rdx, r9\n\ttest    r12b, 1\n\tjnz     .20\n.17:mov     r13, [rdi]\n\tmov     r14, [rdi+8]\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\txor     rbx, rbx\n\ttest    r12b, 2\n\tjnz     .19\n.18:mov     rdx, [rsi+8]\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+16]\n\tjmp     .26\n.19:mov     rdx, [rsi+8]\n\tmov     r13, [rdi+16]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tmulx    rbp, rax, r8\n\tjz      .29\n\tjmp     .24\n.20:mov     r14, [rdi]\n\tmov     r13, [rdi+8]\n\tmulx    rbp, rax, r8\n\tmov     rdx, [rsi+8]\n\tadd     r14, rax\n\tadc     rbp, 0\n\txor     r10, r10\n\tmov     [rdi], r14\n\tmulx    r11, rax, r8\n\ttest    r12b, 2\n\tjz      .22\n.21:mov     r14, [rdi+16]\n\tlea     rdi, [rdi+24]\n\tlea     rsi, [rsi+24]\n\tjmp     .25\n.22:lea     rdi, [rdi+8]\n\tlea     rsi, [rsi+8]\n\tjmp     .27\n\txalign  32\n.23:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.24:add     r14, rax\n\tadc     rbp, 0\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tlea     rdi, [rdi+32]\n\tadd     r14, r11\n\tmov     rdx, [rsi-16]\n\tmov     [rdi-24], r14\n\tadc     rbp, 0\n\tadd     r13, rbx\n\tmov     r14, [rdi-8]\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.25:add     r13, rax\n\tmulx    rbx, rax, r9\n\tadc     r11, 0\n\tadd     r13, rbp\n\tmov     [rdi-16], r13\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     rbx, 0\n\tadd     r14, r10\n\tmov     rdx, [rsi-8]\n\tadc     rbx, 0\n.26:mulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     r13, [rdi]\n\tmulx    r10, rax, r9\n\tadd     r13, rax\n\tadc     r10, 0\n\tadd     r14, r11\n\tmov     [rdi-8], r14\n\tadc     rbp, 0\n\tmov     rdx, [rsi]\n\tadd     r13, rbx\n\tmulx    r11, rax, r8\n\tadc     r10, 0\n.27:add     r13, rax\n\tadc     r11, 0\n\tmulx    rbx, rax, r9\n\tadd     r13, rbp\n\tmov     r14, [rdi+8]\n\tmov     [rdi], r13\n\tmov     r13, [rdi+16]\n\tadc     r11, 0\n\tadd     r14, rax\n\tadc     
rbx, 0\n\tmov     rdx, [rsi+8]\n\tlea     rsi, [rsi+32]\n\tinc     rcx\n\tjnz     .23\n.28:mulx    rbp, rax, r8\n\tadd     r14, r10\n\tadc     rbx, 0\n.29:add     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbx\n\tadc     rax, 0\n\tadd     rbp, rdx\n\tmov     [rdi+16], rbp\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tjmp     .16\n.30:mov     r12, [rsp+stack_use+8]\n\tmov     rdx, [rsi]\n\tjg      .31\n\tmov     r9, rdx\n\tmov     r13, [rdi]\n\tmov     r14, rax\n\tmulx    r11, rax, r8\n\tadd     r13, rax\n\tadc     r11, 0\n\tmov     [rdi], r13\n\tmov     rdx, [rsi+8]\n\tmulx    rbp, rax, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmulx    rax, rdx, r9\n\tadd     r14, r11\n\tmov     [rdi+8], r14\n\tadc     rbp, 0\n\tadd     rdx, rbp\n\tmov     [rdi+16], rdx\n\tadc     rax, 0\n\tmov     [rdi+24], rax\n\tlea     rdi, [rdi+32]\n\tlea     rsi, [rsi+16]\n\tjmp     .32\n.31:mulx    rbp, r14, r8\n\tadd     r14, rax\n\tadc     rbp, 0\n\tmov     [rdi], r14\n\tmov     [rdi+8], rbp\n\tlea     rdi, [rdi+16]\n\tlea     rsi, [rsi+8]\n.32:\n.33:lea     rsi, [rsi+r12*8+8]\n\tlea     rdi, [rdi+r12*8]\n\tlea     rdi, [rdi+r12*8]\n\tinc     r12\n\tmov     rdx, [rsi-8]\n\txor     rbx, rbx\n\tmulx    r10, rax, rdx\n\tmov     [rdi+8], rax\n\tmov     r8, [rdi+16]\n\tmov     r9, [rdi+24]\n\tjmp     .35\n\txalign  16\n.34:mov     r8, [rdi+32]\n\tmov     r9, [rdi+40]\n\tlea     rdi, [rdi+16]\n\tlea     r10, [rdx+rbx]\n.35:adc     r8, r8\n\tadc     r9, r9\n\tsetc    bl\n\tmov     rdx, [rsi]\n\tlea     rsi, [rsi+8]\n\tmulx    rdx, rax, rdx\n\tadd     r8, r10\n\tadc     r9, rax\n\tmov     [rdi+16], r8\n\tmov     [rdi+24], r9\n\tinc     r12\n\tjnz     .34\n.36:adc     rdx, rbx\n\tmov     [rdi+32], rdx\n\tEND_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/store.asm",
    "content": "; PROLOGUE(mpn_store)\n\n;  mpn_store\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_store(mp_ptr, mp_size_t, mp_limb_t)\n;                    rdi,       rsi,       rdx\n;                    rcx,       rdx,        r8\n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n%define\tMOVQ\tmovd\n\n\tLEAF_PROC mpn_store\n\tlea     rcx, [rcx-32]\n\tcmp     rdx, 0\n\tjz      .4\n\tMOVQ    xmm0, r8\n\tmovddup xmm0, xmm0\n\ttest    rcx, 0xF\n\tjz      .1\n\tmov     [rcx+32], r8\n\tlea     rcx, [rcx+8]\n\tsub     rdx, 1\n.1:\tsub     rdx, 4\n\tjc      .3\n\t\n\txalign  16\n.2:\tlea     rcx, [rcx+32]\n\tsub     rdx, 4\n\tmovdqa  [rcx], xmm0\n\tmovdqa  [rcx+16], xmm0\n\tjnc     .2\n.3:\tcmp     rdx, -2\n\tja      .5\n\tjz      .7\n\tjp      .6\n.4:\tret\n\n.5:\tmovdqa  [rcx+32], xmm0\n.6:\tmov     [rcx+rdx*8+56], r8\n\tret\n\n.7:\tmovdqa  [rcx+32], xmm0\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/sub_n.asm",
    "content": "\n;  AMD64 mpn_sub_n\n;  Copyright 2016 Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;\t(rdi,rcx) = (rsi,rcx)-(rdx,rcx)\n;\trax = borrow\n\n%include 'yasm_mac.inc'\n\n%define USE_WIN64\n\n%ifdef USE_WIN64\n    %define SumP    rcx\n    %define Inp1P   rdx\n    %define Inp2P   r8\n    %define Size    r9\n    %define SizeRest r11\n    %define LIMB1   rax\n    %define LIMB2   r10\n%else\n    %define SumP    rdi\n    %define Inp1P   rsi\n    %define Inp2P   rdx\n    %define Size    rcx\n    %define SizeRest r11\n    %define LIMB1   rax\n    %define LIMB2   r8\n%endif\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\n; Skylake has problems sustaining 2 read and 1 write per clock cycle.\n; It sometimes gets into a \"mode\" (for the lack of a better word) where\n; it does not fully utilize port 7, causing store uops to compete with\n; the reads for ports 2,3. We try to alleviate the problem by turning\n; some of the 64-bit writes into 128-bit writes, reducing the number of\n; write instructions. 
Unfortunately, SSE2/AVX2 do not have particularly\n; good instructions for assembling an SSE2 128-bit word from two GPR\n; 64-bit words, so the instruction count is greatly inflated.\n\n%macro  STORE 1\n\tmov\t[SumP %1], LIMB1\n\tmov\t[SumP %1 + 8], LIMB2\n%endmacro\n\n%macro  SSESTORE 1\n\tmovq\txmm0, LIMB1\n\tmovq\txmm1, LIMB2\n\tvpermilpd xmm1, xmm1, 0\n\tpblendw xmm0, xmm1, 0xf0\n\tmovaps\t[SumP %1], xmm0\n%endmacro\n\n    BITS    64\n\n   LEAF_PROC mpn_sub_n\n; Make dest 16-bytes aligned\n\ttest\tSumP, 8\n\tjz\t.aligned\n\tdec\tSize\n\tmov\tSizeRest, Size\n\tand\tSizeRest, 7\n\tshr\tSize, 3\n; Unaligned and Size > 8: do one limb separately, then the normal loop\n\tjnz\t.unaligned\n; Unaligned and Size <= 8: do all with .rest loop\n\tinc\tSizeRest\n\tclc\n\tjmp\t.rest ;ajs:notshortform\n\n.aligned:\n\tmov\tSizeRest, Size\n\tand\tSizeRest, 7\n\tshr\tSize, 3\n\tclc\n\tjz\t.rest ;ajs:notshortform\n\tjmp\t.loop1\n\n.unaligned:\n\tmov\tLIMB1, [Inp1P]\n\tADDSUB\tLIMB1, [Inp2P]\n\tmov\t[SumP], LIMB1\n\tlea\tInp1P, [Inp1P+8]\n\tlea\tInp2P, [Inp2P+8]\n\tlea\tSumP, [SumP+8]\n\n\talign   16\n.loop1:\n\tmov\tLIMB1, [Inp1P]\n\tmov\tLIMB2, [Inp1P+8]\n\tADCSBB\tLIMB1, [Inp2P]\n\tADCSBB\tLIMB2, [Inp2P+8]\n\tSSESTORE +0\n\tmov\tLIMB1, [Inp1P+16]\n\tmov\tLIMB2, [Inp1P+24]\n\tADCSBB\tLIMB1, [Inp2P+16]\n\tADCSBB\tLIMB2, [Inp2P+24]\n\tSTORE +16\n\tmov\tLIMB1, [Inp1P+32]\n\tmov\tLIMB2, [Inp1P+40]\n\tADCSBB\tLIMB1, [Inp2P+32]\n\tADCSBB\tLIMB2, [Inp2P+40]\n\tSTORE +32\n\tmov\tLIMB1, [Inp1P+48]\n\tmov\tLIMB2, [Inp1P+56]\n\tADCSBB\tLIMB1, [Inp2P+48]\n\tADCSBB\tLIMB2, [Inp2P+56]\n\tSTORE +48\n\tlea\tInp1P, [Inp1P+64]\n\tlea\tInp2P, [Inp2P+64]\n\tlea\tSumP, [SumP+64]\n\tdec\tSize\n\tjnz\t.loop1\n\tinc\tSizeRest\n\tdec\tSizeRest\n\tjz\t.end\n.rest:\n\tmov\tLIMB1, [Inp1P]\n\tADCSBB\tLIMB1, [Inp2P]\n\tmov\t[SumP], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, [Inp1P+8]\n\tADCSBB\tLIMB1, [Inp2P+8]\n\tmov\t[SumP+8], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, 
[Inp1P+16]\n\tADCSBB\tLIMB1, [Inp2P+16]\n\tmov\t[SumP+16], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tmov\tLIMB1, [Inp1P+24]\n\tADCSBB\tLIMB1, [Inp2P+24]\n\tmov\t[SumP+24], LIMB1\n\tdec\tSizeRest\n\tjz\t.end\n\tlea\tInp1P, [Inp1P+32]\n\tlea\tInp2P, [Inp2P+32]\n\tlea\tSumP, [SumP+32]\n\tjmp\t.rest\n.end:\n\tmov\teax, 0\n\tadc\teax, eax\n\tret\n"
  },
  {
    "path": "mpn/x86_64w/skylake/sublsh1_n.asm",
    "content": "\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n; mp_limb_t sublsh1_n(mp_ptr Op3, mp_srcptr Op2, mp_srcptr Op1; mp_size_t Size )\n; Linux     RAX       RDI         RSI            RDX            RCX\n; Win7      RAX       RCX         RDX            R8             R9\n;\n; Description:\n; The function shifts Op1 left one bit, subtracts it from Op2, stores the result\n; in Op3 and hands back the total carry. There is a gain in execution speed\n; compared to separate shift and subtract by interleaving the elementary operations\n; and reducing memory access. 
The factor depends on the size of the operands\n; (the cache hierarchy in which the operands can be handled).\n;\n; Caveats:\n; - for asm the processor MUST support LAHF/SAHF in 64 bit mode!\n; - the total carry is in [0..2]!\n;\n; Comments:\n; - asm version implemented, tested & benched on 16.05.2015 by jn\n; - improved asm version implemented, tested & benched on 30.07.2015 by jn\n; - On Nehalem per limb saving is 0.7 cycles in LD1$, LD2$ and 1-2 in LD3$\n; - includes LAHF / SAHF\n; - includes prefetching\n; - includes XMM save & restore\n;\n; Linux: (rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1\n; ============================================================================\n\n%define USE_WIN64\n\n%define ADDSUB add\n%define ADCSBB adc\n\n%include \"yasm_mac.inc\"\n\nBITS 64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n%define Op3     RCX\n%define Op2     RDX\n%define Op1     R8\n%define Size    R9\n\n%define Limb0   RBX\n%define Limb1   RDI\n%define Limb2   RSI\n\n%define Limb3   R10\n%define Limb4   R11\n%define Limb5   R12\n%define Limb6   R13\n%define Limb7   R14\n%define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    RBP\n%endif\n\n\n%macro ACCUMULATE 1\n    mov     rax, [Op2 + 8 * %1]\n    ADCSBB  rax, Limb%1\n    mov     [Op3 + 8 * %1], rax\n%endmacro\n\n\n    align   32\n\n  FRAME_PROC mpn_sublsh1_n, 0, reg_save_list\n\n  %ifdef USE_PREFETCH\n    mov     Offs, PREFETCH_STRIDE   ; Attn: check if redefining Offs\n  %endif\n\n    ; prepare shift & subtraction with loop-unrolling 8\n    xor     Limb0, Limb0\n    lahf                        ; memorize clear carry (from \"xor\" above)\n\n    test    Size, 1\n    je      .n_two\n\n    mov     Limb1, [Op1]\n    shrd    Limb0, Limb1, 63\n\n    mov     rax, [Op2]\n    ADDSUB  rax, Limb0\n    mov     [Op3], rax\n    lahf\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    mov     Limb0, Limb1\n\n  .n_two:\n\n    test    Size, 2\n    je      .n_four\n\n    mov     Limb1, 
[Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    lahf\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    mov     Limb0, Limb2\n\n  .n_four:\n\n    test    Size, 4\n    je      .n_test ;ajs:notshortform\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    mov     Limb3, [Op1+16]\n    mov     Limb4, [Op1+24]\n    shrd    Limb2, Limb3, 63\n    shrd    Limb3, Limb4, 63\n\n    sahf\n    ACCUMULATE 0\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    lahf\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    mov     Limb0, Limb4\n    jmp     .n_test ;ajs:notshortform\n\n    ; main loop\n    ; - 2.40-2.50 cycles per limb in L1D$\n    ; - 2.6       cycles per limb in L2D$\n    ; - 2.80-3.30 cycles per limb in L3D$\n    align   16\n  .n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta [Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; prepare shifted oct-limb from Op1\n    mov     Limb2, [Op1+8]\n    mov     Limb3, [Op1+16]\n    shrd    Limb0, Limb1, 63\n    shrd    Limb1, Limb2, 63\n    shrd    Limb2, Limb3, 63\n    mov     Limb4, [Op1+24]\n    mov     Limb5, [Op1+32]\n    mov     Limb6, [Op1+40]\n    shrd    Limb3, Limb4, 63\n    shrd    Limb4, Limb5, 63\n    shrd    Limb5, Limb6, 63\n    mov     Limb7, [Op1+48]\n    mov     Limb8, [Op1+56]\n    shrd    Limb6, Limb7, 63\n    shrd    Limb7, Limb8, 63\n\n    sahf                        ; restore carry\n    ACCUMULATE 0                ; sub shifted Op1 from Op2 with result in Op3\n    ACCUMULATE 1\n    ACCUMULATE 2\n    ACCUMULATE 3\n    ACCUMULATE 4\n    ACCUMULATE 5\n    ACCUMULATE 6\n    ACCUMULATE 7\n    lahf                        ; remember carry for next round\n\n    add     Op1, 64\n    add     Op2, 64\n    add     Op3, 64\n    mov     Limb0, Limb8\n\n  .n_test:\n\n    sub     
Size, 8\n    jnc     .n_loop\n\n    ; housekeeping - hand back total carry\n    shr     Limb0, 63\n    sahf\n    adc     Limb0, 0            ; Limb0=0/1/2 depending on final carry and shift\n    mov     RAX, Limb0\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/submul_1.asm",
    "content": ";  AMD64 mpn_submul_1 optimised for Intel Haswell.\n\n;  Contributed to the GNU project by Torbjörn Granlund.\n;  Converted to MPIR by Alexander Kruppa.\n\n;  Copyright 2013 Free Software Foundation, Inc.\n\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or modify\n;  it under the terms of either:\n;\n;    * the GNU Lesser General Public License as published by the Free\n;      Software Foundation; either version 3 of the License, or (at your\n;      option) any later version.\n;\n;  or\n;\n;    * the GNU General Public License as published by the Free Software\n;      Foundation; either version 2 of the License, or (at your option) any\n;      later version.\n;\n;  or both in parallel, as here.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n;  for more details.\n;\n;  You should have received copies of the GNU General Public License and the\n;  GNU Lesser General Public License along with the GNU MP Library.  
If not,\n;  see https://www.gnu.org/licenses/.\n;  mp_limb_t mpn_submul_1(mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                       rdi     rsi        rdx        rcx\n;  rax                       rcx     rdx         r8        r9d\n\n%include 'yasm_mac.inc'\n\nBITS 64\n\n%define reg_save_list rbx, rbp, rsi, rdi, r12, r13\n%define RP      rdi\n%define S1P     rsi\n%define Size    rbp\n%define Sizeb   bpl\n%define Limb    rcx\n\n%define Tmp0    r12\n%define Tmp1    r13\n%define Tmp2    rax\n%define Tmp3    rbx\n%define Tmp4    r8\n%define Tmp5    r9\n%define Tmp6    r10\n%define Tmp7    r11\n%define Tmp8    rcx\n\n%define ADDSUB sub\n%define ADCSBB sbb\n\nalign 16\n\nFRAME_PROC mpn_submul_1, 0, reg_save_list\n    mov     rdi, rcx \n    mov     rsi, rdx\n\tmov \trbp, r8 ; mulx requires one input in rdx\n\tmov \trdx, r9\n\n\ttest \tSizeb, 1\n\tjnz \t.Lbx1\n\n.Lbx0:  shr \tSize, 2\n\tjc \t.Lb10 ;ajs:notshortform\n\n.Lb00:\tmulx \tTmp0, Tmp1, [S1P]\n\tmulx \tTmp2, Tmp3, [S1P+8]\n\tadd \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp0, [RP]\n\tmov \tTmp8, [RP+8]\n\tmulx \tTmp4, Tmp5, [S1P+16]\n\tlea \tRP, [RP-16]\n\tlea \tS1P, [S1P+16]\n\tADDSUB \tTmp0, Tmp1\n\tjmp \t.Llo0 ;ajs:notshortform\n\n.Lbx1:\tshr \tSize, 2\n\tjc \t.Lb11\n\n.Lb01:\tmulx \tTmp6, Tmp7, [S1P]\n\tjnz \t.Lgt1\n.Ln1:\tADDSUB \t[RP], Tmp7\n\tmov \teax, 0\n\tadc \tTmp2, Tmp6\n\tjmp \t.Lret ;ajs:notshortform\n\n.Lgt1:\tmulx \tTmp0, Tmp1, [S1P+8]\n\tmulx \tTmp2, Tmp3, [S1P+16]\n\tlea \tS1P, [S1P+24]\n\tadd \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP]\n\tmov \tTmp0, [RP+8]\n\tmov \tTmp8, [RP+16]\n\tlea \tRP, [RP-8]\n\tADDSUB \tTmp6, Tmp7\n\tjmp \t.Llo1\n\n.Lb11:\tmulx \tTmp2, Tmp3, [S1P]\n\tmov \tTmp8, [RP]\n\tmulx \tTmp4, Tmp5, [S1P+8]\n\tlea \tS1P, [S1P+8]\n\tlea \tRP, [RP-24]\n\tinc \tSize\t\n\tADDSUB \tTmp8, Tmp3\n\tjmp \t.Llo3\n\n.Lb10:\tmulx \tTmp4, Tmp5, [S1P]\n\tmulx \tTmp6, Tmp7, [S1P+8]\n\tlea \tRP, [RP-32]\n\tmov \teax, 0\n\tclc\n\tjz \t.Lend 
;ajs:notshortform\t\n\n\talign 16\n.Ltop:\tadc \tTmp5, Tmp2\n\tlea \tRP, [RP+32]\n\tadc \tTmp7, Tmp4\n\tmulx \tTmp0, Tmp1, [S1P+16]\n\tmov \tTmp4, [RP]\n\tmulx \tTmp2, Tmp3, [S1P+24]\n\tlea \tS1P, [S1P+32]\n\tadc \tTmp1, Tmp6\n\tadc \tTmp3, Tmp0\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+8]\n\tmov \tTmp0, [RP+16]\n\tADDSUB \tTmp4, Tmp5\n\tmov \tTmp8, [RP+24]\n\tmov \t[RP], Tmp4\n\tADCSBB \tTmp6, Tmp7\n.Llo1:\tmulx \tTmp4, Tmp5, [S1P]\n\tmov \t[RP+8], Tmp6\n\tADCSBB \tTmp0, Tmp1\n.Llo0:\tmov \t[RP+16], Tmp0\n\tADCSBB \tTmp8, Tmp3\n.Llo3:\tmulx \tTmp6, Tmp7, [S1P+8]\n\tmov \t[RP+24], Tmp8\n\tdec \tSize\n\tjnz \t.Ltop\n\n.Lend:\tadc \tTmp5, Tmp2\n\tadc \tTmp7, Tmp4\n\tmov \tTmp4, [RP+32]\n\tmov \tTmp2, Tmp6\n\tadc \tTmp2, 0\n\tmov \tTmp6, [RP+40]\n\tADDSUB \tTmp4, Tmp5\n\tmov \t[RP+32], Tmp4\n\tADCSBB \tTmp6, Tmp7\n\tmov \t[RP+40], Tmp6\n\tadc \tTmp2, 0\n\n.Lret:\n    END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/sumdiff_n.asm",
    "content": "; ============================================================================\n;  Copyright 2016 Jens Nurmann and Alexander Kruppa\n;  This file is part of the MPIR Library.\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either version 2.1 of the License, or (at\n;  your option) any later version.\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n; mp_limb_t mpn_sumdiff_n(mp_ptr Op3, mp_ptr Op4, mp_srcptr Op1, mp_srcptr Op2, mp_size_t Size)\n; Linux     RAX           RDI         RSI         RDX            RCX            R8\n; Win7      RAX           RCX         RDX         R8             R9             Stack\n;\n; Description:\n; The function adds Op2 to Op1 and stores the result in Op3 while at the same\n; time subtracting Op2 from Op1 with result in Op4. The final carries from\n; addition and subtraction are handed back as a combined mp_limb_t. There is a\n; gain in execution speed compared to separate addition and subtraction by\n; reducing memory access. 
The factor depends on the size of the operands (the\n; cache hierarchy in which the operands can be handled).\n;\n; Comments:\n; - asm version implemented, tested & benched on 10.06.2015 by jn\n; - On Nehalem per limb saving is 0.5 cycle in LD1$, LD2$ and LD3$\n; - includes prefetching\n; - includes XMM save & restore\n; ============================================================================\n\n%define USE_WIN64\n\n%include 'yasm_mac.inc'\n\n%ifdef USE_WIN64\n\n%define reg_save_list RBX, RBP, RSI, RDI, R10, R11, R12, R13, R14, R15\n\n    %define Op3     RCX\n    %define Op4     RDX\n    %define Op1     R8\n    %define Op2     R9\n    %define Size    RBX\n    %define Limb1   RSI\n    %define Limb2   RDI\n%else\n    %define Op3     RDI\n    %define Op4     RSI\n    %define Op1     RDX\n    %define Op2     RCX\n    %define Size    R8\n    %define Limb1   RBX\n    %define Limb2   R9\n%endif\n\n    %define Limb0   RBP\n    %define Limb3   R10\n    %define Limb4   R11\n    %define Limb5   R12\n    %define Limb6   R13\n    %define Limb7   R14\n    %define Limb8   R15\n\n%ifdef USE_PREFETCH\n%define Offs    PREFETCH_STRIDE\n%endif\n\n%define SaveAC  setc    AL\n%define LoadAC  shr     AL, 1\n\n%define SaveSC  sbb     AH, AH\n%define LoadSC  add     AH, AH\n\nBITS 64\n\nalign   32\n\n   FRAME_PROC mpn_sumdiff_n, 0, reg_save_list\n    mov     Size, [rsp+stack_use+40]\n\n    xor     EAX, EAX            ; clear add & sub carry\n\n    shr     Size, 1\n    jnc     .sumdiff_n_two\n\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb2, Limb1\n    add     Limb2, Limb5\n    mov     [Op3], Limb2\n\n    SaveAC\n\n    sub     Limb1, Limb5\n    mov     [Op4], Limb1\n\n    SaveSC\n\n    add     Op1, 8\n    add     Op2, 8\n    add     Op3, 8\n    add     Op4, 8\n\n  .sumdiff_n_two:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_four\n\n    mov     Limb1, [Op1]\n    mov     Limb2, [Op1+8]\n    mov     Limb5, [Op2]\n    mov     Limb6, [Op2+8]\n\n    LoadAC\n\n    mov  
   Limb3, Limb1\n    adc     Limb3, Limb5\n    mov     [Op3], Limb3\n    mov     Limb4, Limb2\n    adc     Limb4, Limb6\n    mov     [Op3+8], Limb4\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n\n    SaveSC\n\n    add     Op1, 16\n    add     Op2, 16\n    add     Op3, 16\n    add     Op4, 16\n\n  .sumdiff_n_four:\n\n    shr     Size, 1\n    jnc     .sumdiff_n_loop_pre ;ajs:notshortform\n\n    LoadAC\n\n    ; slight change of scheme here - avoid too many\n    ; memory to reg or reg to memory moves in a row\n    mov     Limb1, [Op1]\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n\n    SaveAC\n    LoadSC\n\n    sbb     Limb1, Limb5\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    SaveSC\n\n    add     Op1, 32\n    add     Op2, 32\n    add     Op3, 32\n    add     Op4, 32\n \n    test   Size, Size\n  .sumdiff_n_loop_pre:\t\t; If we jump here, ZF=1 iff Size=0\n    jz     .sumdiff_n_post      ;ajs:notshortform\n    LoadAC              ; set carry for addition\n\n    ; main loop - values below are best case - up to 50% fluctuation possible!\n    ; - 3.50      cycles per limb in LD1$\n    ; - 3.50      cycles per limb in LD2$\n    ; - 5.10-5.50 cycles per limb in LD3$\n    align   16\n  .sumdiff_n_loop:\n\n  %ifdef USE_PREFETCH\n    prefetchnta [Op1+Offs]\n    prefetchnta 
[Op2+Offs]\n  %endif\n\n    mov     Limb1, [Op1]        ; add the first quad-limb\n    mov     Limb5, [Op2]\n    mov     Limb0, Limb1\n    adc     Limb0, Limb5\n    mov     [Op3], Limb0\n    mov     Limb2, [Op1+8]\n    mov     Limb6, [Op2+8]\n    mov     Limb0, Limb2\n    adc     Limb0, Limb6\n    mov     [Op3+8], Limb0\n    mov     Limb3, [Op1+16]\n    mov     Limb7, [Op2+16]\n    mov     Limb0, Limb3\n    adc     Limb0, Limb7\n    mov     [Op3+16], Limb0\n    mov     Limb4, [Op1+24]\n    mov     Limb8, [Op2+24]\n    mov     Limb0, Limb4\n    adc     Limb0, Limb8\n    mov     [Op3+24], Limb0\n    lea     Op3, [Op3 + 64]\n\n    SaveAC              ; memorize add-carry\n    LoadSC              ; set carry for subtraction\n\n    sbb     Limb1, Limb5        ; now sub the first quad-limb\n    mov     [Op4], Limb1\n    sbb     Limb2, Limb6\n    mov     [Op4+8], Limb2\n    sbb     Limb3, Limb7\n    mov     [Op4+16], Limb3\n    sbb     Limb4, Limb8\n    mov     [Op4+24], Limb4\n\n    mov     Limb1, [Op1+32]     ; sub the second quad-limb\n    mov     Limb5, [Op2+32]\n    mov     Limb0, Limb1\n    sbb     Limb0, Limb5\n    mov     [Op4+32], Limb0\n    mov     Limb2, [Op1+40]\n    mov     Limb6, [Op2+40]\n    mov     Limb0, Limb2\n    sbb     Limb0, Limb6\n    mov     [Op4+40], Limb0\n    mov     Limb3, [Op1+48]\n    mov     Limb7, [Op2+48]\n    mov     Limb0, Limb3\n    sbb     Limb0, Limb7\n    mov     [Op4+48], Limb0\n    mov     Limb4, [Op1+56]\n    mov     Limb8, [Op2+56]\n    mov     Limb0, Limb4\n    sbb     Limb0, Limb8\n    mov     [Op4+56], Limb0\n    lea     Op4, [Op4 + 64]\n\n    SaveSC                      ; memorize sub-carry\n    LoadAC                      ; set carry for addition\n\n    adc     Limb1, Limb5        ; add the second quad-limb\n    mov     [Op3+32-64], Limb1\n    adc     Limb2, Limb6\n    mov     [Op3+40-64], Limb2\n    adc     Limb3, Limb7\n    mov     [Op3+48-64], Limb3\n    adc     Limb4, Limb8\n    mov     [Op3+56-64], Limb4\n\n    lea     
Op1, [Op1 + 64]\n    lea     Op2, [Op2 + 64]\n\n    dec     Size\n    jnz     .sumdiff_n_loop     ;ajs:notshortform\n\n    SaveAC                      ; memorize add-carry\n    ; hand back carries\n  .sumdiff_n_post:\n\t\t\t\t; AL = add_carry, AH = -sub_carry\n    LoadSC\t\t\t; AL = add_carry, CY = sub_carry\n    adc     AL, AL\t\t; AL = 2*add_carry + sub_carry\n    movsx   RAX, AL\n\n   END_PROC reg_save_list\n"
  },
  {
    "path": "mpn/x86_64w/skylake/xnor_n.asm",
    "content": "; PROLOGUE(mpn_xnor_n)\n\n;  mpn_xnor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xnor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                     rcx        rdx         r8         r9\n;                     rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xnor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tpcmpeqb xmm4, xmm4\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tadd     r10, 4\n\tmovdqu  xmm3, [rdx+r10*8+16-32]\n\tpxor    xmm1, xmm3\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8-32], xmm0\n\tpxor    xmm1, xmm4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tmov     r9, [rdx+r10*8+16]\n\txor     rax, r9\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tnot     
rax\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tpxor    xmm0, xmm4\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tnot     rax\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/skylake/xor_n.asm",
    "content": "; PROLOGUE(mpn_xor_n)\n\n;  mpn_xor_n\n\n;  Copyright 2009 Jason Moxham\n\n;  This file is part of the MPIR Library.\n\n;  The MPIR Library is free software; you can redistribute it and/or modify\n;  it under the terms of the GNU Lesser General Public License as published\n;  by the Free Software Foundation; either verdxon 2.1 of the License, or (at\n;  your option) any later verdxon.\n\n;  The MPIR Library is distributed in the hope that it will be useful, but\n;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n;  License for more details.\n\n;  You should have received a copy of the GNU Lesser General Public License\n;  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n;  Boston, MA 02110-1301, USA.\n\n;  void mpn_xor_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)\n;                    rcx        rdx         r8         r9\n;                    rdi        rsi        rdx        rcx \n\n%include 'yasm_mac.inc'\n\n    CPU  nehalem\n    BITS 64\n\n\tLEAF_PROC mpn_xor_n\n\tmovsxd\tr9, r9d\n\tmov     r10, 3\n\tlea     rdx, [rdx+r9*8-24]\n\tlea     r8, [r8+r9*8-24]\n\tlea     rcx, [rcx+r9*8-24]\n\tsub     r10, r9\n\tjnc     .2\n\t\n\txalign  16\n.1:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm1, [r8+r10*8+16]\n\tmovdqu  xmm3, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tpxor    xmm1, xmm3\n\tadd     r10, 4\n\tmovdqu  [rcx+r10*8+16-32], xmm1\n\tjnc     .1\n.2:\tcmp     r10, 2\n\tja      .4\n\tje      .6\n\tjp      .5\n.3:\tmovdqu  xmm0, [r8+r10*8]\n\tmov     rax, [r8+r10*8+16]\n\tmov     r9, [rdx+r10*8+16]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\txor     rax, r9\n\tmov     [rcx+r10*8+16], rax\n.4:\tret\n\n.5:\tmovdqu  xmm0, [r8+r10*8]\n\tmovdqu  xmm2, [rdx+r10*8]\n\tpxor    
xmm0, xmm2\n\tmovdqu  [rcx+r10*8], xmm0\n\tret\n\n.6:\tmov     rax, [r8+r10*8]\n\tmov     r9, [rdx+r10*8]\n\txor     rax, r9\n\tmov     [rcx+r10*8], rax\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sqr_basecase.asm",
    "content": "\n; void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t)\n;  rax                      rdi        rsi        rdx\n;  rax                      rcx        rdx         r8\n\n%include 'yasm_mac.inc'\n\n%define reg_save_list   rbx, rbp, rsi, rdi, r12, r13, r14\n\n%macro jmp_tab 2\n\t%ifdef PIC\n\t\tdd\t%1 - %2\n\t%else\n\t\tdq\t%1\n\t%endif\n%endmacro\n\n%macro\tdo_jmp 0\n\tlea     rax, [rel L0]\n\t%ifdef PIC\n\t\tmov     r10d, [rax+rcx*4]\n\t\tadd     rax, r10\n\t\tjmp     rax\n\t%else\n\t\tjmp     [rax+rcx*8]\n\t%endif\n%endmacro\n\n\ttext\n\txalign  16\n\tWIN64_GCC_PROC mpn_sqr_basecase, 3\n\tmov     ecx, edx\n\tmov     r11d, edx\n\tand     ecx, 3\n\tcmp     edx, 4\n\tlea     r8, [rcx+4]\n\tcmovg   rcx, r8\n\tdo_jmp\n\n\txalign  8\nL0:\tjmp_tab L4, L0\n\tjmp_tab L1, L0\n\tjmp_tab L2, L0\n\tjmp_tab L3, L0\n\tjmp_tab\tL5, L0\n\tjmp_tab L8, L0\n\tjmp_tab L12, L0\n\tjmp_tab L15, L0\n\nL1:\tmov     rax, [rsi]\n\tmul     rax\n\tmov     [rdi], rax\n\tmov     [rdi+8], rdx\n\tWIN64_GCC_EXIT\n\nL2:\tmov     rax, [rsi]\n\tmov     r8, rax\n\tmul     rax\n\tmov     r11, [rsi+8]\n\tmov     [rdi], rax\n\tmov     rax, r11\n\tmov     r9, rdx\n\tmul     rax\n\tmov     r10, rax\n\tmov     rax, r11\n\tmov     r11, rdx\n\tmul     r8\n\txor     r8, r8\n\tadd     r9, rax\n\tadc     r10, rdx\n\tadc     r11, r8\n\tadd     r9, rax\n\tmov     [rdi+8], r9\n\tadc     r10, rdx\n\tmov     [rdi+16], r10\n\tadc     r11, r8\n\tmov     [rdi+24], r11\n\tWIN64_GCC_EXIT\n\nL3:\tmov     rax, [rsi]\n\tmov     r10, rax\n\tmul     rax\n\tmov     r11, [rsi+8]\n\tmov     [rdi], rax\n\tmov     rax, r11\n\tmov     [rdi+8], rdx\n\tmul     rax\n\tmov     rcx, [rsi+16]\n\tmov     [rdi+16], rax\n\tmov     rax, rcx\n\tmov     [rdi+24], rdx\n\tmul     rax\n\tmov     [rdi+32], rax\n\tmov     [rdi+40], rdx\n\tmov     rax, r11\n\tmul     r10\n\tmov     r8, rax\n\tmov     rax, rcx\n\tmov     r9, rdx\n\tmul     r10\n\txor     r10, r10\n\tadd     r9, rax\n\tmov     rax, r11\n\tmov     r11, r10\n\tadc     r10, 
rdx\n\tmul     rcx\n\tadd     r10, rax\n\tadc     rdx, r11\n\tadd     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     rdx, rdx\n\tadc     r11, r11\n\tadd     [rdi+8], r8\n\tadc     [rdi+16], r9\n\tadc     [rdi+24], r10\n\tadc     [rdi+32], rdx\n\tadc     [rdi+40], r11\n\tWIN64_GCC_EXIT\n\nL4:\tmov     rax, [rsi]\n\tmov     r11, rax\n\tmul     rax\n\tmov     rbx, [rsi+8]\n\tmov     [rdi], rax\n\tmov     rax, rbx\n\tmov     [rdi+8], rdx\n\tmul     rax\n\tmov     [rdi+16], rax\n\tmov     [rdi+24], rdx\n\tmov     rax, [rsi+16]\n\tmul     rax\n\tmov     [rdi+32], rax\n\tmov     [rdi+40], rdx\n\tmov     rax, [rsi+24]\n\tmul     rax\n\tmov     [rdi+48], rax\n\tmov     rax, rbx\n\tmov     [rdi+56], rdx\n\tmul     r11\n\tmov     r8, rax\n\tmov     r9, rdx\n\tmov     rax, [rsi+16]\n\tmul     r11\n\txor     r10, r10\n\tadd     r9, rax\n\tadc     r10, rdx\n\tmov     rax, [rsi+24]\n\tmul     r11\n\txor     r11, r11\n\tadd     r10, rax\n\tadc     r11, rdx\n\tmov     rax, [rsi+16]\n\tmul     rbx\n\txor     rcx, rcx\n\tadd     r10, rax\n\tadc     r11, rdx\n\tadc     rcx, 0\n\tmov     rax, [rsi+24]\n\tmul     rbx\n\tadd     r11, rax\n\tadc     rcx, rdx\n\tmov     rdx, [rsi+16]\n\tmov     rax, [rsi+24]\n\tmul     rdx\n\tadd     rcx, rax\n\tadc     rdx, 0\n\tadd     r8, r8\n\tadc     r9, r9\n\tadc     r10, r10\n\tadc     r11, r11\n\tadc     rcx, rcx\n\tmov     eax, 0\n\tadc     rdx, rdx\n\tadc     rax, rax\n\tadd     [rdi+8], r8\n\tadc     [rdi+16], r9\n\tadc     [rdi+24], r10\n\tadc     [rdi+32], r11\n\tadc     [rdi+40], rcx\n\tadc     [rdi+48], rdx\n\tadc     [rdi+56], rax\n\tWIN64_GCC_EXIT\n\nL5:\tlea     r12, [rdi+r11*8-16]\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tlea     rsi, [rsi+r11*8]\n\tlea     r8, [r11-4]\n\txor     r9d, r9d\n\tsub     r9, r11\n\tmul     r13\n\txor     ebp, ebp\n\tmov     rbx, rax\n\tmov     rax, [rsi+r9*8+16]\n\tmov     r10, rdx\n\tjmp     .7\n\n\txalign  16\n.6:\tadd     rbp, rax\n\tmov     [r12+r9*8], r10\n\tmov     rax, [rsi+r9*8]\n\tadc 
    rcx, rdx\n\txor     ebx, ebx\n\tmul     r13\n\txor     r10d, r10d\n\tmov     [r12+r9*8+8], rbp\n\tadd     rcx, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8+8]\n\tmov     [r12+r9*8+16], rcx\n\txor     ebp, ebp\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8+16]\n\tadc     r10, rdx\n.7:\txor     ecx, ecx\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi+r9*8+24]\n\tadc     rbp, rdx\n\tmov     [r12+r9*8+24], rbx\n\tmul     r13\n\tadd     r9, 4\n\tjs      .6\n\tadd     rbp, rax\n\tmov     [r12], r10\n\tadc     rcx, rdx\n\tmov     [r12+8], rbp\n\tmov     [r12+16], rcx\n\tlea\t\tr12, [r12+16]\n\tlea     rsi, [rsi-8]\n\tjmp     L18\n\nL8:\tlea     r12, [rdi+r11*8+8]\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tlea     rsi, [rsi+r11*8+8]\n\tlea     r8, [r11-3]\n\tlea     r9, [r11-3]\n\tneg     r9\n\tmov     r14, rax\n\tmul     r13\n\tmov     rcx, rdx\n\txor     ebp, ebp\n\tmov     [rdi+8], rax\n\tjmp     .10\n\n\txalign  16\n.9:\tmul     r14\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tmov     rax, [rsi+r9*8-24]\n\tmov     ebp, 0\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8-24]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r14\n\tadd     rcx, rax\n\tmov     [r12+r9*8-24], rbx\n\tadc     rbp, rdx\n.10:mov     rax, [rsi+r9*8-16]\n\tmul     r13\n\tmov     r10d, 0\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8-16]\n\tadc     r10d, 0\n\tmov     ebx, 0\n\tmov     [r12+r9*8-16], rcx\n\tmul     r14\n\tadd     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tmov     ecx, 0\n\tmul     r13\n\tadd     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tadc     ebx, 0\n\tmul     r14\n\tadd     r10, rax\n\tmov     [r12+r9*8-8], rbp\n\tadc     rbx, rdx\n.11:mov     rax, [rsi+r9*8]\n\tmul     r13\n\tadd     r10, rax\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tadd     r9, 4\n\tmov     rax, [rsi+r9*8-32]\n\tmov     [r12+r9*8-32], r10\n\tjs      .9\n\tmul     r14\n\tadd     rbx, rax\n\tadc     rcx, 
rdx\n\tmov     [r12-8], rbx\n\tmov     [r12], rcx\n\tlea     rsi, [rsi-16]\n\tjmp     L24\n\nL12:lea     r12, [rdi+r11*8-16]\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tlea     rsi, [rsi+r11*8]\n\tlea     r8, [r11-4]\n\tlea     r9, [r11-2]\n\tneg     r9\n\tmul     r13\n\tmov     rbp, rax\n\tmov     rax, [rsi+r9*8]\n\tmov     rcx, rdx\n\tjmp     .14\n\t\n\txalign  16\n.13:add     rbp, rax\n\tmov     [r12+r9*8], r10\n\tmov     rax, [rsi+r9*8]\n\tadc     rcx, rdx\n.14:xor     ebx, ebx\n\tmul     r13\n\txor     r10d, r10d\n\tmov     [r12+r9*8+8], rbp\n\tadd     rcx, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8+8]\n\tmov     [r12+r9*8+16], rcx\n\txor     ebp, ebp\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8+16]\n\tadc     r10, rdx\n\txor     ecx, ecx\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi+r9*8+24]\n\tadc     rbp, rdx\n\tmov     [r12+r9*8+24], rbx\n\tmul     r13\n\tadd     r9, 4\n\tjs      .13\n\tadd     rbp, rax\n\tmov     [r12], r10\n\tadc     rcx, rdx\n\tmov     [r12+8], rbp\n\tmov     [r12+16], rcx\n\tlea\t\tr12, [r12+16]\n\tlea     rsi, [rsi-8]\n\tjmp     L21\n\nL15:lea     r12, [rdi+r11*8+8]\n\tmov     r13, [rsi]\n\tmov     rax, [rsi+8]\n\tlea     rsi, [rsi+r11*8+8]\n\tlea     r8, [r11-5]\n\tlea     r9, [r11-1]\n\tneg     r9\n\tmov     r14, rax\n\tmul     r13\n\tmov     r10, rdx\n\txor     ebx, ebx\n\txor     ecx, ecx\n\tmov     [rdi+8], rax\n\tjmp     .17\n\n\txalign  16\n.16:mul     r14\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tmov     rax, [rsi+r9*8-24]\n\tmov     ebp, 0\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8-24]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r14\n\tadd     rcx, rax\n\tmov     [r12+r9*8-24], rbx\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8-16]\n\tmul     r13\n\tmov     r10d, 0\n\tadd     rcx, rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8-16]\n\tadc     r10d, 0\n\tmov     ebx, 0\n\tmov     [r12+r9*8-16], rcx\n\tmul     r14\n\tadd     rbp, rax\n\tmov     rax, 
[rsi+r9*8-8]\n\tadc     r10, rdx\n\tmov     ecx, 0\n\tmul     r13\n\tadd     rbp, rax\n\tmov     rax, [rsi+r9*8-8]\n\tadc     r10, rdx\n\tadc     ebx, 0\n\tmul     r14\n\tadd     r10, rax\n\tmov     [r12+r9*8-8], rbp\n\tadc     rbx, rdx\n.17:mov     rax, [rsi+r9*8]\n\tmul     r13\n\tadd     r10, rax\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tadd     r9, 4\n\tmov     rax, [rsi+r9*8-32]\n\tmov     [r12+r9*8-32], r10\n\tjs      .16\n\tmul     r14\n\tadd     rbx, rax\n\tadc     rcx, rdx\n\tmov     [r12-8], rbx\n\tmov     [r12], rcx\n\tlea     rsi, [rsi-16]\n\tjmp     L21\nL18:lea     r9, [r8+4]\n\tneg     r9\n\tmov     r13, [rsi+r9*8+16]\n\tmov     r14, [rsi+r9*8+24]\n\tmov     rax, [rsi+r9*8+24]\n\tmul     r13\n\txor     r10d, r10d\n\tadd     [r12+r9*8+24], rax\n\tadc     r10, rdx\n\txor     ebx, ebx\n\txor     ecx, ecx\n\tjmp     .20\n\n\txalign  16\n.19:add     [r12+r9*8], r10\n\tadc     rbx, rax\n\tmov     rax, [rsi+r9*8+8]\n\tadc     rcx, rdx\n\tmov     ebp, 0\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8+8]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r14\n\tadd     [r12+r9*8+8], rbx\n\tadc     rcx, rax\n\tadc     rbp, rdx\n\tmov     rax, [rsi+r9*8+16]\n\tmov     r10d, 0\n\tmul     r13\n\tadd     rcx, rax\n\tmov     rax, [rsi+r9*8+16]\n\tadc     rbp, rdx\n\tadc     r10d, 0\n\tmul     r14\n\tadd     [r12+r9*8+16], rcx\n\tadc     rbp, rax\n\tmov     rax, [rsi+r9*8+24]\n\tadc     r10, rdx\n\tmul     r13\n\tmov     ebx, 0\n\tadd     rbp, rax\n\tadc     r10, rdx\n\tmov     ecx, 0\n\tmov     rax, [rsi+r9*8+24]\n\tadc     ebx, 0\n\tmul     r14\n\tadd     [r12+r9*8+24], rbp\n\tadc     r10, rax\n\tadc     rbx, rdx\n.20:mov     rax, [rsi+r9*8+32]\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi+r9*8+32]\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tmul     r14\n\tadd     r9, 4\n\tjs      .19\n\tadd     [r12], r10\n\tadc     rbx, rax\n\tadc     rcx, rdx\n\tmov     [r12+8], rbx\n\tmov     [r12+16], rcx\n\tlea\t\tr12, [r12+16]\n\tadd     r8d, -2\nL21:lea    
 r9, [r8+2]\n\tneg     r9\n\tmov     r13, [rsi+r9*8]\n\tmov     r14, [rsi+r9*8+8]\n\tmov     rax, [rsi+r9*8+8]\n\tmul     r13\n\txor     ecx, ecx\n\tadd     [r12+r9*8+8], rax\n\tadc     rcx, rdx\n\txor     ebp, ebp\n\tjmp     .23\n\n\txalign  16\n.22:add     [r12+r9*8], r10\n\tadc     rbx, rax\n\tmov     rax, [rsi+r9*8+8]\n\tadc     rcx, rdx\n\tmov     ebp, 0\n\tmul     r13\n\tadd     rbx, rax\n\tmov     rax, [rsi+r9*8+8]\n\tadc     rcx, rdx\n\tadc     ebp, 0\n\tmul     r14\n\tadd     [r12+r9*8+8], rbx\n\tadc     rcx, rax\n\tadc     rbp, rdx\n.23:mov     rax, [rsi+r9*8+16]\n\tmov     r10d, 0\n\tmul     r13\n\tadd     rcx, rax\n\tmov     rax, [rsi+r9*8+16]\n\tadc     rbp, rdx\n\tadc     r10d, 0\n\tmul     r14\n\tadd     [r12+r9*8+16], rcx\n\tadc     rbp, rax\n\tmov     rax, [rsi+r9*8+24]\n\tadc     r10, rdx\n\tmul     r13\n\tmov     ebx, 0\n\tadd     rbp, rax\n\tadc     r10, rdx\n\tmov     ecx, 0\n\tmov     rax, [rsi+r9*8+24]\n\tadc     ebx, 0\n\tmul     r14\n\tadd     [r12+r9*8+24], rbp\n\tadc     r10, rax\n\tadc     rbx, rdx\n\tmov     rax, [rsi+r9*8+32]\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi+r9*8+32]\n\tadc     rbx, rdx\n\tadc     ecx, 0\n\tmul     r14\n\tadd     r9, 4\n\tjs      .22\n\tadd     [r12], r10\n\tadc     rbx, rax\n\tadc     rcx, rdx\n\tmov     [r12+8], rbx\n\tmov     [r12+16], rcx\n\tlea\t\tr12, [r12+16]\nL24:add     r8d, -2\n\tjne     L18\n\tmov     r13, [rsi-16]\n\tmov     r14, [rsi-8]\n\tmov     rax, [rsi-8]\n\tmul     r13\n\txor     r10d, r10d\n\tadd     [r12-8], rax\n\tadc     r10, rdx\n\txor     ebx, ebx\n\txor     ecx, ecx\n\tmov     rax, [rsi]\n\tmul     r13\n\tadd     r10, rax\n\tmov     rax, [rsi]\n\tadc     rbx, rdx\n\tmul     r14\n\tadd     [r12], r10\n\tadc     rbx, rax\n\tadc     rcx, rdx\n\tmov     [r12+8], rbx\n\tmov     [r12+16], rcx\n\tlea     r9, [r11+r11-4]\n\tmov     r11, [rdi+8]\n\tlea     rsi, [rsi-8]\n\tlea     rdi, [rdi+r9*8]\n\tneg     r9\n\tmov     rax, [rsi+r9*4]\n\tmul     rax\n\ttest    r9b, 2\n\tjnz     
.26\n.25:add     r11, r11\n\tsbb     ebx, ebx\n\tadd     r11, rdx\n\tmov     [rdi+r9*8], rax\n\tjmp     .28\n.26:add     r11, r11\n\tsbb     ebp, ebp\n\tadd     r11, rdx\n\tmov     [rdi+r9*8], rax\n\tlea     r9, [r9-2]\n\tjmp     .29\n\n\txalign  16\n.27:mov     rax, [rsi+r9*4]\n\tmul     rax\n\tadd     ebp, ebp\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+r9*8], r10\n.28:mov     [rdi+r9*8+8], r11\n\tmov     r10, [rdi+r9*8+16]\n\tadc     r10, r10\n\tmov     r11, [rdi+r9*8+24]\n\tadc     r11, r11\n\tnop\n\tsbb     ebp, ebp\n\tmov     rax, [rsi+r9*4+8]\n\tmul     rax\n\tadd     ebx, ebx\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi+r9*8+16], r10\n.29:mov     [rdi+r9*8+24], r11\n\tmov     r10, [rdi+r9*8+32]\n\tadc     r10, r10\n\tmov     r11, [rdi+r9*8+40]\n\tadc     r11, r11\n\tsbb     ebx, ebx\n\tadd     r9, 4\n\tjs      .27\n\tmov     rax, [rsi]\n\tmul     rax\n\tadd     ebp, ebp\n\tadc     r10, rax\n\tadc     r11, rdx\n\tmov     [rdi], r10\n\tmov     [rdi+8], r11\n\tmov     r10, [rdi+16]\n\tadc     r10, r10\n\tsbb     ebp, ebp\n\tneg     ebp\n\tmov     rax, [rsi+8]\n\tmul     rax\n\tadd     ebx, ebx\n\tadc     r10, rax\n\tadc     rdx, rbp\n\tmov     [rdi+16], r10\n\tmov     [rdi+24], rdx\n\tWIN64_GCC_END\n\n\tend\n"
  },
  {
    "path": "mpn/x86_64w/sub_err1_n.asm",
    "content": "; PROLOGUE(mpn_sub_err1_n)\n\n;  Copyright (C) 2009, David Harvey\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n;  mp_limb_t mpn_add_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  mp_limb_t mpn_sub_err1_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_limb_t)\n;  rax                         rdi     rsi     rdx     rcx       r8        r9     8(rsp)\n;  rax                         rcx     rdx      r8      r9 [rsp+40]  [rsp+48]   [rsp+56]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   rbx, rsi, rdi, rbp, r12\n\n%macro fun 2\n\txalign  16\n    FRAME_PROC %1, 0, reg_save_list\n 
   mov     rax, qword [rsp+stack_use+48]\n    lea     rdi, [rcx+rax*8]\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    mov     rcx, r9\n    mov     r9, rax\n    mov     r8, [rsp+stack_use+40]\n    mov     rax, [rsp+stack_use+56]\n    \n\txor     rbx, rbx\n\txor     rbp, rbp\n\ttest    r9, 1\n\tjnz     %%2\n%%1:lea     r8, [r8+r9*8-8]\n\tneg     r9\n\tjmp     %%3\n\n\txalign  16\n%%2:lea     r8, [r8+r9*8-16]\n\tneg     r9\n\tshr     rax, 1\n\tmov     r12, [rsi+r9*8]\n\t%2      r12, [rdx+r9*8]\n\tcmovc   rbx, [r8+8]\n\tmov     [rdi+r9*8], r12\n\tsetc    al\n\tinc     r9\n\tjz      %%4\n\n\txalign  16\n%%3:mov     r12, [rsi+r9*8]\n\tshr     rax, 1\n\t%2      r12, [rdx+r9*8]\n\tmov     r11, 0\n\tmov     [rdi+r9*8], r12\n\tmov     r10, 0\n\tmov     r12, [rsi+r9*8+8]\n\tcmovc   r10, [r8]\n\t%2      r12, [rdx+r9*8+8]\n\tcmovc   r11, [r8-8]\n\tsetc    al\n\tadd     rbx, r10\n\tadc     rbp, 0\n\tadd     rbx, r11\n\tmov     [rdi+r9*8+8], r12\n\tadc     rbp, 0\n\tadd     r9, 2\n\tlea     r8, [r8-16]\n\tjnz     %%3\n%%4:mov     [rcx], rbx\n\tmov     [rcx+8], rbp\n    END_PROC reg_save_list\n%endmacro\n\n\n    CPU  Athlon64\n    BITS 64\n;\t\tglobal __gmpn_sub_err1_n\n\n    fun mpn_sub_err1_n, sbb\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/sub_err2_n.asm",
    "content": "; PROLOGUE(mpn_sub_err2_n)\n;\n;  AMD64 mpn_add_err2_n, mpn_sub_err2_n\n;\n;  Copyright (C) 2009, David Harvey\n;\n;  Windows Conversion Copyright 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n;  mp_limb_t mpn_add_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);\n;  mp_limb_t mpn_sub_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);\n;  rax                          rdi     rsi     rdx     rcx       r8      r9     8(rsp)    16(rsp)\n;  rax                          rcx     rdx      r8      r9 [rsp+40] [rsp+48]  [rsp+56]   [rsp+64]\n\n%include \"yasm_mac.inc\"\n\n%define reg_save_list   
rbx, rsi, rdi, rbp, r12, r13, r14\n\n%macro fun 2\n\txalign  16\n    FRAME_PROC %1, 0, reg_save_list\n    mov     rax, qword [rsp+stack_use+56]\n    lea     rdi, [rcx+rax*8]\n    lea     rsi, [rdx+rax*8]\n    lea     rdx, [r8+rax*8]\n    mov     rcx, r9\n    mov     r10, rax\n    mov     r8, [rsp+stack_use+40]\n    mov     r9, [rsp+stack_use+48]\n    mov     rax, [rsp+stack_use+64]\n    \n\txor     rbp, rbp\n\txor     r11, r11\n\txor     r12, r12\n\txor     r13, r13\n\tsub     r9, r8\n\ttest    r10, 1\n\tjnz     %%1\n\tlea     r8, [r8+r10*8-8]\n\tneg     r10\n\tjmp     %%2\n\n\txalign  16\n%%1:lea     r8, [r8+r10*8-16]\n\tneg     r10\n\tshr     rax, 1\n\tmov     rbx, [rsi+r10*8]\n\t%2      rbx, [rdx+r10*8]\n\tcmovc   rbp, [r8+8]\n\tcmovc   r12, [r8+r9+8]\n\tmov     [rdi+r10*8], rbx\n\tsbb     rax, rax\n\tinc     r10\n\tjz      %%3\n\n\txalign  16\n%%2:mov     rbx, [rsi+r10*8]\n\tshr     rax, 1\n\t%2      rbx, [rdx+r10*8]\n\tmov     [rdi+r10*8], rbx\n\tsbb     r14, r14\n\tmov     rbx, [rsi+r10*8+8]\n\t%2      rbx, [rdx+r10*8+8]\n\tmov     [rdi+r10*8+8], rbx\n\tsbb     rax, rax\n    mov     rbx, [r8]\n\tand     rbx, r14\n\tadd     rbp, rbx\n\tadc     r11, 0\n    and     r14, [r8+r9]\n\tadd     r12, r14\n\tadc     r13, 0\n    mov     rbx, [r8-8]\n\tand     rbx, rax\n\tadd     rbp, rbx\n\tadc     r11, 0\n    mov     rbx, [r8+r9-8]\n\tand     rbx, rax\n\tadd     r12, rbx\n\tadc     r13, 0\n\tadd     r10, 2\n\tlea     r8, [r8-16]\n\tjnz     %%2\n%%3:mov     [rcx], rbp\n\tmov     [rcx+8], r11\n\tmov     [rcx+16], r12\n\tmov     [rcx+24], r13\n\tand     eax, 1\n    END_PROC reg_save_list\n%endmacro\n\n\n    CPU  Athlon64\n    BITS 64\n;\t\tglobal __gmpn_sub_err2_n\n\n    fun mpn_sub_err2_n, sbb\n    \n    end\n"
  },
  {
    "path": "mpn/x86_64w/udiv_qrnnd.asm",
    "content": "; PROLOGUE(mpn_udiv_qrnnd)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_udiv_qrnnd(mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t)\n;  rax                         rdi        rsi        rdx        rcx\n;  rax                         rcx        rdx         r8         r9\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_udiv_qrnnd\n    mov     rax,r8\n    div     r9\n    mov     [rcx],rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86_64w/umul_ppmm.asm",
    "content": "; PROLOGUE(mpn_umul_ppmm)\n\n;  Copyright 2008 Brian Gladman\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n;\n;  mp_limb_t mpn_umul_ppmm(mp_ptr, mp_limb_t, mp_limb_t)\n;  rax                        rdi        rsi        rdx\n;  rax                        rcx        rdx         r8\n\n%include \"yasm_mac.inc\"\n\n    BITS 64\n\n    LEAF_PROC mpn_umul_ppmm\n    mov     rax,rdx\n    mul     r8\n    mov     [rcx],rax\n    mov     rax,rdx\n    ret\n\n    end\n"
  },
  {
    "path": "mpn/x86w/add_n.asm",
    "content": "\n;  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n; mp_limb_t M4_function_n (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,\n; mp_size_t size); \n; mp_limb_t M4_function_nc (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,\n; mp_size_t size,mp_limb_t carry); \n\n%define PARAM_SPACE   20\n%define PARAM_CARRY   esp+frame+20\n%define PARAM_SIZE    esp+frame+16\n%define PARAM_SRC2    esp+frame+12\n%define PARAM_SRC1    esp+frame+8\n%define PARAM_DST     esp+frame+4\n\n%macro\tmac_sub 4\n\n\tglobal  %1%4\n%ifdef\tDLL\n\texport\t%1%4\n%endif\n\n    align   8\n%1%4:\n%assign\tframe   0\n\tFR_push edi\n\tFR_push esi\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC1]\n    mov     edx,[PARAM_SRC2]\n    mov     ecx,[PARAM_SIZE]\n    mov     eax,ecx\n    shr     ecx,3\t\t\t\t; compute count for unrolled %%4 \n    neg     eax\n    and     eax,7\t\t\t\t; get index where to start %%4 \n    jz      %%3\t\t\t\t\t; necessary special case for 0 \n    inc     ecx\t\t\t\t\t; adjust %%4 count \n    shl     
eax,2\t\t\t\t; adjustment for pointers... \n    sub     edi,eax\t\t\t\t; ... since they are offset ... \n    sub     esi,eax\t\t\t\t; ... by a constant when we ... \n    sub     edx,eax\t\t\t\t; ... enter the %%4 \n    shr     eax,2\t\t\t\t; restore previous value \n\n; Calculate start address in %%4\n\n%ifdef\tPIC\n    call    %%1\n%%1:\n\tlea     eax,[%%4-%%1-3+eax+eax*8]\n    add     eax,[esp]\n    add     esp,4\n%else\n\tlea     eax,[%%4-3+eax+eax*8]\n%endif\n\n; These lines initialize carry from the 5th parameter.  Should be \n; possible to simplify. \n\n\tFR_push ebp    \n\tmov     ebp,[PARAM_CARRY]    \n    shr     ebp,1\t\t\t\t; shift bit 0 into carry \n\tFR_pop  ebp\n    jmp     eax\t\t\t\t\t; jump into %%4 \n\n\tglobal\t%1%3\n%ifdef\tDLL\n\texport\t%1%3\n%endif\n\talign   8\t\n%1%3:\n%assign\tframe\t0\n\tFR_push edi\n    FR_push esi\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC1]\n    mov     edx,[PARAM_SRC2]\n    mov     ecx,[PARAM_SIZE]\n    mov     eax,ecx\n    shr     ecx,3\t\t\t\t; compute count for unrolled %%4 \n    neg     eax\n    and     eax,7\t\t\t\t; get index where to start %%4 \n    jz      %%4\t\t\t\t\t; necessary special case for 0 \n    inc     ecx\t\t\t\t\t; adjust %%4 count \n    shl     eax,2\t\t\t\t; adjustment for pointers... \n    sub     edi,eax\t\t\t\t; ... since they are offset ... \n    sub     esi,eax\t\t\t\t; ... by a constant when we ... \n    sub     edx,eax\t\t\t\t; ... enter the %%4 \n    shr     eax,2\t\t\t\t; restore previous value \n\n; Calculate start address in %%4 for PIC.  
\n; Due to limitations in some assemblers,%%4-%%2-3 \n; cannot be put into the leal \n\n%ifdef\tPIC\n\tcall    %%2\n%%2:\n\tlea     eax,[%%4-%%2-3+eax+eax*8]\n    add     eax,[esp]\n    add     esp,4\n%else\n    lea     eax,[%%4-3+eax+eax*8]\n%endif\n\tjmp     eax\t\t\t\t\t; jump into %%4 \n%%3:\n\tFR_push ebp\n    mov     ebp,[PARAM_CARRY]\n    shr     ebp,1\t\t\t\t; shift bit 0 into carry \n\tFR_pop  ebp\n\n\talign   8\n%%4:\n\tmov     eax,[esi]\n\t%2\t\teax,[edx]\n    mov     [edi],eax\n    mov     eax,[4+esi]\n\t%2\t\teax,[edx+4]\n    mov     [4+edi],eax\n    mov     eax,[8+esi]\n\t%2\t\teax,[edx+8]\n    mov     [8+edi],eax\n    mov     eax,[12+esi]\n    %2\t\teax,[edx+12]\n    mov     [12+edi],eax\n    mov     eax,[16+esi]\n\t%2\t\teax,[edx+16]\n    mov     [16+edi],eax\n    mov     eax,[20+esi]\n\t%2\t\teax,[edx+20]\n    mov     [20+edi],eax\n    mov     eax,[24+esi]\n\t%2\t\teax,[edx+24]\n    mov     [24+edi],eax\n    mov     eax,[28+esi]\n    %2\t\teax,[edx+28]\n    mov     [28+edi],eax\n    lea     edi,[32+edi]\n    lea     esi,[32+esi]\n    lea     edx,[32+edx]\n    dec     ecx\n    jnz     %%4\n    sbb     eax,eax\n    neg     eax\n    pop     esi\n    pop     edi\n    ret\t\t\n%endmacro\n\n\tsection .text\n;       global ___gmpn_add_n\n;       global ___gmpn_add_nc\n\t\n\tmac_sub\t___g,adc,mpn_add_n,mpn_add_nc\n\t\n    end\n"
  },
  {
    "path": "mpn/x86w/addmul_1.asm",
    "content": "\n;  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define\tPARAM_MULTIPLIER\tesp+frame+16\n%define\tPARAM_SIZE\t\t\tesp+frame+12\n%define\tPARAM_SRC\t\t\tesp+frame+8\n%define\tPARAM_DST\t\t\tesp+frame+4\n%assign frame\t\t\t\t16\n\n%macro\tmac_sub 3\n\n\tglobal  %1%3\n%ifdef\tDLL\n\texport\t%1%3\n%endif\n\n\talign   8\n%1%3:\n    push    edi\n    push    esi\n    push    ebx\n    push    ebp\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC]\n    mov     ecx,[PARAM_SIZE]\n    xor     ebx,ebx\n    and     ecx,3\n    jz      %%2\n%%1:\n    mov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    lea     esi,[4+esi]\n    add     eax,ebx\n    mov     ebx,0\n    adc     edx,ebx\n\t%2\t\t[edi],eax\n    adc     ebx,edx\t\t; propagate carry into cylimb \n    lea     edi,[4+edi]\n    dec     ecx\n    jnz     %%1\n%%2:\n    mov     ecx,[PARAM_SIZE]\n    shr     ecx,2\n    jz      %%4\n\n\talign   8\n%%3:\n\tmov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    add     ebx,eax\n    
mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[4+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[edi],ebx\n    adc     ebp,eax\t\t; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n    mov     eax,[8+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[4+edi],ebp\n    adc     ebx,eax\t\t; new lo + cylimb \n    mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[12+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[8+edi],ebx\n    adc     ebp,eax   ; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n\t%2\t\t[12+edi],ebp\n    adc     ebx,0\t\t; propagate carry into cylimb \n    lea     esi,[16+esi]\n    lea     edi,[16+edi]\n    dec     ecx\n    jnz     %%3\n%%4:\n\tmov     eax,ebx\n    pop     ebp\n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\n%endmacro\n\n\tsection .text\n;       global ___gmpn_addmul_1\n\tmac_sub\t___g,add,mpn_addmul_1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/copyd.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n\tglobal  ___gmpn_copyd\n\n%ifdef\tDLL\n\texport\t___gmpn_copyd\n%endif\n\n%define PARAM_SIZE    esp+frame+12\n%define PARAM_SRC     esp+frame+8\n%define PARAM_DST     esp+frame+4\n%assign       frame   0\n\n\n; eax  saved esi \n; ebx \n; ecx  counter \n; edx  saved edi \n; esi  src \n; edi  dst \n; ebp \n\n    section .text\n\talign   32\n\n___gmpn_copyd: \n    mov     ecx,[PARAM_SIZE]\n    mov     eax,esi\n    mov     esi,[PARAM_SRC]\n    mov     edx,edi\n    mov     edi,[PARAM_DST]\n    lea     esi,[-4+esi+ecx*4]\n    lea     edi,[-4+edi+ecx*4]\n    std\n    rep\tmovsd\n    cld\n    mov     esi,eax\n    mov     edi,edx\n\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/copyi.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n\tglobal  ___gmpn_copyi\n\n%ifdef\tDLL\n\texport\t___gmpn_copyi\n%endif\n\n%define PARAM_SIZE    esp+frame+12\n%define PARAM_SRC     esp+frame+8\n%define PARAM_DST     esp+frame+4\n%assign       frame   0\n\n\tsection .text\n    align   32\n\n; eax  saved esi \n; ebx \n; ecx  counter \n; edx  saved edi \n; esi  src \n; edi  dst \n; ebp \n\n___gmpn_copyi: \n    mov     ecx,[PARAM_SIZE]\n    mov     eax,esi\n    mov     esi,[PARAM_SRC]\n    mov     edx,edi\n    mov     edi,[PARAM_DST]\n    cld\t\t\t\t\t\t; better safe than sorry,see mpn/x86/README \n    rep\tmovsd\n    mov     esi,eax\n    mov     edi,edx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\" \n\n%define PARAM_DIVISOR\tesp+frame+16\n%define PARAM_SIZE\t\tesp+frame+12\n%define PARAM_SRC\t\tesp+frame+8\n%define PARAM_DST\t\tesp+frame+4\n%define\tVAR_INVERSE\t\tPARAM_SRC\n%assign frame\t\t\t0\n\n    section .text\n\n\textern\t___gmp_modlimb_invert_table\n    global  ___gmpn_divexact_1\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n    align   16    \n___gmpn_divexact_1: \n    mov     eax,[PARAM_DIVISOR]\n\tFR_push ebp\n    mov     ebp,[PARAM_SIZE]\n\tFR_push edi\n    FR_push ebx\n    mov     ecx,-1\t\t\t\t\t; shift count \n    FR_push esi\nLstrip_twos:\n    inc     ecx\n    shr     eax,1\n    jnc     Lstrip_twos\n    lea     ebx,[1+eax+eax]\t\t\t; d without twos \n    and     eax,127\t\t\t\t\t; d/2,7 bits \n\n%ifdef\tPIC\n    call    Lmovl_eip_edx\n    add     edx,_GLOBAL_OFFSET_TABLE_\n    mov     edx,[___gmp_modlimb_invert_table+edx]\n    movzx   eax,byte [eax+edx]\t; inv 8 bits \n%else\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax] ; inv 8 bits 
\n%endif\n\n    lea     edx,[eax+eax]\t\t; 2*inv \n    mov     [PARAM_DIVISOR],ebx ; d without twos \n    imul    eax,eax\t\t\t\t; inv*inv \n    mov     esi,[PARAM_SRC]\n    mov     edi,[PARAM_DST]\n    imul    eax,ebx\t\t\t\t; inv*inv*d \n    sub     edx,eax\t\t\t\t; inv = 2*inv - inv*inv*d \n    lea     eax,[edx+edx]\t\t; 2*inv \n    imul    edx,edx\t\t\t\t; inv*inv \n    lea     esi,[esi+ebp*4]\t\t; src end \n    lea     edi,[edi+ebp*4]\t\t; dst end \n    neg     ebp\t\t\t\t\t; -size \n    imul    edx,ebx\t\t\t\t; inv*inv*d \n    sub     eax,edx\t\t\t\t; inv = 2*inv - inv*inv*d \n\n%ifdef\tASSERT\n    FR_push eax\n    imul    eax,[PARAM_DIVISOR]\n    cmp     eax,1\n    FR_pop  eax\n%endif\n\n    mov     [VAR_INVERSE],eax\n    mov     eax,[esi+ebp*4]\t\t; src[0] \n    xor     ebx,ebx\n    xor     edx,edx\n    inc     ebp\n    jz      Lone\n    mov     edx,[esi+ebp*4]\t\t; src[1] \n\tshrd\teax,edx,cl\n    mov     edx,[VAR_INVERSE]\n    jmp     Lentry\n\n    align   8\n    nop\t\t\t\t\t\t\t; k6 code alignment \n    nop\n\n; eax  q \n; ebx  carry bit,0 or -1 \n; ecx  shift \n; edx  carry limb \n; esi  src end \n; edi  dst end \n; ebp  counter,limbs,negative \n\nLtop:\n    mov     eax,[-4+esi+ebp*4]\n    sub     edx,ebx\t\t\t\t; accumulate carry bit \n    mov     ebx,[esi+ebp*4]\n\tshrd\teax,ebx,cl\n    sub     eax,edx\t\t\t\t; apply carry limb \n    mov     edx,[VAR_INVERSE]\n    sbb     ebx,ebx\nLentry:\n    imul    eax,edx\n    mov     [-4+edi+ebp*4],eax\n    mov     edx,[PARAM_DIVISOR]\n    mul     edx\n    inc     ebp\n    jnz     Ltop\n    mov     eax,[-4+esi]\t\t; src high limb \nLone: \n    shr     eax,cl\n\tFR_pop  esi\n    add     eax,ebx\t\t\t\t; apply carry bit \n    FR_pop  ebx\n    sub     eax,edx\t\t\t\t; apply carry limb \n    imul    eax,[VAR_INVERSE]\n    mov     [-4+edi],eax\n    pop     edi\n    pop     ebp\n    ret\n\n%ifdef\tPIC\nLmovl_eip_edx:\t \n    mov     edx,[esp]\n    ret\n%endif\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/divexact_by3c.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_CARRY esp+frame+16\n%define PARAM_SIZE  esp+frame+12\n%define PARAM_SRC   esp+frame+8\n%define PARAM_DST   esp+frame+4\n%assign frame\t\t0\n\n; multiplicative inverse of 3,modulo 2^32 \n; ceil(b/3) and ceil(b*2/3) where b=2^32 \n\n%define\tINVERSE_3\t\t0xAAAAAAAB\n%define\tONE_THIRD_CEIL\t0x55555556\n%define\tTWO_THIRDS_CEIL\t0xAAAAAAAB\n\n    section .text\n    \n    global  ___gmpn_divexact_by3c\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n    align   8\n___gmpn_divexact_by3c: \n    mov     ecx,[PARAM_SRC]\n    FR_push ebp\n    mov     ebp,[PARAM_SIZE]\n    FR_push edi\n    mov     edi,[PARAM_DST]\n    FR_push esi\n    mov     esi,INVERSE_3\n\tFR_push ebx\n    lea     ecx,[ecx+ebp*4]\n    mov     ebx,[PARAM_CARRY]\n    lea     edi,[edi+ebp*4]\n    neg     ebp\n\n; eax  scratch,low product \n; ebx  carry limb (0 to 3) \n; ecx  &src[size] \n; edx  scratch,high product \n; esi  multiplier \n; edi  &dst[size] \n; ebp  counter,limbs,negative 
\n\n\talign   8\nLtop:\t \n    mov     eax,[ecx+ebp*4]\n    sub     eax,ebx\n    setc    bl\n    imul    esi\n    cmp     eax,ONE_THIRD_CEIL\n    mov     [edi+ebp*4],eax\n    sbb     ebx,-1\t\t\t\t; +1 if eax>=ceil(b/3) \n    cmp     eax,TWO_THIRDS_CEIL\n    sbb     ebx,-1\t\t\t\t; +1 if eax>=ceil(b*2/3) \n    inc     ebp\n    jnz     Ltop\n    mov     eax,ebx\n    pop     ebx\n    pop     esi\n    pop     edi\n    pop     ebp\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/divrem_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_CARRY   esp+frame+24\n%define PARAM_DIVISOR esp+frame+20\n%define PARAM_SIZE    esp+frame+16\n%define PARAM_SRC     esp+frame+12\n%define PARAM_XSIZE   esp+frame+8\n%define PARAM_DST     esp+frame+4\n\n\tsection .text\n\n    global  ___gmpn_divrem_1c\n%ifdef\tDLL\n\texport\t___gmpn_divrem_1c\n%endif\n\n    align   16\n___gmpn_divrem_1c:       \n%assign       frame   0\n    mov     ecx,[PARAM_SIZE]\n\tFR_push edi\n    mov     edi,[PARAM_SRC]\n\tFR_push esi\n    mov     esi,[PARAM_DIVISOR]\n\tFR_push ebx\n    mov     ebx,[PARAM_DST]\n\tFR_push ebp\n    mov     ebp,[PARAM_XSIZE]\n    or      ecx,ecx\n    mov     edx,[PARAM_CARRY]\n    jz      Lfraction\n    lea     ebx,[-4+ebx+ebp*4]  ; dst one limb below integer part \n    jmp     Linteger_top\n\t\t\n\tglobal  ___gmpn_divrem_1\n%ifdef\tDLL\n\texport\t___gmpn_divrem_1\n%endif\n    align   16\n___gmpn_divrem_1: \n\n%assign       frame   0\n    mov     ecx,[PARAM_SIZE]\n\tFR_push edi\n    mov     
edi,[PARAM_SRC]\n    FR_push esi\n    mov     esi,[PARAM_DIVISOR]\n    or      ecx,ecx\n    jz      Lsize_zero\n\tFR_push ebx\n    mov     eax,[-4+edi+ecx*4]  ; src high limb \n    xor     edx,edx\n    mov     ebx,[PARAM_DST]\n\tFR_push ebp\n    mov     ebp,[PARAM_XSIZE]\n    cmp     eax,esi\n    lea     ebx,[-4+ebx+ebp*4]  ; dst one limb below integer part \n    jae     Linteger_entry\n\n; high<divisor,so high of dst is zero,and avoid one div \n\n    mov     [ebx+ecx*4],edx\n    dec     ecx\n    mov     edx,eax\n    jz      Lfraction\n\t \n; eax  scratch (quotient) \n; ebx  dst+4*xsize-4 \n; ecx  counter \n; edx  scratch (remainder) \n; esi  divisor \n; edi  src \n; ebp  xsize \n\nLinteger_top:\n    mov     eax,[-4+edi+ecx*4]\nLinteger_entry:\t \n    div     esi\n    mov     [ebx+ecx*4],eax\n\tdec\t\tecx\n\tjnz\t\tLinteger_top\nLfraction:\n    or      ecx,ebp\n    jz      Ldone\n    mov     ebx,[PARAM_DST]\n\n; eax  scratch (quotient) \n; ebx  dst \n; ecx  counter \n; edx  scratch (remainder) \n; esi  divisor \n; edi \n; ebp \n\nLfraction_top:\t \n    xor     eax,eax\n    div     esi\n    mov     [-4+ebx+ecx*4],eax\n    dec\t\tecx\n    jnz\t\tLfraction_top\nLdone:\n    pop     ebp\n    mov     eax,edx\n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\nLsize_zero: \n    mov     ecx,[PARAM_XSIZE]\n    xor     eax,eax\n    mov     edi,[PARAM_DST]\n    cld\t\t\t\t\t\t\t; better safe than sorry,see mpn/x86/README \n    rep\t\tstosd\n    pop     esi\n    pop     edi\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/longlong_inc.h",
    "content": "/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.\n\nCopyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,\n2004, 2005 Free Software Foundation, Inc.\n\nCopyright 2013 William Hart\n\nThis file is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThis file is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with this file; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#if defined(__ICL)\n\n#define add_ssaaaa(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"addl %5,%k1\\n\\tadcl %3,%k0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\"  ((USItype)(ah)), \"g\" ((USItype)(bh)),\t\t\t\\\n\t     \"%1\" ((USItype)(al)), \"g\" ((USItype)(bl)))\n#define sub_ddmmss(sh, sl, ah, al, bh, bl) \\\n  __asm__ (\"subl %5,%k1\\n\\tsbbl %3,%k0\"\t\t\t\t\t\\\n\t   : \"=r\" (sh), \"=&r\" (sl)\t\t\t\t\t\\\n\t   : \"0\" ((USItype)(ah)), \"g\" ((USItype)(bh)),\t\t\t\\\n\t     \"1\" ((USItype)(al)), \"g\" ((USItype)(bl)))\n#define add_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"addl %8,%q2\\n\\tadcl %6,%q1\\n\\tadcl %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((USItype)(ah)), \"rme\" ((USItype)(bh)),  \\\n         \"1\"  ((USItype)(am)), \"rme\" ((USItype)(bm)),  \\\n         \"2\"  ((USItype)(al)), \"rme\" ((USItype)(bl)))  \n#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl)  \\\n  __asm__ (\"subl 
%8,%q2\\n\\tsbbl %6,%q1\\n\\tsbbl %4,%q0\"     \\\n       : \"=r\" (sh), \"=r\" (sm), \"=&r\" (sl)                  \\\n       : \"0\"  ((USItype)(ah)), \"rme\" ((USItype)(bh)),  \\\n         \"1\"  ((USItype)(am)), \"rme\" ((USItype)(bm)),  \\\n         \"2\"  ((USItype)(al)), \"rme\" ((USItype)(bl)))  \n#define umul_ppmm(w1, w0, u, v) \\\n  __asm__ (\"mull %3\"\t\t\t\t\t\t\t\\\n\t   : \"=a\" (w0), \"=d\" (w1)\t\t\t\t\t\\\n\t   : \"%0\" ((USItype)(u)), \"rm\" ((USItype)(v)))\n#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding \"=d\" */\\\n  __asm__ (\"divl %4\"\t\t     /* stringification in K&R C */\t\\\n\t   : \"=a\" (q), \"=d\" (r)\t\t\t\t\t\t\\\n\t   : \"0\" ((USItype)(n0)), \"1\" ((USItype)(n1)), \"rm\" ((USItype)(dx)))\n\n\n#if __GMP_GNUC_PREREQ (3,4)  /* using bsrl */\n#define count_leading_zeros(count,x)  count_leading_zeros_gcc_clz(count,x)\n#endif /* gcc clz */\n\n#ifndef count_leading_zeros\n#define count_leading_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    USItype __cbtmp;\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsrl %1,%0\" : \"=r\" (__cbtmp) : \"rm\" ((USItype)(x)));\t\\\n    (count) = __cbtmp ^ 31;\t\t\t\t\t\t\\\n  } while (0)\n#endif /* asm bsrl */\n\n#if __GMP_GNUC_PREREQ (3,4)  /* using bsfl */\n#define count_trailing_zeros(count,x)  count_trailing_zeros_gcc_ctz(count,x)\n#endif /* gcc ctz */\n\n#ifndef count_trailing_zeros\n#define count_trailing_zeros(count, x)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((x) != 0);\t\t\t\t\t\t\t\\\n    __asm__ (\"bsfl %1,%0\" : \"=r\" (count) : \"rm\" ((USItype)(x)));\t\\\n  } while (0)\n#endif /* asm bsfl */\n\n\n/* ASM_L gives a local label for a gcc asm block, for use when temporary\n   local labels like \"1:\" might not be available, which is the case for\n   instance on the x86s (the SCO assembler doesn't support them).\n\n   The label generated is made unique by including \"%=\" which is a unique\n   number for each insn.  
This ensures the same name can be used in multiple\n   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not\n   allowed there's no need for a label to be usable outside a single\n   block.  */\n\n#define ASM_L(name)  LSYM_PREFIX \"asm_%=_\" #name\n\n#if ! WANT_ASSERT\n/* Better flags handling than the generic C gives on i386, saving a few\n   bytes of code and maybe a cycle or two.  */\n\n#define MPN_IORD_U(ptr, incr, aors)\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr  __ptr_dummy;\t\t\t\t\t\t\\\n    if (__builtin_constant_p (incr) && (incr) == 1)\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n        __asm__ __volatile__\t\t\t\t\t\t\\\n          (\"\\n\" ASM_L(top) \":\\n\"\t\t\t\t\t\\\n           \"\\t\" aors \" $1, (%0)\\n\"\t\t\t\t\t\\\n           \"\\tleal 4(%0),%0\\n\"\t\t\t\t\t\t\\\n           \"\\tjc \" ASM_L(top)\t\t\t\t\t\t\\\n           : \"=r\" (__ptr_dummy)\t\t\t\t\t\t\\\n           : \"0\"  (ptr)\t\t\t\t\t\t\t\\\n           : \"memory\");\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    else\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n        __asm__ __volatile__\t\t\t\t\t\t\\\n          (   aors  \" %2,(%0)\\n\"\t\t\t\t\t\\\n           \"\\tjnc \" ASM_L(done) \"\\n\"\t\t\t\t\t\\\n           ASM_L(top) \":\\n\"\t\t\t\t\t\t\\\n           \"\\t\" aors \" $1,4(%0)\\n\"\t\t\t\t\t\\\n           \"\\tleal 4(%0),%0\\n\"\t\t\t\t\t\t\\\n           \"\\tjc \" ASM_L(top) \"\\n\"\t\t\t\t\t\\\n           ASM_L(done) \":\\n\"\t\t\t\t\t\t\\\n           : \"=r\" (__ptr_dummy)\t\t\t\t\t\t\\\n           : \"0\"  (ptr),\t\t\t\t\t\t\\\n             \"ri\" (incr)\t\t\t\t\t\t\\\n           : \"memory\");\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n#ifndef MPN_INCR_U\n#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, \"addl\")\n#endif\n#ifndef MPN_DECR_U\n#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, \"subl\")\n#endif\n#ifndef mpn_incr_u\n#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, 
incr)\n#endif\n#ifndef mpn_decr_u\n#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)\n#endif\n#endif\n\n#endif \n\n#if defined (__GNUC__)\n#if __GMP_GNUC_PREREQ (3,1)\n#define __GMP_qm \"=Qm\"\n#define __GMP_q \"=Q\"\n#else\n#define __GMP_qm \"=qm\"\n#define __GMP_q \"=q\"\n#endif\n#ifndef ULONG_PARITY\n#define ULONG_PARITY(p, n)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    char\t   __p;\t\t\t\t\t\t\t\\\n    unsigned long  __n = (n);\t\t\t\t\t\t\\\n    __n ^= (__n >> 16);\t\t\t\t\t\t\t\\\n    __asm__ (\"xorb %h1, %b1\\n\\t\"\t\t\t\t\t\\\n\t     \"setpo %0\"\t\t\t\t\t\t\t\\\n\t : __GMP_qm (__p), __GMP_q (__n)\t\t\t\t\\\n\t : \"1\" (__n));\t\t\t\t\t\t\t\\\n    (p) = __p;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n#endif\n\n#if !defined(BSWAP_LIMB) && defined (__GNUC__)\n#define BSWAP_LIMB(dst, src)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    __asm__ (\"bswap %0\" : \"=r\" (dst) : \"0\" (src));\t\t\t\\\n  } while (0)\n#endif\n\n#if defined( _MSC_VER )\n#  if !defined(COUNT_LEADING_ZEROS_NEED_CLZ_TAB)\n#    define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#  endif\n#endif\n"
  },
  {
    "path": "mpn/x86w/lshift.asm",
    "content": "\n;  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_SHIFT   esp+frame+16\n%define PARAM_SIZE    esp+frame+12\n%define PARAM_SRC     esp+frame+8\n%define PARAM_DST     esp+frame+4\n\n    section .text\n\n    global  ___gmpn_lshift\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n    align   8    \n___gmpn_lshift: \n    push    edi\n    push    esi\n    push    ebx\n%assign       frame   frame+12\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC]\n    mov     edx,[PARAM_SIZE]\n    mov     ecx,[PARAM_SHIFT]\n    sub     esi,4\t\t\t\t; adjust src \n    mov     ebx,[esi+edx*4]\t\t; read most significant limb \n    xor     eax,eax\n\tshld\teax,ebx,cl\n    dec     edx\n    jz      Lend\n    push    eax\t\t\t\t\t; push carry limb onto stack \n    test    dl,1\n    jnz     L1\t\t\t\t\t; enter Lop in the middle \n    mov     eax,ebx\n\n\talign   8\nLop:\n\tmov     ebx,[esi+edx*4]\t\t; load next lower limb \n\tshld\teax,ebx,cl\n    mov     [edi+edx*4],eax\t\t; store it \n    dec    
 edx\nL1:\n\tmov     eax,[esi+edx*4]\n\tshld\tebx,eax,cl\n    mov     [edi+edx*4],ebx\n    dec     edx\n    jnz     Lop\n    shl     eax,cl\t\t\t\t; compute least significant limb \n    mov     [edi],eax\t\t\t; store it \n    pop     eax\t\t\t\t\t; pop carry limb \n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\nLend:\n\tshl     ebx,cl\t\t\t\t; compute least significant limb \n    mov     [edi],ebx\t\t\t; store it \n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/mod_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_CARRY   esp+frame+16\n%define PARAM_DIVISOR esp+frame+12\n%define PARAM_SIZE    esp+frame+8\n%define PARAM_SRC     esp+frame+4\n\n    section .text\n\n    global  ___gmpn_mod_1c\n%ifdef\tDLL\n\texport\t___gmpn_mod_1c\n%endif\n\n    align   16\n___gmpn_mod_1c:     \n%assign\tframe   0\n    mov     ecx,[PARAM_SIZE]\n\tFR_push ebx\n    mov     ebx,[PARAM_SRC]\n\tFR_push esi\n    mov     esi,[PARAM_DIVISOR]\n    or      ecx,ecx\n    mov     edx,[PARAM_CARRY]\n    jnz     Ltop\n    pop     esi\n    mov     eax,edx\n    pop     ebx\n    ret\n\n\tglobal  ___gmpn_mod_1\n%ifdef\tDLL\n\texport\t___gmpn_mod_1\n%endif\n    align   16\n___gmpn_mod_1: \n\n%assign\tframe   0        \n    mov     ecx,[PARAM_SIZE]\n\tFR_push ebx\n    mov     ebx,[PARAM_SRC]\n\tFR_push esi\n    or      ecx,ecx\n    jz      Ldone_zero\n    mov     esi,[PARAM_DIVISOR]\n    mov     eax,[-4+ebx+ecx*4]  ; src high limb \n    cmp     eax,esi\n    sbb     edx,edx\t\t\t\t; -1 if high<divisor 
\n    add     ecx,edx\t\t\t\t; skip one division if high<divisor \n    jz      Ldone_eax\n    and     edx,eax\t\t\t\t; carry if high<divisor \n\n; eax  scratch (quotient) \n; ebx  src \n; ecx  counter \n; edx  carry (remainder) \n; esi  divisor \n; edi \n; ebp \n\nLtop:\n    mov     eax,[-4+ebx+ecx*4]\n    div     esi\n\tdec\t\tecx\n\tjnz\t\tLtop\n    mov     eax,edx\nLdone_eax:\n    pop     esi\n    pop     ebx\n    ret\nLdone_zero:\n    pop     esi\n    xor     eax,eax\n    pop     ebx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/mod_34lsub1.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n;\n;  This file is part of the GNU MP Library.\n;\n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n    section .text\n\n%define PARAM_SIZE    esp+frame+8\n%define PARAM_SRC     esp+frame+4\n\n; re-use parameter space \n%define\tSAVE_EBX\tPARAM_SRC\n\n\tglobal  ___gmpn_mod_34lsub1\n%ifdef\tDLL\n\texport\t___gmpn_mod_34lsub1\n%endif\n\n    align   16\n___gmpn_mod_34lsub1:\n%assign       frame   0\n    mov     ecx,[PARAM_SIZE]\n    mov     edx,[PARAM_SRC]\n    sub     ecx,2\n    ja      Lthree_or_more\n    mov     eax,[edx]\n    jb      Lone\n    mov     ecx,[4+edx]\n    mov     edx,eax\n    shr     eax,24\t\t\t\t; src[0] low \n\n    and     edx,0xFFFFFF\t\t; src[0] high \n    add     eax,edx\n    mov     edx,ecx\n\n    and     ecx,0xFFFF\n    shr     edx,16\t\t\t\t; src[1] high \n    add     eax,edx\n\n    shl     ecx,8\t\t\t\t; src[1] low \n    add     eax,ecx\nLone:\n    ret\n\n; eax \n; ebx \n; ecx  size-2 \n; edx  src \n; esi \n; edi \n; ebp \n\nLthree_or_more:\n    mov     [SAVE_EBX],ebx\t; and arrange 16-byte loop alignment \n    xor     ebx,ebx\n\tFR_push esi\n    
xor     esi,esi\n\tFR_push edi\n    xor     eax,eax         ; and clear carry flag \n\n; offset 0x40 here \n; eax  acc 0mod3 \n; ebx  acc 1mod3 \n; ecx  counter,limbs \n; edx  src \n; esi  acc 2mod3 \n; edi \n; ebp \n\nLtop:\n    lea     edx,[12+edx]\n    lea     ecx,[-2+ecx]\n    adc     eax,[-12+edx]\n    adc     ebx,[-8+edx]\n    adc     esi,[-4+edx]\n    dec     ecx\n    jg      Ltop\n; ecx is -2,-1 or 0 representing 0,1 or 2 more limbs,respectively \n    mov     edi,0xFFFFFFFF\n    inc     ecx\n    js      Lcombine\n    adc     eax,[edx]\n    mov     edi,0xFFFFFF00\n    dec     ecx\n    js      Lcombine\n    adc     ebx,[4+edx]\n    mov     edi,0xFFFF0000\n\n; eax  acc 0mod3 \n; ebx  acc 1mod3 \n; ecx \n; edx \n; esi  acc 2mod3 \n; edi  mask \n; ebp \n\nLcombine:\n    sbb     ecx,ecx\t\t\t; carry \n    mov     edx,eax         ; 0mod3 \n    shr     eax,24          ; 0mod3 high \n    and     ecx,edi         ; carry masked \n    sub     eax,ecx         ; apply carry \n    mov     edi,ebx         ; 1mod3 \n    shr     ebx,16          ; 1mod3 high \n    and     edx,0x00FFFFFF  ; 0mod3 low \n    add     eax,edx         ; apply 0mod3 low \n    and     edi,0xFFFF\n    shl     edi,8\t\t\t; 1mod3 low \n    add     eax,ebx         ; apply 1mod3 high \n    add     eax,edi         ; apply 1mod3 low \n    mov     edx,esi         ; 2mod3 \n    shr     esi,8\t\t\t; 2mod3 high \n    and     edx,0xFF        ; 2mod3 low \n    shl     edx,16          ; 2mod3 low \n    add     eax,esi         ; apply 2mod3 high \n    add     eax,edx         ; apply 2mod3 low \n\tFR_pop  edi\n    mov     ebx,[SAVE_EBX]\n\tFR_pop  esi\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/mul_1.asm",
    "content": "\n;  Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_MULTIPLIER    esp+frame+16\n%define PARAM_SIZE\t\t\tesp+frame+12\n%define PARAM_SRC\t\t\tesp+frame+8\n%define PARAM_DST\t\t\tesp+frame+4\n%assign frame\t\t\t\t0\n\n\tsection .text\n\t\n\tglobal  ___gmpn_mul_1\n%ifdef\tDLL\n\texport\t___gmpn_mul_1\n%endif\n\n    align   8\n___gmpn_mul_1: \n    push    edi\n    push    esi\n    push    ebx\n    push    ebp\n%assign       frame   frame+16\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC]\n    mov     ecx,[PARAM_SIZE]\n    xor     ebx,ebx\n    and     ecx,3\n    jz      Lend0\nLoop0:\n    mov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    lea     esi,[4+esi]\n    add     eax,ebx\n    mov     ebx,0\n    adc     edx,ebx\n    mov     [edi],eax\n    mov     ebx,edx\t\t\t; propagate carry into cylimb \n    lea     edi,[4+edi]\n    dec     ecx\n    jnz     Loop0\nLend0:\n    mov     ecx,[PARAM_SIZE]\n    shr     ecx,2\n    jz      Lend\n\n    align   
8\nLop:\n\tmov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    add     ebx,eax\n    mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[4+esi]\n    mul     dword [PARAM_MULTIPLIER]\n    mov     [edi],ebx\n    add     ebp,eax\t\t\t; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n    mov     eax,[8+esi]\n    mul     dword [PARAM_MULTIPLIER]\n    mov     [4+edi],ebp\n    add     ebx,eax\t\t\t; new lo + cylimb \n    mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[12+esi]\n    mul     dword [PARAM_MULTIPLIER]\n    mov     [8+edi],ebx\n    add     ebp,eax\t\t\t; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n    mov     [12+edi],ebp\n    lea     esi,[16+esi]\n    lea     edi,[16+edi]\n    dec     ecx\n    jnz     Lop\nLend:\n\tmov     eax,ebx\n    pop     ebp\n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/mul_basecase.asm",
    "content": "\n;  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"x86i.inc\"\n\n\n%define\tVAR_STACK_SPACE\t8\n%define PARAM_YSIZE\t\tesp+frame+20\n%define PARAM_YP\t\tesp+frame+16\n%define PARAM_XSIZE\t\tesp+frame+12\n%define PARAM_XP\t\tesp+frame+8\n%define PARAM_WP\t\tesp+frame+4\n%define\tVAR_MULTIPLIER\tesp+frame-4\n%define\tVAR_COUNTER\t\tesp+frame-8\n\n\tsection .text\n\n\tglobal  ___gmpn_mul_basecase\n%ifdef\tDLL\n\texport\t___gmpn_mul_basecase\n%endif\n\n\talign   8\t\n___gmpn_mul_basecase: \n    sub     esp,VAR_STACK_SPACE\n    push    esi\n    push    ebp\n    push    edi\n%assign\tframe\t\t\tVAR_STACK_SPACE+12\n    mov     esi,[PARAM_XP]\n    mov     edi,[PARAM_WP]\n    mov     ebp,[PARAM_YP]\n    mov     eax,[esi]\t\t\t\t; load xp[0] \n    mul     dword [ebp]\t\t\t\t; multiply by yp[0] \n    mov     [edi],eax\t\t\t\t; store to wp[0] \n    mov     ecx,[PARAM_XSIZE]\t\t; xsize \n    dec     ecx\t\t\t\t\t\t; If xsize = 1,ysize = 1 too \n    jz      Ldone\n    FR_push\tebx\n    mov     ebx,edx\n    lea     
esi,[4+esi]\n    lea     edi,[4+edi]\nLoopM:\n    mov     eax,[esi]\t\t\t\t; load next limb at xp[j] \n    lea     esi,[4+esi]\n    mul     dword [ebp]\n    add     eax,ebx\n    mov     ebx,edx\n    adc     ebx,0\n    mov     [edi],eax\n    lea     edi,[4+edi]\n    dec     ecx\n    jnz     LoopM\n    mov     [edi],ebx\t\t\t\t; most significant limb of product \n    add     edi,4\t\t\t\t\t; increment wp \n    mov     eax,[PARAM_XSIZE]\n    shl     eax,2\n    sub     edi,eax\n    sub     esi,eax\n    mov     eax,[PARAM_YSIZE]\t\t; ysize \n    dec     eax\n    jz      Lskip\n    mov     [VAR_COUNTER],eax\t\t; set index i to ysize \nLouter:\n    mov     ebp,[PARAM_YP]\t\t\t; yp \n    add     ebp,4\t\t\t\t\t; make ebp point to next v limb \n    mov     [PARAM_YP],ebp\n    mov     eax,[ebp]\t\t\t\t; copy y limb ... \n    mov     [VAR_MULTIPLIER],eax\t; ... to stack slot \n    mov     ecx,[PARAM_XSIZE]\n    xor     ebx,ebx\n    and     ecx,3\n    jz      Lend0\nLoop0:\n    mov     eax,[esi]\n    mul     dword [VAR_MULTIPLIER]\n    lea     esi,[4+esi]\n    add     eax,ebx\n    mov     ebx,0\n    adc     edx,ebx\n    add     [edi],eax\n    adc     ebx,edx\t\t\t\t\t; propagate carry into cylimb \n    lea     edi,[4+edi]\n    dec     ecx\n    jnz     Loop0\nLend0:\n    mov     ecx,[PARAM_XSIZE]\n    shr     ecx,2\n    jz      LendX\n\n\talign   8\nLoopX:\t \n    mov     eax,[esi]\n    mul     dword [VAR_MULTIPLIER]\n    add     ebx,eax\n    mov     ebp,0\n    adc     ebp,edx\n\n    mov     eax,[4+esi]\n    mul     dword [VAR_MULTIPLIER]\n    add     [edi],ebx\n    adc     ebp,eax\t\t\t\t\t; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n\n    mov     eax,[8+esi]\n    mul     dword [VAR_MULTIPLIER]\n    add     [4+edi],ebp\n    adc     ebx,eax\t\t\t\t\t; new lo + cylimb \n    mov     ebp,0\n    adc     ebp,edx\n\n    mov     eax,[12+esi]\n    mul     dword [VAR_MULTIPLIER]\n    add     [8+edi],ebx\n    adc     ebp,eax\t\t\t\t\t; new lo + cylimb \n    mov     ebx,0\n 
   adc     ebx,edx\n\n    add     [12+edi],ebp\n    adc     ebx,0\t\t\t\t\t; propagate carry into cylimb \n\n    lea     esi,[16+esi]\n    lea     edi,[16+edi]\n    dec     ecx\n    jnz     LoopX\nLendX:\n    mov     [edi],ebx\n    add     edi,4\n\n; we incremented wp and xp in the loop above; compensate \n    mov     eax,[PARAM_XSIZE]\n    shl     eax,2\n    sub     edi,eax\n    sub     esi,eax\n\n    mov     eax,[VAR_COUNTER]\n    dec     eax\n    mov     [VAR_COUNTER],eax\n    jnz     Louter\nLskip:\n    pop     ebx\n    pop     edi\n    pop     ebp\n    pop     esi\n    add     esp,8\n    ret\nLdone:\n    mov     [4+edi],edx\t\t\t\t; store to wp[1] \n    pop     edi\n    pop     ebp\n    pop     esi\n    add     esp,8\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/addmul_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tUNROLL_LOG2\t\t4\n%define\tUNROLL_COUNT\t(1 << UNROLL_LOG2)\n%define\tUNROLL_MASK\t\tUNROLL_COUNT-1  \n%define\tUNROLL_BYTES\t4*UNROLL_COUNT\n\n%ifdef\tPIC\n%define\tUNROLL_THRESHOLD\t5 \n%else\n%define\tUNROLL_THRESHOLD\t5 \n%endif\n\n%define\tPARAM_CARRY\t\t\tesp+frame+20 \n%define PARAM_MULTIPLIER\tesp+frame+16 \n%define PARAM_SIZE\t\t\tesp+frame+12 \n%define PARAM_SRC\t\t\tesp+frame+8 \n%define PARAM_DST\t\t\tesp+frame+4 \n\n%macro\tmul_fun\t4\n\n%ifdef\tDLL\n\texport\t%1%3\n\texport  %1%4\n%endif\n\n\talign   32\n%define\tframe\t0\n%1%4:\n    FR_push ebx\n    mov     ebx,[PARAM_CARRY]\n\tjmp     %%Lstart_nc\n\n%define\tframe\t0\n%1%3:\n\tFR_push ebx\n    xor     ebx,ebx\t;  initial carry \n%%Lstart_nc: \n    mov     ecx,[PARAM_SIZE]\n    FR_push esi\n    mov     esi,[PARAM_SRC]\n    FR_push\tedi\n    mov     edi,[PARAM_DST]\n    FR_push ebp\n    cmp     ecx,UNROLL_THRESHOLD\n    mov     ebp,[PARAM_MULTIPLIER]\n    jae     %%Lunroll\n\n;  simple 
loop \n;  this is offset 0x22,so close enough to aligned \n;  eax scratch \n;  ebx carry \n;  ecx counter \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n\n%%Lsimple: \n    mov     eax,[esi]\n    add     edi,4\n    mul     ebp\n    add     eax,ebx\n    adc     edx,0\n\t%2\t\t[edi-4],eax\n    mov     ebx,edx\n    adc     ebx,0\n    dec     ecx\n    lea     esi,[4+esi]\n    jnz     %%Lsimple\n    pop     ebp\n    pop     edi\n    pop     esi\n    mov     eax,ebx\n    pop     ebx\n    ret\n\n;  VAR_JUMP holds the computed jump temporarily because there's not enough \n;  registers when doing the mul for the initial two carry limbs. \n; \n;  The add/adc for the initial carry in %ebx is necessary only for the \n;  mpn_add/submul_1c entry points.  Duplicating the startup code to \n;  eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good \n;  idea. \n;\n;   overlapping with parameters already fetched \n\n%define\tVAR_COUNTER\tPARAM_SIZE\n%define\tVAR_JUMP\tPARAM_DST\n\n; VAL1 = ifelse(UNROLL_BYTES,256,128)\n%define\tVAL1\t128\n; VAL2 = ifelse(UNROLL_BYTES,256,-128)\n%define\tVAL2   -128\n\n;  this is offset 0x43,so close enough to aligned \n;  eax \n;  ebx initial carry \n;  ecx size \n;  edx \n;  esi src \n;  edi dst \n;  ebp \n\n%%Lunroll: \n    mov     edx,ecx\n    dec     ecx\n    sub     edx,2\n    neg     ecx\n    shr     edx,UNROLL_LOG2\n    and     ecx,UNROLL_MASK\n    mov     [VAR_COUNTER],edx\n    mov     edx,ecx\n        \n;  15 code bytes per limb \n\n%ifdef\tPIC\n\tcall    %%Lhere\n%%Lhere: \n    shl     edx,4\n    neg     ecx\n    lea     edx,[edx+ecx*1]\n    add     edx,%%Lentry-%%Lhere\n    add     edx,[esp]\n\tadd\t\tesp,4\n%else\n\tshl     edx,4\n    neg     ecx\n\tlea\t\tedx,[%%Lentry+edx+ecx]\n%endif\n    mov     eax,[esi]\t\t\t;  src low limb \n    mov     [VAR_JUMP],edx\n\tlea\t\tesi,[VAL1+4+esi+ecx*4]\n    mul     ebp\n    add     eax,ebx\t\t\t\t;  initial carry (from _1c) \n    adc     edx,0\n    mov     
ebx,edx\t\t\t\t;  high carry \n\tlea\t\tedi,[VAL1+edi+ecx*4]\n    mov     edx,[VAR_JUMP]\n    test    ecx,1\n    mov     ecx,eax\t\t\t\t;  low carry \n\tcmovnz\tecx,ebx\n\tcmovnz\tebx,eax\n    jmp     edx\n\n;  eax scratch \n;  ebx carry hi \n;  ecx carry lo \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n;\n;  VAR_COUNTER loop counter \n;\n;  15 code bytes per limb \n\n%define\tCHUNK_COUNT\t2 \n\n\talign   32\n%%Ltop: \n\tadd     edi,UNROLL_BYTES\n%%Lentry: \n%assign\tdisp\tVAL2\n%rep\tUNROLL_COUNT/CHUNK_COUNT\n\tmov\t\teax,[byte disp+esi]\n\tmul     ebp\n\t%2\t\t[byte disp+edi],ecx\n    adc     ebx,eax\n    mov     ecx,edx\n    adc     ecx,0\n    mov     eax,[byte disp+4+esi]\n    mul     ebp\n\t%2\t\t[byte disp+4+edi],ebx\n    adc     ecx,eax\n    mov     ebx,edx\n    adc     ebx,0\n%assign\t\tdisp\tdisp+4*CHUNK_COUNT\n%endrep\n\n    dec     dword [VAR_COUNTER]\n    lea     esi,[UNROLL_BYTES+esi]\n    jns     %%Ltop\n\n%assign\tdisp\tUNROLL_BYTES+VAL2\n\t%2\t\t[disp+edi],ecx\n    mov     eax,ebx\n    pop     ebp\n    pop     edi\n    pop     esi\n    pop     ebx\n    adc     eax,0\n    ret\n%endmacro\n\n\tsection .text\n\tglobal ___gmpn_addmul_1\n\tglobal ___gmpn_addmul_1c\n\t\n\tmul_fun\t___g,add,mpn_addmul_1,mpn_addmul_1c\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/copyd.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal  ___gmpn_copyd \n\n%ifdef\tDLL\n\texport\t___gmpn_copyd\n%endif\n\n%define\tPARAM_SIZE\tesp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n\n%define\tSAVE_ESI\tPARAM_SIZE\n%define\tSAVE_EDI\tPARAM_SRC\n%define\tframe\t\t0 \n\n\tsection .text\n\talign   16\n\n___gmpn_copyd: \n\tmov     ecx,[PARAM_SIZE]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    sub     ecx,1\n    jb      Lzero\n    mov     eax,[esi+ecx*4]\t\t\t;  src[size-1] \n    jz      Lone\n    mov     edx,[-4+esi+ecx*4]\t\t;  src[size-2] \n    sub     ecx,2\n    jbe     Ldone_loop              ;  2 or 3 limbs only \n        \n;  The usual overlap is \n;\n;      high                   low \n;      +------------------+ \n;      |               dst| \n;      +------------------+ \n;            +------------------+ \n;            |               src| \n;            +------------------+ \n;\n;  We can use 
an incrementing copy in the following circumstances. \n;\n;      src+4*size<=dst,since then the regions are disjoint \n;\n;      src==dst,clearly (though this shouldn't occur normally) \n;\n;      src>dst,since in that case it's a requirement of the \n;               parameters that src>=dst+size*4,and hence the \n;               regions are disjoint \n;\n;  eax prev high limb \n;  ebx \n;  ecx counter,size-3 down to 0 or -1,inclusive,by 2s \n;  edx prev low limb \n;  esi src \n;  edi dst \n;  ebp \n\n    lea     edx,[edi+ecx*4]\n    cmp     esi,edi\n    jae     Luse_movsl\t\t\t;  src >= dst \n    cmp     edx,edi\n    mov     edx,[4+esi+ecx*4]\t;  src[size-2] again \n    jbe     Luse_movsl\t\t\t;  src+4*size <= dst \nLtop: \n    mov     [8+edi+ecx*4],eax\n    mov     eax,[esi+ecx*4]\n    mov     [4+edi+ecx*4],edx\n    mov     edx,[-4+esi+ecx*4]\n    sub     ecx,2\n    jnbe    Ltop\nLdone_loop: \n    mov     [8+edi+ecx*4],eax\n    mov     [4+edi+ecx*4],edx\n\n;  copy low limb (needed if size was odd,but will already have been \n;  done in the loop if size was even) \n\n    mov     eax,[esi]\nLone: \n    mov     [edi],eax\n    mov     edi,[SAVE_EDI]\n    mov     esi,[SAVE_ESI]\n\tret\nLuse_movsl: \n    add     ecx,3\n    cld\n    rep\t\tmovsd\nLzero: \n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\textern\t___gmp_modlimb_invert_table\n\tglobal  ___gmpn_divexact_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n%define\tPARAM_DIVISOR\tesp+frame+16 \n%define PARAM_SIZE      esp+frame+12 \n%define PARAM_SRC       esp+frame+8 \n%define PARAM_DST       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI\t\tesp+frame-12 \n%define SAVE_EBP\t\tesp+frame-16 \n%define VAR_INVERSE\t\tesp+frame-20 \n%define STACK_SPACE\t\t20 \n%define frame\t\t\t0 \n\n\tsection .text\n\n\talign   16\n\n___gmpn_divexact_1: \n\tmov     eax,[PARAM_DIVISOR]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\n    bsf     ecx,eax         ;  trailing twos \n    mov     [SAVE_EBP],ebp\n    shr     eax,cl          ;  d without twos \n    mov     edx,eax\n    shr     eax,1           ;  d/2 without twos \n    mov     
[PARAM_DIVISOR],edx\n    and     eax,127\n\n%ifdef\tPIC\n    call    Lmovl_eip_ebp\n    add     ebp,_GLOBAL_OFFSET_TABLE_\n    mov     ebp,[___gmp_modlimb_invert_table+edx+ebp]\n    movzx   ebp,byte [eax+ebp]\t\t\t\t\t\t\t;  inv 8 bits \n%else\n\tmovzx   ebp,byte [___gmp_modlimb_invert_table+eax]\t;  inv 8 bits \n%endif\n\n    lea     eax,[ebp+ebp]\t\t;  2*inv \n    imul    ebp,ebp\t\t\t\t;  inv*inv \n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    lea     esi,[esi+ebx*4]\t\t;  src end \n    imul    ebp,[PARAM_DIVISOR]\t;  inv*inv*d \n    sub     eax,ebp\t\t\t\t;  inv = 2*inv - inv*inv*d \n    lea     ebp,[eax+eax]\t\t;  2*inv \n    imul    eax,eax\t\t\t\t;  inv*inv \n    lea     edi,[edi+ebx*4]\t\t;  dst end \n    neg     ebx\t\t\t\t\t;  -size \n    mov     [PARAM_DST],edi\n    imul    eax,[PARAM_DIVISOR] ;  inv*inv*d \n    sub     ebp,eax\t\t\t\t;  inv = 2*inv - inv*inv*d \n\n    mov     [VAR_INVERSE],ebp\n    mov     eax,[esi+ebx*4]\t\t;  src[0] \n    or      ecx,ecx\n    jnz     Leven\n    jmp     Lodd_entry\t\t\t;  ecx initial carry is zero \n\n;  The dependent chain here is \n; \n;      subl    %edx,%eax        1 \n;      imull   %ebp,%eax        4 \n;      mull    PARAM_DIVISOR    5 \n;                             ---- \n;        total                 10 \n; \n;  and this is the measured speed.  No special scheduling is necessary,out \n;  of order execution hides the load latency. 
\n;\n;  eax scratch (src limb) \n;  ebx counter,limbs,negative \n;  ecx carry bit \n;  edx carry limb,high of last product \n;  esi &src[size] \n;  edi &dst[size] \n\nLodd_top: \n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[esi+ebx*4]\n    sub     eax,ecx\n    sbb     ecx,ecx\n    sub     eax,edx\n    sbb     ecx,0\nLodd_entry: \n    imul    eax,[VAR_INVERSE]\n    mov     [edi+ebx*4],eax\n    neg     ecx\n    inc     ebx\n    jnz     Lodd_top\n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebp,[SAVE_EBP]\n    mov     ebx,[SAVE_EBX]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax src[0] \n;  ebx counter,limbs,negative \n;  ecx shift \n\nLeven: \n    xor     ebp,ebp         ;  initial carry bit \n    xor     edx,edx         ;  initial carry limb (for size==1) \n    inc     ebx\n    jz      Leven_one\n    mov     edi,[esi+ebx*4]\t;  src[1] \n\tshrd\teax,edi,cl\n    jmp     Leven_entry\n\n;  eax scratch \n;  ebx counter,limbs,negative \n;  ecx shift \n;  edx scratch \n;  esi &src[size] \n;  edi &dst[size] and scratch \n;  ebp carry bit \n\nLeven_top: \n    mov     edi,[esi+ebx*4]\n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[-4+esi+ebx*4]\n\tshrd\teax,edi,cl\n    sub     eax,ebp\n    sbb     ebp,ebp\n    sub     eax,edx\n    sbb     ebp,0\n\nLeven_entry: \n    imul    eax,[VAR_INVERSE]\n    mov     edi,[PARAM_DST]\n    neg     ebp\n    mov     [-4+edi+ebx*4],eax\n    inc     ebx\n    jnz     Leven_top\n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[-4+esi]\nLeven_one: \n    shr     eax,cl\n    mov     esi,[SAVE_ESI]\n    sub     eax,ebp\n    mov     ebp,[SAVE_EBP]\n    sub     eax,edx\n    mov     ebx,[SAVE_EBX]\n    imul    eax,[VAR_INVERSE]\n    mov     [-4+edi],eax\n    mov     edi,[SAVE_EDI]\n    add     esp,STACK_SPACE\n    ret\n\n%ifdef\tPIC\nLmovl_eip_ebp: \n    mov     ebp,[esp]\n    ret\n%endif\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/divexact_by3c.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal  ___gmpn_divexact_by3c \n\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n%define\tPARAM_CARRY esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t0 \n\n;   multiplicative inverse of 3,modulo 2^32 \n;   ceil(b/3),ceil(b*2/3) and floor(b*2/3) where b=2^32 \n%define\tINVERSE_3\t\t-0x55555555\n%define\tONE_THIRD_CEIL\t\t0x55555556\n%define\tTWO_THIRDS_CEIL\t\t0xAAAAAAAB\n%define\tTWO_THIRDS_FLOOR\t0xAAAAAAAA\n\n\tsection .text\n\n\talign   8   \n\n___gmpn_divexact_by3c: \n    mov     ecx,[PARAM_SRC]\n    mov     edx,[PARAM_SIZE]\n    dec     edx\n    jnz     Ltwo_or_more\n    mov     edx,[ecx]\n    mov     eax,[PARAM_CARRY]\t\t\t;  risk of cache bank clash here \n    mov     ecx,[PARAM_DST]\n    sub     edx,eax\n    sbb     eax,eax\t\t\t\t\t\t;  0 or -1 \n    imul    edx,edx,INVERSE_3\n    neg     eax\t\t\t\t\t\t\t;  0 or 1 \n    cmp     edx,ONE_THIRD_CEIL\n    sbb     
eax,-1\t\t\t\t\t\t;  +1 if edx>=ceil(b/3) \n    cmp     edx,TWO_THIRDS_CEIL\n    sbb     eax,-1\t\t\t\t\t\t;  +1 if edx>=ceil(b*2/3) \n    mov     [ecx],edx\n    ret\n\n;  eax \n;  ebx \n;  ecx src \n;  edx size-1 \n;  esi \n;  edi \n;  ebp \n\nLtwo_or_more: \n\tFR_push\tebx\n\tFR_push\tesi\n\tFR_push\tedi\n\tFR_push\tebp\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_CARRY]\n    mov     eax,[ecx]\t\t\t\t;  src low limb \n    xor     ebx,ebx\n\tsub     eax,esi\n    mov     esi,TWO_THIRDS_FLOOR\n    lea     ecx,[ecx+edx*4]\t\t\t;  &src[size-1] \n    lea     edi,[edi+edx*4]\t\t\t;  &dst[size-1] \n    adc     ebx,0\t\t\t\t\t;  carry,0 or 1 \n    neg     edx\t\t\t\t\t\t;  -(size-1) \n\n;  The loop needs a source limb ready at the top,which leads to one limb \n;  handled separately at the end,and the special case above for size==1. \n;  There doesn't seem to be any scheduling that would keep the speed but move \n;  the source load and carry subtract up to the top. \n; \n;  The destination cache line prefetching adds 1 cycle to the loop but is \n;  considered worthwhile.  The slowdown is a factor of 1.07,but will prevent \n;  repeated write-throughs if the destination isn't in L1.  A version using \n;  an outer loop to prefetch only every 8 limbs (a cache line) proved to be \n;  no faster,due to unavoidable branch mispreditions in the inner loop. \n; \n;  setc is 2 cycles on P54,so an adcl is used instead.  If the movl $0,%ebx \n;  could be avoided then the src limb fetch could pair up and save a cycle. \n;  This would probably mean going to a two limb loop with the carry limb \n;  alternately positive or negative,since an sbbl %ebx,%ebx will leave a \n;  value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax. \n; \n;  A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as \n;  \"cmpl %edx,$n\" with the immediate as the second operand. 
\n; \n;  The \"4\" source displacement is in the loop rather than the setup because \n;  this gets Ltop aligned to 8 bytes at no cost. \n\n;  eax source limb,carry subtracted \n;  ebx carry (0 or 1) \n;  ecx &src[size-1] \n;  edx counter,limbs,negative \n;  esi TWO_THIRDS_FLOOR \n;  edi &dst[size-1] \n;  ebp scratch (result limb) \n\n\talign   8\nLtop: \n    imul    ebp,eax,INVERSE_3\n    cmp     ebp,ONE_THIRD_CEIL\n    mov     eax,[edi+edx*4]\t\t;  dst cache line prefetch \n    sbb     ebx,-1\t\t\t\t;  +1 if ebp>=ceil(b/3) \n    cmp     esi,ebp\n    mov     eax,[4+ecx+edx*4]\t;  next src limb \n    sbb     eax,ebx\t\t\t\t;  and further -1 if ebp>=ceil(b*2/3) \n    mov     ebx,0\n    adc     ebx,0\t\t\t\t;  new carry \n    mov     [edi+edx*4],ebp\n    inc     edx\n    jnz     Ltop\n    imul    edx,eax,INVERSE_3\n    cmp     edx,ONE_THIRD_CEIL\n    mov     [edi],edx\n    sbb     ebx,-1\t\t\t\t;  +1 if edx>=ceil(b/3) \n    cmp     edx,TWO_THIRDS_CEIL\n    sbb     ebx,-1\t\t\t\t;  +1 if edx>=ceil(b*2/3) \n    pop     ebp\n    mov     eax,ebx\n    pop     edi\n    pop     esi\n    pop     ebx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/divrem_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal  ___gmpn_preinv_divrem_1 \n    global  ___gmpn_divrem_1c \n    global  ___gmpn_divrem_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_divrem_1c\n\texport\t___gmpn_divrem_1\n%endif\n\n%define\tMUL_THRESHOLD\t\t4 \n%define\tPARAM_PREINV_SHIFT      esp+frame+28 \n%define PARAM_PREINV_INVERSE\tesp+frame+24 \n%define PARAM_CARRY     esp+frame+24 \n%define PARAM_DIVISOR   esp+frame+20 \n%define PARAM_SIZE      esp+frame+16 \n%define PARAM_SRC       esp+frame+12 \n%define PARAM_XSIZE     esp+frame+8 \n%define PARAM_DST       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI        esp+frame-12 \n%define SAVE_EBP        esp+frame-16 \n\n%define VAR_NORM        esp+frame-20 \n%define VAR_INVERSE     esp+frame-24 \n%define VAR_SRC\t\t\tesp+frame-28 \n%define VAR_DST\t\t\tesp+frame-32 \n%define VAR_DST_STOP    esp+frame-36 \n%define STACK_SPACE\t\t36 \n%define frame\t\t\t0 \n\n\tsection .text\n\n\talign   16   
\n\n___gmpn_preinv_divrem_1: \n    mov     ecx,[PARAM_XSIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_EDI],edi\n    mov     edx,[PARAM_DST]\n    mov     eax,[-4+esi+ebx*4]\t;  src high limb \n    xor     edi,edi\t\t\t\t;  initial carry (if can't skip a div) \n\tlea     edx,[8+edx+ecx*4]\t;  &dst[xsize+2] \n\txor     ecx,ecx\n    mov     [VAR_DST_STOP],edx\t;  &dst[xsize+2] \n    cmp     eax,ebp\t\t\t\t;  high cmp divisor \n\tcmovc\tedi,eax\n\tcmovnc\tecx,eax\t\t\t\t;  (the latter in case src==dst) \n    mov     [-12+edx+ebx*4],ecx\t;  dst high limb \n\tsbb     ebx,0\t\t\t\t;  skip one division if high<divisor \n    mov     ecx,[PARAM_PREINV_SHIFT]\n    lea     edx,[-8+edx+ebx*4]\t;  &dst[xsize+size] \n    mov     eax,32\n    mov     [VAR_DST],edx\t\t;  &dst[xsize+size] \n    shl     ebp,cl\t\t\t\t;  d normalized \n    sub     eax,ecx\n    mov     [VAR_NORM],ecx\n    movd    mm7,eax\t\t\t\t;  rshift \n    mov     eax,[PARAM_PREINV_INVERSE]\n    jmp     Lstart_preinv\n\n\talign   16\n\n%define       frame   0 \n\n___gmpn_divrem_1c: \n    mov     edx,[PARAM_CARRY]\n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n%define frame   STACK_SPACE \n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_XSIZE]\n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    lea     edi,[-4+edi+ebx*4]\n    jmp     Lstart_1c\n\n;  offset 0x31,close enough to aligned \n\n%define       frame   0 \n\n___gmpn_divrem_1: \n    mov     ecx,[PARAM_SIZE]\n    mov     edx,0\t\t\t\t;  initial carry (if can't skip a div) \n    sub     esp,STACK_SPACE\n%define frame   STACK_SPACE \n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     
[SAVE_EBX],ebx\n    mov     ebx,[PARAM_XSIZE]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    or      ecx,ecx\t\t\t\t;  size \n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    lea     edi,[-4+edi+ebx*4]\t;  &dst[xsize-1] \n    jz      Lno_skip_div\t\t;  if size==0 \n    mov     eax,[-4+esi+ecx*4]\t;  src high limb \n    xor     esi,esi\n    cmp     eax,ebp\t\t\t\t;  high cmp divisor \n\tcmovc\tedx,eax\n\tcmovnc\tesi,eax\t\t\t\t;  (the latter in case src==dst) \n    mov     [edi+ecx*4],esi\t\t;  dst high limb \n    sbb     ecx,0\t\t\t\t;  size-1 if high<divisor \n    mov     esi,[PARAM_SRC]\t\t;  reload \nLno_skip_div: \n\n;  eax  \n;  ebx xsize \n;  ecx size \n;  edx carry \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLstart_1c: \n    lea     eax,[ebx+ecx]\t\t;  size+xsize \n    cmp     eax,MUL_THRESHOLD\n    jae     Lmul_by_inverse\n    or      ecx,ecx\n    jz      Ldivide_no_integer\n\n;  eax scratch (quotient) \n;  ebx xsize \n;  ecx counter \n;  edx scratch (remainder) \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLdivide_integer: \n    mov     eax,[-4+esi+ecx*4]\n    div     ebp\n    mov     [edi+ecx*4],eax\n    dec     ecx\n    jnz     Ldivide_integer\nLdivide_no_integer: \n    mov     edi,[PARAM_DST]\n    or      ebx,ebx\n    jnz     Ldivide_fraction\nLdivide_done: \n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebx,[SAVE_EBX]\n    mov     eax,edx\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax scratch (quotient) \n;  ebx counter \n;  ecx \n;  edx scratch (remainder) \n;  esi \n;  edi dst \n;  ebp divisor \n\nLdivide_fraction: \n    mov     eax,0\n    div     ebp\n    mov     [-4+edi+ebx*4],eax\n    dec     ebx\n    jnz     Ldivide_fraction\n    jmp     Ldivide_done\n\n;  eax \n;  ebx xsize \n;  ecx size \n;  edx carry \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLmul_by_inverse: \n    lea     ebx,[12+edi]   ;  &dst[xsize+2],loop dst stop \n    mov   
  [VAR_DST_STOP],ebx\n    lea     edi,[4+edi+ecx*4] ;  &dst[xsize+size] \n    mov     [VAR_DST],edi\n    mov     ebx,ecx         ;  size \n    bsr     ecx,ebp         ;  31-l \n    mov     edi,edx         ;  carry \n    lea     eax,[1+ecx]    ;  32-l \n    xor     ecx,31         ;  l \n    mov     [VAR_NORM],ecx\n    mov     edx,-1\n    shl     ebp,cl          ;  d normalized \n    movd    mm7,eax\n    mov     eax,-1\n    sub     edx,ebp         ;  (b-d)-1 giving edx:eax = b*(b-d)-1 \n    div     ebp             ;  floor (b*(b-d)-1) / d \n\n;  eax inverse \n;  ebx size \n;  ecx shift \n;  edx \n;  esi src \n;  edi carry \n;  ebp divisor \n;\n;  mm7 rshift \n\nLstart_preinv: \n    mov     [VAR_INVERSE],eax\n    or      ebx,ebx         ;  size \n    lea     eax,[-12+esi+ebx*4] ;  &src[size-3] \n    mov     [VAR_SRC],eax\n    jz      Lstart_zero\n    mov     esi,[8+eax]    ;  src high limb \n    cmp     ebx,1\n    jz      Lstart_one\nLstart_two_or_more: \n    mov     edx,[4+eax]    ;  src second highest limb \n\tshld\tedi,esi,cl\n\tshld\tesi,edx,cl\n    cmp     ebx,2\n    je      Linteger_two_left\n    jmp     Linteger_top\n\nLstart_one: \n\tshld\tedi,esi,cl\n    shl     esi,cl          ;  n10 = high << l \n    jmp     Linteger_one_left\n\nLstart_zero: \n;  Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and \n;  skipped a division. \n\n    shl     edi,cl          ;  n2 = carry << l \n    mov     eax,edi         ;  return value for zero_done \n    cmp     [PARAM_XSIZE],dword 0\n    je      Lzero_done\n    jmp     Lfraction_some\n\n;  This loop runs at about 25 cycles,which is probably sub-optimal,and \n;  certainly more than the dependent chain would suggest.  A better loop,or \n;  a better rough analysis of what's possible,would be welcomed. \n; \n;  In the current implementation,the following successively dependent \n;  micro-ops seem to exist. 
\n; \n;                     uops \n;              n2+n1   1   (addl) \n;              mul     5 \n;              q1+1    3   (addl/adcl) \n;              mul     5 \n;              sub     3   (subl/sbbl) \n;              addback 2   (cmov) \n;                     --- \n;                     19 \n; \n;  Lack of registers hinders explicit scheduling and it might be that the \n;  normal out of order execution isn't able to hide enough under the mul \n;  latencies. \n; \n;  Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than \n;  cmov (and takes one uop off the dependent chain).  A sarl/andl/addl \n;  combination was tried for the addback (despite the fact it would lengthen \n;  the dependent chain) but found to be no faster. \n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (src,dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp d \n;\n;  mm0 scratch (src qword) \n;  mm7 rshift for normalization \n\n\talign   16\nLinteger_top: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[VAR_SRC]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    movq    mm0,[ecx]\t\t\t;  next src limb and the one below it \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    sub     ecx,4\n    mov     [VAR_SRC],ecx\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    mov     eax,ebp\t\t\t\t;  d \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    jz      Lq1_ff\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    mov     ecx,[VAR_DST]\n    psrlq   mm0,mm7\n    sub     esi,eax\n    mov     eax,[VAR_DST_STOP]\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    movd    esi,mm0\n    sbb     ebx,0    ;  q \n    
sub     ecx,4\n    mov     [ecx],ebx\n    cmp     ecx,eax\n    mov     [VAR_DST],ecx\n    jne     Linteger_top\nLinteger_loop_done: \n \n;  Here,and in integer_one_left below,an sbbl $0 is used rather than a jz \n;  q1_ff special case.  This make the code a bit smaller and simpler,and \n;  costs only 2 cycles (each). \n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (src,dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n;\n;  mm7 rshift \n\nLinteger_two_left: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[PARAM_SRC]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    movd    mm0,[ecx]\t\t\t;  src low limb \n    mov     ecx,[VAR_DST_STOP]\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    psllq   mm0,32\n    psrlq   mm0,mm7\n    sub     esi,eax\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    movd    esi,mm0\n    sbb     ebx,0\t\t\t\t;  q \n    mov     [-4+ecx],ebx\n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n;\n;  mm7 rshift \n\nLinteger_one_left: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[VAR_DST_STOP]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE]\t;  m*(n2+n1) \n    add     
eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\t\t\t\t;  q1 if q1+1 overflowed \n    mul     ebx\n    sub     esi,eax\n    mov     eax,[PARAM_XSIZE]\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    sbb     ebx,0\t\t\t\t;  q \n    mov     [-8+ecx],ebx\n    sub     ecx,8\n    or      eax,eax\t\t\t\t;  xsize \n    jnz     Lfraction_some\n    mov     eax,edi\nLfraction_done: \n    mov     ecx,[VAR_NORM]\nLzero_done: \n    mov     ebp,[SAVE_EBP]\n    mov     edi,[SAVE_EDI]\n    mov     esi,[SAVE_ESI]\n    mov     ebx,[SAVE_EBX]\n    add     esp,STACK_SPACE\n    shr     eax,cl\n    emms\n    ret\n\n;  Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword \n;  of q*d is simply -d and the remainder n-q*d = n10+d \n;\n;  eax (divisor) \n;  ebx (q1+1 == 0) \n;  ecx \n;  edx \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n\nLq1_ff: \n    mov     ecx,[VAR_DST]\n    mov     edx,[VAR_DST_STOP]\n    sub     ecx,4\n    mov     [VAR_DST],ecx\n    psrlq   mm0,mm7\n    lea     edi,[ebp+esi]\t\t;  n-q*d remainder -> next n2 \n    mov     [ecx],dword -1\n    movd    esi,mm0\t\t\t\t;  next n10 \n    cmp     edx,ecx\n    jne     Linteger_top\n    jmp     Linteger_loop_done\n\n; \n;  In the current implementation,the following successively dependent \n;  micro-ops seem to exist. \n; \n;                     uops \n;              mul     5 \n;              q1+1    1   (addl) \n;              mul     5 \n;              sub     3   (negl/sbbl) \n;              addback 2   (cmov) \n;                     --- \n;                     16 \n; \n;  The loop in fact runs at about 17.5 cycles.  Using a sarl/andl/addl for \n;  the addback was found to be a touch slower. 
\n\n;  eax \n;  ebx \n;  ecx \n;  edx \n;  esi \n;  edi carry \n;  ebp divisor \n\n\talign   16\nLfraction_some: \n    mov     esi,[PARAM_DST]\n    mov     ecx,[VAR_DST_STOP]\t;  &dst[xsize+2] \n    mov     eax,edi\n    sub     ecx,8\t\t\t\t;  &dst[xsize] \n\n;  eax n2,then scratch \n;  ebx scratch (nadj,q1) \n;  ecx dst,decrementing \n;  edx scratch \n;  esi dst stop point \n;  edi n2 \n;  ebp divisor \n\n\talign   16\nLfraction_top: \n    mul     dword [VAR_INVERSE]\t;  m*n2 \n    mov     eax,ebp\t\t\t\t;  d \n    sub     ecx,4\t\t\t\t;  dst \n    lea     ebx,[edi+1]\n    add     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*n2) = q1+1 \n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    neg     eax\t\t\t\t\t;  low of n - (q1+1)*d \n    sbb     edi,edx\t\t\t\t;  high of n - (q1+1)*d,caring only about carry \n    lea     edx,[ebp+eax]\n\tcmovc\teax,edx\n    sbb     ebx,0\t\t\t\t;  q \n    mov     edi,eax\t\t\t\t;  remainder->n2 \n    cmp     ecx,esi\n    mov     [ecx],ebx\t\t\t;  previous q \n    jne     Lfraction_top\n    jmp     Lfraction_done\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/hamdist.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n    mov     
eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+12 \n%define PARAM_SRC2  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\t\n\tglobal ___gmpn_hamdist\n\t\n\tph_fun\t___g,mpn_hamdist,1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/lshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal\t___gmpn_lshift \n\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t8 \n\n;   minimum 5,because the unrolled loop can't handle less \n%define       UNROLL_THRESHOLD  5 \n\n\tsection .text\n\talign   8\n\n___gmpn_lshift: \n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n    jae     Lunroll\n    mov     edi,[-4+ebx+eax*4]\t;  src high limb \n    dec     eax\n    jnz     Lsimple\n\tshld\teax,edi,cl\n    shl     edi,cl\n    mov     [edx],edi\t\t\t;  dst low limb \n    pop     edi\t\t\t\t\t;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\nLsimple: \n    movd    mm5,[ebx+eax*4] ;  src 
high limb \n    movd    mm6,ecx         ;  lshift \n    neg     ecx\n    psllq   mm5,mm6\n    add     ecx,32\n    movd    mm7,ecx\n    psrlq   mm5,32          ;  retval \n\n;  eax counter,limbs,negative \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n;  mm7 32-shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4-4]\n    dec     eax\n    psrlq   mm0,mm7\n    movd    [4+edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    movd    eax,mm5\n    psllq   mm0,mm6\n    pop     edi\n    pop     ebx\n    movd    [edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx+eax*4-4]\t;  src high limb \n    lea     edi,[ebx+eax*4]\n    movd    mm6,ecx\t\t\t\t;  lshift \n    and     edi,4\n    psllq   mm5,mm6\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process high limb separately (marked xxx) to \n;  make it so. \n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-------+-- \n;   |               |           \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-- \n;   |  xxx  |       |   \n;   +-------+-------+-- \n\n    movq    mm0,[ebx+eax*4-8]\t\t;  unaligned load \n    psllq   mm0,mm6\n    dec     eax\n    psrlq   mm0,32\n    movd    [edx+eax*4],mm0\nLstart_src_aligned: \n    movq    mm1,[ebx+eax*4-8]\t\t;  src high qword \n    lea     edi,[edx+eax*4]\n    and     edi,4\n    psrlq   mm5,32\t\t\t\t\t;  return value \n    movq    mm3,[ebx+eax*4-16]\t\t;  src second highest qword \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,subtract 4 to make it so,and pretend the shift \n;  is 32 bits extra.  High limb of dst (marked xxx) handled here \n;  separately. 
\n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-- \n;   |      mm1      |   \n;   +-------+-------+-- \n;                 0mod8   4mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-------+-- \n;   |  xxx  |               | \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psllq   mm0,mm6\n    movd    mm6,ecx\n    psrlq   mm0,32\n\n;  wasted cycle here waiting for %mm0 \n\n    movd    [-4+edx+eax*4],mm0\n    sub     edx,4\nLstart_dst_aligned: \n\n    psllq   mm1,mm6\n    neg     ecx\t\t\t\t;  -shift \n    add     ecx,64\t\t\t;  64-shift \n    movq    mm2,mm3\n    movd    mm7,ecx\n    sub     eax,8\t\t\t;  size-8 \n    psrlq   mm3,mm7\n    por     mm3,mm1         ;  mm3 ready to store \n    jc      Lfinish\n\n;  The comments in mpn_rshift apply here too. \n\n;  eax counter,limbs \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 \n;  mm1 \n;  mm2 src qword from 16(%ebx,%eax,4) \n;  mm3 dst qword ready to store to 24(%edx,%eax,4) \n; \n;  mm5 return value \n;  mm6 lshift \n;  mm7 rshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3\n    por     mm0,mm2\n    movq    mm3,[ebx+eax*4]\n    psllq   mm1,mm6\n    movq    [16+edx+eax*4],mm0\n    movq    mm2,mm3 \n\tpsrlq   mm3,mm7\n    sub     eax,4\n\tpor     mm3,mm1\n    jnc     Lunroll_loop\nLfinish: \n;  eax -4 to -1 representing respectively 0 to 3 limbs remaining \n\n    test    al,2\n    jz      Lfinish_no_two\n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    sub     eax,2\nLfinish_no_two: \n\n;  eax -4 or -3 representing respectively 0 or 1 limbs remaining \n;  mm2 src prev qword,from 
16(%ebx,%eax,4) \n;  mm3 dst qword,for 24(%edx,%eax,4) \n\n    test    al,1\n    movd    eax,mm5  ;  retval \n    pop     edi\n    jz      Lfinish_zero\n\n;  One extra src limb,destination was aligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;  dest         edx+12           edx+4     edx \n;  --+---------------+---------------+-------+ \n;    |      mm3      |               |       | \n;  --+---------------+---------------+-------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra src limb,destination was unaligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;          dest         edx+12           edx+4 \n;          --+---------------+---------------+ \n;            |      mm3      |               | \n;          --+---------------+---------------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 4(%edx),and in the aligned case \n;  there's an extra limb of dst to be formed from that extra src limb \n;  left shifted. \n\n    movd    mm0,[ebx]\n    psllq   mm2,mm6\n    movq    [12+edx],mm3\n    psllq   mm0,32\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    por     mm0,mm2\n    psllq   mm1,mm6\n    movq    [4+edx],mm0\n    psrlq   mm1,32\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n    movd    [edx],mm1\nLfinish_one_unaligned: \n    emms\n    ret\nLfinish_zero: \n\n;  No extra src limbs,destination was aligned. 
\n;\n;                  source          ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;  dest          edx+8             edx \n;  --+---------------+---------------+ \n;    |      mm3      |               | \n;  --+---------------+---------------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra src limbs,destination was unaligned. \n;\n;                source            ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;          dest          edx+8   edx+4 \n;          --+---------------+-------+ \n;            |      mm3      |       | \n;          --+---------------+-------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  The movd for the unaligned case writes the same data to 4(%edx) \n;  that the movq does for the aligned case. \n\n    movq    [8+edx],mm3\n    and     ecx,32\n    psllq   mm2,mm6\n    jz      Lfinish_zero_unaligned\n    movq    [edx],mm2\nLfinish_zero_unaligned: \n    psrlq   mm2,32\n    pop     ebx\n    movd    eax,mm5  ;  retval \n    movd    [4+edx],mm2\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/mod_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n    global  ___gmpn_preinv_mod_1 \n    global  ___gmpn_mod_1c     \n\tglobal  ___gmpn_mod_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_mod_1c\n\texport\t___gmpn_mod_1\n%endif\n\n%define\tMUL_NORM_THRESHOLD      4 \n%define MUL_UNNORM_THRESHOLD\t5 \n\n%define\tMUL_NORM_DELTA\tMUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD\n\n%define\tPARAM_INVERSE\tesp+frame+16 \n%define PARAM_CARRY     esp+frame+16 \n%define PARAM_DIVISOR   esp+frame+12 \n%define PARAM_SIZE      esp+frame+8 \n%define PARAM_SRC       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI        esp+frame-12 \n%define SAVE_EBP        esp+frame-16 \n\n%define\tVAR_NORM        esp+frame-20 \n%define VAR_INVERSE     esp+frame-24 \n%define STACK_SPACE     24 \n\n\tsection .text\n\t\n\talign   16\n\n%define frame\t0 \n\n___gmpn_preinv_mod_1: \n    mov     edx,[PARAM_SRC]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_EBX],ebx\n    mov     
ebx,[PARAM_SIZE]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     eax,[PARAM_INVERSE]\n    mov     [SAVE_EDI],edi\n    mov     edi,[-4+edx+ebx*4]\t;  src high limb \n    mov     [VAR_NORM],dword 0\n    lea     ecx,[-8+edx+ebx*4]\t;  &src[size-2] \n    mov     esi,edi\n    sub     edi,ebp\t\t\t\t;  high-divisor \n\tcmovc\tedi,esi\n    dec     ebx\n    jnz     Lpreinv_entry\n    jmp     Ldone_edi\n     \n\talign   16\n\n%define       frame   0 \n\n___gmpn_mod_1c: \n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_EBP],ebp\n    mov     eax,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     edx,[PARAM_CARRY]\n    mov     esi,[PARAM_SRC]\n    or      ecx,ecx\n    jz      Ldone_edx       ;  result==carry if size==0 \n    sar     eax,31\n    mov     ebp,[PARAM_DIVISOR]\n    and     eax,MUL_NORM_DELTA\n    add     eax,MUL_UNNORM_THRESHOLD\n    cmp     ecx,eax\n    jb      Ldivide_top\n\n;  The carry parameter pretends to be the src high limb. 
\n    mov     [SAVE_EBX],ebx\n    lea     ebx,[1+ecx]    ;  size+1 \n    mov     eax,edx         ;  carry \n    jmp     Lmul_by_inverse_1c\n\n\talign   16\n\n%define       frame   0\n \n___gmpn_mod_1: \n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     edx,0\t\t\t\t;  initial carry (if can't skip a div) \n    mov     [SAVE_ESI],esi\n    mov     eax,[PARAM_SRC]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     esi,[PARAM_DIVISOR]\n    or      ecx,ecx\n    jz      Ldone_edx\n    mov     eax,[-4+eax+ecx*4]\t;  src high limb \n    sar     ebp,31\n    and     ebp,MUL_NORM_DELTA\n    add     ebp,MUL_UNNORM_THRESHOLD\n    cmp     eax,esi\t\t\t\t;  carry flag if high<divisor \n\tcmovc\tedx,eax\n    mov     esi,[PARAM_SRC]\n    sbb     ecx,0\t\t\t\t;  size-1 to skip one div \n    jz      Ldone_eax\t\t\t;  done if had size==1 \n    cmp     ecx,ebp\n    mov     ebp,[PARAM_DIVISOR]\n    jae     Lmul_by_inverse\n\n;  eax scratch (quotient) \n;  ebx \n;  ecx counter,limbs,decrementing \n;  edx scratch (remainder) \n;  esi src \n;  edi \n;  ebp divisor \n\nLdivide_top: \n    mov     eax,[-4+esi+ecx*4]\n    div     ebp\n    dec     ecx\n    jnz     Ldivide_top\nLdone_edx: \n    mov     eax,edx\nLdone_eax: \n    mov     esi,[SAVE_ESI]\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax src high limb \n;  ebx \n;  ecx \n;  edx \n;  esi src \n;  edi \n;  ebp divisor \n\nLmul_by_inverse: \n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\nLmul_by_inverse_1c: \n    bsr     ecx,ebp\t\t\t\t;  31-l \n    mov     [SAVE_EDI],edi\n    xor     ecx,31\t\t\t\t;  l \n    mov     [VAR_NORM],ecx\n    shl     ebp,cl\t\t\t\t;  d normalized \n    mov     edi,eax\t\t\t\t;  src high -> n2 \n    sub     eax,ebp\n\tcmovnc\tedi,eax\n    mov     eax,-1\n    mov     edx,-1\n    sub     edx,ebp\t\t\t\t;  (b-d)-1 so  edx:eax = b*(b-d)-1 \n    lea     ecx,[-8+esi+ebx*4] ;  &src[size-2] \n    div     
ebp\t\t\t\t\t;  floor (b*(b-d)-1) / d \nLpreinv_entry: \n    mov     [VAR_INVERSE],eax\n\n;  No special scheduling of loads is necessary in this loop,out of order \n;  execution hides the latencies already. \n; \n;  The way q1+1 is generated in %ebx and d is moved to %eax for the multiply \n;  seems fastest.  The obvious change to generate q1+1 in %eax and then just \n;  multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle \n;  slower,for no obvious reason. \n\n;  eax n10 (then scratch) \n;  ebx scratch (nadj,q1) \n;  ecx src pointer,decrementing \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n\n\talign   16\nLinverse_top: \n    mov     eax,[ecx]\t\t\t;  next src limb \n    mov     esi,eax\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ebx,ebp\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    sub     ecx,4\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    jz      Lq1_ff\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    sub     esi,eax\t\t\t\t;  low n - (q1+1)*d \n    sbb     edi,edx\t\t\t\t;  high n - (q1+1)*d,0 or -1 \n    and     edi,ebp\t\t\t\t;  d if underflow \n    add     edi,esi\t\t\t\t;  remainder with addback if necessary \n    cmp     ecx,[PARAM_SRC]\n    jae     Linverse_top\n\n;  %edi is the remainder modulo d*2^n and now must be reduced to \n;  0<=r<d by calculating r*2^n mod d*2^n and then right shifting by \n;  n.  If d was already normalized on entry so that n==0 then nothing \n;  is needed here.  The chance of n==0 is low,but it's true of say \n;  PP from gmp-impl.h. 
\n;\n;  eax \n;  ebx \n;  ecx \n;  edx \n;  esi  \n;  edi remainder \n;  ebp divisor (normalized) \n\nLinverse_loop_done: \n    mov     ecx,[VAR_NORM]\n    mov     esi,0\n    or      ecx,ecx\n    jz      Ldone_edi\n\n;  Here use %edi=n10 and %esi=n2,opposite to the loop above. \n;\n;  The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1 \n;  back,rather than q1_ff special case code.  This is simpler and \n;  costs only 2 uops. \n\n\tshld\tesi,edi,cl\n    shl     edi,cl\n    mov     eax,edi\t\t\t\t;  n10 \n    mov     ebx,ebp\t\t\t\t;  d \n    sar     eax,31\t\t\t\t;  -n1 \n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,edi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,esi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE]\t;  m*(n2+n1) \n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+esi]\t\t\t;  n2+1 \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\n    mov     eax,ebp\t\t\t\t;  d \n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    mov     ebx,[SAVE_EBX]\n    sub     edi,eax\t\t\t\t;  low  n - (q1+1)*d is remainder \n    sbb     esi,edx\t\t\t\t;  high n - (q1+1)*d,0 or -1 \n    and     esi,ebp\n    mov     ebp,[SAVE_EBP]\n    lea     eax,[esi+edi]\t\t;  remainder \n    mov     esi,[SAVE_ESI]\n    shr     eax,cl\t\t\t\t;  denorm remainder \n    mov     edi,[SAVE_EDI]\n    add     esp,STACK_SPACE\n    ret\nLdone_edi: \n    mov     ebx,[SAVE_EBX]\n    mov     eax,edi\n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword \n;  of q*d is simply -d and the remainder n-q*d = n10+d. \n; \n;  This is reached only very rarely. 
\n;\n;  eax (divisor) \n;  ebx (q1+1 == 0) \n;  ecx src pointer \n;  edx \n;  esi n10 \n;  edi (n2) \n;  ebp divisor \n\nLq1_ff: \n    lea     edi,[ebp+esi]  ;  n-q*d remainder -> next n2 \n    cmp     ecx,[PARAM_SRC]\n    jae     Linverse_top\n    jmp     Linverse_loop_done\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/p3mmx/hamdist.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\tglobal  %1%2\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n 
   mov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+12 \n%define PARAM_SRC2  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n;   global  ___gmpn_hamdist\n\tph_fun\t___g,mpn_hamdist,1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/p3mmx/popcount.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\tglobal  %1%2\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n 
   mov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n;   global  ___gmpn_popcount\n\tph_fun\t___g,mpn_popcount,0\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/popcount.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n    mov     
eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n\tglobal ___gmpn_popcount\n\t\n\tph_fun\t___g,mpn_popcount,0\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p3/submul_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tUNROLL_LOG2\t\t4\n%define\tUNROLL_COUNT\t(1 << UNROLL_LOG2)\n%define\tUNROLL_MASK\t\tUNROLL_COUNT-1  \n%define\tUNROLL_BYTES\t4*UNROLL_COUNT\n\n%ifdef\tPIC\n%define\tUNROLL_THRESHOLD\t5 \n%else\n%define\tUNROLL_THRESHOLD\t5 \n%endif\n\n%define\tPARAM_CARRY\t\t\tesp+frame+20 \n%define PARAM_MULTIPLIER\tesp+frame+16 \n%define PARAM_SIZE\t\t\tesp+frame+12 \n%define PARAM_SRC\t\t\tesp+frame+8 \n%define PARAM_DST\t\t\tesp+frame+4 \n\n%macro\tmul_fun\t4\n\n%ifdef\tDLL\n\texport\t%1%3\n\texport  %1%4\n%endif\n\n\talign   32\n%define\tframe\t0\n%1%4:\n    FR_push ebx\n    mov     ebx,[PARAM_CARRY]\n\tjmp     %%Lstart_nc\n\n%define\tframe\t0\n%1%3:\n\tFR_push ebx\n    xor     ebx,ebx\t;  initial carry \n%%Lstart_nc: \n    mov     ecx,[PARAM_SIZE]\n    FR_push esi\n    mov     esi,[PARAM_SRC]\n    FR_push\tedi\n    mov     edi,[PARAM_DST]\n    FR_push ebp\n    cmp     ecx,UNROLL_THRESHOLD\n    mov     ebp,[PARAM_MULTIPLIER]\n    jae     %%Lunroll\n\n;  simple 
loop \n;  this is offset 0x22,so close enough to aligned \n;  eax scratch \n;  ebx carry \n;  ecx counter \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n\n%%Lsimple: \n    mov     eax,[esi]\n    add     edi,4\n    mul     ebp\n    add     eax,ebx\n    adc     edx,0\n\t%2\t\t[edi-4],eax\n    mov     ebx,edx\n    adc     ebx,0\n    dec     ecx\n    lea     esi,[4+esi]\n    jnz     %%Lsimple\n    pop     ebp\n    pop     edi\n    pop     esi\n    mov     eax,ebx\n    pop     ebx\n    ret\n\n;  VAR_JUMP holds the computed jump temporarily because there's not enough \n;  registers when doing the mul for the initial two carry limbs. \n; \n;  The add/adc for the initial carry in %ebx is necessary only for the \n;  mpn_add/submul_1c entry points.  Duplicating the startup code to \n;  eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good \n;  idea. \n;\n;   overlapping with parameters already fetched \n\n%define\tVAR_COUNTER\tPARAM_SIZE\n%define\tVAR_JUMP\tPARAM_DST\n\n; VAL1 = ifelse(UNROLL_BYTES,256,128)\n%define\tVAL1\t128\n; VAL2 = ifelse(UNROLL_BYTES,256,-128)\n%define\tVAL2   -128\n\n;  this is offset 0x43,so close enough to aligned \n;  eax \n;  ebx initial carry \n;  ecx size \n;  edx \n;  esi src \n;  edi dst \n;  ebp \n\n%%Lunroll: \n    mov     edx,ecx\n    dec     ecx\n    sub     edx,2\n    neg     ecx\n    shr     edx,UNROLL_LOG2\n    and     ecx,UNROLL_MASK\n    mov     [VAR_COUNTER],edx\n    mov     edx,ecx\n        \n;  15 code bytes per limb \n\n%ifdef\tPIC\n\tcall    %%Lhere\n%%Lhere: \n    shl     edx,4\n    neg     ecx\n    lea     edx,[edx+ecx*1]\n    add     edx,%%Lentry-%%Lhere\n    add     edx,[esp]\n\tadd\t\tesp,4\n%else\n\tshl     edx,4\n    neg     ecx\n\tlea\t\tedx,[%%Lentry+edx+ecx]\n%endif\n    mov     eax,[esi]\t\t\t;  src low limb \n    mov     [VAR_JUMP],edx\n\tlea\t\tesi,[VAL1+4+esi+ecx*4]\n    mul     ebp\n    add     eax,ebx\t\t\t\t;  initial carry (from _1c) \n    adc     edx,0\n    mov     
ebx,edx\t\t\t\t;  high carry \n\tlea\t\tedi,[VAL1+edi+ecx*4]\n    mov     edx,[VAR_JUMP]\n    test    ecx,1\n    mov     ecx,eax\t\t\t\t;  low carry \n\tcmovnz\tecx,ebx\n\tcmovnz\tebx,eax\n    jmp     edx\n\n;  eax scratch \n;  ebx carry hi \n;  ecx carry lo \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n;\n;  VAR_COUNTER loop counter \n;\n;  15 code bytes per limb \n\n%define\tCHUNK_COUNT\t2 \n\n\talign   32\n%%Ltop: \n\tadd     edi,UNROLL_BYTES\n%%Lentry: \n%assign\tdisp\tVAL2\n%rep\tUNROLL_COUNT/CHUNK_COUNT\n\tmov\t\teax,[byte disp+esi]\n\tmul     ebp\n\t%2\t\t[byte disp+edi],ecx\n    adc     ebx,eax\n    mov     ecx,edx\n    adc     ecx,0\n    mov     eax,[byte disp+4+esi]\n    mul     ebp\n\t%2\t\t[byte disp+4+edi],ebx\n    adc     ecx,eax\n    mov     ebx,edx\n    adc     ebx,0\n%assign\t\tdisp\tdisp+4*CHUNK_COUNT\n%endrep\n\n    dec     dword [VAR_COUNTER]\n    lea     esi,[UNROLL_BYTES+esi]\n    jns     %%Ltop\n\n%assign\tdisp\tUNROLL_BYTES+VAL2\n\t%2\t\t[disp+edi],ecx\n    mov     eax,ebx\n    pop     ebp\n    pop     edi\n    pop     esi\n    pop     ebx\n    adc     eax,0\n    ret\n%endmacro\n\n\tsection .text\n\tglobal ___gmpn_submul_1\n\tglobal ___gmpn_submul_1c\n\t\n\tmul_fun\t___g,sub,mpn_submul_1,mpn_submul_1c\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/add_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\"\n\n\tsection .text\n\n\tglobal\t___gmpn_add_nc\n%ifdef\tDLL\n\texport\t___gmpn_add_nc\n%endif\n\n\talign\t8\n___gmpn_add_nc:\n    movd    mm0,[20+esp]\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\nt1: movd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t1\n    movd    eax,mm0\n    mov     ebx,[8+esp]\n    emms\n    ret\n\n\tglobal\t___gmpn_add_n\n%ifdef\tDLL\n\texport\t___gmpn_add_n\n%endif\n\n\talign\t8\n___gmpn_add_n:\t\n\tpxor    mm0,mm0\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     
ecx\n\tjz\t\tt3\nt2:\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t2\n    movd    eax,mm0\nt3: mov     ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/addmul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_addmul_1\n\tglobal\t___gmpn_addmul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_addmul_1\n\texport\t___gmpn_addmul_1c\n%endif\n\n\talign\t16\n___gmpn_addmul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_addmul_1: \n\tpxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    mov     ecx,[12+esp]\n    mov     edx,[4+esp]\n    movd    mm7,[16+esp]\n\noop: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpmuludq mm1,mm7\n\tpaddq   mm2,mm1\n\tpaddq   mm0,mm2\n\tsub     ecx,1\n\tmovd    [edx],mm0\n\tpsrlq   mm0,32\n\tlea     edx,[4+edx]\n\tjnz     oop\n\tmovd    eax,mm0\n\temms\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/copyd.asm",
    "content": "\n;  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_copyd\n%ifdef\tDLL\n\texport\t___gmpn_copyd\n%endif\n\n\talign\t8\n___gmpn_copyd:\n\n    mov     ecx,[12+esp]\n    mov     eax,[8+esp]\n    mov     edx,[4+esp]\n    mov     [12+esp],ebx\n    add     ecx,-1\n    js      nd\noop: \n    mov     ebx,[eax+ecx*4]\n    mov     [edx+ecx*4],ebx\n    add     ecx,-1\n    jns     oop\nnd: \n    mov     ebx,[12+esp]\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/copyi.asm",
    "content": "\n;  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"..\\x86i.inc\"\n\n\tsection\t.text\n\n\tglobal\t___gmpn_copyi\n%ifdef\tDLL\n\texport\t___gmpn_copyi\n%endif\n\n\talign\t8\n___gmpn_copyi:\n    mov     ecx, [12+esp]\n    cmp     ecx, 150\n    jg      rm\n    mov     eax, [8+esp]\n    mov     edx, [4+esp]\n    mov     [12+esp],ebx\n    test    ecx,ecx\n    jz      nd\noop: \n    mov     ebx, [eax]\n    lea     eax, [4+eax]\n    add     ecx, -1\n    mov     [edx],ebx\n    lea     edx, [4+edx]\n    jnz     oop\nnd: \n    mov     ebx, [12+esp]\n    ret\nrm:\n    cld\n    mov     eax,esi\n    mov     esi, [8+esp]\n    mov     edx,edi\n    mov     edi, [4+esp]\n    rep movsd\n    mov     esi,eax\n    mov     edi,edx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\"\n%define\tp_space\t16\n\n\textern\t___gmp_modlimb_invert_table\n\n\tsection .text\n\n\tglobal\t___gmpn_divexact_1\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n\talign\t16\n___gmpn_divexact_1: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[16+esp]\n    sub     edx,1\n    jnz     two_or_more\n    mov     eax,[eax]\n    xor     edx,edx\n    div     ecx\n    mov     ecx,[4+esp]\n    mov     [ecx],eax\n    ret\n    \ntwo_or_more: \n    mov     eax,ecx\n    bsf     ecx,ecx\n    shr     eax,cl\n    movd    mm6,eax\n    movd    mm7,ecx\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm5,eax\n    movd    mm0,eax\n\tpmuludq mm5,mm5\n\tpmuludq mm5,mm6\n    paddd   mm0,mm0\n    psubd   mm0,mm5\n    pxor    mm5,mm5\n    paddd   mm5,mm0\n\tpmuludq mm0,mm0\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n\tpmuludq mm0,mm6\n    paddd   mm5,mm5\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    pxor    
mm1,mm1\n    psubd   mm5,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    movd    mm3,[4+eax]\n    add     eax,4\n    punpckldq mm2,mm3\n    psrlq   mm2,mm7\n    pand    mm2,mm4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    add     ecx,4\n    movq    mm1,mm6\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     edx,1\n    jnz     top\n\ndone: \n    movd    mm2,[eax]\n    psrlq   mm2,mm7\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    emms\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86w/p4/divexact_by3c.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Library General Public License as\n;  published by the Free Software Foundation; either version 2 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Library General Public License for more details.\n; \n;  You should have received a copy of the GNU Library General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_divexact_by3c\t\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n\talign\t16\n___gmpn_divexact_by3c: \n    mov     eax,[8+esp]\n    pxor    mm0,mm0\n    movd    mm1,[16+esp]\n    pcmpeqd mm6,mm6\n    movd    mm7,[val]\n    mov     edx,[4+esp]\n    psrlq   mm6,32\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm7\n    movd    [edx],mm2\n    add     edx,4\n    movq    mm1,mm6\n    pand    mm1,mm2\n    pand    mm2,mm6\n    psllq   mm1,1\n\tpaddq   mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n    paddd   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tsection\t.data\nval:\n\tdd\t0xAAAAAAAB\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/hamdist.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    section .data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t12\n%define\tPARAM_SRC2\t 8\n%define\tPARAM_SRC\t 4\n    global\t___gmpn_hamdist\n%ifdef\tDLL\n    export\t___gmpn_hamdist\n%endif\n    align   16\n___gmpn_hamdist:\n    ph_fun\t1\n\n    end\n"
  },
  {
    "path": "mpn/x86w/p4/lshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t8 \n\n;   minimum 5,because the unrolled loop can't handle less \n%define       UNROLL_THRESHOLD  5 \n\n\tsection .text\n\n\tglobal\t___gmpn_lshift\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n\talign   8\n___gmpn_lshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n    jae     Lunroll\n    mov     edi,[-4+ebx+eax*4]\t;  src high limb \n    dec     eax\n    jnz     Lsimple\n\tshld\teax,edi,cl\n    shl     edi,cl\n    mov     [edx],edi\t\t\t;  dst low limb \n    pop     edi\t\t\t\t\t;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\nLsimple: \n    movd    mm5,[ebx+eax*4] ;  src high 
limb \n    movd    mm6,ecx         ;  lshift \n    neg     ecx\n    psllq   mm5,mm6\n    add     ecx,32\n    movd    mm7,ecx\n    psrlq   mm5,32          ;  retval \n\n;  eax counter,limbs,negative \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n;  mm7 32-shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4-4]\n    dec     eax\n    psrlq   mm0,mm7\n    movd    [4+edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    movd    eax,mm5\n    psllq   mm0,mm6\n    pop     edi\n    pop     ebx\n    movd    [edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx+eax*4-4]\t;  src high limb \n    lea     edi,[ebx+eax*4]\n    movd    mm6,ecx\t\t\t\t;  lshift \n    and     edi,4\n    psllq   mm5,mm6\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process high limb separately (marked xxx) to \n;  make it so. \n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-------+-- \n;   |               |           \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-- \n;   |  xxx  |       |   \n;   +-------+-------+-- \n\n    movq    mm0,[ebx+eax*4-8]\t\t;  unaligned load \n    psllq   mm0,mm6\n    dec     eax\n    psrlq   mm0,32\n    movd    [edx+eax*4],mm0\nLstart_src_aligned: \n    movq    mm1,[ebx+eax*4-8]\t\t;  src high qword \n    lea     edi,[edx+eax*4]\n    and     edi,4\n    psrlq   mm5,32\t\t\t\t\t;  return value \n    movq    mm3,[ebx+eax*4-16]\t\t;  src second highest qword \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,subtract 4 to make it so,and pretend the shift \n;  is 32 bits extra.  High limb of dst (marked xxx) handled here \n;  separately. 
\n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-- \n;   |      mm1      |   \n;   +-------+-------+-- \n;                 0mod8   4mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-------+-- \n;   |  xxx  |               | \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psllq   mm0,mm6\n    movd    mm6,ecx\n    psrlq   mm0,32\n\n;  wasted cycle here waiting for %mm0 \n\n    movd    [-4+edx+eax*4],mm0\n    sub     edx,4\nLstart_dst_aligned: \n\n    psllq   mm1,mm6\n    neg     ecx\t\t\t\t;  -shift \n    add     ecx,64\t\t\t;  64-shift \n    movq    mm2,mm3\n    movd    mm7,ecx\n    sub     eax,8\t\t\t;  size-8 \n    psrlq   mm3,mm7\n    por     mm3,mm1         ;  mm3 ready to store \n    jc      Lfinish\n\n;  The comments in mpn_rshift apply here too. \n\n;  eax counter,limbs \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 \n;  mm1 \n;  mm2 src qword from 16(%ebx,%eax,4) \n;  mm3 dst qword ready to store to 24(%edx,%eax,4) \n; \n;  mm5 return value \n;  mm6 lshift \n;  mm7 rshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3\n    por     mm0,mm2\n    movq    mm3,[ebx+eax*4]\n    psllq   mm1,mm6\n    movq    [16+edx+eax*4],mm0\n    movq    mm2,mm3 \n\tpsrlq   mm3,mm7\n    sub     eax,4\n\tpor     mm3,mm1\n    jnc     Lunroll_loop\nLfinish: \n;  eax -4 to -1 representing respectively 0 to 3 limbs remaining \n\n    test    al,2\n    jz      Lfinish_no_two\n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    sub     eax,2\nLfinish_no_two: \n\n;  eax -4 or -3 representing respectively 0 or 1 limbs remaining \n;  mm2 src prev qword,from 
16(%ebx,%eax,4) \n;  mm3 dst qword,for 24(%edx,%eax,4) \n\n    test    al,1\n    movd    eax,mm5  ;  retval \n    pop     edi\n    jz      Lfinish_zero\n\n;  One extra src limb,destination was aligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;  dest         edx+12           edx+4     edx \n;  --+---------------+---------------+-------+ \n;    |      mm3      |               |       | \n;  --+---------------+---------------+-------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra src limb,destination was unaligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;          dest         edx+12           edx+4 \n;          --+---------------+---------------+ \n;            |      mm3      |               | \n;          --+---------------+---------------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 4(%edx),and in the aligned case \n;  there's an extra limb of dst to be formed from that extra src limb \n;  left shifted. \n\n    movd    mm0,[ebx]\n    psllq   mm2,mm6\n    movq    [12+edx],mm3\n    psllq   mm0,32\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    por     mm0,mm2\n    psllq   mm1,mm6\n    movq    [4+edx],mm0\n    psrlq   mm1,32\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n    movd    [edx],mm1\nLfinish_one_unaligned: \n    emms\n    ret\nLfinish_zero: \n\n;  No extra src limbs,destination was aligned. 
\n;\n;                  source          ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;  dest          edx+8             edx \n;  --+---------------+---------------+ \n;    |      mm3      |               | \n;  --+---------------+---------------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra src limbs,destination was unaligned. \n;\n;                source            ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;          dest          edx+8   edx+4 \n;          --+---------------+-------+ \n;            |      mm3      |       | \n;          --+---------------+-------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  The movd for the unaligned case writes the same data to 4(%edx) \n;  that the movq does for the aligned case. \n\n    movq    [8+edx],mm3\n    and     ecx,32\n    psllq   mm2,mm6\n    jz      Lfinish_zero_unaligned\n    movq    [edx],mm2\nLfinish_zero_unaligned: \n    psrlq   mm2,32\n    pop     ebx\n    movd    eax,mm5  ;  retval \n    movd    [4+edx],mm2\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/mmx/hamdist.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t12\n%define\tPARAM_SRC2\t 8\n%define\tPARAM_SRC\t 4\n    global\t___gmpn_hamdist\n%ifdef\tDLL\n    export\t___gmpn_hamdist\n%endif\n    align   16\n___gmpn_hamdist:\n    ph_fun\t1\n\n    end\n"
  },
  {
    "path": "mpn/x86w/p4/mmx/lshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t8 \n\n;   minimum 5,because the unrolled loop can't handle less \n%define       UNROLL_THRESHOLD  5 \n\n\tsection .text\n\n\tglobal\t___gmpn_lshift\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n\talign   8\n___gmpn_lshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n    jae     Lunroll\n    mov     edi,[-4+ebx+eax*4]\t;  src high limb \n    dec     eax\n    jnz     Lsimple\n\tshld\teax,edi,cl\n    shl     edi,cl\n    mov     [edx],edi\t\t\t;  dst low limb \n    pop     edi\t\t\t\t\t;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\nLsimple: \n    movd    mm5,[ebx+eax*4] ;  src 
high limb \n    movd    mm6,ecx         ;  lshift \n    neg     ecx\n    psllq   mm5,mm6\n    add     ecx,32\n    movd    mm7,ecx\n    psrlq   mm5,32          ;  retval \n\n;  eax counter,limbs,negative \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n;  mm7 32-shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4-4]\n    dec     eax\n    psrlq   mm0,mm7\n    movd    [4+edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    movd    eax,mm5\n    psllq   mm0,mm6\n    pop     edi\n    pop     ebx\n    movd    [edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx+eax*4-4]\t;  src high limb \n    lea     edi,[ebx+eax*4]\n    movd    mm6,ecx\t\t\t\t;  lshift \n    and     edi,4\n    psllq   mm5,mm6\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process high limb separately (marked xxx) to \n;  make it so. \n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-------+-- \n;   |               |           \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-- \n;   |  xxx  |       |   \n;   +-------+-------+-- \n\n    movq    mm0,[ebx+eax*4-8]\t\t;  unaligned load \n    psllq   mm0,mm6\n    dec     eax\n    psrlq   mm0,32\n    movd    [edx+eax*4],mm0\nLstart_src_aligned: \n    movq    mm1,[ebx+eax*4-8]\t\t;  src high qword \n    lea     edi,[edx+eax*4]\n    and     edi,4\n    psrlq   mm5,32\t\t\t\t\t;  return value \n    movq    mm3,[ebx+eax*4-16]\t\t;  src second highest qword \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,subtract 4 to make it so,and pretend the shift \n;  is 32 bits extra.  High limb of dst (marked xxx) handled here \n;  separately. 
\n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-- \n;   |      mm1      |   \n;   +-------+-------+-- \n;                 0mod8   4mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-------+-- \n;   |  xxx  |               | \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psllq   mm0,mm6\n    movd    mm6,ecx\n    psrlq   mm0,32\n\n;  wasted cycle here waiting for %mm0 \n\n    movd    [-4+edx+eax*4],mm0\n    sub     edx,4\nLstart_dst_aligned: \n\n    psllq   mm1,mm6\n    neg     ecx\t\t\t\t;  -shift \n    add     ecx,64\t\t\t;  64-shift \n    movq    mm2,mm3\n    movd    mm7,ecx\n    sub     eax,8\t\t\t;  size-8 \n    psrlq   mm3,mm7\n    por     mm3,mm1         ;  mm3 ready to store \n    jc      Lfinish\n\n;  The comments in mpn_rshift apply here too. \n\n;  eax counter,limbs \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 \n;  mm1 \n;  mm2 src qword from 16(%ebx,%eax,4) \n;  mm3 dst qword ready to store to 24(%edx,%eax,4) \n; \n;  mm5 return value \n;  mm6 lshift \n;  mm7 rshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3\n    por     mm0,mm2\n    movq    mm3,[ebx+eax*4]\n    psllq   mm1,mm6\n    movq    [16+edx+eax*4],mm0\n    movq    mm2,mm3 \n\tpsrlq   mm3,mm7\n    sub     eax,4\n\tpor     mm3,mm1\n    jnc     Lunroll_loop\nLfinish: \n;  eax -4 to -1 representing respectively 0 to 3 limbs remaining \n\n    test    al,2\n    jz      Lfinish_no_two\n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    sub     eax,2\nLfinish_no_two: \n\n;  eax -4 or -3 representing respectively 0 or 1 limbs remaining \n;  mm2 src prev qword,from 
16(%ebx,%eax,4) \n;  mm3 dst qword,for 24(%edx,%eax,4) \n\n    test    al,1\n    movd    eax,mm5  ;  retval \n    pop     edi\n    jz      Lfinish_zero\n\n;  One extra src limb,destination was aligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;  dest         edx+12           edx+4     edx \n;  --+---------------+---------------+-------+ \n;    |      mm3      |               |       | \n;  --+---------------+---------------+-------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra src limb,destination was unaligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;          dest         edx+12           edx+4 \n;          --+---------------+---------------+ \n;            |      mm3      |               | \n;          --+---------------+---------------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 4(%edx),and in the aligned case \n;  there's an extra limb of dst to be formed from that extra src limb \n;  left shifted. \n\n    movd    mm0,[ebx]\n    psllq   mm2,mm6\n    movq    [12+edx],mm3\n    psllq   mm0,32\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    por     mm0,mm2\n    psllq   mm1,mm6\n    movq    [4+edx],mm0\n    psrlq   mm1,32\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n    movd    [edx],mm1\nLfinish_one_unaligned: \n    emms\n    ret\nLfinish_zero: \n\n;  No extra src limbs,destination was aligned. 
\n;\n;                  source          ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;  dest          edx+8             edx \n;  --+---------------+---------------+ \n;    |      mm3      |               | \n;  --+---------------+---------------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra src limbs,destination was unaligned. \n;\n;                source            ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;          dest          edx+8   edx+4 \n;          --+---------------+-------+ \n;            |      mm3      |       | \n;          --+---------------+-------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  The movd for the unaligned case writes the same data to 4(%edx) \n;  that the movq does for the aligned case. \n\n    movq    [8+edx],mm3\n    and     ecx,32\n    psllq   mm2,mm6\n    jz      Lfinish_zero_unaligned\n    movq    [edx],mm2\nLfinish_zero_unaligned: \n    psrlq   mm2,32\n    pop     ebx\n    movd    eax,mm5  ;  retval \n    movd    [4+edx],mm2\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/mmx/popcount.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t 8\n%define PARAM_SRC\t 4\n    global\t___gmpn_popcount\n%ifdef\tDLL\n    export\t___gmpn_popcount\n%endif\n    align   16\n___gmpn_popcount:\n    ph_fun\t0\n\n    end\n"
  },
  {
    "path": "mpn/x86w/p4/mmx/rshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define frame\t\t8 \n\n;   Minimum 5,because the unrolled loop can't handle less. 
\n%define\tUNROLL_THRESHOLD  5 \n\n\tsection .text\n\t\n\tglobal\t___gmpn_rshift\n%ifdef\tDLL\n\texport\t___gmpn_rshift\n%endif\n\n\talign   8\n___gmpn_rshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n\tjae     Lunroll\n    dec     eax\n    mov     edi,[ebx]\t\t;  src low limb \n    jnz     Lsimple\n\tshrd\teax,edi,cl\n    shr     edi,cl\n    mov     [edx],edi       ;  dst low limb \n    pop     edi             ;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLsimple: \n    movd    mm5,[ebx]       ;  src[0] \n    lea     ebx,[ebx+eax*4]  ;  &src[size-1] \n    movd    mm6,ecx         ;  rshift \n    lea     edx,[-4+edx+eax*4] ;  &dst[size-2] \n    psllq   mm5,32\n    neg     eax\n\n;  This loop is 5 or 8 cycles,with every second load unaligned and a wasted \n;  cycle waiting for the mm0 result to be ready.  For comparison a shrdl is 4 \n;  cycles and would be 8 in a simple loop.  Using mmx helps the return value \n;  and last limb calculations too. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-1] \n;  ecx return value \n;  edx &dst[size-2] \n;\n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4]\n    inc     eax\n    psrlq   mm0,mm6\n    movd    [edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    psrlq   mm5,mm6         ;  return value \n    psrlq   mm0,mm6\n    pop     edi\n    movd    eax,mm5\n    pop     ebx\n    movd    [4+edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx]       ;  src[0] \n    mov     edi,4\n    movd    mm6,ecx         ;  rshift \n    test    ebx,edi\n    psllq   mm5,32\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process low limb separately (marked xxx) and \n;  step src and dst by one limb,making src aligned. \n;\n;  source                  ebx \n;  --+-------+-------+-------+ \n;            |          xxx  | \n;  --+-------+-------+-------+ \n;          4mod8   0mod8   4mod8 \n;\n;          dest            edx \n;          --+-------+-------+ \n;            |       |  xxx  |   \n;          --+-------+-------+ \n\n    movq    mm0,[ebx]       ;  unaligned load \n    psrlq   mm0,mm6\n    add     ebx,4\n    dec     eax\n    movd    [edx],mm0\n    add     edx,4\nLstart_src_aligned: \n    movq    mm1,[ebx]\n    test    edx,edi\n    psrlq   mm5,mm6         ;  retval \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,add 4 to make it so,and pretend the shift is \n;  32 bits extra.  Low limb of dst (marked xxx) handled here \n;  separately. 
\n;\n;           source          ebx \n;           --+-------+-------+ \n;             |      mm1      | \n;           --+-------+-------+ \n;                   4mod8   0mod8 \n;\n;   dest                    edx \n;   --+-------+-------+-------+ \n;                     |  xxx  |         \n;   --+-------+-------+-------+ \n;           4mod8   0mod8   4mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psrlq   mm0,mm6\n    movd    mm6,ecx\n    movd    [edx],mm0\n    add     edx,4\nLstart_dst_aligned: \n    movq    mm3,[8+ebx]\n    neg     ecx\n    movq    mm2,mm3\t\t\t;  mm2 src qword \n    add     ecx,64\n    movd    mm7,ecx\n    psrlq   mm1,mm6\n    lea     ebx,[-12+ebx+eax*4]\n    lea     edx,[-20+edx+eax*4]\n    psllq   mm3,mm7\n    sub     eax,7\t\t\t;  size-7 \n    por     mm3,mm1         ;  mm3 ready to store \n    neg     eax             ;  -(size-7) \n    jns     Lfinish\n\n;  This loop is the important bit,the rest is just support.  Careful \n;  instruction scheduling achieves the claimed 1.75 c/l.  The \n;  relevant parts of the pairing rules are: \n;\n;  - mmx loads and stores execute only in the U pipe \n;  - only one mmx shift in a pair \n;  - wait one cycle before storing an mmx register result \n;  - the usual address generation interlock \n;\n;  Two qword calculations are slightly interleaved.  The instructions \n;  marked \"C\" belong to the second qword,and the \"C prev\" one is for \n;  the second qword from the previous iteration. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-12] \n;  ecx \n;  edx &dst[size-12] \n;  esi \n;  edi \n;\n;  mm0 \n;  mm1 \n;  mm2 src qword from -8(%ebx,%eax,4) \n;  mm3 dst qword ready to store to -8(%edx,%eax,4) \n;\n;  mm5 return value \n;  mm6 rshift \n;  mm7 lshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3\n    por     mm0,mm2\n\n\tmovq    mm3,[ebx+eax*4+8]\n\tpsrlq   mm1,mm6\n    movq    [edx+eax*4],mm0\n\tmovq    mm2,mm3\n\tpsllq   mm3,mm7\n    add     eax,4\n\tpor     mm3,mm1\n    js      Lunroll_loop\n\nLfinish: \n;  eax 0 to 3 representing respectively 3 to 0 limbs remaining \n\n    test    al,2\n    jnz     Lfinish_no_two\n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    add     eax,2\nLfinish_no_two: \n\n;  eax 2 or 3 representing respectively 1 or 0 limbs remaining \n;\n;  mm2 src prev qword,from -8(%ebx,%eax,4) \n;  mm3 dst qword,for -8(%edx,%eax,4) \n\n    test    al,1\n    pop     edi\n    movd    eax,mm5  ;  retval \n    jnz     Lfinish_zero\n\n;  One extra limb,destination was aligned. \n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                                  edx \n;  +-------+---------------+---------------+-- \n;  |       |               |      mm3      | \n;  +-------+---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra limb,destination was unaligned. 
\n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                          edx \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 8(%edx),and in the aligned case \n;  there's a further extra limb of dst to be formed. \n\n\n    movd    mm0,[8+ebx]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [edx],mm3\n    por     mm0,mm2\n    psrlq   mm1,mm6\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n\n    ;  dst was aligned,must store one extra limb \n    movd    [16+edx],mm1\nLfinish_one_unaligned: \n\n    movq    [8+edx],mm0\n    emms\n    ret\nLfinish_zero: \n\n;  No extra limbs,destination was aligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                        edx+4 \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra limbs,destination was unaligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                edx+4 \n;  +-------+---------------+-- \n;  |       |      mm3      | \n;  +-------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = 64-(shift+32) \n\n;  The movd for the unaligned case is clearly the same data as the \n;  movq for the aligned case,it's just a choice between whether one \n;  or two limbs should be written. 
\n\n    movq    [4+edx],mm3\n    psrlq   mm2,mm6\n    movd    [12+edx],mm2\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_zero_unaligned\n    movq    [12+edx],mm2\nLfinish_zero_unaligned: \n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/mod_34lsub1.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_mod_34lsub1\n%ifdef\tDLL\n\texport\t___gmpn_mod_34lsub1\n%endif\n\n\talign\t16\n___gmpn_mod_34lsub1: \n    mov     ecx,[8+esp]\n    mov     edx,[4+esp]\n    mov     eax,[edx]\n    sub     ecx,2\n    ja      three_or_more\n    jne     one\n    mov     edx,[4+edx]\n    mov     ecx,eax\n    shr     eax,24\n    and     ecx,0x00FFFFFF\n    add     eax,ecx\n    mov     ecx,edx\n    shl     edx,8\n    shr     ecx,16\n    add     eax,ecx\n    and     edx,0x00FFFF00\n    add     eax,edx\none:ret\n\nthree_or_more: \n    pxor    mm0,mm0\n    pxor    mm1,mm1\n    pxor    mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    pcmpeqd mm6,mm6\n    psrlq   mm6,40\n\ntop: \n\tmovd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tmovd    mm3,[4+edx]\n\tpaddq   mm1,mm3\n    movd    mm3,[8+edx]\n\tpaddq   mm2,mm3\n    add     edx,12\n    sub     ecx,3\n    ja      top\n    add     ecx,1\n    js      combine\n    movd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tjz      combine\n\tmovd    
mm3,[4+edx]\n\tpaddq   mm1,mm3\n\ncombine: \n    movq    mm3,mm7\n    pand    mm3,mm0\n    movq    mm4,mm7\n    pand    mm4,mm1\n    movq    mm5,mm7\n    pand    mm5,mm2\n    psrlq   mm0,32\n    psrlq   mm1,32\n    psrlq   mm2,32\n\tpaddq   mm4,mm0\n\tpaddq   mm5,mm1\n\tpaddq   mm3,mm2\n    psllq   mm4,8\n    psllq   mm5,16\n\tpaddq   mm3,mm4\n\tpaddq\tmm3,mm5\n    pand    mm6,mm3\n    psrlq   mm3,24\n\tpaddq   mm3,mm6\n    movd    eax,mm3\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/modexact_1c_odd.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\textern\t___gmp_modlimb_invert_table\n\tglobal\t___gmpn_modexact_1_odd\n\tglobal\t___gmpn_modexact_1c_odd\n\t\n%ifdef\tDLL\n\texport\t___gmpn_modexact_1_odd\n\texport\t___gmpn_modexact_1c_odd\n%endif\n\n\talign\t16\n___gmpn_modexact_1c_odd: \n    movd    mm1,[16+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_modexact_1_odd: \n\tpxor    mm1,mm1\n\nstart_1c: \n    mov     eax,[12+esp]\n    movd    mm7,[12+esp]\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm6,eax\n    movd    mm0,eax\n\tpmuludq mm6,mm6\n\tpmuludq mm6,mm7\n    paddd   mm0,mm0\n    psubd   mm0,mm6\n    pxor    mm6,mm6\n    paddd   mm6,mm0\n\tpmuludq mm0,mm0\n\tpmuludq mm0,mm7\n    paddd   mm6,mm6\n    mov     eax,[4+esp]\n    mov     ecx,[8+esp]\n    psubd   mm6,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm6\n    movq    
mm1,mm7\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n\ndone: \n\tpaddq   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/mul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_1\n\tglobal\t___gmpn_mul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_mul_1\n\texport\t___gmpn_mul_1c\n%endif\n\n\talign\t16\t\n___gmpn_mul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\t\n___gmpn_mul_1: \n    pxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    movd    mm7,[16+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [edx],mm0\n    add     edx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     top\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/mul_basecase.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_basecase\n%ifdef\tDLL\n\texport\t___gmpn_mul_basecase\n%endif\n\n\talign\t8\n___gmpn_mul_basecase: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    pxor    mm0,mm0\n    mov     edx,[16+esp]\n    mov     [16+esp],esi\n    mov     ebx,[4+esp]\n    mov     [4+esp],ebp\n    mov     esi,eax\n    movd    mm7,[edx]\n    mov     ecx,[12+esp]\n    mov     ebp,[20+esp]\n    mov     [20+esp],edi\n    mov     edi,ebx\n\nmul1: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [ebx],mm0\n    add     ebx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     mul1\n    movd    [ebx],mm0\n    sub     ebp,1\n    jz      done\n\nouter: \n    mov     eax,esi\n    lea     ebx,[4+edi]\n    add     edi,4\n    movd    mm7,[4+edx]\n    add     edx,4\n    pxor    mm0,mm0\n    mov     ecx,[12+esp]\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[ebx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n   
 sub     ecx,1\n    movd    [ebx],mm0\n    psrlq   mm0,32\n    lea     ebx,[4+ebx]\n    jnz     inner\n    movd    [ebx],mm0\n    sub     ebp,1\n    jnz     outer\n\ndone: \n    mov     ebx,[8+esp]\n    mov     esi,[16+esp]\n    mov     edi,[20+esp]\n    mov     ebp,[4+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/popcount.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t 8\n%define PARAM_SRC\t 4\n    global\t___gmpn_popcount\n%ifdef\tDLL\n    export\t___gmpn_popcount\n%endif\n    align   16\n___gmpn_popcount:\n    ph_fun\t0\n\n    end\n"
  },
  {
    "path": "mpn/x86w/p4/rshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define frame\t\t8 \n\n;   Minimum 5,because the unrolled loop can't handle less. 
\n%define\tUNROLL_THRESHOLD  5 \n\n\tsection .text\n\t\n\tglobal\t___gmpn_rshift\n%ifdef\tDLL\n\texport\t___gmpn_rshift\n%endif\n\n\talign   8\n___gmpn_rshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n\tjae     Lunroll\n    dec     eax\n    mov     edi,[ebx]\t\t;  src low limb \n    jnz     Lsimple\n\tshrd\teax,edi,cl\n    shr     edi,cl\n    mov     [edx],edi       ;  dst low limb \n    pop     edi             ;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLsimple: \n    movd    mm5,[ebx]       ;  src[0] \n    lea     ebx,[ebx+eax*4]  ;  &src[size-1] \n    movd    mm6,ecx         ;  rshift \n    lea     edx,[-4+edx+eax*4] ;  &dst[size-2] \n    psllq   mm5,32\n    neg     eax\n\n;  This loop is 5 or 8 cycles,with every second load unaligned and a wasted \n;  cycle waiting for the mm0 result to be ready.  For comparison a shrdl is 4 \n;  cycles and would be 8 in a simple loop.  Using mmx helps the return value \n;  and last limb calculations too. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-1] \n;  ecx return value \n;  edx &dst[size-2] \n;\n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4]\n    inc     eax\n    psrlq   mm0,mm6\n    movd    [edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    psrlq   mm5,mm6         ;  return value \n    psrlq   mm0,mm6\n    pop     edi\n    movd    eax,mm5\n    pop     ebx\n    movd    [4+edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx]       ;  src[0] \n    mov     edi,4\n    movd    mm6,ecx         ;  rshift \n    test    ebx,edi\n    psllq   mm5,32\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process low limb separately (marked xxx) and \n;  step src and dst by one limb,making src aligned. \n;\n;  source                  ebx \n;  --+-------+-------+-------+ \n;            |          xxx  | \n;  --+-------+-------+-------+ \n;          4mod8   0mod8   4mod8 \n;\n;          dest            edx \n;          --+-------+-------+ \n;            |       |  xxx  |   \n;          --+-------+-------+ \n\n    movq    mm0,[ebx]       ;  unaligned load \n    psrlq   mm0,mm6\n    add     ebx,4\n    dec     eax\n    movd    [edx],mm0\n    add     edx,4\nLstart_src_aligned: \n    movq    mm1,[ebx]\n    test    edx,edi\n    psrlq   mm5,mm6         ;  retval \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,add 4 to make it so,and pretend the shift is \n;  32 bits extra.  Low limb of dst (marked xxx) handled here \n;  separately. 
\n;\n;           source          ebx \n;           --+-------+-------+ \n;             |      mm1      | \n;           --+-------+-------+ \n;                   4mod8   0mod8 \n;\n;   dest                    edx \n;   --+-------+-------+-------+ \n;                     |  xxx  |         \n;   --+-------+-------+-------+ \n;           4mod8   0mod8   4mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psrlq   mm0,mm6\n    movd    mm6,ecx\n    movd    [edx],mm0\n    add     edx,4\nLstart_dst_aligned: \n    movq    mm3,[8+ebx]\n    neg     ecx\n    movq    mm2,mm3\t\t\t;  mm2 src qword \n    add     ecx,64\n    movd    mm7,ecx\n    psrlq   mm1,mm6\n    lea     ebx,[-12+ebx+eax*4]\n    lea     edx,[-20+edx+eax*4]\n    psllq   mm3,mm7\n    sub     eax,7\t\t\t;  size-7 \n    por     mm3,mm1         ;  mm3 ready to store \n    neg     eax             ;  -(size-7) \n    jns     Lfinish\n\n;  This loop is the important bit,the rest is just support.  Careful \n;  instruction scheduling achieves the claimed 1.75 c/l.  The \n;  relevant parts of the pairing rules are: \n;\n;  - mmx loads and stores execute only in the U pipe \n;  - only one mmx shift in a pair \n;  - wait one cycle before storing an mmx register result \n;  - the usual address generation interlock \n;\n;  Two qword calculations are slightly interleaved.  The instructions \n;  marked \"C\" belong to the second qword,and the \"C prev\" one is for \n;  the second qword from the previous iteration. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-12] \n;  ecx \n;  edx &dst[size-12] \n;  esi \n;  edi \n;\n;  mm0 \n;  mm1 \n;  mm2 src qword from -8(%ebx,%eax,4) \n;  mm3 dst qword ready to store to -8(%edx,%eax,4) \n;\n;  mm5 return value \n;  mm6 rshift \n;  mm7 lshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3\n    por     mm0,mm2\n\n\tmovq    mm3,[ebx+eax*4+8]\n\tpsrlq   mm1,mm6\n    movq    [edx+eax*4],mm0\n\tmovq    mm2,mm3\n\tpsllq   mm3,mm7\n    add     eax,4\n\tpor     mm3,mm1\n    js      Lunroll_loop\n\nLfinish: \n;  eax 0 to 3 representing respectively 3 to 0 limbs remaining \n\n    test    al,2\n    jnz     Lfinish_no_two\n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    add     eax,2\nLfinish_no_two: \n\n;  eax 2 or 3 representing respectively 1 or 0 limbs remaining \n;\n;  mm2 src prev qword,from -8(%ebx,%eax,4) \n;  mm3 dst qword,for -8(%edx,%eax,4) \n\n    test    al,1\n    pop     edi\n    movd    eax,mm5  ;  retval \n    jnz     Lfinish_zero\n\n;  One extra limb,destination was aligned. \n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                                  edx \n;  +-------+---------------+---------------+-- \n;  |       |               |      mm3      | \n;  +-------+---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra limb,destination was unaligned. 
\n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                          edx \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 8(%edx),and in the aligned case \n;  there's a further extra limb of dst to be formed. \n\n\n    movd    mm0,[8+ebx]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [edx],mm3\n    por     mm0,mm2\n    psrlq   mm1,mm6\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n\n    ;  dst was aligned,must store one extra limb \n    movd    [16+edx],mm1\nLfinish_one_unaligned: \n\n    movq    [8+edx],mm0\n    emms\n    ret\nLfinish_zero: \n\n;  No extra limbs,destination was aligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                        edx+4 \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra limbs,destination was unaligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                edx+4 \n;  +-------+---------------+-- \n;  |       |      mm3      | \n;  +-------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = 64-(shift+32) \n\n;  The movd for the unaligned case is clearly the same data as the \n;  movq for the aligned case,it's just a choice between whether one \n;  or two limbs should be written. 
\n\n    movq    [4+edx],mm3\n    psrlq   mm2,mm6\n    movd    [12+edx],mm2\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_zero_unaligned\n    movq    [12+edx],mm2\nLfinish_zero_unaligned: \n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sqr_basecase.asm",
    "content": ";  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sqr_basecase\n%ifdef\tDLL\n\texport\t___gmpn_sqr_basecase\n%endif\n\n\talign\t8\n___gmpn_sqr_basecase: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    cmp     edx,2\n    je      two_limbs\n    ja      three_or_more\n    mov     eax,[eax]\n    mul     eax\n    mov     [ecx],eax\n    mov     [4+ecx],edx\n    ret\n\ntwo_limbs: \n    movd    mm1,[eax]\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm1\n\tpmuludq mm1,mm1\n    movd    mm2,[4+eax]\n\tpmuludq mm2,mm2\n    movd    [ecx],mm1\n    psrlq   mm1,32\n    pcmpeqd mm3,mm3\n    psrlq   mm3,32\n    pand    mm3,mm0\n    psrlq   mm0,32\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n    movd    [4+ecx],mm1\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n    pand    mm4,mm2\n    psrlq   mm2,32\n    psllq   mm0,1\n    psrlq   mm1,32\n\tpaddq   mm0,mm1\n\tpaddq   mm0,mm4\n    movd    [8+ecx],mm0\n    psrlq   mm0,32\n\tpaddq   mm0,mm2\n    movd    [12+ecx],mm0\n\temms\n    ret\n\nthree_or_more: \n    sub     
esp,12\n    pxor    mm0,mm0\n    movd    mm7,[eax]\n    mov     [8+esp],esi\n    mov     [4+esp],edi\n    mov     [esp],ebp\n    mov     esi,eax\n    mov     edi,ecx\n    sub     edx,1\n\nmul1: \n    movd    mm1,[4+eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [4+ecx],mm0\n    add     ecx,4\n    psrlq   mm0,32\n    sub     edx,1\n    jnz     mul1\n    mov     ebp,[24+esp]\n    sub     ebp,3\n    jz      corner\n\nouter: \n    movd    mm7,[4+esi]\n    movd    [4+ecx],mm0\n    lea     eax,[8+esi]\n    add     esi,4\n    lea     ecx,[8+edi]\n    add     edi,8\n    lea     edx,[1+ebp]\n    pxor    mm0,mm0\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[4+ecx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    sub     edx,1\n    movd    [4+ecx],mm0\n    psrlq   mm0,32\n    lea     ecx,[4+ecx]\n    jnz     inner\n    sub     ebp,1\n    jnz     outer\n\ncorner: \n    movd    mm1,[4+esi]\n    movd    mm2,[8+esi]\n\tpmuludq mm1,mm2\n    mov     eax,[20+esp]\n    movd    mm2,[eax]\n\tpmuludq mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    mov     edx,[16+esp]\n    movd    mm3,[4+edx]\n\tpaddq   mm0,mm1\n    movd    [12+edi],mm0\n    psrlq   mm0,32\n    movd    [16+edi],mm0\n    movd    [edx],mm2\n    psrlq   mm2,32\n    psllq   mm3,1\n\tpaddq   mm2,mm3\n    movd    [4+edx],mm2\n    psrlq   mm2,32\n    mov     ecx,[24+esp]\n    sub     ecx,2\n\ndiag: \n    movd    mm0,[4+eax]\n    add     eax,4\n\tpmuludq mm0,mm0\n    movq    mm1,mm7\n    pand    mm1,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n\tpaddq   mm2,mm1\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n    movd    mm3,[12+edx]\n    psllq   mm3,1\n\tpaddq   mm0,mm3\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    add     edx,8\n    psrlq   mm2,32\n    sub     ecx,1\n    jnz     diag\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm0\n    pand    mm7,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   
mm3,1\n\tpaddq   mm7,mm3\n\tpaddq   mm2,mm7\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    mov     esi,[8+esp]\n    mov     edi,[4+esp]\n    mov     ebp,[esp]\n    add     esp,12\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/add_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\"\n\n\tsection .text\n\n\tglobal\t___gmpn_add_nc\n%ifdef\tDLL\n\texport\t___gmpn_add_nc\n%endif\n\n\talign\t8\n___gmpn_add_nc:\n    movd    mm0,[20+esp]\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\nt1: movd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t1\n    movd    eax,mm0\n    mov     ebx,[8+esp]\n    emms\n    ret\n\n\tglobal\t___gmpn_add_n\n%ifdef\tDLL\n\texport\t___gmpn_add_n\n%endif\n\n\talign\t8\n___gmpn_add_n:\t\n\tpxor    mm0,mm0\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     
ecx\nt2:\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t2\n    movd    eax,mm0\n    mov     ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/addmul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_addmul_1\n\tglobal\t___gmpn_addmul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_addmul_1\n\texport\t___gmpn_addmul_1c\n%endif\n\n\talign\t16\n___gmpn_addmul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_addmul_1: \n\tpxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    mov     ecx,[12+esp]\n    mov     edx,[4+esp]\n    movd    mm7,[16+esp]\n\noop: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpmuludq mm1,mm7\n\tpaddq   mm2,mm1\n\tpaddq   mm0,mm2\n\tsub     ecx,1\n\tmovd    [edx],mm0\n\tpsrlq   mm0,32\n\tlea     edx,[4+edx]\n\tjnz     oop\n\tmovd    eax,mm0\n\temms\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\"\n%define\tp_space\t16\n\n\textern\t___gmp_modlimb_invert_table\n\n\tsection .text\n\n\tglobal\t___gmpn_divexact_1\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n\talign\t16\n___gmpn_divexact_1: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[16+esp]\n    sub     edx,1\n    jnz     two_or_more\n    mov     eax,[eax]\n    xor     edx,edx\n    div     ecx\n    mov     ecx,[4+esp]\n    mov     [ecx],eax\n    ret\n    \ntwo_or_more: \n    mov     eax,ecx\n    bsf     ecx,ecx\n    shr     eax,cl\n    movd    mm6,eax\n    movd    mm7,ecx\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm5,eax\n    movd    mm0,eax\n\tpmuludq mm5,mm5\n\tpmuludq mm5,mm6\n    paddd   mm0,mm0\n    psubd   mm0,mm5\n    pxor    mm5,mm5\n    paddd   mm5,mm0\n\tpmuludq mm0,mm0\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n\tpmuludq mm0,mm6\n    paddd   mm5,mm5\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    pxor 
   mm1,mm1\n    psubd   mm5,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    movd    mm3,[4+eax]\n    add     eax,4\n    punpckldq mm2,mm3\n    psrlq   mm2,mm7\n    pand    mm2,mm4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    add     ecx,4\n    movq    mm1,mm6\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     edx,1\n    jnz     top\n\ndone: \n    movd    mm2,[eax]\n    psrlq   mm2,mm7\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    emms\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/divexact_by3c.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Library General Public License as\n;  published by the Free Software Foundation; either version 2 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Library General Public License for more details.\n; \n;  You should have received a copy of the GNU Library General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_divexact_by3c\t\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n\talign\t16\n___gmpn_divexact_by3c: \n    mov     eax,[8+esp]\n    pxor    mm0,mm0\n    movd    mm1,[16+esp]\n    pcmpeqd mm6,mm6\n    movd    mm7,[val]\n    mov     edx,[4+esp]\n    psrlq   mm6,32\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm7\n    movd    [edx],mm2\n    add     edx,4\n    movq    mm1,mm6\n    pand    mm1,mm2\n    pand    mm2,mm6\n    psllq   mm1,1\n\tpaddq   mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n    paddd   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tsection\t.data\nval:\n\tdd\t0xAAAAAAAB\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/mod_34lsub1.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_mod_34lsub1\n%ifdef\tDLL\n\texport\t___gmpn_mod_34lsub1\n%endif\n\n\talign\t16\n___gmpn_mod_34lsub1: \n    mov     ecx,[8+esp]\n    mov     edx,[4+esp]\n    mov     eax,[edx]\n    sub     ecx,2\n    ja      three_or_more\n    jne     one\n    mov     edx,[4+edx]\n    mov     ecx,eax\n    shr     eax,24\n    and     ecx,0x00FFFFFF\n    add     eax,ecx\n    mov     ecx,edx\n    shl     edx,8\n    shr     ecx,16\n    add     eax,ecx\n    and     edx,0x00FFFF00\n    add     eax,edx\none:ret\n\nthree_or_more: \n    pxor    mm0,mm0\n    pxor    mm1,mm1\n    pxor    mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    pcmpeqd mm6,mm6\n    psrlq   mm6,40\n\ntop: \n\tmovd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tmovd    mm3,[4+edx]\n\tpaddq   mm1,mm3\n    movd    mm3,[8+edx]\n\tpaddq   mm2,mm3\n    add     edx,12\n    sub     ecx,3\n    ja      top\n    add     ecx,1\n    js      combine\n    movd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tjz      combine\n\tmovd    
mm3,[4+edx]\n\tpaddq   mm1,mm3\n\ncombine: \n    movq    mm3,mm7\n    pand    mm3,mm0\n    movq    mm4,mm7\n    pand    mm4,mm1\n    movq    mm5,mm7\n    pand    mm5,mm2\n    psrlq   mm0,32\n    psrlq   mm1,32\n    psrlq   mm2,32\n\tpaddq   mm4,mm0\n\tpaddq   mm5,mm1\n\tpaddq   mm3,mm2\n    psllq   mm4,8\n    psllq   mm5,16\n\tpaddq   mm3,mm4\n\tpaddq\tmm3,mm5\n    pand    mm6,mm3\n    psrlq   mm3,24\n\tpaddq   mm3,mm6\n    movd    eax,mm3\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/modexact_1c_odd.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\textern\t___gmp_modlimb_invert_table\n\tglobal\t___gmpn_modexact_1_odd\n\tglobal\t___gmpn_modexact_1c_odd\n\t\n%ifdef\tDLL\n\texport\t___gmpn_modexact_1_odd\n\texport\t___gmpn_modexact_1c_odd\n%endif\n\n\talign\t16\n___gmpn_modexact_1c_odd: \n    movd    mm1,[16+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_modexact_1_odd: \n\tpxor    mm1,mm1\n\nstart_1c: \n    mov     eax,[12+esp]\n    movd    mm7,[12+esp]\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm6,eax\n    movd    mm0,eax\n\tpmuludq mm6,mm6\n\tpmuludq mm6,mm7\n    paddd   mm0,mm0\n    psubd   mm0,mm6\n    pxor    mm6,mm6\n    paddd   mm6,mm0\n\tpmuludq mm0,mm0\n\tpmuludq mm0,mm7\n    paddd   mm6,mm6\n    mov     eax,[4+esp]\n    mov     ecx,[8+esp]\n    psubd   mm6,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm6\n    movq    
mm1,mm7\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n\ndone: \n\tpaddq   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/mul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_1\n\tglobal\t___gmpn_mul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_mul_1\n\texport\t___gmpn_mul_1c\n%endif\n\n\talign\t16\t\n___gmpn_mul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\t\n___gmpn_mul_1: \n    pxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    movd    mm7,[16+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [edx],mm0\n    add     edx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     top\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/mul_basecase.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_basecase\n%ifdef\tDLL\n\texport\t___gmpn_mul_basecase\n%endif\n\n\talign\t8\n___gmpn_mul_basecase: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    pxor    mm0,mm0\n    mov     edx,[16+esp]\n    mov     [16+esp],esi\n    mov     ebx,[4+esp]\n    mov     [4+esp],ebp\n    mov     esi,eax\n    movd    mm7,[edx]\n    mov     ecx,[12+esp]\n    mov     ebp,[20+esp]\n    mov     [20+esp],edi\n    mov     edi,ebx\n\nmul1: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [ebx],mm0\n    add     ebx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     mul1\n    movd    [ebx],mm0\n    sub     ebp,1\n    jz      done\n\nouter: \n    mov     eax,esi\n    lea     ebx,[4+edi]\n    add     edi,4\n    movd    mm7,[4+edx]\n    add     edx,4\n    pxor    mm0,mm0\n    mov     ecx,[12+esp]\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[ebx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n   
 sub     ecx,1\n    movd    [ebx],mm0\n    psrlq   mm0,32\n    lea     ebx,[4+ebx]\n    jnz     inner\n    movd    [ebx],mm0\n    sub     ebp,1\n    jnz     outer\n\ndone: \n    mov     ebx,[8+esp]\n    mov     esi,[16+esp]\n    mov     edi,[20+esp]\n    mov     ebp,[4+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/sqr_basecase.asm",
    "content": ";  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sqr_basecase\n%ifdef\tDLL\n\texport\t___gmpn_sqr_basecase\n%endif\n\n\talign\t8\n___gmpn_sqr_basecase: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    cmp     edx,2\n    je      two_limbs\n    ja      three_or_more\n    mov     eax,[eax]\n    mul     eax\n    mov     [ecx],eax\n    mov     [4+ecx],edx\n    ret\n\ntwo_limbs: \n    movd    mm1,[eax]\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm1\n\tpmuludq mm1,mm1\n    movd    mm2,[4+eax]\n\tpmuludq mm2,mm2\n    movd    [ecx],mm1\n    psrlq   mm1,32\n    pcmpeqd mm3,mm3\n    psrlq   mm3,32\n    pand    mm3,mm0\n    psrlq   mm0,32\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n    movd    [4+ecx],mm1\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n    pand    mm4,mm2\n    psrlq   mm2,32\n    psllq   mm0,1\n    psrlq   mm1,32\n\tpaddq   mm0,mm1\n\tpaddq   mm0,mm4\n    movd    [8+ecx],mm0\n    psrlq   mm0,32\n\tpaddq   mm0,mm2\n    movd    [12+ecx],mm0\n\temms\n    ret\n\nthree_or_more: \n    sub     
esp,12\n    pxor    mm0,mm0\n    movd    mm7,[eax]\n    mov     [8+esp],esi\n    mov     [4+esp],edi\n    mov     [esp],ebp\n    mov     esi,eax\n    mov     edi,ecx\n    sub     edx,1\n\nmul1: \n    movd    mm1,[4+eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [4+ecx],mm0\n    add     ecx,4\n    psrlq   mm0,32\n    sub     edx,1\n    jnz     mul1\n    mov     ebp,[24+esp]\n    sub     ebp,3\n    jz      corner\n\nouter: \n    movd    mm7,[4+esi]\n    movd    [4+ecx],mm0\n    lea     eax,[8+esi]\n    add     esi,4\n    lea     ecx,[8+edi]\n    add     edi,8\n    lea     edx,[1+ebp]\n    pxor    mm0,mm0\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[4+ecx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    sub     edx,1\n    movd    [4+ecx],mm0\n    psrlq   mm0,32\n    lea     ecx,[4+ecx]\n    jnz     inner\n    sub     ebp,1\n    jnz     outer\n\ncorner: \n    movd    mm1,[4+esi]\n    movd    mm2,[8+esi]\n\tpmuludq mm1,mm2\n    mov     eax,[20+esp]\n    movd    mm2,[eax]\n\tpmuludq mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    mov     edx,[16+esp]\n    movd    mm3,[4+edx]\n\tpaddq   mm0,mm1\n    movd    [12+edi],mm0\n    psrlq   mm0,32\n    movd    [16+edi],mm0\n    movd    [edx],mm2\n    psrlq   mm2,32\n    psllq   mm3,1\n\tpaddq   mm2,mm3\n    movd    [4+edx],mm2\n    psrlq   mm2,32\n    mov     ecx,[24+esp]\n    sub     ecx,2\n\ndiag: \n    movd    mm0,[4+eax]\n    add     eax,4\n\tpmuludq mm0,mm0\n    movq    mm1,mm7\n    pand    mm1,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n\tpaddq   mm2,mm1\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n    movd    mm3,[12+edx]\n    psllq   mm3,1\n\tpaddq   mm0,mm3\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    add     edx,8\n    psrlq   mm2,32\n    sub     ecx,1\n    jnz     diag\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm0\n    pand    mm7,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   
mm3,1\n\tpaddq   mm7,mm3\n\tpaddq   mm2,mm7\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    mov     esi,[8+esp]\n    mov     edi,[4+esp]\n    mov     ebp,[esp]\n    add     esp,12\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/sub_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sub_nc\n\tglobal\t___gmpn_sub_n\n\n%ifdef\tDLL\n\texport\t___gmpn_sub_nc\n\texport\t___gmpn_sub_n\n%endif\n\n\talign\t8\n___gmpn_sub_nc: \n    movd    mm0,[20+esp]\n    jmp     start_nc\n\t\n\talign\t8\n___gmpn_sub_n: \n    pxor    mm0,mm0\n\nstart_nc: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\n\ntop: \n\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpsubq   mm1,mm2\n\tpsubq   mm1,mm0\n    movd    [edx+ecx*4],mm1\n    psrlq   mm1,63\n    add     ecx,1\n    jz      done_mm1\n\tmovd    mm0,[eax+ecx*4]\n    movd    mm2,[ebx+ecx*4]\n   \tpsubq   mm0,mm2\n\tpsubq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,63\n    add     ecx,1\n    jnz     top\n    movd    eax,mm0\n    mov     ebx,[8+esp]\n    emms\n    ret\n\ndone_mm1: \n    movd    eax,mm1\n    mov     
ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sse2/submul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n\tsection .text\n\n\tglobal\t___gmpn_submul_1c\n%ifdef\tDLL\n\texport\t___gmpn_submul_1c\n%endif\n\n\talign\t16\n___gmpn_submul_1c:\n    movd    mm1,[20+esp]\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop1: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop1\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tglobal\t___gmpn_submul_1\n%ifdef\tDLL\n\texport\t___gmpn_submul_1\n%endif\n\talign\t16\n___gmpn_submul_1:\n\n\tpxor    mm1,mm1\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   
mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop2: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop2\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/sub_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sub_nc\n\tglobal\t___gmpn_sub_n\n\n%ifdef\tDLL\n\texport\t___gmpn_sub_nc\n\texport\t___gmpn_sub_n\n%endif\n\n\talign\t8\n___gmpn_sub_nc: \n    movd    mm0,[20+esp]\n    jmp     start_nc\n\t\n\talign\t8\n___gmpn_sub_n: \n    pxor    mm0,mm0\n\nstart_nc: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\n\tjz\t\txit\ntop: \n\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpsubq   mm1,mm2\n\tpsubq   mm1,mm0\n    movd    [edx+ecx*4],mm1\n    psrlq   mm1,63\n    add     ecx,1\n    jz      done_mm1\n\tmovd    mm0,[eax+ecx*4]\n    movd    mm2,[ebx+ecx*4]\n   \tpsubq   mm0,mm2\n\tpsubq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,63\n    add     ecx,1\n    jnz     top\n    movd    eax,mm0\nxit:mov     ebx,[8+esp]\n    emms\n    ret\n\ndone_mm1: \n    movd    eax,mm1\n    
mov     ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p4/submul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tsection .text\n\n\tglobal\t___gmpn_submul_1c\n%ifdef\tDLL\n\texport\t___gmpn_submul_1c\n%endif\n\n\talign\t16\n___gmpn_submul_1c:\n    movd    mm1,[20+esp]\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop1: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop1\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tglobal\t___gmpn_submul_1\n%ifdef\tDLL\n\texport\t___gmpn_submul_1\n%endif\n\talign\t16\n___gmpn_submul_1:\n\n\tpxor    mm1,mm1\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   
mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop2: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop2\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/addmul_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tUNROLL_LOG2\t\t4\n%define\tUNROLL_COUNT\t(1 << UNROLL_LOG2)\n%define\tUNROLL_MASK\t\tUNROLL_COUNT-1  \n%define\tUNROLL_BYTES\t4*UNROLL_COUNT\n\n%ifdef\tPIC\n%define\tUNROLL_THRESHOLD\t5 \n%else\n%define\tUNROLL_THRESHOLD\t5 \n%endif\n\n%define\tPARAM_CARRY\t\t\tesp+frame+20 \n%define PARAM_MULTIPLIER\tesp+frame+16 \n%define PARAM_SIZE\t\t\tesp+frame+12 \n%define PARAM_SRC\t\t\tesp+frame+8 \n%define PARAM_DST\t\t\tesp+frame+4 \n\n%macro\tmul_fun\t4\n\n\tglobal  %1%3\n\tglobal  %1%4\n\n%ifdef\tDLL\n\texport\t%1%3\n\texport  %1%4\n%endif\n\n\talign   32\n%define\tframe\t0\n%1%4:\n    FR_push ebx\n    mov     ebx,[PARAM_CARRY]\n\tjmp     %%Lstart_nc\n\n%define\tframe\t0\n%1%3:\n\tFR_push ebx\n    xor     ebx,ebx\t;  initial carry \n%%Lstart_nc: \n    mov     ecx,[PARAM_SIZE]\n    FR_push esi\n    mov     esi,[PARAM_SRC]\n    FR_push\tedi\n    mov     edi,[PARAM_DST]\n    FR_push ebp\n    cmp     ecx,UNROLL_THRESHOLD\n    mov     ebp,[PARAM_MULTIPLIER]\n  
  jae     %%Lunroll\n\n;  simple loop \n;  this is offset 0x22,so close enough to aligned \n;  eax scratch \n;  ebx carry \n;  ecx counter \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n\n%%Lsimple: \n    mov     eax,[esi]\n    add     edi,4\n    mul     ebp\n    add     eax,ebx\n    adc     edx,0\n\t%2\t\t[edi-4],eax\n    mov     ebx,edx\n    adc     ebx,0\n    dec     ecx\n    lea     esi,[4+esi]\n    jnz     %%Lsimple\n    pop     ebp\n    pop     edi\n    pop     esi\n    mov     eax,ebx\n    pop     ebx\n    ret\n\n;  VAR_JUMP holds the computed jump temporarily because there's not enough \n;  registers when doing the mul for the initial two carry limbs. \n; \n;  The add/adc for the initial carry in %ebx is necessary only for the \n;  mpn_add/submul_1c entry points.  Duplicating the startup code to \n;  eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good \n;  idea. \n;\n;   overlapping with parameters already fetched \n\n%define\tVAR_COUNTER\tPARAM_SIZE\n%define\tVAR_JUMP\tPARAM_DST\n\n; VAL1 = ifelse(UNROLL_BYTES,256,128)\n%define\tVAL1\t128\n; VAL2 = ifelse(UNROLL_BYTES,256,-128)\n%define\tVAL2   -128\n\n;  this is offset 0x43,so close enough to aligned \n;  eax \n;  ebx initial carry \n;  ecx size \n;  edx \n;  esi src \n;  edi dst \n;  ebp \n\n%%Lunroll: \n    mov     edx,ecx\n    dec     ecx\n    sub     edx,2\n    neg     ecx\n    shr     edx,UNROLL_LOG2\n    and     ecx,UNROLL_MASK\n    mov     [VAR_COUNTER],edx\n    mov     edx,ecx\n        \n;  15 code bytes per limb \n\n%ifdef\tPIC\n\tcall    %%Lhere\n%%Lhere: \n    shl     edx,4\n    neg     ecx\n    lea     edx,[edx+ecx*1]\n    add     edx,%%Lentry-%%Lhere\n    add     edx,[esp]\n\tadd\t\tesp,4\n%else\n\tshl     edx,4\n    neg     ecx\n\tlea\t\tedx,[%%Lentry+edx+ecx]\n%endif\n    mov     eax,[esi]\t\t\t;  src low limb \n    mov     [VAR_JUMP],edx\n\tlea\t\tesi,[VAL1+4+esi+ecx*4]\n    mul     ebp\n    add     eax,ebx\t\t\t\t;  initial carry (from _1c) \n    adc  
   edx,0\n    mov     ebx,edx\t\t\t\t;  high carry \n\tlea\t\tedi,[VAL1+edi+ecx*4]\n    mov     edx,[VAR_JUMP]\n    test    ecx,1\n    mov     ecx,eax\t\t\t\t;  low carry \n\tcmovnz\tecx,ebx\n\tcmovnz\tebx,eax\n    jmp     edx\n\n;  eax scratch \n;  ebx carry hi \n;  ecx carry lo \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n;\n;  VAR_COUNTER loop counter \n;\n;  15 code bytes per limb \n\n%define\tCHUNK_COUNT\t2 \n\n\talign   32\n%%Ltop: \n\tadd     edi,UNROLL_BYTES\n%%Lentry: \n%assign\tdisp\tVAL2\n%rep\tUNROLL_COUNT/CHUNK_COUNT\n\tmov\t\teax,[byte disp+esi]\n\tmul     ebp\n\t%2\t\t[byte disp+edi],ecx\n    adc     ebx,eax\n    mov     ecx,edx\n    adc     ecx,0\n    mov     eax,[byte disp+4+esi]\n    mul     ebp\n\t%2\t\t[byte disp+4+edi],ebx\n    adc     ecx,eax\n    mov     ebx,edx\n    adc     ebx,0\n%assign\t\tdisp\tdisp+4*CHUNK_COUNT\n%endrep\n\n    dec     dword [VAR_COUNTER]\n    lea     esi,[UNROLL_BYTES+esi]\n    jns     %%Ltop\n\n%assign\tdisp\tUNROLL_BYTES+VAL2\n\t%2\t\t[disp+edi],ecx\n    mov     eax,ebx\n    pop     ebp\n    pop     edi\n    pop     esi\n    pop     ebx\n    adc     eax,0\n    ret\n%endmacro\n\n\tsection .text\n;\tglobal ___gmpn_addmul_1\n;\tglobal ___gmpn_addmul_1c\n\t\n\tmul_fun\t___g,add,mpn_addmul_1,mpn_addmul_1c\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/copyd.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal  ___gmpn_copyd \n\n%ifdef\tDLL\n\texport\t___gmpn_copyd\n%endif\n\n%define\tPARAM_SIZE\tesp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n\n%define\tSAVE_ESI\tPARAM_SIZE\n%define\tSAVE_EDI\tPARAM_SRC\n%define\tframe\t\t0 \n\n\tsection .text\n\talign   16\n\n___gmpn_copyd: \n\tmov     ecx,[PARAM_SIZE]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    sub     ecx,1\n    jb      Lzero\n    mov     eax,[esi+ecx*4]\t\t\t;  src[size-1] \n    jz      Lone\n    mov     edx,[-4+esi+ecx*4]\t\t;  src[size-2] \n    sub     ecx,2\n    jbe     Ldone_loop              ;  2 or 3 limbs only \n        \n;  The usual overlap is \n;\n;      high                   low \n;      +------------------+ \n;      |               dst| \n;      +------------------+ \n;            +------------------+ \n;            |               src| \n;            +------------------+ \n;\n;  We can use 
an incrementing copy in the following circumstances. \n;\n;      src+4*size<=dst,since then the regions are disjoint \n;\n;      src==dst,clearly (though this shouldn't occur normally) \n;\n;      src>dst,since in that case it's a requirement of the \n;               parameters that src>=dst+size*4,and hence the \n;               regions are disjoint \n;\n;  eax prev high limb \n;  ebx \n;  ecx counter,size-3 down to 0 or -1,inclusive,by 2s \n;  edx prev low limb \n;  esi src \n;  edi dst \n;  ebp \n\n    lea     edx,[edi+ecx*4]\n    cmp     esi,edi\n    jae     Luse_movsl\t\t\t;  src >= dst \n    cmp     edx,edi\n    mov     edx,[4+esi+ecx*4]\t;  src[size-2] again \n    jbe     Luse_movsl\t\t\t;  src+4*size <= dst \nLtop: \n    mov     [8+edi+ecx*4],eax\n    mov     eax,[esi+ecx*4]\n    mov     [4+edi+ecx*4],edx\n    mov     edx,[-4+esi+ecx*4]\n    sub     ecx,2\n    jnbe    Ltop\nLdone_loop: \n    mov     [8+edi+ecx*4],eax\n    mov     [4+edi+ecx*4],edx\n\n;  copy low limb (needed if size was odd,but will already have been \n;  done in the loop if size was even) \n\n    mov     eax,[esi]\nLone: \n    mov     [edi],eax\n    mov     edi,[SAVE_EDI]\n    mov     esi,[SAVE_ESI]\n\tret\nLuse_movsl: \n    add     ecx,3\n    cld\n    rep\t\tmovsd\nLzero: \n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\textern\t___gmp_modlimb_invert_table\n\tglobal  ___gmpn_divexact_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n%define\tPARAM_DIVISOR\tesp+frame+16 \n%define PARAM_SIZE      esp+frame+12 \n%define PARAM_SRC       esp+frame+8 \n%define PARAM_DST       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI\t\tesp+frame-12 \n%define SAVE_EBP\t\tesp+frame-16 \n%define VAR_INVERSE\t\tesp+frame-20 \n%define STACK_SPACE\t\t20 \n%define frame\t\t\t0 \n\n\tsection .text\n\n\talign   16\n\n___gmpn_divexact_1: \n\tmov     eax,[PARAM_DIVISOR]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\n    bsf     ecx,eax         ;  trailing twos \n    mov     [SAVE_EBP],ebp\n    shr     eax,cl          ;  d without twos \n    mov     edx,eax\n    shr     eax,1           ;  d/2 without twos \n    mov     
[PARAM_DIVISOR],edx\n    and     eax,127\n\n%ifdef\tPIC\n    call    Lmovl_eip_ebp\n    add     ebp,_GLOBAL_OFFSET_TABLE_\n    mov     ebp,[___gmp_modlimb_invert_table+edx+ebp]\n    movzx   ebp,byte [eax+ebp]\t\t\t\t\t\t\t;  inv 8 bits \n%else\n\tmovzx   ebp,byte [___gmp_modlimb_invert_table+eax]\t;  inv 8 bits \n%endif\n\n    lea     eax,[ebp+ebp]\t\t;  2*inv \n    imul    ebp,ebp\t\t\t\t;  inv*inv \n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    lea     esi,[esi+ebx*4]\t\t;  src end \n    imul    ebp,[PARAM_DIVISOR]\t;  inv*inv*d \n    sub     eax,ebp\t\t\t\t;  inv = 2*inv - inv*inv*d \n    lea     ebp,[eax+eax]\t\t;  2*inv \n    imul    eax,eax\t\t\t\t;  inv*inv \n    lea     edi,[edi+ebx*4]\t\t;  dst end \n    neg     ebx\t\t\t\t\t;  -size \n    mov     [PARAM_DST],edi\n    imul    eax,[PARAM_DIVISOR] ;  inv*inv*d \n    sub     ebp,eax\t\t\t\t;  inv = 2*inv - inv*inv*d \n\n    mov     [VAR_INVERSE],ebp\n    mov     eax,[esi+ebx*4]\t\t;  src[0] \n    or      ecx,ecx\n    jnz     Leven\n    jmp     Lodd_entry\t\t\t;  ecx initial carry is zero \n\n;  The dependent chain here is \n; \n;      subl    %edx,%eax        1 \n;      imull   %ebp,%eax        4 \n;      mull    PARAM_DIVISOR    5 \n;                             ---- \n;        total                 10 \n; \n;  and this is the measured speed.  No special scheduling is necessary,out \n;  of order execution hides the load latency. 
\n;\n;  eax scratch (src limb) \n;  ebx counter,limbs,negative \n;  ecx carry bit \n;  edx carry limb,high of last product \n;  esi &src[size] \n;  edi &dst[size] \n\nLodd_top: \n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[esi+ebx*4]\n    sub     eax,ecx\n    sbb     ecx,ecx\n    sub     eax,edx\n    sbb     ecx,0\nLodd_entry: \n    imul    eax,[VAR_INVERSE]\n    mov     [edi+ebx*4],eax\n    neg     ecx\n    inc     ebx\n    jnz     Lodd_top\n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebp,[SAVE_EBP]\n    mov     ebx,[SAVE_EBX]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax src[0] \n;  ebx counter,limbs,negative \n;  ecx shift \n\nLeven: \n    xor     ebp,ebp         ;  initial carry bit \n    xor     edx,edx         ;  initial carry limb (for size==1) \n    inc     ebx\n    jz      Leven_one\n    mov     edi,[esi+ebx*4]\t;  src[1] \n\tshrd\teax,edi,cl\n    jmp     Leven_entry\n\n;  eax scratch \n;  ebx counter,limbs,negative \n;  ecx shift \n;  edx scratch \n;  esi &src[size] \n;  edi &dst[size] and scratch \n;  ebp carry bit \n\nLeven_top: \n    mov     edi,[esi+ebx*4]\n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[-4+esi+ebx*4]\n\tshrd\teax,edi,cl\n    sub     eax,ebp\n    sbb     ebp,ebp\n    sub     eax,edx\n    sbb     ebp,0\n\nLeven_entry: \n    imul    eax,[VAR_INVERSE]\n    mov     edi,[PARAM_DST]\n    neg     ebp\n    mov     [-4+edi+ebx*4],eax\n    inc     ebx\n    jnz     Leven_top\n    mul     dword [PARAM_DIVISOR]\n    mov     eax,[-4+esi]\nLeven_one: \n    shr     eax,cl\n    mov     esi,[SAVE_ESI]\n    sub     eax,ebp\n    mov     ebp,[SAVE_EBP]\n    sub     eax,edx\n    mov     ebx,[SAVE_EBX]\n    imul    eax,[VAR_INVERSE]\n    mov     [-4+edi],eax\n    mov     edi,[SAVE_EDI]\n    add     esp,STACK_SPACE\n    ret\n\n%ifdef\tPIC\nLmovl_eip_ebp: \n    mov     ebp,[esp]\n    ret\n%endif\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/divexact_by3c.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal  ___gmpn_divexact_by3c \n\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n%define\tPARAM_CARRY esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t0 \n\n;   multiplicative inverse of 3,modulo 2^32 \n;   ceil(b/3),ceil(b*2/3) and floor(b*2/3) where b=2^32 \n%define\tINVERSE_3\t\t-0x55555555\n%define\tONE_THIRD_CEIL\t\t0x55555556\n%define\tTWO_THIRDS_CEIL\t\t0xAAAAAAAB\n%define\tTWO_THIRDS_FLOOR\t0xAAAAAAAA\n\n\tsection .text\n\n\talign   8   \n\n___gmpn_divexact_by3c: \n    mov     ecx,[PARAM_SRC]\n    mov     edx,[PARAM_SIZE]\n    dec     edx\n    jnz     Ltwo_or_more\n    mov     edx,[ecx]\n    mov     eax,[PARAM_CARRY]\t\t\t;  risk of cache bank clash here \n    mov     ecx,[PARAM_DST]\n    sub     edx,eax\n    sbb     eax,eax\t\t\t\t\t\t;  0 or -1 \n    imul    edx,edx,INVERSE_3\n    neg     eax\t\t\t\t\t\t\t;  0 or 1 \n    cmp     edx,ONE_THIRD_CEIL\n    sbb     
eax,-1\t\t\t\t\t\t;  +1 if edx>=ceil(b/3) \n    cmp     edx,TWO_THIRDS_CEIL\n    sbb     eax,-1\t\t\t\t\t\t;  +1 if edx>=ceil(b*2/3) \n    mov     [ecx],edx\n    ret\n\n;  eax \n;  ebx \n;  ecx src \n;  edx size-1 \n;  esi \n;  edi \n;  ebp \n\nLtwo_or_more: \n\tFR_push\tebx\n\tFR_push\tesi\n\tFR_push\tedi\n\tFR_push\tebp\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_CARRY]\n    mov     eax,[ecx]\t\t\t\t;  src low limb \n    xor     ebx,ebx\n\tsub     eax,esi\n    mov     esi,TWO_THIRDS_FLOOR\n    lea     ecx,[ecx+edx*4]\t\t\t;  &src[size-1] \n    lea     edi,[edi+edx*4]\t\t\t;  &dst[size-1] \n    adc     ebx,0\t\t\t\t\t;  carry,0 or 1 \n    neg     edx\t\t\t\t\t\t;  -(size-1) \n\n;  The loop needs a source limb ready at the top,which leads to one limb \n;  handled separately at the end,and the special case above for size==1. \n;  There doesn't seem to be any scheduling that would keep the speed but move \n;  the source load and carry subtract up to the top. \n; \n;  The destination cache line prefetching adds 1 cycle to the loop but is \n;  considered worthwhile.  The slowdown is a factor of 1.07,but will prevent \n;  repeated write-throughs if the destination isn't in L1.  A version using \n;  an outer loop to prefetch only every 8 limbs (a cache line) proved to be \n;  no faster,due to unavoidable branch mispreditions in the inner loop. \n; \n;  setc is 2 cycles on P54,so an adcl is used instead.  If the movl $0,%ebx \n;  could be avoided then the src limb fetch could pair up and save a cycle. \n;  This would probably mean going to a two limb loop with the carry limb \n;  alternately positive or negative,since an sbbl %ebx,%ebx will leave a \n;  value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax. \n; \n;  A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as \n;  \"cmpl %edx,$n\" with the immediate as the second operand. 
\n; \n;  The \"4\" source displacement is in the loop rather than the setup because \n;  this gets Ltop aligned to 8 bytes at no cost. \n\n;  eax source limb,carry subtracted \n;  ebx carry (0 or 1) \n;  ecx &src[size-1] \n;  edx counter,limbs,negative \n;  esi TWO_THIRDS_FLOOR \n;  edi &dst[size-1] \n;  ebp scratch (result limb) \n\n\talign   8\nLtop: \n    imul    ebp,eax,INVERSE_3\n    cmp     ebp,ONE_THIRD_CEIL\n    mov     eax,[edi+edx*4]\t\t;  dst cache line prefetch \n    sbb     ebx,-1\t\t\t\t;  +1 if ebp>=ceil(b/3) \n    cmp     esi,ebp\n    mov     eax,[4+ecx+edx*4]\t;  next src limb \n    sbb     eax,ebx\t\t\t\t;  and further -1 if ebp>=ceil(b*2/3) \n    mov     ebx,0\n    adc     ebx,0\t\t\t\t;  new carry \n    mov     [edi+edx*4],ebp\n    inc     edx\n    jnz     Ltop\n    imul    edx,eax,INVERSE_3\n    cmp     edx,ONE_THIRD_CEIL\n    mov     [edi],edx\n    sbb     ebx,-1\t\t\t\t;  +1 if edx>=ceil(b/3) \n    cmp     edx,TWO_THIRDS_CEIL\n    sbb     ebx,-1\t\t\t\t;  +1 if edx>=ceil(b*2/3) \n    pop     ebp\n    mov     eax,ebx\n    pop     edi\n    pop     esi\n    pop     ebx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mmx/divrem_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n\tglobal  ___gmpn_preinv_divrem_1 \n    global  ___gmpn_divrem_1c \n    global  ___gmpn_divrem_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_divrem_1c\n\texport\t___gmpn_divrem_1\n%endif\n\n%define\tMUL_THRESHOLD\t\t4 \n%define\tPARAM_PREINV_SHIFT      esp+frame+28 \n%define PARAM_PREINV_INVERSE\tesp+frame+24 \n%define PARAM_CARRY     esp+frame+24 \n%define PARAM_DIVISOR   esp+frame+20 \n%define PARAM_SIZE      esp+frame+16 \n%define PARAM_SRC       esp+frame+12 \n%define PARAM_XSIZE     esp+frame+8 \n%define PARAM_DST       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI        esp+frame-12 \n%define SAVE_EBP        esp+frame-16 \n\n%define VAR_NORM        esp+frame-20 \n%define VAR_INVERSE     esp+frame-24 \n%define VAR_SRC\t\t\tesp+frame-28 \n%define VAR_DST\t\t\tesp+frame-32 \n%define VAR_DST_STOP    esp+frame-36 \n%define STACK_SPACE\t\t36 \n%define frame\t\t\t0 \n\n\tsection .text\n\n\talign   16   
\n\n___gmpn_preinv_divrem_1: \n    mov     ecx,[PARAM_XSIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_EDI],edi\n    mov     edx,[PARAM_DST]\n    mov     eax,[-4+esi+ebx*4]\t;  src high limb \n    xor     edi,edi\t\t\t\t;  initial carry (if can't skip a div) \n\tlea     edx,[8+edx+ecx*4]\t;  &dst[xsize+2] \n\txor     ecx,ecx\n    mov     [VAR_DST_STOP],edx\t;  &dst[xsize+2] \n    cmp     eax,ebp\t\t\t\t;  high cmp divisor \n\tcmovc\tedi,eax\n\tcmovnc\tecx,eax\t\t\t\t;  (the latter in case src==dst) \n    mov     [-12+edx+ebx*4],ecx\t;  dst high limb \n\tsbb     ebx,0\t\t\t\t;  skip one division if high<divisor \n    mov     ecx,[PARAM_PREINV_SHIFT]\n    lea     edx,[-8+edx+ebx*4]\t;  &dst[xsize+size] \n    mov     eax,32\n    mov     [VAR_DST],edx\t\t;  &dst[xsize+size] \n    shl     ebp,cl\t\t\t\t;  d normalized \n    sub     eax,ecx\n    mov     [VAR_NORM],ecx\n    movd    mm7,eax\t\t\t\t;  rshift \n    mov     eax,[PARAM_PREINV_INVERSE]\n    jmp     Lstart_preinv\n\n\talign   16\n\n%define       frame   0 \n\n___gmpn_divrem_1c: \n    mov     edx,[PARAM_CARRY]\n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n%define frame   STACK_SPACE \n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_XSIZE]\n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    lea     edi,[-4+edi+ebx*4]\n    jmp     Lstart_1c\n\n;  offset 0x31,close enough to aligned \n\n%define       frame   0 \n\n___gmpn_divrem_1: \n    mov     ecx,[PARAM_SIZE]\n    mov     edx,0\t\t\t\t;  initial carry (if can't skip a div) \n    sub     esp,STACK_SPACE\n%define frame   STACK_SPACE \n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     
[SAVE_EBX],ebx\n    mov     ebx,[PARAM_XSIZE]\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    or      ecx,ecx\t\t\t\t;  size \n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    lea     edi,[-4+edi+ebx*4]\t;  &dst[xsize-1] \n    jz      Lno_skip_div\t\t;  if size==0 \n    mov     eax,[-4+esi+ecx*4]\t;  src high limb \n    xor     esi,esi\n    cmp     eax,ebp\t\t\t\t;  high cmp divisor \n\tcmovc\tedx,eax\n\tcmovnc\tesi,eax\t\t\t\t;  (the latter in case src==dst) \n    mov     [edi+ecx*4],esi\t\t;  dst high limb \n    sbb     ecx,0\t\t\t\t;  size-1 if high<divisor \n    mov     esi,[PARAM_SRC]\t\t;  reload \nLno_skip_div: \n\n;  eax  \n;  ebx xsize \n;  ecx size \n;  edx carry \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLstart_1c: \n    lea     eax,[ebx+ecx]\t\t;  size+xsize \n    cmp     eax,MUL_THRESHOLD\n    jae     Lmul_by_inverse\n    or      ecx,ecx\n    jz      Ldivide_no_integer\n\n;  eax scratch (quotient) \n;  ebx xsize \n;  ecx counter \n;  edx scratch (remainder) \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLdivide_integer: \n    mov     eax,[-4+esi+ecx*4]\n    div     ebp\n    mov     [edi+ecx*4],eax\n    dec     ecx\n    jnz     Ldivide_integer\nLdivide_no_integer: \n    mov     edi,[PARAM_DST]\n    or      ebx,ebx\n    jnz     Ldivide_fraction\nLdivide_done: \n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebx,[SAVE_EBX]\n    mov     eax,edx\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax scratch (quotient) \n;  ebx counter \n;  ecx \n;  edx scratch (remainder) \n;  esi \n;  edi dst \n;  ebp divisor \n\nLdivide_fraction: \n    mov     eax,0\n    div     ebp\n    mov     [-4+edi+ebx*4],eax\n    dec     ebx\n    jnz     Ldivide_fraction\n    jmp     Ldivide_done\n\n;  eax \n;  ebx xsize \n;  ecx size \n;  edx carry \n;  esi src \n;  edi &dst[xsize-1] \n;  ebp divisor \n\nLmul_by_inverse: \n    lea     ebx,[12+edi]   ;  &dst[xsize+2],loop dst stop \n    mov   
  [VAR_DST_STOP],ebx\n    lea     edi,[4+edi+ecx*4] ;  &dst[xsize+size] \n    mov     [VAR_DST],edi\n    mov     ebx,ecx         ;  size \n    bsr     ecx,ebp         ;  31-l \n    mov     edi,edx         ;  carry \n    lea     eax,[1+ecx]    ;  32-l \n    xor     ecx,31         ;  l \n    mov     [VAR_NORM],ecx\n    mov     edx,-1\n    shl     ebp,cl          ;  d normalized \n    movd    mm7,eax\n    mov     eax,-1\n    sub     edx,ebp         ;  (b-d)-1 giving edx:eax = b*(b-d)-1 \n    div     ebp             ;  floor (b*(b-d)-1) / d \n\n;  eax inverse \n;  ebx size \n;  ecx shift \n;  edx \n;  esi src \n;  edi carry \n;  ebp divisor \n;\n;  mm7 rshift \n\nLstart_preinv: \n    mov     [VAR_INVERSE],eax\n    or      ebx,ebx         ;  size \n    lea     eax,[-12+esi+ebx*4] ;  &src[size-3] \n    mov     [VAR_SRC],eax\n    jz      Lstart_zero\n    mov     esi,[8+eax]    ;  src high limb \n    cmp     ebx,1\n    jz      Lstart_one\nLstart_two_or_more: \n    mov     edx,[4+eax]    ;  src second highest limb \n\tshld\tedi,esi,cl\n\tshld\tesi,edx,cl\n    cmp     ebx,2\n    je      Linteger_two_left\n    jmp     Linteger_top\n\nLstart_one: \n\tshld\tedi,esi,cl\n    shl     esi,cl          ;  n10 = high << l \n    jmp     Linteger_one_left\n\nLstart_zero: \n;  Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and \n;  skipped a division. \n\n    shl     edi,cl          ;  n2 = carry << l \n    mov     eax,edi         ;  return value for zero_done \n    cmp     [PARAM_XSIZE],dword 0\n    je      Lzero_done\n    jmp     Lfraction_some\n\n;  This loop runs at about 25 cycles,which is probably sub-optimal,and \n;  certainly more than the dependent chain would suggest.  A better loop,or \n;  a better rough analysis of what's possible,would be welcomed. \n; \n;  In the current implementation,the following successively dependent \n;  micro-ops seem to exist. 
\n; \n;                     uops \n;              n2+n1   1   (addl) \n;              mul     5 \n;              q1+1    3   (addl/adcl) \n;              mul     5 \n;              sub     3   (subl/sbbl) \n;              addback 2   (cmov) \n;                     --- \n;                     19 \n; \n;  Lack of registers hinders explicit scheduling and it might be that the \n;  normal out of order execution isn't able to hide enough under the mul \n;  latencies. \n; \n;  Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than \n;  cmov (and takes one uop off the dependent chain).  A sarl/andl/addl \n;  combination was tried for the addback (despite the fact it would lengthen \n;  the dependent chain) but found to be no faster. \n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (src,dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp d \n;\n;  mm0 scratch (src qword) \n;  mm7 rshift for normalization \n\n\talign   16\nLinteger_top: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[VAR_SRC]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    movq    mm0,[ecx]\t\t\t;  next src limb and the one below it \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    sub     ecx,4\n    mov     [VAR_SRC],ecx\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    mov     eax,ebp\t\t\t\t;  d \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    jz      Lq1_ff\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    mov     ecx,[VAR_DST]\n    psrlq   mm0,mm7\n    sub     esi,eax\n    mov     eax,[VAR_DST_STOP]\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    movd    esi,mm0\n    sbb     ebx,0    ;  q \n    
sub     ecx,4\n    mov     [ecx],ebx\n    cmp     ecx,eax\n    mov     [VAR_DST],ecx\n    jne     Linteger_top\nLinteger_loop_done: \n \n;  Here,and in integer_one_left below,an sbbl $0 is used rather than a jz \n;  q1_ff special case.  This make the code a bit smaller and simpler,and \n;  costs only 2 cycles (each). \n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (src,dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n;\n;  mm7 rshift \n\nLinteger_two_left: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[PARAM_SRC]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    movd    mm0,[ecx]\t\t\t;  src low limb \n    mov     ecx,[VAR_DST_STOP]\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    psllq   mm0,32\n    psrlq   mm0,mm7\n    sub     esi,eax\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    movd    esi,mm0\n    sbb     ebx,0\t\t\t\t;  q \n    mov     [-4+ecx],ebx\n\n;  eax scratch \n;  ebx scratch (nadj,q1) \n;  ecx scratch (dst) \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n;\n;  mm7 rshift \n\nLinteger_one_left: \n    mov     eax,esi\n    mov     ebx,ebp\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ecx,[VAR_DST_STOP]\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE]\t;  m*(n2+n1) \n    add     
eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\t\t\t\t;  q1 if q1+1 overflowed \n    mul     ebx\n    sub     esi,eax\n    mov     eax,[PARAM_XSIZE]\n    sbb     edi,edx\t\t\t\t;  n - (q1+1)*d \n    mov     edi,esi\t\t\t\t;  remainder -> n2 \n    lea     edx,[ebp+esi]\n\tcmovc\tedi,edx\n    sbb     ebx,0\t\t\t\t;  q \n    mov     [-8+ecx],ebx\n    sub     ecx,8\n    or      eax,eax\t\t\t\t;  xsize \n    jnz     Lfraction_some\n    mov     eax,edi\nLfraction_done: \n    mov     ecx,[VAR_NORM]\nLzero_done: \n    mov     ebp,[SAVE_EBP]\n    mov     edi,[SAVE_EDI]\n    mov     esi,[SAVE_ESI]\n    mov     ebx,[SAVE_EBX]\n    add     esp,STACK_SPACE\n    shr     eax,cl\n    emms\n    ret\n\n;  Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword \n;  of q*d is simply -d and the remainder n-q*d = n10+d \n;\n;  eax (divisor) \n;  ebx (q1+1 == 0) \n;  ecx \n;  edx \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n\nLq1_ff: \n    mov     ecx,[VAR_DST]\n    mov     edx,[VAR_DST_STOP]\n    sub     ecx,4\n    mov     [VAR_DST],ecx\n    psrlq   mm0,mm7\n    lea     edi,[ebp+esi]\t\t;  n-q*d remainder -> next n2 \n    mov     [ecx],dword -1\n    movd    esi,mm0\t\t\t\t;  next n10 \n    cmp     edx,ecx\n    jne     Linteger_top\n    jmp     Linteger_loop_done\n\n; \n;  In the current implementation,the following successively dependent \n;  micro-ops seem to exist. \n; \n;                     uops \n;              mul     5 \n;              q1+1    1   (addl) \n;              mul     5 \n;              sub     3   (negl/sbbl) \n;              addback 2   (cmov) \n;                     --- \n;                     16 \n; \n;  The loop in fact runs at about 17.5 cycles.  Using a sarl/andl/addl for \n;  the addback was found to be a touch slower. 
\n\n;  eax \n;  ebx \n;  ecx \n;  edx \n;  esi \n;  edi carry \n;  ebp divisor \n\n\talign   16\nLfraction_some: \n    mov     esi,[PARAM_DST]\n    mov     ecx,[VAR_DST_STOP]\t;  &dst[xsize+2] \n    mov     eax,edi\n    sub     ecx,8\t\t\t\t;  &dst[xsize] \n\n;  eax n2,then scratch \n;  ebx scratch (nadj,q1) \n;  ecx dst,decrementing \n;  edx scratch \n;  esi dst stop point \n;  edi n2 \n;  ebp divisor \n\n\talign   16\nLfraction_top: \n    mul     dword [VAR_INVERSE]\t;  m*n2 \n    mov     eax,ebp\t\t\t\t;  d \n    sub     ecx,4\t\t\t\t;  dst \n    lea     ebx,[edi+1]\n    add     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*n2) = q1+1 \n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    neg     eax\t\t\t\t\t;  low of n - (q1+1)*d \n    sbb     edi,edx\t\t\t\t;  high of n - (q1+1)*d,caring only about carry \n    lea     edx,[ebp+eax]\n\tcmovc\teax,edx\n    sbb     ebx,0\t\t\t\t;  q \n    mov     edi,eax\t\t\t\t;  remainder->n2 \n    cmp     ecx,esi\n    mov     [ecx],ebx\t\t\t;  previous q \n    jne     Lfraction_top\n    jmp     Lfraction_done\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mmx/hamdist.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_000000FF000000FF    mm4 \n\n%ifndef\tPIC\n\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n\nLrodata_000000FF000000FF: \n    dd      0000000FFh\n    dd      0000000FFh\n\n%endif\n\n%macro\tph_fun 3\n\n\tsection .text\n\talign   32\n\tglobal\t%1%2\n\n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%if\t%3 == 0\n%ifdef\tPIC\n\tnop\t\t;  avoid shrl crossing a 32-byte boundary \n%endif\n%endif\n\n%1%2: \n\tmov     ecx, [PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax, 0xAAAAAAAA\n    mov     edx, 0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax, 0x0F0F0F0F\n    mov     edx, 0x000000FF\n    punpckldq 
mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n    punpckldq mm4,mm4\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n    movq    mm4,[Lrodata_000000FF000000FF]\n%endif\n\tmov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov\t\tedx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n\tshr     ecx,1\n\tjnc     %%Ltop\n\tmovd\tmm1,[eax+ecx*8]\t; Zdisp(  movd,0,(%eax,%ecx,8),%mm1)\n%if\t%3 == 1\n\tmovd\tmm0,[edx+ecx*8]\t; Zdisp(  movd,0,(%edx,%ecx,8),%mm0)\"\n    pxor    mm1,mm0\n%endif\n    inc     ecx\n    jmp     %%Lloaded\n\n\talign   16\n%if\t%3 == 0\n\tnop\n%endif\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if %3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n%%Lloaded: \n    movq    mm0,mm1\n    pand    mm1,REG_AAAAAAAAAAAAAAAA\n    psrlq   mm1,1\n%if\t%3 == 1\n\tnop\n%endif\n    psubd   mm0,mm1\t\t\t\t\t\t;  bit pairs \n%if %3 == 1\n\tnop\n%endif\n    movq    mm1,mm0\n    psrlq   mm0,2\n    pand    mm0,REG_3333333333333333\n    pand    mm1,REG_3333333333333333\n    paddd   mm0,mm1\t\t\t\t\t\t;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1\t\t\t\t\t\t;  bytes \n    movq    mm1,mm0\n    psrlq   mm0,8\n    paddb   mm0,mm1\t\t\t\t\t\t;  words \n    movq    mm1,mm0\n    psrlq   mm0,16\n    paddd   mm0,mm1\t\t\t\t\t\t;  dwords \n    pand    mm0,REG_000000FF000000FF\n    paddd   mm2,mm0\t\t\t\t\t\t;  low to total \n    psrlq   mm0,32\n    paddd   mm2,mm0\t\t\t\t\t\t;  high to total \n    loop    %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n%define\tPARAM_SIZE  esp+frame+12 \n%define PARAM_SRC2  esp+frame+8 
\n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n;\tglobal ___gmpn_hamdist\n\t\n\tph_fun\t___g,mpn_hamdist,1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mmx/lshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n\tglobal\t___gmpn_lshift \n\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t8 \n\n;   minimum 5,because the unrolled loop can't handle less \n%define       UNROLL_THRESHOLD  5 \n\n\tsection .text\n\talign   8\n\n___gmpn_lshift: \n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n    jae     Lunroll\n    mov     edi,[-4+ebx+eax*4]\t;  src high limb \n    dec     eax\n    jnz     Lsimple\n\tshld\teax,edi,cl\n    shl     edi,cl\n    mov     [edx],edi\t\t\t;  dst low limb \n    pop     edi\t\t\t\t\t;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\nLsimple: \n    movd    mm5,[ebx+eax*4] ;  
src high limb \n    movd    mm6,ecx         ;  lshift \n    neg     ecx\n    psllq   mm5,mm6\n    add     ecx,32\n    movd    mm7,ecx\n    psrlq   mm5,32          ;  retval \n\n;  eax counter,limbs,negative \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n;  mm7 32-shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4-4]\n    dec     eax\n    psrlq   mm0,mm7\n    movd    [4+edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    movd    eax,mm5\n    psllq   mm0,mm6\n    pop     edi\n    pop     ebx\n    movd    [edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx+eax*4-4]\t;  src high limb \n    lea     edi,[ebx+eax*4]\n    movd    mm6,ecx\t\t\t\t;  lshift \n    and     edi,4\n    psllq   mm5,mm6\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process high limb separately (marked xxx) to \n;  make it so. \n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-------+-- \n;   |               |           \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-- \n;   |  xxx  |       |   \n;   +-------+-------+-- \n\n    movq    mm0,[ebx+eax*4-8]\t\t;  unaligned load \n    psllq   mm0,mm6\n    dec     eax\n    psrlq   mm0,32\n    movd    [edx+eax*4],mm0\nLstart_src_aligned: \n    movq    mm1,[ebx+eax*4-8]\t\t;  src high qword \n    lea     edi,[edx+eax*4]\n    and     edi,4\n    psrlq   mm5,32\t\t\t\t\t;  return value \n    movq    mm3,[ebx+eax*4-16]\t\t;  src second highest qword \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,subtract 4 to make it so,and pretend the shift \n;  is 32 bits extra.  High limb of dst (marked xxx) handled here \n;  separately. 
\n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-- \n;   |      mm1      |   \n;   +-------+-------+-- \n;                 0mod8   4mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-------+-- \n;   |  xxx  |               | \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psllq   mm0,mm6\n    movd    mm6,ecx\n    psrlq   mm0,32\n\n;  wasted cycle here waiting for %mm0 \n\n    movd    [-4+edx+eax*4],mm0\n    sub     edx,4\nLstart_dst_aligned: \n\n    psllq   mm1,mm6\n    neg     ecx\t\t\t\t;  -shift \n    add     ecx,64\t\t\t;  64-shift \n    movq    mm2,mm3\n    movd    mm7,ecx\n    sub     eax,8\t\t\t;  size-8 \n    psrlq   mm3,mm7\n    por     mm3,mm1         ;  mm3 ready to store \n    jc      Lfinish\n\n;  The comments in mpn_rshift apply here too. \n\n;  eax counter,limbs \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 \n;  mm1 \n;  mm2 src qword from 16(%ebx,%eax,4) \n;  mm3 dst qword ready to store to 24(%edx,%eax,4) \n; \n;  mm5 return value \n;  mm6 lshift \n;  mm7 rshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3\n    por     mm0,mm2\n    movq    mm3,[ebx+eax*4]\n    psllq   mm1,mm6\n    movq    [16+edx+eax*4],mm0\n    movq    mm2,mm3 \n\tpsrlq   mm3,mm7\n    sub     eax,4\n\tpor     mm3,mm1\n    jnc     Lunroll_loop\nLfinish: \n;  eax -4 to -1 representing respectively 0 to 3 limbs remaining \n\n    test    al,2\n    jz      Lfinish_no_two\n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    sub     eax,2\nLfinish_no_two: \n\n;  eax -4 or -3 representing respectively 0 or 1 limbs remaining \n;  mm2 src prev qword,from 
16(%ebx,%eax,4) \n;  mm3 dst qword,for 24(%edx,%eax,4) \n\n    test    al,1\n    movd    eax,mm5  ;  retval \n    pop     edi\n    jz      Lfinish_zero\n\n;  One extra src limb,destination was aligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;  dest         edx+12           edx+4     edx \n;  --+---------------+---------------+-------+ \n;    |      mm3      |               |       | \n;  --+---------------+---------------+-------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra src limb,destination was unaligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;          dest         edx+12           edx+4 \n;          --+---------------+---------------+ \n;            |      mm3      |               | \n;          --+---------------+---------------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 4(%edx),and in the aligned case \n;  there's an extra limb of dst to be formed from that extra src limb \n;  left shifted. \n\n    movd    mm0,[ebx]\n    psllq   mm2,mm6\n    movq    [12+edx],mm3\n    psllq   mm0,32\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    por     mm0,mm2\n    psllq   mm1,mm6\n    movq    [4+edx],mm0\n    psrlq   mm1,32\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n    movd    [edx],mm1\nLfinish_one_unaligned: \n    emms\n    ret\nLfinish_zero: \n\n;  No extra src limbs,destination was aligned. 
\n;\n;                  source          ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;  dest          edx+8             edx \n;  --+---------------+---------------+ \n;    |      mm3      |               | \n;  --+---------------+---------------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra src limbs,destination was unaligned. \n;\n;                source            ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;          dest          edx+8   edx+4 \n;          --+---------------+-------+ \n;            |      mm3      |       | \n;          --+---------------+-------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  The movd for the unaligned case writes the same data to 4(%edx) \n;  that the movq does for the aligned case. \n\n    movq    [8+edx],mm3\n    and     ecx,32\n    psllq   mm2,mm6\n    jz      Lfinish_zero_unaligned\n    movq    [edx],mm2\nLfinish_zero_unaligned: \n    psrlq   mm2,32\n    pop     ebx\n    movd    eax,mm5  ;  retval \n    movd    [4+edx],mm2\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mmx/popcount.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_000000FF000000FF    mm4 \n\n%ifndef\tPIC\n\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n\nLrodata_000000FF000000FF: \n    dd      0000000FFh\n    dd      0000000FFh\n\n%endif\n\n%macro\tph_fun 3\n\n\tsection .text\n\talign   32\n\tglobal\t%1%2\n\n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%if\t%3 == 0\n%ifdef\tPIC\n\tnop\t\t;  avoid shrl crossing a 32-byte boundary \n%endif\n%endif\n\n%1%2: \n\tmov     ecx, [PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax, 0xAAAAAAAA\n    mov     edx, 0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax, 0x0F0F0F0F\n    mov     edx, 0x000000FF\n    punpckldq 
mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n    punpckldq mm4,mm4\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n    movq    mm4,[Lrodata_000000FF000000FF]\n%endif\n\tmov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov\t\tedx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n\tshr     ecx,1\n\tjnc     %%Ltop\n\tmovd\tmm1,[eax+ecx*8]\t; Zdisp(  movd,0,(%eax,%ecx,8),%mm1)\n%if\t%3 == 1\n\tmovd\tmm0,[edx+ecx*8]\t; Zdisp(  movd,0,(%edx,%ecx,8),%mm0)\n    pxor    mm1,mm0\n%endif\n    inc     ecx\n    jmp     %%Lloaded\n\n\talign   16\n%if\t%3 == 0\n\tnop\n%endif\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if %3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n%%Lloaded: \n    movq    mm0,mm1\n    pand    mm1,REG_AAAAAAAAAAAAAAAA\n    psrlq   mm1,1\n%if\t%3 == 1\n\tnop\n%endif\n    psubd   mm0,mm1\t\t\t\t\t\t;  bit pairs \n%if %3 == 1\n\tnop\n%endif\n    movq    mm1,mm0\n    psrlq   mm0,2\n    pand    mm0,REG_3333333333333333\n    pand    mm1,REG_3333333333333333\n    paddd   mm0,mm1\t\t\t\t\t\t;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1\t\t\t\t\t\t;  bytes \n    movq    mm1,mm0\n    psrlq   mm0,8\n    paddb   mm0,mm1\t\t\t\t\t\t;  words \n    movq    mm1,mm0\n    psrlq   mm0,16\n    paddd   mm0,mm1\t\t\t\t\t\t;  dwords \n    pand    mm0,REG_000000FF000000FF\n    paddd   mm2,mm0\t\t\t\t\t\t;  low to total \n    psrlq   mm0,32\n    paddd   mm2,mm0\t\t\t\t\t\t;  high to total \n    loop    %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n%define\tPARAM_SIZE  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 
\n%define\tframe\t\t0\n\n;\tglobal ___gmpn_popcount\n\t\n\tph_fun\t___g,mpn_popcount,0\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mmx/rshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\t\n\tglobal  ___gmpn_rshift \n\n%ifdef\tDLL\n\texport\t___gmpn_rshift\n%endif\n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define frame\t\t8 \n\n;   Minimum 5,because the unrolled loop can't handle less. 
\n%define\tUNROLL_THRESHOLD  5 \n\n\tsection .text\n\talign   8\n\n___gmpn_rshift: \n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n\tjae     Lunroll\n    dec     eax\n    mov     edi,[ebx]\t\t;  src low limb \n    jnz     Lsimple\n\tshrd\teax,edi,cl\n    shr     edi,cl\n    mov     [edx],edi       ;  dst low limb \n    pop     edi             ;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLsimple: \n    movd    mm5,[ebx]       ;  src[0] \n    lea     ebx,[ebx+eax*4]  ;  &src[size-1] \n    movd    mm6,ecx         ;  rshift \n    lea     edx,[-4+edx+eax*4] ;  &dst[size-2] \n    psllq   mm5,32\n    neg     eax\n\n;  This loop is 5 or 8 cycles,with every second load unaligned and a wasted \n;  cycle waiting for the mm0 result to be ready.  For comparison a shrdl is 4 \n;  cycles and would be 8 in a simple loop.  Using mmx helps the return value \n;  and last limb calculations too. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-1] \n;  ecx return value \n;  edx &dst[size-2] \n;\n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4]\n    inc     eax\n    psrlq   mm0,mm6\n    movd    [edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    psrlq   mm5,mm6         ;  return value \n    psrlq   mm0,mm6\n    pop     edi\n    movd    eax,mm5\n    pop     ebx\n    movd    [4+edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx]       ;  src[0] \n    mov     edi,4\n    movd    mm6,ecx         ;  rshift \n    test    ebx,edi\n    psllq   mm5,32\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process low limb separately (marked xxx) and \n;  step src and dst by one limb,making src aligned. \n;\n;  source                  ebx \n;  --+-------+-------+-------+ \n;            |          xxx  | \n;  --+-------+-------+-------+ \n;          4mod8   0mod8   4mod8 \n;\n;          dest            edx \n;          --+-------+-------+ \n;            |       |  xxx  |   \n;          --+-------+-------+ \n\n    movq    mm0,[ebx]       ;  unaligned load \n    psrlq   mm0,mm6\n    add     ebx,4\n    dec     eax\n    movd    [edx],mm0\n    add     edx,4\nLstart_src_aligned: \n    movq    mm1,[ebx]\n    test    edx,edi\n    psrlq   mm5,mm6         ;  retval \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,add 4 to make it so,and pretend the shift is \n;  32 bits extra.  Low limb of dst (marked xxx) handled here \n;  separately. 
\n;\n;           source          ebx \n;           --+-------+-------+ \n;             |      mm1      | \n;           --+-------+-------+ \n;                   4mod8   0mod8 \n;\n;   dest                    edx \n;   --+-------+-------+-------+ \n;                     |  xxx  |         \n;   --+-------+-------+-------+ \n;           4mod8   0mod8   4mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psrlq   mm0,mm6\n    movd    mm6,ecx\n    movd    [edx],mm0\n    add     edx,4\nLstart_dst_aligned: \n    movq    mm3,[8+ebx]\n    neg     ecx\n    movq    mm2,mm3\t\t\t;  mm2 src qword \n    add     ecx,64\n    movd    mm7,ecx\n    psrlq   mm1,mm6\n    lea     ebx,[-12+ebx+eax*4]\n    lea     edx,[-20+edx+eax*4]\n    psllq   mm3,mm7\n    sub     eax,7\t\t\t;  size-7 \n    por     mm3,mm1         ;  mm3 ready to store \n    neg     eax             ;  -(size-7) \n    jns     Lfinish\n\n;  This loop is the important bit,the rest is just support.  Careful \n;  instruction scheduling achieves the claimed 1.75 c/l.  The \n;  relevant parts of the pairing rules are: \n;\n;  - mmx loads and stores execute only in the U pipe \n;  - only one mmx shift in a pair \n;  - wait one cycle before storing an mmx register result \n;  - the usual address generation interlock \n;\n;  Two qword calculations are slightly interleaved.  The instructions \n;  marked \"C\" belong to the second qword,and the \"C prev\" one is for \n;  the second qword from the previous iteration. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-12] \n;  ecx \n;  edx &dst[size-12] \n;  esi \n;  edi \n;\n;  mm0 \n;  mm1 \n;  mm2 src qword from -8(%ebx,%eax,4) \n;  mm3 dst qword ready to store to -8(%edx,%eax,4) \n;\n;  mm5 return value \n;  mm6 rshift \n;  mm7 lshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3\n    por     mm0,mm2\n\n\tmovq    mm3,[ebx+eax*4+8]\n\tpsrlq   mm1,mm6\n    movq    [edx+eax*4],mm0\n\tmovq    mm2,mm3\n\tpsllq   mm3,mm7\n    add     eax,4\n\tpor     mm3,mm1\n    js      Lunroll_loop\n\nLfinish: \n;  eax 0 to 3 representing respectively 3 to 0 limbs remaining \n\n    test    al,2\n    jnz     Lfinish_no_two\n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    add     eax,2\nLfinish_no_two: \n\n;  eax 2 or 3 representing respectively 1 or 0 limbs remaining \n;\n;  mm2 src prev qword,from -8(%ebx,%eax,4) \n;  mm3 dst qword,for -8(%edx,%eax,4) \n\n    test    al,1\n    pop     edi\n    movd    eax,mm5  ;  retval \n    jnz     Lfinish_zero\n\n;  One extra limb,destination was aligned. \n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                                  edx \n;  +-------+---------------+---------------+-- \n;  |       |               |      mm3      | \n;  +-------+---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra limb,destination was unaligned. 
\n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                          edx \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 8(%edx),and in the aligned case \n;  there's a further extra limb of dst to be formed. \n\n\n    movd    mm0,[8+ebx]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [edx],mm3\n    por     mm0,mm2\n    psrlq   mm1,mm6\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n\n    ;  dst was aligned,must store one extra limb \n    movd    [16+edx],mm1\nLfinish_one_unaligned: \n\n    movq    [8+edx],mm0\n    emms\n    ret\nLfinish_zero: \n\n;  No extra limbs,destination was aligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                        edx+4 \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra limbs,destination was unaligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                edx+4 \n;  +-------+---------------+-- \n;  |       |      mm3      | \n;  +-------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = 64-(shift+32) \n\n;  The movd for the unaligned case is clearly the same data as the \n;  movq for the aligned case,it's just a choice between whether one \n;  or two limbs should be written. 
\n\n    movq    [4+edx],mm3\n    psrlq   mm2,mm6\n    movd    [12+edx],mm2\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_zero_unaligned\n    movq    [12+edx],mm2\nLfinish_zero_unaligned: \n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/mod_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n    global  ___gmpn_preinv_mod_1 \n    global  ___gmpn_mod_1c     \n\tglobal  ___gmpn_mod_1 \n\n%ifdef\tDLL\n\texport\t___gmpn_mod_1c\n\texport\t___gmpn_mod_1\n%endif\n\n%define\tMUL_NORM_THRESHOLD      4 \n%define MUL_UNNORM_THRESHOLD\t5 \n\n%define\tMUL_NORM_DELTA\tMUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD\n\n%define\tPARAM_INVERSE\tesp+frame+16 \n%define PARAM_CARRY     esp+frame+16 \n%define PARAM_DIVISOR   esp+frame+12 \n%define PARAM_SIZE      esp+frame+8 \n%define PARAM_SRC       esp+frame+4 \n\n%define SAVE_EBX        esp+frame-4 \n%define SAVE_ESI        esp+frame-8 \n%define SAVE_EDI        esp+frame-12 \n%define SAVE_EBP        esp+frame-16 \n\n%define\tVAR_NORM        esp+frame-20 \n%define VAR_INVERSE     esp+frame-24 \n%define STACK_SPACE     24 \n\n\tsection .text\n\t\n\talign   16\n\n%define frame\t0 \n\n___gmpn_preinv_mod_1: \n    mov     edx,[PARAM_SRC]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_EBX],ebx\n    mov     
ebx,[PARAM_SIZE]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     eax,[PARAM_INVERSE]\n    mov     [SAVE_EDI],edi\n    mov     edi,[-4+edx+ebx*4]\t;  src high limb \n    mov     [VAR_NORM],dword 0\n    lea     ecx,[-8+edx+ebx*4]\t;  &src[size-2] \n    mov     esi,edi\n    sub     edi,ebp\t\t\t\t;  high-divisor \n\tcmovc\tedi,esi\n    dec     ebx\n    jnz     Lpreinv_entry\n    jmp     Ldone_edi\n     \n\talign   16\n\n%define       frame   0 \n\n___gmpn_mod_1c: \n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     [SAVE_EBP],ebp\n    mov     eax,[PARAM_DIVISOR]\n    mov     [SAVE_ESI],esi\n    mov     edx,[PARAM_CARRY]\n    mov     esi,[PARAM_SRC]\n    or      ecx,ecx\n    jz      Ldone_edx       ;  result==carry if size==0 \n    sar     eax,31\n    mov     ebp,[PARAM_DIVISOR]\n    and     eax,MUL_NORM_DELTA\n    add     eax,MUL_UNNORM_THRESHOLD\n    cmp     ecx,eax\n    jb      Ldivide_top\n\n;  The carry parameter pretends to be the src high limb. 
\n    mov     [SAVE_EBX],ebx\n    lea     ebx,[1+ecx]    ;  size+1 \n    mov     eax,edx         ;  carry \n    jmp     Lmul_by_inverse_1c\n\n\talign   16\n\n%define       frame   0\n \n___gmpn_mod_1: \n    mov     ecx,[PARAM_SIZE]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n    mov     edx,0\t\t\t\t;  initial carry (if can't skip a div) \n    mov     [SAVE_ESI],esi\n    mov     eax,[PARAM_SRC]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,[PARAM_DIVISOR]\n    mov     esi,[PARAM_DIVISOR]\n    or      ecx,ecx\n    jz      Ldone_edx\n    mov     eax,[-4+eax+ecx*4]\t;  src high limb \n    sar     ebp,31\n    and     ebp,MUL_NORM_DELTA\n    add     ebp,MUL_UNNORM_THRESHOLD\n    cmp     eax,esi\t\t\t\t;  carry flag if high<divisor \n\tcmovc\tedx,eax\n    mov     esi,[PARAM_SRC]\n    sbb     ecx,0\t\t\t\t;  size-1 to skip one div \n    jz      Ldone_eax\t\t\t;  done if had size==1 \n    cmp     ecx,ebp\n    mov     ebp,[PARAM_DIVISOR]\n    jae     Lmul_by_inverse\n\n;  eax scratch (quotient) \n;  ebx \n;  ecx counter,limbs,decrementing \n;  edx scratch (remainder) \n;  esi src \n;  edi \n;  ebp divisor \n\nLdivide_top: \n    mov     eax,[-4+esi+ecx*4]\n    div     ebp\n    dec     ecx\n    jnz     Ldivide_top\nLdone_edx: \n    mov     eax,edx\nLdone_eax: \n    mov     esi,[SAVE_ESI]\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  eax src high limb \n;  ebx \n;  ecx \n;  edx \n;  esi src \n;  edi \n;  ebp divisor \n\nLmul_by_inverse: \n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\nLmul_by_inverse_1c: \n    bsr     ecx,ebp\t\t\t\t;  31-l \n    mov     [SAVE_EDI],edi\n    xor     ecx,31\t\t\t\t;  l \n    mov     [VAR_NORM],ecx\n    shl     ebp,cl\t\t\t\t;  d normalized \n    mov     edi,eax\t\t\t\t;  src high -> n2 \n    sub     eax,ebp\n\tcmovnc\tedi,eax\n    mov     eax,-1\n    mov     edx,-1\n    sub     edx,ebp\t\t\t\t;  (b-d)-1 so  edx:eax = b*(b-d)-1 \n    lea     ecx,[-8+esi+ebx*4] ;  &src[size-2] \n    div     
ebp\t\t\t\t\t;  floor (b*(b-d)-1) / d \nLpreinv_entry: \n    mov     [VAR_INVERSE],eax\n\n;  No special scheduling of loads is necessary in this loop,out of order \n;  execution hides the latencies already. \n; \n;  The way q1+1 is generated in %ebx and d is moved to %eax for the multiply \n;  seems fastest.  The obvious change to generate q1+1 in %eax and then just \n;  multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle \n;  slower,for no obvious reason. \n\n;  eax n10 (then scratch) \n;  ebx scratch (nadj,q1) \n;  ecx src pointer,decrementing \n;  edx scratch \n;  esi n10 \n;  edi n2 \n;  ebp divisor \n\n\talign   16\nLinverse_top: \n    mov     eax,[ecx]\t\t\t;  next src limb \n    mov     esi,eax\n    sar     eax,31\t\t\t\t;  -n1 \n    mov     ebx,ebp\n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     eax,edi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE] ;  m*(n2+n1) \n    add     ebx,esi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    sub     ecx,4\n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+edi]\t\t\t;  n2+1 \n    mov     eax,ebp\t\t\t\t;  d \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    jz      Lq1_ff\n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    sub     esi,eax\t\t\t\t;  low n - (q1+1)*d \n    sbb     edi,edx\t\t\t\t;  high n - (q1+1)*d,0 or -1 \n    and     edi,ebp\t\t\t\t;  d if underflow \n    add     edi,esi\t\t\t\t;  remainder with addback if necessary \n    cmp     ecx,[PARAM_SRC]\n    jae     Linverse_top\n\n;  %edi is the remainder modulo d*2^n and now must be reduced to \n;  0<=r<d by calculating r*2^n mod d*2^n and then right shifting by \n;  n.  If d was already normalized on entry so that n==0 then nothing \n;  is needed here.  The chance of n==0 is low,but it's true of say \n;  PP from gmp-impl.h. 
\n;\n;  eax \n;  ebx \n;  ecx \n;  edx \n;  esi  \n;  edi remainder \n;  ebp divisor (normalized) \n\nLinverse_loop_done: \n    mov     ecx,[VAR_NORM]\n    mov     esi,0\n    or      ecx,ecx\n    jz      Ldone_edi\n\n;  Here use %edi=n10 and %esi=n2,opposite to the loop above. \n;\n;  The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1 \n;  back,rather than q1_ff special case code.  This is simpler and \n;  costs only 2 uops. \n\n\tshld\tesi,edi,cl\n    shl     edi,cl\n    mov     eax,edi\t\t\t\t;  n10 \n    mov     ebx,ebp\t\t\t\t;  d \n    sar     eax,31\t\t\t\t;  -n1 \n    and     ebx,eax\t\t\t\t;  -n1 & d \n    neg     eax\t\t\t\t\t;  n1 \n    add     ebx,edi\t\t\t\t;  nadj = n10 + (-n1 & d),ignoring overflow \n    add     eax,esi\t\t\t\t;  n2+n1 \n    mul     dword [VAR_INVERSE]\t;  m*(n2+n1) \n    add     eax,ebx\t\t\t\t;  m*(n2+n1) + nadj,low giving carry flag \n    lea     ebx,[1+esi]\t\t\t;  n2+1 \n    adc     ebx,edx\t\t\t\t;  1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 \n    sbb     ebx,0\n    mov     eax,ebp\t\t\t\t;  d \n    mul     ebx\t\t\t\t\t;  (q1+1)*d \n    mov     ebx,[SAVE_EBX]\n    sub     edi,eax\t\t\t\t;  low  n - (q1+1)*d is remainder \n    sbb     esi,edx\t\t\t\t;  high n - (q1+1)*d,0 or -1 \n    and     esi,ebp\n    mov     ebp,[SAVE_EBP]\n    lea     eax,[esi+edi]\t\t;  remainder \n    mov     esi,[SAVE_ESI]\n    shr     eax,cl\t\t\t\t;  denorm remainder \n    mov     edi,[SAVE_EDI]\n    add     esp,STACK_SPACE\n    ret\nLdone_edi: \n    mov     ebx,[SAVE_EBX]\n    mov     eax,edi\n    mov     esi,[SAVE_ESI]\n    mov     edi,[SAVE_EDI]\n    mov     ebp,[SAVE_EBP]\n    add     esp,STACK_SPACE\n    ret\n\n;  Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword \n;  of q*d is simply -d and the remainder n-q*d = n10+d. \n; \n;  This is reached only very rarely. 
\n;\n;  eax (divisor) \n;  ebx (q1+1 == 0) \n;  ecx src pointer \n;  edx \n;  esi n10 \n;  edi (n2) \n;  ebp divisor \n\nLq1_ff: \n    lea     edi,[ebp+esi]  ;  n-q*d remainder -> next n2 \n    cmp     ecx,[PARAM_SRC]\n    jae     Linverse_top\n    jmp     Linverse_loop_done\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/modexact_1c_odd.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\textern\t___gmp_modlimb_invert_table\n\tglobal  ___gmpn_modexact_1c_odd \n    global  ___gmpn_modexact_1_odd \n\n%ifdef\tDLL\n\texport\t___gmpn_modexact_1c_odd\n\texport\t___gmpn_modexact_1_odd\n%endif\n\n%define\tPARAM_CARRY     esp+frame+16 \n%define PARAM_DIVISOR\tesp+frame+12 \n%define PARAM_SIZE      esp+frame+8 \n%define PARAM_SRC       esp+frame+4 \n\n;   Not enough room under modexact_1 to make these re-use the parameter \n;   space,unfortunately. 
\n\n%define\tSAVE_EBX    esp+frame-4 \n%define SAVE_ESI    esp+frame-8 \n%define SAVE_EDI    esp+frame-12 \n%define STACK_SPACE\t12 \n%define\tframe\t\t0 \n\n\tsection .text\n\n\talign   16\n\n___gmpn_modexact_1c_odd: \n    mov     ecx,[PARAM_CARRY]\n    jmp     Lstart_1c\n\n\talign   16\n\n___gmpn_modexact_1_odd: \n    xor     ecx,ecx\nLstart_1c: \n    mov     eax,[PARAM_DIVISOR]\n    sub     esp,STACK_SPACE\n\tFR_sesp\tSTACK_SPACE\n\n    mov     [SAVE_ESI],esi\n    mov     esi,[PARAM_SRC]\n    shr     eax,1\t\t\t\t;  d/2 \n    mov     [SAVE_EDI],edi\n    and     eax,127\n\n%ifdef\tPIC\n    call    Lmovl_eip_edi\n    add     edi,_GLOBAL_OFFSET_TABLE_\n    mov     edi,[___gmp_modlimb_invert_table+edi]\n    movzx   edi,byte [eax+edi]\t\t\t\t\t\t\t;  inv 8 bits \n%else\n    movzx   edi,byte [___gmp_modlimb_invert_table+eax]\t;  inv 8 bits \n%endif\n\n    xor     edx,edx\t\t\t\t;  initial extra carry \n    lea     eax,[edi+edi]\t\t;  2*inv \n    imul    edi,edi\t\t\t\t;  inv*inv \n    mov     [SAVE_EBX],ebx\n    mov     ebx,[PARAM_SIZE]\n    imul    edi,[PARAM_DIVISOR] ;  inv*inv*d \n    sub     eax,edi\t\t\t\t;  inv = 2*inv - inv*inv*d \n    lea     edi,[eax+eax]\t\t;  2*inv \n    imul    eax,eax\t\t\t\t;  inv*inv \n    imul    eax,[PARAM_DIVISOR] ;  inv*inv*d \n    lea     esi,[esi+ebx*4]\t\t;  src end \n    neg     ebx\t\t\t\t\t;  -size \n    sub     edi,eax\t\t\t\t;  inv = 2*inv - inv*inv*d \n        \n;  The dependent chain here is \n; \n;      subl    %edx,%eax        1 \n;      imull   %edi,%eax        4 \n;      mull    PARAM_DIVISOR    5 \n;                             ---- \n;        total                 10 \n; \n;  and this is the measured speed.  No special scheduling is necessary,out \n;  of order execution hides the load latency. 
\n;\n;  eax scratch (src limb) \n;  ebx counter,limbs,negative \n;  ecx carry bit,0 or 1 \n;  edx carry limb,high of last product \n;  esi &src[size] \n;  edi inverse \n;  ebp \n\nLtop: \n    mov     eax,[esi+ebx*4]\n    sub     eax,ecx\n    sbb     ecx,ecx\n    sub     eax,edx\n    sbb     ecx,0\n    imul    eax,edi\n    neg     ecx\n    mul     dword [PARAM_DIVISOR]\n    inc     ebx\n    jnz     Ltop\n    mov     esi,[SAVE_ESI]\n    lea     eax,[ecx+edx]\n    mov     edi,[SAVE_EDI]\n    mov     ebx,[SAVE_EBX]\n    add     esp,STACK_SPACE\n    ret\n\n%ifdef\tPIC\nLmovl_eip_edi: \n    mov     edi,[esp]\n    ret\n%endif\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/p3mmx/hamdist.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\tglobal  %1%2\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n 
   mov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+12 \n%define PARAM_SRC2  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n;\tglobal ___gmpn_hamdist\n\t\n\tph_fun\t___g,mpn_hamdist,1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/p3mmx/popcount.asm",
    "content": "\n;  Copyright 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%define       REG_AAAAAAAAAAAAAAAA    mm7 \n%define       REG_3333333333333333    mm6 \n%define       REG_0F0F0F0F0F0F0F0F    mm5 \n%define       REG_0000000000000000    mm4 \n\n%ifndef\tPIC\n\tsection\t.data\n\talign   8\n\nLrodata_AAAAAAAAAAAAAAAA: \n    dd      0AAAAAAAAh\n    dd      0AAAAAAAAh\n\nLrodata_3333333333333333: \n    dd      033333333h\n    dd      033333333h\n\nLrodata_0F0F0F0F0F0F0F0F: \n    dd      00F0F0F0Fh\n    dd      00F0F0F0Fh\n%endif\n\n%macro\tph_fun\t3\n\n\talign   32\n\tglobal  %1%2\n\t \n%ifdef\tDLL\n\texport\t%1%2\n%endif\n\n%1%2:\n    mov     ecx,[PARAM_SIZE]\n%ifdef\tPIC\n    mov     eax,0xAAAAAAAA\n    mov     edx,0x33333333\n    movd    mm7,eax\n    movd    mm6,edx\n    mov     eax,0x0F0F0F0F\n    punpckldq mm7,mm7\n    punpckldq mm6,mm6\n    movd    mm5,eax\n    movd    mm4,edx\n    punpckldq mm5,mm5\n%else\n    movq    mm7,[Lrodata_AAAAAAAAAAAAAAAA]\n    movq    mm6,[Lrodata_3333333333333333]\n    movq    mm5,[Lrodata_0F0F0F0F0F0F0F0F]\n%endif\n\tpxor    mm4,mm4\n 
   mov     eax,[PARAM_SRC]\n%if\t%3 == 1\n\tmov     edx,[PARAM_SRC2]\n%endif\n    pxor    mm2,mm2\n    shr     ecx,1\n    jnc     %%Ltop\n    movd    mm1,[eax+ecx*8]\n%if\t%3 == 1\n    movd    mm0,[edx+ecx*8]\n    pxor    mm1,mm0\n%endif\n    or      ecx,ecx\n    jmp     %%Lloaded\n\n;  eax src \n;  ebx \n;  ecx counter,qwords,decrementing \n;  edx [hamdist] src2 \n; \n;  mm0 (scratch) \n;  mm1 (scratch) \n;  mm2 total (low dword) \n;  mm3 \n;  mm4 \\ \n;  mm5 | special constants \n;  mm6 | \n;  mm7 / \n\n\talign   16\n%%Ltop: \n\tmovq    mm1,[eax+ecx*8-8]\n%if\t%3 == 1\n\tpxor    mm1,[edx+ecx*8-8]\n%endif\n\tdec     ecx\n%%Lloaded: \n\tmovq    mm0,mm1\n\tpand    mm1,REG_AAAAAAAAAAAAAAAA\n\tpsrlq   mm1,1\n\tpsubd   mm0,mm1  ;  bit pairs \n\tmovq    mm1,mm0\n\tpsrlq   mm0,2\n\tpand    mm0,REG_3333333333333333\n\tpand    mm1,REG_3333333333333333\n\tpaddd   mm0,mm1  ;  nibbles \n    movq    mm1,mm0\n    psrlq   mm0,4\n    pand    mm0,REG_0F0F0F0F0F0F0F0F\n    pand    mm1,REG_0F0F0F0F0F0F0F0F\n    paddd   mm0,mm1  ;  bytes \n    psadbw\tmm0,mm4\n    paddd   mm2,mm0  ;  add to total \n    jnz     %%Ltop\n    movd    eax,mm2\n    emms\n    ret\n%endmacro\n\n\tsection .text\n\t\n%define\tPARAM_SIZE  esp+frame+8 \n%define PARAM_SRC   esp+frame+4 \n%define\tframe\t\t0\n\n;\tglobal ___gmpn_popcount\n\tph_fun\t___g,mpn_popcount,0\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/sqr_basecase.asm",
    "content": "\n;  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n\tglobal\t___gmpn_sqr_basecase\n\n%ifdef\tDLL\n\texport\t___gmpn_sqr_basecase\n%endif\n\n%define UNROLL_COUNT\t64\t; seems to be maximum required (I hope!)\n\n%define\tPARAM_SIZE\tesp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define frame\t\t0 \n\n\tsection\t.text\n\t\n\talign\t32\n\t\t\n___gmpn_sqr_basecase:\n    mov     edx,[PARAM_SIZE]\n    mov     eax,[PARAM_SRC]\n    cmp     edx,2\n    mov     ecx,[PARAM_DST]\n    je      Ltwo_limbs\n    mov     eax,[eax]\n    ja      Lthree_or_more\n\n;  one limb only \n;  eax        src limb \n;  ebx \n;  ecx        dst \n;  edx \n\n    mul     eax\n    mov     [ecx],eax\n    mov     [4+ecx],edx\n    ret\n\n;  eax        src \n;  ebx \n;  ecx        dst \n;  edx \n\n%define\tSAVE_ESI    esp+frame-4 \n%define SAVE_EBX    esp+frame-8 \n%define SAVE_EDI    esp+frame-12 \n%define SAVE_EBP    esp+frame-16 \n%define\tframe\t\t16\n\nLtwo_limbs: \n    sub     esp,frame\n    mov     
[SAVE_ESI],esi\n    mov     esi,eax\n    mov     eax,[eax]\n    mul     eax\t\t\t\t;  src[0]^2 \n    mov     [ecx],eax\t\t;  dst[0] \n    mov     eax,[4+esi]\n    mov     [SAVE_EBX],ebx\n    mov     ebx,edx\t\t\t;  dst[1] \n    mul     eax\t\t\t\t;  src[1]^2 \n    mov     [SAVE_EDI],edi\n    mov     edi,eax\t\t\t;  dst[2] \n    mov     eax,[esi]\n    mov     [SAVE_EBP],ebp\n    mov     ebp,edx\t\t\t;  dst[3] \n    mul     dword [4+esi]\t;  src[0]*src[1] \n    add     ebx,eax\n    mov     esi,[SAVE_ESI]\n    adc     edi,edx\n    adc     ebp,0\n    add     eax,ebx\n    mov     ebx,[SAVE_EBX]\n    adc     edx,edi\n    mov     edi,[SAVE_EDI]\n    adc     ebp,0\n    mov     [4+ecx],eax\n    mov     [12+ecx],ebp\n    mov     ebp,[SAVE_EBP]\n    mov     [8+ecx],edx\n    add     esp,frame\n    ret\n\n;  eax        src low limb \n;  ebx \n;  ecx        dst \n;  edx        size \n\nLthree_or_more: \n\tsub\t\tesp,frame\n\tmov\t\t[SAVE_ESI],esi\n    cmp     edx,4\n    mov     esi,[PARAM_SRC]\n    jae     Lfour_or_more\n\n;  three limbs \n;  eax        src low limb \n;  ebx \n;  ecx        dst \n;  edx \n;  esi        src \n;  edi \n;  ebp \n\n\tmov\t\t[SAVE_EBP],ebp\n\tmov\t\t[SAVE_EDI],edi\n    mul     eax\t\t\t\t;  src[0] ^ 2 \n    mov     [ecx],eax\n    mov     [4+ecx],edx\n    mov     eax,[4+esi]\n    xor     ebp,ebp\n    mul     eax\t\t\t\t;  src[1] ^ 2 \n    mov     [8+ecx],eax\n    mov     [12+ecx],edx\n    mov     eax,[8+esi]\n\tmov\t\t[SAVE_EBX],ebx\n    mul     eax\t\t\t\t;  src[2] ^ 2 \n    mov     [16+ecx],eax\n    mov     [20+ecx],edx\n    mov     eax,[esi]\n    mul     dword [4+esi]\t;  src[0] * src[1] \n    mov     ebx,eax\n    mov     edi,edx\n    mov     eax,[esi]\n    mul     dword [8+esi]\t;  src[0] * src[2] \n    add     edi,eax\n    mov     ebp,edx\n    adc     ebp,0\n    mov     eax,[4+esi]\n    mul     dword [8+esi]\t;  src[1] * src[2] \n    xor     esi,esi\n    add     ebp,eax\n\n;  eax \n;  ebx        dst[1] \n;  ecx        dst \n;  edx        dst[4] 
\n;  esi        zero,will be dst[5] \n;  edi        dst[2] \n;  ebp        dst[3] \n\n    adc     edx,0\n    add     ebx,ebx\n    adc     edi,edi\n    adc     ebp,ebp\n    adc     edx,edx\n    mov     eax,[4+ecx]\n    adc     esi,0\n    add     eax,ebx\n    mov     [4+ecx],eax\n    mov     eax,[8+ecx]\n    adc     eax,edi\n    mov     ebx,[12+ecx]\n    adc     ebx,ebp\n    mov     edi,[16+ecx]\n    mov     [8+ecx],eax\n    mov     ebp,[SAVE_EBP]\n    mov     [12+ecx],ebx\n    mov     ebx,[SAVE_EBX]\n    adc     edi,edx\n    mov     eax,[20+ecx]\n    mov     [16+ecx],edi\n    mov     edi,[SAVE_EDI]\n    adc     eax,esi\t\t\t;  no carry out of this \n    mov     esi,[SAVE_ESI]\n    mov     [20+ecx],eax\n    add     esp,frame\n    ret\n\n;  eax        src low limb \n;  ebx \n;  ecx \n;  edx        size \n;  esi        src \n;  edi \n;  ebp \n;  First multiply src[0]*src[1..size-1] and store at dst[1..size]. \n\n%define VAR_COUNTER\tesp+frame-20 \n%define VAR_JMP\t\tesp+frame-24 \n%define\tSTACK_SPACE 24 \n\nLfour_or_more: \n\tsub     esp,STACK_SPACE-frame\n%define       frame   STACK_SPACE \n    mov     ecx,1\n    mov     [SAVE_EDI],edi\n    mov     edi,[PARAM_DST]\n    mov     [SAVE_EBX],ebx\n    sub     ecx,edx\t\t\t\t;  -(size-1) \n    mov     [SAVE_EBP],ebp\n    mov     ebx,0\t\t\t\t;  initial carry \n    lea     esi,[esi+edx*4]\t\t;  &src[size] \n    mov     ebp,eax\t\t\t\t;  multiplier \n    lea     edi,[-4+edi+edx*4]  ;  &dst[size-1] \n\n;  This loop runs at just over 6 c/l. 
\n;  eax        scratch \n;  ebx        carry \n;  ecx        counter,limbs,negative,-(size-1) to -1 \n;  edx        scratch \n;  esi        &src[size] \n;  edi        &dst[size-1] \n;  ebp        multiplier \n\nLmul_1: \n    mov     eax,ebp\n    mul     dword [esi+ecx*4]\n    add     eax,ebx\n    mov     ebx,0\n    adc     ebx,edx\n    mov     [4+edi+ecx*4],eax\n    inc     ecx\n    jnz     Lmul_1\n    mov     [4+edi],ebx\n\n;  Addmul src[n]*src[n+1..size-1] at dst[2*n-1...],for each n=1..size-2. \n;  \n;  The last two addmuls,which are the bottom right corner of the product \n;  triangle,are left to the end.  These are src[size-3]*src[size-2,size-1] \n;  and src[size-2]*src[size-1].  If size is 4 then it's only these corner \n;  cases that need to be done. \n;  \n;  The unrolled code is the same as mpn_addmul_1(),see that routine for some \n;  comments. \n;  \n;  VAR_COUNTER is the outer loop,running from -(size-4) to -1,inclusive. \n;  \n;  VAR_JMP is the computed jump into the unrolled code,stepped by one code \n;  chunk each outer loop. \n;\n;   This is also hard-coded in the address calculation below. \n;\n;   With &src[size] and &dst[size-1] pointers,the displacements in the \n;   unrolled code fit in a byte for UNROLL_COUNT values up to 32,but above \n;   that an offset must be added to them. 
\n;\n;  eax \n;  ebx        carry \n;  ecx \n;  edx \n;  esi        &src[size] \n;  edi        &dst[size-1] \n;  ebp \n\n%define\tCODE_BYTES_PER_LIMB\t15 \n%if\tUNROLL_COUNT > 32\n%define\tOFFSET\t4*(UNROLL_COUNT-32)\n%else\n%define\tOFFSET\t0\n%endif\n    mov     ecx,[PARAM_SIZE]\n    sub     ecx,4\n    jz      Lcorner\n    mov     edx,ecx\n    neg     ecx\n    shl     ecx,4\n%if\tOFFSET != 0\n\tsub\t\tesi,OFFSET\n%endif\n\n%ifdef\tPIC\n    call    Lhere\nLhere:\n    add     ecx,[esp]\n    add     ecx,Lunroll_inner_end-Lhere-(2*CODE_BYTES_PER_LIMB)\n    add     ecx,edx\n    add\t\tesp,4\n%else\n\tlea     ecx,[Lunroll_inner_end-2*CODE_BYTES_PER_LIMB+ecx+edx]\n%endif\n\tneg     edx\n%if OFFSET != 0\n\tsub\t\tedi,OFFSET\n%endif\n\n;  The calculated jump mustn't be before the start of the available \n;  code.  This is the limit that UNROLL_COUNT puts on the src operand \n;  size,but checked here using the jump address directly. \n\n; ASSERT(ae,movl_text_address( Lunroll_inner_start,%eax) cmpl %eax,%ecx) \n\n%ifdef\tASSERT\n\tmov\t\teax,Lunroll_inner_start\n\tcmp\t\tecx,eax\n\tjae\t\tLunroll_outer_top\n\tjmp\t\texit\n%endif\n\n;  eax \n;  ebx        high limb to store \n;  ecx        VAR_JMP \n;  edx        VAR_COUNTER,limbs,negative \n;  esi        &src[size],constant \n;  edi        dst ptr,second highest limb of last addmul \n;  ebp \n\n%if\tUNROLL_COUNT % 2 == 1\n%define\tcmovX\tcmovz\n%else\n%define\tcmovX\tcmovnz\n%endif\n\n\talign\t16\nLunroll_outer_top: \n    mov     ebp,[-12+OFFSET+esi+edx*4]   ;  multiplier \n    mov     [VAR_COUNTER],edx\n    mov     eax,[-8+OFFSET+esi+edx*4]   ;  first limb of multiplicand \n    mul     ebp\n    test    cl,1\n    mov     ebx,edx    ;  high carry \n    lea     edi,[4+edi]\n    mov     edx,ecx    ;  jump \n    mov     ecx,eax    ;  low carry \n    lea     edx,[CODE_BYTES_PER_LIMB+edx]\n\tcmovX\tecx,ebx\n\tcmovX\tebx,eax\n    mov     [VAR_JMP],edx\n    jmp     edx\n\n;  Must be on an even address here so the low bit of the 
jump address \n;  will indicate which way around ecx/ebx should start. \n\n;  eax        scratch \n;  ebx        carry high \n;  ecx        carry low \n;  edx        scratch \n;  esi        src pointer \n;  edi        dst pointer \n;  ebp        multiplier \n;  \n;  15 code bytes each limb \n;  ecx/ebx reversed on each chunk \n\n\talign\t2\n\nLunroll_inner_start: \n\n%assign\ti\tUNROLL_COUNT\n%rep\tUNROLL_COUNT\n\t%assign\tdisp_src\tOFFSET-4*i\n\t%assign\tdisp_dst\tdisp_src \n;\tm4_assert(disp_src>=-128 && disp_src<128)\n;\tm4_assert(disp_dst>=-128 && disp_dst<128)\n\n\tmov\t\teax,[byte disp_src+esi]\n    mul     ebp\n%if\ti % 2 == 0\n\tadd\t\t[byte disp_dst+edi],ebx\n\tadc     ecx,eax\n    mov     ebx,edx\n    adc     ebx,0\n%else\n\tadd\t\t[byte disp_dst+edi],ecx\n\tadc     ebx,eax\n    mov     ecx,edx\n    adc     ecx,0\n%endif\n%assign\ti\ti-1\n%endrep\n\nLunroll_inner_end: \n    add     [OFFSET+edi],ebx\n    mov     edx,[VAR_COUNTER]\n    adc     ecx,0\n    mov     [OFFSET+4+edi],ecx\n    mov     ecx,[VAR_JMP]\n    inc     edx\n    jnz     Lunroll_outer_top\n\n%if\tOFFSET != 0\n    add     esi,OFFSET\n    add     edi,OFFSET\n%endif\n\n;  eax \n;  ebx \n;  ecx \n;  edx \n;  esi        &src[size] \n;  edi        &dst[2*size-5] \n;  ebp \n\n\talign\t16\nLcorner: \n    mov     eax,[-12+esi]\n    mul     dword [-8+esi]\n    add     [edi],eax\n    mov     eax,[-12+esi]\n    mov     ebx,0\n    adc     ebx,edx\n    mul     dword [-4+esi]\n    add     ebx,eax\n    mov     eax,[-8+esi]\n    adc     edx,0\n    add     [4+edi],ebx\n    mov     ebx,0\n    adc     ebx,edx\n    mul     dword [-4+esi]\n    mov     ecx,[PARAM_SIZE]\n    add     eax,ebx\n    adc     edx,0\n    mov     [8+edi],eax\n    mov     [12+edi],edx\n    mov     edi,[PARAM_DST]\n\n;  Left shift of dst[1..2*size-2],the bit shifted out becomes dst[2*size-1]. 
\n\n    sub     ecx,1\t\t\t\t;  size-1 \n    xor     eax,eax\t\t\t\t;  ready for final adcl,and clear carry \n    mov     edx,ecx\n    mov     esi,[PARAM_SRC]\n\n;  eax \n;  ebx \n;  ecx        counter,size-1 to 1 \n;  edx        size-1 (for later use) \n;  esi        src (for later use) \n;  edi        dst,incrementing \n;  ebp \n\nLlshift: \n    rcl     dword [4+edi],1\n    rcl     dword [8+edi],1\n    lea     edi,[8+edi]\n    dec     ecx\n    jnz     Llshift\n    adc     eax,eax\n    mov     [4+edi],eax\t\t\t;  dst most significant limb \n    mov     eax,[esi]\t\t\t;  src[0] \n    lea     esi,[4+esi+edx*4]   ;  &src[size] \n    sub     ecx,edx\t\t\t\t;  -(size-1) \n\n;  Now add in the squares on the diagonal,src[0]^2,src[1]^2,...,\n;  src[size-1]^2.  dst[0] hasn't yet been set at all yet,and just gets the \n;  low limb of src[0]^2. \n\n    mul     eax\n    mov     [edi+ecx*8],eax     ;  dst[0] \n\n;  eax        scratch \n;  ebx        scratch \n;  ecx        counter,negative \n;  edx        carry \n;  esi        &src[size] \n;  edi        dst[2*size-2] \n;  ebp \n\nLdiag: \n    mov     eax,[esi+ecx*4]\n    mov     ebx,edx\n    mul     eax\n    add     [4+edi+ecx*8],ebx\n    adc     [8+edi+ecx*8],eax\n    adc     edx,0\n    inc     ecx\n    jnz     Ldiag\n    mov     esi,[SAVE_ESI]\n    mov     ebx,[SAVE_EBX]\n    add     [4+edi],edx\t\t\t;  dst most significant limb \n    mov     edi,[SAVE_EDI]\n    mov     ebp,[SAVE_EBP]\n    add     esp,frame\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/p6/submul_1.asm",
    "content": "\n;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\x86i.inc\" \n\n%define\tUNROLL_LOG2\t\t4\n%define\tUNROLL_COUNT\t(1 << UNROLL_LOG2)\n%define\tUNROLL_MASK\t\tUNROLL_COUNT-1  \n%define\tUNROLL_BYTES\t4*UNROLL_COUNT\n\n%ifdef\tPIC\n%define\tUNROLL_THRESHOLD\t5 \n%else\n%define\tUNROLL_THRESHOLD\t5 \n%endif\n\n%define\tPARAM_CARRY\t\t\tesp+frame+20 \n%define PARAM_MULTIPLIER\tesp+frame+16 \n%define PARAM_SIZE\t\t\tesp+frame+12 \n%define PARAM_SRC\t\t\tesp+frame+8 \n%define PARAM_DST\t\t\tesp+frame+4 \n\n%macro\tmul_fun\t4\n\n\tglobal  %1%3\n\tglobal  %1%4\n\n%ifdef\tDLL\n\texport\t%1%3\n\texport  %1%4\n%endif\n\n\talign   32\n%define\tframe\t0\n%1%4:\n    FR_push ebx\n    mov     ebx,[PARAM_CARRY]\n\tjmp     %%Lstart_nc\n\n%define\tframe\t0\n%1%3:\n\tFR_push ebx\n    xor     ebx,ebx\t;  initial carry \n%%Lstart_nc: \n    mov     ecx,[PARAM_SIZE]\n    FR_push esi\n    mov     esi,[PARAM_SRC]\n    FR_push\tedi\n    mov     edi,[PARAM_DST]\n    FR_push ebp\n    cmp     ecx,UNROLL_THRESHOLD\n    mov     ebp,[PARAM_MULTIPLIER]\n  
  jae     %%Lunroll\n\n;  simple loop \n;  this is offset 0x22,so close enough to aligned \n;  eax scratch \n;  ebx carry \n;  ecx counter \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n\n%%Lsimple: \n    mov     eax,[esi]\n    add     edi,4\n    mul     ebp\n    add     eax,ebx\n    adc     edx,0\n\t%2\t\t[edi-4],eax\n    mov     ebx,edx\n    adc     ebx,0\n    dec     ecx\n    lea     esi,[4+esi]\n    jnz     %%Lsimple\n    pop     ebp\n    pop     edi\n    pop     esi\n    mov     eax,ebx\n    pop     ebx\n    ret\n\n;  VAR_JUMP holds the computed jump temporarily because there's not enough \n;  registers when doing the mul for the initial two carry limbs. \n; \n;  The add/adc for the initial carry in %ebx is necessary only for the \n;  mpn_add/submul_1c entry points.  Duplicating the startup code to \n;  eliminate this for the plain mpn_add/submul_1 doesn't seem like a good \n;  idea. \n;\n;   overlapping with parameters already fetched \n\n%define\tVAR_COUNTER\tPARAM_SIZE\n%define\tVAR_JUMP\tPARAM_DST\n\n; VAL1 = ifelse(UNROLL_BYTES,256,128)\n%define\tVAL1\t128\n; VAL2 = ifelse(UNROLL_BYTES,256,-128)\n%define\tVAL2   -128\n\n;  this is offset 0x43,so close enough to aligned \n;  eax \n;  ebx initial carry \n;  ecx size \n;  edx \n;  esi src \n;  edi dst \n;  ebp \n\n%%Lunroll: \n    mov     edx,ecx\n    dec     ecx\n    sub     edx,2\n    neg     ecx\n    shr     edx,UNROLL_LOG2\n    and     ecx,UNROLL_MASK\n    mov     [VAR_COUNTER],edx\n    mov     edx,ecx\n        \n;  15 code bytes per limb \n\n%ifdef\tPIC\n\tcall    %%Lhere\n%%Lhere: \n    shl     edx,4\n    neg     ecx\n    lea     edx,[edx+ecx*1]\n    add     edx,%%Lentry-%%Lhere\n    add     edx,[esp]\n\tadd\t\tesp,4\n%else\n\tshl     edx,4\n    neg     ecx\n\tlea\t\tedx,[%%Lentry+edx+ecx]\n%endif\n    mov     eax,[esi]\t\t\t;  src low limb \n    mov     [VAR_JUMP],edx\n\tlea\t\tesi,[VAL1+4+esi+ecx*4]\n    mul     ebp\n    add     eax,ebx\t\t\t\t;  initial carry (from _1c) \n    adc  
   edx,0\n    mov     ebx,edx\t\t\t\t;  high carry \n\tlea\t\tedi,[VAL1+edi+ecx*4]\n    mov     edx,[VAR_JUMP]\n    test    ecx,1\n    mov     ecx,eax\t\t\t\t;  low carry \n\tcmovnz\tecx,ebx\n\tcmovnz\tebx,eax\n    jmp     edx\n\n;  eax scratch \n;  ebx carry hi \n;  ecx carry lo \n;  edx scratch \n;  esi src \n;  edi dst \n;  ebp multiplier \n;\n;  VAR_COUNTER loop counter \n;\n;  15 code bytes per limb \n\n%define\tCHUNK_COUNT\t2 \n\n\talign   32\n%%Ltop: \n\tadd     edi,UNROLL_BYTES\n%%Lentry: \n%assign\tdisp\tVAL2\n%rep\tUNROLL_COUNT/CHUNK_COUNT\n\tmov\t\teax,[byte disp+esi]\n\tmul     ebp\n\t%2\t\t[byte disp+edi],ecx\n    adc     ebx,eax\n    mov     ecx,edx\n    adc     ecx,0\n    mov     eax,[byte disp+4+esi]\n    mul     ebp\n\t%2\t\t[byte disp+4+edi],ebx\n    adc     ecx,eax\n    mov     ebx,edx\n    adc     ebx,0\n%assign\t\tdisp\tdisp+4*CHUNK_COUNT\n%endrep\n\n    dec     dword [VAR_COUNTER]\n    lea     esi,[UNROLL_BYTES+esi]\n    jns     %%Ltop\n\n%assign\tdisp\tUNROLL_BYTES+VAL2\n\t%2\t\t[disp+edi],ecx\n    mov     eax,ebx\n    pop     ebp\n    pop     edi\n    pop     esi\n    pop     ebx\n    adc     eax,0\n    ret\n%endmacro\n\n\tsection .text\n;\tglobal ___gmpn_submul_1\n;\tglobal ___gmpn_submul_1c\n\t\n\tmul_fun\t___g,sub,mpn_submul_1,mpn_submul_1c\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/copyd.asm",
    "content": "\n;  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_copyd\n%ifdef\tDLL\n\texport\t___gmpn_copyd\n%endif\n\n\talign\t8\n___gmpn_copyd:\n\n    mov     ecx,[12+esp]\n    mov     eax,[8+esp]\n    mov     edx,[4+esp]\n    mov     [12+esp],ebx\n    add     ecx,-1\n    js      nd\noop: \n    mov     ebx,[eax+ecx*4]\n    mov     [edx+ecx*4],ebx\n    add     ecx,-1\n    jns     oop\nnd: \n    mov     ebx,[12+esp]\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/copyi.asm",
    "content": "\n;  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"..\\x86i.inc\"\n\n\tsection\t.text\n\n\tglobal\t___gmpn_copyi\n%ifdef\tDLL\n\texport\t___gmpn_copyi\n%endif\n\n\talign\t8\n___gmpn_copyi:\n    mov     ecx, [12+esp]\n    cmp     ecx, 150\n    jg      rm\n    mov     eax, [8+esp]\n    mov     edx, [4+esp]\n    mov     [12+esp],ebx\n    test    ecx,ecx\n    jz      nd\noop: \n    mov     ebx, [eax]\n    lea     eax, [4+eax]\n    add     ecx, -1\n    mov     [edx],ebx\n    lea     edx, [4+edx]\n    jnz     oop\nnd: \n    mov     ebx, [12+esp]\n    ret\nrm:\n    cld\n    mov     eax,esi\n    mov     esi, [8+esp]\n    mov     edx,edi\n    mov     edi, [4+esp]\n    rep movsd\n    mov     esi,eax\n    mov     edi,edx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/mmx/hamdist.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t12\n%define\tPARAM_SRC2\t 8\n%define\tPARAM_SRC\t 4\n    global\t___gmpn_hamdist\n%ifdef\tDLL\n    export\t___gmpn_hamdist\n%endif\n    align   16\n___gmpn_hamdist:\n    ph_fun\t1\n\n    end\n"
  },
  {
    "path": "mpn/x86w/pentium4/mmx/lshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define\tframe\t\t8 \n\n;   minimum 5,because the unrolled loop can't handle less \n%define       UNROLL_THRESHOLD  5 \n\n\tsection .text\n\n\tglobal\t___gmpn_lshift\n%ifdef\tDLL\n\texport\t___gmpn_lshift\n%endif\n\n\talign   8\n___gmpn_lshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n    jae     Lunroll\n    mov     edi,[-4+ebx+eax*4]\t;  src high limb \n    dec     eax\n    jnz     Lsimple\n\tshld\teax,edi,cl\n    shl     edi,cl\n    mov     [edx],edi\t\t\t;  dst low limb \n    pop     edi\t\t\t\t\t;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\nLsimple: \n    movd    mm5,[ebx+eax*4] ;  src 
high limb \n    movd    mm6,ecx         ;  lshift \n    neg     ecx\n    psllq   mm5,mm6\n    add     ecx,32\n    movd    mm7,ecx\n    psrlq   mm5,32          ;  retval \n\n;  eax counter,limbs,negative \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n;  mm7 32-shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4-4]\n    dec     eax\n    psrlq   mm0,mm7\n    movd    [4+edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    movd    eax,mm5\n    psllq   mm0,mm6\n    pop     edi\n    pop     ebx\n    movd    [edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx+eax*4-4]\t;  src high limb \n    lea     edi,[ebx+eax*4]\n    movd    mm6,ecx\t\t\t\t;  lshift \n    and     edi,4\n    psllq   mm5,mm6\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process high limb separately (marked xxx) to \n;  make it so. \n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-------+-- \n;   |               |           \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-- \n;   |  xxx  |       |   \n;   +-------+-------+-- \n\n    movq    mm0,[ebx+eax*4-8]\t\t;  unaligned load \n    psllq   mm0,mm6\n    dec     eax\n    psrlq   mm0,32\n    movd    [edx+eax*4],mm0\nLstart_src_aligned: \n    movq    mm1,[ebx+eax*4-8]\t\t;  src high qword \n    lea     edi,[edx+eax*4]\n    and     edi,4\n    psrlq   mm5,32\t\t\t\t\t;  return value \n    movq    mm3,[ebx+eax*4-16]\t\t;  src second highest qword \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,subtract 4 to make it so,and pretend the shift \n;  is 32 bits extra.  High limb of dst (marked xxx) handled here \n;  separately. 
\n; \n;   source     -8(ebx,%eax,4) \n;                   | \n;   +-------+-------+-- \n;   |      mm1      |   \n;   +-------+-------+-- \n;                 0mod8   4mod8 \n; \n;   dest \n;      -4(edx,%eax,4) \n;           | \n;   +-------+-------+-------+-- \n;   |  xxx  |               | \n;   +-------+-------+-------+-- \n;         0mod8   4mod8   0mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psllq   mm0,mm6\n    movd    mm6,ecx\n    psrlq   mm0,32\n\n;  wasted cycle here waiting for %mm0 \n\n    movd    [-4+edx+eax*4],mm0\n    sub     edx,4\nLstart_dst_aligned: \n\n    psllq   mm1,mm6\n    neg     ecx\t\t\t\t;  -shift \n    add     ecx,64\t\t\t;  64-shift \n    movq    mm2,mm3\n    movd    mm7,ecx\n    sub     eax,8\t\t\t;  size-8 \n    psrlq   mm3,mm7\n    por     mm3,mm1         ;  mm3 ready to store \n    jc      Lfinish\n\n;  The comments in mpn_rshift apply here too. \n\n;  eax counter,limbs \n;  ebx src \n;  ecx \n;  edx dst \n;  esi \n;  edi \n; \n;  mm0 \n;  mm1 \n;  mm2 src qword from 16(%ebx,%eax,4) \n;  mm3 dst qword ready to store to 24(%edx,%eax,4) \n; \n;  mm5 return value \n;  mm6 lshift \n;  mm7 rshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3\n    por     mm0,mm2\n    movq    mm3,[ebx+eax*4]\n    psllq   mm1,mm6\n    movq    [16+edx+eax*4],mm0\n    movq    mm2,mm3 \n\tpsrlq   mm3,mm7\n    sub     eax,4\n\tpor     mm3,mm1\n    jnc     Lunroll_loop\nLfinish: \n;  eax -4 to -1 representing respectively 0 to 3 limbs remaining \n\n    test    al,2\n    jz      Lfinish_no_two\n    movq    mm0,[ebx+eax*4+8]\n    psllq   mm2,mm6\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    movq    [24+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    sub     eax,2\nLfinish_no_two: \n\n;  eax -4 or -3 representing respectively 0 or 1 limbs remaining \n;  mm2 src prev qword,from 
16(%ebx,%eax,4) \n;  mm3 dst qword,for 24(%edx,%eax,4) \n\n    test    al,1\n    movd    eax,mm5  ;  retval \n    pop     edi\n    jz      Lfinish_zero\n\n;  One extra src limb,destination was aligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;  dest         edx+12           edx+4     edx \n;  --+---------------+---------------+-------+ \n;    |      mm3      |               |       | \n;  --+---------------+---------------+-------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra src limb,destination was unaligned. \n;\n;                  source                  ebx \n;                  --+---------------+-------+ \n;                    |      mm2      |       | \n;                  --+---------------+-------+ \n;\n;          dest         edx+12           edx+4 \n;          --+---------------+---------------+ \n;            |      mm3      |               | \n;          --+---------------+---------------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 4(%edx),and in the aligned case \n;  there's an extra limb of dst to be formed from that extra src limb \n;  left shifted. \n\n    movd    mm0,[ebx]\n    psllq   mm2,mm6\n    movq    [12+edx],mm3\n    psllq   mm0,32\n    movq    mm1,mm0\n    psrlq   mm0,mm7\n    por     mm0,mm2\n    psllq   mm1,mm6\n    movq    [4+edx],mm0\n    psrlq   mm1,32\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n    movd    [edx],mm1\nLfinish_one_unaligned: \n    emms\n    ret\nLfinish_zero: \n\n;  No extra src limbs,destination was aligned. 
\n;\n;                  source          ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;  dest          edx+8             edx \n;  --+---------------+---------------+ \n;    |      mm3      |               | \n;  --+---------------+---------------+ \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra src limbs,destination was unaligned. \n;\n;                source            ebx \n;                  --+---------------+ \n;                    |      mm2      | \n;                  --+---------------+ \n;\n;          dest          edx+8   edx+4 \n;          --+---------------+-------+ \n;            |      mm3      |       | \n;          --+---------------+-------+ \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  The movd for the unaligned case writes the same data to 4(%edx) \n;  that the movq does for the aligned case. \n\n    movq    [8+edx],mm3\n    and     ecx,32\n    psllq   mm2,mm6\n    jz      Lfinish_zero_unaligned\n    movq    [edx],mm2\nLfinish_zero_unaligned: \n    psrlq   mm2,32\n    pop     ebx\n    movd    eax,mm5  ;  retval \n    movd    [4+edx],mm2\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/mmx/popcount.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%macro\tph_fun 1\n%ifdef\tPIC\n    mov\t\t\tedx,0xAAAAAAAA\n    movd\t\tmm7,edx\n    punpckldq\tmm7,mm7\n    mov\t\t\tedx,0x33333333\n    movd\t\tmm6,edx\n    punpckldq\tmm6,mm6\n    mov\t\t\tedx,0x0F0F0F0F\n    movd\t\tmm5,edx\n    punpckldq\tmm5,mm5\n%else\n    movq\t\tmm7,[L_AA]\n    movq\t\tmm6,[L_33]\n    movq\t\tmm5,[L_0F]\n%endif\n    mov\t\t\tecx,[esp+PARAM_SIZE]\n    mov\t\t\teax,[esp+PARAM_SRC]\n%if %1 == 1\n    mov\t\t\tedx,[esp+PARAM_SRC2]\n%endif\n    pxor\t\tmm4,mm4\n    pxor\t\tmm0,mm0\n    sub\t\t\tecx,1\n    ja\t\t\t%%L_top\n\n%%L_last:\n    movd\t\tmm1,[eax+ecx*4]\n%if %1 == 1\n    movd\t\tmm2,[edx+ecx*4]\n    pxor\t\tmm1,mm2\n%endif\n    jmp\t\t\t%%L_loaded\n\n%%L_top:\n    movd\t\tmm1,[eax]\n    movd\t\tmm2,[eax+4]\n    punpckldq\tmm1,mm2\n    add\t\t\teax,8\n%if %1 == 1\n    movd\t\tmm2,[edx]\n    movd\t\tmm3,[edx+4]\n    punpckldq\tmm2,mm3\n    pxor\t\tmm1,mm2\n    add\t\t\tedx,8\n%endif\n%%L_loaded:\n    movq\t\tmm2,mm7\n    pand\t\tmm2,mm1\n    psrlq\t\tmm2,1\n    
psubd\t\tmm1,mm2\n    movq\t\tmm2,mm6\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,2\n    pand\t\tmm1,mm6\n    paddd\t\tmm1,mm2\n    movq\t\tmm2,mm5\n    pand\t\tmm2,mm1\n    psrlq\t\tmm1,4\n    pand\t\tmm1,mm5\n    paddd\t\tmm1,mm2\n    psadbw\t\tmm1,mm4\n    paddd\t\tmm0,mm1\n    sub\t\t\tecx,2\n    jg\t\t\t%%L_top\n    jz\t\t\t%%L_last\n    movd\t\teax,mm0\n    emms\n    ret\n%endmacro\n\n%ifndef PIC\n    data\n    align 8\n\nL_AA:\tdq\t0xAAAAAAAAAAAAAAAA\nL_33:\tdq\t0x3333333333333333\nL_0F:\tdq\t0x0F0F0F0F0F0F0F0F\n%endif\n\n    section .text\n\n%define\tPARAM_SIZE\t 8\n%define PARAM_SRC\t 4\n    global\t___gmpn_popcount\n%ifdef\tDLL\n    export\t___gmpn_popcount\n%endif\n    align   16\n___gmpn_popcount:\n    ph_fun\t0\n\n    end\n"
  },
  {
    "path": "mpn/x86w/pentium4/mmx/rshift.asm",
    "content": "\n;  Copyright 2001 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n%define\tPARAM_SHIFT esp+frame+16 \n%define PARAM_SIZE  esp+frame+12 \n%define PARAM_SRC   esp+frame+8 \n%define PARAM_DST   esp+frame+4 \n%define frame\t\t8 \n\n;   Minimum 5,because the unrolled loop can't handle less. 
\n%define\tUNROLL_THRESHOLD  5 \n\n\tsection .text\n\t\n\tglobal\t___gmpn_rshift\n%ifdef\tDLL\n\texport\t___gmpn_rshift\n%endif\n\n\talign   8\n___gmpn_rshift:\n    push    ebx\n    push    edi\n    mov     eax,[PARAM_SIZE]\n    mov     edx,[PARAM_DST]\n    mov     ebx,[PARAM_SRC]\n    mov     ecx,[PARAM_SHIFT]\n\tcmp     eax,UNROLL_THRESHOLD\n\tjae     Lunroll\n    dec     eax\n    mov     edi,[ebx]\t\t;  src low limb \n    jnz     Lsimple\n\tshrd\teax,edi,cl\n    shr     edi,cl\n    mov     [edx],edi       ;  dst low limb \n    pop     edi             ;  risk of data cache bank clash \n    pop     ebx\n    ret\n\n;  eax size-1 \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLsimple: \n    movd    mm5,[ebx]       ;  src[0] \n    lea     ebx,[ebx+eax*4]  ;  &src[size-1] \n    movd    mm6,ecx         ;  rshift \n    lea     edx,[-4+edx+eax*4] ;  &dst[size-2] \n    psllq   mm5,32\n    neg     eax\n\n;  This loop is 5 or 8 cycles,with every second load unaligned and a wasted \n;  cycle waiting for the mm0 result to be ready.  For comparison a shrdl is 4 \n;  cycles and would be 8 in a simple loop.  Using mmx helps the return value \n;  and last limb calculations too. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-1] \n;  ecx return value \n;  edx &dst[size-2] \n;\n;  mm0 scratch \n;  mm5 return value \n;  mm6 shift \n\nLsimple_top: \n    movq    mm0,[ebx+eax*4]\n    inc     eax\n    psrlq   mm0,mm6\n    movd    [edx+eax*4],mm0\n    jnz     Lsimple_top\n    movd    mm0,[ebx]\n    psrlq   mm5,mm6         ;  return value \n    psrlq   mm0,mm6\n    pop     edi\n    movd    eax,mm5\n    pop     ebx\n    movd    [4+edx],mm0\n    emms\n    ret\n\n;  eax size \n;  ebx src \n;  ecx shift \n;  edx dst \n;  esi \n;  edi \n;  ebp \n\n\talign   8\nLunroll: \n    movd    mm5,[ebx]       ;  src[0] \n    mov     edi,4\n    movd    mm6,ecx         ;  rshift \n    test    ebx,edi\n    psllq   mm5,32\n    jz      Lstart_src_aligned\n\n;  src isn't aligned,process low limb separately (marked xxx) and \n;  step src and dst by one limb,making src aligned. \n;\n;  source                  ebx \n;  --+-------+-------+-------+ \n;            |          xxx  | \n;  --+-------+-------+-------+ \n;          4mod8   0mod8   4mod8 \n;\n;          dest            edx \n;          --+-------+-------+ \n;            |       |  xxx  |   \n;          --+-------+-------+ \n\n    movq    mm0,[ebx]       ;  unaligned load \n    psrlq   mm0,mm6\n    add     ebx,4\n    dec     eax\n    movd    [edx],mm0\n    add     edx,4\nLstart_src_aligned: \n    movq    mm1,[ebx]\n    test    edx,edi\n    psrlq   mm5,mm6         ;  retval \n    jz      Lstart_dst_aligned\n\n;  dst isn't aligned,add 4 to make it so,and pretend the shift is \n;  32 bits extra.  Low limb of dst (marked xxx) handled here \n;  separately. 
\n;\n;           source          ebx \n;           --+-------+-------+ \n;             |      mm1      | \n;           --+-------+-------+ \n;                   4mod8   0mod8 \n;\n;   dest                    edx \n;   --+-------+-------+-------+ \n;                     |  xxx  |         \n;   --+-------+-------+-------+ \n;           4mod8   0mod8   4mod8 \n\n    movq    mm0,mm1\n    add     ecx,32         ;  new shift \n    psrlq   mm0,mm6\n    movd    mm6,ecx\n    movd    [edx],mm0\n    add     edx,4\nLstart_dst_aligned: \n    movq    mm3,[8+ebx]\n    neg     ecx\n    movq    mm2,mm3\t\t\t;  mm2 src qword \n    add     ecx,64\n    movd    mm7,ecx\n    psrlq   mm1,mm6\n    lea     ebx,[-12+ebx+eax*4]\n    lea     edx,[-20+edx+eax*4]\n    psllq   mm3,mm7\n    sub     eax,7\t\t\t;  size-7 \n    por     mm3,mm1         ;  mm3 ready to store \n    neg     eax             ;  -(size-7) \n    jns     Lfinish\n\n;  This loop is the important bit,the rest is just support.  Careful \n;  instruction scheduling achieves the claimed 1.75 c/l.  The \n;  relevant parts of the pairing rules are: \n;\n;  - mmx loads and stores execute only in the U pipe \n;  - only one mmx shift in a pair \n;  - wait one cycle before storing an mmx register result \n;  - the usual address generation interlock \n;\n;  Two qword calculations are slightly interleaved.  The instructions \n;  marked \"C\" belong to the second qword,and the \"C prev\" one is for \n;  the second qword from the previous iteration. 
\n\n;  eax counter,limbs,negative \n;  ebx &src[size-12] \n;  ecx \n;  edx &dst[size-12] \n;  esi \n;  edi \n;\n;  mm0 \n;  mm1 \n;  mm2 src qword from -8(%ebx,%eax,4) \n;  mm3 dst qword ready to store to -8(%edx,%eax,4) \n;\n;  mm5 return value \n;  mm6 rshift \n;  mm7 lshift \n\n\talign   8\nLunroll_loop: \n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3\n    por     mm0,mm2\n\n\tmovq    mm3,[ebx+eax*4+8]\n\tpsrlq   mm1,mm6\n    movq    [edx+eax*4],mm0\n\tmovq    mm2,mm3\n\tpsllq   mm3,mm7\n    add     eax,4\n\tpor     mm3,mm1\n    js      Lunroll_loop\n\nLfinish: \n;  eax 0 to 3 representing respectively 3 to 0 limbs remaining \n\n    test    al,2\n    jnz     Lfinish_no_two\n    movq    mm0,[ebx+eax*4]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [-8+edx+eax*4],mm3  ;  prev \n    por     mm0,mm2\n    movq    mm2,mm1\n    movq    mm3,mm0\n    add     eax,2\nLfinish_no_two: \n\n;  eax 2 or 3 representing respectively 1 or 0 limbs remaining \n;\n;  mm2 src prev qword,from -8(%ebx,%eax,4) \n;  mm3 dst qword,for -8(%edx,%eax,4) \n\n    test    al,1\n    pop     edi\n    movd    eax,mm5  ;  retval \n    jnz     Lfinish_zero\n\n;  One extra limb,destination was aligned. \n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                                  edx \n;  +-------+---------------+---------------+-- \n;  |       |               |      mm3      | \n;  +-------+---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  One extra limb,destination was unaligned. 
\n;\n;  source                ebx \n;  +-------+---------------+-- \n;  |       |      mm2      | \n;  +-------+---------------+-- \n;\n;  dest                          edx \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = ecx = 64-(shift+32) \n\n;  In both cases there's one extra limb of src to fetch and combine \n;  with mm2 to make a qword at 8(%edx),and in the aligned case \n;  there's a further extra limb of dst to be formed. \n\n\n    movd    mm0,[8+ebx]\n    psrlq   mm2,mm6\n    movq    mm1,mm0\n    psllq   mm0,mm7\n    movq    [edx],mm3\n    por     mm0,mm2\n    psrlq   mm1,mm6\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_one_unaligned\n\n    ;  dst was aligned,must store one extra limb \n    movd    [16+edx],mm1\nLfinish_one_unaligned: \n\n    movq    [8+edx],mm0\n    emms\n    ret\nLfinish_zero: \n\n;  No extra limbs,destination was aligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                        edx+4 \n;  +---------------+---------------+-- \n;  |               |      mm3      | \n;  +---------------+---------------+-- \n;\n;  mm6 = shift \n;  mm7 = ecx = 64-shift \n\n;  No extra limbs,destination was unaligned. \n;\n;  source        ebx \n;  +---------------+-- \n;  |      mm2      | \n;  +---------------+-- \n;\n;  dest                edx+4 \n;  +-------+---------------+-- \n;  |       |      mm3      | \n;  +-------+---------------+-- \n;\n;  mm6 = shift+32 \n;  mm7 = 64-(shift+32) \n\n;  The movd for the unaligned case is clearly the same data as the \n;  movq for the aligned case,it's just a choice between whether one \n;  or two limbs should be written. 
\n\n    movq    [4+edx],mm3\n    psrlq   mm2,mm6\n    movd    [12+edx],mm2\n    and     ecx,32\n    pop     ebx\n    jz      Lfinish_zero_unaligned\n    movq    [12+edx],mm2\nLfinish_zero_unaligned: \n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/add_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\"\n\n\tsection .text\n\n\tglobal\t___gmpn_add_nc\n%ifdef\tDLL\n\texport\t___gmpn_add_nc\n%endif\n\n\talign\t8\n___gmpn_add_nc:\n    movd    mm0,[20+esp]\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\n    jz\t    t3\nt1: movd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t1\n    movd    eax,mm0\nt3: mov     ebx,[8+esp]\n    emms\n    ret\n\n\tglobal\t___gmpn_add_n\n%ifdef\tDLL\n\texport\t___gmpn_add_n\n%endif\n\n\talign\t8\n___gmpn_add_n:\t\n\tpxor    mm0,mm0\n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    
neg     ecx\n\tjz\t\tt4\nt2:\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,32\n    add     ecx,1\n    jnz     t2\n    movd    eax,mm0\nt4: mov     ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/addmul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_addmul_1\n\tglobal\t___gmpn_addmul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_addmul_1\n\texport\t___gmpn_addmul_1c\n%endif\n\n\talign\t16\n___gmpn_addmul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_addmul_1: \n\tpxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    mov     ecx,[12+esp]\n    mov     edx,[4+esp]\n    movd    mm7,[16+esp]\n\noop: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpmuludq mm1,mm7\n\tpaddq   mm2,mm1\n\tpaddq   mm0,mm2\n\tsub     ecx,1\n\tmovd    [edx],mm0\n\tpsrlq   mm0,32\n\tlea     edx,[4+edx]\n\tjnz     oop\n\tmovd    eax,mm0\n\temms\n\tret\n\t\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/divexact_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\"\n%define\tp_space\t16\n\n\textern\t___gmp_modlimb_invert_table\n\n\tsection .text\n\n\tglobal\t___gmpn_divexact_1\n%ifdef\tDLL\n\texport\t___gmpn_divexact_1\n%endif\n\n\talign\t16\n___gmpn_divexact_1: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[16+esp]\n    sub     edx,1\n    jnz     two_or_more\n    mov     eax,[eax]\n    xor     edx,edx\n    div     ecx\n    mov     ecx,[4+esp]\n    mov     [ecx],eax\n    ret\n    \ntwo_or_more: \n    mov     eax,ecx\n    bsf     ecx,ecx\n    shr     eax,cl\n    movd    mm6,eax\n    movd    mm7,ecx\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm5,eax\n    movd    mm0,eax\n\tpmuludq mm5,mm5\n\tpmuludq mm5,mm6\n    paddd   mm0,mm0\n    psubd   mm0,mm5\n    pxor    mm5,mm5\n    paddd   mm5,mm0\n\tpmuludq mm0,mm0\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n\tpmuludq mm0,mm6\n    paddd   mm5,mm5\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    pxor 
   mm1,mm1\n    psubd   mm5,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    movd    mm3,[4+eax]\n    add     eax,4\n    punpckldq mm2,mm3\n    psrlq   mm2,mm7\n    pand    mm2,mm4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    add     ecx,4\n    movq    mm1,mm6\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     edx,1\n    jnz     top\n\ndone: \n    movd    mm2,[eax]\n    psrlq   mm2,mm7\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n\tpmuludq mm2,mm5\n    movd    [ecx],mm2\n    emms\n    ret\n    \n    end\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/divexact_by3c.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Library General Public License as\n;  published by the Free Software Foundation; either version 2 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Library General Public License for more details.\n; \n;  You should have received a copy of the GNU Library General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_divexact_by3c\t\n%ifdef\tDLL\n\texport\t___gmpn_divexact_by3c\n%endif\n\n\talign\t16\n___gmpn_divexact_by3c: \n    mov     eax,[8+esp]\n    pxor    mm0,mm0\n    movd    mm1,[16+esp]\n    pcmpeqd mm6,mm6\n    movd    mm7,[val]\n    mov     edx,[4+esp]\n    psrlq   mm6,32\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm7\n    movd    [edx],mm2\n    add     edx,4\n    movq    mm1,mm6\n    pand    mm1,mm2\n    pand    mm2,mm6\n    psllq   mm1,1\n\tpaddq   mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n    paddd   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tsection\t.data\nval:\n\tdd\t0xAAAAAAAB\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/mod_34lsub1.asm",
    "content": "\n;  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection\t.text\n\n\tglobal\t___gmpn_mod_34lsub1\n%ifdef\tDLL\n\texport\t___gmpn_mod_34lsub1\n%endif\n\n\talign\t16\n___gmpn_mod_34lsub1: \n    mov     ecx,[8+esp]\n    mov     edx,[4+esp]\n    mov     eax,[edx]\n    sub     ecx,2\n    ja      three_or_more\n    jne     one\n    mov     edx,[4+edx]\n    mov     ecx,eax\n    shr     eax,24\n    and     ecx,0x00FFFFFF\n    add     eax,ecx\n    mov     ecx,edx\n    shl     edx,8\n    shr     ecx,16\n    add     eax,ecx\n    and     edx,0x00FFFF00\n    add     eax,edx\none:ret\n\nthree_or_more: \n    pxor    mm0,mm0\n    pxor    mm1,mm1\n    pxor    mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    pcmpeqd mm6,mm6\n    psrlq   mm6,40\n\ntop: \n\tmovd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tmovd    mm3,[4+edx]\n\tpaddq   mm1,mm3\n    movd    mm3,[8+edx]\n\tpaddq   mm2,mm3\n    add     edx,12\n    sub     ecx,3\n    ja      top\n    add     ecx,1\n    js      combine\n    movd    mm3,[edx]\n\tpaddq   mm0,mm3\n\tjz      combine\n\tmovd    
mm3,[4+edx]\n\tpaddq   mm1,mm3\n\ncombine: \n    movq    mm3,mm7\n    pand    mm3,mm0\n    movq    mm4,mm7\n    pand    mm4,mm1\n    movq    mm5,mm7\n    pand    mm5,mm2\n    psrlq   mm0,32\n    psrlq   mm1,32\n    psrlq   mm2,32\n\tpaddq   mm4,mm0\n\tpaddq   mm5,mm1\n\tpaddq   mm3,mm2\n    psllq   mm4,8\n    psllq   mm5,16\n\tpaddq   mm3,mm4\n\tpaddq\tmm3,mm5\n    pand    mm6,mm3\n    psrlq   mm3,24\n\tpaddq   mm3,mm6\n    movd    eax,mm3\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/modexact_1c_odd.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\textern\t___gmp_modlimb_invert_table\n\tglobal\t___gmpn_modexact_1_odd\n\tglobal\t___gmpn_modexact_1c_odd\n\t\n%ifdef\tDLL\n\texport\t___gmpn_modexact_1_odd\n\texport\t___gmpn_modexact_1c_odd\n%endif\n\n\talign\t16\n___gmpn_modexact_1c_odd: \n    movd    mm1,[16+esp]\n    jmp     start_1c\n\n\talign\t16\n___gmpn_modexact_1_odd: \n\tpxor    mm1,mm1\n\nstart_1c: \n    mov     eax,[12+esp]\n    movd    mm7,[12+esp]\n    shr     eax,1\n    and     eax,127\n    movzx   eax,byte [___gmp_modlimb_invert_table+eax]\n    movd    mm6,eax\n    movd    mm0,eax\n\tpmuludq mm6,mm6\n\tpmuludq mm6,mm7\n    paddd   mm0,mm0\n    psubd   mm0,mm6\n    pxor    mm6,mm6\n    paddd   mm6,mm0\n\tpmuludq mm0,mm0\n\tpmuludq mm0,mm7\n    paddd   mm6,mm6\n    mov     eax,[4+esp]\n    mov     ecx,[8+esp]\n    psubd   mm6,mm0\n    pxor    mm0,mm0\n\ntop: \n    movd    mm2,[eax]\n    add     eax,4\n\tpsubq   mm2,mm0\n\tpsubq   mm2,mm1\n    movq    mm0,mm2\n    psrlq   mm0,63\n\tpmuludq mm2,mm6\n    movq    
mm1,mm7\n\tpmuludq mm1,mm2\n    psrlq   mm1,32\n    sub     ecx,1\n    jnz     top\n\ndone: \n\tpaddq   mm0,mm1\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/mul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_1\n\tglobal\t___gmpn_mul_1c\n\n%ifdef\tDLL\n\texport\t___gmpn_mul_1\n\texport\t___gmpn_mul_1c\n%endif\n\n\talign\t16\t\n___gmpn_mul_1c: \n    movd    mm0,[20+esp]\n    jmp     start_1c\n\n\talign\t16\t\n___gmpn_mul_1: \n    pxor    mm0,mm0\n\nstart_1c: \n    mov     eax,[8+esp]\n    movd    mm7,[16+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[12+esp]\n\ntop: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [edx],mm0\n    add     edx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     top\n    movd    eax,mm0\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/mul_basecase.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_mul_basecase\n%ifdef\tDLL\n\texport\t___gmpn_mul_basecase\n%endif\n\n\talign\t8\n___gmpn_mul_basecase: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    pxor    mm0,mm0\n    mov     edx,[16+esp]\n    mov     [16+esp],esi\n    mov     ebx,[4+esp]\n    mov     [4+esp],ebp\n    mov     esi,eax\n    movd    mm7,[edx]\n    mov     ecx,[12+esp]\n    mov     ebp,[20+esp]\n    mov     [20+esp],edi\n    mov     edi,ebx\n\nmul1: \n    movd    mm1,[eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [ebx],mm0\n    add     ebx,4\n    psrlq   mm0,32\n    sub     ecx,1\n    jnz     mul1\n    movd    [ebx],mm0\n    sub     ebp,1\n    jz      done\n\nouter: \n    mov     eax,esi\n    lea     ebx,[4+edi]\n    add     edi,4\n    movd    mm7,[4+edx]\n    add     edx,4\n    pxor    mm0,mm0\n    mov     ecx,[12+esp]\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[ebx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n   
 sub     ecx,1\n    movd    [ebx],mm0\n    psrlq   mm0,32\n    lea     ebx,[4+ebx]\n    jnz     inner\n    movd    [ebx],mm0\n    sub     ebp,1\n    jnz     outer\n\ndone: \n    mov     ebx,[8+esp]\n    mov     esi,[16+esp]\n    mov     edi,[20+esp]\n    mov     ebp,[4+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/sqr_basecase.asm",
    "content": ";  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sqr_basecase\n%ifdef\tDLL\n\texport\t___gmpn_sqr_basecase\n%endif\n\n\talign\t8\n___gmpn_sqr_basecase: \n    mov     edx,[12+esp]\n    mov     eax,[8+esp]\n    mov     ecx,[4+esp]\n    cmp     edx,2\n    je      two_limbs\n    ja      three_or_more\n    mov     eax,[eax]\n    mul     eax\n    mov     [ecx],eax\n    mov     [4+ecx],edx\n    ret\n\ntwo_limbs: \n    movd    mm1,[eax]\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm1\n\tpmuludq mm1,mm1\n    movd    mm2,[4+eax]\n\tpmuludq mm2,mm2\n    movd    [ecx],mm1\n    psrlq   mm1,32\n    pcmpeqd mm3,mm3\n    psrlq   mm3,32\n    pand    mm3,mm0\n    psrlq   mm0,32\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n    movd    [4+ecx],mm1\n    pcmpeqd mm4,mm4\n    psrlq   mm4,32\n    pand    mm4,mm2\n    psrlq   mm2,32\n    psllq   mm0,1\n    psrlq   mm1,32\n\tpaddq   mm0,mm1\n\tpaddq   mm0,mm4\n    movd    [8+ecx],mm0\n    psrlq   mm0,32\n\tpaddq   mm0,mm2\n    movd    [12+ecx],mm0\n\temms\n    ret\n\nthree_or_more: \n    sub     
esp,12\n    pxor    mm0,mm0\n    movd    mm7,[eax]\n    mov     [8+esp],esi\n    mov     [4+esp],edi\n    mov     [esp],ebp\n    mov     esi,eax\n    mov     edi,ecx\n    sub     edx,1\n\nmul1: \n    movd    mm1,[4+eax]\n    add     eax,4\n\tpmuludq mm1,mm7\n\tpaddq   mm0,mm1\n    movd    [4+ecx],mm0\n    add     ecx,4\n    psrlq   mm0,32\n    sub     edx,1\n    jnz     mul1\n    mov     ebp,[24+esp]\n    sub     ebp,3\n    jz      corner\n\nouter: \n    movd    mm7,[4+esi]\n    movd    [4+ecx],mm0\n    lea     eax,[8+esi]\n    add     esi,4\n    lea     ecx,[8+edi]\n    add     edi,8\n    lea     edx,[1+ebp]\n    pxor    mm0,mm0\n\ninner: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[4+ecx]\n\tpmuludq mm1,mm7\n\tpaddq   mm1,mm2\n\tpaddq   mm0,mm1\n    sub     edx,1\n    movd    [4+ecx],mm0\n    psrlq   mm0,32\n    lea     ecx,[4+ecx]\n    jnz     inner\n    sub     ebp,1\n    jnz     outer\n\ncorner: \n    movd    mm1,[4+esi]\n    movd    mm2,[8+esi]\n\tpmuludq mm1,mm2\n    mov     eax,[20+esp]\n    movd    mm2,[eax]\n\tpmuludq mm2,mm2\n    pcmpeqd mm7,mm7\n    psrlq   mm7,32\n    mov     edx,[16+esp]\n    movd    mm3,[4+edx]\n\tpaddq   mm0,mm1\n    movd    [12+edi],mm0\n    psrlq   mm0,32\n    movd    [16+edi],mm0\n    movd    [edx],mm2\n    psrlq   mm2,32\n    psllq   mm3,1\n\tpaddq   mm2,mm3\n    movd    [4+edx],mm2\n    psrlq   mm2,32\n    mov     ecx,[24+esp]\n    sub     ecx,2\n\ndiag: \n    movd    mm0,[4+eax]\n    add     eax,4\n\tpmuludq mm0,mm0\n    movq    mm1,mm7\n    pand    mm1,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   mm3,1\n\tpaddq   mm1,mm3\n\tpaddq   mm2,mm1\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n    movd    mm3,[12+edx]\n    psllq   mm3,1\n\tpaddq   mm0,mm3\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    add     edx,8\n    psrlq   mm2,32\n    sub     ecx,1\n    jnz     diag\n    movd    mm0,[4+eax]\n\tpmuludq mm0,mm0\n    pand    mm7,mm0\n    psrlq   mm0,32\n    movd    mm3,[8+edx]\n    psllq   
mm3,1\n\tpaddq   mm7,mm3\n\tpaddq   mm2,mm7\n    movd    [8+edx],mm2\n    psrlq   mm2,32\n\tpaddq   mm2,mm0\n    movd    [12+edx],mm2\n    mov     esi,[8+esp]\n    mov     edi,[4+esp]\n    mov     ebp,[esp]\n    add     esp,12\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/sub_n.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n\tsection .text\n\n\tglobal\t___gmpn_sub_nc\n\tglobal\t___gmpn_sub_n\n\n%ifdef\tDLL\n\texport\t___gmpn_sub_nc\n\texport\t___gmpn_sub_n\n%endif\n\n\talign\t8\n___gmpn_sub_nc: \n    movd    mm0,[20+esp]\n    jmp     start_nc\n\t\n\talign\t8\n___gmpn_sub_n: \n    pxor    mm0,mm0\n\nstart_nc: \n    mov     eax,[8+esp]\n    mov     [8+esp],ebx\n    mov     ebx,[12+esp]\n    mov     edx,[4+esp]\n    mov     ecx,[16+esp]\n\n    lea     eax,[eax+ecx*4]\n    lea     ebx,[ebx+ecx*4]\n    lea     edx,[edx+ecx*4]\n    neg     ecx\n\tjz\t\txit\ntop: \n\tmovd    mm1,[eax+ecx*4]\n\tmovd    mm2,[ebx+ecx*4]\n\tpsubq   mm1,mm2\n\tpsubq   mm1,mm0\n    movd    [edx+ecx*4],mm1\n    psrlq   mm1,63\n    add     ecx,1\n    jz      done_mm1\n\tmovd    mm0,[eax+ecx*4]\n    movd    mm2,[ebx+ecx*4]\n   \tpsubq   mm0,mm2\n\tpsubq   mm0,mm1\n    movd    [edx+ecx*4],mm0\n    psrlq   mm0,63\n    add     ecx,1\n    jnz     top\n    movd    eax,mm0\nxit:mov     ebx,[8+esp]\n    emms\n    ret\n\ndone_mm1: \n    movd    eax,mm1\n    
mov     ebx,[8+esp]\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/pentium4/sse2/submul_1.asm",
    "content": "\n;  Copyright 2001, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"..\\..\\x86i.inc\" \n\n\tsection .text\n\n\tglobal\t___gmpn_submul_1c\n%ifdef\tDLL\n\texport\t___gmpn_submul_1c\n%endif\n\n\talign\t16\n___gmpn_submul_1c:\n    movd    mm1,[20+esp]\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop1: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop1\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tglobal\t___gmpn_submul_1\n%ifdef\tDLL\n\texport\t___gmpn_submul_1\n%endif\n\talign\t16\n___gmpn_submul_1:\n\n\tpxor    mm1,mm1\n\tmov     eax,[8+esp]\n\tpcmpeqd mm0,mm0\n\tmovd    mm7,[16+esp]\n\tpcmpeqd mm6,mm6\n\tmov     edx,[4+esp]\n\tpsrlq   mm0,32\n\tmov     ecx,[12+esp]\n\tpsllq   
mm6,32\n\tpsubq   mm6,mm0\n\tpsubq   mm0,mm1\noop2: \n    movd    mm1,[eax]\n    lea     eax,[4+eax]\n    movd    mm2,[edx]\n\tpaddq   mm2,mm6\n\tpmuludq mm1,mm7\n\tpsubq   mm2,mm1\n\tpaddq   mm0,mm2\n    sub     ecx,1\n    movd    [edx],mm0\n    psrlq   mm0,32\n    lea     edx,[4+edx]\n    jnz     oop2\n    movd    eax,mm0\n    not     eax\n    emms\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/rshift.asm",
    "content": "\n;  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_SHIFT esp+frame+16\n%define PARAM_SIZE  esp+frame+12\n%define PARAM_SRC   esp+frame+8\n%define PARAM_DST   esp+frame+4\n%assign frame\t\tframe+12\n\n\tsection .text\n\n\tglobal  ___gmpn_rshift\n%ifdef\tDLL\n\texport\t___gmpn_rshift\n%endif\n\n\talign   8\n___gmpn_rshift: \n    push    edi\n    push    esi\n    push    ebx\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC]\n    mov     edx,[PARAM_SIZE]\n    mov     ecx,[PARAM_SHIFT]\n    lea     edi,[-4+edi+edx*4]\n    lea     esi,[esi+edx*4]\n    neg     edx\n    mov     ebx,[esi+edx*4]\t; read least significant limb \n    xor     eax,eax\n\tshrd\teax,ebx,cl\n    inc     edx\n    jz      Lend\n    push    eax             ; push carry limb onto stack \n    test    dl,1\n    jnz     L1\t\t\t\t; enter Lop in the middle \n    mov     eax,ebx\n\n\talign   8\nLop:\t\n\tmov     ebx,[esi+edx*4]\t; load next higher limb \n\tshrd\teax,ebx,cl\n    mov     [edi+edx*4],eax ; store it 
\n    inc     edx\nL1:\n\tmov     eax,[esi+edx*4]\n\tshrd\tebx,eax,cl\n    mov     [edi+edx*4],ebx\n    inc     edx\n    jnz     Lop\n    shr     eax,cl          ; compute most significant limb \n    mov     [edi],eax       ; store it \n    pop     eax             ; pop carry limb \n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\nLend:\n\tshr     ebx,cl\t\t\t; compute most significant limb \n    mov     [edi],ebx       ; store it \n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/sub_n.asm",
    "content": "\n;  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n; mp_limb_t M4_function_n (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,\n; mp_size_t size); \n; mp_limb_t M4_function_nc (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,\n; mp_size_t size,mp_limb_t carry); \n\n%define PARAM_SPACE   20\n%define PARAM_CARRY   esp+frame+20\n%define PARAM_SIZE    esp+frame+16\n%define PARAM_SRC2    esp+frame+12\n%define PARAM_SRC1    esp+frame+8\n%define PARAM_DST     esp+frame+4\n\n%macro\tmac_sub 4\n\n\tglobal  %1%4\n%ifdef\tDLL\n\texport\t%1%4\n%endif\n\n    align   8\n%1%4:\n%assign\tframe   0\n\tFR_push edi\n\tFR_push esi\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC1]\n    mov     edx,[PARAM_SRC2]\n    mov     ecx,[PARAM_SIZE]\n    mov     eax,ecx\n    shr     ecx,3\t\t\t\t; compute count for unrolled %%4 \n    neg     eax\n    and     eax,7\t\t\t\t; get index where to start %%4 \n    jz      %%3\t\t\t\t\t; necessary special case for 0 \n    inc     ecx\t\t\t\t\t; adjust %%4 count \n    shl     
eax,2\t\t\t\t; adjustment for pointers... \n    sub     edi,eax\t\t\t\t; ... since they are offset ... \n    sub     esi,eax\t\t\t\t; ... by a constant when we ... \n    sub     edx,eax\t\t\t\t; ... enter the %%4 \n    shr     eax,2\t\t\t\t; restore previous value \n\n; Calculate start address in %%4\n\n%ifdef\tPIC\n    call    %%1\n%%1:\n\tlea     eax,[%%4-%%1-3+eax+eax*8]\n    add     eax,[esp]\n    add     esp,4\n%else\n\tlea     eax,[%%4-3+eax+eax*8]\n%endif\n\n; These lines initialize carry from the 5th parameter.  Should be \n; possible to simplify. \n\n\tFR_push ebp    \n\tmov     ebp,[PARAM_CARRY]    \n    shr     ebp,1\t\t\t\t; shift bit 0 into carry \n\tFR_pop  ebp\n    jmp     eax\t\t\t\t\t; jump into %%4 \n\n\tglobal\t%1%3\n%ifdef\tDLL\n\texport\t%1%3\n%endif\n\talign   8\t\n%1%3:\n%assign\tframe\t0\n\tFR_push edi\n    FR_push esi\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC1]\n    mov     edx,[PARAM_SRC2]\n    mov     ecx,[PARAM_SIZE]\n    mov     eax,ecx\n    shr     ecx,3\t\t\t\t; compute count for unrolled %%4 \n    neg     eax\n    and     eax,7\t\t\t\t; get index where to start %%4 \n    jz      %%4\t\t\t\t\t; necessary special case for 0 \n    inc     ecx\t\t\t\t\t; adjust %%4 count \n    shl     eax,2\t\t\t\t; adjustment for pointers... \n    sub     edi,eax\t\t\t\t; ... since they are offset ... \n    sub     esi,eax\t\t\t\t; ... by a constant when we ... \n    sub     edx,eax\t\t\t\t; ... enter the %%4 \n    shr     eax,2\t\t\t\t; restore previous value \n\n; Calculate start address in %%4 for PIC.  
\n; Due to limitations in some assemblers,%%4-%%2-3 \n; cannot be put into the leal \n\n%ifdef\tPIC\n\tcall    %%2\n%%2:\n\tlea     eax,[%%4-%%2-3+eax+eax*8]\n    add     eax,[esp]\n    add     esp,4\n%else\n    lea     eax,[%%4-3+eax+eax*8]\n%endif\n\tjmp     eax\t\t\t\t\t; jump into %%4 \n%%3:\n\tFR_push ebp\n    mov     ebp,[PARAM_CARRY]\n    shr     ebp,1\t\t\t\t; shift bit 0 into carry \n\tFR_pop  ebp\n\n\talign   8\n%%4:\n\tmov     eax,[esi]\n\t%2\t\teax,[edx]\n    mov     [edi],eax\n    mov     eax,[4+esi]\n\t%2\t\teax,[edx+4]\n    mov     [4+edi],eax\n    mov     eax,[8+esi]\n\t%2\t\teax,[edx+8]\n    mov     [8+edi],eax\n    mov     eax,[12+esi]\n    %2\t\teax,[edx+12]\n    mov     [12+edi],eax\n    mov     eax,[16+esi]\n\t%2\t\teax,[edx+16]\n    mov     [16+edi],eax\n    mov     eax,[20+esi]\n\t%2\t\teax,[edx+20]\n    mov     [20+edi],eax\n    mov     eax,[24+esi]\n\t%2\t\teax,[edx+24]\n    mov     [24+edi],eax\n    mov     eax,[28+esi]\n    %2\t\teax,[edx+28]\n    mov     [28+edi],eax\n    lea     edi,[32+edi]\n    lea     esi,[32+esi]\n    lea     edx,[32+edx]\n    dec     ecx\n    jnz     %%4\n    sbb     eax,eax\n    neg     eax\n    pop     esi\n    pop     edi\n    ret\t\t\n%endmacro\n\n\tsection .text\n;       global ___gmpn_sub_n\n;       global ___gmpn_sub_nc\n\t\t\n\tmac_sub\t___g,sbb,mpn_sub_n,mpn_sub_nc\n\t\n    end\n"
  },
  {
    "path": "mpn/x86w/submul_1.asm",
    "content": "\n;  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002 Free Software\n;  Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define\tPARAM_MULTIPLIER\tesp+frame+16\n%define\tPARAM_SIZE\t\t\tesp+frame+12\n%define\tPARAM_SRC\t\t\tesp+frame+8\n%define\tPARAM_DST\t\t\tesp+frame+4\n%assign frame\t\t\t\t16\n\n%macro\tmac_sub 3\n\n\tglobal  %1%3\n%ifdef\tDLL\n\texport\t%1%3\n%endif\n\n\talign   8\n%1%3:\n    push    edi\n    push    esi\n    push    ebx\n    push    ebp\n    mov     edi,[PARAM_DST]\n    mov     esi,[PARAM_SRC]\n    mov     ecx,[PARAM_SIZE]\n    xor     ebx,ebx\n    and     ecx,3\n    jz      %%2\n%%1:\n    mov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    lea     esi,[4+esi]\n    add     eax,ebx\n    mov     ebx,0\n    adc     edx,ebx\n\t%2\t\t[edi],eax\n    adc     ebx,edx\t\t; propagate carry into cylimb \n    lea     edi,[4+edi]\n    dec     ecx\n    jnz     %%1\n%%2:\n    mov     ecx,[PARAM_SIZE]\n    shr     ecx,2\n    jz      %%4\n\n\talign   8\n%%3:\n\tmov     eax,[esi]\n    mul     dword [PARAM_MULTIPLIER]\n    add     ebx,eax\n    
mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[4+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[edi],ebx\n    adc     ebp,eax\t\t; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n    mov     eax,[8+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[4+edi],ebp\n    adc     ebx,eax\t\t; new lo + cylimb \n    mov     ebp,0\n    adc     ebp,edx\n    mov     eax,[12+esi]\n    mul     dword [PARAM_MULTIPLIER]\n\t%2\t\t[8+edi],ebx\n    adc     ebp,eax   ; new lo + cylimb \n    mov     ebx,0\n    adc     ebx,edx\n\t%2\t\t[12+edi],ebp\n    adc     ebx,0\t\t; propagate carry into cylimb \n    lea     esi,[16+esi]\n    lea     edi,[16+edi]\n    dec     ecx\n    jnz     %%3\n%%4:\n\tmov     eax,ebx\n    pop     ebp\n    pop     ebx\n    pop     esi\n    pop     edi\n    ret\n%endmacro\n\n\tsection .text\n;       global ___gmpn_submul_1\n\tmac_sub\t___g,sub,mpn_submul_1\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/udiv_qrnnd.asm",
    "content": "\n;  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include \"x86i.inc\"\n\n%define PARAM_DIVISOR\tesp+frame+16\n%define PARAM_LOW\t\tesp+frame+12\n%define PARAM_HIGH\t\tesp+frame+8\n%define PARAM_REMPTR\tesp+frame+4\n%assign\tframe\t\t\t0\n\n\tsection .text\n\n\tglobal  ___gmpn_udiv_qrnnd\n%ifdef\tDLL\n\texport\t___gmpn_udiv_qrnnd\n%endif\n\n\talign   8\n___gmpn_udiv_qrnnd:\n    mov     eax,[PARAM_LOW]\n    mov     edx,[PARAM_HIGH]\n    div     dword [PARAM_DIVISOR]\n    mov     ecx,[PARAM_REMPTR]\n    mov     [ecx],edx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/umul_ppmm.asm",
    "content": "\n;  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.\n; \n;  This file is part of the GNU MP Library.\n; \n;  The GNU MP Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The GNU MP Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the GNU MP Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 59 Temple Place -\n;  Suite 330, Boston, MA 02111-1307, USA.\n;\n; Translation of AT&T syntax code by Brian Gladman \n\n%include\t\"x86i.inc\"\n\n%define PARAM_M2\t\tesp+frame+12\n%define PARAM_M1\t\tesp+frame+8\n%define PARAM_LOWPTR\tesp+frame+4\n%assign frame\t\t\t0\n\n\tsection .text\n\t\n\tglobal\t___gmpn_umul_ppmm\n%ifdef\tDLL\n\texport\t___gmpn_umul_ppmm\n%endif\n\n\talign   8\n___gmpn_umul_ppmm:\n    mov     ecx,[PARAM_LOWPTR]\n    mov     eax,[PARAM_M1]\n    mul     dword [PARAM_M2]\n    mov     [ecx],eax\n    mov     eax,edx\n    ret\n\n\tend\n"
  },
  {
    "path": "mpn/x86w/x86i.inc",
    "content": "\n%macro\tFR_push\t1\n\tpush\t%1\n%assign\tframe\tframe+4\n%endmacro\n\n%macro\tFR_pop\t1\n\tpop\t\t%1\n%assign\tframe\tframe-4\n%endmacro\n\n%macro\tFR_sesp\t1\n%assign\tframe\tframe+%1\n%endmacro\n\n%macro\tFR_aesp\t1\n%assign\tframe\tframe-%1\n%endmacro\n\n%assign\tframe\t0\n\n%macro do_name 2\n\n%ifdef\tDLL\n\texport %1\n%else\n\texport %1@%2\n%endif\n\n%ifndef STDCALL\n\tglobal %1\n%1:\n%else\n\tglobal %1@%2\n%1@%2:\n%endif\n\n%endmacro\n\n%macro do_ret 1\n%ifndef STDCALL\n\tret\n%else\n    ret %1\n%endif\n%endmacro\n"
  },
  {
    "path": "mpq/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nnoinst_LTLIBRARIES = libmpq.la\nlibmpq_la_SOURCES = abs.c aors.c canonicalize.c clear.c clears.c cmp.c cmp_si.c cmp_ui.c div.c equal.c get_d.c get_den.c get_num.c get_str.c init.c inits.c inp_str.c inv.c md_2exp.c mul.c neg.c out_str.c set.c set_d.c set_den.c set_f.c set_num.c set_si.c set_str.c set_ui.c set_z.c swap.c\n"
  },
  {
    "path": "mpq/abs.c",
    "content": "/* mpq_abs -- absolute value of a rational.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpq_abs 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpq_abs (mpq_ptr dst, mpq_srcptr src)\n{\n  mp_size_t  num_size = src->_mp_num._mp_size;\n  mp_size_t  num_abs_size = ABS (num_size);\n\n  if (dst != src)\n    {\n      mp_size_t  den_size = src->_mp_den._mp_size;\n\n      MPZ_REALLOC (mpq_numref(dst), num_abs_size);\n      MPZ_REALLOC (mpq_denref(dst), den_size);\n\n      MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);\n      MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);\n\n      dst->_mp_den._mp_size = den_size;\n    }\n\n  dst->_mp_num._mp_size = num_abs_size;\n}\n"
  },
  {
    "path": "mpq/aors.c",
    "content": "/* mpq_add, mpq_sub -- add or subtract rational numbers.\n\nCopyright 1991, 1994, 1995, 1996, 1997, 2000, 2001, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nstatic void __gmpq_aors(REGPARM_3_1 (mpq_ptr w, mpq_srcptr x, mpq_srcptr y, void (*fun)(mpz_ptr, mpz_srcptr, mpz_srcptr))) REGPARM_ATTR (1);\n#define mpq_aors(w,x,y,fun)  __gmpq_aors (REGPARM_3_1 (w, x, y, fun))\n\nREGPARM_ATTR (1) static void\nmpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,\n          void (*fun)(mpz_ptr, mpz_srcptr, mpz_srcptr))\n{\n  mpz_t gcd;\n  mpz_t tmp1, tmp2;\n  mp_size_t op1_num_size = ABS (op1->_mp_num._mp_size);\n  mp_size_t op1_den_size =      op1->_mp_den._mp_size;\n  mp_size_t op2_num_size = ABS (op2->_mp_num._mp_size);\n  mp_size_t op2_den_size =      op2->_mp_den._mp_size;\n  TMP_DECL;\n\n  TMP_MARK;\n  MPZ_TMP_INIT (gcd, MIN (op1_den_size, op2_den_size));\n  MPZ_TMP_INIT (tmp1, op1_num_size + op2_den_size);\n  MPZ_TMP_INIT (tmp2, op2_num_size + op1_den_size);\n\n  /* ROP might be identical to either operand, so don't store the\n     result there until we are finished with the input operands.  
We\n     dare to overwrite the numerator of ROP when we are finished\n     with the numerators of OP1 and OP2.  */\n\n  mpz_gcd (gcd, &(op1->_mp_den), &(op2->_mp_den));\n  if (! MPZ_EQUAL_1_P (gcd))\n    {\n      mpz_t t;\n\n      mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);\n      mpz_mul (tmp1, &(op1->_mp_num), tmp1);\n\n      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);\n      mpz_mul (tmp2, &(op2->_mp_num), tmp2);\n\n      MPZ_TMP_INIT (t, MAX (ABS (tmp1->_mp_size), ABS (tmp2->_mp_size)) + 1);\n\n      (*fun) (t, tmp1, tmp2);\n      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);\n\n      mpz_gcd (gcd, t, gcd);\n      if (MPZ_EQUAL_1_P (gcd))\n        {\n          mpz_set (&(rop->_mp_num), t);\n          mpz_mul (&(rop->_mp_den), &(op2->_mp_den), tmp2);\n        }\n      else\n        {\n          mpz_divexact_gcd (&(rop->_mp_num), t, gcd);\n          mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);\n          mpz_mul (&(rop->_mp_den), tmp1, tmp2);\n        }\n    }\n  else\n    {\n      /* The common divisor is 1.  This is the case (for random input) with\n\t probability 6/(pi**2), which is about 60.8%.  */\n      mpz_mul (tmp1, &(op1->_mp_num), &(op2->_mp_den));\n      mpz_mul (tmp2, &(op2->_mp_num), &(op1->_mp_den));\n      (*fun) (&(rop->_mp_num), tmp1, tmp2);\n      mpz_mul (&(rop->_mp_den), &(op1->_mp_den), &(op2->_mp_den));\n    }\n  TMP_FREE;\n}\n\n\nvoid\nmpq_add (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)\n{\n  mpq_aors (rop, op1, op2, mpz_add);\n}\n\nvoid\nmpq_sub (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)\n{\n  mpq_aors (rop, op1, op2, mpz_sub);\n}\n"
  },
  {
    "path": "mpq/canonicalize.c",
    "content": "/* mpq_canonicalize(op) -- Remove common factors of the denominator and\n   numerator in OP.\n\nCopyright 1991, 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_canonicalize (mpq_ptr op)\n{\n  mpz_t gcd;\n  TMP_DECL;\n\n  if (op->_mp_den._mp_size == 0)\n    DIVIDE_BY_ZERO;\n\n  TMP_MARK;\n\n  /* ??? Dunno if the 1+ is needed.  */\n  MPZ_TMP_INIT (gcd, 1 + MAX (ABS (op->_mp_num._mp_size),\n\t\t\t      ABS (op->_mp_den._mp_size)));\n\n  mpz_gcd (gcd, &(op->_mp_num), &(op->_mp_den));\n  if (! MPZ_EQUAL_1_P (gcd))\n    {\n      mpz_divexact_gcd (&(op->_mp_num), &(op->_mp_num), gcd);\n      mpz_divexact_gcd (&(op->_mp_den), &(op->_mp_den), gcd);\n    }\n\n  if (op->_mp_den._mp_size < 0)\n    {\n      op->_mp_num._mp_size = -op->_mp_num._mp_size;\n      op->_mp_den._mp_size = -op->_mp_den._mp_size;\n    }\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpq/clear.c",
    "content": "/* mpq_clear -- free the space occupied by a mpq\n\nCopyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_clear (mpq_ptr m)\n{\n  (*__gmp_free_func) (m->_mp_num._mp_d,\n\t\t    m->_mp_num._mp_alloc * BYTES_PER_MP_LIMB);\n  (*__gmp_free_func) (m->_mp_den._mp_d,\n\t\t    m->_mp_den._mp_alloc * BYTES_PER_MP_LIMB);\n}\n"
  },
  {
    "path": "mpq/clears.c",
    "content": "/* mpq_clears() -- Clear multiple mpq_t variables.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpq_clears (mpq_ptr x, ...)\n#else\nmpq_clears (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpq_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpq_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpq_clear (x);\n      x = va_arg (ap, mpq_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpq/cmp.c",
    "content": "/* mpq_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative\n   based on if U > V, U == V, or U < V.\n\nCopyright 1991, 1994, 1996, 2001, 2002, 2005, 2015 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic int\nmpq_cmp_numden (mpq_srcptr op1, mpz_srcptr num_op2, mpz_srcptr den_op2)\n{\n  mp_size_t num1_size = SIZ(NUM(op1));\n  mp_size_t den1_size = SIZ(DEN(op1));\n  mp_size_t num2_size = SIZ(num_op2);\n  mp_size_t den2_size = SIZ(den_op2);\n  int op2_is_int;\n  mp_limb_t d1h, d2h;\n  mp_size_t tmp1_size, tmp2_size;\n  mp_ptr tmp1_ptr, tmp2_ptr;\n  mp_size_t num1_sign;\n  int cc;\n  TMP_DECL;\n\n  /* need canonical signs to get right result */\n  ASSERT (den1_size > 0);\n  ASSERT (den2_size > 0);\n\n  if (num1_size == 0)\n    return -num2_size;\n  if (num2_size == 0)\n    return num1_size;\n  if ((num1_size ^ num2_size) < 0) /* I.e. are the signs different? */\n    return num1_size;\n\n  num1_sign = num1_size;\n  num1_size = ABS (num1_size);\n\n  /* THINK: Does storing d1h and d2h make sense? 
*/\n  d1h = PTR(DEN(op1))[den1_size - 1];\n  d2h = PTR(den_op2)[den2_size - 1];\n  op2_is_int = (den2_size | d2h) == 1;\n  if (op2_is_int == (den1_size | d1h)) /* Both ops are integers */\n    /* return mpz_cmp (NUM (op1), num_op2); */\n    {\n      int cmp;\n\n      if (num1_sign != num2_size)\n\treturn num1_sign - num2_size;\n\n      cmp = mpn_cmp (PTR(NUM(op1)), PTR(num_op2), num1_size);\n      return (num1_sign > 0 ? cmp : -cmp);\n    }\n\n  num2_size = ABS (num2_size);\n\n  tmp1_size = num1_size + den2_size;\n  tmp2_size = num2_size + den1_size;\n\n  /* 1. Check to see if we can tell which operand is larger by just looking at\n     the number of limbs.  */\n\n  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.\n     Same for NUM1 x DEN1 with respect to TMP2_SIZE.  */\n  if (tmp1_size > tmp2_size + 1)\n    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */\n    return num1_sign;\n  if (tmp2_size + op2_is_int > tmp1_size + 1)\n    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */\n    return -num1_sign;\n\n  /* 2. Same, but compare the number of significant bits.  */\n  {\n    int cnt1, cnt2;\n    mp_bitcnt_t bits1, bits2;\n\n    count_leading_zeros (cnt1, PTR(NUM(op1))[num1_size - 1]);\n    count_leading_zeros (cnt2, d2h);\n    bits1 = (mp_bitcnt_t) tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;\n\n    count_leading_zeros (cnt1, PTR(num_op2)[num2_size - 1]);\n    count_leading_zeros (cnt2, d1h);\n    bits2 = (mp_bitcnt_t) tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;\n\n    if (bits1 > bits2 + 1)\n      return num1_sign;\n    if (bits2 + op2_is_int > bits1 + 1)\n      return -num1_sign;\n  }\n\n  /* 3. Finally, cross multiply and compare.  
*/\n\n  TMP_MARK;\n  if (op2_is_int)\n    {\n      tmp2_ptr = TMP_ALLOC_LIMBS (tmp2_size);\n      tmp1_ptr = PTR(NUM(op1));\n      --tmp1_size;\n    }\n  else\n    {\n  TMP_ALLOC_LIMBS_2 (tmp1_ptr,tmp1_size, tmp2_ptr,tmp2_size);\n\n  if (num1_size >= den2_size)\n    tmp1_size -= 0 == mpn_mul (tmp1_ptr,\n\t\t\t       PTR(NUM(op1)), num1_size,\n\t\t\t       PTR(den_op2), den2_size);\n  else\n    tmp1_size -= 0 == mpn_mul (tmp1_ptr,\n\t\t\t       PTR(den_op2), den2_size,\n\t\t\t       PTR(NUM(op1)), num1_size);\n    }\n\n   if (num2_size >= den1_size)\n     tmp2_size -= 0 == mpn_mul (tmp2_ptr,\n\t\t\t\tPTR(num_op2), num2_size,\n\t\t\t\tPTR(DEN(op1)), den1_size);\n   else\n     tmp2_size -= 0 == mpn_mul (tmp2_ptr,\n\t\t\t\tPTR(DEN(op1)), den1_size,\n\t\t\t\tPTR(num_op2), num2_size);\n\n\n  cc = tmp1_size - tmp2_size != 0\n    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);\n  TMP_FREE;\n  return num1_sign < 0 ? -cc : cc;\n}\n\nint\nmpq_cmp (mpq_srcptr op1, mpq_srcptr op2)\n{\n  return mpq_cmp_numden (op1, NUM(op2), DEN(op2));\n}\n\nint\nmpq_cmp_z (mpq_srcptr op1, mpz_srcptr op2)\n{\n  const static mp_limb_t one = 1;\n  const static mpz_t den = MPZ_ROINIT_N ((mp_limb_t *) &one, 1);\n\n  return mpq_cmp_numden (op1, op2, den);\n}\n"
  },
  {
    "path": "mpq/cmp_si.c",
    "content": "/* _mpq_cmp_si -- compare mpq and long/ulong fraction.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Something like mpq_cmpabs_ui would be more useful for the neg/neg case,\n   and perhaps a version accepting a parameter to reverse the test, to make\n   it a tail call here.  */\n\nint\n_mpq_cmp_si (mpq_srcptr q, mpir_si n, mpir_ui d)\n{\n  /* need canonical sign to get right result */\n  ASSERT (q->_mp_den._mp_size > 0);\n\n  if (q->_mp_num._mp_size >= 0)\n    {\n      if (n >= 0)\n        return _mpq_cmp_ui (q, n, d);            /* >=0 cmp >=0 */\n      else\n        return 1;                                /* >=0 cmp <0 */\n    }\n  else\n    {\n      if (n >= 0)\n        return -1;                               /* <0 cmp >=0 */\n      else\n        {\n          mpq_t  qabs;\n          qabs->_mp_num._mp_size = ABS (q->_mp_num._mp_size);\n          qabs->_mp_num._mp_d    = q->_mp_num._mp_d;\n          qabs->_mp_den._mp_size = q->_mp_den._mp_size;\n          qabs->_mp_den._mp_d    = q->_mp_den._mp_d;\n\n          return - _mpq_cmp_ui (qabs, -n, d);    /* <0 cmp <0 */\n        }\n    }\n}\n"
  },
  {
    "path": "mpq/cmp_ui.c",
    "content": "/* mpq_cmp_ui(u,vn,vd) -- Compare U with Vn/Vd.  Return positive, zero, or\n   negative based on if U > V, U == V, or U < V.  Vn and Vd may have\n   common factors.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\n_mpq_cmp_ui (mpq_srcptr op1, mpir_ui num2, mpir_ui den2)\n{\n  mp_size_t num1_size = op1->_mp_num._mp_size;\n  mp_size_t den1_size = op1->_mp_den._mp_size;\n  mp_size_t tmp1_size, tmp2_size;\n  mp_ptr tmp1_ptr, tmp2_ptr;\n  mp_limb_t cy_limb;\n  int cc;\n  TMP_DECL;\n\n#if GMP_NAIL_BITS != 0\n  if ((num2 | den2) > GMP_NUMB_MAX)\n    {\n      mpq_t op2;\n      mpq_init (op2);\n      mpz_set_ui (mpq_numref (op2), num2);\n      mpz_set_ui (mpq_denref (op2), den2);\n      cc = mpq_cmp (op1, op2);\n      mpq_clear (op2);\n      return cc;\n    }\n#endif\n\n  /* need canonical sign to get right result */\n  ASSERT (den1_size > 0);\n\n  if (den2 == 0)\n    DIVIDE_BY_ZERO;\n\n  if (num1_size == 0)\n    return -(num2 != 0);\n  if (num1_size < 0)\n    return num1_size;\n  if (num2 == 0)\n    return num1_size;\n\n  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.\n     Same for NUM2 x DEN1 with respect to TMP2_SIZE.  */\n  if (num1_size > den1_size + 1)\n    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */\n    return num1_size;\n  if (den1_size > num1_size + 1)\n    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */\n    return -num1_size;\n\n  TMP_MARK;\n  tmp1_ptr = (mp_ptr) TMP_ALLOC ((num1_size + 1) * BYTES_PER_MP_LIMB);\n  tmp2_ptr = (mp_ptr) TMP_ALLOC ((den1_size + 1) * BYTES_PER_MP_LIMB);\n\n  cy_limb = mpn_mul_1 (tmp1_ptr, op1->_mp_num._mp_d, num1_size,\n                       (mp_limb_t) den2);\n  tmp1_ptr[num1_size] = cy_limb;\n  tmp1_size = num1_size + (cy_limb != 0);\n\n  cy_limb = mpn_mul_1 (tmp2_ptr, op1->_mp_den._mp_d, den1_size,\n                       (mp_limb_t) num2);\n  tmp2_ptr[den1_size] = cy_limb;\n  tmp2_size = den1_size + (cy_limb != 0);\n\n  cc = tmp1_size - tmp2_size != 0\n    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);\n  TMP_FREE;\n  return cc;\n}\n"
  },
  {
    "path": "mpq/div.c",
    "content": "/* mpq_div -- divide two rational numbers.\n\nCopyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpq_div (mpq_ptr quot, mpq_srcptr op1, mpq_srcptr op2)\n{\n  mpz_t gcd1, gcd2;\n  mpz_t tmp1, tmp2;\n  mpz_t numtmp;\n\n  if (op2->_mp_num._mp_size == 0)\n    DIVIDE_BY_ZERO;\n\n  mpz_init (gcd1);\n  mpz_init (gcd2);\n  mpz_init (tmp1);\n  mpz_init (tmp2);\n  mpz_init (numtmp);\n\n  /* QUOT might be identical to either operand, so don't store the\n     result there until we are finished with the input operands.  We\n     dare to overwrite the numerator of QUOT when we are finished\n     with the numerators of OP1 and OP2.  
*/\n\n  mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_num));\n  mpz_gcd (gcd2, &(op2->_mp_den), &(op1->_mp_den));\n\n  mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);\n  mpz_divexact_gcd (tmp2, &(op2->_mp_den), gcd2);\n\n  mpz_mul (numtmp, tmp1, tmp2);\n\n  mpz_divexact_gcd (tmp1, &(op2->_mp_num), gcd1);\n  mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);\n\n  mpz_mul (&(quot->_mp_den), tmp1, tmp2);\n\n  /* We needed to go via NUMTMP to take care of QUOT being the same\n     as either input operands.  Now move NUMTMP to QUOT->_mp_num.  */\n  mpz_set (&(quot->_mp_num), numtmp);\n\n  /* Keep the denominator positive.  */\n  if (quot->_mp_den._mp_size < 0)\n    {\n      quot->_mp_den._mp_size = -quot->_mp_den._mp_size;\n      quot->_mp_num._mp_size = -quot->_mp_num._mp_size;\n    }\n\n  mpz_clear (numtmp);\n  mpz_clear (tmp2);\n  mpz_clear (tmp1);\n  mpz_clear (gcd2);\n  mpz_clear (gcd1);\n}\n"
  },
  {
    "path": "mpq/equal.c",
    "content": "/* mpq_equal(u,v) -- Compare U, V.  Return non-zero if they are equal, zero\n   if they are non-equal.\n\nCopyright 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpq_equal (mpq_srcptr op1, mpq_srcptr op2)\n{\n  mp_size_t  num1_size, num2_size, den1_size, den2_size, i;\n  mp_srcptr  num1_ptr,  num2_ptr,  den1_ptr,  den2_ptr;\n\n  /* need fully canonical for correct results */\n  ASSERT_MPQ_CANONICAL (op1);\n  ASSERT_MPQ_CANONICAL (op2);\n\n  num1_size = op1->_mp_num._mp_size;\n  num2_size = op2->_mp_num._mp_size;\n  if (num1_size != num2_size)\n    return 0;\n\n  num1_ptr = op1->_mp_num._mp_d;\n  num2_ptr = op2->_mp_num._mp_d;\n  num1_size = ABS (num1_size);\n  for (i = 0; i < num1_size; i++)\n    if (num1_ptr[i] != num2_ptr[i])\n      return 0;\n\n  den1_size = op1->_mp_den._mp_size;\n  den2_size = op2->_mp_den._mp_size;\n  if (den1_size != den2_size)\n    return 0;\n\n  den1_ptr = op1->_mp_den._mp_d;\n  den2_ptr = op2->_mp_den._mp_d;\n  for (i = 0; i < den1_size; i++)\n    if (den1_ptr[i] != den2_ptr[i])\n      return 0;\n\n  return 1;\n}\n"
  },
  {
    "path": "mpq/get_d.c",
    "content": "/* double mpq_get_d (mpq_t src) -- mpq to double, rounding towards zero.\n\nCopyright 1995, 1996, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* All that's needed is to get the high 53 bits of the quotient num/den,\n   rounded towards zero.  More than 53 bits is fine, any excess is ignored\n   by mpn_get_d.\n\n   N_QLIMBS is how many quotient limbs we need to satisfy the mantissa of a\n   double, assuming the highest of those limbs is non-zero.  
The target\n   qsize for mpn_tdiv_qr is then 1 more than this, since that function may\n   give a zero in the high limb (and non-zero in the second highest).\n\n   The use of 8*sizeof(double) in N_QLIMBS is an overestimate of the\n   mantissa bits, but it gets the same result as the true value (53 or 48 or\n   whatever) when rounded up to a multiple of GMP_NUMB_BITS, for non-nails.\n\n   Enhancements:\n\n   Use the true mantissa size in the N_QLIMBS formula, to save a divide step\n   in nails.\n\n   Examine the high limbs of num and den to see if the highest 1 bit of the\n   quotient will fall high enough that just N_QLIMBS-1 limbs is enough to\n   get the necessary bits, thereby saving a division step.\n\n   Bit shift either num or den to arrange for the above condition on the\n   high 1 bit of the quotient, to save a division step always.  A shift to\n   save a division step is definitely worthwhile with mpn_tdiv_qr, though we\n   may want to reassess this on big num/den when a quotient-only division\n   exists.\n\n   Maybe we could estimate the final exponent using nsize-dsize (and\n   possibly the high limbs of num and den), so as to detect overflow and\n   return infinity or zero quickly.  Overflow is never very helpful to an\n   application, and can therefore probably be regarded as abnormal, but we\n   may still like to optimize it if the conditions are easy.  (This would\n   only be for float formats we know, unknown formats are not important and\n   can be left to mpn_get_d.)\n\n   Future:\n\n   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of\n   padding n with zeros in temporary space.\n\n   If/when a quotient-only division exists it can be used here immediately.\n   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.\n\n   Alternatives:\n\n   An alternative algorithm, that may be faster:\n   0. Let n be somewhat larger than the number of significant bits in a double.\n   1. 
Extract the most significant n bits of the denominator, and an equal\n      number of bits from the numerator.\n   2. Interpret the extracted numbers as integers, call them a and b\n      respectively, and develop n bits of the fractions ((a + 1) / b) and\n      (a / (b + 1)) using mpn_divrem.\n   3. If the computed values are identical UP TO THE POSITION WE CARE ABOUT,\n      we are done.  If they are different, repeat the algorithm from step 1,\n      but first let n = n * 2.\n   4. If we end up using all bits from the numerator and denominator, fall\n      back to a plain division.\n   5. Just to make life harder, The computation of a + 1 and b + 1 above\n      might give carry-out...  Needs special handling.  It might work to\n      subtract 1 in both cases instead.\n\n   Not certain if this approach would be faster than a quotient-only\n   division.  Presumably such optimizations are the sort of thing we would\n   like to have helping everywhere that uses a quotient-only division. */\n\ndouble\nmpq_get_d (mpq_srcptr src)\n{\n  double res;\n  mp_srcptr np, dp;\n  mp_ptr remp, tp;\n  mp_size_t nsize = src->_mp_num._mp_size;\n  mp_size_t dsize = src->_mp_den._mp_size;\n  mp_size_t qsize, prospective_qsize, zeros, chop, tsize;\n  mp_size_t sign_quotient = nsize;\n  long exp;\n#define N_QLIMBS (1 + (sizeof (double) + BYTES_PER_MP_LIMB-1) / BYTES_PER_MP_LIMB)\n  mp_limb_t qarr[N_QLIMBS + 1];\n  mp_ptr qp = qarr;\n  TMP_DECL;\n\n  ASSERT (dsize > 0);    /* canonical src */\n\n  /* mpn_get_d below requires a non-zero operand */\n  if (UNLIKELY (nsize == 0))\n    return 0.0;\n\n  TMP_MARK;\n  nsize = ABS (nsize);\n  dsize = ABS (dsize);\n  np = src->_mp_num._mp_d;\n  dp = src->_mp_den._mp_d;\n\n  prospective_qsize = nsize - dsize + 1;   /* from using given n,d */\n  qsize = N_QLIMBS + 1;                    /* desired qsize */\n\n  zeros = qsize - prospective_qsize;       /* padding n to get qsize */\n  exp = (long) -zeros * GMP_NUMB_BITS;     /* relative to low of qp 
*/\n\n  chop = MAX (-zeros, 0);                  /* negative zeros means shorten n */\n  np += chop;\n  nsize -= chop;\n  zeros += chop;                           /* now zeros >= 0 */\n\n  tsize = nsize + zeros;                   /* size for possible copy of n */\n\n  if (WANT_TMP_DEBUG)\n    {\n      /* separate blocks, for malloc debugging */\n      remp = TMP_ALLOC_LIMBS (dsize);\n      tp = (zeros > 0 ? TMP_ALLOC_LIMBS (tsize) : NULL);\n    }\n  else\n    {\n      /* one block with conditionalized size, for efficiency */\n      remp = TMP_ALLOC_LIMBS (dsize + (zeros > 0 ? tsize : 0));\n      tp = remp + dsize;\n    }\n\n  /* zero extend n into temporary space, if necessary */\n  if (zeros > 0)\n    {\n      MPN_ZERO (tp, zeros);\n      MPN_COPY (tp+zeros, np, nsize);\n      np = tp;\n      nsize = tsize;\n    }\n\n  ASSERT (qsize == nsize - dsize + 1);\n  mpn_tdiv_qr (qp, remp, (mp_size_t) 0, np, nsize, dp, dsize);\n\n  /* strip possible zero high limb */\n  qsize -= (qp[qsize-1] == 0);\n\n  res = mpn_get_d (qp, qsize, sign_quotient, exp);\n  TMP_FREE;\n  return res;\n}\n"
  },
  {
    "path": "mpq/get_den.c",
    "content": "/* mpq_get_den(den,rat_src) -- Set DEN to the denominator of RAT_SRC.\n\nCopyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_get_den (mpz_ptr den, mpq_srcptr src)\n{\n  mp_size_t size = src->_mp_den._mp_size;\n\n  if (den->_mp_alloc < size)\n    _mpz_realloc (den, size);\n\n  MPN_COPY (den->_mp_d, src->_mp_den._mp_d, size);\n  den->_mp_size = size;\n}\n"
  },
  {
    "path": "mpq/get_num.c",
    "content": " /* mpq_get_num(num,rat_src) -- Set NUM to the numerator of RAT_SRC.\n\nCopyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_get_num (mpz_ptr num, mpq_srcptr src)\n{\n  mp_size_t size = src->_mp_num._mp_size;\n  mp_size_t abs_size = ABS (size);\n\n  if (num->_mp_alloc < abs_size)\n    _mpz_realloc (num, abs_size);\n\n  MPN_COPY (num->_mp_d, src->_mp_num._mp_d, abs_size);\n  num->_mp_size = size;\n}\n"
  },
  {
    "path": "mpq/get_str.c",
    "content": "/* mpq_get_str -- mpq to string conversion.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nchar *\nmpq_get_str (char *str, int base, mpq_srcptr q)\n{\n  size_t  str_alloc, len;\n\n  ASSERT (ABS(base) >= 2);\n  ASSERT (ABS(base) <= 36);\n\n  str_alloc = 0;\n  if (str == NULL)\n    {\n      /* This is an overestimate since we don't bother checking how much of\n         the high limbs of num and den are used.  +2 for rounding up the\n         chars per bit of num and den.  +3 for sign, slash and '\\0'.  */\n      str_alloc = ((size_t) ((ABS (q->_mp_num._mp_size) + q->_mp_den._mp_size)\n                             * BITS_PER_MP_LIMB\n                             * __mp_bases[ABS(base)].chars_per_bit_exactly))\n                   + 5;\n      str = (char *) (*__gmp_allocate_func) (str_alloc);\n    }\n\n  mpz_get_str (str, base, mpq_numref(q));\n  len = strlen (str);\n  if (! 
MPZ_EQUAL_1_P (mpq_denref (q)))\n    {\n      str[len++] = '/';\n      mpz_get_str (str+len, base, mpq_denref(q));\n      len += strlen (str+len);\n    }\n\n  ASSERT (len == strlen(str));\n  ASSERT (str_alloc == 0 || len+1 <= str_alloc);\n  ASSERT (len+1 <=  /* size recommended to applications */\n          mpz_sizeinbase (mpq_numref(q), ABS(base)) +\n          mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);\n\n  if (str_alloc != 0)\n    __GMP_REALLOCATE_FUNC_MAYBE_TYPE (str, str_alloc, len+1, char);\n\n  return str;\n}\n"
  },
  {
    "path": "mpq/init.c",
    "content": "/* mpq_init -- Make a new rational number with value 0/1.\n\nCopyright 1991, 1994, 1995, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_init (mpq_ptr x)\n{\n  x->_mp_num._mp_alloc = 1;\n  x->_mp_num._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n  x->_mp_num._mp_size = 0;\n  x->_mp_den._mp_alloc = 1;\n  x->_mp_den._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n  x->_mp_den._mp_d[0] = 1;\n  x->_mp_den._mp_size = 1;\n\n#ifdef __CHECKER__\n  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */\n  x->_mp_num._mp_d[0] = 0;\n#endif\n}\n"
  },
  {
    "path": "mpq/inits.c",
    "content": "/* mpq_inits() -- Initialize multiple mpq_t variables and set them to 0.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpq_inits (mpq_ptr x, ...)\n#else\nmpq_inits (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpq_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpq_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpq_init (x);\n      x = va_arg (ap, mpq_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpq/inp_str.c",
    "content": "/* mpq_inp_str -- read an mpq from a FILE.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <ctype.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nsize_t\nmpq_inp_str (mpq_ptr q, FILE *fp, int base)\n{\n  size_t  nread;\n  int     c;\n\n  if (fp == NULL)\n    fp = stdin;\n\n  q->_mp_den._mp_size = 1;\n  q->_mp_den._mp_d[0] = 1;\n\n  nread = mpz_inp_str (mpq_numref(q), fp, base);\n  if (nread == 0)\n    return 0;\n\n  c = getc (fp);\n  nread++;\n\n  if (c == '/')\n    {\n      c = getc (fp);\n      nread++;\n\n      nread = mpz_inp_str_nowhite (mpq_denref(q), fp, base, c, nread);\n      if (nread == 0)\n        {\n          q->_mp_num._mp_size = 0;\n          q->_mp_den._mp_size = 1;\n          q->_mp_den._mp_d[0] = 1;\n        }\n    }\n  else\n    {\n      ungetc (c, fp);\n      nread--;\n    }\n\n  return nread;\n}\n"
  },
  {
    "path": "mpq/inv.c",
    "content": "/* mpq_inv(dest,src) -- invert a rational number, i.e. set DEST to SRC\n   with the numerator and denominator swapped.\n\nCopyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_inv (mpq_ptr dest, mpq_srcptr src)\n{\n  mp_size_t num_size = src->_mp_num._mp_size;\n  mp_size_t den_size = src->_mp_den._mp_size;\n\n  if (num_size == 0)\n    DIVIDE_BY_ZERO;\n\n  if (num_size < 0)\n    {\n      num_size = -num_size;\n      den_size = -den_size;\n    }\n  dest->_mp_den._mp_size = num_size;\n  dest->_mp_num._mp_size = den_size;\n\n  /* If dest == src we may just swap the numerator and denominator, but\n     we have to ensure the new denominator is positive.  
*/\n\n  if (dest == src)\n    {\n      mp_size_t alloc = dest->_mp_num._mp_alloc;\n      mp_ptr limb_ptr = dest->_mp_num._mp_d;\n\n      dest->_mp_num._mp_alloc = dest->_mp_den._mp_alloc;\n      dest->_mp_num._mp_d = dest->_mp_den._mp_d;\n\n      dest->_mp_den._mp_alloc = alloc;\n      dest->_mp_den._mp_d = limb_ptr;\n    }\n  else\n    {\n      den_size = ABS (den_size);\n      if (dest->_mp_num._mp_alloc < den_size)\n\t_mpz_realloc (&(dest->_mp_num), den_size);\n\n      if (dest->_mp_den._mp_alloc < num_size)\n\t_mpz_realloc (&(dest->_mp_den), num_size);\n\n      MPN_COPY (dest->_mp_num._mp_d, src->_mp_den._mp_d, den_size);\n      MPN_COPY (dest->_mp_den._mp_d, src->_mp_num._mp_d, num_size);\n    }\n}\n"
  },
  {
    "path": "mpq/md_2exp.c",
    "content": "/* mpq_mul_2exp, mpq_div_2exp - multiply or divide by 2^N */\n\n/*\nCopyright 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* The multiplier/divisor \"n\", representing 2^n, is applied by right shifting\n   \"r\" until it's odd (if it isn't already), and left shifting \"l\" for the\n   rest. 
*/\n\nstatic void\nmord_2exp (mpz_ptr ldst, mpz_ptr rdst, mpz_srcptr lsrc, mpz_srcptr rsrc,\n           mp_bitcnt_t n)\n{\n  mp_size_t  rsrc_size = SIZ(rsrc);\n  mp_size_t  len = ABS (rsrc_size);\n  mp_ptr     rsrc_ptr = PTR(rsrc);\n  mp_ptr     p, rdst_ptr;\n  mp_limb_t  plow;\n\n  p = rsrc_ptr;\n  plow = *p;\n  while (n >= GMP_NUMB_BITS && plow == 0)\n    {\n      n -= GMP_NUMB_BITS;\n      p++;\n      plow = *p;\n    }\n\n  /* no realloc here if rsrc==rdst, so p and rsrc_ptr remain valid */\n  len -= (p - rsrc_ptr);\n  MPZ_REALLOC (rdst, len);\n  rdst_ptr = PTR(rdst);\n\n  if ((plow & 1) || n == 0)\n    {\n      /* need DECR when src==dst */\n      if (p != rdst_ptr)\n        MPN_COPY_DECR (rdst_ptr, p, len);\n    }\n  else\n    {\n      unsigned long  shift;\n      if (plow == 0)\n        shift = n;\n      else\n        {\n          count_trailing_zeros (shift, plow);\n          shift = MIN (shift, n);\n        }\n      mpn_rshift (rdst_ptr, p, len, shift);\n      len -= (rdst_ptr[len-1] == 0);\n      n -= shift;\n    }\n  SIZ(rdst) = (rsrc_size >= 0) ? len : -len;\n\n  if (n)\n    mpz_mul_2exp (ldst, lsrc, n);\n  else if (ldst != lsrc)\n    mpz_set (ldst, lsrc);\n}\n\n\nvoid\nmpq_mul_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)\n{\n  mord_2exp (mpq_numref (dst), mpq_denref (dst),\n             mpq_numref (src), mpq_denref (src), n);\n}\n\nvoid\nmpq_div_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)\n{\n  if (SIZ (mpq_numref(src)) == 0)\n    {\n      dst->_mp_num._mp_size = 0;\n      dst->_mp_den._mp_size = 1;\n      dst->_mp_den._mp_d[0] = 1;\n      return;\n    }\n\n  mord_2exp (mpq_denref (dst), mpq_numref (dst),\n             mpq_denref (src), mpq_numref (src), n);\n}\n"
  },
  {
    "path": "mpq/mul.c",
    "content": "/* mpq_mul -- multiply two rational numbers.\n\nCopyright 1991, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpq_mul (mpq_ptr prod, mpq_srcptr op1, mpq_srcptr op2)\n{\n  mpz_t gcd1, gcd2;\n  mpz_t tmp1, tmp2;\n\n  if (op1 == op2)\n    {\n      /* No need for any GCDs when squaring. */\n      mpz_mul (mpq_numref (prod), mpq_numref (op1), mpq_numref (op1));\n      mpz_mul (mpq_denref (prod), mpq_denref (op1), mpq_denref (op1));\n      return;\n    }\n\n  mpz_init (gcd1);\n  mpz_init (gcd2);\n  mpz_init (tmp1);\n  mpz_init (tmp2);\n\n  /* PROD might be identical to either operand, so don't store the\n     result there until we are finished with the input operands.  We\n     dare to overwrite the numerator of PROD when we are finished\n     with the numerators of OP1 and OP2.  
*/\n\n  mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_den));\n  mpz_gcd (gcd2, &(op2->_mp_num), &(op1->_mp_den));\n\n  mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);\n  mpz_divexact_gcd (tmp2, &(op2->_mp_num), gcd2);\n\n  mpz_mul (&(prod->_mp_num), tmp1, tmp2);\n\n  mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd1);\n  mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);\n\n  mpz_mul (&(prod->_mp_den), tmp1, tmp2);\n\n  mpz_clear (tmp2);\n  mpz_clear (tmp1);\n  mpz_clear (gcd2);\n  mpz_clear (gcd1);\n}\n"
  },
  {
    "path": "mpq/neg.c",
    "content": "/* mpq_neg -- negate a rational.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpq_neg 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpq_neg (mpq_ptr dst, mpq_srcptr src)\n{\n  mp_size_t  num_size = src->_mp_num._mp_size;\n\n  if (src != dst)\n    {\n      mp_size_t  num_abs_size = ABS(num_size);\n      mp_size_t  den_size = src->_mp_den._mp_size;\n\n      MPZ_REALLOC (mpq_numref(dst), num_abs_size);\n      MPZ_REALLOC (mpq_denref(dst), den_size);\n\n      MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);\n      MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);\n\n      dst->_mp_den._mp_size = den_size;\n    }\n\n  dst->_mp_num._mp_size = -num_size;\n}\n"
  },
  {
    "path": "mpq/out_str.c",
    "content": "/* mpq_out_str(stream,base,integer) */\n\n/*\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nsize_t\nmpq_out_str (FILE *stream, int base, mpq_srcptr q)\n{\n  size_t  written;\n\n  if (stream == NULL)\n    stream = stdout;\n\n  written = mpz_out_str (stream, base, mpq_numref (q));\n\n  if (mpz_cmp_ui (mpq_denref (q), 1) != 0)\n    {\n      putc ('/', stream);\n      written += 1 + mpz_out_str (stream, base, mpq_denref (q));\n    }\n\n  return ferror (stream) ? 0 : written;\n}\n"
  },
  {
    "path": "mpq/set.c",
    "content": "/* mpq_set(dest,src) -- Set DEST to SRC.\n\nCopyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set (mpq_ptr dest, mpq_srcptr src)\n{\n  mp_size_t num_size, den_size;\n  mp_size_t abs_num_size;\n\n  num_size = src->_mp_num._mp_size;\n  abs_num_size = ABS (num_size);\n  if (dest->_mp_num._mp_alloc < abs_num_size)\n    _mpz_realloc (&(dest->_mp_num), abs_num_size);\n  MPN_COPY (dest->_mp_num._mp_d, src->_mp_num._mp_d, abs_num_size);\n  dest->_mp_num._mp_size = num_size;\n\n  den_size = src->_mp_den._mp_size;\n  if (dest->_mp_den._mp_alloc < den_size)\n    _mpz_realloc (&(dest->_mp_den), den_size);\n  MPN_COPY (dest->_mp_den._mp_d, src->_mp_den._mp_d, den_size);\n  dest->_mp_den._mp_size = den_size;\n}\n"
  },
  {
    "path": "mpq/set_d.c",
    "content": "/* mpq_set_d(mpq_t q, double d) -- Set q to d without rounding.\n\nCopyright 2000, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if LIMBS_PER_DOUBLE > 4\n  choke me\n#endif\n\nvoid\nmpq_set_d (mpq_ptr dest, double d)\n{\n  int negative;\n  mp_exp_t exp;\n  mp_limb_t tp[LIMBS_PER_DOUBLE];\n  mp_ptr np, dp;\n  mp_size_t nn, dn;\n  int c;\n\n  DOUBLE_NAN_INF_ACTION (d,\n                         __gmp_invalid_operation (),\n                         __gmp_invalid_operation ());\n\n  negative = d < 0;\n  d = ABS (d);\n\n  exp = __gmp_extract_double (tp, d);\n\n  /* There are two main version of the conversion.  The `then' arm handles\n     numbers with a fractional part, while the `else' arm handles integers.  
*/\n#if LIMBS_PER_DOUBLE == 4\n  if (exp <= 1 || (exp == 2 && (tp[0] | tp[1]) != 0))\n#endif\n#if LIMBS_PER_DOUBLE == 3\n  if (exp <= 1 || (exp == 2 && tp[0] != 0))\n#endif\n#if LIMBS_PER_DOUBLE == 2\n  if (exp <= 1)\n#endif\n    {\n      if (d == 0.0)\n\t{\n\t  SIZ(&(dest->_mp_num)) = 0;\n\t  SIZ(&(dest->_mp_den)) = 1;\n\t  PTR(&(dest->_mp_den))[0] = 1;\n\t  return;\n\t}\n\n      dn = -exp;\n      MPZ_REALLOC (&(dest->_mp_num), 3);\n      np = PTR(&(dest->_mp_num));\n#if LIMBS_PER_DOUBLE == 4\n      if ((tp[0] | tp[1] | tp[2]) == 0)\n\tnp[0] = tp[3], nn = 1;\n      else if ((tp[0] | tp[1]) == 0)\n\tnp[1] = tp[3], np[0] = tp[2], nn = 2;\n      else if (tp[0] == 0)\n\tnp[2] = tp[3], np[1] = tp[2], np[0] = tp[1], nn = 3;\n      else\n\tnp[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 4;\n#endif\n#if LIMBS_PER_DOUBLE == 3\n      if ((tp[0] | tp[1]) == 0)\n\tnp[0] = tp[2], nn = 1;\n      else if (tp[0] == 0)\n\tnp[1] = tp[2], np[0] = tp[1], nn = 2;\n      else\n\tnp[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 3;\n#endif\n#if LIMBS_PER_DOUBLE == 2\n      if (tp[0] == 0)\n\tnp[0] = tp[1], nn = 1;\n      else\n\tnp[1] = tp[1], np[0] = tp[0], nn = 2;\n#endif\n      dn += nn + 1;\n      ASSERT_ALWAYS (dn > 0);\n      MPZ_REALLOC (&(dest->_mp_den), dn);\n      dp = PTR(&(dest->_mp_den));\n      MPN_ZERO (dp, dn - 1);\n      dp[dn - 1] = 1;\n      count_trailing_zeros (c, np[0] | dp[0]);\n      if (c != 0)\n\t{\n\t  mpn_rshift (np, np, nn, c);\n\t  nn -= np[nn - 1] == 0;\n\t  mpn_rshift (dp, dp, dn, c);\n\t  dn -= dp[dn - 1] == 0;\n\t}\n      SIZ(&(dest->_mp_den)) = dn;\n      SIZ(&(dest->_mp_num)) = negative ? 
-nn : nn;\n    }\n  else\n    {\n      nn = exp;\n      MPZ_REALLOC (&(dest->_mp_num), nn);\n      np = PTR(&(dest->_mp_num));\n      switch (nn)\n        {\n\tdefault:\n\t  MPN_ZERO (np, nn - LIMBS_PER_DOUBLE);\n\t  np += nn - LIMBS_PER_DOUBLE;\n\t  /* fall through */\n#if LIMBS_PER_DOUBLE == 2\n\tcase 2:\n\t  np[1] = tp[1], np[0] = tp[0];\n\t  break;\n#endif\n#if LIMBS_PER_DOUBLE == 3\n\tcase 3:\n\t  np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];\n\t  break;\n\tcase 2:\n\t  np[1] = tp[2], np[0] = tp[1];\n\t  break;\n#endif\n#if LIMBS_PER_DOUBLE == 4\n\tcase 4:\n\t  np[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];\n\t  break;\n\tcase 3:\n\t  np[2] = tp[3], np[1] = tp[2], np[0] = tp[1];\n\t  break;\n\tcase 2:\n\t  np[1] = tp[3], np[0] = tp[2];\n\t  break;\n#endif\n\t}\n      dp = PTR(&(dest->_mp_den));\n      dp[0] = 1;\n      SIZ(&(dest->_mp_den)) = 1;\n      SIZ(&(dest->_mp_num)) = negative ? -nn : nn;\n    }\n}\n"
  },
  {
    "path": "mpq/set_den.c",
    "content": "/* mpq_set_den(dest,den) -- Set the denominator of DEST from DEN.\n\nCopyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set_den (mpq_ptr dest, mpz_srcptr den)\n{\n  mp_size_t size = den->_mp_size;\n  mp_size_t abs_size = ABS (size);\n\n  if (dest->_mp_den._mp_alloc < abs_size)\n    _mpz_realloc (&(dest->_mp_den), abs_size);\n\n  MPN_COPY (dest->_mp_den._mp_d, den->_mp_d, abs_size);\n  dest->_mp_den._mp_size = size;\n}\n"
  },
  {
    "path": "mpq/set_f.c",
    "content": "/* mpq_set_f -- set an mpq from an mpf.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nvoid\nmpq_set_f (mpq_ptr q, mpf_srcptr f)\n{\n  mp_size_t  fexp = EXP(f);\n  mp_ptr     fptr = PTR(f);\n  mp_size_t  fsize = SIZ(f);\n  mp_size_t  abs_fsize = ABS(fsize);\n  mp_limb_t  flow;\n\n  if (fsize == 0)\n    {\n      /* set q=0 */\n      q->_mp_num._mp_size = 0;\n      q->_mp_den._mp_size = 1;\n      q->_mp_den._mp_d[0] = 1;\n      return;\n    }\n\n  /* strip low zero limbs from f */\n  flow = *fptr;\n  MPN_STRIP_LOW_ZEROS_NOT_ZERO (fptr, abs_fsize, flow);\n\n  if (fexp >= abs_fsize)\n    {\n      /* radix point is to the right of the limbs, no denominator */\n      mp_ptr  num_ptr;\n\n      MPZ_REALLOC (mpq_numref (q), fexp);\n      num_ptr = q->_mp_num._mp_d;\n      MPN_ZERO (num_ptr, fexp - abs_fsize);\n      MPN_COPY (num_ptr + fexp - abs_fsize, fptr, abs_fsize);\n\n      q->_mp_num._mp_size = fsize >= 0 ? 
fexp : -fexp;\n      q->_mp_den._mp_size = 1;\n      q->_mp_den._mp_d[0] = 1;\n    }\n  else\n    {\n      /* radix point is within or to the left of the limbs, use denominator */\n      mp_ptr     num_ptr, den_ptr;\n      mp_size_t  den_size;\n\n      den_size = abs_fsize - fexp;\n      MPZ_REALLOC (mpq_numref (q), abs_fsize);\n      MPZ_REALLOC (mpq_denref (q), den_size+1);\n      num_ptr = q->_mp_num._mp_d;\n      den_ptr = q->_mp_den._mp_d;\n\n      if (flow & 1)\n        {\n          /* no powers of two to strip from numerator */\n\n          MPN_COPY (num_ptr, fptr, abs_fsize);\n          MPN_ZERO (den_ptr, den_size);\n          den_ptr[den_size] = 1;\n        }\n      else\n        {\n          /* right shift numerator, adjust denominator accordingly */\n          int  shift;\n\n          den_size--;\n          count_trailing_zeros (shift, flow);\n\n          mpn_rshift (num_ptr, fptr, abs_fsize, shift);\n          abs_fsize -= (num_ptr[abs_fsize-1] == 0);\n\n          MPN_ZERO (den_ptr, den_size);\n          den_ptr[den_size] = GMP_LIMB_HIGHBIT >> (shift-1);\n        }\n\n      q->_mp_num._mp_size = fsize >= 0 ? abs_fsize : -abs_fsize;\n      q->_mp_den._mp_size = den_size + 1;\n    }\n}\n"
  },
  {
    "path": "mpq/set_num.c",
    "content": "/* mpq_set_num(dest,num) -- Set the numerator of DEST from NUM.\n\nCopyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set_num (mpq_ptr dest, mpz_srcptr num)\n{\n  mp_size_t size = num->_mp_size;\n  mp_size_t abs_size = ABS (size);\n\n  if (dest->_mp_num._mp_alloc < abs_size)\n    _mpz_realloc (&(dest->_mp_num), abs_size);\n\n  MPN_COPY (dest->_mp_num._mp_d, num->_mp_d, abs_size);\n  dest->_mp_num._mp_size = size;\n}\n"
  },
  {
    "path": "mpq/set_si.c",
    "content": "/* mpq_set_si(dest,ulong_num,ulong_den) -- Set DEST to the rational number\n   ULONG_NUM/ULONG_DEN.\n\nCopyright 1991, 1994, 1995, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set_si (mpq_ptr dest, mpir_si num, mpir_ui den)\n{\n  mpir_ui abs_num;\n\n  if (GMP_NUMB_BITS < BITS_PER_UI)\n    {\n      if (num == 0)  /* Canonicalize 0/d to 0/1.  */\n        den = 1;\n      mpz_set_si (mpq_numref (dest), num);\n      mpz_set_ui (mpq_denref (dest), den);\n      return;\n    }\n\n  abs_num = ABS (num);\n\n  if (num == 0)\n    {\n      /* Canonicalize 0/d to 0/1.  */\n      den = 1;\n      dest->_mp_num._mp_size = 0;\n    }\n  else\n    {\n      dest->_mp_num._mp_d[0] = abs_num;\n      dest->_mp_num._mp_size = num > 0 ? 1 : -1;\n    }\n\n  dest->_mp_den._mp_d[0] = den;\n  dest->_mp_den._mp_size = (den != 0);\n}\n"
  },
  {
    "path": "mpq/set_str.c",
    "content": "/* mpq_set_str -- string to mpq conversion.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* FIXME: Would like an mpz_set_mem (or similar) accepting a pointer and\n   length so we wouldn't have to copy the numerator just to null-terminate\n   it.  */\n\nint\nmpq_set_str (mpq_ptr q, const char *str, int base)\n{\n  const char  *slash;\n  char        *num;\n  size_t      numlen;\n  int         ret;\n\n  slash = strchr (str, '/');\n  if (slash == NULL)\n    {\n      q->_mp_den._mp_size = 1;\n      q->_mp_den._mp_d[0] = 1;\n\n      return mpz_set_str (mpq_numref(q), str, base);\n    }\n\n  numlen = slash - str;\n  num = __GMP_ALLOCATE_FUNC_TYPE (numlen+1, char);\n  memcpy (num, str, numlen);\n  num[numlen] = '\\0';\n  ret = mpz_set_str (mpq_numref(q), num, base);\n  (*__gmp_free_func) (num, numlen+1);\n\n  if (ret != 0)\n    return ret;\n\n  return mpz_set_str (mpq_denref(q), slash+1, base);\n}\n"
  },
  {
    "path": "mpq/set_ui.c",
    "content": "/* mpq_set_ui(dest,ulong_num,ulong_den) -- Set DEST to the rational number\n   ULONG_NUM/ULONG_DEN.\n\nCopyright 1991, 1994, 1995, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set_ui (mpq_ptr dest, mpir_ui num, mpir_ui den)\n{\n  if (GMP_NUMB_BITS < BITS_PER_UI)\n    {\n      if (num == 0)  /* Canonicalize 0/d to 0/1.  */\n        den = 1;\n      mpz_set_ui (mpq_numref (dest), num);\n      mpz_set_ui (mpq_denref (dest), den);\n      return;\n    }\n\n  if (num == 0)\n    {\n      /* Canonicalize 0/n to 0/1.  */\n      den = 1;\n      dest->_mp_num._mp_size = 0;\n    }\n  else\n    {\n      dest->_mp_num._mp_d[0] = num;\n      dest->_mp_num._mp_size = 1;\n    }\n\n  dest->_mp_den._mp_d[0] = den;\n  dest->_mp_den._mp_size = (den != 0);\n}\n"
  },
  {
    "path": "mpq/set_z.c",
    "content": "/* mpq_set_z (dest,src) -- Set DEST to SRC.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_set_z (mpq_ptr dest, mpz_srcptr src)\n{\n  mp_size_t num_size;\n  mp_size_t abs_num_size;\n\n  num_size = src->_mp_size;\n  abs_num_size = ABS (num_size);\n  if (dest->_mp_num._mp_alloc < abs_num_size)\n    _mpz_realloc (&(dest->_mp_num), abs_num_size);\n  MPN_COPY (dest->_mp_num._mp_d, src->_mp_d, abs_num_size);\n  dest->_mp_num._mp_size = num_size;\n\n  dest->_mp_den._mp_d[0] = 1;\n  dest->_mp_den._mp_size = 1;\n}\n"
  },
  {
    "path": "mpq/swap.c",
    "content": "/* mpq_swap (U, V) -- Swap U and V.\n\nCopyright 1997, 1998, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpq_swap (mpq_ptr u, mpq_ptr v)\n{\n  mp_ptr up, vp;\n  mp_size_t usize, vsize;\n  mp_size_t ualloc, valloc;\n\n  ualloc = u->_mp_num._mp_alloc;\n  valloc = v->_mp_num._mp_alloc;\n  v->_mp_num._mp_alloc = ualloc;\n  u->_mp_num._mp_alloc = valloc;\n\n  usize = u->_mp_num._mp_size;\n  vsize = v->_mp_num._mp_size;\n  v->_mp_num._mp_size = usize;\n  u->_mp_num._mp_size = vsize;\n\n  up = u->_mp_num._mp_d;\n  vp = v->_mp_num._mp_d;\n  v->_mp_num._mp_d = up;\n  u->_mp_num._mp_d = vp;\n\n\n  ualloc = u->_mp_den._mp_alloc;\n  valloc = v->_mp_den._mp_alloc;\n  v->_mp_den._mp_alloc = ualloc;\n  u->_mp_den._mp_alloc = valloc;\n\n  usize = u->_mp_den._mp_size;\n  vsize = v->_mp_den._mp_size;\n  v->_mp_den._mp_size = usize;\n  u->_mp_den._mp_size = vsize;\n\n  up = u->_mp_den._mp_d;\n  vp = v->_mp_den._mp_d;\n  v->_mp_den._mp_d = up;\n  u->_mp_den._mp_d = vp;\n}\n"
  },
  {
    "path": "mpz/2fac_ui.c",
    "content": "/* mpz_2fac_ui(RESULT, N) -- Set RESULT to N!!.\n\nContributed to the GNU project by Marco Bodrato.\n\nCopyright 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_LIMB_BITS == 64\n\n#define TABLE_LIMIT_2N_MINUS_POPC_2N 81\n\n#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x57e22099c030d941)\n#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT 33\n\n#else\n\n#define TABLE_LIMIT_2N_MINUS_POPC_2N 49\n\n#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x27065f73)\n#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (19)\n\n#endif\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n#define FAC_2DSC_THRESHOLD ((FAC_DSC_THRESHOLD << 1) | (FAC_DSC_THRESHOLD & 1))\n#define FACTORS_PER_LIMB   (GMP_NUMB_BITS / (LOG2C(FAC_2DSC_THRESHOLD-1)+1))\n\n/* Computes n!!, the 2-multi-factorial of n. (aka double-factorial or semi-factorial)\n   WARNING: it assumes that n fits in a limb!\n */\nvoid\nmpz_2fac_ui (mpz_ptr x, mpir_ui n)\n{\n  ASSERT (n <= GMP_NUMB_MAX);\n\n  if ((n & 1) == 0) { /* n is even, n = 2k, (2k)!! = k! 
2^k */\n    mp_limb_t count;\n\n    if ((n <= TABLE_LIMIT_2N_MINUS_POPC_2N) & (n != 0))\n      count = __gmp_fac2cnt_table[n / 2 - 1];\n    else\n      {\n\tpopc_limb (count, n);\t/* popc(n) == popc(k) */\n\tcount = n - count;\t\t/* n - popc(n) == k + k - popc(k) */\n      }\n    mpz_oddfac_1 (x, n >> 1, 0);\n    mpz_mul_2exp (x, x, count);\n  } else { /* n is odd */\n    if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT) {\n\tPTR (x)[0] = __gmp_odd2fac_table[n >> 1];\n\tSIZ (x) = 1;\n    } else if (BELOW_THRESHOLD (n, FAC_2DSC_THRESHOLD)) { /* odd basecase, */\n      mp_limb_t *factors, prod, max_prod, j;\n      TMP_SDECL;\n\n      /* FIXME: we might alloc a fixed amount 1+FAC_2DSC_THRESHOLD/FACTORS_PER_LIMB */\n      TMP_SMARK;\n      factors = TMP_SALLOC_LIMBS (1 + n / (2 * FACTORS_PER_LIMB));\n\n      factors[0] = ODD_DOUBLEFACTORIAL_TABLE_MAX;\n      j = 1;\n      prod = n;\n\n      max_prod = GMP_NUMB_MAX / FAC_2DSC_THRESHOLD;\n      while ((n -= 2) > ODD_DOUBLEFACTORIAL_TABLE_LIMIT)\n\tFACTOR_LIST_STORE (n, prod, max_prod, factors, j);\n\n      factors[j++] = prod;\n      mpz_prodlimbs (x, factors, j);\n\n      TMP_SFREE;\n    } else { /* for the asymptotically fast odd case, let oddfac do the job. */\n      mpz_oddfac_1 (x, n, 1);\n    }\n  }\n}\n\n#undef FACTORS_PER_LIMB\n#undef FACTOR_LIST_STORE\n#undef FAC_2DSC_THRESHOLD\n"
  },
  {
    "path": "mpz/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software\n# Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nnoinst_LTLIBRARIES = libmpz.la\nlibmpz_la_SOURCES = 2fac_ui.c abs.c add.c add_ui.c and.c aors.h aors_ui.h aorsmul.c aorsmul_i.c array_init.c bin_ui.c bin_uiui.c cdiv_q.c cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c cfdiv_q_2exp.c cfdiv_r_2exp.c clear.c clears.c clrbit.c cmp.c cmp_d.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_d.c cmpabs_ui.c com.c combit.c cong.c cong_2exp.c cong_ui.c dive_ui.c divegcd.c divexact.c divis.c divis_2exp.c divis_ui.c dump.c export.c fac_ui.c fdiv_q.c fdiv_q_ui.c fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_ui.c fdiv_ui.c fib2_ui.c fib_ui.c fits_s.h fits_si.c fits_sint.c fits_slong.c fits_sshort.c fits_ui.c fits_uint.c fits_ulong.c fits_ushort.c gcd.c gcd_ui.c gcdext.c get_d.c get_d_2exp.c get_si.c get_str.c get_sx.c get_ui.c get_ux.c getlimbn.c hamdist.c import.c init.c init2.c inits.c inp_raw.c inp_str.c invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_sx.c iset_ui.c iset_ux.c 
jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c lcm.c lcm_ui.c likely_prime_p.c limbs_finish.c limbs_modify.c limbs_read.c limbs_write.c lucnum2_ui.c lucnum_ui.c mfac_uiui.c miller_rabin.c millerrabin.c mod.c mul.c mul_2exp.c mul_i.h mul_si.c mul_ui.c n_pow_ui.c neg.c next_prime_candidate.c nextprime.c nthroot.c oddfac_1.c out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c powm_ui.c pprime_p.c primorial_ui.c probable_prime_p.c prodlimbs.c realloc.c realloc2.c remove.c roinit_n.c root.c rootrem.c rrandomb.c scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c set_sx.c set_ui.c set_ux.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c swap.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tdiv_ui.c trial_division.c tstbit.c ui_pow_ui.c ui_sub.c urandomb.c urandomm.c xor.c\n"
  },
  {
    "path": "mpz/abs.c",
    "content": "/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.\n\nCopyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_abs 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_abs (mpz_ptr w, mpz_srcptr u)\n{\n  mp_ptr wp, up;\n  mp_size_t size;\n\n  size = ABS (u->_mp_size);\n\n  if (u != w)\n    {\n      if (w->_mp_alloc < size)\n\t_mpz_realloc (w, size);\n\n      wp = w->_mp_d;\n      up = u->_mp_d;\n\n      MPN_COPY (wp, up, size);\n    }\n\n  w->_mp_size = size;\n}\n"
  },
  {
    "path": "mpz/add.c",
    "content": "/* mpz_add -- add integers.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_add\n#include \"aors.h\"\n"
  },
  {
    "path": "mpz/add_ui.c",
    "content": "/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_add_ui\n#include \"aors_ui.h\"\n"
  },
  {
    "path": "mpz/and.c",
    "content": "/* mpz_and -- Logical and.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)\n{\n  mp_srcptr op1_ptr, op2_ptr;\n  mp_size_t op1_size, op2_size;\n  mp_ptr res_ptr;\n  mp_size_t res_size;\n  mp_size_t i;\n  TMP_DECL;\n\n  TMP_MARK;\n  op1_size = op1->_mp_size;\n  op2_size = op2->_mp_size;\n\n  op1_ptr = op1->_mp_d;\n  op2_ptr = op2->_mp_d;\n  res_ptr = res->_mp_d;\n\n  if (op1_size >= 0)\n    {\n      if (op2_size >= 0)\n\t{\n\t  res_size = MIN (op1_size, op2_size);\n\t  /* First loop finds the size of the result.  */\n\t  for (i = res_size - 1; i >= 0; i--)\n\t    if ((op1_ptr[i] & op2_ptr[i]) != 0)\n\t      break;\n\t  res_size = i + 1;\n\n\t  /* Handle allocation, now then we know exactly how much space is\n\t     needed for the result.  
*/\n\t  if (UNLIKELY (res->_mp_alloc < res_size))\n\t    {\n\t      _mpz_realloc (res, res_size);\n\t      op1_ptr = op1->_mp_d;\n\t      op2_ptr = op2->_mp_d;\n\t      res_ptr = res->_mp_d;\n\t    }\n\n\t  res->_mp_size = res_size;\n          if (LIKELY (res_size != 0))\n            mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);\n\t  return;\n\t}\n      else /* op2_size < 0 */\n\t{\n\t  /* Fall through to the code at the end of the function.  */\n\t}\n    }\n  else\n    {\n      if (op2_size < 0)\n\t{\n\t  mp_ptr opx;\n\t  mp_limb_t cy;\n\t  mp_size_t res_alloc;\n\n\t  /* Both operands are negative, so will be the result.\n\t     -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =\n\t     = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 =\n\t     = ((OP1 - 1) | (OP2 - 1)) + 1      */\n\n\t  /* It might seem as we could end up with an (invalid) result with\n\t     a leading zero-limb here when one of the operands is of the\n\t     type 1,,0,,..,,.0.  But some analysis shows that we surely\n\t     would get carry into the zero-limb in this situation...  */\n\n\t  op1_size = -op1_size;\n\t  op2_size = -op2_size;\n\n\t  res_alloc = 1 + MAX (op1_size, op2_size);\n\n\t  opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);\n\t  op1_ptr = opx;\n\n\t  opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);\n\t  op2_ptr = opx;\n\n\t  if (res->_mp_alloc < res_alloc)\n\t    {\n\t      _mpz_realloc (res, res_alloc);\n\t      res_ptr = res->_mp_d;\n\t      /* Don't re-read OP1_PTR and OP2_PTR.  They point to\n\t\t temporary space--never to the space RES->_mp_d used\n\t\t to point to before reallocation.  
*/\n\t    }\n\n\t  if (op1_size >= op2_size)\n\t    {\n\t      MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,\n\t\t\top1_size - op2_size);\n\t      for (i = op2_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] | op2_ptr[i];\n\t      res_size = op1_size;\n\t    }\n\t  else\n\t    {\n\t      MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,\n\t\t\top2_size - op1_size);\n\t      for (i = op1_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] | op2_ptr[i];\n\t      res_size = op2_size;\n\t    }\n\n\t  cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);\n\t  if (cy)\n\t    {\n\t      res_ptr[res_size] = cy;\n\t      res_size++;\n\t    }\n\n\t  res->_mp_size = -res_size;\n\t  TMP_FREE;\n\t  return;\n\t}\n      else\n\t{\n\t  /* We should compute -OP1 & OP2.  Swap OP1 and OP2 and fall\n\t     through to the code that handles OP1 & -OP2.  */\n          MPZ_SRCPTR_SWAP (op1, op2);\n          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);\n\t}\n\n    }\n\n  {\n#if ANDNEW\n    mp_size_t op2_lim;\n    mp_size_t count;\n\n    /* OP2 must be negated as with infinite precision.\n\n       Scan from the low end for a non-zero limb.  The first non-zero\n       limb is simply negated (two's complement).  Any subsequent\n       limbs are one's complemented.  Of course, we don't need to\n       handle more limbs than there are limbs in the other, positive\n       operand as the result for those limbs is going to become zero\n       anyway.  */\n\n    /* Scan for the least significant non-zero OP2 limb, and zero the\n       result meanwhile for those limb positions.  (We will surely\n       find a non-zero limb, so we can write the loop with one\n       termination condition only.)  */\n    for (i = 0; op2_ptr[i] == 0; i++)\n      res_ptr[i] = 0;\n    op2_lim = i;\n\n    op2_size = -op2_size;\n\n    if (op1_size <= op2_size)\n      {\n\t/* The ones-extended OP2 is >= than the zero-extended OP1.\n\t   RES_SIZE <= OP1_SIZE.  Find the exact size.  
*/\n\tfor (i = op1_size - 1; i > op2_lim; i--)\n\t  if ((op1_ptr[i] & ~op2_ptr[i]) != 0)\n\t    break;\n\tres_size = i + 1;\n\tfor (i = res_size - 1; i > op2_lim; i--)\n\t  res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];\n\tres_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];\n\t/* Yes, this *can* happen!  */\n\tMPN_NORMALIZE (res_ptr, res_size);\n      }\n    else\n      {\n\t/* The ones-extended OP2 is < than the zero-extended OP1.\n\t   RES_SIZE == OP1_SIZE, since OP1 is normalized.  */\n\tres_size = op1_size;\n\tMPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);\n\tfor (i = op2_size - 1; i > op2_lim; i--)\n\t  res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];\n\tres_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];\n      }\n\n    res->_mp_size = res_size;\n#else\n\n    /* OP1 is positive and zero-extended,\n       OP2 is negative and ones-extended.\n       The result will be positive.\n       OP1 & -OP2 = OP1 & ~(OP2 - 1).  */\n\n    mp_ptr opx;\n\n    op2_size = -op2_size;\n    opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);\n    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);\n    op2_ptr = opx;\n\n    if (op1_size > op2_size)\n      {\n\t/* The result has the same size as OP1, since OP1 is normalized\n\t   and longer than the ones-extended OP2.  */\n\tres_size = op1_size;\n\n\t/* Handle allocation, now then we know exactly how much space is\n\t   needed for the result.  */\n\tif (res->_mp_alloc < res_size)\n\t  {\n\t    _mpz_realloc (res, res_size);\n\t    res_ptr = res->_mp_d;\n\t    op1_ptr = op1->_mp_d;\n\t    /* Don't re-read OP2_PTR.  It points to temporary space--never\n\t       to the space RES->_mp_d used to point to before reallocation.  */\n\t  }\n\n\tMPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,\n\t\t  res_size - op2_size);\n\tfor (i = op2_size - 1; i >= 0; i--)\n\t  res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];\n\n\tres->_mp_size = res_size;\n      }\n    else\n      {\n\t/* Find out the exact result size.  
Ignore the high limbs of OP2,\n\t   OP1 is zero-extended and would make the result zero.  */\n\tfor (i = op1_size - 1; i >= 0; i--)\n\t  if ((op1_ptr[i] & ~op2_ptr[i]) != 0)\n\t    break;\n\tres_size = i + 1;\n\n\t/* Handle allocation, now then we know exactly how much space is\n\t   needed for the result.  */\n\tif (res->_mp_alloc < res_size)\n\t  {\n\t    _mpz_realloc (res, res_size);\n\t    res_ptr = res->_mp_d;\n\t    op1_ptr = op1->_mp_d;\n\t    /* Don't re-read OP2_PTR.  It points to temporary space--never\n\t       to the space RES->_mp_d used to point to before reallocation.  */\n\t  }\n\n\tfor (i = res_size - 1; i >= 0; i--)\n\t  res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];\n\n\tres->_mp_size = res_size;\n      }\n#endif\n  }\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/aors.h",
    "content": "/* mpz_add, mpz_sub -- add or subtract integers.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#ifdef OPERATION_add\n#define FUNCTION     mpz_add\n#define VARIATION\n#endif\n#ifdef OPERATION_sub\n#define FUNCTION     mpz_sub\n#define VARIATION    -\n#endif\n#define ARGUMENTS    mpz_ptr w, mpz_srcptr u, mpz_srcptr v\n\n#ifndef FUNCTION\nError, need OPERATION_add or OPERATION_sub\n#endif\n\n\nvoid\nFUNCTION (ARGUMENTS)\n{\n  mp_srcptr up, vp;\n  mp_ptr wp;\n  mp_size_t usize, vsize, wsize;\n  mp_size_t abs_usize;\n  mp_size_t abs_vsize;\n\n  usize = u->_mp_size;\n  vsize = VARIATION v->_mp_size;\n  abs_usize = ABS (usize);\n  abs_vsize = ABS (vsize);\n\n  if (abs_usize < abs_vsize)\n    {\n      /* Swap U and V. */\n      MPZ_SRCPTR_SWAP (u, v);\n      MP_SIZE_T_SWAP (usize, vsize);\n      MP_SIZE_T_SWAP (abs_usize, abs_vsize);\n    }\n\n  /* True: ABS_USIZE >= ABS_VSIZE.  */\n\n  /* If not space for w (and possible carry), increase space.  
*/\n  wsize = abs_usize + 1;\n  if (w->_mp_alloc < wsize)\n    _mpz_realloc (w, wsize);\n\n  /* These must be after realloc (u or v may be the same as w).  */\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  wp = w->_mp_d;\n\n  if ((usize ^ vsize) < 0)\n    {\n      /* U and V have different sign.  Need to compare them to determine\n\t which operand to subtract from which.  */\n\n      /* This test is right since ABS_USIZE >= ABS_VSIZE.  */\n      if (abs_usize != abs_vsize)\n\t{\n\t  mpn_sub (wp, up, abs_usize, vp, abs_vsize);\n\t  wsize = abs_usize;\n\t  MPN_NORMALIZE (wp, wsize);\n\t  if (usize < 0)\n\t    wsize = -wsize;\n\t}\n      else if (mpn_cmp (up, vp, abs_usize) < 0)\n\t{\n\t  mpn_sub_n (wp, vp, up, abs_usize);\n\t  wsize = abs_usize;\n\t  MPN_NORMALIZE (wp, wsize);\n\t  if (usize >= 0)\n\t    wsize = -wsize;\n\t}\n      else\n\t{\n\t  mpn_sub_n (wp, up, vp, abs_usize);\n\t  wsize = abs_usize;\n\t  MPN_NORMALIZE (wp, wsize);\n\t  if (usize < 0)\n\t    wsize = -wsize;\n\t}\n    }\n  else\n    {\n      /* U and V have same sign.  Add them.  */\n      mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);\n      wp[abs_usize] = cy_limb;\n      wsize = abs_usize + cy_limb;\n      if (usize < 0)\n\twsize = -wsize;\n    }\n\n  w->_mp_size = wsize;\n}\n"
  },
  {
    "path": "mpz/aors_ui.h",
    "content": "/* mpz_add_ui, mpz_sub_ui -- Add or subtract an mpz_t and an unsigned\n   one-word integer.\n\nCopyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#ifdef OPERATION_add_ui\n#define FUNCTION          mpz_add_ui\n#define FUNCTION2         mpz_add\n#define VARIATION_CMP     >=\n#define VARIATION_NEG\n#define VARIATION_UNNEG   -\n#endif\n\n#ifdef OPERATION_sub_ui\n#define FUNCTION          mpz_sub_ui\n#define FUNCTION2         mpz_sub\n#define VARIATION_CMP     <\n#define VARIATION_NEG     -\n#define VARIATION_UNNEG\n#endif\n\n#ifndef FUNCTION\nError, need OPERATION_add_ui or OPERATION_sub_ui\n#endif\n\n\nvoid\nFUNCTION (mpz_ptr w, mpz_srcptr u, mpir_ui vval)\n{\n  mp_srcptr up;\n  mp_ptr wp;\n  mp_size_t usize, wsize;\n  mp_size_t abs_usize;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (vval > GMP_NUMB_MAX)\n    {\n      mpz_t v;\n      mp_limb_t vl[2];\n      PTR(v) = vl;\n      vl[0] = vval & GMP_NUMB_MASK;\n      vl[1] = vval >> GMP_NUMB_BITS;\n      SIZ(v) = 2;\n      FUNCTION2 (w, u, v);\n      return;\n    }\n#endif\n\n  usize = u->_mp_size;\n  abs_usize = ABS (usize);\n\n  /* If not space for W (and possible carry), increase space.  */\n  wsize = abs_usize + 1;\n  if (w->_mp_alloc < wsize)\n    _mpz_realloc (w, wsize);\n\n  /* These must be after realloc (U may be the same as W).  */\n  up = u->_mp_d;\n  wp = w->_mp_d;\n\n  if (abs_usize == 0)\n    {\n      wp[0] = vval;\n      w->_mp_size = VARIATION_NEG (vval != 0);\n      return;\n    }\n\n  if (usize VARIATION_CMP 0)\n    {\n      mp_limb_t cy;\n      cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) vval);\n      wp[abs_usize] = cy;\n      wsize = VARIATION_NEG (abs_usize + cy);\n    }\n  else\n    {\n      /* The signs are different.  Need exact comparison to determine\n\t which operand to subtract from which.  */\n      if (abs_usize == 1 && up[0] < vval)\n\t{\n\t  wp[0] = vval - up[0];\n\t  wsize = VARIATION_NEG 1;\n\t}\n      else\n\t{\n\t  mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) vval);\n\t  /* Size can decrease with at most one limb.  
*/\n\t  wsize = VARIATION_UNNEG (abs_usize - (wp[abs_usize - 1] == 0));\n\t}\n    }\n\n  w->_mp_size = wsize;\n}\n"
  },
  {
    "path": "mpz/aorsmul.c",
    "content": "/* mpz_addmul, mpz_submul -- add or subtract multiple.\n\nCopyright 2001, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* expecting x and y both with non-zero high limbs */\n#define mpn_cmp_twosizes_lt(xp,xsize, yp,ysize)                 \\\n  ((xsize) < (ysize)                                            \\\n   || ((xsize) == (ysize) && mpn_cmp (xp, yp, xsize) < 0))\n\n\n/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.\n\n   The signs of w, x and y are fully accounted for by each flipping \"sub\".\n\n   The sign of w is retained for the result, unless the absolute value\n   submul underflows, in which case it flips.  
*/\n\nstatic void __gmpz_aorsmul(REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)) REGPARM_ATTR (1);\n#define mpz_aorsmul(w,x,y,sub)  __gmpz_aorsmul (REGPARM_3_1 (w, x, y, sub))\n\nREGPARM_ATTR (1) static void\nmpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)\n{\n  mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;\n  mp_ptr     wp, tp;\n  mp_limb_t  c, high;\n  TMP_DECL;\n\n  /* w unaffected if x==0 or y==0 */\n  xsize = SIZ(x);\n  ysize = SIZ(y);\n  if (xsize == 0 || ysize == 0)\n    return;\n\n  /* make x the bigger of the two */\n  if (ABS(ysize) > ABS(xsize))\n    {\n      MPZ_SRCPTR_SWAP (x, y);\n      MP_SIZE_T_SWAP (xsize, ysize);\n    }\n\n  sub ^= ysize;\n  ysize = ABS(ysize);\n\n  /* use mpn_addmul_1/mpn_submul_1 if possible */\n  if (ysize == 1)\n    {\n      mpz_aorsmul_1 (w, x, PTR(y)[0], sub);\n      return;\n    }\n\n  sub ^= xsize;\n  xsize = ABS(xsize);\n\n  wsize_signed = SIZ(w);\n  sub ^= wsize_signed;\n  wsize = ABS(wsize_signed);\n\n  tsize = xsize + ysize;\n  MPZ_REALLOC (w, MAX (wsize, tsize) + 1);\n  wp = PTR(w);\n\n  if (wsize_signed == 0)\n    {\n      /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,\n         since we know x,y!=0 but w==0.  */\n      high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);\n      tsize -= (high == 0);\n      SIZ(w) = (sub >= 0 ? 
tsize : -tsize);\n      return;\n    }\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (tsize);\n\n  high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);\n  tsize -= (high == 0);\n  ASSERT (tp[tsize-1] != 0);\n  if (sub >= 0)\n    {\n      mp_srcptr up    = wp;\n      mp_size_t usize = wsize;\n\n      if (usize < tsize)\n        {\n          up    = tp;\n          usize = tsize;\n          tp    = wp;\n          tsize = wsize;\n\n          wsize = usize;\n        }\n\n      c = mpn_add (wp, up,usize, tp,tsize);\n      wp[wsize] = c;\n      wsize += (c != 0);\n    }\n  else\n    {\n      mp_srcptr up    = wp;\n      mp_size_t usize = wsize;\n\n      if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))\n        {\n          up    = tp;\n          usize = tsize;\n          tp    = wp;\n          tsize = wsize;\n\n          wsize = usize;\n          wsize_signed = -wsize_signed;\n        }\n\n      ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));\n      wsize = usize;\n      MPN_NORMALIZE (wp, wsize);\n    }\n\n  SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);\n\n  TMP_FREE;\n}\n\n\nvoid\nmpz_addmul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)\n{\n  mpz_aorsmul (w, u, v, (mp_size_t) 0);\n}\n\nvoid\nmpz_submul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)\n{\n  mpz_aorsmul (w, u, v, (mp_size_t) -1);\n}\n"
  },
  {
    "path": "mpz/aorsmul_i.c",
    "content": "/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.\n\n   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS\n   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR\n   COMPLETELY IN FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#if HAVE_NATIVE_mpn_mul_1c\n#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \\\n  do {                                                  \\\n    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \\\n  } while (0)\n#else\n#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \\\n  do {                                                  \\\n    mp_limb_t __cy;                                     \\\n    __cy = mpn_mul_1 (dst, src, size, n);               \\\n    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \\\n  } while (0)\n#endif\n\n\n/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.\n\n   All that's needed to account for negative w or x is to flip \"sub\".\n\n   The final w will retain its sign, unless an underflow occurs in a submul\n   of absolute values, in which case it's flipped.\n\n   If x has more limbs than w, then mpn_submul_1 followed by mpn_com_n is\n   used.  The alternative would be mpn_mul_1 into temporary space followed\n   by mpn_sub_n.  Avoiding temporary space seem good, and submul+com stands\n   a chance of being faster since it involves only one set of carry\n   propagations, not two.  Note that doing an addmul_1 with a\n   twos-complement negative y doesn't work, because it effectively adds an\n   extra x * 2^BITS_PER_MP_LIMB.  
*/\n\nREGPARM_ATTR(1) void\nmpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)\n{\n  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;\n  mp_srcptr  xp;\n  mp_ptr     wp;\n  mp_limb_t  cy;\n\n  /* w unaffected if x==0 or y==0 */\n  xsize = SIZ (x);\n  if (xsize == 0 || y == 0)\n    return;\n\n  sub ^= xsize;\n  xsize = ABS (xsize);\n\n  wsize_signed = SIZ (w);\n  if (wsize_signed == 0)\n    {\n      /* nothing to add to, just set x*y, \"sub\" gives the sign */\n      MPZ_REALLOC (w, xsize+1);\n      wp = PTR (w);\n      cy = mpn_mul_1 (wp, PTR(x), xsize, y);\n      wp[xsize] = cy;\n      xsize += (cy != 0);\n      SIZ (w) = (sub >= 0 ? xsize : -xsize);\n      return;\n    }\n\n  sub ^= wsize_signed;\n  wsize = ABS (wsize_signed);\n\n  new_wsize = MAX (wsize, xsize);\n  MPZ_REALLOC (w, new_wsize+1);\n  wp = PTR (w);\n  xp = PTR (x);\n  min_size = MIN (wsize, xsize);\n\n  if (sub >= 0)\n    {\n      /* addmul of absolute values */\n\n      cy = mpn_addmul_1 (wp, xp, min_size, y);\n      wp += min_size;\n      xp += min_size;\n\n      dsize = xsize - wsize;\n#if HAVE_NATIVE_mpn_mul_1c\n      if (dsize > 0)\n        cy = mpn_mul_1c (wp, xp, dsize, y, cy);\n      else if (dsize < 0)\n        {\n          dsize = -dsize;\n          cy = mpn_add_1 (wp, wp, dsize, cy);\n        }\n#else\n      if (dsize != 0)\n        {\n          mp_limb_t  cy2;\n          if (dsize > 0)\n            cy2 = mpn_mul_1 (wp, xp, dsize, y);\n          else\n            {\n              dsize = -dsize;\n              cy2 = 0;\n            }\n          cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);\n        }\n#endif\n\n      wp[dsize] = cy;\n      new_wsize += (cy != 0);\n    }\n  else\n    {\n      /* submul of absolute values */\n\n      cy = mpn_submul_1 (wp, xp, min_size, y);\n      if (wsize >= xsize)\n        {\n          /* if w bigger than x, then propagate borrow through it */\n          if (wsize != xsize)\n            cy = mpn_sub_1 (wp+xsize, wp+xsize, 
wsize-xsize, cy);\n\n          if (cy != 0)\n            {\n              /* Borrow out of w, take twos complement negative to get\n                 absolute value, flip sign of w.  */\n              wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */\n              mpn_not (wp, new_wsize);\n              new_wsize++;\n              MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));\n              wsize_signed = -wsize_signed;\n            }\n        }\n      else /* wsize < xsize */\n        {\n          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so\n             take twos complement and use an mpn_mul_1 for the rest.  */\n\n          mp_limb_t  cy2;\n\n          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */\n          mpn_not (wp, wsize);\n          cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));\n          cy -= 1;\n\n          /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never\n             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */\n          cy2 = (cy == MP_LIMB_T_MAX);\n          cy += cy2;\n          MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);\n          wp[new_wsize] = cy;\n          new_wsize += (cy != 0);\n\n          /* Apply any -1 from above.  The value at wp+wsize is non-zero\n             because y!=0 and the high limb of x will be non-zero.  */\n          if (cy2)\n            MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));\n\n          wsize_signed = -wsize_signed;\n        }\n\n      /* submul can produce high zero limbs due to cancellation, both when w\n         has more limbs or x has more  */\n      MPN_NORMALIZE (wp, new_wsize);\n    }\n\n  SIZ (w) = (wsize_signed >= 0 ? 
new_wsize : -new_wsize);\n\n  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);\n}\n\n\nvoid\nmpz_addmul_ui (mpz_ptr w, mpz_srcptr x, mpir_ui y)\n{\n#if BITS_PER_UI > GMP_NUMB_BITS\n  if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))\n    {\n      mpz_t t;\n      mp_ptr tp;\n      mp_size_t xn;\n      TMP_DECL;\n      TMP_MARK;\n      xn = SIZ (x);\n      MPZ_TMP_INIT (t, ABS (xn) + 1);\n      tp = PTR (t);\n      tp[0] = 0;\n      MPN_COPY (tp + 1, PTR(x), ABS (xn));\n      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;\n      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);\n      PTR(t) = tp + 1;\n      SIZ(t) = xn;\n      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);\n      TMP_FREE;\n      return;\n    }\n#endif\n  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);\n}\n\nvoid\nmpz_submul_ui (mpz_ptr w, mpz_srcptr x, mpir_ui y)\n{\n#if BITS_PER_UI > GMP_NUMB_BITS\n  if (y > GMP_NUMB_MAX && SIZ(x) != 0)\n    {\n      mpz_t t;\n      mp_ptr tp;\n      mp_size_t xn;\n      TMP_DECL;\n      TMP_MARK;\n      xn = SIZ (x);\n      MPZ_TMP_INIT (t, ABS (xn) + 1);\n      tp = PTR (t);\n      tp[0] = 0;\n      MPN_COPY (tp + 1, PTR(x), ABS (xn));\n      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;\n      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);\n      PTR(t) = tp + 1;\n      SIZ(t) = xn;\n      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);\n      TMP_FREE;\n      return;\n    }\n#endif\n  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);\n}\n"
  },
  {
    "path": "mpz/array_init.c",
    "content": "/* mpz_array_init (array, array_size, size_per_elem) --\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n// This function is obsolete 19/08/09\nvoid\nmpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)\n{\n  register mp_ptr p;\n  register mp_size_t i;\n  mp_size_t nlimbs;\n\n  nlimbs = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;\n  p = (mp_ptr) (*__gmp_allocate_func) (arr_size * nlimbs * BYTES_PER_MP_LIMB);\n\n  for (i = 0; i < arr_size; i++)\n    {\n      arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little... */\n      arr[i]._mp_size = 0;\n      arr[i]._mp_d = p + i * nlimbs;\n    }\n}\n"
  },
  {
    "path": "mpz/bin_ui.c",
    "content": "/* mpz_bin_ui - compute n over k.\n\nCopyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.\n   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving\n   the code here only for big n.\n\n   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol\n   1 section 1.2.6 part G. 
*/\n\n\n#define DIVIDE()                                                              \\\n  do {                                                                        \\\n    ASSERT (SIZ(r) > 0);                                                      \\\n    MPN_DIVREM_OR_DIVEXACT_1 (PTR(r), PTR(r), (mp_size_t) SIZ(r), kacc);      \\\n    SIZ(r) -= (PTR(r)[SIZ(r)-1] == 0);                                        \\\n  } while (0)\n\nvoid\nmpz_bin_ui (mpz_ptr r, mpz_srcptr n, mpir_ui k)\n{\n  mpz_t      ni;\n  mp_limb_t  i;\n  mpz_t      nacc;\n  mp_limb_t  kacc;\n  mp_size_t  negate;\n\n  if (SIZ (n) < 0)\n    {\n      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */\n      mpz_init (ni);\n      mpz_neg (ni, n);\n      mpz_sub_ui (ni, ni, 1L);\n      negate = (k & 1);   /* (-1)^k */\n    }\n  else\n    {\n      /* bin(n,k) == 0 if k>n\n\t (no test for this under the n<0 case, since -n+k-1 >= k there) */\n      if (mpz_cmp_ui (n, k) < 0)\n\t{\n\t  SIZ (r) = 0;\n\t  return;\n\t}\n\n      /* set ni = n-k */\n      mpz_init (ni);\n      mpz_sub_ui (ni, n, k);\n      negate = 0;\n    }\n\n  /* Now wanting bin(ni+k,k), with ni positive, and \"negate\" is the sign (0\n     for positive, 1 for negative). */\n  SIZ (r) = 1; PTR (r)[0] = 1;\n\n  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's\n     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k\n     = ni, and new ni of ni+k-ni = k.  
*/\n  if (mpz_cmp_ui (ni, k) < 0)\n    {\n      unsigned long  tmp;\n      tmp = k;\n      k = mpz_get_ui (ni);\n      mpz_set_ui (ni, tmp);\n    }\n\n  kacc = 1;\n  mpz_init_set_ui (nacc, 1L);\n\n  for (i = 1; i <= k; i++)\n    {\n      mp_limb_t k1, k0;\n\n#if 0\n      mp_limb_t nacclow;\n      int c;\n\n      nacclow = PTR(nacc)[0];\n      for (c = 0; (((kacc | nacclow) & 1) == 0); c++)\n\t{\n\t  kacc >>= 1;\n\t  nacclow >>= 1;\n\t}\n      mpz_div_2exp (nacc, nacc, c);\n#endif\n\n      mpz_add_ui (ni, ni, 1L);\n      mpz_mul (nacc, nacc, ni);\n      umul_ppmm (k1, k0, kacc, i << GMP_NAIL_BITS);\n      if (k1 != 0)\n\t{\n\t  /* Accumulator overflow.  Perform bignum step.  */\n\t  mpz_mul (r, r, nacc);\n\t  SIZ (nacc) = 1; PTR (nacc)[0] = 1;\n\t  DIVIDE ();\n\t  kacc = i;\n\t}\n      else\n\t{\n\t  /* Save new products in accumulators to keep accumulating.  */\n\t  kacc = k0 >> GMP_NAIL_BITS;\n\t}\n    }\n\n  mpz_mul (r, r, nacc);\n  DIVIDE ();\n  SIZ(r) = (SIZ(r) ^ -negate) + negate;\n\n  mpz_clear (nacc);\n  mpz_clear (ni);\n}\n"
  },
  {
    "path": "mpz/bin_uiui.c",
    "content": "/* mpz_bin_uiui - compute n over k.\n\nContributed to the GNU project by Torbjorn Granlund and Marco Bodrato.\n\nCopyright 2010, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef BIN_GOETGHELUCK_THRESHOLD\n#define BIN_GOETGHELUCK_THRESHOLD  1000\n#endif\n#ifndef BIN_UIUI_ENABLE_SMALLDC\n#define BIN_UIUI_ENABLE_SMALLDC    1\n#endif\n#ifndef BIN_UIUI_RECURSIVE_SMALLDC\n#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)\n#endif\n\n#if GMP_LIMB_BITS == 64\n\n/* This table contains inverses of odd factorials, modulo 2^GMP_NUMB_BITS */\n\n/* It begins with (2!/2)^-1=1 */\n#define ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE 
CNST_LIMB(0x1),CNST_LIMB(0xaaaaaaaaaaaaaaab),CNST_LIMB(0xaaaaaaaaaaaaaaab),CNST_LIMB(0xeeeeeeeeeeeeeeef),CNST_LIMB(0x4fa4fa4fa4fa4fa5),CNST_LIMB(0x2ff2ff2ff2ff2ff3),CNST_LIMB(0x2ff2ff2ff2ff2ff3),CNST_LIMB(0x938cc70553e3771b),CNST_LIMB(0xb71c27cddd93e49f),CNST_LIMB(0xb38e3229fcdee63d),CNST_LIMB(0xe684bb63544a4cbf),CNST_LIMB(0xc2f684917ca340fb),CNST_LIMB(0xf747c9cba417526d),CNST_LIMB(0xbb26eb51d7bd49c3),CNST_LIMB(0xbb26eb51d7bd49c3),CNST_LIMB(0xb0a7efb985294093),CNST_LIMB(0xbe4b8c69f259eabb),CNST_LIMB(0x6854d17ed6dc4fb9),CNST_LIMB(0xe1aa904c915f4325),CNST_LIMB(0x3b8206df131cead1),CNST_LIMB(0x79c6009fea76fe13),CNST_LIMB(0xd8c5d381633cd365),CNST_LIMB(0x4841f12b21144677),CNST_LIMB(0x4a91ff68200b0d0f),CNST_LIMB(0x8f9513a58c4f9e8b),CNST_LIMB(0x2b3e690621a42251),CNST_LIMB(0x4f520f00e03c04e7),CNST_LIMB(0x2edf84ee600211d3),CNST_LIMB(0xadcaa2764aaacdfd),CNST_LIMB(0x161f4f9033f4fe63),CNST_LIMB(0x161f4f9033f4fe63),CNST_LIMB(0xbada2932ea4d3e03),CNST_LIMB(0xcec189f3efaa30d3),CNST_LIMB(0xf7475bb68330bf91),CNST_LIMB(0x37eb7bf7d5b01549),CNST_LIMB(0x46b35660a4e91555),CNST_LIMB(0xa567c12d81f151f7),CNST_LIMB(0x4c724007bb2071b1),CNST_LIMB(0xf4a0cce58a016bd),CNST_LIMB(0xfa21068e66106475),CNST_LIMB(0x244ab72b5a318ae1),CNST_LIMB(0x366ce67e080d0f23),CNST_LIMB(0xd666fdae5dd2a449),CNST_LIMB(0xd740ddd0acc06a0d),CNST_LIMB(0xb050bbbb28e6f97b),CNST_LIMB(0x70b003fe890a5c75),CNST_LIMB(0xd03aabff83037427),CNST_LIMB(0x13ec4ca72c783bd7),CNST_LIMB(0x90282c06afdbd96f),CNST_LIMB(0x4414ddb9db4a95d5),CNST_LIMB(0xa2c68735ae6832e9),CNST_LIMB(0xbf72d71455676665),CNST_LIMB(0xa8469fab6b759b7f),CNST_LIMB(0xc1e55b56e606caf9),CNST_LIMB(0x40455630fc4a1cff),CNST_LIMB(0x120a7b0046d16f7),CNST_LIMB(0xa7c3553b08faef23),CNST_LIMB(0x9f0bfd1b08d48639),CNST_LIMB(0xa433ffce9a304d37),CNST_LIMB(0xa22ad1d53915c683),CNST_LIMB(0xcb6cbc723ba5dd1d),CNST_LIMB(0x547fb1b8ab9d0ba3),CNST_LIMB(0x547fb1b8ab9d0ba3),CNST_LIMB(0x8f15a826498852e3)\n\n#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x335281867ec241ef)\n#define 
ODD_FACTORIAL_TABLE_LIMIT (25)\n\n#define ODD_FACTORIAL_EXTTABLE_LIMIT (67)\n\n#define ODD_CENTRAL_BINOMIAL_OFFSET (13)\n\n/* This table contains binomial(2k,k)/2^t */\n\n/* It begins with ODD_CENTRAL_BINOMIAL_TABLE_MIN */\n#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE CNST_LIMB(0x13d66b),CNST_LIMB(0x4c842f),CNST_LIMB(0x93ee7d),CNST_LIMB(0x11e9e123),CNST_LIMB(0x22c60053),CNST_LIMB(0x873ae4d1),CNST_LIMB(0x10757bd97),CNST_LIMB(0x80612c6cd),CNST_LIMB(0xfaa556bc1),CNST_LIMB(0x3d3cc24821),CNST_LIMB(0x77cfeb6bbb),CNST_LIMB(0x7550ebd97c7),CNST_LIMB(0xe5f08695caf),CNST_LIMB(0x386120ffce11),CNST_LIMB(0x6eabb28dd6df),CNST_LIMB(0x3658e31c82a8f),CNST_LIMB(0x6ad2050312783),CNST_LIMB(0x1a42902a5af0bf),CNST_LIMB(0x33ac44f881661d),CNST_LIMB(0xcb764f927d82123),CNST_LIMB(0x190c23fa46b93983),CNST_LIMB(0x62b7609e25caf1b9),CNST_LIMB(0xc29cb72925ef2cff)\n#define ODD_CENTRAL_BINOMIAL_TABLE_LIMIT (35)\n\n/* This table contains the inverses of elements in the previous table. */\n#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE CNST_LIMB(0x61e5bd199bb12643),CNST_LIMB(0x78321494dc8342cf),CNST_LIMB(0x4fd348704ebf7ad5),CNST_LIMB(0x7e722ba086ab568b),CNST_LIMB(0xa5fcc124265843db),CNST_LIMB(0x89c4a6b18633f431),CNST_LIMB(0x4daa2c15f8ce9227),CNST_LIMB(0x801c618ca9be9605),CNST_LIMB(0x32dc192f948a441),CNST_LIMB(0xd02b90c2bf3be1),CNST_LIMB(0xd897e8c1749aa173),CNST_LIMB(0x54a234fc01fef9f7),CNST_LIMB(0x83ff2ab4d1ff7a4f),CNST_LIMB(0xa427f1c9b304e2f1),CNST_LIMB(0x9c14595d1793651f),CNST_LIMB(0x883a71c607a7b46f),CNST_LIMB(0xd089863c54bc9f2b),CNST_LIMB(0x9022f6bce5d07f3f),CNST_LIMB(0xbec207e218768c35),CNST_LIMB(0x9d70cb4cbb4f168b),CNST_LIMB(0x3c3d3403828a9d2b),CNST_LIMB(0x7672df58c56bc489),CNST_LIMB(0x1e66ca55d727d2ff)\n\n/* This table contains the values t in the formula binomial(2k,k)/2^t */\n#define CENTRAL_BINOMIAL_2FAC_TABLE 3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3\n\n#else\n\n/* This table contains inverses of odd factorials, modulo 2^GMP_NUMB_BITS */\n\n/* It begins with (2!/2)^-1=1 */\n#define 
ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE CNST_LIMB(0x1),CNST_LIMB(0xaaaaaaab),CNST_LIMB(0xaaaaaaab),CNST_LIMB(0xeeeeeeef),CNST_LIMB(0xa4fa4fa5),CNST_LIMB(0xf2ff2ff3),CNST_LIMB(0xf2ff2ff3),CNST_LIMB(0x53e3771b),CNST_LIMB(0xdd93e49f),CNST_LIMB(0xfcdee63d),CNST_LIMB(0x544a4cbf),CNST_LIMB(0x7ca340fb),CNST_LIMB(0xa417526d),CNST_LIMB(0xd7bd49c3),CNST_LIMB(0xd7bd49c3),CNST_LIMB(0x85294093),CNST_LIMB(0xf259eabb),CNST_LIMB(0xd6dc4fb9),CNST_LIMB(0x915f4325),CNST_LIMB(0x131cead1),CNST_LIMB(0xea76fe13),CNST_LIMB(0x633cd365),CNST_LIMB(0x21144677),CNST_LIMB(0x200b0d0f),CNST_LIMB(0x8c4f9e8b),CNST_LIMB(0x21a42251),CNST_LIMB(0xe03c04e7),CNST_LIMB(0x600211d3),CNST_LIMB(0x4aaacdfd),CNST_LIMB(0x33f4fe63),CNST_LIMB(0x33f4fe63)\n\n#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x260eeeeb)\n#define ODD_FACTORIAL_TABLE_LIMIT (16)\n\n#define ODD_FACTORIAL_EXTTABLE_LIMIT (34)\n\n#define ODD_CENTRAL_BINOMIAL_OFFSET (8)\n\n/* This table contains binomial(2k,k)/2^t */\n\n/* It begins with ODD_CENTRAL_BINOMIAL_TABLE_MIN */\n#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE CNST_LIMB(0x1923),CNST_LIMB(0x2f7b),CNST_LIMB(0xb46d),CNST_LIMB(0x15873),CNST_LIMB(0xa50c7),CNST_LIMB(0x13d66b),CNST_LIMB(0x4c842f),CNST_LIMB(0x93ee7d),CNST_LIMB(0x11e9e123),CNST_LIMB(0x22c60053),CNST_LIMB(0x873ae4d1)\n#define ODD_CENTRAL_BINOMIAL_TABLE_LIMIT (18)\n\n/* This table contains the inverses of elements in the previous table. */\n#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE CNST_LIMB(0x16a2de8b),CNST_LIMB(0x847457b3),CNST_LIMB(0xfa6f7565),CNST_LIMB(0xf0e50cbb),CNST_LIMB(0xdca370f7),CNST_LIMB(0x9bb12643),CNST_LIMB(0xdc8342cf),CNST_LIMB(0x4ebf7ad5),CNST_LIMB(0x86ab568b),CNST_LIMB(0x265843db),CNST_LIMB(0x8633f431)\n\n/* This table contains the values t in the formula binomial(2k,k)/2^t */\n#define CENTRAL_BINOMIAL_2FAC_TABLE 1,2,2,3,2,3,3,4,1,2,2\n\n#endif\n\n/* Algorithm:\n\n   Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)\n   which are then accumulated into mpn numbers.  
The first inner loop\n   accumulates divisor factors, the 2nd inner loop accumulates exactly the same\n   number of dividend factors.  We avoid accumulating more for the divisor,\n   even with its smaller factors, since we else cannot guarantee divisibility.\n\n   Since we know each division will yield an integer, we compute the quotient\n   using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod\n   2^t.\n\n   Improvements:\n\n   (1) An obvious improvement to this code would be to compute mod 2^t\n   everywhere.  Unfortunately, we cannot determine t beforehand, unless we\n   invoke some approximation, such as Stirling's formula.  Of course, we don't\n   need t to be tight.  However, it is not clear that this would help much,\n   our numbers are kept reasonably small already.\n\n   (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.\n   Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,\n   would make it both reasonably accurate and fast.  (We could use a table\n   stored into a limb, perhaps.)  The table should take the removed factors of\n   2 into account (those done on-the-fly in mulN).\n\n   (3) The first time in the loop we compute the odd part of a\n   factorial in kp, we might use oddfac_1 for this task.\n */\n\n/* This threshold determines how large divisor to accumulate before we call\n   bdiv.  Perhaps we should never call bdiv, and accumulate all we are told,\n   since we are just basecase code anyway?  Presumably, this depends on the\n   relative speed of the asymptotically fast code and this code.  */\n#define SOME_THRESHOLD 20\n\n/* Multiply-into-limb functions.  These remove factors of 2 on-the-fly.  FIXME:\n   All versions of MAXFACS don't take this 2 removal into account now, meaning\n   that then, shifting just adds some overhead.  (We remove factors from the\n   completed limb anyway.)  
*/\n\nstatic mp_limb_t\nmul1 (mp_limb_t m)\n{\n  return m;\n}\n\nstatic mp_limb_t\nmul2 (mp_limb_t m)\n{\n  /* We need to shift before multiplying, to avoid an overflow. */\n  mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);\n  return m01;\n}\n\nstatic mp_limb_t\nmul3 (mp_limb_t m)\n{\n  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;\n  mp_limb_t m2 = (m + 2);\n  return m01 * m2;\n}\n\nstatic mp_limb_t\nmul4 (mp_limb_t m)\n{\n  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;\n  mp_limb_t m23 = (m + 2) * (m + 3) >> 1;\n  return m01 * m23;\n}\n\nstatic mp_limb_t\nmul5 (mp_limb_t m)\n{\n  mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1;\n  mp_limb_t m34 = (m + 3) * (m + 4) >> 1;\n  return m012 * m34;\n}\n\nstatic mp_limb_t\nmul6 (mp_limb_t m)\n{\n  mp_limb_t m01 = (m + 0) * (m + 1);\n  mp_limb_t m23 = (m + 2) * (m + 3);\n  mp_limb_t m45 = (m + 4) * (m + 5) >> 1;\n  mp_limb_t m0123 = m01 * m23 >> 3;\n  return m0123 * m45;\n}\n\nstatic mp_limb_t\nmul7 (mp_limb_t m)\n{\n  mp_limb_t m01 = (m + 0) * (m + 1);\n  mp_limb_t m23 = (m + 2) * (m + 3);\n  mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1;\n  mp_limb_t m0123 = m01 * m23 >> 3;\n  return m0123 * m456;\n}\n\nstatic mp_limb_t\nmul8 (mp_limb_t m)\n{\n  mp_limb_t m01 = (m + 0) * (m + 1);\n  mp_limb_t m23 = (m + 2) * (m + 3);\n  mp_limb_t m45 = (m + 4) * (m + 5);\n  mp_limb_t m67 = (m + 6) * (m + 7);\n  mp_limb_t m0123 = m01 * m23 >> 3;\n  mp_limb_t m4567 = m45 * m67 >> 3;\n  return m0123 * m4567;\n}\n\ntypedef mp_limb_t (* mulfunc_t) (mp_limb_t);\n\nstatic const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8};\n#define M (numberof(mulfunc))\n\n/* Number of factors-of-2 removed by the corresponding mulN function.  */\nstatic const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6};\n\n#if 1\n/* This variant is inaccurate but shares the code with other functions.  */\n#define MAXFACS(max,l)\t\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    (max) = log_n_max (l);\t\t\t\t\t\t\\\n  } while (0)\n#else\n\n/* This variant is exact(?) 
but uses a loop.  It takes the 2 removal\n of mulN into account.  */\nstatic const unsigned long ftab[] =\n#if GMP_NUMB_BITS == 64\n  /* 1 to 8 factors per iteration */\n  {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */};\n#endif\n#if GMP_NUMB_BITS == 32\n  /* 1 to 7 factors per iteration */\n  {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */};\n#endif\n\n#define MAXFACS(max,l)\t\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    int __i;\t\t\t\t\t\t\t\t\\\n    for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--)\t\t\\\n      ;\t\t\t\t\t\t\t\t\t\\\n    (max) = __i + 1;\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis\n   is an odd integer. */\nstatic const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };\n\nstatic void\nmpz_bdiv_bin_uiui (mpz_ptr r, mpir_ui n, mpir_ui k)\n{\n  int nmax, kmax, nmaxnow, numfac;\n  mp_ptr np, kp;\n  mp_size_t nn, kn, alloc;\n  mp_limb_t i, j, t, iii, jjj, cy, dinv;\n  mp_bitcnt_t i2cnt, j2cnt;\n  int cnt;\n  mp_size_t maxn;\n  TMP_DECL;\n\n  ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);\n  TMP_MARK;\n\n  maxn = 1 + n / GMP_NUMB_BITS;    /* absolutely largest result size (limbs) */\n\n  /* FIXME: This allocation might be insufficient, but is usually way too\n     large.  
*/\n  alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);\n  alloc = MIN (alloc, k) + 1;\n  np = TMP_ALLOC_LIMBS (alloc);\n  kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1);\n\n  MAXFACS (nmax, n);\n  ASSERT (nmax <= M);\n  MAXFACS (kmax, k);\n  ASSERT (kmax <= M);\n  ASSERT (k >= M);\n\n  i = n - k + 1;\n\n  np[0] = 1; nn = 1;\n\n  i2cnt = 0;\t\t\t\t/* total low zeros in dividend */\n  j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1];\n\t\t\t\t\t/* total low zeros in divisor */\n\n  numfac = 1;\n  j = ODD_FACTORIAL_TABLE_LIMIT + 1;\n  jjj = ODD_FACTORIAL_TABLE_MAX;\n  ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);\n\n  while (1)\n    {\n      kp[0] = jjj;\t\t\t\t/* store new factors */\n      kn = 1;\n      t = k - j + 1;\n      kmax = MIN (kmax, t);\n\n      while (kmax != 0 && kn < SOME_THRESHOLD)\n\t{\n\t  jjj = mulfunc[kmax - 1] (j);\n\t  j += kmax;\t\t\t\t/* number of factors used */\n\t  count_trailing_zeros (cnt, jjj);\t/* count low zeros */\n\t  jjj >>= cnt;\t\t\t\t/* remove remaining low zeros */\n\t  j2cnt += tcnttab[kmax - 1] + cnt;\t/* update low zeros count */\n\t  cy = mpn_mul_1 (kp, kp, kn, jjj);\t/* accumulate new factors */\n\t  kp[kn] = cy;\n\t  kn += cy != 0;\n\t  t = k - j + 1;\n\t  kmax = MIN (kmax, t);\n\t}\n      numfac = j - numfac;\n\n      while (numfac != 0)\n\t{\n\t  nmaxnow = MIN (nmax, numfac);\n\t  iii = mulfunc[nmaxnow - 1] (i);\n\t  i += nmaxnow;\t\t\t\t/* number of factors used */\n\t  count_trailing_zeros (cnt, iii);\t/* count low zeros */\n\t  iii >>= cnt;\t\t\t\t/* remove remaining low zeros */\n\t  i2cnt += tcnttab[nmaxnow - 1] + cnt;\t/* update low zeros count */\n\t  cy = mpn_mul_1 (np, np, nn, iii);\t/* accumulate new factors */\n\t  np[nn] = cy;\n\t  nn += cy != 0;\n\t  numfac -= nmaxnow;\n\t}\n\n      ASSERT (nn < alloc);\n\n      modlimb_invert (dinv, kp[0]);\n      nn += (np[nn - 1] >= kp[kn - 1]);\n      nn -= kn;\n      {\n         mp_limb_t wp[2];\n         \n  
       mpn_sb_bdiv_q (np, wp, np, nn, kp, MIN(kn,nn), dinv);\n      }\n      if (kmax == 0)\n\tbreak;\n      numfac = j;\n\n      jjj = mulfunc[kmax - 1] (j);\n      j += kmax;\t\t\t\t/* number of factors used */\n      count_trailing_zeros (cnt, jjj);\t\t/* count low zeros */\n      jjj >>= cnt;\t\t\t\t/* remove remaining low zeros */\n      j2cnt += tcnttab[kmax - 1] + cnt;\t\t/* update low zeros count */\n    }\n\n  /* Put back the right number of factors of 2.  */\n  cnt = i2cnt - j2cnt;\n  if (cnt != 0)\n    {\n      ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */\n      cy = mpn_lshift (np, np, nn, cnt);\n      np[nn] = cy;\n      nn += cy != 0;\n    }\n\n  nn -= np[nn - 1] == 0;\t/* normalisation */\n\n  kp = MPZ_NEWALLOC (r, nn);\n  SIZ(r) = nn;\n  MPN_COPY (kp, np, nn);\n  TMP_FREE;\n}\n\nstatic void\nmpz_smallk_bin_uiui (mpz_ptr r, mpir_ui n, mpir_ui k)\n{\n  int nmax, numfac;\n  mp_ptr rp;\n  mp_size_t rn, alloc;\n  mp_limb_t i, iii, cy;\n  mp_bitcnt_t i2cnt, cnt;\n\n  count_leading_zeros (cnt, (mp_limb_t) n);\n  cnt = GMP_LIMB_BITS - cnt;\n  alloc = cnt * k / GMP_NUMB_BITS + 3;\t/* FIXME: ensure rounding is enough. 
*/\n  rp = MPZ_NEWALLOC (r, alloc);\n\n  MAXFACS (nmax, n);\n  nmax = MIN (nmax, M);\n\n  i = n - k + 1;\n\n  nmax = MIN (nmax, k);\n  rp[0] = mulfunc[nmax - 1] (i);\n  rn = 1;\n  i += nmax;\t\t\t\t/* number of factors used */\n  i2cnt = tcnttab[nmax - 1];\t\t/* low zeros count */\n  numfac = k - nmax;\n  while (numfac != 0)\n    {\n      nmax = MIN (nmax, numfac);\n      iii = mulfunc[nmax - 1] (i);\n      i += nmax;\t\t\t/* number of factors used */\n      i2cnt += tcnttab[nmax - 1];\t/* update low zeros count */\n      cy = mpn_mul_1 (rp, rp, rn, iii);\t/* accumulate new factors */\n      rp[rn] = cy;\n      rn += cy != 0;\n      numfac -= nmax;\n    }\n\n  ASSERT (rn < alloc);\n\n  mpn_divrem_hensel_rsh_qr_1_preinv (rp, rp, rn, __gmp_oddfac_table[k],\n\t\t    facinv[k - 2], __gmp_fac2cnt_table[k / 2 - 1] - i2cnt); \n  /* A two-fold, branch-free normalisation is possible :*/\n  /* rn -= rp[rn - 1] == 0; */\n  /* rn -= rp[rn - 1] == 0; */\n  MPN_NORMALIZE_NOT_ZERO (rp, rn);\n\n  SIZ(r) = rn;\n}\n\n/* Algorithm:\n\n   Plain and simply multiply things together.\n\n   We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such\n   that k!/2^t is odd).\n\n*/\n\nstatic mp_limb_t\nbc_bin_uiui (mpir_ui n, mpir_ui k)\n{\n  return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])\n    << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))\n    & GMP_NUMB_MASK;\n}\n\n/* Algorithm:\n\n   Recursively exploit the relation\n   bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) .\n\n   Values for binomial(k,k>>1) that fit in a limb are precomputed\n   (with inverses).\n*/\n\n/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =\n   binomial(i*2,i)/2^t (where t is chosen so that it is odd). 
*/\nstatic const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };\n\n/* bin2kkinv[i] = bin2kk[i]^-1 mod B */\nstatic const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };\n\n/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */\nstatic const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };\n\nstatic void\nmpz_smallkdc_bin_uiui (mpz_ptr r, mpir_ui n, mpir_ui k)\n{\n  mp_ptr rp;\n  mp_size_t rn;\n  mpir_ui hk;\n\n  hk = k >> 1;\n\n  if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)\n    mpz_smallk_bin_uiui (r, n, hk);\n  else\n    mpz_smallkdc_bin_uiui (r, n, hk);\n  k -= hk;\n  n -= hk;\n  if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {\n    mp_limb_t cy;\n    rn = SIZ (r);\n    rp = MPZ_REALLOC (r, rn + 1);\n    cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));\n    rp [rn] = cy;\n    rn += cy != 0;\n  } else {\n    mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];\n    mpz_t t;\n\n    ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;\n    PTR (t) = buffer;\n    if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)\n      mpz_smallk_bin_uiui (t, n, k);\n    else\n      mpz_smallkdc_bin_uiui (t, n, k);\n    mpz_mul (r, r, t);\n    rp = PTR (r);\n    rn = SIZ (r);\n  }\n\n  mpn_divrem_hensel_rsh_qr_1_preinv (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],\n\t\t   bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],\n\t\t    fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));\n  /* A two-fold, branch-free normalisation is possible :*/\n  /* rn -= rp[rn - 1] == 0; */\n  /* rn -= rp[rn - 1] == 0; */\n  MPN_NORMALIZE_NOT_ZERO (rp, rn);\n\n  SIZ(r) = rn;\n}\n\n/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).\n *\n * Contributed to the GNU project by Marco Bodrato.\n *\n * Implementation of the algorithm by P. Goetgheluck, \"Computing\n * Binomial Coefficients\", The American Mathematical Monthly, Vol. 94,\n * No. 4 (April 1987), pp. 
360-365.\n *\n * Acknowledgment: Peter Luschny did spot the slowness of the previous\n * code and suggested the reference.\n */\n\n/* TODO: Remove duplicated constants / macros / static functions...\n */\n\n/*************************************************************/\n/* Section macros: common macros, for swing/fac/bin (&sieve) */\n/*************************************************************/\n\n#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)\t\t\t\\\n  if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n    (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n    (PR) = 1;\t\t\t\t\t\t\t\\\n  }\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\t\t\t\\\n    __max_i = (end);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\\\n      ++__i;\t\t\t\t\t\t\t\\\n      if (((sieve)[__index] & __mask) == 0)\t\t\t\\\n\t{\t\t\t\t\t\t\t\\\n\t  (prime) = id_to_n(__i)\n\n#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    mp_limb_t __mask, __index, __max_i, __i;\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    __i = (start)-(off);\t\t\t\t\t\\\n    __index = __i / GMP_LIMB_BITS;\t\t\t\t\\\n    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);\t\t\\\n    __i += (off);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\n\n#define LOOP_ON_SIEVE_STOP\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\\\n      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);\t\\\n      __index += __mask & 1;\t\t\t\t\t\\\n    }  while (__i <= __max_i)\t\t\t\t\t\\\n\n#define LOOP_ON_SIEVE_END\t\t\t\t\t\\\n    LOOP_ON_SIEVE_STOP;\t\t\t\t\t\t\\\n  } while (0)\n\n/*********************************************************/\n/* Section sieve: sieving functions and tools for primes 
*/\n/*********************************************************/\n\n#if WANT_ASSERT\nstatic mp_limb_t\nbit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }\n#endif\n\n/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/\nstatic mp_limb_t\nid_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }\n\n/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */\nstatic mp_limb_t\nn_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }\n\nstatic mp_size_t\nprimesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }\n\n/*********************************************************/\n/* Section binomial: fast binomial implementation        */\n/*********************************************************/\n\n#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)\t\\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __a, __b, __prime, __ma,__mb;\t\t\\\n    __prime = (P);\t\t\t\t\t\\\n    __a = (N); __b = (K); __mb = 0;\t\t\t\\\n    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);\t\t\\\n    do {\t\t\t\t\t\t\\\n      __mb += __b % __prime; __b /= __prime;\t\t\\\n      __ma = __a % __prime; __a /= __prime;\t\t\\\n      if (__ma < __mb) {\t\t\t\t\\\n        __mb = 1; (PR) *= __prime;\t\t\t\\\n      } else  __mb = 0;\t\t\t\t\t\\\n    } while (__a >= __prime);\t\t\t\t\\\n  } while (0)\n\n#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)\t\\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __prime;\t\t\t\t\t\\\n    __prime = (P);\t\t\t\t\t\\\n    if (((N) % __prime) < ((K) % __prime)) {\t\t\\\n      FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I);\t\\\n    }\t\t\t\t\t\t\t\\\n  } while (0)\n\n/* Returns an approximation of the square root of x.  
*\n * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */\nstatic mp_limb_t\nlimb_apprsqrt (mp_limb_t x)\n{\n  int s;\n\n  ASSERT (x > 2);\n  count_leading_zeros (s, x - 1);\n  s = GMP_LIMB_BITS - 1 - s;\n  return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));\n}\n\nstatic void\nmpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)\n{\n  mp_limb_t *sieve, *factors, count;\n  mp_limb_t prod, max_prod, j;\n  TMP_DECL;\n\n  ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);\n  ASSERT (n >= 25);\n\n  TMP_MARK;\n  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));\n\n  count = gmp_primesieve (sieve, n) + 1;\n  factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);\n\n  max_prod = GMP_NUMB_MAX / n;\n\n  /* Handle primes = 2, 3 separately. */\n  popc_limb (count, n - k);\n  popc_limb (j, k);\n  count += j;\n  popc_limb (j, n);\n  count -= j;\n  prod = CNST_LIMB(1) << count;\n\n  j = 0;\n  COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);\n\n  /* Accumulate prime factors from 5 to n/2 */\n    {\n      mp_limb_t s;\n\n      {\n\tmp_limb_t prime;\n\ts = limb_apprsqrt(n);\n\ts = n_to_bit (s);\n\tLOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);\n\tCOUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);\n\tLOOP_ON_SIEVE_END;\n\ts++;\n      }\n\n      ASSERT (max_prod <= GMP_NUMB_MAX / 2);\n      max_prod <<= 1;\n      ASSERT (bit_to_n (s) * bit_to_n (s) > n);\n      ASSERT (s <= n_to_bit (n >> 1));\n      {\n\tmp_limb_t prime;\n\n\tLOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve);\n\tSH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);\n\tLOOP_ON_SIEVE_END;\n      }\n      max_prod >>= 1;\n    }\n\n  /* Store primes from (n-k)+1 to n */\n  ASSERT (n_to_bit (n - k) < n_to_bit (n));\n    {\n      mp_limb_t prime;\n      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);\n      FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);\n      LOOP_ON_SIEVE_END;\n    }\n\n  if (LIKELY (j != 0))\n    {\n      
factors[j++] = prod;\n      mpz_prodlimbs (r, factors, j);\n    }\n  else\n    {\n      PTR (r)[0] = prod;\n      SIZ (r) = 1;\n    }\n  TMP_FREE;\n}\n\n#undef COUNT_A_PRIME\n#undef SH_COUNT_A_PRIME\n#undef LOOP_ON_SIEVE_END\n#undef LOOP_ON_SIEVE_STOP\n#undef LOOP_ON_SIEVE_BEGIN\n#undef LOOP_ON_SIEVE_CONTINUE\n\n/*********************************************************/\n/* End of implementation of Goetgheluck's algorithm      */\n/*********************************************************/\n\nvoid\nmpz_bin_uiui (mpz_ptr r, mpir_ui n, mpir_ui k)\n{\n  if (UNLIKELY (n < k)) {\n    SIZ (r) = 0;\n#if BITS_PER_ULONG > GMP_NUMB_BITS\n  } else if (UNLIKELY (n > GMP_NUMB_MAX)) {\n    mpz_t tmp;\n\n    mpz_init_set_ui (tmp, n);\n    mpz_bin_ui (r, tmp, k);\n    mpz_clear (tmp);\n#endif\n  } else {\n    ASSERT (n <= GMP_NUMB_MAX);\n    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */\n    k = MIN (k, n - k);\n    if (k < 2) {\n      PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */\n      SIZ(r) = 1;\n    } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */\n      PTR(r)[0] = bc_bin_uiui (n, k);\n      SIZ(r) = 1;\n    } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)\n      mpz_smallk_bin_uiui (r, n, k);\n    else if (BIN_UIUI_ENABLE_SMALLDC &&\n\t     k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)\n      mpz_smallkdc_bin_uiui (r, n, k);\n    else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&\n\t     k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */\n      mpz_goetgheluck_bin_uiui (r, n, k);\n    else\n      mpz_bdiv_bin_uiui (r, n, k);\n  }\n}\n"
  },
  {
    "path": "mpz/cdiv_q.c",
    "content": "/* mpz_cdiv_q -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t dividend_size = dividend->_mp_size;\n  mp_size_t divisor_size = divisor->_mp_size;\n  mpz_t rem;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  MPZ_TMP_INIT (rem, ABS (divisor_size));\n\n  mpz_tdiv_qr (quot, rem, dividend, divisor);\n\n  if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0)\n    mpz_add_ui (quot, quot, 1L);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/cdiv_q_ui.c",
    "content": "/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.  In order to make it\n   always fit into the return type, the negative of the true remainder is\n   returned.\n\nCopyright 1994, 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  qp[0] = 0;\n\t  rl = np[0];\n\t  qn = 1;\t\t/* a white lie, fixed below */\n\t}\n      else\n\t{\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t  qn = nn - 2 + 1; \n\t}\n\n      if (rl != 0 && ns >= 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t}\n\n      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n\n      if (rl != 0 && ns >= 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t}\n\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/cdiv_qr.c",
    "content": "/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t divisor_size = divisor->_mp_size;\n  mp_size_t xsize;\n  mpz_t temp_divisor;\t\t/* N.B.: lives until function returns! */\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* We need the original value of the divisor after the quotient and\n     remainder have been preliminary calculated.  We have to copy it to\n     temporary space if it's the same variable as either QUOT or REM.  */\n  if (quot == divisor || rem == divisor)\n    {\n      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));\n      mpz_set (temp_divisor, divisor);\n      divisor = temp_divisor;\n    }\n\n  xsize = dividend->_mp_size ^ divisor_size;;\n  mpz_tdiv_qr (quot, rem, dividend, divisor);\n\n  if (xsize >= 0 && rem->_mp_size != 0)\n    {\n      mpz_add_ui (quot, quot, 1L);\n      mpz_sub (rem, rem, divisor);\n    }\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/cdiv_qr_ui.c",
    "content": "/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.  In order to make it\n   always fit into the return type, the negative of the true remainder is\n   returned.\n\nCopyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp;\n      mp_size_t rn;\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  qp[0] = 0;\n\t  qn = 1;\t\t/* a white lie, fixed below */\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t  qn = nn - 2 + 1; \n\t}\n\n      if (rl != 0 && ns >= 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = -rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  if (ns >= 0)\n\t    {\n\t      mpn_incr_u (qp, (mp_limb_t) 1);\n\t      rl = divisor - rl;\n\t    }\n\n\t  PTR(rem)[0] = rl;\n\t  SIZ(rem) = -(rl != 0);\n\t}\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/cdiv_r.c",
    "content": "/* mpz_cdiv_r -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t divisor_size = divisor->_mp_size;\n  mpz_t temp_divisor;\t\t/* N.B.: lives until function returns! */\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* We need the original value of the divisor after the remainder has been\n     preliminary calculated.  We have to copy it to temporary space if it's\n     the same variable as REM.  */\n  if (rem == divisor)\n    {\n\n      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));\n      mpz_set (temp_divisor, divisor);\n      divisor = temp_divisor;\n    }\n\n  mpz_tdiv_r (rem, dividend, divisor);\n\n  if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0)\n    mpz_sub (rem, rem, divisor);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/cdiv_r_ui.c",
    "content": "/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.  In order to make it\n   always fit into the return type, the negative of the true remainder is\n   returned.\n\nCopyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp, qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  TMP_MARK;\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  TMP_FREE;\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t}\n\n      if (rl != 0 && ns >= 0)\n\t{\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = -rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  if (ns >= 0)\n\t    rl = divisor - rl;\n\n\t  PTR(rem)[0] = rl;\n\t  SIZ(rem) = -1;\n\t}\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/cdiv_ui.c",
    "content": "/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity.  The\n   remainder gets the opposite sign as the denominator.  In order to make it\n   always fit into the return type, the negative of the true remainder is\n   returned.\n\nCopyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_cdiv_ui (mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n      mp_ptr qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  TMP_MARK;\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  TMP_FREE;\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t}\n\n      if (rl != 0 && ns >= 0)\n\t{\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\t;\n      else\n\t{\n\t  if (ns >= 0)\n\t    rl = divisor - rl;\n\t}\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/cfdiv_q_2exp.c",
    "content": "/* mpz_cdiv_q_2exp, mpz_fdiv_q_2exp -- quotient from mpz divided by 2^n.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* dir==1 for ceil, dir==-1 for floor */\n\nstatic void __gmpz_cfdiv_q_2exp(REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)) REGPARM_ATTR (1);\n#define cfdiv_q_2exp(w,u,cnt,dir)  __gmpz_cfdiv_q_2exp (REGPARM_3_1 (w,u,cnt,dir))\n\nREGPARM_ATTR (1) static void\ncfdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)\n{\n  mp_size_t  wsize, usize, abs_usize, limb_cnt, i;\n  mp_srcptr  up;\n  mp_ptr     wp;\n  mp_limb_t  round, rmask;\n\n  usize = SIZ (u);\n  abs_usize = ABS (usize);\n  limb_cnt = cnt / GMP_NUMB_BITS;\n  wsize = abs_usize - limb_cnt;\n  if (wsize <= 0)\n    {\n      /* u < 2**cnt, so result 1, 0 or -1 according to rounding */\n      PTR(w)[0] = 1;\n      SIZ(w) = (usize == 0 || (usize ^ dir) < 0 ? 0 : dir);\n      return;\n    }\n\n  /* +1 limb to allow for mpn_add_1 below */\n  MPZ_REALLOC (w, wsize+1);\n\n  /* Check for rounding if direction matches u sign.\n     Set round if we're skipping non-zero limbs.  */\n  up = PTR(u);\n  round = 0;\n  rmask = ((usize ^ dir) >= 0 ? MP_LIMB_T_MAX : 0);\n  if (rmask != 0)\n    for (i = 0; i < limb_cnt && round == 0; i++)\n      round = up[i];\n\n  wp = PTR(w);\n  cnt %= GMP_NUMB_BITS;\n  if (cnt != 0)\n    {\n      round |= rmask & mpn_rshift (wp, up + limb_cnt, wsize, cnt);\n      wsize -= (wp[wsize - 1] == 0);\n    }\n  else\n    MPN_COPY_INCR (wp, up + limb_cnt, wsize);\n\n  if (round != 0)\n    {\n      if (wsize != 0)\n\t{\n          mp_limb_t cy;\n\t  cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));\n\t  wp[wsize] = cy;\n\t  wsize += cy;\n\t}\n      else\n\t{\n\t  /* We shifted something to zero.  */\n\t  wp[0] = 1;\n\t  wsize = 1;\n\t}\n    }\n  SIZ(w) = (usize >= 0 ? wsize : -wsize);\n}\n\n\nvoid\nmpz_cdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  cfdiv_q_2exp (w, u, cnt, 1);\n}\n\nvoid\nmpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  cfdiv_q_2exp (w, u, cnt, -1);\n}\n"
  },
  {
    "path": "mpz/cfdiv_r_2exp.c",
    "content": "/* mpz_cdiv_r_2exp, mpz_fdiv_r_2exp -- remainder from mpz divided by 2^n.\n\nCopyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Bit mask of \"n\" least significant bits of a limb. */\n#define LOW_MASK(n)   ((CNST_LIMB(1) << (n)) - 1)\n\n\n/* dir==1 for ceil, dir==-1 for floor */\n\nstatic void __gmpz_cfdiv_r_2exp(REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)) REGPARM_ATTR (1);\n#define cfdiv_r_2exp(w,u,cnt,dir)  __gmpz_cfdiv_r_2exp (REGPARM_3_1 (w, u, cnt, dir))\n\nREGPARM_ATTR (1) static void\ncfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)\n{\n  mp_size_t  usize, abs_usize, limb_cnt, i;\n  mp_srcptr  up;\n  mp_ptr     wp;\n  mp_limb_t  high;\n\n  usize = SIZ(u);\n  if (usize == 0)\n    {\n      SIZ(w) = 0;\n      return;\n    }\n\n  limb_cnt = cnt / GMP_NUMB_BITS;\n  cnt %= GMP_NUMB_BITS;\n  abs_usize = ABS (usize);\n\n  /* MPZ_REALLOC(w) below is only when w!=u, so we can fetch PTR(u) here\n     nice and early */\n  up = PTR(u);\n\n  if ((usize ^ dir) < 0)\n    {\n      /* Round towards zero, means just truncate */\n\n      if (w == u)\n        {\n          /* if already smaller than limb_cnt then do nothing */\n          if (abs_usize <= limb_cnt)\n            return;\n          wp = PTR(w);\n        }\n      else\n        {\n          i = MIN (abs_usize, limb_cnt+1);\n          MPZ_REALLOC (w, i);\n          wp = PTR(w);\n          MPN_COPY (wp, up, i);\n\n          /* if smaller than limb_cnt then only the copy is needed */\n          if (abs_usize <= limb_cnt)\n            {\n              SIZ(w) = usize;\n              return;\n            }\n        }\n    }\n  else\n    {\n      /* Round away from zero, means twos complement if non-zero */\n\n      /* if u!=0 and smaller than divisor, then must negate */\n      if (abs_usize <= limb_cnt)\n        goto negate;\n\n      /* if non-zero low limb, then must negate */\n      for (i = 0; i < limb_cnt; i++)\n        if (up[i] != 0)\n          goto negate;\n\n      /* if non-zero partial limb, then must negate */\n      if ((up[limb_cnt] & LOW_MASK (cnt)) != 0)\n        goto negate;\n\n      /* otherwise low bits of u are zero, so that's the result */\n      SIZ(w) = 0;\n      return;\n\n    negate:\n      /* twos complement negation to get 2**cnt-u */\n\n      MPZ_REALLOC (w, limb_cnt+1);\n      up = PTR(u);\n      wp = PTR(w);\n\n      /* Ones complement */\n      i = MIN (abs_usize, limb_cnt+1);\n      mpn_com_n (wp, up, i);\n      for ( ; i <= limb_cnt; i++)\n        wp[i] = GMP_NUMB_MAX;\n\n      /* Twos complement.  Since u!=0 in the relevant part, the twos\n         complement never gives 0 and a carry, so can use MPN_INCR_U. */\n      MPN_INCR_U (wp, limb_cnt+1, CNST_LIMB(1));\n\n      usize = -usize;\n    }\n\n  /* Mask the high limb */\n  high = wp[limb_cnt];\n  high &= LOW_MASK (cnt);\n  wp[limb_cnt] = high;\n\n  /* Strip any consequent high zeros */\n  while (high == 0)\n    {\n      limb_cnt--;\n      if (limb_cnt < 0)\n        {\n          SIZ(w) = 0;\n          return;\n        }\n      high = wp[limb_cnt];\n    }\n\n  limb_cnt++;\n  SIZ(w) = (usize >= 0 ? limb_cnt : -limb_cnt);\n}\n\n\nvoid\nmpz_cdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  cfdiv_r_2exp (w, u, cnt, 1);\n}\n\nvoid\nmpz_fdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  cfdiv_r_2exp (w, u, cnt, -1);\n}\n"
  },
  {
    "path": "mpz/clear.c",
    "content": "/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of\n   an integer.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_clear (mpz_ptr m)\n{\n  (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);\n}\n"
  },
  {
    "path": "mpz/clears.c",
    "content": "/* mpz_clears() -- Clear multiple mpz_t variables.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpz_clears (mpz_ptr x, ...)\n#else\nmpz_clears (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpz_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpz_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpz_clear (x);\n      x = va_arg (ap, mpz_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpz/clrbit.c",
    "content": "/* mpz_clrbit -- clear a specified bit.\n\nCopyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_index)\n{\n  mp_size_t dsize = d->_mp_size;\n  mp_ptr dp = d->_mp_d;\n  mp_size_t limb_index;\n\n  limb_index = bit_index / GMP_NUMB_BITS;\n  if (dsize >= 0)\n    {\n      if (limb_index < dsize)\n\t{\n          mp_limb_t  dlimb;\n          dlimb = dp[limb_index];\n          dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));\n          dp[limb_index] = dlimb;\n\n          if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))\n            {\n              /* high limb became zero, must normalize */\n              do {\n                dsize--;\n              } while (dsize > 0 && dp[dsize-1] == 0);\n              d->_mp_size = dsize;\n            }\n\t}\n      else\n\t;\n    }\n  else\n    {\n      mp_size_t zero_bound;\n\n      /* Simulate two's complement arithmetic, i.e. simulate\n\t 1. Set OP = ~(OP - 1) [with infinitely many leading ones].\n\t 2. clear the bit.\n\t 3. Set OP = ~OP + 1.  */\n\n      dsize = -dsize;\n\n      /* No upper bound on this loop, we're sure there's a non-zero limb\n\t sooner or later.  */\n      for (zero_bound = 0; ; zero_bound++)\n\tif (dp[zero_bound] != 0)\n\t  break;\n\n      if (limb_index > zero_bound)\n\t{\n\t  if (limb_index < dsize)\n\t    dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);\n\t  else\n\t    {\n\t      /* Ugh.  The bit should be cleared outside of the end of the\n\t\t number.  We have to increase the size of the number.  */\n\t      if (UNLIKELY (d->_mp_alloc < limb_index + 1))\n                dp = _mpz_realloc (d, limb_index + 1);\n\n\t      MPN_ZERO (dp + dsize, limb_index - dsize);\n\t      dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);\n\t      d->_mp_size = -(limb_index + 1);\n\t    }\n\t}\n      else if (limb_index == zero_bound)\n\t{\n\t  dp[limb_index] = ((((dp[limb_index] - 1)\n\t\t\t      | ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1)\n\t\t\t    & GMP_NUMB_MASK);\n\t  if (dp[limb_index] == 0)\n\t    {\n\t      mp_size_t i;\n\t      for (i = limb_index + 1; i < dsize; i++)\n\t\t{\n\t\t  dp[i] = (dp[i] + 1) & GMP_NUMB_MASK;\n\t\t  if (dp[i] != 0)\n\t\t    goto fin;\n\t\t}\n\t      /* We got carry all way out beyond the end of D.  Increase\n\t\t its size (and allocation if necessary).  */\n\t      dsize++;\n\t      if (UNLIKELY (d->_mp_alloc < dsize))\n                dp = _mpz_realloc (d, dsize);\n\n\t      dp[i] = 1;\n\t      d->_mp_size = -dsize;\n\t    fin:;\n\t    }\n\t}\n      else\n\t;\n    }\n}\n"
  },
  {
    "path": "mpz/cmp.c",
    "content": "/* mpz_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative\n   based on if U > V, U == V, or U < V.\n\nCopyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_cmp (mpz_srcptr u, mpz_srcptr v)\n{\n  mp_size_t  usize, vsize, dsize, asize;\n  mp_srcptr  up, vp;\n  int        cmp;\n\n  usize = SIZ(u);\n  vsize = SIZ(v);\n  dsize = usize - vsize;\n  if (dsize != 0)\n    return dsize;\n\n  asize = ABS (usize);\n  up = PTR(u);\n  vp = PTR(v);\n  MPN_CMP (cmp, up, vp, asize);\n  return (usize >= 0 ? cmp : -cmp);\n}\n"
  },
  {
    "path": "mpz/cmp_d.c",
    "content": "/* mpz_cmp_d -- compare absolute values of mpz and double.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#define RETURN_CMP(zl, dl)                      \\\n  do {                                          \\\n    zlimb = (zl);                               \\\n    dlimb = (dl);                               \\\n    if (zlimb != dlimb)                         \\\n      return (zlimb >= dlimb ? ret : -ret);     \\\n  } while (0)\n\n#define RETURN_NONZERO(ptr, size, val)          \\\n  do {                                          \\\n    mp_size_t __i;                              \\\n    for (__i = (size)-1; __i >= 0; __i--)       \\\n      if ((ptr)[__i] != 0)                      \\\n        return val;                             \\\n    return 0;                                   \\\n  } while (0)\n\n\nint\nmpz_cmp_d (mpz_srcptr z, double d)\n{\n  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;\n  mp_srcptr  zp;\n  mp_size_t  zsize;\n  int        dexp, ret;\n\n  /* d=NaN is an invalid operation, there's no sensible return value.\n     d=Inf or -Inf is always bigger than z.  */\n  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), goto z_zero);\n\n  /* 1. Either operand zero. */\n  zsize = SIZ(z);\n  if (d == 0.0)\n    return zsize;\n  if (zsize == 0)\n    {\n    z_zero:\n      return (d < 0.0 ? 1 : -1);\n    }\n\n  /* 2. Opposite signs. */\n  if (zsize >= 0)\n    {\n      if (d < 0.0)\n        return 1;    /* >=0 cmp <0 */\n      ret = 1;\n    }\n  else\n    {\n      if (d >= 0.0)\n        return -1;   /* <0 cmp >=0 */\n      ret = -1;\n      d = -d;\n      zsize = -zsize;\n    }\n\n  /* 3. Small d, knowing abs(z) >= 1. */\n  if (d < 1.0)\n    return ret;\n\n  dexp = __gmp_extract_double (darray, d);\n  ASSERT (dexp >= 1);\n\n  /* 4. Check for different high limb positions. */\n  if (zsize != dexp)\n    return (zsize >= dexp ? ret : -ret);\n\n  /* 5. Limb data. */\n  zp = PTR(z);\n\n#if LIMBS_PER_DOUBLE == 2\n  RETURN_CMP (zp[zsize-1], darray[1]);\n  if (zsize == 1)\n    return (darray[0] != 0 ? -ret : 0);\n\n  RETURN_CMP (zp[zsize-2], darray[0]);\n  RETURN_NONZERO (zp, zsize-2, ret);\n#endif\n\n#if LIMBS_PER_DOUBLE == 3\n  RETURN_CMP (zp[zsize-1], darray[2]);\n  if (zsize == 1)\n    return ((darray[0] | darray[1]) != 0 ? -ret : 0);\n\n  RETURN_CMP (zp[zsize-2], darray[1]);\n  if (zsize == 2)\n    return (darray[0] != 0 ? -ret : 0);\n\n  RETURN_CMP (zp[zsize-3], darray[0]);\n  RETURN_NONZERO (zp, zsize-3, ret);\n#endif\n\n#if LIMBS_PER_DOUBLE >= 4\n  {\n    int i;\n    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)\n      {\n\tRETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);\n\tif (i >= zsize)\n\t  RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -ret);\n      }\n    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, ret);\n  }\n#endif\n}\n"
  },
  {
    "path": "mpz/cmp_si.c",
    "content": "/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.\n   Return positive, zero, or negative based on if U > V, U == V, or U < V.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\n_mpz_cmp_si (mpz_srcptr u, mpir_si v_digit)\n{\n  mp_size_t usize = u->_mp_size;\n  mp_size_t vsize;\n  mp_limb_t u_digit;\n\n#if GMP_NAIL_BITS != 0\n  /* FIXME.  This isn't very pretty.  */\n  mpz_t tmp;\n  mp_limb_t tt[2];\n  PTR(tmp) = tt;\n  ALLOC(tmp) = 2;\n  mpz_set_si (tmp, v_digit);\n  return mpz_cmp (u, tmp);\n#endif\n\n  vsize = 0;\n  if (v_digit > 0)\n    vsize = 1;\n  else if (v_digit < 0)\n    {\n      vsize = -1;\n      v_digit = -v_digit;\n    }\n\n  if (usize != vsize)\n    return usize - vsize;\n\n  if (usize == 0)\n    return 0;\n\n  u_digit = u->_mp_d[0];\n\n  if (u_digit == (mp_limb_t) (mpir_ui) v_digit)\n    return 0;\n\n  if (u_digit > (mp_limb_t) (mpir_ui) v_digit)\n    return usize;\n  else\n    return -usize;\n}\n"
  },
  {
    "path": "mpz/cmp_ui.c",
    "content": "/* mpz_cmp_ui.c -- Compare a mpz_t a with an mp_limb_t b.  Return positive,\n  zero, or negative based on if a > b, a == b, or a < b.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\n_mpz_cmp_ui (mpz_srcptr u, mpir_ui v_digit)\n{\n  mp_ptr up;\n  mp_size_t un;\n  mp_limb_t ul;\n\n  up = PTR(u);\n  un = SIZ(u);\n\n  if (un == 0)\n    return -(v_digit != 0);\n\n  if (un == 1)\n    {\n      ul = up[0];\n      if (ul > v_digit)\n\treturn 1;\n      if (ul < v_digit)\n\treturn -1;\n      return 0;\n    }\n\n#if GMP_NAIL_BITS != 0\n  if (v_digit > GMP_NUMB_MAX)\n    {\n      if (un == 2)\n\t{\n\t  ul = up[0] + (up[1] << GMP_NUMB_BITS);\n\n\t  if ((up[1] >> GMP_NAIL_BITS) != 0)\n\t    return 1;\n\n\t  if (ul > v_digit)\n\t    return 1;\n\t  if (ul < v_digit)\n\t    return -1;\n\t  return 0;\n\t}\n    }\n#endif\n\n  return un > 0 ? 1 : -1;\n}\n"
  },
  {
    "path": "mpz/cmpabs.c",
    "content": "/* mpz_cmpabs(u,v) -- Compare U, V.  Return positive, zero, or negative\n   based on if U > V, U == V, or U < V.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\nmpz_cmpabs (mpz_srcptr u, mpz_srcptr v)\n{\n  mp_size_t  usize, vsize, dsize;\n  mp_srcptr  up, vp;\n  int        cmp;\n\n  usize = ABSIZ (u);\n  vsize = ABSIZ (v);\n  dsize = usize - vsize;\n  if (dsize != 0)\n    return dsize;\n\n  up = PTR(u);\n  vp = PTR(v);\n  MPN_CMP (cmp, up, vp, usize);\n  return cmp;\n}\n"
  },
  {
    "path": "mpz/cmpabs_d.c",
    "content": "/* mpz_cmpabs_d -- compare absolute values of mpz and double.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#define RETURN_CMP(zl, dl)              \\\n  do {                                  \\\n    zlimb = (zl);                       \\\n    dlimb = (dl);                       \\\n    if (zlimb != dlimb)                 \\\n      return (zlimb >= dlimb ? 
1 : -1); \\\n  } while (0)\n\n#define RETURN_NONZERO(ptr, size, val)          \\\n  do {                                          \\\n    mp_size_t __i;                              \\\n    for (__i = (size)-1; __i >= 0; __i--)       \\\n      if ((ptr)[__i] != 0)                      \\\n        return val;                             \\\n    return 0;                                   \\\n  } while (0)\n\n\nint\nmpz_cmpabs_d (mpz_srcptr z, double d)\n{\n  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;\n  mp_srcptr  zp;\n  mp_size_t  zsize;\n  int        dexp;\n\n  /* d=NaN is an invalid operation, there's no sensible return value.\n     d=Inf or -Inf is always bigger than z.  */\n  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), return -1);\n\n  /* 1. Check for either operand zero. */\n  zsize = SIZ(z);\n  if (d == 0.0)\n    return (zsize != 0);\n  if (zsize == 0)\n    return (d != 0 ? -1 : 0);\n\n  /* 2. Ignore signs. */\n  zsize = ABS(zsize);\n  d = ABS(d);\n\n  /* 3. Small d, knowing abs(z) >= 1. */\n  if (d < 1.0)\n    return 1;\n\n  dexp = __gmp_extract_double (darray, d);\n  ASSERT (dexp >= 1);\n\n  /* 4. Check for different high limb positions. */\n  if (zsize != dexp)\n    return (zsize >= dexp ? 1 : -1);\n\n  /* 5. Limb data. */\n  zp = PTR(z);\n\n#if LIMBS_PER_DOUBLE == 2\n  RETURN_CMP (zp[zsize-1], darray[1]);\n  if (zsize == 1)\n    return (darray[0] != 0 ? -1 : 0);\n\n  RETURN_CMP (zp[zsize-2], darray[0]);\n  RETURN_NONZERO (zp, zsize-2, 1);\n#endif\n\n#if LIMBS_PER_DOUBLE == 3\n  RETURN_CMP (zp[zsize-1], darray[2]);\n  if (zsize == 1)\n    return ((darray[0] | darray[1]) != 0 ? -1 : 0);\n\n  RETURN_CMP (zp[zsize-2], darray[1]);\n  if (zsize == 2)\n    return (darray[0] != 0 ? 
-1 : 0);\n\n  RETURN_CMP (zp[zsize-3], darray[0]);\n  RETURN_NONZERO (zp, zsize-3, 1);\n#endif\n\n#if LIMBS_PER_DOUBLE >= 4\n  {\n    int i;\n    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)\n      {\n\tRETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);\n\tif (i >= zsize)\n\t  RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -1);\n      }\n    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, 1);\n  }\n#endif\n}\n"
  },
  {
    "path": "mpz/cmpabs_ui.c",
    "content": "/* mpz_cmpabs_ui.c -- Compare a mpz_t a with an mp_limb_t b.  Return positive,\n  zero, or negative based on if a > b, a == b, or a < b.\n\nCopyright 1991, 1993, 1994, 1995, 1997, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_cmpabs_ui (mpz_srcptr u, mpir_ui v_digit)\n{\n  mp_ptr up;\n  mp_size_t un;\n  mp_limb_t ul;\n\n  up = PTR(u);\n  un = SIZ(u);\n\n  if (un == 0)\n    return -(v_digit != 0);\n\n  un = ABS (un);\n\n  if (un == 1)\n    {\n      ul = up[0];\n      if (ul > v_digit)\n\treturn 1;\n      if (ul < v_digit)\n\treturn -1;\n      return 0;\n    }\n\n#if GMP_NAIL_BITS != 0\n  if (v_digit > GMP_NUMB_MAX)\n    {\n      if (un == 2)\n\t{\n\t  ul = up[0] + (up[1] << GMP_NUMB_BITS);\n\n\t  if (ul > v_digit)\n\t    return 1;\n\t  if (ul < v_digit)\n\t    return -1;\n\t  return 0;\n\t}\n    }\n#endif\n\n  return 1;\n}\n"
  },
  {
    "path": "mpz/com.c",
    "content": "/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of\n   SRC to DST.\n\nCopyright 1991, 1993, 1994, 1996, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_com (mpz_ptr dst, mpz_srcptr src)\n{\n  mp_size_t size = src->_mp_size;\n  mp_srcptr src_ptr;\n  mp_ptr dst_ptr;\n\n  if (size >= 0)\n    {\n      /* As with infinite precision: one's complement, two's complement.\n\t But this can be simplified using the identity -x = ~x + 1.\n\t So we're going to compute (~~x) + 1 = x + 1!  */\n\n      if (dst->_mp_alloc < size + 1)\n\t_mpz_realloc (dst, size + 1);\n\n      src_ptr = src->_mp_d;\n      dst_ptr = dst->_mp_d;\n\n      if (UNLIKELY (size == 0))\n\t{\n\t  /* special case, as mpn_add_1 wants size!=0 */\n\t  dst_ptr[0] = 1;\n\t  dst->_mp_size = -1;\n\t  return;\n\t}\n\n      {\n\tmp_limb_t cy;\n\n\tcy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);\n\tif (cy)\n\t  {\n\t    dst_ptr[size] = cy;\n\t    size++;\n\t  }\n      }\n\n      /* Store a negative size, to indicate ones-extension.  
*/\n      dst->_mp_size = -size;\n    }\n  else\n    {\n      /* As with infinite precision: two's complement, then one's complement.\n\t But that can be simplified using the identity -x = ~(x - 1).\n\t So we're going to compute ~~(x - 1) = x - 1!  */\n      size = -size;\n\n      if (dst->_mp_alloc < size)\n\t_mpz_realloc (dst, size);\n\n      src_ptr = src->_mp_d;\n      dst_ptr = dst->_mp_d;\n\n      mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);\n      size -= dst_ptr[size - 1] == 0;\n\n      /* Store a positive size, to indicate zero-extension.  */\n      dst->_mp_size = size;\n    }\n}\n"
  },
  {
    "path": "mpz/combit.c",
    "content": "/* mpz_combit -- complement a specified bit.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_combit (mpz_ptr d, mp_bitcnt_t bit_index)\n{\n  mp_size_t dsize = ABSIZ(d);\n  mp_ptr dp = LIMBS(d);\n  \n  mp_size_t limb_index = bit_index / GMP_NUMB_BITS;\n  mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));\n\n  if (limb_index >= dsize)\n    {\n      MPZ_REALLOC(d, limb_index + 1);\n      dp = LIMBS(d);\n      \n      MPN_ZERO(dp + dsize, limb_index + 1 - dsize);\n      dsize = limb_index + 1;\n    }\n    \n  if (SIZ(d) >= 0)\n    {\n      dp[limb_index] ^= bit;\n      MPN_NORMALIZE (dp, dsize);\n      SIZ(d) = dsize;\n    }\n  else\n    {\n      mp_limb_t x = -dp[limb_index];\n      mp_size_t i;\n\n      /* non-zero limb below us means ones-complement */\n      for (i = limb_index-1; i >= 0; i--)\n        if (dp[i] != 0)\n          {\n            x--;  /* change twos comp to ones comp */\n            break;\n          }\n\n      if (x & bit)\n\t{\n          mp_limb_t  c;\n\n\t  /* Clearing the bit increases the magitude. We might need a carry. 
*/\n\t  MPZ_REALLOC(d, dsize + 1);\n\t  dp = LIMBS(d);\n\n          __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index,\n                        dsize - limb_index, bit);\n          dp[dsize] = c;\n          dsize += c;\n        }\n      else\n\t/* Setting the bit decreases the magnitude */\n\tmpn_sub_1(dp+limb_index, dp+limb_index, dsize + limb_index, bit);\n\n      MPN_NORMALIZE (dp, dsize);\n      SIZ(d) = -dsize;\n    }\n}\n"
  },
  {
    "path": "mpz/cong.c",
    "content": "/* mpz_congruent_p -- test congruence of two mpz's.\n\nCopyright 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* For big divisors this code is only very slightly better than the user\n   doing a combination of mpz_sub and mpz_tdiv_r, but it's quite convenient,\n   and perhaps in the future can be improved, in similar ways to\n   mpn_divisible_p perhaps.\n\n   The csize==1 / dsize==1 special case makes mpz_congruent_p as good as\n   mpz_congruent_ui_p on relevant operands, though such a combination\n   probably doesn't occur often.\n\n   Alternatives:\n\n   If c<d then it'd work to just form a%d and compare a and c (either as\n   a==c or a+c==d depending on the signs), but the saving from avoiding the\n   abs(a-c) calculation would be small compared to the division.\n\n   Similarly if both a<d and c<d then it would work to just compare a and c\n   (a==c or a+c==d), but this isn't considered a particularly important case\n   and so isn't done for the moment.\n\n   Low zero limbs on d could be stripped and the corresponding limbs of a\n   and c tested and skipped, but doing so 
would introduce a borrow when a\n   and c differ in sign and have non-zero skipped limbs.  It doesn't seem\n   worth the complications to do this, since low zero limbs on d should\n   occur only rarely.  */\n\nint\nmpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)\n{\n  mp_size_t  asize, csize, dsize, sign;\n  mp_srcptr  ap, cp, dp;\n  mp_ptr     xp;\n  mp_limb_t  alow, clow, dlow, dmask, r;\n  int        result;\n  TMP_DECL;\n\n  dsize = SIZ(d);\n  if (UNLIKELY (dsize == 0))\n    return (mpz_cmp (a, c) == 0);\n\n  dsize = ABS(dsize);\n  dp = PTR(d);\n\n  if (ABSIZ(a) < ABSIZ(c))\n    MPZ_SRCPTR_SWAP (a, c);\n\n  asize = SIZ(a);\n  csize = SIZ(c);\n  sign = (asize ^ csize);\n\n  asize = ABS(asize);\n  ap = PTR(a);\n\n  if (csize == 0)\n    return mpn_divisible_p (ap, asize, dp, dsize);\n\n  csize = ABS(csize);\n  cp = PTR(c);\n\n  alow = ap[0];\n  clow = cp[0];\n  dlow = dp[0];\n\n  /* Check a==c mod low zero bits of dlow.  This might catch a few cases of\n     a!=c quickly, and it helps the csize==1 special cases below.  */\n  dmask = LOW_ZEROS_MASK (dlow) & GMP_NUMB_MASK;\n  alow = (sign >= 0 ? alow : -alow);\n  if (((alow-clow) & dmask) != 0)\n    return 0;\n\n  if (csize == 1)\n    {\n      if (dsize == 1)\n        {\n        cong_1:\n          if (sign < 0)\n            NEG_MOD (clow, clow, dlow);\n\n          if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))\n            {\n              r = mpn_mod_1 (ap, asize, dlow);\n              if (clow < dlow)\n                return r == clow;\n              else\n                return r == (clow % dlow);\n            }\n\n          if ((dlow & 1) == 0)\n            {\n              /* Strip low zero bits to get odd d required by modexact.  If\n                 d==e*2^n then a==c mod d if and only if both a==c mod e and\n                 a==c mod 2^n, the latter having been done above.  
*/\n              unsigned  twos;\n              count_trailing_zeros (twos, dlow);\n              dlow >>= twos;\n            }\n\n          r = mpn_modexact_1c_odd (ap, asize, dlow, clow);\n          return r == 0 || r == dlow;\n        }\n\n      /* dlow==0 is avoided since we don't want to bother handling extra low\n         zero bits if dsecond is even (would involve borrow if a,c differ in\n         sign and alow,clow!=0).  */\n      if (dsize == 2 && dlow != 0)\n        {\n          mp_limb_t  dsecond = dp[1];\n\n          if (dsecond <= dmask)\n            {\n              unsigned   twos;\n              count_trailing_zeros (twos, dlow);\n              dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));\n              ASSERT_LIMB (dlow);\n\n              /* dlow will be odd here, so the test for it even under cong_1\n                 is unnecessary, but the rest of that code is wanted. */\n              goto cong_1;\n            }\n        }\n    }\n\n  TMP_MARK;\n  xp = TMP_ALLOC_LIMBS (asize+1);\n\n  /* calculate abs(a-c) */\n  if (sign >= 0)\n    {\n      /* same signs, subtract */\n      if (asize > csize || mpn_cmp (ap, cp, asize) >= 0)\n        ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));\n      else\n        ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));\n      MPN_NORMALIZE (xp, asize);\n    }\n  else\n    {\n      /* different signs, add */\n      mp_limb_t  carry;\n      carry = mpn_add (xp, ap, asize, cp, csize);\n      xp[asize] = carry;\n      asize += (carry != 0);\n    }\n\n  result = mpn_divisible_p (xp, asize, dp, dsize);\n\n  TMP_FREE;\n  return result;\n}\n"
  },
  {
    "path": "mpz/cong_2exp.c",
    "content": "/* mpz_congruent_2exp_p -- test congruence of mpz mod 2^n.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\nmpz_congruent_2exp_p (mpz_srcptr a, mpz_srcptr c, mp_bitcnt_t d)\n{\n  mp_size_t      i, dlimbs;\n  unsigned long  dbits;\n  mp_ptr         ap, cp;\n  mp_limb_t      dmask, alimb, climb, sum;\n  mp_size_t      asize_signed, csize_signed, asize, csize;\n\n  if (ABSIZ(a) < ABSIZ(c))\n    MPZ_SRCPTR_SWAP (a, c);\n\n  dlimbs = d / GMP_NUMB_BITS;\n  dbits = d % GMP_NUMB_BITS;\n  dmask = (CNST_LIMB(1) << dbits) - 1;\n\n  ap = PTR(a);\n  cp = PTR(c);\n\n  asize_signed = SIZ(a);\n  asize = ABS(asize_signed);\n\n  csize_signed = SIZ(c);\n  csize = ABS(csize_signed);\n\n  if (csize_signed == 0)\n    goto a_zeros;\n\n  if ((asize_signed ^ csize_signed) >= 0)\n    {\n      /* same signs, direct comparison */\n\n      /* a==c for limbs in common */\n      if (mpn_cmp (ap, cp, MIN (csize, dlimbs)) != 0)\n        return 0;\n\n      /* if that's all of dlimbs, then a==c for remaining bits */\n      if (csize > dlimbs)\n        return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;\n\n    a_zeros:\n      /* a remains, need all zero bits */\n\n      /* if d covers all of a and c, then must be exactly equal 
*/\n      if (asize <= dlimbs)\n        return asize == csize;\n\n      /* whole limbs zero */\n      for (i = csize; i < dlimbs; i++)\n        if (ap[i] != 0)\n          return 0;\n\n      /* partial limb zero */\n      return (ap[dlimbs] & dmask) == 0;\n    }\n  else\n    {\n      /* different signs, negated comparison */\n\n      /* common low zero limbs, stopping at first non-zeros, which must\n         match twos complement */\n      i = 0;\n      for (;;)\n        {\n          ASSERT (i < csize);  /* always have a non-zero limb on c */\n          alimb = ap[i];\n          climb = cp[i];\n          sum = (alimb + climb) & GMP_NUMB_MASK;\n\n          if (i >= dlimbs)\n            return (sum & dmask) == 0;\n          i++;\n\n          /* require both zero, or first non-zeros as twos-complements */\n          if (sum != 0)\n            return 0;\n\n          if (alimb != 0)\n            break;\n        }\n\n      /* further limbs matching as ones-complement */\n      for (;;)\n        {\n          if (i >= csize)\n            break;\n\n          alimb = ap[i];\n          climb = cp[i];\n          sum = (alimb + climb + 1) & GMP_NUMB_MASK;\n\n          if (i >= dlimbs)\n            return (sum & dmask) == 0;\n\n          if (sum != 0)\n            return 0;\n\n          i++;\n        }\n\n      /* no more c, so require all 1 bits in a */\n\n      if (asize < dlimbs)\n        return 0;   /* not enough a */\n\n      /* whole limbs */\n      for ( ; i < dlimbs; i++)\n        if (ap[i] != GMP_NUMB_MAX)\n          return 0;\n\n      /* if only whole limbs, no further fetches from a */\n      if (dbits == 0)\n        return 1;\n\n      /* need enough a */\n      if (asize == dlimbs)\n        return 0;\n\n      return ((ap[dlimbs]+1) & dmask) == 0;\n    }\n}\n"
  },
  {
    "path": "mpz/cong_ui.c",
    "content": "/* mpz_congruent_ui_p -- test congruence of mpz and ulong.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* There's some explicit checks for c<d since it seems reasonably likely an\n   application might use that in a test.\n\n   Hopefully the compiler can generate something good for r==(c%d), though\n   if modexact is being used exclusively then that's not reached.  */\n\nint\nmpz_congruent_ui_p (mpz_srcptr a, mpir_ui cu, mpir_ui du)\n{\n  mp_srcptr  ap;\n  mp_size_t  asize;\n  mp_limb_t  c, d, r;\n\n  if (UNLIKELY (du == 0))\n    return (mpz_cmp_ui (a, cu) == 0);\n\n  asize = SIZ(a);\n  if (asize == 0)\n    {\n      if (cu < du)\n        return cu == 0;\n      else\n        return (cu % du) == 0;\n    }\n\n  /* For nails don't try to be clever if c or d is bigger than a limb, just\n     fake up some mpz_t's and go to the main mpz_congruent_p.  
*/\n  if (du > GMP_NUMB_MAX || cu > GMP_NUMB_MAX)\n    {\n      mp_limb_t  climbs[2], dlimbs[2];\n      mpz_t      cz, dz;\n\n      ALLOC(cz) = 2;\n      PTR(cz) = climbs;\n      ALLOC(dz) = 2;\n      PTR(dz) = dlimbs;\n\n      mpz_set_ui (cz, cu);\n      mpz_set_ui (dz, du);\n      return mpz_congruent_p (a, cz, dz);\n    }\n\n  /* NEG_MOD works on limbs, so convert ulong to limb */\n  c = cu;\n  d = du;\n\n  if (asize < 0)\n    {\n      asize = -asize;\n      NEG_MOD (c, c, d);\n    }\n\n  ap = PTR (a);\n\n  if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))\n    {\n      r = mpn_mod_1 (ap, asize, d);\n      if (c < d)\n        return r == c;\n      else\n        return r == (c % d);\n    }\n\n  if ((d & 1) == 0)\n    {\n      /* Strip low zero bits to get odd d required by modexact.  If\n         d==e*2^n then a==c mod d if and only if both a==c mod 2^n\n         and a==c mod e.  */\n\n      unsigned  twos;\n\n      if ((ap[0]-c) & LOW_ZEROS_MASK (d))\n        return 0;\n\n      count_trailing_zeros (twos, d);\n      d >>= twos;\n    }\n\n  r = mpn_modexact_1c_odd (ap, asize, d, c);\n  return r == 0 || r == d;\n}\n"
  },
  {
    "path": "mpz/dive_ui.c",
    "content": "/* mpz_divexact_ui -- exact division mpz by ulong.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_divexact_ui (mpz_ptr dst, mpz_srcptr src, mpir_ui divisor)\n{\n  mp_size_t  size, abs_size;\n  mp_ptr     dst_ptr;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  /* For nails don't try to be clever if d is bigger than a limb, just fake\n     up an mpz_t and go to the main mpz_divexact.  */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t  dlimbs[2];\n      mpz_t      dz;\n      ALLOC(dz) = 2;\n      PTR(dz) = dlimbs;\n      mpz_set_ui (dz, divisor);\n      mpz_divexact (dst, src, dz);\n      return;\n    }\n\n  size = SIZ(src);\n  if (size == 0)\n    {\n      SIZ(dst) = 0;\n      return;\n    }\n  abs_size = ABS (size);\n\n  MPZ_REALLOC (dst, abs_size);\n  dst_ptr = PTR(dst);\n\n  MPN_DIVREM_OR_DIVEXACT_1 (dst_ptr, PTR(src), abs_size, (mp_limb_t) divisor);\n  abs_size -= (dst_ptr[abs_size-1] == 0);\n  SIZ(dst) = (size >= 0 ? abs_size : -abs_size);\n}\n"
  },
  {
    "path": "mpz/divegcd.c",
    "content": "/* mpz_divexact_gcd -- exact division optimized for GCDs.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO\n   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.\n\nCopyright 2000, 2005, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Set q to a/d, expecting d to be from a GCD and therefore usually small.\n\n   The distribution of GCDs of random numbers can be found in Knuth volume 2\n   section 4.5.2 theorem D.\n\n            GCD     chance\n             1       60.8%\n            2^k      20.2%     (1<=k<32)\n           3*2^k      9.0%     (1<=k<32)\n           other     10.1%\n\n   Only the low limb is examined for optimizations, since GCDs bigger than\n   2^32 (or 2^64) will occur very infrequently.\n\n   Future: This could change to an mpn_divexact_gcd, possibly partly\n   inlined, if/when the relevant mpq functions change to an mpn based\n   implementation.  
*/\n\n\n#if GMP_NUMB_BITS % 2 == 0\nstatic void\nmpz_divexact_by3 (mpz_ptr q, mpz_srcptr a)\n{\n  mp_size_t  size = SIZ(a);\n  mp_size_t  abs_size = ABS(size);\n  mp_ptr     qp;\n\n  qp = MPZ_REALLOC (q, abs_size);\n\n  mpn_divexact_byfobm1 (qp, PTR(a), abs_size, 3, GMP_NUMB_MASK / 3);\n\n  abs_size -= (qp[abs_size-1] == 0);\n  SIZ(q) = (size>0 ? abs_size : -abs_size);\n}\n#endif\n\n#if GMP_NUMB_BITS % 4 == 0\nstatic void\nmpz_divexact_by5 (mpz_ptr q, mpz_srcptr a)\n{\n  mp_size_t  size = SIZ(a);\n  mp_size_t  abs_size = ABS(size);\n  mp_ptr     qp;\n\n  qp = MPZ_REALLOC (q, abs_size);\n\n  mpn_divexact_byfobm1 (qp, PTR(a), abs_size, 5, GMP_NUMB_MASK / 5);\n\n  abs_size -= (qp[abs_size-1] == 0);\n  SIZ(q) = (size>0 ? abs_size : -abs_size);\n}\n#endif\n\nstatic void\nmpz_divexact_limb (mpz_ptr q, mpz_srcptr a, mp_limb_t d)\n{\n  mp_size_t  size = SIZ(a);\n  mp_size_t  abs_size = ABS(size);\n  mp_ptr     qp;\n\n  qp = MPZ_REALLOC (q, abs_size);\n\n  mpn_divexact_1 (qp, PTR(a), abs_size, d);\n\n  abs_size -= (qp[abs_size-1] == 0);\n  SIZ(q) = (size>0 ? abs_size : -abs_size);\n}\n\nvoid\nmpz_divexact_gcd (mpz_ptr q, mpz_srcptr a, mpz_srcptr d)\n{\n  ASSERT (mpz_sgn (d) > 0);\n\n  if (SIZ(a) == 0)\n    {\n      SIZ(q) = 0;\n      return;\n    }\n\n  if (SIZ(d) == 1)\n    {\n      mp_limb_t  dl = PTR(d)[0];\n      int        twos;\n\n      if ((dl & 1) == 0)\n\t{\n\t  count_trailing_zeros (twos, dl);\n\t  dl >>= twos;\n\t  mpz_tdiv_q_2exp (q, a, twos);\n\t  a = q;\n\t}\n\n      if (dl == 1)\n\t{\n\t  if (q != a)\n\t    mpz_set (q, a);\n\t  return;\n\t}\n#if GMP_NUMB_BITS % 2 == 0\n      if (dl == 3)\n\t{\n\t  mpz_divexact_by3 (q, a);\n\t  return;\n\t}\n#endif\n#if GMP_NUMB_BITS % 4 == 0\n      if (dl == 5)\n\t{\n\t  mpz_divexact_by5 (q, a);\n\t  return;\n\t}\n#endif\n\n      mpz_divexact_limb (q, a, dl);\n      return;\n    }\n\n  mpz_divexact (q, a, d);\n}\n"
  },
  {
    "path": "mpz/divexact.c",
    "content": "/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0.\n\nContributed to the GNU project by Niels Möller.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2005,\n2006, 2007, 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)\n{\n  mp_ptr qp;\n  mp_size_t qn;\n  mp_srcptr np, dp;\n  mp_size_t nn, dn;\n  TMP_DECL;\n\n#if WANT_ASSERT\n  {\n    mpz_t  rem;\n    mpz_init (rem);\n    mpz_tdiv_r (rem, num, den);\n    ASSERT (SIZ(rem) == 0);\n    mpz_clear (rem);\n  }\n#endif\n\n  nn = ABSIZ (num);\n  dn = ABSIZ (den);\n\n  qn = nn - dn + 1;\n  MPZ_REALLOC (quot, qn);\n\n  if (nn < dn)\n    {\n      /* This special case avoids segfaults below when the function is\n\t incorrectly called with |N| < |D|, N != 0.  It also handles the\n\t well-defined case N = 0.  */\n      SIZ(quot) = 0;\n      return;\n    }\n\n  TMP_MARK;\n\n  qp = PTR(quot);\n\n  if (quot == num || quot == den)\n    qp = TMP_ALLOC_LIMBS (qn);\n\n  np = PTR(num);\n  dp = PTR(den);\n\n  mpn_divexact (qp, np, nn, dp, dn);\n  MPN_NORMALIZE (qp, qn);\n\n  SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? 
qn : -qn;\n\n  if (qp != PTR(quot))\n    MPN_COPY (PTR(quot), qp, qn);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/divis.c",
    "content": "/* mpz_divisible_p -- mpz by mpz divisibility test\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_divisible_p (mpz_srcptr a, mpz_srcptr d)\n{\n  mp_size_t dsize = SIZ(d);\n  mp_size_t asize = SIZ(a);\n\n  if (UNLIKELY (dsize == 0))\n    return (asize == 0);\n\n  return mpn_divisible_p (PTR(a), ABS(asize), PTR(d), ABS(dsize));\n}\n"
  },
  {
    "path": "mpz/divis_2exp.c",
    "content": "/* mpz_divisible_2exp_p -- mpz by 2^n divisibility test\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\nmpz_divisible_2exp_p (mpz_srcptr a, mp_bitcnt_t d)\n{\n  mp_size_t      i, dlimbs;\n  unsigned long  dbits;\n  mp_ptr         ap;\n  mp_limb_t      dmask;\n  mp_size_t      asize;\n\n  asize = ABSIZ(a);\n  dlimbs = d / GMP_NUMB_BITS;\n\n  /* if d covers the whole of a, then only a==0 is divisible */\n  if (asize <= dlimbs)\n    return asize == 0;\n\n  /* whole limbs must be zero */\n  ap = PTR(a);\n  for (i = 0; i < dlimbs; i++)\n    if (ap[i] != 0)\n      return 0;\n\n  /* left over bits must be zero */\n  dbits = d % GMP_NUMB_BITS;\n  dmask = (CNST_LIMB(1) << dbits) - 1;\n  return (ap[dlimbs] & dmask) == 0;\n}\n"
  },
  {
    "path": "mpz/divis_ui.c",
    "content": "/* mpz_divisible_ui_p -- mpz by ulong divisibility test.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nint\nmpz_divisible_ui_p (mpz_srcptr a, mpir_ui d)\n{\n  mp_size_t  asize;\n  mp_ptr     ap;\n  unsigned   twos;\n\n  asize = SIZ(a);\n  if (UNLIKELY (d == 0))\n    return (asize == 0);\n\n  if (asize == 0)  /* 0 divisible by any d */\n    return 1;\n\n  /* For nails don't try to be clever if d is bigger than a limb, just fake\n     up an mpz_t and go to the main mpz_divisible_p.  */\n  if (d > GMP_NUMB_MAX)\n    {\n      mp_limb_t  dlimbs[2];\n      mpz_t      dz;\n      ALLOC(dz) = 2;\n      PTR(dz) = dlimbs;\n      mpz_set_ui (dz, d);\n      return mpz_divisible_p (a, dz);\n    }\n\n  ap = PTR(a);\n  asize = ABS(asize);  /* ignore sign of a */\n\n  if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))\n    return mpn_mod_1 (ap, asize, (mp_limb_t) d) == 0;\n\n  if (! (d & 1))\n    {\n      /* Strip low zero bits to get odd d required by modexact.  If d==e*2^n\n         and a is divisible by 2^n and by e, then it's divisible by d. 
*/\n\n      if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)\n        return 0;\n\n      count_trailing_zeros (twos, (mp_limb_t) d);\n      d >>= twos;\n    }\n\n  return mpn_modexact_1_odd (ap, asize, (mp_limb_t) d) == 0;\n}\n"
  },
  {
    "path": "mpz/dump.c",
    "content": "/* mpz_dump - Dump an integer to stdout.\n\n   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO\n   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS\n   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\n\nCopyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <string.h> /* for strlen */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_dump (mpz_srcptr u)\n{\n  char *str;\n\n  str = mpz_get_str (0, 10, u);\n  printf (\"%s\\n\", str);\n  (*__gmp_free_func) (str, strlen (str) + 1);\n}\n"
  },
  {
    "path": "mpz/export.c",
    "content": "/* mpz_export -- create word data from mpz.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>  /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n#if HAVE_LIMB_BIG_ENDIAN\n#define HOST_ENDIAN     1\n#endif\n#if HAVE_LIMB_LITTLE_ENDIAN\n#define HOST_ENDIAN     (-1)\n#endif\n#ifndef HOST_ENDIAN\nstatic const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;\n#define HOST_ENDIAN     (* (signed char *) &endian_test)\n#endif\n\nvoid *\nmpz_export (void *data, size_t *countp, int order,\n            size_t size, int endian, size_t nail, mpz_srcptr z)\n{\n  mp_size_t      zsize;\n  mp_srcptr      zp;\n  size_t         count, dummy;\n  mpir_ui         numb;\n  unsigned       align;\n\n  ASSERT (order == 1 || order == -1);\n  ASSERT (endian == 1 || endian == 0 || endian == -1);\n  ASSERT (nail <= 8*size);\n  ASSERT (8*size-nail > 0);\n\n  if (countp == NULL)\n    countp = &dummy;\n\n  zsize = SIZ(z);\n  if (zsize == 0)\n    {\n      *countp = 0;\n      return data;\n    }\n\n  zsize = ABS (zsize);\n  zp = PTR(z);\n  numb = 8*size - nail;\n  MPN_SIZEINBASE_2EXP (count, zp, zsize, 
numb);\n  *countp = count;\n\n  if (data == NULL)\n    data = (*__gmp_allocate_func) (count*size);\n\n  if (endian == 0)\n    endian = HOST_ENDIAN;\n\n  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);\n\n  if (nail == GMP_NAIL_BITS)\n    {\n      if (size == sizeof (mp_limb_t) && align == 0)\n        {\n          if (order == -1 && endian == HOST_ENDIAN)\n            {\n              MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);\n              return data;\n            }\n          if (order == 1 && endian == HOST_ENDIAN)\n            {\n              MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);\n              return data;\n            }\n\n          if (order == -1 && endian == -HOST_ENDIAN)\n            {\n              MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);\n              return data;\n            }\n          if (order == 1 && endian == -HOST_ENDIAN)\n            {\n              MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);\n              return data;\n            }\n        }\n    }\n\n  {\n    mp_limb_t      limb, wbitsmask;\n    size_t         i, numb;\n    mp_size_t      j, wbytes, woffset;\n    unsigned char  *dp;\n    int            lbits, wbits;\n    mp_srcptr      zend;\n\n    numb = size * 8 - nail;\n\n    /* whole bytes per word */\n    wbytes = numb / 8;\n\n    /* possible partial byte */\n    wbits = numb % 8;\n    wbitsmask = (CNST_LIMB(1) << wbits) - 1;\n\n    /* offset to get to the next word */\n    woffset = (endian >= 0 ? size : - (mp_size_t) size)\n      + (order < 0 ? size : - (mp_size_t) size);\n\n    /* least significant byte */\n    dp = (unsigned char *) data\n      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? 
size-1 : 0);\n\n#define EXTRACT(N, MASK)                                \\\n    do {                                                \\\n      if (lbits >= (N))                                 \\\n        {                                               \\\n          *dp = limb MASK;                              \\\n          limb >>= (N);                                 \\\n          lbits -= (N);                                 \\\n        }                                               \\\n      else                                              \\\n        {                                               \\\n          mp_limb_t  newlimb;                           \\\n          newlimb = (zp == zend ? 0 : *zp++);           \\\n          *dp = (limb | (newlimb << lbits)) MASK;       \\\n          limb = newlimb >> ((N)-lbits);                \\\n          lbits += GMP_NUMB_BITS - (N);                 \\\n        }                                               \\\n    } while (0)\n\n    zend = zp + zsize;\n    lbits = 0;\n    limb = 0;\n    for (i = 0; i < count; i++)\n      {\n        for (j = 0; j < wbytes; j++)\n          {\n            EXTRACT (8, + 0);\n            dp -= endian;\n          }\n        if (wbits != 0)\n          {\n            EXTRACT (wbits, & wbitsmask);\n            dp -= endian;\n            j++;\n          }\n        for ( ; j < size; j++)\n          {\n            *dp = '\\0';\n            dp -= endian;\n          }\n        dp += woffset;\n      }\n\n    ASSERT (zp == PTR(z) + ABSIZ(z));\n\n    /* low byte of word after most significant */\n    ASSERT (dp == (unsigned char *) data\n            + (order < 0 ? count*size : - (mp_size_t) size)\n            + (endian >= 0 ? (mp_size_t) size - 1 : 0));\n  }\n  return data;\n}\n"
  },
  {
    "path": "mpz/fac_ui.c",
    "content": "/* mpz_fac_ui(RESULT, N) -- Set RESULT to N!.\n\nContributed to the GNU project by Marco Bodrato.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012\nFree Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if GMP_LIMB_BITS == 64\n\n/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),CNST_LIMB(0x6),CNST_LIMB(0x18),CNST_LIMB(0x78),CNST_LIMB(0x2d0),CNST_LIMB(0x13b0),CNST_LIMB(0x9d80),CNST_LIMB(0x58980),CNST_LIMB(0x375f00),CNST_LIMB(0x2611500),CNST_LIMB(0x1c8cfc00),CNST_LIMB(0x17328cc00),CNST_LIMB(0x144c3b2800),CNST_LIMB(0x13077775800),CNST_LIMB(0x130777758000),CNST_LIMB(0x1437eeecd8000),CNST_LIMB(0x16beecca730000),CNST_LIMB(0x1b02b9306890000),CNST_LIMB(0x21c3677c82b40000)\n\n#define TABLE_LIMIT_2N_MINUS_POPC_2N 81\n\n#else\n\n/* This table is 0!,1!,2!,3!,...,n! where n! 
has <= GMP_NUMB_BITS bits */\n#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),CNST_LIMB(0x6),CNST_LIMB(0x18),CNST_LIMB(0x78),CNST_LIMB(0x2d0),CNST_LIMB(0x13b0),CNST_LIMB(0x9d80),CNST_LIMB(0x58980),CNST_LIMB(0x375f00),CNST_LIMB(0x2611500),CNST_LIMB(0x1c8cfc00)\n\n#define TABLE_LIMIT_2N_MINUS_POPC_2N 49\n\n#endif\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n#if TUNE_PROGRAM_BUILD\n#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))\n#else\n#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_ODD_THRESHOLD)+1))\n#endif\n\n/* Computes n!, the factorial of n.\n   WARNING: it assumes that n fits in a limb!\n */\nvoid\nmpz_fac_ui (mpz_ptr x, mpir_ui n)\n{\n  static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };\n\n  ASSERT (n <= GMP_NUMB_MAX);\n\n  if (n < numberof (table))\n    {\n      PTR (x)[0] = table[n];\n      SIZ (x) = 1;\n    }\n  else if (BELOW_THRESHOLD (n, FAC_ODD_THRESHOLD))\n    {\n      mp_limb_t prod, max_prod;\n      mp_size_t j;\n      mp_ptr    factors;\n      TMP_SDECL;\n\n      TMP_SMARK;\n      factors = TMP_SALLOC_LIMBS (2 + (n - numberof (table)) / FACTORS_PER_LIMB);\n\n      factors[0] = table[numberof (table)-1];\n      j = 1;\n      prod = n;\n#if TUNE_PROGRAM_BUILD\n      max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;\n#else\n      max_prod = GMP_NUMB_MAX / (FAC_ODD_THRESHOLD | 1);\n#endif\n      while (--n >= numberof (table))\n\tFACTOR_LIST_STORE (n, prod, max_prod, factors, j);\n\n      factors[j++] = prod;\n      mpz_prodlimbs (x, factors, j);\n\n      TMP_SFREE;\n    }\n  else\n    {\n      mp_limb_t count;\n      mpz_oddfac_1 (x, n, 0);\n      if (n <= TABLE_LIMIT_2N_MINUS_POPC_2N)\n\tcount = __gmp_fac2cnt_table[n / 2 - 1];\n      
else\n\t{\n\t  popc_limb (count, n);\n\t  count = n - count;\n\t}\n      mpz_mul_2exp (x, x, count);\n    }\n}\n\n#undef FACTORS_PER_LIMB\n#undef FACTOR_LIST_STORE\n"
  },
  {
    "path": "mpz/fdiv_q.c",
    "content": "/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t dividend_size = dividend->_mp_size;\n  mp_size_t divisor_size = divisor->_mp_size;\n  mpz_t rem;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  MPZ_TMP_INIT (rem, ABS (divisor_size));\n\n  mpz_tdiv_qr (quot, rem, dividend, divisor);\n\n  if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0)\n    mpz_sub_ui (quot, quot, 1L);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/fdiv_q_ui.c",
    "content": "/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  qp[0] = 0;\n\t  rl = np[0];\n\t  qn = 1;\t\t/* a white lie, fixed below */\n\t}\n      else\n\t{\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t  qn = nn - 2 + 1; \n\t}\n\n      if (rl != 0 && ns < 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t}\n\n      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n\n      if (rl != 0 && ns < 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t}\n\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/fdiv_qr.c",
    "content": "/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t divisor_size = divisor->_mp_size;\n  mp_size_t xsize;\n  mpz_t temp_divisor;\t\t/* N.B.: lives until function returns! */\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* We need the original value of the divisor after the quotient and\n     remainder have been preliminary calculated.  We have to copy it to\n     temporary space if it's the same variable as either QUOT or REM.  */\n  if (quot == divisor || rem == divisor)\n    {\n      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));\n      mpz_set (temp_divisor, divisor);\n      divisor = temp_divisor;\n    }\n\n  xsize = dividend->_mp_size ^ divisor_size;;\n  mpz_tdiv_qr (quot, rem, dividend, divisor);\n\n  if (xsize < 0 && rem->_mp_size != 0)\n    {\n      mpz_sub_ui (quot, quot, 1L);\n      mpz_add (rem, rem, divisor);\n    }\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/fdiv_qr_ui.c",
    "content": "/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp;\n      mp_size_t rn;\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  qp[0] = 0;\n\t  qn = 1;\t\t/* a white lie, fixed below */\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t  qn = nn - 2 + 1; \n\t}\n\n      if (rl != 0 && ns < 0)\n\t{\n\t  mpn_incr_u (qp, (mp_limb_t) 1);\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  if (ns < 0)\n\t    {\n\t      mpn_incr_u (qp, (mp_limb_t) 1);\n\t      rl = divisor - rl;\n\t    }\n\n\t  PTR(rem)[0] = rl;\n\t  SIZ(rem) = rl != 0;\n\t}\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/fdiv_r.c",
    "content": "/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t divisor_size = divisor->_mp_size;\n  mpz_t temp_divisor;\t\t/* N.B.: lives until function returns! */\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* We need the original value of the divisor after the remainder has been\n     preliminary calculated.  We have to copy it to temporary space if it's\n     the same variable as REM.  */\n  if (rem == divisor)\n    {\n      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));\n      mpz_set (temp_divisor, divisor);\n      divisor = temp_divisor;\n    }\n\n  mpz_tdiv_r (rem, dividend, divisor);\n\n  if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0)\n    mpz_add (rem, rem, divisor);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/fdiv_r_ui.c",
    "content": "/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp, qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  TMP_MARK;\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  TMP_FREE;\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t}\n\n      if (rl != 0 && ns < 0)\n\t{\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  if (ns < 0)\n\t    rl = divisor - rl;\n\n\t  PTR(rem)[0] = rl;\n\t  SIZ(rem) = 1;\n\t}\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/fdiv_ui.c",
    "content": "/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.\n   The remainder gets the same sign as the denominator.\n\nCopyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_fdiv_ui (mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n      mp_ptr qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  rp[0] = rl;\n\t}\n      else\n\t{\n\t  TMP_MARK;\n\t  dp[0] = divisor & GMP_NUMB_MASK;\n\t  dp[1] = divisor >> GMP_NUMB_BITS;\n\t  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n\t  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n\t  TMP_FREE;\n\t  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n\t}\n\n      if (rl != 0 && ns < 0)\n\t{\n\t  rl = divisor - rl;\n\t  rp[0] = rl & GMP_NUMB_MASK;\n\t  rp[1] = rl >> GMP_NUMB_BITS;\n\t}\n\n      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\t;\n      else\n\t{\n\t  if (ns < 0)\n\t    rl = divisor - rl;\n\t}\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/fib2_ui.c",
    "content": "/* mpz_fib2_ui -- calculate Fibonacci numbers.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpz_fib2_ui (mpz_ptr fn, mpz_ptr fnsub1, mpir_ui n)\n{\n  mp_ptr     fp, f1p;\n  mp_size_t  size;\n\n  size = MPN_FIB2_SIZE (n);\n  MPZ_REALLOC (fn,     size);\n  MPZ_REALLOC (fnsub1, size);\n  fp = PTR (fn);\n  f1p = PTR (fnsub1);\n\n  size = mpn_fib2_ui (fp, f1p, n);\n\n  SIZ(fn)     = size - (n == 0);\n  SIZ(fnsub1) = size - (f1p[size-1] == 0);\n}\n"
  },
  {
    "path": "mpz/fib_ui.c",
    "content": "/* mpz_fib_ui -- calculate Fibonacci numbers.\n\nCopyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* change to \"#define TRACE(x) x\" to get some traces */\n#define TRACE(x)\n\n\n/* In the F[2k+1] below for k odd, the -2 won't give a borrow from the low\n   limb because the result F[2k+1] is an F[4m+3] and such numbers are always\n   == 1, 2 or 5 mod 8, whereas an underflow would leave 6 or 7.  (This is\n   the same as in mpn_fib2_ui.)\n\n   In the F[2k+1] for k even, the +2 won't give a carry out of the low limb\n   in normal circumstances.  This is an F[4m+1] and we claim that F[3*2^b+1]\n   == 1 mod 2^b is the first F[4m+1] congruent to 0 or 1 mod 2^b, and hence\n   if n < 2^GMP_NUMB_BITS then F[n] cannot have a low limb of 0 or 1.  No\n   proof for this claim, but it's been verified up to b==32 and has such a\n   nice pattern it must be true :-).  
Of interest is that F[3*2^b] == 0 mod\n   2^(b+1) seems to hold too.\n\n   When n >= 2^GMP_NUMB_BITS, which can arise in a nails build, then the low\n   limb of F[4m+1] can certainly be 1, and an mpn_add_1 must be used.  */\n\nvoid\nmpz_fib_ui (mpz_ptr fn, mpir_ui n)\n{\n  mp_ptr         fp, xp, yp;\n  mp_size_t      size, xalloc;\n  mpir_ui         n2;\n  mp_limb_t      c, c2;\n  TMP_DECL;\n\n  if (n <= FIB_TABLE_LIMIT)\n    {\n      PTR(fn)[0] = FIB_TABLE (n);\n      SIZ(fn) = (n != 0);      /* F[0]==0, others are !=0 */\n      return;\n    }\n\n  n2 = n/2;\n  xalloc = MPN_FIB2_SIZE (n2) + 1;\n  MPZ_REALLOC (fn, 2*xalloc+1);\n  fp = PTR (fn);\n\n  TMP_MARK;\n  TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc);\n  size = mpn_fib2_ui (xp, yp, n2);\n\n  TRACE (printf (\"mpz_fib_ui last step n=%lu size=%ld bit=%lu\\n\",\n                 n >> 1, size, n&1);\n         mpn_trace (\"xp\", xp, size);\n         mpn_trace (\"yp\", yp, size));\n\n  if (n & 1)\n    {\n      /* F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k  */\n      mp_size_t  xsize, ysize;\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n      xp[size] = mpn_double (xp, size);\n      yp[size] = 0;\n      ASSERT_NOCARRY (mpn_sumdiff_n (xp, yp, xp, yp, size+1));\n      xsize = size + (xp[size] != 0);\n      ysize = size + (yp[size] != 0);\n#else\n      c2 = mpn_lshift1 (fp, xp, size);\n      c = c2 + mpn_add_n (xp, fp, yp, size);\n      xp[size] = c;\n      xsize = size + (c != 0);\n      c2 -= mpn_sub_n (yp, fp, yp, size);\n      yp[size] = c2;\n      ASSERT (c2 <= 1);\n      ysize = size + c2;\n#endif\n\n      size = xsize + ysize;\n      c = mpn_mul (fp, xp, xsize, yp, ysize);\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n      /* no overflow, see comments above */\n      ASSERT (n & 2 ? fp[0] >= 2 : fp[0] <= GMP_NUMB_MAX-2);\n      fp[0] += (n & 2 ? 
-CNST_LIMB(2) : CNST_LIMB(2));\n#else\n      if (n & 2)\n        {\n          ASSERT (fp[0] >= 2);\n          fp[0] -= 2;\n        }\n      else\n        {\n          ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */\n          c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));\n          fp[size-1] = c;\n        }\n#endif\n    }\n  else\n    {\n      /* F[2k] = F[k]*(F[k]+2F[k-1]) */\n\n      mp_size_t  xsize, ysize;\n      c = mpn_double (yp, size);\n      c += mpn_add_n (yp, yp, xp, size);\n      yp[size] = c;\n      xsize = size;\n      ysize = size + (c != 0);\n      size += ysize;\n      c = mpn_mul (fp, yp, ysize, xp, xsize);\n    }\n\n  /* one or two high zeros */\n  size -= (c == 0);\n  size -= (fp[size-1] == 0);\n  SIZ(fn) = size;\n\n  TRACE (printf (\"done special, size=%ld\\n\", size);\n         mpn_trace (\"fp \", fp, size));\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/fits_s.h",
    "content": "/* int mpz_fits_X_p (mpz_t z) -- test whether z fits signed type X.\n\nCopyright 1997, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\nFUNCTION (mpz_srcptr z)\n{\n  mp_size_t n = SIZ(z);\n  mp_ptr p = PTR(z);\n  mp_limb_t limb = p[0];\n\n  if (n == 0)\n    return 1;\n  if (n == 1)\n    return limb <= MAXIMUM;\n  if (n == -1)\n    return limb <= - (mp_limb_t) MINIMUM;\n#if GMP_NAIL_BITS != 0\n  {\n    if ((p[1] >> GMP_NAIL_BITS) == 0)\n      {\n\tlimb += p[1] << GMP_NUMB_BITS;\n\tif (n == 2)\n\t  return limb <= MAXIMUM;\n\tif (n == -2)\n\t  return limb <= - (mp_limb_t) MINIMUM;\n      }\n  }\n#endif\n  return 0;\n}\n"
  },
  {
    "path": "mpz/fits_si.c",
    "content": "/* int mpz_fits_slong_p (mpz_t z) -- test whether z fits a long.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpz_fits_si_p\n#define MAXIMUM   GMP_SI_MAX\n#define MINIMUM   GMP_SI_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpz/fits_sint.c",
    "content": "/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits a int.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpz_fits_sint_p\n#define MAXIMUM   INT_MAX\n#define MINIMUM   INT_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpz/fits_slong.c",
    "content": "/* int mpz_fits_slong_p (mpz_t z) -- test whether z fits a long.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpz_fits_slong_p\n#define MAXIMUM   LONG_MAX\n#define MINIMUM   LONG_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpz/fits_sshort.c",
    "content": "/* int mpz_fits_sshort_p (mpz_t z) -- test whether z fits a short.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define FUNCTION  mpz_fits_sshort_p\n#define MAXIMUM   SHRT_MAX\n#define MINIMUM   SHRT_MIN\n\n#include \"fits_s.h\"\n"
  },
  {
    "path": "mpz/fits_ui.c",
    "content": "/* mpz_fits_uint_p -- test whether z fits an unsigned int.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_fits_ui_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/fits_uint.c",
    "content": "/* mpz_fits_uint_p -- test whether z fits an unsigned int.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_fits_uint_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/fits_ulong.c",
    "content": "/* mpz_fits_ulong_p -- test whether z fits an unsigned long.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_fits_ulong_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/fits_ushort.c",
    "content": "/* mpz_fits_ushort_p -- test whether z fits an unsigned short.\n\nCopyright 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_fits_ushort_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/gcd.c",
    "content": "/* mpz/gcd.c:   Calculate the greatest common divisor of two integers.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2010 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nvoid\nmpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)\n{\n  mpir_ui g_zero_bits, u_zero_bits, v_zero_bits;\n  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;\n  mp_ptr tp;\n  mp_ptr up;\n  mp_size_t usize;\n  mp_ptr vp;\n  mp_size_t vsize;\n  mp_size_t gsize;\n  TMP_DECL;\n\n  up = PTR(u);\n  usize = ABSIZ (u);\n  vp = PTR(v);\n  vsize = ABSIZ (v);\n  /* GCD(0, V) == V.  */\n  if (usize == 0)\n    {\n      SIZ (g) = vsize;\n      if (g == v)\n\treturn;\n      MPZ_REALLOC (g, vsize);\n      MPN_COPY (PTR (g), vp, vsize);\n      return;\n    }\n\n  /* GCD(U, 0) == U.  
*/\n  if (vsize == 0)\n    {\n      SIZ (g) = usize;\n      if (g == u)\n\treturn;\n      MPZ_REALLOC (g, usize);\n      MPN_COPY (PTR (g), up, usize);\n      return;\n    }\n\n  if (usize == 1)\n    {\n      SIZ (g) = 1;\n      PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);\n      return;\n    }\n\n  if (vsize == 1)\n    {\n      SIZ(g) = 1;\n      PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);\n      return;\n    }\n\n  TMP_MARK;\n\n  /*  Eliminate low zero bits from U and V and move to temporary storage.  */\n  while (*up == 0)\n    up++;\n  u_zero_limbs = up - PTR(u);\n  usize -= u_zero_limbs;\n  count_trailing_zeros (u_zero_bits, *up);\n  tp = up;\n  up = TMP_ALLOC_LIMBS (usize);\n  if (u_zero_bits != 0)\n    {\n      mpn_rshift (up, tp, usize, u_zero_bits);\n      usize -= up[usize - 1] == 0;\n    }\n  else\n    MPN_COPY (up, tp, usize);\n\n  while (*vp == 0)\n    vp++;\n  v_zero_limbs = vp - PTR (v);\n  vsize -= v_zero_limbs;\n  count_trailing_zeros (v_zero_bits, *vp);\n  tp = vp;\n  vp = TMP_ALLOC_LIMBS (vsize);\n  if (v_zero_bits != 0)\n    {\n      mpn_rshift (vp, tp, vsize, v_zero_bits);\n      vsize -= vp[vsize - 1] == 0;\n    }\n  else\n    MPN_COPY (vp, tp, vsize);\n\n  if (u_zero_limbs > v_zero_limbs)\n    {\n      g_zero_limbs = v_zero_limbs;\n      g_zero_bits = v_zero_bits;\n    }\n  else if (u_zero_limbs < v_zero_limbs)\n    {\n      g_zero_limbs = u_zero_limbs;\n      g_zero_bits = u_zero_bits;\n    }\n  else  /*  Equal.  */\n    {\n      g_zero_limbs = u_zero_limbs;\n      g_zero_bits = MIN (u_zero_bits, v_zero_bits);\n    }\n\n  /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */\n  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))\n    ? mpn_gcd (vp, vp, vsize, up, usize)\n    : mpn_gcd (vp, up, usize, vp, vsize);\n\n  /*  Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  
*/\n  gsize = vsize + g_zero_limbs;\n  if (g_zero_bits != 0)\n    {\n      mp_limb_t cy_limb;\n      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;\n      MPZ_REALLOC (g, gsize);\n      MPN_ZERO (PTR (g), g_zero_limbs);\n\n      tp = PTR(g) + g_zero_limbs;\n      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);\n      if (cy_limb != 0)\n\ttp[vsize] = cy_limb;\n    }\n  else\n    {\n      MPZ_REALLOC (g, gsize);\n      MPN_ZERO (PTR (g), g_zero_limbs);\n      MPN_COPY (PTR (g) + g_zero_limbs, vp, vsize);\n    }\n\n  SIZ (g) = gsize;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/gcd_ui.c",
    "content": "/* mpz_gcd_ui -- Calculate the greatest common divisior of two integers.\n\nCopyright 1994, 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_gcd_ui (mpz_ptr w, mpz_srcptr u, mpir_ui v)\n{\n  mp_size_t un;\n  mp_limb_t res;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (v > GMP_NUMB_MAX)\n    {\n      mpz_t vz;\n      mp_limb_t vlimbs[2];\n      vlimbs[0] = v & GMP_NUMB_MASK;\n      vlimbs[1] = v >> GMP_NUMB_BITS;\n      PTR(vz) = vlimbs;\n      SIZ(vz) = 2;\n      mpz_gcd (w, u, vz);\n      /* because v!=0 we will have w<=v hence fitting a ulong */\n      ASSERT (mpz_fits_ui_p (w));\n      return mpz_get_ui (w);\n    }\n#endif\n\n  un = ABSIZ(u);\n\n  if (un == 0)\n    res = v;\n  else if (v == 0)\n    {\n      if (w != NULL)\n\t{\n\t  if (u != w)\n\t    {\n\t      MPZ_REALLOC (w, un);\n\t      MPN_COPY (PTR(w), PTR(u), un);\n\t    }\n\t  SIZ(w) = un;\n\t}\n      /* Return u if it fits a ulong, otherwise 0. */\n      res = PTR(u)[0];\n      return (un == 1 && res <= ULONG_MAX ? 
res : 0);\n    }\n  else\n    res = mpn_gcd_1 (PTR(u), un, (mp_limb_t) v);\n\n  if (w != NULL)\n    {\n      PTR(w)[0] = res;\n      SIZ(w) = res != 0;\n    }\n  return res;\n}\n"
  },
  {
    "path": "mpz/gcdext.c",
    "content": "/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that\n   g = as + bt.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005, 2011,\n2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)\n{\n  mp_size_t asize, bsize;\n  mp_ptr tmp_ap, tmp_bp;\n  mp_size_t gsize, ssize, tmp_ssize;\n  mp_ptr gp, tmp_gp, tmp_sp;\n  TMP_DECL;\n\n  /* mpn_gcdext requires that Usize >= Vsize.  Therefore, we often\n     have to swap U and V.  The computed cofactor will be the\n     \"smallest\" one, which is faster to produce.  The wanted one will\n     be computed here; this is needed anyway when both are requested.  */\n\n  asize = ABSIZ (a);\n  bsize = ABSIZ (b);\n\n  if (asize < bsize)\n    {\n      MPZ_SRCPTR_SWAP (a, b);\n      MP_SIZE_T_SWAP (asize, bsize);\n      MPZ_PTR_SWAP (s, t);\n    }\n\n  if (bsize == 0)\n    {\n      /* g = |a|, s = sgn(a), t = 0. */\n      ssize = SIZ (a) >= 0 ? 
(asize != 0) : -1;\n\n      gp = MPZ_REALLOC (g, asize);\n      MPN_COPY (gp, PTR (a), asize);\n      SIZ (g) = asize;\n\n      if (t != NULL)\n\tSIZ (t) = 0;\n      if (s != NULL)\n\t{\n\t  SIZ (s) = ssize;\n\t  PTR (s)[0] = 1;\n\t}\n      return;\n    }\n\n  TMP_MARK;\n\n  TMP_ALLOC_LIMBS_2 (tmp_ap, asize, tmp_bp, bsize);\n  MPN_COPY (tmp_ap, PTR (a), asize);\n  MPN_COPY (tmp_bp, PTR (b), bsize);\n\n  TMP_ALLOC_LIMBS_2 (tmp_gp, bsize, tmp_sp, bsize + 1);\n\n  gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, tmp_ap, asize, tmp_bp, bsize);\n\n  ssize = ABS (tmp_ssize);\n  tmp_ssize = SIZ (a) >= 0 ? tmp_ssize : -tmp_ssize;\n\n  if (t != NULL)\n    {\n      mpz_t x;\n      __mpz_struct gtmp, stmp;\n\n      PTR (&gtmp) = tmp_gp;\n      SIZ (&gtmp) = gsize;\n\n      PTR (&stmp) = tmp_sp;\n      SIZ (&stmp) = tmp_ssize;\n\n      MPZ_TMP_INIT (x, ssize + asize + 1);\n      mpz_mul (x, &stmp, a);\n      mpz_sub (x, &gtmp, x);\n      mpz_divexact (t, x, b);\n    }\n\n  if (s != NULL)\n    {\n      mp_ptr sp;\n\n      sp = MPZ_REALLOC (s, ssize);\n      MPN_COPY (sp, tmp_sp, ssize);\n      SIZ (s) = tmp_ssize;\n    }\n\n  gp = MPZ_REALLOC (g, gsize);\n  MPN_COPY (gp, tmp_gp, gsize);\n  SIZ (g) = gsize;\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/get_d.c",
    "content": "/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.\n\nCopyright 1996, 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\ndouble\nmpz_get_d (mpz_srcptr z)\n{\n  mp_size_t size;\n\n  size = SIZ (z);\n  if (UNLIKELY (size == 0))\n    return 0.0;\n\n  return mpn_get_d (PTR (z), ABS (size), size, 0L);\n}\n"
  },
  {
    "path": "mpz/get_d_2exp.c",
    "content": "/* double mpz_get_d_2exp (signed long int *exp, mpz_t src).\n\nCopyright 2001, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#ifndef _MSC_VER\nstatic\n#endif\nmpir_si\nmpz_get_2exp_d (double *r, mpz_srcptr src)\n{\n  mp_size_t size, abs_size;\n  mp_srcptr ptr;\n  int cnt;\n  mpir_si exp;\n\n  size = SIZ(src);\n  if (UNLIKELY (size == 0))\n    {\n\t  *r = 0.0;\n\t  return 0;\n    }\n\n  ptr = PTR(src);\n  abs_size = ABS(size);\n  count_leading_zeros (cnt, ptr[abs_size - 1]);\n  exp = abs_size * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);\n  *r = mpn_get_d (ptr, abs_size, size, -exp);\n  return exp;\n}\n\ndouble\nmpz_get_d_2exp(signed long *exp2, mpz_srcptr src)\n{\n\tdouble r;\n\t*exp2 = mpz_get_2exp_d(&r, src);\n\treturn r;\n}\n"
  },
  {
    "path": "mpz/get_si.c",
    "content": "/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_si\nmpz_get_si (mpz_srcptr z)\n{\n  mp_ptr zp = z->_mp_d;\n  mp_size_t size = z->_mp_size;\n  mp_limb_t zl = zp[0];\n\n#if GMP_NAIL_BITS != 0\n  if (GMP_UI_MAX > GMP_NUMB_MAX && ABS (size) >= 2)\n    zl |= zp[1] << GMP_NUMB_BITS;\n#endif\n\n  if (size > 0)\n    return (mpir_si) zl & GMP_SI_MAX;\n  else if (size < 0)\n    /* This expression is necessary to properly handle 0x80000000 */\n    return ~(((mpir_si) zl - 1L) & GMP_SI_MAX);\n  else\n    return 0;\n}\n"
  },
  {
    "path": "mpz/get_str.c",
    "content": "/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision\n   number MP_SRC to a string STRING of base BASE.  If STRING is NULL\n   allocate space for the result.  In any case, return a pointer to the\n   result.  If STRING is not NULL, the caller must ensure enough space is\n   available to store the result.\n\nCopyright 1991, 1993, 1994, 1996, 2000-2002, 2005, 2012 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  
*/\n\n#include <string.h> /* for strlen */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nchar *\nmpz_get_str (char *res_str, int base, mpz_srcptr x)\n{\n  mp_ptr xp;\n  mp_size_t x_size = SIZ (x);\n  char *return_str;\n  size_t str_size;\n  size_t alloc_size = 0;\n  const char *num_to_text;\n  int i;\n  TMP_DECL;\n\n  if (base >= 0)\n    {\n      num_to_text = \"0123456789abcdefghijklmnopqrstuvwxyz\";\n      if (base <= 1)\n\tbase = 10;\n      else if (base > 36)\n\t{\n\t  num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\";\n\t  if (base > 62)\n\t    return NULL;\n\t}\n    }\n  else\n    {\n      base = -base;\n      if (base <= 1)\n\tbase = 10;\n      else if (base > 36)\n\treturn NULL;\n      num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\";\n    }\n\n  /* allocate string for the user if necessary */\n  if (res_str == NULL)\n    {\n      /* digits, null terminator, possible minus sign */\n      MPN_SIZEINBASE (alloc_size, PTR(x), ABS(x_size), base);\n      alloc_size += 1 + (x_size<0);\n      res_str = (char *) (*__gmp_allocate_func) (alloc_size);\n    }\n  return_str = res_str;\n\n  if (x_size < 0)\n    {\n      *res_str++ = '-';\n      x_size = -x_size;\n    }\n\n  /* mpn_get_str clobbers its input on non power-of-2 bases */\n  TMP_MARK;\n  xp = PTR (x);\n  if (! POW2_P (base))\n    {\n      xp = TMP_ALLOC_LIMBS (x_size | 1);  /* |1 in case x_size==0 */\n      MPN_COPY (xp, PTR (x), x_size);\n    }\n\n  str_size = mpn_get_str ((unsigned char *) res_str, base, xp, x_size);\n  ASSERT (alloc_size == 0 || str_size <= alloc_size - (SIZ(x) < 0));\n\n  /* Convert result to printable chars.  
*/\n  for (i = 0; i < str_size; i++)\n    res_str[i] = num_to_text[(int) res_str[i]];\n  res_str[str_size] = 0;\n\n  TMP_FREE;\n\n  /* if allocated then resize down to the actual space required */\n  if (alloc_size != 0)\n    {\n      size_t  actual_size = str_size + 1 + (res_str - return_str);\n      ASSERT (actual_size == strlen (return_str) + 1);\n      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (return_str, alloc_size, actual_size,\n\t\t\t\t\tchar);\n    }\n  return return_str;\n}\n"
  },
  {
    "path": "mpz/get_sx.c",
    "content": "/* \nmpz_get_sx(mpz z) -- return an intmax_t integer that is the corresponding\nleast significant integer part of z, with the same sign as z.\n\nCopyright 2011 Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nintmax_t\nmpz_get_sx (mpz_srcptr z)\n{\n#if NLIMBS == 1\n    uintmax_t v = (intmax_t)(z->_mp_size ? z->_mp_d[0] : 0);\n#else\n    uintmax_t v  = 0, n = MIN(NLIMBS, ABS(z->_mp_size));\n    while(n--)\n        v = (v << GMP_NUMB_BITS) | z->_mp_d[n];\n#endif\n    return z->_mp_size < 0 ? -v : v;\n}\n\n#endif\n"
  },
  {
    "path": "mpz/get_ui.c",
    "content": "/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.\n\nCopyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_get_ui 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/get_ux.c",
    "content": "/* \nmpz_get_ux(mpz z) -- return a uintmax_t integer that is the corresponding\nleast significant integer part of z, ignoring its sign.\n\nCopyright 2011 Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nuintmax_t\nmpz_get_ux (mpz_srcptr z)\n{\n#if NLIMBS == 1\n    return (uintmax_t)(z->_mp_size ? z->_mp_d[0] : 0);\n#else\n    uintmax_t v = 0, n = MIN(NLIMBS, ABS(z->_mp_size));\n    while(n--)\n        v = (v << GMP_NUMB_BITS) | z->_mp_d[n];\n    return v;\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "mpz/getlimbn.c",
    "content": "/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.\n\nCopyright 1993, 1994, 1995, 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_getlimbn 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/hamdist.c",
    "content": "/* mpz_hamdist -- calculate hamming distance.\n\nCopyright 1994, 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nmp_bitcnt_t\nmpz_hamdist (mpz_srcptr u, mpz_srcptr v)\n{\n  mp_srcptr      up, vp;\n  mp_size_t      usize, vsize;\n  mp_bitcnt_t    count;\n\n  usize = SIZ(u);\n  vsize = SIZ(v);\n\n  up = PTR(u);\n  vp = PTR(v);\n\n  if (usize >= 0)\n    {\n      if (vsize < 0)\n        return ~ (mp_bitcnt_t) 0;\n\n      /* positive/positive */\n\n      if (usize < vsize)\n        MPN_SRCPTR_SWAP (up,usize, vp,vsize);\n\n      count = 0;\n      if (vsize != 0)\n        count = mpn_hamdist (up, vp, vsize);\n\n      usize -= vsize;\n      if (usize != 0)\n        count += mpn_popcount (up + vsize, usize);\n\n      return count;\n    }\n  else\n    {\n      mp_limb_t  ulimb, vlimb;\n      mp_size_t  old_vsize, step;\n\n      if (vsize >= 0)\n        return ~ (mp_limb_t) 0;\n\n      /* negative/negative */\n\n      usize = -usize;\n      vsize = -vsize;\n\n      /* skip common low zeros */\n      for (;;)\n        {\n          ASSERT (usize > 0);\n          ASSERT (vsize > 0);\n\n          usize--;\n          vsize--;\n\n          ulimb = *up++;\n          vlimb = *vp++;\n\n          if (ulimb != 0)\n        
    break;\n\n          if (vlimb != 0)\n            {\n              MPN_SRCPTR_SWAP (up,usize, vp,vsize);\n              ulimb = vlimb;\n              vlimb = 0;\n              break;\n            }\n        }\n\n      /* twos complement first non-zero limbs (ulimb is non-zero, but vlimb\n         might be zero) */\n      ulimb = -ulimb;\n      vlimb = -vlimb;\n      popc_limb (count, (ulimb ^ vlimb) & GMP_NUMB_MASK);\n\n      if (vlimb == 0)\n        {\n          mp_bitcnt_t  twoscount;\n\n          /* first non-zero of v */\n          old_vsize = vsize;\n          do\n            {\n              ASSERT (vsize > 0);\n              vsize--;\n              vlimb = *vp++;\n            }\n          while (vlimb == 0);\n\n          /* part of u corresponding to skipped v zeros */\n          step = old_vsize - vsize - 1;\n          count += step * GMP_NUMB_BITS;\n          step = MIN (step, usize);\n          if (step != 0)\n            {\n              count -= mpn_popcount (up, step);\n              usize -= step;\n              up += step;\n            }\n\n          /* First non-zero vlimb as twos complement, xor with ones\n             complement ulimb.  Note -v^(~0^u) == (v-1)^u. */\n          vlimb--;\n          if (usize != 0)\n            {\n              usize--;\n              vlimb ^= *up++;\n            }\n          popc_limb (twoscount, vlimb);\n          count += twoscount;\n        }\n\n      /* Overlapping part of u and v, if any.  Ones complement both, so just\n         plain hamdist. */\n      step = MIN (usize, vsize);\n      if (step != 0)\n        {\n          count += mpn_hamdist (up, vp, step);\n          usize -= step;\n          vsize -= step;\n          up += step;\n          vp += step;\n        }\n\n      /* Remaining high part of u or v, if any, ones complement but xor\n         against all ones in the other, so plain popcount. 
*/\n      if (usize != 0)\n        {\n        remaining:\n          count += mpn_popcount (up, usize);\n        }\n      else if (vsize != 0)\n        {\n          up = vp;\n          usize = vsize;\n          goto remaining;\n        }\n      return count;\n    }\n}\n"
  },
  {
    "path": "mpz/import.c",
    "content": "/* mpz_import -- set mpz from word data.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n\n#if HAVE_LIMB_BIG_ENDIAN\n#define HOST_ENDIAN     1\n#endif\n#if HAVE_LIMB_LITTLE_ENDIAN\n#define HOST_ENDIAN     (-1)\n#endif\n#ifndef HOST_ENDIAN\nstatic const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;\n#define HOST_ENDIAN     (* (signed char *) &endian_test)\n#endif\n\n\nvoid\nmpz_import (mpz_ptr z, size_t count, int order,\n            size_t size, int endian, size_t nail, const void *data)\n{\n  mp_size_t  zsize;\n  mp_ptr     zp;\n\n  ASSERT (order == 1 || order == -1);\n  ASSERT (endian == 1 || endian == 0 || endian == -1);\n  ASSERT (nail <= 8*size);\n\n  zsize = (count * (8*size - nail) + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;\n  MPZ_REALLOC (z, zsize);\n  zp = PTR(z);\n\n  if (endian == 0)\n    endian = HOST_ENDIAN;\n\n  /* Can't use these special cases with nails currently, since they don't\n     mask out the nail bits in the input data.  
*/\n  if (nail == 0 && GMP_NAIL_BITS == 0)\n    {\n      unsigned  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);\n\n      if (order == -1\n          && size == sizeof (mp_limb_t)\n          && endian == HOST_ENDIAN\n          && align == 0)\n        {\n          MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);\n          goto done;\n        }\n\n      if (order == -1\n          && size == sizeof (mp_limb_t)\n          && endian == - HOST_ENDIAN\n          && align == 0)\n        {\n          MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);\n          goto done;\n        }\n\n      if (order == 1\n          && size == sizeof (mp_limb_t)\n          && endian == HOST_ENDIAN\n          && align == 0)\n        {\n          MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);\n          goto done;\n        }\n    }\n\n  {\n    mp_limb_t      limb, byte, wbitsmask;\n    size_t         i, j, numb, wbytes;\n    mp_size_t      woffset;\n    unsigned char  *dp;\n    int            lbits, wbits;\n\n    numb = size * 8 - nail;\n\n    /* whole bytes to process */\n    wbytes = numb / 8;\n\n    /* partial byte to process */\n    wbits = numb % 8;\n    wbitsmask = (CNST_LIMB(1) << wbits) - 1;\n\n    /* offset to get to the next word after processing wbytes and wbits */\n    woffset = (numb + 7) / 8;\n    woffset = (endian >= 0 ? woffset : -woffset)\n      + (order < 0 ? size : - (mp_size_t) size);\n\n    /* least significant byte */\n    dp = (unsigned char *) data\n      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? 
size-1 : 0);\n\n#define ACCUMULATE(N)                                   \\\n    do {                                                \\\n      ASSERT (lbits < GMP_NUMB_BITS);                   \\\n      ASSERT (limb <= (CNST_LIMB(1) << lbits) - 1);     \\\n                                                        \\\n      limb |= (mp_limb_t) byte << lbits;                \\\n      lbits += (N);                                     \\\n      if (lbits >= GMP_NUMB_BITS)                       \\\n        {                                               \\\n          *zp++ = limb & GMP_NUMB_MASK;                 \\\n          lbits -= GMP_NUMB_BITS;                       \\\n          ASSERT (lbits < (N));                         \\\n          limb = byte >> ((N) - lbits);                 \\\n        }                                               \\\n    } while (0)\n\n    limb = 0;\n    lbits = 0;\n    for (i = 0; i < count; i++)\n      {\n        for (j = 0; j < wbytes; j++)\n          {\n            byte = *dp;\n            dp -= endian;\n            ACCUMULATE (8);\n          }\n        if (wbits != 0)\n          {\n            byte = *dp & wbitsmask;\n            dp -= endian;\n            ACCUMULATE (wbits);\n          }\n        dp += woffset;\n      }\n\n    if (lbits != 0)\n      {\n        ASSERT (lbits <= GMP_NUMB_BITS);\n        ASSERT_LIMB (limb);\n        *zp++ = limb;\n      }\n\n    ASSERT (zp == PTR(z) + zsize);\n\n    /* low byte of word after most significant */\n    ASSERT (dp == (unsigned char *) data\n            + (order < 0 ? count*size : - (mp_size_t) size)\n            + (endian >= 0 ? (mp_size_t) size - 1 : 0));\n\n  }\n\n done:\n  zp = PTR(z);\n  MPN_NORMALIZE (zp, zsize);\n  SIZ(z) = zsize;\n}\n"
  },
  {
    "path": "mpz/init.c",
    "content": "/* mpz_init() -- Make a new multiple precision number with value 0.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init (mpz_ptr x)\n{\n  x->_mp_alloc = 1;\n  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n  x->_mp_size = 0;\n\n#ifdef __CHECKER__\n  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */\n  x->_mp_d[0] = 0;\n#endif\n}\n"
  },
  {
    "path": "mpz/init2.c",
    "content": "/* mpz_init2 -- initialize mpz, with requested size in bits.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init2 (mpz_ptr x, mp_bitcnt_t bits)\n{\n  mp_size_t  limbs;\n  limbs = (bits + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;\n  limbs = MAX (limbs, 1);\n  SIZ(x) = 0;\n  ALLOC(x) = limbs;\n  PTR(x) = __GMP_ALLOCATE_FUNC_LIMBS (limbs);\n\n#ifdef __CHECKER__\n  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */\n  PTR(x)[0] = 0;\n#endif\n}\n"
  },
  {
    "path": "mpz/inits.c",
    "content": "/* mpz_inits() -- Initialize multiple mpz_t variables and set them to 0.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\n#if HAVE_STDARG\nmpz_inits (mpz_ptr x, ...)\n#else\nmpz_inits (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n\n#if HAVE_STDARG\n  va_start (ap, x);\n#else\n  mpz_ptr x;\n  va_start (ap);\n  x = va_arg (ap, mpz_ptr);\n#endif\n\n  while (x != NULL)\n    {\n      mpz_init (x);\n      x = va_arg (ap, mpz_ptr);\n    }\n  va_end (ap);\n}\n"
  },
  {
    "path": "mpz/inp_raw.c",
    "content": "/* mpz_inp_raw -- read an mpz_t in raw format.\n\nCopyright 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* NTOH_LIMB_FETCH fetches a limb which is in network byte order (ie. big\n   endian) and produces a normal host byte order result. 
*/\n\n#if HAVE_LIMB_BIG_ENDIAN\n#define NTOH_LIMB_FETCH(limb, src)  do { (limb) = *(src); } while (0)\n#endif\n\n#if HAVE_LIMB_LITTLE_ENDIAN\n#define NTOH_LIMB_FETCH(limb, src)  BSWAP_LIMB_FETCH (limb, src)\n#endif\n\n#ifndef NTOH_LIMB_FETCH\n#define NTOH_LIMB_FETCH(limb, src)                              \\\n  do {                                                          \\\n    const unsigned char  *__p = (const unsigned char *) (src);  \\\n    mp_limb_t  __limb;                                          \\\n    int        __i;                                             \\\n    __limb = 0;                                                 \\\n    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)               \\\n      __limb = (__limb << 8) | __p[__i];                        \\\n    (limb) = __limb;                                            \\\n  } while (0)\n#endif\n\n/* In order to allow mpz_inp_raw() to be called from MPIR.Net, its implementation has been refactored into three separate functions.\n   Both the contract and implementation of mpz_inp_raw() were unchanged, the split was made in order for MPIR.Net to access intermediate variables.\n   The basic flow of mpz_inp_raw is to 1) read a 4-byte size from file, 2) re-allocate the destination __mpz_struct accordingly, \n   3) read raw limb data from file, and 4) reconstitute the limb data from raw format.\n   Of these, steps 2 and 4 represent the (in-memory) bulk of the entire operation.\n   The new mpz_inp_raw_p() function below performs step 2.  mpz_inp_raw_m() performs step 4.\n   To pass internal state from step to step an mpir_out struct is used.\n   mpz_inp_raw() now performs file I/O for steps 1 and 3, and calls into mpz_inp_raw_m() and mpz_inp_raw_p() for steps 2 and 4 respectively.\n   MPIR.Net performs its own file I/O using different infrastructure, and calls mpz_inp_raw_m() and mpz_inp_raw_p() for the rest. 
*/\nvoid mpz_inp_raw_p (mpz_ptr x, unsigned char* csize_bytes, mpir_out_ptr out)\n{\n  mp_size_t      csize, abs_xsize, i;\n  size_t         abs_csize;\n  mp_ptr         xp;\n\n  csize =\n    (  (mp_size_t) csize_bytes[0] << 24)\n    + ((mp_size_t) csize_bytes[1] << 16)\n    + ((mp_size_t) csize_bytes[2] << 8)\n    + ((mp_size_t) csize_bytes[3]);\n\n  /* Sign extend if necessary.\n     Could write \"csize -= ((csize & 0x80000000L) << 1)\", but that tickles a\n     bug in gcc 3.0 for powerpc64 on AIX.  */\n/* We exclude win32 as msvc gives a spurious warning */\n#if !defined(_WIN32) || defined(_WIN64)\n  if (sizeof (csize) > 4 && csize & 0x80000000L)\n    csize |= (mp_size_t)(-1) << 32;\n#endif\n\n  abs_csize = ABS (csize);\n\n  /* round up to a multiple of limbs */\n  abs_xsize = (abs_csize*8 + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;\n\n  if (abs_xsize != 0)\n    {\n      MPZ_REALLOC (x, abs_xsize);\n      xp = PTR(x);\n\n      /* Get limb boundaries right in the read, for the benefit of the\n         non-nails case.  */\n      xp[0] = 0;\n      out->written = (char *) (xp + abs_xsize) - abs_csize;\n    }\n  out->writtenSize = abs_csize;\n  out->allocatedSize = abs_xsize;\n  SIZ(x) = (csize >= 0 ? abs_xsize : -abs_xsize);\n}\n\nvoid mpz_inp_raw_m(mpz_ptr x, mpir_out_ptr out)\n{\n      mp_ptr         xp, sp, ep;\n      mp_size_t      abs_xsize, i;\n      mp_limb_t      slimb, elimb;\n\n      abs_xsize = out->allocatedSize;\n\n      xp = PTR(x);\n      if (GMP_NAIL_BITS == 0)\n        {\n          /* Reverse limbs to least significant first, and byte swap.  If\n             abs_xsize is odd then on the last iteration elimb and slimb are\n             the same.  It doesn't seem extra code to handle that case\n             separately, to save an NTOH.  
*/\n          sp = xp;\n          ep = xp + abs_xsize-1;\n          for (i = 0; i < (abs_xsize+1)/2; i++)\n            {\n              NTOH_LIMB_FETCH (elimb, ep);\n              NTOH_LIMB_FETCH (slimb, sp);\n              *sp++ = elimb;\n              *ep-- = slimb;\n            }\n        }\n      else\n        {\n          /* It ought to be possible to do the transformation in-place, but\n             for now it's easier to use an extra temporary area.  */\n          mp_limb_t  byte, limb;\n          int        bits;\n          mp_size_t  tpos;\n          mp_ptr     tp;\n          TMP_DECL;\n\n          TMP_MARK;\n          tp = TMP_ALLOC_LIMBS (abs_xsize);\n          limb = 0;\n          bits = 0;\n          tpos = 0;\n          for (i = out->writtenSize-1; i >= 0; i--)\n            {\n              byte = out->written[i];\n              limb |= (byte << bits);\n              bits += 8;\n              if (bits >= GMP_NUMB_BITS)\n                {\n                  ASSERT (tpos < abs_xsize);\n                  tp[tpos++] = limb & GMP_NUMB_MASK;\n                  bits -= GMP_NUMB_BITS;\n                  ASSERT (bits < 8);\n                  limb = byte >> (8 - bits);\n                }\n            }\n          if (bits != 0)\n            {\n              ASSERT (tpos < abs_xsize);\n              tp[tpos++] = limb;\n            }\n          ASSERT (tpos == abs_xsize);\n\n          MPN_COPY (xp, tp, abs_xsize);\n          TMP_FREE;\n        }\n\n      /* GMP 1.x mpz_out_raw wrote high zero bytes, strip any high zero\n         limbs resulting from this.  Should be a non-zero value here, but\n         for safety don't assume that. */\n      MPN_NORMALIZE (xp, abs_xsize);\n      SIZ(x) = (SIZ(x) >= 0 ? 
abs_xsize : -abs_xsize);\n}\n\nsize_t\nmpz_inp_raw(mpz_ptr x, FILE *fp)\n{\n    unsigned char  csize_bytes[4];\n    mpir_out_struct out;\n\n    if (fp == 0)\n        fp = stdin;\n\n    /* 4 bytes for size */\n    if (fread(csize_bytes, sizeof(csize_bytes), 1, fp) != 1)\n        return 0;\n\n    mpz_inp_raw_p(x, csize_bytes, out);\n\n    if (out->writtenSize != 0)\n    {\n        if (fread(out->written, out->writtenSize, 1, fp) != 1)\n            return 0;\n\n        mpz_inp_raw_m(x, out);\n    }\n    return out->writtenSize + 4;\n}\n"
  },
  {
    "path": "mpz/inp_str.c",
    "content": "/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base\n   BASE from stdio stream STREAM and store the result in DEST_INTEGER.\n\n   OF THE FUNCTIONS IN THIS FILE, ONLY mpz_inp_str IS FOR EXTERNAL USE, THE\n   REST ARE INTERNALS AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE\n   CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <ctype.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nextern const unsigned char __gmp_digit_value_tab[];\n#define digit_value_tab __gmp_digit_value_tab\n\n/* shared by mpq_inp_str */\nsize_t\nmpz_inp_str_nowhite (mpz_ptr x, FILE *stream, int base, int c, size_t nread)\n{\n  char *str;\n  size_t alloc_size, str_size;\n  int negative;\n  mp_size_t xsize;\n  const unsigned char *digit_value;\n\n  ASSERT_ALWAYS (EOF == -1);\t/* FIXME: handle this by adding explicit */\n         \t\t\t/* comparisons of c and EOF before each  */\n\t\t\t\t/* read of digit_value[].  
*/\n\n  digit_value = digit_value_tab;\n  if (base > 36)\n    {\n      /* For bases > 36, use the collating sequence\n\t 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */\n      digit_value += 224;\n      if (base > 62)\n\treturn 0;\t\t/* too large base */\n    }\n\n  negative = 0;\n  if (c == '-')\n    {\n      negative = 1;\n      c = getc (stream);\n      nread++;\n    }\n\n  if (c == EOF || digit_value[c] >= (base == 0 ? 10 : base))\n    return 0;\t\t\t/* error if no digits */\n\n  /* If BASE is 0, try to find out the base by looking at the initial\n     characters.  */\n  if (base == 0)\n    {\n      base = 10;\n      if (c == '0')\n\t{\n\t  base = 8;\n\t  c = getc (stream);\n\t  nread++;\n\t  if (c == 'x' || c == 'X')\n\t    {\n\t      base = 16;\n\t      c = getc (stream);\n\t      nread++;\n\t    }\n\t  else if (c == 'b' || c == 'B')\n\t    {\n\t      base = 2;\n\t      c = getc (stream);\n\t      nread++;\n\t    }\n\t}\n    }\n\n  /* Skip leading zeros.  */\n  while (c == '0')\n    {\n      c = getc (stream);\n      nread++;\n    }\n\n  alloc_size = 100;\n  str = (char *) (*__gmp_allocate_func) (alloc_size);\n  str_size = 0;\n\n  while (c != EOF)\n    {\n      int dig;\n      dig = digit_value[c];\n      if (dig >= base)\n\tbreak;\n      if (str_size >= alloc_size)\n\t{\n\t  size_t old_alloc_size = alloc_size;\n\t  alloc_size = alloc_size * 3 / 2;\n\t  str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);\n\t}\n      str[str_size++] = dig;\n      c = getc (stream);\n    }\n  nread += str_size;\n\n  ungetc (c, stream);\n  nread--;\n\n  /* Make sure the string is not empty, mpn_set_str would fail.  */\n  if (str_size == 0)\n    {\n      x->_mp_size = 0;\n    }\n  else\n    {\n      xsize = (((mp_size_t)\n                (str_size / __mp_bases[base].chars_per_bit_exactly))\n               / GMP_NUMB_BITS + 2);\n      MPZ_REALLOC (x, xsize);\n\n      /* Convert the byte array in base BASE to our bignum format.  
*/\n      xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);\n      x->_mp_size = negative ? -xsize : xsize;\n    }\n  (*__gmp_free_func) (str, alloc_size);\n  return nread;\n}\n\nsize_t\nmpz_inp_str (mpz_ptr x, FILE *stream, int base)\n{\n  int c;\n  size_t nread;\n\n  if (stream == 0)\n    stream = stdin;\n\n  nread = 0;\n\n  /* Skip whitespace.  */\n  do\n    {\n      c = getc (stream);\n      nread++;\n    }\n  while (isspace (c));\n\n  return mpz_inp_str_nowhite (x, stream, base, c, nread);\n}\n"
  },
  {
    "path": "mpz/invert.c",
    "content": "/* mpz_invert (inv, x, n).  Find multiplicative inverse of X in Z(N).\n   If X has an inverse, return non-zero and store inverse in INVERSE,\n   otherwise, return 0 and put garbage in INVERSE.\n\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)\n{\n  mpz_t gcd, tmp;\n  mp_size_t xsize, nsize, size;\n  TMP_DECL;\n\n  xsize = SIZ (x);\n  nsize = SIZ (n);\n  xsize = ABS (xsize);\n  nsize = ABS (nsize);\n  size = MAX (xsize, nsize) + 1;\n\n  /* No inverse exists if the leftside operand is 0.  Likewise, no\n     inverse exists if the mod operand is 1.  */\n  if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))\n    return 0;\n\n  TMP_MARK;\n\n  MPZ_TMP_INIT (gcd, size);\n  MPZ_TMP_INIT (tmp, size);\n  mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);\n\n  /* If no inverse existed, return with an indication of that.  */\n  if (SIZ (gcd) != 1 || PTR(gcd)[0] != 1)\n    {\n      TMP_FREE;\n      return 0;\n    }\n\n  /* Make sure we return a positive inverse.  
*/\n  if (SIZ (tmp) < 0)\n    {\n      if (SIZ (n) < 0)\n\tmpz_sub (inverse, tmp, n);\n      else\n\tmpz_add (inverse, tmp, n);\n    }\n  else\n    mpz_set (inverse, tmp);\n\n  TMP_FREE;\n  return 1;\n}\n"
  },
  {
    "path": "mpz/ior.c",
    "content": "/* mpz_ior -- Logical inclusive or.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)\n{\n  mp_srcptr op1_ptr, op2_ptr;\n  mp_size_t op1_size, op2_size;\n  mp_ptr res_ptr;\n  mp_size_t res_size;\n  mp_size_t i;\n  TMP_DECL;\n\n  TMP_MARK;\n  op1_size = op1->_mp_size;\n  op2_size = op2->_mp_size;\n\n  op1_ptr = op1->_mp_d;\n  op2_ptr = op2->_mp_d;\n  res_ptr = res->_mp_d;\n\n  if (op1_size >= 0)\n    {\n      if (op2_size >= 0)\n\t{\n\t  if (op1_size >= op2_size)\n\t    {\n\t      if (res->_mp_alloc < op1_size)\n\t\t{\n\t\t  _mpz_realloc (res, op1_size);\n\t\t  op1_ptr = op1->_mp_d;\n\t\t  op2_ptr = op2->_mp_d;\n\t\t  res_ptr = res->_mp_d;\n\t\t}\n\n\t      if (res_ptr != op1_ptr)\n\t\tMPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,\n\t\t\t  op1_size - op2_size);\n\t      for (i = op2_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] | op2_ptr[i];\n\t      res_size = op1_size;\n\t    }\n\t  else\n\t    {\n\t      if (res->_mp_alloc < op2_size)\n\t\t{\n\t\t  _mpz_realloc (res, op2_size);\n\t\t  
op1_ptr = op1->_mp_d;\n\t\t  op2_ptr = op2->_mp_d;\n\t\t  res_ptr = res->_mp_d;\n\t\t}\n\n\t      if (res_ptr != op2_ptr)\n\t\tMPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,\n\t\t\t  op2_size - op1_size);\n\t      for (i = op1_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] | op2_ptr[i];\n\t      res_size = op2_size;\n\t    }\n\n\t  res->_mp_size = res_size;\n\t  return;\n\t}\n      else /* op2_size < 0 */\n\t{\n\t  /* Fall through to the code at the end of the function.  */\n\t}\n    }\n  else\n    {\n      if (op2_size < 0)\n\t{\n\t  mp_ptr opx;\n\t  mp_limb_t cy;\n\n\t  /* Both operands are negative, so will be the result.\n\t     -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) =\n\t     = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 =\n\t     = ((OP1 - 1) & (OP2 - 1)) + 1      */\n\n\t  op1_size = -op1_size;\n\t  op2_size = -op2_size;\n\n\t  res_size = MIN (op1_size, op2_size);\n\n\t  /* Possible optimization: Decrease mpn_sub precision,\n\t     as we won't use the entire res of both.  */\n\t  opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);\n\t  op1_ptr = opx;\n\n\t  opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);\n\t  op2_ptr = opx;\n\n\t  if (res->_mp_alloc < res_size)\n\t    {\n\t      _mpz_realloc (res, res_size);\n\t      res_ptr = res->_mp_d;\n\t      /* Don't re-read OP1_PTR and OP2_PTR.  They point to\n\t\t temporary space--never to the space RES->_mp_d used\n\t\t to point to before reallocation.  */\n\t    }\n\n\t  /* First loop finds the size of the result.  */\n\t  for (i = res_size - 1; i >= 0; i--)\n\t    if ((op1_ptr[i] & op2_ptr[i]) != 0)\n\t      break;\n\t  res_size = i + 1;\n\n\t  if (res_size != 0)\n\t    {\n\t      /* Second loop computes the real result.  
*/\n\t      for (i = res_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] & op2_ptr[i];\n\n\t      cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);\n\t      if (cy)\n\t\t{\n\t\t  res_ptr[res_size] = cy;\n\t\t  res_size++;\n\t\t}\n\t    }\n\t  else\n\t    {\n\t      res_ptr[0] = 1;\n\t      res_size = 1;\n\t    }\n\n\t  res->_mp_size = -res_size;\n\t  TMP_FREE;\n\t  return;\n\t}\n      else\n\t{\n\t  /* We should compute -OP1 | OP2.  Swap OP1 and OP2 and fall\n\t     through to the code that handles OP1 | -OP2.  */\n          MPZ_SRCPTR_SWAP (op1, op2);\n          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);\n\t}\n    }\n\n  {\n    mp_ptr opx;\n    mp_limb_t cy;\n    mp_size_t res_alloc;\n    mp_size_t count;\n\n    /* Operand 2 negative, so will be the result.\n       -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) =\n       = ~(OP1 | ~(OP2 - 1)) + 1 =\n       = (~OP1 & (OP2 - 1)) + 1      */\n\n    op2_size = -op2_size;\n\n    res_alloc = op2_size;\n\n    opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);\n    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);\n    op2_ptr = opx;\n    op2_size -= op2_ptr[op2_size - 1] == 0;\n\n    if (res->_mp_alloc < res_alloc)\n      {\n\t_mpz_realloc (res, res_alloc);\n\top1_ptr = op1->_mp_d;\n\tres_ptr = res->_mp_d;\n\t/* Don't re-read OP2_PTR.  It points to temporary space--never\n\t   to the space RES->_mp_d used to point to before reallocation.  */\n      }\n\n    if (op1_size >= op2_size)\n      {\n\t/* We can just ignore the part of OP1 that stretches above OP2,\n\t   because the result limbs are zero there.  */\n\n\t/* First loop finds the size of the result.  */\n\tfor (i = op2_size - 1; i >= 0; i--)\n\t  if ((~op1_ptr[i] & op2_ptr[i]) != 0)\n\t    break;\n\tres_size = i + 1;\n\tcount = res_size;\n      }\n    else\n      {\n\tres_size = op2_size;\n\n\t/* Copy the part of OP2 that stretches above OP1, to RES.  
*/\n\tMPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);\n\tcount = op1_size;\n      }\n\n    if (res_size != 0)\n      {\n\t/* Second loop computes the real result.  */\n\tfor (i = count - 1; i >= 0; i--)\n\t  res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];\n\n\tcy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);\n\tif (cy)\n\t  {\n\t    res_ptr[res_size] = cy;\n\t    res_size++;\n\t  }\n      }\n    else\n      {\n\tres_ptr[0] = 1;\n\tres_size = 1;\n      }\n\n    res->_mp_size = -res_size;\n  }\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/iset.c",
    "content": "/* mpz_init_set (src_integer) -- Make a new multiple precision number with\n   a value copied from SRC_INTEGER.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init_set (mpz_ptr w, mpz_srcptr u)\n{\n  mp_ptr wp, up;\n  mp_size_t usize, size;\n\n  usize = u->_mp_size;\n  size = ABS (usize);\n\n  w->_mp_alloc = MAX (size, 1);\n  w->_mp_d = (mp_ptr) (*__gmp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB);\n\n  wp = w->_mp_d;\n  up = u->_mp_d;\n\n  MPN_COPY (wp, up, size);\n  w->_mp_size = usize;\n\n#ifdef __CHECKER__\n  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */\n  if (size == 0)\n    wp[0] = 0;\n#endif\n}\n"
  },
  {
    "path": "mpz/iset_d.c",
    "content": "/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double\n   value VAL.\n\nCopyright 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init_set_d (mpz_ptr dest, double val)\n{\n  dest->_mp_alloc = 1;\n  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n  dest->_mp_size = 0;\n  mpz_set_d (dest, val);\n}\n"
  },
  {
    "path": "mpz/iset_si.c",
    "content": "/* mpz_init_set_si(dest,val) -- Make a new multiple precision in DEST and\n   assign VAL to the new number.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init_set_si (mpz_ptr dest, mpir_si val)\n{\n  mp_size_t size;\n  mp_limb_t vl;\n\n  dest->_mp_alloc = 1;\n  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n\n  vl = (mp_limb_t) (mpir_ui) (val >= 0 ? val : -val);\n\n  dest->_mp_d[0] = vl & GMP_NUMB_MASK;\n  size = vl != 0;\n\n#if GMP_NAIL_BITS != 0\n  if (vl > GMP_NUMB_MAX)\n    {\n      MPZ_REALLOC (dest, 2);\n      dest->_mp_d[1] = vl >> GMP_NUMB_BITS;\n      size = 2;\n    }\n#endif\n\n  dest->_mp_size = val >= 0 ? size : -size;\n}\n"
  },
  {
    "path": "mpz/iset_str.c",
    "content": "/* mpz_init_set_str(string, base) -- Convert the \\0-terminated string\n   STRING in base BASE to a multiple precision integer.  Return a mpz\n   structure representing the integer.  Allow white space in the\n   string.  If BASE == 0 determine the base in the C standard way,\n   i.e.  0xhh...h means base 16, 0oo...o means base 8, otherwise\n   assume base 10.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_init_set_str (mpz_ptr x, const char *str, int base)\n{\n  x->_mp_alloc = 1;\n  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n\n  /* if str has no digits mpz_set_str leaves x->_mp_size unset */\n  x->_mp_size = 0;\n\n#ifdef __CHECKER__\n  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */\n  x->_mp_d[0] = 0;\n#endif\n\n  return mpz_set_str (x, str, base);\n}\n"
  },
  {
    "path": "mpz/iset_sx.c",
    "content": "/* \nmpz_init_set_sx(z, v) -- create a new z and set it to the intmax_t value v\n\nCopyright 2011 Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nvoid\nmpz_init_set_sx (mpz_ptr z, intmax_t v)\n{   uintmax_t i, uv = (v < 0 ? -v : v);\n\n    z->_mp_d = (mp_ptr) (*__gmp_allocate_func) (MAX(1, NLIMBS) * BYTES_PER_MP_LIMB);\n    z->_mp_alloc = MAX(1, NLIMBS);\n\n#if NLIMBS == 1\n    z->_mp_d[0] = (mp_limb_t)uv;\n    z->_mp_size = v < 0 ? -NLIMBS : v ? NLIMBS : 0;\n#else\n    for( i = 0 ; i < NLIMBS && uv; ++i )\n    {\n        z->_mp_d[i] = uv & GMP_NUMB_MASK;\n        uv >>= GMP_NUMB_BITS;\n    }\n    z->_mp_size = v < 0 ? -i : v ? i : 0;\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "mpz/iset_ui.c",
    "content": "/* mpz_init_set_ui(dest,val) -- Make a new multiple precision in DEST and\n   assign VAL to the new number.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_init_set_ui (mpz_ptr dest, mpir_ui val)\n{\n  mp_size_t size;\n\n  dest->_mp_alloc = 1;\n  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n\n  dest->_mp_d[0] = val & GMP_NUMB_MASK;\n  size = val != 0;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (val > GMP_NUMB_MAX)\n    {\n      MPZ_REALLOC (dest, 2);\n      dest->_mp_d[1] = val >> GMP_NUMB_BITS;\n      size = 2;\n    }\n#endif\n\n  dest->_mp_size = size;\n}\n"
  },
  {
    "path": "mpz/iset_ux.c",
    "content": "/*\nmpz_init_set_ux(z, v) -- create a new z and set it to the uintmax_t value v\n\nCopyright 2011  Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nvoid\nmpz_init_set_ux (mpz_ptr z, uintmax_t v)\n{   uintmax_t i, uv;\n\n    z->_mp_d = (mp_ptr) (*__gmp_allocate_func) (MAX(1, NLIMBS) * BYTES_PER_MP_LIMB);\n    z->_mp_alloc = MAX(1, NLIMBS);\n\n#if NLIMBS == 1\n    z->_mp_d[0] = (mp_limb_t)v;\n    z->_mp_size = (v ? NLIMBS : 0);\n#else\n    for( i = 0, uv = v ; i < NLIMBS && uv ; ++i )\n    {\n        z->_mp_d[i] = uv & GMP_NUMB_MASK;\n        uv >>= GMP_NUMB_BITS;\n    }\n    z->_mp_size = (v ? i : 0);\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "mpz/jacobi.c",
    "content": "/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.\n\nCopyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify it\nunder the terms of the GNU Lesser General Public License as published by the\nFree Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\nFITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\nfor more details.\n\nYou should have received a copy of the GNU Lesser General Public License along\nwith the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* This code does triple duty as mpz_jacobi, mpz_legendre and\n   mpz_kronecker. For ABI compatibility, the link symbol is\n   __gmpz_jacobi, not __gmpz_kronecker, even though the latter would\n   be more logical.\n\n   mpz_jacobi could assume b is odd, but the improvements from that seem\n   small compared to other operations, and anything significant should be\n   checked at run-time since we'd like odd b to go fast in mpz_kronecker\n   too.\n\n   mpz_legendre could assume b is an odd prime, but knowing this doesn't\n   present any obvious benefits.  
Result 0 wouldn't arise (unless \"a\" is a\n   multiple of b), but the checking for that takes little time compared to\n   other operations.\n\n   Enhancements:\n\n   mpn_bdiv_qr should be used instead of mpn_tdiv_qr.\n\n*/\n\nint\nmpz_jacobi (mpz_srcptr a, mpz_srcptr b)\n{\n  mp_srcptr  asrcp, bsrcp;\n  mp_size_t  asize, bsize;\n  mp_limb_t  alow, blow;\n  mp_ptr     ap, bp;\n  unsigned   btwos;\n  int        result_bit1;\n  int        res;\n  TMP_DECL;\n\n  asize = SIZ(a);\n  asrcp = PTR(a);\n  alow = asrcp[0];\n\n  bsize = SIZ(b);\n  bsrcp = PTR(b);\n  blow = bsrcp[0];\n\n  /* The MPN jacobi functions require positive a and b, and b odd. So\n     we must to handle the cases of a or b zero, then signs, and then\n     the case of even b.\n  */\n\n  if (bsize == 0)\n    /* (a/0) = [ a = 1 or a = -1 ] */\n    return JACOBI_LS0 (alow, asize);\n\n  if (asize == 0)\n    /* (0/b) = [ b = 1 or b = - 1 ] */\n    return JACOBI_0LS (blow, bsize);\n\n  if ( (((alow | blow) & 1) == 0))\n    /* Common factor of 2 ==> (a/b) = 0 */\n    return 0;\n\n  if (bsize < 0)\n    {\n      /* (a/-1) = -1 if a < 0, +1 if a >= 0 */\n      result_bit1 = (asize < 0) << 1;\n      bsize = -bsize;\n    }\n  else\n    result_bit1 = 0;\n\n  JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);\n\n  count_trailing_zeros (btwos, blow);\n  blow >>= btwos;\n\n  if (bsize > 1 && btwos > 0)\n    {\n      mp_limb_t b1 = bsrcp[1];\n      blow |= b1 << (GMP_NUMB_BITS - btwos);\n      if (bsize == 2 && (b1 >> btwos) == 0)\n\tbsize = 1;\n    }\n\n  if (asize < 0)\n    {\n      /* (-1/b) = -1 iff b = 3 (mod 4) */\n      result_bit1 ^= JACOBI_N1B_BIT1(blow);\n      asize = -asize;\n    }\n\n  JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);\n\n  /* Ensure asize >= bsize. 
Take advantage of the generalized\n     reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */\n\n  if (asize < bsize)\n    {\n      MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);\n      MP_LIMB_T_SWAP (alow, blow);\n\n      /* NOTE: The value of alow (old blow) is a bit subtle. For this code\n\t path, we get alow as the low, always odd, limb of shifted A. Which is\n\t what we need for the reciprocity update below.\n\n\t However, all other uses of alow assumes that it is *not*\n\t shifted. Luckily, alow matters only when either\n\n\t + btwos > 0, in which case A is always odd\n\n\t + asize == bsize == 1, in which case this code path is never\n\t   taken. */\n\n      count_trailing_zeros (btwos, blow);\n      blow >>= btwos;\n\n      if (bsize > 1 && btwos > 0)\n\t{\n\t  mp_limb_t b1 = bsrcp[1];\n\t  blow |= b1 << (GMP_NUMB_BITS - btwos);\n\t  if (bsize == 2 && (b1 >> btwos) == 0)\n\t    bsize = 1;\n\t}\n\n      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);\n    }\n\n  if (bsize == 1)\n    {\n      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);\n\n      if (blow == 1)\n\treturn JACOBI_BIT1_TO_PN (result_bit1);\n\n      if (asize > 1)\n\tJACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);\n\n      return mpn_jacobi_base (alow, blow, result_bit1);\n    }\n\n  /* Allocation strategy: For A, we allocate a working copy only for A % B, but\n     when A is much larger than B, we have to allocate space for the large\n     quotient. We use the same area, pointed to by bp, for both the quotient\n     A/B and the working copy of B. */\n\n  TMP_MARK;\n\n  if (asize >= 2*bsize)\n    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);\n  else\n    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);\n\n  /* In the case of even B, we conceptually shift out the powers of two first,\n     and then divide A mod B. Hence, when taking those powers of two into\n     account, we must use alow *before* the division. 
Doing the actual division\n     first is ok, because the point is to remove multiples of B from A, and\n     multiples of 2^k B are good enough. */\n  if (asize > bsize)\n    mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);\n  else\n    MPN_COPY (ap, asrcp, bsize);\n\n  if (btwos > 0)\n    {\n      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);\n\n      ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));\n      bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;\n    }\n  else\n    MPN_COPY (bp, bsrcp, bsize);\n\n  ASSERT (blow == bp[0]);\n  res = mpn_jacobi_n (ap, bp, bsize,\n\t\t      mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));\n\n  TMP_FREE;\n  return res;\n}\n"
  },
  {
    "path": "mpz/kronsz.c",
    "content": "/* mpz_si_kronecker -- long+mpz Kronecker/Jacobi symbol.\n\nCopyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nint\nmpz_si_kronecker (mpir_si a, mpz_srcptr b)\n{\n  mp_srcptr  b_ptr;\n  mp_limb_t  b_low;\n  mp_size_t  b_size;\n  mp_size_t  b_abs_size;\n  mp_limb_t  a_limb, b_rem;\n  unsigned   twos;\n  int        result_bit1;\n\n#if GMP_NUMB_BITS < BITS_PER_UI\n  if (a > GMP_NUMB_MAX || a < -GMP_NUMB_MAX)\n    {\n      mp_limb_t  alimbs[2];\n      mpz_t      az;\n      ALLOC(az) = numberof (alimbs);\n      PTR(az) = alimbs;\n      mpz_set_si (az, a);\n      return mpz_kronecker (az, b);\n    }\n#endif\n\n  b_size = SIZ (b);\n  if (b_size == 0)\n    return JACOBI_S0 (a);  /* (a/0) */\n\n  /* account for the effect of the sign of b, then ignore it */\n  result_bit1 = JACOBI_BSGN_SS_BIT1 (a, b_size);\n\n  b_ptr = PTR(b);\n  b_low = b_ptr[0];\n  b_abs_size = ABS (b_size);\n\n  if ((b_low & 1) != 0)\n    {\n      /* b odd */\n\n      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);\n      a_limb = (mpir_ui) ABS(a);\n\n      if ((a_limb & 1) == 0)\n        {\n          /* 
(0/b)=1 for b=+/-1, 0 otherwise */\n          if (a_limb == 0)\n            return (b_abs_size == 1 && b_low == 1);\n\n          /* a even, b odd */\n          count_trailing_zeros (twos, a_limb);\n          a_limb >>= twos;\n          /* (a*2^n/b) = (a/b) * twos(n,a) */\n          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);\n        }\n    }\n  else\n    {\n      /* (even/even)=0, and (0/b)=0 for b!=+/-1 */\n      if ((a & 1) == 0)\n        return 0;\n\n      /* a odd, b even\n\n         Establish shifted b_low with valid bit1 for ASGN and RECIP below.\n         Zero limbs stripped are acounted for, but zero bits on b_low are\n         not because they remain in {b_ptr,b_abs_size} for the\n         JACOBI_MOD_OR_MODEXACT_1_ODD. */\n\n      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);\n      if ((b_low & 1) == 0)\n        {\n          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))\n            {\n              /* need b_ptr[1] to get bit1 in b_low */\n              if (b_abs_size == 1)\n                {\n                  /* (a/0x80000000) = (a/2)^(BPML-1) */\n                  if ((GMP_NUMB_BITS % 2) == 0)\n                    result_bit1 ^= JACOBI_TWO_U_BIT1 (a);\n                  return JACOBI_BIT1_TO_PN (result_bit1);\n                }\n\n              /* b_abs_size > 1 */\n              b_low = b_ptr[1] << 1;\n            }\n          else\n            {\n              count_trailing_zeros (twos, b_low);\n              b_low >>= twos;\n            }\n        }\n\n      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);\n      a_limb = (unsigned long) ABS(a);\n    }\n\n  if (a_limb == 1)\n    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */\n\n  /* (a/b*2^n) = (b*2^n mod a / a) * recip(a,b) */\n  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a_limb);\n  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a_limb, b_low);\n  return mpn_jacobi_base (b_rem, a_limb, result_bit1);\n}\n"
  },
  {
    "path": "mpz/kronuz.c",
    "content": "/* mpz_ui_kronecker -- ulong+mpz Kronecker/Jacobi symbol.\n\nCopyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nint\nmpz_ui_kronecker (mpir_ui a, mpz_srcptr b)\n{\n  mp_srcptr  b_ptr;\n  mp_limb_t  b_low;\n  int        b_abs_size;\n  mp_limb_t  b_rem;\n  int        twos;\n  int        result_bit1;\n\n  /* (a/-1)=1 when a>=0, so the sign of b is ignored */\n  b_abs_size = ABSIZ (b);\n\n  if (b_abs_size == 0)\n    return JACOBI_U0 (a);  /* (a/0) */\n\n  if (a > GMP_NUMB_MAX)\n    {\n      mp_limb_t  alimbs[2];\n      mpz_t      az;\n      ALLOC(az) = numberof (alimbs);\n      PTR(az) = alimbs;\n      mpz_set_ui (az, a);\n      return mpz_kronecker (az, b);\n    }\n\n  b_ptr = PTR(b);\n  b_low = b_ptr[0];\n  result_bit1 = 0;\n\n  if (! (b_low & 1))\n    {\n      /* (0/b)=0 for b!=+/-1; and (even/even)=0 */\n      if (! (a & 1))\n        return 0;\n\n      /* a odd, b even\n\n         Establish shifted b_low with valid bit1 for the RECIP below.  
Zero\n         limbs stripped are accounted for, but zero bits on b_low are not\n         because they remain in {b_ptr,b_abs_size} for\n         JACOBI_MOD_OR_MODEXACT_1_ODD. */\n\n      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);\n      if (! (b_low & 1))\n        {\n          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))\n            {\n              /* need b_ptr[1] to get bit1 in b_low */\n              if (b_abs_size == 1)\n                {\n                  /* (a/0x80...00) == (a/2)^(NUMB-1) */\n                  if ((GMP_NUMB_BITS % 2) == 0)\n                    {\n                      /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1\n                         when GMP_NUMB_BITS is even, so it's still 0. */\n                      ASSERT (result_bit1 == 0);\n                      result_bit1 = JACOBI_TWO_U_BIT1 (a);\n                    }\n                  return JACOBI_BIT1_TO_PN (result_bit1);\n                }\n\n              /* b_abs_size > 1 */\n              b_low = b_ptr[1] << 1;\n            }\n          else\n            {\n              count_trailing_zeros (twos, b_low);\n              b_low >>= twos;\n            }\n        }\n    }\n  else\n    {\n      if (a == 0)        /* (0/b)=1 for b=+/-1, 0 otherwise */\n        return (b_abs_size == 1 && b_low == 1);\n\n      if (! (a & 1))\n        {\n          /* a even, b odd */\n          count_trailing_zeros (twos, a);\n          a >>= twos;\n          /* (a*2^n/b) = (a/b) * (2/a)^n */\n          result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);\n        }\n    }\n\n  if (a == 1)\n    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */\n\n  /* (a/b*2^n) = (b*2^n mod a / a) * RECIP(a,b) */\n  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a);\n  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b_low);\n  return mpn_jacobi_base (b_rem, (mp_limb_t) a, result_bit1);\n}\n"
  },
  {
    "path": "mpz/kronzs.c",
    "content": "/* mpz_kronecker_si -- mpz+long Kronecker/Jacobi symbol.\n\nCopyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* After the absolute value of b is established it's treated as an unsigned\n   long, because 0x80..00 doesn't fit in a signed long. */\n\nint\nmpz_kronecker_si (mpz_srcptr a, mpir_si b)\n{\n  mp_srcptr  a_ptr;\n  mp_size_t  a_size;\n  mp_limb_t  a_rem, b_limb;\n  int        result_bit1;\n\n  a_size = SIZ(a);\n  if (a_size == 0)\n    return JACOBI_0S (b);\n\n#if GMP_NUMB_BITS < BITS_PER_UI\n  if (b > GMP_NUMB_MAX || b < -GMP_NUMB_MAX)\n    {\n      mp_limb_t  blimbs[2];\n      mpz_t      bz;\n      ALLOC(bz) = numberof (blimbs);\n      PTR(bz) = blimbs;\n      mpz_set_si (bz, b);\n      return mpz_kronecker (a, bz);\n    }\n#endif\n\n  result_bit1 = JACOBI_BSGN_SS_BIT1 (a_size, b);\n  b_limb = (mpir_ui) ABS (b);\n  a_ptr = PTR(a);\n\n  if ((b_limb & 1) == 0)\n    {\n      mp_limb_t  a_low = a_ptr[0];\n      int        twos;\n\n      if (b_limb == 0)\n        return JACOBI_LS0 (a_low, a_size);   /* (a/0) */\n\n      if (! 
(a_low & 1))\n        return 0;  /* (even/even)=0 */\n\n      /* (a/2)=(2/a) for a odd */\n      count_trailing_zeros (twos, b_limb);\n      b_limb >>= twos;\n      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, a_low);\n    }\n\n  if (b_limb == 1)\n    return JACOBI_BIT1_TO_PN (result_bit1);  /* (a/1)=1 for any a */\n\n  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a_size, b_limb);\n  a_size = ABS(a_size);\n\n  /* (a/b) = (a mod b / b) */\n  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b_limb);\n  return mpn_jacobi_base (a_rem, b_limb, result_bit1);\n}\n\n\n"
  },
  {
    "path": "mpz/kronzu.c",
    "content": "/* mpz_kronecker_ui -- mpz+ulong Kronecker/Jacobi symbol.\n\nCopyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nint\nmpz_kronecker_ui (mpz_srcptr a, mpir_ui b)\n{\n  mp_srcptr  a_ptr;\n  mp_size_t  a_size;\n  mp_limb_t  a_rem;\n  int        result_bit1;\n\n  a_size = SIZ(a);\n  if (a_size == 0)\n    return JACOBI_0U (b);\n\n  if (b > GMP_NUMB_MAX)\n    {\n      mp_limb_t  blimbs[2];\n      mpz_t      bz;\n      ALLOC(bz) = numberof (blimbs);\n      PTR(bz) = blimbs;\n      mpz_set_ui (bz, b);\n      return mpz_kronecker (a, bz);\n    }\n\n  a_ptr = PTR(a);\n  if ((b & 1) != 0)\n    {\n      result_bit1 = JACOBI_ASGN_SU_BIT1 (a_size, b);\n    }\n  else\n    {\n      mp_limb_t  a_low = a_ptr[0];\n      int        twos;\n\n      if (b == 0)\n        return JACOBI_LS0 (a_low, a_size);   /* (a/0) */\n\n      if (! 
(a_low & 1))\n        return 0;  /* (even/even)=0 */\n\n      /* (a/2)=(2/a) for a odd */\n      count_trailing_zeros (twos, b);\n      b >>= twos;\n      result_bit1 = (JACOBI_TWOS_U_BIT1 (twos, a_low)\n                     ^ JACOBI_ASGN_SU_BIT1 (a_size, b));\n    }\n\n  if (b == 1)\n    return JACOBI_BIT1_TO_PN (result_bit1);  /* (a/1)=1 for any a */\n\n  a_size = ABS(a_size);\n\n  /* (a/b) = (a mod b / b) */\n  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b);\n  return mpn_jacobi_base (a_rem, (mp_limb_t) b, result_bit1);\n}\n"
  },
  {
    "path": "mpz/lcm.c",
    "content": "/* mpz_lcm -- mpz/mpz least common multiple.\n\nCopyright 1996, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nvoid\nmpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)\n{\n  mpz_t g;\n  mp_size_t usize, vsize, size;\n  TMP_DECL;\n\n  usize = SIZ (u);\n  vsize = SIZ (v);\n  if (usize == 0 || vsize == 0)\n    {\n      SIZ (r) = 0;\n      return;\n    }\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  if (vsize == 1)\n    {\n      mp_limb_t  vl, gl, c;\n      mp_srcptr  up;\n      mp_ptr     rp;\n\n    one:\n      MPZ_REALLOC (r, usize+1);\n\n      up = PTR(u);\n      vl = PTR(v)[0];\n      gl = mpn_gcd_1 (up, usize, vl);\n      vl /= gl;\n\n      rp = PTR(r);\n      c = mpn_mul_1 (rp, up, usize, vl);\n      rp[usize] = c;\n      usize += (c != 0);\n      SIZ(r) = usize;\n      return;\n    }\n\n  if (usize == 1)\n    {\n      usize = vsize;\n      MPZ_SRCPTR_SWAP (u, v);\n      goto one;\n    }\n\n  TMP_MARK;\n  size = MAX (usize, vsize);\n  MPZ_TMP_INIT (g, size);\n\n  mpz_gcd (g, u, v);\n  mpz_divexact (g, u, g);\n  mpz_mul (r, g, v);\n\n  SIZ (r) = ABS (SIZ (r));\t/* result 
always positive */\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/lcm_ui.c",
    "content": "/* mpz_lcm_ui -- least common multiple of mpz and ulong.\n\nCopyright 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\nvoid\nmpz_lcm_ui (mpz_ptr r, mpz_srcptr u, mpir_ui v)\n{\n  mp_size_t      usize;\n  mp_srcptr      up;\n  mp_ptr         rp;\n  mpir_ui         g;\n  mp_limb_t      c;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (v > GMP_NUMB_MAX)\n    {\n      mpz_t vz;\n      mp_limb_t vlimbs[2];\n      vlimbs[0] = v & GMP_NUMB_MASK;\n      vlimbs[1] = v >> GMP_NUMB_BITS;\n      PTR(vz) = vlimbs;\n      SIZ(vz) = 2;\n      mpz_lcm (r, u, vz);\n      return;\n    }\n#endif\n\n  /* result zero if either operand zero */\n  usize = SIZ(u);\n  if (usize == 0 || v == 0)\n    {\n      SIZ(r) = 0;\n      return;\n    }\n  usize = ABS(usize);\n\n  MPZ_REALLOC (r, usize+1);\n\n  up = PTR(u);\n  g = (mpir_ui) mpn_gcd_1 (up, usize, (mp_limb_t) v);\n  v /= g;\n\n  rp = PTR(r);\n  c = mpn_mul_1 (rp, up, usize, (mp_limb_t) v);\n  rp[usize] = c;\n  usize += (c != 0);\n  SIZ(r) = usize;\n}\n"
  },
  {
    "path": "mpz/likely_prime_p.c",
    "content": "/*\nCopyright 2009 Jason Moxham\nCopyright (C) 2008 Peter Shrimpton\nCopyright (C) 2008, 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64\n\n#if GMP_LIMB_BITS == 32\n#define D_BITS 31\n#else \n#define D_BITS 53\n#endif\n\ntypedef struct pair_s\n{\n\tmp_limb_t x, y;\n} n_pair_t;\n\n#define r_shift(in, shift) \\\n   ((shift == GMP_LIMB_BITS) ? 
CNST_LIMB(0) : ((in)>>(shift)))\n\nmp_limb_t mpir_sqrt(mp_limb_t r)\n{\n\tmp_limb_t res, is;\n\n#if GMP_LIMB_BITS == 32\n    float x, z;\n\tunion {\n\t  float f;\n      mp_limb_t l;\n\t} temp;\n\n   mp_limb_t bits32 = (r & GMP_LIMB_HIGHBIT);\n   mp_limb_t r2;\n   /* algorithm can't handle 32 bits */\n   if (bits32) \n   {\n      r2 = r;\n      r >>= 2;\n   }\n\n\ttemp.f = (float) r;\n\ttemp.l = (CNST_LIMB(0xbe6ec85e) - temp.l)>>1; // estimate of 1/sqrt(y) \n\tx =  temp.f;\n\tz =  (float) r*0.5;                        \n   x = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n   is = (mp_limb_t) (x*(double) r);\n   res =  is + ((is+1)*(is+1) <= r);\n   if (!bits32) return res - (res*res > r);\n   else \n   {\n      mp_limb_t sq;\n      res = res - (res*res > r);\n      res <<= 1;\n      sq = res*res;\n      res = res - ((sq > r2) || ((sq ^ r2) & GMP_LIMB_HIGHBIT));\n      sq = (res + 1)*(res + 1);\n      res = res + ((sq <= r2) && !((sq ^ r2) & GMP_LIMB_HIGHBIT));\n      return res;\n   }\n#else\n\t\n\tdouble x, z;\n\tunion {\n\t  double f;\n      mp_limb_t l;\n\t} temp;\n\n   mp_limb_t bits64 = (r & GMP_LIMB_HIGHBIT);\n   mp_limb_t r2;\n   /* algorithm can't handle 64 bits */\n   if (bits64) \n   {\n      r2 = r;\n      r >>= 2;\n   }\n\n\ttemp.f = (double) r;\n\ttemp.l = (CNST_LIMB(0xbfcdd90a00000000) - temp.l)>>1; /* estimate of 1/sqrt(y) */ \n\tx =  temp.f;\n\tz =  (double) r*0.5;                        \n   x = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n\tx = (1.5*x) - (x*x)*(x*z);\n   is = (mp_limb_t) (x*(double) r);\n   res =  is + ((is+1)*(is+1) <= r);\n   if (!bits64) return res - (res*res > r);\n   else \n   {\n      mp_limb_t sq;\n      res = res - (res*res > r);\n      res <<= 1;\n      sq = res*res;\n      res = res - ((sq > r2) || ((sq ^ r2) & GMP_LIMB_HIGHBIT));\n      sq = (res + 1)*(res + 1);\n      res = res + ((sq 
<= r2) && !((sq ^ r2) & GMP_LIMB_HIGHBIT));\n      return res;\n   }\n#endif\n}\n\nstatic\ndouble n_precompute_inverse(mp_limb_t n)\n{\n   return (double) 1 / (double) n;\n}\n\nstatic\nunsigned int BIT_COUNT(mp_limb_t x)\n{\n   unsigned int zeros = GMP_LIMB_BITS;\n   if (x) count_leading_zeros(zeros, x);\n   return GMP_LIMB_BITS - zeros;\n}\n\nstatic\nint mod64[64] = {1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,\n                 0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,\n                 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0}; \n\nstatic\nint mod65[65] = {1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,\n                 0,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,\n                 0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1};\n\nstatic\nint mod63[63] = {1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,\n                 0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,\n                 0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0};\n   \nstatic\nint n_is_square(mp_limb_t x)\n{\n\tmp_limb_t sq;\n\tif (!mod64[x%CNST_LIMB(64)]) return 0;\n   if (!mod63[x%CNST_LIMB(63)]) return 0;\n   if (!mod65[x%CNST_LIMB(65)]) return 0;\n\n   sq = mpir_sqrt(x); \n   \n   return (x == sq*sq);\n}\n\nstatic\nmp_limb_t n_preinvert_limb(mp_limb_t n)\n{\n   mp_limb_t norm, ninv;\n\n   count_leading_zeros(norm, n);\n   invert_limb(ninv, n<<norm);\n\n   return ninv;\n}\n\nstatic\nmp_limb_t n_addmod(mp_limb_t x, mp_limb_t y, mp_limb_t n)\n{\n   if (n - y > x) return x + y;\n   else return x + y - n;\n}\n\nstatic inline\nmp_limb_t n_submod(mp_limb_t x, mp_limb_t y, mp_limb_t n)\n{\n   if (y > x) return x - y + n;\n   else return x - y;\n}\n\nstatic\nmp_limb_t n_mod2_preinv(mp_limb_t a, mp_limb_t n, mp_limb_t ninv)\n{\n   unsigned int norm;\n   mp_limb_t q, r;\n\n   count_leading_zeros(norm, n);\n   udiv_qrnnd_preinv(q, r, r_shift(a, GMP_LIMB_BITS-norm), a<<norm, n<<norm, ninv);\n   return (r>>norm);\n}\n\nstatic\nmp_limb_t n_ll_mod_preinv(mp_limb_t a_hi, mp_limb_t a_lo, \n                                   
          mp_limb_t n, mp_limb_t ninv)\n{\n   mp_limb_t q, r, norm;\n   \n   if (a_hi > n) a_hi = n_mod2_preinv(a_hi, n, ninv);\n\n   count_leading_zeros(norm, n);\n   \n   udiv_qrnnd_preinv(q, r, (a_hi<<norm) + \n      r_shift(a_lo, GMP_LIMB_BITS-norm), a_lo<<norm, n<<norm, ninv);\n\n   return (r>>norm);\n}\n\nstatic\nmp_limb_t n_mulmod_precomp(mp_limb_t a, mp_limb_t b, mp_limb_t n, double npre)\n{\n   mp_limb_t quot = (mp_limb_t) ((double) a * (double) b * npre);\n   mp_limb_signed_t rem = a*b - quot*n;\n   if (rem < 0) \n   {\n      rem += n;\n      if (rem < 0) return rem + n;\n   } else if (rem >= n) return rem - n;\n   return rem;\n}\n\nstatic\nmp_limb_t n_mulmod2_preinv(mp_limb_t a, mp_limb_t b, mp_limb_t n, mp_limb_t ninv)\n{\n   mp_limb_t p1, p2;\n   \n   umul_ppmm(p1, p2, a, b);\n   \n   return n_ll_mod_preinv(p1, p2, n, ninv);\n}\n\nstatic\nmp_limb_t n_powmod_precomp(mp_limb_t a, mp_limb_t exp, mp_limb_t n, double npre)\n{\n   \n   mp_limb_t x, y;\n   mp_limb_t e;\n\n   if (n == CNST_LIMB(1)) return 0L;\n   e = exp;\n   \n   x = CNST_LIMB(1);\n   y = a;\n\n   while (e) \n   {\n      if (e & 1L) x = n_mulmod_precomp(x, y, n, npre);\n      e >>= 1;\n      if (e) y = n_mulmod_precomp(y, y, n, npre);\n   }\n\n   return x;\n}\n\nstatic\nmp_limb_t n_powmod2_preinv(mp_limb_t a, mp_limb_t exp, mp_limb_t n, mp_limb_t ninv)\n{   \n   mp_limb_t x, y;\n   mp_limb_t e;\n   \n   if (n == CNST_LIMB(1)) return CNST_LIMB(0);\n   e = exp;\n   \n   x = CNST_LIMB(1);\n   y = a;\n   while (e) \n   {\n      if (e & 1) x = n_mulmod2_preinv(x, y, n, ninv);\n      e = e >> 1;\n      if (e) y = n_mulmod2_preinv(y, y, n, ninv);\n   }\n\n   return x;\n} \n\nstatic\nmp_limb_t n_powmod(mp_limb_t a, mp_limb_signed_t exp, mp_limb_t n)\n{\n   double npre = n_precompute_inverse(n);\n\n   return n_powmod_precomp(a, exp, n, npre);\n}\n\nstatic\nmp_limb_t n_powmod2(mp_limb_t a, mp_limb_signed_t exp, mp_limb_t n)\n{\n   mp_limb_t ninv = n_preinvert_limb(n);\n\n   return n_powmod2_preinv(a, 
exp, n, ninv);\n}\n\nstatic\nmp_limb_t n_gcd(mp_limb_t x, mp_limb_t y)\n{\n   mp_limb_t u3, v3;\n   mp_limb_t quot, rem;\n   \n   u3 = x; v3 = y;\n   \n   if ((mp_limb_signed_t) (x & y) < 0L) /* x and y both have top bit set */ \n   {\n     quot=u3-v3;\n     u3 = v3;\n     v3 = quot;\n   }\n\n   while ((mp_limb_signed_t) (v3<<1) < 0L) /* second value has second msb set */\n   {\n     quot=u3-v3;\n     if (quot < v3)\n     {\n        u3 = v3;\n        v3 = quot;\n     } else if (quot < (v3<<1))\n     {  \n        u3 = v3;\n        v3 = quot-u3;\n     } else\n     {\n        u3 = v3;\n        v3 = quot-(u3<<1);\n     }\n   }\n   \n   while (v3) {\n      quot=u3-v3;\n      if (u3 < (v3<<2)) /* overflow not possible due to top 2 bits of v3 not being set */\n      {\n         if (quot < v3)\n         {\n            u3 = v3;\n            v3 = quot;\n         } else if (quot < (v3<<1))\n         {  \n            u3 = v3;\n            v3 = quot-u3;\n         } else\n         {\n            u3 = v3;\n            v3 = quot-(u3<<1);\n         }\n      } else\n      {\n         quot=u3/v3;\n         rem = u3 - v3*quot;\n         u3 = v3;\n         v3 = rem;\n      }\n   }\n   \n   return u3;\n}\n\nstatic\nmp_limb_t n_invmod(mp_limb_t x, mp_limb_t y)\n{\n   mp_limb_signed_t v1 = CNST_LIMB(0); \n   mp_limb_signed_t v2 = CNST_LIMB(1); \n   mp_limb_signed_t t2; \n   mp_limb_t u3, v3;\n   mp_limb_t quot, rem;\n   \n   u3 = y, v3 = x;\n   \n   if (v3 > u3)\n   {\n         rem = u3;\n         u3 = v3;\n         t2 = v2; v2 = v1; v1 = t2; v3 = rem;\n   }\n   \n   if ((mp_limb_signed_t) (y & x) < 0L) /* y and x both have top bit set */ \n   {\n     quot=u3-v3;\n     t2 = v2; \n     u3 = v3;\n     v2 = v1 - v2; v1 = t2; v3 = quot;\n   }\n\n   while ((mp_limb_signed_t) (v3<<1) < 0L) /* second value has second msb set */\n   {\n     quot=u3-v3;\n     if (quot < v3)\n     {\n        t2 = v2; \n        u3 = v3;\n        v2 = v1 - v2; v1 = t2; v3 = quot;\n     } else if (quot < (v3<<1))\n    
 {  \n        u3 = v3;\n        t2 = v2; v2 = v1 - (v2<<1); v1 = t2; v3 = quot-u3;\n     } else\n     {\n        u3 = v3;\n        t2 = v2; v2 = v1 - 3*v2; v1 = t2; v3 = quot-(u3<<1);\n     }\n   }\n   \n   while (v3) {\n      quot=u3-v3;\n      if (u3 < (v3<<2)) /* overflow not possible due to top 2 bits of v3 not being set */\n      {\n         if (quot < v3)\n         {\n            t2 = v2; \n            u3 = v3;\n            v2 = v1 - v2; v1 = t2; v3 = quot;\n         } else if (quot < (v3<<1))\n         {  \n            u3 = v3;\n            t2 = v2; v2 = v1 - (v2<<1); v1 = t2; v3 = quot-u3;\n         } else\n         {\n            u3 = v3;\n            t2 = v2; v2 = v1 - 3*v2; v1 = t2; v3 = quot-(u3<<1);\n         }\n      } else\n      {\n         quot=u3/v3;\n         rem = u3 - v3*quot;\n         u3 = v3;\n         t2 = v2; v2 = v1 - quot*v2; v1 = t2; v3 = rem;\n      }\n   }\n\n   if (v1 < 0L) v1 += y;\n      \n   return v1;\n}\n\nstatic\nint n_jacobi(mp_limb_signed_t x, mp_limb_t y)\n{\n\tmp_limb_t a, b, temp;\n\tint s, exp;\n\t\n   a = x;\n\tb = y;\n\ts = 1;\n\n\tif (x < 0L)\n\t{\n\t\tif (((b - 1)/2)%2 == CNST_LIMB(1))\n\t\t   s = -s;\n\t\ta = -x;\n\t} \n\n   if ((a < b) && (b != CNST_LIMB(1)))\n   {\n      if (a == CNST_LIMB(0)) return 0;\n      \n      temp = a;\n      a = b;\n      b = temp;\n\n      count_trailing_zeros(exp, b);\n\t   b>>=exp;\n\n      if (((exp*(a*a - 1))/8)%2 == CNST_LIMB(1)) /* only want values mod 8, */\n\t\t   s = -s;                        /* so overflows don't matter here */\n\n\t\tif ((((a - 1)*(b - 1))/4)%2 == CNST_LIMB(1)) /* only want values mod 4, */\n\t\t   s = -s;                          /* so overflows don't matter here */\n   }\n\n\twhile (b != CNST_LIMB(1))\n\t{\n      if ((a>>2) < b)\n      {\n         temp = a - b;\n         a = b;         \n         if (temp < b)\n            b = temp;\n         else if (temp < (b<<1)) \n            b = temp - a;\n         else\n            b = temp - (a<<1);\n      } else\n   
   {\n         temp = (a%b);\n         a = b;\n         b = temp;\n      }\n\n      if (b == CNST_LIMB(0)) return 0;\n      \n      count_trailing_zeros(exp, b);\n\t   b>>=exp;\n\n      if (((exp*(a*a - 1))/8)%2 == CNST_LIMB(1)) /* only want values mod 8, */ \n\t\t   s = -s;                        /* so overflows don't matter here */\n\n\t\tif ((((a - 1)*(b - 1))/4)%2 == CNST_LIMB(1)) /* only want values mod 4, */\n\t\t   s = -s;                          /* so overflows don't matter here */\n\t}\n\n\treturn s;\n}\n\nstatic\nint n_is_pseudoprime_fermat(mp_limb_t n, mp_limb_t i)\n{\n\tif (BIT_COUNT(n) <= D_BITS) return (n_powmod(i, n - 1, n) == CNST_LIMB(1));\n   else \n   {\n      if ((mp_limb_signed_t) (n - 1) < 0L)\n      {\n         mp_limb_t temp = n_powmod2(i, (n - 1)/2, n);\n         return (n_powmod2(temp, 2, n) == CNST_LIMB(1));\n      } else\n      {\n         return (n_powmod2(i, n - 1, n) == CNST_LIMB(1));\n      }\n   }\n}\n\nstatic\nint n_is_strong_pseudoprime_precomp(mp_limb_t n, double npre, mp_limb_t a, mp_limb_t d)\n{\n   mp_limb_t t = d;\n   mp_limb_t y;\n      \n   y = n_powmod_precomp(a, t, n, npre);\n\n   if (y == CNST_LIMB(1)) return 1;\n   t <<= 1;\n\n   while ((t != n - 1) && (y != n - 1))\n   {\n      y = n_mulmod_precomp(y, y, n, npre);\n      t <<= 1;\n   }\n   \n   return (y == n - 1);\n}\n\nstatic\nint n_is_strong_pseudoprime2_preinv(mp_limb_t n, mp_limb_t ninv, mp_limb_t a, mp_limb_t d)\n{\n   mp_limb_t t = d;\n   mp_limb_t y;\n      \n   y = n_powmod2_preinv(a, t, n, ninv);\n\n   if (y == CNST_LIMB(1)) return 1;\n   t <<= 1;\n\n   while ((t != n - 1) && (y != n - 1))\n   {\n      y = n_mulmod2_preinv(y, y, n, ninv);\n      t <<= 1;\n   }\n   \n   return (y == n - 1);\n}\n\nstatic\nn_pair_t fchain_precomp(mp_limb_t m, mp_limb_t n, double npre)\n{\n\tn_pair_t current, old;\n\tint length;\n\tmp_limb_t power, xy, xx, yy;\n\t\n\told.x = CNST_LIMB(2);\n\told.y = n - CNST_LIMB(3);\n\t\n\tlength = BIT_COUNT(m);\n\tpower = 
(CNST_LIMB(1)<<(length-1));\n\n\tfor ( ; length > 0; length--)\n\t{\n\t\txy = n_mulmod_precomp(old.x, old.y, n, npre);\n\t\t\n\t\txy = n_addmod(xy, CNST_LIMB(3), n);\n\t\t\n\t\tif (m & power)\n\t\t{\n\t\t\tcurrent.y = n_submod(n_mulmod_precomp(old.y, old.y, n, npre), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xy;\n\t\t} else \n\t\t{\n\t\t\tcurrent.x = n_submod(n_mulmod_precomp(old.x, old.x, n, npre), CNST_LIMB(2), n);\n\t\t\tcurrent.y = xy;\n\t\t}\n\n\t\tpower >>= 1;\n\t\told = current;\n\t}\n\n\treturn current;\n}\n\nstatic\nn_pair_t fchain2_preinv(mp_limb_t m, mp_limb_t n, mp_limb_t ninv)\n{\n\tn_pair_t current, old;\n\tint length;\n\tmp_limb_t power, xy, xx, yy;\n\t\n\told.x = CNST_LIMB(2);\n\told.y = n - CNST_LIMB(3);\n\t\n\tlength = BIT_COUNT(m);\n\tpower = (CNST_LIMB(1)<<(length-1));\n\n\tfor ( ; length > 0; length--)\n\t{\n\t\txy = n_mulmod2_preinv(old.x, old.y, n, ninv);\n\t\t\n\t\txy = n_addmod(xy, CNST_LIMB(3), n);\n\t\t\n\t\tif (m & power)\n\t\t{\n\t\t\tcurrent.y = n_submod(n_mulmod2_preinv(old.y, old.y, n, ninv), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xy;\n\t\t} else \n\t\t{\n\t\t\tcurrent.x = n_submod(n_mulmod2_preinv(old.x, old.x, n, ninv), CNST_LIMB(2), n);\n\t\t\tcurrent.y = xy;\n\t\t}\n\n\t\tpower >>= 1;\n\t\told = current;\n\t}\n\n\treturn current;\n}\n\nstatic\nint n_is_pseudoprime_fibonacci(mp_limb_t n)\n{\n\tmp_limb_t m, left, right;\n\tn_pair_t V;\n\n   if (ABS((mp_limb_signed_t) n) <= CNST_LIMB(3))\n   {\n      if (n >= CNST_LIMB(2)) return 1;\n      return 0;\n   }\n\n\tm = (n - n_jacobi(CNST_LIMB(5), n))/2; /* no overflow as (5/n) = 0 for n = 2^64 - 1 */\n\n   if (BIT_COUNT(n) <= D_BITS)\n   {\n      double npre = n_precompute_inverse(n);\n\t\n      V = fchain_precomp(m, n, npre);\n\t   return (n_mulmod_precomp(n - CNST_LIMB(3), V.x, n, npre) \n           == n_mulmod_precomp(CNST_LIMB(2), V.y, n, npre));\n   } else\n   {\n      mp_limb_t ninv = n_preinvert_limb(n);\n\t\n      V = fchain2_preinv(m, n, ninv);\n\t   return (n_mulmod2_preinv(n - 
CNST_LIMB(3), V.x, n, ninv) \n           == n_mulmod2_preinv(CNST_LIMB(2), V.y, n, ninv));\n  }\n}\n\nstatic\nn_pair_t lchain_precomp(mp_limb_t m, mp_limb_t a, mp_limb_t n, double npre)\n{\n\tn_pair_t current, old;\n\tint length, i;\n\tmp_limb_t power, xy, xx, yy;\n\t\n\told.x = CNST_LIMB(2);\n\told.y = a;\n\t\n\tlength = BIT_COUNT(m);\n\tpower = (CNST_LIMB(1)<<(length - 1));\n\n\tfor (i = 0; i < length; i++)\n\t{\n\t\txy = n_submod(n_mulmod_precomp(old.x, old.y, n, npre), a, n);\n\t\t\n      if (m & power)\n\t\t{\n\t\t\tyy = n_submod(n_mulmod_precomp(old.y, old.y, n, npre), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xy;\n\t\t\tcurrent.y = yy;\n\t\t} else \n\t\t{\n\t\t\txx = n_submod(n_mulmod_precomp(old.x, old.x, n, npre), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xx;\n\t\t\tcurrent.y = xy;\n\t\t}\n\n\t\tpower >>= 1;\n\t\told = current;\n\t}\n\n\treturn current;\n}\n\nstatic\nn_pair_t lchain2_preinv(mp_limb_t m, mp_limb_t a, mp_limb_t n, mp_limb_t ninv)\n{\n\tn_pair_t current, old;\n\tint length, i;\n\tmp_limb_t power, xy, xx, yy;\n\t\n\told.x = CNST_LIMB(2);\n\told.y = a;\n\t\n\tlength = BIT_COUNT(m);\n\tpower = (CNST_LIMB(1)<<(length - 1));\n\n\tfor (i = 0; i < length; i++)\n\t{\n\t\txy = n_submod(n_mulmod2_preinv(old.x, old.y, n, ninv), a, n);\n\t\t\n      if (m & power)\n\t\t{\n\t\t\tyy = n_submod(n_mulmod2_preinv(old.y, old.y, n, ninv), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xy;\n\t\t\tcurrent.y = yy;\n\t\t} else \n\t\t{\n\t\t\txx = n_submod(n_mulmod2_preinv(old.x, old.x, n, ninv), CNST_LIMB(2), n);\n\t\t\tcurrent.x = xx;\n\t\t\tcurrent.y = xy;\n\t\t}\n\n\t\tpower >>= 1;\n\t\told = current;\n\t}\n\n\treturn current;\n}\n\nstatic\nint n_is_pseudoprime_lucas(mp_limb_t n)\n{\n\tint i, D, Q;\n\tmp_limb_t A;\n\tmp_limb_t left, right;\n\tn_pair_t V;\n\n\tD = 0;\n\tQ = 0;\n\t\n\tif (((n % 2) == 0) || (ABS((mp_limb_signed_t) n) <= 2))\n\t{\n\t\tif (n == CNST_LIMB(2)) return 1;\n\t\telse return 0;\n\t}\n\t\n\tfor (i = 0; i < 100; i++)\n\t{\n\t\tD = 5 + 2*i;\n\t\tif (n_gcd(D, n%D) 
!= CNST_LIMB(1)) \n      { \n         if (n == D) continue;\n         else return 0;\n      }\n\t\tif (i % 2 == 1) D = -D;\n\t\tif (n_jacobi(D, n) == -1) break;\n\t}\n\n\tif (i == 100)\n\t{\n\t\tif (n_is_square(n)) return -1;\n\t\telse return 1;\n\t}\n   \n   Q = (1 - D)/4;\n   if (Q < 0)\n   {\n      if (n < CNST_LIMB(52))\n      {\n         while (Q < 0) Q += n;\n         A = n_submod(n_invmod(Q, n), CNST_LIMB(2), n);\n      } else\n         A = n_submod(n_invmod(Q + n, n), CNST_LIMB(2), n);\n   } else\n   {\n      if (n < CNST_LIMB(52))\n      {\n         while (Q >= n) Q -= n;\n         A = n_submod(n_invmod(Q, n), CNST_LIMB(2), n);\n      } else\n         A = n_submod(n_invmod(Q, n), CNST_LIMB(2), n);\n   }\n\n   if (BIT_COUNT(n) <= D_BITS)\n   {\n      double npre = n_precompute_inverse(n);\n\t   V = lchain_precomp(n + 1, A, n, npre);\n\t\n\t   left = n_mulmod_precomp(A, V.x, n, npre);\n\t   right = n_mulmod_precomp(2, V.y, n, npre);\n   } else\n   {\n      mp_limb_t ninv = n_preinvert_limb(n);\n\t   V = lchain2_preinv(n + 1, A, n, ninv);\n\t\n\t   left = n_mulmod_precomp(A, V.x, n, ninv);\n\t   right = n_mulmod_precomp(2, V.y, n, ninv);\n   }\n\t\n\treturn (left == right);\n}\n\nint mpir_is_likely_prime_BPSW(mp_limb_t n)\n{\n\tif (n <= CNST_LIMB(1)) return 0;\n\n   if ((n & CNST_LIMB(1)) == CNST_LIMB(0))\n   {\n      if (n == CNST_LIMB(2)) return 1;\n      return 0;\n   }\n   \n   if (((n % CNST_LIMB(10)) == CNST_LIMB(3)) || ((n % CNST_LIMB(10)) == CNST_LIMB(7)))\n\t{\n\t\tif (n_is_pseudoprime_fermat(n, 2) == 0) return 0;\n      \n\t\treturn n_is_pseudoprime_fibonacci(n);\t\n\t} else\n\t{\n\t\tmp_limb_t d;\n\n\t\td = n - CNST_LIMB(1);\n\t\twhile ((d & CNST_LIMB(1)) == CNST_LIMB(0)) d >>= 1;\n\n\t\tif (BIT_COUNT(n) <= D_BITS)\n      {\n         double npre = n_precompute_inverse(n);\n         if (n_is_strong_pseudoprime_precomp(n, npre, 2L, d) == 0) return 0;\n      } else\n      {\n         mp_limb_t ninv = n_preinvert_limb(n);\n         if 
(n_is_strong_pseudoprime2_preinv(n, ninv, 2L, d) == 0) return 0;\n      }\n\n\t\treturn (n_is_pseudoprime_lucas(n) == 1);\n\t}\n}\n\n#endif /* GMP_LIMB_BITS */\n\n/* \n   Could have another parameter to specify what \"likely\" means\n   i.e. for factoring, for RSA or to state that we have already done \n   trial div\n*/\n\n/* \n   could call it mpz_likely_composite_p then when true return more info, \n   i.e. a factor \n*/\nint\nmpz_likely_prime_p (mpz_srcptr N, gmp_randstate_t STATE, mpir_ui td)\n{\n  int d, t, r;\n  unsigned long tdlim, i;\n  mpz_t base, nm1, x, e, n;\n\n  ALLOC (n) = ALLOC(N);\n  SIZ (n) = ABSIZ(N);\n  PTR (n) = PTR(N);\t\t/* fake up an absolute value that we don't have de-allocate */\n\n  /* algorithm does not handle small values, get rid of them here */\n  if (mpz_cmp_ui (n, 2) == 0 || mpz_cmp_ui (n, 3) == 0)\n    return 1;\n\n  if (mpz_cmp_ui (n, 5) < 0 || mpz_even_p (n))\n    return 0;\n\n#if GMP_LIMB_BITS == 64 || GMP_LIMB_BITS == 32\n  if (SIZ(n) == 1)\n  {\n     return mpir_is_likely_prime_BPSW(PTR(n)[0]);\n  }\n#endif\n\n/* \n   For factoring purposes we assume we know nothing about N i.e. it is \n   a random integer. Therefore it has a good chance of factoring by small \n   divisiors. So try trial division as its fast and it checks small \n   divisors. Checking for other divisors is not worth it even if the test \n   is fast as we have random integer so only small divisors are common\n   enough. 
Remember this is not exact so it doesn't matter if we miss a \n   few divisors\n*/\n  tdlim = mpz_sizeinbase(n, 2);\n  tdlim = MAX(1000, tdlim);\n  \n  d = mpz_trial_division(n, 3, tdlim);\n  \n  if (d != 0)\n  {\n     if (mpz_cmp_ui(n, d) == 0)\n        return 1;\n     \n     return 0;\n  }\n  \n  if (mpz_cmp_ui (n, tdlim * tdlim) < 0)\n     return 1;\t/* if tdlim*tdlim overflows, n is not a single limb so can't be true */\n  \n  ASSERT (mpz_odd_p(n));\n  ASSERT (mpz_cmp_ui (n, 5) >= 0);\t/* so we can choose a base */\n\n  /* now do strong pseudoprime test */\n  /* get random base, for now choose any size, later choose a small one */\n  mpz_init (base);\n  mpz_init_set (nm1, n);\n  mpz_sub_ui (nm1, nm1, 1);\n\n  mpz_init (e);\n  mpz_init (x);\n     \n  r = 1;\n  \n  for (i = 0; i < 10; i++) /* try LP_ITERS random bases */\n  {\n     do\n     {\n        mpz_urandomm(base, STATE, nm1);\n     } while (mpz_cmp_ui(base, 1) <= 0);\n     \n     /* \n        Base is 2 to n - 2 which implies n >= 4. Only really want a \n        small base, and ignore the rare base = n - 1 condition etc.\n     */\n     t = mpz_scan1(nm1, 0);\t/* 2^t divides nm1 */\n\n     ASSERT(t > 0);\n     \n     mpz_tdiv_q_2exp(e, nm1, t);\t/* e = nm1/2^t */\n     mpz_powm(x, base, e, n);\t/* x = base^e mod n */\n     \n     if (mpz_cmp_ui(x, 1) == 0) \n        continue;\n     \n     if (mpz_cmp(x, nm1) == 0) \n        continue;\n     \n     for (r = 0, t = t - 1; t > 0; t--)\n     {\n        mpz_mul(x, x, x);\n        mpz_mod(x, x, n);\n        \n        if (mpz_cmp(x, nm1) == 0)\n\t     {\n\t        r = 1;\n\t        break;\n\t     }\n        \n        if (mpz_cmp_ui (x, 1) == 0)\n\t        break;\n     }\n     \n     if (r == 1) \n        continue;\n     \n     break;\n  }\n\n  mpz_clear (e);\n  mpz_clear (base);\n  mpz_clear (nm1);\n  mpz_clear (x);\n  \n  return r;\n}\n"
  },
  {
    "path": "mpz/limbs_finish.c",
    "content": "/* mpz_finish_limbs -- Update mpz after writing to the limb array.\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_limbs_finish (mpz_ptr x, mp_size_t xs)\n{\n  mp_size_t xn = ABS(xs);\n  MPN_NORMALIZE (PTR (x), xn);\n  SIZ (x) = xs < 0 ? -xn : xn;\n}\n"
  },
  {
    "path": "mpz/limbs_modify.c",
    "content": "/* mpz_limbs_modify -- Read-and-modify access to the mpn-style limb array.\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_ptr\nmpz_limbs_modify (mpz_ptr x, mp_size_t n)\n{\n  ASSERT (n > 0);\n  return MPZ_REALLOC (x, n);\n}\n"
  },
  {
    "path": "mpz/limbs_read.c",
    "content": "/* mpz_limbs_read -- Read access to the mpn-style limb array.\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_srcptr\nmpz_limbs_read (mpz_srcptr x)\n{\n  return PTR(x);\n}\n"
  },
  {
    "path": "mpz/limbs_write.c",
    "content": "/* mpz_limbs_write -- Write access to the mpn-style limb array.\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_ptr\nmpz_limbs_write (mpz_ptr x, mp_size_t n)\n{\n  ASSERT (n > 0);\n  return MPZ_NEWALLOC (x, n);\n}\n"
  },
  {
    "path": "mpz/lucnum2_ui.c",
    "content": "/* mpz_lucnum2_ui -- calculate Lucas numbers.\n\nCopyright 2001, 2003, 2005, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, mpir_ui n)\n{\n  mp_ptr     lp, l1p, f1p;\n  mp_size_t  size;\n  mp_limb_t  c;\n  TMP_DECL;\n\n  ASSERT (ln != lnsub1);\n\n  /* handle small n quickly, and hide the special case for L[-1]=-1 */\n  if (n <= FIB_TABLE_LUCNUM_LIMIT)\n    {\n      mp_limb_t  f  = FIB_TABLE (n);\n      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);\n\n      /* L[n] = F[n] + 2F[n-1] */\n      PTR(ln)[0] = f + 2*f1;\n      SIZ(ln) = 1;\n\n      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */\n      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);\n      SIZ(lnsub1) = (n == 0 ? 
-1 : 1);\n\n      return;\n    }\n\n  TMP_MARK;\n  size = MPN_FIB2_SIZE (n);\n  f1p = TMP_ALLOC_LIMBS (size);\n\n  lp  = MPZ_REALLOC (ln,     size+1);\n  l1p = MPZ_REALLOC (lnsub1, size+1);\n\n  size = mpn_fib2_ui (l1p, f1p, n);\n\n  /* L[n] = F[n] + 2F[n-1] */\n#if HAVE_NATIVE_mpn_addlsh1_n\n  c = mpn_addlsh1_n (lp, l1p, f1p, size);\n#else\n  c = mpn_lshift (lp, f1p, size, 1);\n  c += mpn_add_n (lp, lp, l1p, size);\n#endif\n  lp[size] = c;\n  SIZ(ln) = size + (c != 0);\n\n  /* L[n-1] = 2F[n] - F[n-1] */\n  c = mpn_lshift (l1p, l1p, size, 1);\n  c -= mpn_sub_n (l1p, l1p, f1p, size);\n  ASSERT ((mp_limb_signed_t) c >= 0);\n  l1p[size] = c;\n  SIZ(lnsub1) = size + (c != 0);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/lucnum_ui.c",
    "content": "/* mpz_lucnum_ui -- calculate Lucas number.\n\nCopyright 2001, 2003, 2005, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* change this to \"#define TRACE(x) x\" for diagnostics */\n#define TRACE(x)\n\n\n/* Notes:\n\n   For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so\n   there can't be an overflow applying +4 to just the low limb (since that\n   would leave 0, 1, 2 or 3 mod 8).\n\n   For the -4 in L[2k+1] when k is even, it seems (no proof) that\n   L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb\n   L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the\n   low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least\n   conceivable to calculate it, so it probably should be handled.\n\n   For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod\n   2^b, so for instance in 32-bits L[0x80000000] has a low limb of\n   0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is\n   obviously huge, but probably should be made to work.  
*/\n\nvoid\nmpz_lucnum_ui (mpz_ptr ln, mpir_ui n)\n{\n  mp_size_t  lalloc, xalloc, lsize, xsize;\n  mp_ptr     lp, xp;\n  mp_limb_t  c;\n  int        zeros;\n  TMP_DECL;\n\n  TRACE (printf (\"mpn_lucnum_ui n=%lu\\n\", n));\n\n  if (n <= FIB_TABLE_LUCNUM_LIMIT)\n    {\n      /* L[n] = F[n] + 2F[n-1] */\n      PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);\n      SIZ(ln) = 1;\n      return;\n    }\n\n  /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1\n     since square or mul used below might need an extra limb over the true\n     size */\n  lalloc = MPN_FIB2_SIZE (n) + 2;\n  lp = MPZ_REALLOC (ln, lalloc);\n\n  TMP_MARK;\n  xalloc = lalloc;\n  xp = TMP_ALLOC_LIMBS (xalloc);\n\n  /* Strip trailing zeros from n, until either an odd number is reached\n     where the L[2k+1] formula can be used, or until n fits within the\n     FIB_TABLE data.  The table is preferred of course.  */\n  zeros = 0;\n  for (;;)\n    {\n      if (n & 1)\n\t{\n\t  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */\n\n\t  mp_size_t  yalloc, ysize;\n\t  mp_ptr     yp;\n\n\t  TRACE (printf (\"  initial odd n=%lu\\n\", n));\n\n\t  yalloc = MPN_FIB2_SIZE (n/2);\n\t  yp = TMP_ALLOC_LIMBS (yalloc);\n\t  ASSERT (xalloc >= yalloc);\n\n\t  xsize = mpn_fib2_ui (xp, yp, n/2);\n\n\t  /* possible high zero on F[k-1] */\n\t  ysize = xsize;\n\t  ysize -= (yp[ysize-1] == 0);\n\t  ASSERT (yp[ysize-1] != 0);\n\n\t  /* xp = 2*F[k] + F[k-1] */\n#if HAVE_NATIVE_mpn_addlsh1_n\n\t  c = mpn_addlsh1_n (xp, yp, xp, xsize);\n#else\n\t  c = mpn_lshift (xp, xp, xsize, 1);\n\t  c += mpn_add_n (xp, xp, yp, xsize);\n#endif\n\t  ASSERT (xalloc >= xsize+1);\n\t  xp[xsize] = c;\n\t  xsize += (c != 0);\n\t  ASSERT (xp[xsize-1] != 0);\n\n\t  ASSERT (lalloc >= xsize + ysize);\n\t  c = mpn_mul (lp, xp, xsize, yp, ysize);\n\t  lsize = xsize + ysize;\n\t  lsize -= (c == 0);\n\n\t  /* lp = 5*lp */\n#if HAVE_NATIVE_mpn_addlsh2_n\n\t  c = mpn_addlsh2_n (lp, lp, lp, lsize);\n#else\n\t  /* FIXME: Is 
this faster than mpn_mul_1 ? */\n\t  c = mpn_lshift (xp, lp, lsize, 2);\n\t  c += mpn_add_n (lp, lp, xp, lsize);\n#endif\n\t  ASSERT (lalloc >= lsize+1);\n\t  lp[lsize] = c;\n\t  lsize += (c != 0);\n\n\t  /* lp = lp - 4*(-1)^k */\n\t  if (n & 2)\n\t    {\n\t      /* no overflow, see comments above */\n\t      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);\n\t      lp[0] += 4;\n\t    }\n\t  else\n\t    {\n\t      /* won't go negative */\n\t      MPN_DECR_U (lp, lsize, CNST_LIMB(4));\n\t    }\n\n\t  TRACE (mpn_trace (\"  l\",lp, lsize));\n\t  break;\n\t}\n\n      MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */\n      zeros++;\n      n /= 2;\n\n      if (n <= FIB_TABLE_LUCNUM_LIMIT)\n\t{\n\t  /* L[n] = F[n] + 2F[n-1] */\n\t  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);\n\t  lsize = 1;\n\n\t  TRACE (printf (\"  initial small n=%lu\\n\", n);\n\t\t mpn_trace (\"  l\",lp, lsize));\n\t  break;\n\t}\n    }\n\n  for ( ; zeros != 0; zeros--)\n    {\n      /* L[2k] = L[k]^2 + 2*(-1)^k */\n\n      TRACE (printf (\"  zeros=%d\\n\", zeros));\n\n      ASSERT (xalloc >= 2*lsize);\n      mpn_sqr (xp, lp, lsize);\n      lsize *= 2;\n      lsize -= (xp[lsize-1] == 0);\n\n      /* First time around the loop k==n determines (-1)^k, after that k is\n\t always even and we set n=0 to indicate that.  */\n      if (n & 1)\n\t{\n\t  /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */\n\t  ASSERT (xp[0] <= MP_LIMB_T_MAX-2);\n\t  xp[0] += 2;\n\t  n = 0;\n\t}\n      else\n\t{\n\t  /* won't go negative */\n\t  MPN_DECR_U (xp, lsize, CNST_LIMB(2));\n\t}\n\n      MP_PTR_SWAP (xp, lp);\n      ASSERT (lp[lsize-1] != 0);\n    }\n\n  /* should end up in the right spot after all the xp/lp swaps */\n  ASSERT (lp == PTR(ln));\n  SIZ(ln) = lsize;\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/mfac_uiui.c",
    "content": "/* mpz_mfac_uiui(RESULT, N, M) -- Set RESULT to N!^(M) = N(N-M)(N-2M)...\n\nContributed to the GNU project by Marco Bodrato.\n\nCopyright 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*************************************************************/\n/* Section macros: common macros, for swing/fac/bin (&sieve) */\n/*************************************************************/\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n/*********************************************************/\n/* Section oder factorials:                              */\n/*********************************************************/\n\n/* mpz_mfac_uiui (x, n, m) computes x = n!^(m) = n*(n-m)*(n-2m)*...   
*/\n\nvoid\nmpz_mfac_uiui (mpz_ptr x, mpir_ui n, mpir_ui m)\n{\n  ASSERT (n <= GMP_NUMB_MAX);\n  ASSERT (m != 0);\n\n  if (n < 3 || n - 3 < m - 1) { /* (n < 3 || n - 1 <= m || m == 0) */\n    PTR (x)[0] = n + (n == 0);\n    SIZ (x) = 1;\n  } else { /* m < n - 1 < GMP_NUMB_MAX */\n    mp_limb_t g, sn;\n    mpz_t     t;\n\n    sn = n;\n    g = mpn_gcd_1 (&sn, 1, m);\n    if (g != 1) { n/=g; m/=g; }\n\n    if (m <= 2) { /* fac or 2fac */\n      if (m == 1) {\n\tif (g > 2) {\n\t  mpz_init (t);\n\t  mpz_fac_ui (t, n);\n\t  sn = n;\n\t} else {\n\t  if (g == 2)\n\t    mpz_2fac_ui (x, n << 1);\n\t  else\n\t    mpz_fac_ui (x, n);\n\t  return;\n\t}\n      } else { /* m == 2 */\n\tif (g != 1) {\n\t  mpz_init (t);\n\t  mpz_2fac_ui (t, n);\n\t  sn = n / 2 + 1;\n\t} else {\n\t  mpz_2fac_ui (x, n);\n\t  return;\n\t}\n      }\n    } else { /* m >= 3, gcd(n,m) = 1 */\n      mp_limb_t *factors;\n      mp_limb_t prod, max_prod, j;\n      TMP_DECL;\n\n      sn = n / m + 1;\n\n      j = 0;\n      prod = n;\n      n -= m;\n      max_prod = GMP_NUMB_MAX / n;\n\n      TMP_MARK;\n      factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);\n\n      for (; n > m; n -= m)\n\tFACTOR_LIST_STORE (n, prod, max_prod, factors, j);\n\n      factors[j++] = n;\n      factors[j++] = prod;\n\n      if (g > 1) {\n\tmpz_init (t);\n\tmpz_prodlimbs (t, factors, j);\n      } else\n\tmpz_prodlimbs (x, factors, j);\n\n      TMP_FREE;\n    }\n\n    if (g > 1) {\n      mpz_t p;\n\n      mpz_init (p);\n      mpz_ui_pow_ui (p, g, sn); /* g^sn */\n      mpz_mul (x, p, t);\n      mpz_clear (p);\n      mpz_clear (t);\n    }\n  }\n}\n"
  },
  {
    "path": "mpz/miller_rabin.c",
    "content": "/* mpz_millerrabin(n,reps) -- An implementation of the probabilistic primality\n   test found in Knuth's Seminumerical Algorithms book.  If the function\n   mpz_millerrabin() returns 0 then n is not prime.  If it returns 1, then n is\n   'probably' prime.  The probability of a false positive is (1/4)**reps, where\n   reps is the number of internal passes of the probabilistic algorithm.  Knuth\n   indicates that 25 passes are reasonable.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free\nSoftware Foundation, Inc.  Contributed by John Amanatides.\nCopyright 2011, Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nstatic int\nmill_rab (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,\n             mpz_srcptr q, mpir_ui k)\n{\n  unsigned long int i;\n\n  mpz_powm (y, x, q, n);\n\n  if (mpz_cmp_ui (y, 1L) == 0 || mpz_cmp (y, nm1) == 0)\n    return 1;\n\n  for (i = 1; i < k; i++)\n    {\n      mpz_powm_ui (y, y, 2L, n);\n      if (mpz_cmp (y, nm1) == 0)\n\treturn 1;\n      if (mpz_cmp_ui (y, 1L) == 0)\n\treturn 0;\n    }\n  return 0;\n}\n\nint\nmpz_miller_rabin (mpz_srcptr n, int reps, gmp_randstate_t rnd)\n{\n  int r;\n  mpz_t nm1, nm3, x, y, q;\n  unsigned long int k;\n  int is_prime;\n  TMP_DECL;\n  TMP_MARK;\n\n  MPZ_TMP_INIT (nm1, SIZ (n) + 1);\n  mpz_sub_ui (nm1, n, 1L);\n\n  MPZ_TMP_INIT (x, SIZ (n) + 1);\n  MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */\n\n  /* Perform a Fermat test.  */\n  mpz_set_ui (x, 210L);\n  mpz_powm (y, x, nm1, n);\n  if (mpz_cmp_ui (y, 1L) != 0)\n    {\n      TMP_FREE;\n      return 0;\n    }\n\n  MPZ_TMP_INIT (q, SIZ (n));\n\n  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */\n  k = mpz_scan1 (nm1, 0L);\n  mpz_tdiv_q_2exp (q, nm1, k);\n\n  /* n-3 */\n  MPZ_TMP_INIT (nm3, SIZ (n) + 1);\n  mpz_sub_ui (nm3, n, 3L);\n  ASSERT (mpz_cmp_ui (nm3, 1L) >= 0);\n\n  is_prime = 1;\n  for (r = 0; r < reps && is_prime; r++)\n    {\n      /* 2 to n-2 inclusive, don't want 1, 0 or -1 */\n      mpz_urandomm (x, rnd, nm3);\n      mpz_add_ui (x, x, 2L);\n\n      is_prime = mill_rab (n, nm1, x, y, q, k);\n    }\n\n  TMP_FREE;\n  return is_prime;\n}\n\n"
  },
  {
    "path": "mpz/millerrabin.c",
    "content": "/* mpz_millerrabin(n,reps) -- An implementation of the probabilistic primality\n   test found in Knuth's Seminumerical Algorithms book.  If the function\n   mpz_millerrabin() returns 0 then n is not prime.  If it returns 1, then n is\n   'probably' prime.  The probability of a false positive is (1/4)**reps, where\n   reps is the number of internal passes of the probabilistic algorithm.  Knuth\n   indicates that 25 passes are reasonable.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free\nSoftware Foundation, Inc.  Contributed by John Amanatides.\nCopyright 2011, Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n//  This function is obsolete     4/12/2011\n\nint\nmpz_millerrabin (mpz_srcptr n, int reps)\n{\n  gmp_randstate_t rstate;\n  int is_prime;\n\n  gmp_randinit_default(rstate);\n  is_prime = mpz_miller_rabin(n, reps, rstate);\n  gmp_randclear(rstate);\n  return is_prime;\n}\n"
  },
  {
    "path": "mpz/mod.c",
    "content": "/* mpz_mod -- The mathematical mod function.\n\nCopyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)\n{\n  mp_size_t divisor_size = divisor->_mp_size;\n  mpz_t temp_divisor;\t\t/* N.B.: lives until function returns! */\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* We need the original value of the divisor after the remainder has been\n     preliminary calculated.  We have to copy it to temporary space if it's\n     the same variable as REM.  */\n  if (rem == divisor)\n    {\n      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));\n      mpz_set (temp_divisor, divisor);\n      divisor = temp_divisor;\n    }\n\n  mpz_tdiv_r (rem, dividend, divisor);\n\n  if (rem->_mp_size != 0)\n    {\n      if (dividend->_mp_size < 0)\n\t{\n\t  if (divisor->_mp_size < 0)\n\t    mpz_sub (rem, rem, divisor);\n\t  else\n\t    mpz_add (rem, rem, divisor);\n\t}\n    }\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/mul.c",
    "content": "/* mpz_mul -- Multiply two integers.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)\n{\n  mp_size_t usize = u->_mp_size;\n  mp_size_t vsize = v->_mp_size;\n  mp_size_t wsize;\n  mp_size_t sign_product;\n  mp_ptr up, vp;\n  mp_ptr wp;\n  mp_ptr free_me;\n  size_t free_me_size;\n  mp_limb_t cy_limb;\n  TMP_DECL;\n\n  sign_product = usize ^ vsize;\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  if ((usize == 0) || (vsize == 0))\n    {\n      SIZ(w) = 0;\n      return;\n    }\n\n#if HAVE_NATIVE_mpn_mul_2\n  if (vsize <= 2)\n    {\n      MPZ_REALLOC (w, usize+vsize);\n      wp = PTR(w);\n      if (vsize == 1)\n        cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);\n      else\n        {\n          cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v));\n          usize++;\n        }\n      wp[usize] = cy_limb;\n      usize += (cy_limb != 0);\n      SIZ(w) = (sign_product >= 0 ? 
usize : -usize);\n      return;\n    }\n#else\n  if (vsize == 1)\n    {\n      MPZ_REALLOC (w, usize+1);\n      wp = PTR(w);\n      cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);\n      wp[usize] = cy_limb;\n      usize += (cy_limb != 0);\n      SIZ(w) = (sign_product >= 0 ? usize : -usize);\n      return;\n    }\n#endif\n\n  wsize = usize + vsize;\n  \n  if ((wsize <= MUL_KARATSUBA_THRESHOLD) && (w != u) && (w != v))\n  {\n\t  MPZ_REALLOC (w, wsize);\n     wp = PTR(w);\n     if (usize == vsize)\n     {\n#if HAVE_NATIVE_mpn_sqr_basecase\n        if (PTR(u) == PTR(v)) \n           mpn_sqr_basecase(wp, PTR(u), usize);\n        else\n#endif\n           mpn_mul_basecase(wp, PTR(u), usize, PTR(v), vsize);\n     } else if (usize > vsize) \n        mpn_mul_basecase(wp, PTR(u), usize, PTR(v), vsize);\n     else \n        mpn_mul_basecase(wp, PTR(v), vsize, PTR(u), usize);\n               \n     wsize -= (wp[wsize - 1] == 0);\n\t  SIZ(w) = (sign_product >= 0 ? wsize : -wsize);\n     return;\n  }\n\n  if (usize < vsize)\n    {\n      MPZ_SRCPTR_SWAP (u, v);\n      MP_SIZE_T_SWAP (usize, vsize);\n    }\n\n  TMP_MARK;\n  free_me = NULL;\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  wp = w->_mp_d;\n\n  /* Ensure W has space enough to store the result.  */\n  if (w->_mp_alloc < wsize)\n    {\n      if (wp == up || wp == vp)\n\t{\n\t  free_me = wp;\n\t  free_me_size = w->_mp_alloc;\n\t}\n      else\n\t(*__gmp_free_func) (wp, w->_mp_alloc * BYTES_PER_MP_LIMB);\n\n      w->_mp_alloc = wsize;\n      wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB);\n      w->_mp_d = wp;\n    }\n  else\n    {\n      /* Make U and V not overlap with W.  */\n      if (wp == up)\n\t{\n\t  /* W and U are identical.  Allocate temporary space for U.  */\n\t  up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);\n\t  /* Is V identical too?  Keep it identical with U.  */\n\t  if (wp == vp)\n\t    vp = up;\n\t  /* Copy to the temporary space.  
*/\n\t  MPN_COPY (up, wp, usize);\n\t}\n      else if (wp == vp)\n\t{\n\t  /* W and V are identical.  Allocate temporary space for V.  */\n\t  vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);\n\t  /* Copy to the temporary space.  */\n\t  MPN_COPY (vp, wp, vsize);\n\t}\n    }\n\n  if ((up == vp) && (usize == vsize))\n  {\n     mpn_sqr (wp, up, usize);\n     cy_limb = wp[2*usize-1];\n  } else\n  cy_limb = mpn_mul (wp, up, usize, vp, vsize);\n  wsize = usize + vsize;\n  wsize -= cy_limb == 0;\n\n  w->_mp_size = sign_product < 0 ? -wsize : wsize;\n  if (free_me != NULL)\n    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/mul_2exp.c",
    "content": "/* mpz_mul_2exp -- Multiply a bignum by 2**CNT\n\nCopyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_mul_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  mp_size_t usize = u->_mp_size;\n  mp_size_t abs_usize = ABS (usize);\n  mp_size_t wsize;\n  mp_size_t limb_cnt;\n  mp_ptr wp;\n  mp_limb_t wlimb;\n\n  if (usize == 0)\n    {\n      w->_mp_size = 0;\n      return;\n    }\n\n  limb_cnt = cnt / GMP_NUMB_BITS;\n  wsize = abs_usize + limb_cnt + 1;\n  if (w->_mp_alloc < wsize)\n    _mpz_realloc (w, wsize);\n\n  wp = w->_mp_d;\n  wsize = abs_usize + limb_cnt;\n\n  cnt %= GMP_NUMB_BITS;\n  if (cnt != 0)\n    {\n      wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);\n      if (wlimb != 0)\n\t{\n\t  wp[wsize] = wlimb;\n\t  wsize++;\n\t}\n    }\n  else\n    {\n      MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);\n    }\n\n  /* Zero all whole limbs at low end.  Do it here and not before calling\n     mpn_lshift, not to lose for U == W.  */\n  MPN_ZERO (wp, limb_cnt);\n\n  w->_mp_size = usize >= 0 ? wsize : -wsize;\n}\n"
  },
  {
    "path": "mpz/mul_i.h",
    "content": "/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to\n   MULTIPLICATOR times SMALL_MULTIPLICAND.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#ifdef OPERATION_mul_si\n#define FUNCTION               mpz_mul_si\n#define MULTIPLICAND_UNSIGNED  mpir_si\n#define MULTIPLICAND_ABS(x)    ((mpir_ui) ABS(x))\n#define LT_ZERO(x)             (x < 0)\n#endif\n\n#ifdef OPERATION_mul_ui\n#define FUNCTION               mpz_mul_ui\n#define MULTIPLICAND_UNSIGNED  mpir_ui\n#define MULTIPLICAND_ABS(x)    x\n#define LT_ZERO(x)             0\n#endif\n\n#ifndef FUNCTION\nError, error, unrecognised OPERATION\n#endif\n\n\nvoid\nFUNCTION (mpz_ptr prod, mpz_srcptr mult,\n          MULTIPLICAND_UNSIGNED small_mult)\n{\n  mp_size_t size = SIZ(mult);\n  mp_size_t sign_product = size;\n  mp_limb_t sml;\n  mp_limb_t cy;\n  mp_ptr pp;\n\n  if (size == 0 || small_mult == 0)\n    {\n      SIZ(prod) = 0;\n      return;\n    }\n\n  size = ABS (size);\n\n  sml = MULTIPLICAND_ABS (small_mult);\n\n  if (small_mult <= GMP_NUMB_MAX)\n    {\n      MPZ_REALLOC (prod, size + 1);\n      pp = PTR(prod);\n      cy = mpn_mul_1 (pp, PTR(mult), size, sml & GMP_NUMB_MASK);\n      pp[size] = cy;\n      size += cy != 0;\n    }\n#if GMP_NAIL_BITS != 0\n  else\n    {\n      /* Operand too large for the current nails size.  Use temporary for\n\t intermediate products, to allow prod and mult being identical.  */\n      mp_ptr tp;\n      TMP_DECL;\n      TMP_MARK;\n\n      tp = TMP_ALLOC_LIMBS (size + 2);\n\n      cy = mpn_mul_1 (tp, PTR(mult), size, sml & GMP_NUMB_MASK);\n      tp[size] = cy;\n      cy = mpn_addmul_1 (tp + 1, PTR(mult), size, sml >> GMP_NUMB_BITS);\n      tp[size + 1] = cy;\n      size += 2;\n      MPN_NORMALIZE_NOT_ZERO (tp, size); /* too general, need to trim one or two limb */\n      MPZ_REALLOC (prod, size);\n      pp = PTR(prod);\n      MPN_COPY (pp, tp, size);\n      TMP_FREE;\n    }\n#endif\n\n  SIZ(prod) = ((sign_product < 0) ^ LT_ZERO(small_mult)) ? -size : size;\n}\n"
  },
  {
    "path": "mpz/mul_si.c",
    "content": "/* mpz_mul_si (product, multiplier, small_multiplicand) -- Set PRODUCT to\n   MULTIPLICATOR times SMALL_MULTIPLICAND.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_mul_si\n#include \"mul_i.h\"\n"
  },
  {
    "path": "mpz/mul_ui.c",
    "content": "/* mpz_mul_ui (product, multiplier, small_multiplicand) -- Set PRODUCT to\n   MULTIPLICATOR times SMALL_MULTIPLICAND.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_mul_ui\n#include \"mul_i.h\"\n"
  },
  {
    "path": "mpz/n_pow_ui.c",
    "content": "/* mpz_n_pow_ui -- mpn raised to ulong.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* Change this to \"#define TRACE(x) x\" for some traces. */\n#define TRACE(x)\n\n\n/* Use this to test the mul_2 code on a CPU without a native version of that\n   routine.  */\n#if 0\n#define mpn_mul_2  refmpn_mul_2\n#define HAVE_NATIVE_mpn_mul_2  1\n#endif\n\n\n/* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code.\n   ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on\n   bsize==2 or >2, but separating that isn't easy because there's shared\n   code both before and after (the size calculations and the powers of 2\n   handling).\n\n   Alternatives:\n\n   It would work to just use the mpn_mul powering loop for 1 and 2 limb\n   bases, but the current separate loop allows mul_1 and mul_2 to be done\n   in-place, which might help cache locality a bit.  
If mpn_mul was relaxed\n   to allow source==dest when vn==1 or 2 then some pointer twiddling might\n   let us get the same effect in one loop.\n\n   The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't\n   form the biggest possible power of b that fits, only the biggest power of\n   2 power, ie. b^(2^n).  It'd be possible to choose a bigger power, perhaps\n   using __mp_bases[b].big_base for small b, and thereby get better value\n   from mpn_mul_1 or mpn_mul_2 in the bignum powering.  It's felt that doing\n   so would be more complicated than it's worth, and could well end up being\n   a slowdown for small e.  For big e on the other hand the algorithm is\n   dominated by mpn_sqr so there wouldn't much of a saving.  The current\n   code can be viewed as simply doing the first few steps of the powering in\n   a single or double limb where possible.\n\n   If r==b, and blow_twos==0, and r must be realloc'ed, then the temporary\n   copy made of b is unnecessary.  We could just use the old alloc'ed block\n   and free it at the end.  But arranging this seems like a lot more trouble\n   than it's worth.  */\n\n\n/* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in\n   a limb without overflowing.\n   FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */\n\n#define GMP_NUMB_HALFMAX  (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1)\n\n\n/* The following are for convenience, they update the size and check the\n   alloc.  
*/\n\n#define MPN_SQR(dst, alloc, src, size)        \\\n  do {                                          \\\n    ASSERT (2*(size) <= (alloc));               \\\n    mpn_sqr (dst, src, size);                 \\\n    (size) *= 2;                                \\\n    (size) -= ((dst)[(size)-1] == 0);           \\\n  } while (0)\n\n#define MPN_MUL(dst, alloc, src, size, src2, size2)     \\\n  do {                                                  \\\n    mp_limb_t  cy;                                      \\\n    ASSERT ((size) + (size2) <= (alloc));               \\\n    cy = mpn_mul (dst, src, size, src2, size2);         \\\n    (size) += (size2) - (cy == 0);                      \\\n  } while (0)\n\n#define MPN_MUL_2(ptr, size, alloc, mult)       \\\n  do {                                          \\\n    mp_limb_t  cy;                              \\\n    ASSERT ((size)+2 <= (alloc));               \\\n    cy = mpn_mul_2 (ptr, ptr, size, mult);      \\\n    (size)++;                                   \\\n    (ptr)[(size)] = cy;                         \\\n    (size) += (cy != 0);                        \\\n  } while (0)\n\n#define MPN_MUL_1(ptr, size, alloc, limb)       \\\n  do {                                          \\\n    mp_limb_t  cy;                              \\\n    ASSERT ((size)+1 <= (alloc));               \\\n    cy = mpn_mul_1 (ptr, ptr, size, limb);      \\\n    (ptr)[size] = cy;                           \\\n    (size) += (cy != 0);                        \\\n  } while (0)\n\n#define MPN_LSHIFT(ptr, size, alloc, shift)     \\\n  do {                                          \\\n    mp_limb_t  cy;                              \\\n    ASSERT ((size)+1 <= (alloc));               \\\n    cy = mpn_lshift (ptr, ptr, size, shift);    \\\n    (ptr)[size] = cy;                           \\\n    (size) += (cy != 0);                        \\\n  } while (0)\n\n#define MPN_RSHIFT_OR_COPY(dst, src, size, shift)       \\\n  do {                               
                   \\\n    if ((shift) == 0)                                   \\\n      MPN_COPY (dst, src, size);                        \\\n    else                                                \\\n      {                                                 \\\n        mpn_rshift (dst, src, size, shift);             \\\n        (size) -= ((dst)[(size)-1] == 0);               \\\n      }                                                 \\\n  } while (0)\n\n\n/* ralloc and talloc are only wanted for ASSERTs, after the initial space\n   allocations.  Avoid writing values to them in a normal build, to ensure\n   the compiler lets them go dead.  gcc already figures this out itself\n   actually.  */\n\n#define SWAP_RP_TP                                      \\\n  do {                                                  \\\n    MP_PTR_SWAP (rp, tp);                               \\\n    ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc));      \\\n  } while (0)\n\n\nvoid\nmpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, mpir_ui e)\n{\n  mp_ptr         rp;\n  mp_size_t      rtwos_limbs, ralloc, rsize;\n  int            rneg, i, cnt, btwos, r_bp_overlap;\n  mp_limb_t      blimb, rl;\n  unsigned long  rtwos_bits;\n#if HAVE_NATIVE_mpn_mul_2\n  mp_limb_t      blimb_low, rl_high;\n#else\n  mp_limb_t      b_twolimbs[2];\n#endif\n  TMP_DECL;\n\n  TRACE (printf (\"mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\\n\",\n                 PTR(r), bp, bsize, e, e);\n         mpn_trace (\"b\", bp, bsize));\n\n  ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);\n  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ABSIZ(r), bp, bsize));\n\n  /* b^0 == 1, including 0^0 == 1 */\n  if (e == 0)\n    {\n      PTR(r)[0] = 1;\n      SIZ(r) = 1;\n      return;\n    }\n\n  /* 0^e == 0 apart from 0^0 above */\n  if (bsize == 0)\n    {\n      SIZ(r) = 0;\n      return;\n    }\n\n  /* Sign of the final result. 
*/\n  rneg = (bsize < 0 && (e & 1) != 0);\n  bsize = ABS (bsize);\n  TRACE (printf (\"rneg %d\\n\", rneg));\n\n  r_bp_overlap = (PTR(r) == bp);\n\n  /* Strip low zero limbs from b. */\n  rtwos_limbs = 0;\n  for (blimb = *bp; blimb == 0; blimb = *++bp)\n    {\n      rtwos_limbs += e;\n      bsize--; ASSERT (bsize >= 1);\n    }\n  TRACE (printf (\"trailing zero rtwos_limbs=%ld\\n\", rtwos_limbs));\n\n  /* Strip low zero bits from b. */\n  count_trailing_zeros (btwos, blimb);\n  blimb >>= btwos;\n  rtwos_bits = e * btwos;\n  rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;\n  rtwos_bits %= GMP_NUMB_BITS;\n  TRACE (printf (\"trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\\n\",\n                 btwos, rtwos_limbs, rtwos_bits));\n\n  TMP_MARK;\n\n  rl = 1;\n#if HAVE_NATIVE_mpn_mul_2\n  rl_high = 0;\n#endif\n\n  if (bsize == 1)\n    {\n    bsize_1:\n      /* Power up as far as possible within blimb.  We start here with e!=0,\n         but if e is small then we might reach e==0 and the whole b^e in rl.\n         Notice this code works when blimb==1 too, reaching e==0.  
*/\n\n      while (blimb <= GMP_NUMB_HALFMAX)\n        {\n          TRACE (printf (\"small e=0x%lX blimb=0x%lX rl=0x%lX\\n\",\n                         e, blimb, rl));\n          ASSERT (e != 0);\n          if ((e & 1) != 0)\n            rl *= blimb;\n          e >>= 1;\n          if (e == 0)\n            goto got_rl;\n          blimb *= blimb;\n        }\n\n#if HAVE_NATIVE_mpn_mul_2\n      TRACE (printf (\"single power, e=0x%lX b=0x%lX rl=0x%lX\\n\",\n                     e, blimb, rl));\n\n      /* Can power b once more into blimb:blimb_low */\n      bsize = 2;\n      ASSERT (e != 0);\n      if ((e & 1) != 0)\n\t{\n\t  umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS);\n\t  rl >>= GMP_NAIL_BITS;\n\t}\n      e >>= 1;\n      umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS);\n      blimb_low >>= GMP_NAIL_BITS;\n\n    got_rl:\n      TRACE (printf (\"double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\\n\",\n                     e, blimb, blimb_low, rl_high, rl));\n\n      /* Combine left-over rtwos_bits into rl_high:rl to be handled by the\n         final mul_1 or mul_2 rather than a separate lshift.\n         - rl_high:rl mustn't be 1 (since then there's no final mul)\n         - rl_high mustn't overflow\n         - rl_high mustn't change to non-zero, since mul_1+lshift is\n         probably faster than mul_2 (FIXME: is this true?)  */\n\n      if (rtwos_bits != 0\n          && ! (rl_high == 0 && rl == 1)\n          && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)\n        {\n          mp_limb_t  new_rl_high = (rl_high << rtwos_bits)\n            | (rl >> (GMP_NUMB_BITS-rtwos_bits));\n          if (! 
(rl_high == 0 && new_rl_high != 0))\n            {\n              rl_high = new_rl_high;\n              rl <<= rtwos_bits;\n              rtwos_bits = 0;\n              TRACE (printf (\"merged rtwos_bits, rl=0x%lX:%lX\\n\",\n                             rl_high, rl));\n            }\n        }\n#else\n    got_rl:\n      TRACE (printf (\"small power e=0x%lX blimb=0x%lX rl=0x%lX\\n\",\n                     e, blimb, rl));\n\n      /* Combine left-over rtwos_bits into rl to be handled by the final\n         mul_1 rather than a separate lshift.\n         - rl mustn't be 1 (since then there's no final mul)\n         - rl mustn't overflow  */\n\n      if (rtwos_bits != 0\n          && rl != 1\n          && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)\n        {\n          rl <<= rtwos_bits;\n          rtwos_bits = 0;\n          TRACE (printf (\"merged rtwos_bits, rl=0x%lX\\n\", rl));\n        }\n#endif\n    }\n  else if (bsize == 2)\n    {\n      mp_limb_t  bsecond = bp[1];\n      if (btwos != 0)\n        blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;\n      bsecond >>= btwos;\n      if (bsecond == 0)\n        {\n          /* Two limbs became one after rshift. 
*/\n          bsize = 1;\n          goto bsize_1;\n        }\n\n      TRACE (printf (\"bsize==2 using b=0x%lX:%lX\", bsecond, blimb));\n#if HAVE_NATIVE_mpn_mul_2\n      blimb_low = blimb;\n#else\n      bp = b_twolimbs;\n      b_twolimbs[0] = blimb;\n      b_twolimbs[1] = bsecond;\n#endif\n      blimb = bsecond;\n    }\n  else\n    {\n      if (r_bp_overlap || btwos != 0)\n        {\n          mp_ptr tp = TMP_ALLOC_LIMBS (bsize);\n          MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);\n          bp = tp;\n          TRACE (printf (\"rshift or copy bp,bsize, new bsize=%ld\\n\", bsize));\n        }\n#if HAVE_NATIVE_mpn_mul_2\n      /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */\n      blimb_low = bp[0];\n#endif\n      blimb = bp[bsize-1];\n\n      TRACE (printf (\"big bsize=%ld  \", bsize);\n             mpn_trace (\"b\", bp, bsize));\n    }\n\n  /* At this point blimb is the most significant limb of the base to use.\n\n     Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1\n     limb to round up the division; +1 for multiplies all using an extra\n     limb over the true size; +2 for rl at the end; +1 for lshift at the\n     end.\n\n     The size calculation here is reasonably accurate.  The base is at least\n     half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits\n     when it will power up as just over 16, an overestimate of 17/16 =\n     6.25%.  For a 64-bit limb it's half that.\n\n     If e==0 then blimb won't be anything useful (though it will be\n     non-zero), but that doesn't matter since we just end up with ralloc==5,\n     and that's fine for 2 limbs of rl and 1 of lshift.  
*/\n\n  ASSERT (blimb != 0);\n  count_leading_zeros (cnt, blimb);\n  ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;\n  TRACE (printf (\"ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\\n\",\n                 ralloc, bsize, blimb, cnt));\n  MPZ_REALLOC (r, ralloc + rtwos_limbs);\n  rp = PTR(r);\n\n  /* Low zero limbs resulting from powers of 2. */\n  MPN_ZERO (rp, rtwos_limbs);\n  rp += rtwos_limbs;\n\n  if (e == 0)\n    {\n      /* Any e==0 other than via bsize==1 or bsize==2 is covered at the\n         start. */\n      rp[0] = rl;\n      rsize = 1;\n#if HAVE_NATIVE_mpn_mul_2\n      rp[1] = rl_high;\n      rsize += (rl_high != 0);\n#endif\n      ASSERT (rp[rsize-1] != 0);\n    }\n  else\n    {\n      mp_ptr     tp;\n      mp_size_t  talloc;\n\n      /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the\n         low bit of e is zero, tp only has to hold the second last power\n         step, which is half the size of the final result.  There's no need\n         to round up the divide by 2, since ralloc includes a +2 for rl\n         which not needed by tp.  In the mpn_mul loop when the low bit of e\n         is 1, tp must hold nearly the full result, so just size it the same\n         as rp.  */\n\n      talloc = ralloc;\n#if HAVE_NATIVE_mpn_mul_2\n      if (bsize <= 2 || (e & 1) == 0)\n        talloc /= 2;\n#else\n      if (bsize <= 1 || (e & 1) == 0)\n        talloc /= 2;\n#endif\n      TRACE (printf (\"talloc %ld\\n\", talloc));\n      tp = TMP_ALLOC_LIMBS (talloc);\n\n      /* Go from high to low over the bits of e, starting with i pointing at\n         the bit below the highest 1 (which will mean i==-1 if e==1).  */\n      count_leading_zeros (cnt, e);\n      i = GMP_LIMB_BITS - cnt - 2;\n\n#if HAVE_NATIVE_mpn_mul_2\n      if (bsize <= 2)\n        {\n          mp_limb_t  mult[2];\n\n          /* Any bsize==1 will have been powered above to be two limbs. 
*/\n          ASSERT (bsize == 2);\n          ASSERT (blimb != 0);\n\n          /* Arrange the final result ends up in r, not in the temp space */\n          if ((i & 1) == 0)\n            SWAP_RP_TP;\n\n          rp[0] = blimb_low;\n          rp[1] = blimb;\n          rsize = 2;\n\n          mult[0] = blimb_low;\n          mult[1] = blimb;\n\n          for ( ; i >= 0; i--)\n            {\n              TRACE (printf (\"mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\\n\",\n                             i, e, rsize, ralloc, talloc);\n                     mpn_trace (\"r\", rp, rsize));\n\n              MPN_SQR (tp, talloc, rp, rsize);\n              SWAP_RP_TP;\n              if ((e & (1L << i)) != 0)\n                MPN_MUL_2 (rp, rsize, ralloc, mult);\n            }\n\n          TRACE (mpn_trace (\"mul_2 before rl, r\", rp, rsize));\n          if (rl_high != 0)\n            {\n              mult[0] = rl;\n              mult[1] = rl_high;\n              MPN_MUL_2 (rp, rsize, ralloc, mult);\n            }\n          else if (rl != 1)\n            MPN_MUL_1 (rp, rsize, ralloc, rl);\n        }\n#else\n      if (bsize == 1)\n        {\n          /* Arrange the final result ends up in r, not in the temp space */\n          if ((i & 1) == 0)\n            SWAP_RP_TP;\n\n          rp[0] = blimb;\n          rsize = 1;\n\n          for ( ; i >= 0; i--)\n            {\n              TRACE (printf (\"mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\\n\",\n                             i, e, rsize, ralloc, talloc);\n                     mpn_trace (\"r\", rp, rsize));\n\n              MPN_SQR (tp, talloc, rp, rsize);\n              SWAP_RP_TP;\n              if ((e & (1L << i)) != 0)\n                MPN_MUL_1 (rp, rsize, ralloc, blimb);\n            }\n\n          TRACE (mpn_trace (\"mul_1 before rl, r\", rp, rsize));\n          if (rl != 1)\n            MPN_MUL_1 (rp, rsize, ralloc, rl);\n        }\n#endif\n      else\n        {\n          int  parity;\n\n      
    /* Arrange the final result ends up in r, not in the temp space */\n          ULONG_PARITY (parity, e);\n          if (((parity ^ i) & 1) != 0)\n            SWAP_RP_TP;\n\n          MPN_COPY (rp, bp, bsize);\n          rsize = bsize;\n\n          for ( ; i >= 0; i--)\n            {\n              TRACE (printf (\"mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\\n\",\n                             i, e, rsize, ralloc, talloc);\n                     mpn_trace (\"r\", rp, rsize));\n\n              MPN_SQR (tp, talloc, rp, rsize);\n              SWAP_RP_TP;\n              if ((e & (1L << i)) != 0)\n                {\n                  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);\n                  SWAP_RP_TP;\n                }\n            }\n        }\n    }\n\n  ASSERT (rp == PTR(r) + rtwos_limbs);\n  TRACE (mpn_trace (\"end loop r\", rp, rsize));\n  TMP_FREE;\n\n  /* Apply any partial limb factors of 2. */\n  if (rtwos_bits != 0)\n    {\n      MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits);\n      TRACE (mpn_trace (\"lshift r\", rp, rsize));\n    }\n\n  rsize += rtwos_limbs;\n  SIZ(r) = (rneg ? -rsize : rsize);\n}\n"
  },
  {
    "path": "mpz/neg.c",
    "content": "/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.\n\nCopyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_neg 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_neg (mpz_ptr w, mpz_srcptr u)\n{\n  mp_ptr wp, up;\n  mp_size_t usize, size;\n\n  usize = u->_mp_size;\n\n  if (u != w)\n    {\n      size = ABS (usize);\n\n      if (w->_mp_alloc < size)\n\t_mpz_realloc (w, size);\n\n      wp = w->_mp_d;\n      up = u->_mp_d;\n\n      MPN_COPY (wp, up, size);\n    }\n\n  w->_mp_size = -usize;\n}\n"
  },
  {
    "path": "mpz/next_prime_candidate.c",
    "content": "/* mpz_next_prime_candidate(p,t,rnd) - compute the next likely prime > t and store that in p.\n\nCopyright 1999, 2000, 2001 Free Software Foundation, Inc.\nCopyright 2009 Jason Moxham, Brian Gladman\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if 0\n\nvoid \nmpz_next_prime_candidate (mpz_ptr p, mpz_srcptr t, gmp_randstate_t rnd)\n{\n  mpz_add_ui (p, t, 1L);\n  while (! mpz_likely_prime_p (p, rnd,0))\n    mpz_add_ui (p, p, 1L);\n}\n\n#else\n\n/* This code is not yet tested.  Will be enabled some time. 
*/\n\nstatic unsigned short primes[] =\n{\n3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,\n101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,\n191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,\n281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,\n389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,\n491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601,\n607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,\n719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,\n829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947,\n953,967,971,977,983,991,997\n};\n\n#define NUMBER_OF_PRIMES (sizeof(primes) / sizeof(primes[0]))\n\nvoid\nmpz_next_prime_candidate (mpz_ptr p, mpz_srcptr n, gmp_randstate_t rnd)\n{\n  unsigned short *moduli;\n  unsigned long difference;\n  int i, prime_limit;\n  int composite;\n  TMP_DECL;\n\n  /* First handle tiny numbers */\n  if (mpz_cmp_ui (n, 2) < 0)\n  {\n      mpz_set_ui (p, 2);\n      return;\n  }\n  mpz_add_ui (p, n, 1);\n  mpz_setbit (p, 0);\n\n  if (mpz_cmp_ui (p, 7) <= 0)\n    return;\n\n  prime_limit = NUMBER_OF_PRIMES - 1;\n  if (mpz_cmp_ui (p, primes[prime_limit]) <= 0)\n  {\n      int lo = 0, hi = NUMBER_OF_PRIMES - 1, mid;\n\n      i = mpz_get_ui(p);  \n      while(lo <= hi)\n      {\n          mid = lo + (hi - lo) / 2;\n          if (i > primes[mid])\n             lo = mid + 1;\n          else if (i < primes[mid])\n             hi = mid - 1;\n          else\n          {\n             lo = mid;\n             break;\n          }\n      }\n      mpz_set_ui(p, primes[lo]);\n      return;\n  }\n\n  TMP_MARK;\n  /* Compute residues modulo small odd primes */\n  moduli = (unsigned short *) TMP_ALLOC (prime_limit * sizeof moduli[0]);\n  for (i = 0; i < prime_limit; i++)\n    moduli[i] = mpz_fdiv_ui (p, primes[i]);\n  for (difference = 0; ; difference += 2)\n  {\n    composite = 0;\n\n    /* 
First check residues */\n    for (i = 0; i < prime_limit; i++)\n\t{\n\t  int acc, pr;\n\t  composite |= (moduli[i] == 0);\n\t  acc = moduli[i] + 2;\n\t  pr = primes[i];\n\t  moduli[i] = acc >= pr ? acc - pr : acc;\n\t}\n    if (composite)\n\t  continue;\n\n    mpz_add_ui (p, p, difference);\n    difference = 0;\n\n    /* Miller-Rabin test */\n    if (mpz_miller_rabin (p, 2, rnd))\n\t  break;\n  }\n  TMP_FREE;\n}\n\n#endif\n"
  },
  {
    "path": "mpz/nextprime.c",
    "content": "/* mpz_nextprime(x,y) - compute the next probable prime > y and store that in x\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* This function is Obsolete  17/8/2009 */\n\n/* \n   But people use it anyway! \n   \n   FIXME: This function should prove the primality of x using \n   ECPP or APR-CL.\n*/\nvoid mpz_nextprime(mpz_ptr x, mpz_srcptr y)\n{\n  gmp_randstate_t rnd;\n  \n  gmp_randinit_default(rnd);\n  mpz_next_prime_candidate(x, y, rnd);\n  \n  if (mpz_cmp_ui(x, 1000000L) >= 0) /* nextprime_candidate sieves primes up to 1000 */\n  {\n     while (!mpz_miller_rabin (x, 23, rnd)) /* we've done 2 rounds already, do another 23 */\n     {\n        mpz_add_ui(x, x, 2);\n        mpz_next_prime_candidate(x, x, rnd);\n     }\n  }\n\n  gmp_randclear(rnd);\n}\n"
  },
  {
    "path": "mpz/nthroot.c",
    "content": "/* mpz_nthroot(root, u, nth) --  Set ROOT to floor(U^(1/nth)).\n\nCopyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_nthroot (mpz_ptr root, mpz_srcptr u, mpir_ui nth)\n{\n  mp_ptr rootp, up, remp;\n  mp_size_t us, un, rootn;\n  mpz_t tmp_u;\n\n  TMP_DECL;\n\n  up = PTR(u);\n  us = SIZ(u);\n  \n  /* even roots of negatives provoke an exception */\n  if (us < 0 && (nth & 1) == 0)\n    SQRT_OF_NEGATIVE;\n  \n  /* root extraction interpreted as c^(1/nth) means a zeroth root should\n     provoke a divide by zero, do this even if c==0 */\n  if (nth == 0)\n    DIVIDE_BY_ZERO;\n  \n  if (us == 0)\n    {\n      if (root != 0)\n\tSIZ(root) = 0;\n      return;\n    }\n \n  un = ABS (us);\n  rootn = (un - 1) / nth + 1;\n  \n  TMP_MARK;\n\n  if (root == u)\n  {\n     MPZ_TMP_INIT(tmp_u, ABS(u->_mp_size));\n     mpz_set(tmp_u, u);\n     u = tmp_u;\n  }\n\n  rootp = MPZ_REALLOC (root, rootn);\n  up = PTR(u);\n\n  if (nth == 1)\n    {\n      MPN_COPY (rootp, up, un);\n    }\n  else\n    {\n      mpn_rootrem (rootp, 0, up, un, (mp_limb_t) nth);\n    }\n\n  SIZ(root) = 
us >= 0 ? rootn : -rootn;\n\n  TMP_FREE;\n\n  return;\n}\n"
  },
  {
    "path": "mpz/oddfac_1.c",
    "content": "/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.\n\nContributed to the GNU project by Marco Bodrato.\n\nTHE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.\nIT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.\nIN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR\nDISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2010, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if GMP_LIMB_BITS == 64\n\n#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x335281867ec241ef)\n#define ODD_FACTORIAL_TABLE_LIMIT (25)\n\n#define ODD_FACTORIAL_EXTTABLE_LIMIT (67)\n\n#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x57e22099c030d941)\n#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (33)\n\n#else\n\n#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x260eeeeb)\n#define ODD_FACTORIAL_TABLE_LIMIT (16)\n\n#define ODD_FACTORIAL_EXTTABLE_LIMIT (34)\n\n#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x27065f73)\n#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (19)\n\n#endif\n\n/* TODO:\n   - split this file in smaller parts with functions that can be recycled for different computations.\n */\n\n/**************************************************************/\n/* Section macros: common macros, for mswing/fac/bin (&sieve) */\n/**************************************************************/\n\n#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)\t\t\t\\\n  if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n    (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n    (PR) = 1;\t\t\t\t\t\t\t\\\n  }\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\t\t\t\\\n    __max_i = (end);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\\\n      ++__i;\t\t\t\t\t\t\t\\\n      if (((sieve)[__index] & __mask) == 0)\t\t\t\\\n\t{\t\t\t\t\t\t\t\\\n\t  (prime) = id_to_n(__i)\n\n#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    mp_limb_t __mask, __index, __max_i, __i;\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    __i = (start)-(off);\t\t\t\t\t\\\n    __index = __i / GMP_LIMB_BITS;\t\t\t\t\\\n    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);\t\t\\\n    __i += 
(off);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\n\n#define LOOP_ON_SIEVE_STOP\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\\\n      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);\t\\\n      __index += __mask & 1;\t\t\t\t\t\\\n    }  while (__i <= __max_i)\t\t\t\t\t\\\n\n#define LOOP_ON_SIEVE_END\t\t\t\t\t\\\n    LOOP_ON_SIEVE_STOP;\t\t\t\t\t\t\\\n  } while (0)\n\n/*********************************************************/\n/* Section sieve: sieving functions and tools for primes */\n/*********************************************************/\n\n#if WANT_ASSERT\nstatic mp_limb_t\nbit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }\n#endif\n\n/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/\nstatic mp_limb_t\nid_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }\n\n/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */\nstatic mp_limb_t\nn_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }\n\n#if WANT_ASSERT\nstatic mp_size_t\nprimesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }\n#endif\n\n/*********************************************************/\n/* Section mswing: 2-multiswing factorial                 */\n/*********************************************************/\n\n/* Returns an approximation of the sqare root of x.  
*\n * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */\nstatic mp_limb_t\nlimb_apprsqrt (mp_limb_t x)\n{\n  int s;\n\n  ASSERT (x > 2);\n  count_leading_zeros (s, x - 1);\n  s = GMP_LIMB_BITS - 1 - s;\n  return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));\n}\n\n#if 0\n/* A count-then-exponentiate variant for SWING_A_PRIME */\n#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __q, __prime;\t\t\t\t\\\n    int __exp;\t\t\t\t\t\t\\\n    __prime = (P);\t\t\t\t\t\\\n    __exp = 0;\t\t\t\t\t\t\\\n    __q = (N);\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\\\n      __q /= __prime;\t\t\t\t\t\\\n      __exp += __q & 1;\t\t\t\t\t\\\n    } while (__q >= __prime);\t\t\t\t\\\n    if (__exp) { /* Store $prime^{exp}$ */\t\t\\\n      for (__q = __prime; --__exp; __q *= __prime);\t\\\n      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);\t\\\n    };\t\t\t\t\t\t\t\\\n  } while (0)\n#else\n#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)\t\\\n  do {\t\t\t\t\t\t\\\n    mp_limb_t __q, __prime;\t\t\t\\\n    __prime = (P);\t\t\t\t\\\n    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);\t\\\n    __q = (N);\t\t\t\t\t\\\n    do {\t\t\t\t\t\\\n      __q /= __prime;\t\t\t\t\\\n      if ((__q & 1) != 0) (PR) *= __prime;\t\\\n    } while (__q >= __prime);\t\t\t\\\n  } while (0)\n#endif\n\n#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)\t\\\n  do {\t\t\t\t\t\t\t\\\n    mp_limb_t __prime;\t\t\t\t\t\\\n    __prime = (P);\t\t\t\t\t\\\n    if ((((N) / __prime) & 1) != 0)\t\t\t\\\n      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);\t\\\n  } while (0)\n\n/* mpz_2multiswing_1 computes the odd part of the 2-multiswing\n   factorial of the parameter n.  
The result x is an odd positive\n   integer so that multiswing(n,2) = x 2^a.\n\n   Uses the algorithm described by Peter Luschny in \"Divide, Swing and\n   Conquer the Factorial!\".\n\n   The pointer sieve points to primesieve_size(n) limbs containing a\n   bit-array where primes are marked as 0.\n   Enough (FIXME: explain :-) limbs must be pointed by factors.\n */\n\nstatic void\nmpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)\n{\n  mp_limb_t prod, max_prod;\n  mp_size_t j;\n\n  ASSERT (n >= 26);\n\n  j = 0;\n  prod  = -(n & 1);\n  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */\n\n  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */\n  max_prod = GMP_NUMB_MAX / (n-1);\n\n  /* Handle prime = 3 separately. */\n  SWING_A_PRIME (3, n, prod, max_prod, factors, j);\n\n  /* Swing primes from 5 to n/3 */\n  {\n    mp_limb_t s;\n\n    {\n      mp_limb_t prime;\n\n      s = limb_apprsqrt(n);\n      ASSERT (s >= 5);\n      s = n_to_bit (s);\n      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);\n      SWING_A_PRIME (prime, n, prod, max_prod, factors, j);\n      LOOP_ON_SIEVE_END;\n      s++;\n    }\n\n    ASSERT (max_prod <= GMP_NUMB_MAX / 3);\n    ASSERT (bit_to_n (s) * bit_to_n (s) > n);\n    ASSERT (s <= n_to_bit (n / 3));\n    {\n      mp_limb_t prime;\n      mp_limb_t l_max_prod = max_prod * 3;\n\n      LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);\n      SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);\n      LOOP_ON_SIEVE_END;\n    }\n  }\n\n  /* Store primes from (n+1)/2 to n */\n  {\n    mp_limb_t prime;\n    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);\n    FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);\n    LOOP_ON_SIEVE_END;\n  }\n\n  if (LIKELY (j != 0))\n    {\n      factors[j++] = prod;\n      mpz_prodlimbs (x, factors, j);\n    }\n  else\n    {\n      PTR (x)[0] = prod;\n      SIZ (x) = 1;\n    }\n}\n\n#undef SWING_A_PRIME\n#undef 
SH_SWING_A_PRIME\n#undef LOOP_ON_SIEVE_END\n#undef LOOP_ON_SIEVE_STOP\n#undef LOOP_ON_SIEVE_BEGIN\n#undef LOOP_ON_SIEVE_CONTINUE\n#undef FACTOR_LIST_APPEND\n\n/*********************************************************/\n/* Section oddfac: odd factorial, needed also by binomial*/\n/*********************************************************/\n\n#if TUNE_PROGRAM_BUILD\n#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))\n#else\n#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))\n#endif\n\n/* mpz_oddfac_1 computes the odd part of the factorial of the\n   parameter n.  I.e. n! = x 2^a, where x is the returned value: an\n   odd positive integer.\n\n   If flag != 0 a square is skipped in the DSC part, e.g.\n   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.\n\n   If n is too small, flag is ignored, and an ASSERT can be triggered.\n\n   TODO: FAC_DSC_THRESHOLD is used here with two different roles:\n    - to decide when prime factorisation is needed,\n    - to stop the recursion, once sieving is done.\n   Maybe two thresholds can do a better job.\n */\nvoid\nmpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)\n{\n  ASSERT (n <= GMP_NUMB_MAX);\n  ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));\n\n  if (n <= ODD_FACTORIAL_TABLE_LIMIT)\n    {\n      PTR (x)[0] = __gmp_oddfac_table[n];\n      SIZ (x) = 1;\n    }\n  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)\n    {\n      mp_ptr   px;\n\n      px = MPZ_NEWALLOC (x, 2);\n      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);\n      SIZ (x) = 2;\n    }\n  else\n    {\n      unsigned s;\n      mp_ptr   factors;\n\n      s = 0;\n      {\n\tmp_limb_t tn;\n\tmp_limb_t prod, max_prod, i;\n\tmp_size_t j;\n\tTMP_SDECL;\n\n#if TUNE_PROGRAM_BUILD\n\tASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);\n\tASSERT (FAC_DSC_THRESHOLD >= 2 * 
(ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));\n#endif\n\n\t/* Compute the number of recursive steps for the DSC algorithm. */\n\tfor (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)\n\t  tn >>= 1;\n\n\tj = 0;\n\n\tTMP_SMARK;\n\tfactors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);\n\tASSERT (tn >= FACTORS_PER_LIMB);\n\n\tprod = 1;\n#if TUNE_PROGRAM_BUILD\n\tmax_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;\n#else\n\tmax_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;\n#endif\n\n\tASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);\n\tdo {\n\t  i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;\n\t  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;\n\t  do {\n\t    FACTOR_LIST_STORE (i, prod, max_prod, factors, j);\n\t    i += 2;\n\t  } while (i <= tn);\n\t  max_prod <<= 1;\n\t  tn >>= 1;\n\t} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);\n\n\tfactors[j++] = prod;\n\tfactors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];\n\tfactors[j++] = __gmp_oddfac_table[tn >> 1];\n\tmpz_prodlimbs (x, factors, j);\n\n\tTMP_SFREE;\n      }\n\n      if (s != 0)\n\t/* Use the algorithm described by Peter Luschny in \"Divide,\n\t   Swing and Conquer the Factorial!\".\n\n\t   Improvement: there are two temporary buffers, factors and\n\t   square, that are never used together; with a good estimate\n\t   of the maximal needed size, they could share a single\n\t   allocation.\n\t*/\n\t{\n\t  mpz_t mswing;\n\t  mp_ptr sieve;\n\t  mp_size_t size;\n\t  TMP_DECL;\n\n\t  TMP_MARK;\n\n\t  flag--;\n\t  size = n / GMP_NUMB_BITS + 4;\n\t  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));\n\t  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);\n\t     one more can be overwritten by mul, another for the sieve */\n\t  MPZ_TMP_INIT (mswing, size);\n\t  /* Initialize size, so that ASSERT can check it correctly. */\n\t  ASSERT_CODE (SIZ (mswing) = 0);\n\n\t  /* Put the sieve on the second half, it will be overwritten by the last mswing. 
*/\n\t  sieve = PTR (mswing) + size / 2 + 1;\n\n\t  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;\n\n\t  factors = TMP_ALLOC_LIMBS (size);\n\t  do {\n\t    mp_ptr    square, px;\n\t    mp_size_t nx, ns;\n\t    mp_limb_t cy;\n\t    TMP_DECL;\n\n\t    s--;\n\t    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */\n\t    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);\n\n\t    TMP_MARK;\n\t    nx = SIZ (x);\n\t    if (s == flag) {\n\t      size = nx;\n\t      square = TMP_ALLOC_LIMBS (size);\n\t      MPN_COPY (square, PTR (x), nx);\n\t    } else {\n\t      size = nx << 1;\n\t      square = TMP_ALLOC_LIMBS (size);\n\t      mpn_sqr (square, PTR (x), nx);\n\t      size -= (square[size - 1] == 0);\n\t    }\n\t    ns = SIZ (mswing);\n\t    nx = size + ns;\n\t    px = MPZ_NEWALLOC (x, nx);\n\t    ASSERT (ns <= size);\n\t    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */\n\n\t    TMP_FREE;\n\t    SIZ(x) = nx - (cy == 0);\n\t  } while (s != 0);\n\t  TMP_FREE;\n\t}\n    }\n}\n\n#undef FACTORS_PER_LIMB\n#undef FACTOR_LIST_STORE\n"
  },
  {
    "path": "mpz/out_raw.c",
    "content": "/* mpz_out_raw -- write an mpz_t in raw format.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* HTON_LIMB_STORE takes a normal host byte order limb and stores it as\n   network byte order (ie. big endian). 
*/\n\n#if HAVE_LIMB_BIG_ENDIAN\n#define HTON_LIMB_STORE(dst, limb)  do { *(dst) = (limb); } while (0)\n#endif\n\n#if HAVE_LIMB_LITTLE_ENDIAN\n#define HTON_LIMB_STORE(dst, limb)  BSWAP_LIMB_STORE (dst, limb)\n#endif\n\n#ifndef HTON_LIMB_STORE\n#define HTON_LIMB_STORE(dst, limb)                                      \\\n  do {                                                                  \\\n    mp_limb_t  __limb = (limb);                                         \\\n    char      *__p = (char *) (dst);                                    \\\n    int        __i;                                                     \\\n    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)                       \\\n      __p[__i] = (char) (__limb >> ((BYTES_PER_MP_LIMB-1 - __i) * 8));  \\\n  } while (0)\n#endif\n\n/* In order to allow mpz_out_raw() to be called from MPIR.Net, its implementation has been refactored into two separate functions.\n   Both the contract and implementation of mpz_out_raw() were unchanged, the split was made in order for MPIR.Net to access intermediate variables.\n   The basic flow of mpz_out_raw is to 1) allocate scratch memory, 2) write output there, 3) write the output to a file, and 4) free memory.\n   Of these, step 2 represents the bulk of the entire operation.\n   The new mpz_out_raw_m() function below performs steps 1 and 2.  At that point, local state is saved to the mpir_out struct.\n   mpz_out_raw() now calls into mpz_out_raw_m() and completes the remaining steps (3 and 4) using the saved local state.\n   For MPIR.Net, file I/O is done differently.  MPIR.Net calls into mpz_out_raw_m(), then performs its own step 3 and duplicates step 4. 
*/\nvoid mpz_out_raw_m (mpir_out_ptr mpir_out, mpz_srcptr x)\n{\n  mp_size_t   xsize, abs_xsize, bytes, i;\n  mp_srcptr   xp;\n  char        *tp, *bp;\n  mp_limb_t   xlimb;\n  int         zeros;\n  size_t      tsize, ssize;\n\n  xsize = SIZ(x);\n  abs_xsize = ABS (xsize);\n  bytes = (abs_xsize * GMP_NUMB_BITS + 7) / 8;\n  tsize = ROUND_UP_MULTIPLE ((unsigned) 4, BYTES_PER_MP_LIMB) + bytes;\n\n  tp = __GMP_ALLOCATE_FUNC_TYPE (tsize, char);\n  bp = tp + ROUND_UP_MULTIPLE ((unsigned) 4, BYTES_PER_MP_LIMB);\n\n  if (bytes != 0)\n    {\n      bp += bytes;\n      xp = PTR (x);\n      i = abs_xsize;\n\n      if (GMP_NAIL_BITS == 0)\n        {\n          /* reverse limb order, and byte swap if necessary */\n          do\n            {\n              bp -= BYTES_PER_MP_LIMB;\n              xlimb = *xp;\n              HTON_LIMB_STORE ((mp_ptr) bp, xlimb);\n              xp++;\n            }\n          while (--i > 0);\n\n          /* strip high zero bytes (without fetching from bp) */\n          count_leading_zeros (zeros, xlimb);\n          zeros /= 8;\n          bp += zeros;\n          bytes -= zeros;\n        }\n      else\n        {\n          mp_limb_t  new_xlimb;\n          int        bits;\n          ASSERT_CODE (char *bp_orig = bp - bytes);\n\n          ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);\n\n          bits = 0;\n          xlimb = 0;\n          for (;;)\n            {\n              while (bits >= 8)\n                {\n                  ASSERT (bp > bp_orig);\n                  *--bp = xlimb & 0xFF;\n                  xlimb >>= 8;\n                  bits -= 8;\n                }\n\n              if (i == 0)\n                break;\n\n              new_xlimb = *xp++;\n              i--;\n              ASSERT (bp > bp_orig);\n              *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;\n              xlimb = new_xlimb >> (8 - bits);\n              bits += GMP_NUMB_BITS - 8;\n            }\n\n          if (bits != 0)\n            {\n              ASSERT (bp > bp_orig);\n 
             *--bp = xlimb;\n            }\n\n          ASSERT (bp == bp_orig);\n          while (*bp == 0)\n            {\n              bp++;\n              bytes--;\n            }\n        }\n    }\n\n  /* total bytes to be written */\n  ssize = 4 + bytes;\n\n  /* twos complement negative for the size value */\n  bytes = (xsize >= 0 ? bytes : -bytes);\n\n  /* so we don't rely on sign extension in \">>\" */\n  ASSERT_ALWAYS (sizeof (bytes) >= 4);\n\n  bp[-4] = bytes >> 24;\n  bp[-3] = bytes >> 16;\n  bp[-2] = bytes >> 8;\n  bp[-1] = bytes;\n  bp -= 4;\n\n  mpir_out->allocated = tp;\n  mpir_out->allocatedSize = tsize;\n  mpir_out->written = bp;\n  mpir_out->writtenSize = ssize;\n}\n\nsize_t\nmpz_out_raw (FILE* fp, mpz_srcptr x)\n{\n  mpir_out_struct out;\n\n  if (fp == 0)\n    fp = stdout;\n\n  //For re-use in MPIR.Net, the bulk of the work (output into a memory location) has been refactored into a separate function mpz_out_raw_m().\n  mpz_out_raw_m(out, x);\n\n  if (fwrite (out->written, out->writtenSize, 1, fp) != 1)\n    out->writtenSize = 0;\n\n  (*__gmp_free_func) (out->allocated, out->allocatedSize);\n  return out->writtenSize;\n}\n"
  },
  {
    "path": "mpz/out_str.c",
    "content": "/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.\n   integer INTEGER in base BASE.\n\nCopyright 1991, 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nsize_t\nmpz_out_str (FILE *stream, int base, mpz_srcptr x)\n{\n  mp_ptr xp;\n  mp_size_t x_size = x->_mp_size;\n  unsigned char *str;\n  size_t str_size;\n  size_t i;\n  size_t written;\n  char *num_to_text;\n  TMP_DECL;\n\n  if (stream == 0)\n    stream = stdout;\n\n  if (base >= 0)\n    {\n      num_to_text = \"0123456789abcdefghijklmnopqrstuvwxyz\";\n      if (base == 0)\n\tbase = 10;\n      else if (base > 36)\n\t{\n\t  num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\";\n\t  if (base > 62)\n\t    return 0;\n\t}\n    }\n  else\n    {\n      base = -base;\n      num_to_text = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\";\n    }\n\n  if (x_size == 0)\n    {\n      fputc ('0', stream);\n      return ferror (stream) ? 
0 : 1;\n    }\n\n  written = 0;\n\n  if (x_size < 0)\n    {\n      fputc ('-', stream);\n      x_size = -x_size;\n      written = 1;\n    }\n\n  TMP_MARK;\n  str_size = ((size_t) (x_size * BITS_PER_MP_LIMB\n\t\t\t* __mp_bases[base].chars_per_bit_exactly)) + 3;\n  str = (unsigned char *) TMP_ALLOC (str_size);\n\n  /* Move the number to convert into temporary space, since mpn_get_str\n     clobbers its argument + needs one extra high limb....  */\n  xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);\n  MPN_COPY (xp, x->_mp_d, x_size);\n\n  str_size = mpn_get_str (str, base, xp, x_size);\n\n  /* mpn_get_str might make some leading zeros.  Skip them.  */\n  while (*str == 0)\n    {\n      str_size--;\n      str++;\n    }\n\n  /* Translate to printable chars.  */\n  for (i = 0; i < str_size; i++)\n    str[i] = num_to_text[str[i]];\n  str[str_size] = 0;\n\n  {\n    size_t fwret;\n    fwret = fwrite ((char *) str, 1, str_size, stream);\n    written += fwret;\n  }\n\n  TMP_FREE;\n  return ferror (stream) ? 0 : written;\n}\n"
  },
  {
    "path": "mpz/perfpow.c",
    "content": "/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power,\n   zero otherwise.\n\nCopyright 1998, 1999, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nCopyright 2008 Jason Moxham\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/*\n  We are to determine if c is a perfect power, c = a ^ b.\n  Assume c is divisible by 2^n and that codd = c/2^n is odd.\n  Assume a is divisible by 2^m and that aodd = a/2^m is odd.\n  It is always true that m divides n.\n\n  * If n is prime, either 1) a is 2*aodd and b = n\n\t\t       or 2) a = c and b = 1.\n    So for n prime, we readily have a solution.\n  * If n is factorable into the non-trivial factors p1,p2,...\n    Since m divides n, m has a subset of n's factors and b = n / m.\n*/\n\n/* This is a naive approach to recognizing perfect powers.\n   Many things can be improved.  In particular, we should use p-adic\n   arithmetic for computing possible roots.  
*/\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic mpir_ui gcd(mpir_ui a, mpir_ui b);\nstatic int isprime(mpir_ui t);\n\nstatic const unsigned short primes[] =\n{  2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,\n  59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131,\n 137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,\n 227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,\n 313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,\n 419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,\n 509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,\n 617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,\n 727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,\n 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,\n 947,953,967,971,977,983,991,997,0\n};\n#define SMALLEST_OMITTED_PRIME 1009\n\n\nint\nmpz_perfect_power_p (mpz_srcptr u)\n{\n  unsigned long int prime;\n  unsigned long int n, n2;\n  int i;\n  unsigned long int rem;\n  mpz_t u2, q;\n  int exact;\n  mp_size_t uns;\n  mp_size_t usize = SIZ (u);\n  TMP_DECL;\n\n  if (usize == 0)\n    return 1;\t\t\t/* consider 0 a perfect power */\n\n  n2 = mpz_scan1 (u, 0);\n\n  if (n2 == 1)\n    return 0;\t\t\t/* 2 divides exactly once.  */\n\n  if (n2 > 1 && POW2_P(n2) && usize < 0)\n    return 0;\t\t\t/* 2 has power of two  multiplicity with negative U */\n\n  TMP_MARK;\n\n  uns = ABS (usize) - n2 / BITS_PER_MP_LIMB;\n  MPZ_TMP_INIT (q, uns);\n  MPZ_TMP_INIT (u2, uns);\n\n  mpz_tdiv_q_2exp (u2, u, n2);\n\n  if (isprime (n2))\n    goto n2prime;\n\n  for (i = 1; primes[i] != 0; i++)\n    {\n      prime = primes[i];\n\n      if (mpz_divisible_ui_p (u2, prime))\t/* divisible by this prime? 
*/\n\t{\n\t  rem = mpz_tdiv_q_ui (q, u2, prime * prime);\n\t  if (rem != 0)\n\t    {\n\t      TMP_FREE;\n\t      return 0;\t\t/* prime divides exactly once, reject */\n\t    }\n\t  mpz_swap (q, u2);\n\t  for (n = 2;;)\n\t    {\n\t      rem = mpz_tdiv_q_ui (q, u2, prime);\n\t      if (rem != 0)\n\t\tbreak;\n\t      mpz_swap (q, u2);\n\t      n++;\n\t    }\n\n\t  if ( POW2_P(n) && usize < 0)\n\t    {\n\t      TMP_FREE;\n\t      return 0;\t\t/* power of two multiplicity with negative U, reject */\n\t    }\n\n\t  n2 = gcd (n2, n);\n\t  if (n2 == 1)\n\t    {\n\t      TMP_FREE;\n\t      return 0;\t\t/* we have multiplicity 1 of some factor */\n\t    }\n\n\t  if (mpz_cmpabs_ui (u2, 1) == 0)\n\t    {\n\t      TMP_FREE;\n\t      if(usize<0 && POW2_P(n2))return 0;/* factoring completed; not consistent power */\n\t      return 1;\t\t/* factoring completed; consistent power */\n\t    }\n\n\t  /* As soon as n2 becomes a prime number, stop factoring.\n\t     Either we have u=x^n2 or u is not a perfect power.  */\n\t  if (isprime (n2))\n\t    goto n2prime;\n\t}\n    }\n\n  if (n2 == 0)\n    {\n      /* We found no factors above; have to check all values of n.  */\n      unsigned long int nth;\n      for (nth = usize < 0 ? 3 : 2;; nth++)\n\t{\n\t  if (! isprime (nth))\n\t    continue;\n#if 0\n\t  exact = mpz_padic_root (q, u2, nth, PTH);\n\t  if (exact)\n#endif\n\t    exact = mpz_root (q, u2, nth);\n\t  if (exact)\n\t    {\n\t      TMP_FREE;\n\t      return 1;\n\t    }\n\t  if (mpz_cmpabs_ui (q, SMALLEST_OMITTED_PRIME) < 0)\n\t    {\n\t      TMP_FREE;\n\t      return 0;\n\t    }\n\t}\n    }\n  else\n    {\n      unsigned long int nth;\n      /* We found some factors above.  We just need to consider values of n\n\t that divides n2.  */\n      for (nth = usize < 0 ? 3 : 2; nth <= n2; nth++)\n\t{\n\t  if (! 
isprime (nth))\n\t    continue;\n\t  if (n2 % nth != 0)\n\t    continue;\n#if 0\n\t  exact = mpz_padic_root (q, u2, nth, PTH);\n\t  if (exact)\n#endif\n\t    exact = mpz_root (q, u2, nth);\n\t  if (exact)\n\t    {\n\t      TMP_FREE;\n\t      return 1;\n\t    }\n\t  if (mpz_cmpabs_ui (q, SMALLEST_OMITTED_PRIME) < 0)\n\t    {\n\t      TMP_FREE;\n\t      return 0;\n\t    }\n\t}\n\n      TMP_FREE;\n      return 0;\n    }\n\nn2prime:\n  if(n2==2 && usize<0){TMP_FREE;return 0;}\n  exact = mpz_root (NULL, u2, n2);\n  TMP_FREE;\n  return exact;\n}\n\nstatic mpir_ui\ngcd (mpir_ui a, mpir_ui b)\n{\n  int an2, bn2, n2;\n\n  if (a == 0)\n    return b;\n  if (b == 0)\n    return a;\n\n  count_trailing_zeros (an2, a);\n  a >>= an2;\n\n  count_trailing_zeros (bn2, b);\n  b >>= bn2;\n\n  n2 = MIN (an2, bn2);\n\n  while (a != b)\n    {\n      if (a > b)\n\t{\n\t  a -= b;\n\t  do\n\t    a >>= 1;\n\t  while ((a & 1) == 0);\n\t}\n      else /*  b > a.  */\n\t{\n\t  b -= a;\n\t  do\n\t    b >>= 1;\n\t  while ((b & 1) == 0);\n\t}\n    }\n\n  return a << n2;\n}\n\nstatic int\nisprime (mpir_ui t)\n{\n  mpir_ui q, r, d;\n\n  if (t < 3 || (t & 1) == 0)\n    return t == 2;\n\n  for (d = 3, r = 1; r != 0; d += 2)\n    {\n      q = t / d;\n      r = t - q * d;\n      if (q < d)\n\treturn 1;\n    }\n  return 0;\n}\n"
  },
  {
    "path": "mpz/perfsqr.c",
    "content": "/* mpz_perfect_square_p(arg) -- Return non-zero if ARG is a perfect square,\n   zero otherwise.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_perfect_square_p 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/popcount.c",
    "content": "/* mpz_popcount(mpz_ptr op) -- Population count of OP.  If the operand is\n   negative, return ~0 (a novel representation of infinity).\n\nCopyright 1994, 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_popcount 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/pow_ui.c",
    "content": "/* mpz_pow_ui -- mpz raised to ulong.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_pow_ui (mpz_ptr r, mpz_srcptr b, mpir_ui e)\n{\n  mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), e);\n}\n"
  },
  {
    "path": "mpz/powm.c",
    "content": "/* mpz_powm(res,base,exp,mod) -- Set R to (U^E) mod M.\n\n   Contributed to the GNU project by Torbjorn Granlund.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009, 2011, 2012\nFree Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* TODO\n\n * Improve handling of buffers.  It is pretty ugly now.\n\n * For even moduli, we compute a binvert of its odd part both here and in\n   mpn_powm.  
How can we avoid this recomputation?\n*/\n\n/*\n  b ^ e mod m   res\n  0   0     0    ?\n  0   e     0    ?\n  0   0     m    ?\n  0   e     m    0\n  b   0     0    ?\n  b   e     0    ?\n  b   0     m    1 mod m\n  b   e     m    b^e mod m\n*/\n\n#define HANDLE_NEGATIVE_EXPONENT 1\n\nvoid\nmpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)\n{\n  mp_size_t n, nodd, ncnt;\n  int cnt;\n  mp_ptr rp, tp;\n  mp_srcptr bp, ep, mp;\n  mp_size_t rn, bn, es, en, itch;\n  mpz_t new_b;\t\t\t/* note: value lives long via 'b' */\n  TMP_DECL;\n\n  n = ABSIZ(m);\n  if (UNLIKELY (n == 0))\n    DIVIDE_BY_ZERO;\n\n  mp = PTR(m);\n\n  TMP_MARK;\n\n  es = SIZ(e);\n  if (UNLIKELY (es <= 0))\n    {\n      if (es == 0)\n\t{\n\t  /* b^0 mod m,  b is anything and m is non-zero.\n\t     Result is 1 mod m, i.e., 1 or 0 depending on if m = 1.  */\n\t  SIZ(r) = n != 1 || mp[0] != 1;\n\t  PTR(r)[0] = 1;\n\t  TMP_FREE;\t/* we haven't really allocated anything here */\n\t  return;\n\t}\n#if HANDLE_NEGATIVE_EXPONENT\n      MPZ_TMP_INIT (new_b, n + 1);\n\n      if (UNLIKELY (! mpz_invert (new_b, b, m)))\n\tDIVIDE_BY_ZERO;\n      b = new_b;\n      es = -es;\n#else\n      DIVIDE_BY_ZERO;\n#endif\n    }\n  en = es;\n\n  bn = ABSIZ(b);\n\n  if (UNLIKELY (bn == 0))\n    {\n      SIZ(r) = 0;\n      TMP_FREE;\n      return;\n    }\n\n  ep = PTR(e);\n\n  /* Handle (b^1 mod m) early, since mpn_pow* do not handle that case.  
*/\n  if (UNLIKELY (en == 1 && ep[0] == 1))\n    {\n      rp = TMP_ALLOC_LIMBS (n);\n      bp = PTR(b);\n      if (bn >= n)\n\t{\n\t  mp_ptr qp = TMP_ALLOC_LIMBS (bn - n + 1);\n\t  mpn_tdiv_qr (qp, rp, 0L, bp, bn, mp, n);\n\t  rn = n;\n\t  MPN_NORMALIZE (rp, rn);\n\n\t  if (SIZ(b) < 0 && rn != 0)\n\t    {\n\t      mpn_sub (rp, mp, n, rp, rn);\n\t      rn = n;\n\t      MPN_NORMALIZE (rp, rn);\n\t    }\n\t}\n      else\n\t{\n\t  if (SIZ(b) < 0)\n\t    {\n\t      mpn_sub (rp, mp, n, bp, bn);\n\t      rn = n;\n\t      rn -= (rp[rn - 1] == 0);\n\t    }\n\t  else\n\t    {\n\t      MPN_COPY (rp, bp, bn);\n\t      rn = bn;\n\t    }\n\t}\n      goto ret;\n    }\n\n  /* Remove low zero limbs from M.  This loop will terminate for correctly\n     represented mpz numbers.  */\n  ncnt = 0;\n  while (UNLIKELY (mp[0] == 0))\n    {\n      mp++;\n      ncnt++;\n    }\n  nodd = n - ncnt;\n  cnt = 0;\n  if (mp[0] % 2 == 0)\n    {\n      mp_ptr newmp = TMP_ALLOC_LIMBS (nodd);\n      count_trailing_zeros (cnt, mp[0]);\n      mpn_rshift (newmp, mp, nodd, cnt);\n      nodd -= newmp[nodd - 1] == 0;\n      mp = newmp;\n      ncnt++;\n    }\n\n  if (ncnt != 0)\n    {\n      /* We will call both mpn_powm and mpn_powlo.  */\n      /* rp needs n, mpn_powlo needs 4n, the 2 mpn_binvert might need more */\n      mp_size_t n_largest_binvert = MAX (ncnt, nodd);\n      mp_size_t itch_binvert = mpn_binvert_itch (n_largest_binvert);\n      itch = 3 * n + MAX (itch_binvert, 2 * n);\n    }\n  else\n    {\n      /* We will call just mpn_powm.  
*/\n      mp_size_t itch_binvert = mpn_binvert_itch (nodd);\n      itch = n + MAX (itch_binvert, 2 * n);\n    }\n  tp = TMP_ALLOC_LIMBS (itch);\n\n  rp = tp;  tp += n;\n\n  bp = PTR(b);\n  mpn_powm (rp, bp, bn, ep, en, mp, nodd, tp);\n\n  rn = n;\n\n  if (ncnt != 0)\n    {\n      mp_ptr r2, xp, yp, odd_inv_2exp;\n      unsigned long t;\n      int bcnt;\n\n      if (bn < ncnt)\n\t{\n\t  mp_ptr newbp = TMP_ALLOC_LIMBS (ncnt);\n\t  MPN_COPY (newbp, bp, bn);\n\t  MPN_ZERO (newbp + bn, ncnt - bn);\n\t  bp = newbp;\n\t}\n\n      r2 = tp;\n\n      if (bp[0] % 2 == 0)\n\t{\n\t  if (en > 1)\n\t    {\n\t      MPN_ZERO (r2, ncnt);\n\t      goto zero;\n\t    }\n\n\t  ASSERT (en == 1);\n\t  t = (ncnt - (cnt != 0)) * GMP_NUMB_BITS + cnt;\n\n\t  /* Count number of low zero bits in B, up to 3.  */\n\t  bcnt = (0x1213 >> ((bp[0] & 7) << 1)) & 0x3;\n\t  /* Note that ep[0] * bcnt might overflow, but that just results\n\t     in a missed optimization.  */\n\t  if (ep[0] * bcnt >= t)\n\t    {\n\t      MPN_ZERO (r2, ncnt);\n\t      goto zero;\n\t    }\n\t}\n\n      mpn_powlo (r2, bp, ep, en, ncnt, tp + ncnt);\n\n    zero:\n      if (nodd < ncnt)\n\t{\n\t  mp_ptr newmp = TMP_ALLOC_LIMBS (ncnt);\n\t  MPN_COPY (newmp, mp, nodd);\n\t  MPN_ZERO (newmp + nodd, ncnt - nodd);\n\t  mp = newmp;\n\t}\n\n      odd_inv_2exp = tp + n;\n      mpn_binvert (odd_inv_2exp, mp, ncnt, tp + 2 * n);\n\n      mpn_sub (r2, r2, ncnt, rp, nodd > ncnt ? 
ncnt : nodd);\n\n      xp = tp + 2 * n;\n      /* ncnt = trailing limbs from m */\n      /* tp is at least 5*n, no worries */\n      mpn_mullow_n (xp, odd_inv_2exp, r2, ncnt); /* JPF */\n\n      if (cnt != 0)\n\txp[ncnt - 1] &= (CNST_LIMB(1) << cnt) - 1;\n\n      yp = tp;\n      if (ncnt > nodd)\n\tmpn_mul (yp, xp, ncnt, mp, nodd);\n      else\n\tmpn_mul (yp, mp, nodd, xp, ncnt);\n\n      mpn_add (rp, yp, n, rp, nodd);\n\n      ASSERT (nodd + ncnt >= n);\n      ASSERT (nodd + ncnt <= n + 1);\n    }\n\n  MPN_NORMALIZE (rp, rn);\n\n  if ((ep[0] & 1) && SIZ(b) < 0 && rn != 0)\n    {\n      mpn_sub (rp, PTR(m), n, rp, rn);\n      rn = n;\n      MPN_NORMALIZE (rp, rn);\n    }\n\n ret:\n  MPZ_REALLOC (r, rn);\n  SIZ(r) = rn;\n  MPN_COPY (PTR(r), rp, rn);\n\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/powm_ui.c",
    "content": "/* mpz_powm_ui(res,base,exp,mod) -- Set R to (B^E) mod M.\n\n   Contributed to the GNU project by Torbjörn Granlund.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009, 2011-2013\nFree Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* This code is very old, and should be rewritten to current GMP standard.  
It\n   is slower than mpz_powm for large exponents, but also for small exponents\n   when the mod argument is small.\n\n   As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.\n*/\n\n/*\n  b ^ e mod m   res\n  0   0     0    ?\n  0   e     0    ?\n  0   0     m    ?\n  0   e     m    0\n  b   0     0    ?\n  b   e     0    ?\n  b   0     m    1 mod m\n  b   e     m    b^e mod m\n*/\n\nstatic void\nmod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,\n     mp_ptr tp)\n{\n  mp_ptr qp;\n  TMP_DECL;\n  TMP_MARK;\n\n  qp = tp;\n\n  if (dn == 1)\n    {\n      np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]); /* JPF */\n    }\n  else if (dn == 2)\n    {\n      mpn_divrem_2 (qp, 0L, np, nn, dp);\n    }\n  else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||\n\t   BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))\n    {\n      mpn_sb_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t, two limbs pi */\n    }\n  /* Different conditions */\n  else if (BELOW_THRESHOLD (dn, INV_DIV_QR_THRESHOLD) ||   /* fast condition */\n\t   BELOW_THRESHOLD (nn, 2 * INV_DIV_QR_THRESHOLD)) /* fast condition */\n    {\n      mpn_dc_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t */\n    }\n  else\n    {\n      mp_ptr dinv2 = TMP_ALLOC_LIMBS(dn); /* JPF: ... */\n      mpn_invert(dinv2, dp, dn); /* JPF: ... */\n      mpn_inv_div_qr (qp, np, nn, dp, dn, dinv2); /* JPF: need nn+1 for new np? I don't think so as everyting is already normalized */\n    }\n\n  TMP_FREE;\n}\n\n/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and\n   t is defined by (tp,mn).  
*/\nstatic void\nreduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn,\n        mp_limb_t dinv)\n{\n  mp_ptr rp, scratch;\n  TMP_DECL;\n  TMP_MARK;\n\n  rp = TMP_ALLOC_LIMBS (an);\n  scratch = TMP_ALLOC_LIMBS (an - mn + 1);\n  MPN_COPY (rp, ap, an);\n  mod (rp, an, mp, mn, dinv, scratch);\n  MPN_COPY (tp, rp, mn);\n\n  TMP_FREE;\n}\n\nvoid\nmpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)\n{\n  if (el < 20) /* JPF */\n    {\n      mp_ptr xp, tp, mp, bp, scratch;\n      mp_size_t xn, tn, mn, bn;\n      int m_zero_cnt;\n      int c;\n      mp_limb_t e, m2;\n      mp_limb_t dinv;\n      TMP_DECL;\n\n      mp = PTR(m);\n      mn = ABSIZ(m);\n      if (UNLIKELY (mn == 0))\n\tDIVIDE_BY_ZERO;\n\n      if (el == 0)\n\t{\n\t  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if\n\t     M equals 1.  */\n\t  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;\n\t  PTR(r)[0] = 1;\n\t  return;\n\t}\n\n      TMP_MARK;\n\n      /* Normalize m (i.e. make its most significant bit set) as required by\n\t division functions below.  */\n      count_leading_zeros (m_zero_cnt, mp[mn - 1]);\n      m_zero_cnt -= GMP_NAIL_BITS;\n      if (m_zero_cnt != 0)\n\t{\n\t  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);\n\t  mpn_lshift (new_mp, mp, mn, m_zero_cnt);\n\t  mp = new_mp;\n\t}\n\n      m2 = mn == 1 ? 0 : mp[mn - 2];\n      mpir_invert_pi1 (dinv, mp[mn - 1], m2); /* JPF: don't use gmp_pi1_t */\n\n      bn = ABSIZ(b);\n      bp = PTR(b);\n      if (bn > mn)\n\t{\n\t  /* Reduce possibly huge base.  Use a function call to reduce, since we\n\t     don't want the quotient allocation to live until function return.  */\n\t  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);\n\t  reduce (new_bp, bp, bn, mp, mn, dinv); /* JPF */\n\t  bp = new_bp;\n\t  bn = mn;\n\t  /* Canonicalize the base, since we are potentially going to multiply with\n\t     it quite a few times.  
*/\n\t  MPN_NORMALIZE (bp, bn);\n\t}\n\n      if (bn == 0)\n\t{\n\t  SIZ(r) = 0;\n\t  TMP_FREE;\n\t  return;\n\t}\n\n      tp = TMP_ALLOC_LIMBS (2 * mn + 1);\n      xp = TMP_ALLOC_LIMBS (mn);\n      scratch = TMP_ALLOC_LIMBS (mn + 1);\n\n      MPN_COPY (xp, bp, bn);\n      xn = bn;\n\n      e = el;\n      count_leading_zeros (c, e);\n      e = (e << c) << 1;\t\t/* shift the exp bits to the left, lose msb */\n      c = GMP_LIMB_BITS - 1 - c;\n\n      if (c == 0)\n\t{\n\t  /* If m is already normalized (high bit of high limb set), and b is\n\t     the same size, but a bigger value, and e==1, then there's no\n\t     modular reductions done and we can end up with a result out of\n\t     range at the end. */\n\t  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)\n\t    mpn_sub_n (xp, xp, mp, mn);\n\t}\n      else\n\t{\n\t  /* Main loop. */\n\t  do\n\t    {\n\t      mpn_sqr (tp, xp, xn);\n\t      tn = 2 * xn; tn -= tp[tn - 1] == 0;\n\t      if (tn < mn)\n\t\t{\n\t\t  MPN_COPY (xp, tp, tn);\n\t\t  xn = tn;\n\t\t}\n\t      else\n\t\t{\n                    mod (tp, tn, mp, mn, dinv, scratch); /* JPF */\n\t\t  MPN_COPY (xp, tp, mn);\n\t\t  xn = mn;\n\t\t}\n\n\t      if ((mp_limb_signed_t) e < 0)\n\t\t{\n\t\t  mpn_mul (tp, xp, xn, bp, bn);\n\t\t  tn = xn + bn; tn -= tp[tn - 1] == 0;\n\t\t  if (tn < mn)\n\t\t    {\n\t\t      MPN_COPY (xp, tp, tn);\n\t\t      xn = tn;\n\t\t    }\n\t\t  else\n\t\t    {\n                        mod (tp, tn, mp, mn, dinv, scratch); /* JPF */\n\t\t      MPN_COPY (xp, tp, mn);\n\t\t      xn = mn;\n\t\t    }\n\t\t}\n\t      e <<= 1;\n\t      c--;\n\t    }\n\t  while (c != 0);\n\t}\n\n      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it\n\t with the original M.  
*/\n      if (m_zero_cnt != 0)\n\t{\n\t  mp_limb_t cy;\n\t  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);\n\t  tp[xn] = cy; xn += cy != 0;\n\n\t  if (xn < mn)\n\t    {\n\t      MPN_COPY (xp, tp, xn);\n\t    }\n\t  else\n\t    {\n                mod (tp, xn, mp, mn, dinv, scratch); /* JPF */\n\t      MPN_COPY (xp, tp, mn);\n\t      xn = mn;\n\t    }\n\t  mpn_rshift (xp, xp, xn, m_zero_cnt);\n\t}\n      MPN_NORMALIZE (xp, xn);\n\n      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)\n\t{\n\t  mp = PTR(m);\t\t\t/* want original, unnormalized m */\n\t  mpn_sub (xp, mp, mn, xp, xn);\n\t  xn = mn;\n\t  MPN_NORMALIZE (xp, xn);\n\t}\n      MPZ_REALLOC (r, xn);\n      SIZ (r) = xn;\n      MPN_COPY (PTR(r), xp, xn);\n\n      TMP_FREE;\n    }\n  else /* e >= 20 */\n    {\n      /* For large exponents, fake a mpz_t exponent and deflect to the more\n\t sophisticated mpz_powm.  */\n      mpz_t e;\n      mp_limb_t ep[LIMBS_PER_UI]; /* JPF: no ulong in MPIR */\n      MPZ_FAKE_UI (e, ep, el);\n      mpz_powm (r, b, e, m);\n    }\n}\n"
  },
  {
    "path": "mpz/pprime_p.c",
    "content": "/* mpz_probab_prime_p --\n   An implementation of the probabilistic primality test found in Knuth's\n   Seminumerical Algorithms book.  If the function mpz_probab_prime_p()\n   returns 0 then n is not prime.  If it returns 1, then n is 'probably'\n   prime.  If it returns 2, n is surely prime.  The probability of a false\n   positive is (1/4)**reps, where reps is the number of internal passes of the\n   probabilistic algorithm.  Knuth indicates that 25 passes are reasonable.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free\nSoftware Foundation, Inc.  Miller-Rabin code contributed by John Amanatides.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nstatic int isprime(mpir_ui t);\n\n\n/* MPN_MOD_OR_MODEXACT_1_ODD can be used instead of mpn_mod_1 for the trial\n   division.  It gives a result which is not the actual remainder r but a\n   value congruent to r*2^n mod d.  Since all the primes being tested are\n   odd, r*2^n mod p will be 0 if and only if r mod p is 0.  
*/\n\n\n//  This function is obsolete     17/08/2009\n\nint\nmpz_probab_prime_p (mpz_srcptr n, int reps)\n{\n  mp_limb_t r;\n  mpz_t n2;\n\n  /* Handle small and negative n.  */\n  if (mpz_cmp_ui (n, 1000000L) <= 0)\n    {\n      int is_prime;\n      if (mpz_cmpabs_ui (n, 1000000L) <= 0)\n\t{\n\t  is_prime = isprime (mpz_get_ui (n));\n\t  return is_prime ? 2 : 0;\n\t}\n      /* Negative number.  Negate and fall out.  */\n      PTR(n2) = PTR(n);\n      SIZ(n2) = -SIZ(n);\n      n = n2;\n    }\n\n  /* If n is now even, it is not a prime.  */\n  if ((mpz_get_ui (n) & 1) == 0)\n    return 0;\n\n#if defined (PP)\n  /* Check if n has small factors.  */\n#if defined (PP_INVERTED)\n  r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,\n                               (mp_limb_t) PP_INVERTED);\n#else\n  r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);\n#endif\n  if (r % 3 == 0\n#if BITS_PER_MP_LIMB >= 4\n      || r % 5 == 0\n#endif\n#if BITS_PER_MP_LIMB >= 8\n      || r % 7 == 0\n#endif\n#if BITS_PER_MP_LIMB >= 16\n      || r % 11 == 0 || r % 13 == 0\n#endif\n#if BITS_PER_MP_LIMB >= 32\n      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0\n#endif\n#if BITS_PER_MP_LIMB >= 64\n      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0\n      || r % 47 == 0 || r % 53 == 0\n#endif\n      )\n    {\n      return 0;\n    }\n#endif /* PP */\n\n  /* Do more dividing.  We collect small primes, using umul_ppmm, until we\n     overflow a single limb.  We divide our number by the small primes product,\n     and look for factors in the remainder.  
*/\n  {\n    mpir_ui ln2, q;\n    mp_limb_t p1, p0, p;\n    unsigned int primes[15];\n    int nprimes;\n\n    nprimes = 0;\n    p = 1;\n    ln2 = mpz_sizeinbase (n, 2);\t/* FIXME: tune this limit */\n    for (q = PP_FIRST_OMITTED; q < ln2; q += 2)\n      {\n\tif (isprime (q))\n\t  {\n\t    umul_ppmm (p1, p0, p, q);\n\t    if (p1 != 0)\n\t      {\n\t\tr = MPN_MOD_OR_MODEXACT_1_ODD (PTR(n), (mp_size_t) SIZ(n), p);\n\t\twhile (--nprimes >= 0)\n\t\t  if (r % primes[nprimes] == 0)\n\t\t    {\n\t\t      ASSERT_ALWAYS (mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) primes[nprimes]) == 0);\n\t\t      return 0;\n\t\t    }\n\t\tp = q;\n\t\tnprimes = 0;\n\t      }\n\t    else\n\t      {\n\t\tp = p0;\n\t      }\n\t    primes[nprimes++] = q;\n\t  }\n      }\n  }\n\n  /* Perform a number of Miller-Rabin tests.  */\n  return mpz_millerrabin (n, reps);\n}\n\nstatic int\nisprime (mpir_ui t)\n{\n  mpir_ui q, r, d;\n\n  if (t < 3 || (t & 1) == 0)\n    return t == 2;\n\n  for (d = 3, r = 1; r != 0; d += 2)\n    {\n      q = t / d;\n      r = t - q * d;\n      if (q < d)\n\treturn 1;\n    }\n  return 0;\n}\n"
  },
  {
    "path": "mpz/primorial_ui.c",
    "content": "/* mpz_primorial_ui(RESULT, N) -- Set RESULT to N# the product of primes <= N.\n\nContributed to the GNU project by Marco Bodrato.\n\nCopyright 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* TODO: Remove duplicated constants / macros / static functions...\n */\n\n/*************************************************************/\n/* Section macros: common macros, for swing/fac/bin (&sieve) */\n/*************************************************************/\n\n#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    if ((PR) > (MAX_PR)) {\t\t\t\t\t\\\n      (VEC)[(I)++] = (PR);\t\t\t\t\t\\\n      (PR) = (P);\t\t\t\t\t\t\\\n    } else\t\t\t\t\t\t\t\\\n      (PR) *= (P);\t\t\t\t\t\t\\\n  } while (0)\n\n#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\t\t\t\\\n    __max_i = (end);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\\\n      ++__i;\t\t\t\t\t\t\t\\\n      if (((sieve)[__index] & __mask) == 0)\t\t\t\\\n\t{\t\t\t\t\t\t\t\\\n\t  (prime) = id_to_n(__i)\n\n#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    mp_limb_t __mask, __index, __max_i, __i;\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    __i = (start)-(off);\t\t\t\t\t\\\n    __index = __i / 
GMP_LIMB_BITS;\t\t\t\t\\\n    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);\t\t\\\n    __i += (off);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\n\n#define LOOP_ON_SIEVE_STOP\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\\\n      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);\t\\\n      __index += __mask & 1;\t\t\t\t\t\\\n    }  while (__i <= __max_i)\t\t\t\t\t\\\n\n#define LOOP_ON_SIEVE_END\t\t\t\t\t\\\n    LOOP_ON_SIEVE_STOP;\t\t\t\t\t\t\\\n  } while (0)\n\n/*********************************************************/\n/* Section sieve: sieving functions and tools for primes */\n/*********************************************************/\n\n#if WANT_ASSERT\nstatic mp_limb_t\nbit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }\n#endif\n\n/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/\nstatic mp_limb_t\nid_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }\n\n/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */\nstatic mp_limb_t\nn_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }\n\n#if WANT_ASSERT\nstatic mp_size_t\nprimesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }\n#endif\n\n/*********************************************************/\n/* Section primorial: implementation                     */\n/*********************************************************/\n\nvoid\nmpz_primorial_ui (mpz_ptr x, mpir_ui n)\n{\n  static const mp_limb_t table[] = { 1, 1, 2, 6, 6 };\n\n  ASSERT (n <= GMP_NUMB_MAX);\n\n  if (n < numberof (table))\n    {\n      PTR (x)[0] = table[n];\n      SIZ (x) = 1;\n    }\n  else\n    {\n      mp_limb_t *sieve, *factors;\n      mp_size_t size;\n      mp_limb_t prod;\n      mp_limb_t j;\n      TMP_DECL;\n\n      size = 1 + n / GMP_NUMB_BITS + n / (2*GMP_NUMB_BITS);\n      ASSERT (size >= primesieve_size (n));\n      sieve = MPZ_REALLOC (x, size);\n      size = (gmp_primesieve (sieve, n) + 1) / log_n_max (n) + 1;\n\n      TMP_MARK;\n      factors = TMP_ALLOC_LIMBS (size);\n\n      j = 0;\n\n      prod = 
table[numberof (table)-1];\n\n      /* Store primes from 5 to n */\n      {\n\tmp_limb_t prime, max_prod;\n\n\tmax_prod = GMP_NUMB_MAX / n;\n\n\tLOOP_ON_SIEVE_BEGIN (prime, n_to_bit(numberof (table)), n_to_bit (n), 0, sieve);\n\tFACTOR_LIST_STORE (prime, prod, max_prod, factors, j);\n\tLOOP_ON_SIEVE_END;\n      }\n\n      if (j != 0)\n\t{\n\t  factors[j++] = prod;\n\t  mpz_prodlimbs (x, factors, j);\n\t}\n      else\n\t{\n\t  PTR (x)[0] = prod;\n\t  SIZ (x) = 1;\n\t}\n\n      TMP_FREE;\n    }\n}\n"
  },
  {
    "path": "mpz/probable_prime_p.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_probable_prime_p (mpz_srcptr N, gmp_randstate_t STATE, int PROB, mpir_ui td)\n{\n  int d, t, i, r;\n  mpz_t base, nm1, x, e, n;\n\n  ALLOC(n) = ALLOC(N);\n  SIZ(n) = ABSIZ(N);\n  PTR(n) = PTR(N);\t\t/* fake up an absolute value that we dont have de-allocate */\n\n  /* algorithm does not handle small values, get rid of them here */\n  if (mpz_cmp_ui(n, 2) == 0 || mpz_cmp_ui(n, 3) == 0)\n     return 1;\n\n  if (mpz_cmp_ui(n, 5) < 0 || mpz_even_p(n))\n     return 0;\n\n  /*\n     We assume we know nothing about N, i.e. 
it is a random integer\n     So we try here anything which speeds up the average case\n     We try some trial division\n  */\n#define LIM 1024\n  \n  d = mpz_trial_division(n, 3, LIM);\n  \n  if (d != 0)\n  {\n     if (mpz_cmp_ui(n, d) == 0)\n        return 1;\n     \n     return 0;\n  }\n  \n  if (mpz_cmp_ui(n, LIM * LIM) < 0)\n     return 1;\n  \n  ASSERT (mpz_odd_p(n));\n  ASSERT (mpz_cmp_ui(n, 5) >= 0);\n\n  /* now do some random strong pseudoprime tests */\n  mpz_init(base);\n  mpz_init_set(nm1, n);\n  mpz_sub_ui(nm1, nm1, 1);\n  mpz_init(e);\n  mpz_init(x);\n\n  t = mpz_scan1(nm1, 0);\t/* 2^t divides nm1 */\n  \n  ASSERT (t > 0);\n  \n  mpz_tdiv_q_2exp (e, nm1, t);\t/* e = nm1/2^t */\n  \n  r = 1;\n\n  while (PROB > 0)\n  {\n     PROB -= 2;\n     do\n\t  {\n\t     mpz_urandomm (base, STATE, nm1);\n\t  } while (mpz_cmp_ui (base, 1) <= 0);\n      \n     mpz_powm (x, base, e, n);\t/* x = base^e mod n */\n\n     if (mpz_cmp_ui(x, 1) == 0 || mpz_cmp(x, nm1) == 0)\n\t     continue;\n      \n     for (i = t - 1; i > 0; i--)\n\t  {\n\t     mpz_mul(x, x, x);\n\t     mpz_mod(x, x, n);\n\t     \n        if (mpz_cmp(x, nm1) == 0)\n\t        break;\n\t  \n        if (mpz_cmp_ui(x, 1) == 0)\n\t     {\n\t        r = 0;\n\t        break;\n        }\n     }\n      \n     if (i == 0 || r == 0)\n     {\n        r = 0;\n        break;\n     }\n  }\n  \n  mpz_clear (nm1);\n  mpz_clear (x);\n  mpz_clear (e);\n  mpz_clear (base);\n  \n  return r;\n}\n"
  },
  {
    "path": "mpz/prodlimbs.c",
    "content": "/* mpz_prodlimps(RESULT, V, LEN) -- Set RESULT to V[0]*V[1]*...*V[LEN-1].\n\nContributed to the GNU project by Marco Bodrato.\n\nTHE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.\nIT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.\nIN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR\nDISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2010, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*********************************************************/\n/* Section list-prod: product of a list -> mpz_t         */\n/*********************************************************/\n\n/* FIXME: should be tuned */\n#ifndef RECURSIVE_PROD_THRESHOLD\n#define RECURSIVE_PROD_THRESHOLD (MUL_KARATSUBA_THRESHOLD)\n#endif\n\n/* Computes the product of the j>1 limbs pointed by factors, puts the\n * result in x. It assumes that all limbs are non-zero. 
Above\n * Karatsuba's threshold it uses a binary splitting startegy, to gain\n * speed by the asymptotically fast multiplication algorithms.\n *\n * The list in  {factors, j} is overwritten.\n * Returns the size of the result\n */\n\nmp_size_t\nmpz_prodlimbs (mpz_ptr x, mp_ptr factors, mp_size_t j)\n{\n  mp_limb_t cy;\n  mp_size_t size, i;\n  mp_ptr    prod;\n\n  ASSERT (j > 1);\n  ASSERT (RECURSIVE_PROD_THRESHOLD > 3);\n\n  if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {\n    j--;\n    size = 1;\n\n    for (i = 1; i < j; i++)\n      {\n\tcy = mpn_mul_1 (factors, factors, size, factors[i]);\n\tfactors[size] = cy;\n\tsize += cy != 0;\n      };\n\n    prod = MPZ_NEWALLOC (x, size + 1);\n\n    cy = mpn_mul_1 (prod, factors, size, factors[i]);\n    prod[size] = cy;\n    return SIZ (x) = size + (cy != 0);\n  } else {\n    mpz_t x1, x2;\n    TMP_DECL;\n\n    i = j >> 1;\n    j -= i;\n    TMP_MARK;\n\n    MPZ_TMP_INIT (x2, j);\n\n    PTR (x1) = factors + i;\n    ALLOC (x1) = j;\n    j = mpz_prodlimbs (x2, factors + i, j);\n    i = mpz_prodlimbs (x1, factors, i);\n    size = i + j;\n    prod = MPZ_NEWALLOC (x, size);\n    if (i >= j)\n      cy = mpn_mul (prod, PTR(x1), i, PTR(x2), j);\n    else\n      cy = mpn_mul (prod, PTR(x2), j, PTR(x1), i);\n    TMP_FREE;\n\n    return SIZ (x) = size - (cy == 0);\n  }\n}\n"
  },
  {
    "path": "mpz/realloc.c",
    "content": "/* _mpz_realloc -- make the mpz_t have NEW_ALLOC digits allocated.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid *\n_mpz_realloc (mpz_ptr m, mp_size_t new_alloc)\n{\n  mp_ptr mp;\n\n  /* Never allocate zero space. */\n  new_alloc = MAX (new_alloc, 1);\n\n  mp = __GMP_REALLOCATE_FUNC_LIMBS (PTR(m), ALLOC(m), new_alloc);\n  PTR(m) = mp;\n  ALLOC(m) = new_alloc;\n\n  /* Don't create an invalid number; if the current value doesn't fit after\n     reallocation, clear it to 0.  */\n  if (ABSIZ(m) > new_alloc)\n    SIZ(m) = 0;\n\n  return (void *) mp;\n}\n"
  },
  {
    "path": "mpz/realloc2.c",
    "content": "/* mpz_realloc2 -- change allocated data size.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_realloc2 (mpz_ptr m, mp_bitcnt_t bits)\n{\n  mp_ptr mp;\n  mp_size_t new_alloc;\n\n  new_alloc = (bits + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;\n\n  /* Never allocate zero space. */\n  new_alloc = MAX (new_alloc, 1);\n\n  mp = __GMP_REALLOCATE_FUNC_LIMBS (PTR(m), ALLOC(m), new_alloc);\n  PTR(m) = mp;\n  ALLOC(m) = new_alloc;\n\n  /* Don't create an invalid number; if the current value doesn't fit after\n     reallocation, clear it to 0.  */\n  if (ABSIZ(m) > new_alloc)\n    SIZ(m) = 0;\n}\n"
  },
  {
    "path": "mpz/remove.c",
    "content": "/* mpz_remove -- divide out a factor and return its multiplicity.\n\nCopyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmp_bitcnt_t\nmpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)\n{\n  mpz_t fpow[40];\t\t/* inexhaustible...until year 2020 or so */\n  mpz_t x, rem;\n  mp_bitcnt_t pwr;\n  int p;\n\n  if (mpz_cmp_ui (f, 1) <= 0)\n    DIVIDE_BY_ZERO;\n\n  if (SIZ (src) == 0)\n    {\n      if (src != dest)\n        mpz_set (dest, src);\n      return 0;\n    }\n\n  if (mpz_cmp_ui (f, 2) == 0)\n    {\n      unsigned long int s0;\n      s0 = mpz_scan1 (src, 0);\n      mpz_fdiv_q_2exp (dest, src, s0);\n      return s0;\n    }\n\n  /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an\n     upper bound of the result we're seeking.  We could also shift down the\n     operands so that they become odd, to make intermediate values smaller.  */\n\n  mpz_init (rem);\n  mpz_init (x);\n\n  pwr = 0;\n  mpz_init (fpow[0]);\n  mpz_set (fpow[0], f);\n  mpz_set (dest, src);\n\n  /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k).  
*/\n  for (p = 0;; p++)\n    {\n      mpz_tdiv_qr (x, rem, dest, fpow[p]);\n      if (SIZ (rem) != 0)\n\tbreak;\n      mpz_init (fpow[p + 1]);\n      mpz_mul (fpow[p + 1], fpow[p], fpow[p]);\n      mpz_set (dest, x);\n    }\n\n  pwr = (1 << p) - 1;\n\n  mpz_clear (fpow[p]);\n\n  /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a\n     zero remainder.  */\n  while (--p >= 0)\n    {\n      mpz_tdiv_qr (x, rem, dest, fpow[p]);\n      if (SIZ (rem) == 0)\n\t{\n\t  pwr += 1 << p;\n\t  mpz_set (dest, x);\n\t}\n      mpz_clear (fpow[p]);\n    }\n\n  mpz_clear (x);\n  mpz_clear (rem);\n  return pwr;\n}\n"
  },
  {
    "path": "mpz/roinit_n.c",
    "content": "/* mpz_roinit_n -- Initialize mpz with read-only limb array.\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of either:\n\n  * the GNU Lesser General Public License as published by the Free\n    Software Foundation; either version 3 of the License, or (at your\n    option) any later version.\n\nor\n\n  * the GNU General Public License as published by the Free Software\n    Foundation; either version 2 of the License, or (at your option) any\n    later version.\n\nor both in parallel, as here.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\nfor more details.\n\nYou should have received copies of the GNU General Public License and the\nGNU Lesser General Public License along with the GNU MP Library.  If not,\nsee https://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpz_srcptr\nmpz_roinit_n (mpz_ptr x, mp_srcptr xp, mp_size_t xs)\n{\n  mp_size_t xn = ABS(xs);\n  MPN_NORMALIZE (xp, xn);\n\n  ALLOC (x) = 0;\n  SIZ (x) = xs < 0 ? -xn : xn;\n  PTR (x) = (mp_ptr) xp;\n  return x;\n}\n"
  },
  {
    "path": "mpz/root.c",
    "content": "/* mpz_root(root, u, nth) --  Set ROOT to floor(U^(1/nth)).\n   Return an indication if the result is exact.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint\nmpz_root (mpz_ptr root, mpz_srcptr u, mpir_ui nth)\n{\n  mp_ptr rootp, up;\n  mp_size_t us, un, rootn, remn;\n  TMP_DECL;\n\n  us = SIZ(u);\n\n  /* even roots of negatives provoke an exception */\n  if (us < 0 && (nth & 1) == 0)\n    SQRT_OF_NEGATIVE;\n\n  /* root extraction interpreted as c^(1/nth) means a zeroth root should\n     provoke a divide by zero, do this even if c==0 */\n  if (nth == 0)\n    DIVIDE_BY_ZERO;\n\n  if (us == 0)\n    {\n      if (root != NULL)\n\tSIZ(root) = 0;\n      return 1;\t\t\t/* exact result */\n    }\n\n  un = ABS (us);\n  rootn = (un - 1) / nth + 1;\n\n  TMP_MARK;\n\n  /* FIXME: Perhaps disallow root == NULL */\n  if (root != NULL && u != root)\n    rootp = MPZ_REALLOC (root, rootn);\n  else\n    rootp = TMP_ALLOC_LIMBS (rootn);\n\n  up = PTR(u);\n\n  if (nth == 1)\n    {\n      MPN_COPY (rootp, up, un);\n      remn = 0;\n    }\n  else\n    {\n      remn = mpn_rootrem (rootp, NULL, up, un, (mp_limb_t) nth);\n    }\n\n  if (root != NULL)\n    
{\n      SIZ(root) = us >= 0 ? rootn : -rootn;\n      if (u == root)\n\tMPN_COPY (up, rootp, rootn);\n    }\n\n  TMP_FREE;\n  return remn == 0;\n}\n"
  },
  {
    "path": "mpz/rootrem.c",
    "content": "/* mpz_rootrem(root, rem, u, nth) --  Set ROOT to floor(U^(1/nth)) and\n   set REM to the remainder.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nCopyright 2010 Dr B R Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\t\t/* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, mpir_ui nth)\n{\n  mp_ptr rootp, up, remp;\n  mp_size_t us, un, rootn, remn;\n  TMP_DECL;\n\n  us = SIZ(u);\n\n  /* even roots of negatives provoke an exception */\n  if (us < 0 && (nth & 1) == 0)\n    SQRT_OF_NEGATIVE;\n\n  /* root extraction interpreted as c^(1/nth) means a zeroth root should\n     provoke a divide by zero, do this even if c==0 */\n  if (nth == 0)\n    DIVIDE_BY_ZERO;\n\n  if (us == 0)\n    {\n      if (root != NULL)\n\tSIZ(root) = 0;\n      SIZ(rem) = 0;\n      return;\n    }\n\n  un = ABS (us);\n  rootn = (un - 1) / nth + 1;\n\n  TMP_MARK;\n\n  /* FIXME: Perhaps disallow root == NULL */\n  if (root != NULL && u != root)\n    rootp = MPZ_REALLOC (root, rootn);\n  else\n    rootp = TMP_ALLOC_LIMBS (rootn);\n\n  if (u != rem)\n    remp = MPZ_REALLOC (rem, un);\n  else\n    remp = TMP_ALLOC_LIMBS (un);\n\n  up = PTR(u);\n\n  if (nth == 1)\n    {\n      MPN_COPY (rootp, up, un);\n  
    remn = 0;\n    }\n  else\n    {\n      remn = mpn_rootrem (rootp, remp, up, un, (mp_limb_t) nth);\n    }\n\n  if (root != NULL)\n    {\n      SIZ(root) = us >= 0 ? rootn : -rootn;\n      if (u == root)\n\tMPN_COPY (up, rootp, rootn);\n      else if (u == rem)\n\tMPN_COPY (up, remp, remn);\n    }\n\n  SIZ(rem) = us < 0 && remn > 0 ? -remn : remn;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/rrandomb.c",
    "content": "/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with\n   long runs of consecutive ones and zeros in the binary representation.\n   Meant for testing of other MP routines.\n\nCopyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nstatic void gmp_rrandomb(mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits);\n\nvoid\nmpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, mp_bitcnt_t nbits)\n{\n  mp_size_t nl;\n\n  nl = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;\n  if (nbits != 0)\n    {\n      MPZ_REALLOC (x, nl);\n      gmp_rrandomb (PTR(x), rstate, nbits);\n    }\n\n  SIZ(x) = nl;\n}\n\n/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.\n   Thus, we get the same random number sequence in the common cases.\n   FIXME: We should always generate the same random number sequence!  
*/\n#if GMP_NUMB_BITS < 32\n#define BITS_PER_RANDCALL GMP_NUMB_BITS\n#else\n#define BITS_PER_RANDCALL 32\n#endif\n\nstatic void\ngmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)\n{\n  mp_bitcnt_t bi;\n  mp_limb_t ranm;\t\t/* buffer for random bits */\n  unsigned cap_chunksize, chunksize;\n  mp_size_t i;\n\n  /* Set entire result to 111..1  */\n  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;\n  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;\n  for (i = i - 1; i >= 0; i--)\n    rp[i] = GMP_NUMB_MAX;\n\n  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n  cap_chunksize = nbits / (ranm % 4 + 1);\n  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */\n\n  bi = nbits;\n\n  for (;;)\n    {\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      if (bi == 0)\n\tbreak;\t\t\t/* low chunk is ...1 */\n\n      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;\n\n      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);\n      chunksize = 1 + ranm % cap_chunksize;\n      bi = (bi < chunksize) ? 0 : bi - chunksize;\n\n      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);\n\n      if (bi == 0)\n\tbreak;\t\t\t/* low chunk is ...0 */\n    }\n}\n"
  },
  {
    "path": "mpz/scan0.c",
    "content": "/* mpz_scan0 -- search for a 0 bit.\n\nCopyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* mpn_scan0 can't be used for the u>0 search since there might not be a 0\n   bit before the end of the data.  mpn_scan1 could be used for the inverted\n   search under u<0, but usually the search won't go very far so it seems\n   reasonable to inline that code.  */\n\nmp_bitcnt_t\nmpz_scan0 (mpz_srcptr u, mp_bitcnt_t starting_bit)\n{\n  mp_srcptr      u_ptr = PTR(u);\n  mp_size_t      size = SIZ(u);\n  mp_size_t      abs_size = ABS(size);\n  mp_srcptr      u_end = u_ptr + abs_size;\n  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;\n  mp_srcptr      p = u_ptr + starting_limb;\n  mp_limb_t      limb;\n  int            cnt;\n\n  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for\n     u<0.  Notice this test picks up all cases of u==0 too. */\n  if (starting_limb >= abs_size)\n    return (size >= 0 ? starting_bit : __GMP_BITCNT_MAX);\n\n  limb = *p;\n\n  if (size >= 0)\n    {\n      /* Mask to 1 all bits before starting_bit, thus ignoring them. 
*/\n      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;\n\n      /* Search for a limb which isn't all ones.  If the end is reached then\n         the zero bit immediately past the end is returned.  */\n      while (limb == GMP_NUMB_MAX)\n        {\n          p++;\n          if (p == u_end)\n            return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;\n          limb = *p;\n        }\n\n      /* Now seek low 1 bit. */\n      limb = ~limb;\n    }\n  else\n    {\n      mp_srcptr  q;\n\n      /* If there's a non-zero limb before ours then we're in the ones\n         complement region.  Search from *(p-1) downwards since that might\n         give better cache locality, and since a non-zero in the middle of a\n         number is perhaps a touch more likely than at the end.  */\n      q = p;\n      while (q != u_ptr)\n        {\n          q--;\n          if (*q != 0)\n            goto inverted;\n        }\n\n      /* Adjust so ~limb implied by searching for 1 bit below becomes -limb.\n         If limb==0 here then this isn't the beginning of twos complement\n         inversion, but that doesn't matter because limb==0 is a zero bit\n         immediately (-1 is all ones for below).  */\n      limb--;\n\n    inverted:\n      /* Now seeking a 1 bit. */\n\n      /* Mask to 0 all bits before starting_bit, thus ignoring them. */\n      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));\n\n      if (limb == 0)\n        {\n          /* If the high limb is zero after masking, then no 1 bits past\n             starting_bit.  */\n          p++;\n          if (p == u_end)\n            return __GMP_BITCNT_MAX;\n\n          /* Search further for a non-zero limb.  The high limb is non-zero,\n             if nothing else.  
*/\n          for (;;)\n            {\n              limb = *p;\n              if (limb != 0)\n                break;\n              p++;\n              ASSERT (p < u_end);\n            }\n        }\n    }\n\n  ASSERT (limb != 0);\n  count_trailing_zeros (cnt, limb);\n  return (mp_bitcnt_t)((p - u_ptr) * GMP_NUMB_BITS + cnt);\n}\n"
  },
  {
    "path": "mpz/scan1.c",
    "content": "/* mpz_scan1 -- search for a 1 bit.\n\nCopyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* mpn_scan0 can't be used for the inverted u<0 search since there might not\n   be a 0 bit before the end of the data.  mpn_scan1 could be used under u>0\n   (except when in the high limb), but usually the search won't go very far\n   so it seems reasonable to inline that code.  */\n\nmp_bitcnt_t\nmpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit)\n{\n  mp_srcptr      u_ptr = PTR(u);\n  mp_size_t      size = SIZ(u);\n  mp_size_t      abs_size = ABS(size);\n  mp_srcptr      u_end = u_ptr + abs_size;\n  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;\n  mp_srcptr      p = u_ptr + starting_limb;\n  mp_limb_t      limb;\n  int            cnt;\n\n  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit for u<0.\n     Notice this test picks up any u==0 too. */\n  if (starting_limb >= abs_size)\n    return (size >= 0 ? __GMP_BITCNT_MAX : starting_bit);\n\n  limb = *p;\n\n  if (size >= 0)\n    {\n      /* Mask to 0 all bits before starting_bit, thus ignoring them. 
*/\n      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));\n\n      if (limb == 0)\n        {\n          /* If it's the high limb which is zero after masking, then there's\n             no 1 bits after starting_bit.  */\n          p++;\n          if (p == u_end)\n            return __GMP_BITCNT_MAX;\n\n          /* Otherwise search further for a non-zero limb.  The high limb is\n             non-zero, if nothing else.  */\n          for (;;)\n            {\n              limb = *p;\n              if (limb != 0)\n                break;\n              p++;\n              ASSERT (p < u_end);\n            }\n        }\n    }\n  else\n    {\n      mp_srcptr  q;\n\n      /* If there's a non-zero limb before ours then we're in the ones\n         complement region.  Search from *(p-1) downwards since that might\n         give better cache locality, and since a non-zero in the middle of a\n         number is perhaps a touch more likely than at the end.  */\n      q = p;\n      while (q != u_ptr)\n        {\n          q--;\n          if (*q != 0)\n            goto inverted;\n        }\n\n      if (limb == 0)\n        {\n          /* Skip zero limbs, to find the start of twos complement.  The\n             high limb is non-zero, if nothing else.  This search is\n             necessary so the -limb is applied at the right spot. */\n          do\n            {\n              p++;\n              ASSERT (p < u_end);\n              limb = *p;\n            }\n          while (limb == 0);\n\n          /* Apply twos complement, and look for a 1 bit in that.  Since\n             limb!=0 here, also have (-limb)!=0 so there's certainly a 1\n             bit.  */\n          limb = -limb;\n          goto got_limb;\n        }\n\n      /* Adjust so ~limb implied by searching for 0 bit becomes -limb.  */\n      limb--;\n\n    inverted:\n      /* Now seeking a 0 bit. */\n\n      /* Mask to 1 all bits before starting_bit, thus ignoring them. 
*/\n      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;\n\n      /* Search for a limb which is not all ones.  If the end is reached\n         then the zero immediately past the end is the result.  */\n      while (limb == GMP_NUMB_MAX)\n        {\n          p++;\n          if (p == u_end)\n            return (mp_bitcnt_t)abs_size * GMP_NUMB_BITS;\n          limb = *p;\n        }\n\n      /* Now seeking low 1 bit. */\n      limb = ~limb;\n    }\n\n got_limb:\n  ASSERT (limb != 0);\n  count_trailing_zeros (cnt, limb);\n  return (mp_bitcnt_t)((p - u_ptr) * GMP_NUMB_BITS + cnt);\n}\n"
  },
  {
    "path": "mpz/set.c",
    "content": "/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.\n\nCopyright 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#define FUNCTION   mpz_set\n#define ARGUMENTS  mpz_ptr w, mpz_srcptr u\n\nvoid\nFUNCTION (ARGUMENTS)\n{\n  mp_ptr wp, up;\n  mp_size_t usize, size;\n\n  usize = u->_mp_size;\n  size = ABS (usize);\n\n  if (w->_mp_alloc < size)\n    _mpz_realloc (w, size);\n\n  wp = w->_mp_d;\n  up = u->_mp_d;\n\n  MPN_COPY (wp, up, size);\n  w->_mp_size = usize;\n}\n"
  },
  {
    "path": "mpz/set_d.c",
    "content": "/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL.\n\nCopyright 1995, 1996, 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_FLOAT_H\n#include <float.h>  /* for DBL_MAX */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* We used to have a special case for d < MP_BASE_AS_DOUBLE, just casting\n   double -> limb.  Unfortunately gcc 3.3 on powerpc970-apple-darwin6.8.5\n   got this wrong.  (It assumed __fixunsdfdi returned its result in a single\n   64-bit register, where instead that function followed the calling\n   conventions and gave the result in two parts r3 and r4.)  Hence the use\n   of __gmp_extract_double in all cases.  
*/\n\nvoid\nmpz_set_d (mpz_ptr r, double d)\n{\n  int negative;\n  mp_limb_t tp[LIMBS_PER_DOUBLE];\n  mp_ptr rp;\n  mp_size_t rn;\n\n  DOUBLE_NAN_INF_ACTION (d,\n                         __gmp_invalid_operation (),\n                         __gmp_invalid_operation ());\n\n  negative = d < 0;\n  d = ABS (d);\n\n  rn = __gmp_extract_double (tp, d);\n\n  if (ALLOC(r) < rn)\n    _mpz_realloc (r, rn);\n\n  if (rn <= 0)\n    rn = 0;\n\n  rp = PTR (r);\n\n  switch (rn)\n    {\n    default:\n      MPN_ZERO (rp, rn - LIMBS_PER_DOUBLE);\n      rp += rn - LIMBS_PER_DOUBLE;\n      /* fall through */\n#if LIMBS_PER_DOUBLE == 2\n    case 2:\n      rp[1] = tp[1], rp[0] = tp[0];\n      break;\n    case 1:\n      rp[0] = tp[1];\n      break;\n#endif\n#if LIMBS_PER_DOUBLE == 3\n    case 3:\n      rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];\n      break;\n    case 2:\n      rp[1] = tp[2], rp[0] = tp[1];\n      break;\n    case 1:\n      rp[0] = tp[2];\n      break;\n#endif\n#if LIMBS_PER_DOUBLE == 4\n    case 4:\n      rp[3] = tp[3], rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];\n      break;\n    case 3:\n      rp[2] = tp[3], rp[1] = tp[2], rp[0] = tp[1];\n      break;\n    case 2:\n      rp[1] = tp[3], rp[0] = tp[2];\n      break;\n    case 1:\n      rp[0] = tp[3];\n      break;\n#endif\n    case 0:\n      break;\n    }\n\n  SIZ(r) = negative ? -rn : rn;\n}\n"
  },
  {
    "path": "mpz/set_f.c",
    "content": "/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpz_set_f (mpz_ptr w, mpf_srcptr u)\n{\n  mp_ptr    wp, up;\n  mp_size_t size;\n  mp_exp_t  exp;\n\n  /* abs(u)<1 truncates to zero */\n  exp = EXP (u);\n  if (exp <= 0)\n    {\n      SIZ(w) = 0;\n      return;\n    }\n\n  MPZ_REALLOC (w, exp);\n  wp = PTR(w);\n  up = PTR(u);\n\n  size = SIZ (u);\n  SIZ(w) = (size >= 0 ? exp : -exp);\n  size = ABS (size);\n\n  if (exp > size)\n    {\n      /* pad with low zeros to get a total \"exp\" many limbs */\n      mp_size_t  zeros = exp - size;\n      MPN_ZERO (wp, zeros);\n      wp += zeros;\n    }\n  else\n    {\n      /* exp<=size, truncate to the high \"exp\" many limbs */\n      up += (size - exp);\n      size = exp;\n    }\n\n  MPN_COPY (wp, up, size);\n}\n"
  },
  {
    "path": "mpz/set_q.c",
    "content": "/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from\n   SRC_rational.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_set_q 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/set_si.c",
    "content": "/* mpz_set_si(dest,val) -- Assign DEST with a small value VAL.\n\nCopyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_set_si (mpz_ptr dest, mpir_si val)\n{\n  mp_size_t size;\n  mp_limb_t vl;\n\n  vl = (mp_limb_t) (mpir_ui) (val >= 0 ? val : -val);\n\n  dest->_mp_d[0] = vl & GMP_NUMB_MASK;\n  size = vl != 0;\n\n#if GMP_NAIL_BITS != 0\n  if (vl > GMP_NUMB_MAX)\n    {\n      MPZ_REALLOC (dest, 2);\n      dest->_mp_d[1] = vl >> GMP_NUMB_BITS;\n      size = 2;\n    }\n#endif\n\n  dest->_mp_size = val >= 0 ? size : -size;\n}\n"
  },
  {
    "path": "mpz/set_str.c",
    "content": "/* mpz_set_str(mp_dest, string, base) -- Convert the \\0-terminated\n   string STRING in base BASE to multiple precision integer in\n   MP_DEST.  Allow white space in the string.  If BASE == 0 determine\n   the base in the C standard way, i.e.  0xhh...h means base 16,\n   0oo...o means base 8, otherwise assume base 10.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005\nFree Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <string.h>\n#include <ctype.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nextern const unsigned char __gmp_digit_value_tab[];\n#define digit_value_tab __gmp_digit_value_tab\n\nint\nmpz_set_str (mpz_ptr x, const char *str, int base)\n{\n  size_t str_size;\n  char *s, *begs;\n  size_t i;\n  mp_size_t xsize;\n  int c;\n  int negative;\n  const unsigned char *digit_value;\n  TMP_DECL;\n\n  digit_value = digit_value_tab;\n  if (base > 36)\n    {\n      /* For bases > 36, use the collating sequence\n\t 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */\n      digit_value += 224;\n      if (base > 62)\n\treturn -1;\t\t/* too large base */\n    }\n\n  /* Skip whitespace.  
*/\n  do\n    c = (unsigned char) *str++;\n  while (isspace (c));\n\n  negative = 0;\n  if (c == '-')\n    {\n      negative = 1;\n      c = (unsigned char) *str++;\n    }\n\n  if (digit_value[c] >= (base == 0 ? 10 : base))\n    return -1;\t\t\t/* error if no valid digits */\n\n  /* If BASE is 0, try to find out the base by looking at the initial\n     characters.  */\n  if (base == 0)\n    {\n      base = 10;\n      if (c == '0')\n\t{\n\t  base = 8;\n\t  c = (unsigned char) *str++;\n\t  if (c == 'x' || c == 'X')\n\t    {\n\t      base = 16;\n\t      c = (unsigned char) *str++;\n\t    }\n\t  else if (c == 'b' || c == 'B')\n\t    {\n\t      base = 2;\n\t      c = (unsigned char) *str++;\n\t    }\n\t}\n    }\n\n  /* Skip leading zeros and white space.  */\n  while (c == '0' || isspace (c))\n    c = (unsigned char) *str++;\n  /* Make sure the string does not become empty, mpn_set_str would fail.  */\n  if (c == 0)\n    {\n      x->_mp_size = 0;\n      return 0;\n    }\n\n  TMP_MARK;\n  str_size = strlen (str - 1);\n  s = begs = (char *) TMP_ALLOC (str_size + 1);\n\n  /* Remove spaces from the string and convert the result from ASCII to a\n     byte array.  */\n  for (i = 0; i < str_size; i++)\n    {\n      if (!isspace (c))\n\t{\n\t  int dig = digit_value[c];\n\t  if (dig >= base)\n\t    {\n\t      TMP_FREE;\n\t      return -1;\n\t    }\n\t  *s++ = dig;\n\t}\n      c = (unsigned char) *str++;\n    }\n\n  str_size = s - begs;\n\n  xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly))\n\t   / GMP_NUMB_BITS + 2);\n  MPZ_REALLOC (x, xsize);\n\n  /* Convert the byte array in base BASE to our bignum format.  */\n  xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);\n  x->_mp_size = negative ? -xsize : xsize;\n\n  TMP_FREE;\n  return 0;\n}\n"
  },
  {
    "path": "mpz/set_sx.c",
    "content": "/* \nmpz_set_sx(z, v) -- Set z to the intmax_t value v\n\nCopyright 2011 Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nvoid\nmpz_set_sx (mpz_ptr z, intmax_t v)\n{   uintmax_t i, uv = (v < 0 ? -v : v);\n\n#if NLIMBS == 1\n    z->_mp_d[0] = (mp_limb_t)uv;\n    z->_mp_size = v < 0 ? -NLIMBS : v ? NLIMBS : 0;\n#else\n    if(NLIMBS > z->_mp_alloc)\n        MPZ_REALLOC(z, NLIMBS);\n    for( i = 0 ; i < NLIMBS && uv ; ++i )\n    {\n        z->_mp_d[i] = uv & GMP_NUMB_MASK;\n        uv >>= GMP_NUMB_BITS;\n    }\n    z->_mp_size = v < 0 ? -i : v ? i : 0;\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "mpz/set_ui.c",
    "content": "/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.\n\nCopyright 1991, 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_set_ui (mpz_ptr dest, mpir_ui val)\n{\n  mp_size_t size;\n\n  dest->_mp_d[0] = val & GMP_NUMB_MASK;\n  size = val != 0;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (val > GMP_NUMB_MAX)\n    {\n      MPZ_REALLOC (dest, 2);\n      dest->_mp_d[1] = val >> GMP_NUMB_BITS;\n      size = 2;\n    }\n#endif\n\n  dest->_mp_size = size;\n}\n"
  },
  {
    "path": "mpz/set_ux.c",
    "content": "/* \nmpz_set_ux(z, v) -- Set z to the uintmax_t value v\n\nCopyright 2011 Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n#ifdef HAVE_STDINT_H\n#include <stdint.h>\n#endif\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef HAVE_STDINT_H\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nvoid\nmpz_set_ux (mpz_ptr z, uintmax_t v)\n{   uintmax_t i, uv;\n\n#if NLIMBS == 1\n    z->_mp_d[0] = (mp_limb_t)v;\n    z->_mp_size = (v ? NLIMBS : 0);\n#else\n    if(NLIMBS > z->_mp_alloc)\n        MPZ_REALLOC(z, NLIMBS);\n    for( i = 0, uv = v ; i < NLIMBS && uv ; ++i )\n    {\n        z->_mp_d[i] = uv & GMP_NUMB_MASK;\n        uv >>= GMP_NUMB_BITS;\n    }\n    z->_mp_size = (v ? i : 0);\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "mpz/setbit.c",
    "content": "/* mpz_setbit -- set a specified bit.\n\nCopyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_setbit (mpz_ptr d, mp_bitcnt_t bit_index)\n{\n  mp_size_t dsize = d->_mp_size;\n  mp_ptr dp = d->_mp_d;\n  mp_size_t limb_index;\n\n  limb_index = bit_index / GMP_NUMB_BITS;\n  if (dsize >= 0)\n    {\n      if (limb_index < dsize)\n\t{\n\t  dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);\n\t  d->_mp_size = dsize;\n\t}\n      else\n\t{\n\t  /* Ugh.  The bit should be set outside of the end of the\n\t     number.  We have to increase the size of the number.  */\n\t  if (UNLIKELY (d->_mp_alloc < limb_index + 1))\n            dp = _mpz_realloc (d, limb_index + 1);\n\t  MPN_ZERO (dp + dsize, limb_index - dsize);\n\t  dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);\n\t  d->_mp_size = limb_index + 1;\n\t}\n    }\n  else\n    {\n      mp_size_t zero_bound;\n\n      /* Simulate two's complement arithmetic, i.e. simulate\n\t 1. Set OP = ~(OP - 1) [with infinitely many leading ones].\n\t 2. Set the bit.\n\t 3. Set OP = ~OP + 1.  
*/\n\n      dsize = -dsize;\n\n      /* No upper bound on this loop, we're sure there's a non-zero limb\n\t sooner or later.  */\n      for (zero_bound = 0; ; zero_bound++)\n\tif (dp[zero_bound] != 0)\n\t  break;\n\n      if (limb_index > zero_bound)\n\t{\n\t  if (limb_index < dsize)\n            {\n              mp_limb_t  dlimb;\n              dlimb = dp[limb_index];\n              dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));\n              dp[limb_index] = dlimb;\n\n              if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))\n                {\n                  /* high limb became zero, must normalize */\n                  do {\n                    dsize--;\n                  } while (dsize > 0 && dp[dsize-1] == 0);\n                  d->_mp_size = -dsize;\n                }\n            }\n\t}\n      else if (limb_index == zero_bound)\n\t{\n\t  dp[limb_index] = ((dp[limb_index] - 1)\n\t\t\t    & ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1;\n\t  if (dp[limb_index] == 0)\n\t    {\n\t      mp_size_t i;\n\t      for (i = limb_index + 1; i < dsize; i++)\n\t\t{\n\t\t  dp[i] += 1;\n\t\t  if (dp[i] != 0)\n\t\t    goto fin;\n\t\t}\n\t      /* We got carry all way out beyond the end of D.  Increase\n\t\t its size (and allocation if necessary).  */\n\t      dsize++;\n\t      if (UNLIKELY (d->_mp_alloc < dsize))\n                dp = _mpz_realloc (d, dsize);\n\t      dp[i] = 1;\n\t      d->_mp_size = -dsize;\n\t    fin:;\n\t    }\n\t}\n      else\n\t{\n\t  mpn_decr_u (dp + limb_index,\n\t\t     (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));\n\t  dsize -= dp[dsize - 1] == 0;\n\t  d->_mp_size = -dsize;\n\t}\n    }\n}\n"
  },
  {
    "path": "mpz/size.c",
    "content": "/* mpz_size(x) -- return the number of lims currently used by the\n   value of integer X.\n\nCopyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define __GMP_FORCE_mpz_size 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n"
  },
  {
    "path": "mpz/sizeinbase.c",
    "content": "/* mpz_sizeinbase(x, base) -- return an approximation to the number of\n   character the integer X would have printed in base BASE.  The\n   approximation is never too small.\n\nCopyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nsize_t\nmpz_sizeinbase (mpz_srcptr x, int base)\n{\n  size_t  result;\n  MPN_SIZEINBASE (result, PTR(x), ABSIZ(x), base);\n  return result;\n}\n"
  },
  {
    "path": "mpz/sqrt.c",
    "content": "/* mpz_sqrt(root, u) --  Set ROOT to floor(sqrt(U)).\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_sqrt (mpz_ptr root, mpz_srcptr op)\n{\n  mp_size_t op_size, root_size;\n  mp_ptr root_ptr, op_ptr;\n  mp_ptr free_me = NULL;\n  mp_size_t free_me_size;\n  TMP_DECL;\n\n  TMP_MARK;\n  op_size = op->_mp_size;\n  if (op_size <= 0)\n    {\n      if (op_size < 0)\n        SQRT_OF_NEGATIVE;\n      SIZ(root) = 0;\n      return;\n    }\n\n  /* The size of the root is accurate after this simple calculation.  
*/\n  root_size = (op_size + 1) / 2;\n\n  root_ptr = root->_mp_d;\n  op_ptr = op->_mp_d;\n\n  if (root->_mp_alloc < root_size)\n    {\n      if (root_ptr == op_ptr)\n\t{\n\t  free_me = root_ptr;\n\t  free_me_size = root->_mp_alloc;\n\t}\n      else\n\t(*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);\n\n      root->_mp_alloc = root_size;\n      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);\n      root->_mp_d = root_ptr;\n    }\n  else\n    {\n      /* Make OP not overlap with ROOT.  */\n      if (root_ptr == op_ptr)\n\t{\n\t  /* ROOT and OP are identical.  Allocate temporary space for OP.  */\n\t  op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);\n\t  /* Copy to the temporary space.  Hack: Avoid temporary variable\n\t     by using ROOT_PTR.  */\n\t  MPN_COPY (op_ptr, root_ptr, op_size);\n\t}\n    }\n\n  mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);\n\n  root->_mp_size = root_size;\n\n  if (free_me != NULL)\n    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/sqrtrem.c",
    "content": "/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM\n   to the remainder, i.e. X - ROOT**2.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)\n{\n  mp_size_t op_size, root_size, rem_size;\n  mp_ptr root_ptr, op_ptr;\n  mp_ptr free_me = NULL;\n  mp_size_t free_me_size;\n  TMP_DECL;\n\n  TMP_MARK;\n  op_size = op->_mp_size;\n  if (op_size <= 0)\n    {\n      if (op_size < 0)\n        SQRT_OF_NEGATIVE;\n      SIZ(root) = 0;\n      SIZ(rem) = 0;\n      return;\n    }\n\n  if (rem->_mp_alloc < op_size)\n    _mpz_realloc (rem, op_size);\n\n  /* The size of the root is accurate after this simple calculation.  
*/\n  root_size = (op_size + 1) / 2;\n\n  root_ptr = root->_mp_d;\n  op_ptr = op->_mp_d;\n\n  if (root->_mp_alloc < root_size)\n    {\n      if (root_ptr == op_ptr)\n\t{\n\t  free_me = root_ptr;\n\t  free_me_size = root->_mp_alloc;\n\t}\n      else\n\t(*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);\n\n      root->_mp_alloc = root_size;\n      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);\n      root->_mp_d = root_ptr;\n    }\n  else\n    {\n      /* Make OP not overlap with ROOT.  */\n      if (root_ptr == op_ptr)\n\t{\n\t  /* ROOT and OP are identical.  Allocate temporary space for OP.  */\n\t  op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);\n\t  /* Copy to the temporary space.  Hack: Avoid temporary variable\n\t     by using ROOT_PTR.  */\n\t  MPN_COPY (op_ptr, root_ptr, op_size);\n\t}\n    }\n\n  rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);\n\n  root->_mp_size = root_size;\n\n  /* Write remainder size last, to enable us to define this function to\n     give only the square root remainder, if the user calls if with\n     ROOT == REM.  */\n  rem->_mp_size = rem_size;\n\n  if (free_me != NULL)\n    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/sub.c",
    "content": "/* mpz_sub -- subtract integers.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_sub\n#include \"aors.h\"\n"
  },
  {
    "path": "mpz/sub_ui.c",
    "content": "/* mpz_sub_ui -- Subtract an mpz_t and an unsigned one-word integer.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#define OPERATION_sub_ui\n#include \"aors_ui.h\"\n"
  },
  {
    "path": "mpz/swap.c",
    "content": "/* mpz_swap (dest_integer, src_integer) -- Swap U and V.\n\nCopyright 1997, 1998, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_swap (mpz_ptr u, mpz_ptr v)\n{\n  mp_ptr up, vp;\n  mp_size_t usize, vsize;\n  mp_size_t ualloc, valloc;\n\n  ualloc = u->_mp_alloc;\n  valloc = v->_mp_alloc;\n  v->_mp_alloc = ualloc;\n  u->_mp_alloc = valloc;\n\n  usize = u->_mp_size;\n  vsize = v->_mp_size;\n  v->_mp_size = usize;\n  u->_mp_size = vsize;\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n  v->_mp_d = up;\n  u->_mp_d = vp;\n}\n"
  },
  {
    "path": "mpz/tdiv_q.c",
    "content": "/* mpz_tdiv_q -- divide two integers and produce a quotient.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,\nInc.\n\nCopyright 2009, William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)\n{\n  mp_size_t ql;\n  mp_size_t ns, ds, nl, dl;\n  mp_ptr np, dp, qp;\n  TMP_DECL;\n\n  ns = SIZ (num);\n  ds = SIZ (den);\n  nl = ABS (ns);\n  dl = ABS (ds);\n  ql = nl - dl + 1;\n\n  if (dl == 0)\n    DIVIDE_BY_ZERO;\n\n  if (ql <= 0)\n    {\n      SIZ (quot) = 0;\n      return;\n    }\n\n  MPZ_REALLOC (quot, ql);\n\n  TMP_MARK;\n  qp = PTR (quot);\n  np = PTR (num);\n  dp = PTR (den);\n\n  /* FIXME: We should think about how to handle the temporary allocation.\n     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to\n     allocate temp space.  */\n\n  /* Copy denominator to temporary space if it overlaps with the quotient.  
*/\n  if (dp == qp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, dp, dl);\n      dp = tp;\n    }\n  /* Copy numerator to temporary space if it overlaps with the quotient.  */\n  if (np == qp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, np, nl);\n      np = tp;\n    }\n\n  mpn_tdiv_q (qp, np, nl, dp, dl);\n\n  ql -=  qp[ql - 1] == 0;\n\n  SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/tdiv_q_2exp.c",
    "content": "/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT.  Round the quotient\n   towards -infinity.\n\nCopyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)\n{\n  mp_size_t usize, wsize;\n  mp_size_t limb_cnt;\n\n  usize = u->_mp_size;\n  limb_cnt = cnt / GMP_NUMB_BITS;\n  wsize = ABS (usize) - limb_cnt;\n  if (wsize <= 0)\n    w->_mp_size = 0;\n  else\n    {\n      mp_ptr wp;\n      mp_srcptr up;\n\n      if (w->_mp_alloc < wsize)\n\t_mpz_realloc (w, wsize);\n\n      wp = w->_mp_d;\n      up = u->_mp_d;\n\n      cnt %= GMP_NUMB_BITS;\n      if (cnt != 0)\n\t{\n\t  mpn_rshift (wp, up + limb_cnt, wsize, cnt);\n\t  wsize -= wp[wsize - 1] == 0;\n\t}\n      else\n\t{\n\t  MPN_COPY_INCR (wp, up + limb_cnt, wsize);\n\t}\n\n      w->_mp_size = usize >= 0 ? wsize : -wsize;\n    }\n}\n"
  },
  {
    "path": "mpz/tdiv_q_ui.c",
    "content": "/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)\n   -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  SIZ(quot) = 0;\n\t  rl = np[0];\n\t  return rl;\n\t}\n\n      dp[0] = divisor & GMP_NUMB_MASK;\n      dp[1] = divisor >> GMP_NUMB_BITS;\n      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/tdiv_qr.c",
    "content": "/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,\n   and REM to DIVIDEND mod DIVISOR.\n\nCopyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)\n{\n  mp_size_t ql;\n  mp_size_t ns, ds, nl, dl;\n  mp_ptr np, dp, qp, rp;\n  TMP_DECL;\n\n  ns = SIZ (num);\n  ds = SIZ (den);\n  nl = ABS (ns);\n  dl = ABS (ds);\n  ql = nl - dl + 1;\n\n  if (dl == 0)\n    DIVIDE_BY_ZERO;\n\n  MPZ_REALLOC (rem, dl);\n\n  if (ql <= 0)\n    {\n      if (num != rem)\n\t{\n\t  mp_ptr np, rp;\n\t  np = PTR (num);\n\t  rp = PTR (rem);\n\t  MPN_COPY (rp, np, nl);\n\t  SIZ (rem) = SIZ (num);\n\t}\n      /* This needs to follow the assignment to rem, in case the\n\t numerator and quotient are the same.  
*/\n      SIZ (quot) = 0;\n      return;\n    }\n\n  MPZ_REALLOC (quot, ql);\n\n  TMP_MARK;\n  qp = PTR (quot);\n  rp = PTR (rem);\n  np = PTR (num);\n  dp = PTR (den);\n\n  /* FIXME: We should think about how to handle the temporary allocation.\n     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to\n     allocate temp space.  */\n\n  /* Copy denominator to temporary space if it overlaps with the quotient\n     or remainder.  */\n  if (dp == rp || dp == qp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, dp, dl);\n      dp = tp;\n    }\n  /* Copy numerator to temporary space if it overlaps with the quotient or\n     remainder.  */\n  if (np == rp || np == qp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, np, nl);\n      np = tp;\n    }\n\n  mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);\n\n  ql -=  qp[ql - 1] == 0;\n  MPN_NORMALIZE (rp, dl);\n\n  SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;\n  SIZ (rem) = ns >= 0 ? dl : -dl;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/tdiv_qr_ui.c",
    "content": "/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) --\n   Set QUOT to DIVIDEND / SHORT_DIVISOR\n   and REM to DIVIDEND mod SHORT_DIVISOR.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn, qn;\n  mp_ptr np, qp;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(quot) = 0;\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  MPZ_REALLOC (quot, nn);\n  qp = PTR(quot);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp;\n      mp_size_t rn;\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  SIZ(quot) = 0;\n\t  rl = np[0];\n\t  SIZ(rem) = ns >= 0 ? 
1 : -1;\n\t  PTR(rem)[0] = rl;\n\t  return rl;\n\t}\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      dp[0] = divisor & GMP_NUMB_MASK;\n      dp[1] = divisor >> GMP_NUMB_BITS;\n      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;\n      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = ns >= 0 ? rn : -rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_divrem_1 (qp, 0, np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  /* Store the single-limb remainder.  We don't check if there's space\n\t     for just one limb, since no function ever makes zero space.  */\n\t  SIZ(rem) = ns >= 0 ? 1 : -1;\n\t  PTR(rem)[0] = rl;\n\t}\n      qn = nn - (qp[nn - 1] == 0);\n    }\n\n  SIZ(quot) = ns >= 0 ? qn : -qn;\n  return rl;\n}\n"
  },
  {
    "path": "mpz/tdiv_r.c",
    "content": "/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.\n\nCopyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nvoid\nmpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)\n{\n  mp_size_t ql;\n  mp_size_t ns, ds, nl, dl;\n  mp_ptr np, dp, qp, rp;\n  TMP_DECL;\n\n  ns = SIZ (num);\n  ds = SIZ (den);\n  nl = ABS (ns);\n  dl = ABS (ds);\n  ql = nl - dl + 1;\n\n  if (dl == 0)\n    DIVIDE_BY_ZERO;\n\n  MPZ_REALLOC (rem, dl);\n\n  if (ql <= 0)\n    {\n      if (num != rem)\n\t{\n\t  mp_ptr np, rp;\n\t  np = PTR (num);\n\t  rp = PTR (rem);\n\t  MPN_COPY (rp, np, nl);\n\t  SIZ (rem) = SIZ (num);\n\t}\n      return;\n    }\n\n  TMP_MARK;\n  qp = (mp_ptr) TMP_ALLOC (ql * BYTES_PER_MP_LIMB);\n  rp = PTR (rem);\n  np = PTR (num);\n  dp = PTR (den);\n\n  /* FIXME: We should think about how to handle the temporary allocation.\n     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to\n     allocate temp space.  */\n\n  /* Copy denominator to temporary space if it overlaps with the remainder.  
*/\n  if (dp == rp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, dp, dl);\n      dp = tp;\n    }\n  /* Copy numerator to temporary space if it overlaps with the remainder.  */\n  if (np == rp)\n    {\n      mp_ptr tp;\n      tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);\n      MPN_COPY (tp, np, nl);\n      np = tp;\n    }\n\n  mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);\n\n  MPN_NORMALIZE (rp, dl);\n\n  SIZ (rem) = ns >= 0 ? dl : -dl;\n  TMP_FREE;\n}\n"
  },
  {
    "path": "mpz/tdiv_r_2exp.c",
    "content": "/* mpz_tdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.\n\nCopyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)\n{\n  mp_size_t in_size = ABS (in->_mp_size);\n  mp_size_t res_size;\n  mp_size_t limb_cnt = cnt / GMP_NUMB_BITS;\n  mp_srcptr in_ptr = in->_mp_d;\n\n  if (in_size > limb_cnt)\n    {\n      /* The input operand is (probably) greater than 2**CNT.  */\n      mp_limb_t x;\n\n      x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % GMP_NUMB_BITS) - 1);\n      if (x != 0)\n\t{\n\t  res_size = limb_cnt + 1;\n\t  if (res->_mp_alloc < res_size)\n\t    _mpz_realloc (res, res_size);\n\n\t  res->_mp_d[limb_cnt] = x;\n\t}\n      else\n\t{\n\t  res_size = limb_cnt;\n\t  MPN_NORMALIZE (in_ptr, res_size);\n\n\t  if (res->_mp_alloc < res_size)\n\t    _mpz_realloc (res, res_size);\n\n\t  limb_cnt = res_size;\n\t}\n    }\n  else\n    {\n      /* The input operand is smaller than 2**CNT.  We perform a no-op,\n\t apart from that we might need to copy IN to RES.  
*/\n      res_size = in_size;\n      if (res->_mp_alloc < res_size)\n\t_mpz_realloc (res, res_size);\n\n      limb_cnt = res_size;\n    }\n\n  if (res != in)\n    MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);\n  res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size;\n}\n"
  },
  {
    "path": "mpz/tdiv_r_ui.c",
    "content": "/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)\n   -- Set REM to DIVDEND mod DIVISOR_LIMB.\n\nCopyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nmpir_ui\nmpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      SIZ(rem) = 0;\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2];\n      mp_ptr rp, qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  SIZ(rem) = ns >= 0 ? 
1 : -1;\n\t  PTR(rem)[0] = rl;\n\t  return rl;\n\t}\n\n      MPZ_REALLOC (rem, 2);\n      rp = PTR(rem);\n\n      TMP_MARK;\n      dp[0] = divisor & GMP_NUMB_MASK;\n      dp[1] = divisor >> GMP_NUMB_BITS;\n      qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n      TMP_FREE;\n      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);\n      SIZ(rem) = ns >= 0 ? rn : -rn;\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n      if (rl == 0)\n\tSIZ(rem) = 0;\n      else\n\t{\n\t  /* Store the single-limb remainder.  We don't check if there's space\n\t     for just one limb, since no function ever makes zero space.  */\n\t  SIZ(rem) = ns >= 0 ? 1 : -1;\n\t  PTR(rem)[0] = rl;\n\t}\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/tdiv_ui.c",
    "content": "/* mpz_tdiv_ui(dividend, divisor_limb) -- Return DIVDEND mod DIVISOR_LIMB.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\nmpir_ui\nmpz_tdiv_ui (mpz_srcptr dividend, mpir_ui divisor)\n{\n  mp_size_t ns, nn;\n  mp_ptr np;\n  mp_limb_t rl;\n\n  if (divisor == 0)\n    DIVIDE_BY_ZERO;\n\n  ns = SIZ(dividend);\n  if (ns == 0)\n    {\n      return 0;\n    }\n\n  nn = ABS(ns);\n  np = PTR(dividend);\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (divisor > GMP_NUMB_MAX)\n    {\n      mp_limb_t dp[2], rp[2];\n      mp_ptr qp;\n      mp_size_t rn;\n      TMP_DECL;\n\n      if (nn == 1)\t\t/* tdiv_qr requirements; tested above for 0 */\n\t{\n\t  rl = np[0];\n\t  return rl;\n\t}\n\n      TMP_MARK;\n      dp[0] = divisor & GMP_NUMB_MASK;\n      dp[1] = divisor >> GMP_NUMB_BITS;\n      qp = TMP_ALLOC_LIMBS (nn - 2 + 1);\n      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);\n      TMP_FREE;\n      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);\n      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);\n    }\n  else\n#endif\n    {\n      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);\n    }\n\n  return rl;\n}\n"
  },
  {
    "path": "mpz/trial_division.c",
    "content": "/*\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/*\n   Returns smallest d such that  d|N, start <= d < stop, d != 1. If no such d exists \n   return 0. The name trial_division implies a method, perhaps call it \n   mpz_smallest_divisor.\n   N must have no divisors < start.\n*/\nunsigned long mpz_trial_division(mpz_srcptr N, unsigned long start, unsigned long stop)\n{\n   unsigned long i, dd;\n\n   /* ASSERT N has no divisors < start excluding 1 */\n\n   ASSERT(mpz_cmp_ui(N, 0) != 0);\n   ASSERT(mpz_cmp_ui(N, 1) != 0);\n   ASSERT(mpz_cmp_si(N, -1) != 0);\n\n   if (start <= 2 && 2 < stop && mpz_even_p(N))\n      return 2;\n   \n   if (start <= 3 && 3 < stop && mpz_divisible_ui_p(N, 3))\n      return 3;\n   \n   if (start < 5) \n      start = 5; /* dont be silly */\n   \n   if (start%2 == 0) \n      start+=1;\n   \n   if (start%3 == 0)\n      start+=2;\n\n   ASSERT(start%2 != 0);\n   ASSERT(start%3 != 0);\n\n   dd = 2;\n   if (start%6 == 1)\n      dd = 4;\n\n   for (i = start; i < stop; i += dd, dd = 6 - dd)\n      if (mpz_divisible_ui_p(N, i))\n         return i;\n \n   return 0;\n}\n"
  },
  {
    "path": "mpz/tstbit.c",
    "content": "/* mpz_tstbit -- test a specified bit.\n\nCopyright 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* For negatives the effective twos complement is achieved by negating the\n   limb tested, either with a ones or twos complement.  Twos complement\n   (\"-\") is used if there's only zero limbs below the one being tested.\n   Ones complement (\"~\") is used if there's a non-zero below.  Note that \"-\"\n   is correct even if the limb examined is 0 (and the true beginning of twos\n   complement is further up).\n\n   Testing the limbs below p is unavoidable on negatives, but will usually\n   need to examine only *(p-1).  The search is done from *(p-1) down to\n   *u_ptr, since that might give better cache locality, and because a\n   non-zero limb is perhaps a touch more likely in the middle of a number\n   than at the low end.\n\n   Bits past the end of available data simply follow sign of u.  Notice that\n   the limb_index >= abs_size test covers u=0 too.  
*/\n\nint\nmpz_tstbit (mpz_srcptr u, mp_bitcnt_t bit_index)\n{\n  mp_srcptr      u_ptr      = PTR(u);\n  mp_size_t      size       = SIZ(u);\n  unsigned       abs_size   = ABS(size);\n  mp_size_t      limb_index = bit_index / GMP_NUMB_BITS;\n  mp_srcptr      p          = u_ptr + limb_index;\n  mp_limb_t      limb;\n\n  if (limb_index >= abs_size)\n    return (size < 0);\n\n  limb = *p;\n  if (size < 0)\n    {\n      limb = -limb;     /* twos complement */\n\n      while (p != u_ptr)\n        {\n          p--;\n          if (*p != 0)\n            {\n              limb--;   /* make it a ones complement instead */\n              break;\n            }\n        }\n    }\n\n  return (limb >> (bit_index % GMP_NUMB_BITS)) & 1;\n}\n"
  },
  {
    "path": "mpz/ui_pow_ui.c",
    "content": "/* mpz_ui_pow_ui -- ulong raised to ulong.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\nmpz_ui_pow_ui (mpz_ptr r, mpir_ui b, mpir_ui e)\n{\n#if GMP_NAIL_BITS != 0\n  if (b > GMP_NUMB_MAX)\n    {\n      mp_limb_t bb[2];\n      bb[0] = b & GMP_NUMB_MASK;\n      bb[1] = b >> GMP_NUMB_BITS;\n      mpz_n_pow_ui (r, bb, (mp_size_t) 2, e);\n    }\n  else\n#endif\n    {\n#ifdef _LONG_LONG_LIMB\n      /* i386 gcc 2.95.3 doesn't recognise blimb can be eliminated when\n\t mp_limb_t is an unsigned long, so only use a separate blimb when\n\t necessary.  */\n      mp_limb_t  blimb = b;\n      mpz_n_pow_ui (r, &blimb, (mp_size_t) (b != 0), e);\n#else\n      mpz_n_pow_ui (r, &b,     (mp_size_t) (b != 0), e);\n#endif\n    }\n}\n\n"
  },
  {
    "path": "mpz/ui_sub.c",
    "content": "/* mpz_ui_sub -- Subtract an unsigned one-word integer and an mpz_t.\n\nCopyright 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_ui_sub (mpz_ptr w, mpir_ui uval, mpz_srcptr v)\n{\n  mp_ptr vp, wp;\n  mp_size_t vn, wn;\n  mp_limb_t cy;\n\n#if BITS_PER_UI > GMP_NUMB_BITS  /* avoid warnings about shift amount */\n  if (uval > GMP_NUMB_MAX)\n    {\n      mpz_t u;\n      mp_limb_t ul[2];\n      PTR(u) = ul;\n      ul[0] = uval & GMP_NUMB_MASK;\n      ul[1] = uval >> GMP_NUMB_BITS;\n      SIZ(u) = 2;\n      mpz_sub (w, u, v);\n      return;\n    }\n#endif\n\n  vp = PTR(v);\n  vn = SIZ(v);\n\n  wp = PTR(w);\n\n  if (vn > 1)\n    {\n      wp = MPZ_REALLOC (w, vn);\n      vp = PTR(v);\n      mpn_sub_1 (wp, vp, vn, (mp_limb_t) uval);\n      wn = -(vn - (wp[vn - 1] == 0));\n    }\n  else if (vn == 1)\n    {\n      if (uval >= vp[0])\n\t{\n\t  wp[0] = uval - vp[0];\n\t  wn = wp[0] != 0;\n\t}\n      else\n\t{\n\t  wp[0] = vp[0] - uval;\n\t  wn = -1;\n\t}\n    }\n  else if (vn == 0)\n    {\n      wp[0] = uval;\n      wn = uval != 0;\n    }\n  else /* (vn < 0) */\n    {\n      vn = -vn;\n      wp = MPZ_REALLOC 
(w, vn + 1);\n      vp = PTR(v);\n      cy = mpn_add_1 (wp, vp, vn, (mp_limb_t) uval);\n      wp[vn] = cy;\n      wn = vn + (cy != 0);\n    }\n\n  SIZ(w) = wn;\n}\n"
  },
  {
    "path": "mpz/urandomb.c",
    "content": "/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom\n   integer in the range 0 to 2^N - 1, inclusive, using STATE as the\n   random state previously initialized by a call to gmp_randinit().\n\nCopyright 1999, 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_urandomb (mpz_ptr rop, gmp_randstate_t rstate, mp_bitcnt_t nbits)\n{\n  mp_ptr rp;\n  mp_size_t size;\n\n  size = BITS_TO_LIMBS (nbits);\n  rp = MPZ_REALLOC (rop, size);\n\n  _gmp_rand (rp, rstate, nbits);\n  MPN_NORMALIZE (rp, size);\n  SIZ (rop) = size;\n}\n"
  },
  {
    "path": "mpz/urandomm.c",
    "content": "/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom\n   integer in the range 0 to N-1, using STATE as the random state\n   previously initialized by a call to gmp_randinit().\n\nCopyright 2000, 2002  Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\" /* for count_leading_zeros */\n\nvoid\nmpz_urandomm (mpz_ptr rop, gmp_randstate_t rstate, mpz_srcptr n)\n{\n  mp_ptr rp, np, nlast;\n  mp_size_t nbits, size;\n  int count;\n  int pow2;\n  int cmp;\n  int overlap=0;\n\n  size = ABSIZ (n);\n  if (size == 0)\n    DIVIDE_BY_ZERO;\n\n  nlast = &PTR (n)[size - 1];\n\n  /* Detect whether n is a power of 2.  */\n  pow2 = POW2_P (*nlast);\n  if (pow2 != 0)\n    for (np = PTR (n); np < nlast; np++)\n      if (*np != 0)\n\t{\n\t  pow2 = 0;\t\t/* Mark n as `not a power of two'.  */\n\t  break;\n\t}\n  count_leading_zeros (count, *nlast);\n  nbits = size * GMP_NUMB_BITS - (count - GMP_NAIL_BITS) - pow2;\n  if (nbits == 0)\t\t/* nbits == 0 means that n was == 1.  
*/\n    {\n      SIZ (rop) = 0;\n      return;\n    }\n\n  np=PTR(n);\n  rp=PTR(rop);\n  if(np==rp)\n    {overlap=1;\n     np=__GMP_ALLOCATE_FUNC_LIMBS(size);\n     MPN_COPY(np,PTR(n),size);\n    }\n  /* Here the allocated size can be one too much if n is a power of\n     (2^GMP_NUMB_BITS) but it's convenient for using mpn_cmp below.  */\n  rp = MPZ_REALLOC (rop, size);\n  /* Clear last limb to prevent the case in which size is one too much.  */\n  rp[size - 1] = 0;\n\n  do\n    {\n      _gmp_rand (rp, rstate, nbits);\n      MPN_CMP (cmp, rp, np, size);\n    }\n  while (cmp >= 0);\n\n  if(overlap)__GMP_FREE_FUNC_LIMBS(np,size);\n  MPN_NORMALIZE (rp, size);\n  SIZ (rop) = size;\n}\n"
  },
  {
    "path": "mpz/xor.c",
    "content": "/* mpz_xor -- Logical xor.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)\n{\n  mp_srcptr op1_ptr, op2_ptr;\n  mp_size_t op1_size, op2_size;\n  mp_ptr res_ptr;\n  mp_size_t res_size, res_alloc;\n  mp_size_t i;\n  TMP_DECL;\n\n  TMP_MARK;\n  op1_size = op1->_mp_size;\n  op2_size = op2->_mp_size;\n\n  op1_ptr = op1->_mp_d;\n  op2_ptr = op2->_mp_d;\n  res_ptr = res->_mp_d;\n\n  if (op1_size >= 0)\n    {\n      if (op2_size >= 0)\n\t{\n\t  if (op1_size >= op2_size)\n\t    {\n\t      if (res->_mp_alloc < op1_size)\n\t\t{\n\t\t  _mpz_realloc (res, op1_size);\n\t\t  op1_ptr = op1->_mp_d;\n\t\t  op2_ptr = op2->_mp_d;\n\t\t  res_ptr = res->_mp_d;\n\t\t}\n\n\t      if (res_ptr != op1_ptr)\n\t\tMPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,\n\t\t\t  op1_size - op2_size);\n\t      for (i = op2_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\t      res_size = op1_size;\n\t    }\n\t  else\n\t    {\n\t      if (res->_mp_alloc < op2_size)\n\t\t{\n\t\t  _mpz_realloc (res, op2_size);\n\t\t  
op1_ptr = op1->_mp_d;\n\t\t  op2_ptr = op2->_mp_d;\n\t\t  res_ptr = res->_mp_d;\n\t\t}\n\n\t      if (res_ptr != op2_ptr)\n\t\tMPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,\n\t\t\t  op2_size - op1_size);\n\t      for (i = op1_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\t      res_size = op2_size;\n\t    }\n\n\t  MPN_NORMALIZE (res_ptr, res_size);\n\t  res->_mp_size = res_size;\n\t  return;\n\t}\n      else /* op2_size < 0 */\n\t{\n\t  /* Fall through to the code at the end of the function.  */\n\t}\n    }\n  else\n    {\n      if (op2_size < 0)\n\t{\n\t  mp_ptr opx;\n\n\t  /* Both operands are negative, the result will be positive.\n\t      (-OP1) ^ (-OP2) =\n\t     = ~(OP1 - 1) ^ ~(OP2 - 1) =\n\t     = (OP1 - 1) ^ (OP2 - 1)  */\n\n\t  op1_size = -op1_size;\n\t  op2_size = -op2_size;\n\n\t  /* Possible optimization: Decrease mpn_sub precision,\n\t     as we won't use the entire res of both.  */\n\t  opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);\n\t  op1_ptr = opx;\n\n\t  opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);\n\t  mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);\n\t  op2_ptr = opx;\n\n\t  res_alloc = MAX (op1_size, op2_size);\n\t  if (res->_mp_alloc < res_alloc)\n\t    {\n\t      _mpz_realloc (res, res_alloc);\n\t      res_ptr = res->_mp_d;\n\t      /* Don't re-read OP1_PTR and OP2_PTR.  They point to\n\t\t temporary space--never to the space RES->_mp_d used\n\t\t to point to before reallocation.  
*/\n\t    }\n\n\t  if (op1_size > op2_size)\n\t    {\n\t      MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,\n\t\t\top1_size - op2_size);\n\t      for (i = op2_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\t      res_size = op1_size;\n\t    }\n\t  else\n\t    {\n\t      MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,\n\t\t\top2_size - op1_size);\n\t      for (i = op1_size - 1; i >= 0; i--)\n\t\tres_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\t      res_size = op2_size;\n\t    }\n\n\t  MPN_NORMALIZE (res_ptr, res_size);\n\t  res->_mp_size = res_size;\n\t  TMP_FREE;\n\t  return;\n\t}\n      else\n\t{\n\t  /* We should compute -OP1 ^ OP2.  Swap OP1 and OP2 and fall\n\t     through to the code that handles OP1 ^ -OP2.  */\n          MPZ_SRCPTR_SWAP (op1, op2);\n          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);\n\t}\n    }\n\n  {\n    mp_ptr opx;\n    mp_limb_t cy;\n\n    /* Operand 2 negative, so will be the result.\n       -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =\n       = ~(OP1 ^ ~(OP2 - 1)) + 1 =\n       = (OP1 ^ (OP2 - 1)) + 1      */\n\n    op2_size = -op2_size;\n\n    opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);\n    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);\n    op2_ptr = opx;\n\n    res_alloc = MAX (op1_size, op2_size) + 1;\n    if (res->_mp_alloc < res_alloc)\n      {\n\t_mpz_realloc (res, res_alloc);\n\top1_ptr = op1->_mp_d;\n\tres_ptr = res->_mp_d;\n\t/* Don't re-read OP2_PTR.  It points to temporary space--never\n\t   to the space RES->_mp_d used to point to before reallocation.  
*/\n      }\n\n    if (op1_size > op2_size)\n      {\n\tMPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);\n\tfor (i = op2_size - 1; i >= 0; i--)\n\t  res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\tres_size = op1_size;\n      }\n    else\n      {\n\tMPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);\n\tfor (i = op1_size - 1; i >= 0; i--)\n\t  res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];\n\tres_size = op2_size;\n      }\n\n    cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);\n    if (cy)\n      {\n\tres_ptr[res_size] = cy;\n\tres_size++;\n      }\n\n    MPN_NORMALIZE (res_ptr, res_size);\n    res->_mp_size = -res_size;\n    TMP_FREE;\n  }\n}\n"
  },
  {
    "path": "nextprime.c",
    "content": "/* gmp_nextprime -- generate small primes reasonably efficiently for internal\n   GMP needs.\n\n   Contributed to the GNU project by Torbjorn Granlund.  Miscellaneous\n   improvements by Martin Boij.\n\n   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY\n   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST\n   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2009 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n/*\n  Optimisation ideas:\n\n  1. Unroll the sieving loops.  Should reach 1 write/cycle.  That would be a 2x\n     improvement.\n\n  2. Separate sieving with primes p < SIEVESIZE and p >= SIEVESIZE.  The latter\n     will need at most one write, and thus not need any inner loop.\n\n  3. For primes p >= SIEVESIZE, i.e., typically the majority of primes, we\n     perform more than one division per sieving write.  That might dominate the\n     entire run time for the nextprime function.  
A incrementally initialised\n     remainder table of Pi(65536) = 6542 16-bit entries could replace that\n     division.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include <string.h>\t\t/* for memset */\n\n\nmpir_ui\ngmp_nextprime (gmp_primesieve_t *ps)\n{\n  mpir_ui p, d, pi;\n  unsigned char *sp;\n  static unsigned char addtab[] =\n    { 2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,\n      2,4,6,2,6,4,2,4,2,10,2,10 };\n  unsigned char *addp = addtab;\n  mpir_ui ai;\n\n  /* Look for already sieved primes.  A sentinel at the end of the sieving\n     area allows us to use a very simple loop here.  */\n  d = ps->d;\n  sp = ps->s + d;\n  while (*sp != 0)\n    sp++;\n  if (sp != ps->s + SIEVESIZE)\n    {\n      d = sp - ps->s;\n      ps->d = d + 1;\n      return ps->s0 + 2 * d;\n    }\n\n  /* Handle the number 2 separately.  */\n  if (ps->s0 < 3)\n    {\n      ps->s0 = 3 - 2 * SIEVESIZE; /* Tricky */\n      return 2;\n    }\n\n  /* Exhausted computed primes.  Resieve, then call ourselves recursively.  */\n\n#if 0\n  for (sp = ps->s; sp < ps->s + SIEVESIZE; sp++)\n    *sp = 0;\n#else\n  memset (ps->s, 0, SIEVESIZE);\n#endif\n\n  ps->s0 += 2 * SIEVESIZE;\n\n  /* Update sqrt_s0 as needed.  
*/\n  while ((ps->sqrt_s0 + 1) * (ps->sqrt_s0 + 1) <= ps->s0 + 2 * SIEVESIZE - 1)\n    ps->sqrt_s0++;\n\n  pi = ((ps->s0 + 3) / 2) % 3;\n  if (pi > 0)\n    pi = 3 - pi;\n  if (ps->s0 + 2 * pi <= 3)\n    pi += 3;\n  sp = ps->s + pi;\n  while (sp < ps->s + SIEVESIZE)\n    {\n      *sp = 1, sp += 3;\n    }\n\n  pi = ((ps->s0 + 5) / 2) % 5;\n  if (pi > 0)\n    pi = 5 - pi;\n  if (ps->s0 + 2 * pi <= 5)\n    pi += 5;\n  sp = ps->s + pi;\n  while (sp < ps->s + SIEVESIZE)\n    {\n      *sp = 1, sp += 5;\n    }\n\n  pi = ((ps->s0 + 7) / 2) % 7;\n  if (pi > 0)\n    pi = 7 - pi;\n  if (ps->s0 + 2 * pi <= 7)\n    pi += 7;\n  sp = ps->s + pi;\n  while (sp < ps->s + SIEVESIZE)\n    {\n      *sp = 1, sp += 7;\n    }\n\n  p = 11;\n  ai = 0;\n  while (p <= ps->sqrt_s0)\n    {\n      pi = ((ps->s0 + p) / 2) % p;\n      if (pi > 0)\n\tpi = p - pi;\n      if (ps->s0 + 2 * pi <= p)\n\t  pi += p;\n      sp = ps->s + pi;\n      while (sp < ps->s + SIEVESIZE)\n\t{\n\t  *sp = 1, sp += p;\n\t}\n      p += addp[ai];\n      ai = (ai + 1) % 48;\n    }\n  ps->d = 0;\n  return gmp_nextprime (ps);\n}\n\nvoid\ngmp_init_primesieve (gmp_primesieve_t *ps)\n{\n  ps->s0 = 0;\n  ps->sqrt_s0 = 0;\n  ps->d = SIEVESIZE;\n  ps->s[SIEVESIZE] = 0;\t\t/* sentinel */\n}\n"
  },
  {
    "path": "primesieve.c",
    "content": "/* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.\n\nContributed to the GNU project by Marco Bodrato.\n\nTHE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.\nIT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.\nIN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR\nDISAPPEAR IN A FUTURE GNU MP RELEASE.\n\nCopyright 2010, 2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  
*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/**************************************************************/\n/* Section macros: common macros, for mswing/fac/bin (&sieve) */\n/**************************************************************/\n\n#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\t\t\t\\\n    __max_i = (end);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\\\n      ++__i;\t\t\t\t\t\t\t\\\n      if (((sieve)[__index] & __mask) == 0)\t\t\t\\\n\t{\t\t\t\t\t\t\t\\\n\t  (prime) = id_to_n(__i)\n\n#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)\t\t\\\n  do {\t\t\t\t\t\t\t\t\\\n    mp_limb_t __mask, __index, __max_i, __i;\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    __i = (start)-(off);\t\t\t\t\t\\\n    __index = __i / GMP_LIMB_BITS;\t\t\t\t\\\n    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);\t\t\\\n    __i += (off);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)\n\n#define LOOP_ON_SIEVE_STOP\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\\\n      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);\t\\\n      __index += __mask & 1;\t\t\t\t\t\\\n    }  while (__i <= __max_i)\t\t\t\t\t\\\n\n#define LOOP_ON_SIEVE_END\t\t\t\t\t\\\n    LOOP_ON_SIEVE_STOP;\t\t\t\t\t\t\\\n  } while (0)\n\n/*********************************************************/\n/* Section sieve: sieving functions and tools for primes */\n/*********************************************************/\n\n#if 0\nstatic mp_limb_t\nbit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }\n#endif\n\n/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/\nstatic mp_limb_t\nid_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }\n\n/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */\nstatic mp_limb_t\nn_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }\n\n#if 0\nstatic mp_size_t\nprimesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }\n#endif\n\n#if GMP_LIMB_BITS > 61\n#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)\n#define SEED_LIMIT 202\n#else\n#if GMP_LIMB_BITS > 30\n#define 
SIEVE_SEED CNST_LIMB(0x69128480)\n#define SEED_LIMIT 114\n#else\n#if GMP_LIMB_BITS > 15\n#define SIEVE_SEED CNST_LIMB(0x8480)\n#define SEED_LIMIT 54\n#else\n#if GMP_LIMB_BITS > 7\n#define SIEVE_SEED CNST_LIMB(0x80)\n#define SEED_LIMIT 34\n#else\n#define SIEVE_SEED CNST_LIMB(0x0)\n#define SEED_LIMIT 24\n#endif /* 7 */\n#endif /* 15 */\n#endif /* 30 */\n#endif /* 61 */\n\nstatic void\nfirst_block_primesieve (mp_ptr bit_array, mp_limb_t n)\n{\n  mp_size_t bits, limbs;\n\n  ASSERT (n > 4);\n\n  bits  = n_to_bit(n);\n  limbs = bits / GMP_LIMB_BITS + 1;\n\n  /* FIXME: We can skip 5 too, filling with a 5-part pattern. */\n  MPN_ZERO (bit_array, limbs);\n  bit_array[0] = SIEVE_SEED;\n\n  if ((bits + 1) % GMP_LIMB_BITS != 0)\n    bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);\n\n  if (n > SEED_LIMIT) {\n    mp_limb_t mask, index, i;\n\n    ASSERT (n > 49);\n\n    mask = 1;\n    index = 0;\n    i = 1;\n    do {\n      if ((bit_array[index] & mask) == 0)\n\t{\n\t  mp_size_t step, lindex;\n\t  mp_limb_t lmask;\n\t  unsigned  maskrot;\n\n\t  step = id_to_n(i);\n/*\t  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */\n\t  lindex = i*(step+1)-1+(-(i&1)&(i+1));\n/*\t  lindex = i*(step+1+(i&1))-1+(i&1); */\n\t  if (lindex > bits)\n\t    break;\n\n\t  step <<= 1;\n\t  maskrot = step % GMP_LIMB_BITS;\n\n\t  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);\n\t  do {\n\t    bit_array[lindex / GMP_LIMB_BITS] |= lmask;\n\t    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);\n\t    lindex += step;\n\t  } while (lindex <= bits);\n\n/*\t  lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */\n\t  lindex = i*(i*3+6)+(i&1);\n\n\t  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);\n\t  for ( ; lindex <= bits; lindex += step) {\n\t    bit_array[lindex / GMP_LIMB_BITS] |= lmask;\n\t    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);\n\t  };\n\t}\n      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);\n      index += mask & 1;\n      i++;\n    } while (1);\n  
}\n}\n\nstatic void\nblock_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,\n\t\t      mp_srcptr sieve, mp_limb_t sieve_bits)\n{\n  mp_size_t bits, step;\n\n  ASSERT (limbs > 0);\n\n  bits = limbs * GMP_LIMB_BITS - 1;\n\n  /* FIXME: We can skip 5 too, filling with a 5-part pattern. */\n  MPN_ZERO (bit_array, limbs);\n\n  LOOP_ON_SIEVE_BEGIN(step,0,sieve_bits,0,sieve);\n  {\n    mp_size_t lindex;\n    mp_limb_t lmask;\n    unsigned  maskrot;\n\n/*  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */\n    lindex = __i*(step+1)-1+(-(__i&1)&(__i+1));\n/*  lindex = __i*(step+1+(__i&1))-1+(__i&1); */\n    if (lindex > bits + offset)\n      break;\n\n    step <<= 1;\n    maskrot = step % GMP_LIMB_BITS;\n\n    if (lindex < offset)\n      lindex += step * ((offset - lindex - 1) / step + 1);\n\n    lindex -= offset;\n\n    lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);\n    for ( ; lindex <= bits; lindex += step) {\n      bit_array[lindex / GMP_LIMB_BITS] |= lmask;\n      lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);\n    };\n\n/*  lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */\n    lindex = __i*(__i*3+6)+(__i&1);\n    if (lindex > bits + offset)\n      continue;\n\n    if (lindex < offset)\n      lindex += step * ((offset - lindex - 1) / step + 1);\n\n    lindex -= offset;\n\n    lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);\n    for ( ; lindex <= bits; lindex += step) {\n      bit_array[lindex / GMP_LIMB_BITS] |= lmask;\n      lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);\n    };\n  }\n  LOOP_ON_SIEVE_END;\n}\n\n#define BLOCK_SIZE 2048\n\n/* Fills bit_array with the characteristic function of composite\n   numbers up to the parameter n. I.e. a bit set to \"1\" represent a\n   composite, a \"0\" represent a prime.\n\n   The primesieve_size(n) limbs pointed to by bit_array are\n   overwritten. The returned value counts prime integers in the\n   interval [4, n]. 
Note that n > 4.\n\n   Even numbers and multiples of 3 are excluded \"a priori\", only\n   numbers equivalent to +/- 1 mod 6 have their bit in the array.\n\n   Once sieved, if the bit b is ZERO it represent a prime, the\n   represented prime is bit_to_n(b), if the LSbit is bit 0, or\n   id_to_n(b), if you call \"1\" the first bit.\n */\n\nmp_limb_t\ngmp_primesieve (mp_ptr bit_array, mp_limb_t n)\n{\n  mp_size_t size;\n  mp_limb_t bits;\n\n  ASSERT (n > 4);\n\n  bits = n_to_bit(n);\n  size = bits / GMP_LIMB_BITS + 1;\n\n  if (size > BLOCK_SIZE * 2) {\n    mp_size_t off;\n    off = BLOCK_SIZE + (size % BLOCK_SIZE);\n    first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));\n    for ( ; off < size; off += BLOCK_SIZE)\n      block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array, off * GMP_LIMB_BITS - 1);\n  } else {\n    first_block_primesieve (bit_array, n);\n  }\n\n  if ((bits + 1) % GMP_LIMB_BITS != 0)\n    bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);\n\n\n  return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);\n}\n\n#undef BLOCK_SIZE\n#undef SEED_LIMIT\n#undef SIEVE_SEED\n#undef LOOP_ON_SIEVE_END\n#undef LOOP_ON_SIEVE_STOP\n#undef LOOP_ON_SIEVE_BEGIN\n#undef LOOP_ON_SIEVE_CONTINUE\n"
  },
  {
    "path": "printf/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nnoinst_LTLIBRARIES = libprintf.la\n\nlibprintf_la_SOURCES = asprintf.c asprntffuns.c doprnt.c doprntf.c doprnti.c fprintf.c obprintf.c obprntffuns.c obvprintf.c printf.c printffuns.c repl-vsnprintf.c snprintf.c snprntffuns.c sprintf.c sprintffuns.c vasprintf.c vfprintf.c vprintf.c vsnprintf.c vsprintf.c\n"
  },
  {
    "path": "printf/asprintf.c",
    "content": "/* gmp_asprintf -- formatted output to an allocated space.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_asprintf (char **result, const char *fmt, ...)\n#else\ngmp_asprintf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  char       **result;\n  const char *fmt;\n  va_start (ap);\n  result = va_arg (ap, char **);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = gmp_vasprintf (result, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "printf/asprntffuns.c",
    "content": "/* __gmp_asprintf_memory etc -- formatted output to allocated space.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* These routines are in a separate file so that the mpz_t, mpq_t and mpf_t\n   operator<< routines can avoid dragging vsnprintf into the link (via\n   __gmp_asprintf_format).  */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n__gmp_asprintf_memory (struct gmp_asprintf_t *d, const char *str, size_t len)\n{\n  GMP_ASPRINTF_T_NEED (d, len);\n  memcpy (d->buf + d->size, str, len);\n  d->size += len;\n  return len;\n}\n\nint\n__gmp_asprintf_reps (struct gmp_asprintf_t *d, int c, int reps)\n{\n  GMP_ASPRINTF_T_NEED (d, reps);\n  memset (d->buf + d->size, c, reps);\n  d->size += reps;\n  return reps;\n}\n\nint\n__gmp_asprintf_final (struct gmp_asprintf_t *d)\n{\n  char  *buf = d->buf;\n  ASSERT (d->alloc >= d->size + 1);\n  buf[d->size] = '\\0';\n  __GMP_REALLOCATE_FUNC_MAYBE_TYPE (buf, d->alloc, d->size+1, char);\n  *d->result = buf;\n  return 0;\n}\n"
  },
  {
    "path": "printf/doprnt.c",
    "content": "/* __gmp_doprnt -- printf style formatted output.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */\n#endif\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <ctype.h>     /* for isdigit */\n#include <stddef.h>    /* for ptrdiff_t */\n#include <string.h>\n#include <stdio.h>     /* for NULL */\n#include <stdlib.h>\n\n#if HAVE_INTTYPES_H\n# include <inttypes.h> /* for intmax_t */\n#else\n# if HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for localeconv */\n#endif\n\n#if HAVE_SYS_TYPES_H\n#include <sys/types.h> /* for quad_t */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* change this to \"#define TRACE(x) x\" for diagnostics */\n#define TRACE(x)\n\n\n/* Should be portable, but in any case this is only used under some ASSERTs. */\n#define va_equal(x, y)                           \\\n  (memcmp (&(x), &(y), sizeof(va_list)) == 0)\n\n\n/* printf is convenient because it allows various types to be printed in one\n   fairly compact call, so having gmp_printf support the standard types as\n   well as the gmp ones is important.  This ends up meaning all the standard\n   parsing must be duplicated, to get a new routine recognising the gmp\n   extras.\n\n   With the currently favoured handling of mpz etc as Z, Q and F type\n   markers, it's not possible to use glibc register_printf_function since\n   that only accepts new conversion characters, not new types.  If Z was a\n   conversion there'd be no way to specify hex, decimal or octal, or\n   similarly with F no way to specify fixed point or scientific format.\n\n   It seems wisest to pass conversions %f, %e and %g of float, double and\n   long double over to the standard printf.  It'd be hard to be sure of\n   getting the right handling for NaNs, rounding, etc.  
Integer conversions\n   %d etc and string conversions %s on the other hand could be easily enough\n   handled within gmp_doprnt, but if floats are going to libc then it's just\n   as easy to send all non-gmp types there.\n\n   \"Z\" was a type marker for size_t in old glibc, but there seems no need to\n   provide access to that now \"z\" is standard.\n\n   In GMP 4.1.1 we documented \"ll\" and \"L\" as being equivalent, but in C99\n   in fact \"ll\" is just for long long and \"L\" just for long double.\n   Apparentely GLIBC allows \"L\" for long long though.  This doesn't affect\n   us as such, since both are passed through to the C library.  To be\n   consistent with what we said before, the two are treated equivalently\n   here, and it's left to the C library to do what it thinks with them.\n\n   Possibilities:\n\n   \"b\" might be nice for binary output, and could even be supported for the\n   standard C types too if desired.\n\n   POSIX style \"%n$\" parameter numbering would be possible, but would need\n   to be handled completely within gmp_doprnt, since the numbering will be\n   all different once the format string it cut into pieces.\n\n   Some options for mpq formatting would be good.  Perhaps a non-zero\n   precision field could give a width for the denominator and mean always\n   put a \"/\".  A form \"n+p/q\" might interesting too, though perhaps that's\n   better left to applications.\n\n   Right now there's no way for an application to know whether types like\n   intmax_t are supported here.  If configure is doing its job and the same\n   compiler is used for gmp as for the application then there shouldn't be\n   any problem, but perhaps mpir.h should have some preprocessor symbols to\n   say what libmpir can do.  
*/\n\n\n\n/* If a gmp format is the very first thing or there are two gmp formats with\n   nothing in between then we'll reach here with this_fmt == last_fmt and we\n   can do nothing in that case.\n\n   last_ap is always replaced after a FLUSH, so it doesn't matter if va_list\n   is a call-by-reference and the funs->format routine modifies it.  */\n\n#define FLUSH()                                         \\\n  do {                                                  \\\n    if (this_fmt == last_fmt)                           \\\n      {                                                 \\\n        TRACE (printf (\"nothing to flush\\n\"));          \\\n        ASSERT (va_equal (this_ap, last_ap));           \\\n      }                                                 \\\n    else                                                \\\n      {                                                 \\\n        ASSERT (*this_fmt == '%');                      \\\n        *this_fmt = '\\0';                               \\\n        TRACE (printf (\"flush \\\"%s\\\"\\n\", last_fmt));    \\\n        DOPRNT_FORMAT (last_fmt, last_ap);              \\\n      }                                                 \\\n  } while (0)\n\n\n/* Parse up the given format string and do the appropriate output using the\n   given \"funs\" routines.  The data parameter is passed through to those\n   routines.  */\n\nint\n__gmp_doprnt (const struct doprnt_funs_t *funs, void *data,\n              const char *orig_fmt, va_list orig_ap)\n{\n  va_list  ap, this_ap, last_ap;\n  size_t   alloc_fmt_size;\n  char     *fmt, *alloc_fmt, *last_fmt, *this_fmt, *gmp_str;\n  int      retval = 0;\n  int      type, fchar, *value, seen_precision;\n  struct doprnt_params_t param;\n\n  TRACE (printf (\"gmp_doprnt \\\"%s\\\"\\n\", orig_fmt));\n\n  /* Don't modify orig_ap, if va_list is actually an array and hence call by\n     reference.  
It could be argued that it'd be more efficient to leave the\n     caller to make a copy if it cared, but doing so here is going to be a\n     very small part of the total work, and we may as well keep applications\n     out of trouble.  */\n  va_copy (ap, orig_ap);\n\n  /* The format string is chopped up into pieces to be passed to\n     funs->format.  Unfortunately that means it has to be copied so each\n     piece can be null-terminated.  We're not going to be very fast here, so\n     use __gmp_allocate_func rather than TMP_ALLOC, to avoid overflowing the\n     stack if a long output string is given.  */\n  alloc_fmt_size = strlen (orig_fmt) + 1;\n#if _LONG_LONG_LIMB\n  /* for a long long limb we change %Mx to %llx, so could need an extra 1\n     char for every 3 existing */\n  alloc_fmt_size += alloc_fmt_size / 3;\n#endif\n  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);\n  fmt = alloc_fmt;\n  strcpy (fmt, orig_fmt);\n\n  /* last_fmt and last_ap are just after the last output, and hence where\n     the next output will begin, when that's done */\n  last_fmt = fmt;\n  va_copy (last_ap, ap);\n\n  for (;;)\n    {\n      TRACE (printf (\"next: \\\"%s\\\"\\n\", fmt));\n\n      fmt = strchr (fmt, '%');\n      if (fmt == NULL)\n        break;\n\n      /* this_fmt and this_ap are the current '%' sequence being considered */\n      this_fmt = fmt;\n      va_copy (this_ap, ap);\n      fmt++; /* skip the '%' */\n\n      TRACE (printf (\"considering\\n\");\n             printf (\"  last: \\\"%s\\\"\\n\", last_fmt);\n             printf (\"  this: \\\"%s\\\"\\n\", this_fmt));\n\n      type = '\\0';\n      value = &param.width;\n\n      param.base = 10;\n      param.conv = 0;\n      param.expfmt = \"e%c%02ld\";\n      param.exptimes4 = 0;\n      param.fill = ' ';\n      param.justify = DOPRNT_JUSTIFY_RIGHT;\n      param.prec = 6;\n      param.showbase = DOPRNT_SHOWBASE_NO;\n      param.showpoint = 0;\n      param.showtrailing = 1;\n      param.sign = '\\0';\n    
  param.width = 0;\n      seen_precision = 0;\n\n      /* This loop parses a single % sequence.  \"break\" from the switch\n         means continue with this %, \"goto next\" means the conversion\n         character has been seen and a new % should be sought.  */\n      for (;;)\n        {\n          fchar = *fmt++;\n          if (fchar == '\\0')\n            break;\n\n          switch (fchar) {\n\n          case 'a':\n            /* %a behaves like %e, but defaults to all significant digits,\n               and there's no leading zeros on the exponent (which is in\n               fact bit-based) */\n            param.base = 16;\n            param.expfmt = \"p%c%ld\";\n            goto conv_a;\n          case 'A':\n            param.base = -16;\n            param.expfmt = \"P%c%ld\";\n          conv_a:\n            param.conv = DOPRNT_CONV_SCIENTIFIC;\n            param.exptimes4 = 1;\n            if (! seen_precision)\n              param.prec = -1;  /* default to all digits */\n            param.showbase = DOPRNT_SHOWBASE_YES;\n            param.showtrailing = 1;\n            goto floating_a;\n\n          case 'c':\n            /* Let's assume wchar_t will be promoted to \"int\" in the call,\n               the same as char will be. */\n            (void) va_arg (ap, int);\n            goto next;\n\n          case 'd':\n          case 'i':\n          case 'u':\n          integer:\n            TRACE (printf (\"integer, base=%d\\n\", param.base));\n            if (! seen_precision)\n              param.prec = -1;\n            switch (type) {\n            case 'j':\n              /* Let's assume uintmax_t is the same size as intmax_t. 
*/\n#if HAVE_INTMAX_T\n              (void) va_arg (ap, intmax_t);\n#else\n              ASSERT_FAIL (intmax_t not available);\n#endif\n              break;\n            case 'l':\n              (void) va_arg (ap, long);\n              break;\n            case 'L':\n#if HAVE_LONG_LONG\n              (void) va_arg (ap, long long);\n#else\n              ASSERT_FAIL (long long not available);\n#endif\n              break;\n            case 'N':\n              {\n                mp_ptr     xp;\n                mp_size_t  xsize, abs_xsize;\n                mpz_t      z;\n                FLUSH ();\n                xp = va_arg (ap, mp_ptr);\n                PTR(z) = xp;\n                xsize = (int) va_arg (ap, mp_size_t);\n                abs_xsize = ABS (xsize);\n                MPN_NORMALIZE (xp, abs_xsize);\n                SIZ(z) = (xsize >= 0 ? abs_xsize : -abs_xsize);\n                ASSERT_CODE (ALLOC(z) = abs_xsize);\n                gmp_str = mpz_get_str (NULL, param.base, z);\n                goto gmp_integer;\n              }\n              /* break; */\n            case 'q':\n              /* quad_t is probably the same as long long, but let's treat\n                 it separately just to be sure.  Also let's assume u_quad_t\n                 will be the same size as quad_t.  
*/\n#if HAVE_QUAD_T\n              (void) va_arg (ap, quad_t);\n#else\n              ASSERT_FAIL (quad_t not available);\n#endif\n              break;\n            case 'Q':\n              FLUSH ();\n              gmp_str = mpq_get_str (NULL, param.base, va_arg(ap, mpq_srcptr));\n              goto gmp_integer;\n            case 't':\n              (void) va_arg (ap, ptrdiff_t);\n              break;\n            case 'z':\n              (void) va_arg (ap, size_t);\n              break;\n            case 'Z':\n              {\n                int   ret;\n                FLUSH ();\n                gmp_str = mpz_get_str (NULL, param.base,\n                                       va_arg (ap, mpz_srcptr));\n              gmp_integer:\n                ret = __gmp_doprnt_integer (funs, data, &param, gmp_str);\n                (*__gmp_free_func) (gmp_str, strlen(gmp_str)+1);\n                DOPRNT_ACCUMULATE (ret);\n                va_copy (last_ap, ap);\n                last_fmt = fmt;\n              }\n              break;\n            default:\n              /* default is an \"int\", and this includes h=short and hh=char\n                 since they're promoted to int in a function call */\n              (void) va_arg (ap, int);\n              break;\n            }\n            goto next;\n\n          case 'E':\n            param.base = -10;\n            param.expfmt = \"E%c%02ld\";\n            /*FALLTHRU*/\n          case 'e':\n            param.conv = DOPRNT_CONV_SCIENTIFIC;\n          floating:\n            if (param.showbase == DOPRNT_SHOWBASE_NONZERO)\n              {\n                /* # in %e, %f and %g */\n                param.showpoint = 1;\n                param.showtrailing = 1;\n              }\n          floating_a:\n            switch (type) {\n            case 'F':\n              FLUSH ();\n              DOPRNT_ACCUMULATE (__gmp_doprnt_mpf (funs, data, &param,\n                                                   GMP_DECIMAL_POINT,\n                     
                              va_arg (ap, mpf_srcptr)));\n              va_copy (last_ap, ap);\n              last_fmt = fmt;\n              break;\n            case 'L':\n#if HAVE_LONG_DOUBLE\n              (void) va_arg (ap, long double);\n#else\n              ASSERT_FAIL (long double not available);\n#endif\n              break;\n            default:\n              (void) va_arg (ap, double);\n              break;\n            }\n            goto next;\n\n          case 'f':\n            param.conv = DOPRNT_CONV_FIXED;\n            goto floating;\n\n          case 'F': /* mpf_t     */\n          case 'j': /* intmax_t  */\n          case 'L': /* long long */\n          case 'N': /* mpn       */\n          case 'q': /* quad_t    */\n          case 'Q': /* mpq_t     */\n          case 't': /* ptrdiff_t */\n          case 'z': /* size_t    */\n          case 'Z': /* mpz_t     */\n          set_type:\n            type = fchar;\n            break;\n\n          case 'G':\n            param.base = -10;\n            param.expfmt = \"E%c%02ld\";\n            /*FALLTHRU*/\n          case 'g':\n            param.conv = DOPRNT_CONV_GENERAL;\n            param.showtrailing = 0;\n            goto floating;\n\n          case 'h':\n            if (type != 'h')\n              goto set_type;\n            type = 'H';   /* internal code for \"hh\" */\n            break;\n\n          case 'l':\n            if (type != 'l')\n              goto set_type;\n            type = 'L';   /* \"ll\" means \"L\" */\n            break;\n\n          case 'm':\n            /* glibc strerror(errno), no argument */\n            goto next;\n\n          case 'M': /* mp_limb_t */\n            /* mung format string to l or ll and let plain printf handle it */\n#if _LONG_LONG_LIMB\n            memmove (fmt+1, fmt, strlen (fmt)+1);\n            fmt[-1] = 'l';\n            fmt[0] = 'l';\n            fmt++;\n            type = 'L';\n#else\n            fmt[-1] = 'l';\n            type = 'l';\n#endif\n         
   break;\n\n          case 'n':\n            {\n              void  *p;\n              FLUSH ();\n              p = va_arg (ap, void *);\n              switch (type) {\n              case '\\0': * (int       *) p = retval; break;\n              case 'F':  mpf_set_si ((mpf_ptr) p, (long) retval); break;\n              case 'H':  * (char      *) p = retval; break;\n              case 'h':  * (short     *) p = retval; break;\n#if HAVE_INTMAX_T\n              case 'j':  * (intmax_t  *) p = retval; break;\n#else\n              case 'j':  ASSERT_FAIL (intmax_t not available); break;\n#endif\n              case 'l':  * (long      *) p = retval; break;\n#if HAVE_QUAD_T && HAVE_LONG_LONG\n              case 'q':\n                ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));\n                /*FALLTHRU*/\n#else\n              case 'q':  ASSERT_FAIL (quad_t not available); break;\n#endif\n#if HAVE_LONG_LONG\n              case 'L':  * (long long *) p = retval; break;\n#else\n              case 'L':  ASSERT_FAIL (long long not available); break;\n#endif\n              case 'N':\n                {\n                  mp_size_t  n;\n                  n = va_arg (ap, mp_size_t);\n                  n = ABS (n);\n                  if (n != 0)\n                    {\n                      * (mp_ptr) p = retval;\n                      MPN_ZERO ((mp_ptr) p + 1, n - 1);\n                    }\n                }\n                break;\n              case 'Q':  mpq_set_si ((mpq_ptr) p, (long) retval, 1L); break;\n              case 't':  * (ptrdiff_t *) p = retval; break;\n              case 'z':  * (size_t    *) p = retval; break;\n              case 'Z':  mpz_set_si ((mpz_ptr) p, (long) retval); break;\n              }\n            }\n            va_copy (last_ap, ap);\n            last_fmt = fmt;\n            goto next;\n\n          case 'o':\n            param.base = 8;\n            goto integer;\n\n          case 'p':\n          case 's':\n            /* \"void *\" will be 
good enough for \"char *\" or \"wchar_t *\", no\n               need for separate code.  */\n            (void) va_arg (ap, const void *);\n            goto next;\n\n          case 'x':\n            param.base = 16;\n            goto integer;\n          case 'X':\n            param.base = -16;\n            goto integer;\n\n          case '%':\n            goto next;\n\n          case '#':\n            param.showbase = DOPRNT_SHOWBASE_NONZERO;\n            break;\n\n          case '\\'':\n            /* glibc digit grouping, just pass it through, no support for it\n               on gmp types */\n            break;\n\n          case '+':\n          case ' ':\n            param.sign = fchar;\n            break;\n\n          case '-':\n            param.justify = DOPRNT_JUSTIFY_LEFT;\n            break;\n          case '.':\n            seen_precision = 1;\n            param.prec = -1; /* \".\" alone means all necessary digits */\n            value = &param.prec;\n            break;\n\n          case '*':\n            {\n              int n = va_arg (ap, int);\n\n              if (value == &param.width)\n                {\n                  /* negative width means left justify */\n                  if (n < 0)\n                    {\n                      param.justify = DOPRNT_JUSTIFY_LEFT;\n                      n = -n;\n                    }\n                  param.width = n;\n                }\n              else\n                {\n                  /* don't allow negative precision */\n                  param.prec = MAX (0, n);\n                }\n            }\n            break;\n\n          case '0':\n            if (value == &param.width)\n              {\n                /* in width field, set fill */\n                param.fill = '0';\n\n                /* for right justify, put the fill after any minus sign */\n                if (param.justify == DOPRNT_JUSTIFY_RIGHT)\n                  param.justify = DOPRNT_JUSTIFY_INTERNAL;\n              }\n          
  else\n              {\n                /* in precision field, set value */\n                *value = 0;\n              }\n            break;\n\n          case '1': case '2': case '3': case '4': case '5':\n          case '6': case '7': case '8': case '9':\n            /* process all digits to form a value */\n            {\n              int  n = 0;\n              do {\n                n = n * 10 + (fchar-'0');\n                fchar = *fmt++;\n              } while (isascii (fchar) && isdigit (fchar));\n              fmt--; /* unget the non-digit */\n              *value = n;\n            }\n            break;\n\n          default:\n            /* something invalid */\n            ASSERT (0);\n            goto next;\n          }\n        }\n\n    next:\n      /* Stop parsing the current \"%\" format, look for a new one. */\n      ;\n    }\n\n  TRACE (printf (\"remainder: \\\"%s\\\"\\n\", last_fmt));\n  if (*last_fmt != '\\0')\n    DOPRNT_FORMAT (last_fmt, last_ap);\n\n  if (funs->final != NULL)\n    if ((*funs->final) (data) == -1)\n      goto error;\n\n done:\n  (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);\n  return retval;\n\n error:\n  retval = -1;\n  goto done;\n}\n"
  },
  {
    "path": "printf/doprntf.c",
    "content": "/* __gmp_doprnt_mpf -- mpf formatted output.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#else\n#include <varargs.h>\n#endif\n\n#include <ctype.h>\n#include <string.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* change this to \"#define TRACE(x) x\" for diagnostics */\n#define TRACE(x) \n\n\n/* The separate of __gmp_doprnt_float_digits and __gmp_doprnt_float is so\n   some C++ can do the mpf_get_str and release it in case of an exception */\n\n#define DIGIT_VALUE(c)                  \\\n  (isdigit (c)   ? (c) - '0'            \\\n   : islower (c) ? 
(c) - 'a' + 10       \\\n   :               (c) - 'A' + 10)\n\nint\n__gmp_doprnt_mpf (const struct doprnt_funs_t *funs,\n                  void *data,\n                  const struct doprnt_params_t *p,\n                  const char *point,\n                  mpf_srcptr f)\n{\n  int         prec, ndigits, free_size, len, newlen, justify, justlen, explen;\n  int         showbaselen, sign, signlen, intlen, intzeros, pointlen;\n  int         fraczeros, fraclen, preczeros;\n  char        *s, *free_ptr;\n  mp_exp_t    exp;\n  char        exponent[BITS_PER_MP_LIMB + 10];\n  const char  *showbase;\n  int         retval = 0;\n\n  TRACE (printf (\"__gmp_doprnt_float\\n\");\n         printf (\"  conv=%d prec=%d\\n\", p->conv, p->prec));\n\n  prec = p->prec;\n  if (prec <= -1)\n    {\n      /* all digits */\n      ndigits = 0;\n\n      /* arrange the fixed/scientific decision on a \"prec\" implied by how\n         many significant digits there are */\n      if (p->conv == DOPRNT_CONV_GENERAL)\n        MPF_SIGNIFICANT_DIGITS (prec, PREC(f), ABS(p->base));\n    }\n  else\n    {\n      switch (p->conv) {\n      case DOPRNT_CONV_FIXED:\n        /* Precision is digits after the radix point.  Try not to generate\n           too many more than will actually be required.  If f>=1 then\n           overestimate the integer part, and add prec.  If f<1 then\n           underestimate the zeros between the radix point and the first\n           digit and subtract that from prec.  In either case add 2 so the\n           round to nearest can be applied accurately.  
*/\n        ndigits = prec + 2\n          + EXP(f) * (__mp_bases[ABS(p->base)].chars_per_limb + (EXP(f)>=0));\n        ndigits = MAX (ndigits, 1);\n        break;\n\n      case DOPRNT_CONV_SCIENTIFIC:\n        /* precision is digits after the radix point, and there's one digit\n           before */\n        ndigits = prec + 1;\n        break;\n\n      default:\n        ASSERT (0);\n        /*FALLTHRU*/\n        \n      case DOPRNT_CONV_GENERAL:\n        /* precision is total digits, but be sure to ask mpf_get_str for at\n           least 1, not 0 */\n        ndigits = MAX (prec, 1);\n        break;\n      }\n    }\n  TRACE (printf (\"  ndigits %d\\n\", ndigits));\n\n  s = mpf_get_str (NULL, &exp, p->base, ndigits, f);\n  len = strlen (s);\n  free_ptr = s;\n  free_size = len + 1;\n  TRACE (printf (\"  s   %s\\n\", s);\n         printf (\"  exp %ld\\n\", exp);\n         printf (\"  len %d\\n\", len));\n\n  /* For fixed mode check the ndigits formed above was in fact enough for\n     the integer part plus p->prec after the radix point. */\n  ASSERT ((p->conv == DOPRNT_CONV_FIXED && p->prec > -1)\n          ? ndigits >= MAX (1, exp + p->prec + 2) : 1);\n\n  sign = p->sign;\n  if (s[0] == '-')\n    {\n      sign = s[0];\n      s++, len--;\n    }\n  signlen = (sign != '\\0');\n  TRACE (printf (\"  sign %c  signlen %d\\n\", sign, signlen));\n\n  switch (p->conv) {\n  case DOPRNT_CONV_FIXED:\n    if (prec <= -1)\n      prec = MAX (0, len-exp);   /* retain all digits */\n\n    /* Truncate if necessary so fraction will be at most prec digits. 
*/\n    ASSERT (prec >= 0);\n    newlen = exp + prec;\n    if (newlen < 0)\n      {\n        /* first non-zero digit is below target prec, and at least one zero\n           digit in between, so print zero */\n        len = 0;\n        exp = 0;\n      }\n    else if (len <= newlen)\n      {\n        /* already got few enough digits */\n      }\n    else\n      {\n        /* discard excess digits and round to nearest */\n\n        const char  *num_to_text = (p->base >= 0\n                                    ? \"0123456789abcdefghijklmnopqrstuvwxyz\"\n                                    : \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\");\n        int  base = ABS(p->base);\n        int  n;\n\n        ASSERT (base <= 36);\n\n        len = newlen;\n        n = DIGIT_VALUE (s[len]);\n        TRACE (printf (\"  rounding with %d\\n\", n));\n        if (n >= (base + 1) / 2)\n          {\n            /* propagate a carry */\n            for (;;)\n              {\n                if (len == 0)\n                  {\n                    s[0] = '1';\n                    len = 1;\n                    exp++;\n                    break;\n                  }\n                n = DIGIT_VALUE (s[len-1]);\n                ASSERT (n >= 0 && n < base);\n                n++;\n                if (n != base)\n                  {\n                    TRACE (printf (\"  storing now %d\\n\", n));\n                    s[len-1] = num_to_text[n];\n                    break;\n                  }\n                len--;\n              }\n          }\n        else\n          {\n            /* truncate only, strip any trailing zeros now exposed */\n            while (len > 0 && s[len-1] == '0')\n              len--;\n          }\n\n        /* Can have newlen==0, in which case the truncate was just to check\n           for a carry turning it into \"1\".  If we're left with len==0 then\n           adjust exp to match.  */\n        if (len == 0)\n          exp = 0;\n      }  \n\n  fixed:\n    ASSERT (len == 0 ? 
exp == 0 : 1);\n    if (exp <= 0)\n      {\n        TRACE (printf (\"  fixed 0.000sss\\n\"));\n        intlen = 0;\n        intzeros = 1;\n        fraczeros = -exp;\n        fraclen = len;\n      }\n    else\n      {\n        TRACE (printf (\"  fixed sss.sss or sss000\\n\"));\n        intlen = MIN (len, exp);\n        intzeros = exp - intlen;\n        fraczeros = 0;\n        fraclen = len - intlen;\n      }\n    explen = 0;\n    break;\n\n  case DOPRNT_CONV_SCIENTIFIC:\n    {\n      long  expval;\n      char  expsign;\n\n      if (prec <= -1)\n        prec = MAX (0, len-1);   /* retain all digits */\n\n    scientific:\n      TRACE (printf (\"  scientific s.sss\\n\"));\n\n      intlen = MIN (1, len);\n      intzeros = (intlen == 0 ? 1 : 0);\n      fraczeros = 0;\n      fraclen = len - intlen;\n\n      expval = (exp-intlen);\n      if (p->exptimes4)\n        expval <<= 2;\n\n      /* Split out the sign since %o or %x in expfmt give negatives as twos\n         complement, not with a sign. */\n      expsign = (expval >= 0 ? '+' : '-');\n      expval = ABS (expval);\n\n#if HAVE_VSNPRINTF\n      explen = snprintf (exponent, sizeof(exponent),\n                         p->expfmt, expsign, expval);\n      /* test for < sizeof-1 since a glibc 2.0.x return of sizeof-1 might\n         mean truncation */\n      ASSERT (explen >= 0 && explen < sizeof(exponent)-1);\n#else\n      sprintf (exponent, p->expfmt, expsign, expval);\n      explen = strlen (exponent);\n      ASSERT (explen < sizeof(exponent));\n#endif\n      TRACE (printf (\"  expfmt %s gives %s\\n\", p->expfmt, exponent));\n    }\n    break;\n\n  default:\n    ASSERT (0);\n    /*FALLTHRU*/  /* to stop variables looking uninitialized */\n\n  case DOPRNT_CONV_GENERAL:\n    /* The exponent for \"scientific\" will be exp-1, choose scientific if\n       this is < -4 or >= prec (and minimum 1 for prec).  For f==0 will have\n       exp==0 and get the desired \"fixed\".  This rule follows glibc.  
For\n       fixed there's no need to truncate, the desired ndigits will already\n       be as required.  */\n    if (exp-1 < -4 || exp-1 >= MAX (1, prec))\n      goto scientific;\n    else\n      goto fixed;\n  }\n\n  TRACE (printf (\"  intlen %d intzeros %d fraczeros %d fraclen %d\\n\",\n                 intlen, intzeros, fraczeros, fraclen));\n  ASSERT (p->prec <= -1\n          ? intlen + fraclen == strlen (s)\n          : intlen + fraclen <= strlen (s));\n\n  if (p->showtrailing)\n    {\n      /* Pad to requested precision with trailing zeros, for general this is\n         all digits, for fixed and scientific just the fraction.  */\n      preczeros = prec - (fraczeros + fraclen\n                          + (p->conv == DOPRNT_CONV_GENERAL\n                             ? intlen + intzeros : 0));\n      preczeros = MAX (0, preczeros);\n    }\n  else\n    preczeros = 0;\n  TRACE (printf (\"  prec=%d showtrailing=%d, pad with preczeros %d\\n\",\n                 prec, p->showtrailing, preczeros));\n\n  /* radix point if needed, or if forced */\n  pointlen = ((fraczeros + fraclen + preczeros) != 0 || p->showpoint != 0)\n    ? strlen (point) : 0;\n  TRACE (printf (\"  point |%s|  pointlen %d\\n\", point, pointlen));\n\n  /* Notice the test for a non-zero value is done after any truncation for\n     DOPRNT_CONV_FIXED. */\n  showbase = NULL;\n  showbaselen = 0;\n  switch (p->showbase) {\n  default:\n    ASSERT (0);\n    /*FALLTHRU*/\n  case DOPRNT_SHOWBASE_NO:\n    break;\n  case DOPRNT_SHOWBASE_NONZERO:\n    if (intlen == 0 && fraclen == 0)\n      break;\n    /*FALLTHRU*/\n  case DOPRNT_SHOWBASE_YES:\n    switch (p->base) {\n    case 16:  showbase = \"0x\"; showbaselen = 2; break;\n    case -16: showbase = \"0X\"; showbaselen = 2; break;\n    case 8:   showbase = \"0\";  showbaselen = 1; break;\n    }\n    break;\n  }\n  TRACE (printf (\"  showbase %s showbaselen %d\\n\",\n                 showbase == NULL ? 
\"\" : showbase, showbaselen));\n\n  /* left over field width */\n  justlen = p->width - (signlen + showbaselen + intlen + intzeros + pointlen\n                        + fraczeros + fraclen + preczeros + explen);\n  TRACE (printf (\"  justlen %d fill 0x%X\\n\", justlen, p->fill));\n\n  justify = p->justify;\n  if (justlen <= 0) /* no justifying if exceed width */\n    justify = DOPRNT_JUSTIFY_NONE;\n\n  TRACE (printf (\"  justify type %d  intlen %d pointlen %d fraclen %d\\n\",\n                 justify, intlen, pointlen, fraclen));\n\n  if (justify == DOPRNT_JUSTIFY_RIGHT)         /* pad for right */\n    DOPRNT_REPS (p->fill, justlen);\n\n  if (signlen)                                 /* sign */\n    DOPRNT_REPS (sign, 1);\n\n  DOPRNT_MEMORY_MAYBE (showbase, showbaselen); /* base */\n\n  if (justify == DOPRNT_JUSTIFY_INTERNAL)      /* pad for internal */\n    DOPRNT_REPS (p->fill, justlen);\n\n  DOPRNT_MEMORY (s, intlen);                   /* integer */\n  DOPRNT_REPS_MAYBE ('0', intzeros);\n\n  DOPRNT_MEMORY_MAYBE (point, pointlen);       /* point */\n\n  DOPRNT_REPS_MAYBE ('0', fraczeros);          /* frac */\n  DOPRNT_MEMORY_MAYBE (s+intlen, fraclen);\n\n  DOPRNT_REPS_MAYBE ('0', preczeros);          /* prec */\n\n  DOPRNT_MEMORY_MAYBE (exponent, explen);      /* exp */\n\n  if (justify == DOPRNT_JUSTIFY_LEFT)          /* pad for left */         \n    DOPRNT_REPS (p->fill, justlen);\n\n done:\n  (*__gmp_free_func) (free_ptr, free_size);\n  return retval;\n\n error:\n  retval = -1;\n  goto done;\n}\n"
  },
  {
    "path": "printf/doprnti.c",
    "content": "/* __gmp_doprnt_integer -- integer style formatted output.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n__gmp_doprnt_integer (const struct doprnt_funs_t *funs,\n                      void *data,\n                      const struct doprnt_params_t *p,\n                      const char *s)\n{\n  int         retval = 0;\n  int         slen, justlen, showbaselen, sign, signlen, slashlen, zeros;\n  int         justify, den_showbaselen;\n  const char  *slash, *showbase;\n\n  /* '+' or ' ' if wanted, and don't already have '-' */\n  sign = p->sign;\n  if (s[0] == '-')\n    {\n      sign = s[0];\n      s++;\n    }\n  signlen = (sign != '\\0');\n\n  /* if the precision was explicitly 0, print nothing for a 0 value */\n  if (*s == '0' && p->prec == 0)\n    s++;\n\n  slen = strlen (s);\n  slash = strchr (s, '/');\n\n  showbase = NULL;\n  showbaselen = 0;\n\n  if (p->showbase != DOPRNT_SHOWBASE_NO)\n    {\n      switch (p->base) {\n      case 16:  showbase = \"0x\"; showbaselen = 2; break;\n      case -16: showbase = \"0X\"; showbaselen = 2; break;\n      case 8:   showbase = \"0\";  showbaselen = 1; break;\n      }\n    }\n\n  den_showbaselen = showbaselen;\n  if (slash == NULL\n      || (p->showbase == DOPRNT_SHOWBASE_NONZERO && slash[1] == '0'))\n    den_showbaselen = 0;\n\n  if (p->showbase == DOPRNT_SHOWBASE_NONZERO && s[0] == '0')\n    showbaselen = 0;\n\n  /* the influence of p->prec on mpq is currently undefined */\n  zeros = MAX (0, p->prec - slen);\n\n  /* space left over after actual output length */\n  justlen = p->width\n    - (strlen(s) + signlen + showbaselen + den_showbaselen + zeros);\n\n  justify = p->justify;\n  if (justlen <= 0) /* no justifying if exceed width */\n    justify = DOPRNT_JUSTIFY_NONE;\n\n  if (justify == DOPRNT_JUSTIFY_RIGHT)             /* pad right */\n    DOPRNT_REPS 
(p->fill, justlen);\n\n  DOPRNT_REPS_MAYBE (sign, signlen);               /* sign */\n\n  DOPRNT_MEMORY_MAYBE (showbase, showbaselen);     /* base */\n\n  DOPRNT_REPS_MAYBE ('0', zeros);                  /* zeros */\n\n  if (justify == DOPRNT_JUSTIFY_INTERNAL)          /* pad internal */\n    DOPRNT_REPS (p->fill, justlen);\n\n  /* if there's a showbase on the denominator, then print the numerator\n     separately so it can be inserted */\n  if (den_showbaselen != 0)\n    {\n      ASSERT (slash != NULL);\n      slashlen = slash+1 - s;\n      DOPRNT_MEMORY (s, slashlen);                 /* numerator and slash */\n      slen -= slashlen;\n      s += slashlen;\n      DOPRNT_MEMORY (showbase, den_showbaselen);\n    }\n\n  DOPRNT_MEMORY (s, slen);                         /* number, or denominator */\n\n  if (justify == DOPRNT_JUSTIFY_LEFT)              /* pad left */\n    DOPRNT_REPS (p->fill, justlen);\n\n done:\n  return retval;\n\n error:\n  retval = -1;\n  goto done;\n}\n"
  },
  {
    "path": "printf/fprintf.c",
    "content": "/* gmp_fprintf -- formatted output.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_fprintf (FILE *fp, const char *fmt, ...)\n#else\ngmp_fprintf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  FILE       *fp;\n  const char *fmt;\n  va_start (ap);\n  fp = va_arg (ap, FILE *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "printf/obprintf.c",
    "content": "/* gmp_obstack_printf -- formatted output to an obstack.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_OBSTACK_VPRINTF\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <obstack.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_obstack_printf (struct obstack *ob, const char *fmt, ...)\n#else\ngmp_obstack_printf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  struct obstack *ob;\n  const char     *fmt;\n  va_start (ap);\n  ob = va_arg (ap, struct obstack *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),\n                           fmt, strlen(fmt)+1));\n\n  ret = __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n\n#endif /* HAVE_OBSTACK_VPRINTF */\n"
  },
  {
    "path": "printf/obprntffuns.c",
    "content": "/* __gmp_obstack_printf_funs -- support for gmp_obstack_printf and\n   gmp_obstack_vprintf.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include \"config.h\"\n\n#if HAVE_OBSTACK_VPRINTF\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE   /* ask glibc <stdio.h> for obstack_vprintf */\n#endif\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>    /* for obstack_vprintf */\n#include <string.h>\n#include <obstack.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nstatic int\ngmp_obstack_memory (struct obstack *ob, const char *ptr, size_t len)\n{\n  obstack_grow (ob, ptr, len);\n  return len;  \n}\n\nstatic int\ngmp_obstack_reps (struct obstack *ob, int c, int reps)\n{\n  obstack_blank (ob, reps);\n  memset ((char *) obstack_next_free(ob) - reps, c, reps);\n  return reps;\n}\n\nconst struct doprnt_funs_t  __gmp_obstack_printf_funs = {\n  (doprnt_format_t) obstack_vprintf,\n  (doprnt_memory_t) gmp_obstack_memory,\n  (doprnt_reps_t)   gmp_obstack_reps\n};\n\n#endif /* HAVE_OBSTACK_VPRINTF */\n"
  },
  {
    "path": "printf/obvprintf.c",
    "content": "/* gmp_obstack_vprintf -- formatted output to an obstack.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_OBSTACK_VPRINTF\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <obstack.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_obstack_vprintf (struct obstack *ob, const char *fmt, va_list ap)\n{\n  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),\n                           fmt, strlen(fmt)+1));\n\n  return __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, ap);\n}\n\n#endif /* HAVE_OBSTACK_VPRINTF */\n"
  },
  {
    "path": "printf/printf.c",
    "content": "/* gmp_printf -- formatted output.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_printf (const char *fmt, ...)\n#else\ngmp_printf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  const char *fmt;\n  va_start (ap);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "printf/printffuns.c",
    "content": "/* __gmp_fprintf_funs -- support for formatted output to FILEs.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* SunOS 4 stdio.h doesn't provide a prototype for this */\n#if ! HAVE_DECL_VFPRINTF\nint vfprintf(FILE *fp, const char *fmt, va_list ap);\n#endif\n\n\nstatic int\ngmp_fprintf_memory (FILE *fp, const char *str, size_t len)\n{\n  return fwrite (str, 1, len, fp);\n}\n\n/* glibc putc is a function, at least when it's in multi-threaded mode or\n   some such, so fwrite chunks instead of making many calls. 
*/\nstatic int\ngmp_fprintf_reps (FILE *fp, int c, int reps)\n{\n  char  buf[256];\n  int   i, piece, ret;\n  ASSERT (reps >= 0);\n\n  memset (buf, c, MIN (reps, sizeof (buf)));\n  for (i = reps; i > 0; i -= sizeof (buf))\n    {\n      piece = MIN (i, sizeof (buf));\n      ret = fwrite (buf, 1, piece, fp);\n      if (ret == -1)\n        return ret;\n      ASSERT (ret == piece);\n    }\n\n  return reps;\n}\n\nconst struct doprnt_funs_t  __gmp_fprintf_funs = {\n  (doprnt_format_t) vfprintf,\n  (doprnt_memory_t) gmp_fprintf_memory,\n  (doprnt_reps_t)   gmp_fprintf_reps,\n};\n"
  },
  {
    "path": "printf/repl-vsnprintf.c",
    "content": "/* __gmp_replacement_vsnprintf -- for systems which don't have vsnprintf, or\n   only have a broken one.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if ! 
HAVE_VSNPRINTF   /* only need this file if we don't have vsnprintf */\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for strnlen prototype */\n#endif\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <ctype.h>     /* for isdigit */\n#include <stddef.h>    /* for ptrdiff_t */\n#include <string.h>\n#include <stdio.h>     /* for NULL */\n#include <stdlib.h>\n\n#if HAVE_FLOAT_H\n#include <float.h>     /* for DBL_MAX_10_EXP etc */\n#endif\n\n#if HAVE_INTTYPES_H\n# include <inttypes.h> /* for intmax_t */\n#else\n# if HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#if HAVE_SYS_TYPES_H\n#include <sys/types.h> /* for quad_t */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Autoconf notes that AIX 4.3 has a broken strnlen, but fortunately it\n   doesn't affect us since __gmp_replacement_vsnprintf is not required on\n   that system.  */\n#if ! HAVE_STRNLEN\nstatic size_t\nstrnlen (const char *s, size_t n)\n{\n  size_t  i;\n  for (i = 0; i < n; i++)\n    if (s[i] == '\\0')\n      break;\n  return i;\n}\n#endif\n\n\n/* The approach here is to parse the fmt string, and decide how much space\n   it requires, then use vsprintf into a big enough buffer.  The space\n   calculated isn't an exact amount, but it's certainly no less than\n   required.\n\n   This code was inspired by GNU libiberty/vasprintf.c but we support more\n   datatypes, when available.\n\n   mingw32 - doesn't have vsnprintf, it seems.  Because gcc is used a full\n       set of types are available, but \"long double\" is just a plain IEEE\n       64-bit \"double\" and LDBL_MAX_EXP_10 is correspondingly defined, so we\n       avoid the big 15-bit exponent estimate.  
*/\n\nint\n__gmp_replacement_vsnprintf (char *buf, size_t buf_size,\n                             const char *orig_fmt, va_list orig_ap)\n{\n  va_list     ap;\n  const char  *fmt;\n  size_t      total_width, integer_sizeof, floating_sizeof, len;\n  char        fchar, type;\n  int         width, prec, seen_prec, double_digits, long_double_digits;\n  int         *value;\n\n  /* preserve orig_ap for use after size estimation */\n  va_copy (ap, orig_ap);\n\n  fmt = orig_fmt;\n  total_width = strlen (fmt) + 1;   /* 1 extra for the '\\0' */\n\n  integer_sizeof = sizeof (long);\n#if HAVE_LONG_LONG\n  integer_sizeof = MAX (integer_sizeof, sizeof (long long));\n#endif\n#if HAVE_QUAD_T\n  integer_sizeof = MAX (integer_sizeof, sizeof (quad_t));\n#endif\n\n  floating_sizeof = sizeof (double);\n#if HAVE_LONG_DOUBLE\n  floating_sizeof = MAX (floating_sizeof, sizeof (long double));\n#endif\n\n  /* IEEE double or VAX G floats have an 11 bit exponent, so the default is\n     a maximum 308 decimal digits.  VAX D floats have only an 8 bit\n     exponent, but we don't bother trying to detect that directly.  */\n  double_digits = 308;\n#ifdef DBL_MAX_10_EXP\n  /* but in any case prefer a value the compiler says */\n  double_digits = DBL_MAX_10_EXP;\n#endif\n\n  /* IEEE 128-bit quad, Intel 80-bit temporary, or VAX H floats all have 15\n     bit exponents, so the default is a maximum 4932 decimal digits.  
*/\n  long_double_digits = 4932;\n  /* but if double == long double, then go with that size */\n#if HAVE_LONG_DOUBLE\n  if (sizeof (double) == sizeof (long double))\n    long_double_digits = double_digits;\n#endif\n#ifdef LDBL_MAX_10_EXP\n  /* but in any case prefer a value the compiler says */\n  long_double_digits = LDBL_MAX_10_EXP;\n#endif\n\n  for (;;)\n    {\n      fmt = strchr (fmt, '%');\n      if (fmt == NULL)\n        break;\n      fmt++;\n\n      type = '\\0';\n      width = 0;\n      prec = 6;\n      seen_prec = 0;\n      value = &width;\n\n      for (;;)\n        {\n          fchar = *fmt++;\n          switch (fchar) {\n\n          case 'c':\n            /* char, already accounted for by strlen(fmt) */\n            goto next;\n\n          case 'd':\n          case 'i':\n          case 'o':\n          case 'x':\n          case 'X':\n          case 'u':\n            /* at most 3 digits per byte in hex, dec or octal, plus a sign */\n            total_width += 3 * integer_sizeof + 1;\n\n            switch (type) {\n            case 'j':\n              /* Let's assume uintmax_t is the same size as intmax_t. */\n#if HAVE_INTMAX_T\n              (void) va_arg (ap, intmax_t);\n#else\n              ASSERT_FAIL (intmax_t not available);\n#endif\n              break;\n            case 'l':\n              (void) va_arg (ap, long);\n              break;\n            case 'L':\n#if HAVE_LONG_LONG\n              (void) va_arg (ap, long long);\n#else\n              ASSERT_FAIL (long long not available);\n#endif\n              break;\n            case 'q':\n              /* quad_t is probably the same as long long, but let's treat\n                 it separately just to be sure.  Also let's assume u_quad_t\n                 will be the same size as quad_t.  
*/\n#if HAVE_QUAD_T\n              (void) va_arg (ap, quad_t);\n#else\n              ASSERT_FAIL (quad_t not available);\n#endif\n              break;\n            case 't':\n              (void) va_arg (ap, ptrdiff_t);\n              break;\n            case 'z':\n              (void) va_arg (ap, size_t);\n              break;\n            default:\n              /* default is an \"int\", and this includes h=short and hh=char\n                 since they're promoted to int in a function call */\n              (void) va_arg (ap, int);\n              break;\n            }\n            goto next;\n\n          case 'E':\n          case 'e':\n          case 'G':\n          case 'g':\n            /* Requested decimals, sign, point and e, plus an overestimate\n               of exponent digits (the assumption is all the float is\n               exponent!).  */\n            total_width += prec + 3 + floating_sizeof * 3;\n            if (type == 'L')\n              {\n#if HAVE_LONG_DOUBLE\n                (void) va_arg (ap, long double);\n#else\n                ASSERT_FAIL (long double not available);\n#endif\n              }\n            else\n\t      (void) va_arg (ap, double);\n            break;\n\n          case 'f':\n            /* Requested decimals, sign and point, and a margin for error,\n               then add the maximum digits that can be in the integer part,\n               based on the maximum exponent value. 
*/\n            total_width += prec + 2 + 10;\n            if (type == 'L')\n              {\n#if HAVE_LONG_DOUBLE\n                (void) va_arg (ap, long double);\n                total_width += long_double_digits;\n#else\n                ASSERT_FAIL (long double not available);\n#endif\n              }\n            else\n              {\n                (void) va_arg (ap, double);\n                total_width += double_digits;\n              }\n            break;\n\n          case 'h':  /* short or char */\n          case 'j':  /* intmax_t */\n          case 'L':  /* long long or long double */\n          case 'q':  /* quad_t */\n          case 't':  /* ptrdiff_t */\n          set_type:\n            type = fchar;\n            break;\n\n          case 'l':\n            /* long or long long */\n            if (type != 'l')\n              goto set_type;\n            type = 'L';   /* \"ll\" means \"L\" */\n            break;\n\n          case 'n':\n            /* bytes written, no output as such */\n            (void) va_arg (ap, void *);\n            goto next;\n\n          case 's':\n            /* If no precision was given, then determine the string length\n               and put it there, to be added to the total under \"next\".  If\n               a precision was given then that's already the maximum from\n               this field, but see whether the string is shorter than that,\n               in case the limit was very big.  */\n            {\n              const char  *s = va_arg (ap, const char *);\n              prec = (seen_prec ? 
strnlen (s, prec) : strlen (s));\n            }\n            goto next;\n\n          case 'p':\n            /* pointer, let's assume at worst it's octal with some padding */\n            (void) va_arg (ap, const void *);\n            total_width += 3 * sizeof (void *) + 16;\n            goto next;\n\n          case '%':\n            /* literal %, already accounted for by strlen(fmt) */\n            goto next;\n\n          case '#':\n            /* showbase, at most 2 for \"0x\" */\n            total_width += 2;\n            break;\n\n          case '+':\n          case ' ':\n            /* sign, already accounted for under numerics */\n            break;\n\n          case '-':\n            /* left justify, no effect on total width */\n            break;\n\n          case '.':\n            seen_prec = 1;\n            value = &prec;\n            break;\n\n          case '*':\n            {\n              /* negative width means left justify which can be ignored,\n                 negative prec would be invalid, just use absolute value */\n              int n = va_arg (ap, int);\n              *value = ABS (n);\n            }\n            break;\n\n          case '0': case '1': case '2': case '3': case '4':\n          case '5': case '6': case '7': case '8': case '9':\n            /* process all digits to form a value */\n            {\n              int  n = 0;\n              do {\n                n = n * 10 + (fchar-'0');\n                fchar = *fmt++;\n              } while (isascii (fchar) && isdigit (fchar));\n              fmt--; /* unget the non-digit */\n              *value = n;\n            }\n            break;\n\n          default:\n            /* incomplete or invalid % sequence */\n            ASSERT (0);\n            goto next;\n          }\n        }\n\n    next:\n      total_width += width;\n      total_width += prec;\n    }\n\n  if (total_width <= buf_size)\n    {\n      vsprintf (buf, orig_fmt, orig_ap);\n      len = strlen (buf);\n    }\n  else\n  
  {\n      char  *s;\n\n      s = __GMP_ALLOCATE_FUNC_TYPE (total_width, char);\n      vsprintf (s, orig_fmt, orig_ap);\n      len = strlen (s);\n      if (buf_size != 0)\n        {\n          size_t  copylen = MIN (len, buf_size-1);\n          memcpy (buf, s, copylen);\n          buf[copylen] = '\\0';\n        }\n      (*__gmp_free_func) (s, total_width);\n    }\n\n  /* If total_width was somehow wrong then chances are we've already\n     clobbered memory, but maybe this check will still work.  */\n  ASSERT_ALWAYS (len < total_width);\n\n  return len;\n}\n\n#endif /* ! HAVE_VSNPRINTF */\n"
  },
  {
    "path": "printf/snprintf.c",
    "content": "/* gmp_snprintf -- formatted output to an fixed size buffer.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>    /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_snprintf (char *buf, size_t size, const char *fmt, ...)\n#else\ngmp_snprintf (va_alist)\n     va_dcl\n#endif\n{\n  struct gmp_snprintf_t d;\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n  d.buf = buf;\n  d.size = size;\n\n#else\n  const char *fmt;\n  va_start (ap);\n  d.buf = va_arg (ap, char *);\n  d.size = va_arg (ap, size_t);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ASSERT (! MEM_OVERLAP_P (buf, size, fmt, strlen(fmt)+1));\n\n  ret = __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "printf/snprntffuns.c",
    "content": "/* __gmp_snprintf_funs -- support for gmp_snprintf and gmp_vsnprintf.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n#if ! HAVE_VSNPRINTF\n#define vsnprintf  __gmp_replacement_vsnprintf\n#endif\n\n\n/* glibc 2.0.x vsnprintf returns either -1 or size-1 for an overflow, with\n   no indication how big the output would have been.  It's necessary to\n   re-run to determine that size.\n\n   \"size-1\" would mean success from a C99 vsnprintf, and the re-run is\n   unnecessary in this case, but we don't bother to try to detect what sort\n   of vsnprintf we've got.  
size-1 should occur rarely in normal\n   circumstances.\n\n   vsnprintf might trash its given ap (it does for instance in glibc 2.1.3\n   on powerpc), so copy it in case we need to use it to probe for the size\n   output that would have been produced.  Note there's no need to preserve\n   it for our callers, just for ourselves.  */\n\nstatic int\ngmp_snprintf_format (struct gmp_snprintf_t *d, const char *fmt,\n                     va_list orig_ap)\n{\n  int      ret, step, alloc, avail;\n  va_list  ap;\n  char     *p;\n\n  ASSERT (d->size >= 0);\n\n  avail = d->size;\n  if (avail > 1)\n    {\n      va_copy (ap, orig_ap);\n      ret = vsnprintf (d->buf, avail, fmt, ap);\n      if (ret == -1)\n        {\n          ASSERT (strlen (d->buf) == avail-1);\n          ret = avail-1;\n        }\n\n      step = MIN (ret, avail-1);\n      d->size -= step;\n      d->buf += step;\n\n      if (ret != avail-1)\n        return ret;\n\n      /* probably glibc 2.0.x truncated output, probe for actual size */\n      alloc = MAX (128, ret);\n    }\n  else\n    {\n      /* no space to write anything, just probe for size */\n      alloc = 128;\n    }\n\n  do\n    {\n      alloc *= 2;\n      p = __GMP_ALLOCATE_FUNC_TYPE (alloc, char);\n      va_copy (ap, orig_ap);\n      ret = vsnprintf (p, alloc, fmt, ap);\n      (*__gmp_free_func) (p, alloc);\n    }\n  while (ret == alloc-1 || ret == -1);\n\n  return ret;\n}\n\nstatic int\ngmp_snprintf_memory (struct gmp_snprintf_t *d, const char *str, size_t len)\n{\n  size_t n;\n\n  ASSERT (d->size >= 0);\n\n  if (d->size > 1)\n    {\n      n = MIN (d->size-1, len);\n      memcpy (d->buf, str, n);\n      d->buf += n;\n      d->size -= n;\n    }\n  return len;\n}\n\nstatic int\ngmp_snprintf_reps (struct gmp_snprintf_t *d, int c, int reps)\n{\n  size_t n;\n\n  ASSERT (reps >= 0);\n  ASSERT (d->size >= 0);\n\n  if (d->size > 1)\n    {\n      n = MIN (d->size-1, reps);\n      memset (d->buf, c, n);\n      d->buf += n;\n      d->size -= n;\n    }\n  
return reps;\n}\n\nstatic int\ngmp_snprintf_final (struct gmp_snprintf_t *d)\n{\n  if (d->size >= 1)\n    d->buf[0] = '\\0';\n  return 0;\n}\n\nconst struct doprnt_funs_t  __gmp_snprintf_funs = {\n  (doprnt_format_t) gmp_snprintf_format,\n  (doprnt_memory_t) gmp_snprintf_memory,\n  (doprnt_reps_t)   gmp_snprintf_reps,\n  (doprnt_final_t)  gmp_snprintf_final\n};\n"
  },
  {
    "path": "printf/sprintf.c",
    "content": "/* gmp_sprintf -- formatted output to an unrestricted string.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>    /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_sprintf (char *buf, const char *fmt, ...)\n#else\ngmp_sprintf (va_alist)\n     va_dcl\n#endif\n{\n#if WANT_ASSERT\n  int      fmtlen = strlen(fmt);\n#endif\n  va_list  ap;\n  int      ret;\n\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  char       *buf;\n  const char *fmt;\n  va_start (ap);\n  buf = va_arg (ap, char *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);\n  va_end (ap);\n\n  ASSERT (! MEM_OVERLAP_P (buf, strlen(buf)+1, fmt, fmtlen+1));\n\n  return ret;\n}\n"
  },
  {
    "path": "printf/sprintffuns.c",
    "content": "/* __gmp_sprintf_funs -- support for gmp_sprintf and gmp_vsprintf.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* The data parameter \"bufp\" points to a \"char *buf\" which is the next\n   character to be written, having started as the destination from the\n   application.  This is then increased each time output is produced.  */\n\n\n/* If vsprintf returns -1 then pass it upwards.  It doesn't matter that\n   \"*bufp\" is ruined in this case, since gmp_doprint will bail out\n   immediately anyway.  
*/\nstatic int\ngmp_sprintf_format (char **bufp, const char *fmt, va_list ap)\n{\n  char  *buf = *bufp;\n  int   ret;\n  vsprintf (buf, fmt, ap);\n  ret = strlen (buf);\n  *bufp = buf + ret;\n  return ret;  \n}\n\nstatic int\ngmp_sprintf_memory (char **bufp, const char *str, size_t len)\n{\n  char  *buf = *bufp;\n  *bufp = buf + len;\n  memcpy (buf, str, len);\n  return len;  \n}\n\nstatic int\ngmp_sprintf_reps (char **bufp, int c, int reps)\n{\n  char  *buf = *bufp;\n  ASSERT (reps >= 0);\n  *bufp = buf + reps;\n  memset (buf, c, reps);\n  return reps;  \n}\n\nstatic int\ngmp_sprintf_final (char **bufp, int c, int reps)\n{\n  char  *buf = *bufp;\n  *buf = '\\0';\n  return 0;\n}\n\nconst struct doprnt_funs_t  __gmp_sprintf_funs = {\n  (doprnt_format_t) gmp_sprintf_format,\n  (doprnt_memory_t) gmp_sprintf_memory,\n  (doprnt_reps_t)   gmp_sprintf_reps,\n  (doprnt_final_t)  gmp_sprintf_final\n};\n"
  },
  {
    "path": "printf/vasprintf.c",
    "content": "/* gmp_vasprintf -- formatted output to an allocated space.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if ! HAVE_VSNPRINTF\n#define vsnprintf  __gmp_replacement_vsnprintf\n#endif\n\n\n/* vasprintf isn't used since we prefer all GMP allocs to go through\n   __gmp_allocate_func, and in particular we don't want the -1 return from\n   vasprintf for out-of-memory, instead __gmp_allocate_func should handle\n   that.  Using vsnprintf unfortunately means we might have to re-run it if\n   our current space is insufficient.\n\n   The initial guess for the needed space is an arbitrary 256 bytes.  If\n   that (and any extra GMP_ASPRINTF_T_NEED might give) isn't enough then an\n   ISO C99 standard vsnprintf will tell us what we really need.\n\n   GLIBC 2.0.x vsnprintf returns either -1 or space-1 to indicate overflow,\n   without giving any indication how much is really needed.  
In this case\n   keep trying with double the space each time.\n\n   A return of space-1 is success on a C99 vsnprintf, but we're not\n   bothering to identify which style vsnprintf we've got, so just take the\n   pessimistic option and assume it's glibc 2.0.x.\n\n   Notice the use of ret+2 for the new space in the C99 case.  This ensures\n   the next vsnprintf return value will be space-2, which is unambiguously\n   successful.  But actually GMP_ASPRINTF_T_NEED() will realloc to even\n   bigger than that ret+2.\n\n   vsnprintf might trash its given ap, so copy it in case we need to use it\n   more than once.  See comments with gmp_snprintf_format.  */\n\nstatic int\ngmp_asprintf_format (struct gmp_asprintf_t *d, const char *fmt,\n                     va_list orig_ap)\n{\n  int      ret;\n  va_list  ap;\n  size_t   space = 256;\n\n  for (;;)\n    {\n      GMP_ASPRINTF_T_NEED (d, space);\n      space = d->alloc - d->size;\n      va_copy (ap, orig_ap);\n      ret = vsnprintf (d->buf + d->size, space, fmt, ap);\n      if (ret == -1)\n        {\n          ASSERT (strlen (d->buf + d->size) == space-1);\n          ret = space-1;\n        }\n\n      /* done if output fits in our space */\n      if (ret < space-1)\n        break;\n\n      if (ret == space-1)\n        space *= 2;     /* possible glibc 2.0.x, so double */\n      else\n        space = ret+2;  /* C99, so now know space required */\n    }\n\n  d->size += ret;\n  return ret;\n}\n\nconst struct doprnt_funs_t  __gmp_asprintf_funs = {\n  (doprnt_format_t) gmp_asprintf_format,\n  (doprnt_memory_t) __gmp_asprintf_memory,\n  (doprnt_reps_t)   __gmp_asprintf_reps,\n  (doprnt_final_t)  __gmp_asprintf_final\n};\n\nint\ngmp_vasprintf (char **result, const char *fmt, va_list ap)\n{\n  struct gmp_asprintf_t  d;\n  GMP_ASPRINTF_T_INIT (d, result);\n  return __gmp_doprnt (&__gmp_asprintf_funs, &d, fmt, ap);\n}\n"
  },
  {
    "path": "printf/vfprintf.c",
    "content": "/* gmp_vfprintf -- formatted output.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vfprintf (FILE *fp, const char *fmt, va_list ap)\n{\n  return __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, ap);\n}\n"
  },
  {
    "path": "printf/vprintf.c",
    "content": "/* gmp_vprintf -- formatted output.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vprintf (const char *fmt, va_list ap)\n{\n  return __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, ap);\n}\n"
  },
  {
    "path": "printf/vsnprintf.c",
    "content": "/* gmp_vsnprintf -- formatted output to a fixed size buffer.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>    /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vsnprintf (char *buf, size_t size, const char *fmt, va_list ap)\n{\n  struct gmp_snprintf_t d;\n\n  ASSERT (! MEM_OVERLAP_P (buf, size, fmt, strlen(fmt)+1));\n\n  d.buf = buf;\n  d.size = size;\n  return __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);\n}\n"
  },
  {
    "path": "printf/vsprintf.c",
    "content": "/* gmp_vsprintf -- formatted output to an unrestricted string.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>    /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vsprintf (char *buf, const char *fmt, va_list ap)\n{\n#if WANT_ASSERT\n  int  fmtlen = strlen(fmt);\n#endif\n  int  ret;\n\n  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);\n\n  ASSERT (! MEM_OVERLAP_P (buf, strlen(buf)+1, fmt, fmtlen+1));\n\n  return ret;\n}\n"
  },
  {
    "path": "randbui.c",
    "content": "/* gmp_urandomb_ui -- random bits returned in a ulong.\n\nCopyright 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,\n   maybe this should raise an exception or something.  */\n\nmpir_ui\ngmp_urandomb_ui (gmp_randstate_ptr rstate, mpir_ui bits)\n{\n  mp_limb_t  a[LIMBS_PER_UI];\n\n  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at\n     all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */\n  a[0] = 0;\n#if LIMBS_PER_UI > 1\n  a[1] = 0;\n#endif\n\n  _gmp_rand (a, rstate, MIN (bits, BITS_PER_UI));\n\n#if LIMBS_PER_UI == 1\n  return a[0];\n#else\n  return a[0] | (a[1] << GMP_NUMB_BITS);\n#endif\n}\n"
  },
  {
    "path": "randclr.c",
    "content": "/* gmp_randclear (state) -- Clear and deallocate random state STATE.\n\nCopyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\ngmp_randclear (gmp_randstate_t rstate)\n{\n  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);\n}\n"
  },
  {
    "path": "randdef.c",
    "content": "/* gmp_randinit_default -- initialize a random state with a default algorithm.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\ngmp_randinit_default (gmp_randstate_t rstate)\n{\n  gmp_randinit_mt (rstate);\n}\n"
  },
  {
    "path": "randiset.c",
    "content": "/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nvoid\ngmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)\n{\n  (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);\n}\n"
  },
  {
    "path": "randlc2s.c",
    "content": "/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear\n   congruential generator of a requested size.\n\nCopyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h> /* for NULL */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Array of LC-schemes, ordered in increasing order of the first\n   member (the 'm2exp' value).  The end of the array is indicated with\n   an entry containing all zeros.  */\n\n/* All multipliers are in the range 0.01*m and 0.99*m, and are\ncongruent to 5 (mod 8).\nThey all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.\n(Up to and including 196 bits, merit is >= 3.)  */\n\nstruct __gmp_rand_lc_scheme_struct\n{\n  unsigned long int m2exp;\t/* Modulus is 2 ^ m2exp. */\n  const char *astr;\t\t/* Multiplier in string form. */\n  mpir_ui c;\t\t/* Addend. 
*/\n};\n\nstatic const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =\n{\n  {32, \"29CF535\", \t     1},\n  {33, \"51F666D\", \t     1},\n  {34, \"A3D73AD\", \t     1},\n  {35, \"147E5B85\", \t     1},\n  {36, \"28F725C5\", \t     1},\n  {37, \"51EE3105\", \t     1},\n  {38, \"A3DD5CDD\", \t     1},\n  {39, \"147AF833D\", \t     1},\n  {40, \"28F5DA175\", \t     1},\n  {56, \"AA7D735234C0DD\",  1},\n  {64, \"BAECD515DAF0B49D\", 1},\n  {100, \"292787EBD3329AD7E7575E2FD\", 1},\n  {128, \"48A74F367FA7B5C8ACBB36901308FA85\", 1},\n  {156, \"78A7FDDDC43611B527C3F1D760F36E5D7FC7C45\", 1},\n  {196, \"41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5\", 1},\n  {200, \"4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5\", 1},\n  {256, \"AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5\", 1},\n  {0, NULL, 0}\t\t\t/* End of array. */\n};\n\nint\ngmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)\n{\n  const struct __gmp_rand_lc_scheme_struct *sp;\n  mpz_t a;\n\n  /* Pick a scheme.  */\n  for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)\n    if (sp->m2exp / 2 >= size)\n      goto found;\n  return 0;\n\n found:\n  /* Install scheme.  */\n  mpz_init_set_str (a, sp->astr, 16);\n  gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);\n  mpz_clear (a);\n  return 1;\n}\n"
  },
  {
    "path": "randlc2x.c",
    "content": "/* Linear Congruential pseudo-random number generator functions.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.\n\n   _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.\n   SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is\n   padded with high zero limbs if necessary.  ALLOC(_mp_seed) is the current\n   size of PTR(_mp_seed) in the usual way.  There only needs to be\n   BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the\n   initialization and seeding end up making it a bit more than this.\n\n   _mp_a is the \"a\" multiplier, in the range 0 to 2^m2exp-1.  SIZ(_mp_a) is\n   the size of the value in the normal way for an mpz_t, except that a value\n   of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0.  This makes it\n   easy to call mpn_mul, and the case of a==0 is highly un-random and not\n   worth any trouble to optimize.\n\n   {_cp,_cn} is the \"c\" addend.  
Normally _cn is 1, but when nails are in\n   use a ulong can be bigger than one limb, and in this case _cn is 2 if\n   necessary.  c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy\n   to call __GMPN_ADD.  c==0 is fairly un-random so isn't worth optimizing.\n\n   _mp_m2exp gives the modulus, namely 2^m2exp.  We demand m2exp>=1, since\n   m2exp==0 would mean no bits at all out of each iteration, which makes no\n   sense.  */\n\ntypedef struct {\n  mpz_t          _mp_seed;\n  mpz_t          _mp_a;\n  mp_size_t      _cn;\n  mp_limb_t      _cp[LIMBS_PER_UI];\n  mpir_ui  _mp_m2exp;\n} gmp_rand_lc_struct;\n\n\n/* lc (rp, state) -- Generate next number in LC sequence.  Return the\n   number of valid bits in the result.  Discards the lower half of the\n   result.  */\n\nstatic mpir_ui\nlc (mp_ptr rp, gmp_randstate_t rstate)\n{\n  mp_ptr tp, seedp, ap;\n  mp_size_t ta;\n  mp_size_t tn, seedn, an;\n  mpir_ui m2exp;\n  mpir_ui bits;\n  int cy;\n  mp_size_t xn;\n  gmp_rand_lc_struct *p;\n  TMP_DECL;\n\n  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);\n\n  m2exp = p->_mp_m2exp;\n\n  seedp = PTR (p->_mp_seed);\n  seedn = SIZ (p->_mp_seed);\n\n  ap = PTR (p->_mp_a);\n  an = SIZ (p->_mp_a);\n\n  /* Allocate temporary storage.  Let there be room for calculation of\n     (A * seed + C) % M, or M if bigger than that.  */\n\n  TMP_MARK;\n\n  ta = an + seedn + 1;\n  tn = BITS_TO_LIMBS (m2exp);\n  if (ta <= tn) /* that is, if (ta < tn + 1) */\n    {\n      mp_size_t tmp = an + seedn;\n      ta = tn + 1;\n      tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);\n      MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out.  */\n    }\n  else\n    tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);\n\n  /* t = a * seed.  NOTE: an is always > 0; see initialization.  */\n  ASSERT (seedn >= an && an > 0);\n  mpn_mul (tp, seedp, seedn, ap, an);\n\n  /* t = t + c.  NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);\n     see initialization.  
*/\n  ASSERT (tn >= p->_cn);\n  __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);\n\n  /* t = t % m */\n  tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;\n\n  /* Save result as next seed.  */\n  MPN_COPY (PTR (p->_mp_seed), tp, tn);\n\n  /* Discard the lower m2exp/2 of the result.  */\n  bits = m2exp / 2;\n  xn = bits / GMP_NUMB_BITS;\n\n  tn -= xn;\n  if (tn > 0)\n    {\n      unsigned int cnt = bits % GMP_NUMB_BITS;\n      if (cnt != 0)\n\t{\n\t  mpn_rshift (tp, tp + xn, tn, cnt);\n\t  MPN_COPY_INCR (rp, tp, xn + 1);\n\t}\n      else\t\t\t/* Even limb boundary.  */\n\tMPN_COPY_INCR (rp, tp + xn, tn);\n    }\n\n  TMP_FREE;\n\n  /* Return number of valid bits in the result.  */\n  return (m2exp + 1) / 2;\n}\n\n\n/* Obtain a sequence of random numbers.  */\nstatic void\nrandget_lc (gmp_randstate_t rstate, mp_ptr rp, mpir_ui nbits)\n{\n  mpir_ui rbitpos;\n  int chunk_nbits;\n  mp_ptr tp;\n  mp_size_t tn;\n  gmp_rand_lc_struct *p;\n  TMP_DECL;\n\n  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);\n\n  TMP_MARK;\n\n  chunk_nbits = p->_mp_m2exp / 2;\n  tn = BITS_TO_LIMBS (chunk_nbits);\n\n  tp = (mp_ptr) TMP_ALLOC (tn * BYTES_PER_MP_LIMB);\n\n  rbitpos = 0;\n  while (rbitpos + chunk_nbits <= nbits)\n    {\n      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;\n\n      if (rbitpos % GMP_NUMB_BITS != 0)\n\t{\n\t  mp_limb_t savelimb, rcy;\n\t  /* Target of new chunk is not bit aligned.  Use temp space\n\t     and align things by shifting it up.  */\n\t  lc (tp, rstate);\n\t  savelimb = r2p[0];\n\t  rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);\n\t  r2p[0] |= savelimb;\n\t  /* bogus */\n\t  if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)\n\t      > GMP_NUMB_BITS)\n\t    r2p[tn] = rcy;\n\t}\n      else\n\t{\n\t  /* Target of new chunk is bit aligned.  Let `lc' put bits\n\t     directly into our target variable.  */\n\t  lc (r2p, rstate);\n\t}\n      rbitpos += chunk_nbits;\n    }\n\n  /* Handle last [0..chunk_nbits) bits.  
*/\n  if (rbitpos != nbits)\n    {\n      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;\n      int last_nbits = nbits - rbitpos;\n      tn = BITS_TO_LIMBS (last_nbits);\n      lc (tp, rstate);\n      if (rbitpos % GMP_NUMB_BITS != 0)\n\t{\n\t  mp_limb_t savelimb, rcy;\n\t  /* Target of new chunk is not bit aligned.  Use temp space\n\t     and align things by shifting it up.  */\n\t  savelimb = r2p[0];\n\t  rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);\n\t  r2p[0] |= savelimb;\n\t  if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)\n\t    r2p[tn] = rcy;\n\t}\n      else\n\t{\n\t  MPN_COPY (r2p, tp, tn);\n\t}\n      /* Mask off top bits if needed.  */\n      if (nbits % GMP_NUMB_BITS != 0)\n\trp[nbits / GMP_NUMB_BITS]\n\t  &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);\n    }\n\n  TMP_FREE;\n}\n\n\nstatic void\nrandseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)\n{\n  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);\n  mpz_ptr seedz = p->_mp_seed;\n  mp_size_t seedn = BITS_TO_LIMBS (p->_mp_m2exp);\n\n  /* Store p->_mp_seed as an unnormalized integer with size enough\n     for numbers up to 2^m2exp-1.  That size can't be zero.  
*/\n  mpz_fdiv_r_2exp (seedz, seed, p->_mp_m2exp);\n  MPN_ZERO (&PTR (seedz)[SIZ (seedz)], seedn - SIZ (seedz));\n  SIZ (seedz) = seedn;\n}\n\n\nstatic void\nrandclear_lc (gmp_randstate_t rstate)\n{\n  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);\n\n  mpz_clear (p->_mp_seed);\n  mpz_clear (p->_mp_a);\n  (*__gmp_free_func) (p, sizeof (gmp_rand_lc_struct));\n}\n\nstatic void randiset_lc(gmp_randstate_ptr dst, gmp_randstate_srcptr src);\n\nstatic const gmp_randfnptr_t Linear_Congruential_Generator = {\n  randseed_lc,\n  randget_lc,\n  randclear_lc,\n  randiset_lc\n};\n\nstatic void\nrandiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)\n{\n  gmp_rand_lc_struct *dstp, *srcp;\n\n  srcp = (gmp_rand_lc_struct *) RNG_STATE (src);\n  dstp = (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));\n\n  RNG_STATE (dst) = (void *) dstp;\n  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;\n\n  /* _mp_seed and _mp_a might be unnormalized (high zero limbs), but\n     mpz_init_set won't worry about that */\n  mpz_init_set (dstp->_mp_seed, srcp->_mp_seed);\n  mpz_init_set (dstp->_mp_a,    srcp->_mp_a);\n\n  dstp->_cn = srcp->_cn;\n\n  dstp->_cp[0] = srcp->_cp[0];\n  if (LIMBS_PER_UI > 1)\n    dstp->_cp[1] = srcp->_cp[1];\n  if (LIMBS_PER_UI > 2)  /* usually there's only 1 or 2 */\n    MPN_COPY (dstp->_cp + 2, srcp->_cp + 2, LIMBS_PER_UI - 2);\n\n  dstp->_mp_m2exp = srcp->_mp_m2exp;\n}\n\n\nvoid\ngmp_randinit_lc_2exp (gmp_randstate_t rstate,\n\t\t      mpz_srcptr a,\n\t\t      mpir_ui c,\n\t\t      mp_bitcnt_t m2exp)\n{\n  gmp_rand_lc_struct *p;\n  mp_size_t seedn = BITS_TO_LIMBS (m2exp);\n\n  ASSERT_ALWAYS (m2exp != 0);\n\n  p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);\n  RNG_STATE (rstate) = (void *) p;\n  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;\n\n  /* allocate m2exp bits of space for p->_mp_seed, and initial seed \"1\" */\n  mpz_init2 (p->_mp_seed, m2exp);\n  MPN_ZERO (PTR (p->_mp_seed), seedn);\n  SIZ 
(p->_mp_seed) = seedn;\n  PTR (p->_mp_seed)[0] = 1;\n\n  /* \"a\", forced to 0 to 2^m2exp-1 */\n  mpz_init (p->_mp_a);\n  mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);\n\n  /* Avoid SIZ(a) == 0 to avoid checking for special case in lc().  */\n  if (SIZ (p->_mp_a) == 0)\n    {\n      SIZ (p->_mp_a) = 1;\n      PTR (p->_mp_a)[0] = CNST_LIMB (0);\n    }\n\n  MPN_SET_UI (p->_cp, p->_cn, c);\n\n  /* Internally we may discard any bits of c above m2exp.  The following\n     code ensures that __GMPN_ADD in lc() will always work.  */\n  if (seedn < p->_cn)\n    p->_cn = (p->_cp[0] != 0);\n\n  p->_mp_m2exp = m2exp;\n}\n"
  },
  {
    "path": "randmt.c",
    "content": "/* Mersenne Twister pseudo-random number generator functions.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.  */\n\n#include <stdio.h>   /* for NULL */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"randmt.h\"\n\n\n/* This code implements the Mersenne Twister pseudorandom number generator\n   by Takuji Nishimura and Makoto Matsumoto.  The buffer initialization\n   function is different in order to permit seeds greater than 2^32-1.\n\n   This file contains a special __gmp_randinit_mt_noseed which excludes the\n   seeding function from the gmp_randfnptr_t routines.  This is for use by\n   mpn_random and mpn_random2 on the global random generator.  MT seeding\n   uses mpz functions, and we don't want mpn routines dragging mpz functions\n   into the link.  */\n\n\n/* Default seed to use when the generator is not initialized.  */\n#define DEFAULT_SEED 5489 /* was 4357 */\n\n/* Tempering masks.  
*/\n#define MASK_1 0x9D2C5680\n#define MASK_2 0xEFC60000\n\n/* Initial state of buffer when initialized with default seed.  */\nstatic const gmp_uint_least32_t default_state[N] =\n{\n  0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,\n  0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,\n  0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,\n  0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,\n  0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,\n  0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,\n  0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,\n  0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,\n  0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,\n  0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,\n  0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,\n  0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,\n  0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,\n  0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,\n  0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,\n  0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,\n  0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,\n  0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,\n  0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,\n  0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,\n  0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,\n  0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,\n  0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,\n  0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,\n  0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,\n  
0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,\n  0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,\n  0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,\n  0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,\n  0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,\n  0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,\n  0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,\n  0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,\n  0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,\n  0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,\n  0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,\n  0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,\n  0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,\n  0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,\n  0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,\n  0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,\n  0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,\n  0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,\n  0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,\n  0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,\n  0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,\n  0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,\n  0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,\n  0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,\n  0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,\n  0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,\n  0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,\n  0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,\n  
0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,\n  0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,\n  0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,\n  0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,\n  0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,\n  0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,\n  0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,\n  0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,\n  0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,\n  0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,\n  0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,\n  0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,\n  0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,\n  0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,\n  0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,\n  0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,\n  0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,\n  0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,\n  0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,\n  0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,\n  0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,\n  0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,\n  0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,\n  0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,\n  0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,\n  0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,\n  0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,\n  0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,\n  
0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,\n  0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,\n  0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,\n  0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,\n  0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,\n  0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,\n  0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,\n  0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,\n  0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,\n  0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,\n  0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,\n  0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,\n  0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,\n  0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,\n  0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,\n  0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,\n  0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,\n  0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,\n  0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,\n  0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,\n  0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,\n  0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,\n  0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112\n};\n\nvoid\n__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])\n{\n  gmp_uint_least32_t y;\n  int kk;\n\n  for (kk = 0; kk < N - M; kk++)\n    {\n      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);\n      mt[kk] = mt[kk + M] ^ (y >> 1) ^ ((y & 0x01) != 0 ? 
MATRIX_A : 0);\n    }\n  for (; kk < N - 1; kk++)\n    {\n      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);\n      mt[kk] = mt[kk - (N - M)] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);\n    }\n\n  y = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);\n  mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);\n}\n\n\n/* Get nbits bits of output from the generator into dest.\n   Note that Mersenne Twister is designed to produce outputs in\n   32-bit words.  */\nvoid\n__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, mpir_ui nbits)\n{\n  gmp_uint_least32_t y;\n  int rbits;\n  mp_size_t i;\n  mp_size_t nlimbs;\n  int *pmti;\n  gmp_uint_least32_t *mt;\n\n  pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;\n  mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;\n\n  nlimbs = nbits / GMP_NUMB_BITS;\n  rbits = nbits % GMP_NUMB_BITS;\n\n#define NEXT_RANDOM\t\t\t\\\n  do\t\t\t\t\t\\\n    {\t\t\t\t\t\\\n      if (*pmti >= N)\t\t\t\\\n\t{\t\t\t\t\\\n\t  __gmp_mt_recalc_buffer (mt);  \\\n\t  *pmti = 0;\t\t\t\\\n\t}\t\t\t\t\\\n      y = mt[(*pmti)++];\t\t\\\n      y ^= (y >> 11);\t\t\t\\\n      y ^= (y << 7) & MASK_1;\t\t\\\n      y ^= (y << 15) & MASK_2;\t\t\\\n      y ^= (y >> 18);\t\t\t\\\n    }\t\t\t\t\t\\\n  while (0)\n\n\n  /* Handle the common cases of 32- or 64-bit limbs with fast,\n     optimized routines, and the rest of cases with a general\n     routine.  In all cases, no more than 31 bits are rejected\n     for the last limb so that every version of the code is\n     consistent with the others.  
*/\n\n#if (GMP_NUMB_BITS == 32)\n\n  for (i = 0; i < nlimbs; i++)\n    {\n      NEXT_RANDOM;\n      dest[i] = (mp_limb_t) y;\n    }\n  if (rbits)\n    {\n      NEXT_RANDOM;\n      dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));\n    }\n\n#else /* GMP_NUMB_BITS != 32 */\n#if (GMP_NUMB_BITS == 64)\n\n  for (i = 0; i < nlimbs; i++)\n    {\n      NEXT_RANDOM;\n      dest[i] = (mp_limb_t) y;\n      NEXT_RANDOM;\n      dest[i] |= (mp_limb_t) y << 32;\n    }\n  if (rbits)\n    {\n      if (rbits < 32)\n\t{\n\t  NEXT_RANDOM;\n\t  dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));\n\t}\n      else\n\t{\n\t  NEXT_RANDOM;\n\t  dest[nlimbs] = (mp_limb_t) y;\n\t  if (rbits > 32)\n\t    {\n\t      NEXT_RANDOM;\n\t      dest[nlimbs] |=\n\t\t((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;\n\t    }\n\t}\n    }\n\n#else /* GMP_NUMB_BITS != 64 */\n\n  {\n    /* Fall back to a general algorithm.  This algorithm works by\n       keeping a pool of up to 64 bits (2 outputs from MT) acting\n       as a shift register from which bits are consumed as needed.\n       Bits are consumed using the LSB bits of bitpool_l, and\n       inserted via bitpool_h and shifted to the right place.  */\n\n    gmp_uint_least32_t bitpool_h = 0;\n    gmp_uint_least32_t bitpool_l = 0;\n    int bits_in_pool = 0;\t/* Holds number of valid bits in the pool.  */\n    int bits_to_fill;\t\t/* Holds total number of bits to put in\n\t\t\t\t   destination.  */\n    int bitidx;\t\t\t/* Holds the destination bit position.  */\n    mp_size_t nlimbs2;\t\t/* Number of whole+partial limbs to fill.  */\n\n    nlimbs2 = nlimbs + (rbits != 0);\n\n    for (i = 0; i < nlimbs2; i++)\n      {\n\tbitidx = 0;\n\tif (i < nlimbs)\n\t  bits_to_fill = GMP_NUMB_BITS;\n\telse\n\t  bits_to_fill = rbits;\n\n\tdest[i] = CNST_LIMB (0);\n\twhile (bits_to_fill >= 32) /* Process whole 32-bit blocks first.  */\n\t  {\n\t    if (bits_in_pool < 32)\t/* Need more bits.  */\n\t      {\n\t\t/* 64-bit right shift. 
*/\n\t\tNEXT_RANDOM;\n\t\tbitpool_h = y;\n\t\tbitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;\n\t\tif (bits_in_pool == 0)\n\t\t  bitpool_h = 0;\n\t\telse\n\t\t  bitpool_h >>= 32 - bits_in_pool;\n\t\tbits_in_pool += 32;\t/* We've got 32 more bits.  */\n\t      }\n\n\t    /* Fill a 32-bit chunk */\n\t    dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;\n\t    bitpool_l = bitpool_h;\n\t    bits_in_pool -= 32;\n\t    bits_to_fill -= 32;\n\t    bitidx += 32;\n\t  }\n\n\t/* Cover the case where GMP_NUMB_BITS is not a multiple of 32.  */\n\tif (bits_to_fill != 0)\n\t  {\n\t    if (bits_in_pool < bits_to_fill)\n\t      {\n\t\tNEXT_RANDOM;\n\t\tbitpool_h = y;\n\t\tbitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;\n\t\tif (bits_in_pool == 0)\n\t\t  bitpool_h = 0;\n\t\telse\n\t\t  bitpool_h >>= 32 - bits_in_pool;\n\t\tbits_in_pool += 32;\n\t      }\n\n\t    dest[i] |= (((mp_limb_t) bitpool_l\n\t\t\t & ~(~CNST_LIMB (0) << bits_to_fill))\n\t\t\t<< bitidx);\n\t    bitpool_l = ((bitpool_l >> bits_to_fill)\n\t\t\t | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;\n\t    bitpool_h >>= bits_to_fill;\n\t    bits_in_pool -= bits_to_fill;\n\t  }\n      }\n  }\n\n#endif /* GMP_NUMB_BITS != 64 */\n#endif /* GMP_NUMB_BITS != 32 */\n}\n\nvoid\n__gmp_randclear_mt (gmp_randstate_t rstate)\n{\n  (*__gmp_free_func) ((void *) RNG_STATE (rstate),\n\t\t      sizeof (gmp_rand_mt_struct));\n}\n\nvoid __gmp_randiset_mt(gmp_randstate_ptr dst, gmp_randstate_srcptr src);\n\nstatic const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {\n  NULL,\n  __gmp_randget_mt,\n  __gmp_randclear_mt,\n  __gmp_randiset_mt\n};\n\nvoid\n__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)\n{\n  gmp_rand_mt_struct *dstp, *srcp;\n  int  i;\n\n  srcp = (gmp_rand_mt_struct *) RNG_STATE (src);\n  dstp = (*__gmp_allocate_func) (sizeof (gmp_rand_mt_struct));\n\n  RNG_STATE (dst) = (void *) dstp;\n  RNG_FNPTR (dst) = RNG_FNPTR(src);\n\n  for (i = 0; i < N; i++)\n    dstp->mt[i] = srcp->mt[i];\n\n 
 dstp->mti = srcp->mti;\n}\n\n\n/* Initialize MT-specific data.  */\nvoid\n__gmp_randinit_mt_noseed (gmp_randstate_t rstate)\n{\n  int i;\n  gmp_rand_mt_struct *p;\n\n  /* Set the generator functions.  */\n  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator_Noseed;\n\n  /* Allocate the MT-specific state.  */\n  p = (gmp_rand_mt_struct *)\n    (*__gmp_allocate_func) (sizeof (gmp_rand_mt_struct));\n  RNG_STATE (rstate) = (mp_ptr) p;\n\n  /* Set state for default seed.  */\n  for (i = 0; i < N; i++)\n    p->mt[i] = default_state[i];\n\n  ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti = WARM_UP % N;\n}\n"
  },
  {
    "path": "randmt.h",
    "content": "/* Mersenne Twister pseudo-random number generator defines.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.  */\n\n\n/* Number of extractions used to warm the buffer up.  */\n#define WARM_UP 2000\n\n/* Period parameters.  */\n#define N 624\n#define M 397\n#define MATRIX_A 0x9908B0DF   /* Constant vector a.  */\n\n/* State structure for MT.  */\ntypedef struct\n{\n  gmp_uint_least32_t mt[N];    /* State array.  */\n  int mti;                     /* Index of current value.  */\n} gmp_rand_mt_struct;\n\n\nvoid __gmp_mt_recalc_buffer(gmp_uint_least32_t *);\nvoid __gmp_randget_mt(gmp_randstate_t, mp_ptr, mpir_ui);\nvoid __gmp_randclear_mt(gmp_randstate_t rstate);\nvoid __gmp_randiset_mt(gmp_randstate_ptr, gmp_randstate_srcptr);\n"
  },
  {
    "path": "randmts.c",
    "content": "/* Mersenne Twister pseudo-random number generator functions.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"randmt.h\"\n\n\n/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,\n   needed by the seeding function below.  */\nstatic void\nmangle_seed (mpz_ptr r, mpz_srcptr b_orig)\n{\n  mpz_t          t, b;\n  unsigned long  e = 0x40118124;\n  unsigned long  bit = 0x20000000;\n\n  mpz_init (t);\n  mpz_init_set (b, b_orig);  /* in case r==b_orig */\n\n  mpz_set (r, b);\n  do\n    {\n      mpz_mul (r, r, r);\n\n    reduce:\n      for (;;)\n        {\n          mpz_tdiv_q_2exp (t, r, 19937L);\n          if (mpz_sgn (t) == 0)\n            break;\n          mpz_tdiv_r_2exp (r, r, 19937L);\n          mpz_addmul_ui (r, t, 20023L);\n        }\n\n      if ((e & bit) != 0)\n        {\n          e &= ~bit;\n          mpz_mul (r, r, b);\n          goto reduce;\n        }\n\n      bit >>= 1;\n    }\n  while (bit != 0);\n\n  mpz_clear (t);\n  mpz_clear (b);\n}\n\n\n/* Seeding function.  Uses powering modulo a non-Mersenne prime to obtain\n   a permutation of the input seed space.  
The modulus is 2^19937-20023,\n   which is probably prime.  The power is 1074888996.  In order to avoid\n   seeds 0 and 1 generating invalid or strange output, the input seed is\n   first manipulated as follows:\n\n     seed1 = seed mod (2^19937-20027) + 2\n\n   so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the\n   powering is performed as follows:\n\n     seed2 = (seed1^1074888996) mod (2^19937-20023)\n\n   and then seed2 is used to bootstrap the buffer.\n\n   This method aims to give guarantees that:\n     a) seed2 will never be zero,\n     b) seed2 will very seldom have a very low population of ones in its\n\tbinary representation, and\n     c) every seed between 0 and 2^19937-20028 (inclusive) will yield a\n\tdifferent sequence.\n\n   CAVEATS:\n\n   The period of the seeding function is 2^19937-20027.  This means that\n   with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences\n   are obtained as with seeds 0, 1, etc.; it also means that seed -1\n   produces the same sequence as seed 2^19937-20028, etc.\n */\n\nstatic void\nrandseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)\n{\n  int i;\n  size_t cnt;\n\n  gmp_rand_mt_struct *p;\n  mpz_t mod;    /* Modulus.  */\n  mpz_t seed1;  /* Intermediate result.  */\n\n  p = (gmp_rand_mt_struct *) RNG_STATE (rstate);\n\n  mpz_init (mod);\n  mpz_init (seed1);\n\n  mpz_set_ui (mod, 0L);\n  mpz_setbit (mod, 19937L);\n  mpz_sub_ui (mod, mod, 20027L);\n  mpz_mod (seed1, seed, mod);\t/* Reduce `seed' modulo `mod'.  */\n  mpz_add_ui (seed1, seed1, 2L);\t/* seed1 is now ready.  */\n  mangle_seed (seed1, seed1);\t/* Perform the mangling by powering.  */\n\n  /* Copy the last bit into bit 31 of mt[0] and clear it.  */\n  p->mt[0] = (mpz_tstbit (seed1, 19936L) != 0) ? 0x80000000 : 0;\n  mpz_clrbit (seed1, 19936L);\n\n  /* Split seed1 into N-1 32-bit chunks.  
*/\n  mpz_export (&p->mt[1], &cnt, -1, sizeof (p->mt[1]), 0,\n              8 * sizeof (p->mt[1]) - 32, seed1);\n  cnt++;\n  ASSERT (cnt <= N);\n  while (cnt < N)\n    p->mt[cnt++] = 0;\n\n  mpz_clear (mod);\n  mpz_clear (seed1);\n\n  /* Warm the generator up if necessary.  */\n  if (WARM_UP != 0)\n    for (i = 0; i < WARM_UP / N; i++)\n      __gmp_mt_recalc_buffer (p->mt);\n\n  p->mti = WARM_UP % N;\n}\n\n\nstatic const gmp_randfnptr_t Mersenne_Twister_Generator = {\n  randseed_mt,\n  __gmp_randget_mt,\n  __gmp_randclear_mt,\n  __gmp_randiset_mt\n};\n\n/* Initialize MT-specific data.  */\nvoid\ngmp_randinit_mt (gmp_randstate_t rstate)\n{\n  __gmp_randinit_mt_noseed (rstate);\n  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;\n}\n"
  },
  {
    "path": "randmui.c",
    "content": "/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.\n\nCopyright 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n\n/* If n is a power of 2 then the test ret<n is always true and the loop is\n   unnecessary, but there's no need to add special code for this.  Just get\n   the \"bits\" calculation correct and let it go through normally.\n\n   If n is 1 then will have bits==0 and _gmp_rand will produce no output and\n   we always return 0.  Again there seems no need for a special case, just\n   initialize a[0]=0 and let it go through normally.  */\n\n#define MAX_URANDOMM_ITER  80\n\nmpir_ui\ngmp_urandomm_ui (gmp_randstate_ptr rstate, mpir_ui n)\n{\n  mp_limb_t      a[LIMBS_PER_UI];\n  unsigned long  ret, bits, leading;\n  int            i;\n\n  if (UNLIKELY (n == 0))\n    DIVIDE_BY_ZERO;\n\n  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at\n     all (bits==0 arises when n==1), or if bits <= GMP_NUMB_BITS then it\n     will store only a[0].  
*/\n  a[0] = 0;\n#if LIMBS_PER_UI > 1\n  a[1] = 0;\n#endif\n\n  count_leading_zeros (leading, (mp_limb_t) n);\n  bits = GMP_LIMB_BITS - leading - (POW2_P(n) != 0);\n\n  for (i = 0; i < MAX_URANDOMM_ITER; i++)\n    {\n      _gmp_rand (a, rstate, bits);\n#if LIMBS_PER_UI == 1\n      ret = a[0];\n#else\n      ret = a[0] | (a[1] << GMP_NUMB_BITS);\n#endif\n      if (LIKELY (ret < n))   /* usually one iteration suffices */\n        goto done;\n    }\n\n  /* Too many iterations, there must be something degenerate about the\n     rstate algorithm.  Return r%n.  */\n  ret -= n;\n  ASSERT (ret < n);\n\n done:\n  return ret;\n}\n"
  },
  {
    "path": "rands.c",
    "content": "/* __gmp_rands -- global random state for old-style random functions.\n\n   EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY.  IT'S ALMOST CERTAIN TO\n   BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU\n   MP RELEASES.  */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Use this via the RANDS macro in gmp-impl.h */\nchar             __gmp_rands_initialized = 0;\ngmp_randstate_t  __gmp_rands;\n"
  },
  {
    "path": "randsd.c",
    "content": "/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.\n\nCopyright 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\ngmp_randseed (gmp_randstate_t rstate,\n\t      mpz_srcptr seed)\n{\n  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randseed_fn) (rstate, seed);\n}\n"
  },
  {
    "path": "randsdui.c",
    "content": "/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random\n   state STATE.\n\nCopyright 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\ngmp_randseed_ui (gmp_randstate_t rstate,\n                 mpir_ui seed)\n{\n  mpz_t zseed;\n  mp_limb_t zlimbs[LIMBS_PER_UI];\n\n  MPZ_FAKE_UI (zseed, zlimbs, seed);\n  gmp_randseed (rstate, zseed);\n}\n"
  },
  {
    "path": "scanf/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -D__GMP_WITHIN_GMP -I$(top_srcdir)\n\nnoinst_LTLIBRARIES = libscanf.la\n\nlibscanf_la_SOURCES = doscan.c fscanf.c fscanffuns.c scanf.c sscanf.c sscanffuns.c vfscanf.c vscanf.c vsscanf.c\n"
  },
  {
    "path": "scanf/doscan.c",
    "content": "/* __gmp_doscan -- formatted input internals.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */\n#endif\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <ctype.h>\n#include <stddef.h>    /* for ptrdiff_t */\n#include <stdio.h>\n#include <stdlib.h>    /* for strtol */\n#include <string.h>\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for localeconv */\n#endif\n\n#if HAVE_INTTYPES_H\n# include <inttypes.h> /* for intmax_t */\n#else\n# if HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#if HAVE_SYS_TYPES_H\n#include <sys/types.h> /* for quad_t */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Change this to \"#define TRACE(x) x\" for some traces. 
*/\n#define TRACE(x)\n\n\n/* General:\n\n       It's necessary to parse up the format string to recognise the GMP\n       extra types F, Q and Z.  Other types and conversions are passed\n       across to the standard sscanf or fscanf via funs->scan, for ease of\n       implementation.  This is essential in the case of something like glibc\n       %p where the pointer format isn't actually documented.\n\n       Because funs->scan doesn't get the whole input it can't put the right\n       values in for %n, so that's handled in __gmp_doscan.  Neither sscanf\n       nor fscanf directly indicate how many characters were read, so an\n       extra %n is appended to each run for that.  For fscanf this merely\n       supports our %n output, but for sscanf it lets funs->step move us\n       along the input string.\n\n       Whitespace and literal matches in the format string, including %%,\n       are handled directly within __gmp_doscan.  This is reasonably\n       efficient, and avoids some suspicious behaviour observed in various\n       system libc's.  GLIBC 2.2.4 for instance returns 0 on\n\n           sscanf(\" \", \" x\")\n       or\n           sscanf(\" \", \" x%d\",&n)\n\n       whereas we think they should return EOF, since end-of-string is\n       reached when a match of \"x\" is required.\n\n       For standard % conversions, funs->scan is called once for each\n       conversion.  If we had vfscanf and vsscanf and could rely on their\n       fixed text matching behaviour then we could call them with multiple\n       consecutive standard conversions.  But plain fscanf and sscanf work\n       fine, and parsing one field at a time shouldn't be too much of a\n       slowdown.\n\n   gmpscan:\n\n       gmpscan reads a gmp type.  It's only used from one place, but is a\n       separate subroutine to avoid a big chunk of complicated code in the\n       middle of __gmp_doscan.  
Within gmpscan a couple of loopbacks make it\n       possible to share code for parsing integers, rationals and floats.\n\n       In gmpscan normally one char of lookahead is maintained, but when width\n       is reached that stops, on the principle that an fgetc/ungetc of a char\n       past where we're told to stop would be undesirable.  \"chars\" is how many\n       characters have been read so far, including the current c.  When\n       chars==width and another character is desired then a jump is done to the\n       \"convert\" stage.  c is invalid and mustn't be unget'ed in this case;\n       chars is set to width+1 to indicate that.\n\n       gmpscan normally returns the number of characters read.  -1 means an\n       invalid field, -2 means EOF reached before any matching characters\n       were read.\n\n       For hex floats, the mantissa part is passed to mpf_set_str, then the\n       exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier\n       than teaching mpf_set_str about an exponent factor (ie. 2) differing\n       from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and\n       mpf_div_2exp will preserve the application requested precision, so\n       nothing in that respect is lost by making this a two-step process.\n\n   Matching and errors:\n\n       C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest\n       string which is a match for the appropriate type, or a prefix of a\n       match.  With that done, if it's only a prefix then the result is a\n       matching failure, ie. invalid input.\n\n       This rule seems fairly clear, but doesn't seem to be universally\n       applied in system C libraries.  Even GLIBC doesn't seem to get it\n       right, insofar as it seems to accept some apparently invalid forms.\n       Eg. 
glibc 2.3.1 accepts \"0x\" for a \"%i\", where a reading of the\n       standard would suggest a non-empty sequence of digits should be\n       required after an \"0x\".\n\n       A footnote to 7.19.6.2 para 17 notes how this input item reading can\n       mean inputs acceptable to strtol are not acceptable to fscanf.  We\n       think this confirms our reading of \"0x\" as invalid.\n\n       Clearly gmp_sscanf could backtrack to a longest input which was a\n       valid match for a given item, but this is not done, since C99 says\n       sscanf is identical to fscanf, so we make gmp_sscanf identical to\n       gmp_fscanf.\n\n   Types:\n\n       C99 says \"ll\" is for long long, and \"L\" is for long double floats.\n       Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This\n       doesn't affect us directly, since both are passed through to plain\n       scanf.  It seems wisest not to try to enforce the C99 rule.  This is\n       consistent with what we said before, though whether it actually\n       worked was always up to the C library.\n\n   Alternatives:\n\n       Consideration was given to using separate code for gmp_fscanf and\n       gmp_sscanf.  The sscanf case could zip across a string doing literal\n       matches or recognising digits in gmpscan, rather than making a\n       function call fun->get per character.  The fscanf could use getc\n       rather than fgetc too, which might help those systems where getc is a\n       macro or otherwise inlined.  But none of this scanning and converting\n       will be particularly fast, so the two are done together to keep it a\n       little simpler for now.\n\n       Various multibyte string issues are not addressed, for a start C99\n       scanf says the format string is multibyte.  
Since we pass %c, %s and\n       %[ to the system scanf, they might do multibyte reads already, but\n       it's another matter whether or not that can be used, since our digit\n       and whitespace parsing is only unibyte.  The plan is to quietly\n       ignore multibyte locales for now.  This is not as bad as it sounds,\n       since GMP is presumably used mostly on numbers, which can be\n       perfectly adequately treated in plain ASCII.\n\n*/\n\n\nstruct gmp_doscan_params_t {\n  int   base;\n  int   ignore;\n  char  type;\n  int   width;\n};\n\n\n#define GET(c)                  \\\n  do {                          \\\n    ASSERT (chars <= width);    \\\n    chars++;                    \\\n    if (chars > width)          \\\n      goto convert;             \\\n    (c) = (*funs->get) (data);  \\\n  } while (0)\n\n/* store into \"s\", extending if necessary */\n#define STORE(c)                                                        \\\n  do {                                                                  \\\n    ASSERT (s_upto <= s_alloc);                                         \\\n    if (s_upto >= s_alloc)                                              \\\n      {                                                                 \\\n        size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \\\n        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \\\n        s_alloc = s_alloc_new;                                          \\\n      }                                                                 \\\n    s[s_upto++] = c;                                                    \\\n  } while (0)\n\n#define S_ALLOC_STEP  512\n\nstatic int\ngmpscan (const struct gmp_doscan_funs_t *funs, void *data,\n         const struct gmp_doscan_params_t *p, void *dst)\n{\n  int     chars, c, base, first, width, seen_point, seen_digit, hexfloat;\n  size_t  s_upto, s_alloc, hexexp;\n  char    *s;\n  int     invalid = 0;\n\n  TRACE (printf 
(\"gmpscan\\n\"));\n\n  ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');\n\n  c = (*funs->get) (data);\n  if (c == EOF)\n    return -2;\n\n  chars = 1;\n  first = 1;\n  seen_point = 0;\n  width = (p->width == 0 ? INT_MAX-1 : p->width);\n  base = p->base;\n  s_alloc = S_ALLOC_STEP;\n  s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);\n  s_upto = 0;\n  hexfloat = 0;\n  hexexp = 0;\n\n another:\n  seen_digit = 0;\n  if (c == '-')\n    {\n      STORE (c);\n      goto get_for_sign;\n    }\n  else if (c == '+')\n    {\n      /* don't store '+', it's not accepted by mpz_set_str etc */\n    get_for_sign:\n      GET (c);\n    }\n\n  if (base == 0)\n    {\n      base = 10;                  /* decimal if no base indicator */\n      if (c == '0')\n        {\n          seen_digit = 1;         /* 0 alone is a valid number */\n          if (p->type != 'F')\n            base = 8;             /* leading 0 is octal, for non-floats */\n          STORE (c);\n          GET (c);\n          if (c == 'x' || c == 'X')\n            {\n              base = 16;\n              seen_digit = 0;     /* must have digits after an 0x */\n              if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */\n                hexfloat = 1;\n              else\n                STORE (c);\n              GET (c);\n            }\n        }\n    }\n\n digits:\n  for (;;)\n    {\n      if (base == 16)\n        {\n          if (! isxdigit (c))\n            break;\n        }\n      else\n        {\n          if (! isdigit (c))\n            break;\n          if (base == 8 && (c == '8' || c == '9'))\n            break;\n        }\n\n      seen_digit = 1;\n      STORE (c);\n      GET (c);\n    }\n\n  if (first)\n    {\n      /* decimal point */\n      if (p->type == 'F' && ! seen_point)\n        {\n          /* For a multi-character decimal point, if the first character is\n             present then all of it must be, otherwise the input is\n             considered invalid.  
*/\n          const char  *point = GMP_DECIMAL_POINT;\n          int         pc = (unsigned char) *point++;\n          if (c == pc)\n            {\n              for (;;)\n                {\n                  STORE (c);\n                  GET (c);\n                  pc = (unsigned char) *point++;\n                  if (pc == '\\0')\n                    break;\n                  if (c != pc)\n                    goto set_invalid;\n                }\n              seen_point = 1;\n              goto digits;\n            }\n        }\n\n      /* exponent */\n      if (p->type == 'F')\n        {\n          if (hexfloat && (c == 'p' || c == 'P'))\n            {\n              hexexp = s_upto; /* exponent location */\n              base = 10;       /* exponent in decimal */\n              goto exponent;\n            }\n          else if (! hexfloat && (c == 'e' || c == 'E'))\n            {\n            exponent:\n              /* must have at least one digit in the mantissa, just an exponent\n                 is not good enough */\n              if (! seen_digit)\n                goto set_invalid;\n\n            do_second:\n              first = 0;\n              STORE (c);\n              GET (c);\n              goto another;\n            }\n        }\n\n      /* denominator */\n      if (p->type == 'Q' && c == '/')\n        {\n          /* must have at least one digit in the numerator */\n          if (! seen_digit)\n            goto set_invalid;\n\n          /* now look for at least one digit in the denominator */\n          seen_digit = 0;\n\n          /* allow the base to be redetermined for \"%i\" */\n          base = p->base;\n          goto do_second;\n        }\n    }\n\n convert:\n  if (! seen_digit)\n    {\n    set_invalid:\n      invalid = 1;\n      goto done;\n    }\n\n  if (! 
p->ignore)\n    {\n      STORE ('\\0');\n      TRACE (printf (\"  convert \\\"%s\\\"\\n\", s));\n\n      /* We ought to have parsed out a valid string above, so just test\n         mpz_set_str etc with an ASSERT.  */\n      switch (p->type) {\n      case 'F':\n        {\n          mpf_ptr  f = (mpf_ptr) dst;\n          if (hexexp != 0)\n            s[hexexp] = '\\0';\n          ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));\n          if (hexexp != 0)\n            {\n              char *dummy;\n              long  exp;\n              exp = strtol (s + hexexp + 1, &dummy, 10);\n              if (exp >= 0)\n                mpf_mul_2exp (f, f, (mpir_ui) exp);\n              else\n                mpf_div_2exp (f, f, - (mpir_ui) exp);\n            }\n        }\n        break;\n      case 'Q':\n        ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));\n        break;\n      case 'Z':\n        ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));\n        break;\n      default:\n        ASSERT (0);\n        /*FALLTHRU*/\n        break;\n      }\n    }\n\n done:\n  ASSERT (chars <= width+1);\n  if (chars != width+1)\n    {\n      (*funs->unget) (c, data);\n      TRACE (printf (\"  ungetc %d, to give %d chars\\n\", c, chars-1));\n    }\n  chars--;\n\n  (*__gmp_free_func) (s, s_alloc);\n\n  if (invalid)\n    {\n      TRACE (printf (\"  invalid\\n\"));\n      return -1;\n    }\n\n  TRACE (printf (\"  return %d chars (cf width %d)\\n\", chars, width));\n  return chars;\n}\n\n\n/* Read and discard whitespace, if any.  
Return number of chars skipped.\n   Whitespace skipping never provokes the EOF return from __gmp_doscan, so\n   it's not necessary to watch for EOF from funs->get, */\nstatic int\nskip_white (const struct gmp_doscan_funs_t *funs, void *data)\n{\n  int  c;\n  int  ret = 0;\n\n  do\n    {\n      c = (funs->get) (data);\n      ret++;\n    }\n  while (isspace (c));\n\n  (funs->unget) (c, data);\n  ret--;\n\n  TRACE (printf (\"  skip white %d\\n\", ret));\n  return ret;\n}\n\n\nint\n__gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,\n              const char *orig_fmt, va_list orig_ap)\n{\n  struct gmp_doscan_params_t  param;\n  va_list     ap;\n  char        *alloc_fmt;\n  const char  *fmt, *this_fmt, *end_fmt;\n  size_t      orig_fmt_len, alloc_fmt_size, len;\n  int         new_fields, new_chars;\n  char        fchar;\n  int         fields = 0;\n  int         chars = 0;\n\n  TRACE (printf (\"__gmp_doscan \\\"%s\\\"\\n\", orig_fmt);\n         if (funs->scan == (gmp_doscan_scan_t) sscanf)\n           printf (\"  s=\\\"%s\\\"\\n\", * (const char **) data));\n\n  /* Don't modify orig_ap, if va_list is actually an array and hence call by\n     reference.  It could be argued that it'd be more efficient to leave\n     callers to make a copy if they care, but doing so here is going to be a\n     very small part of the total work, and we may as well keep applications\n     out of trouble.  */\n  va_copy (ap, orig_ap);\n\n  /* Parts of the format string are going to be copied so that a \" %n\" can\n     be appended.  alloc_fmt is some space for that.  orig_fmt_len+4 will be\n     needed if fmt consists of a single \"%\" specifier, but otherwise is an\n     overestimate.  We're not going to be very fast here, so use\n     __gmp_allocate_func rather than TMP_ALLOC.  
*/\n  orig_fmt_len = strlen (orig_fmt);\n  alloc_fmt_size = orig_fmt_len + 4;\n  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);\n\n  fmt = orig_fmt;\n  end_fmt = orig_fmt + orig_fmt_len;\n\n  for (;;)\n    {\n    next:\n      fchar = *fmt++;\n\n      if (fchar == '\\0')\n        break;\n\n      if (isspace (fchar))\n        {\n          chars += skip_white (funs, data);\n          continue;\n        }\n\n      if (fchar != '%')\n        {\n          int  c;\n        literal:\n          c = (funs->get) (data);\n          if (c != fchar)\n            {\n              (funs->unget) (c, data);\n              if (c == EOF)\n                {\n                eof_no_match:\n                  if (fields == 0)\n                    fields = EOF;\n                }\n              goto done;\n            }\n          chars++;\n          continue;\n        }\n\n      param.type = '\\0';\n      param.base = 0;    /* for e,f,g,i */\n      param.ignore = 0;\n      param.width = 0;\n\n      this_fmt = fmt-1;\n      TRACE (printf (\"  this_fmt \\\"%s\\\"\\n\", this_fmt));\n\n      for (;;)\n        {\n          ASSERT (fmt <= end_fmt);\n\n          fchar = *fmt++;\n          switch (fchar) {\n\n          case '\\0':  /* unterminated % sequence */\n            ASSERT (0);\n            goto done;\n\n          case '%':   /* literal % */\n            goto literal;\n\n          case '[':   /* character range */\n            fchar = *fmt++;\n            if (fchar == '^')\n              fchar = *fmt++;\n            /* ']' allowed as the first char (possibly after '^') */\n            if (fchar == ']')\n              fchar = *fmt++;\n            for (;;)\n              {\n                ASSERT (fmt <= end_fmt);\n                if (fchar == '\\0')\n                  {\n                    /* unterminated % sequence */\n                    ASSERT (0);\n                    goto done;\n                  }\n                if (fchar == ']')\n                  break;\n           
     fchar = *fmt++;\n              }\n            /*FALLTHRU*/\n          case 'c':   /* characters */\n          case 's':   /* string of non-whitespace */\n          case 'p':   /* pointer */\n          libc_type:\n            len = fmt - this_fmt;\n            memcpy (alloc_fmt, this_fmt, len);\n            alloc_fmt[len++] = '%';\n            alloc_fmt[len++] = 'n';\n            alloc_fmt[len] = '\\0';\n\n            TRACE (printf (\"  scan \\\"%s\\\"\\n\", alloc_fmt);\n                   if (funs->scan == (gmp_doscan_scan_t) sscanf)\n                     printf (\"  s=\\\"%s\\\"\\n\", * (const char **) data));\n\n            new_chars = -1;\n            if (param.ignore)\n              {\n                new_fields = (*funs->scan) (data, alloc_fmt, &new_chars);\n                ASSERT (new_fields == 0 || new_fields == EOF);\n              }\n            else\n              {\n                new_fields = (*funs->scan) (data, alloc_fmt,\n                                            va_arg (ap, void *), &new_chars);\n                ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);\n\n                if (new_fields == 0)\n                  goto done;  /* invalid input */\n\n                if (new_fields == 1)\n                  ASSERT (new_chars != -1);\n              }\n            TRACE (printf (\"  new_fields %d   new_chars %d\\n\",\n                           new_fields, new_chars));\n\n            if (new_fields == -1)\n              goto eof_no_match;  /* EOF before anything matched */\n\n            /* Under param.ignore, when new_fields==0 we don't know if\n               it's a successful match or an invalid field.  new_chars\n               won't have been assigned if it was an invalid field.  */\n            if (new_chars == -1)\n              goto done;  /* invalid input */\n\n            chars += new_chars;\n            (*funs->step) (data, new_chars);\n\n          increment_fields:\n            if (! 
param.ignore)\n              fields++;\n            goto next;\n\n          case 'd':   /* decimal */\n          case 'u':   /* decimal */\n            param.base = 10;\n            goto numeric;\n\n          case 'e':   /* float */\n          case 'E':   /* float */\n          case 'f':   /* float */\n          case 'g':   /* float */\n          case 'G':   /* float */\n          case 'i':   /* integer with base marker */\n          numeric:\n            if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')\n              goto libc_type;\n\n            chars += skip_white (funs, data);\n\n            new_chars = gmpscan (funs, data, &param,\n                                 param.ignore ? NULL : va_arg (ap, void*));\n            if (new_chars == -2)\n              goto eof_no_match;\n            if (new_chars == -1)\n              goto done;\n\n            ASSERT (new_chars >= 0);\n            chars += new_chars;\n            goto increment_fields;\n\n          case 'a':   /* glibc allocate string */\n          case '\\'':  /* glibc digit groupings */\n            break;\n\n          case 'F':   /* mpf_t */\n          case 'j':   /* intmax_t */\n          case 'L':   /* long long */\n          case 'q':   /* quad_t */\n          case 'Q':   /* mpq_t */\n          case 't':   /* ptrdiff_t */\n          case 'z':   /* size_t */\n          case 'Z':   /* mpz_t */\n          set_type:\n            param.type = fchar;\n            break;\n\n          case 'h':   /* short or char */\n            if (param.type != 'h')\n              goto set_type;\n            param.type = 'H';   /* internal code for \"hh\" */\n            break;\n\n            goto numeric;\n\n          case 'l':   /* long, long long, double or long double */\n            if (param.type != 'l')\n              goto set_type;\n            param.type = 'L';   /* \"ll\" means \"L\" */\n            break;\n\n          case 'n':\n            if (! 
param.ignore)\n              {\n                void  *p;\n                p = va_arg (ap, void *);\n                TRACE (printf (\"  store %%n to %p\\n\", p));\n                switch (param.type) {\n                case '\\0': * (int       *) p = chars; break;\n                case 'F':  mpf_set_si ((mpf_ptr) p, (long) chars); break;\n                case 'H':  * (char      *) p = chars; break;\n                case 'h':  * (short     *) p = chars; break;\n#if HAVE_INTMAX_T\n                case 'j':  * (intmax_t  *) p = chars; break;\n#else\n                case 'j':  ASSERT_FAIL (intmax_t not available); break;\n#endif\n                case 'l':  * (long      *) p = chars; break;\n#if HAVE_QUAD_T && HAVE_LONG_LONG\n                case 'q':\n                  ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));\n                  /*FALLTHRU*/\n#else\n                case 'q':  ASSERT_FAIL (quad_t not available); break;\n#endif\n#if HAVE_LONG_LONG\n                case 'L':  * (long long *) p = chars; break;\n#else\n                case 'L':  ASSERT_FAIL (long long not available); break;\n#endif\n                case 'Q':  mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;\n                case 't':  * (ptrdiff_t *) p = chars; break;\n                case 'z':  * (size_t    *) p = chars; break;\n                case 'Z':  mpz_set_si ((mpz_ptr) p, (long) chars); break;\n                default: ASSERT (0); break;\n                }\n              }\n            goto next;\n\n          case 'o':\n            param.base = 8;\n            goto numeric;\n\n          case 'x':\n          case 'X':\n            param.base = 16;\n            goto numeric;\n\n          case '0': case '1': case '2': case '3': case '4':\n          case '5': case '6': case '7': case '8': case '9':\n            param.width = 0;\n            do {\n              param.width = param.width * 10 + (fchar-'0');\n              fchar = *fmt++;\n            } while (isdigit (fchar));\n            
fmt--; /* unget the non-digit */\n            break;\n\n          case '*':\n            param.ignore = 1;\n            break;\n\n          default:\n            /* something invalid in a % sequence */\n            ASSERT (0);\n            goto next;\n          }\n        }\n    }\n\n done:\n  (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);\n  return fields;\n}\n"
  },
  {
    "path": "scanf/fscanf.c",
    "content": "/* gmp_fscanf -- formatted input from a FILE.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_fscanf (FILE *fp, const char *fmt, ...)\n#else\ngmp_fscanf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  FILE       *fp;\n  const char *fmt;\n  va_start (ap);\n  fp = va_arg (ap, FILE *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "scanf/fscanffuns.c",
    "content": "/* __gmp_fscanf_funs -- support for formatted input from a FILE.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* SunOS 4 stdio.h doesn't provide prototypes for these */\n#if ! HAVE_DECL_FGETC\nint fgetc(FILE *fp);\n#endif\n#if ! HAVE_DECL_FSCANF\nint fscanf(FILE *fp, const char *fmt, ...);\n#endif\n#if ! HAVE_DECL_UNGETC\nint ungetc(int c, FILE *fp);\n#endif\n\n\nstatic void\nstep (FILE *fp, int n)\n{\n}\n\nconst struct gmp_doscan_funs_t  __gmp_fscanf_funs = {\n  (gmp_doscan_scan_t)  fscanf,\n  (gmp_doscan_step_t)  step,\n  (gmp_doscan_get_t)   fgetc,\n  (gmp_doscan_unget_t) ungetc,\n};\n"
  },
  {
    "path": "scanf/scanf.c",
    "content": "/* gmp_scanf -- formatted input from stdin.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_scanf (const char *fmt, ...)\n#else\ngmp_scanf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  const char *fmt;\n  va_start (ap);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  ret = __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, ap);\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "scanf/sscanf.c",
    "content": "/* gmp_sscanf -- formatted input from a string.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\n#if HAVE_STDARG\ngmp_sscanf (const char *s, const char *fmt, ...)\n#else\ngmp_sscanf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  int      ret;\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  const char *s;\n  const char *fmt;\n  va_start (ap);\n  s = va_arg (ap, const char *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n#if SSCANF_WRITABLE_INPUT\n  /* let gmp_vsscanf handle the copying */\n  ret = gmp_vsscanf (s, fmt, ap);\n#else\n  ret = __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);\n#endif\n  va_end (ap);\n  return ret;\n}\n"
  },
  {
    "path": "scanf/sscanffuns.c",
    "content": "/* __gmp_sscanf_funs -- support for formatted input from a string.\n\n   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST\n   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN\n   FUTURE GNU MP RELEASES.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nstatic int\nscan (const char **sp, const char *fmt, void *p1, void *p2)\n{\n  return sscanf (*sp, fmt, p1, p2);\n}\n\nstatic void\nstep (const char **sp, int n)\n{\n  ASSERT (n >= 0);\n\n  /* shouldn't push us past the the end of the string */\n#if WANT_ASSERT\n  {\n    int  i;\n    for (i = 0; i < n; i++)\n      ASSERT ((*sp)[i] != '\\0');\n  }\n#endif\n\n  (*sp) += n;\n}\n\nstatic int\nget (const char **sp)\n{\n  const char  *s;\n  int  c;\n  s = *sp;\n  c = (unsigned char) *s++;\n  if (c == '\\0')\n    return EOF;\n  *sp = s;\n  return c;\n}\n\nstatic void\nunget (int c, const char **sp)\n{\n  const char  *s;\n  s = *sp;\n  if (c == EOF)\n    {\n      ASSERT (*s == '\\0');\n      return;\n    }\n  s--;\n  ASSERT ((unsigned char) *s == c);\n  *sp = s;\n}\n\nconst struct gmp_doscan_funs_t  __gmp_sscanf_funs = {\n  (gmp_doscan_scan_t)  scan,\n  (gmp_doscan_step_t)  step,\n  (gmp_doscan_get_t)   get,\n  (gmp_doscan_unget_t) unget,\n};\n"
  },
  {
    "path": "scanf/vfscanf.c",
    "content": "/* gmp_vfscanf -- formatted input from a FILE.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vfscanf (FILE *fp, const char *fmt, va_list ap)\n{\n  return __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, ap);\n}\n"
  },
  {
    "path": "scanf/vscanf.c",
    "content": "/* gmp_vscanf -- formatted input from stdin.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vscanf (const char *fmt, va_list ap)\n{\n  return __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, ap);\n}\n"
  },
  {
    "path": "scanf/vsscanf.c",
    "content": "/* gmp_vsscanf -- formatted input from a string.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nint\ngmp_vsscanf (const char *s, const char *fmt, va_list ap)\n{\n#if SSCANF_WRITABLE_INPUT\n  /* We only actually need this if there's standard C types in fmt, and if\n     \"s\" is not already writable, but it's too much trouble to check that,\n     and in any case this writable sscanf input business is only for a few\n     old systems. */\n  size_t size;\n  char   *alloc;\n  int    ret;\n  size = strlen (s) + 1;\n  alloc = (char *) (*__gmp_allocate_func) (size);\n  memcpy (alloc, s, size);\n  s = alloc;\n  ret = __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);\n  (*__gmp_free_func) (alloc, size);\n  return ret;\n\n#else\n  return __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);\n#endif\n}\n"
  },
  {
    "path": "strip_fPIC.sh",
    "content": "#!/bin/sh\n#\n# This file was found distributed in numerous places on the \n# web, including, but not limited to NetBSD, which is covered \n# by the three clause BSD license. No accompanying copyright \n# information was found in it. Since the file has been so\n# widely distributed without a copyright notice, I assume\n# the file is actually in the public domain. I have modified\n# it considerably since and hereby place my modifications\n# in the public domain.\n#\n# libtool assumes that the compiler can handle the -fPIC flag\n# This isn't always true (for example, yasm can't handle it)\ncommand=\"\"\nwhile [ $# -gt 0 ]; do\n   case \"$1\" in\n\t\t-DPIC)\n\t\t\tcommand=\"$command -D PIC\"\n\t\t\t;;\n\t\t-fPIC)\n\t\t\t;;\n\t\t-fno-common)\n\t\t\t;;\n\t\t*)\n\t\t\tcommand=\"$command $1\"\n\t\t\t;;\n\t\tesac\n\t\tshift\ndone\necho $command\nexec $command\n"
  },
  {
    "path": "tal-debug.c",
    "content": "/* TMP_ALLOC routines for debugging.\n\nCopyright 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* This method aims to help a malloc debugger find problems.  A linked list\n   of allocated block is kept for TMP_FREE to release.  This is reentrant\n   and thread safe.\n\n   Each TMP_ALLOC is a separate malloced block, so redzones or sentinels\n   applied by a malloc debugger either above or below can guard against\n   accesses outside the allocated area.\n\n   A marker is a \"struct tmp_debug_t *\" so that TMP_DECL can initialize it\n   to NULL and we can detect TMP_ALLOC without TMP_MARK.\n\n   It will work to realloc an MPZ_TMP_INIT variable, but when TMP_FREE comes\n   to release the memory it will have the old size, thereby triggering an\n   error from tests/memory.c.\n\n   Possibilities:\n\n   It'd be possible to keep a global list of active \"struct tmp_debug_t\"\n   records, so at the end of a program any TMP leaks could be printed.  
But\n   if only a couple of routines are under test at any one time then the\n   likely culprit should be easy enough to spot.  */\n\n\nvoid\n__gmp_tmp_debug_mark (const char *file, int line,\n                      struct tmp_debug_t **markp, struct tmp_debug_t *mark,\n                      const char *decl_name, const char *mark_name)\n{\n  if (strcmp (mark_name, decl_name) != 0)\n    {\n      __gmp_assert_header (file, line);\n      fprintf (stderr, \"GNU MP: TMP_MARK(%s) but TMP_DECL(%s) is in scope\\n\",\n               mark_name, decl_name);\n      abort ();\n    }\n\n  if (*markp != NULL)\n    {\n      __gmp_assert_header (file, line);\n      fprintf (stderr, \"GNU MP: Repeat of TMP_MARK(%s)\\n\", mark_name);\n      if (mark->file != NULL && mark->file[0] != '\\0' && mark->line != -1)\n        {\n          __gmp_assert_header (mark->file, mark->line);\n          fprintf (stderr, \"previous was here\\n\");\n        }\n      abort ();\n    }\n\n  *markp = mark;\n  mark->file = file;\n  mark->line = line;\n  mark->list = NULL;\n}\n\nvoid *\n__gmp_tmp_debug_alloc (const char *file, int line, int dummy,\n                       struct tmp_debug_t **markp,\n                       const char *decl_name, size_t size)\n{\n  struct tmp_debug_t        *mark = *markp;\n  struct tmp_debug_entry_t  *p;\n\n  ASSERT_ALWAYS (size >= 1);\n\n  if (mark == NULL)\n    {\n      __gmp_assert_header (file, line);\n      fprintf (stderr, \"GNU MP: TMP_ALLOC without TMP_MARK(%s)\\n\", decl_name);\n      abort ();\n    }\n\n  p = __GMP_ALLOCATE_FUNC_TYPE (1, struct tmp_debug_entry_t);\n  p->size = size;\n  p->block = (*__gmp_allocate_func) (size);\n  p->next = mark->list;\n  mark->list = p;\n  return p->block;\n}\n\nvoid\n__gmp_tmp_debug_free (const char *file, int line, int dummy,\n                      struct tmp_debug_t **markp,\n                      const char *decl_name, const char *free_name)\n{\n  struct tmp_debug_t        *mark = *markp;\n  struct tmp_debug_entry_t  *p, 
*next;\n\n  if (mark == NULL)\n    {\n      __gmp_assert_header (file, line);\n      fprintf (stderr, \"GNU MP: TMP_FREE(%s) without TMP_MARK(%s)\\n\",\n               free_name, decl_name);\n      abort ();\n    }\n\n  if (strcmp (free_name, decl_name) != 0)\n    {\n      __gmp_assert_header (file, line);\n      fprintf (stderr, \"GNU MP: TMP_FREE(%s) when TMP_DECL(%s) is in scope\\n\",\n               free_name, decl_name);\n      abort ();\n    }\n\n  p = mark->list;\n  while (p != NULL)\n    {\n      next = p->next;\n      (*__gmp_free_func) (p->block, p->size);\n      __GMP_FREE_FUNC_TYPE (p, 1, struct tmp_debug_entry_t);\n      p = next;\n    }\n\n  *markp = NULL;\n}\n"
  },
  {
    "path": "tal-notreent.c",
    "content": "/* Stack allocation routines.  This is intended for machines without support\n   for the `alloca' function.\n\nCopyright 1996, 1997, 1999, 2000, 2001, 2006 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\nstruct tmp_stack\n{\n  void *end;\n  void *alloc_point;\n  struct tmp_stack *prev;\n};\ntypedef struct tmp_stack tmp_stack;\n\n\nstatic unsigned long max_total_allocation = 0;\nstatic unsigned long current_total_allocation = 0;\n\nstatic tmp_stack xxx = {&xxx, &xxx, 0};\nstatic tmp_stack *current = &xxx;\n\n/* The rounded size of the header of each allocation block.  */\n#define HSIZ   ROUND_UP_MULTIPLE (sizeof (tmp_stack), __TMP_ALIGN)\n\n\n/* Allocate a block of exactly <size> bytes.  This should only be called\n   through the TMP_ALLOC macro, which takes care of rounding/alignment.  
*/\nvoid *\n__gmp_tmp_alloc (unsigned long size)\n{\n  void *that;\n\n  ASSERT ((size % __TMP_ALIGN) == 0);\n  ASSERT (((unsigned) current->alloc_point % __TMP_ALIGN) == 0);\n\n  if (size > (char *) current->end - (char *) current->alloc_point)\n    {\n      void *chunk;\n      tmp_stack *header;\n      unsigned long chunk_size;\n      unsigned long now;\n\n      /* Allocate a chunk that makes the total current allocation somewhat\n\t larger than the maximum allocation ever.  If size is very large, we\n\t allocate that much.  */\n\n      now = current_total_allocation + size;\n      if (now > max_total_allocation)\n\t{\n\t  /* We need more temporary memory than ever before.  Increase\n\t     for future needs.  */\n\t  now = (now * 3 / 2 + __TMP_ALIGN - 1) & -__TMP_ALIGN;\n\t  chunk_size = now - current_total_allocation + HSIZ;\n\t  current_total_allocation = now;\n\t  max_total_allocation = current_total_allocation;\n\t}\n      else\n\t{\n\t  chunk_size = max_total_allocation - current_total_allocation + HSIZ;\n\t  current_total_allocation = max_total_allocation;\n\t}\n\n      chunk = (*__gmp_allocate_func) (chunk_size);\n      header = (tmp_stack *) chunk;\n      header->end = (char *) chunk + chunk_size;\n      header->alloc_point = (char *) chunk + HSIZ;\n      header->prev = current;\n      current = header;\n    }\n\n  that = current->alloc_point;\n  current->alloc_point = (char *) that + size;\n  ASSERT (((unsigned) that % __TMP_ALIGN) == 0);\n  return that;\n}\n\n/* Typically called at function entry.  <mark> is assigned so that\n   __gmp_tmp_free can later be used to reclaim all subsequently allocated\n   storage.  
*/\nvoid\n__gmp_tmp_mark (struct tmp_marker *mark)\n{\n  mark->which_chunk = current;\n  mark->alloc_point = current->alloc_point;\n}\n\n/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */\nvoid\n__gmp_tmp_free (struct tmp_marker *mark)\n{\n  while (mark->which_chunk != current)\n    {\n      tmp_stack *tmp;\n\n      tmp = current;\n      current = tmp->prev;\n      current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);\n      (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp);\n    }\n  current->alloc_point = mark->alloc_point;\n}\n"
  },
  {
    "path": "tal-reent.c",
    "content": "/* TMP_ALLOC routines using malloc in a reentrant fashion.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Each TMP_ALLOC uses __gmp_allocate_func to get a block of memory of the\n   size requested, plus a header at the start which is used to hold the\n   blocks on a linked list in the marker variable, ready for TMP_FREE to\n   release.\n\n   Callers should try to do multiple allocs with one call, in the style of\n   TMP_ALLOC_LIMBS_2 if it's easy to arrange, since that will keep down the\n   number of separate malloc calls.\n\n   Enhancements:\n\n   Could inline both TMP_ALLOC and TMP_FREE, though TMP_ALLOC would need the\n   compiler to have \"inline\" since it returns a value.  The calls to malloc\n   will be slow though, so it hardly seems worth worrying about one extra\n   level of function call.  
*/\n\n\n#define HSIZ   ROUND_UP_MULTIPLE (sizeof (struct tmp_reentrant_t), __TMP_ALIGN)\n\nvoid *\n__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **markp, size_t size)\n{\n  char    *p;\n  size_t  total_size;\n\n#define P   ((struct tmp_reentrant_t *) p)\n\n  total_size = size + HSIZ;\n  p = (*__gmp_allocate_func) (total_size);\n  P->size = total_size;\n  P->next = *markp;\n  *markp = P;\n  return p + HSIZ;\n}\n\nvoid\n__gmp_tmp_reentrant_free (struct tmp_reentrant_t *mark)\n{\n  struct tmp_reentrant_t  *next;\n  \n  while (mark != NULL)\n    {\n      next = mark->next;\n      (*__gmp_free_func) ((char *) mark, mark->size);\n      mark = next;\n    }\n}\n"
  },
  {
    "path": "tests/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nSUBDIRS = . devel mpn fft mpz mpq mpf rand misc cxx\n\ninclude ../mpn/Makeasm.am\n\nAM_CPPFLAGS = -I$(top_srcdir)\nLDADD = libtests.la $(top_builddir)/libmpir.la\n\ncheck_LTLIBRARIES = libtests.la\n\nEXTRA_libtests_la_SOURCES = x86_64call.as x86_64check.c x86call.asm x86check.c\nlibtests_la_SOURCES = tests.h  memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c\nlibtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@\nlibtests_la_LIBADD = $(libtests_la_DEPENDENCIES) $(top_builddir)/libmpir.la $(LIBM)\n\ncheck_PROGRAMS = t-bswap t-constants t-count_zeros t-gmpmax t-hightomask t-modlinv t-parity t-popc t-sub \nTESTS = $(check_PROGRAMS)\n\nTEST_SUITE_ALL_LOG = testsuite-all.log\n\n$(TEST_SUITE_ALL_LOG): $(TEST_SUITE_LOG) $(SUBDIRS)\n\t@rm -rf $(TEST_SUITE_ALL_LOG); \\\n\tlist='$(SUBDIRS)'; for subdir in $$list; do \\\n\t  cat $$subdir/$(TEST_SUITE_LOG) >> $(TEST_SUITE_ALL_LOG) 2>&1; \\\n\tdone\n"
  },
  {
    "path": "tests/cxx/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# LDADD has an explicit -L of $(top_builddir)/.libs for the benefit of gcc\n# 3.2 on itanium2-hp-hpux11.22.  Without this option, the libgmp.sl.6\n# required by libgmpxx.sl (ie. in its NEEDED records) is not found by the\n# linker.  
FIXME: Presumably libtool should do something about this itself.\n#\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = -L$(top_builddir)/.libs \\\n  $(top_builddir)/tests/libtests.la \\\n  $(top_builddir)/libmpirxx.la \\\n  $(top_builddir)/libmpir.la\n\nif WANT_CXX\ncheck_PROGRAMS = t-assign t-binary t-cast t-constr t-headers t-istream t-locale t-misc t-ops t-ostream t-prec t-rand t-ternary t-unary \nTESTS = $(check_PROGRAMS)\nendif\n\nt_assign_SOURCES  = t-assign.cc\nt_binary_SOURCES  = t-binary.cc\nt_cast_SOURCES    = t-cast.cc\nt_constr_SOURCES  = t-constr.cc\nt_headers_SOURCES = t-headers.cc\nt_istream_SOURCES = t-istream.cc\nt_locale_SOURCES  = t-locale.cc clocale.c\nt_misc_SOURCES    = t-misc.cc\nt_ops_SOURCES     = t-ops.cc\nt_ostream_SOURCES = t-ostream.cc\nt_prec_SOURCES    = t-prec.cc\nt_rand_SOURCES    = t-rand.cc\nt_ternary_SOURCES = t-ternary.cc\nt_unary_SOURCES   = t-unary.cc\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n\n\n# Libtool (1.5) somehow botches its uninstalled shared library setups on\n# OpenBSD 3.2, making the C++ test programs here fail.  libgmpxx.so ends up\n# with a NEEDED record asking for ./.libs/libgmp.so.N, but the loader can't\n# find that unless it exists in the current directory.\n#\n# FIXME: Clearly libtool ought to handle this itself, in which case the hack\n# here can be removed.\n#\n# Note this fix applies only when running \"make check\".  The cp here should\n# be done manually if just one program is to be built and run.\n#\nTESTS_ENVIRONMENT = cp $(top_builddir)/.libs/libmpir.so.* .libs 2>/dev/null || true;\n"
  },
  {
    "path": "tests/cxx/clocale.c",
    "content": "/* Manipulable localeconv and nl_langinfo.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#if HAVE_NL_TYPES_H\n#include <nl_types.h>  /* for nl_item */\n#endif\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for lconv */\n#endif\n\n\n/* Replace the libc localeconv and nl_langinfo with ones we can manipulate.\n\n   This is done in a C file since if it was in a C++ file then we'd have to\n   match the \"throw\" or lack thereof declared for localeconv in <locale.h>.\n   g++ 3.2 gives an error about mismatched throws under \"-pedantic\", other\n   C++ compilers may very possibly do so too.  */\n\nextern char point_string[];\n\n#if HAVE_LOCALECONV && ! (defined(__MINGW32__) || defined(_MSC_VER))\nstruct lconv *\nlocaleconv (void)\n{\n  static struct lconv  l;\n  l.decimal_point = point_string;\n  return &l;\n}\n#endif\n\n#if HAVE_NL_LANGINFO\nchar *\nnl_langinfo (nl_item n)\n{\n  return point_string;\n}\n#endif\n"
  },
  {
    "path": "tests/cxx/t-assign.cc",
    "content": "/* Test mp*_class assignment operators.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <string>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // operator=(const mpz_class &)\n  {\n    mpz_class a(123), b;\n    b = a; ASSERT_ALWAYS(b == 123);\n  }\n\n  // template <class T, class U> operator=(const __gmp_expr<T, U> &)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // operator=(signed char)\n  {\n    signed char a = -127;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == -127);\n  }\n\n  // operator=(unsigned char)\n  {\n    unsigned char a = 255;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 255);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpz_class a;\n    a = 'A'; ASSERT_ALWAYS(a == 65);\n  }\n  {\n    mpz_class a;\n    a = 'z'; ASSERT_ALWAYS(a == 122);\n  }\n\n  // operator=(signed int)\n  {\n    signed int a = 0;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == -123);\n  
}\n  {\n    signed int a = 32767;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 32767);\n  }\n\n  // operator=(unsigned int)\n  {\n    unsigned int a = 65535u;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // operator=(signed short int)\n  {\n    signed short int a = -12345;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == -12345);\n  }\n\n  // operator=(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // operator=(signed long int)\n  {\n    signed long int a = -1234567890L;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // operator=(unsigned long int)\n  {\n    unsigned long int a = 3456789012UL;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 3456789012UL);\n  }\n\n#ifdef MPIRXX_HAVE_LLONG\n  // operator=(unsigned long long int)\n  {\n    unsigned long long int a = 0x1234567812345678ULL;\n    mpz_class b;\n    b = a; \n    ASSERT_ALWAYS(b == 0x1234567812345678ULL);\n  }\n\n  // operator=(unsigned long long int)\n  {\n    long long int a = 0xfedcba9876543210ULL;\n    mpz_class b;\n    b = a; \n    ASSERT_ALWAYS(b == (mpir_si)0xfedcba9876543210ULL);\n  }\n#endif\n\n  // operator=(float)\n  {\n    float a = 123.0;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 123);\n  }\n\n  // operator=(double)\n  {\n    double a = 0.0;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    double a = -12.375;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == -12);\n  }\n  {\n    double a = 6.789e+3;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 6789);\n  }\n  {\n    double a = 9.375e-1;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n\n  // operator=(long double)\n  // currently not implemented\n\n  // operator=(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const std::string &)\n  {\n    string a(\"1234567890\");\n    mpz_class 
b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const char *) with invalid\n  {\n    try {\n      const char *a = \"abc\";\n      mpz_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // operator=(const std::string &) with invalid\n  {\n    try {\n      string a(\"def\");\n      mpz_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // operator=(const mpq_class &)\n  {\n    mpq_class a(1, 2), b;\n    b = a; ASSERT_ALWAYS(b == 0.5);\n  }\n\n  // template <class T, class U> operator=(const __gmp_expr<T, U> &)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // operator=(signed char)\n  {\n    signed char a = -127;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == -127);\n  }\n\n  // operator=(unsigned char)\n  {\n    unsigned char a = 255;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 255);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpq_class a;\n    a = 'A'; ASSERT_ALWAYS(a == 65);\n  }\n  {\n    mpq_class a;\n    a = 'z'; ASSERT_ALWAYS(a == 122);\n  }\n\n  // operator=(signed int)\n  {\n    signed int a = 0;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == -123);\n  }\n  {\n    signed int a = 32767;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 32767);\n  }\n\n  // operator=(unsigned int)\n  {\n    unsigned int a = 65535u;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // operator=(signed short int)\n  {\n    signed short int a = -12345;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == -12345);\n  }\n\n  // operator=(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpz_class b;\n    b = a; ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // operator=(signed long int)\n  {\n    signed long int a = 
-1234567890L;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // operator=(unsigned long int)\n  {\n    unsigned long int a = 3456789012UL;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 3456789012UL);\n  }\n\n  // operator=(float)\n  {\n    float a = 123.0;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 123);\n  }\n\n  // operator=(double)\n  {\n    double a = 0.0;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    double a = -12.375;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == -12.375);\n  }\n  {\n    double a = 6.789e+3;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 6789);\n  }\n  {\n    double a = 9.375e-1;\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 0.9375);\n  }\n\n  // operator=(long double)\n  // currently not implemented\n\n  // operator=(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const std::string &)\n  {\n    string a(\"1234567890\");\n    mpq_class b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const char *) with invalid\n  {\n    try {\n      const char *a = \"abc\";\n      mpq_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // operator=(const std::string &) with invalid\n  {\n    try {\n      string a(\"def\");\n      mpq_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // operator=(const mpf_class &)\n  {\n    mpf_class a(123), b;\n    b = a; ASSERT_ALWAYS(b == 123);\n  }\n\n  // template <class T, class U> operator=(const __gmp_expr<T, U> &)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // operator=(signed char)\n  {\n    signed char a = -127;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == -127);\n  }\n\n  // operator=(unsigned char)\n  {\n    unsigned char a = 255;\n    
mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 255);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpf_class a;\n    a = 'A'; ASSERT_ALWAYS(a == 65);\n  }\n  {\n    mpf_class a;\n    a = 'z'; ASSERT_ALWAYS(a == 122);\n  }\n\n  // operator=(signed int)\n  {\n    signed int a = 0;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == -123);\n  }\n  {\n    signed int a = 32767;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 32767);\n  }\n\n  // operator=(unsigned int)\n  {\n    unsigned int a = 65535u;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // operator=(signed short int)\n  {\n    signed short int a = -12345;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == -12345);\n  }\n\n  // operator=(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // operator=(signed long int)\n  {\n    signed long int a = -1234567890L;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // operator=(unsigned long int)\n  {\n    unsigned long int a = 3456789012UL;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 3456789012UL);\n  }\n\n  // operator=(float)\n  {\n    float a = 123.0;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 123);\n  }\n\n  // operator=(double)\n  {\n    double a = 0.0;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 0);\n  }\n  {\n    double a = -12.375;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == -12.375);\n  }\n  {\n    double a = 6.789e+3;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 6789);\n  }\n  {\n    double a = 9.375e-1;\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 0.9375);\n  }\n\n  // operator=(long double)\n  // currently not implemented\n\n  // operator=(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const 
std::string &)\n  {\n    string a(\"1234567890\");\n    mpf_class b;\n    b = a; ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // operator=(const char *) with invalid\n  {\n    try {\n      const char *a = \"abc\";\n      mpf_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // operator=(const std::string &) with invalid\n  {\n    try {\n      string a(\"def\");\n      mpf_class b;\n      b = a;\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n}\n\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-binary.cc",
    "content": "/* Test mp*_class binary expressions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >\n  {\n    mpz_class a(1), b(2);\n    mpz_class c(a + b); ASSERT_ALWAYS(c == 3);\n  }\n  {\n    mpz_class a(3), b(4);\n    mpz_class c;\n    c = a * b; ASSERT_ALWAYS(c == 12);\n  }\n  {\n    mpz_class a(5), b(3);\n    mpz_class c;\n    c = a % b; ASSERT_ALWAYS(c == 2);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >\n  {\n    mpz_class a(1);\n    signed int b = 3;\n    mpz_class c(a - b); ASSERT_ALWAYS(c == -2);\n  }\n  {\n    mpz_class a(-8);\n    unsigned int b = 2;\n    mpz_class c;\n    c = a / b; ASSERT_ALWAYS(c == -4);\n  }\n  {\n    mpz_class a(2);\n    double b = 3.0;\n    mpz_class c(a + b); ASSERT_ALWAYS(c == 5);\n  }\n  {\n    mpz_class a(4);\n    mpz_class b;\n    b = a + 0; ASSERT_ALWAYS(b == 
4);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >\n  {\n    mpz_class a(3);\n    signed int b = 9;\n    mpz_class c(b / a); ASSERT_ALWAYS(c == 3);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >\n  // type of result can't be mpz\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >\n  // type of result can't be mpz\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >\n  {\n    mpz_class a(3), b(4);\n    mpz_class c(a * (-b)); ASSERT_ALWAYS(c == -12);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >\n  {\n    mpz_class a(3), b(2), c(1);\n    mpz_class d;\n    d = (a % b) + c; ASSERT_ALWAYS(d == 2);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >\n  {\n    mpz_class a(-5);\n    unsigned int b = 2;\n    mpz_class c((-a) << b); ASSERT_ALWAYS(c == 20);\n  }\n  {\n    mpz_class a(5), b(-4);\n    signed int c = 3;\n    mpz_class d;\n    d = (a * b) >> c; ASSERT_ALWAYS(d == -3);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >\n  {\n    mpz_class a(2), b(4);\n    double c = 6;\n    mpz_class d(c / (a - b)); ASSERT_ALWAYS(d == -3);\n  }\n  {\n    mpz_class a(3), b(2);\n    double c = 1;\n    mpz_class d;\n    d = c + (a + b); ASSERT_ALWAYS(d == 6);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >\n  // type of result can't be mpz\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, 
__gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >\n  // type of result can't be mpz\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >\n  {\n    mpz_class a(3), b(5), c(7);\n    mpz_class d;\n    d = (a - b) * (-c); ASSERT_ALWAYS(d == 14);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >\n  {\n    mpq_class a(1, 2), b(3, 4);\n    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.25);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >\n  {\n    mpq_class a(1, 2);\n    signed int b = 3;\n    mpq_class c(a - b); ASSERT_ALWAYS(c == -2.5);\n  }\n  {\n    mpq_class a(1, 2);\n    mpq_class b;\n    b = a + 0; ASSERT_ALWAYS(b == 0.5);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >\n  {\n    mpq_class a(2, 3);\n    signed int b = 4;\n    mpq_class c;\n    c = b / a; ASSERT_ALWAYS(c == 6);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >\n  {\n    mpq_class a(1, 2);\n    mpz_class b(1);\n    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.5);\n  }\n  {\n    mpq_class a(2, 3);\n    mpz_class b(1);\n    double c = 2.0;\n    mpq_class d;\n    d = a * (b + c); ASSERT_ALWAYS(d == 2);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >\n  {\n    mpq_class a(2, 3);\n    mpz_class b(4);\n    mpq_class c(b / a); ASSERT_ALWAYS(c == 6);\n  }\n  {\n    mpq_class a(2, 3);\n    mpz_class b(1), c(4);\n    mpq_class d;\n    d = (b - c) * a; ASSERT_ALWAYS(d == -2);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >\n  {\n    
mpq_class a(1, 3), b(3, 4);\n    mpq_class c;\n    c = a * (-b); ASSERT_ALWAYS(c == -0.25);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >\n  {\n    mpq_class a(1, 3), b(2, 3), c(1, 4);\n    mpq_class d((a / b) + c); ASSERT_ALWAYS(d == 0.75);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >\n  {\n    mpq_class a(3, 8);\n    unsigned int b = 4;\n    mpq_class c((-a) << b); ASSERT_ALWAYS(c == -6);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >\n  {\n    mpq_class a(1, 2), b(1, 4);\n    double c = 6.0;\n    mpq_class d;\n    d = c / (a + b); ASSERT_ALWAYS(d == 8);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >\n  {\n    mpq_class a(1, 2), b(1, 4);\n    mpz_class c(1);\n    mpq_class d((a + b) - c); ASSERT_ALWAYS(d == -0.25);\n  }\n  {\n    mpq_class a(1, 3), b(3, 2);\n    mpz_class c(2), d(4);\n    mpq_class e;\n    e = (a * b) / (c - d); ASSERT_ALWAYS(e == -0.25);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >\n  {\n    mpq_class a(1, 3), b(3, 4);\n    mpz_class c(-3);\n    mpq_class d(c * (a * b)); ASSERT_ALWAYS(d == -0.75);\n  }\n  {\n    mpq_class a(1, 3), b(3, 5);\n    mpz_class c(6);\n    signed int d = 4;\n    mpq_class e;\n    e = (c % d) / (a * b); ASSERT_ALWAYS(e == 10);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >\n  {\n    mpq_class a(1, 3), b(3, 4), c(2, 5);\n    mpq_class d;\n    d = (a * b) / (-c); ASSERT_ALWAYS(d == -0.625);\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, 
__gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >\n  {\n    mpf_class a(1), b(2);\n    mpf_class c(a + b); ASSERT_ALWAYS(c == 3);\n  }\n  {\n    mpf_class a(1.5), b(6);\n    mpf_class c;\n    c = a / b; ASSERT_ALWAYS(c == 0.25);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >\n  {\n    mpf_class a(1);\n    signed int b = -2;\n    mpf_class c(a - b); ASSERT_ALWAYS(c == 3);\n  }\n  {\n    mpf_class a(2);\n    mpf_class b;\n    b = a + 0; ASSERT_ALWAYS(b == 2);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >\n  {\n    mpf_class a(2);\n    unsigned int b = 3;\n    mpf_class c;\n    c = b / a; ASSERT_ALWAYS(c == 1.5);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >\n  {\n    mpf_class a(2);\n    mpz_class b(3);\n    mpf_class c(a - b); ASSERT_ALWAYS(c == -1);\n  }\n  {\n    mpf_class a(3);\n    mpz_class b(2), c(1);\n    mpf_class d;\n    d = a * (b + c); ASSERT_ALWAYS(d == 9);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >\n  {\n    mpf_class a(6);\n    mpq_class b(3, 4);\n    mpf_class c(a * b); ASSERT_ALWAYS(c == 4.5);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >\n  {\n    mpf_class a(2), b(-3);\n    mpf_class c;\n    c = a * (-b); ASSERT_ALWAYS(c == 6);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >\n  {\n    mpf_class a(3), b(4), c(5);\n    mpf_class d;\n    d = (a / b) - c; ASSERT_ALWAYS(d == -4.25);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >\n  {\n    mpf_class a(3);\n    unsigned int b = 
2;\n    mpf_class c((-a) >> b); ASSERT_ALWAYS(c == -0.75);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >\n  {\n    mpf_class a(2), b(3);\n    double c = 5.0;\n    mpf_class d;\n    d = c / (a + b); ASSERT_ALWAYS(d == 1);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >\n  {\n    mpf_class a(2), b(3);\n    mpz_class c(4);\n    mpf_class d;\n    d = (a + b) * c; ASSERT_ALWAYS(d == 20);\n  }\n  {\n    mpf_class a(2), b(3);\n    mpq_class c(1, 2), d(1, 4);\n    mpf_class e;\n    e = (a * b) / (c + d); ASSERT_ALWAYS(e == 8);\n  }\n\n  // template <class T, class U, class V, class W, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >\n  {\n    mpf_class a(1), b(2);\n    mpq_class c(3);\n    mpf_class d(c / (a + b)); ASSERT_ALWAYS(d == 1);\n  }\n  {\n    mpf_class a(1);\n    mpz_class b(2);\n    mpq_class c(3, 4);\n    mpf_class d;\n    d = (-c) + (a + b); ASSERT_ALWAYS(d == 2.25);\n  }\n\n  // template <class T, class U, class V, class Op>\n  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >\n  {\n    mpf_class a(1), b(2), c(3);\n    mpf_class d;\n    d = (a + b) * (-c); ASSERT_ALWAYS(d == -9);\n  }\n}\n\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-cast.cc",
    "content": "/* Test g++ -Wold-style-cast cleanliness.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n\n\n/* This code doesn't do anything when run, it just expands various C macros\n   to see that they don't trigger compile-time warnings from g++\n   -Wold-style-cast.  This option isn't used in a normal build, it has to be\n   added manually to make this test worthwhile.  */\n\nvoid\ncheck_macros (void)\n{\n  mpz_t          z;\n  long           l = 123;\n  unsigned long  u = 456;\n  int            i;\n  mp_limb_t      limb;\n\n  mpz_init_set_ui (z, 0L);\n  i = mpz_odd_p (z);\n  i = mpz_even_p (z);\n  i = mpz_cmp_si (z, l);\n  i = mpz_cmp_ui (z, u);\n  mpz_clear (z);\n\n  limb = GMP_NUMB_MASK;\n  limb = GMP_NUMB_MAX;\n  limb = GMP_NAIL_MASK;\n\n  mpn_divexact_by3 (&limb, &limb, 1);\n}\n\nint\nmain (void)\n{\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-constr.cc",
    "content": "/* Test mp*_class constructors.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <string>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // mpz_class()\n  {\n    mpz_class a; ASSERT_ALWAYS(a == 0);\n  }\n\n  // mpz_class(const mpz_class &)\n  // see below\n\n  // template <class T, class U> mpz_class(const __gmp_expr<T, U> &)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // mpz_class(signed char)\n  {\n    signed char a = -127;\n    mpz_class b(a); ASSERT_ALWAYS(b == -127);\n  }\n\n  // mpz_class(unsigned char)\n  {\n    unsigned char a = 255;\n    mpz_class b(a); ASSERT_ALWAYS(b == 255);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpz_class a('A'); ASSERT_ALWAYS(a == 65);\n  }\n  {\n    mpz_class a('z'); ASSERT_ALWAYS(a == 122);\n  }\n\n  // mpz_class(signed int)\n  {\n    signed int a = 0;\n    mpz_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpz_class b(a); ASSERT_ALWAYS(b == -123);\n  }\n  {\n    signed int a = 4567;\n    mpz_class 
b(a); ASSERT_ALWAYS(b == 4567);\n  }\n\n  // mpz_class(unsigned int)\n  {\n    unsigned int a = 890;\n    mpz_class b(a); ASSERT_ALWAYS(b == 890);\n  }\n\n  // mpz_class(signed short int)\n  {\n    signed short int a = -12345;\n    mpz_class b(a); ASSERT_ALWAYS(b == -12345);\n  }\n\n  // mpz_class(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpz_class b(a); ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // mpz_class(signed long int)\n  {\n    signed long int a = -1234567890L;\n    mpz_class b(a); ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // mpz_class(unsigned long int)\n  {\n    unsigned long int a = 1UL << 30;\n    mpz_class b(a); ASSERT_ALWAYS(b == 1073741824L);\n  }\n\n  // mpz_class(float)\n  {\n    float a = 123.45;\n    mpz_class b(a); ASSERT_ALWAYS(b == 123);\n  }\n\n  // mpz_class(double)\n  {\n    double a = 3.141592653589793238;\n    mpz_class b(a); ASSERT_ALWAYS(b == 3);\n  }\n\n  // mpz_class(long double)\n  // currently not implemented\n\n  // mpz_class(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpz_class(const char *, int)\n  {\n    const char *a = \"FFFF\";\n    int base = 16;\n    mpz_class b(a, base); ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // mpz_class(const std::string &)\n  {\n    string a(\"1234567890\");\n    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpz_class(const std::string &, int)\n  {\n    string a(\"7777\");\n    int base = 8;\n    mpz_class b(a, base); ASSERT_ALWAYS(b == 4095);\n  }\n\n  // mpz_class(const char *) with invalid\n  {\n    try {\n      const char *a = \"ABC\";\n      mpz_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpz_class(const char *, int) with invalid\n  {\n    try {\n      const char *a = \"GHI\";\n      int base = 16;\n      mpz_class b(a, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch 
(invalid_argument) {\n    }\n  }\n\n  // mpz_class(const std::string &) with invalid\n  {\n    try {\n      string a(\"abc\");\n      mpz_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpz_class(const std::string &, int) with invalid\n  {\n    try {\n      string a(\"ZZZ\");\n      int base = 8;\n      mpz_class b(a, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpz_class(mpz_srcptr)\n  {\n    mpz_t a;\n    mpz_init_set_ui(a, 100);\n    mpz_class b(a); ASSERT_ALWAYS(b == 100);\n    mpz_clear(a);\n  }\n\n  // mpz_class(const mpz_class &)\n  {\n    mpz_class a(12345); // tested above, assume it works\n    mpz_class b(a); ASSERT_ALWAYS(b == 12345);\n  }\n\n  // no constructor for bool, but it gets casted to int\n  {\n    bool a = true;\n    mpz_class b(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    bool a = false;\n    mpz_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // mpq_class()\n  {\n    mpq_class a; ASSERT_ALWAYS(a == 0);\n  }\n\n  // mpq_class(const mpq_class &)\n  // see below\n\n  // template <class T, class U> mpq_class(const __gmp_expr<T, U> &)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // mpq_class(signed char)\n  {\n    signed char a = -127;\n    mpq_class b(a); ASSERT_ALWAYS(b == -127);\n  }\n\n  // mpq_class(unsigned char)\n  {\n    unsigned char a = 255;\n    mpq_class b(a); ASSERT_ALWAYS(b == 255);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpq_class a('A'); ASSERT_ALWAYS(a == 65);\n  }\n  {\n    mpq_class a('z'); ASSERT_ALWAYS(a == 122);\n  }\n\n  // mpq_class(signed int)\n  {\n    signed int a = 0;\n    mpq_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpq_class b(a); ASSERT_ALWAYS(b == -123);\n  }\n  {\n    signed int a = 4567;\n    mpq_class b(a); ASSERT_ALWAYS(b == 4567);\n  }\n\n  // mpq_class(unsigned int)\n 
 {\n    unsigned int a = 890;\n    mpq_class b(a); ASSERT_ALWAYS(b == 890);\n  }\n\n  // mpq_class(signed short int)\n  {\n    signed short int a = -12345;\n    mpq_class b(a); ASSERT_ALWAYS(b == -12345);\n  }\n\n  // mpq_class(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpq_class b(a); ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // mpq_class(signed long int)\n  {\n    signed long int a = -1234567890L;\n    mpq_class b(a); ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // mpq_class(unsigned long int)\n  {\n    unsigned long int a = 1UL << 30;\n    mpq_class b(a); ASSERT_ALWAYS(b == 1073741824L);\n  }\n\n  // mpq_class(float)\n  {\n    float a = 0.625;\n    mpq_class b(a); ASSERT_ALWAYS(b == 0.625);\n  }\n\n  // mpq_class(double)\n  {\n    double a = 1.25;\n    mpq_class b(a); ASSERT_ALWAYS(b == 1.25);\n  }\n\n  // mpq_class(long double)\n  // currently not implemented\n\n  // mpq_class(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpq_class(const char *, int)\n  {\n    const char *a = \"FFFF\";\n    int base = 16;\n    mpq_class b(a, base); ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // mpq_class(const std::string &)\n  {\n    string a(\"1234567890\");\n    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpq_class(const std::string &, int)\n  {\n    string a(\"7777\");\n    int base = 8;\n    mpq_class b(a, base); ASSERT_ALWAYS(b == 4095);\n  }\n\n  // mpq_class(const char *) with invalid\n  {\n    try {\n      const char *a = \"abc\";\n      mpq_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpq_class(const char *, int) with invalid\n  {\n    try {\n      const char *a = \"ZZZ\";\n      int base = 16;\n      mpq_class b (a, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpq_class(const std::string &) with invalid\n  {\n    try 
{\n      string a(\"abc\");\n      mpq_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpq_class(const std::string &, int) with invalid\n  {\n    try {\n      string a(\"ZZZ\");\n      int base = 8;\n      mpq_class b (a, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpq_class(mpq_srcptr)\n  {\n    mpq_t a;\n    mpq_init(a);\n    mpq_set_ui(a, 100, 1);\n    mpq_class b(a); ASSERT_ALWAYS(b == 100);\n    mpq_clear(a);\n  }\n\n  // mpq_class(const mpz_class &, const mpz_class &)\n  {\n    mpz_class a(123), b(4); // tested above, assume it works\n    mpq_class c(a, b); ASSERT_ALWAYS(c == 30.75);\n  }\n  {\n    mpz_class a(-1), b(2);  // tested above, assume it works\n    mpq_class c(a, b); ASSERT_ALWAYS(c == -0.5);\n  }\n  {\n    mpz_class a(5), b(4); // tested above, assume it works\n    mpq_class c(a, b); ASSERT_ALWAYS(c == 1.25);\n  }\n\n  // mpq_class(const mpz_class &)\n  {\n    mpq_class a(12345); // tested above, assume it works\n    mpq_class b(a); ASSERT_ALWAYS(b == 12345);\n  }\n\n  // no constructor for bool, but it gets casted to int\n  {\n    bool a = true;\n    mpq_class b(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    bool a = false;\n    mpq_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // mpf_class()\n  {\n    mpf_class a; ASSERT_ALWAYS(a == 0);\n  }\n\n  // mpf_class(const mpf_class &)\n  // mpf_class(const mpf_class &, unsigned long int)\n  // see below\n\n  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &)\n  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &,\n  //                                       unsigned long int)\n  // not tested here, see t-unary.cc, t-binary.cc\n\n  // mpf_class(signed char)\n  {\n    signed char a = -127;\n    mpf_class b(a); ASSERT_ALWAYS(b == -127);\n  }\n\n  // mpf_class(signed char, unsigned long int)\n  {\n    signed 
char a = -1;\n    int prec = 64;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1);\n  }\n\n  // mpf_class(unsigned char)\n  {\n    unsigned char a = 255;\n    mpf_class b(a); ASSERT_ALWAYS(b == 255);\n  }\n\n  // mpf_class(unsigned char, unsigned long int)\n  {\n    unsigned char a = 128;\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 128);\n  }\n\n  // either signed or unsigned char, machine dependent\n  {\n    mpf_class a('A'); ASSERT_ALWAYS(a == 65);\n  }\n  {\n    int prec = 256;\n    mpf_class a('z', prec); ASSERT_ALWAYS(a == 122);\n  }\n\n  // mpf_class(signed int)\n  {\n    signed int a = 0;\n    mpf_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    signed int a = -123;\n    mpf_class b(a); ASSERT_ALWAYS(b == -123);\n  }\n  {\n    signed int a = 4567;\n    mpf_class b(a); ASSERT_ALWAYS(b == 4567);\n  }\n\n  // mpf_class(signed int, unsigned long int)\n  {\n    signed int a = -123;\n    int prec = 64;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == -123);\n  }\n\n  // mpf_class(unsigned int)\n  {\n    unsigned int a = 890;\n    mpf_class b(a); ASSERT_ALWAYS(b == 890);\n  }\n\n  // mpf_class(unsigned int, unsigned long int)\n  {\n    unsigned int a = 890;\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 890);\n  }\n\n  // mpf_class(signed short int)\n  {\n    signed short int a = -12345;\n    mpf_class b(a); ASSERT_ALWAYS(b == -12345);\n  }\n\n  // mpf_class(signed short int, unsigned long int)\n  {\n    signed short int a = 6789;\n    int prec = 256;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 6789);\n  }\n\n  // mpf_class(unsigned short int)\n  {\n    unsigned short int a = 54321u;\n    mpf_class b(a); ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // mpf_class(unsigned short int, unsigned long int)\n  {\n    unsigned short int a = 54321u;\n    int prec = 64;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321u);\n  }\n\n  // mpf_class(signed long int)\n  {\n    signed long int a = -1234567890L;\n    mpf_class b(a); 
ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // mpf_class(signed long int, unsigned long int)\n  {\n    signed long int a = -1234567890L;\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1234567890L);\n  }\n\n  // mpf_class(unsigned long int)\n  {\n    unsigned long int a = 3456789012UL;\n    mpf_class b(a); ASSERT_ALWAYS(b == 3456789012UL);\n  }\n\n  // mpf_class(unsigned long int, unsigned long int)\n  {\n    unsigned long int a = 3456789012UL;\n    int prec = 256;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 3456789012UL);\n  }\n\n  // mpf_class(float)\n  {\n    float a = 1234.5;\n    mpf_class b(a); ASSERT_ALWAYS(b == 1234.5);\n  }\n\n  // mpf_class(float, unsigned long int)\n  {\n    float a = 1234.5;\n    int prec = 64;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234.5);\n  }\n\n  // mpf_class(double)\n  {\n    double a = 12345.0;\n    mpf_class b(a); ASSERT_ALWAYS(b == 12345);\n  }\n  {\n    double a = 1.2345e+4;\n    mpf_class b(a); ASSERT_ALWAYS(b == 12345);\n  }\n  {\n    double a = 312.5e-2;\n    mpf_class b(a); ASSERT_ALWAYS(b == 3.125);\n  }\n\n  // mpf_class(double, unsigned long int)\n  {\n    double a = 5.4321e+4;\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321L);\n  }\n\n  // mpf_class(long double)\n  // mpf_class(long double, unsigned long int)\n  // currently not implemented\n\n  // mpf_class(const char *)\n  {\n    const char *a = \"1234567890\";\n    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpf_class(const char *, unsigned long int, int = 0)\n  {\n    const char *a = \"1234567890\";\n    int prec = 256;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);\n  }\n  {\n    const char *a = \"777777\";\n    int prec = 64, base = 8;\n    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 262143L);\n  }\n\n  // mpf_class(const std::string &)\n  {\n    string a(\"1234567890\");\n    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);\n  }\n\n  // mpf_class(const std::string &, unsigned 
long int, int = 0)\n  {\n    string a(\"1234567890\");\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);\n  }\n  {\n    string a(\"FFFF\");\n    int prec = 256, base = 16;\n    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 65535u);\n  }\n\n  // mpf_class(const char *) with invalid\n  {\n    try {\n      const char *a = \"abc\";\n      mpf_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpf_class(const char *, unsigned long int, int = 0) with invalid\n  {\n    try {\n      const char *a = \"def\";\n      int prec = 256;\n      mpf_class b(a, prec);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n  {\n    try {\n      const char *a = \"ghi\";\n      int prec = 64, base = 8;\n      mpf_class b(a, prec, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpf_class(const std::string &) with invalid\n  {\n    try {\n      string a(\"abc\");\n      mpf_class b(a);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpf_class(const std::string &, unsigned long int, int = 0) with invalid\n  {\n    try {\n      string a(\"def\");\n      int prec = 128;\n      mpf_class b(a, prec);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n  {\n    try {\n      string a(\"ghi\");\n      int prec = 256, base = 16;\n      mpf_class b(a, prec, base);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (invalid_argument) {\n    }\n  }\n\n  // mpf_class(mpf_srcptr)\n  {\n    mpf_t a;\n    mpf_init_set_ui(a, 100);\n    mpf_class b(a); ASSERT_ALWAYS(b == 100);\n    mpf_clear(a);\n  }\n\n  // 
mpf_class(mpf_srcptr, unsigned long int)\n  {\n    mpf_t a;\n    int prec = 64;\n    mpf_init_set_ui(a, 100);\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 100);\n    mpf_clear(a);\n  }\n\n  // mpf_class(const mpf_class &)\n  {\n    mpf_class a(12345); // tested above, assume it works\n    mpf_class b(a); ASSERT_ALWAYS(b == 12345);\n  }\n\n  // mpf_class(const mpf_class &, unsigned long int)\n  {\n    mpf_class a(12345); // tested above, assume it works\n    int prec = 64;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 12345);\n  }\n\n  // no constructors for bool, but it gets casted to int\n  {\n    bool a = true;\n    mpf_class b(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    bool a = false;\n    mpf_class b(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    bool a = true;\n    int prec = 128;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    bool a = false;\n    int prec = 256;\n    mpf_class b(a, prec); ASSERT_ALWAYS(b == 0);\n  }\n}\n\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-headers.cc",
    "content": "/* Test that mpirxx.h compiles correctly.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpirxx.h\"\n\nint\nmain (void)\n{\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-istream.cc",
    "content": "/* Test istream formatted input.\n\nCopyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <cstdlib>\n#include <cstring>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\n// Under option_check_standard, the various test cases for mpz operator>>\n// are put through the standard operator>> for long, and likewise mpf\n// operator>> is put through double.\n//\n// In g++ 3.3 this results in some printouts about the final position\n// indicated for something like \".e123\".  Our mpf code stops at the \"e\"\n// since there's no mantissa digits, but g++ reads the whole thing and only\n// then decides it's bad.\n\nint   option_check_standard = 0;\n\n\n// On some versions of g++ 2.96 it's been observed that putback() may leave\n// tellg() unchanged.  We believe this is incorrect and presumably the\n// result of a bug, since for instance it's ok in g++ 2.95 and g++ 3.3.  
We\n// detect the problem at runtime and disable affected checks.\n\nint putback_tellg_works = 1;\n\nvoid\ncheck_putback_tellg (void)\n{\n  istringstream input (\"hello\");\n  streampos  old_pos, new_pos;\n  char  c;\n\n  input.get(c);\n  old_pos = input.tellg();\n  input.putback(c);\n  new_pos = input.tellg();\n\n  if (old_pos == new_pos)\n    {\n      cout << \"Warning, istringstream has a bug: putback() doesn't update tellg().\\n\";;\n      cout << \"Tests on tellg() will be skipped.\\n\";\n      putback_tellg_works = 0;\n    }\n}\n\n\n#define WRONG(str)                                              \\\n  do {                                                          \\\n    cout << str \", data[\" << i << \"]\\n\";                        \\\n    cout << \"  input: \\\"\" << data[i].input << \"\\\"\\n\";           \\\n    cout << \"  flags: \" << hex << input.flags() << dec << \"\\n\"; \\\n  } while (0)\n\nvoid\ncheck_mpz (void)\n{\n  static const struct {\n    const char     *input;\n    int            want_pos;\n    const char     *want;\n    ios::fmtflags  flags;\n\n  } data[] = {\n\n    { \"0\",      -1, \"0\",    (ios::fmtflags) 0 },\n    { \"123\",    -1, \"123\",  (ios::fmtflags) 0 },\n    { \"0123\",   -1, \"83\",   (ios::fmtflags) 0 },\n    { \"0x123\",  -1, \"291\",  (ios::fmtflags) 0 },\n    { \"-123\",   -1, \"-123\", (ios::fmtflags) 0 },\n    { \"-0123\",  -1, \"-83\",  (ios::fmtflags) 0 },\n    { \"-0x123\", -1, \"-291\", (ios::fmtflags) 0 },\n    { \"+123\",   -1, \"123\", (ios::fmtflags) 0 },\n    { \"+0123\",  -1, \"83\",  (ios::fmtflags) 0 },\n    { \"+0x123\", -1, \"291\", (ios::fmtflags) 0 },\n\n    { \"0\",     -1, \"0\",    ios::dec },\n    { \"1f\",     1, \"1\",    ios::dec },\n    { \"011f\",   3, \"11\",   ios::dec },\n    { \"123\",   -1, \"123\",  ios::dec },\n    { \"-1f\",    2, \"-1\",   ios::dec },\n    { \"-011f\",  4, \"-11\",  ios::dec },\n    { \"-123\",  -1, \"-123\", ios::dec },\n    { \"+1f\",    2, \"1\",    ios::dec },\n    
{ \"+011f\",  4, \"11\",   ios::dec },\n    { \"+123\",  -1, \"123\",  ios::dec },\n\n    { \"0\",    -1, \"0\",   ios::oct },\n    { \"123\",  -1, \"83\",  ios::oct },\n    { \"-123\", -1, \"-83\", ios::oct },\n    { \"+123\", -1, \"83\",  ios::oct },\n\n    { \"0\",    -1, \"0\",    ios::hex },\n    { \"123\",  -1, \"291\",  ios::hex },\n    { \"ff\",   -1, \"255\",  ios::hex },\n    { \"FF\",   -1, \"255\",  ios::hex },\n    { \"-123\", -1, \"-291\", ios::hex },\n    { \"-ff\",  -1, \"-255\", ios::hex },\n    { \"-FF\",  -1, \"-255\", ios::hex },\n    { \"+123\", -1, \"291\",  ios::hex },\n    { \"+ff\",  -1, \"255\",  ios::hex },\n    { \"+FF\",  -1, \"255\",  ios::hex },\n    { \"ab\",   -1, \"171\",  ios::hex },\n    { \"cd\",   -1, \"205\",  ios::hex },\n    { \"ef\",   -1, \"239\",  ios::hex },\n\n    { \" 123\",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws\n    { \" 123\", -1, \"123\", ios::skipws },\n  };\n\n  mpz_t      got, want;\n  int        got_ok, want_ok;\n  long       got_si, want_si;\n  streampos  init_tellg, got_pos, want_pos;\n\n  mpz_init (got);\n  mpz_init (want);\n\n  for (size_t i = 0; i < numberof (data); i++)\n    {\n      want_pos = (data[i].want_pos == -1\n                  ? strlen (data[i].input) : data[i].want_pos);\n\n      want_ok = (data[i].want != NULL);\n\n      if (data[i].want != NULL)\n        mpz_set_str_or_abort (want, data[i].want, 0);\n      else\n        mpz_set_ui (want, 0L);\n\n      if (option_check_standard && mpz_fits_slong_p (want))\n        {\n          istringstream  input (data[i].input);\n          input.flags (data[i].flags);\n          init_tellg = input.tellg();\n          want_si = mpz_get_si (want);\n\n          input >> got_si;\n          got_ok = (input ? 
1 : 0);\n          input.clear();\n          got_pos = input.tellg() - init_tellg;\n\n          if (got_ok != want_ok)\n            {\n              WRONG (\"stdc++ operator>> wrong status, check_mpz\");\n              cout << \"  want_ok: \" << want_ok << \"\\n\";\n              cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            }\n          if (want_ok && got_si != want_si)\n            {\n              WRONG (\"stdc++ operator>> wrong result, check_mpz\");\n              cout << \"  got_si:  \" << got_si << \"\\n\";\n              cout << \"  want_si: \" << want_si << \"\\n\";\n            }\n          if (putback_tellg_works && got_pos != want_pos)\n            {\n              WRONG (\"stdc++ operator>> wrong position, check_mpz\");\n              cout << \"  want_pos: \" << want_pos << \"\\n\";\n              cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            }\n        }\n\n      {\n        istringstream  input (data[i].input);\n        input.flags (data[i].flags);\n        init_tellg = input.tellg();\n\n        mpz_set_ui (got, 0xDEAD);\n        input >> got;\n        got_ok = (input ? 
1 : 0);\n        input.clear();\n        got_pos = input.tellg() - init_tellg;\n\n        if (got_ok != want_ok)\n          {\n            WRONG (\"mpz operator>> wrong status\");\n            cout << \"  want_ok: \" << want_ok << \"\\n\";\n            cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            abort ();\n          }\n        if (want_ok && mpz_cmp (got, want) != 0)\n          {\n            WRONG (\"mpz operator>> wrong result\");\n            mpz_trace (\"  got \", got);\n            mpz_trace (\"  want\", want);\n            abort ();\n          }\n        if (putback_tellg_works && got_pos != want_pos)\n          {\n            WRONG (\"mpz operator>> wrong position\");\n            cout << \"  want_pos: \" << want_pos << \"\\n\";\n            cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            abort ();\n          }\n      }\n    }\n\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\nvoid\ncheck_mpq (void)\n{\n  static const struct {\n    const char     *input;\n    int            want_pos;\n    const char     *want;\n    ios::fmtflags  flags;\n\n  } data[] = {\n\n    { \"0\",   -1, \"0\", (ios::fmtflags) 0 },\n    { \"00\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \"0x0\", -1, \"0\", (ios::fmtflags) 0 },\n\n    { \"123/456\",   -1, \"123/456\", ios::dec },\n    { \"0123/456\",  -1, \"123/456\", ios::dec },\n    { \"123/0456\",  -1, \"123/456\", ios::dec },\n    { \"0123/0456\", -1, \"123/456\", ios::dec },\n\n    { \"123/456\",   -1, \"83/302\", ios::oct },\n    { \"0123/456\",  -1, \"83/302\", ios::oct },\n    { \"123/0456\",  -1, \"83/302\", ios::oct },\n    { \"0123/0456\", -1, \"83/302\", ios::oct },\n\n    { \"ab\",   -1, \"171\",  ios::hex },\n    { \"cd\",   -1, \"205\",  ios::hex },\n    { \"ef\",   -1, \"239\",  ios::hex },\n\n    { \"0/0\",     -1, \"0/0\", (ios::fmtflags) 0 },\n    { \"5/8\",     -1, \"5/8\", (ios::fmtflags) 0 },\n    { \"0x5/0x8\", -1, \"5/8\", (ios::fmtflags) 0 },\n\n    { \"123/456\",   -1, \"123/456\",  
(ios::fmtflags) 0 },\n    { \"123/0456\",  -1, \"123/302\",  (ios::fmtflags) 0 },\n    { \"123/0x456\", -1, \"123/1110\", (ios::fmtflags) 0 },\n    { \"123/0X456\", -1, \"123/1110\", (ios::fmtflags) 0 },\n\n    { \"0123/123\",   -1, \"83/123\", (ios::fmtflags) 0 },\n    { \"0123/0123\",  -1, \"83/83\",  (ios::fmtflags) 0 },\n    { \"0123/0x123\", -1, \"83/291\", (ios::fmtflags) 0 },\n    { \"0123/0X123\", -1, \"83/291\", (ios::fmtflags) 0 },\n\n    { \"0x123/123\",   -1, \"291/123\", (ios::fmtflags) 0 },\n    { \"0X123/0123\",  -1, \"291/83\",  (ios::fmtflags) 0 },\n    { \"0x123/0x123\", -1, \"291/291\", (ios::fmtflags) 0 },\n\n    { \" 123\",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws\n    { \" 123\", -1, \"123\", ios::skipws },\n  };\n\n  mpq_t      got, want;\n  int        got_ok, want_ok;\n  long       got_si, want_si;\n  streampos  init_tellg, got_pos, want_pos;\n\n  mpq_init (got);\n  mpq_init (want);\n\n  for (size_t i = 0; i < numberof (data); i++)\n    {\n      want_pos = (data[i].want_pos == -1\n                  ? strlen (data[i].input) : data[i].want_pos);\n\n      want_ok = (data[i].want != NULL);\n\n      if (data[i].want != NULL)\n        mpq_set_str_or_abort (want, data[i].want, 0);\n      else\n        mpq_set_ui (want, 0L, 1L);\n\n      if (option_check_standard\n          && mpz_fits_slong_p (mpq_numref(want))\n          && mpz_cmp_ui (mpq_denref(want), 1L) == 0)\n        {\n          istringstream  input (data[i].input);\n          input.flags (data[i].flags);\n          init_tellg = input.tellg();\n          want_si = mpz_get_si (mpq_numref(want));\n\n          input >> got_si;\n          got_ok = (input ? 
1 : 0);\n          input.clear();\n          got_pos = input.tellg() - init_tellg;\n\n          if (got_ok != want_ok)\n            {\n              WRONG (\"stdc++ operator>> wrong status, check_mpq\");\n              cout << \"  want_ok: \" << want_ok << \"\\n\";\n              cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            }\n          if (want_ok && want_si != got_si)\n            {\n              WRONG (\"stdc++ operator>> wrong result, check_mpq\");\n              cout << \"  got_si:  \" << got_si << \"\\n\";\n              cout << \"  want_si: \" << want_si << \"\\n\";\n            }\n          if (putback_tellg_works && got_pos != want_pos)\n            {\n              WRONG (\"stdc++ operator>> wrong position, check_mpq\");\n              cout << \"  want_pos: \" << want_pos << \"\\n\";\n              cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            }\n        }\n\n      {\n        istringstream  input (data[i].input);\n        input.flags (data[i].flags);\n        init_tellg = input.tellg();\n        mpq_set_si (got, 0xDEAD, 0xBEEF);\n\n        input >> got;\n        got_ok = (input ? 
1 : 0);\n        input.clear();\n        got_pos = input.tellg() - init_tellg;\n\n        if (got_ok != want_ok)\n          {\n            WRONG (\"mpq operator>> wrong status\");\n            cout << \"  want_ok: \" << want_ok << \"\\n\";\n            cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            abort ();\n          }\n        // don't use mpq_equal, since we allow non-normalized values to be\n        // read, which can trigger ASSERTs in mpq_equal\n        if (want_ok && (mpz_cmp (mpq_numref (got), mpq_numref(want)) != 0\n                        || mpz_cmp (mpq_denref (got), mpq_denref(want)) != 0))\n          {\n            WRONG (\"mpq operator>> wrong result\");\n            mpq_trace (\"  got \", got);\n            mpq_trace (\"  want\", want);\n            abort ();\n          }\n        if (putback_tellg_works && got_pos != want_pos)\n          {\n            WRONG (\"mpq operator>> wrong position\");\n            cout << \"  want_pos: \" << want_pos << \"\\n\";\n            cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            abort ();\n          }\n      }\n    }\n\n  mpq_clear (got);\n  mpq_clear (want);\n}\n\n\nvoid\ncheck_mpf (void)\n{\n  static const struct {\n    const char     *input;\n    int            want_pos;\n    const char     *want;\n    ios::fmtflags  flags;\n\n  } data[] = {\n\n    { \"0\",      -1, \"0\", (ios::fmtflags) 0 },\n    { \"+0\",     -1, \"0\", (ios::fmtflags) 0 },\n    { \"-0\",     -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.0\",    -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.\",     -1, \"0\", (ios::fmtflags) 0 },\n    { \".0\",     -1, \"0\", (ios::fmtflags) 0 },\n    { \"+.0\",    -1, \"0\", (ios::fmtflags) 0 },\n    { \"-.0\",    -1, \"0\", (ios::fmtflags) 0 },\n    { \"+0.00\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \"-0.000\", -1, \"0\", (ios::fmtflags) 0 },\n    { \"+0.00\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \"-0.000\", -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.0e0\",  -1, \"0\", 
(ios::fmtflags) 0 },\n    { \"0.e0\",   -1, \"0\", (ios::fmtflags) 0 },\n    { \".0e0\",   -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.0e-0\", -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.e-0\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \".0e-0\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.0e+0\", -1, \"0\", (ios::fmtflags) 0 },\n    { \"0.e+0\",  -1, \"0\", (ios::fmtflags) 0 },\n    { \".0e+0\",  -1, \"0\", (ios::fmtflags) 0 },\n\n    { \"1\",  -1,  \"1\", (ios::fmtflags) 0 },\n    { \"+1\", -1,  \"1\", (ios::fmtflags) 0 },\n    { \"-1\", -1, \"-1\", (ios::fmtflags) 0 },\n\n    { \" 0\",  0,  NULL, (ios::fmtflags) 0 },  // not without skipws\n    { \" 0\",  -1, \"0\", ios::skipws },\n    { \" +0\", -1, \"0\", ios::skipws },\n    { \" -0\", -1, \"0\", ios::skipws },\n\n    { \"+-123\", 1, NULL, (ios::fmtflags) 0 },\n    { \"-+123\", 1, NULL, (ios::fmtflags) 0 },\n    { \"1e+-123\", 3, NULL, (ios::fmtflags) 0 },\n    { \"1e-+123\", 3, NULL, (ios::fmtflags) 0 },\n\n    { \"e123\",   0, NULL, (ios::fmtflags) 0 }, // at least one mantissa digit\n    { \".e123\",  1, NULL, (ios::fmtflags) 0 },\n    { \"+.e123\", 2, NULL, (ios::fmtflags) 0 },\n    { \"-.e123\", 2, NULL, (ios::fmtflags) 0 },\n\n    { \"123e\",   4, NULL, (ios::fmtflags) 0 }, // at least one exponent digit\n    { \"123e-\",  5, NULL, (ios::fmtflags) 0 },\n    { \"123e+\",  5, NULL, (ios::fmtflags) 0 },\n  };\n\n  mpf_t      got, want;\n  int        got_ok, want_ok;\n  double     got_d, want_d;\n  streampos  init_tellg, got_pos, want_pos;\n\n  mpf_init (got);\n  mpf_init (want);\n\n  for (size_t i = 0; i < numberof (data); i++)\n    {\n      want_pos = (data[i].want_pos == -1\n                  ? 
strlen (data[i].input) : data[i].want_pos);\n\n      want_ok = (data[i].want != NULL);\n\n      if (data[i].want != NULL)\n        mpf_set_str_or_abort (want, data[i].want, 0);\n      else\n        mpf_set_ui (want, 0L);\n\n      want_d = mpf_get_d (want);\n      if (option_check_standard && mpf_cmp_d (want, want_d) == 0)\n        {\n          istringstream  input (data[i].input);\n          input.flags (data[i].flags);\n          init_tellg = input.tellg();\n\n          input >> got_d;\n          got_ok = (input ? 1 : 0);\n          input.clear();\n          got_pos = input.tellg() - init_tellg;\n\n          if (got_ok != want_ok)\n            {\n              WRONG (\"stdc++ operator>> wrong status, check_mpf\");\n              cout << \"  want_ok: \" << want_ok << \"\\n\";\n              cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            }\n          if (want_ok && want_d != got_d)\n            {\n              WRONG (\"stdc++ operator>> wrong result, check_mpf\");\n              cout << \"  got:   \" << got_d << \"\\n\";\n              cout << \"  want:  \" << want_d << \"\\n\";\n            }\n          if (putback_tellg_works && got_pos != want_pos)\n            {\n              WRONG (\"stdc++ operator>> wrong position, check_mpf\");\n              cout << \"  want_pos: \" << want_pos << \"\\n\";\n              cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            }\n        }\n\n      {\n        istringstream  input (data[i].input);\n        input.flags (data[i].flags);\n        init_tellg = input.tellg();\n\n        mpf_set_ui (got, 0xDEAD);\n        input >> got;\n        got_ok = (input ? 
1 : 0);\n        input.clear();\n        got_pos = input.tellg() - init_tellg;\n\n        if (got_ok != want_ok)\n          {\n            WRONG (\"mpf operator>> wrong status\");\n            cout << \"  want_ok: \" << want_ok << \"\\n\";\n            cout << \"  got_ok:  \" << got_ok << \"\\n\";\n            abort ();\n          }\n        if (want_ok && mpf_cmp (got, want) != 0)\n          {\n            WRONG (\"mpf operator>> wrong result\");\n            mpf_trace (\"  got \", got);\n            mpf_trace (\"  want\", want);\n            abort ();\n          }\n        if (putback_tellg_works && got_pos != want_pos)\n          {\n            WRONG (\"mpf operator>> wrong position\");\n            cout << \"  want_pos: \" << want_pos << \"\\n\";\n            cout << \"  got_pos:  \" << got_pos << \"\\n\";\n            abort ();\n          }\n      }\n    }\n\n  mpf_clear (got);\n  mpf_clear (want);\n}\n\n\n\nint\nmain (int argc, char *argv[])\n{\n  if (argc > 1 && strcmp (argv[1], \"-s\") == 0)\n    option_check_standard = 1;\n\n  tests_start ();\n\n  check_putback_tellg ();\n  check_mpz ();\n  check_mpq ();\n  check_mpf ();\n\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-locale.cc",
    "content": "/* Test locale support in C++ functions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <clocale>\n#include <iostream>\n#include <cstdlib>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\nextern \"C\"\n{\n    char point_string[2];\n}\n\n#if HAVE_STD__LOCALE\n// Like std::numpunct, but with decimal_point coming from point_string[].\nclass my_numpunct : public numpunct<char> {\n public:\n  explicit my_numpunct (size_t r = 0) : numpunct<char>(r) { }\n protected:\n  char do_decimal_point() const { return point_string[0]; }\n};\n#endif\n\nvoid\nset_point (char c)\n{\n  point_string[0] = c;\n\n#if HAVE_STD__LOCALE\n  locale loc (locale::classic(), new my_numpunct ());\n  locale::global (loc);\n#endif\n}\n\n\nvoid\ncheck_input (void)\n{\n  static const struct {\n    const char  *str1;\n    const char  *str2;\n    double      want;\n  } data[] = {\n\n    { \"1\",\"\",   1.0 },\n    { \"1\",\"0\",  1.0 },\n    { \"1\",\"00\", 1.0 },\n\n    { \"\",\"5\",    0.5 },\n    { \"0\",\"5\",   0.5 },\n    { \"00\",\"5\",  0.5 },\n    { \"00\",\"50\", 0.5 },\n\n    { 
\"1\",\"5\",    1.5 },\n    { \"1\",\"5e1\", 15.0 },\n  };\n\n  static char point[] = {\n    '.', ',', 'x', '\\xFF'\n  };\n\n  mpf_t  got;\n  mpf_init (got);\n\n  for (size_t i = 0; i < numberof (point); i++)\n    {\n      set_point (point[i]);\n\n      for (int neg = 0; neg <= 1; neg++)\n        {\n          for (size_t j = 0; j < numberof (data); j++)\n            {\n              string str = string(data[j].str1)+point[i]+string(data[j].str2);\n              if (neg)\n                str = \"-\" + str;\n\n              istringstream is (str.c_str());\n\n              mpf_set_ui (got, 123);   // dummy initial value\n\n              if (! (is >> got))\n                {\n                  cout << \"istream mpf_t operator>> error\\n\";\n                  cout << \"  point \" << point[i] << \"\\n\";\n                  cout << \"  str   \\\"\" << str << \"\\\"\\n\";\n                  cout << \"  localeconv point \\\"\"\n                       << localeconv()->decimal_point << \"\\\"\\n\";\n                  abort ();\n                }\n\n              double want = data[j].want;\n              if (neg)\n                want = -want;\n              if (mpf_cmp_d (got, want) != 0)\n                {\n                  cout << \"istream mpf_t operator>> wrong\\n\";\n                  cout << \"  point \" << point[i] << \"\\n\";\n                  cout << \"  str   \\\"\" << str << \"\\\"\\n\";\n                  cout << \"  got   \" << got << \"\\n\";\n                  cout << \"  want  \" << want << \"\\n\";\n                  cout << \"  localeconv point \\\"\"\n                       << localeconv()->decimal_point << \"\\\"\\n\";\n                  abort ();\n                }\n            }\n        }\n    }\n\n  mpf_clear (got);\n}\n\nvoid\ncheck_output (void)\n{\n  static char point[] = {\n    '.', ',', 'x', '\\xFF'\n  };\n\n  for (size_t i = 0; i < numberof (point); i++)\n    {\n      set_point (point[i]);\n      ostringstream  got;\n\n      mpf_t  f;\n      
mpf_init (f);\n      mpf_set_d (f, 1.5);\n      got << f;\n      mpf_clear (f);\n\n      string  want = string(\"1\") + point[i] + string(\"5\");\n\n      if (want.compare (got.str()) != 0)\n        {\n          cout << \"ostream mpf_t operator<< doesn't respect locale\\n\";\n          cout << \"  point \" << point[i] << \"\\n\";\n          cout << \"  got   \\\"\" << got.str() << \"\\\"\\n\";\n          cout << \"  want  \\\"\" << want      << \"\\\"\\n\";\n          abort ();\n        }\n    }\n}\n\nint\nreplacement_works (void)\n{\n  set_point ('x');\n  mpf_t  f;\n  mpf_init (f);\n  mpf_set_d (f, 1.5);\n  ostringstream s;\n  s << f;\n  mpf_clear (f);\n\n  return (s.str().compare(\"1x5\") == 0);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  if (replacement_works())\n    {\n      check_input ();\n      check_output ();\n    }\n  else\n    {\n      cout << \"Replacing decimal point didn't work, tests skipped\\n\";\n    }\n\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-misc.cc",
    "content": "/* Test mp*_class functions.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Note that we don't use <climits> for LONG_MIN, but instead our own\n   definitions in gmp-impl.h.  In g++ 2.95.4 (debian 3.0) under\n   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that\n   means long is 64-bit long, but it's only 32-bits, causing fatal compile\n   errors.  */\n\n#include <string>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // mpz_class::fits_sint_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = INT_MIN; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);\n    z--;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);\n    z = INT_MAX; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);\n    z++;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpz_class::fits_uint_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = 0;        fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);\n    z--;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! 
fits);\n    z = UINT_MAX; fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);\n    z++;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpz_class::fits_slong_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = LONG_MIN; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);\n    z--;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);\n    z = LONG_MAX; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);\n    z++;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpz_class::fits_ulong_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = 0;         fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);\n    z--;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);\n    z = ULONG_MAX; fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);\n    z++;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpz_class::fits_sshort_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = SHRT_MIN; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);\n    z--;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);\n    z = SHRT_MAX; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);\n    z++;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpz_class::fits_ushort_p\n  {\n    int        fits;\n    mpz_class  z;\n    z = 0;         fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);\n    z--;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);\n    z = USHRT_MAX; fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);\n    z++;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! 
fits);\n  }\n\n  // mpz_class::get_mpz_t\n  {\n    mpz_class  z(0);\n    mpz_ptr    p = z.get_mpz_t();\n    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);\n  }\n  {\n    mpz_class  z(0);\n    mpz_srcptr p = z.get_mpz_t();\n    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);\n  }\n\n  // mpz_class::get_d\n  // mpz_class::get_si\n  // mpz_class::get_ui\n  {\n    mpz_class  z(123);\n    { double d = z.get_d();  ASSERT_ALWAYS (d == 123.0); }\n    { long   l = z.get_si(); ASSERT_ALWAYS (l == 123L); }\n    { long   u = z.get_ui(); ASSERT_ALWAYS (u == 123L); }\n  }\n  {\n    mpz_class  z(-123);\n    { double d = z.get_d();  ASSERT_ALWAYS (d == -123.0); }\n    { long   l = z.get_si(); ASSERT_ALWAYS (l == -123L); }\n  }\n\n  // mpz_class::get_str\n  {\n    mpz_class  z(123);\n    string     s;\n    s = z.get_str(); ASSERT_ALWAYS (s == \"123\");\n    s = z.get_str(16); ASSERT_ALWAYS (s == \"7b\");\n    s = z.get_str(-16); ASSERT_ALWAYS (s == \"7B\");\n  }\n\n  // mpz_class::set_str\n  {\n    mpz_class  z;\n    int        ret;\n    ret = z.set_str (\"123\", 10);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (\"7b\",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (\"7B\",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (\"0x7B\", 0);  ASSERT_ALWAYS (ret == 0 && z == 123);\n\n    ret = z.set_str (string(\"123\"), 10);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (string(\"7b\"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (string(\"7B\"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);\n    ret = z.set_str (string(\"0x7B\"), 0);  ASSERT_ALWAYS (ret == 0 && z == 123);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // mpq_class::canonicalize\n  {\n    mpq_class  q(12,9);\n    q.canonicalize();\n    ASSERT_ALWAYS (q.get_num() == 4);\n    ASSERT_ALWAYS (q.get_den() == 3);\n  }\n\n  // mpq_class::get_d\n  {\n    mpq_class  q(123);\n    { double d = q.get_d();  ASSERT_ALWAYS (d == 123.0); }\n  }\n  {\n    mpq_class  
q(-123);\n    { double d = q.get_d();  ASSERT_ALWAYS (d == -123.0); }\n  }\n\n  // mpq_class::get_mpq_t\n  {\n    mpq_class  q(0);\n    mpq_ptr    p = q.get_mpq_t();\n    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);\n  }\n  {\n    mpq_class  q(0);\n    mpq_srcptr p = q.get_mpq_t();\n    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);\n  }\n\n  // mpq_class::get_num, mpq_class::get_den\n  {\n    mpq_class  q(4,5);\n    mpz_class  z;\n    z = q.get_num(); ASSERT_ALWAYS (z == 4);\n    z = q.get_den(); ASSERT_ALWAYS (z == 5);\n  }\n\n  // mpq_class::get_num_mpz_t, mpq_class::get_den_mpz_t\n  {\n    mpq_class  q(4,5);\n    mpz_ptr    p;\n    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);\n    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);\n  }\n  {\n    mpq_class  q(4,5);\n    mpz_srcptr p;\n    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);\n    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);\n  }\n\n  // mpq_class::get_str\n  {\n    mpq_class  q(17,11);\n    string     s;\n    s = q.get_str();    ASSERT_ALWAYS (s == \"17/11\");\n    s = q.get_str(10);  ASSERT_ALWAYS (s == \"17/11\");\n    s = q.get_str(16);  ASSERT_ALWAYS (s == \"11/b\");\n    s = q.get_str(-16); ASSERT_ALWAYS (s == \"11/B\");\n  }\n\n  // mpq_class::set_str\n  {\n    mpq_class  q;\n    int        ret;\n    ret = q.set_str (\"123\", 10);     ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (\"4/5\", 10);     ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));\n    ret = q.set_str (\"7b\",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (\"7B\",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (\"0x7B\", 0);     ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (\"0x10/17\", 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));\n\n    ret = q.set_str (string(\"4/5\"), 10);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));\n    ret = q.set_str (string(\"123\"), 10);  ASSERT_ALWAYS (ret == 0 
&& q == 123);\n    ret = q.set_str (string(\"7b\"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (string(\"7B\"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (string(\"0x7B\"), 0);  ASSERT_ALWAYS (ret == 0 && q == 123);\n    ret = q.set_str (string(\"0x10/17\"), 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // mpf_class::fits_sint_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(int));\n    f = INT_MIN; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);\n    f--;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);\n    f = INT_MAX; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);\n    f++;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpf_class::fits_uint_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(int));\n    f = 0;        fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);\n    f--;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);\n    f = UINT_MAX; fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);\n    f++;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpf_class::fits_slong_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(long));\n    f = LONG_MIN; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);\n    f--;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);\n    f = LONG_MAX; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);\n    f++;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpf_class::fits_ulong_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(long));\n    f = 0;         fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);\n    f--;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);\n    f = ULONG_MAX; fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);\n    f++;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! 
fits);\n  }\n\n  // mpf_class::fits_sshort_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(short));\n    f = SHRT_MIN; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);\n    f--;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);\n    f = SHRT_MAX; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);\n    f++;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpf_class::fits_ushort_p\n  {\n    int        fits;\n    mpf_class  f (0, 2*8*sizeof(short));\n    f = 0;         fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);\n    f--;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);\n    f = USHRT_MAX; fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);\n    f++;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);\n  }\n\n  // mpf_class::get_d\n  // mpf_class::get_si\n  // mpf_class::get_ui\n  {\n    mpf_class  f(123);\n    { double d = f.get_d();  ASSERT_ALWAYS (d == 123.0); }\n    { long   l = f.get_si(); ASSERT_ALWAYS (l == 123L); }\n    { long   u = f.get_ui(); ASSERT_ALWAYS (u == 123L); }\n  }\n  {\n    mpf_class  f(-123);\n    { double d = f.get_d();  ASSERT_ALWAYS (d == -123.0); }\n    { long   l = f.get_si(); ASSERT_ALWAYS (l == -123L); }\n  }\n\n  // mpf_class::get_prec\n  {\n    mpf_class  f;\n    ASSERT_ALWAYS (f.get_prec() == mpf_get_default_prec());\n  }\n\n  // mpf_class::get_str\n  {\n    mpf_class  f(123);\n    string     s;\n    mp_exp_t   e;\n    s = f.get_str(e);        ASSERT_ALWAYS (s == \"123\" && e == 3);\n    s = f.get_str(e,  16);   ASSERT_ALWAYS (s == \"7b\"  && e == 2);\n    s = f.get_str(e, -16);   ASSERT_ALWAYS (s == \"7B\"  && e == 2);\n    s = f.get_str(e, 10, 2); ASSERT_ALWAYS (s == \"12\"  && e == 3);\n    s = f.get_str(e, 10, 1); ASSERT_ALWAYS (s == \"1\"   && e == 3);\n  }\n\n  // mpf_class::set_str\n  {\n    mpf_class  f;\n    int        ret;\n    ret = f.set_str (\"123\",     10);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (\"123e1\",   10);  ASSERT_ALWAYS (ret == 0 
&& f == 1230);\n    ret = f.set_str (\"1230e-1\", 10);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (\"7b\",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (\"7B\",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (\"7B@1\",    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);\n    ret = f.set_str (\"7B0@-1\",  16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n\n    ret = f.set_str (string(\"123\"),     10);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (string(\"123e1\"),   10);  ASSERT_ALWAYS (ret == 0 && f == 1230);\n    ret = f.set_str (string(\"1230e-1\"), 10);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (string(\"7b\"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (string(\"7B\"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n    ret = f.set_str (string(\"7B@1\"),    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);\n    ret = f.set_str (string(\"7B0@-1\"),  16);  ASSERT_ALWAYS (ret == 0 && f == 123);\n  }\n\n  // mpf_class::set_prec\n  {\n    mpf_class  f;\n    f.set_prec (256);\n    ASSERT_ALWAYS (f.get_prec () >= 256);\n  }\n\n  // mpf_class::set_prec_raw\n  {\n    mpf_class  f (0, 100 * GMP_NUMB_BITS);\n    f.set_prec_raw (5 * GMP_NUMB_BITS);\n    ASSERT_ALWAYS (f.get_prec () >= 5 * GMP_NUMB_BITS);\n    ASSERT_ALWAYS (f.get_prec () < 100 * GMP_NUMB_BITS);\n    f.set_prec_raw (100 * GMP_NUMB_BITS);\n  }\n}\n\n\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-ops.cc",
    "content": "/* Test mp*_class operators and functions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // unary operators and functions\n\n  // operator+\n  {\n    mpz_class a(1);\n    mpz_class b;\n    b = +a; ASSERT_ALWAYS(b == 1);\n  }\n\n  // operator-\n  {\n    mpz_class a(2);\n    mpz_class b;\n    b = -a; ASSERT_ALWAYS(b == -2);\n  }\n\n  // operator~\n  {\n    mpz_class a(3);\n    mpz_class b;\n    b = ~a; ASSERT_ALWAYS(b == -4);\n  }\n\n  // abs\n  {\n    mpz_class a(-123);\n    mpz_class b;\n    b = abs(a); ASSERT_ALWAYS(b == 123);\n  }\n\n  // sqrt\n  {\n    mpz_class a(25);\n    mpz_class b;\n    b = sqrt(a); ASSERT_ALWAYS(b == 5);\n  }\n  {\n    mpz_class a(125);\n    mpz_class b;\n    b = sqrt(a); ASSERT_ALWAYS(b == 11); // round toward zero\n  }\n\n  // sgn\n  {\n    mpz_class a(123);\n    int b = sgn(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpz_class a(0);\n    int b = sgn(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    mpz_class a(-123);\n    int b = sgn(a); 
ASSERT_ALWAYS(b == -1);\n  }\n\n\n  // binary operators and functions\n\n  // operator+\n  {\n    mpz_class a(1), b(2);\n    mpz_class c;\n    c = a + b; ASSERT_ALWAYS(c == 3);\n  }\n  {\n    mpz_class a(3);\n    signed int b = 4;\n    mpz_class c;\n    c = a + b; ASSERT_ALWAYS(c == 7);\n  }\n  {\n    mpz_class a(5);\n    double b = 6.0;\n    mpz_class c;\n    c = b + a; ASSERT_ALWAYS(c == 11);\n  }\n\n  // operator-\n  {\n    mpz_class a(3), b(6);\n    mpz_class c;\n    c = a - b; ASSERT_ALWAYS(c == -3);\n  }\n\n  // operator*\n  {\n    mpz_class a(-2), b(4);\n    mpz_class c;\n    c = a * b; ASSERT_ALWAYS(c == -8);\n  }\n  {\n    mpz_class a(2);\n    long b = -4;\n    mpz_class c;\n    c = a * b; ASSERT_ALWAYS(c == -8);\n    c = b * a; ASSERT_ALWAYS(c == -8);\n  }\n  {\n    mpz_class a(-2);\n    unsigned long b = 4;\n    mpz_class c;\n    c = a * b; ASSERT_ALWAYS(c == -8);\n    c = b * a; ASSERT_ALWAYS(c == -8);\n  }\n\n  // operator/ and operator%\n  {\n    mpz_class a(12), b(4);\n    mpz_class c;\n    c = a / b; ASSERT_ALWAYS(c == 3);\n    c = a % b; ASSERT_ALWAYS(c == 0);\n  }\n  {\n    mpz_class a(7), b(5);\n    mpz_class c;\n    c = a / b; ASSERT_ALWAYS(c == 1);\n    c = a % b; ASSERT_ALWAYS(c == 2);\n  }\n  {\n    mpz_class a(-10);\n    signed int ai = -10;\n    mpz_class b(3);\n    signed int bi = 3;\n    mpz_class c;\n    c = a / b;  ASSERT_ALWAYS(c == -3);\n    c = a % b;  ASSERT_ALWAYS(c == -1);\n    c = a / bi; ASSERT_ALWAYS(c == -3);\n    c = a % bi; ASSERT_ALWAYS(c == -1);\n    c = ai / b; ASSERT_ALWAYS(c == -3);\n    c = ai % b; ASSERT_ALWAYS(c == -1);\n  }\n  {\n    mpz_class a(-10);\n    signed int ai = -10;\n    mpz_class b(-3);\n    signed int bi = -3;\n    mpz_class c;\n    c = a / b;  ASSERT_ALWAYS(c == 3);\n    c = a % b;  ASSERT_ALWAYS(c == -1);\n    c = a / bi; ASSERT_ALWAYS(c == 3);\n    c = a % bi; ASSERT_ALWAYS(c == -1);\n    c = ai / b; ASSERT_ALWAYS(c == 3);\n    c = ai % b; ASSERT_ALWAYS(c == -1);\n  }\n  {\n    mpz_class a 
(static_cast<mpir_si>(GMP_SI_MIN));\n    mpir_si ai = GMP_SI_MIN;\n    mpz_class b = - mpz_class (static_cast<mpir_si>(GMP_SI_MIN));\n    mpz_class c;\n    c = a / b;  ASSERT_ALWAYS(c == -1);\n    c = a % b;  ASSERT_ALWAYS(c == 0);\n    c = ai / b; ASSERT_ALWAYS(c == -1);\n    c = ai % b; ASSERT_ALWAYS(c == 0);\n  }\n\n  // operator&\n  // operator|\n  // operator^\n\n  // operator<<\n  {\n    mpz_class a(3);\n    unsigned int b = 4;\n    mpz_class c;\n    c = a << b; ASSERT_ALWAYS(c == 48);\n  }\n\n  // operator>>\n  {\n    mpz_class a(127);\n    unsigned int b = 4;\n    mpz_class c;\n    c = a >> b; ASSERT_ALWAYS(c == 7);\n  }\n\n  // operator==\n  // operator!=\n  // operator<\n  // operator<=\n  // operator>\n  // operator>=\n\n  // cmp\n  {\n    mpz_class a(123), b(45);\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpz_class a(123);\n    unsigned long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpz_class a(123);\n    long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpz_class a(123);\n    double b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n\n\n  // ternary operators\n\n  // mpz_addmul\n  {\n    mpz_class a(1), b(2), c(3);\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(3);\n    unsigned int c = 2;\n    mpz_class d;\n    d = a + c * b; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3;\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(3);\n    signed int c = 2;\n    mpz_class d;\n    d = a + c * b; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class 
a(1), b(2);\n    double c = 3.0;\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(3);\n    double c = 2.0;\n    mpz_class d;\n    d = a + c * b; ASSERT_ALWAYS(d == 7);\n  }\n\n  {\n    mpz_class a(2), b(3), c(4);\n    mpz_class d;\n    d = a * b + c; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(2), b(4);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a * c + b; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(3), b(4);\n    unsigned int c = 2;\n    mpz_class d;\n    d = c * a + b; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(2), b(4);\n    signed int c = 3;\n    mpz_class d;\n    d = a * c + b; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(3), b(4);\n    signed int c = 2;\n    mpz_class d;\n    d = c * a + b; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(2), b(4);\n    double c = 3.0;\n    mpz_class d;\n    d = a * c + b; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(3), b(4);\n    double c = 2.0;\n    mpz_class d;\n    d = c * a + b; ASSERT_ALWAYS(d == 10);\n  }\n\n  // mpz_submul\n  {\n    mpz_class a(1), b(2), c(3);\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(3);\n    unsigned int c = 2;\n    mpz_class d;\n    d = a - c * b; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3;\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(3);\n    signed int c = 2;\n    mpz_class d;\n    d = a - c * b; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(2);\n    double c = 3.0;\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n  {\n    mpz_class a(1), b(3);\n    double c = 2.0;\n    mpz_class d;\n    d = a - c * b; ASSERT_ALWAYS(d == -5);\n  }\n\n  {\n    mpz_class a(2), b(3), c(4);\n    mpz_class d;\n    d = a * b - c; 
ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(2), b(4);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a * c - b; ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(3), b(4);\n    unsigned int c = 2;\n    mpz_class d;\n    d = c * a - b; ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(2), b(4);\n    signed int c = 3;\n    mpz_class d;\n    d = a * c - b; ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(3), b(4);\n    signed int c = 2;\n    mpz_class d;\n    d = c * a - b; ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(2), b(4);\n    double c = 3.0;\n    mpz_class d;\n    d = a * c - b; ASSERT_ALWAYS(d == 2);\n  }\n  {\n    mpz_class a(3), b(4);\n    double c = 2.0;\n    mpz_class d;\n    d = c * a - b; ASSERT_ALWAYS(d == 2);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // unary operators and functions\n\n  // operator+\n  {\n    mpq_class a(1, 2);\n    mpq_class b;\n    b = +a; ASSERT_ALWAYS(b == 0.5);\n  }\n\n  // operator-\n  {\n    mpq_class a(3, 4);\n    mpq_class b;\n    b = -a; ASSERT_ALWAYS(b == -0.75);\n  }\n\n  // abs\n  {\n    mpq_class a(-123);\n    mpq_class b;\n    b = abs(a); ASSERT_ALWAYS(b == 123);\n  }\n\n  // sgn\n  {\n    mpq_class a(123);\n    int b = sgn(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpq_class a(0);\n    int b = sgn(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    mpq_class a(-123);\n    int b = sgn(a); ASSERT_ALWAYS(b == -1);\n  }\n\n\n  // binary operators and functions\n\n  // operator+\n  {\n    mpq_class a(1, 2), b(3, 4);\n    mpq_class c;\n    c = a + b; ASSERT_ALWAYS(c == 1.25);\n  }\n  {\n    mpq_class a(1, 2);\n    signed int b = 2;\n    mpq_class c;\n    c = a + b; ASSERT_ALWAYS(c == 2.5);\n  }\n  {\n    mpq_class a(1, 2);\n    double b = 1.5;\n    mpq_class c;\n    c = b + a; ASSERT_ALWAYS(c == 2);\n  }\n\n  // operator-\n  {\n    mpq_class a(1, 2), b(3, 4);\n    mpq_class c;\n    c = a - b; ASSERT_ALWAYS(c == -0.25);\n  }\n\n  // operator*\n  {\n    mpq_class a(1, 3), b(3, 4);\n    mpq_class c;\n    c = a * b; 
ASSERT_ALWAYS(c == 0.25);\n  }\n\n  // operator/\n  {\n    mpq_class a(1, 2), b(2, 3);\n    mpq_class c;\n    c = a / b; ASSERT_ALWAYS(c == 0.75);\n  }\n\n  // operator<<\n  // operator>>\n  // operator==\n  // operator!=\n  // operator<\n  // operator<=\n  // operator>\n  // operator>=\n\n  // cmp\n  {\n    mpq_class a(123), b(45);\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpq_class a(123);\n    unsigned long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpq_class a(123);\n    long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpq_class a(123);\n    double b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // unary operators and functions\n\n  // operator+\n  {\n    mpf_class a(1);\n    mpf_class b;\n    b = +a; ASSERT_ALWAYS(b == 1);\n  }\n\n  // operator-\n  {\n    mpf_class a(2);\n    mpf_class b;\n    b = -a; ASSERT_ALWAYS(b == -2);\n  }\n\n  // abs\n  {\n    mpf_class a(-123);\n    mpf_class b;\n    b = abs(a); ASSERT_ALWAYS(b == 123);\n  }\n\n  // trunc\n  {\n    mpf_class a(1.5);\n    mpf_class b;\n    b = trunc(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpf_class a(-1.5);\n    mpf_class b;\n    b = trunc(a); ASSERT_ALWAYS(b == -1);\n  }\n\n  // floor\n  {\n    mpf_class a(1.9);\n    mpf_class b;\n    b = floor(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpf_class a(-1.1);\n    mpf_class b;\n    b = floor(a); ASSERT_ALWAYS(b == -2);\n  }\n\n  // ceil\n  {\n    mpf_class a(1.1);\n    mpf_class b;\n    b = ceil(a); ASSERT_ALWAYS(b == 2);\n  }\n  {\n    mpf_class a(-1.9);\n    mpf_class b;\n    b = ceil(a); ASSERT_ALWAYS(b == -1);\n  }\n\n  // sqrt\n  {\n    mpf_class a(25);\n    mpf_class b;\n    b = sqrt(a); ASSERT_ALWAYS(b == 5);\n  }\n  {\n    mpf_class 
a(2.25);\n    mpf_class b;\n    b = sqrt(a); ASSERT_ALWAYS(b == 1.5);\n  }\n\n  // sgn\n  {\n    mpf_class a(123);\n    int b = sgn(a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpf_class a(0);\n    int b = sgn(a); ASSERT_ALWAYS(b == 0);\n  }\n  {\n    mpf_class a(-123);\n    int b = sgn(a); ASSERT_ALWAYS(b == -1);\n  }\n\n\n  // binary operators and functions\n\n  // operator+\n  {\n    mpf_class a(1), b(2);\n    mpf_class c;\n    c = a + b; ASSERT_ALWAYS(c == 3);\n  }\n\n  // operator-\n  {\n    mpf_class a(3), b(4);\n    mpf_class c;\n    c = a - b; ASSERT_ALWAYS(c == -1);\n  }\n\n  // operator*\n  {\n    mpf_class a(2), b(5);\n    mpf_class c;\n    c = a * b; ASSERT_ALWAYS(c == 10);\n  }\n\n  // operator/\n  {\n    mpf_class a(7), b(4);\n    mpf_class c;\n    c = a / b; ASSERT_ALWAYS(c == 1.75);\n  }\n\n  // operator<<\n  // operator>>\n  // operator==\n  // operator!=\n  // operator<\n  // operator<=\n  // operator>\n  // operator>=\n\n  // hypot\n  {\n    mpf_class a(3), b(4);\n    mpf_class c;\n    c = hypot(a, b); ASSERT_ALWAYS(c == 5);\n  }\n\n  // cmp\n  {\n    mpf_class a(123), b(45);\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpf_class a(123);\n    unsigned long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpf_class a(123);\n    long b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n  {\n    mpf_class a(123);\n    double b = 45;\n    int c;\n    c = cmp(a, b); ASSERT_ALWAYS(c > 0);\n    c = cmp(b, a); ASSERT_ALWAYS(c < 0);\n  }\n}\n\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-ostream.cc",
    "content": "/* Test ostream formatted output.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n#include <cstdlib>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nint   option_check_standard = 0;\n\n\n#define CALL(expr)                                      \\\n  do {                                                  \\\n    got.flags (data[i].flags);                          \\\n    got.width (data[i].width);                          \\\n    got.precision (data[i].precision);                  \\\n    if (data[i].fill == '\\0')                           \\\n      got.fill (' ');                                   \\\n    else                                                \\\n      got.fill (data[i].fill);                          \\\n                                                        \\\n    if (! 
(expr))                                       \\\n      {                                                 \\\n        cout << \"\\\"got\\\" output error\\n\";               \\\n        abort ();                                       \\\n      }                                                 \\\n    if (got.width() != 0)                               \\\n      {                                                 \\\n        cout << \"\\\"got\\\" width not reset to 0\\n\";       \\\n        abort ();                                       \\\n      }                                                 \\\n                                                        \\\n  } while (0)\n\n\n#define DUMP()                                                          \\\n  do {                                                                  \\\n    cout << \"  want:  |\" << data[i].want << \"|\\n\";                      \\\n    cout << \"  got:   |\" << got.str() << \"|\\n\";                         \\\n    cout << \"  width: \" << data[i].width << \"\\n\";                       \\\n    cout << \"  prec:  \" << got.precision() << \"\\n\";                     \\\n    cout << \"  flags: \" << hex << (unsigned long) got.flags() << \"\\n\";  \\\n  } while (0)\n\n#define ABORT() \\\n  do {          \\\n    DUMP ();    \\\n    abort ();   \\\n  } while (0)\n\nvoid\ncheck_mpz (void)\n{\n  static const struct {\n    const char     *z;\n    const char     *want;\n    ios::fmtflags  flags;\n    int            width;\n    int            precision;\n    char           fill;\n\n  } data[] = {\n\n    { \"0\", \"0\", ios::dec },\n\n    { \"0\", \"0\", ios::oct },\n    { \"0\", \"0\", ios::oct | ios::showbase },\n\n    { \"0\", \"0\", ios::hex },\n    { \"0\", \"0x0\", ios::hex | ios::showbase },\n    { \"0\", \"0X0\", ios::hex | ios::showbase | ios::uppercase },\n\n    { \"1\", \"****1\", ios::dec, 5, 0, '*' },\n\n    { \"-1\", \"   -1\",  ios::dec | ios::right,    5 },\n    { \"-1\", \"-   1\",  
ios::dec | ios::internal, 5 },\n    { \"-1\", \"-1   \",  ios::dec | ios::left,     5 },\n\n    { \"1\", \"   0x1\", ios::hex | ios::showbase | ios::right,    6 },\n    { \"1\", \"0x   1\", ios::hex | ios::showbase | ios::internal, 6 },\n    { \"1\", \"0x1   \", ios::hex | ios::showbase | ios::left,     6 },\n\n    { \"1\", \"   +0x1\", ios::hex | ios::showbase | ios::showpos | ios::right,\n      7 },\n    { \"1\", \"+0x   1\", ios::hex | ios::showbase | ios::showpos | ios::internal,\n      7 },\n    { \"1\", \"+0x1   \", ios::hex | ios::showbase | ios::showpos | ios::left,\n      7 },\n\n    {  \"123\",    \"7b\", ios::hex },\n    {  \"123\",    \"7B\", ios::hex | ios::uppercase },\n    {  \"123\",  \"0x7b\", ios::hex | ios::showbase },\n    {  \"123\",  \"0X7B\", ios::hex | ios::showbase | ios::uppercase },\n    { \"-123\", \"-0x7b\", ios::hex | ios::showbase },\n    { \"-123\", \"-0X7B\", ios::hex | ios::showbase | ios::uppercase },\n\n    {  \"123\",   \"173\", ios::oct },\n    {  \"123\",   \"173\", ios::oct | ios::uppercase },\n    {  \"123\",  \"0173\", ios::oct | ios::showbase },\n    {  \"123\",  \"0173\", ios::oct | ios::showbase | ios::uppercase },\n    { \"-123\", \"-0173\", ios::oct | ios::showbase },\n    { \"-123\", \"-0173\", ios::oct | ios::showbase | ios::uppercase },\n\n  };\n\n  size_t  i;\n  mpz_t   z;\n\n  mpz_init (z);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (z, data[i].z, 0);\n\n      if (option_check_standard\n          && mpz_fits_slong_p (z)\n\n          // no negatives or showpos in hex or oct\n          && (((data[i].flags & ios::basefield) == ios::hex\n               || (data[i].flags & ios::basefield) == ios::oct)\n              ? (mpz_sgn (z) >= 0\n                 && ! 
(data[i].flags & ios::showpos))\n              : 1)\n          )\n        {\n          ostringstream  got;\n          long  n = mpz_get_si (z);\n          CALL (got << n);\n          if (got.str().compare (data[i].want) != 0)\n            {\n              cout << \"check_mpz data[\" << i\n\t\t   << \"] doesn't match standard ostream output\\n\";\n              cout << \"  z:     \" << data[i].z << \"\\n\";\n              cout << \"  n:     \" << n << \"\\n\";\n              DUMP ();\n            }\n        }\n\n      {\n        ostringstream  got;\n        CALL (got << z);\n        if (got.str().compare (data[i].want) != 0)\n          {\n            cout << \"mpz operator<< wrong, data[\" << i << \"]\\n\";\n            cout << \"  z:     \" << data[i].z << \"\\n\";\n            ABORT ();\n          }\n      }\n    }\n\n  mpz_clear (z);\n}\n\nvoid\ncheck_mpq (void)\n{\n  static const struct {\n    const char     *q;\n    const char     *want;\n    ios::fmtflags  flags;\n    int            width;\n    int            precision;\n    char           fill;\n\n  } data[] = {\n\n    { \"0\", \"0\", ios::dec },\n    { \"0\", \"0\", ios::hex },\n    { \"0\", \"0x0\", ios::hex | ios::showbase },\n    { \"0\", \"0X0\", ios::hex | ios::showbase | ios::uppercase },\n\n    { \"5/8\", \"5/8\", ios::dec },\n    { \"5/8\", \"0X5/0X8\", ios::hex | ios::showbase | ios::uppercase },\n\n    // zero denominator with showbase\n    { \"0/0\",   \"       0/0\", ios::oct | ios::showbase, 10 },\n    { \"0/0\",   \"       0/0\", ios::dec | ios::showbase, 10 },\n    { \"0/0\",   \"   0x0/0x0\", ios::hex | ios::showbase, 10 },\n    { \"123/0\", \"    0173/0\", ios::oct | ios::showbase, 10 },\n    { \"123/0\", \"     123/0\", ios::dec | ios::showbase, 10 },\n    { \"123/0\", \"  0x7b/0x0\", ios::hex | ios::showbase, 10 },\n    { \"123/0\", \"  0X7B/0X0\", ios::hex | ios::showbase | ios::uppercase, 10 },\n    { \"0/123\", \"    0/0173\", ios::oct | ios::showbase, 10 },\n    { \"0/123\", \"     
0/123\", ios::dec | ios::showbase, 10 },\n    { \"0/123\", \"  0x0/0x7b\", ios::hex | ios::showbase, 10 },\n    { \"0/123\", \"  0X0/0X7B\", ios::hex | ios::showbase | ios::uppercase, 10 },\n  };\n\n  size_t  i;\n  mpq_t   q;\n\n  mpq_init (q);\n\n#define mpq_integer_p(q)  (mpz_cmp_ui (mpq_denref(q), 1L) == 0)\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpq_set_str_or_abort (q, data[i].q, 0);\n      MPZ_CHECK_FORMAT (mpq_numref (q));\n      MPZ_CHECK_FORMAT (mpq_denref (q));\n\n      if (option_check_standard\n          && mpz_fits_slong_p (mpq_numref(q))\n          && mpq_integer_p (q))\n        {\n          ostringstream  got;\n          long  n = mpz_get_si (mpq_numref(q));\n          CALL (got << n);\n          if (got.str().compare (data[i].want) != 0)\n            {\n              cout << \"check_mpq data[\" << i\n\t\t   << \"] doesn't match standard ostream output\\n\";\n              cout << \"  q:     \" << data[i].q << \"\\n\";\n              cout << \"  n:     \" << n << \"\\n\";\n              DUMP ();\n            }\n        }\n\n      {\n        ostringstream  got;\n        CALL (got << q);\n        if (got.str().compare (data[i].want) != 0)\n          {\n            cout << \"mpq operator<< wrong, data[\" << i << \"]\\n\";\n            cout << \"  q:     \" << data[i].q << \"\\n\";\n            ABORT ();\n          }\n      }\n    }\n\n  mpq_clear (q);\n}\n\n\nvoid\ncheck_mpf (void)\n{\n  static const struct {\n    const char     *f;\n    const char     *want;\n    ios::fmtflags  flags;\n    int            width;\n    int            precision;\n    char           fill;\n\n  } data[] = {\n\n    { \"0\", \"0\",            ios::dec },\n    { \"0\", \"+0\",           ios::dec | ios::showpos },\n    { \"0\", \"0.00000\",      ios::dec | ios::showpoint },\n    { \"0\", \"0\",            ios::dec | ios::fixed },\n    { \"0\", \"0.\",           ios::dec | ios::fixed | ios::showpoint },\n    { \"0\", \"0.000000e+00\", ios::dec | ios::scientific 
},\n    { \"0\", \"0.000000e+00\", ios::dec | ios::scientific | ios::showpoint },\n    \n    { \"0\", \"0\",          ios::dec, 0, 4 },\n    { \"0\", \"0.000\",      ios::dec | ios::showpoint, 0, 4 },\n    { \"0\", \"0.0000\",     ios::dec | ios::fixed, 0, 4 },\n    { \"0\", \"0.0000\",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },\n    { \"0\", \"0.0000e+00\", ios::dec | ios::scientific, 0, 4 },\n    { \"0\", \"0.0000e+00\", ios::dec | ios::scientific | ios::showpoint, 0, 4 },\n    \n    { \"1\", \"1\",       ios::dec },\n    { \"1\", \"+1\",      ios::dec | ios::showpos },\n    { \"1\", \"1.00000\", ios::dec | ios::showpoint },\n    { \"1\", \"1\",       ios::dec | ios::fixed },\n    { \"1\", \"1.\",      ios::dec | ios::fixed | ios::showpoint },\n    { \"1\", \"1.000000e+00\",   ios::dec | ios::scientific },\n    { \"1\", \"1.000000e+00\",  ios::dec | ios::scientific | ios::showpoint },\n    \n    { \"1\", \"1\",          ios::dec,                   0, 4 },\n    { \"1\", \"1.000\",      ios::dec | ios::showpoint,  0, 4 },\n    { \"1\", \"1.0000\",     ios::dec | ios::fixed,      0, 4 },\n    { \"1\", \"1.0000\",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },\n    { \"1\", \"1.0000e+00\", ios::dec | ios::scientific, 0, 4 },\n    { \"1\", \"1.0000e+00\", ios::dec | ios::scientific | ios::showpoint, 0, 4 },\n\n    { \"-1\", \"-1\",        ios::dec | ios::showpos },\n\n    { \"-1\", \"  -1\",      ios::dec, 4 },\n    { \"-1\", \"-  1\",      ios::dec | ios::internal, 4 },\n    { \"-1\", \"-1  \",      ios::dec | ios::left, 4 },\n\n    { \"-1\", \"  -0x1\",    ios::hex | ios::showbase, 6 },\n    { \"-1\", \"-0x  1\",    ios::hex | ios::showbase | ios::internal, 6 },\n    { \"-1\", \"-0x1  \",    ios::hex | ios::showbase | ios::left, 6 },\n\n    {    \"1\", \"*********1\", ios::dec, 10, 4, '*' },\n    { \"1234\", \"******1234\", ios::dec, 10, 4, '*' },\n    { \"1234\", \"*****1234.\", ios::dec | ios::showpoint, 10, 4, '*' },\n\n    { \"12345\", 
\"1.23e+04\", ios::dec, 0, 3 },\n\n    { \"12345\", \"12345.\", ios::dec | ios::fixed | ios::showpoint },\n\n    { \"1.9999999\",    \"2\",     ios::dec, 0, 1 },\n    { \"1.0009999999\", \"1.001\", ios::dec, 0, 4 },\n    { \"1.0001\",       \"1\",     ios::dec, 0, 4 },\n    { \"1.0004\",       \"1\",     ios::dec, 0, 4 },\n    { \"1.000555\",     \"1.001\", ios::dec, 0, 4 },\n\n    { \"1.0002\",       \"1.000\", ios::dec | ios::fixed, 0, 3 },\n    { \"1.0008\",       \"1.001\", ios::dec | ios::fixed, 0, 3 },\n\n    { \"0\", \"0\", ios::hex },\n    { \"0\", \"0x0\", ios::hex | ios::showbase },\n    { \"0\", \"0X0\", ios::hex | ios::showbase | ios::uppercase },\n    { \"123\",   \"7b\", ios::hex },\n    { \"123\", \"0x7b\", ios::hex | ios::showbase },\n    { \"123\", \"0X7B\", ios::hex | ios::showbase | ios::uppercase },\n\n    { \"0\", \"0.000@+00\", ios::hex | ios::scientific, 0, 3 },\n    { \"256\", \"1.000@+02\", ios::hex | ios::scientific, 0, 3 },\n\n    { \"123\",   \"7.b@+01\", ios::hex | ios::scientific, 0, 1 },\n    { \"123\",   \"7.B@+01\", ios::hex | ios::scientific | ios::uppercase, 0, 1 },\n    { \"123\", \"0x7.b@+01\", ios::hex | ios::scientific | ios::showbase, 0, 1 },\n    { \"123\", \"0X7.B@+01\",\n      ios::hex | ios::scientific | ios::showbase | ios::uppercase, 0, 1 },\n\n    { \"1099511627776\", \"1.0@+10\", ios::hex | ios::scientific, 0, 1 },\n    { \"1099511627776\", \"1.0@+10\",\n      ios::hex | ios::scientific | ios::uppercase, 0, 1 },\n\n    { \"0.0625\", \"1.00@-01\", ios::hex | ios::scientific, 0, 2 },\n\n    { \"0\", \"0\", ios::oct },\n    { \"123\",  \"173\", ios::oct },\n    { \"123\", \"0173\", ios::oct | ios::showbase },\n\n    // octal showbase suppressed for 0\n    { \"0\", \"0\", ios::oct | ios::showbase },\n    { \".125\",    \"00.1\",  ios::oct | ios::showbase, 0, 1 },\n    { \".015625\", \"00.01\", ios::oct | ios::showbase, 0, 2 },\n    { \".125\",    \"00.1\",  ios::fixed | ios::oct | ios::showbase, 0, 1 },\n    { 
\".015625\", \"0.0\",   ios::fixed | ios::oct | ios::showbase, 0, 1 },\n    { \".015625\", \"00.01\", ios::fixed | ios::oct | ios::showbase, 0, 2 },\n\n    {  \"0.125\",  \"1.000000e-01\", ios::oct | ios::scientific },\n    {  \"0.125\", \"+1.000000e-01\", ios::oct | ios::scientific | ios::showpos },\n    { \"-0.125\", \"-1.000000e-01\", ios::oct | ios::scientific },\n    { \"-0.125\", \"-1.000000e-01\", ios::oct | ios::scientific | ios::showpos },\n\n    { \"0\", \"0.000e+00\", ios::oct | ios::scientific, 0, 3 },\n    { \"256\",  \"4.000e+02\", ios::oct | ios::scientific, 0, 3 },\n    { \"256\", \"04.000e+02\", ios::oct | ios::scientific | ios::showbase, 0, 3 },\n    { \"256\",  \"4.000E+02\", ios::oct | ios::scientific | ios::uppercase, 0, 3 },\n    { \"256\", \"04.000E+02\",\n      ios::oct | ios::scientific | ios::showbase | ios::uppercase, 0, 3 },\n\n    { \"16777216\",    \"1.000000e+08\", ios::oct | ios::scientific },\n    { \"16777216\",    \"1.000000E+08\",\n      ios::oct | ios::scientific | ios::uppercase },\n    { \"16777216\",   \"01.000000e+08\",\n      ios::oct | ios::scientific | ios::showbase },\n    { \"16777216\",   \"01.000000E+08\",\n      ios::oct | ios::scientific | ios::showbase | ios::uppercase },\n    { \"16777216\",  \"+01.000000e+08\",\n      ios::oct | ios::scientific | ios::showbase | ios::showpos },\n    { \"16777216\",  \"+01.000000E+08\", ios::oct | ios::scientific\n      | ios::showbase | ios::showpos | ios::uppercase },\n    { \"-16777216\", \"-01.000000e+08\",\n      ios::oct | ios::scientific | ios::showbase | ios::showpos },\n    { \"-16777216\", \"-01.000000E+08\", ios::oct | ios::scientific\n      | ios::showbase | ios::showpos | ios::uppercase },\n\n  };\n\n  size_t  i;\n  mpf_t   f, f2;\n  double  d;\n\n  mpf_init (f);\n  mpf_init (f2);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_set_str_or_abort (f, data[i].f, 0);\n\n      d = mpf_get_d (f);\n      mpf_set_d (f2, d);\n      if (option_check_standard && 
mpf_cmp (f, f2) == 0\n          && ! (data[i].flags & (ios::hex | ios::oct | ios::showbase)))\n        {\n          ostringstream  got;\n          CALL (got << d);\n          if (got.str().compare (data[i].want) != 0)\n            {\n              cout << \"check_mpf data[\" << i\n\t\t   << \"] doesn't match standard ostream output\\n\";\n              cout << \"  f:     \" << data[i].f << \"\\n\";\n              cout << \"  d:     \" << d << \"\\n\";\n              DUMP ();\n            }\n        }\n\n      {\n        ostringstream  got;\n        CALL (got << f);\n        if (got.str().compare (data[i].want) != 0)\n          {\n            cout << \"mpf operator<< wrong, data[\" << i << \"]\\n\";\n            cout << \"  f:     \" << data[i].f << \"\\n\";\n            ABORT ();\n          }\n      }\n    }\n\n  mpf_clear (f);\n  mpf_clear (f2);\n}\n\n\n\nint\nmain (int argc, char *argv[])\n{\n  if (argc > 1 && strcmp (argv[1], \"-s\") == 0)\n    option_check_standard = 1;\n\n  tests_start ();\n\n  check_mpz ();\n  check_mpq ();\n  check_mpf ();\n\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-prec.cc",
    "content": "/* Test precision of mpf_class expressions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nconst int\nsmall_prec = 64, medium_prec = 128, large_prec = 192, very_large_prec = 256;\n\n#define ASSERT_ALWAYS_PREC(a, s, prec) \\\n{                                      \\\n  mpf_srcptr _a = a.get_mpf_t();       \\\n  mpf_class _b(s, prec);               \\\n  mpf_srcptr _c = _b.get_mpf_t();      \\\n  ASSERT_ALWAYS(mpf_eq(_a, _c, prec)); \\\n}\n\n\n\nvoid\ncheck_mpf (void)\n{\n  mpf_set_default_prec(medium_prec);\n\n  // simple expressions\n  {\n    mpf_class f(3.0, small_prec);\n    mpf_class g(1 / f, very_large_prec);\n    ASSERT_ALWAYS_PREC\n      (g, \"0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333\"\n       \"     33333 33333 33333 33333 33333 333\", very_large_prec);\n  }\n  {\n    mpf_class f(9.0, medium_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = 1 / f;\n    ASSERT_ALWAYS_PREC\n      (g, \"0.11111 11111 11111 11111 11111 11111 11111 11111 11111 11111\"\n       \"     
11111 11111 11111 11111 11111 111\", very_large_prec);\n  }\n  {\n    mpf_class f(15.0, large_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = 1 / f;\n    ASSERT_ALWAYS_PREC\n      (g, \"0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666\"\n       \"     66666 66666 66666 66666 66666 667\", very_large_prec);\n  }\n\n  // compound expressions\n  {\n    mpf_class f(3.0, small_prec);\n    mpf_class g(-(-(-1 / f)), very_large_prec);\n    ASSERT_ALWAYS_PREC\n      (g, \"-0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333\"\n       \"      33333 33333 33333 33333 33333 333\", very_large_prec);\n  }\n  {\n    mpf_class f(3.0, small_prec), g(9.0, medium_prec);\n    mpf_class h(0.0, very_large_prec);\n    h = 1/f + 1/g;\n    ASSERT_ALWAYS_PREC\n      (h, \"0.44444 44444 44444 44444 44444 44444 44444 44444 44444 44444\"\n       \"     44444 44444 44444 44444 44444 444\", very_large_prec);\n  }\n  {\n    mpf_class f(3.0, small_prec), g(9.0, medium_prec), h(15.0, large_prec);\n    mpf_class i(0.0, very_large_prec);\n    i = f / g + h;\n    ASSERT_ALWAYS_PREC\n      (i, \"15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333\"\n       \"      33333 33333 33333 33333 33333 333\", very_large_prec);\n  }\n  {\n    mpf_class f(3.0, small_prec);\n    mpf_class g(-(1 + f) / 3, very_large_prec);\n    ASSERT_ALWAYS_PREC\n      (g, \"-1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333\"\n       \"      33333 33333 33333 33333 33333 333\", very_large_prec);\n  }\n  {\n    mpf_class f(9.0, medium_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = sqrt(1 / f);\n    ASSERT_ALWAYS_PREC\n      (g, \"0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333\"\n       \"     33333 33333 33333 33333 33333 333\", very_large_prec);\n  }\n  {\n    mpf_class f(15.0, large_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = hypot(1 + 5 / f, 1.0);\n    ASSERT_ALWAYS_PREC\n      (g, \"1.66666 66666 66666 66666 66666 66666 66666 66666 66666 
66666\"\n       \"     66666 66666 66666 66666 66666 667\", very_large_prec);\n  }\n\n  // compound assignments\n  {\n    mpf_class f(3.0, small_prec), g(9.0, medium_prec);\n    mpf_class h(1.0, very_large_prec);\n    h -= f / g;\n    ASSERT_ALWAYS_PREC\n      (h, \"0.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666\"\n       \"     66666 66666 66666 66666 66666 667\", very_large_prec);\n  }\n\n  // construction from expressions\n  {\n    mpf_class f(3.0, small_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(1 / f);\n    ASSERT_ALWAYS_PREC(g, \"0.33333 33333 33333 33333\", small_prec);\n  }\n  {\n    mpf_class f(9.0, medium_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(1 / f);\n    ASSERT_ALWAYS_PREC\n      (g, \"0.11111 11111 11111 11111 11111 11111 11111 1111\", medium_prec);\n  }\n  {\n    mpf_class f(15.0, large_prec);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(1 / f);\n    ASSERT_ALWAYS_PREC\n      (g, \"0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666\"\n       \"     66666 6667\", large_prec);\n  }\n\n  {\n    mpf_class f(3.0, small_prec), g(9.0, medium_prec);\n    mpf_class h(0.0, very_large_prec);\n    h = mpf_class(f / g + 1, large_prec);\n    ASSERT_ALWAYS_PREC\n      (h, \"1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333 33333 333\",\n       large_prec);\n  }\n\n  // mixed mpf/mpq expressions\n  {\n    mpf_class f(3.0, small_prec);\n    mpq_class q(1, 3);\n    mpf_class g(0.0, very_large_prec);\n    g = f - q;\n    ASSERT_ALWAYS_PREC\n      (g, \"2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666\"\n       \"     66666 66666 66666 66666 66666 67\", very_large_prec);\n  }\n\n  {\n    mpf_class f(3.0, small_prec);\n    mpq_class q(1, 3);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(f - q, large_prec);\n    ASSERT_ALWAYS_PREC\n      (g, \"2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666 66666 667\",\n       large_prec);\n  }\n  
{\n    mpf_class f(3.0, small_prec);\n    mpq_class q(1, 3);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(f - q);\n    ASSERT_ALWAYS_PREC\n      (g, \"2.66666 66666 66666 66666 66666 66666 66666 667\", medium_prec);\n  }\n  {\n    mpf_class f(15.0, large_prec);\n    mpq_class q(1, 3);\n    mpf_class g(0.0, very_large_prec);\n    g = mpf_class(f + q);\n    ASSERT_ALWAYS_PREC\n      (g, \"15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333 33333 33\",\n       large_prec);\n  }\n}\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-rand.cc",
    "content": "/* Test gmp_randclass.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\n/* all flavours of initialization */\nvoid\ncheck_randinit (void)\n{\n  {\n    gmp_randclass r(gmp_randinit_default);\n  }\n\n  {\n    mpz_class a(0);\n    mpir_ui c = 0;\n    mp_bitcnt_t m2exp = 8;\n    gmp_randclass r(gmp_randinit_lc_2exp, a, c, m2exp);\n  }\n\n  {\n    mpir_ui m2exp = 64;\n    gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);\n  }\n\n  /* gmp_randinit_lc_2exp_size, with excessive size */\n  {\n    try {\n      mpir_ui m2exp = ULONG_MAX;\n      gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);\n      ASSERT_ALWAYS (0);  /* should not be reached */\n    } catch (length_error) {\n    }\n  }\n\n  {\n    gmp_randclass r(gmp_randinit_mt);\n  }\n\n  /* obsolete, but still available */\n  {\n    gmp_randalg_t alg = GMP_RAND_ALG_LC;\n    mpir_ui m2exp = 64;\n    gmp_randclass r(alg, m2exp);\n  }\n  {\n    gmp_randalg_t alg = GMP_RAND_ALG_DEFAULT;\n    mpir_ui m2exp = 64;\n    gmp_randclass r(alg, m2exp);\n  }\n  {\n    gmp_randalg_t alg = 
(gmp_randalg_t) 0;\n    mpir_ui m2exp = 64;\n    gmp_randclass r(alg, m2exp);\n  }\n}\n\nvoid\ncheck_mpz (void)\n{\n  {\n    gmp_randclass r(gmp_randinit_default);\n    mpz_class a(123);\n    unsigned int b = 256;\n    mpz_class c;\n    r.seed(a);\n    c = r.get_z_bits(b);\n  }\n  {\n    gmp_randclass r(gmp_randinit_default);\n    mpz_class a(256);\n    mpir_ui b = 123;\n    mpz_class c;\n    r.seed(b);\n    c = r.get_z_bits(a);\n  }\n  {\n    gmp_randclass r(gmp_randinit_default);\n    mpz_class a(123), b(256);\n    mpz_class c;\n    r.seed(a);\n    c = r.get_z_range(b);\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  {\n    gmp_randclass r(gmp_randinit_default);\n    mpz_class a(123);\n    r.seed(a);\n    mpf_class b;\n    b = r.get_f();\n  }\n  {\n    gmp_randclass r(gmp_randinit_default);\n    int a = 123, b = 128;\n    r.seed(a);\n    mpf_class c;\n    c = r.get_f(b);\n  }\n}\n\n/*check that get_randstate_t really returns the randstate_t underlying a gmp_randclass*/\nvoid\ncheck_randstate_t(void)\n{\n  /*seed gmp_randclass r using its method, and seed gmp_randclass s using get_randstate_t\n    if they both generate the same sequence, then gmp_randstate_t must be returning the\n    underlying randstate_t of s */\n  {\n    gmp_randclass r(gmp_randinit_default);\n    gmp_randclass s(gmp_randinit_default);\n    __gmp_randstate_struct s_state = *s.get_randstate_t();\n\n    r.seed(0xdeadbeef);\n    gmp_randseed_ui(&s_state, 0xdeadbeef);\n\n    bool res = true;\n    for (int i = 0; i < 100; ++i)\n    {\n      mpz_class n1 = r.get_z_bits(32);\n      mpz_class n2 = s.get_z_bits(32);\n      ASSERT_ALWAYS (n1 == n2);\n    }\n  }\n}\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_randinit();\n  check_mpz();\n  check_mpf();\n  check_randstate_t();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-ternary.cc",
    "content": "/* Test mp*_class ternary expressions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\n/* The various test cases are broken up into separate functions to keep down\n   compiler memory use.  They're static so that any mistakenly omitted from\n   main() will provoke warnings (under gcc -Wall at least).  
*/\n\nstatic void\ncheck_mpz_1 (void)\n{\n  // template<class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n}\n\nstatic void\ncheck_mpz_2 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, T, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3;\n    mpz_class d;\n    d = a + b * c; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3;\n    mpz_class d;\n    d = a - b * c; ASSERT_ALWAYS(d == -5);\n  }\n}\n\nstatic void\ncheck_mpz_3 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<T, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a + c * b; ASSERT_ALWAYS(d == 7);\n  }\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3;\n    mpz_class d;\n    d = a - c * b; ASSERT_ALWAYS(d == -5);\n  }\n}\n\nstatic void\ncheck_mpz_4 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpz_t, T>, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0;\n    mpz_class e;\n    e = a + b * (c + d); ASSERT_ALWAYS(e == 15);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0;\n    mpz_class e;\n    e = a - b * (c + d); ASSERT_ALWAYS(e == -13);\n  }\n}\n\nstatic void\ncheck_mpz_5 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, 
__gmp_binary_expr<__gmp_expr<mpz_t, T>, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    signed int d = 4;\n    mpz_class e;\n    e = a + (b - d) * c; ASSERT_ALWAYS(e == -5);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    signed int d = 4;\n    mpz_class e;\n    e = a - (b - d) * c; ASSERT_ALWAYS(e == 7);\n  }\n}\n\nstatic void\ncheck_mpz_6 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, U, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3, d = 4;\n    mpz_class e;\n    e = a + (b + c) * d; ASSERT_ALWAYS(e == 21);\n  }\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3, d = 4;\n    mpz_class e;\n    e = a - (b + c) * d; ASSERT_ALWAYS(e == -19);\n  }\n}\n\nstatic void\ncheck_mpz_7 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<T, __gmp_expr<mpz_t, U>, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    double c = 3.0, d = 4.0;\n    mpz_class e;\n    e = a + c * (b + d); ASSERT_ALWAYS(e == 19);\n  }\n  {\n    mpz_class a(1), b(2);\n    double c = 3.0, d = 4.0;\n    mpz_class e;\n    e = a - c * (b + d); ASSERT_ALWAYS(e == -17);\n  }\n}\n\nstatic void\ncheck_mpz_8 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>,\n  // Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    signed int d = 4, e = 5;\n    mpz_class f;\n    f = a + (b - d) * (c + e); ASSERT_ALWAYS(f == -15);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    signed int d = 4, e = 5;\n    mpz_class f;\n    f = a - (b - d) * (c + e); ASSERT_ALWAYS(f == 17);\n  }\n}\n\nstatic void\ncheck_mpz_9 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // 
__gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    unsigned int d = 4;\n    mpz_class e;\n    e = (a + d) + b * c; ASSERT_ALWAYS(e == 11);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    unsigned int d = 4;\n    mpz_class e;\n    e = (a + d) - b * c; ASSERT_ALWAYS(e == -1);\n  }\n}\n\nstatic void\ncheck_mpz_10 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,\n  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, U, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    double c = 3.0, d = 4.0;\n    mpz_class e;\n    e = (a - c) + b * d; ASSERT_ALWAYS(e == 6);\n  }\n  {\n    mpz_class a(1), b(2);\n    double c = 3.0, d = 4.0;\n    mpz_class e;\n    e = (a - c) - b * d; ASSERT_ALWAYS(e == -10);\n  }\n}\n\nstatic void\ncheck_mpz_11 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,\n  // __gmp_expr<mpz_t, __gmp_binary_expr<U, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3, d = 4;\n    mpz_class e;\n    e = (a - c) + d * b; ASSERT_ALWAYS(e == 6);\n  }\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3, d = 4;\n    mpz_class e;\n    e = (a - c) - d * b; ASSERT_ALWAYS(e == -10);\n  }\n}\n\nstatic void\ncheck_mpz_12 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpz_t, U>, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    unsigned int d = 4, e = 5;\n    mpz_class f;\n    f = (a + d) + b * (c - e); ASSERT_ALWAYS(f == 1);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    unsigned int d = 4, e = 5;\n    mpz_class f;\n    f = (a + d) - b * (c - e); ASSERT_ALWAYS(f == 9);\n  }\n}\n\nstatic void\ncheck_mpz_13 
(void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, mpz_class, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0, e = 5.0;\n    mpz_class f;\n    f = (a - d) + (b + e) * c; ASSERT_ALWAYS(f == 18);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0, e = 5.0;\n    mpz_class f;\n    f = (a - d) - (b + e) * c; ASSERT_ALWAYS(f == -24);\n  }\n\n}\n\nstatic void\ncheck_mpz_14 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, V, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3, d = 4, e = 5;\n    mpz_class f;\n    f = (a + c) + (b + d) * e; ASSERT_ALWAYS(f == 34);\n  }\n  {\n    mpz_class a(1), b(2);\n    signed int c = 3, d = 4, e = 5;\n    mpz_class f;\n    f = (a + c) - (b + d) * e; ASSERT_ALWAYS(f == -26);\n  }\n}\n\nstatic void\ncheck_mpz_15 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<U, __gmp_expr<mpz_t, V>, Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3, d = 4, e = 5;\n    mpz_class f;\n    f = (a - c) + d * (b - e); ASSERT_ALWAYS(f == -14);\n  }\n  {\n    mpz_class a(1), b(2);\n    unsigned int c = 3, d = 4, e = 5;\n    mpz_class f;\n    f = (a - c) - d * (b - e); ASSERT_ALWAYS(f == 10);\n  }\n\n}\n\nstatic void\ncheck_mpz_16 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr\n  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, __gmp_expr<mpz_t, V>,\n  // Op1> >, Op2> >\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0, e = 5.0, f = 6.0;\n    mpz_class 
g;\n    g = (a + d) + (b - e) * (c + f); ASSERT_ALWAYS(g == -22);\n  }\n  {\n    mpz_class a(1), b(2), c(3);\n    double d = 4.0, e = 5.0, f = 6.0;\n    mpz_class g;\n    g = (a + d) - (b - e) * (c + f); ASSERT_ALWAYS(g == 32);\n  }\n}\n\nstatic void\ncheck_mpz_17 (void)\n{\n  // template <class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    mpz_class d;\n    d = a * b + c; ASSERT_ALWAYS(d == 10);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    mpz_class d;\n    d = a * b - c; ASSERT_ALWAYS(d == 2);\n  }\n}\n\nstatic void\ncheck_mpz_18 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr\n  // <mpz_t, __gmp_binary_expr<mpz_class, T, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4;\n    mpz_class d;\n    d = a * c + b; ASSERT_ALWAYS(d == 11);\n  }\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4;\n    mpz_class d;\n    d = a * c - b; ASSERT_ALWAYS(d == 5);\n  }\n\n}\n\nstatic void\ncheck_mpz_19 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr\n  // <mpz_t, __gmp_binary_expr<T, mpz_class, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4;\n    mpz_class d;\n    d = c * a + b; ASSERT_ALWAYS(d == 11);\n  }\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4;\n    mpz_class d;\n    d = c * a - b; ASSERT_ALWAYS(d == 5);\n  }\n}\n\nstatic void\ncheck_mpz_20 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 5.0;\n    mpz_class e;\n    e = a * (b + d) + c; ASSERT_ALWAYS(e == 20);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 
5.0;\n    mpz_class e;\n    e = a * (b + d) - c; ASSERT_ALWAYS(e == 12);\n  }\n}\n\nstatic void\ncheck_mpz_21 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    signed int d = 5;\n    mpz_class e;\n    e = (a - d) * b + c; ASSERT_ALWAYS(e == -5);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    signed int d = 5;\n    mpz_class e;\n    e = (a - d) * b - c; ASSERT_ALWAYS(e == -13);\n  }\n}\n\nstatic void\ncheck_mpz_22 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, U, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4, d = 5;\n    mpz_class e;\n    e = (a + c) * d + b; ASSERT_ALWAYS(e == 33);\n  }\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4, d = 5;\n    mpz_class e;\n    e = (a + c) * d - b; ASSERT_ALWAYS(e == 27);\n  }\n}\n\nstatic void\ncheck_mpz_23 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <T, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3);\n    double c = 4.0, d = 5.0;\n    mpz_class e;\n    e = c * (a + d) + b; ASSERT_ALWAYS(e == 31);\n  }\n  {\n    mpz_class a(2), b(3);\n    double c = 4.0, d = 5.0;\n    mpz_class e;\n    e = c * (a + d) - b; ASSERT_ALWAYS(e == 25);\n  }\n\n}\n\nstatic void\ncheck_mpz_24 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    signed int d = 5, e = 6;\n    mpz_class f;\n    f = (a - d) * (b + e) + c; ASSERT_ALWAYS(f == -23);\n  }\n  
{\n    mpz_class a(2), b(3), c(4);\n    signed int d = 5, e = 6;\n    mpz_class f;\n    f = (a - d) * (b + e) - c; ASSERT_ALWAYS(f == -31);\n  }\n}\n\nstatic void\ncheck_mpz_25 (void)\n{\n  // template <class T, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <mpz_class, mpz_class, Op1> >, __gmp_expr<mpz_t, T>, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    unsigned int d = 5;\n    mpz_class e;\n    e = a * b + (c - d); ASSERT_ALWAYS(e == 5);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    unsigned int d = 5;\n    mpz_class e;\n    e = a * b - (c - d); ASSERT_ALWAYS(e == 7);\n  }\n}\n\nstatic void\ncheck_mpz_26 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <mpz_class, T, Op1> >, __gmp_expr<mpz_t, U>, Op2> >\n  {\n    mpz_class a(2), b(3);\n    double c = 4.0, d = 5.0;\n    mpz_class e;\n    e = a * c + (b + d); ASSERT_ALWAYS(e == 16);\n  }\n  {\n    mpz_class a(2), b(3);\n    double c = 4.0, d = 5.0;\n    mpz_class e;\n    e = a * c - (b + d); ASSERT_ALWAYS(e == 0);\n  }\n}\n\nstatic void\ncheck_mpz_27 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <T, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4, d = 5;\n    mpz_class e;\n    e = c * a + (b - d); ASSERT_ALWAYS(e == 6);\n  }\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4, d = 5;\n    mpz_class e;\n    e = c * a - (b - d); ASSERT_ALWAYS(e == 10);\n  }\n}\n\nstatic void\ncheck_mpz_28 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, __gmp_expr<mpz_t, U>, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    unsigned int d = 5, e = 6;\n    mpz_class f;\n    
f = a * (b - d) + (c + e); ASSERT_ALWAYS(f == 6);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    unsigned int d = 5, e = 6;\n    mpz_class f;\n    f = a * (b - d) - (c + e); ASSERT_ALWAYS(f == -14);\n  }\n}\n\nstatic void\ncheck_mpz_29 (void)\n{\n  // template <class T, class U, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 5.0, e = 6.0;\n    mpz_class f;\n    f = (a + d) * b + (c - e); ASSERT_ALWAYS(f == 19);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 5.0, e = 6.0;\n    mpz_class f;\n    f = (a + d) * b - (c - e); ASSERT_ALWAYS(f == 23);\n  }\n}\n\nstatic void\ncheck_mpz_30 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, U, Op1> >, __gmp_expr<mpz_t, V>, Op2> >\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4, d = 5, e = 6;\n    mpz_class f;\n    f = (a + c) * d + (b + e); ASSERT_ALWAYS(f == 39);\n  }\n  {\n    mpz_class a(2), b(3);\n    signed int c = 4, d = 5, e = 6;\n    mpz_class f;\n    f = (a + c) * d - (b + e); ASSERT_ALWAYS(f == 21);\n  }\n}\n\nstatic void\ncheck_mpz_31 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <T, __gmp_expr<mpz_t, U>, Op1> >, __gmp_expr<mpz_t, V>, Op2> >\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4, d = 5, e = 6;\n    mpz_class f;\n    f = c * (a + d) + (b - e); ASSERT_ALWAYS(f == 25);\n  }\n  {\n    mpz_class a(2), b(3);\n    unsigned int c = 4, d = 5, e = 6;\n    mpz_class f;\n    f = c * (a + d) - (b - e); ASSERT_ALWAYS(f == 31);\n  }\n}\n\nstatic void\ncheck_mpz_32 (void)\n{\n  // template <class T, class U, class V, class Op1, class Op2>\n  // __gmp_expr<mpz_t, 
__gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr\n  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >,\n  // __gmp_expr<mpz_t, V>, Op2> >\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 5.0, e = 6.0, f = 7.0;\n    mpz_class g;\n    g = (a + d) * (b - e) + (c + f); ASSERT_ALWAYS(g == -10);\n  }\n  {\n    mpz_class a(2), b(3), c(4);\n    double d = 5.0, e = 6.0, f = 7.0;\n    mpz_class g;\n    g = (a + d) * (b - e) - (c + f); ASSERT_ALWAYS(g == -32);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // currently there's no ternary mpq operation\n}\n\nvoid\ncheck_mpf (void)\n{\n  // currently there's no ternary mpf operation\n}\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz_1 ();\n  check_mpz_2 ();\n  check_mpz_3 ();\n  check_mpz_4 ();\n  check_mpz_5 ();\n  check_mpz_6 ();\n  check_mpz_7 ();\n  check_mpz_8 ();\n  check_mpz_9 ();\n  check_mpz_10 ();\n  check_mpz_11 ();\n  check_mpz_12 ();\n  check_mpz_13 ();\n  check_mpz_14 ();\n  check_mpz_15 ();\n  check_mpz_16 ();\n  check_mpz_17 ();\n  check_mpz_18 ();\n  check_mpz_19 ();\n  check_mpz_20 ();\n  check_mpz_21 ();\n  check_mpz_22 ();\n  check_mpz_23 ();\n  check_mpz_24 ();\n  check_mpz_25 ();\n  check_mpz_26 ();\n  check_mpz_27 ();\n  check_mpz_28 ();\n  check_mpz_29 ();\n  check_mpz_30 ();\n  check_mpz_31 ();\n  check_mpz_32 ();\n\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/cxx/t-unary.cc",
    "content": "/* Test mp*_class unary expressions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <iostream>\n\n#include \"mpir.h\"\n#include \"mpirxx.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nusing namespace std;\n\n\nvoid\ncheck_mpz (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >\n  {\n    mpz_class a(1);\n    mpz_class b(+a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpz_class a(2);\n    mpz_class b;\n    b = -a; ASSERT_ALWAYS(b == -2);\n  }\n  {\n    mpz_class a(3);\n    mpz_class b;\n    b = ~a; ASSERT_ALWAYS(b == -4);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >\n  {\n    mpz_class a(1);\n    mpz_class b(-(-a)); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpz_class a(2);\n    mpz_class b;\n    b = -(-(-a)); ASSERT_ALWAYS(b == -2);\n  }\n}\n\nvoid\ncheck_mpq (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >\n  {\n    mpq_class a(1);\n    mpq_class b(+a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpq_class a(2);\n    mpq_class b;\n    b = -a; ASSERT_ALWAYS(b == 
-2);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >\n  {\n    mpq_class a(1);\n    mpq_class b(-(-a)); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpq_class a(2);\n    mpq_class b;\n    b = -(-(-a)); ASSERT_ALWAYS(b == -2);\n  }\n}\n\nvoid\ncheck_mpf (void)\n{\n  // template <class T, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >\n  {\n    mpf_class a(1);\n    mpf_class b(+a); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpf_class a(2);\n    mpf_class b;\n    b = -a; ASSERT_ALWAYS(b == -2);\n  }\n\n  // template <class T, class U, class Op>\n  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >\n  {\n    mpf_class a(1);\n    mpf_class b(-(-a)); ASSERT_ALWAYS(b == 1);\n  }\n  {\n    mpf_class a(2);\n    mpf_class b;\n    b = -(-(-a)); ASSERT_ALWAYS(b == -2);\n  }\n}\n\n\nint\nmain (void)\n{\n  tests_start();\n\n  check_mpz();\n  check_mpq();\n  check_mpf();\n\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/devel/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\nEXTRA_PROGRAMS = \\\n  aors_n anymul_1 copy divmod_1 shift logops_n mul_N addmul_N try\n\nif ENABLE_STATIC\nif ENABLE_SHARED\nEXTRA_PROGRAMS += static_try\nstatic_try_SOURCES = try.c\nstatic_try_LDFLAGS = -static\nendif\nendif\n\nallprogs: $(EXTRA_PROGRAMS)\n\nCLEANFILES = $(EXTRA_PROGRAMS)\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/devel/README",
    "content": "Copyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n                       DEVELOPMENT TEST PROGRAMS\n\n\nThis directory contains various programs used during development.  Casual\nGMP users are unlikely to find anything of interest.\n\nNothing here is built or installed, nor even run in a \"make check\", but\nthere's Makefile rules to build each program, or \"allprogs\" to build\neverything.\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 76\nEnd:\n"
  },
  {
    "path": "tests/devel/addmul_N.c",
    "content": "/*\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <string.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void print_posneg (mp_limb_t);\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define LXW ((int) (2 * sizeof (mp_limb_t)))\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 496\n#endif\n#ifndef TIMES\n#define TIMES OPS/(SIZE+1)\n#endif\n\n#if N == 2\n#define mpn_addmul_N mpn_addmul_2\n#elif N == 3\n#define mpn_addmul_N mpn_addmul_3\n#elif N == 4\n#define 
mpn_addmul_N mpn_addmul_4\n#elif N == 5\n#define mpn_addmul_N mpn_addmul_5\n#elif N == 6\n#define mpn_addmul_N mpn_addmul_6\n#elif N == 7\n#define mpn_addmul_N mpn_addmul_7\n#elif N == 8\n#define mpn_addmul_N mpn_addmul_8\n#endif\n\nmp_limb_t\nrefmpn_addmul_N (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)\n{\n  int i;\n  for (i = 1; i < N; i++)\n    {\n      rp[n] = mpn_addmul_1 (rp, up, n, *vp);\n      rp++;\n      vp++;\n    }\n  return mpn_addmul_1 (rp, up, n, *vp);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mp_limb_t up[SIZE];\n  mp_limb_t ref[SIZE + N - 1];\n  mp_limb_t mem[SIZE + N + 1];\n  mp_ptr rp = mem + 1;\n  mp_limb_t vp[N];\n  mp_limb_t cy_ref, cy_try;\n  int i;\n  long t0, t;\n  unsigned test;\n  mp_size_t size;\n  double cyc;\n  unsigned ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (CLOCK / SIZE / 1000) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      rp[size + N - 1] = 0x12345678;\n      rp[-1] = 0x87654321;\n\n      mpn_randomb (vp, rands, N);\n\n#if TIMES != 1\t\t\t/* run timing tests unless asked not to */\n      mpn_randomb (up, rands, size);\n      mpn_randomb (rp, rands, size + N - 1);\n\n      MPN_COPY (ref, rp, size + N - 1);\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tmpn_addmul_N (ref, up, size, vp);\n      t = cputime() - t0;\n      cyc = ((double) t * CLOCK) / (TIMES * size * 1000.0) / N;\n      printf (\"mpn_addmul_N:    %5ldms (%.3f cycles/limb) [%.2f Gb/s]\\n\",\n\t      t, cyc, CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB/1e9);\n#endif\n\n#ifdef ZEROu\n      MPN_ZERO (up, size);\n#else\n      mpn_rrandom (up,rands, size);\n#endif\n      mpn_rrandom (vp,rands, N);\n#ifdef ZERO\n  
    MPN_ZERO (rp, size + N - 1);\n#else\n      mpn_rrandom (rp, rands,size + N - 1);\n#endif\n\n#if defined (PRINT) || defined (PRINTV)\n      printf (\"vp=\");\n      mpn_print (vp, N);\n#endif\n#ifdef PRINT\n      printf (\"%*s \", 3 + N * LXW, \"\");\n      mpn_print (rp, size);\n      printf (\"%*s \", 3 + N * LXW, \"\");\n      mpn_print (up, size);\n#endif\n\n      MPN_COPY (ref, rp, size + N - 1);\n      cy_ref = refmpn_addmul_N (ref, up, size, vp);\n      cy_try = mpn_addmul_N (rp, up, size, vp);\n\n#ifdef PRINT\n      printf (\"%*lX \", LXW, cy_ref);\n      mpn_print (ref, size + N - 1);\n      printf (\"%*lX \", LXW, cy_try);\n      mpn_print (rp, size + N - 1);\n#endif\n\n#ifndef NOCHECK\n      if (cy_ref != cy_try || mpn_cmp (ref, rp, size + N - 1) != 0\n\t  || rp[size + N - 1] != 0x12345678 || rp[-1] != 0x87654321)\n\t{\n\t  printf (\"\\n        ref%*s try%*s diff\\n\", LXW - 3, \"\", 2 * LXW - 6, \"\");\n\t  for (i = 0; i < size + N - 1; i++)\n\t    {\n\t      printf (\"%6d: \", i);\n\t      printf (\"%0*llX \", LXW, (unsigned long long) ref[i]);\n\t      printf (\"%0*llX \", LXW, (unsigned long long) rp[i]);\n\t      print_posneg (rp[i] - ref[i]);\n\t      printf (\"\\n\");\n\t    }\n\t  printf (\"retval: \");\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_ref);\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_try);\n\t  print_posneg (cy_try - cy_ref);\n\t  printf (\"\\n\");\n\t  if (rp[-1] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (rp[size + N - 1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nprint_posneg (mp_limb_t d)\n{\n  char buf[LXW + 2];\n  if (d == 0)\n    printf (\" %*X\", LXW, 0);\n  else if (-d < d)\n    {\n      sprintf (buf, \"%llX\", (unsigned long long) -d);\n      printf (\"%*s-%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n  else\n    {\n      sprintf (buf, \"%llX\", 
(unsigned long long) d);\n      printf (\"%*s+%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", LXW, p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/anymul_1.c",
    "content": "/*\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#ifdef OPERATION_mul_1\n#define func __gmpn_mul_1\n#define reffunc refmpn_mul_1\n#define funcname \"mpn_mul_1\"\n#endif\n\n#ifdef OPERATION_addmul_1\n#define func __gmpn_addmul_1\n#define reffunc refmpn_addmul_1\n#define funcname \"mpn_addmul_1\"\n#endif\n\n#ifdef OPERATION_submul_1\n#define func __gmpn_submul_1\n#define reffunc refmpn_submul_1\n#define funcname \"mpn_submul_1\"\n#endif\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void print_posneg (mp_limb_t);\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define 
LXW ((int) (2 * sizeof (mp_limb_t)))\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 496\n#endif\n#ifndef TIMES\n#define TIMES OPS/(SIZE+1)\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mp_ptr s1, ref, rp;\n  mp_limb_t cy_ref, cy_try;\n  int i;\n  long t0, t;\n  unsigned int test;\n  mp_limb_t xlimb;\n  mp_size_t size;\n  double cyc;\n  unsigned int ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  s1 = malloc (SIZE * sizeof (mp_limb_t));\n  ref = malloc (SIZE * sizeof (mp_limb_t));\n  rp = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n  rp++;\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (SIZE > 10000 ? 1 : 10000 / SIZE) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      rp[-1] = 0x87654321;\n      rp[size] = 0x12345678;\n\n#ifdef FIXED_XLIMB\n      xlimb = FIXED_XLIMB;\n#else\n      mpn_rrandom (&xlimb, rands,1);\n#endif\n\n#if TIMES != 1\n      mpn_randomb (s1, rands, size);\n      mpn_randomb (rp, rands, size);\n\n      MPN_COPY (ref, rp, size);\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tfunc (ref, s1, size, xlimb);\n      t = cputime() - t0;\n      cyc = ((double) t * CLOCK) / (TIMES * size * 1000.0);\n      printf (funcname \":    %5ldms (%.3f cycles/limb) [%.2f Gb/s]\\n\",\n\t      t, cyc,\n\t      CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB/1e9);\n#endif\n\n#ifndef NOCHECK\n      mpn_rrandom (s1, rands,size);\n#ifdef ZERO\n      memset (rp, 0, size * sizeof *rp);\n#else\n      mpn_rrandom (rp, rands,size);\n#endif\n#if defined (PRINT) || defined (XPRINT)\n      printf (\"xlimb=\");\n      mpn_print (&xlimb, 1);\n#endif\n#ifdef 
PRINT\n#ifndef OPERATION_mul_1\n      printf (\"%*s \", (int) (2 * sizeof(mp_limb_t)), \"\");\n      mpn_print (rp, size);\n#endif\n      printf (\"%*s \", (int) (2 * sizeof(mp_limb_t)), \"\");\n      mpn_print (s1, size);\n#endif\n\n      MPN_COPY (ref, rp, size);\n      cy_ref = reffunc (ref, s1, size, xlimb);\n      cy_try = func (rp, s1, size, xlimb);\n\n#ifdef PRINT\n      mpn_print (&cy_ref, 1);\n      mpn_print (ref, size);\n      mpn_print (&cy_try, 1);\n      mpn_print (rp, size);\n#endif\n\n      if (cy_ref != cy_try || mpn_cmp (ref, rp, size) != 0\n\t  || rp[-1] != 0x87654321 || rp[size] != 0x12345678)\n\t{\n\t  printf (\"\\n        ref%*s try%*s diff\\n\", LXW - 3, \"\", 2 * LXW - 6, \"\");\n\t  for (i = 0; i < size; i++)\n\t    {\n\t      printf (\"%6d: \", i);\n\t      printf (\"%0*llX \", LXW, (unsigned long long) ref[i]);\n\t      printf (\"%0*llX \", LXW, (unsigned long long) rp[i]);\n\t      print_posneg (rp[i] - ref[i]);\n\t      printf (\"\\n\");\n\t    }\n\t  printf (\"retval: \");\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_ref);\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_try);\n\t  print_posneg (cy_try - cy_ref);\n\t  printf (\"\\n\");\n\t  if (rp[-1] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (rp[size] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nprint_posneg (mp_limb_t d)\n{\n  char buf[LXW + 2];\n  if (d == 0)\n    printf (\" %*X\", LXW, 0);\n  else if (-d < d)\n    {\n      sprintf (buf, \"%llX\", (unsigned long long) -d);\n      printf (\"%*s-%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n  else\n    {\n      sprintf (buf, \"%llX\", (unsigned long long) d);\n      printf (\"%*s+%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef 
_LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", (int) (2 * sizeof(mp_limb_t)), p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/aors_n.c",
    "content": "/*\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifdef OPERATION_add_n\n#define func __gmpn_add_n\n#define reffunc refmpn_add_n\n#define funcname \"mpn_add_n\"\n#endif\n\n#ifdef OPERATION_sub_n\n#define func __gmpn_sub_n\n#define reffunc refmpn_sub_n\n#define funcname \"mpn_sub_n\"\n#endif\n\n#ifdef OPERATION_addlsh1_n\n#define func __gmpn_addlsh1_n\n#define reffunc refmpn_addlsh1_n\n#define funcname \"mpn_addlsh1_n\"\n#endif\n\n#ifdef OPERATION_sublsh1_n\n#define func __gmpn_sublsh1_n\n#define reffunc refmpn_sublsh1_n\n#define funcname \"mpn_sublsh1_n\"\n#endif\n\n#ifdef OPERATION_rsh1add_n\n#define func __gmpn_rsh1add_n\n#define reffunc refmpn_rsh1add_n\n#define funcname \"mpn_rsh1add_n\"\n#endif\n\n#ifdef OPERATION_rsh1sub_n\n#define func __gmpn_rsh1sub_n\n#define reffunc refmpn_rsh1sub_n\n#define funcname \"mpn_rsh1sub_n\"\n#endif\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 
1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 328\n#endif\n#ifndef TIMES\n#define TIMES OPS/(SIZE+1)\n#endif\n\nmain (int argc, char **argv)\n{\n  mp_ptr s1, s2, dx, dy;\n  mp_limb_t cyx, cyy;\n  int i;\n  long t0, t;\n  unsigned int test;\n  mp_size_t size;\n  unsigned int ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  s1 = malloc (SIZE * sizeof (mp_limb_t));\n  s2 = malloc (SIZE * sizeof (mp_limb_t));\n  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (SIZE > 100000 ? 
1 : 100000 / SIZE) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      dx[0] = 0x87654321;\n      dy[0] = 0x87654321;\n      dx[size+1] = 0x12345678;\n      dy[size+1] = 0x12345678;\n\n#if TIMES != 1\n      mpn_randomb (s1, rands, size);\n      mpn_randomb (s2, rands, size);\n\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tfunc (dx+1, s1, s2, size);\n      t = cputime() - t0;\n      printf (funcname \":    %5ldms (%.3f cycles/limb)\\n\",\n\t      t, ((double) t * CLOCK) / (TIMES * size * 1000.0));\n#endif\n\n#ifndef NOCHECK\n      mpn_rrandom (s1, rands, size);\n      mpn_rrandom (s2, rands, size);\n\n#ifdef PRINT\n      mpn_print (s1, size);\n      mpn_print (s2, size);\n#endif\n\n      /* Put garbage in the destination.  */\n      for (i = 0; i < size; i++)\n\t{\n\t  dx[i+1] = 0xdead;\n\t  dy[i+1] = 0xbeef;\n\t}\n\n      cyx = reffunc (dx+1, s1, s2, size);\n      cyy = func (dy+1, s1, s2, size);\n\n#ifdef PRINT\n      mpn_print (&cyx, 1);\n      mpn_print (dx+1, size);\n      mpn_print (&cyy, 1);\n      mpn_print (dy+1, size);\n#endif\n\n      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0\n\t  || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)\n\t{\n#ifndef PRINT\n\t  mpn_print (&cyx, 1);\n\t  mpn_print (dx+1, size);\n\t  mpn_print (&cyy, 1);\n\t  mpn_print (dy+1, size);\n#endif\n\t  printf (\"\\n\");\n\t  if (dy[0] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (dy[size+1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) 
(sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", (int) (2 * sizeof(mp_limb_t)), p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/copy.c",
    "content": "/*\nCopyright 1999, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifdef OPERATION_copyi\n#define func MPN_COPY_INCR\n#define reffunc refmpn_copyi\n#define funcname \"MPN_COPY_INCR\"\n#endif\n\n#ifdef OPERATION_copyd\n#define func MPN_COPY_DECR\n#define reffunc refmpn_copyd\n#define funcname \"MPN_COPY_DECR\"\n#endif\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/2)\n#endif\n#ifndef SIZE\n#define SIZE 496\n#endif\n#ifndef TIMES\n#define TIMES 
OPS/(SIZE+1)\n#endif\n\nmain (int argc, char **argv)\n{\n  mp_ptr s1, dx, dy;\n  int i;\n  long t0, t;\n  unsigned int test;\n  mp_size_t size;\n  unsigned int ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  s1 = malloc (SIZE * sizeof (mp_limb_t));\n  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      dx[0] = 0x87654321;\n      dy[0] = 0x87654321;\n      dx[size+1] = 0x12345678;\n      dy[size+1] = 0x12345678;\n\n#if TIMES != 1\n      mpn_randomb (s1, rands, size);\n\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tfunc (dx+1, s1, size);\n      t = cputime() - t0;\n      printf (funcname \":    %5ldms (%.3f cycles/limb)\\n\",\n\t      t, ((double) t * CLOCK) / (TIMES * size * 1000.0));\n#endif\n\n#ifndef NOCHECK\n      mpn_rrandom (s1, rands,size);\n\n#ifdef PRINT\n      mpn_print (s1, size);\n#endif\n\n      /* Put garbage in the destination.  
*/\n      for (i = 0; i < size; i++)\n\t{\n\t  dx[i+1] = 0xdead;\n\t  dy[i+1] = 0xbeef;\n\t}\n\n      reffunc (dx+1, s1, size);\n      func (dy+1, s1, size);\n\n#ifdef PRINT\n      mpn_print (dx+1, size);\n      mpn_print (dy+1, size);\n#endif\n\n      if (mpn_cmp (dx, dy, size+2) != 0\n\t  || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)\n\t{\n#ifndef PRINT\n\t  mpn_print (dx+1, size);\n\t  mpn_print (dy+1, size);\n#endif\n\t  printf (\"\\n\");\n\t  if (dy[0] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (dy[size+1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", (int) (2 * sizeof(mp_limb_t)), p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/divmod_1.c",
    "content": "/*\nCopyright 1996, 1998, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\n#define M * 1000000\n\n#ifndef CLOCK\n#if defined (__i386__)\n#define CLOCK (16666667)\n#elif defined (__m68k__)\n#define CLOCK (20 M)\n#elif defined (_IBMR2)\n#define CLOCK (25 M)\n#elif defined (__sparc__)\n#define CLOCK (20 M)\n#elif defined (__sun__)\n#define CLOCK (20 M)\n#elif defined (__mips)\n#define CLOCK (40 M)\n#elif defined (__hppa__)\n#define CLOCK (50 M)\n#elif defined (__alpha)\n#define CLOCK (133 M)\n#else\n#error \"Don't know CLOCK of your machine\"\n#endif\n#endif\n\n#ifndef OPS\n#define OPS 20000000\n#endif\n#ifndef SIZE\n#define SIZE 
1000\n#endif\n#ifndef TIMES\n#define TIMES OPS/SIZE\n#else\n#undef OPS\n#define OPS (SIZE*TIMES)\n#endif\n\nmain ()\n{\n  mp_limb_t nptr[SIZE];\n  mp_limb_t qptr[SIZE];\n  mp_limb_t pptr[SIZE];\n  mp_limb_t dlimb, rlimb, plimb;\n  mp_size_t nsize, qsize, psize;\n  int test;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  for (test = 0; ; test++)\n    {\n#ifdef RANDOM\n      nsize = random () % SIZE + 1;\n#else\n      nsize = SIZE;\n#endif\n\n      mpn_rrandom (nptr,rands,  nsize);\n\n      mpn_rrandom (&dlimb,rands, 1);\n      if (dlimb == 0)\n\tabort ();\n\n      rlimb = mpn_divmod_1 (qptr, nptr, nsize, dlimb);\n      qsize = nsize - (qptr[nsize - 1] == 0);\n      if (qsize == 0)\n\t{\n\t  plimb = rlimb;\n\t  psize = qsize;\n\t}\n      else\n\t{\n\t  plimb = mpn_mul_1 (pptr, qptr, qsize, dlimb);\n\t  psize = qsize;\n\t  plimb += mpn_add_1 (pptr, pptr, psize, rlimb);\n\t}\n      if (plimb != 0)\n\tpptr[psize++] = plimb;\n\n\n      if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0)\n\tabort ();\n    }\n}\n"
  },
  {
    "path": "tests/devel/logops_n.c",
    "content": "/*\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef OPERATION_and_n\n#define func __gmpn_and_n\n#define reffunc refmpn_and_n\n#define funcname \"mpn_and_n\"\n#endif\n\n#ifdef OPERATION_andn_n\n#define func __gmpn_andn_n\n#define reffunc refmpn_andn_n\n#define funcname \"mpn_andn_n\"\n#endif\n\n#ifdef OPERATION_nand_n\n#define func __gmpn_nand_n\n#define reffunc refmpn_nand_n\n#define funcname \"mpn_nand_n\"\n#endif\n\n#ifdef OPERATION_ior_n\n#define func __gmpn_ior_n\n#define reffunc refmpn_ior_n\n#define funcname \"mpn_ior_n\"\n#endif\n\n#ifdef OPERATION_iorn_n\n#define func __gmpn_iorn_n\n#define reffunc refmpn_iorn_n\n#define funcname \"mpn_iorn_n\"\n#endif\n\n#ifdef OPERATION_nior_n\n#define func __gmpn_nior_n\n#define reffunc refmpn_nior_n\n#define funcname \"mpn_nior_n\"\n#endif\n\n#ifdef OPERATION_xor_n\n#define func __gmpn_xor_n\n#define reffunc refmpn_xor_n\n#define funcname \"mpn_xor_n\"\n#endif\n\n#ifdef OPERATION_xnor_n\n#define func __gmpn_xnor_n\n#define reffunc refmpn_xnor_n\n#define funcname 
\"mpn_xnor_n\"\n#endif\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 328\n#endif\n#ifndef TIMES\n#define TIMES OPS/(SIZE+1)\n#endif\n\nmain (int argc, char **argv)\n{\n  mp_ptr s1, s2, dx, dy;\n  int i;\n  long t0, t;\n  unsigned int test;\n  mp_size_t size;\n  unsigned int ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  s1 = malloc (SIZE * sizeof (mp_limb_t));\n  s2 = malloc (SIZE * sizeof (mp_limb_t));\n  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (SIZE > 100000 ? 
1 : 100000 / SIZE) == 0)\n\t{\n\t  printf (\"\\r%d\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      dx[0] = 0x87654321;\n      dy[0] = 0x87654321;\n      dx[size+1] = 0x12345678;\n      dy[size+1] = 0x12345678;\n\n#if TIMES != 1\n      mpn_randomb (s1, rands, size);\n      mpn_randomb (s2, rands, size);\n\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tfunc (dx+1, s1, s2, size);\n      t = cputime() - t0;\n      printf (funcname \":    %5ldms (%.3f cycles/limb)\\n\",\n\t      t, ((double) t * CLOCK) / (TIMES * size * 1000.0));\n#endif\n\n#ifndef NOCHECK\n      mpn_rrandom (s1, rands,size);\n      mpn_rrandom (s2, rands,size);\n\n#ifdef PRINT\n      mpn_print (s1, size);\n      mpn_print (s2, size);\n#endif\n\n      /* Put garbage in the destination.  */\n      for (i = 0; i < size; i++)\n\t{\n\t  dx[i+1] = 0xdead;\n\t  dy[i+1] = 0xbeef;\n\t}\n\n      reffunc (dx+1, s1, s2, size);\n      func (dy+1, s1, s2, size);\n#ifdef PRINT\n      mpn_print (dx+1, size);\n      mpn_print (dy+1, size);\n#endif\n      if (mpn_cmp (dx, dy, size+2) != 0\n\t  || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)\n\t{\n#ifndef PRINT\n\t  mpn_print (dx+1, size);\n\t  mpn_print (dy+1, size);\n#endif\n\t  printf (\"\\n\");\n\t  if (dy[0] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (dy[size+1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", (int) (2 * sizeof(mp_limb_t)), p[i]);\n#endif\n#ifdef SPACE\n      if 
(i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/mul_N.c",
    "content": "/*\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <string.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void print_posneg (mp_limb_t);\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define LXW ((int) (2 * sizeof (mp_limb_t)))\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 496\n#endif\n#ifndef TIMES\n#define TIMES OPS/(SIZE+1)\n#endif\n\n#if N == 2\n#define mpn_mul_N mpn_mul_2\n#elif N == 3\n#define mpn_mul_N mpn_mul_3\n#elif N == 4\n#define mpn_mul_N 
mpn_mul_4\n#elif N == 5\n#define mpn_mul_N mpn_mul_5\n#elif N == 6\n#define mpn_mul_N mpn_mul_6\n#elif N == 7\n#define mpn_mul_N mpn_mul_7\n#elif N == 8\n#define mpn_mul_N mpn_mul_8\n#endif\n\nmp_limb_t\nrefmpn_mul_N (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)\n{\n  int i;\n  rp[n] = mpn_mul_1 (rp, up, n, *vp);\n  rp++;\n  vp++;\n  for (i = 2; i < N; i++)\n    {\n      rp[n] = mpn_addmul_1 (rp, up, n, *vp);\n      rp++;\n      vp++;\n    }\n  return mpn_addmul_1 (rp, up, n, *vp);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mp_limb_t up[SIZE];\n  mp_limb_t ref[SIZE + N - 1];\n  mp_limb_t mem[SIZE + N + 1];\n  mp_ptr rp = mem + 1;\n  mp_limb_t vp[N];\n  mp_limb_t cy_ref, cy_try;\n  int i;\n  long t0, t;\n  unsigned test;\n  mp_size_t size;\n  double cyc;\n  unsigned ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! 
defined (PRINT)\n      if (test % (CLOCK / SIZE / 1000) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      rp[size + N - 1] = 0x12345678;\n      rp[-1] = 0x87654321;\n\n      mpn_randomb (vp, rands, N);\n\n#if TIMES != 1\t\t\t/* run timing tests unless asked not to */\n      mpn_randomb (up, rands, size);\n\n      MPN_COPY (ref, rp, size + N - 1);\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tmpn_mul_N (ref, up, size, vp);\n      t = cputime() - t0;\n      cyc = ((double) t * CLOCK) / (TIMES * size * 1000.0) / N;\n      printf (\"mpn_mul_N:    %5ldms (%.3f cycles/limb) [%.2f Gb/s]\\n\",\n\t      t, cyc, CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB/1e9);\n#endif\n\n#ifdef ZEROu\n      MPN_ZERO (up, size);\n#else\n      mpn_rrandom (up, rands, size);\n#endif\n      mpn_rrandom (vp, rands ,N);\n      mpn_rrandom (rp, rands ,size + N - 1);\n\n#if defined (PRINT) || defined (PRINTV)\n      printf (\"vp=\");\n      mpn_print (vp, N);\n#endif\n#ifdef PRINT\n      printf (\"%*s \", 3 + N * LXW, \"\");\n      mpn_print (rp, size);\n      printf (\"%*s \", 3 + N * LXW, \"\");\n      mpn_print (up, size);\n#endif\n\n      MPN_COPY (ref, rp, size + N - 1);\n      cy_ref = refmpn_mul_N (ref, up, size, vp);\n      cy_try = mpn_mul_N (rp, up, size, vp);\n\n#ifdef PRINT\n      printf (\"%*lX \", LXW, cy_ref);\n      mpn_print (ref, size + N - 1);\n      printf (\"%*lX \", LXW, cy_try);\n      mpn_print (rp, size + N - 1);\n#endif\n\n#ifndef NOCHECK\n      if (cy_ref != cy_try || mpn_cmp (ref, rp, size + N - 1) != 0\n\t  || rp[size + N - 1] != 0x12345678 || rp[-1] != 0x87654321)\n\t{\n\t  printf (\"\\n        ref%*s try%*s diff\\n\", LXW - 3, \"\", 2 * LXW - 6, \"\");\n\t  for (i = 0; i < size + N - 1; i++)\n\t    {\n\t      printf (\"%6d: \", i);\n\t      printf (\"%0*llX \", LXW, (unsigned long long) ref[i]);\n\t      printf (\"%0*llX \", LXW, 
(unsigned long long) rp[i]);\n\t      print_posneg (rp[i] - ref[i]);\n\t      printf (\"\\n\");\n\t    }\n\t  printf (\"retval: \");\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_ref);\n\t  printf (\"%0*llX \", LXW, (unsigned long long) cy_try);\n\t  print_posneg (cy_try - cy_ref);\n\t  printf (\"\\n\");\n\t  if (rp[-1] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (rp[size + N - 1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nprint_posneg (mp_limb_t d)\n{\n  char buf[LXW + 2];\n  if (d == 0)\n    printf (\" %*X\", LXW, 0);\n  else if (-d < d)\n    {\n      sprintf (buf, \"%llX\", (unsigned long long) -d);\n      printf (\"%*s-%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n  else\n    {\n      sprintf (buf, \"%llX\", (unsigned long long) d);\n      printf (\"%*s+%s\", LXW - (int) strlen (buf), \"\", buf);\n    }\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", LXW, p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/shift.c",
    "content": "/*\nCopyright 1996, 1998, 1999, 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifdef OPERATION_lshift\n#define func __gmpn_lshift\n#define reffunc refmpn_lshift\n#define funcname \"mpn_lshift\"\n#endif\n\n#ifdef OPERATION_rshift\n#define func __gmpn_rshift\n#define reffunc refmpn_rshift\n#define funcname \"mpn_rshift\"\n#endif\n\n#if defined (USG) || defined (__SVR4) || defined (__hpux)\n#include <time.h>\n\nint\ncputime ()\n{\n  if (CLOCKS_PER_SEC < 100000)\n    return clock () * 1000 / CLOCKS_PER_SEC;\n  return clock () / (CLOCKS_PER_SEC / 1000);\n}\n#else\n#include <sys/types.h>\n#include <sys/time.h>\n#include <sys/resource.h>\n\nint\ncputime ()\n{\n  struct rusage rus;\n\n  getrusage (0, &rus);\n  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;\n}\n#endif\n\nstatic void mpn_print (mp_ptr, mp_size_t);\n\n#define M * 1000000\n\n#ifndef CLOCK\n#error \"Don't know CLOCK of your machine\"\n#endif\n\n#ifndef OPS\n#define OPS (CLOCK/5)\n#endif\n#ifndef SIZE\n#define SIZE 496\n#endif\n#ifndef TIMES\n#define TIMES 
OPS/(SIZE+1)\n#endif\n\n#ifndef CNT\nint CNT = 4;\n#endif\n\nmain (int argc, char **argv)\n{\n  mp_ptr s1, dx, dy;\n  mp_limb_t cyx, cyy;\n  int i;\n  long t0, t;\n  unsigned int test;\n  int cnt = CNT;\n  mp_size_t size;\n  unsigned int ntests;\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n  \n  s1 = malloc (SIZE * sizeof (mp_limb_t));\n  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));\n\n  ntests = ~(unsigned) 0;\n  if (argc == 2)\n    ntests = strtol (argv[1], 0, 0);\n\n  for (test = 1; test <= ntests; test++)\n    {\n#if TIMES == 1 && ! defined (PRINT)\n      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)\n\t{\n\t  printf (\"\\r%u\", test);\n\t  fflush (stdout);\n\t}\n#endif\n\n#if TIMES == 1\n      cnt = random () % (GMP_NUMB_BITS - 1) + 1;\n#endif\n\n#ifdef RANDOM\n      size = random () % SIZE + 1;\n#else\n      size = SIZE;\n#endif\n\n      dx[0] = 0x87654321;\n      dy[0] = 0x87654321;\n      dx[size+1] = 0x12345678;\n      dy[size+1] = 0x12345678;\n\n#if TIMES != 1\n      mpn_randomb (s1, rands, size);\n\n      t0 = cputime();\n      for (i = 0; i < TIMES; i++)\n\tfunc (dx+1, s1, size, cnt);\n      t = cputime() - t0;\n      printf (funcname \":    %5ldms (%.3f cycles/limb)\\n\",\n\t      t, ((double) t * CLOCK) / (TIMES * size * 1000.0));\n#endif\n\n#ifndef NOCHECK\n      mpn_rrandom (s1, rands, size);\n\n#ifdef PRINT\n      printf (\"cnt=%-*d \", (int) (2 * sizeof(mp_limb_t)) - 4, cnt);\n      mpn_print (s1, size);\n#endif\n\n      /* Put garbage in the destination.  
*/\n      for (i = 0; i < size; i++)\n\t{\n\t  dx[i+1] = 0xdead;\n\t  dy[i+1] = 0xbeef;\n\t}\n\n      cyx = reffunc (dx+1, s1, size, cnt);\n      cyy = func (dy+1, s1, size, cnt);\n\n#ifdef PRINT\n      mpn_print (&cyx, 1);\n      mpn_print (dx+1, size);\n      mpn_print (&cyy, 1);\n      mpn_print (dy+1, size);\n#endif\n\n      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0\n\t  || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)\n\t{\n#ifndef PRINT\n\t  mpn_print (&cyx, 1);\n\t  mpn_print (dx+1, size);\n\t  mpn_print (&cyy, 1);\n\t  mpn_print (dy+1, size);\n#endif\n\t  printf (\"\\n\");\n\t  if (dy[0] != 0x87654321)\n\t    printf (\"clobbered at low end\\n\");\n\t  if (dy[size+1] != 0x12345678)\n\t    printf (\"clobbered at high end\\n\");\n\t  printf (\"TEST NUMBER %u\\n\", test);\n\t  abort();\n\t}\n#endif\n    }\n  exit (0);\n}\n\nstatic void\nmpn_print (mp_ptr p, mp_size_t size)\n{\n  mp_size_t i;\n\n  for (i = size - 1; i >= 0; i--)\n    {\n#ifdef _LONG_LONG_LIMB\n      printf (\"%0*lX%0*lX\", (int) (sizeof(mp_limb_t)),\n\t      (unsigned long) (p[i] >> (BITS_PER_MP_LIMB/2)),\n              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));\n#else\n      printf (\"%0*lX\", (int) (2 * sizeof(mp_limb_t)), p[i]);\n#endif\n#ifdef SPACE\n      if (i != 0)\n\tprintf (\" \");\n#endif\n    }\n  puts (\"\");\n}\n"
  },
  {
    "path": "tests/devel/try.c",
    "content": "/* Run some tests on various mpn routines.\n\n   THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO\n   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.\n\nCopyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Usage: try [options] <function>...\n\n   For example, \"./try mpn_add_n\" to run tests of that function.\n\n   Combinations of alignments and overlaps are tested, with redzones above\n   or below the destinations, and with the sources write-protected.\n\n   The number of tests performed becomes ridiculously large with all the\n   combinations, and for that reason this can't be a part of a \"make check\",\n   it's meant only for development.  The code isn't very pretty either.\n\n   During development it can help to disable the redzones, since seeing the\n   rest of the destination written can show where the wrong part is, or if\n   the dst pointers are off by 1 or whatever.  
The magic DEADVAL initial\n   fill (see below) will show locations never written.\n\n   The -s option can be used to test only certain size operands, which is\n   useful if some new code doesn't yet support say sizes less than the\n   unrolling, or whatever.\n\n   When a problem occurs it'll of course be necessary to run the program\n   under gdb to find out quite where, how and why it's going wrong.  Disable\n   the spinner with the -W option when doing this, or single stepping won't\n   work.  Using the \"-1\" option to run with simple data can be useful.\n\n   New functions to test can be added in try_array[].  If a new TYPE is\n   required then add it to the existing constants, set up its parameters in\n   param_init(), and add it to the call() function.  Extra parameter fields\n   can be added if necessary, or further interpretations given to existing\n   fields.\n\n\n   Enhancements:\n\n   umul_ppmm support is not very good, lots of source data is generated\n   whereas only two limbs are needed.\n\n   Make a little scheme for interpreting the \"SIZE\" selections uniformly.\n\n   Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2\n   source limbs.  Possibly increase the default repetitions in that case.\n\n   Automatically detect gdb and disable the spinner (use -W for now).\n\n   Make a way to re-run a failing case in the debugger.  Have an option to\n   snapshot each test case before it's run so the data is available if a\n   segv occurs.  (This should be more reliable than the current print_all()\n   in the signal handler.)\n\n   When alignment means a dst isn't hard against the redzone, check the\n   space in between remains unchanged.\n\n   When a source overlaps a destination, don't run both s[i].high 0 and 1,\n   as s[i].high has no effect.  
Maybe encode s[i].high into overlap->s[i].\n\n   When partial overlaps aren't done, don't loop over source alignments\n   during overlaps.\n\n   Try to make the looping code a bit less horrible.  Right now it's pretty\n   hard to see what iterations are actually done.\n\n   Perhaps specific setups and loops for each style of function under test\n   would be clearer than a parameterized general loop.  There's lots of\n   stuff common to all functions, but the exceptions get messy.\n\n   When there's no overlap, run with both src>dst and src<dst.  A subtle\n   calling-conventions violation occured in a P6 copy which depended on the\n   relative location of src and dst.\n\n   multiplier_N is more or less a third source region for the addmul_N\n   routines, and could be done with the redzoned region scheme.\n\n*/\n\n/* always do assertion checking */\n#define WANT_ASSERT 1\n\n#include \"config.h\"\n\n#include <errno.h>\n#include <limits.h>\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <time.h>\n\n#if defined( _MSC_VER )\n#define WINDOWS_LEAN_AND_MEAN\n#include <windows.h>\n#endif\n\n#if HAVE_UNISTD_H\n#include <unistd.h>\n#endif\n\n#if HAVE_SYS_MMAN_H\n#include <sys/mman.h>\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\n#if !HAVE_DECL_OPTARG\nextern char *optarg;\nextern int optind, opterr;\n#endif\n\n#if ! HAVE_DECL_SYS_NERR\nextern int sys_nerr;\n#endif\n\n#if ! HAVE_DECL_SYS_ERRLIST && !defined( _MSC_VER )\nextern char *sys_errlist[];\n#endif\n\n#if ! HAVE_STRERROR\nchar *\nstrerror (int n)\n{\n  if (n < 0 || n >= sys_nerr)\n    return \"errno out of range\";\n  else\n    return sys_errlist[n];\n}\n#endif\n\n/* Rumour has it some systems lack a define of PROT_NONE. */\n#ifndef PROT_NONE\n#define PROT_NONE   0\n#endif\n\n/* Dummy defines for when mprotect doesn't exist. 
*/\n#ifndef PROT_READ\n#define PROT_READ   0\n#endif\n#ifndef PROT_WRITE\n#define PROT_WRITE  0\n#endif\n\n/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have\n   _SC_PAGE_SIZE instead. */\n#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)\n#define _SC_PAGESIZE  _SC_PAGE_SIZE\n#endif\n\n\n#ifdef EXTRA_PROTOS\nEXTRA_PROTOS\n#endif\n#ifdef EXTRA_PROTOS2\nEXTRA_PROTOS2\n#endif\n\n\n#define DEFAULT_REPETITIONS  10\n\nint  option_repetitions = DEFAULT_REPETITIONS;\nint  option_spinner = 1;\nint  option_redzones = 1;\nint  option_firstsize = 0;\nint  option_lastsize = 500;\nint  option_firstsize2 = 0;\n\n#define ALIGNMENTS          4\n#define OVERLAPS            4\n#define CARRY_RANDOMS       5\n#define MULTIPLIER_RANDOMS  5\n#define DIVISOR_RANDOMS     5\n#define FRACTION_COUNT      4\n\nint  option_print = 0;\n\n#define DATA_TRAND  0\n#define DATA_ZEROS  1\n#define DATA_SEQ    2\n#define DATA_FFS    3\n#define DATA_2FD    4\nint  option_data = DATA_TRAND;\n\n\nmp_size_t  pagesize;\n#define PAGESIZE_LIMBS  (pagesize / BYTES_PER_MP_LIMB)\n\n/* must be a multiple of the page size */\n#define REDZONE_BYTES   (pagesize * 16)\n#define REDZONE_LIMBS   (REDZONE_BYTES / BYTES_PER_MP_LIMB)\n\n\n#define MAX3(x,y,z)   (MAX (x, MAX (y, z)))\n\n#if BITS_PER_MP_LIMB == 32\n#define DEADVAL  CNST_LIMB(0xDEADBEEF)\n#else\n#define DEADVAL  CNST_LIMB(0xDEADBEEFBADDCAFE)\n#endif\n\n\nstruct region_t {\n  mp_ptr     ptr;\n  mp_size_t  size;\n};\n\n\n#define TRAP_NOWHERE 0\n#define TRAP_REF     1\n#define TRAP_FUN     2\n#define TRAP_SETUPS  3\nint trap_location = TRAP_NOWHERE;\n\n\n#define NUM_SOURCES  3\n#define NUM_DESTS    2\n\nstruct source_t {\n  struct region_t  region;\n  int        high;\n  mp_size_t  align;\n  mp_ptr     p;\n};\n\nstruct source_t  s[NUM_SOURCES];\n\nstruct dest_t {\n  int        high;\n  mp_size_t  align;\n  mp_size_t  size;\n};\n\nstruct dest_t  d[NUM_DESTS];\n\nstruct source_each_t {\n  mp_ptr     p;\n};\n\nstruct dest_each_t {\n  struct 
region_t  region;\n  mp_ptr     p;\n};\n\nmp_size_t       size;\nmp_size_t       size2;\nunsigned long   shift;\nmp_limb_t       carry;\nmp_limb_t       divisor;\nmp_limb_t\taltdiv;\nmp_limb_t       multiplier;\nmp_limb_t       multiplier_N[8];\n\nstruct each_t {\n  const char  *name;\n  struct dest_each_t    d[NUM_DESTS];\n  struct source_each_t  s[NUM_SOURCES];\n  mp_limb_t  retval;\n};\n\nstruct each_t  ref = { \"Ref\" };\nstruct each_t  fun = { \"Fun\" };\n\n#define SRC_SIZE(n)  ((n) == 1 && tr->size2 ? size2 : size)\n\nvoid validate_fail(void);\n\n\n#if HAVE_TRY_NEW_C\n#include \"try-new.c\"\n#endif\n\n\ntypedef mp_limb_t (*tryfun_t)(ANYARGS);\n\nstruct try_t {\n  char  retval;\n\n  char  src[NUM_SOURCES];\n  char  dst[NUM_DESTS];\n\n#define SIZE_YES          1\n#define SIZE_ALLOW_ZERO   2\n#define SIZE_1            3  /* 1 limb  */\n#define SIZE_2            4  /* 2 limbs */\n#define SIZE_3            5  /* 3 limbs */\n#define SIZE_4\t\t  20 /* 4 limbs */\n#define SIZE_FRACTION     6  /* size2 is fraction for divrem etc */\n#define SIZE_SIZE2        7\n#define SIZE_PLUS_1       8\n#define SIZE_PLUS_2       9\n#define SIZE_SUM         10\n#define SIZE_DIFF        11\n#define SIZE_DIFF_PLUS_1 12\n#define SIZE_DIFF_PLUS_3 13\n#define SIZE_RETVAL      14\n#define SIZE_CEIL_HALF   15\n#define SIZE_GET_STR     16\n#define SIZE_PLUS_MSIZE_SUB_1 17  /* size+msize-1 */\n#define SIZE_DOUBLE\t18\n#define SIZE_DOUBLE_MINUS_1\t19\n  char  size;\n  char  size2;\n  char  dst_size[NUM_DESTS];\n\n  /* multiplier_N size in limbs */\n  mp_size_t  msize;\n\n  char  dst_bytes[NUM_DESTS];\n\n  char  dst0_from_src1;\n\n#define CARRY_BIT     1  /* single bit 0 or 1 */\n#define CARRY_3       2  /* 0, 1, 2 */\n#define CARRY_4       3  /* 0 to 3 */\n#define CARRY_LIMB    4  /* any limb value */\n#define CARRY_DIVISOR 5  /* carry<divisor */\n  char  carry;\n\n  /* a fudge to tell the output when to print negatives */\n  char  carry_sign;\n\n  char  multiplier;\n  char  shift;\n\n#define 
DIVISOR_LIMB  1\n#define DIVISOR_NORM  2\n#define DIVISOR_ODD   3\n#define DIVISOR_DIVBM1\t4\n  char  divisor;\n\n#define DATA_NON_ZERO         1\n#define DATA_GCD              2\n#define DATA_SRC1_ODD         3\n#define DATA_SRC1_HIGHBIT     4\n#define DATA_MULTIPLE_DIVISOR 5\n#define DATA_UDIV_QRNND       6\n#define DATA_SRC0_ODD\t      7\n  char  data;\n\n/* Default is allow full overlap. */\n#define OVERLAP_NONE         1\n#define OVERLAP_LOW_TO_HIGH  2\n#define OVERLAP_HIGH_TO_LOW  3\n#define OVERLAP_NOT_SRCS     4\n#define OVERLAP_NOT_SRC2     8\n  char  overlap;\n\n  tryfun_t    reference;\n  const char  *reference_name;\n\n  void        (*validate)(void);\n  const char  *validate_name;\n};\n\nstruct try_t  *tr;\n\nvoid\nvalidate_mod_34lsub1 (void)\n{\n#define CNST_34LSUB1   ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)\n\n  mp_srcptr  ptr = s[0].p;\n  int        error = 0;\n  mp_limb_t  got, got_mod, want, want_mod;\n\n  ASSERT (size >= 1);\n\n  got = fun.retval;\n  got_mod = got % CNST_34LSUB1;\n\n  want = refmpn_mod_34lsub1 (ptr, size);\n  want_mod = want % CNST_34LSUB1;\n\n  if (got_mod != want_mod)\n    {\n      gmp_printf (\"got   0x%MX reduced from 0x%MX\\n\", got_mod, got);\n      gmp_printf (\"want  0x%MX reduced from 0x%MX\\n\", want_mod, want);\n      error = 1;\n    }\n\n  if (error)\n    validate_fail ();\n}\n\nvoid\nvalidate_divexact_1 (void)\n{\n  mp_srcptr  src = s[0].p;\n  mp_srcptr  dst = fun.d[0].p;\n  int  error = 0;\n\n  ASSERT (size >= 1);\n\n  {\n    mp_ptr     tp = refmpn_malloc_limbs (size);\n    mp_limb_t  rem;\n\n    rem = refmpn_divrem_1 (tp, 0, src, size, divisor);\n    if (rem != 0)\n      {\n\tgmp_printf (\"Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\\n\", rem);\n\terror = 1;\n      }\n    if (! 
refmpn_equal_anynail (tp, dst, size))\n      {\n\tprintf (\"Quotient a/d wrong\\n\");\n\tmpn_trace (\"fun \", dst, size);\n\tmpn_trace (\"want\", tp, size);\n\terror = 1;\n      }\n    free (tp);\n  }\n\n  if (error)\n    validate_fail ();\n}\n\n\nvoid\nvalidate_modexact_1c_odd (void)\n{\n  mp_srcptr  ptr = s[0].p;\n  mp_limb_t  r = fun.retval;\n  int  error = 0;\n\n  ASSERT (size >= 1);\n  ASSERT (divisor & 1);\n\n  if ((r & GMP_NAIL_MASK) != 0)\n    printf (\"r has non-zero nail\\n\");\n\n  if (carry < divisor)\n    {\n      if (! (r < divisor))\n\t{\n\t  printf (\"Don't have r < divisor\\n\");\n\t  error = 1;\n\t}\n    }\n  else /* carry >= divisor */\n    {\n      if (! (r <= divisor))\n\t{\n\t  printf (\"Don't have r <= divisor\\n\");\n\t  error = 1;\n\t}\n    }\n\n  {\n    mp_limb_t  c = carry % divisor;\n    mp_ptr     tp = refmpn_malloc_limbs (size+1);\n    mp_size_t  k;\n\n    for (k = size-1; k <= size; k++)\n      {\n\t/* set {tp,size+1} to r*b^k + a - c */\n\trefmpn_copyi (tp, ptr, size);\n\ttp[size] = 0;\n\tASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));\n\tif (refmpn_sub_1 (tp, tp, size+1, c))\n\t  ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));\n\n\tif (refmpn_mod_1 (tp, size+1, divisor) == 0)\n\t  goto good_remainder;\n      }\n    printf (\"Remainder matches neither r*b^(size-1) nor r*b^size\\n\");\n    error = 1;\n\n  good_remainder:\n    free (tp);\n  }\n\n  if (error)\n    validate_fail ();\n}\n\nvoid\nvalidate_modexact_1_odd (void)\n{\n  carry = 0;\n  validate_modexact_1c_odd ();\n}\n\n\nvoid\nvalidate_sqrtrem (void)\n{\n  mp_srcptr  orig_ptr = s[0].p;\n  mp_size_t  orig_size = size;\n  mp_size_t  root_size = (size+1)/2;\n  mp_srcptr  root_ptr = fun.d[0].p;\n  mp_size_t  rem_size = fun.retval;\n  mp_srcptr  rem_ptr = fun.d[1].p;\n  mp_size_t  prod_size = 2*root_size;\n  mp_ptr     p;\n  int  error = 0;\n\n  if (rem_size < 0 || rem_size > size)\n    {\n      printf (\"Bad remainder size retval %ld\\n\", (long) rem_size);\n      
validate_fail ();\n    }\n\n  p = refmpn_malloc_limbs (prod_size);\n\n  p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);\n  if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)\n    {\n      printf (\"Remainder bigger than 2*root\\n\");\n      error = 1;\n    }\n\n  refmpn_sqr (p, root_ptr, root_size);\n  if (rem_size != 0)\n    refmpn_add (p, p, prod_size, rem_ptr, rem_size);\n  if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)\n    {\n      printf (\"root^2+rem != original\\n\");\n      mpn_trace (\"prod\", p, prod_size);\n      error = 1;\n    }\n  free (p);\n\n  if (error)\n    validate_fail ();\n}\n\n\n/* These types are indexes into the param[] array and are arbitrary so long\n   as they're all distinct and within the size of param[].  Renumber\n   whenever necessary or desired.  */\n\n#define TYPE_ADD               1\n#define TYPE_ADD_N             2\n#define TYPE_ADD_NC            3\n#define TYPE_SUB               4\n#define TYPE_SUB_N             5\n#define TYPE_SUB_NC            6\n\n#define TYPE_MUL_1             7\n#define TYPE_MUL_1C            8\n\n#define TYPE_MUL_2             9\n\n#define TYPE_ADDMUL_1         10\n#define TYPE_ADDMUL_1C        11\n#define TYPE_SUBMUL_1         12\n#define TYPE_SUBMUL_1C        13\n\n#define TYPE_ADDMUL_2         14\n#define TYPE_ADDMUL_3         15\n#define TYPE_ADDMUL_4         16\n#define TYPE_ADDMUL_5         17\n#define TYPE_ADDMUL_6         18\n#define TYPE_ADDMUL_7         19\n#define TYPE_ADDMUL_8         20\n\n#define TYPE_SUMDIFF_N        21\n#define TYPE_NSUMDIFF_N       141\n#define TYPE_SUMDIFF_NC       22\n\n#define TYPE_RSHIFT           23\n#define TYPE_LSHIFT           24\n\n#define TYPE_COPY             25\n#define TYPE_COPYI            26\n#define TYPE_COPYD            27\n#define TYPE_COM_N            28\n\n#define TYPE_ADDLSH1_N        30\n#define TYPE_SUBLSH1_N        31\n#define TYPE_RSH1ADD_N        32\n#define TYPE_RSH1SUB_N        33\n\n#define TYPE_MOD_1     
       35\n#define TYPE_MOD_1C           36\n#define TYPE_DIVMOD_1         37\n#define TYPE_DIVMOD_1C        38\n#define TYPE_DIVREM_1         39\n#define TYPE_DIVREM_1C        40\n#define TYPE_PREINV_DIVREM_1  41\n#define TYPE_PREINV_MOD_1     42\n#define TYPE_MOD_34LSUB1      43\n#define TYPE_UDIV_QRNND       44\n#define TYPE_UDIV_QRNND_R     45\n\n#define TYPE_DIVEXACT_1       50\n#define TYPE_DIVEXACT_BY3     51\n#define TYPE_DIVEXACT_BY3C    52\n#define TYPE_MODEXACT_1_ODD   53\n#define TYPE_MODEXACT_1C_ODD  54\n\n#define TYPE_GCD              60\n#define TYPE_GCD_1            61\n#define TYPE_GCD_FINDA        62\n#define TYPE_MPZ_JACOBI       63\n#define TYPE_MPZ_KRONECKER    64\n#define TYPE_MPZ_KRONECKER_UI 65\n#define TYPE_MPZ_KRONECKER_SI 66\n#define TYPE_MPZ_UI_KRONECKER 67\n#define TYPE_MPZ_SI_KRONECKER 68\n\n#define TYPE_AND_N            70\n#define TYPE_NAND_N           71\n#define TYPE_ANDN_N           72\n#define TYPE_IOR_N            73\n#define TYPE_IORN_N           74\n#define TYPE_NIOR_N           75\n#define TYPE_XOR_N            76\n#define TYPE_XNOR_N           77\n\n#define TYPE_MUL_BASECASE     80\n#define TYPE_MUL_N            81\n#define TYPE_MULMID_BASECASE  82\n#define TYPE_MULMID           83\n#define TYPE_MULMID_N         84\n#define TYPE_SQR              85\n#define TYPE_UMUL_PPMM        86\n#define TYPE_UMUL_PPMM_R      87\n\n#define TYPE_SB_DIVREM_MN     90\n#define TYPE_TDIV_QR          91\n#define TYPE_TDIV_Q           92\n\n#define TYPE_SQRTREM          100\n#define TYPE_ZERO             101\n#define TYPE_GET_STR          102\n#define TYPE_POPCOUNT         103\n#define TYPE_HAMDIST          104\n\n#define TYPE_DIVEXACT_BYFF    105\n#define TYPE_LSHIFT1\t      106\n#define TYPE_RSHIFT1\t      107\n\n#define TYPE_ADDADD_N\t      108\n#define TYPE_ADDSUB_N\t      109\n#define TYPE_SUBADD_N\t      110\n\n#define TYPE_REDC_BASECASE\t111\n#define TYPE_DIVREM_EUCLIDEAN_QR_1\t112\n#define TYPE_DIVREM_EUCLIDEAN_R_1\t113\n#define 
TYPE_DIVEXACT_BYFOBM1 114\n\n#define TYPE_LSHIFT2\t      115\n#define TYPE_RSHIFT2\t      116\n#define TYPE_STORE\t\t117\n#define TYPE_LSHIFTC\t\t118\n//#define TYPE_DIVREM_EUCLIDEAN_QR_2\t118\n#define TYPE_ADDLSH_N\t\t120\n#define TYPE_SUBLSH_N\t\t121\n\n#define TYPE_INCLSH_N\t\t122\n#define TYPE_DECLSH_N\t\t123\n#define TYPE_ADDERR1_N\t\t124\n#define TYPE_SUBERR1_N\t\t125\n#define TYPE_ADDERR2_N\t\t126\n#define TYPE_SUBERR2_N\t\t127\n#define TYPE_ADDLSH_NC\t\t128\n#define TYPE_SUBLSH_NC\t\t129\n\n#define TYPE_DIVREM_HENSEL_QR_1\t\t130\n#define TYPE_DIVREM_HENSEL_QR_1_1\t131\n#define TYPE_DIVREM_HENSEL_QR_1_2\t132\n#define TYPE_DIVREM_HENSEL_R_1\t133\n#define TYPE_RSH_DIVREM_HENSEL_QR_1\t134\n#define TYPE_RSH_DIVREM_HENSEL_QR_1_1\t135\n#define TYPE_RSH_DIVREM_HENSEL_QR_1_2\t136\n#define TYPE_DIVREM_HENSEL_RSH_QR_1\t137\n#define TYPE_NOT\t\t\t138\n\n#define TYPE_DOUBLE\t139\n#define TYPE_HALF\t140\n\n#define TYPE_EXTRA            150\n\nstruct try_t  param[150];\n\n\nvoid\nparam_init (void)\n{\n  struct try_t  *p;\n\n#define COPY(index)  memcpy (p, &param[index], sizeof (*p))\n\n#if HAVE_STRINGIZE\n#define REFERENCE(fun)                  \\\n  p->reference = (tryfun_t) fun;        \\\n  p->reference_name = #fun\n#define VALIDATE(fun)           \\\n  p->validate = fun;            \\\n  p->validate_name = #fun\n#else\n#define REFERENCE(fun)                  \\\n  p->reference = (tryfun_t) fun;        \\\n  p->reference_name = \"fun\"\n#define VALIDATE(fun)           \\\n  p->validate = fun;            \\\n  p->validate_name = \"fun\"\n#endif\n\n\n  p = &param[TYPE_ADD_N];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  REFERENCE (refmpn_add_n);\n\n  p = &param[TYPE_ADD_NC];\n  COPY (TYPE_ADD_N);\n  p->carry = CARRY_BIT;\n  REFERENCE (refmpn_add_nc);\n\n  p = &param[TYPE_SUB_N];\n  COPY (TYPE_ADD_N);\n  REFERENCE (refmpn_sub_n);\n\n  p = &param[TYPE_SUB_NC];\n  COPY (TYPE_ADD_NC);\n  REFERENCE (refmpn_sub_nc);\n\n  p = &param[TYPE_ADD];\n  
COPY (TYPE_ADD_N);\n  p->size = SIZE_ALLOW_ZERO;\n  p->size2 = 1;\n  REFERENCE (refmpn_add);\n\n  p = &param[TYPE_SUB];\n  COPY (TYPE_ADD);\n  REFERENCE (refmpn_sub);\n\n\n  p = &param[TYPE_MUL_1];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->multiplier = 1;\n  p->overlap = OVERLAP_LOW_TO_HIGH;\n  REFERENCE (refmpn_mul_1);\n\n  p = &param[TYPE_MUL_1C];\n  COPY (TYPE_MUL_1);\n  p->carry = CARRY_LIMB;\n  REFERENCE (refmpn_mul_1c);\n\n\n  p = &param[TYPE_MUL_2];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->msize = 2;\n  p->overlap = OVERLAP_NOT_SRC2;\n  REFERENCE (refmpn_mul_2);\n\n\n  p = &param[TYPE_ADDMUL_1];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->multiplier = 1;\n  p->dst0_from_src1 = 1;\n  REFERENCE (refmpn_addmul_1);\n\n  p = &param[TYPE_ADDMUL_1C];\n  COPY (TYPE_ADDMUL_1);\n  p->carry = CARRY_LIMB;\n  REFERENCE (refmpn_addmul_1c);\n\n  p = &param[TYPE_SUBMUL_1];\n  COPY (TYPE_ADDMUL_1);\n  REFERENCE (refmpn_submul_1);\n\n  p = &param[TYPE_SUBMUL_1C];\n  COPY (TYPE_ADDMUL_1C);\n  REFERENCE (refmpn_submul_1c);\n\n\n  p = &param[TYPE_ADDMUL_2];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->msize = 2;\n  p->dst0_from_src1 = 1;\n  p->overlap = OVERLAP_NOT_SRC2;\n  REFERENCE (refmpn_addmul_2);\n\n  p = &param[TYPE_ADDMUL_3];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 3;\n  REFERENCE (refmpn_addmul_3);\n\n  p = &param[TYPE_ADDMUL_4];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 4;\n  REFERENCE (refmpn_addmul_4);\n\n  p = &param[TYPE_ADDMUL_5];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 5;\n  REFERENCE (refmpn_addmul_5);\n\n  p = &param[TYPE_ADDMUL_6];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 6;\n  REFERENCE (refmpn_addmul_6);\n\n  p = &param[TYPE_ADDMUL_7];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 7;\n  REFERENCE (refmpn_addmul_7);\n\n  p = &param[TYPE_ADDMUL_8];\n  COPY (TYPE_ADDMUL_2);\n  p->msize = 
8;\n  REFERENCE (refmpn_addmul_8);\n\n\n  p = &param[TYPE_AND_N];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  REFERENCE (refmpn_and_n);\n\n  p = &param[TYPE_ANDN_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_andn_n);\n\n  p = &param[TYPE_NAND_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_nand_n);\n\n  p = &param[TYPE_IOR_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_ior_n);\n\n  p = &param[TYPE_IORN_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_iorn_n);\n\n  p = &param[TYPE_NIOR_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_nior_n);\n\n  p = &param[TYPE_XOR_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_xor_n);\n\n  p = &param[TYPE_XNOR_N];\n  COPY (TYPE_AND_N);\n  REFERENCE (refmpn_xnor_n);\n\n  p = &param[TYPE_SUMDIFF_N];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->dst[1] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  REFERENCE (refmpn_sumdiff_n);\n\n  p = &param[TYPE_NSUMDIFF_N];\n  COPY (TYPE_SUMDIFF_N);\n  REFERENCE (refmpn_nsumdiff_n);\n\n  p = &param[TYPE_ADDERR1_N];\n  p->retval=1;\n  p->dst[0]=1;\n  p->dst[1]=1;\n  p->dst_size[1]=SIZE_2;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->src[2]=1;\n  p->carry=CARRY_BIT;\n  p->overlap=OVERLAP_NONE;\n  REFERENCE (refmpn_add_err1_n);\n  \n  p = &param[TYPE_SUBERR1_N];\n  p->retval=1;\n  p->dst[0]=1;\n  p->dst[1]=1;\n  p->dst_size[1]=SIZE_2;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->src[2]=1;\n  p->carry=CARRY_BIT;\n  p->overlap=OVERLAP_NONE;\n  REFERENCE (refmpn_sub_err1_n);\n  \n  p = &param[TYPE_ADDERR2_N];\n  p->retval=1;\n  p->dst[0]=1;\n  p->dst[1]=1;\n  p->dst_size[1]=SIZE_4;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->src[2]=1;\n  p->src[3]=1;//FIXME\n  p->carry=CARRY_BIT;\n  p->overlap=OVERLAP_NONE;\n  REFERENCE (refmpn_add_err2_n);\n  \n  p = &param[TYPE_SUBERR2_N];\n  p->retval=1;\n  p->dst[0]=1;\n  p->dst[1]=1;\n  p->dst_size[1]=SIZE_4;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->src[2]=1;\n  p->src[3]=1;//FIXME\n  p->carry=CARRY_BIT;\n  p->overlap=OVERLAP_NONE;\n  REFERENCE (refmpn_sub_err2_n);\n  \n  p = 
&param[TYPE_SUMDIFF_NC];\n  COPY (TYPE_SUMDIFF_N);\n  p->carry = CARRY_4;\n  REFERENCE (refmpn_sumdiff_nc);\n\n  p = &param[TYPE_ADDADD_N];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->src[2] = 1;\n  REFERENCE (refmpn_addadd_n);\n\n  p = &param[TYPE_ADDSUB_N];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->src[2] = 1;\n  REFERENCE (refmpn_addsub_n);\n\n  p = &param[TYPE_SUBADD_N];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->src[2] = 1;\n  REFERENCE (refmpn_subadd_n);\n\n  p = &param[TYPE_COPY];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->overlap = OVERLAP_NONE;\n  p->size = SIZE_ALLOW_ZERO;\n  REFERENCE (refmpn_copy);\n\n  p = &param[TYPE_COPYI];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->overlap = OVERLAP_LOW_TO_HIGH;\n  p->size = SIZE_ALLOW_ZERO;\n  REFERENCE (refmpn_copyi);\n\n  p = &param[TYPE_COPYD];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->overlap = OVERLAP_HIGH_TO_LOW;\n  p->size = SIZE_ALLOW_ZERO;\n  REFERENCE (refmpn_copyd);\n\n  p = &param[TYPE_COM_N];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_com_n);\n\n  p = &param[TYPE_ADDLSH_N];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->shift=1;\n  REFERENCE (refmpn_addlsh_n);\n\n  p = &param[TYPE_SUBLSH_N];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->shift=1;\n  REFERENCE (refmpn_sublsh_n);\n\n  p = &param[TYPE_ADDLSH_NC];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->shift=1;\n  p->carry=CARRY_LIMB;\n  REFERENCE (refmpn_addlsh_nc);\n\n  p = &param[TYPE_SUBLSH_NC];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->src[1]=1;\n  p->shift=1;\n  p->carry=CARRY_LIMB;\n  REFERENCE (refmpn_sublsh_nc);\n  \n  p = &param[TYPE_INCLSH_N];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->shift=1;\n  REFERENCE (refmpn_inclsh_n);\n\n  p = &param[TYPE_DECLSH_N];\n  p->dst[0]=1;\n  p->src[0]=1;\n  p->shift=1;\n  REFERENCE (refmpn_declsh_n);  \n  \n  p = &param[TYPE_ADDLSH1_N];\n  COPY (TYPE_ADD_N);\n  REFERENCE 
(refmpn_addlsh1_n);\n\n  p = &param[TYPE_SUBLSH1_N];\n  COPY (TYPE_ADD_N);\n  REFERENCE (refmpn_sublsh1_n);\n\n  p = &param[TYPE_RSH1ADD_N];\n  COPY (TYPE_ADD_N);\n  REFERENCE (refmpn_rsh1add_n);\n\n  p = &param[TYPE_RSH1SUB_N];\n  COPY (TYPE_ADD_N);\n  REFERENCE (refmpn_rsh1sub_n);\n\n\n  p = &param[TYPE_MOD_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  p->divisor = DIVISOR_LIMB;\n  REFERENCE (refmpn_mod_1);\n\n  p = &param[TYPE_MOD_1C];\n  COPY (TYPE_MOD_1);\n  p->carry = CARRY_DIVISOR;\n  REFERENCE (refmpn_mod_1c);\n\n  p = &param[TYPE_DIVMOD_1];\n  COPY (TYPE_MOD_1);\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divmod_1);\n\n  p = &param[TYPE_DIVREM_EUCLIDEAN_QR_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_LIMB;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divrem_1);\n\n  p = &param[TYPE_DIVREM_EUCLIDEAN_R_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_LIMB;\n  REFERENCE (refmpn_divrem_euclidean_r_1);\n\n  p = &param[TYPE_DIVREM_HENSEL_QR_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divrem_hensel_qr_1);\n\n  p = &param[TYPE_DIVREM_HENSEL_QR_1_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divrem_hensel_qr_1);\n\n  p = &param[TYPE_DIVREM_HENSEL_QR_1_2];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divrem_hensel_qr_1);\n\n  p = &param[TYPE_DIVREM_HENSEL_R_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  REFERENCE (refmpn_divrem_hensel_r_1);\n\n  p = &param[TYPE_DIVREM_HENSEL_RSH_QR_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  p->shift=1;\n  REFERENCE (refmpn_divrem_hensel_rsh_qr_1);\n\n  p = &param[TYPE_RSH_DIVREM_HENSEL_QR_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  p->shift=1;\n  p->carry=CARRY_LIMB;\n  REFERENCE 
(refmpn_rsh_divrem_hensel_qr_1);\n\n  p = &param[TYPE_RSH_DIVREM_HENSEL_QR_1_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  p->shift=1;\n  p->carry=CARRY_LIMB;\n  REFERENCE (refmpn_rsh_divrem_hensel_qr_1);\n\n  p = &param[TYPE_RSH_DIVREM_HENSEL_QR_1_2];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  p->dst[0] = 1;\n  p->shift=1;\n  p->carry=CARRY_LIMB;\n  REFERENCE (refmpn_rsh_divrem_hensel_qr_1);\n\n  p = &param[TYPE_DIVEXACT_BYFOBM1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_DIVBM1;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_divexact_byfobm1);\n\n  p = &param[TYPE_DIVMOD_1C];\n  COPY (TYPE_DIVMOD_1);\n  p->carry = CARRY_DIVISOR;\n  REFERENCE (refmpn_divmod_1c);\n\n  p = &param[TYPE_DIVREM_1];\n  COPY (TYPE_DIVMOD_1);\n  p->size2 = SIZE_FRACTION;\n  p->dst_size[0] = SIZE_SUM;\n  REFERENCE (refmpn_divrem_1);\n\n  p = &param[TYPE_DIVREM_1C];\n  COPY (TYPE_DIVREM_1);\n  p->carry = CARRY_DIVISOR;\n  REFERENCE (refmpn_divrem_1c);\n\n  p = &param[TYPE_PREINV_DIVREM_1];\n  COPY (TYPE_DIVREM_1);\n  p->size = SIZE_YES; /* ie. no size==0 */\n  REFERENCE (refmpn_preinv_divrem_1);\n\n  p = &param[TYPE_PREINV_MOD_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_NORM;\n  REFERENCE (refmpn_preinv_mod_1);\n\n  p = &param[TYPE_MOD_34LSUB1];\n  p->retval = 1;\n  p->src[0] = 1;\n  VALIDATE (validate_mod_34lsub1);\n\n  p = &param[TYPE_UDIV_QRNND];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->dst[0] = 1;\n  p->dst_size[0] = SIZE_1;\n  p->divisor = UDIV_NEEDS_NORMALIZATION ? 
DIVISOR_NORM : DIVISOR_LIMB;\n  p->data = DATA_UDIV_QRNND;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_udiv_qrnnd);\n\n  p = &param[TYPE_UDIV_QRNND_R];\n  COPY (TYPE_UDIV_QRNND);\n  REFERENCE (refmpn_udiv_qrnnd_r);\n\n\n  p = &param[TYPE_DIVEXACT_1];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_LIMB;\n  p->data = DATA_MULTIPLE_DIVISOR;\n  VALIDATE (validate_divexact_1);\n  REFERENCE (refmpn_divmod_1);\n\n\n  p = &param[TYPE_DIVEXACT_BY3];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_divexact_by3);\n\n  p = &param[TYPE_DIVEXACT_BYFF];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_divexact_byff);\n\n  p = &param[TYPE_LSHIFT1];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_lshift1);\n\n  p = &param[TYPE_RSHIFT1];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_rshift1);\n\n  p = &param[TYPE_LSHIFT2];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_lshift2);\n\n  p = &param[TYPE_RSHIFT2];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_rshift2);\n\n  p = &param[TYPE_DIVEXACT_BY3C];\n  COPY (TYPE_DIVEXACT_BY3);\n  p->carry = CARRY_3;\n  REFERENCE (refmpn_divexact_by3c);\n\n\n  p = &param[TYPE_MODEXACT_1_ODD];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->divisor = DIVISOR_ODD;\n  VALIDATE (validate_modexact_1_odd);\n\n  p = &param[TYPE_MODEXACT_1C_ODD];\n  COPY (TYPE_MODEXACT_1_ODD);\n  p->carry = CARRY_LIMB;\n  VALIDATE (validate_modexact_1c_odd);\n\n\n  p = &param[TYPE_GCD_1];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->data = DATA_NON_ZERO;\n  p->divisor = DIVISOR_LIMB;\n  REFERENCE (refmpn_gcd_1);\n\n  p = &param[TYPE_GCD];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->size2 = 1;\n  p->dst_size[0] = SIZE_RETVAL;\n  p->overlap = OVERLAP_NOT_SRCS;\n  p->data = DATA_GCD;\n  REFERENCE (refmpn_gcd);\n\n  p = &param[TYPE_MPZ_JACOBI];\n  p->retval = 1;\n  
p->src[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  p->src[1] = 1;\n  p->data = DATA_SRC1_ODD;\n  p->size2 = 1;\n  p->carry = CARRY_4;\n  p->carry_sign = 1;\n  REFERENCE (refmpz_jacobi);\n\n  p = &param[TYPE_MPZ_KRONECKER];\n  COPY (TYPE_MPZ_JACOBI);\n  p->data = 0;\t\t\t/* clear inherited DATA_SRC1_ODD */\n  REFERENCE (refmpz_kronecker);\n\n\n  p = &param[TYPE_MPZ_KRONECKER_UI];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  p->multiplier = 1;\n  p->carry = CARRY_BIT;\n  REFERENCE (refmpz_kronecker_ui);\n\n  p = &param[TYPE_MPZ_KRONECKER_SI];\n  COPY (TYPE_MPZ_KRONECKER_UI);\n  REFERENCE (refmpz_kronecker_si);\n\n  p = &param[TYPE_MPZ_UI_KRONECKER];\n  COPY (TYPE_MPZ_KRONECKER_UI);\n  REFERENCE (refmpz_ui_kronecker);\n\n  p = &param[TYPE_MPZ_SI_KRONECKER];\n  COPY (TYPE_MPZ_KRONECKER_UI);\n  REFERENCE (refmpz_si_kronecker);\n\n  p = &param[TYPE_REDC_BASECASE];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->data = DATA_SRC0_ODD ;\n  p->size2 = SIZE_DOUBLE;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_redc_1);\n\n  p = &param[TYPE_SQR];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->dst_size[0] = SIZE_SUM;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_sqr);\n\n  p = &param[TYPE_MUL_N];\n  COPY (TYPE_SQR);\n  p->src[1] = 1;\n  REFERENCE (refmpn_mul_n);\n\n  p = &param[TYPE_MULMID_BASECASE];\n  COPY (TYPE_MUL_BASECASE);\n  p->dst_size[0] = SIZE_DIFF_PLUS_3;\n  p->size2 = 1;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_mulmid_basecase);\n\n  p = &param[TYPE_MULMID];\n  COPY (TYPE_MULMID_BASECASE);\n  REFERENCE (refmpn_mulmid);\n\n  p = &param[TYPE_MULMID_N];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->dst_size[0] = SIZE_PLUS_2;\n  p->size2 = SIZE_DOUBLE_MINUS_1;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_mulmid_n);\n\n  p = &param[TYPE_MUL_BASECASE];\n  COPY (TYPE_MUL_N);\n  p->dst_size[0] = SIZE_SUM;\n  p->size2 = 1;\n  REFERENCE (refmpn_mul_basecase);\n  \n  p = &param[TYPE_UMUL_PPMM];\n  p->retval = 
1;\n  p->src[0] = 1;\n  p->dst[0] = 1;\n  p->dst_size[0] = SIZE_1;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_umul_ppmm);\n\n  p = &param[TYPE_UMUL_PPMM_R];\n  COPY (TYPE_UMUL_PPMM);\n  REFERENCE (refmpn_umul_ppmm_r);\n\n\n  p = &param[TYPE_RSHIFT];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->shift = 1;\n  p->overlap = OVERLAP_LOW_TO_HIGH;\n  REFERENCE (refmpn_rshift);\n\n  p = &param[TYPE_LSHIFT];\n  COPY (TYPE_RSHIFT);\n  p->overlap = OVERLAP_HIGH_TO_LOW;\n  REFERENCE (refmpn_lshift);\n\n  p = &param[TYPE_LSHIFTC];\n  COPY (TYPE_LSHIFT);\n  REFERENCE (refmpn_lshiftc);\n\n  p = &param[TYPE_POPCOUNT];\n  p->retval = 1;\n  p->src[0] = 1;\n  REFERENCE (refmpn_popcount);\n\n  p = &param[TYPE_NOT];\n  //p->src[0] = 1;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_not);\n\n  p = &param[TYPE_DOUBLE];\n  p->retval = 1;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_double);\n\n  p = &param[TYPE_HALF];\n  p->retval = 1;\n  p->dst[0] = 1;\n  REFERENCE (refmpn_half);\n\n  p = &param[TYPE_HAMDIST];\n  COPY (TYPE_POPCOUNT);\n  p->src[1] = 1;\n  REFERENCE (refmpn_hamdist);\n\n  p = &param[TYPE_TDIV_QR];\n  p->dst[0] = 1;\n  p->dst[1] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->size2 = 1;\n  p->dst_size[0] = SIZE_DIFF_PLUS_1;\n  p->dst_size[1] = SIZE_SIZE2;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_tdiv_qr);\n\n  p = &param[TYPE_TDIV_Q];\n  p->dst[0] = 1;\n  p->src[0] = 1;\n  p->src[1] = 1;\n  p->size2 = 1;\n  p->dst_size[0] = SIZE_DIFF_PLUS_1;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_tdiv_q);\n\n  p = &param[TYPE_SQRTREM];\n  p->retval = 1;\n  p->dst[0] = 1;\n  p->dst[1] = 1;\n  p->src[0] = 1;\n  p->dst_size[0] = SIZE_CEIL_HALF;\n  p->dst_size[1] = SIZE_RETVAL;\n  p->overlap = OVERLAP_NONE;\n  VALIDATE (validate_sqrtrem);\n  REFERENCE (refmpn_sqrtrem);\n\n  p = &param[TYPE_ZERO];\n  p->dst[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  REFERENCE (refmpn_zero);\n\n  p = &param[TYPE_STORE];\n  p->dst[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  REFERENCE 
(refmpn_store);\n\n  p = &param[TYPE_GET_STR];\n  p->retval = 1;\n  p->src[0] = 1;\n  p->size = SIZE_ALLOW_ZERO;\n  p->dst[0] = 1;\n  p->dst[1] = 1;\n  p->dst_size[0] = SIZE_GET_STR;\n  p->dst_bytes[0] = 1;\n  p->overlap = OVERLAP_NONE;\n  REFERENCE (refmpn_get_str);\n\n#ifdef EXTRA_PARAM_INIT\n  EXTRA_PARAM_INIT\n#endif\n}\n\n\n/* The following are macros if there's no native versions, so wrap them in\n   functions that can be in try_array[]. */\n\nvoid\nMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ MPN_COPY (rp, sp, size); }\n\nvoid\nMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ MPN_COPY_INCR (rp, sp, size); }\n\nvoid\nMPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ MPN_COPY_DECR (rp, sp, size); }\n\nvoid\n__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ __GMPN_COPY (rp, sp, size); }\n\n#ifdef __GMPN_COPY_INCR\nvoid\n__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ __GMPN_COPY_INCR (rp, sp, size); }\n#endif\n\nvoid\nmpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{ mpn_com_n (rp, sp, size); }\n\nvoid\nmpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_and_n (rp, s1, s2, size); }\n\nvoid\nmpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_andn_n (rp, s1, s2, size); }\n\nvoid\nmpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_nand_n (rp, s1, s2, size); }\n\nvoid\nmpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_ior_n (rp, s1, s2, size); }\n\nvoid\nmpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_iorn_n (rp, s1, s2, size); }\n\nvoid\nmpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_nior_n (rp, s1, s2, size); }\n\nvoid\nmpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_xor_n (rp, s1, s2, size); }\n\nvoid\nmpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)\n{ mpn_xnor_n (rp, 
s1, s2, size); }\n\nmp_limb_t\nudiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)\n{\n  mp_limb_t  q;\n  udiv_qrnnd (q, *remptr, n1, n0, d);\n  return q;\n}\n\nmp_limb_t\nmpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return mpn_divexact_by3 (rp, sp, size);\n}\n\nvoid\nmpn_not_fun (mp_ptr rp, mp_size_t size)\n{\n  mpn_not (rp, size);\n}\n\nmp_limb_t\nmpn_double_fun (mp_ptr rp, mp_size_t size)\n{\n  return mpn_double (rp, size);\n}\n\nmp_limb_t\nmpn_half_fun (mp_ptr rp, mp_size_t size)\n{\n  return mpn_half (rp, size);\n}\n\nmp_limb_t\nmpn_lshift1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return mpn_lshift1 (rp, sp, size);\n}\n\nmp_limb_t\nmpn_rshift1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return mpn_rshift1 (rp, sp, size);\n}\n\nmp_limb_t\nmpn_lshift2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return mpn_lshift2 (rp, sp, size);\n}\n\nmp_limb_t\nmpn_rshift2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return mpn_rshift2 (rp, sp, size);\n}\n\n#if HAVE_NATIVE_mpn_addlsh1_n\nmp_limb_t\nmpn_addlsh1_n_fun (mp_ptr rp, mp_srcptr sp, mp_srcptr sp1,mp_size_t size)\n{\n  return mpn_addlsh1_n (rp, sp,sp1, size);\n}\n#endif\n\n#if HAVE_NATIVE_mpn_sublsh1_n\nmp_limb_t\nmpn_sublsh1_n_fun (mp_ptr rp, mp_srcptr sp, mp_srcptr sp1,mp_size_t size)\n{\n  return mpn_sublsh1_n (rp, sp,sp1, size);\n}\n#endif\n\n#if HAVE_NATIVE_mpn_inclsh_n\nmp_limb_t\nmpn_inclsh_n_fun (mp_ptr rp, mp_srcptr sp,mp_size_t size,unsigned int c)\n{\n  return mpn_inclsh_n (rp, sp, size,c);\n}\n#endif\n\n#if HAVE_NATIVE_mpn_declsh_n\nmp_limb_t\nmpn_declsh_n_fun (mp_ptr rp, mp_srcptr sp,mp_size_t size,unsigned int c)\n{\n  return mpn_declsh_n (rp, sp, size,c);\n}\n#endif\n\nmp_limb_t\nmpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)\n{\n  return mpn_modexact_1_odd (ptr, size, divisor);\n}\n\nvoid\nmpn_kara_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)\n{\n  mp_ptr  tspace;\n  
TMP_DECL;\n  TMP_MARK;\n  tspace = TMP_ALLOC_LIMBS (MPN_KARA_MUL_N_TSIZE (size));\n  mpn_kara_mul_n (dst, src1, src2, size, tspace);\n}\nvoid\nmpn_kara_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)\n{\n  mp_ptr tspace;\n  TMP_DECL;\n  TMP_MARK;\n  tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (size));\n  mpn_kara_sqr_n (dst, src, size, tspace);\n  TMP_FREE;\n}\nvoid\nmpn_toom3_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)\n{\n  mp_ptr  tspace;\n  TMP_DECL;\n  TMP_MARK;\n  tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (size));\n  mpn_toom3_mul_n (dst, src1, src2, size, tspace);\n  TMP_FREE;\n}\nvoid\nmpn_toom3_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)\n{\n  mp_ptr  tspace;\n  TMP_DECL;\n  TMP_MARK;\n  tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (size));\n  mpn_toom3_sqr_n (dst, src1, size, tspace);\n  TMP_FREE;\n}\nvoid\nmpn_toom4_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)\n{\n  mpn_toom4_mul_n (dst, src1, src2, size);\n}\nvoid\nmpn_toom4_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)\n{\n  mpn_toom4_sqr_n (dst, src1, size);\n}\nvoid\nmpn_toom8h_mul_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size1, mp_srcptr src2, mp_size_t size2)\n{\n  mpn_toom8h_mul (dst, src1, size1, src2, size2);\n}\nvoid\nmpn_toom8_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)\n{\n  mpn_toom8_sqr_n (dst, src1, size);\n}\n\nmp_limb_t\numul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)\n{\n  mp_limb_t  high;\n  umul_ppmm (high, *lowptr, m1, m2);\n  return high;\n}\n\nvoid\nMPN_ZERO_fun (mp_ptr ptr, mp_size_t size)\n{ MPN_ZERO (ptr, size); }\n\nvoid\nmpn_store_fun (mp_ptr ptr, mp_size_t size,mp_limb_t val)\n{ mpn_store (ptr, size,val); }\n\nstruct choice_t {\n  const char  *name;\n  tryfun_t    function;\n  int         type;\n  mp_size_t   minsize;\n};\n\n#if HAVE_STRINGIZE\n#define TRY(fun)        #fun, (tryfun_t) fun\n#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun\n#else\n#define 
TRY(fun)        \"fun\", (tryfun_t) fun\n#define TRY_FUNFUN(fun) \"fun\", (tryfun_t) fun/**/_fun\n#endif\n\nconst struct choice_t choice_array[] = {\n  { TRY(mpn_add),       TYPE_ADD    },\n  { TRY(mpn_sub),       TYPE_SUB    },\n\n  { TRY(mpn_add_n),     TYPE_ADD_N  },\n  { TRY(mpn_sub_n),     TYPE_SUB_N  },\n\n#if HAVE_NATIVE_mpn_add_nc\n  { TRY(mpn_add_nc),    TYPE_ADD_NC },\n#endif\n#if HAVE_NATIVE_mpn_sub_nc\n  { TRY(mpn_sub_nc),    TYPE_SUB_NC },\n#endif\n\n  { TRY(mpn_sumdiff_n),  TYPE_SUMDIFF_N  },\n#if HAVE_NATIVE_mpn_nsumdiff_n\n  { TRY(mpn_nsumdiff_n),  TYPE_NSUMDIFF_N  },\n#endif\n#if HAVE_NATIVE_mpn_sumdiff_nc\n  { TRY(mpn_sumdiff_nc), TYPE_SUMDIFF_NC },\n#endif\n\n  { TRY(mpn_addadd_n),  TYPE_ADDADD_N  },\n  { TRY(mpn_addsub_n),  TYPE_ADDSUB_N  },\n  { TRY(mpn_subadd_n),  TYPE_SUBADD_N  },\n\n  { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },\n  { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },\n#if HAVE_NATIVE_mpn_addmul_1c\n  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },\n#endif\n#if HAVE_NATIVE_mpn_submul_1c\n  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },\n#endif\n\n#if HAVE_NATIVE_mpn_addmul_2\n  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_3\n  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_4\n  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_5\n  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_6\n  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_7\n  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },\n#endif\n#if HAVE_NATIVE_mpn_addmul_8\n  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },\n#endif\n\n  { TRY_FUNFUN(mpn_com_n),  TYPE_COM_N },\n\n  { TRY_FUNFUN(MPN_COPY),      TYPE_COPY },\n  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },\n  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },\n\n  { TRY_FUNFUN(__GMPN_COPY),      TYPE_COPY },\n#ifdef __GMPN_COPY_INCR\n  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },\n#endif\n\n#if HAVE_NATIVE_mpn_copyi\n  { 
TRY(mpn_copyi), TYPE_COPYI },\n#endif\n#if HAVE_NATIVE_mpn_copyd\n  { TRY(mpn_copyd), TYPE_COPYD },\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n  { TRY_FUNFUN(mpn_addlsh1_n), TYPE_ADDLSH1_N },\n#endif\n#if HAVE_NATIVE_mpn_sublsh1_n\n  { TRY_FUNFUN(mpn_sublsh1_n), TYPE_SUBLSH1_N },\n#endif\n#if HAVE_NATIVE_mpn_addlsh_n\n  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },\n#endif\n#if HAVE_NATIVE_mpn_sublsh_n\n  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },\n#endif\n#if HAVE_NATIVE_mpn_addlsh_nc\n  { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },\n#endif\n#if HAVE_NATIVE_mpn_sublsh_nc\n  { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },\n#endif\n#if HAVE_NATIVE_mpn_inclsh_n\n  { TRY_FUNFUN(mpn_inclsh_n), TYPE_INCLSH_N },\n#endif\n#if HAVE_NATIVE_mpn_declsh_n\n  { TRY_FUNFUN(mpn_declsh_n), TYPE_DECLSH_N },\n#endif\n\n#if HAVE_NATIVE_mpn_rsh1add_n\n  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },\n#endif\n#if HAVE_NATIVE_mpn_rsh1sub_n\n  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },\n#endif\n\n  { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },\n  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },\n  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },\n  { TRY_FUNFUN(mpn_ior_n),  TYPE_IOR_N  },\n  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },\n  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },\n  { TRY_FUNFUN(mpn_xor_n),  TYPE_XOR_N  },\n  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },\n\n  { TRY(mpn_divrem_1),     TYPE_DIVREM_1 },\n  { TRY(mpn_divrem_euclidean_qr_1),     TYPE_DIVREM_EUCLIDEAN_QR_1 },\n  { TRY(mpn_divrem_euclidean_r_1),     TYPE_DIVREM_EUCLIDEAN_R_1 },\n  { TRY(mpn_divrem_hensel_qr_1),     TYPE_DIVREM_HENSEL_QR_1 },\n  { TRY(mpn_divrem_hensel_qr_1_1),     TYPE_DIVREM_HENSEL_QR_1_1 },\n  { TRY(mpn_divrem_hensel_qr_1_2),     TYPE_DIVREM_HENSEL_QR_1_2 ,2},\n  { TRY(mpn_divrem_hensel_r_1),     TYPE_DIVREM_HENSEL_R_1 },\n  { TRY(mpn_rsh_divrem_hensel_qr_1),     TYPE_RSH_DIVREM_HENSEL_QR_1 },\n  { TRY(mpn_rsh_divrem_hensel_qr_1_1),     TYPE_RSH_DIVREM_HENSEL_QR_1_1 },\n  { TRY(mpn_rsh_divrem_hensel_qr_1_2),     TYPE_RSH_DIVREM_HENSEL_QR_1_2 ,3},\n  \n  { 
TRY(mpn_divrem_hensel_rsh_qr_1),     TYPE_DIVREM_HENSEL_RSH_QR_1 },\n\n  { TRY(mpn_add_err1_n),\tTYPE_ADDERR1_N},  \n  { TRY(mpn_sub_err1_n),\tTYPE_SUBERR1_N},\n  { TRY(mpn_add_err2_n),\tTYPE_ADDERR2_N},  \n  { TRY(mpn_sub_err2_n),\tTYPE_SUBERR2_N},\n#if USE_PREINV_DIVREM_1\n  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },\n#endif\n  { TRY(mpn_mod_1),        TYPE_MOD_1 },\n#if USE_PREINV_MOD_1\n  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },\n#endif\n#if HAVE_NATIVE_mpn_divrem_1c\n  { TRY(mpn_divrem_1c),    TYPE_DIVREM_1C },\n#endif\n#if HAVE_NATIVE_mpn_mod_1c\n  { TRY(mpn_mod_1c),       TYPE_MOD_1C },\n#endif\n#if GMP_NUMB_BITS % 4 == 0\n  { TRY(mpn_mod_34lsub1),  TYPE_MOD_34LSUB1 },\n#endif\n\n  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },\n#if HAVE_NATIVE_mpn_udiv_qrnnd\n  { TRY(mpn_udiv_qrnnd),    TYPE_UDIV_QRNND, 2 },\n#endif\n#if HAVE_NATIVE_mpn_udiv_qrnnd_r\n  { TRY(mpn_udiv_qrnnd_r),  TYPE_UDIV_QRNND_R, 2 },\n#endif\n\n  { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },\n  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },\n  { TRY(mpn_divexact_byff),       TYPE_DIVEXACT_BYFF },\n  { TRY(mpn_divexact_byfobm1),    TYPE_DIVEXACT_BYFOBM1 },\n  \n  { TRY_FUNFUN(mpn_lshift1),\t  TYPE_LSHIFT1 },\n  { TRY_FUNFUN(mpn_rshift1),\t  TYPE_RSHIFT1 },\n  { TRY_FUNFUN(mpn_lshift2),\t  TYPE_LSHIFT2 },\n  { TRY_FUNFUN(mpn_rshift2),\t  TYPE_RSHIFT2 },\n  { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },\n\n  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },\n  { TRY(mpn_modexact_1c_odd),       TYPE_MODEXACT_1C_ODD },\n\n  { TRY(mpn_tdiv_qr),      TYPE_TDIV_QR },\n  { TRY(mpn_tdiv_q),      TYPE_TDIV_Q },\n\n  { TRY(mpn_mul_1),      TYPE_MUL_1 },\n#if HAVE_NATIVE_mpn_mul_1c\n  { TRY(mpn_mul_1c),     TYPE_MUL_1C },\n#endif\n#if HAVE_NATIVE_mpn_mul_2\n  { TRY(mpn_mul_2),      TYPE_MUL_2, 2 },\n#endif\n\n  { TRY(mpn_rshift),     TYPE_RSHIFT },\n  { TRY(mpn_lshift),     TYPE_LSHIFT },\n#if HAVE_NATIVE_mpn_lshiftc\n  { TRY(mpn_lshiftc),     TYPE_LSHIFTC 
},\n#endif\n\n  { TRY(mpn_mul_basecase), TYPE_MUL_BASECASE },\n  { TRY(mpn_redc_1), TYPE_REDC_BASECASE },\n#if SQR_KARATSUBA_THRESHOLD > 0\n  { TRY(mpn_sqr_basecase), TYPE_SQR },\n#endif\n\n  { TRY(mpn_mul),    TYPE_MUL_BASECASE },\n  { TRY(mpn_mul_n),  TYPE_MUL_N },\n  { TRY(mpn_sqr),  TYPE_SQR },\n  { TRY(mpn_mulmid_basecase), TYPE_MULMID_BASECASE },\n  { TRY(mpn_mulmid), TYPE_MULMID },\n  { TRY(mpn_mulmid_n), TYPE_MULMID_N },\n  \n  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },\n#if HAVE_NATIVE_mpn_umul_ppmm\n  { TRY(mpn_umul_ppmm),    TYPE_UMUL_PPMM, 2 },\n#endif\n#if HAVE_NATIVE_mpn_umul_ppmm_r\n  { TRY(mpn_umul_ppmm_r),  TYPE_UMUL_PPMM_R, 2 },\n#endif\n\n  { TRY_FUNFUN(mpn_kara_mul_n),  TYPE_MUL_N, MPN_KARA_MUL_N_MINSIZE },\n  { TRY_FUNFUN(mpn_kara_sqr_n),  TYPE_SQR,   MPN_KARA_SQR_N_MINSIZE },\n  { TRY_FUNFUN(mpn_toom3_mul_n), TYPE_MUL_N, MPN_TOOM3_MUL_N_MINSIZE },\n  { TRY_FUNFUN(mpn_toom4_mul_n), TYPE_MUL_N, MPN_TOOM4_MUL_N_MINSIZE },\n  { TRY_FUNFUN(mpn_toom8h_mul), TYPE_MUL_BASECASE, MPN_TOOM8H_MUL_MINSIZE },\n  { TRY_FUNFUN(mpn_toom3_sqr_n), TYPE_SQR,   MPN_TOOM3_SQR_N_MINSIZE },\n  { TRY_FUNFUN(mpn_toom4_sqr_n), TYPE_SQR,   MPN_TOOM4_SQR_N_MINSIZE },\n  { TRY_FUNFUN(mpn_toom8_sqr_n), TYPE_SQR,   MPN_TOOM8_SQR_N_MINSIZE },\n\n  { TRY(mpn_gcd_1),        TYPE_GCD_1            },\n  { TRY(mpn_gcd),          TYPE_GCD              },\n  { TRY(mpz_jacobi),       TYPE_MPZ_JACOBI       },\n  { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },\n  { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },\n  { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },\n  { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },\n\n  { TRY_FUNFUN(mpn_not),   TYPE_NOT },\n  { TRY_FUNFUN(mpn_double),   TYPE_DOUBLE },\n  { TRY_FUNFUN(mpn_half),   TYPE_HALF },\n  \n  { TRY(mpn_popcount),   TYPE_POPCOUNT },\n  { TRY(mpn_hamdist),    TYPE_HAMDIST },\n\n  { TRY(mpn_sqrtrem),    TYPE_SQRTREM },\n\n  { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },\n  { TRY_FUNFUN(mpn_store), TYPE_STORE },\n\n  { TRY(mpn_get_str), 
   TYPE_GET_STR },\n\n#ifdef EXTRA_ROUTINES\n  EXTRA_ROUTINES\n#endif\n};\n\nconst struct choice_t *choice = NULL;\n\n\nvoid\nmprotect_maybe (void *addr, size_t len, int prot)\n{\n  if (!option_redzones)\n    return;\n\n#if HAVE_MPROTECT\n  if (mprotect (addr, len, prot) != 0)\n    {\n      fprintf (stderr, \"Cannot mprotect %p 0x%X 0x%X: %s\\n\",\n\t       addr, len, prot, strerror (errno));\n      exit (1);\n    }\n#else\n  {\n    static int  warned = 0;\n    if (!warned)\n      {\n\tfprintf (stderr,\n\t\t \"mprotect not available, bounds testing not performed\\n\");\n\twarned = 1;\n      }\n  }\n#endif\n}\n\n/* round \"a\" up to a multiple of \"m\" */\nsize_t\nround_up_multiple (size_t a, size_t m)\n{\n  unsigned long  r;\n\n  r = a % m;\n  if (r == 0)\n    return a;\n  else\n    return a + (m - r);\n}\n\n\n/* On some systems it seems that only an mmap'ed region can be mprotect'ed,\n   for instance HP-UX 10.\n\n   mmap will almost certainly return a pointer already aligned to a page\n   boundary, but it's easy enough to share the alignment handling with the\n   malloc case. */\n\nvoid\nmalloc_region (struct region_t *r, mp_size_t n)\n{\n  mp_ptr  p;\n  size_t  nbytes;\n\n  ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);\n\n  n = round_up_multiple (n, PAGESIZE_LIMBS);\n  r->size = n;\n\n  nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;\n\n#if defined (MAP_ANONYMOUS) && ! 
defined (MAP_ANON)\n#define MAP_ANON  MAP_ANONYMOUS\n#endif\n\n#if HAVE_MMAP && defined (MAP_ANON)\n  /* note must pass fd=-1 for MAP_ANON on BSD */\n  p = (mp_ptr)mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);\n  if (p == (void *) -1)\n    {\n      fprintf (stderr, \"Cannot mmap %#x anon bytes: %s\\n\",\n\t       nbytes, strerror (errno));\n      exit (1);\n    }\n#else\n  p = (mp_ptr) malloc (nbytes);\n  ASSERT_ALWAYS (p != NULL);\n#endif\n\n  p = align_pointer (p, pagesize);\n\n  mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);\n  p += REDZONE_LIMBS;\n  r->ptr = p;\n\n  mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);\n}\n\nvoid\nmprotect_region (const struct region_t *r, int prot)\n{\n  mprotect_maybe (r->ptr, r->size, prot);\n}\n\n\n/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,\n   and CARRY_4 */\nmp_limb_t  carry_array[] = {\n  0, 1, 2, 3,\n  4,\n  CNST_LIMB(1) << 8,\n  CNST_LIMB(1) << 16,\n  GMP_NUMB_MAX\n};\nint        carry_index;\n\n#define CARRY_COUNT                                             \\\n  ((tr->carry == CARRY_BIT) ? 2                                 \\\n   : tr->carry == CARRY_3   ? 3                                 \\\n   : tr->carry == CARRY_4   ? 4                                 \\\n   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)    \\\n     ? numberof(carry_array) + CARRY_RANDOMS                    \\\n   : 1)\n\n#define MPN_RANDOM_ALT(index,dst,size) \\\n  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))\n\n/* The dummy value after MPN_RANDOM_ALT ensures both sides of the \":\" have\n   the same type */\n#define CARRY_ITERATION                                                 \\\n  for (carry_index = 0;                                                 \\\n       (carry_index < numberof (carry_array)                            \\\n\t? 
(carry = carry_array[carry_index])                            \\\n\t: (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \\\n\t (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \\\n\t carry_index < CARRY_COUNT;                                     \\\n       carry_index++)\n\n\nmp_limb_t  multiplier_array[] = {\n  0, 1, 2, 3,\n  CNST_LIMB(1) << 8,\n  CNST_LIMB(1) << 16,\n  GMP_NUMB_MAX - 2,\n  GMP_NUMB_MAX - 1,\n  GMP_NUMB_MAX\n};\nint        multiplier_index;\n\nmp_limb_t  divisor_array[] = {\n  1, 2, 3,\n  CNST_LIMB(1) << 8,\n  CNST_LIMB(1) << 16,\n  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),\n  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),\n  GMP_NUMB_HIGHBIT,\n  GMP_NUMB_HIGHBIT + 1,\n  GMP_NUMB_MAX - 2,\n  GMP_NUMB_MAX - 1,\n  GMP_NUMB_MAX\n};\n\nint        divisor_index;\n\nmp_limb_t  altdiv_array[]={1,3,5,15,17,51,85,255,65535,\nGMP_NUMB_MAX/1,GMP_NUMB_MAX/3,GMP_NUMB_MAX/5,GMP_NUMB_MAX/15,\nGMP_NUMB_MAX/17,GMP_NUMB_MAX/51,GMP_NUMB_MAX/85,GMP_NUMB_MAX/255,GMP_NUMB_MAX/65535};\n\nint \taltdiv_index;\n\n/* The dummy value after MPN_RANDOM_ALT ensures both sides of the \":\" have\n   the same type */\n#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \\\n  for (index = 0;                                                       \\\n       (index < numberof (array)                                        \\\n\t? (var = array[index])                                          \\\n\t: (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \\\n       index < limit;                                                   \\\n       index++)\n\n#define MULTIPLIER_COUNT                                \\\n  (tr->multiplier                                       \\\n    ? 
numberof (multiplier_array) + MULTIPLIER_RANDOMS  \\\n    : 1)\n\n#define MULTIPLIER_ITERATION                                            \\\n  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \\\n\t\t  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)\n\n#define DIVISOR_COUNT                           \\\n  (tr->divisor == 0 ? 1 :\t\t\t\\\n   tr->divisor == DIVISOR_DIVBM1 \t\t\\\n   ? 1\t\t\t \t\t\t\\\n   : numberof (divisor_array) + DIVISOR_RANDOMS )\n\n#define ALTDIV_COUNT                           \\\n  (tr->divisor == 0 ? 1 :\t\t\t\\\n   tr->divisor == DIVISOR_DIVBM1 \t\t\\\n   ? numberof (divisor_array) : 1 )\n\n#define DIVISOR_ITERATION                                               \\\n  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT,divisor_array , \\\n\t\t  DIVISOR_RANDOMS, TRY_DIVISOR)\n\n\n#define ALTDIV_ITERATION                                        \\\n  ARRAY_ITERATION(altdiv, altdiv_index, ALTDIV_COUNT, altdiv_array, \\\n\t\t  0 , TRY_DIVISOR)\n\n\n/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping\n   d[0] or d[1] respectively, -1 means a separate (write-protected)\n   location. */\n\nstruct overlap_t {\n  int  s[NUM_SOURCES];\n} overlap_array[] = {\n  { { -1, -1, -1 } },\n  { {  0, -1, -1 } },\n  { { -1,  0, -1 } },\n  { {  0,  0, -1 } },\n  { {  1, -1, -1 } },\n  { { -1,  1, -1 } },\n  { {  1,  1, -1 } },\n  { {  0,  1, -1 } },\n  { {  1,  0, -1 } },\n  { { -1, -1, 0 } },\n  { {  0, -1, 0 } },\n  { { -1,  0, 0 } },\n  { {  0,  0, 0 } },\n  { {  1, -1, 0 } },\n  { { -1,  1, 0 } },\n  { {  1,  1, 0 } },\n  { {  0,  1, 0 } },\n  { {  1,  0, 0 } },\n  { { -1, -1, 1 } },\n  { {  0, -1, 1 } },\n  { { -1,  0, 1 } },\n  { {  0,  0, 1 } },\n  { {  1, -1, 1 } },\n  { { -1,  1, 1 } },\n  { {  1,  1, 1 } },\n  { {  0,  1, 1 } },\n  { {  1,  0, 1 } },\n};\n\nstruct overlap_t  *overlap, *overlap_limit;\n\n/* \n   This is a count of the number of overlaps from the above table to try. 
\n   Each source operand can be overlapped with each destination operand (which \n\tare fixed and cannot be overlapped) or put in a non-overlapping block all\n\tto itself. Some functions require that source operands don't overlap. They\n\tcan't go beyond the first three entries of the table, as after that, this \n\tstarts to happen.\n\n\tThree source operands are available, but only those which are used by the\n\tfunction are actually filled with data and made part of the test. The rest\n\tare ignored.\n*/\n\n#define OVERLAP_COUNT                   \\\n  (tr->overlap & OVERLAP_NONE       ? 1 \\\n   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \\\n   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \\\n   : tr->dst[1]                     ? 9 \\\n   : tr->src[2]                     ? 27 \\\n   : tr->src[1]                     ? 4 \\\n   : tr->dst[0]                     ? 2 \\\n   : 1)\n\n#define OVERLAP_ITERATION                               \\\n  for (overlap = &overlap_array[0],                     \\\n    overlap_limit = &overlap_array[OVERLAP_COUNT];      \\\n    overlap < overlap_limit;                            \\\n    overlap++)\n\n\nint  base = 10;\n\n#define T_RAND_COUNT  2\nint  t_rand;\n\nvoid\nt_random (mp_ptr ptr, mp_size_t n)\n{\n  if (n == 0)\n    return;\n\n  switch (option_data) {\n  case DATA_TRAND:\n    switch (t_rand) {\n    case 0: refmpn_random (ptr, n); break;\n    case 1: refmpn_random2 (ptr, n); break;\n    default: abort();\n    }\n    break;\n  case DATA_SEQ:\n    {\n      static mp_limb_t  counter = 0;\n      mp_size_t  i;\n      for (i = 0; i < n; i++)\n\tptr[i] = ++counter;\n    }\n    break;\n  case DATA_ZEROS:\n    refmpn_zero (ptr, n);\n    break;\n  case DATA_FFS:\n    refmpn_fill (ptr, n, GMP_NUMB_MAX);\n    break;\n  case DATA_2FD:\n    /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,\n       inducing the q1_ff special case in the mul-by-inverse part of some\n       versions of divrem_1 and mod_1. 
*/\n    refmpn_fill (ptr, n, (mp_limb_t) -1);\n    ptr[n-1] = 2;\n    ptr[0] -= 2;\n    break;\n\n  default:\n    abort();\n  }\n}\n#define T_RAND_ITERATION \\\n  for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)\n\n\nvoid\nprint_each (const struct each_t *e)\n{\n  int  i;\n\n  printf (\"%s %s\\n\", e->name, e == &ref ? tr->reference_name : choice->name);\n  if (tr->retval)\n    mpn_trace (\"   retval\", &e->retval, 1);\n\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      if (tr->dst[i])\n\t{\n\t  if (tr->dst_bytes[i])\n\t    byte_tracen (\"   d[%d]\", i, e->d[i].p, d[i].size);\n\t  else\n\t    mpn_tracen (\"   d[%d]\", i, e->d[i].p, d[i].size);\n\t  printf (\"        located %p\\n\", e->d[i].p);\n\t}\n    }\n\n  for (i = 0; i < NUM_SOURCES; i++)\n    if (tr->src[i])\n      printf (\"   s[%d] located %p\\n\", i, e->s[i].p);\n}\n\n\nvoid\nprint_all (void)\n{\n  int  i;\n\n  printf (\"\\n\");\n  printf (\"size  %ld\\n\", (long) size);\n  if (tr->size2)\n    printf (\"size2 %ld\\n\", (long) size2);\n\n  for (i = 0; i < NUM_DESTS; i++)\n    if (d[i].size != size)\n      printf (\"d[%d].size %ld\\n\", i, (long) d[i].size);\n\n  if (tr->multiplier)\n    mpn_trace (\"   multiplier\", &multiplier, 1);\n  if (tr->divisor)\n    mpn_trace (\"   divisor\", &divisor, 1);\n  if (tr->shift)\n    printf (\"   shift %lu\\n\", shift);\n  if (tr->carry)\n    mpn_trace (\"   carry\", &carry, 1);\n  if (tr->msize)\n    mpn_trace (\"   multiplier_N\", multiplier_N, tr->msize);\n\n  for (i = 0; i < NUM_DESTS; i++)\n    if (tr->dst[i])\n      printf (\"   d[%d] %s, align %ld, size %ld\\n\",\n\t      i, d[i].high ? \"high\" : \"low\",\n\t      (long) d[i].align, (long) d[i].size);\n\n  for (i = 0; i < NUM_SOURCES; i++)\n    {\n      if (tr->src[i])\n\t{\n\t  printf (\"   s[%d] %s, align %ld, \",\n\t\t  i, s[i].high ? 
\"high\" : \"low\", (long) s[i].align);\n\t  switch (overlap->s[i]) {\n\t  case -1:\n\t    printf (\"no overlap\\n\");\n\t    break;\n\t  default:\n\t    printf (\"==d[%d]%s\\n\",\n\t\t    overlap->s[i],\n\t\t    tr->overlap == OVERLAP_LOW_TO_HIGH ? \"+a\"\n\t\t    : tr->overlap == OVERLAP_HIGH_TO_LOW ? \"-a\"\n\t\t    : \"\");\n\t    break;\n\t  }\n\t  printf (\"   s[%d]=\", i);\n\t  if (tr->carry_sign && (carry & (1 << i)))\n\t    printf (\"-\");\n\t  mpn_trace (NULL, s[i].p, SRC_SIZE(i));\n\t}\n    }\n\n  if (tr->dst0_from_src1)\n    mpn_trace (\"   d[0]\", s[1].region.ptr, size);\n\n  if (tr->reference)\n    print_each (&ref);\n  print_each (&fun);\n}\n\nvoid\ncompare (void)\n{\n  int  error = 0;\n  int  i;\n\n  if (tr->retval && ref.retval != fun.retval)\n    {\n      gmp_printf (\"Different return values (%Mu, %Mu)\\n\",\n\t\t  ref.retval, fun.retval);\n      error = 1;\n    }\n\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      switch (tr->dst_size[i]) {\n      case SIZE_RETVAL:\n      case SIZE_GET_STR:\n\td[i].size = ref.retval;\n\tbreak;\n      }\n    }\n\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      if (! tr->dst[i])\n\tcontinue;\n\n      if (tr->dst_bytes[i])\n\t{\n\t  if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)\n\t    {\n\t      printf (\"Different d[%d] data results, low diff at %ld, high diff at %ld\\n\",\n\t\t      i,\n\t\t      (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),\n\t\t      (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));\n\t      error = 1;\n\t    }\n\t}\n      else\n\t{\n\t  if (d[i].size != 0\n\t      && ! 
refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))\n\t    {\n\t      printf (\"Different d[%d] data results, low diff at %ld, high diff at %ld\\n\",\n\t\t      i,\n\t\t      (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),\n\t\t      (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));\n\t      error = 1;\n\t    }\n\t}\n    }\n\n  if (error)\n    {\n      print_all();\n      abort();\n    }\n}\n\n\n/* The functions are cast if the return value should be a long rather than\n   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This\n   might not be enough if some actual calling conventions checking is\n   implemented on a long long limb system.  */\n\nvoid\ncall (struct each_t *e, tryfun_t function)\n{\n  switch (choice->type) {\n  case TYPE_ADD:\n  case TYPE_SUB:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);\n    break;\n\n  case TYPE_ADDERR1_N:\n  case TYPE_SUBERR1_N:\n     e->retval =CALLING_CONVENTIONS(function)\n         (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p , size,carry);\n     break;\n  case TYPE_ADDERR2_N:\n  case TYPE_SUBERR2_N:\n     e->retval =CALLING_CONVENTIONS(function)\n         (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p ,e->s[3].p, size,carry);\n     break;\n  \n  case TYPE_ADD_N:\n  case TYPE_SUB_N:\n  case TYPE_ADDLSH1_N:\n  case TYPE_SUBLSH1_N:\n  case TYPE_RSH1ADD_N:\n  case TYPE_RSH1SUB_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, size);\n    break;\n  case TYPE_ADDLSH_N:\n  case TYPE_SUBLSH_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, size,shift);\n    break;\n  case TYPE_ADDLSH_NC:\n  case TYPE_SUBLSH_NC:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, size,shift,carry);\n    break;\n  case TYPE_INCLSH_N:\n  case TYPE_DECLSH_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      
(e->d[0].p, e->s[0].p, size,shift);\n    break;\n  case TYPE_ADD_NC:\n  case TYPE_SUB_NC:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);\n    break;\n\n  case TYPE_MUL_1:\n  case TYPE_ADDMUL_1:\n  case TYPE_SUBMUL_1:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, multiplier);\n    break;\n  case TYPE_MUL_1C:\n  case TYPE_ADDMUL_1C:\n  case TYPE_SUBMUL_1C:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, multiplier, carry);\n    break;\n\n  case TYPE_MUL_2:\n    if (size == 1)\n      abort ();\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, e->s[1].p);\n    break;\n\n  case TYPE_ADDMUL_2:\n  case TYPE_ADDMUL_3:\n  case TYPE_ADDMUL_4:\n  case TYPE_ADDMUL_5:\n  case TYPE_ADDMUL_6:\n  case TYPE_ADDMUL_7:\n  case TYPE_ADDMUL_8:\n    if (size == 1)\n      abort ();\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, multiplier_N);\n    break;\n\n  case TYPE_AND_N:\n  case TYPE_ANDN_N:\n  case TYPE_NAND_N:\n  case TYPE_IOR_N:\n  case TYPE_IORN_N:\n  case TYPE_NIOR_N:\n  case TYPE_XOR_N:\n  case TYPE_XNOR_N:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);\n    break;\n\n  case TYPE_SUMDIFF_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);\n    break;\n  case TYPE_NSUMDIFF_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);\n    break;\n  case TYPE_SUMDIFF_NC:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);\n    break;\n\n  case TYPE_ADDSUB_N:\n    e->retval = (int)CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, e->s[2].p,size);\n    break;\n  case TYPE_ADDADD_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, 
e->s[1].p, e->s[2].p,size);\n    break;\n  case TYPE_SUBADD_N:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, e->s[1].p, e->s[2].p,size);\n    break;\n    \n\n  case TYPE_COPY:\n  case TYPE_COPYI:\n  case TYPE_COPYD:\n  case TYPE_COM_N:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);\n    break;\n\n\n  case TYPE_DIVEXACT_BY3:\n    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);\n    break;\n  case TYPE_DIVEXACT_BYFF:\n    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);\n    break;\n\n  case TYPE_LSHIFT1:\n  case TYPE_RSHIFT1:\n  case TYPE_LSHIFT2:\n  case TYPE_RSHIFT2:\n    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);\n    break;    \n\n  case TYPE_DIVEXACT_BY3C:\n    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,\n\t\t\t\t\t\tcarry);\n    break;\n\n\n  case TYPE_DIVREM_HENSEL_QR_1:\n  case TYPE_DIVREM_HENSEL_QR_1_1:\n  case TYPE_DIVREM_HENSEL_QR_1_2:\n  case TYPE_DIVMOD_1:\n  case TYPE_DIVEXACT_1:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, divisor);\n    break;\n\n  case TYPE_RSH_DIVREM_HENSEL_QR_1:\n  case TYPE_RSH_DIVREM_HENSEL_QR_1_1:\n  case TYPE_RSH_DIVREM_HENSEL_QR_1_2:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, divisor,shift,carry);\n    break;\n\n  case TYPE_DIVREM_HENSEL_RSH_QR_1:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, divisor,shift);\n    break;\n  case TYPE_DIVEXACT_BYFOBM1:\n    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, altdiv,GMP_NUMB_MAX/altdiv);\n    break;\n  case TYPE_DIVMOD_1C:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, divisor, carry);\n    break;\n  case TYPE_DIVREM_EUCLIDEAN_QR_1:\n  case TYPE_DIVREM_1:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, size2, e->s[0].p, size, 
divisor);\n    break;\n  case TYPE_DIVREM_1C:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);\n    break;\n  case TYPE_PREINV_DIVREM_1:\n    {\n      mp_limb_t  dinv;\n      unsigned   shift;\n      shift = refmpn_count_leading_zeros (divisor);\n      dinv = refmpn_invert_limb (divisor << shift);\n      e->retval = CALLING_CONVENTIONS (function)\n\t(e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);\n    }\n    break;\n  case TYPE_DIVREM_HENSEL_R_1:\n  case TYPE_DIVREM_EUCLIDEAN_R_1:\n  case TYPE_MOD_1:\n  case TYPE_MODEXACT_1_ODD:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->s[0].p, size, divisor);\n    break;\n  case TYPE_MOD_1C:\n  case TYPE_MODEXACT_1C_ODD:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->s[0].p, size, divisor, carry);\n    break;\n  case TYPE_PREINV_MOD_1:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));\n    break;\n  case TYPE_MOD_34LSUB1:\n    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);\n    break;\n\n  case TYPE_UDIV_QRNND:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);\n    break;\n  case TYPE_UDIV_QRNND_R:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);\n    break;\n\n  case TYPE_SB_DIVREM_MN:\n    refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */\n    refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->d[1].p, size, e->s[1].p, size2);\n    refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */\n    break;\n  case TYPE_TDIV_QR:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,\n\t\t\t\t    e->s[0].p, size, e->s[1].p, size2);\n    break;\ncase TYPE_TDIV_Q:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, \n              
                   size, e->s[1].p, size2);\n    break;\n\n  case TYPE_GCD_1:\n    /* Must have a non-zero src, but this probably isn't the best way to do\n       it. */\n    if (refmpn_zero_p (e->s[0].p, size))\n      e->retval = 0;\n    else\n      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);\n    break;\n\n  case TYPE_GCD:\n    /* Sources are destroyed, so they're saved and replaced, but a general\n       approach to this might be better.  Note that it's still e->s[0].p and\n       e->s[1].p that are passed, to get the desired alignments. */\n    {\n      mp_ptr  s0 = refmpn_malloc_limbs (size);\n      mp_ptr  s1 = refmpn_malloc_limbs (size2);\n      refmpn_copyi (s0, e->s[0].p, size);\n      refmpn_copyi (s1, e->s[1].p, size2);\n\n      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);\n      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);\n      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,\n\t\t\t\t\t\t  e->s[0].p, size,\n\t\t\t\t\t\t  e->s[1].p, size2);\n      refmpn_copyi (e->s[0].p, s0, size);\n      refmpn_copyi (e->s[1].p, s1, size2);\n      free (s0);\n      free (s1);\n    }\n    break;\n\n  case TYPE_GCD_FINDA:\n    {\n      /* FIXME: do this with a flag */\n      mp_limb_t  c[2];\n      c[0] = e->s[0].p[0];\n      c[0] += (c[0] == 0);\n      c[1] = e->s[0].p[0];\n      c[1] += (c[1] == 0);\n      e->retval = CALLING_CONVENTIONS (function) (c);\n    }\n    break;\n\n  case TYPE_MPZ_JACOBI:\n  case TYPE_MPZ_KRONECKER:\n    {\n      mpz_t  a, b;\n      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);\n      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);\n      e->retval = CALLING_CONVENTIONS (function) (a, b);\n    }\n    break;\n  case TYPE_MPZ_KRONECKER_UI:\n    {\n      mpz_t  a;\n      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? 
size : -size);\n      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);\n    }\n    break;\n  case TYPE_MPZ_KRONECKER_SI:\n    {\n      mpz_t  a;\n      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);\n      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);\n    }\n    break;\n  case TYPE_MPZ_UI_KRONECKER:\n    {\n      mpz_t  b;\n      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);\n      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);\n    }\n    break;\n  case TYPE_MPZ_SI_KRONECKER:\n    {\n      mpz_t  b;\n      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);\n      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);\n    }\n    break;\n\n  case TYPE_MUL_BASECASE:\n    CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);\n    break;\n  case TYPE_MULMID_BASECASE:\n    CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);\n    break;\n  case TYPE_MULMID:\n    CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);\n    break;\n  case TYPE_MULMID_N:\n    CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[1].p, e->s[0].p, size);\n    break;\n  case TYPE_REDC_BASECASE:\n    /* Sources are destroyed, so they're saved and replaced, but a general\n       approach to this might be better.  Note that it's still e->s[0].p and\n       e->s[1].p that are passed, to get the desired alignments. 
*/\n    {\n      mp_limb_t Np;\n      mp_ptr  s0 = refmpn_malloc_limbs (size);\n      mp_ptr  s1 = refmpn_malloc_limbs (size2);\n      modlimb_invert(Np,e->s[0].p[0]);\n      Np=-Np;\n      refmpn_copyi (s0, e->s[0].p, size);\n      refmpn_copyi (s1, e->s[1].p, size2);\n\n      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);\n      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);\n      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,e->s[1].p,\n\t\t\t\t\t\t  e->s[0].p, size,Np);\n      refmpn_copyi (e->s[0].p, s0, size);\n      refmpn_copyi (e->s[1].p, s1, size2);\n      free (s0);\n      free (s1);\n    }\n  \n    break;\n  case TYPE_MUL_N:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);\n    break;\n  case TYPE_SQR:\n    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);\n    break;\n\n  case TYPE_UMUL_PPMM:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);\n    break;\n  case TYPE_UMUL_PPMM_R:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);\n    break;\n\n  case TYPE_LSHIFTC:\n  case TYPE_LSHIFT:\n  case TYPE_RSHIFT:\n    e->retval = CALLING_CONVENTIONS (function)\n      (e->d[0].p, e->s[0].p, size, shift);\n    break;\n\n  case TYPE_NOT:\n    \t CALLING_CONVENTIONS (function) (e->d[0].p, size);\n    break;\n  case TYPE_HALF:\n  case TYPE_DOUBLE:\n    \t e->retval=CALLING_CONVENTIONS (function) (e->d[0].p, size);\n    break;\n  case TYPE_POPCOUNT:\n    e->retval = (* (unsigned long (*)(ANYARGS))\n\t\t CALLING_CONVENTIONS (function)) (e->s[0].p, size);\n    break;\n  case TYPE_HAMDIST:\n    e->retval = (* (unsigned long (*)(ANYARGS))\n\t\t CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);\n    break;\n\n  case TYPE_SQRTREM:\n    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))\n      (e->d[0].p, e->d[1].p, e->s[0].p, size);\n    break;\n\n  case TYPE_ZERO:\n    CALLING_CONVENTIONS 
(function) (e->d[0].p, size);\n    break;\n  case TYPE_STORE:\n    CALLING_CONVENTIONS (function) (e->d[0].p, size,4354);\n    break;\n\n  case TYPE_GET_STR:\n    {\n      size_t  sizeinbase, fill;\n      char    *dst;\n      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);\n      ASSERT_ALWAYS (sizeinbase <= d[0].size);\n      fill = d[0].size - sizeinbase;\n      if (d[0].high)\n\t{\n\t  memset (e->d[0].p, 0xBA, fill);\n\t  dst = (char *) e->d[0].p + fill;\n\t}\n      else\n\t{\n\t  dst = (char *) e->d[0].p;\n\t  memset (dst + sizeinbase, 0xBA, fill);\n\t}\n      if (POW2_P (base))\n\t{\n\t  e->retval = CALLING_CONVENTIONS (function) (dst, base,\n\t\t\t\t\t\t      e->s[0].p, size);\n\t}\n      else\n\t{\n\t  refmpn_copy (e->d[1].p, e->s[0].p, size);\n\t  e->retval = CALLING_CONVENTIONS (function) (dst, base,\n\t\t\t\t\t\t      e->d[1].p, size);\n\t}\n      refmpn_zero (e->d[1].p, size);  /* cloberred or unused */\n    }\n    break;\n\n#ifdef EXTRA_CALL\n    EXTRA_CALL\n#endif\n\n  default:\n    printf (\"Unknown routine type %d\\n\", choice->type);\n    abort ();\n    break;\n  }\n}\n\n\nvoid\npointer_setup (struct each_t *e)\n{\n  int  i, j;\n\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      switch (tr->dst_size[i]) {\n      case 0:\n      case SIZE_RETVAL: /* will be adjusted later */\n\td[i].size = size;\n\tbreak;\n\n      case SIZE_1:\n\td[i].size = 1;\n\tbreak;\n      case SIZE_2:\n\td[i].size = 2;\n\tbreak;\n      case SIZE_3:\n\td[i].size = 3;\n\tbreak;\n      case SIZE_4:\n\td[i].size = 4;\n\tbreak;\n\n      case SIZE_PLUS_1:\n\td[i].size = size+1;\n\tbreak;\n      case SIZE_PLUS_2:\n\td[i].size = size+2;\n\tbreak;\n      case SIZE_PLUS_MSIZE_SUB_1:\n\td[i].size = size + tr->msize - 1;\n\tbreak;\n\n      case SIZE_SUM:\n\tif (tr->size2)\n\t  d[i].size = size + size2;\n\telse\n\t  d[i].size = 2*size;\n\tbreak;\n\n      case SIZE_SIZE2:\n\td[i].size = size2;\n\tbreak;\n\n      case SIZE_DIFF:\n\td[i].size = size - size2;\n\tbreak;\n\n      case 
SIZE_DIFF_PLUS_1:\n\td[i].size = size - size2 + 1;\n\tbreak;\n\n      case SIZE_DIFF_PLUS_3:\n\td[i].size = size - size2 + 3;\n\tbreak;\n\n      case SIZE_CEIL_HALF:\n\td[i].size = (size+1)/2;\n\tbreak;\n\n      case SIZE_GET_STR:\n\t{\n\t  mp_limb_t ff = GMP_NUMB_MAX;\n\t  MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);\n\t}\n\tbreak;\n\n      default:\n\tprintf (\"Unrecognised dst_size type %d\\n\", tr->dst_size[i]);\n\tabort ();\n      }\n    }\n\n  /* establish e->d[].p destinations */\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      mp_size_t  offset = 0;\n\n      /* possible room for overlapping sources */\n      for (j = 0; j < numberof (overlap->s); j++)\n\tif (overlap->s[j] == i)\n\t  offset = MAX (offset, s[j].align);\n\n      if (d[i].high)\n\t{\n\t  if (tr->dst_bytes[i])\n\t    {\n\t      e->d[i].p = (mp_ptr)\n\t\t((char *) (e->d[i].region.ptr + e->d[i].region.size)\n\t\t - d[i].size - d[i].align);\n\t    }\n\t  else\n\t    {\n\t      e->d[i].p = e->d[i].region.ptr + e->d[i].region.size\n\t\t- d[i].size - d[i].align;\n\t      if (tr->overlap == OVERLAP_LOW_TO_HIGH)\n\t\te->d[i].p -= offset;\n\t    }\n\t}\n      else\n\t{\n\t  if (tr->dst_bytes[i])\n\t    {\n\t      e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);\n\t    }\n\t  else\n\t    {\n\t      e->d[i].p = e->d[i].region.ptr + d[i].align;\n\t      if (tr->overlap == OVERLAP_HIGH_TO_LOW)\n\t\te->d[i].p += offset;\n\t    }\n\t}\n    }\n\n  /* establish e->s[].p sources */\n  for (i = 0; i < NUM_SOURCES; i++)\n    {\n      int  o = overlap->s[i];\n      switch (o) {\n      case -1:\n\t/* no overlap */\n\te->s[i].p = s[i].p;\n\tbreak;\n      case 0:\n      case 1:\n\t/* overlap with d[o] */\n\tif (tr->overlap == OVERLAP_HIGH_TO_LOW)\n\t  e->s[i].p = e->d[o].p - s[i].align;\n\telse if (tr->overlap == OVERLAP_LOW_TO_HIGH)\n\t  e->s[i].p = e->d[o].p + s[i].align;\n\telse if (tr->size2 == SIZE_FRACTION)\n\t  e->s[i].p = e->d[o].p + size2;\n\telse\n\t  e->s[i].p = 
e->d[o].p;\n\tbreak;\n      default:\n\tabort();\n\tbreak;\n      }\n    }\n}\n\n\nvoid\nvalidate_fail (void)\n{\n  if (tr->reference)\n    {\n      trap_location = TRAP_REF;\n      call (&ref, tr->reference);\n      trap_location = TRAP_NOWHERE;\n    }\n\n  print_all();\n  abort();\n}\n\n\nvoid\ntry_one (void)\n{\n  int  i;\n\n  if (option_spinner)\n    spinner();\n  spinner_count++;\n\n  trap_location = TRAP_SETUPS;\n\n  if (tr->divisor == DIVISOR_NORM)\n    divisor |= GMP_NUMB_HIGHBIT;\n  if (tr->divisor == DIVISOR_ODD)\n    divisor |= 1;\n\n  for (i = 0; i < NUM_SOURCES; i++)\n    {\n      if (s[i].high)\n\ts[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;\n      else\n\ts[i].p = s[i].region.ptr + s[i].align;\n    }\n\n  pointer_setup (&ref);\n  pointer_setup (&fun);\n\n  ref.retval = 0x04152637;\n  fun.retval = 0x8C9DAEBF;\n\n  t_random (multiplier_N, tr->msize);\n\n  for (i = 0; i < NUM_SOURCES; i++)\n    {\n      if (! tr->src[i])\n\tcontinue;\n\n      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);\n      t_random (s[i].p, SRC_SIZE(i));\n\n      switch (tr->data) {\n      case DATA_NON_ZERO:\n\tif (refmpn_zero_p (s[i].p, SRC_SIZE(i)))\n\t  s[i].p[0] = 1;\n\tbreak;\n\n      case DATA_MULTIPLE_DIVISOR:\n\t/* same number of low zero bits as divisor */\n\ts[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);\n\trefmpn_sub_1 (s[i].p, s[i].p, size,\n\t\t      refmpn_mod_1 (s[i].p, size, divisor));\n\tbreak;\n\n      case DATA_GCD:\n\t/* s[1] no more bits than s[0] */\n\tif (i == 1 && size2 == size)\n\t  s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);\n\n\t/* high limb non-zero */\n\ts[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);\n\n\t/* odd */\n\ts[i].p[0] |= 1;\n\tbreak;\n\n      case DATA_SRC1_ODD:\n\tif (i == 1)\n\t  s[i].p[0] |= 1;\n\tbreak;\n\t\n      case DATA_SRC0_ODD:\n\tif (i == 0)\n\t  s[i].p[0] |= 1;\n\tbreak;\n\n      case DATA_SRC1_HIGHBIT:\n\tif (i == 1)\n\t  {\n\t    if (tr->size2)\n\t      s[i].p[size2-1] |= 
GMP_NUMB_HIGHBIT;\n\t    else\n\t      s[i].p[size-1] |= GMP_NUMB_HIGHBIT;\n\t  }\n\tbreak;\n\n      case DATA_UDIV_QRNND:\n\ts[i].p[1] %= divisor;\n\tbreak;\n      }\n\n      mprotect_region (&s[i].region, PROT_READ);\n    }\n\n  for (i = 0; i < NUM_DESTS; i++)\n    {\n      if (! tr->dst[i])\n\tcontinue;\n\n      if (tr->dst0_from_src1 && i==0)\n\t{\n\t  mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));\n\t  mp_size_t  fill = MAX (0, d[0].size - copy);\n\t  MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);\n\t  MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);\n\t  refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);\n\t  refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);\n\t}\n      else if (tr->dst_bytes[i])\n\t{\n\t  memset (ref.d[i].p, 0xBA, d[i].size);\n\t  memset (fun.d[i].p, 0xBA, d[i].size);\n\t}\n      else\n\t{\n\t  refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);\n\t  refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);\n\t}\n    }\n\n  for (i = 0; i < NUM_SOURCES; i++)\n    {\n      if (! tr->src[i])\n\tcontinue;\n\n      if (ref.s[i].p != s[i].p)\n\t{\n\t  refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));\n\t  refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));\n\t}\n    }\n\n  if (option_print)\n    print_all();\n\n  if (tr->validate != NULL)\n    {\n      trap_location = TRAP_FUN;\n      call (&fun, choice->function);\n      trap_location = TRAP_NOWHERE;\n\n      if (! CALLING_CONVENTIONS_CHECK ())\n\t{\n\t  print_all();\n\t  abort();\n\t}\n\n      (*tr->validate) ();\n    }\n  else\n    {\n      trap_location = TRAP_REF;\n      call (&ref, tr->reference);\n      trap_location = TRAP_FUN;\n      call (&fun, choice->function);\n      trap_location = TRAP_NOWHERE;\n\n      if (! 
CALLING_CONVENTIONS_CHECK ())\n\t{\n\t  print_all();\n\t  abort();\n\t}\n\n      compare ();\n    }\n}\n\n\n#define SIZE_ITERATION                                          \\\n  for (size = MAX3 (option_firstsize,                           \\\n\t\t    choice->minsize,                            \\\n\t\t    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \\\n       size <= option_lastsize;                                 \\\n       size++)\n\n#define SIZE2_FIRST                                     \\\n  (tr->size2 == SIZE_2 ? 2                              \\\n   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \\\n   : tr->size2 == SIZE_DOUBLE ? size*2\t\t\t\\\n   : tr->size2 == SIZE_DOUBLE_MINUS_1 ? size*2-1\t\t\\\n   : tr->size2 ?                                        \\\n   MAX (choice->minsize, (option_firstsize2 != 0        \\\n\t\t\t  ? option_firstsize2 : 1))     \\\n   : 0)\n\n#define SIZE2_LAST                                      \\\n  (tr->size2 == SIZE_2 ? 2                              \\\n   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \\\n   : tr->size2 == SIZE_DOUBLE ? size*2\t\t        \t\\\n   : tr->size2 == SIZE_DOUBLE_MINUS_1 ? size*2-1\t\t\\\n   : tr->size2 ? size                                   \\\n   : 0)\n\n#define SIZE2_ITERATION \\\n  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)\n\n#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)\n#define ALIGN_ITERATION(w,n,cond) \\\n  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)\n\n#define HIGH_LIMIT(cond)  ((cond) != 0)\n#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)\n#define HIGH_ITERATION(w,n,cond) \\\n  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)\n\n#define SHIFT_LIMIT                                     \\\n  ((unsigned long) (tr->shift ? 
GMP_NUMB_BITS -1 : 1))\n\n#define SHIFT_ITERATION                                 \\\n  for (shift = 1; shift <= SHIFT_LIMIT; shift++)\n\n\nvoid\ntry_many (void)\n{\n  int   i;\n\n  {\n    unsigned long  total = 1;\n\n    total *= option_repetitions;\n    total *= option_lastsize;\n    if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;\n    else if (tr->size2 == SIZE_DOUBLE) total *= 1;\n    else if (tr->size2)             total *= (option_lastsize+1)/2;\n\n    total *= SHIFT_LIMIT;\n    total *= MULTIPLIER_COUNT;\n    total *= DIVISOR_COUNT;\n    total *= ALTDIV_COUNT;\n    total *= CARRY_COUNT;\n    total *= T_RAND_COUNT;\n\n    total *= HIGH_COUNT (tr->dst[0]);\n    total *= HIGH_COUNT (tr->dst[1]);\n    total *= HIGH_COUNT (tr->src[0]);\n    total *= HIGH_COUNT (tr->src[1]);\n    total *= HIGH_COUNT (tr->src[2]);\n    \n    total *= ALIGN_COUNT (tr->dst[0]);\n    total *= ALIGN_COUNT (tr->dst[1]);\n    total *= ALIGN_COUNT (tr->src[0]);\n    total *= ALIGN_COUNT (tr->src[1]);\n    total *= ALIGN_COUNT (tr->src[2]);\n\n#if NUM_SOURCES > 3 || NUM_DESTS > 2\n#error Need to adjust high_count and align_count above\n#endif\n\n    total *= OVERLAP_COUNT;\n\n    printf (\"%s %lu\\n\", choice->name, total);\n  }\n\n  spinner_count = 0;\n\n  for (i = 0; i < option_repetitions; i++)\n    SIZE_ITERATION\n      SIZE2_ITERATION\n\n      SHIFT_ITERATION\n      MULTIPLIER_ITERATION\n      ALTDIV_ITERATION\n      DIVISOR_ITERATION\n      CARRY_ITERATION /* must be after divisor */\n      T_RAND_ITERATION\n\n      HIGH_ITERATION(d,0, tr->dst[0])\n      HIGH_ITERATION(d,1, tr->dst[1])\n      HIGH_ITERATION(s,0, tr->src[0])\n      HIGH_ITERATION(s,1, tr->src[1])\n      HIGH_ITERATION(s,2, tr->src[2])\n\n      ALIGN_ITERATION(d,0, tr->dst[0])\n      ALIGN_ITERATION(d,1, tr->dst[1])\n      ALIGN_ITERATION(s,0, tr->src[0])\n      ALIGN_ITERATION(s,1, tr->src[1])\n      ALIGN_ITERATION(s,2, tr->src[2])\n\n#if NUM_SOURCES > 3 || NUM_DESTS > 2\n#error Need to adjust high_iteration 
and align_iteration above\n#endif\n\n      OVERLAP_ITERATION\n      try_one();\n\n  printf(\"\\n\");\n}\n\n\n/* Usually print_all() doesn't show much, but it might give a hint as to\n   where the function was up to when it died. */\nvoid\ntrap (int sig)\n{\n  const char *name = \"noname\";\n\n  switch (sig) {\n  case SIGILL:  name = \"SIGILL\";  break;\n#ifdef SIGBUS\n  case SIGBUS:  name = \"SIGBUS\";  break;\n#endif\n  case SIGSEGV: name = \"SIGSEGV\"; break;\n  case SIGFPE:  name = \"SIGFPE\";  break;\n  }\n\n  printf (\"\\n\\nSIGNAL TRAP: %s\\n\", name);\n\n  switch (trap_location) {\n  case TRAP_REF:\n    printf (\"  in reference function: %s\\n\", tr->reference_name);\n    break;\n  case TRAP_FUN:\n    printf (\"  in test function: %s\\n\", choice->name);\n    print_all ();\n    break;\n  case TRAP_SETUPS:\n    printf (\"  in parameter setups\\n\");\n    print_all ();\n    break;\n  default:\n    printf (\"  somewhere unknown\\n\");\n    break;\n  }\n  exit (1);\n}\n\n\nvoid\ntry_init (void)\n{\n#if HAVE_GETPAGESIZE\n  /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't\n     know _SC_PAGESIZE. 
*/\n  pagesize = getpagesize ();\n#elif HAVE_SYSCONF\n  if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)\n    {\n      /* According to the linux man page, sysconf doesn't set errno */\n      fprintf (stderr, \"Cannot get sysconf _SC_PAGESIZE\\n\");\n      exit (1);\n    }\n#elif defined( _MSC_VER )\n    SYSTEM_INFO si;\n    GetSystemInfo(&si);\n    pagesize = si.dwPageSize;\n#else\n#error Error, error, cannot get page size\n#endif\n\n  printf (\"pagesize is 0x%lX bytes\\n\", pagesize);\n\n  signal (SIGILL,  trap);\n#ifdef SIGBUS\n  signal (SIGBUS,  trap);\n#endif\n  signal (SIGSEGV, trap);\n  signal (SIGFPE,  trap);\n\n  {\n    int  i;\n\n    for (i = 0; i < NUM_SOURCES; i++)\n      {\n\tmalloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);\n\tprintf (\"s[%d] %p to %p (0x%lX bytes)\\n\",\n\t\ti, s[i].region.ptr,\n\t\ts[i].region.ptr + s[i].region.size,\n\t\t(long) s[i].region.size * BYTES_PER_MP_LIMB);\n      }\n\n#define INIT_EACH(e,es)                                                 \\\n    for (i = 0; i < NUM_DESTS; i++)                                     \\\n      {                                                                 \\\n\tmalloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \\\n\tprintf (\"%s d[%d] %p to %p (0x%lX bytes)\\n\",                    \\\n\t\tes, i, e.d[i].region.ptr,                               \\\n\t\te.d[i].region.ptr + e.d[i].region.size,                 \\\n\t\t(long) e.d[i].region.size * BYTES_PER_MP_LIMB);         \\\n      }\n\n    INIT_EACH(ref, \"ref\");\n    INIT_EACH(fun, \"fun\");\n  }\n}\n\nint\nstrmatch_wild (const char *pattern, const char *str)\n{\n  size_t  plen, slen;\n\n  /* wildcard at start */\n  if (pattern[0] == '*')\n    {\n      pattern++;\n      plen = strlen (pattern);\n      slen = strlen (str);\n      return (plen == 0\n\t      || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));\n    }\n\n  /* wildcard at end */\n  plen = strlen (pattern);\n  if (plen >= 1 && pattern[plen-1] 
== '*')\n    return (memcmp (pattern, str, plen-1) == 0);\n\n  /* no wildcards */\n  return (strcmp (pattern, str) == 0);\n}\n\nvoid\ntry_name (const char *name)\n{\n  int  found = 0;\n  int  i;\n\n  for (i = 0; i < numberof (choice_array); i++)\n    {\n      if (strmatch_wild (name, choice_array[i].name))\n\t{\n\t  choice = &choice_array[i];\n\t  tr = &param[choice->type];\n\t  try_many ();\n\t  found = 1;\n\t}\n    }\n\n  if (!found)\n    {\n      printf (\"%s unknown\\n\", name);\n      /* exit (1); */\n    }\n}\n\n\nvoid\nusage (const char *prog)\n{\n  int  col = 0;\n  int  i;\n\n  printf (\"Usage: %s [options] function...\\n\", prog);\n  printf (\"    -1        use limb data 1,2,3,etc\\n\");\n  printf (\"    -9        use limb data all 0xFF..FFs\\n\");\n  printf (\"    -a zeros  use limb data all zeros\\n\");\n  printf (\"    -a ffs    use limb data all 0xFF..FFs (same as -9)\\n\");\n  printf (\"    -a 2fd    use data 0x2FFF...FFFD\\n\");\n  printf (\"    -p        print each case tried (try this if seg faulting)\\n\");\n  printf (\"    -R        seed random numbers from time()\\n\");\n  printf (\"    -r reps   set repetitions (default %d)\\n\", DEFAULT_REPETITIONS);\n  printf (\"    -s size   starting size to test\\n\");\n  printf (\"    -S size2  starting size2 to test\\n\");\n  printf (\"    -s s1-s2  range of sizes to test\\n\");\n  printf (\"    -W        don't show the spinner (use this in gdb)\\n\");\n  printf (\"    -z        disable mprotect() redzones\\n\");\n  printf (\"Default data is refmpn_random() and refmpn_random2().\\n\");\n  printf (\"\\n\");\n  printf (\"Functions that can be tested:\\n\");\n\n  for (i = 0; i < numberof (choice_array); i++)\n    {\n      if (col + 1 + strlen (choice_array[i].name) > 79)\n\t{\n\t  printf (\"\\n\");\n\t  col = 0;\n\t}\n      printf (\" %s\", choice_array[i].name);\n      col += 1 + strlen (choice_array[i].name);\n    }\n  printf (\"\\n\");\n\n  exit(1);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  int  
i;\n\n  /* unbuffered output */\n  setbuf (stdout, NULL);\n  setbuf (stderr, NULL);\n\n  /* default trace in hex, and in upper-case so can paste into bc */\n  mp_trace_base = -16;\n\n  param_init ();\n\n  {\n    unsigned long  seed = 123;\n    int   opt;\n\n    while ((opt = getopt(argc, argv, \"19a:b:E:pRr:S:s:Wz\")) != EOF)\n      {\n\tswitch (opt) {\n\tcase '1':\n\t  /* use limb data values 1, 2, 3, ... etc */\n\t  option_data = DATA_SEQ;\n\t  break;\n\tcase '9':\n\t  /* use limb data values 0xFFF...FFF always */\n\t  option_data = DATA_FFS;\n\t  break;\n\tcase 'a':\n\t  if (strcmp (optarg, \"zeros\") == 0)     option_data = DATA_ZEROS;\n\t  else if (strcmp (optarg, \"seq\") == 0)  option_data = DATA_SEQ;\n\t  else if (strcmp (optarg, \"ffs\") == 0)  option_data = DATA_FFS;\n\t  else if (strcmp (optarg, \"2fd\") == 0)  option_data = DATA_2FD;\n\t  else\n\t    {\n\t      fprintf (stderr, \"unrecognised data option: %s\\n\", optarg);\n\t      exit (1);\n\t    }\n\t  break;\n\tcase 'b':\n\t  mp_trace_base = atoi (optarg);\n\t  break;\n\tcase 'E':\n\t  /* re-seed */\n\t  sscanf (optarg, \"%lu\", &seed);\n\t  printf (\"Re-seeding with %lu\\n\", seed);\n\t  break;\n\tcase 'p':\n\t  option_print = 1;\n\t  break;\n\tcase 'R':\n\t  /* randomize */\n\t  seed = time (NULL);\n\t  printf (\"Seeding with %lu, re-run using \\\"-E %lu\\\"\\n\", seed, seed);\n\t  break;\n\tcase 'r':\n\t  option_repetitions = atoi (optarg);\n\t  break;\n\tcase 's':\n\t  {\n\t    char  *p;\n\t    option_firstsize = atoi (optarg);\n\t    if ((p = strchr (optarg, '-')) != NULL)\n\t      option_lastsize = atoi (p+1);\n\t  }\n\t  break;\n\tcase 'S':\n\t  /* -S <size> sets the starting size for the second of a two size\n\t     routine (like mpn_mul_basecase) */\n\t  option_firstsize2 = atoi (optarg);\n\t  break;\n\tcase 'W':\n\t  /* use this when running in the debugger */\n\t  option_spinner = 0;\n\t  break;\n\tcase 'z':\n\t  /* disable redzones */\n\t  option_redzones = 0;\n\t  break;\n\tcase 
'?':\n\t  usage (argv[0]);\n\t  break;\n\t}\n      }\n\n    gmp_randinit_default (__gmp_rands);\n    __gmp_rands_initialized = 1;\n    gmp_randseed_ui (__gmp_rands, seed);\n  }\n\n  try_init();\n\n  if (argc <= optind)\n    usage (argv[0]);\n\n  for (i = optind; i < argc; i++)\n    try_name (argv[i]);\n\n  return 0;\n}\n"
  },
  {
    "path": "tests/fft/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003 Free Software\n# Foundation, Inc.\n#\n# Copyright 2008 Jason Moxham\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = t-adjust t-adjust_sqrt2 t-butterfly t-butterfly_lshB t-butterfly_rshB t-butterfly_sqrt2 t-butterfly_twiddle t-div_2expmod_2expp1 t-fft_ifft_mfa_trunc_sqrt2 t-fft_ifft_negacyclic t-fft_ifft_radix2 t-fft_ifft_trunc t-fft_ifft_trunc_sqrt2 t-mul_2expmod_2expp1 t-mul_fft_main t-mul_mfa_trunc_sqrt2 t-mul_trunc_sqrt2 t-mulmod_2expp1 t-normmod_2expp1 t-split_combine_bits \n\n#if ENABLE_STATIC\n#if ENABLE_SHARED\n#check_PROGRAMS += st_hamdist st_popcount\n#st_hamdist_SOURCES = t-hamdist.c\n#st_hamdist_LDFLAGS = -static\n#st_popcount_SOURCES = t-popcount.c\n#st_popcount_LDFLAGS = -static\n#endif\n#endif\n\nTESTS = $(check_PROGRAMS)\n\n# Temporary files used by the tests.  
Removed automatically if the tests\n# pass, but ensure they're cleaned if they fail.\n#\nCLEANFILES = *.tmp\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/fft/t-adjust.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_adjust(mpz_t r, mpz_t i1, mpz_t p, mp_size_t i, mp_size_t w)\n{\n   mpz_mul_2exp(r, i1, w*i);\n   mpz_mod(r, r, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, 
limbs;\n    mpz_t p, m2a, m2b, mn1;\n    mp_limb_t * nn1, * r1;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < n; c++)\n                {\n                    set_p(p, n, w);\n                    \n                    nn1 = malloc((limbs+1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs+1)*sizeof(mp_limb_t));\n\n                    mpir_random_fermat(nn1, state, limbs); \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    ref_adjust(m2a, mn1, p, c, w);\n            \n                    mpir_fft_adjust(r1, nn1, c, limbs, w);\n                    mpir_fermat_to_mpz(m2b, r1, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    \n                    if (mpz_cmp(m2a, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"adjust error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, c = %ld\\n\", n, w, c);\n                        gmp_printf(\"want %Zx\\n\\n\", m2a);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(r1);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n\n    gmp_randclear(state);\n    \n    
tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-adjust_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_adjust_sqrt2(mpz_t r, mpz_t i1, mpz_t p, mp_size_t i, mp_size_t limbs, mp_size_t w)\n{\n   mpz_mul_2exp(r, i1, (w/2)*i + i/2);\n   if (i & 1)\n   {\n       mpz_mul_2exp(i1, r, 
3*limbs*GMP_LIMB_BITS/4);\n       mpz_mul_2exp(r, r, limbs*GMP_LIMB_BITS/4);\n       mpz_sub(r, i1, r);\n   }\n   mpz_mod(r, r, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mpz_t p, m2a, m2b, mn1;\n    mp_limb_t * nn1, * r1, * temp;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 1; c < 2*n; c+=2)\n                {\n                    set_p(p, n, w);\n                    \n                    nn1 = malloc((limbs+1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs+1)*sizeof(mp_limb_t));\n                    temp = malloc((limbs+1)*sizeof(mp_limb_t));\n\n                    mpir_random_fermat(nn1, state, limbs); \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    ref_adjust_sqrt2(m2a, mn1, p, c, limbs, w);\n            \n                    mpir_fft_adjust_sqrt2(r1, nn1, c, limbs, w, temp);\n                    mpir_fermat_to_mpz(m2b, r1, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    \n                    if (mpz_cmp(m2a, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"adjust_sqrt2 error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, c = %ld\\n\", n, w, c);\n                        gmp_printf(\"want %Zx\\n\\n\", m2a);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                    
    abort();\n                    }\n                    \n                    free(temp);\n                    free(nn1);\n                    free(r1);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-butterfly.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_fft_butterfly(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t i, mp_size_t w)\n{\n   mpz_add(s, i1, i2);\n   mpz_sub(t, i1, i2);\n   
mpz_mul_2exp(t, t, i*w);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nvoid ref_ifft_butterfly(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t i, mp_size_t n, mp_size_t w)\n{\n   mpz_mul_2exp(i2, i2, 2*n*w - i*w);\n   mpz_add(s, i1, i2);\n   mpz_sub(t, i1, i2);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mpz_t p, ma, mb, m2a, m2b, mn1, mn2;\n    mp_limb_t * nn1, * nn2, * r1, * r2;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(ma);\n    mpz_init(mb);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n    mpz_init(mn2);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < n; c++)\n                {\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_fft_butterfly(r1, r2, nn1, nn2, c, limbs, w);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n       
             ref_fft_butterfly(ma, mb, mn1, mn2, p, c, w);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly error b\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < n; c++)\n                {\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    
mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_ifft_butterfly(r1, r2, nn1, nn2, c, limbs, w);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_ifft_butterfly(ma, mb, mn1, mn2, p, c, n, w);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly error b\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(ma);\n    mpz_clear(mb);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n    mpz_clear(mn2);\n\n    gmp_randclear(state);\n    \n    
tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-butterfly_lshB.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_butterfly_lshB(mpz_t t, mpz_t u, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t x, mp_size_t y)\n{\n   mpz_add(t, i1, i2);\n   mpz_sub(u, i1, i2);\n  
 mpz_mul_2exp(t, t, x*GMP_LIMB_BITS);\n   mpz_mul_2exp(u, u, y*GMP_LIMB_BITS);\n   mpz_mod(t, t, p);\n   mpz_mod(u, u, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mp_limb_t  x, y;\n    mpz_t p, ma, mb, m2a, m2b, mn1, mn2;\n    mp_limb_t * nn1, * nn2, * r1, * r2;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(ma);\n    mpz_init(mb);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n    mpz_init(mn2);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < limbs; c++)\n                {\n                    mpn_rrandom(&x, state, 1);\n                    mpn_rrandom(&y, state, 1);\n                    x %= limbs;\n                    y %= limbs;\n\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpn_rrandom(nn1, state, limbs);\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_butterfly_lshB(r1, r2, nn1, nn2, limbs, x, y);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, 
m2b, p);\n                    ref_butterfly_lshB(ma, mb, mn1, mn2, p, x, y);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"butterfly_lshB error a\\n\");\n                        gmp_printf(\"x = %Md, y = %Md, limbs = %ld\\n\", x, y, limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"butterfly_lshB error b\\n\");\n                        gmp_printf(\"x = %Md, y = %Md, limbs = %ld\\n\", x, y, limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(ma);\n    mpz_clear(mb);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n    mpz_clear(mn2);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-butterfly_rshB.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_butterfly_rshB(mpz_t t, mpz_t u, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t x, mp_size_t y)\n{\n   mpz_t mult1, mult2;\n\n   mpz_init(mult1);\n   
mpz_init(mult2);\n\n   mpz_set_ui(mult1, 1);\n   mpz_mul_2exp(mult1, mult1, x*GMP_LIMB_BITS);\n   mpz_invert(mult1, mult1, p);\n   mpz_set_ui(mult2, 1);\n   mpz_mul_2exp(mult2, mult2, y*GMP_LIMB_BITS);\n   mpz_invert(mult2, mult2, p);\n   mpz_mul(mult1, mult1, i1);\n   mpz_mul(mult2, mult2, i2);\n   mpz_add(t, mult1, mult2);\n   mpz_sub(u, mult1, mult2);\n   mpz_mod(t, t, p);\n   mpz_mod(u, u, p);\n\n   mpz_clear(mult1);\n   mpz_clear(mult2);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mp_limb_t x, y;\n    mpz_t p, ma, mb, m2a, m2b, mn1, mn2;\n    mp_limb_t * nn1, * nn2, * r1, * r2;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(ma);\n    mpz_init(mb);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n    mpz_init(mn2);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < limbs; c++)\n                {\n                    mpn_rrandom(&x, state, 1);\n                    x %= limbs;\n                    mpn_rrandom(&y, state, 1);\n                    y %= limbs;\n                    \n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpn_rrandom(nn1, state, limbs);\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, 
 limbs);\n                    set_p(p, n, w);\n            \n                    mpir_butterfly_rshB(r1, r2, nn1, nn2, limbs, x, y);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_butterfly_rshB(ma, mb, mn1, mn2, p, x, y);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"butterfly_rshB error a\\n\");\n                        gmp_printf(\"x = %Md, y = %Md, limbs = %ld\\n\", x, y, limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"butterfly_rshB error b\\n\");\n                        gmp_printf(\"x = %Md, y = %Md, limbs = %ld\\n\", x, y, limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(ma);\n    mpz_clear(mb);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n    mpz_clear(mn2);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-butterfly_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_fft_butterfly_sqrt2(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t i, mp_size_t limbs, mp_size_t w)\n{\n   mpz_sub(t, i1, i2);\n   
mpz_mul_2exp(t, t, i*(w/2) + i/2);\n   mpz_mul_2exp(s, t, 3*limbs*GMP_LIMB_BITS/4);\n   mpz_mul_2exp(t, t, limbs*GMP_LIMB_BITS/4);\n   mpz_sub(t, s, t);\n   mpz_add(s, i1, i2);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nvoid ref_ifft_butterfly_sqrt2(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n                                 mpz_t p, mp_size_t i, mp_size_t n, mp_size_t limbs, mp_size_t w)\n{\n   mpz_mul_2exp(s, i2, 2*n*w - i*(w/2) - 1 - i/2);\n   mpz_mul_2exp(t, s, 3*limbs*GMP_LIMB_BITS/4);\n   mpz_mul_2exp(s, s, limbs*GMP_LIMB_BITS/4);\n   mpz_sub(i2, t, s);\n   mpz_add(s, i1, i2);\n   mpz_sub(t, i1, i2);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mpz_t p, ma, mb, m2a, m2b, mn1, mn2;\n    mp_limb_t * nn1, * nn2, * r1, * r2, * temp;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(ma);\n    mpz_init(mb);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n    mpz_init(mn2);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                if ((w & 1) == 0) continue; /* w must be odd here */\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 1; c < 2*n; c+=2)\n                {\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    temp = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n           
         mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_fft_butterfly_sqrt2(r1, r2, nn1, nn2, c, limbs, w, temp);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_fft_butterfly_sqrt2(ma, mb, mn1, mn2, p, c, limbs, w);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly_sqrt2 error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly_sqrt2 error b\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(temp);\n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n           
 {\n                n = bits/k;\n                w = j*k;\n                if ((w & 1) == 0) continue; /* w must be odd here */\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 1; c < 2*n; c+=2)\n                {\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    temp = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_ifft_butterfly_sqrt2(r1, r2, nn1, nn2, c, limbs, w, temp);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_ifft_butterfly_sqrt2(ma, mb, mn1, mn2, p, c, n, limbs, w);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly_sqrt2 error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly_sqrt2 error b\\n\");\n                        
printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    free(temp);\n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(ma);\n    mpz_clear(mb);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n    mpz_clear(mn2);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-butterfly_twiddle.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nvoid ref_fft_butterfly_twiddle(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n                   mpz_t p, mp_size_t i, mp_size_t w, mp_bitcnt_t b1, mp_bitcnt_t b2)\n{\n   mpz_add(s, i1, i2);\n 
  mpz_sub(t, i1, i2);\n   mpz_mul_2exp(s, s, b1);\n   mpz_mul_2exp(t, t, b2);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nvoid ref_ifft_butterfly_twiddle(mpz_t s, mpz_t t, mpz_t i1, mpz_t i2, \n      mpz_t p, mp_size_t i, mp_size_t n, mp_size_t w, mp_bitcnt_t b1, mp_bitcnt_t b2)\n{\n   mpz_mul_2exp(i1, i1, 2*n*w - b1);\n   mpz_mul_2exp(i2, i2, 2*n*w - b2);\n   mpz_add(s, i1, i2);\n   mpz_sub(t, i1, i2);\n   mpz_mod(s, s, p);\n   mpz_mod(t, t, p);\n}\n\nint\nmain(void)\n{\n    mp_size_t c, bits, j, k, n, w, limbs;\n    mpz_t p, ma, mb, m2a, m2b, mn1, mn2;\n    mp_limb_t * nn1, * nn2, * r1, * r2;\n    mp_bitcnt_t b1, b2;\n   \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(p);\n    mpz_init(ma);\n    mpz_init(mb);\n    mpz_init(m2a);\n    mpz_init(m2b);\n    mpz_init(mn1);\n    mpz_init(mn2);\n   \n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < n; c++)\n                {\n                    mpn_rrandom(&b1, state, 1);\n                    b1 %= n * w;\n                    mpn_rrandom(&b2, state, 1);\n                    b2 %= n * w;\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n       
             mpir_fft_butterfly_twiddle(r1, r2, nn1, nn2, limbs, b1, b2);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_fft_butterfly_twiddle(ma, mb, mn1, mn2, p, c, w, b1, b2);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly_twiddle error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"fft_butterfly_twiddle error b\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    for (bits = GMP_LIMB_BITS; bits < 20*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 10; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n\n                limbs = (n*w)/GMP_LIMB_BITS;\n\n                for (c = 0; c < n; c++)\n                {\n                    
mpn_rrandom(&b1, state, 1);\n                    b1 %= n * w;\n                    mpn_rrandom(&b2, state, 1);\n                    b2 %= n * w;\n                    nn1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    nn2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r1 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r2 = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn1, state, limbs);\n                    mpir_random_fermat(nn2, state, limbs);\n                     \n                    mpir_fermat_to_mpz(mn1, nn1, limbs);\n                    mpir_fermat_to_mpz(mn2, nn2, limbs);\n                    set_p(p, n, w);\n            \n                    mpir_ifft_butterfly_twiddle(r1, r2, nn1, nn2, limbs, b1, b2);\n                    mpir_fermat_to_mpz(m2a, r1, limbs);\n                    mpir_fermat_to_mpz(m2b, r2, limbs);\n                    \n                    mpz_mod(m2a, m2a, p);\n                    mpz_mod(m2b, m2b, p);\n                    ref_ifft_butterfly_twiddle(ma, mb, mn1, mn2, p, c, n, w, b1, b2);\n\n                    if (mpz_cmp(ma, m2a) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly_twiddle error a\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        gmp_printf(\"want %Zx\\n\\n\", ma);\n                        gmp_printf(\"got  %Zx\\n\", m2a);\n                        abort();\n                    }\n                    if (mpz_cmp(mb, m2b) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"ifft_butterfly_twiddle error b\\n\");\n                        printf(\"limbs = %ld\\n\", limbs);\n                        printf(\"n = %ld, w = %ld, k = %ld, c = %ld\\n\", n, w, k, c);\n                        
gmp_printf(\"want %Zx\\n\\n\", mb);\n                        gmp_printf(\"got  %Zx\\n\", m2b);\n                        abort();\n                    }\n                    \n                    free(nn1);\n                    free(nn2);\n                    free(r1);\n                    free(r2);\n                }\n            }\n        }\n    }\n\n    mpz_clear(p);\n    mpz_clear(ma);\n    mpz_clear(mb);\n    mpz_clear(m2a);\n    mpz_clear(m2b);\n    mpz_clear(mn1);\n    mpz_clear(mn2);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-div_2expmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nint\nmain(void)\n{\n    mp_bitcnt_t bits;\n    mp_size_t j, k, n, w, limbs, d;\n    mp_limb_t * nn, * r;\n    mpz_t p, m1, m2, mn1, mn2;\n\n    gmp_randstate_t state;\n\n    
tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(m1);\n    mpz_init(m2);\n    mpz_init(mn1);\n    mpz_init(mn2);\n    mpz_init(p);\n\n    /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */\n    for (bits = GMP_LIMB_BITS; bits < 16*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 16; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                for (d = 0; d < GMP_LIMB_BITS; d++)\n                {\n                    n = bits/k;\n                    w = j*k;\n                    limbs = (n*w)/GMP_LIMB_BITS;\n            \n                    nn = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r  = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn, state, limbs);\n                    mpir_fermat_to_mpz(mn1, nn, limbs);\n                    set_p(p, n, w);\n            \n                    mpn_div_2expmod_2expp1(r, nn, limbs, d);\n                    mpir_fermat_to_mpz(m2, r, limbs);\n                    mpz_mod(m2, m2, p);\n                    \n                    mpz_mod(m1, mn1, p);\n                    mpz_mul_2exp(m2, m2, d);\n                    mpz_mod(m2, m2, p);\n                    \n                    if (mpz_cmp(m1, m2) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"mpn_div_2expmod_2expp1 error\\n\");\n                        gmp_printf(\"want %Zx\\n\\n\", m1);\n                        gmp_printf(\"got  %Zx\\n\", m2);\n                        abort();\n                    }\n\n                    free(nn);\n                    free(r);\n                }\n            }\n        }\n    }\n\n    mpz_clear(mn2);\n    mpz_clear(mn1);\n    mpz_clear(m2);\n    mpz_clear(m1);\n    mpz_clear(p);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-fft_ifft_mfa_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n\n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 13; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_size_t trunc;\n        
    mp_size_t n1 = (((mp_limb_t)1)<<(depth/2));\n            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n            mp_size_t size = limbs + 1;\n            mp_size_t i;\n            mp_limb_t * ptr;\n            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;\n        \n            mpn_rrandom((mp_ptr) &trunc, state, 1);\n            trunc = 2*n + trunc % (2 * n) + 1;\n            trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1));\n\n            ii = malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) \n            {\n                ii[i] = ptr;\n                mpir_random_fermat(ii[i], state, limbs);\n            }\n            t1 = ptr;\n            t2 = t1 + size;\n            s1 = t2 + size;\n   \n            for (i = 0; i < 4*n; i++)\n               mpn_normmod_2expp1(ii[i], limbs);\n    \n            jj = malloc(4*(n + n*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) \n            {\n                jj[i] = ptr;\n                mpn_copyi(jj[i], ii[i], size);\n            }\n   \n            mpir_fft_mfa_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);\n            mpir_ifft_mfa_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);\n            for (i = 0; i < trunc; i++)\n            {\n                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2);\n                mpn_normmod_2expp1(ii[i], limbs);\n            }\n\n            for (i = 0; i < trunc; i++)\n            {\n                if (mpn_cmp(ii[i], jj[i], size) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"n = %ld, trunc = %ld\\n\", n, trunc);\n                    printf(\"Error in entry %ld\\n\", i);\n                    abort();\n                }\n            }\n\n            free(ii);\n            free(jj);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-fft_ifft_negacyclic.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_size_t limbs = 
(n*w)/GMP_LIMB_BITS;\n            mp_size_t size = limbs + 1;\n            mp_size_t i;\n            mp_limb_t * ptr;\n            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;\n        \n            ii = malloc((2*(n + n*size) + 3*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) \n            {\n                ii[i] = ptr;\n                mpir_random_fermat(ii[i], state, limbs);\n            }\n            t1 = ptr;\n            t2 = t1 + size;\n            s1 = t2 + size;\n   \n            for (i = 0; i < 2*n; i++)\n               mpn_normmod_2expp1(ii[i], limbs);\n    \n            jj = malloc(2*(n + n*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) \n            {\n                jj[i] = ptr;\n                mpn_copyi(jj[i], ii[i], size);\n            }\n   \n            mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);\n            mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);\n            for (i = 0; i < 2*n; i++)\n            {\n                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);\n                mpn_normmod_2expp1(ii[i], limbs);\n            }\n\n            for (i = 0; i < 2*n; i++)\n            {\n                if (mpn_cmp(ii[i], jj[i], size) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"Error in entry %ld\\n\", i);\n                    abort();\n                }\n            }\n\n            free(ii);\n            free(jj);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-fft_ifft_radix2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_size_t limbs = 
(n*w)/GMP_LIMB_BITS;\n            mp_size_t size = limbs + 1;\n            mp_size_t i;\n            mp_limb_t * ptr;\n            mp_limb_t ** ii, ** jj, *t1, *t2;\n        \n            ii = malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) \n            {\n                ii[i] = ptr;\n                mpir_random_fermat(ii[i], state, limbs);\n            }\n            t1 = ptr;\n            t2 = t1 + size;\n   \n            for (i = 0; i < 2*n; i++)\n               mpn_normmod_2expp1(ii[i], limbs);\n    \n            jj = malloc(2*(n + n*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) \n            {\n                jj[i] = ptr;\n                mpn_copyi(jj[i], ii[i], size);\n            }\n   \n            mpir_fft_radix2(ii, n, w, &t1, &t2);\n            mpir_ifft_radix2(ii, n, w, &t1, &t2);\n            for (i = 0; i < 2*n; i++)\n            {\n                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);\n                mpn_normmod_2expp1(ii[i], limbs);\n            }\n\n            for (i = 0; i < 2*n; i++)\n            {\n                if (mpn_cmp(ii[i], jj[i], size) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"Error in entry %ld\\n\", i);\n                    abort();\n                }\n            }\n\n            free(ii);\n            free(jj);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-fft_ifft_trunc.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_limb_t trunc;\n    
        mp_size_t limbs;\n            mp_size_t size;\n            mp_size_t i;\n            mp_limb_t * ptr;\n            mp_limb_t ** ii, ** jj, *t1, *t2;\n\n            mpn_rrandom(&trunc, state, 1);\n            trunc = trunc % (2 * n) + 1;\n            limbs = (n*w)/GMP_LIMB_BITS;\n            size = limbs + 1;\n            trunc = 2*((trunc + 1)/2);\n\n            ii = malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) \n            {\n                ii[i] = ptr;\n                mpir_random_fermat(ii[i], state, limbs);\n            }\n            t1 = ptr;\n            t2 = t1 + size;\n   \n            for (i = 0; i < 2*n; i++)\n               mpn_normmod_2expp1(ii[i], limbs);\n    \n            jj = malloc(2*(n + n*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) \n            {\n                jj[i] = ptr;\n                mpn_copyi(jj[i], ii[i], size);\n            }\n   \n            mpir_fft_trunc(ii, n, w, &t1, &t2, trunc);\n            mpir_ifft_trunc(ii, n, w, &t1, &t2, trunc);\n            for (i = 0; i < trunc; i++)\n            {\n                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);\n                mpn_normmod_2expp1(ii[i], limbs);\n            }\n\n            for (i = 0; i < trunc; i++)\n            {\n                if (mpn_cmp(ii[i], jj[i], size) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"Error in entry %ld\\n\", i);\n                    abort();\n                }\n            }\n\n            free(ii);\n            free(jj);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-fft_ifft_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_limb_t trunc;\n    
        mp_size_t limbs = (n*w)/GMP_LIMB_BITS;\n            mp_size_t size = limbs + 1;\n            mp_size_t i;\n            mp_limb_t * ptr;\n            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;\n        \n            mpn_rrandom(&trunc, state, 1);\n            trunc = 2*n + trunc % (2 * n) + 1;\n            trunc = 2*((trunc + 1)/2);\n\n            ii = malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) \n            {\n                ii[i] = ptr;\n                mpir_random_fermat(ii[i], state, limbs);\n            }\n            t1 = ptr;\n            t2 = t1 + size;\n            s1 = t2 + size;\n   \n            for (i = 0; i < 4*n; i++)\n               mpn_normmod_2expp1(ii[i], limbs);\n    \n            jj = malloc(4*(n + n*size)*sizeof(mp_limb_t));\n            for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) \n            {\n                jj[i] = ptr;\n                mpn_copyi(jj[i], ii[i], size);\n            }\n   \n            mpir_fft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);\n            mpir_ifft_trunc_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);\n            for (i = 0; i < trunc; i++)\n            {\n                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2);\n                mpn_normmod_2expp1(ii[i], limbs);\n            }\n\n            for (i = 0; i < trunc; i++)\n            {\n                if (mpn_cmp(ii[i], jj[i], size) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"n = %ld, trunc = %ld\\n\", n, trunc);\n                    printf(\"Error in entry %ld\\n\", i);\n                    abort();\n                }\n            }\n\n            free(ii);\n            free(jj);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-mul_2expmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nint\nmain(void)\n{\n    mp_bitcnt_t bits;\n    mp_size_t j, k, n, w, limbs, d;\n    mp_limb_t * nn, * r;\n    mpz_t p, m1, m2, mn1, mn2;\n\n    gmp_randstate_t state;\n\n    
tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(m1);\n    mpz_init(m2);\n    mpz_init(mn1);\n    mpz_init(mn2);\n    mpz_init(p);\n\n    /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */\n    for (bits = GMP_LIMB_BITS; bits < 16*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 16; j++)\n        {\n            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)\n            {\n                for (d = 0; d < GMP_LIMB_BITS; d++)\n                {\n                    n = bits/k;\n                    w = j*k;\n                    limbs = (n*w)/GMP_LIMB_BITS;\n            \n                    nn = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    r  = malloc((limbs + 1)*sizeof(mp_limb_t));\n                    mpir_random_fermat(nn, state, limbs);\n                    mpir_fermat_to_mpz(mn1, nn, limbs);\n                    set_p(p, n, w);\n            \n                    mpn_mul_2expmod_2expp1(r, nn, limbs, d);\n                    mpir_fermat_to_mpz(m2, r, limbs);\n                    mpz_mod(m2, m2, p);\n                    \n                    mpz_mod(m1, mn1, p);\n                    mpz_mul_2exp(m1, m1, d);\n                    mpz_mod(m1, m1, p);\n                    \n                    if (mpz_cmp(m1, m2) != 0)\n                    {\n                        printf(\"FAIL:\\n\");\n                        printf(\"mpn_mul_2expmod_2expp1 error\\n\");\n                        gmp_printf(\"want %Zx\\n\\n\", m1);\n                        gmp_printf(\"got  %Zx\\n\", m2);\n                        abort();\n                    }\n\n                    /* nn and r are allocated afresh each d iteration, so free them here */\n                    free(nn);\n                    free(r);\n                }\n            }\n        }\n    }\n\n    mpz_clear(mn2);\n    mpz_clear(mn1);\n    mpz_clear(m2);\n    mpz_clear(m1);\n    mpz_clear(p);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-mul_fft_main.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 13; depth++)\n    {\n        for (w = 1; w <= 3 - (depth >= 12); w++)\n        {\n            int iter = 1 + 200*(depth <= 8) + 
80*(depth <= 9) + 10*(depth <= 10), i;\n            \n            for (i = 0; i < iter; i++)\n            {\n               mp_size_t n = (((mp_limb_t)1)<<depth);\n               mp_bitcnt_t bits1 = (n*w - (depth + 1))/2;\n               mp_size_t len1;\n               mp_size_t len2;\n\n               mp_bitcnt_t b1, b2;\n               mp_size_t n1, n2;\n               mp_size_t j;\n               mp_limb_t rr, * i1, *i2, *r1, *r2;\n\n               mpn_rrandom(&rr, state, 1);\n               len1 = 2*n + rr % (2 * n) + 1;\n               mpn_rrandom(&rr, state, 1);\n               len2 = 2*n + 2 - len1 + rr % (2 * n);\n               b1 = len1 * bits1;\n               if (len2 <= 0)\n               {\n                    mpn_rrandom(&rr, state, 1);\n                    len2 = 2*n + rr % (2 * n) + 1;\n               }\n               b2 = len2*bits1;\n               \n               n1 = (b1 - 1)/GMP_LIMB_BITS + 1;\n               n2 = (b2 - 1)/GMP_LIMB_BITS + 1;\n                    \n               if (n1 < n2) /* ensure b1 >= b2 */\n               {\n                  mp_size_t t = n1;\n                  mp_bitcnt_t tb = b1;\n                  n1 = n2;\n                  b1 = b2;\n                  n2 = t;\n                  b2 = tb;\n               }\n\n               i1 = malloc(3*(n1 + n2)*sizeof(mp_limb_t));\n               i2 = i1 + n1;\n               r1 = i2 + n2;\n               r2 = r1 + n1 + n2;\n  \n               mpn_urandomb(i1, state, b1);\n               mpn_urandomb(i2, state, b2);\n  \n               if (ABOVE_THRESHOLD (n1 + n2, 2*MUL_FFT_FULL_THRESHOLD) && \n                  n2 >= MUL_KARATSUBA_THRESHOLD && n1*5 <= n2*11)\n                   mpn_toom8h_mul(r2, i1, n1, i2, n2);\n               else\n                   mpn_mul(r2, i1, n1, i2, n2);\n               mpn_mul_fft_main(r1, i1, n1, i2, n2);\n               \n               for (j = 0; j < n1 + n2; j++)\n               {\n                   if (r1[j] != r2[j]) \n                   
{\n                       gmp_printf(\"error in limb %Md, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                       abort();\n                   }\n               }\n\n               free(i1);\n            }\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-mul_mfa_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 13; depth++)\n    {\n        for (w = 1; w <= 3 - (depth >= 12); w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            
mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; \n            mp_size_t int_limbs;\n            mp_limb_t trunc;\n            mp_bitcnt_t bits;\n            mp_size_t j;\n            mp_limb_t * i1, *i2, *r1, *r2;\n\n            mpn_rrandom(&trunc, state, 1);\n            trunc = 2 * n + 2 * (trunc % n) + 2; /* trunc is even */\n            bits = (trunc/2)*bits1;\n            int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;\n            i1 = malloc(6*int_limbs*sizeof(mp_limb_t));\n            i2 = i1 + int_limbs;\n            r1 = i2 + int_limbs;\n            r2 = r1 + 2*int_limbs;\n   \n            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);\n            mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);\n  \n            if (ABOVE_THRESHOLD (int_limbs, MUL_FFT_FULL_THRESHOLD))\n               mpn_toom8h_mul(r2, i1, int_limbs, i2, int_limbs);\n            else\n               mpn_mul(r2, i1, int_limbs, i2, int_limbs);\n            mpn_mul_mfa_trunc_sqrt2(r1, i1, int_limbs, i2, int_limbs, depth, w);\n            \n            for (j = 0; j < 2*int_limbs; j++)\n            {\n                if (r1[j] != r2[j]) \n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                    abort();\n                }\n            }\n\n            free(i1);\n        }\n    }\n\n    /* test squaring */\n    for (depth = 6; depth <= 13; depth++)\n    {\n        for (w = 1; w <= 3 - (depth >= 12); w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; \n            mp_limb_t trunc;\n            mp_bitcnt_t bits;\n            mp_size_t int_limbs;\n            mp_size_t j;\n            mp_limb_t * i1, *r1, *r2;\n        \n            mpn_rrandom(&trunc, state, 1);\n            trunc = 2*n + 2 * (trunc % n) + 2; /* trunc is even */\n            bits = (trunc/2)*bits1;\n            int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;\n            i1 = 
malloc(5*int_limbs*sizeof(mp_limb_t));\n            r1 = i1 + int_limbs;\n            r2 = r1 + 2*int_limbs;\n   \n            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);\n            \n            if (ABOVE_THRESHOLD (int_limbs, MUL_FFT_FULL_THRESHOLD))\n               mpn_toom8h_mul(r2, i1, int_limbs, i1, int_limbs);\n            else\n               mpn_mul(r2, i1, int_limbs, i1, int_limbs);\n            mpn_mul_mfa_trunc_sqrt2(r1, i1, int_limbs, i1, int_limbs, depth, w);\n            \n            for (j = 0; j < 2*int_limbs; j++)\n            {\n                if (r1[j] != r2[j]) \n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                    abort();\n                }\n            }\n\n            free(i1);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-mul_trunc_sqrt2.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    \n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_bitcnt_t bits1 = 
(n*w - (depth + 1))/2; \n            mp_limb_t trunc;\n            mp_bitcnt_t bits;\n            mp_size_t int_limbs;\n            mp_size_t j;\n            mp_limb_t * i1, *i2, *r1, *r2;\n        \n            mpn_rrandom(&trunc, state, 1);\n            trunc = 2 * n + 2 * (trunc % n) + 2; /* trunc is even */\n            bits = (trunc/2)*bits1;\n            int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;\n\n            i1 = malloc(6*int_limbs*sizeof(mp_limb_t));\n            i2 = i1 + int_limbs;\n            r1 = i2 + int_limbs;\n            r2 = r1 + 2*int_limbs;\n   \n            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);\n            mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);\n  \n            if (ABOVE_THRESHOLD (int_limbs, MUL_FFT_FULL_THRESHOLD))\n               mpn_toom8h_mul(r2, i1, int_limbs, i2, int_limbs);\n            else\n               mpn_mul(r2, i1, int_limbs, i2, int_limbs);\n            mpn_mul_trunc_sqrt2(r1, i1, int_limbs, i2, int_limbs, depth, w);\n            \n            for (j = 0; j < 2*int_limbs; j++)\n            {\n                if (r1[j] != r2[j]) \n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                    abort();\n                }\n            }\n\n            free(i1);\n        }\n    }\n\n    /* test squaring */\n    for (depth = 6; depth <= 12; depth++)\n    {\n        for (w = 1; w <= 5; w++)\n        {\n            mp_size_t n = (((mp_limb_t)1)<<depth);\n            mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; \n            mp_limb_t trunc;\n            mp_bitcnt_t bits;\n            mp_size_t int_limbs;\n            mp_size_t j;\n            mp_limb_t * i1, *r1, *r2;\n        \n            mpn_rrandom(&trunc, state, 1);\n            trunc = 2*n + 2 * (trunc % n) + 2; /* trunc is even */\n            bits = (trunc/2)*bits1;\n            int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;\n\n            i1 = malloc(5*int_limbs*sizeof(mp_limb_t));\n            
r1 = i1 + int_limbs;\n            r2 = r1 + 2*int_limbs;\n   \n            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);\n            \n            if (ABOVE_THRESHOLD (int_limbs, MUL_FFT_FULL_THRESHOLD))\n               mpn_toom8h_mul(r2, i1, int_limbs, i1, int_limbs);\n            else\n               mpn_mul(r2, i1, int_limbs, i1, int_limbs);\n            mpn_mul_trunc_sqrt2(r1, i1, int_limbs, i1, int_limbs, depth, w);\n            \n            for (j = 0; j < 2*int_limbs; j++)\n            {\n                if (r1[j] != r2[j]) \n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                    abort();\n                }\n            }\n\n            free(i1);\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-mulmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    mp_bitcnt_t depth, w;\n    int iters;\n\n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (iters = 0; iters < 100; iters++)\n    {\n        for (depth = 6; depth <= 18; depth++)\n        {\n            for (w = 1; w <= 2; w++)\n            {\n        
        mp_size_t n = (((mp_limb_t)1)<<depth);\n                mp_bitcnt_t bits = n*w;\n                mp_size_t int_limbs = bits/GMP_LIMB_BITS;\n                mp_size_t j;\n                mp_limb_t c, * i1, * i2, * r1, * r2, * tt;\n        \n                i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t));\n                i2 = i1 + int_limbs + 1;\n                r1 = i2 + int_limbs + 1;\n                r2 = r1 + int_limbs + 1;\n                tt = r2 + int_limbs + 1;\n\n                mpir_random_fermat(i1, state, int_limbs);\n                mpir_random_fermat(i2, state, int_limbs);\n                mpn_normmod_2expp1(i1, int_limbs);\n                mpn_normmod_2expp1(i2, int_limbs);\n\n                mpn_mulmod_Bexpp1(r2, i1, i2, n * w / GMP_LIMB_BITS, tt);\n                c = 2*i1[int_limbs] + i2[int_limbs];\n                c = mpn_mulmod_2expp1_basecase(r1, i1, i2, c, int_limbs*GMP_LIMB_BITS, tt);\n            \n                for (j = 0; j < int_limbs; j++)\n                {\n                    if (r1[j] != r2[j]) \n                    {\n                        gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                        abort();\n                    }\n                }\n\n                if (c != r2[int_limbs])\n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, c, r2[j]);\n                    abort();\n                }\n\n                free(i1);\n            }\n        }\n    }\n\n    /* test squaring */\n    for (iters = 0; iters < 100; iters++)\n    {\n        for (depth = 6; depth <= 18; depth++)\n        {\n            for (w = 1; w <= 2; w++)\n            {\n                mp_size_t n = (((mp_limb_t)1)<<depth);\n                mp_bitcnt_t bits = n*w;\n                mp_size_t int_limbs = bits/GMP_LIMB_BITS;\n                mp_size_t j;\n                mp_limb_t c, * i1, * r1, * r2, * tt;\n        \n                i1 = 
malloc(5*(int_limbs+1)*sizeof(mp_limb_t));\n                r1 = i1 + int_limbs + 1;\n                r2 = r1 + int_limbs + 1;\n               tt = r2 + int_limbs + 1;\n\n                mpir_random_fermat(i1, state, int_limbs);\n                mpn_normmod_2expp1(i1, int_limbs);\n                \n                mpn_mulmod_Bexpp1(r2, i1, i1, n * w / GMP_LIMB_BITS, tt);\n                c = i1[int_limbs] + 2*i1[int_limbs];\n                c = mpn_mulmod_2expp1_basecase(r1, i1, i1, c, int_limbs*GMP_LIMB_BITS, tt);\n            \n                for (j = 0; j < int_limbs; j++)\n                {\n                    if (r1[j] != r2[j]) \n                    {\n                        gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, r1[j], r2[j]);\n                        abort();\n                    }\n                }\n\n                if (c != r2[int_limbs])\n                {\n                    gmp_printf(\"error in limb %ld, %Mx != %Mx\\n\", j, c, r2[j]);\n                    abort();\n                }\n\n                free(i1);\n            }\n        }\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-normmod_2expp1.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* set p = 2^wn + 1 */\nvoid set_p(mpz_t p, mp_size_t n, mp_bitcnt_t w)\n{\n   mpz_set_ui(p, 1);\n   mpz_mul_2exp(p, p, n*w);\n   mpz_add_ui(p, p, 1);\n}\n\nint\nmain(void)\n{\n    mp_bitcnt_t bits;\n    mp_size_t j, k, n, w, limbs;\n    mp_limb_t * nn;\n    mpz_t p, m1, m2;\n\n    gmp_randstate_t state;\n\n    tests_start();\n    
fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    mpz_init(m1);\n    mpz_init(m2);\n    mpz_init(p);\n\n    /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */\n    for (bits = GMP_LIMB_BITS; bits < 32*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)\n    {\n        for (j = 1; j < 32; j++)\n        {\n            for (k = 1; k <= GMP_NUMB_BITS; k <<= 1)\n            {\n                n = bits/k;\n                w = j*k;\n                limbs = (n*w)/GMP_LIMB_BITS;\n            \n                nn = malloc((limbs + 1)*sizeof(mp_limb_t));\n                mpn_rrandom(nn, state, limbs + 1);\n                mpir_fermat_to_mpz(m1, nn, limbs);\n                set_p(p, n, w);\n            \n                mpn_normmod_2expp1(nn, limbs);\n                mpir_fermat_to_mpz(m2, nn, limbs);\n                mpz_mod(m1, m1, p);\n\n                if (mpz_cmp(m1, m2) != 0)\n                {\n                    printf(\"FAIL:\\n\");\n                    printf(\"mpn_normmod_2expp1 error\\n\");\n                    gmp_printf(\"want %Zx\\n\\n\", m1);\n                    gmp_printf(\"got  %Zx\\n\", m2);\n                    abort();\n                }\n\n                free(nn);\n            }\n        }\n    }\n\n    mpz_clear(m2);\n    mpz_clear(m1);\n    mpz_clear(p);\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/fft/t-split_combine_bits.c",
    "content": "/* \n\nCopyright 2009, 2011 William Hart. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are\npermitted provided that the following conditions are met:\n\n   1. Redistributions of source code must retain the above copyright notice, this list of\n      conditions and the following disclaimer.\n\n   2. Redistributions in binary form must reproduce the above copyright notice, this list\n      of conditions and the following disclaimer in the documentation and/or other materials\n      provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED\nWARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\nADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those of the\nauthors and should not be interpreted as representing official policies, either expressed\nor implied, of William Hart.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <mpir.h>\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain(void)\n{\n    int i;\n    mp_size_t j;\n\n    gmp_randstate_t state;\n\n    tests_start();\n    fflush(stdout);\n\n    gmp_randinit_default(state);\n\n    for (i = 0; i < 10000; i++)\n    {\n        mp_limb_t total_limbs;\n        mp_limb_t * in;\n        mp_limb_t * out;\n        mp_bitcnt_t bits;\n        mp_size_t limbs;\n      
  long length;\n        mp_limb_t ** poly;\n        \n        mpn_rrandom(&total_limbs, state, 1);\n        total_limbs = total_limbs % 1000 + 1;\n        in = malloc(total_limbs*sizeof(mp_limb_t));\n        out = calloc(total_limbs, sizeof(mp_limb_t));\n        mpn_rrandom(&bits, state, 1);\n        bits = bits % 200 + 1;\n        limbs = (2*bits - 1)/GMP_LIMB_BITS + 1;\n        length = (total_limbs*GMP_LIMB_BITS - 1)/bits + 1;\n        \n        poly = malloc(length*sizeof(mp_limb_t *));\n        for (j = 0; j < length; j++)\n           poly[j] = malloc((limbs + 1)*sizeof(mp_limb_t));\n\n        mpn_urandomb(in, state, total_limbs*GMP_LIMB_BITS);\n\n        mpir_fft_split_bits(poly, in, total_limbs, bits, limbs);\n        mpir_fft_combine_bits(out, poly, length, bits, limbs, total_limbs);\n        \n        for (j = 0; j < total_limbs; j++)\n        {\n           if (in[j] != out[j])\n           {\n              printf(\"FAIL:\\n\");\n              gmp_printf(\"Error in limb %ld, %Mu != %Mu\\n\", j, in[j], out[j]);\n              abort();\n           }\n        }\n\n        free(in);\n        free(out);\n\n        for (j = 0; j < length; j++)\n           free(poly[j]);\n\n        free(poly);\n    }\n\n    gmp_randclear(state);\n    \n    tests_end();\n    return 0;\n}\n"
  },
  {
    "path": "tests/memory.c",
    "content": "/* Memory allocation used during tests.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>  /* for abort */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n\n/* Each block allocated is a separate malloc, for the benefit of a redzoning\n   malloc debugger during development or when bug hunting.\n\n   Sizes passed when reallocating or freeing are checked (the default\n   routines don't care about these).\n\n   Memory leaks are checked by requiring that all blocks have been freed\n   when tests_memory_end() is called.  Test programs must be sure to have\n   \"clear\"s for all temporary variables used.  */\n\n\nstruct header {\n  void           *ptr;\n  size_t         size;\n  struct header  *next;\n};\n\nstruct header  *tests_memory_list = NULL;\n\n/* Return a pointer to a pointer to the found block (so it can be updated\n   when unlinking). 
*/\nstruct header **\ntests_memory_find (void *ptr)\n{\n  struct header  **hp;\n\n  for (hp = &tests_memory_list; *hp != NULL; hp = &((*hp)->next))\n    if ((*hp)->ptr == ptr)\n      return hp;\n\n  return NULL;\n}\n\nint\ntests_memory_valid (void *ptr)\n{\n  return (tests_memory_find (ptr) != NULL);\n}\n\nvoid *\ntests_allocate (size_t size)\n{\n  struct header  *h;\n\n  if (size == 0)\n    {\n      printf (\"tests_allocate(): attempt to allocate 0 bytes\\n\");\n      abort ();\n    }\n\n  h = (struct header *) __gmp_default_allocate (sizeof (*h));\n  h->next = tests_memory_list;\n  tests_memory_list = h;\n\n  h->size = size;\n  h->ptr = __gmp_default_allocate (size);\n  return h->ptr;\n}\n\nvoid *\ntests_reallocate (void *ptr, size_t old_size, size_t new_size)\n{\n  struct header  **hp, *h;\n\n  if (new_size == 0)\n    {\n      printf (\"tests_reallocate(): attempt to reallocate %p to 0 bytes\\n\",\n              ptr);\n      abort ();\n    }\n\n  hp = tests_memory_find (ptr);\n  if (hp == NULL)\n    {\n      printf (\"tests_reallocate(): attempt to reallocate bad pointer %p\\n\",\n              ptr);\n      abort ();\n    }\n  h = *hp;\n\n  if (h->size != old_size)\n    {\n      printf (\"tests_reallocate(): bad old size %lu, should be %lu\\n\",\n              old_size, h->size);\n      abort ();\n    }\n\n  h->size = new_size;\n  h->ptr = __gmp_default_reallocate (ptr, old_size, new_size);\n  return h->ptr;\n}\n\nstruct header **\ntests_free_find (void *ptr)\n{\n  struct header  **hp = tests_memory_find (ptr);\n  if (hp == NULL)\n    {\n      printf (\"tests_free(): attempt to free bad pointer %p\\n\",\n              ptr);\n      abort ();\n    }\n  return hp;\n}\n\nvoid\ntests_free_nosize (void *ptr)\n{\n  struct header  **hp = tests_free_find (ptr);\n  struct header  *h = *hp;\n\n  *hp = h->next;  /* unlink */\n\n  __gmp_default_free (ptr, h->size);\n  __gmp_default_free (h, sizeof (*h));\n}\n\nvoid\ntests_free (void *ptr, size_t size)\n{\n  struct header  
**hp = tests_free_find (ptr);\n  struct header  *h = *hp;\n\n  if (h->size != size)\n    {\n      printf (\"tests_free(): bad size %lu, should be %lu\\n\", size, h->size);\n      abort ();\n    }\n\n  tests_free_nosize (ptr);\n}\n\nvoid\ntests_memory_start (void)\n{\n  mp_set_memory_functions (tests_allocate, tests_reallocate, tests_free);\n}\n\nvoid\ntests_memory_end (void)\n{\n  if (tests_memory_list != NULL)\n    {\n      struct header  *h;\n      unsigned  count;\n\n      printf (\"tests_memory_end(): not all memory freed\\n\");\n\n      count = 0;\n      for (h = tests_memory_list; h != NULL; h = h->next)\n        count++;\n\n      printf (\"    %u blocks remaining\\n\", count);\n      abort ();\n    }\n}\n"
  },
  {
    "path": "tests/misc/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = t-locale t-printf t-scanf \nTESTS = $(check_PROGRAMS)\n\n# Temporary files used by the tests.  Removed automatically if the tests\n# pass, but ensure they're cleaned if they fail.\n#\nCLEANFILES = *.tmp\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/misc/t-locale.c",
    "content": "/* Test locale support, or attempt to do so.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#ifndef _GNU_SOURCE\n#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */\n#endif\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#if HAVE_NL_TYPES_H\n#include <nl_types.h>  /* for nl_item (on netbsd 1.4.1 at least) */\n#endif\n\n#if HAVE_LANGINFO_H\n#include <langinfo.h>  /* for nl_langinfo */\n#endif\n\n#if HAVE_LOCALE_H\n#include <locale.h>    /* for lconv */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nchar *decimal_point;\n\n/* Replace the libc localeconv with one we can manipulate. 
 */\n/*\n  The t-locale test fails on MinGW, MinGW-W64 and Microsoft C/C++\n  compilers because of their own 'localeconv' implementation.\n  The goal is to detect those compilers and skip 'localeconv'\n  redefinition so the 't-locale' test ends up in\n     printf(\"Test skipped...\")\n  which shows that replacing 'localeconv' is disabled (on Windows).\n  It's best in this case to use '__MINGW32__' macro, which is\n  common for all\n  MinGW (i686), MinGW-W64 (i686) and MinGW-W64 (x86_64), and '_MSC_VER'\n  for Microsoft C/C++ Compiler, details at\n  http://sourceforge.net/p/predef/wiki/Compilers/\n*/\n\n#if HAVE_LOCALECONV && ! (defined(__MINGW32__) || defined(_MSC_VER))\nstruct lconv *\nlocaleconv (void)\n{\n  static struct lconv  l;\n  l.decimal_point = decimal_point;\n  return &l;\n}\n#endif\n\n/* Replace the libc nl_langinfo with one we can manipulate. */\n#if HAVE_NL_LANGINFO\nchar *\nnl_langinfo (nl_item n)\n{\n#if defined (DECIMAL_POINT)\n  if (n == DECIMAL_POINT)\n    return decimal_point;\n#endif\n#if defined (RADIXCHAR)\n  if (n == RADIXCHAR)\n    return decimal_point;\n#endif\n  return \"\";\n}\n#endif\n\nvoid\ncheck_input (void)\n{\n  static char *point[] = {\n    \".\", \",\", \"WU\", \"STR\", \"ZTV***\"\n  };\n\n  static const struct {\n    const char  *str;\n    double      d;\n  } data[] = {\n\n    { \"1%s\",   1.0 },\n    { \"1%s0\",  1.0 },\n    { \"1%s00\", 1.0 },\n\n    { \"%s5\",    0.5 },\n    { \"0%s5\",   0.5 },\n    { \"00%s5\",  0.5 },\n    { \"00%s50\", 0.5 },\n\n    { \"1%s5\",    1.5 },\n    { \"1%s5e1\", 15.0 },\n  };\n\n  int     i, j, neg, ret;\n  char    str[128];\n  mpf_t   f;\n  double  d;\n\n  mpf_init (f);\n\n  for (i = 0; i < numberof (point); i++)\n    {\n      decimal_point = point[i];\n\n      for (neg = 0; neg <= 1; neg++)\n        {\n          for (j = 0; j < numberof (data); j++)\n            {\n              strcpy (str, neg ? 
\"-\" : \"\");\n              sprintf (str+strlen(str), data[j].str, decimal_point);\n\n              d = data[j].d;\n              if (neg)\n                d = -d;\n\n              mpf_set_d (f, 123.0);\n              if (mpf_set_str (f, str, 10) != 0)\n                {\n                  printf (\"mpf_set_str error\\n\");\n                  printf (\"  point  %s\\n\", decimal_point);\n                  printf (\"  str    %s\\n\", str);\n                  abort ();\n                }\n              if (mpf_cmp_d (f, d) != 0)\n                {\n                  printf    (\"mpf_set_str wrong result\\n\");\n                  printf    (\"  point  %s\\n\", decimal_point);\n                  printf    (\"  str    %s\\n\", str);\n                  mpf_trace (\"  f\", f);\n                  printf    (\"  d=%g\\n\", d);\n                  abort ();\n                }\n\n              mpf_set_d (f, 123.0);\n              ret = gmp_sscanf (str, \"%Ff\", f);\n              if (ret != 1)\n                {\n                  printf (\"gmp_sscanf wrong return value\\n\");\n                  printf (\"  point  %s\\n\", decimal_point);\n                  printf (\"  str    %s\\n\", str);\n                  printf (\"  ret    %d\\n\", ret);\n                  abort ();\n                }\n              if (mpf_cmp_d (f, d) != 0)\n                {\n                  printf    (\"gmp_sscanf wrong result\\n\");\n                  printf    (\"  point  %s\\n\", decimal_point);\n                  printf    (\"  str    %s\\n\", str);\n                  mpf_trace (\"  f\", f);\n                  printf    (\"  d=%g\\n\", d);\n                  abort ();\n                }\n            }\n        }\n    }\n  mpf_clear (f);\n}\n\nint\nmain (void)\n{\n  /* The localeconv replacement breaks printf \"%lu\" on SunOS 4, so we can't\n     print the seed in tests_rand_start().  Nothing random is used in this\n     program though, so just use the memory tests alone.  
*/\n  tests_memory_start ();\n\n  {\n    mpf_t  f;\n    char   buf[128];\n    mpf_init (f);\n    decimal_point = \",\";\n    mpf_set_d (f, 1.5);\n    gmp_snprintf (buf, sizeof(buf), \"%.1Ff\", f);\n    mpf_clear (f);\n    if (strcmp (buf, \"1,5\") != 0)\n      {\n        printf (\"Test skipped, replacing localeconv/nl_langinfo doesn't work\\n\");\n        goto done;\n      }\n  }\n\n  check_input ();\n\n done:\n  tests_memory_end ();\n  exit (0);\n}\n\n"
  },
  {
    "path": "tests/misc/t-printf.c",
    "content": "/* Test gmp_printf and related functions.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Usage: t-printf [-s]\n\n   -s  Check the data against the system printf, where possible.  This is\n       only an option since we don't want to fail if the system printf is\n       faulty or strange.  */\n\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stddef.h>    /* for ptrdiff_t */\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#if HAVE_OBSTACK_VPRINTF\n#define obstack_chunk_alloc tests_allocate\n#define obstack_chunk_free  tests_free_nosize\n#include <obstack.h>\n#endif\n\n#if HAVE_INTTYPES_H\n# include <inttypes.h> /* for intmax_t */\n#else\n# if HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#if HAVE_UNISTD_H\n#include <unistd.h>  /* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nint   option_check_printf = 0;\n\n\n#define CHECK_VFPRINTF_FILENAME  \"t-printf.tmp\"\nFILE  *check_vfprintf_fp;\n\n\n/* From any of the tests run here. 
*/\n#define MAX_OUTPUT  1024\n\n\nvoid\n#if HAVE_STDARG\ncheck_plain (const char *want, const char *fmt_orig, ...)\n#else\ncheck_plain (va_alist)\n     va_dcl\n#endif\n{\n  char        got[MAX_OUTPUT];\n  int         got_len, want_len;\n  size_t      fmtsize;\n  char        *fmt, *q;\n  const char  *p;\n  va_list     ap;\n#if HAVE_STDARG\n  va_start (ap, fmt_orig);\n#else\n  const char  *want;\n  const char  *fmt_orig;\n  va_start (ap);\n  want = va_arg (ap, const char *);\n  fmt_orig = va_arg (ap, const char *);\n#endif\n\n  if (! option_check_printf)\n    return;\n\n  fmtsize = strlen (fmt_orig) + 1;\n  fmt = (*__gmp_allocate_func) (fmtsize);\n\n  for (p = fmt_orig, q = fmt; *p != '\\0'; p++)\n    {\n      switch (*p) {\n      case 'a':\n      case 'A':\n        /* The exact value of the exponent isn't guaranteed in glibc, and it\n           and gmp_printf do slightly different things, so don't compare\n           directly. */\n        goto done;\n      case 'F':\n        if (p > fmt_orig && *(p-1) == '.')\n          goto done;  /* don't test the \"all digits\" cases */\n        /* discard 'F' type */\n        break;\n      case 'Z':\n        /* transmute */\n        *q++ = 'l';\n        break;\n      default:\n        *q++ = *p;\n        break;\n      }\n    }\n  *q = '\\0';\n\n  want_len = strlen (want);\n  ASSERT_ALWAYS (want_len < sizeof(got));\n\n  got_len = vsprintf (got, fmt, ap);\n\n  if (got_len != want_len || strcmp (got, want) != 0)\n    {\n      printf (\"wanted data doesn't match plain vsprintf\\n\");\n      printf (\"  fmt      |%s|\\n\", fmt);\n      printf (\"  got      |%s|\\n\", got);\n      printf (\"  want     |%s|\\n\", want);\n      printf (\"  got_len  %d\\n\", got_len);\n      printf (\"  want_len %d\\n\", want_len);\n      abort ();\n    }\n\n done:\n  (*__gmp_free_func) (fmt, fmtsize);\n}\n\nvoid\ncheck_vsprintf (const char *want, const char *fmt, va_list ap)\n{\n  char  got[MAX_OUTPUT];\n  int   got_len, want_len;\n\n  want_len = strlen 
(want);\n  got_len = gmp_vsprintf (got, fmt, ap);\n\n  if (got_len != want_len || strcmp (got, want) != 0)\n    {\n      printf (\"gmp_vsprintf wrong\\n\");\n      printf (\"  fmt      |%s|\\n\", fmt);\n      printf (\"  got      |%s|\\n\", got);\n      printf (\"  want     |%s|\\n\", want);\n      printf (\"  got_len  %d\\n\", got_len);\n      printf (\"  want_len %d\\n\", want_len);\n      abort ();\n    }\n}\n\nvoid\ncheck_vfprintf (const char *want, const char *fmt, va_list ap)\n{\n  char  got[MAX_OUTPUT];\n  int   got_len, want_len, fread_len;\n  long  ftell_len;\n\n  want_len = strlen (want);\n\n  rewind (check_vfprintf_fp);\n  got_len = gmp_vfprintf (check_vfprintf_fp, fmt, ap);\n  ASSERT_ALWAYS (got_len != -1);\n  ASSERT_ALWAYS (fflush (check_vfprintf_fp) == 0);\n\n  ftell_len = ftell (check_vfprintf_fp);\n  ASSERT_ALWAYS (ftell_len != -1);\n\n  rewind (check_vfprintf_fp);\n  ASSERT_ALWAYS (ftell_len <= sizeof(got));\n  fread_len = fread (got, 1, ftell_len, check_vfprintf_fp);\n\n  if (got_len != want_len\n      || ftell_len != want_len\n      || fread_len != want_len\n      || memcmp (got, want, want_len) != 0)\n    {\n      printf (\"gmp_vfprintf wrong\\n\");\n      printf (\"  fmt       |%s|\\n\", fmt);\n      printf (\"  got       |%.*s|\\n\", fread_len, got);\n      printf (\"  want      |%s|\\n\", want);\n      printf (\"  got_len   %d\\n\", got_len);\n      printf (\"  ftell_len %ld\\n\", ftell_len);\n      printf (\"  fread_len %d\\n\", fread_len);\n      printf (\"  want_len  %d\\n\", want_len);\n      abort ();\n    }\n}\n\nvoid\ncheck_vsnprintf (const char *want, const char *fmt, va_list ap)\n{\n  char    got[MAX_OUTPUT+1];\n  int     ret, got_len, want_len;\n  size_t  bufsize;\n\n  want_len = strlen (want);\n\n  bufsize = -1;\n  for (;;)\n    {\n      /* do 0 to 5, then want-5 to want+5 */\n      bufsize++;\n      if (bufsize > 5 && bufsize < want_len-5)\n        bufsize = want_len-5;\n      if (bufsize > want_len + 5)\n        break;\n      
ASSERT_ALWAYS (bufsize+1 <= sizeof (got));\n\n      got[bufsize] = '!';\n      ret = gmp_vsnprintf (got, bufsize, fmt, ap);\n\n      got_len = MIN (MAX(1,bufsize)-1, want_len);\n\n      if (got[bufsize] != '!')\n        {\n          printf (\"gmp_vsnprintf overwrote bufsize sentinel\\n\");\n          goto error;\n        }\n\n      if (ret != want_len)\n        {\n          printf (\"gmp_vsnprintf return value wrong\\n\");\n          goto error;\n        }\n\n      if (bufsize > 0)\n        {\n          if (memcmp (got, want, got_len) != 0 || got[got_len] != '\\0')\n            {\n              printf (\"gmp_vsnprintf wrong result string\\n\");\n            error:\n              printf (\"  fmt       |%s|\\n\", fmt);\n              printf (\"  bufsize   %lu\\n\", bufsize);\n              printf (\"  got       |%s|\\n\", got);\n              printf (\"  want      |%.*s|\\n\", got_len, want);\n              printf (\"  want full |%s|\\n\", want);\n              printf (\"  ret       %d\\n\", ret);\n              printf (\"  want_len  %d\\n\", want_len);\n              abort ();\n            }\n        }\n    }\n}\n\nvoid\ncheck_vasprintf (const char *want, const char *fmt, va_list ap)\n{\n  char  *got;\n  int   got_len, want_len;\n\n  want_len = strlen (want);\n  got_len = gmp_vasprintf (&got, fmt, ap);\n\n  if (got_len != want_len || strcmp (got, want) != 0)\n    {\n      printf (\"gmp_vasprintf wrong\\n\");\n      printf (\"  fmt      |%s|\\n\", fmt);\n      printf (\"  got      |%s|\\n\", got);\n      printf (\"  want     |%s|\\n\", want);\n      printf (\"  got_len  %d\\n\", got_len);\n      printf (\"  want_len %d\\n\", want_len);\n      abort ();\n    }\n  (*__gmp_free_func) (got, strlen(got)+1);\n}\n\nvoid\ncheck_obstack_vprintf (const char *want, const char *fmt, va_list ap)\n{\n#if HAVE_OBSTACK_VPRINTF\n  struct obstack  ob;\n  int   got_len, want_len, ob_len;\n  char  *got;\n\n  want_len = strlen (want);\n\n  obstack_init (&ob);\n  got_len = 
gmp_obstack_vprintf (&ob, fmt, ap);\n  got = obstack_base (&ob);\n  ob_len = obstack_object_size (&ob);\n\n  if (got_len != want_len\n      || ob_len != want_len\n      || memcmp (got, want, want_len) != 0)\n    {\n      printf (\"gmp_obstack_vprintf wrong\\n\");\n      printf (\"  fmt      |%s|\\n\", fmt);\n      printf (\"  got      |%s|\\n\", got);\n      printf (\"  want     |%s|\\n\", want);\n      printf (\"  got_len  %d\\n\", got_len);\n      printf (\"  ob_len   %d\\n\", ob_len);\n      printf (\"  want_len %d\\n\", want_len);\n      abort ();\n    }\n  obstack_free (&ob, NULL);\n#endif\n}\n\n\nvoid\n#if HAVE_STDARG\ncheck_one (const char *want, const char *fmt, ...)\n#else\ncheck_one (va_alist)\n     va_dcl\n#endif\n{\n  va_list ap;\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  const char  *want;\n  const char  *fmt;\n  va_start (ap);\n  want = va_arg (ap, const char *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  /* simplest first */\n  check_vsprintf (want, fmt, ap);\n  check_vfprintf (want, fmt, ap);\n  check_vsnprintf (want, fmt, ap);\n  check_vasprintf (want, fmt, ap);\n  check_obstack_vprintf (want, fmt, ap);\n}\n\n\n#define hex_or_octal_p(fmt)             \\\n  (strchr (fmt, 'x') != NULL            \\\n   || strchr (fmt, 'X') != NULL         \\\n   || strchr (fmt, 'o') != NULL)\n\nvoid\ncheck_z (void)\n{\n  static const struct {\n    const char  *fmt;\n    const char  *z;\n    const char  *want;\n  } data[] = {\n    { \"%Zd\", \"0\",    \"0\" },\n    { \"%Zd\", \"1\",    \"1\" },\n    { \"%Zd\", \"123\",  \"123\" },\n    { \"%Zd\", \"-1\",   \"-1\" },\n    { \"%Zd\", \"-123\", \"-123\" },\n\n    { \"%+Zd\", \"0\",      \"+0\" },\n    { \"%+Zd\", \"123\",  \"+123\" },\n    { \"%+Zd\", \"-123\", \"-123\" },\n\n    { \"%Zx\",  \"123\",   \"7b\" },\n    { \"%ZX\",  \"123\",   \"7B\" },\n    { \"%Zx\", \"-123\",  \"-7b\" },\n    { \"%ZX\", \"-123\",  \"-7B\" },\n    { \"%Zo\",  \"123\",  \"173\" },\n    { \"%Zo\", \"-123\", \"-173\" },\n\n    { 
\"%#Zx\",    \"0\",     \"0\" },\n    { \"%#ZX\",    \"0\",     \"0\" },\n    { \"%#Zx\",  \"123\",  \"0x7b\" },\n    { \"%#ZX\",  \"123\",  \"0X7B\" },\n    { \"%#Zx\", \"-123\", \"-0x7b\" },\n    { \"%#ZX\", \"-123\", \"-0X7B\" },\n\n    { \"%#Zo\",    \"0\",     \"0\" },\n    { \"%#Zo\",  \"123\",  \"0173\" },\n    { \"%#Zo\", \"-123\", \"-0173\" },\n\n    { \"%10Zd\",      \"0\", \"         0\" },\n    { \"%10Zd\",    \"123\", \"       123\" },\n    { \"%10Zd\",   \"-123\", \"      -123\" },\n\n    { \"%-10Zd\",     \"0\", \"0         \" },\n    { \"%-10Zd\",   \"123\", \"123       \" },\n    { \"%-10Zd\",  \"-123\", \"-123      \" },\n\n    { \"%+10Zd\",   \"123\", \"      +123\" },\n    { \"%+-10Zd\",  \"123\", \"+123      \" },\n    { \"%+10Zd\",  \"-123\", \"      -123\" },\n    { \"%+-10Zd\", \"-123\", \"-123      \" },\n\n    { \"%08Zd\",    \"0\", \"00000000\" },\n    { \"%08Zd\",  \"123\", \"00000123\" },\n    { \"%08Zd\", \"-123\", \"-0000123\" },\n\n    { \"%+08Zd\",    \"0\", \"+0000000\" },\n    { \"%+08Zd\",  \"123\", \"+0000123\" },\n    { \"%+08Zd\", \"-123\", \"-0000123\" },\n\n    { \"%#08Zx\",    \"0\", \"00000000\" },\n    { \"%#08Zx\",  \"123\", \"0x00007b\" },\n    { \"%#08Zx\", \"-123\", \"-0x0007b\" },\n\n    { \"%+#08Zx\",    \"0\", \"+0000000\" },\n    { \"%+#08Zx\",  \"123\", \"+0x0007b\" },\n    { \"%+#08Zx\", \"-123\", \"-0x0007b\" },\n\n    { \"%.0Zd\", \"0\", \"\" },\n    { \"%.1Zd\", \"0\", \"0\" },\n    { \"%.2Zd\", \"0\", \"00\" },\n    { \"%.3Zd\", \"0\", \"000\" },\n  };\n\n  int        i, j;\n  mpz_t      z;\n  char       *nfmt;\n  mp_size_t  nsize, zeros;\n\n  mpz_init (z);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (z, data[i].z, 0);\n\n      /* don't try negatives or forced sign in hex or octal */\n      if (mpz_fits_slong_p (z)\n          && ! 
(hex_or_octal_p (data[i].fmt)\n                && (strchr (data[i].fmt, '+') != NULL || mpz_sgn(z) < 0)))\n        {\n          check_plain (data[i].want, data[i].fmt, mpz_get_si (z));\n        }\n\n      check_one (data[i].want, data[i].fmt, z);\n\n      /* Same again, with %N and possibly some high zero limbs */\n      nfmt = __gmp_allocate_strdup (data[i].fmt);\n      for (j = 0; nfmt[j] != '\\0'; j++)\n        if (nfmt[j] == 'Z')\n          nfmt[j] = 'N';\n      for (zeros = 0; zeros <= 3; zeros++)\n        {\n          nsize = ABSIZ(z)+zeros;\n          MPZ_REALLOC (z, nsize);\n          nsize = (SIZ(z) >= 0 ? nsize : -nsize);\n          refmpn_zero (PTR(z)+ABSIZ(z), zeros);\n          check_one (data[i].want, nfmt, PTR(z), nsize);\n        }\n      __gmp_free_func (nfmt, strlen(nfmt)+1);\n    }\n\n  mpz_clear (z);\n}\n\nvoid\ncheck_q (void)\n{\n  static const struct {\n    const char  *fmt;\n    const char  *q;\n    const char  *want;\n  } data[] = {\n    { \"%Qd\",    \"0\",    \"0\" },\n    { \"%Qd\",    \"1\",    \"1\" },\n    { \"%Qd\",  \"123\",  \"123\" },\n    { \"%Qd\",   \"-1\",   \"-1\" },\n    { \"%Qd\", \"-123\", \"-123\" },\n    { \"%Qd\",  \"3/2\",  \"3/2\" },\n    { \"%Qd\", \"-3/2\", \"-3/2\" },\n\n    { \"%+Qd\", \"0\",      \"+0\" },\n    { \"%+Qd\", \"123\",  \"+123\" },\n    { \"%+Qd\", \"-123\", \"-123\" },\n    { \"%+Qd\", \"5/8\",  \"+5/8\" },\n    { \"%+Qd\", \"-5/8\", \"-5/8\" },\n\n    { \"%Qx\",  \"123\",   \"7b\" },\n    { \"%QX\",  \"123\",   \"7B\" },\n    { \"%Qx\",  \"15/16\", \"f/10\" },\n    { \"%QX\",  \"15/16\", \"F/10\" },\n    { \"%Qx\", \"-123\",  \"-7b\" },\n    { \"%QX\", \"-123\",  \"-7B\" },\n    { \"%Qx\", \"-15/16\", \"-f/10\" },\n    { \"%QX\", \"-15/16\", \"-F/10\" },\n    { \"%Qo\",  \"123\",  \"173\" },\n    { \"%Qo\", \"-123\", \"-173\" },\n    { \"%Qo\",  \"16/17\",  \"20/21\" },\n    { \"%Qo\", \"-16/17\", \"-20/21\" },\n\n    { \"%#Qx\",    \"0\",     \"0\" },\n    { \"%#QX\",    \"0\",     \"0\" },\n    { 
\"%#Qx\",  \"123\",  \"0x7b\" },\n    { \"%#QX\",  \"123\",  \"0X7B\" },\n    { \"%#Qx\",  \"5/8\",  \"0x5/0x8\" },\n    { \"%#QX\",  \"5/8\",  \"0X5/0X8\" },\n    { \"%#Qx\", \"-123\", \"-0x7b\" },\n    { \"%#QX\", \"-123\", \"-0X7B\" },\n    { \"%#Qx\", \"-5/8\", \"-0x5/0x8\" },\n    { \"%#QX\", \"-5/8\", \"-0X5/0X8\" },\n    { \"%#Qo\",    \"0\",     \"0\" },\n    { \"%#Qo\",  \"123\",  \"0173\" },\n    { \"%#Qo\", \"-123\", \"-0173\" },\n    { \"%#Qo\",  \"5/7\",  \"05/07\" },\n    { \"%#Qo\", \"-5/7\", \"-05/07\" },\n\n    /* zero denominator and showbase */\n    { \"%#10Qo\", \"0/0\",     \"       0/0\" },\n    { \"%#10Qd\", \"0/0\",     \"       0/0\" },\n    { \"%#10Qx\", \"0/0\",     \"       0/0\" },\n    { \"%#10Qo\", \"123/0\",   \"    0173/0\" },\n    { \"%#10Qd\", \"123/0\",   \"     123/0\" },\n    { \"%#10Qx\", \"123/0\",   \"    0x7b/0\" },\n    { \"%#10QX\", \"123/0\",   \"    0X7B/0\" },\n    { \"%#10Qo\", \"-123/0\",  \"   -0173/0\" },\n    { \"%#10Qd\", \"-123/0\",  \"    -123/0\" },\n    { \"%#10Qx\", \"-123/0\",  \"   -0x7b/0\" },\n    { \"%#10QX\", \"-123/0\",  \"   -0X7B/0\" },\n\n    { \"%10Qd\",      \"0\", \"         0\" },\n    { \"%-10Qd\",     \"0\", \"0         \" },\n    { \"%10Qd\",    \"123\", \"       123\" },\n    { \"%-10Qd\",   \"123\", \"123       \" },\n    { \"%10Qd\",   \"-123\", \"      -123\" },\n    { \"%-10Qd\",  \"-123\", \"-123      \" },\n\n    { \"%+10Qd\",   \"123\", \"      +123\" },\n    { \"%+-10Qd\",  \"123\", \"+123      \" },\n    { \"%+10Qd\",  \"-123\", \"      -123\" },\n    { \"%+-10Qd\", \"-123\", \"-123      \" },\n\n    { \"%08Qd\",    \"0\", \"00000000\" },\n    { \"%08Qd\",  \"123\", \"00000123\" },\n    { \"%08Qd\", \"-123\", \"-0000123\" },\n\n    { \"%+08Qd\",    \"0\", \"+0000000\" },\n    { \"%+08Qd\",  \"123\", \"+0000123\" },\n    { \"%+08Qd\", \"-123\", \"-0000123\" },\n\n    { \"%#08Qx\",    \"0\", \"00000000\" },\n    { \"%#08Qx\",  \"123\", \"0x00007b\" },\n    { \"%#08Qx\", \"-123\", 
\"-0x0007b\" },\n\n    { \"%+#08Qx\",    \"0\", \"+0000000\" },\n    { \"%+#08Qx\",  \"123\", \"+0x0007b\" },\n    { \"%+#08Qx\", \"-123\", \"-0x0007b\" },\n  };\n\n  int    i;\n  mpq_t  q;\n\n  mpq_init (q);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpq_set_str_or_abort (q, data[i].q, 0);\n      check_one (data[i].want, data[i].fmt, q);\n    }\n\n  mpq_clear (q);\n}\n\nvoid\ncheck_f (void)\n{\n  static const struct {\n    const char  *fmt;\n    const char  *f;\n    const char  *want;\n\n  } data[] = {\n\n    { \"%Ff\",    \"0\",    \"0.000000\" },\n    { \"%Ff\",  \"123\",  \"123.000000\" },\n    { \"%Ff\", \"-123\", \"-123.000000\" },\n\n    { \"%+Ff\",    \"0\",   \"+0.000000\" },\n    { \"%+Ff\",  \"123\", \"+123.000000\" },\n    { \"%+Ff\", \"-123\", \"-123.000000\" },\n\n    { \"%.0Ff\",    \"0\",    \"0\" },\n    { \"%.0Ff\",  \"123\",  \"123\" },\n    { \"%.0Ff\", \"-123\", \"-123\" },\n\n    { \"%8.0Ff\",    \"0\", \"       0\" },\n    { \"%8.0Ff\",  \"123\", \"     123\" },\n    { \"%8.0Ff\", \"-123\", \"    -123\" },\n\n    { \"%08.0Ff\",    \"0\", \"00000000\" },\n    { \"%08.0Ff\",  \"123\", \"00000123\" },\n    { \"%08.0Ff\", \"-123\", \"-0000123\" },\n\n    { \"%10.2Ff\",       \"0\", \"      0.00\" },\n    { \"%10.2Ff\",    \"0.25\", \"      0.25\" },\n    { \"%10.2Ff\",  \"123.25\", \"    123.25\" },\n    { \"%10.2Ff\", \"-123.25\", \"   -123.25\" },\n\n    { \"%-10.2Ff\",       \"0\", \"0.00      \" },\n    { \"%-10.2Ff\",    \"0.25\", \"0.25      \" },\n    { \"%-10.2Ff\",  \"123.25\", \"123.25    \" },\n    { \"%-10.2Ff\", \"-123.25\", \"-123.25   \" },\n\n    { \"%.2Ff\", \"0.00000000000001\", \"0.00\" },\n    { \"%.2Ff\", \"0.002\",            \"0.00\" },\n    { \"%.2Ff\", \"0.008\",            \"0.01\" },\n\n    { \"%.0Ff\", \"123.00000000000001\", \"123\" },\n    { \"%.0Ff\", \"123.2\",              \"123\" },\n    { \"%.0Ff\", \"123.8\",              \"124\" },\n\n    { \"%.0Ff\",  \"999999.9\", \"1000000\" },\n    { 
\"%.0Ff\", \"3999999.9\", \"4000000\" },\n\n    { \"%Fe\",    \"0\",  \"0.000000e+00\" },\n    { \"%Fe\",    \"1\",  \"1.000000e+00\" },\n    { \"%Fe\",  \"123\",  \"1.230000e+02\" },\n\n    { \"%FE\",    \"0\",  \"0.000000E+00\" },\n    { \"%FE\",    \"1\",  \"1.000000E+00\" },\n    { \"%FE\",  \"123\",  \"1.230000E+02\" },\n\n    { \"%Fe\",    \"0\",  \"0.000000e+00\" },\n    { \"%Fe\",    \"1\",  \"1.000000e+00\" },\n\n    { \"%.0Fe\",     \"10000000000\",    \"1e+10\" },\n    { \"%.0Fe\",    \"-10000000000\",   \"-1e+10\" },\n\n    { \"%.2Fe\",     \"10000000000\",  \"1.00e+10\" },\n    { \"%.2Fe\",    \"-10000000000\", \"-1.00e+10\" },\n\n    { \"%8.0Fe\",    \"10000000000\", \"   1e+10\" },\n    { \"%8.0Fe\",   \"-10000000000\", \"  -1e+10\" },\n\n    { \"%-8.0Fe\",   \"10000000000\", \"1e+10   \" },\n    { \"%-8.0Fe\",  \"-10000000000\", \"-1e+10  \" },\n\n    { \"%12.2Fe\",   \"10000000000\", \"    1.00e+10\" },\n    { \"%12.2Fe\",  \"-10000000000\", \"   -1.00e+10\" },\n\n    { \"%012.2Fe\",  \"10000000000\", \"00001.00e+10\" },\n    { \"%012.2Fe\", \"-10000000000\", \"-0001.00e+10\" },\n\n    { \"%Fg\",   \"0\", \"0\" },\n    { \"%Fg\",   \"1\", \"1\" },\n    { \"%Fg\",   \"-1\", \"-1\" },\n\n    { \"%.0Fg\", \"0\", \"0\" },\n    { \"%.0Fg\", \"1\", \"1\" },\n    { \"%.0Fg\", \"-1\", \"-1\" },\n\n    { \"%.1Fg\", \"100\", \"1e+02\" },\n    { \"%.2Fg\", \"100\", \"1e+02\" },\n    { \"%.3Fg\", \"100\", \"100\" },\n    { \"%.4Fg\", \"100\", \"100\" },\n\n    { \"%Fg\", \"0.001\",    \"0.001\" },\n    { \"%Fg\", \"0.0001\",   \"0.0001\" },\n    { \"%Fg\", \"0.00001\",  \"1e-05\" },\n    { \"%Fg\", \"0.000001\", \"1e-06\" },\n\n    { \"%.4Fg\", \"1.00000000000001\", \"1\" },\n    { \"%.4Fg\", \"100000000000001\",  \"1e+14\" },\n\n    { \"%.4Fg\", \"12345678\", \"1.235e+07\" },\n\n    { \"%Fa\", \"0\",\"0x0p+0\" },\n    { \"%FA\", \"0\",\"0X0P+0\" },\n\n    { \"%Fa\", \"1\",\"0x1p+0\" },\n    { \"%Fa\", \"65535\",\"0xf.fffp+12\" },\n    { \"%Fa\", 
\"65536\",\"0x1p+16\" },\n    { \"%F.10a\", \"65536\",\"0x1.0000000000p+16\" },\n    { \"%F.1a\", \"65535\",\"0x1.0p+16\" },\n    { \"%F.0a\", \"65535\",\"0x1p+16\" },\n\n    { \"%.2Ff\", \"0.99609375\", \"1.00\" },\n    { \"%.Ff\",  \"0.99609375\", \"0.99609375\" },\n    { \"%.Fe\",  \"0.99609375\", \"9.9609375e-01\" },\n    { \"%.Fg\",  \"0.99609375\", \"0.99609375\" },\n    { \"%.20Fg\",  \"1000000\", \"1000000\" },\n    { \"%.Fg\",  \"1000000\", \"1000000\" },\n\n    { \"%#.0Ff\", \"1\", \"1.\" },\n    { \"%#.0Fe\", \"1\", \"1.e+00\" },\n    { \"%#.0Fg\", \"1\", \"1.\" },\n\n    { \"%#.1Ff\", \"1\", \"1.0\" },\n    { \"%#.1Fe\", \"1\", \"1.0e+00\" },\n    { \"%#.1Fg\", \"1\", \"1.\" },\n\n    { \"%#.4Ff\", \"1234\", \"1234.0000\" },\n    { \"%#.4Fe\", \"1234\", \"1.2340e+03\" },\n    { \"%#.4Fg\", \"1234\", \"1234.\" },\n\n    { \"%#.8Ff\", \"1234\", \"1234.00000000\" },\n    { \"%#.8Fe\", \"1234\", \"1.23400000e+03\" },\n    { \"%#.8Fg\", \"1234\", \"1234.0000\" },\n\n  };\n\n  int     i;\n  mpf_t   f;\n  double  d;\n\n  mpf_init2 (f, 256L);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      if (data[i].f[0] == '0' && data[i].f[1] == 'x')\n        mpf_set_str_or_abort (f, data[i].f, 16);\n      else\n        mpf_set_str_or_abort (f, data[i].f, 10);\n\n      /* if mpf->double doesn't truncate, then expect same result */\n      d = mpf_get_d (f);\n      if (mpf_cmp_d (f, d) == 0)\n        check_plain (data[i].want, data[i].fmt, d);\n\n      check_one (data[i].want, data[i].fmt, f);\n    }\n\n  mpf_clear (f);\n}\n\n\nvoid\ncheck_limb (void)\n{\n  int        i;\n  mp_limb_t  limb;\n  mpz_t      z;\n  char       *s;\n\n  check_one (\"0\", \"%Md\", CNST_LIMB(0));\n  check_one (\"1\", \"%Md\", CNST_LIMB(1));\n\n  /* \"i\" many 1 bits, tested against mpz_get_str in decimal and hex */\n  limb = 1;\n  mpz_init_set_ui (z, 1L);\n  for (i = 1; i <= GMP_LIMB_BITS; i++)\n    {\n      s = mpz_get_str (NULL, 10, z);\n      check_one (s, \"%Mu\", limb);\n      
(*__gmp_free_func) (s, strlen (s) + 1);\n\n      s = mpz_get_str (NULL, 16, z);\n      check_one (s, \"%Mx\", limb);\n      (*__gmp_free_func) (s, strlen (s) + 1);\n\n      s = mpz_get_str (NULL, -16, z);\n      check_one (s, \"%MX\", limb);\n      (*__gmp_free_func) (s, strlen (s) + 1);\n\n      limb = 2*limb + 1;\n      mpz_mul_2exp (z, z, 1L);\n      mpz_add_ui (z, z, 1L);\n    }\n\n  mpz_clear (z);\n}\n\n\nvoid\ncheck_n (void)\n{\n  {\n    int  n = -1;\n    check_one (\"blah\", \"%nblah\", &n);\n    ASSERT_ALWAYS (n == 0);\n  }\n\n  {\n    int  n = -1;\n    check_one (\"hello \", \"hello %n\", &n);\n    ASSERT_ALWAYS (n == 6);\n  }\n\n  {\n    int  n = -1;\n    check_one (\"hello  world\", \"hello %n world\", &n);\n    ASSERT_ALWAYS (n == 6);\n  }\n\n#define CHECK_N(type, string)                           \\\n  do {                                                  \\\n    type  x[2];                                         \\\n    char  fmt[128];                                     \\\n                                                        \\\n    x[0] = ~ (type) 0;                                  \\\n    x[1] = ~ (type) 0;                                  \\\n    sprintf (fmt, \"%%d%%%sn%%d\", string);               \\\n    check_one (\"123456\", fmt, 123, &x[0], 456);         \\\n                                                        \\\n    /* should write whole of x[0] and none of x[1] */   \\\n    ASSERT_ALWAYS (x[0] == 3);                          \\\n    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);\t\t\\\n                                                        \\\n  } while (0)\n\n  CHECK_N (mp_limb_t, \"M\");\n  CHECK_N (char,      \"hh\");\n  CHECK_N (long,      \"l\");\n#if HAVE_LONG_LONG\n  CHECK_N (long long, \"L\");\n#endif\n#if HAVE_INTMAX_T\n  CHECK_N (intmax_t,  \"j\");\n#endif\n  CHECK_N (ptrdiff_t, \"t\");\n  CHECK_N (short,     \"h\");\n  CHECK_N (size_t,    \"z\");\n\n  {\n    mpz_t  x[2];\n    mpz_init_set_si (x[0], -987L);\n    
mpz_init_set_si (x[1],  654L);\n    check_one (\"123456\", \"%d%Zn%d\", 123, x[0], 456);\n    MPZ_CHECK_FORMAT (x[0]);\n    MPZ_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);\n    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);\n    mpz_clear (x[0]);\n    mpz_clear (x[1]);\n  }\n\n  {\n    mpq_t  x[2];\n    mpq_init (x[0]);\n    mpq_init (x[1]);\n    mpq_set_ui (x[0], -987L, 654L);\n    mpq_set_ui (x[1], 4115L, 226L);\n    check_one (\"123456\", \"%d%Qn%d\", 123, x[0], 456);\n    MPQ_CHECK_FORMAT (x[0]);\n    MPQ_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);\n    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);\n    mpq_clear (x[0]);\n    mpq_clear (x[1]);\n  }\n\n  {\n    mpf_t  x[2];\n    mpf_init (x[0]);\n    mpf_init (x[1]);\n    mpf_set_ui (x[0], -987L);\n    mpf_set_ui (x[1],  654L);\n    check_one (\"123456\", \"%d%Fn%d\", 123, x[0], 456);\n    MPF_CHECK_FORMAT (x[0]);\n    MPF_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);\n    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);\n    mpf_clear (x[0]);\n    mpf_clear (x[1]);\n  }\n\n  {\n    mp_limb_t  a[5];\n    mp_limb_t  a_want[numberof(a)];\n    mp_size_t  i;\n\n    a[0] = 123;\n    check_one (\"blah\", \"bl%Nnah\", a, (mp_size_t) 0);\n    ASSERT_ALWAYS (a[0] == 123);\n\n    MPN_ZERO (a_want, numberof (a_want));\n    for (i = 1; i < numberof (a); i++)\n      {\n        check_one (\"blah\", \"bl%Nnah\", a, i);\n        a_want[0] = 2;\n        ASSERT_ALWAYS (mpn_cmp (a, a_want, i) == 0);\n      }\n  }\n}\n\n\nvoid\ncheck_misc (void)\n{\n  mpz_t  z;\n  mpf_t  f;\n\n  mpz_init (z);\n  mpf_init2 (f, 128L);\n\n  check_one (\"!\", \"%c\", '!');\n\n  check_one (\"hello world\", \"hello %s\", \"world\");\n  check_one (\"hello:\", \"%s:\", \"hello\");\n  mpz_set_ui (z, 0L);\n  check_one (\"hello0\", \"%s%Zd\", \"hello\", z, z);\n\n  {\n    static char  xs[801];\n    memset (xs, 'x', sizeof(xs)-1);\n    check_one (xs, \"%s\", xs);\n  }\n\n  
mpz_set_ui (z, 12345L);\n  check_one (\"     12345\", \"%*Zd\", 10, z);\n  check_one (\"0000012345\", \"%0*Zd\", 10, z);\n  check_one (\"12345     \", \"%*Zd\", -10, z);\n  check_one (\"12345 and 678\", \"%Zd and %d\", z, 678);\n  check_one (\"12345,1,12345,2,12345\", \"%Zd,%d,%Zd,%d,%Zd\", z, 1, z, 2, z);\n\n  /* from the glibc info docs */\n  mpz_set_si (z, 0L);\n  check_one (\"|    0|0    |   +0|+0   |    0|00000|     |   00|0|\",\n             \"|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|\",\n             /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);\n  mpz_set_si (z, 1L);\n  check_one (\"|    1|1    |   +1|+1   |    1|00001|    1|   01|1|\",\n             \"|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|\",\n             /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);\n  mpz_set_si (z, -1L);\n  check_one (\"|   -1|-1   |   -1|-1   |   -1|-0001|   -1|  -01|-1|\",\n             \"|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|\",\n             /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);\n  mpz_set_si (z, 100000L);\n  check_one (\"|100000|100000|+100000|+100000| 100000|100000|100000|100000|100000|\",\n             \"|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|\",\n             /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);\n  mpz_set_si (z, 0L);\n  check_one (\"|    0|    0|    0|    0|    0|    0|  00000000|\",\n             \"|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|\",\n             /**/ z,   z,   z,    z,    z,    z,       z);\n  mpz_set_si (z, 1L);\n  check_one (\"|    1|    1|    1|   01|  0x1|  0X1|0x00000001|\",\n             \"|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|\",\n             /**/ z,   z,   z,    z,    z,    z,       z);\n  mpz_set_si (z, 100000L);\n  check_one (\"|303240|186a0|186A0|0303240|0x186a0|0X186A0|0x000186a0|\",\n             \"|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|\",\n             /**/ z,   z,   z,    z,    z,    z,       z);\n\n  /* %zd 
for size_t won't be available on old systems, and running something\n     to see if it works might be bad, so only try it on glibc, and only on a\n     new enough version (glibc 2.0 doesn't have %zd) */\n#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)\n  mpz_set_ui (z, 789L);\n  check_one (\"456 789 blah\", \"%zd %Zd blah\", (size_t) 456, z);\n#endif\n\n  mpz_clear (z);\n  mpf_clear (f);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  if (argc > 1 && strcmp (argv[1], \"-s\") == 0)\n    option_check_printf = 1;\n\n  tests_start ();\n  check_vfprintf_fp = fopen (CHECK_VFPRINTF_FILENAME, \"w+\");\n  ASSERT_ALWAYS (check_vfprintf_fp != NULL);\n\n  check_z ();\n  check_q ();\n  check_f ();\n  check_limb ();\n  check_n ();\n  check_misc ();\n\n  ASSERT_ALWAYS (fclose (check_vfprintf_fp) == 0);\n  unlink (CHECK_VFPRINTF_FILENAME);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/misc/t-scanf.c",
    "content": "/* Test gmp_scanf and related functions.\n\nCopyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Usage: t-scanf [-s]\n\n   -s  Check the data against the system scanf, where possible.  This is\n       only an option since we don't want to fail if the system scanf is\n       faulty or strange.\n\n   There's some fairly unattractive repetition between check_z, check_q and\n   check_f, but enough differences to make a common loop or a set of macros\n   seem like too much trouble. 
*/\n\n\n#include \"config.h\"\n\n#if HAVE_STDARG\n#include <stdarg.h>\n#else\n#include <varargs.h>\n#endif\n\n#include <stddef.h>    /* for ptrdiff_t */\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#if HAVE_INTTYPES_H\n# include <inttypes.h> /* for intmax_t */\n#else\n# if HAVE_STDINT_H\n#  include <stdint.h>\n# endif\n#endif\n\n#if HAVE_UNISTD_H\n#include <unistd.h>  /* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define TEMPFILE  \"t-scanf.tmp\"\n\nint   option_libc_scanf = 0;\n\ntypedef int (*fun_t)(const char *, const char *, void *, void *);\n\n\n/* This problem was seen on powerpc7450-apple-darwin7.0.0, sscanf returns 0\n   where it should return EOF.  A workaround in gmp_sscanf would be a bit\n   tedious, and since this is a rather obvious libc bug, quite likely\n   affecting other programs, we'll just suppress affected tests for now.  */\nint\ntest_sscanf_eof_ok (void)\n{\n  static int  result = -1;\n\n  if (result == -1)\n    {\n      int  x;\n      if (sscanf (\"\", \"%d\", &x) == EOF)\n        {\n          result = 1;\n        }\n      else\n        {\n          printf (\"Warning, sscanf(\\\"\\\",\\\"%%d\\\",&x) doesn't return EOF.\\n\");\n          printf (\"This affects gmp_sscanf, tests involving it will be suppressed.\\n\");\n          printf (\"You should try to get a fix for your libc.\\n\");\n          result = 0;\n        }\n    }\n  return result;\n}\n\n\n/* Convert fmt from a GMP scanf format string to an equivalent for a plain\n   libc scanf, for example \"%Zd\" becomes \"%ld\".  Return 1 if this succeeds,\n   0 if it cannot (or should not) be done.  */\nint\nlibc_scanf_convert (char *fmt)\n{\n  char  *p = fmt;\n\n  if (! 
option_libc_scanf)\n    return 0;\n\n  for ( ; *fmt != '\\0'; fmt++)\n    {\n      switch (*fmt) {\n      case 'F':\n      case 'Q':\n      case 'Z':\n        /* transmute */\n        *p++ = 'l';\n        break;\n      default:\n        *p++ = *fmt;\n        break;\n      }\n    }\n  *p = '\\0';\n  return 1;\n}\n\n\nlong  got_ftell;\nint   fromstring_next_c;\n\n/* Call gmp_fscanf, reading the \"input\" string data provided. */\nint\n#if HAVE_STDARG\nfromstring_gmp_fscanf (const char *input, const char *fmt, ...)\n#else\nfromstring_gmp_fscanf (va_alist)\n     va_dcl\n#endif\n{\n  va_list  ap;\n  FILE     *fp;\n  int      ret;\n#if HAVE_STDARG\n  va_start (ap, fmt);\n#else\n  const char    *input;\n  const char    *fmt;\n  va_start (ap);\n  input = va_arg (ap, const char *);\n  fmt = va_arg (ap, const char *);\n#endif\n\n  fp = fopen (TEMPFILE, \"w+\");\n  ASSERT_ALWAYS (fp != NULL);\n  ASSERT_ALWAYS (fputs (input, fp) != EOF);\n  ASSERT_ALWAYS (fflush (fp) == 0);\n  rewind (fp);\n\n  ret = gmp_vfscanf (fp, fmt, ap);\n  got_ftell = ftell (fp);\n  ASSERT_ALWAYS (got_ftell != -1L);\n\n  fromstring_next_c = getc (fp);\n\n  ASSERT_ALWAYS (fclose (fp) == 0);\n  va_end (ap);\n  return ret;\n}\n\n\nint\nfun_gmp_sscanf (const char *input, const char *fmt, void *a1, void *a2)\n{\n  if (a2 == NULL)\n    return gmp_sscanf (input, fmt, a1);\n  else\n    return gmp_sscanf (input, fmt, a1, a2);\n}\n\nint\nfun_gmp_fscanf (const char *input, const char *fmt, void *a1, void *a2)\n{\n  if (a2 == NULL)\n    return fromstring_gmp_fscanf (input, fmt, a1);\n  else\n    return fromstring_gmp_fscanf (input, fmt, a1, a2);\n}\n\n\nint\nfun_fscanf (const char *input, const char *fmt, void *a1, void *a2)\n{\n  FILE  *fp;\n  int   ret;\n\n  fp = fopen (TEMPFILE, \"w+\");\n  ASSERT_ALWAYS (fp != NULL);\n  ASSERT_ALWAYS (fputs (input, fp) != EOF);\n  ASSERT_ALWAYS (fflush (fp) == 0);\n  rewind (fp);\n\n  if (a2 == NULL)\n    ret = fscanf (fp, fmt, a1);\n  else\n    ret = fscanf (fp, fmt, a1, 
a2);\n\n  got_ftell = ftell (fp);\n  ASSERT_ALWAYS (got_ftell != -1L);\n\n  fromstring_next_c = getc (fp);\n\n  ASSERT_ALWAYS (fclose (fp) == 0);\n  return ret;\n}\n\n\n/* On various old systems, for instance HP-UX 9, the C library sscanf needs\n   to be able to write into the input string.  Ensure that this is possible,\n   when gcc is putting the test data into a read-only section.\n\n   Actually we ought to only need this under SSCANF_WRITABLE_INPUT from\n   configure, but it's just as easy to do it unconditionally, and in any\n   case this code is only executed under the -s option.  */\n\nint\nfun_sscanf (const char *input, const char *fmt, void *a1, void *a2)\n{\n  char    *input_writable;\n  size_t  size;\n  int     ret;\n\n  size = strlen (input) + 1;\n  input_writable = (*__gmp_allocate_func) (size);\n  memcpy (input_writable, input, size);\n\n  if (a2 == NULL)\n    ret = sscanf (input_writable, fmt, a1);\n  else\n    ret = sscanf (input_writable, fmt, a1, a2);\n\n  (*__gmp_free_func) (input_writable, size);\n  return ret;\n}\n\n\n/* whether the format string consists entirely of ignored fields */\nint\nfmt_allignore (const char *fmt)\n{\n  int  saw_star = 1;\n  for ( ; *fmt != '\\0'; fmt++)\n    {\n      switch (*fmt) {\n      case '%':\n        if (! 
saw_star)\n          return 0;\n        saw_star = 0;\n        break;\n      case '*':\n        saw_star = 1;\n        break;\n      }\n    }\n  return 1;\n}\n\nvoid\ncheck_z (void)\n{\n  static const struct {\n    const char  *fmt;\n    const char  *input;\n    const char  *want;\n    int         want_ret;\n    long        want_ftell;\n    int         want_upto;\n    int         not_glibc;\n\n  } data[] = {\n\n    { \"%Zd\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",    \"1\",    \"1\", 1, -1, -1 },\n    { \"%Zd\",  \"123\",  \"123\", 1, -1, -1 },\n    { \"%Zd\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",   \"+1\",    \"1\", 1, -1, -1 },\n    { \"%Zd\", \"+123\",  \"123\", 1, -1, -1 },\n    { \"%Zd\",   \"-0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",   \"-1\",   \"-1\", 1, -1, -1 },\n    { \"%Zd\", \"-123\", \"-123\", 1, -1, -1 },\n\n    { \"%Zo\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%Zo\",  \"173\",  \"123\", 1, -1, -1 },\n    { \"%Zo\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%Zo\", \"+173\",  \"123\", 1, -1, -1 },\n    { \"%Zo\",   \"-0\",    \"0\", 1, -1, -1 },\n    { \"%Zo\", \"-173\", \"-123\", 1, -1, -1 },\n\n    { \"%Zx\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%Zx\",   \"7b\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"7b\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%Zx\",  \"+7b\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",  \"+7b\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"-0\",   \"-0\", 1, -1, -1 },\n    { \"%Zx\",  \"-7b\", \"-123\", 1, -1, -1 },\n    { \"%Zx\",  \"-7b\", \"-123\", 1, -1, -1 },\n    { \"%ZX\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%ZX\",   \"7b\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"7b\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%ZX\",  \"+7b\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",  \"+7b\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"-0\",   \"-0\", 1, -1, -1 },\n    { \"%ZX\",  \"-7b\", \"-123\", 1, -1, -1 },\n    { 
\"%ZX\",  \"-7b\", \"-123\", 1, -1, -1 },\n    { \"%Zx\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%Zx\",   \"7B\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"7B\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%Zx\",  \"+7B\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",  \"+7B\",  \"123\", 1, -1, -1 },\n    { \"%Zx\",   \"-0\",   \"-0\", 1, -1, -1 },\n    { \"%Zx\",  \"-7B\", \"-123\", 1, -1, -1 },\n    { \"%Zx\",  \"-7B\", \"-123\", 1, -1, -1 },\n    { \"%ZX\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%ZX\",   \"7B\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"7B\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%ZX\",  \"+7B\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",  \"+7B\",  \"123\", 1, -1, -1 },\n    { \"%ZX\",   \"-0\",   \"-0\", 1, -1, -1 },\n    { \"%ZX\",  \"-7B\", \"-123\", 1, -1, -1 },\n    { \"%ZX\",  \"-7B\", \"-123\", 1, -1, -1 },\n\n    { \"%Zi\",    \"0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",    \"1\",    \"1\", 1, -1, -1 },\n    { \"%Zi\",  \"123\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"+1\",    \"1\", 1, -1, -1 },\n    { \"%Zi\", \"+123\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"-0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"-1\",   \"-1\", 1, -1, -1 },\n    { \"%Zi\", \"-123\", \"-123\", 1, -1, -1 },\n\n    { \"%Zi\",    \"00\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",  \"0173\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+00\",    \"0\", 1, -1, -1 },\n    { \"%Zi\", \"+0173\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"-00\",    \"0\", 1, -1, -1 },\n    { \"%Zi\", \"-0173\", \"-123\", 1, -1, -1 },\n\n    { \"%Zi\",    \"0x0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"0x7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"0x7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+0x0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",  \"+0x7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",  \"+0x7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   
\"-0x0\",   \"-0\", 1, -1, -1 },\n    { \"%Zi\",  \"-0x7b\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",  \"-0x7b\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",    \"0X0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"0X7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"0X7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+0X0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",  \"+0X7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",  \"+0X7b\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"-0X0\",   \"-0\", 1, -1, -1 },\n    { \"%Zi\",  \"-0X7b\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",  \"-0X7b\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",    \"0x0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"0x7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"0x7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+0x0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",  \"+0x7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",  \"+0x7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"-0x0\",   \"-0\", 1, -1, -1 },\n    { \"%Zi\",  \"-0x7B\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",  \"-0x7B\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",    \"0X0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",   \"0X7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"0X7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"+0X0\",    \"0\", 1, -1, -1 },\n    { \"%Zi\",  \"+0X7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",  \"+0X7B\",  \"123\", 1, -1, -1 },\n    { \"%Zi\",   \"-0X0\",   \"-0\", 1, -1, -1 },\n    { \"%Zi\",  \"-0X7B\", \"-123\", 1, -1, -1 },\n    { \"%Zi\",  \"-0X7B\", \"-123\", 1, -1, -1 },\n\n    { \"%Zd\",    \" 0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",   \"  0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",  \"   0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\",   \"\\t0\",    \"0\", 1, -1, -1 },\n    { \"%Zd\", \"\\t\\t0\",    \"0\", 1, -1, -1 },\n\n    { \"hello%Zd\",      \"hello0\",       \"0\", 1, -1, -1 },\n    { \"hello%Zd\",      \"hello 0\",      \"0\", 1, -1, -1 },\n    { \"hello%Zd\",      \"hello \\t0\",    \"0\", 1, -1, -1 },\n    { \"hello%Zdworld\", \"hello 
0world\", \"0\", 1, -1, -1 },\n\n    { \"hello%*Zd\",      \"hello0\",       \"-999\", 0, -1, -1 },\n    { \"hello%*Zd\",      \"hello 0\",      \"-999\", 0, -1, -1 },\n    { \"hello%*Zd\",      \"hello \\t0\",    \"-999\", 0, -1, -1 },\n    { \"hello%*Zdworld\", \"hello 0world\", \"-999\", 0, -1, -1 },\n\n    { \"%Zd\",    \"\",     \"-999\", -1, -1, -555 },\n    { \"%Zd\",    \" \",    \"-999\", -1, -1, -555 },\n    { \" %Zd\",   \"\",     \"-999\", -1, -1, -555 },\n    { \"xyz%Zd\", \"\",     \"-999\", -1, -1, -555 },\n\n    { \"%*Zd\",    \"\",     \"-999\", -1, -1, -555 },\n    { \" %*Zd\",   \"\",     \"-999\", -1, -1, -555 },\n    { \"xyz%*Zd\", \"\",     \"-999\", -1, -1, -555 },\n\n    { \"%Zd\",    \"xyz\",  \"0\",     0, 0, -555 },\n\n    /* match something, but invalid */\n    { \"%Zd\",    \"-\",    \"-999\",  0, 1, -555 },\n    { \"%Zd\",    \"+\",    \"-999\",  0, 1, -555 },\n    { \"xyz%Zd\", \"xyz-\", \"-999\",  0, 4, -555 },\n    { \"xyz%Zd\", \"xyz+\", \"-999\",  0, 4, -555 },\n    { \"%Zi\",    \"0x\",   \"-999\",  0, 2, -555 },\n    { \"%Zi\",    \"0X\",   \"-999\",  0, 2, -555 },\n    { \"%Zi\",    \"0x-\",  \"-999\",  0, 2, -555 },\n    { \"%Zi\",    \"0X+\",  \"-999\",  0, 2, -555 },\n    { \"%Zi\",    \"-0x\",  \"-999\",  0, 3, -555 },\n    { \"%Zi\",    \"-0X\",  \"-999\",  0, 3, -555 },\n    { \"%Zi\",    \"+0x\",  \"-999\",  0, 3, -555 },\n    { \"%Zi\",    \"+0X\",  \"-999\",  0, 3, -555 },\n\n    { \"%1Zi\",  \"1234\", \"1\",    1, 1, 1 },\n    { \"%2Zi\",  \"1234\", \"12\",   1, 2, 2 },\n    { \"%3Zi\",  \"1234\", \"123\",  1, 3, 3 },\n    { \"%4Zi\",  \"1234\", \"1234\", 1, 4, 4 },\n    { \"%5Zi\",  \"1234\", \"1234\", 1, 4, 4 },\n    { \"%6Zi\",  \"1234\", \"1234\", 1, 4, 4 },\n\n    { \"%1Zi\",  \"01234\", \"0\",     1, 1, 1 },\n    { \"%2Zi\",  \"01234\", \"01\",    1, 2, 2 },\n    { \"%3Zi\",  \"01234\", \"012\",   1, 3, 3 },\n    { \"%4Zi\",  \"01234\", \"0123\",  1, 4, 4 },\n    { \"%5Zi\",  \"01234\", \"01234\", 1, 5, 5 },\n   
 { \"%6Zi\",  \"01234\", \"01234\", 1, 5, 5 },\n    { \"%7Zi\",  \"01234\", \"01234\", 1, 5, 5 },\n\n    { \"%1Zi\",  \"0x1234\", \"0\",      1, 1, 1 },\n    { \"%2Zi\",  \"0x1234\", \"-999\",   0, 2, -555 },\n    { \"%3Zi\",  \"0x1234\", \"0x1\",    1, 3, 3 },\n    { \"%4Zi\",  \"0x1234\", \"0x12\",   1, 4, 4 },\n    { \"%5Zi\",  \"0x1234\", \"0x123\",  1, 5, 5 },\n    { \"%6Zi\",  \"0x1234\", \"0x1234\", 1, 6, 6 },\n    { \"%7Zi\",  \"0x1234\", \"0x1234\", 1, 6, 6 },\n    { \"%8Zi\",  \"0x1234\", \"0x1234\", 1, 6, 6 },\n\n    { \"%%xyz%Zd\",  \"%xyz123\",  \"123\", 1, -1, -1 },\n    { \"12%%34%Zd\", \"12%34567\", \"567\", 1, -1, -1 },\n    { \"%%%%%Zd\",   \"%%123\",    \"123\", 1, -1, -1 },\n\n    /* various subtle EOF cases */\n    { \"x\",       \"\",    \"-999\", EOF, 0, -555 },\n    { \" x\",      \"\",    \"-999\", EOF, 0, -555 },\n    { \"xyz\",     \"\",    \"-999\", EOF, 0, -555 },\n    { \" \",       \"\",    \"-999\",   0, 0,    0 },\n    { \" \",       \" \",   \"-999\",   0, 1,    1 },\n    { \"%*Zd%Zd\", \"\",    \"-999\", EOF, 0, -555 },\n    { \"%*Zd%Zd\", \"123\", \"-999\", EOF, 3, -555 },\n    { \"x\",       \"x\",   \"-999\",   0, 1,    1 },\n    { \"xyz\",     \"x\",   \"-999\", EOF, 1, -555 },\n    { \"xyz\",     \"xy\",  \"-999\", EOF, 2, -555 },\n    { \"xyz\",     \"xyz\", \"-999\",   0, 3,    3 },\n    { \"%Zn\",     \"\",    \"0\",      0, 0,    0 },\n    { \" %Zn\",    \"\",    \"0\",      0, 0,    0 },\n    { \" x%Zn\",   \"\",    \"-999\", EOF, 0, -555 },\n    { \"xyz%Zn\",  \"\",    \"-999\", EOF, 0, -555 },\n    { \" x%Zn\",   \"\",    \"-999\", EOF, 0, -555 },\n    { \" %Zn x\",  \" \",   \"-999\", EOF, 1, -555 },\n\n    /* these seem to tickle a bug in glibc 2.2.4 */\n    { \" x\",      \" \",   \"-999\", EOF, 1, -555, 1 },\n    { \" xyz\",    \" \",   \"-999\", EOF, 1, -555, 1 },\n    { \" x%Zn\",   \" \",   \"-999\", EOF, 1, -555, 1 },\n  };\n\n  int         i, j, ignore;\n  int         got_ret, want_ret, got_upto, want_upto;\n  
mpz_t       got, want;\n  long        got_l, want_ftell;\n  int         error = 0;\n  fun_t       fun;\n  const char  *name;\n  char        fmt[128];\n\n  mpz_init (got);\n  mpz_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (want, data[i].want, 0);\n\n      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));\n      strcpy (fmt, data[i].fmt);\n      strcat (fmt, \"%n\");\n\n      ignore = fmt_allignore (fmt);\n\n      for (j = 0; j <= 3; j++)\n        {\n          want_ret = data[i].want_ret;\n\n          want_ftell = data[i].want_ftell;\n          if (want_ftell == -1)\n            want_ftell = strlen (data[i].input);\n\n          want_upto = data[i].want_upto;\n          if (want_upto == -1)\n            want_upto = strlen (data[i].input);\n\n          switch (j) {\n          case 0:\n            name = \"gmp_sscanf\";\n            fun = fun_gmp_sscanf;\n            break;\n          case 1:\n            name = \"gmp_fscanf\";\n            fun = fun_gmp_fscanf;\n            break;\n          case 2:\n#ifdef __GLIBC__\n            if (data[i].not_glibc)\n              continue;\n#endif\n            if (! libc_scanf_convert (fmt))\n              continue;\n            name = \"standard sscanf\";\n            fun = fun_sscanf;\n            break;\n          case 3:\n#ifdef __GLIBC__\n            if (data[i].not_glibc)\n              continue;\n#endif\n            if (! 
libc_scanf_convert (fmt))\n              continue;\n            name = \"standard fscanf\";\n            fun = fun_fscanf;\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          got_upto = -555;\n          got_ftell = -1L;\n\n          switch (j) {\n          case 0:\n          case 1:\n            mpz_set_si (got, -999L);\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);\n            break;\n          case 2:\n          case 3:\n            got_l = -999L;\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);\n            mpz_set_si (got, got_l);\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          MPZ_CHECK_FORMAT (got);\n\n          if (got_ret != want_ret)\n            {\n              printf (\"%s wrong return value\\n\", name);\n              error = 1;\n            }\n          if (want_ret == 1 && mpz_cmp (want, got) != 0)\n            {\n              printf (\"%s wrong result\\n\", name);\n              error = 1;\n            }\n          if (got_upto != want_upto)\n            {\n              printf (\"%s wrong upto\\n\", name);\n              error = 1;\n            }\n          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)\n            {\n              printf (\"%s wrong ftell\\n\", name);\n              error = 1;\n            }\n          if (error)\n            {\n              printf    (\"  fmt   \\\"%s\\\"\\n\", data[i].fmt);\n              printf    (\"  input \\\"%s\\\"\\n\", data[i].input);\n              printf    (\"  ignore %d\\n\", ignore);\n              printf    (\"  ret   want=%d\\n\", want_ret);\n              printf    (\"  
      got =%d\\n\", got_ret);\n              mpz_trace (\"  value want\", want);\n              mpz_trace (\"        got \", got);\n              printf    (\"  upto  want =%d\\n\", want_upto);\n              printf    (\"        got  =%d\\n\", got_upto);\n              if (got_ftell != -1)\n                {\n                  printf    (\"  ftell want =%ld\\n\", want_ftell);\n                  printf    (\"        got  =%ld\\n\", got_ftell);\n                }\n              abort ();\n            }\n        }\n    }\n\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\nvoid\ncheck_q (void)\n{\n  static const struct {\n    const char  *fmt;\n    const char  *input;\n    const char  *want;\n    int         ret;\n    long        ftell;\n\n  } data[] = {\n\n    { \"%Qd\",    \"0\",    \"0\", 1, -1 },\n    { \"%Qd\",    \"1\",    \"1\", 1, -1 },\n    { \"%Qd\",  \"123\",  \"123\", 1, -1 },\n    { \"%Qd\",   \"+0\",    \"0\", 1, -1 },\n    { \"%Qd\",   \"+1\",    \"1\", 1, -1 },\n    { \"%Qd\", \"+123\",  \"123\", 1, -1 },\n    { \"%Qd\",   \"-0\",    \"0\", 1, -1 },\n    { \"%Qd\",   \"-1\",   \"-1\", 1, -1 },\n    { \"%Qd\", \"-123\", \"-123\", 1, -1 },\n\n    { \"%Qo\",    \"0\",    \"0\", 1, -1 },\n    { \"%Qo\",  \"173\",  \"123\", 1, -1 },\n    { \"%Qo\",   \"+0\",    \"0\", 1, -1 },\n    { \"%Qo\", \"+173\",  \"123\", 1, -1 },\n    { \"%Qo\",   \"-0\",    \"0\", 1, -1 },\n    { \"%Qo\", \"-173\", \"-123\", 1, -1 },\n\n    { \"%Qx\",    \"0\",    \"0\", 1, -1 },\n    { \"%Qx\",   \"7b\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"7b\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"+0\",    \"0\", 1, -1 },\n    { \"%Qx\",  \"+7b\",  \"123\", 1, -1 },\n    { \"%Qx\",  \"+7b\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"-0\",   \"-0\", 1, -1 },\n    { \"%Qx\",  \"-7b\", \"-123\", 1, -1 },\n    { \"%Qx\",  \"-7b\", \"-123\", 1, -1 },\n    { \"%QX\",    \"0\",    \"0\", 1, -1 },\n    { \"%QX\",   \"7b\",  \"123\", 1, -1 },\n    { \"%QX\",   \"7b\",  \"123\", 1, -1 },\n    { \"%QX\",   
\"+0\",    \"0\", 1, -1 },\n    { \"%QX\",  \"+7b\",  \"123\", 1, -1 },\n    { \"%QX\",  \"+7b\",  \"123\", 1, -1 },\n    { \"%QX\",   \"-0\",   \"-0\", 1, -1 },\n    { \"%QX\",  \"-7b\", \"-123\", 1, -1 },\n    { \"%QX\",  \"-7b\", \"-123\", 1, -1 },\n    { \"%Qx\",    \"0\",    \"0\", 1, -1 },\n    { \"%Qx\",   \"7B\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"7B\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"+0\",    \"0\", 1, -1 },\n    { \"%Qx\",  \"+7B\",  \"123\", 1, -1 },\n    { \"%Qx\",  \"+7B\",  \"123\", 1, -1 },\n    { \"%Qx\",   \"-0\",   \"-0\", 1, -1 },\n    { \"%Qx\",  \"-7B\", \"-123\", 1, -1 },\n    { \"%Qx\",  \"-7B\", \"-123\", 1, -1 },\n    { \"%QX\",    \"0\",    \"0\", 1, -1 },\n    { \"%QX\",   \"7B\",  \"123\", 1, -1 },\n    { \"%QX\",   \"7B\",  \"123\", 1, -1 },\n    { \"%QX\",   \"+0\",    \"0\", 1, -1 },\n    { \"%QX\",  \"+7B\",  \"123\", 1, -1 },\n    { \"%QX\",  \"+7B\",  \"123\", 1, -1 },\n    { \"%QX\",   \"-0\",   \"-0\", 1, -1 },\n    { \"%QX\",  \"-7B\", \"-123\", 1, -1 },\n    { \"%QX\",  \"-7B\", \"-123\", 1, -1 },\n\n    { \"%Qi\",    \"0\",    \"0\", 1, -1 },\n    { \"%Qi\",    \"1\",    \"1\", 1, -1 },\n    { \"%Qi\",  \"123\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"+1\",    \"1\", 1, -1 },\n    { \"%Qi\", \"+123\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"-1\",   \"-1\", 1, -1 },\n    { \"%Qi\", \"-123\", \"-123\", 1, -1 },\n\n    { \"%Qi\",    \"00\",    \"0\", 1, -1 },\n    { \"%Qi\",  \"0173\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+00\",    \"0\", 1, -1 },\n    { \"%Qi\", \"+0173\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-00\",    \"0\", 1, -1 },\n    { \"%Qi\", \"-0173\", \"-123\", 1, -1 },\n\n    { \"%Qi\",    \"0x0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"0x7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"0x7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+0x0\",    \"0\", 1, -1 },\n    { \"%Qi\",  \"+0x7b\",  \"123\", 1, -1 },\n    { \"%Qi\", 
 \"+0x7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-0x0\",   \"-0\", 1, -1 },\n    { \"%Qi\",  \"-0x7b\", \"-123\", 1, -1 },\n    { \"%Qi\",  \"-0x7b\", \"-123\", 1, -1 },\n    { \"%Qi\",    \"0X0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"0X7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"0X7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+0X0\",    \"0\", 1, -1 },\n    { \"%Qi\",  \"+0X7b\",  \"123\", 1, -1 },\n    { \"%Qi\",  \"+0X7b\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-0X0\",   \"-0\", 1, -1 },\n    { \"%Qi\",  \"-0X7b\", \"-123\", 1, -1 },\n    { \"%Qi\",  \"-0X7b\", \"-123\", 1, -1 },\n    { \"%Qi\",    \"0x0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"0x7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"0x7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+0x0\",    \"0\", 1, -1 },\n    { \"%Qi\",  \"+0x7B\",  \"123\", 1, -1 },\n    { \"%Qi\",  \"+0x7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-0x0\",   \"-0\", 1, -1 },\n    { \"%Qi\",  \"-0x7B\", \"-123\", 1, -1 },\n    { \"%Qi\",  \"-0x7B\", \"-123\", 1, -1 },\n    { \"%Qi\",    \"0X0\",    \"0\", 1, -1 },\n    { \"%Qi\",   \"0X7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"0X7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"+0X0\",    \"0\", 1, -1 },\n    { \"%Qi\",  \"+0X7B\",  \"123\", 1, -1 },\n    { \"%Qi\",  \"+0X7B\",  \"123\", 1, -1 },\n    { \"%Qi\",   \"-0X0\",   \"-0\", 1, -1 },\n    { \"%Qi\",  \"-0X7B\", \"-123\", 1, -1 },\n    { \"%Qi\",  \"-0X7B\", \"-123\", 1, -1 },\n\n    { \"%Qd\",    \" 0\",    \"0\", 1, -1 },\n    { \"%Qd\",   \"  0\",    \"0\", 1, -1 },\n    { \"%Qd\",  \"   0\",    \"0\", 1, -1 },\n    { \"%Qd\",   \"\\t0\",    \"0\", 1, -1 },\n    { \"%Qd\", \"\\t\\t0\",    \"0\", 1, -1 },\n\n    { \"%Qd\",  \"3/2\",   \"3/2\", 1, -1 },\n    { \"%Qd\", \"+3/2\",   \"3/2\", 1, -1 },\n    { \"%Qd\", \"-3/2\",  \"-3/2\", 1, -1 },\n\n    { \"%Qx\",  \"f/10\", \"15/16\", 1, -1 },\n    { \"%Qx\",  \"F/10\", \"15/16\", 1, -1 },\n    { \"%QX\",  \"f/10\", \"15/16\", 1, -1 },\n    { \"%QX\",  \"F/10\", \"15/16\", 1, 
-1 },\n\n    { \"%Qo\",  \"20/21\",  \"16/17\", 1, -1 },\n    { \"%Qo\", \"-20/21\", \"-16/17\", 1, -1 },\n\n    { \"%Qi\",    \"10/11\",  \"10/11\", 1, -1 },\n    { \"%Qi\",   \"+10/11\",  \"10/11\", 1, -1 },\n    { \"%Qi\",   \"-10/11\", \"-10/11\", 1, -1 },\n    { \"%Qi\",   \"010/11\",   \"8/11\", 1, -1 },\n    { \"%Qi\",  \"+010/11\",   \"8/11\", 1, -1 },\n    { \"%Qi\",  \"-010/11\",  \"-8/11\", 1, -1 },\n    { \"%Qi\",  \"0x10/11\",  \"16/11\", 1, -1 },\n    { \"%Qi\", \"+0x10/11\",  \"16/11\", 1, -1 },\n    { \"%Qi\", \"-0x10/11\", \"-16/11\", 1, -1 },\n\n    { \"%Qi\",    \"10/011\",  \"10/9\", 1, -1 },\n    { \"%Qi\",   \"+10/011\",  \"10/9\", 1, -1 },\n    { \"%Qi\",   \"-10/011\", \"-10/9\", 1, -1 },\n    { \"%Qi\",   \"010/011\",   \"8/9\", 1, -1 },\n    { \"%Qi\",  \"+010/011\",   \"8/9\", 1, -1 },\n    { \"%Qi\",  \"-010/011\",  \"-8/9\", 1, -1 },\n    { \"%Qi\",  \"0x10/011\",  \"16/9\", 1, -1 },\n    { \"%Qi\", \"+0x10/011\",  \"16/9\", 1, -1 },\n    { \"%Qi\", \"-0x10/011\", \"-16/9\", 1, -1 },\n\n    { \"%Qi\",    \"10/0x11\",  \"10/17\", 1, -1 },\n    { \"%Qi\",   \"+10/0x11\",  \"10/17\", 1, -1 },\n    { \"%Qi\",   \"-10/0x11\", \"-10/17\", 1, -1 },\n    { \"%Qi\",   \"010/0x11\",   \"8/17\", 1, -1 },\n    { \"%Qi\",  \"+010/0x11\",   \"8/17\", 1, -1 },\n    { \"%Qi\",  \"-010/0x11\",  \"-8/17\", 1, -1 },\n    { \"%Qi\",  \"0x10/0x11\",  \"16/17\", 1, -1 },\n    { \"%Qi\", \"+0x10/0x11\",  \"16/17\", 1, -1 },\n    { \"%Qi\", \"-0x10/0x11\", \"-16/17\", 1, -1 },\n\n    { \"hello%Qd\",      \"hello0\",         \"0\", 1, -1 },\n    { \"hello%Qd\",      \"hello 0\",        \"0\", 1, -1 },\n    { \"hello%Qd\",      \"hello \\t0\",      \"0\", 1, -1 },\n    { \"hello%Qdworld\", \"hello 0world\",   \"0\", 1, -1 },\n    { \"hello%Qd\",      \"hello3/2\",     \"3/2\", 1, -1 },\n\n    { \"hello%*Qd\",      \"hello0\",        \"-999/121\", 0, -1 },\n    { \"hello%*Qd\",      \"hello 0\",       \"-999/121\", 0, -1 },\n    { \"hello%*Qd\",      \"hello 
\\t0\",     \"-999/121\", 0, -1 },\n    { \"hello%*Qdworld\", \"hello 0world\",  \"-999/121\", 0, -1 },\n    { \"hello%*Qdworld\", \"hello3/2world\", \"-999/121\", 0, -1 },\n\n    { \"%Qd\",    \"\",     \"-999/121\", -1, -1 },\n    { \"%Qd\",   \" \",     \"-999/121\", -1, -1 },\n    { \" %Qd\",   \"\",     \"-999/121\", -1, -1 },\n    { \"xyz%Qd\", \"\",     \"-999/121\", -1, -1 },\n\n    { \"%*Qd\",    \"\",     \"-999/121\", -1, -1 },\n    { \" %*Qd\",   \"\",     \"-999/121\", -1, -1 },\n    { \"xyz%*Qd\", \"\",     \"-999/121\", -1, -1 },\n\n    /* match something, but invalid */\n    { \"%Qd\",    \"-\",     \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"+\",     \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"/-\",    \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"/+\",    \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"-/\",    \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"+/\",    \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"-/-\",   \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"-/+\",   \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"+/+\",   \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"/123\",  \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"-/123\", \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"+/123\", \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"123/\",  \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"123/-\", \"-999/121\",  0, 1 },\n    { \"%Qd\",    \"123/+\", \"-999/121\",  0, 1 },\n    { \"xyz%Qd\", \"xyz-\",  \"-999/121\",  0, 4 },\n    { \"xyz%Qd\", \"xyz+\",  \"-999/121\",  0, 4 },\n\n    { \"%1Qi\",  \"12/57\", \"1\",        1, 1 },\n    { \"%2Qi\",  \"12/57\", \"12\",       1, 2 },\n    { \"%3Qi\",  \"12/57\", \"-999/121\", 0, -1 },\n    { \"%4Qi\",  \"12/57\", \"12/5\",     1, 4 },\n    { \"%5Qi\",  \"12/57\", \"12/57\",    1, 5 },\n    { \"%6Qi\",  \"12/57\", \"12/57\",    1, 5 },\n    { \"%7Qi\",  \"12/57\", \"12/57\",    1, 5 },\n\n    { \"%1Qi\",  \"012/057\", \"0\",        1, 1 },\n    { \"%2Qi\",  \"012/057\", \"01\",       1, 2 },\n    { \"%3Qi\",  \"012/057\", 
\"012\",      1, 3 },\n    { \"%4Qi\",  \"012/057\", \"-999/121\", 0, -1 },\n    { \"%5Qi\",  \"012/057\", \"012/0\",    1, 5 },\n    { \"%6Qi\",  \"012/057\", \"012/5\",    1, 6 },\n    { \"%7Qi\",  \"012/057\", \"012/057\",  1, 7 },\n    { \"%8Qi\",  \"012/057\", \"012/057\",  1, 7 },\n    { \"%9Qi\",  \"012/057\", \"012/057\",  1, 7 },\n\n    { \"%1Qi\",  \"0x12/0x57\", \"0\",         1, 1 },\n    { \"%2Qi\",  \"0x12/0x57\", \"-999\",      0, 2 },\n    { \"%3Qi\",  \"0x12/0x57\", \"0x1\",       1, 3 },\n    { \"%4Qi\",  \"0x12/0x57\", \"0x12\",      1, 4 },\n    { \"%5Qi\",  \"0x12/0x57\", \"-999/121\",  0, 5 },\n    { \"%6Qi\",  \"0x12/0x57\", \"0x12/0\",    1, 6 },\n    { \"%7Qi\",  \"0x12/0x57\", \"-999/121\",  0, 7 },\n    { \"%8Qi\",  \"0x12/0x57\", \"0x12/0x5\",  1, 8 },\n    { \"%9Qi\",  \"0x12/0x57\", \"0x12/0x57\", 1, 9 },\n    { \"%10Qi\", \"0x12/0x57\", \"0x12/0x57\", 1, 9 },\n    { \"%11Qi\", \"0x12/0x57\", \"0x12/0x57\", 1, 9 },\n\n    { \"%Qd\",  \"xyz\", \"0\", 0, 0 },\n  };\n\n  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;\n  mpq_t       got, want;\n  long        got_l, want_ftell;\n  int         error = 0;\n  fun_t       fun;\n  const char  *name;\n  char        fmt[128];\n\n  mpq_init (got);\n  mpq_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpq_set_str_or_abort (want, data[i].want, 0);\n\n      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));\n      strcpy (fmt, data[i].fmt);\n      strcat (fmt, \"%n\");\n\n      ignore = (strchr (fmt, '*') != NULL);\n\n      for (j = 0; j <= 3; j++)\n        {\n          want_ret = data[i].ret;\n\n          want_ftell = data[i].ftell;\n          if (want_ftell == -1)\n            want_ftell = strlen (data[i].input);\n          want_upto = want_ftell;\n\n          if (want_ret == -1 || (want_ret == 0 && ! 
ignore))\n            {\n              want_ftell = -1;\n              want_upto = -555;\n            }\n\n          switch (j) {\n          case 0:\n            name = \"gmp_sscanf\";\n            fun = fun_gmp_sscanf;\n            break;\n          case 1:\n            name = \"gmp_fscanf\";\n            fun = fun_gmp_fscanf;\n            break;\n          case 2:\n            if (strchr (data[i].input, '/') != NULL)\n              continue;\n            if (! libc_scanf_convert (fmt))\n              continue;\n            name = \"standard sscanf\";\n            fun = fun_sscanf;\n            break;\n          case 3:\n            if (strchr (data[i].input, '/') != NULL)\n              continue;\n            if (! libc_scanf_convert (fmt))\n              continue;\n            name = \"standard fscanf\";\n            fun = fun_fscanf;\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          got_upto = -555;\n          got_ftell = -1;\n\n          switch (j) {\n          case 0:\n          case 1:\n            mpq_set_si (got, -999L, 121L);\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);\n            break;\n          case 2:\n          case 3:\n            got_l = -999L;\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);\n            mpq_set_si (got, got_l, (got_l == -999L ? 
121L : 1L));\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          MPZ_CHECK_FORMAT (mpq_numref (got));\n          MPZ_CHECK_FORMAT (mpq_denref (got));\n\n          if (got_ret != want_ret)\n            {\n              printf (\"%s wrong return value\\n\", name);\n              error = 1;\n            }\n          /* use direct mpz compares, since some of the test data is\n             non-canonical and can trip ASSERTs in mpq_equal */\n          if (want_ret == 1\n              && ! (mpz_cmp (mpq_numref(want), mpq_numref(got)) == 0\n                    && mpz_cmp (mpq_denref(want), mpq_denref(got)) == 0))\n            {\n              printf (\"%s wrong result\\n\", name);\n              error = 1;\n            }\n          if (got_upto != want_upto)\n            {\n              printf (\"%s wrong upto\\n\", name);\n              error = 1;\n            }\n          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)\n            {\n              printf (\"%s wrong ftell\\n\", name);\n              error = 1;\n            }\n          if (error)\n            {\n              printf    (\"  fmt   \\\"%s\\\"\\n\", data[i].fmt);\n              printf    (\"  input \\\"%s\\\"\\n\", data[i].input);\n              printf    (\"  ret   want=%d\\n\", want_ret);\n              printf    (\"        got =%d\\n\", got_ret);\n              mpq_trace (\"  value want\", want);\n              mpq_trace (\"        got \", got);\n              printf    (\"  upto  want=%d\\n\", want_upto);\n              printf    (\"        got =%d\\n\", got_upto);\n              if (got_ftell != -1)\n                {\n                  printf    (\"  ftell want =%ld\\n\", want_ftell);\n                  printf    (\"        got  =%ld\\n\", got_ftell);\n                }\n              abort ();\n            }\n        }\n    }\n\n  mpq_clear (got);\n  mpq_clear (want);\n}\n\nvoid\ncheck_f (void)\n{\n  static const 
struct {\n    const char  *fmt;\n    const char  *input;\n    const char  *want;\n    int         ret;\n    long        ftell;    /* or -1 for length of input string */\n\n  } data[] = {\n\n    { \"%Ff\",    \"0\",    \"0\", 1, -1 },\n    { \"%Fe\",    \"0\",    \"0\", 1, -1 },\n    { \"%FE\",    \"0\",    \"0\", 1, -1 },\n    { \"%Fg\",    \"0\",    \"0\", 1, -1 },\n    { \"%FG\",    \"0\",    \"0\", 1, -1 },\n\n    { \"%Ff\",  \"123\",    \"123\", 1, -1 },\n    { \"%Ff\", \"+123\",    \"123\", 1, -1 },\n    { \"%Ff\", \"-123\",   \"-123\", 1, -1 },\n    { \"%Ff\",  \"123.\",   \"123\", 1, -1 },\n    { \"%Ff\", \"+123.\",   \"123\", 1, -1 },\n    { \"%Ff\", \"-123.\",  \"-123\", 1, -1 },\n    { \"%Ff\",  \"123.0\",  \"123\", 1, -1 },\n    { \"%Ff\", \"+123.0\",  \"123\", 1, -1 },\n    { \"%Ff\", \"-123.0\", \"-123\", 1, -1 },\n    { \"%Ff\",  \"0123\",   \"123\", 1, -1 },\n    { \"%Ff\", \"-0123\",  \"-123\", 1, -1 },\n\n    { \"%Ff\",  \"123.456e3\",   \"123456\", 1, -1 },\n    { \"%Ff\", \"-123.456e3\",  \"-123456\", 1, -1 },\n    { \"%Ff\",  \"123.456e+3\",  \"123456\", 1, -1 },\n    { \"%Ff\", \"-123.456e+3\", \"-123456\", 1, -1 },\n    { \"%Ff\",  \"123000e-3\",      \"123\", 1, -1 },\n    { \"%Ff\", \"-123000e-3\",     \"-123\", 1, -1 },\n    { \"%Ff\",  \"123000.e-3\",     \"123\", 1, -1 },\n    { \"%Ff\", \"-123000.e-3\",    \"-123\", 1, -1 },\n\n    { \"%Ff\",  \"123.456E3\",   \"123456\", 1, -1 },\n    { \"%Ff\", \"-123.456E3\",  \"-123456\", 1, -1 },\n    { \"%Ff\",  \"123.456E+3\",  \"123456\", 1, -1 },\n    { \"%Ff\", \"-123.456E+3\", \"-123456\", 1, -1 },\n    { \"%Ff\",  \"123000E-3\",      \"123\", 1, -1 },\n    { \"%Ff\", \"-123000E-3\",     \"-123\", 1, -1 },\n    { \"%Ff\",  \"123000.E-3\",     \"123\", 1, -1 },\n    { \"%Ff\", \"-123000.E-3\",    \"-123\", 1, -1 },\n\n    { \"%Ff\",  \".456e3\",   \"456\", 1, -1 },\n    { \"%Ff\", \"-.456e3\",  \"-456\", 1, -1 },\n    { \"%Ff\",  \".456e+3\",  \"456\", 1, -1 },\n    { \"%Ff\", \"-.456e+3\", 
\"-456\", 1, -1 },\n\n    { \"%Ff\",    \" 0\",    \"0\", 1, -1 },\n    { \"%Ff\",   \"  0\",    \"0\", 1, -1 },\n    { \"%Ff\",  \"   0\",    \"0\", 1, -1 },\n    { \"%Ff\",   \"\\t0\",    \"0\", 1, -1 },\n    { \"%Ff\", \"\\t\\t0\",    \"0\", 1, -1 },\n\n    { \"hello%Fg\",      \"hello0\",       \"0\",   1, -1 },\n    { \"hello%Fg\",      \"hello 0\",      \"0\",   1, -1 },\n    { \"hello%Fg\",      \"hello \\t0\",    \"0\",   1, -1 },\n    { \"hello%Fgworld\", \"hello 0world\", \"0\",   1, -1 },\n    { \"hello%Fg\",      \"hello3.0\",     \"3.0\", 1, -1 },\n\n    { \"hello%*Fg\",      \"hello0\",        \"-999\", 0, -1 },\n    { \"hello%*Fg\",      \"hello 0\",       \"-999\", 0, -1 },\n    { \"hello%*Fg\",      \"hello \\t0\",     \"-999\", 0, -1 },\n    { \"hello%*Fgworld\", \"hello 0world\",  \"-999\", 0, -1 },\n    { \"hello%*Fgworld\", \"hello3.0world\", \"-999\", 0, -1 },\n\n    { \"%Ff\",     \"\",   \"-999\", -1, -1 },\n    { \"%Ff\",    \" \",   \"-999\", -1, -1 },\n    { \"%Ff\",   \"\\t\",   \"-999\", -1, -1 },\n    { \"%Ff\",  \" \\t\",   \"-999\", -1, -1 },\n    { \" %Ff\",    \"\",   \"-999\", -1, -1 },\n    { \"xyz%Ff\",  \"\",   \"-999\", -1, -1 },\n\n    { \"%*Ff\",    \"\",   \"-999\", -1, -1 },\n    { \" %*Ff\",   \"\",   \"-999\", -1, -1 },\n    { \"xyz%*Ff\", \"\",   \"-999\", -1, -1 },\n\n    { \"%Ff\",    \"xyz\", \"0\", 0 },\n\n    /* various non-empty but invalid */\n    { \"%Ff\",    \"-\",      \"-999\",  0, 1 },\n    { \"%Ff\",    \"+\",      \"-999\",  0, 1 },\n    { \"xyz%Ff\", \"xyz-\",   \"-999\",  0, 4 },\n    { \"xyz%Ff\", \"xyz+\",   \"-999\",  0, 4 },\n    { \"%Ff\",    \"-.\",     \"-999\",  0, 2 },\n    { \"%Ff\",    \"+.\",     \"-999\",  0, 2 },\n    { \"%Ff\",    \".e\",     \"-999\",  0, 1 },\n    { \"%Ff\",   \"-.e\",     \"-999\",  0, 2 },\n    { \"%Ff\",   \"+.e\",     \"-999\",  0, 2 },\n    { \"%Ff\",    \".E\",     \"-999\",  0, 1 },\n    { \"%Ff\",   \"-.E\",     \"-999\",  0, 2 },\n    { \"%Ff\",   \"+.E\",     
\"-999\",  0, 2 },\n    { \"%Ff\",    \".e123\",  \"-999\",  0, 1 },\n    { \"%Ff\",   \"-.e123\",  \"-999\",  0, 2 },\n    { \"%Ff\",   \"+.e123\",  \"-999\",  0, 2 },\n    { \"%Ff\",    \"123e\",   \"-999\",  0, 4 },\n    { \"%Ff\",   \"-123e\",   \"-999\",  0, 5 },\n    { \"%Ff\",    \"123e-\",  \"-999\",  0, 5 },\n    { \"%Ff\",   \"-123e-\",  \"-999\",  0, 6 },\n    { \"%Ff\",    \"123e+\",  \"-999\",  0, 5 },\n    { \"%Ff\",   \"-123e+\",  \"-999\",  0, 6 },\n    { \"%Ff\",   \"123e-Z\",  \"-999\",  0, 5 },\n\n    /* hex floats */\n    { \"%Ff\", \"0x123p0\",       \"291\",  1, -1 },\n    { \"%Ff\", \"0x123P0\",       \"291\",  1, -1 },\n    { \"%Ff\", \"0X123p0\",       \"291\",  1, -1 },\n    { \"%Ff\", \"0X123P0\",       \"291\",  1, -1 },\n    { \"%Ff\", \"-0x123p0\",     \"-291\",  1, -1 },\n    { \"%Ff\", \"+0x123p0\",      \"291\",  1, -1 },\n    { \"%Ff\", \"0x123.p0\",      \"291\",  1, -1 },\n    { \"%Ff\", \"0x12.3p4\",      \"291\",  1, -1 },\n    { \"%Ff\", \"-0x12.3p4\",    \"-291\",  1, -1 },\n    { \"%Ff\", \"+0x12.3p4\",     \"291\",  1, -1 },\n    { \"%Ff\", \"0x1230p-4\",     \"291\",  1, -1 },\n    { \"%Ff\", \"-0x1230p-4\",   \"-291\",  1, -1 },\n    { \"%Ff\", \"+0x1230p-4\",    \"291\",  1, -1 },\n    { \"%Ff\", \"+0x.1230p12\",   \"291\",  1, -1 },\n    { \"%Ff\", \"+0x123000p-12\", \"291\",  1, -1 },\n    { \"%Ff\", \"0x123 p12\",     \"291\",  1, 5 },\n    { \"%Ff\", \"0x9 9\",           \"9\",  1, 3 },\n    { \"%Ff\", \"0x01\",            \"1\",  1, 4 },\n    { \"%Ff\", \"0x23\",           \"35\",  1, 4 },\n    { \"%Ff\", \"0x45\",           \"69\",  1, 4 },\n    { \"%Ff\", \"0x67\",          \"103\",  1, 4 },\n    { \"%Ff\", \"0x89\",          \"137\",  1, 4 },\n    { \"%Ff\", \"0xAB\",          \"171\",  1, 4 },\n    { \"%Ff\", \"0xCD\",          \"205\",  1, 4 },\n    { \"%Ff\", \"0xEF\",          \"239\",  1, 4 },\n    { \"%Ff\", \"0xab\",          \"171\",  1, 4 },\n    { \"%Ff\", \"0xcd\",          \"205\",  1, 4 },\n    { 
\"%Ff\", \"0xef\",          \"239\",  1, 4 },\n    { \"%Ff\", \"0x100p0A\",      \"256\",  1, 7 },\n    { \"%Ff\", \"0x1p9\",         \"512\",  1, -1 },\n\n    /* invalid hex floats */\n    { \"%Ff\", \"0x\",     \"-999\",  0, 2 },\n    { \"%Ff\", \"-0x\",    \"-999\",  0, 3 },\n    { \"%Ff\", \"+0x\",    \"-999\",  0, 3 },\n    { \"%Ff\", \"0x-\",    \"-999\",  0, 2 },\n    { \"%Ff\", \"0x+\",    \"-999\",  0, 2 },\n    { \"%Ff\", \"0x.\",    \"-999\",  0, 3 },\n    { \"%Ff\", \"-0x.\",   \"-999\",  0, 4 },\n    { \"%Ff\", \"+0x.\",   \"-999\",  0, 4 },\n    { \"%Ff\", \"0x.p\",   \"-999\",  0, 3 },\n    { \"%Ff\", \"-0x.p\",  \"-999\",  0, 4 },\n    { \"%Ff\", \"+0x.p\",  \"-999\",  0, 4 },\n    { \"%Ff\", \"0x.P\",   \"-999\",  0, 3 },\n    { \"%Ff\", \"-0x.P\",  \"-999\",  0, 4 },\n    { \"%Ff\", \"+0x.P\",  \"-999\",  0, 4 },\n    { \"%Ff\", \".p123\",  \"-999\",  0, 1 },\n    { \"%Ff\", \"-.p123\", \"-999\",  0, 2 },\n    { \"%Ff\", \"+.p123\", \"-999\",  0, 2 },\n    { \"%Ff\", \"0x1p\",   \"-999\",  0, 4 },\n    { \"%Ff\", \"0x1p-\",  \"-999\",  0, 5 },\n    { \"%Ff\", \"0x1p+\",  \"-999\",  0, 5 },\n    { \"%Ff\", \"0x123p 12\", \"291\",  0, 6 },\n    { \"%Ff\", \"0x 123p12\", \"291\",  0, 2 },\n\n  };\n\n  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;\n  mpf_t       got, want;\n  double      got_d;\n  long        want_ftell;\n  int         error = 0;\n  fun_t       fun;\n  const char  *name;\n  char        fmt[128];\n\n  mpf_init (got);\n  mpf_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_set_str_or_abort (want, data[i].want, 10);\n\n      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));\n      strcpy (fmt, data[i].fmt);\n      strcat (fmt, \"%n\");\n\n      ignore = (strchr (fmt, '*') != NULL);\n\n      for (j = 0; j <= 3; j++)\n        {\n          want_ret = data[i].ret;\n\n          want_ftell = data[i].ftell;\n          if (want_ftell == -1)\n            want_ftell = strlen (data[i].input);\n    
      want_upto = want_ftell;\n\n          if (want_ret == -1 || (want_ret == 0 && ! ignore))\n            want_upto = -555;\n\n          switch (j) {\n          case 0:\n            name = \"gmp_sscanf\";\n            fun = fun_gmp_sscanf;\n            break;\n          case 1:\n            name = \"gmp_fscanf\";\n            fun = fun_gmp_fscanf;\n            break;\n          case 2:\n            if (! libc_scanf_convert (fmt))\n              continue;\n            name = \"standard sscanf\";\n            fun = fun_sscanf;\n            break;\n          case 3:\n            if (! libc_scanf_convert (fmt))\n              continue;\n            name = \"standard fscanf\";\n            fun = fun_fscanf;\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          got_upto = -555;\n          got_ftell = -1;\n\n          switch (j) {\n          case 0:\n          case 1:\n            mpf_set_si (got, -999L);\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);\n            break;\n          case 2:\n          case 3:\n            got_d = -999L;\n            if (ignore)\n              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);\n            else\n              got_ret = (*fun) (data[i].input, fmt, &got_d, &got_upto);\n            mpf_set_d (got, got_d);\n            break;\n          default:\n            ASSERT_ALWAYS (0);\n            break;\n          }\n\n          MPF_CHECK_FORMAT (got);\n\n          if (got_ret != want_ret)\n            {\n              printf (\"%s wrong return value\\n\", name);\n              error = 1;\n            }\n          if (want_ret == 1 && mpf_cmp (want, got) != 0)\n            {\n              printf (\"%s wrong result\\n\", name);\n              error = 1;\n            }\n          if (got_upto != want_upto)\n            {\n           
   printf (\"%s wrong upto\\n\", name);\n              error = 1;\n            }\n          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)\n            {\n              printf (\"%s wrong ftell\\n\", name);\n              error = 1;\n            }\n          if (error)\n            {\n              printf    (\"  fmt   \\\"%s\\\"\\n\", data[i].fmt);\n              printf    (\"  input \\\"%s\\\"\\n\", data[i].input);\n              printf    (\"  ret   want=%d\\n\", want_ret);\n              printf    (\"        got =%d\\n\", got_ret);\n              mpf_trace (\"  value want\", want);\n              mpf_trace (\"        got \", got);\n              printf    (\"  upto  want=%d\\n\", want_upto);\n              printf    (\"        got =%d\\n\", got_upto);\n              if (got_ftell != -1)\n                {\n                  printf    (\"  ftell want =%ld\\n\", want_ftell);\n                  printf    (\"        got  =%ld\\n\", got_ftell);\n                }\n              abort ();\n            }\n        }\n    }\n\n  mpf_clear (got);\n  mpf_clear (want);\n}\n\n\nvoid\ncheck_n (void)\n{\n  int    ret;\n\n  /* %n suppressed */\n  {\n    int n = 123;\n    gmp_sscanf (\"   \", \" %*n\", &n);\n    ASSERT_ALWAYS (n == 123);\n  }\n  {\n    int n = 123;\n    fromstring_gmp_fscanf (\"   \", \" %*n\", &n);\n    ASSERT_ALWAYS (n == 123);\n  }\n\n\n#define CHECK_N(type, string)                           \\\n  do {                                                  \\\n    type  x[2];                                         \\\n    char  fmt[128];                                     \\\n    int   ret;                                          \\\n                                                        \\\n    x[0] = ~ (type) 0;                                  \\\n    x[1] = ~ (type) 0;                                  \\\n    sprintf (fmt, \"abc%%%sn\", string);                  \\\n    ret = gmp_sscanf (\"abc\", fmt, &x[0]);               \\\n         
                                               \\\n    ASSERT_ALWAYS (ret == 0);                           \\\n                                                        \\\n    /* should write whole of x[0] and none of x[1] */   \\\n    ASSERT_ALWAYS (x[0] == 3);                          \\\n    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);\t\t\\\n                                                        \\\n  } while (0)\n\n  CHECK_N (char,      \"hh\");\n  CHECK_N (long,      \"l\");\n#if HAVE_LONG_LONG\n  CHECK_N (long long, \"L\");\n#endif\n#if HAVE_INTMAX_T\n  CHECK_N (intmax_t,  \"j\");\n#endif\n  CHECK_N (ptrdiff_t, \"t\");\n  CHECK_N (short,     \"h\");\n  CHECK_N (size_t,    \"z\");\n\n  /* %Zn */\n  {\n    mpz_t  x[2];\n    mpz_init_set_si (x[0], -987L);\n    mpz_init_set_si (x[1],  654L);\n    ret = gmp_sscanf (\"xyz   \", \"xyz%Zn\", x[0]);\n    MPZ_CHECK_FORMAT (x[0]);\n    MPZ_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);\n    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);\n    mpz_clear (x[0]);\n    mpz_clear (x[1]);\n  }\n  {\n    mpz_t  x;\n    mpz_init (x);\n    ret = fromstring_gmp_fscanf (\"xyz   \", \"xyz%Zn\", x);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpz_cmp_ui (x, 3L) == 0);\n    mpz_clear (x);\n  }\n\n  /* %Qn */\n  {\n    mpq_t  x[2];\n    mpq_init (x[0]);\n    mpq_init (x[1]);\n    mpq_set_ui (x[0], -987L, 654L);\n    mpq_set_ui (x[1], 4115L, 226L);\n    ret = gmp_sscanf (\"xyz   \", \"xyz%Qn\", x[0]);\n    MPQ_CHECK_FORMAT (x[0]);\n    MPQ_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);\n    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);\n    mpq_clear (x[0]);\n    mpq_clear (x[1]);\n  }\n  {\n    mpq_t  x;\n    mpq_init (x);\n    ret = fromstring_gmp_fscanf (\"xyz   \", \"xyz%Qn\", x);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpq_cmp_ui (x, 3L, 1L) == 0);\n    mpq_clear (x);\n  }\n\n  /* %Fn */\n  {\n    
mpf_t  x[2];\n    mpf_init (x[0]);\n    mpf_init (x[1]);\n    mpf_set_ui (x[0], -987L);\n    mpf_set_ui (x[1],  654L);\n    ret = gmp_sscanf (\"xyz   \", \"xyz%Fn\", x[0]);\n    MPF_CHECK_FORMAT (x[0]);\n    MPF_CHECK_FORMAT (x[1]);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);\n    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);\n    mpf_clear (x[0]);\n    mpf_clear (x[1]);\n  }\n  {\n    mpf_t  x;\n    mpf_init (x);\n    ret = fromstring_gmp_fscanf (\"xyz   \", \"xyz%Fn\", x);\n    ASSERT_ALWAYS (ret == 0);\n    ASSERT_ALWAYS (mpf_cmp_ui (x, 3L) == 0);\n    mpf_clear (x);\n  }\n}\n\n\nvoid\ncheck_misc (void)\n{\n  int  ret, cmp;\n  {\n    int  a=9, b=8, c=7, n=66;\n    mpz_t  z;\n    mpz_init (z);\n    ret = gmp_sscanf (\"1 2 3 4\", \"%d %d %d %Zd%n\",\n                      &a, &b, &c, z, &n);\n    ASSERT_ALWAYS (ret == 4);\n    ASSERT_ALWAYS (a == 1);\n    ASSERT_ALWAYS (b == 2);\n    ASSERT_ALWAYS (c == 3);\n    ASSERT_ALWAYS (n == 7);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);\n    mpz_clear (z);\n  }\n  {\n    int  a=9, b=8, c=7, n=66;\n    mpz_t  z;\n    mpz_init (z);\n    ret = fromstring_gmp_fscanf (\"1 2 3 4\", \"%d %d %d %Zd%n\",\n                                 &a, &b, &c, z, &n);\n    ASSERT_ALWAYS (ret == 4);\n    ASSERT_ALWAYS (a == 1);\n    ASSERT_ALWAYS (b == 2);\n    ASSERT_ALWAYS (c == 3);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);\n    ASSERT_ALWAYS (n == 7);\n    ASSERT_ALWAYS (got_ftell == 7);\n    mpz_clear (z);\n  }\n\n  {\n    int  a=9, n=8;\n    mpz_t  z;\n    mpz_init (z);\n    ret = gmp_sscanf (\"1 2 3 4\", \"%d %*d %*d %Zd%n\", &a, z, &n);\n    ASSERT_ALWAYS (ret == 2);\n    ASSERT_ALWAYS (a == 1);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);\n    ASSERT_ALWAYS (n == 7);\n    mpz_clear (z);\n  }\n  {\n    int  a=9, n=8;\n    mpz_t  z;\n    mpz_init (z);\n    ret = fromstring_gmp_fscanf (\"1 2 3 4\", \"%d %*d %*d %Zd%n\",\n                                 &a, z, &n);\n    ASSERT_ALWAYS (ret == 
2);\n    ASSERT_ALWAYS (a == 1);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);\n    ASSERT_ALWAYS (n == 7);\n    ASSERT_ALWAYS (got_ftell == 7);\n    mpz_clear (z);\n  }\n\n  /* EOF for no matching */\n  {\n    char buf[128];\n    ret = gmp_sscanf (\"   \", \"%s\", buf);\n    ASSERT_ALWAYS (ret == EOF);\n    ret = fromstring_gmp_fscanf (\"   \", \"%s\", buf);\n    ASSERT_ALWAYS (ret == EOF);\n    if (option_libc_scanf)\n      {\n        ret = sscanf (\"   \", \"%s\", buf);\n        ASSERT_ALWAYS (ret == EOF);\n        ret = fun_fscanf (\"   \", \"%s\", buf, NULL);\n        ASSERT_ALWAYS (ret == EOF);\n      }\n  }\n\n  /* suppressed field, then eof */\n  {\n    int  x;\n    if (test_sscanf_eof_ok ())\n      {\n        ret = gmp_sscanf (\"123\", \"%*d%d\", &x);\n        ASSERT_ALWAYS (ret == EOF);\n      }\n    ret = fromstring_gmp_fscanf (\"123\", \"%*d%d\", &x);\n    ASSERT_ALWAYS (ret == EOF);\n    if (option_libc_scanf)\n      {\n        ret = sscanf (\"123\", \"%*d%d\", &x);\n        ASSERT_ALWAYS (ret == EOF);\n        ret = fun_fscanf (\"123\", \"%*d%d\", &x, NULL);\n        ASSERT_ALWAYS (ret == EOF);\n      }\n  }\n  {\n    mpz_t  x;\n    mpz_init (x);\n    ret = gmp_sscanf (\"123\", \"%*Zd%Zd\", x);\n    ASSERT_ALWAYS (ret == EOF);\n    ret = fromstring_gmp_fscanf (\"123\", \"%*Zd%Zd\", x);\n    ASSERT_ALWAYS (ret == EOF);\n    mpz_clear (x);\n  }\n\n  /* %[...], glibc only */\n#ifdef __GLIBC__\n  {\n    char  buf[128];\n    int   n = -1;\n    buf[0] = '\\0';\n    ret = gmp_sscanf (\"abcdefgh\", \"%[a-d]ef%n\", buf, &n);\n    ASSERT_ALWAYS (ret == 1);\n    cmp = strcmp (buf, \"abcd\");\n    ASSERT_ALWAYS (cmp == 0);\n    ASSERT_ALWAYS (n == 6);\n  }\n  {\n    char  buf[128];\n    int   n = -1;\n    buf[0] = '\\0';\n    ret = gmp_sscanf (\"xyza\", \"%[^a]a%n\", buf, &n);\n    ASSERT_ALWAYS (ret == 1);\n    cmp = strcmp (buf, \"xyz\");\n    ASSERT_ALWAYS (cmp == 0);\n    ASSERT_ALWAYS (n == 4);\n  }\n  {\n    char  buf[128];\n    int   n = -1;\n    buf[0] = 
'\\0';\n    ret = gmp_sscanf (\"ab]ab]\", \"%[]ab]%n\", buf, &n);\n    ASSERT_ALWAYS (ret == 1);\n    cmp = strcmp (buf, \"ab]ab]\");\n    ASSERT_ALWAYS (cmp == 0);\n    ASSERT_ALWAYS (n == 6);\n  }\n  {\n    char  buf[128];\n    int   n = -1;\n    buf[0] = '\\0';\n    ret = gmp_sscanf (\"xyzb\", \"%[^]ab]b%n\", buf, &n);\n    ASSERT_ALWAYS (ret == 1);\n    cmp = strcmp (buf, \"xyz\");\n    ASSERT_ALWAYS (cmp == 0);\n    ASSERT_ALWAYS (n == 4);\n  }\n#endif\n\n  /* %zd etc won't be accepted by sscanf on old systems, and running\n     something to see if they work might be bad, so only try it on glibc,\n     and only on a new enough version (glibc 2.0 doesn't have %zd) */\n#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)\n  {\n    mpz_t   z;\n    size_t  s = -1;\n    mpz_init (z);\n    ret = gmp_sscanf (\"456 789\", \"%zd %Zd\", &s, z);\n    ASSERT_ALWAYS (ret == 2);\n    ASSERT_ALWAYS (s == 456);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);\n    mpz_clear (z);\n  }\n  {\n    mpz_t      z;\n    ptrdiff_t  d = -1;\n    mpz_init (z);\n    ret = gmp_sscanf (\"456 789\", \"%td %Zd\", &d, z);\n    ASSERT_ALWAYS (ret == 2);\n    ASSERT_ALWAYS (d == 456);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);\n    mpz_clear (z);\n  }\n  {\n    mpz_t      z;\n    long long  ll = -1;\n    mpz_init (z);\n    ret = gmp_sscanf (\"456 789\", \"%Ld %Zd\", &ll, z);\n    ASSERT_ALWAYS (ret == 2);\n    ASSERT_ALWAYS (ll == 456);\n    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);\n    mpz_clear (z);\n  }\n#endif\n}\n\nint\nmain (int argc, char *argv[])\n{\n  if (argc > 1 && strcmp (argv[1], \"-s\") == 0)\n    option_libc_scanf = 1;\n\n  tests_start ();\n\n  mp_trace_base = 16;\n\n  check_z ();\n  check_q ();\n  check_f ();\n  check_n ();\n  check_misc ();\n\n  unlink (TEMPFILE);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/misc.c",
    "content": "/* Miscellaneous test program support routines.\n\nCopyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <ctype.h>\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>     /* for getenv */\n#include <string.h>\n\n#if HAVE_FLOAT_H || defined( _MSC_VER )\t/* BRG */\n#include <float.h>      /* for DBL_MANT_DIG */\n#endif\n\n#if HAVE_FENV_H\n#include <fenv.h>    /* for changing rounding modes */\n#endif\n\n#if TIME_WITH_SYS_TIME\n# include <sys/time.h>  /* for struct timeval */\n# include <time.h>\n#else\n# if HAVE_SYS_TIME_H\n#  include <sys/time.h>\n# else\n#  include <time.h>\n# endif\n#endif\n\n#ifdef _MSC_VER\n#define FE_TOWARD_ZERO\t0xC00\n#define FE_DOWNWARD\t0x400\n#define FE_UPWARD\t0x800\n#define FE_TONEAREST\t0\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* The various tests setups and final checks, collected up together. 
*/\nvoid\ntests_start (void)\n{\n  /* don't buffer, so output is not lost if a test causes a segv etc */\n  setbuf (stdout, NULL);\n  setbuf (stderr, NULL);\n\n  tests_memory_start ();\n  tests_rand_start ();\n}\nvoid\ntests_end (void)\n{\n  tests_rand_end ();\n  tests_memory_end ();\n}\n\n\nvoid\ntests_rand_start (void)\n{\n  gmp_randstate_ptr  rands;\n  char           *perform_seed;\n  unsigned long  seed;\n\n  if (__gmp_rands_initialized)\n    {\n      printf (\"Please let tests_start() initialize the global __gmp_rands.\\n\");\n      printf (\"ie. ensure that function is called before the first use of RANDS.\\n\");\n      abort ();\n    }\n\n  gmp_randinit_default (__gmp_rands);\n  __gmp_rands_initialized = 1;\n  rands = __gmp_rands;\n\n  perform_seed = getenv (\"GMP_CHECK_RANDOMIZE\");\n  if (perform_seed != NULL)\n    {\n#ifdef HAVE_STRTOUL\n      seed = strtoul (perform_seed, 0, 0);\n#else\n      /* This will not work right for seeds >= 2^31 on 64-bit machines.\n\t Perhaps use atol unconditionally?  Is that ubiquitous?  */\n      seed = atoi (perform_seed);\n#endif\n      if (! (seed == 0 || seed == 1))\n        {\n          printf (\"Re-seeding with GMP_CHECK_RANDOMIZE=%lu\\n\", seed);\n          gmp_randseed_ui (rands, seed);\n        }\n      else\n        {\n#if HAVE_GETTIMEOFDAY\n          struct timeval  tv;\n          gettimeofday (&tv, NULL);\n          seed = tv.tv_sec ^ (tv.tv_usec << 12);\n\t  seed &= 0xffffffff;\n#else\n          time_t  tv;\n          time (&tv);\n          seed = tv;\n#endif\n          gmp_randseed_ui (rands, seed);\n          printf (\"Seed GMP_CHECK_RANDOMIZE=%lu (include this in bug reports)\\n\", seed);\n        }\n      fflush (stdout);\n    }\n}\nvoid\ntests_rand_end (void)\n{\n  RANDS_CLEAR ();\n}\n\n\n/* Only used if CPU calling conventions checking is available. */\nmp_limb_t (*calling_conventions_function)(ANYARGS);\n\n\n/* Return p advanced to the next multiple of \"align\" bytes.  \"align\" must be\n   a power of 2. 
 Care is taken not to assume sizeof(int)==sizeof(pointer).\n   Using \"unsigned long\" avoids a warning on hpux.  */\nvoid *\nalign_pointer (void *p, size_t align)\n{\n  mpir_ui  d;\n  d = ((mpir_ui) p) & (align-1);\n  d = (d != 0 ? align-d : 0);\n  return (void *) (((char *) p) + d);\n}\n\n\n/* Note that memory allocated with this function can never be freed, because\n   the start address of the block allocated is lost. */\nvoid *\n__gmp_allocate_func_aligned (size_t bytes, size_t align)\n{\n  return align_pointer ((*__gmp_allocate_func) (bytes + align-1), align);\n}\n\n\nvoid *\n__gmp_allocate_or_reallocate (void *ptr, size_t oldsize, size_t newsize)\n{\n  if (ptr == NULL)\n    return (*__gmp_allocate_func) (newsize);\n  else\n    return (*__gmp_reallocate_func) (ptr, oldsize, newsize);\n}\n\nchar *\n__gmp_allocate_strdup (const char *s)\n{\n  size_t  len;\n  char    *t;\n  len = strlen (s);\n  t = (*__gmp_allocate_func) (len+1);\n  memcpy (t, s, len+1);\n  return t;\n}\n\n\nchar *\nstrtoupper (char *s_orig)\n{\n  char  *s;\n  for (s = s_orig; *s != '\\0'; s++)\n    if (isascii (*s))\n      *s = toupper (*s);\n  return s_orig;\n}\n\n\nvoid\nmpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)\n{\n  ASSERT (size >= 0);\n  MPN_NORMALIZE (p, size);\n  MPZ_REALLOC (z, size);\n  MPN_COPY (PTR(z), p, size);\n  SIZ(z) = size;\n}\n\nvoid\nmpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)\n{\n  ASSERT (size >= 0);\n\n  MPN_NORMALIZE (p, size);\n  ALLOC(z) = MAX (size, 1);\n  PTR(z) = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC(z));\n  SIZ(z) = size;\n  MPN_COPY (PTR(z), p, size);\n}\n\n\n/* Find least significant limb position where p1,size and p2,size differ.  */\nmp_size_t\nmpn_diff_lowest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)\n{\n  mp_size_t  i;\n\n  for (i = 0; i < size; i++)\n    if (p1[i] != p2[i])\n      return i;\n\n  /* no differences */\n  return -1;\n}\n\n\n/* Find most significant limb position where p1,size and p2,size differ.  
*/\nmp_size_t\nmpn_diff_highest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)\n{\n  mp_size_t  i;\n\n  for (i = size-1; i >= 0; i--)\n    if (p1[i] != p2[i])\n      return i;\n\n  /* no differences */\n  return -1;\n}\n\n\n/* Find least significant byte position where p1,size and p2,size differ.  */\nmp_size_t\nbyte_diff_lowest (const void *p1, const void *p2, mp_size_t size)\n{\n  mp_size_t  i;\n\n  for (i = 0; i < size; i++)\n    if (((const char *) p1)[i] != ((const char *) p2)[i])\n      return i;\n\n  /* no differences */\n  return -1;\n}\n\n\n/* Find most significant limb position where p1,size and p2,size differ.  */\nmp_size_t\nbyte_diff_highest (const void *p1, const void *p2, mp_size_t size)\n{\n  mp_size_t  i;\n\n  for (i = size-1; i >= 0; i--)\n    if (((const char *) p1)[i] != ((const char *) p2)[i])\n      return i;\n\n  /* no differences */\n  return -1;\n}\n\n\nvoid\nmpz_set_str_or_abort (mpz_ptr z, const char *str, int base)\n{\n  if (mpz_set_str (z, str, base) != 0)\n    {\n      fprintf (stderr, \"ERROR: mpz_set_str failed\\n\");\n      fprintf (stderr, \"   str  = \\\"%s\\\"\\n\", str);\n      fprintf (stderr, \"   base = %d\\n\", base);\n      abort();\n    }\n}\n\nvoid\nmpq_set_str_or_abort (mpq_ptr q, const char *str, int base)\n{\n  if (mpq_set_str (q, str, base) != 0)\n    {\n      fprintf (stderr, \"ERROR: mpq_set_str failed\\n\");\n      fprintf (stderr, \"   str  = \\\"%s\\\"\\n\", str);\n      fprintf (stderr, \"   base = %d\\n\", base);\n      abort();\n    }\n}\n\nvoid\nmpf_set_str_or_abort (mpf_ptr f, const char *str, int base)\n{\n  if (mpf_set_str (f, str, base) != 0)\n    {\n      fprintf (stderr, \"ERROR mpf_set_str failed\\n\");\n      fprintf (stderr, \"   str  = \\\"%s\\\"\\n\", str);\n      fprintf (stderr, \"   base = %d\\n\", base);\n      abort();\n    }\n}\n\n\n/* Whether the absolute value of z is a power of 2. 
*/\nint\nmpz_pow2abs_p (mpz_srcptr z)\n{\n  mp_size_t  size, i;\n  mp_srcptr  ptr;\n\n  size = SIZ (z);\n  if (size == 0)\n    return 0;  /* zero is not a power of 2 */\n  size = ABS (size);\n\n  ptr = PTR (z);\n  for (i = 0; i < size-1; i++)\n    if (ptr[i] != 0)\n      return 0;  /* non-zero low limb means not a power of 2 */\n\n  return POW2_P (ptr[i]);  /* high limb power of 2 */\n}\n\n\n/* Exponentially distributed between 0 and 2^nbits-1, meaning the number of\n   bits in the result is uniformly distributed between 0 and nbits-1.\n\n   FIXME: This is not a proper exponential distribution, since the\n   probability function will have a stepped shape due to using a uniform\n   distribution after choosing how many bits.  */\n\nvoid\nmpz_erandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)\n{\n  mpz_urandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));\n}\n\nvoid\nmpz_erandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)\n{\n  mpz_erandomb (rop, rstate, nbits);\n  if (mpz_sgn (rop) == 0)\n    mpz_set_ui (rop, 1L);\n}\n\nvoid\nmpz_errandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)\n{\n  mpz_rrandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));\n}\n\nvoid\nmpz_errandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)\n{\n  mpz_errandomb (rop, rstate, nbits);\n  if (mpz_sgn (rop) == 0)\n    mpz_set_ui (rop, 1L);\n}\n\nvoid\nmpz_negrandom (mpz_ptr rop, gmp_randstate_t rstate)\n{\n  mp_limb_t  n;\n  _gmp_rand (&n, rstate, 1);\n  if (n != 0)\n    mpz_neg (rop, rop);\n}\n\n\nmp_limb_t\nurandom (gmp_randstate_t rands)\n{\n#if GMP_NAIL_BITS == 0\n  mp_limb_t  n;\n  _gmp_rand (&n, rands, BITS_PER_MP_LIMB);\n  return n;\n#else\n  mp_limb_t n[2];\n  _gmp_rand (n, rands, BITS_PER_MP_LIMB);\n  return n[0] + (n[1] << GMP_NUMB_BITS);\n#endif\n}\n\n\n/* Call (*func)() with various random number generators. 
*/\nvoid\ncall_rand_algs (void (*func)(const char *, gmp_randstate_ptr))\n{\n  gmp_randstate_t  rstate;\n  mpz_t            a;\n\n  mpz_init (a);\n\n  gmp_randinit_default (rstate);\n  (*func) (\"gmp_randinit_default\", rstate);\n  gmp_randclear (rstate);\n\n  gmp_randinit_mt (rstate);\n  (*func) (\"gmp_randinit_mt\", rstate);\n  gmp_randclear (rstate);\n\n  gmp_randinit_lc_2exp_size (rstate, 8L);\n  (*func) (\"gmp_randinit_lc_2exp_size 8\", rstate);\n  gmp_randclear (rstate);\n\n  gmp_randinit_lc_2exp_size (rstate, 16L);\n  (*func) (\"gmp_randinit_lc_2exp_size 16\", rstate);\n  gmp_randclear (rstate);\n\n  gmp_randinit_lc_2exp_size (rstate, 128L);\n  (*func) (\"gmp_randinit_lc_2exp_size 128\", rstate);\n  gmp_randclear (rstate);\n\n  /* degenerate always zeros */\n  mpz_set_ui (a, 0L);\n  gmp_randinit_lc_2exp (rstate, a, 0L, 8L);\n  (*func) (\"gmp_randinit_lc_2exp a=0 c=0 m=8\", rstate);\n  gmp_randclear (rstate);\n\n  /* degenerate always FFs */\n  mpz_set_ui (a, 0L);\n  gmp_randinit_lc_2exp (rstate, a, 0xFFL, 8L);\n  (*func) (\"gmp_randinit_lc_2exp a=0 c=0xFF m=8\", rstate);\n  gmp_randclear (rstate);\n\n  mpz_clear (a);\n}\n\n\n/* Return +infinity if available, or 0 if not.\n   We don't want to use libm, so INFINITY or other system values are not\n   used here.  */\ndouble\ntests_infinity_d (void)\n{\n  union ieee_double_extract x;\n  x.s.exp = 2047;\n  x.s.manl = 0;\n  x.s.manh = 0;\n  x.s.sig = 0;\n  return x.d;\n}\n\n\n/* Return non-zero if d is an infinity (either positive or negative).\n   Don't want libm, so don't use isinf() or other system tests.  */\nint\ntests_isinf (double d)\n{\n  union ieee_double_extract x;\n  x.d = d;\n  return (x.s.exp == 2047 && x.s.manl == 0 && x.s.manh == 0);\n}\n\n\n/* Set the hardware floating point rounding mode.  Same mode values as mpfr,\n   namely 0=nearest, 1=tozero, 2=up, 3=down.  Return 1 if successful, 0 if\n   not.  
*/\nint\ntests_hardware_setround (int mode)\n{\n#if defined( _MSC_VER )\n    unsigned int cw, rc;\n    switch (mode) {\n    case 0: rc = RC_NEAR; break;  /* nearest */\n    case 1: rc = RC_CHOP; break;  /* tozero  */\n    case 2: rc = RC_UP; break;    /* up      */\n    case 3: rc = RC_DOWN; break;  /* down    */\n    default:\n        return 0;\n    }\n    _controlfp_s(&cw, rc, _MCW_RC);\n    return 1;\n#elif defined( HAVE_FENV_H )\n  int  rc;\n  switch (mode) {\n  case 0: rc = FE_TONEAREST; break;  /* nearest */\n  case 1: rc = FE_TOWARDZERO; break;  /* tozero  */\n  case 2: rc = FE_UPWARD; break;  /* up      */\n  case 3: rc = FE_DOWNWARD; break;  /* down    */\n  default:\n    return 0;\n  }\n  int cwi=fegetround();\n  if(cwi<0)return 0;\n  cwi=fesetround ((cwi & ~(FE_TOWARDZERO | FE_DOWNWARD | FE_UPWARD | FE_TONEAREST)) | rc );\n  if(cwi==0)return 1;\n  return 0;\n#endif\n}\n\n/* Return the hardware floating point rounding mode, or -1 if unknown. */\nint\ntests_hardware_getround (void)\n{\n#if defined( _MSC_VER )\n    unsigned int cw;\n    _controlfp_s(&cw, 0, 0);\n    switch (cw & (_RC_CHOP | _RC_UP | _RC_DOWN | _RC_NEAR)) {\n    case _RC_NEAR: return 0; break;  /* nearest */\n    case _RC_DOWN: return 3; break;  /* down    */\n    case _RC_UP:   return 2; break;  /* up      */\n    case _RC_CHOP: return 1; break;  /* tozero  */\n    }\n#elif defined( HAVE_FENV_H )\n  unsigned int cw;\n  int cwi;\n  cwi = fegetround();\n  if(cwi<0)return -1;\n  cw=cwi;\n  switch (cw & (FE_TOWARDZERO | FE_DOWNWARD | FE_UPWARD | FE_TONEAREST)) {\n  case FE_TONEAREST: return 0; break;  /* nearest */\n  case FE_DOWNWARD: return 3; break;  /* down    */\n  case FE_UPWARD: return 2; break;  /* up      */\n  case FE_TOWARDZERO: return 1; break;  /* tozero  */\n  }\n#endif\n  return -1;\n}\n\n/* tests_dbl_mant_bits() determines by experiment the number of bits in the\n   mantissa of a \"double\".  
If it's not possible to find a value (perhaps\n   due to the compiler optimizing too aggressively), then return 0.\n\n   This code is used rather than DBL_MANT_DIG from <float.h> since ancient\n   systems like SunOS don't have that file, and since one GNU/Linux ARM\n   system was seen where the float emulation seemed to have only 32 working\n   bits, not the 53 float.h claimed.  */\n\nint\ntests_dbl_mant_bits (void)\n{\n  static int n = -1;\n  volatile double x, y, d;\n\n  if (n != -1)\n    return n;\n\n  n = 1;\n  x = 2.0;\n  for (;;)\n    {\n      /* see if 2^(n+1)+1 can be formed without rounding, if so then\n         continue, if not then \"n\" is the answer */\n      y = x + 1.0;\n      d = y - x;\n      if (d != 1.0)\n        {\n#if defined (DBL_MANT_DIG) && DBL_RADIX == 2\n          if (n != DBL_MANT_DIG)\n            printf (\"Warning, tests_dbl_mant_bits got %d but DBL_MANT_DIG says %d\\n\", n, DBL_MANT_DIG);\n#endif\n          break;\n        }\n\n      x *= 2;\n      n++;\n\n      if (n > 1000)\n        {\n          printf (\"Oops, tests_dbl_mant_bits can't determine mantissa size\\n\");\n          n = 0;\n          break;\n        }\n    }\n  return n;\n}\n\n\n/* See tests_setjmp_sigfpe in tests.h. */\n\njmp_buf    tests_sigfpe_target;\n\nvoid\ntests_sigfpe_handler (int sig)\n{\n  longjmp (tests_sigfpe_target, 1);\n}\n\nvoid\ntests_sigfpe_done (void)\n{\n  signal (SIGFPE, SIG_DFL);\n}\n"
  },
  {
    "path": "tests/mpf/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software\n# Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = reuse t-add t-cmp_d t-cmp_si t-conv t-div t-dm2exp t-eq t-fits t-get_d t-get_d_2exp t-get_si t-get_ui t-gsprec t-inp_str t-int_p t-mul_ui t-muldiv t-set t-set_q t-set_si t-set_ui t-sqrt t-sqrt_ui t-sub t-trunc t-ui_div \n\nTESTS = $(check_PROGRAMS)\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/mpf/reuse.c",
    "content": "/* Test that routines allow reusing a source variable as destination.\n\nCopyright 1996, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#if __GMP_LIBGMP_DLL\n\n/* FIXME: When linking to a DLL libmpir, mpf_add etc can't be used as\n   initializers for global variables because they're effectively global\n   variables (function pointers) themselves.  Perhaps calling a test\n   function successively with mpf_add etc would be better.  */\n\nint\nmain (void)\n{\n  printf (\"Test suppressed for windows DLL\\n\");\n  exit (0);\n}\n\n\n#else /* ! 
DLL_EXPORT */\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\n#ifndef EXPO\n#define EXPO 32\n#endif\n\nvoid dump_abort(char *name, mpf_t res1, mpf_t res2);\n\ntypedef void (*dss_func)(mpf_ptr, mpf_srcptr, mpf_srcptr);\n\ndss_func dss_funcs[] =\n{\n  mpf_div, mpf_add, mpf_mul, mpf_sub,\n};\n\nchar *dss_func_names[] =\n{\n  \"mpf_div\", \"mpf_add\", \"mpf_mul\", \"mpf_sub\",\n};\n\ntypedef void (*dsi_func)(mpf_ptr, mpf_srcptr, mpir_ui);\n\ndsi_func dsi_funcs[] =\n{\n  mpf_div_ui, mpf_add_ui, mpf_mul_ui, mpf_sub_ui,\n  mpf_mul_2exp, mpf_div_2exp\n};\n\nchar *dsi_func_names[] =\n{\n  \"mpf_div_ui\", \"mpf_add_ui\", \"mpf_mul_ui\", \"mpf_sub_ui\",\n  \"mpf_mul_2exp\", \"mpf_div_2exp\"\n};\n\ntypedef void (*dis_func)(mpf_ptr, mpir_ui, mpf_srcptr);\n\ndis_func dis_funcs[] =\n{\n  mpf_ui_div, mpf_ui_sub,\n};\n\nchar *dis_func_names[] =\n{\n  \"mpf_ui_div\", \"mpf_ui_sub\",\n};\n\nint\nmain (int argc, char **argv)\n{\n  int i;\n  int pass, reps = 10000;\n  mpf_t in1, in2, out1;\n  unsigned long int in1i, in2i;\n  mpf_t res1, res2, res3;\n  mp_size_t bprec = 100;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init (in1);\n  mpf_init (in2);\n  mpf_init (out1);\n  mpf_init (res1);\n  mpf_init (res2);\n  mpf_init (res3);\n\n  for (pass = 1; pass <= reps; pass++)\n    {\n      mpf_rrandomb (in1, rands, urandom (rands) % SIZE - SIZE/2, urandom (rands) % EXPO);\n      mpf_rrandomb (in2, rands, urandom (rands) % SIZE - SIZE/2, urandom (rands) % EXPO);\n\n      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)\n\t{\n\t  /* Don't divide by 0.  
*/\n\t  if (i == 0 && mpf_cmp_ui (in2, 0) == 0)\n\t    continue;\n\n\t  (dss_funcs[i]) (res1, in1, in2);\n\n\t  mpf_set (out1, in1);\n\t  (dss_funcs[i]) (out1, out1, in2);\n\t  mpf_set (res2, out1);\n\n\t  mpf_set (out1, in2);\n\t  (dss_funcs[i]) (out1, in1, out1);\n\t  mpf_set (res3, out1);\n\n\t  if (mpf_cmp (res1, res2) != 0)\n\t    dump_abort (dss_func_names[i], res1, res2);\n\t  if (mpf_cmp (res1, res3) != 0)\n\t    dump_abort (dss_func_names[i], res1, res3);\n\t}\n\n      in2i = urandom (rands);\n      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)\n\t{\n\t  /* Don't divide by 0.  */\n\t  if (strcmp (dsi_func_names[i], \"mpf_div_ui\") == 0 && in2i == 0)\n\t    continue;\n\n\t  (dsi_funcs[i]) (res1, in1, in2i);\n\n\t  mpf_set (out1, in1);\n\t  (dsi_funcs[i]) (out1, out1, in2i);\n\t  mpf_set (res2, out1);\n\n\t  if (mpf_cmp (res1, res2) != 0)\n\t    dump_abort (dsi_func_names[i], res1, res2);\n\t}\n\n      in1i = urandom (rands);\n      for (i = 0; i < sizeof (dis_funcs) / sizeof (dis_func); i++)\n\t{\n\t  /* Don't divide by 0.  */\n\t  if (strcmp (dis_func_names[i], \"mpf_ui_div\") == 0\n\t      && mpf_cmp_ui (in2, 0) == 0)\n\t    continue;\n\n\t  (dis_funcs[i]) (res1, in1i, in2);\n\n\t  mpf_set (out1, in2);\n\t  (dis_funcs[i]) (out1, in1i, out1);\n\t  mpf_set (res2, out1);\n\n\t  if (mpf_cmp (res1, res2) != 0)\n\t    dump_abort (dis_func_names[i], res1, res2);\n\t}\n\n    }\n\n  mpf_clear (in1);\n  mpf_clear (in2);\n  mpf_clear (out1);\n  mpf_clear (res1);\n  mpf_clear (res2);\n  mpf_clear (res3);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (char *name, mpf_t res1, mpf_t res2)\n{\n  printf (\"failure in %s:\\n\", name);\n  mpf_dump (res1);\n  mpf_dump (res2);\n  abort ();\n}\n\n#if 0\nvoid mpf_abs\t\t(mpf_ptr, mpf_srcptr);\nvoid mpf_sqrt\t\t(mpf_ptr, mpf_srcptr);\nvoid mpf_neg\t\t(mpf_ptr, mpf_srcptr);\n#endif\n\n#endif /* ! DLL_EXPORT */\n"
  },
  {
    "path": "tests/mpf/t-add.c",
    "content": "/* Test mpf_add.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mp_size_t size;\n  mp_exp_t exp;\n  int reps = 20000;\n  int i;\n  mpf_t u, v, w, wref;\n  mp_size_t bprec = 100;\n  mpf_t rerr, max_rerr, limit_rerr;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init_set_ui (limit_rerr, 1);\n  mpf_div_2exp (limit_rerr, limit_rerr, bprec);\n#if VERBOSE\n  mpf_dump (limit_rerr);\n#endif\n  mpf_init (rerr);\n  mpf_init_set_ui (max_rerr, 0);\n\n  mpf_init (u);\n  mpf_init (v);\n  mpf_init (w);\n  mpf_init (wref);\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % (2 * SIZE) - SIZE;\n      exp = urandom (rands) % SIZE;\n      mpf_rrandomb (u, rands, size, exp);\n\n      size = urandom (rands) % (2 * SIZE) - SIZE;\n      exp = urandom (rands) 
% SIZE;\n      mpf_rrandomb (v, rands, size, exp);\n\n      mpf_add (w, u, v);\n      refmpf_add (wref, u, v);\n\n      mpf_reldiff (rerr, w, wref);\n      if (mpf_cmp (rerr, max_rerr) > 0)\n\t{\n\t  mpf_set (max_rerr, rerr);\n#if VERBOSE\n\t  mpf_dump (max_rerr);\n#endif\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR after %d tests\\n\", i);\n\t      printf (\"   u = \"); mpf_dump (u);\n\t      printf (\"   v = \"); mpf_dump (v);\n\t      printf (\"wref = \"); mpf_dump (wref);\n\t      printf (\"   w = \"); mpf_dump (w);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  mpf_clear (limit_rerr);\n  mpf_clear (rerr);\n  mpf_clear (max_rerr);\n\n  mpf_clear (u);\n  mpf_clear (v);\n  mpf_clear (w);\n  mpf_clear (wref);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-cmp_d.c",
    "content": "/* Test mpf_cmp_d.\n\nCopyright 2001, 2003, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? 
-1 : 0)\n\nvoid\ncheck_one (const char *name, mpf_srcptr x, double y, int cmp)\n{\n  int   got;\n\n  got = mpf_cmp_d (x, y);\n  if (SGN(got) != cmp)\n    {\n      int i;\n      printf    (\"mpf_cmp_d wrong (from %s)\\n\", name);\n      printf    (\"  got  %d\\n\", got);\n      printf    (\"  want %d\\n\", cmp);\n      mpf_trace (\"  x\", x);\n      printf    (\"  y %g\\n\", y);\n      mp_trace_base=-16;\n      mpf_trace (\"  x\", x);\n      printf    (\"  y %g\\n\", y);\n      printf    (\"  y\");\n      for (i = 0; i < sizeof(y); i++)\n        printf (\" %02X\", (unsigned) ((unsigned char *) &y)[i]);\n      printf (\"\\n\");\n      abort ();\n    }\n}\n\nvoid\ncheck_infinity (void)\n{\n  mpf_t   x;\n  double  y = tests_infinity_d ();\n  if (y == 0.0)\n    return;\n\n  mpf_init (x);\n\n  /* 0 cmp inf */\n  mpf_set_ui (x, 0L);\n  check_one (\"check_infinity\", x,  y, -1);\n  check_one (\"check_infinity\", x, -y,  1);\n\n  /* 123 cmp inf */\n  mpf_set_ui (x, 123L);\n  check_one (\"check_infinity\", x,  y, -1);\n  check_one (\"check_infinity\", x, -y,  1);\n\n  /* -123 cmp inf */\n  mpf_set_si (x, -123L);\n  check_one (\"check_infinity\", x,  y, -1);\n  check_one (\"check_infinity\", x, -y,  1);\n\n  /* 2^5000 cmp inf */\n  mpf_set_ui (x, 1L);\n  mpf_mul_2exp (x, x, 5000L);\n  check_one (\"check_infinity\", x,  y, -1);\n  check_one (\"check_infinity\", x, -y,  1);\n\n  /* -2^5000 cmp inf */\n  mpf_neg (x, x);\n  check_one (\"check_infinity\", x,  y, -1);\n  check_one (\"check_infinity\", x, -y,  1);\n\n  mpf_clear (x);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_infinity ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-cmp_si.c",
    "content": "/* Test mpf_cmp_si.\n\nCopyright 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 
0 : 1)\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    int         a_base;\n    const char  *a;\n    const char  *b;\n    int         want;\n  } data[] = {\n    { 10, \"0\",  \"1\", -1 },\n    { 10, \"0\",  \"0\",  0 },\n    { 10, \"0\", \"-1\",  1 },\n\n    { 10, \"1\",  \"1\", 0 },\n    { 10, \"1\",  \"0\", 1 },\n    { 10, \"1\", \"-1\", 1 },\n\n    { 10, \"-1\",  \"1\", -1 },\n    { 10, \"-1\",  \"0\", -1 },\n    { 10, \"-1\", \"-1\", 0 },\n\n    { 10, \"1.5\", \"2\", -1 },\n    { 10, \"1.5\", \"1\",  1 },\n    { 10, \"0.5\", \"1\", -1 },\n\n    { 10, \"-1.5\", \"-2\",  1 },\n    { 10, \"-1.5\", \"-1\", -1 },\n    { 10, \"-0.5\", \"-1\",  1 },\n\n    { 16,         \"0\", \"-0x80000000\",  1 },\n    { 16,  \"80000000\", \"-0x80000000\",  1 },\n    { 16,  \"80000001\", \"-0x80000000\",  1 },\n    { 16, \"-80000000\", \"-0x80000000\",  0 },\n    { 16, \"-80000001\", \"-0x80000000\", -1 },\n    { 16, \"-FF0080000001\", \"-0x80000000\", -1 },\n\n    { 16,                 \"0\", \"-0x8000000000000000\",  1 },\n    { 16,  \"8000000000000000\", \"-0x8000000000000000\",  1 },\n    { 16,  \"8000000000000001\", \"-0x8000000000000000\",  1 },\n    { 16, \"-8000000000000000\", \"-0x8000000000000000\",  0 },\n    { 16, \"-8000000000000000.1\", \"-0x8000000000000000\", -1 },\n    { 16, \"-FF008000000000000001\", \"-0x8000000000000000\", -1 },\n\n    { 16,                 \"0\", \"-0x876543210FEDCBA9876543210000000\",  1 },\n    { 16,  \"876543210FEDCBA9876543210000000\", \"-0x876543210FEDCBA9876543210000000\",  1 },\n    { 16,  \"876543210FEDCBA9876543210000001\", \"-0x876543210FEDCBA9876543210000000\",  1 },\n    { 16, \"-876543210FEDCBA9876543210000000\", \"-0x876543210FEDCBA9876543210000000\",  0 },\n    { 16, \"-876543210FEDCBA9876543210000000.1\", \"-0x876543210FEDCBA9876543210000000\", -1 },\n    { 16, \"-FF00876543210FEDCBA9876543210000000\", \"-0x876543210FEDCBA9876543210000000\", -1 },\n  };\n\n  mpf_t  a;\n  mpz_t  bz;\n  long   b;\n  int    got;\n  int   
 i;\n\n  mpf_init2 (a, 128);\n  mpz_init (bz);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_set_str_or_abort (a, data[i].a, data[i].a_base);\n      mpz_set_str_or_abort (bz, data[i].b, 0);\n\n      if (mpz_fits_slong_p (bz))\n        {\n          b = mpz_get_si (bz);\n          got = mpf_cmp_si (a, b);\n          if (SGN (got) != data[i].want)\n            {\n              printf (\"mpf_cmp_si wrong on data[%d]\\n\", i);\n              printf (\"  a=\"); mpf_out_str (stdout, 10, 0, a);\n              printf (\" (%s)\\n\", data[i].a);\n              printf (\"  b=%ld (%s)\\n\", b, data[i].b);\n              printf (\"  got=%d\\n\", got);\n              printf (\"  want=%d\\n\", data[i].want);\n              abort();\n            }\n        }\n\n      got = mpf_cmp_z (a, bz);\n      if (SGN (got) != data[i].want)\n      \t{\n\t  b = mpz_get_si (bz);\n      \t  printf (\"mpf_cmp_z wrong on data[%d]\\n\", i);\n      \t  printf (\"  a=\"); mpf_out_str (stdout, 10, 0, a);\n      \t  printf (\" (%s)\\n\", data[i].a);\n      \t  printf (\"  b=%ld (%s)\\n\", b, data[i].b);\n      \t  printf (\"  got=%d\\n\", got);\n      \t  printf (\"  want=%d\\n\", data[i].want);\n      \t  abort();\n      \t}\n    }\n\n  mpf_clear (a);\n  mpz_clear (bz);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-conv.c",
    "content": "/* Test mpf_get_str and mpf_set_str.\n\nCopyright 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h> /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 10\n#endif\n\n#ifndef EXPO\n#define EXPO 200\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mpf_t x, y;\n  int reps = 20000;\n  int i;\n  mp_size_t bprec = 100;\n  mpf_t d, rerr, max_rerr, limit_rerr;\n  char *str;\n  mp_exp_t bexp;\n  long size, exp;\n  int base;\n  char buf[SIZE * BITS_PER_MP_LIMB + 5];\n  gmp_randstate_t rands;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init_set_ui (limit_rerr, 1);\n  mpf_div_2exp (limit_rerr, limit_rerr, bprec);\n#if VERBOSE\n  mpf_dump (limit_rerr);\n#endif\n  mpf_init (rerr);\n  mpf_init_set_ui (max_rerr, 0);\n\n  mpf_init (x);\n  mpf_init (y);\n  mpf_init (d);\n\n  for (i = 0; i < reps; i++)\n    {\n      if (i == 0)\n        {\n          /* exercise the 
special case in get_str for x==0 */\n          mpf_set_ui (x, 0L);\n          base = 10;\n        }\n      else\n        {\n          size = urandom (rands) % (2 * SIZE) - SIZE;\n          exp = urandom (rands) % EXPO;\n          mpf_rrandomb (x, rands, size, exp);\n          base = urandom (rands) % 61 + 2;\n        }\n\n      str = mpf_get_str (0, &bexp, base, 0, x);\n\n      if (str[0] == '-')\n\tsprintf (buf, \"-0.%s@%ld\", str + 1, bexp);\n      else\n\tsprintf (buf, \"0.%s@%ld\", str, bexp);\n\n      mpf_set_str_or_abort (y, buf, -base);\n      (*__gmp_free_func) (str, strlen (str) + 1);\n\n      mpf_reldiff (rerr, x, y);\n      if (mpf_cmp (rerr, max_rerr) > 0)\n\t{\n\t  mpf_set (max_rerr, rerr);\n#if VERBOSE\n\t  mpf_dump (max_rerr);\n#endif\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR after %d tests\\n\", i);\n\t      printf (\"base = %d\\n\", base);\n\t      printf (\"   x = \"); mpf_dump (x);\n\t      printf (\"   y = \"); mpf_dump (y);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  mpf_clear (limit_rerr);\n  mpf_clear (rerr);\n  mpf_clear (max_rerr);\n\n  mpf_clear (x);\n  mpf_clear (y);\n  mpf_clear (d);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-div.c",
    "content": "/* Test mpf_div.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (const char *desc, mpf_ptr got, mpf_srcptr u, mpf_srcptr v)\n{\n  if (! 
refmpf_validate_division (\"mpf_div\", got, u, v))\n    {\n      mp_trace_base = -16;\n      mpf_trace (\"  u\", u);\n      mpf_trace (\"  v\", v);\n      printf    (\"  %s\\n\", desc);\n      abort ();\n    }\n}\n\nvoid\ncheck_rand (gmp_randstate_t rands)\n{\n  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);\n  unsigned long  prec;\n  mpf_t  got, u, v;\n  int    i;\n\n  mpf_init (got);\n  mpf_init (u);\n  mpf_init (v);\n  //gmp_randinit_default(rands);\n\n  /* separate */\n  for (i = 0; i < 100; i++)\n    {\n      /* got precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (got, prec);\n\n      /* u */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (u, prec);\n      do {\n        mpf_rrandomb (u, rands, PREC(u), (mp_exp_t) 20);\n      } while (SIZ(u) == 0);\n      if (gmp_urandomb_ui (rands, 1L))\n        mpf_neg (u, u);\n\n      /* v */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (v, prec);\n      do {\n        mpf_rrandomb (v, rands, PREC(v), (mp_exp_t) 20);\n      } while (SIZ(v) == 0);\n      if (gmp_urandomb_ui (rands, 1L))\n        mpf_neg (v, v);\n\n      switch (i % 3) {\n      case 0:\n        mpf_div (got, u, v);\n        check_one (\"separate\", got, u, v);\n        break;\n      case 1:\n        prec = refmpf_set_overlap (got, u);\n        mpf_div (got, got, v);\n        check_one (\"dst == u\", got, u, v);\n        mpf_set_prec_raw (got, prec);\n        break;\n      case 2:\n        prec = refmpf_set_overlap (got, v);\n        mpf_div (got, u, got);\n        check_one (\"dst == v\", got, u, v);\n        mpf_set_prec_raw (got, prec);\n        break;\n      }\n    }\n\n  mpf_clear (got);\n  mpf_clear (u);\n  mpf_clear (v);\n  //gmp_randclear(rands);\n}\n\n/* Exercise calls mpf(x,x,x) */\nvoid\ncheck_reuse_three (gmp_randstate_t rands)\n{\n  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);\n  unsigned long  result_prec, 
input_prec, set_prec;\n  mpf_t  got;\n  int    i;\n\n  mpf_init (got);\n\n  for (i = 0; i < 8; i++)\n    {\n      result_prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      input_prec = min_prec + gmp_urandomm_ui (rands, 15L);\n\n      set_prec = MAX (result_prec, input_prec);\n      refmpf_set_prec_limbs (got, set_prec);\n\n      /* input, non-zero, possibly negative */\n      PREC(got) = input_prec;\n      do {\n        mpf_rrandomb (got, rands, input_prec, (mp_exp_t) 20);\n      } while (SIZ(got) == 0);\n      if (gmp_urandomb_ui (rands, 1L))\n        mpf_neg (got, got);\n\n      PREC(got) = result_prec;\n\n      mpf_div (got, got, got);\n\n      /* expect exactly 1.0 always */\n      ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);\n\n      PREC(got) = set_prec;\n    }\n\n  mpf_clear (got);\n}\n\nvoid\ncheck_various (void)\n{\n  mpf_t got, u, v;\n\n  mpf_init (got);\n  mpf_init (u);\n  mpf_init (v);\n\n  /* 100/4 == 25 */\n  mpf_set_prec (got, 20L);\n  mpf_set_ui (u, 100L);\n  mpf_set_ui (v, 4L);\n  mpf_div (got, u, v);\n  MPF_CHECK_FORMAT (got);\n  ASSERT_ALWAYS (mpf_cmp_ui (got, 25L) == 0);\n\n  /* 1/(2^n+1), a case where truncating the divisor would be wrong */\n  mpf_set_prec (got, 500L);\n  mpf_set_prec (v, 900L);\n  mpf_set_ui (v, 1L);\n  mpf_mul_2exp (v, v, 800L);\n  mpf_add_ui (v, v, 1L);\n  mpf_div (got, u, v);\n  check_one (\"1/2^n+1, separate\", got, u, v);\n\n  mpf_clear (got);\n  mpf_clear (u);\n  mpf_clear (v);\n}\n\nint\nmain (void)\n{gmp_randstate_t rands;\n  tests_start ();\n  gmp_randinit_default(rands);\n  check_various ();\n  check_rand (rands);\n  check_reuse_three (rands);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-dm2exp.c",
    "content": "/* Test mpf_div, mpf_div_2exp, mpf_mul_2exp.\n\nCopyright 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  int reps = 100000;\n  int i;\n  mpf_t u, v, w1, w2, w3;\n  mp_size_t bprec = 100;\n  mpf_t rerr, limit_rerr;\n  mp_size_t un;\n  mp_exp_t ue;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init (rerr);\n  mpf_init (limit_rerr);\n\n  mpf_init (u);\n  mpf_init (v);\n  mpf_init (w1);\n  mpf_init (w2);\n  mpf_init (w3);\n\n  for (i = 0; i < reps; i++)\n    {\n      unsigned long int res_prec;\n      unsigned long int pow2;\n\n      res_prec = urandom (rands) % (bprec + 100);\n      mpf_set_prec (w1, res_prec);\n      mpf_set_prec (w2, res_prec);\n      mpf_set_prec (w3, res_prec);\n\n      mpf_set_ui (limit_rerr, 1);\n      mpf_div_2exp (limit_rerr, 
limit_rerr, res_prec);\n\n      pow2 = urandom (rands) % 0x10000;\n      mpf_set_ui (v, 1);\n      mpf_mul_2exp (v, v, pow2);\n\n      un = urandom (rands) % (2 * SIZE) - SIZE;\n      ue = urandom (rands) % SIZE;\n      mpf_rrandomb (u, rands, un, ue);\n\n      mpf_div_2exp (w1, u, pow2);\n      mpf_div (w2, u, v);\n      mpf_reldiff (rerr, w1, w2);\n      if (mpf_cmp (rerr, limit_rerr) > 0)\n\t{\n\t  printf (\"ERROR in mpf_div or mpf_div_2exp after %d tests\\n\", i);\n\t  printf (\"   u = \"); mpf_dump (u);\n\t  printf (\"   v = \"); mpf_dump (v);\n\t  printf (\"  w1 = \"); mpf_dump (w1);\n\t  printf (\"  w2 = \"); mpf_dump (w2);\n\t  abort ();\n\t}\n      mpf_mul_2exp (w3, w1, pow2);\n      mpf_reldiff (rerr, u, w3);\n      if (mpf_cmp (rerr, limit_rerr) > 0)\n\t{\n\t  printf (\"ERROR in mpf_mul_2exp after %d tests\\n\", i);\n\t  printf (\"   u = \"); mpf_dump (u);\n\t  printf (\"   v = \"); mpf_dump (v);\n\t  printf (\"  w1 = \"); mpf_dump (w1);\n\t  printf (\"  w3 = \"); mpf_dump (w3);\n\t  abort ();\n\t}\n    }\n\n  mpf_clear (rerr);\n  mpf_clear (limit_rerr);\n\n  mpf_clear (u);\n  mpf_clear (v);\n  mpf_clear (w1);\n  mpf_clear (w2);\n  mpf_clear (w3);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-eq.c",
    "content": "/* Test mpf_eq\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (int argc, char *argv[])\n{mpf_t x,y;\n int i;\n \n  tests_start ();\nmpf_init(x);mpf_init(y);\n\nmpf_set_ui(x,88);mpf_set_ui(y,89);\nfor(i=0;i<=6;i++)if(!mpf_eq(x,y,i)){printf(\"mpf error\\n\");abort();}\nfor(i=7;i<123;i++)if(mpf_eq(x,y,i)){printf(\"mpf error\\n\");abort();}\n\nmpf_set_ui(x,256);mpf_set_ui(y,257);\nfor(i=0;i<=8;i++)if(!mpf_eq(x,y,i)){printf(\"mpf error\\n\");abort();}\nfor(i=9;i<123;i++)if(mpf_eq(x,y,i)){printf(\"mpf error\\n\");abort();}\n\n\n\nmpf_clear(x);\nmpf_clear(y);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-fits.c",
    "content": "/* Test mpf_fits_*_p\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Nothing sophisticated here, just exercise mpf_fits_*_p on a small amount\n   of data. 
*/\n\n#define EXPECT_S(fun,name,answer)                                        \\\n  got = fun (f);                                                         \\\n  if (got != answer)                                                     \\\n    {                                                                    \\\n      printf (\"%s (%s) got %d want %d\\n\", name, expr, got, answer);      \\\n      printf (\" f size %d exp %ld\\n\", SIZ(f), EXP(f));                   \\\n      printf (\" f dec \"); mpf_out_str (stdout, 10, 0, f); printf (\"\\n\"); \\\n      printf (\" f hex \"); mpf_out_str (stdout, 16, 0, f); printf (\"\\n\"); \\\n      error = 1;                                                         \\\n    }\n\n#if HAVE_STRINGIZE\n#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)\n#else\n#define EXPECT(fun,answer)  EXPECT_S(fun,\"fun\",answer)\n#endif\n\nint\nmain (void)\n{\n  mpf_t       f, f0p5;\n  int         got;\n  const char  *expr;\n  int         error = 0;\n\n  tests_start ();\n  mpf_init2 (f, 200L);\n  mpf_init2 (f0p5, 200L);\n\n  /* 0.5 */\n  mpf_set_ui (f0p5, 1L);\n  mpf_div_2exp (f0p5, f0p5, 1L);\n\n  mpf_set_ui (f, 0L);\n  expr = \"0\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_ui (f, 1L);\n  expr = \"1\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_si (f, -1L);\n  expr = \"-1\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n\n  mpf_set_ui (f, (unsigned long) USHRT_MAX);\n  expr = \"USHRT_MAX\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  
EXPECT (mpf_fits_ushort_p, 1);\n\n  mpf_set_ui (f, (unsigned long) USHRT_MAX);\n  mpf_add (f, f, f0p5);\n  expr = \"USHRT_MAX + 0.5\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n\n  mpf_set_ui (f, (unsigned long) USHRT_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"USHRT_MAX + 1\";\n  EXPECT (mpf_fits_ushort_p, 0);\n\n\n  mpf_set_ui (f, (unsigned long) UINT_MAX);\n  expr = \"UINT_MAX\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n\n  mpf_set_ui (f, (unsigned long) UINT_MAX);\n  mpf_add (f, f, f0p5);\n  expr = \"UINT_MAX + 0.5\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n\n  mpf_set_ui (f, (unsigned long) UINT_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"UINT_MAX + 1\";\n  EXPECT (mpf_fits_uint_p, 0);\n\n\n  mpf_set_ui (f, ULONG_MAX);\n  expr = \"ULONG_MAX\";\n  EXPECT (mpf_fits_ulong_p, 1);\n\n  mpf_set_ui (f, ULONG_MAX);\n  mpf_add (f, f, f0p5);\n  expr = \"ULONG_MAX + 0.5\";\n  EXPECT (mpf_fits_ulong_p, 1);\n\n  mpf_set_ui (f, ULONG_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"ULONG_MAX + 1\";\n  EXPECT (mpf_fits_ulong_p, 0);\n\n\n  mpf_set_si (f, (long) SHRT_MAX);\n  expr = \"SHRT_MAX\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_si (f, (long) SHRT_MAX);\n  expr = \"SHRT_MAX + 0.5\";\n  mpf_add (f, f, f0p5);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_si (f, (long) SHRT_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"SHRT_MAX + 1\";\n  EXPECT (mpf_fits_sshort_p, 0);\n\n\n  mpf_set_si (f, (long) INT_MAX);\n  expr = \"INT_MAX\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n\n  mpf_set_si (f, (long) INT_MAX);\n  mpf_add (f, f, f0p5);\n  expr = \"INT_MAX + 0.5\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n\n  mpf_set_si (f, (long) INT_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"INT_MAX + 1\";\n  
EXPECT (mpf_fits_sint_p, 0);\n\n\n  mpf_set_si (f, LONG_MAX);\n  expr = \"LONG_MAX\";\n  EXPECT (mpf_fits_slong_p, 1);\n\n  mpf_set_si (f, LONG_MAX);\n  mpf_add (f, f, f0p5);\n  expr = \"LONG_MAX + 0.5\";\n  EXPECT (mpf_fits_slong_p, 1);\n\n  mpf_set_si (f, LONG_MAX);\n  mpf_add_ui (f, f, 1L);\n  expr = \"LONG_MAX + 1\";\n  EXPECT (mpf_fits_slong_p, 0);\n\n\n  mpf_set_si (f, (long) SHRT_MIN);\n  expr = \"SHRT_MIN\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_si (f, (long) SHRT_MIN);\n  mpf_sub (f, f, f0p5);\n  expr = \"SHRT_MIN - 0.5\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_si (f, (long) SHRT_MIN);\n  mpf_sub_ui (f, f, 1L);\n  expr = \"SHRT_MIN - 1\";\n  EXPECT (mpf_fits_sshort_p, 0);\n\n\n  mpf_set_si (f, (long) INT_MIN);\n  expr = \"INT_MIN\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n\n  mpf_set_si (f, (long) INT_MIN);\n  mpf_sub (f, f, f0p5);\n  expr = \"INT_MIN - 0.5\";\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n\n  mpf_set_si (f, (long) INT_MIN);\n  mpf_sub_ui (f, f, 1L);\n  expr = \"INT_MIN - 1\";\n  EXPECT (mpf_fits_sint_p, 0);\n\n\n  mpf_set_si (f, LONG_MIN);\n  expr = \"LONG_MIN\";\n  EXPECT (mpf_fits_slong_p, 1);\n\n  mpf_set_si (f, LONG_MIN);\n  mpf_sub (f, f, f0p5);\n  expr = \"LONG_MIN - 0.5\";\n  EXPECT (mpf_fits_slong_p, 1);\n\n  mpf_set_si (f, LONG_MIN);\n  mpf_sub_ui (f, f, 1L);\n  expr = \"LONG_MIN - 1\";\n  EXPECT (mpf_fits_slong_p, 0);\n\n\n  mpf_set_str_or_abort (f, \"0.5\", 10);\n  expr = \"0.5\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_str_or_abort (f, \"-0.5\", 10);\n  expr = \"-0.5\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n  
EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n  mpf_set_str_or_abort (f, \"-1.5\", 10);\n  expr = \"-1.5\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n\n  mpf_set_str_or_abort (f, \"1.000000000000000000000000000000000001\", 16);\n  expr = \"1.000000000000000000000000000000000001 base 16\";\n  EXPECT (mpf_fits_ulong_p, 1);\n  EXPECT (mpf_fits_uint_p, 1);\n  EXPECT (mpf_fits_ushort_p, 1);\n  EXPECT (mpf_fits_slong_p, 1);\n  EXPECT (mpf_fits_sint_p, 1);\n  EXPECT (mpf_fits_sshort_p, 1);\n\n  mpf_set_str_or_abort (f, \"1@1000\", 16);\n  expr = \"1@1000 base 16\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 0);\n  EXPECT (mpf_fits_sint_p, 0);\n  EXPECT (mpf_fits_sshort_p, 0);\n\n\n  mpf_set_ui (f, 1L);\n  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);\n  mpf_sub_ui (f, f, 1L);\n  expr = \"2^(BITS_PER_ULONG+1) - 1\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 0);\n  EXPECT (mpf_fits_sint_p, 0);\n  EXPECT (mpf_fits_sshort_p, 0);\n\n  mpf_set_ui (f, 1L);\n  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);\n  mpf_sub_ui (f, f, 1L);\n  mpf_neg (f, f);\n  expr = \"- (2^(BITS_PER_ULONG+1) - 1)\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 0);\n  EXPECT (mpf_fits_sint_p, 0);\n  EXPECT (mpf_fits_sshort_p, 0);\n\n  mpf_set_ui (f, 1L);\n  mpf_mul_2exp (f, f, BITS_PER_ULONG + 5);\n  mpf_sub_ui (f, f, 1L);\n  expr = \"2^(BITS_PER_ULONG+5) - 1\";\n  EXPECT (mpf_fits_ulong_p, 0);\n  EXPECT (mpf_fits_uint_p, 0);\n  EXPECT (mpf_fits_ushort_p, 0);\n  EXPECT (mpf_fits_slong_p, 0);\n  EXPECT (mpf_fits_sint_p, 0);\n  EXPECT (mpf_fits_sshort_p, 
0);\n\n\n  if (error)\n    abort ();\n\n  mpf_clear (f);\n  mpf_clear (f0p5);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-get_d.c",
    "content": "/* Test mpf_get_d and mpf_set_d.\n\n   Copyright 1996, 1999, 2000, 2001 Free Software Foundation, Inc.\n\n   This file is part of the GNU MP Library.\n\n   The GNU MP Library is free software; you can redistribute it and/or modify\n   it under the terms of the GNU Lesser General Public License as published by\n   the Free Software Foundation; either version 2.1 of the License, or (at your\n   option) any later version.\n\n   The GNU MP Library is distributed in the hope that it will be useful, but\n   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n   License for more details.\n\n   You should have received a copy of the GNU Lesser General Public License\n   along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n   MA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"tests.h\"\n\n#if ! defined (LOW_BOUND)\n#define LOW_BOUND 1e-300\n#define HIGH_BOUND 1e300\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  double d, e, r;\n  mpf_t u, v;\n  int sign;\n  \n  tests_start ();\n  mpf_init (u);\n  mpf_init (v);\n\n  \n  for(sign=-1;sign<=1;sign+=2)\n    {mpf_set_d (u, LOW_BOUND);\n     if(sign==-1)mpf_neg(u,u);\n     for (d = 2.0 * LOW_BOUND; d < HIGH_BOUND; d *= 1.01)\n       {\n         mpf_set_d (v, d*sign);\n         if ( ! mpf_cmp (v, u) == sign )\n        \tabort ();\n         e = mpf_get_d (v);\n         r = e/d;\n         if(r<0)r=-r;\n         if (r < 0.99999999999999 || r > 1.00000000000001)\n\t   {\n   \t     fprintf (stderr, \"should be one ulp from 1: %.16f\\n\", r);\n    \t     abort ();\n     \t   }\n         mpf_set (u, v);\n       }\n    }\n  mpf_clear (u);\n  mpf_clear (v);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-get_d_2exp.c",
    "content": "/* Test mpf_get_d_2exp.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nstatic void\ncheck_onebit (void)\n{\n  static const long data[] = {\n    -513, -512, -511, -65, -64, -63, -32, -1,\n    0, 1, 32, 53, 54, 64, 128, 256, 511, 512, 513\n  };\n  mpf_t   f;\n  double  got, want;\n  signed long got_exp, want_exp;\n  int     i,sign;\n\n  mpf_init2(f,1024L);\n  for(sign=-1;sign<=1;sign+=2){\n  \n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_set_ui (f, 1L);if(sign==-1)mpf_neg(f,f);\n      if (data[i] >= 0)\n        mpf_mul_2exp (f, f, data[i]);\n      else\n        mpf_div_2exp (f, f, -data[i]);\n      want = 0.5*sign;\n      want_exp = data[i] + 1;\n\n      got = mpf_get_d_2exp (&got_exp, f);\n      if (got != want || got_exp != want_exp)\n        {\n          printf    (\"mpf_get_d_2exp wrong on 2**%ld\\n\", data[i]);\n          mpf_trace (\"   f    \", f);\n          d_trace   (\"   want \", want);\n          d_trace   (\"   got  \", got);\n          printf    (\"   want exp %ld\\n\", want_exp);\n          printf    (\"   
got exp  %ld\\n\", got_exp);\n          abort();\n        }\n    }}\n  mpf_clear (f);\n}\n\n/* Check that hardware rounding doesn't make mpf_get_d_2exp return a value\n   outside its defined range. */\nstatic void\ncheck_round (void)\n{\n  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };\n  mpf_t   f;\n  double  got;\n  mpir_si got_exp;\n  int     i, rnd_mode, old_rnd_mode;\n\n  mpf_init2 (f, 1024L);\n  old_rnd_mode = tests_hardware_getround ();\n\n  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)\n    {\n      tests_hardware_setround (rnd_mode);\n\n      for (i = 0; i < numberof (data); i++)\n        {\n          mpf_set_ui (f, 1L);\n          mpf_mul_2exp (f, f, data[i]);\n          mpf_sub_ui (f, f, 1L);\n\n          got = mpf_get_d_2exp (&got_exp, f);\n          if (got < 0.5 || got >= 1.0)\n            {\n              printf    (\"mpf_get_d_2exp bad on 2**%lu-1\\n\", data[i]);\n              printf    (\"result out of range, expect 0.5 <= got < 1.0\\n\");\n              printf    (\"   rnd_mode = %d\\n\", rnd_mode);\n              printf    (\"   data[i]  = %lu\\n\", data[i]);\n              mpf_trace (\"   f    \", f);\n              d_trace   (\"   got  \", got);\n              printf    (\"   got exp  %ld\\n\", got_exp);\n              abort();\n            }\n        }\n    }\n\n  mpf_clear (f);\n  tests_hardware_setround (old_rnd_mode);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = 16;\n\n  check_onebit ();\n  check_round ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-get_si.c",
    "content": "/* Exercise mpf_get_si.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\nvoid\ncheck_data (void)\n{\n  static const struct {\n    int         base;\n    const char  *f;\n    mpir_si      want;\n  } data[] = {\n    { 10, \"0\",      0 },\n    { 10, \"1\",      1 },\n    { 10, \"-1\",     -1 },\n    { 10, \"2\",      2 },\n    { 10, \"-2\",     -2 },\n    { 10, \"12345\",  12345 },\n    { 10, \"-12345\", -12345 },\n\n    /* fraction bits ignored */\n    { 10, \"0.5\",    0 },\n    { 10, \"-0.5\",   0 },\n    { 10, \"1.1\",    1 },\n    { 10, \"-1.1\",   -1 },\n    { 10, \"1.9\",    1 },\n    { 10, \"-1.9\",   -1 },\n    { 16, \"1.000000000000000000000000000000000000000000000000001\", 1L },\n    { 16, \"-1.000000000000000000000000000000000000000000000000001\", -1L },\n\n    /* low bits extracted (this is undocumented) */\n    { 16, \"1000000000000000000000000000000000000000000000000001\", 1L },\n    { 16, \"-1000000000000000000000000000000000000000000000000001\", -1L },\n  };\n\n  int    i;\n  mpf_t  f;\n  
mpir_si   got;\n\n  mpf_init2 (f, 2000);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_set_str_or_abort (f, data[i].f, data[i].base);\n\n      got = mpf_get_si (f);\n      if (got != data[i].want)\n        {\n          printf (\"mpf_get_si wrong at data[%d]\\n\", i); \n          printf (\"   f     \\\"%s\\\"\\n\", data[i].f);\n          printf (\"     dec \"); mpf_out_str (stdout, 10, 0, f); printf (\"\\n\");\n          printf (\"     hex \"); mpf_out_str (stdout, 16, 0, f); printf (\"\\n\");\n          printf (\"     size %ld\\n\", (long) SIZ(f));\n          printf (\"     exp  %ld\\n\", (long) EXP(f));\n          printf (\"   got   %Md (%#Mx)\\n\", got, got);\n          printf (\"   want  %Md (%#Mx)\\n\", data[i].want, data[i].want);\n          abort();                                    \n        }\n    }\n  mpf_clear (f);\n}\n\n\nvoid\ncheck_max (void)\n{\n  mpf_t  f;\n  mpir_si   want;\n  mpir_si   got;\n\n  mpf_init2 (f, 200L);\n\n#define CHECK_MAX(name)                                         \\\n  if (got != want)                                              \\\n    {                                                           \\\n      printf (\"mpf_get_si wrong on %s\\n\", name);                \\\n      printf (\"   f    \");                                      \\\n      mpf_out_str (stdout, 10, 0, f); printf (\", hex \");        \\\n      mpf_out_str (stdout, 16, 0, f); printf (\"\\n\");            \\\n      printf (\"   got  %ld, hex %#Mx\\n\", got, got);             \\\n      printf (\"   want %ld, hex %#Mx\\n\", want, want);           \\\n      abort();                                                  \\\n    }\n\n  want = GMP_SI_MAX;\n  mpf_set_si (f, want);\n  got = mpf_get_si (f);\n  CHECK_MAX (\"GMP_SI_MAX\");\n\n  want = GMP_SI_MIN;\n  mpf_set_si (f, want);\n  got = mpf_get_si (f);\n  CHECK_MAX (\"GMP_SI_MIN\");\n\n  mpf_clear (f);\n}\n\n\nvoid\ncheck_limbdata (void)\n{\n#define M  GMP_NUMB_MAX\n  \n  static const struct {\n    
mp_exp_t       exp;\n    mp_size_t      size;\n    mp_limb_t      d[10];\n    mpir_si         want;\n\n  } data[] = {\n\n    /* in the comments here, a \"_\" indicates a digit (ie. limb) position not\n       included in the d data, and therefore zero */\n\n    { 0, 0, { 0 }, 0 },    /* 0 */\n\n    { 1,  1, { 1 }, 1 },   /* 1 */\n    { 1, -1, { 1 }, -1 },  /* -1 */\n\n    { 0,  1, { 1 }, 0 },   /* .1 */\n    { 0, -1, { 1 }, 0 },   /* -.1 */\n\n    { -1,  1, { 1 }, 0 },  /* ._1 */\n    { -1, -1, { 1 }, 0 },  /* -._1 */\n\n    { -999,          1, { 1 }, 0 },   /* .___1 small */\n    { MP_EXP_T_MIN,  1, { 1 }, 0 },   /* .____1 very small */\n\n    { 999,          1, { 1 }, 0 },    /* 1____. big */\n    { MP_EXP_T_MAX, 1, { 1 }, 0 },    /* 1_____. very big */\n\n    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */\n    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3 },  /* 10003.987 */\n\n    { 2, 2, { M, M },    GMP_SI_MAX }, /* FF. */\n    { 2, 2, { M, M, M }, GMP_SI_MAX }, /* FF.F */\n    { 3, 3, { M, M, M }, GMP_SI_MAX }, /* FFF. */\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n    /* normal case, numb bigger than long */\n    { 2,  1, { 1 },    0 },      /* 1_. */\n    { 2,  2, { 0, 1 }, 0 },      /* 10. */\n    { 2,  2, { 999, 1 }, 999 },  /* 19. */\n    { 3,  2, { 999, 1 }, 0 },    /* 19_. */\n\n#else\n    /* nails case, numb smaller than long */\n    { 2,  1, { 1 }, 1 << GMP_NUMB_BITS },  /* 1_. */\n    { 3,  1, { 1 }, 0 },                   /* 1__. */\n\n    { 2,  2, { 99, 1 },    99 + (1 << GMP_NUMB_BITS) },  /* 19. */\n    { 3,  2, { 1, 99 },    1 << GMP_NUMB_BITS },          /* 91_. */\n    { 3,  3, { 0, 1, 99 }, 1 << GMP_NUMB_BITS },          /* 910. 
*/\n\n#endif\n  };\n\n  mpf_t          f;\n  mpir_si         got;\n  int            i;\n  mp_limb_t      buf[20 + numberof(data[i].d)];\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));\n      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));\n      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));\n\n      PTR(f) = buf+10;\n      EXP(f) = data[i].exp;\n      SIZ(f) = data[i].size;\n      PREC(f) = numberof (data[i].d);\n      MPF_CHECK_FORMAT (f);\n\n      got = mpf_get_si (f);\n      if (got != data[i].want)\n        {\n          printf    (\"mpf_get_si wrong at limb data[%d]\\n\", i);\n          mpf_trace (\"  f\", f);\n          mpn_trace (\"  d\", data[i].d, data[i].size);\n          printf    (\"  size %ld\\n\", (mpir_si) data[i].size);\n          printf    (\"  exp %ld\\n\", (mpir_si) data[i].exp);\n          printf    (\"  got   %Mu (%#Mx)\\n\", got, got);\n          printf    (\"  want  %Mu (%#Mx)\\n\", data[i].want, data[i].want);\n          abort();\n        }\n    }\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n  check_max ();\n  check_limbdata ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-get_ui.c",
    "content": "/* Exercise mpf_get_ui.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\n\nvoid\ncheck_limbdata (void)\n{\n#define M  GMP_NUMB_MAX\n\n  static const struct {\n    mp_exp_t       exp;\n    mp_size_t      size;\n    mp_limb_t      d[10];\n    mpir_ui  want;\n\n  } data[] = {\n\n    /* in the comments here, a \"_\" indicates a digit (ie. limb) position not\n       included in the d data, and therefore zero */\n\n    { 0, 0, { 0 }, 0L },    /* 0 */\n\n    { 1,  1, { 1 }, 1L },   /* 1 */\n    { 1, -1, { 1 }, 1L },   /* -1 */\n\n    { 0,  1, { 1 }, 0L },   /* .1 */\n    { 0, -1, { 1 }, 0L },   /* -.1 */\n\n    { -1,  1, { 1 }, 0L },  /* ._1 */\n    { -1, -1, { 1 }, 0L },  /* -._1 */\n\n    { -999,          1, { 1 }, 0L },   /* .___1 small */\n    { MP_EXP_T_MIN,  1, { 1 }, 0L },   /* .____1 very small */\n\n    { 999,          1, { 1 }, 0L },    /* 1____. big */\n    { MP_EXP_T_MAX, 1, { 1 }, 0L },    /* 1_____. 
very big */\n\n    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */\n    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3L },  /* 10003.987 */\n\n    { 2, 2, { M, M },    GMP_UI_MAX }, /* FF. */\n    { 2, 2, { M, M, M }, GMP_UI_MAX }, /* FF.F */\n    { 3, 3, { M, M, M }, GMP_UI_MAX }, /* FFF. */\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n    /* normal case, numb bigger than long */\n    { 2,  1, { 1 },    0L },      /* 1_. */\n    { 2,  2, { 0, 1 }, 0L },      /* 10. */\n    { 2,  2, { 999, 1 }, 999L },  /* 19. */\n    { 3,  2, { 999, 1 }, 0L },    /* 19_. */\n\n#else\n    /* nails case, numb smaller than long */\n    { 2,  1, { 1 }, 1L << GMP_NUMB_BITS },  /* 1_. */\n    { 3,  1, { 1 }, 0L },                   /* 1__. */\n\n    { 2,  2, { 99, 1 },    99L + (1L << GMP_NUMB_BITS) },  /* 19. */\n    { 3,  2, { 1, 99 },    1L << GMP_NUMB_BITS },          /* 91_. */\n    { 3,  3, { 0, 1, 99 }, 1L << GMP_NUMB_BITS },          /* 910. */\n\n#endif\n  };\n\n  mpf_t          f;\n  mpir_ui         got;\n  int            i;\n  mp_limb_t      buf[20 + numberof(data[i].d)];\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));\n      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));\n      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));\n\n      PTR(f) = buf+10;\n      EXP(f) = data[i].exp;\n      SIZ(f) = data[i].size;\n      PREC(f) = numberof (data[i].d);\n      MPF_CHECK_FORMAT (f);\n\n      got = mpf_get_ui (f);\n      if (got!= data[i].want)\n        {\n          printf    (\"mpf_get_ui wrong at limb data[%d]\\n\", i);\n          mpf_trace (\"  f\", f);\n          mpn_trace (\"  d\", data[i].d, data[i].size);\n          printf    (\"  size %Md\\n\", (long) data[i].size);\n          printf    (\"  exp %Md\\n\", (long) data[i].exp);\n          printf    (\"  got   %Mu (%#Mx)\\n\", got, got);\n          printf    (\"  want  %Mu (%#Mx)\\n\", data[i].want, data[i].want);\n          abort();\n        }\n    
}\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = 16;\n\n  check_limbdata ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-gsprec.c",
    "content": "/* Test mpf_get_prec and mpf_set_prec.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_consistency (void)\n{\n  mpf_t  x;\n  unsigned long  i, a, b;\n\n  mpf_init (x);\n\n  for (i = 1; i < 2000; i++)\n    {\n      mpf_set_prec (x, i);\n      a = mpf_get_prec (x);\n      mpf_set_prec (x, a);\n      b = mpf_get_prec (x);\n      if (a != b)\n        {\n          printf (\"mpf_get_prec / mpf_set_prec inconsistent\\n\");\n          printf (\"   set %lu gives %lu, but then set %lu gives %lu\\n\",\n                  i, a,\n                  a, b);\n          abort ();\n        }\n    }\n\n  mpf_clear (x);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_consistency ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-inp_str.c",
    "content": "/* Test mpf_inp_str.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>\t\t/* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define FILENAME  \"t-inp_str.tmp\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *inp;\n    int         base;\n    const char  *want;\n    int         want_nread;\n\n  } data[] = {\n\n    { \"0\",   10, \"0\", 1 },\n\n    { \"abc\", 10, \"0\", 0 },\n    { \"ghi\", 16, \"0\", 0 },\n\n    { \"125\",    10, \"125\",  3 },\n    { \"125e1\",  10, \"1250\", 5 },\n    { \"125e-1\", 10, \"12.5\", 6 },\n\n    {  \"ff\", 16,  \"255\", 2 },\n    { \"-ff\", 16, \"-255\", 3 },\n    {  \"FF\", 16,  \"255\", 2 },\n    { \"-FF\", 16, \"-255\", 3 },\n\n    { \"100\",     16, \"256\",  3 },\n    { \"100@1\",   16, \"4096\", 5 },\n    { \"100@10\",  16, \"4722366482869645213696\", 6 },\n    { \"100@10\", -16, \"281474976710656\",        6 },\n    { \"100@-1\",  16, \"16\",   6 },\n    { \"10000000000000000@-10\",  16, \"1\", 21 },\n    { 
\"10000000000@-10\",       -16, \"1\", 15 },\n\n    { \"z\", 36, \"35\", 1 },\n    { \"Z\", 36, \"35\", 1 },\n    { \"z@1\", 36, \"1260\", 3 },\n    { \"Z@1\", 36, \"1260\", 3 },\n\n    {  \"0\",      0,   \"0\", 1 },\n  };\n\n  mpf_t  got, want;\n  long   ftell_nread;\n  int    i, pre, post, j, got_nread, want_nread;\n  FILE   *fp;\n\n  mpf_init (got);\n  mpf_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (pre = 0; pre <= 3; pre++)\n        {\n          for (post = 0; post <= 2; post++)\n            {\n              mpf_set_str_or_abort (want, data[i].want, 10);\n              MPF_CHECK_FORMAT (want);\n\n              /* create the file new each time to ensure its length is what\n                 we want */\n              fp = fopen (FILENAME, \"w+\");\n              ASSERT_ALWAYS (fp != NULL);\n              for (j = 0; j < pre; j++)\n                putc (' ', fp);\n              fputs (data[i].inp, fp);\n              for (j = 0; j < post; j++)\n                putc (' ', fp);\n              fflush (fp);\n              ASSERT_ALWAYS (! 
ferror(fp));\n\n              rewind (fp);\n              got_nread = mpf_inp_str (got, fp, data[i].base);\n\n              if (got_nread != 0)\n                {\n                  ftell_nread = ftell (fp);\n                  if (got_nread != ftell_nread)\n                    {\n                      printf (\"mpf_inp_str nread wrong\\n\");\n                      printf (\"  inp          \\\"%s\\\"\\n\", data[i].inp);\n                      printf (\"  base         %d\\n\", data[i].base);\n                      printf (\"  pre          %d\\n\", pre);\n                      printf (\"  post         %d\\n\", post);\n                      printf (\"  got_nread    %d\\n\", got_nread);\n                      printf (\"  ftell_nread  %ld\\n\", ftell_nread);\n                      abort ();\n                    }\n                }\n\n              /* if data[i].inp is a whole string to read and there's no post\n                 whitespace then expect to have EOF */\n              if (post == 0 && data[i].want_nread == strlen(data[i].inp))\n                {\n                  int  c = getc(fp);\n                  if (c != EOF)\n                    {\n                      printf (\"mpf_inp_str didn't read to EOF\\n\");\n                      printf (\"  inp   \\\"%s\\\"\\n\", data[i].inp);\n                      printf (\"  base  %d\\n\", data[i].base);\n                      printf (\"  pre   %d\\n\", pre);\n                      printf (\"  post  %d\\n\", post);\n                      printf (\"  c     '%c' %#x\\n\", c, c);\n                      abort ();\n                    }\n                }\n\n              /* only expect \"pre\" included in the count when non-zero */\n              want_nread = data[i].want_nread;\n              if (want_nread != 0)\n                want_nread += pre;\n\n              if (got_nread != want_nread)\n                {\n                  printf (\"mpf_inp_str nread wrong\\n\");\n                  printf (\"  inp         
\\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base        %d\\n\", data[i].base);\n                  printf (\"  pre         %d\\n\", pre);\n                  printf (\"  post        %d\\n\", post);\n                  printf (\"  got_nread   %d\\n\", got_nread);\n                  printf (\"  want_nread  %d\\n\", want_nread);\n                  abort ();\n                }\n\n              MPF_CHECK_FORMAT (got);\n\n              if (mpf_cmp (got, want) != 0)\n                {\n                  printf (\"mpf_inp_str wrong result\\n\");\n                  printf (\"  inp   \\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base  %d\\n\", data[i].base);\n                  mpf_trace (\"  got \",  got);\n                  mpf_trace (\"  want\", want);\n                  abort ();\n                }\n\n              ASSERT_ALWAYS (fclose (fp) == 0);\n            }\n        }\n    }\n\n  mpf_clear (got);\n  mpf_clear (want);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  unlink (FILENAME);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-int_p.c",
    "content": "/* Test mpf_integer_p.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\none (mpf_srcptr f, int want)\n{\n  int  got;\n  got = mpf_integer_p (f);\n  if (got != want)\n    {\n      printf (\"mpf_integer_p got %d want %d\\n\", got, want);\n      mpf_trace (\" f\", f);\n      abort ();\n    }\n}\n\nvoid\nall (mpf_ptr f, int want)\n{\n  one (f, want);\n  mpf_neg (f, f);\n  one (f, want);\n}\n\nint\nmain (void)\n{\n  mpf_t  f;\n\n  tests_start ();\n  mpf_init2 (f, 200L);\n\n  mpf_set_ui (f, 0L);\n  one (f, 1);\n\n  mpf_set_ui (f, 1L);\n  all (f, 1);\n\n  mpf_set_ui (f, 1L);\n  mpf_div_2exp (f, f, 1L);\n  all (f, 0);\n\n  mpf_set_ui (f, 1L);\n  mpf_div_2exp (f, f, 5000L);\n  all (f, 0);\n\n  mpf_set_ui (f, 1L);\n  mpf_mul_2exp (f, f, 5000L);\n  all (f, 1);\n\n  mpf_set_str (f, \"0.5\", 10);\n  all (f, 0);\n\n  mpf_set_ui (f, 1L);\n  mpf_div_ui (f, f, 3L);\n  all (f, 0);\n\n  mpf_clear (f);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-mul_ui.c",
    "content": "/* Exercise mpf_mul_ui.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_one (const char *desc, mpf_ptr got, mpf_srcptr u, mpir_ui v)\n{\n  mp_size_t  usize, usign;\n  mp_ptr     wp;\n  mpf_t      want;\n\n  MPF_CHECK_FORMAT (got);\n\n  /* this code not nailified yet */\n  ASSERT_ALWAYS (BITS_PER_UI <= GMP_NUMB_BITS);\n  usign = SIZ (u);\n  usize = ABS (usign);\n  wp = refmpn_malloc_limbs (usize + 1);\n  wp[usize] = mpn_mul_1 (wp, PTR(u), usize, (mp_limb_t) v);\n\n  PTR(want) = wp;\n  SIZ(want) = (usign >= 0 ? usize+1 : -(usize+1));\n  EXP(want) = EXP(u) + 1;\n  refmpf_normalize (want);\n\n  if (! 
refmpf_validate (\"mpf_mul_ui\", got, want))\n    {\n      mp_trace_base = -16;\n      printf    (\"  %s\\n\", desc);\n      mpf_trace (\"  u\", u);\n      printf    (\"  v %ld  0x%lX\\n\", v, v);\n      abort ();\n    }\n\n  free (wp);\n}\n\nvoid\ncheck_rand (void)\n{\n  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);\n  gmp_randstate_t  rands;\n  mpf_t              got, u;\n  unsigned long      prec, v;\n  int                i;\n\n  /* The nails code in mpf_mul_ui currently isn't exact, so suppress these\n     tests for now.  */\n  if (BITS_PER_UI > GMP_NUMB_BITS)\n    return;\n\n  mpf_init (got);\n  mpf_init (u);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 200; i++)\n    {\n      /* got precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (got, prec);\n\n      /* u precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (u, prec);\n\n      /* u, possibly negative */\n      mpf_rrandomb (u, rands, PREC(u), (mp_exp_t) 20);\n      if (gmp_urandomb_ui (rands, 1L))\n        mpf_neg (u, u);\n\n      /* v, 0 to BITS_PER_ULONG bits (inclusive) */\n      prec = gmp_urandomm_ui (rands, BITS_PER_ULONG+1);\n      v = gmp_urandomb_ui (rands, prec);\n\n      if ((i % 2) == 0)\n        {\n          /* separate */\n          mpf_mul_ui (got, u, v);\n          check_one (\"separate\", got, u, v);\n        }\n      else\n        {\n          /* overlap */\n          prec = refmpf_set_overlap (got, u);\n          mpf_mul_ui (got, got, v);\n          check_one (\"overlap src==dst\", got, u, v);\n\n          mpf_set_prec_raw (got, prec);\n        }\n    }\n\n  mpf_clear (got);\n  mpf_clear (u);\n  gmp_randclear(rands);\n}\n\nvoid\ncheck_various (void)\n{\n  mpf_t  u, got, want;\n  char   *s;\n\n  mpf_init2 (u,    2*8*sizeof(long));\n  mpf_init2 (got,  2*8*sizeof(long));\n  mpf_init2 (want, 2*8*sizeof(long));\n\n  s = \"0 * GMP_UI_MAX\";\n  mpf_set_ui (u, 0L);\n  mpf_mul_ui (got, u, 
GMP_UI_MAX);\n  MPF_CHECK_FORMAT (got);\n  mpf_set_ui (want, 0L);\n  if (mpf_cmp (got, want) != 0)\n    {\n    error:\n      printf (\"Wrong result from %s\\n\", s);\n      mpf_trace (\"u   \", u);\n      mpf_trace (\"got \", got);\n      mpf_trace (\"want\", want);\n      abort ();\n    }\n\n  s = \"1 * GMP_UI_MAX\";\n  mpf_set_ui (u, 1L);\n  mpf_mul_ui (got, u, GMP_UI_MAX);\n  MPF_CHECK_FORMAT (got);\n  mpf_set_ui (want, GMP_UI_MAX);\n  if (mpf_cmp (got, want) != 0)\n    goto error;\n\n  mpf_clear (u);\n  mpf_clear (got);\n  mpf_clear (want);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_various ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-muldiv.c",
    "content": "/* Test mpf_mul, mpf_div, mpf_ui_div, and mpf_div_ui.\n\nCopyright 1996, 2000, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mp_size_t size;\n  mp_exp_t exp;\n  int reps = 10000;\n  int i;\n  mpf_t u, v, w, x;\n  mp_size_t bprec = SIZE * BITS_PER_MP_LIMB;\n  mpf_t rerr, limit_rerr;\n  unsigned long ulimb, vlimb;\n  int single_flag;\n  gmp_randstate_t rands;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init (rerr);\n  mpf_init (limit_rerr);\n\n  mpf_init (u);\n  mpf_init (v);\n  mpf_init (w);\n  mpf_init (x);\n\n  for (i = 0; i < reps; i++)\n    {\n      mp_size_t res_prec;\n\n      res_prec = urandom (rands) % bprec + 1;\n      mpf_set_prec (w, res_prec);\n      mpf_set_prec (x, res_prec);\n\n      mpf_set_ui (limit_rerr, 1);\n      mpf_div_2exp (limit_rerr, limit_rerr, res_prec - 
1);\n\n      single_flag = 0;\n\n      if ((urandom (rands) & 1) != 0)\n\t{\n\t  size = urandom (rands) % (2 * SIZE) - SIZE;\n\t  exp = urandom (rands) % SIZE;\n\t  mpf_rrandomb (u, rands, size, exp);\n\t}\n      else\n\t{\n\t  ulimb = urandom (rands);\n\t  mpf_set_ui (u, ulimb);\n\t  single_flag = 1;\n\t}\n\n      if ((urandom (rands) & 1) != 0)\n\t{\n\t  size = urandom (rands) % (2 * SIZE) - SIZE;\n\t  exp = urandom (rands) % SIZE;\n\t  mpf_rrandomb (v, rands, size, exp);\n\t}\n      else\n\t{\n\t  vlimb = urandom (rands);\n\t  mpf_set_ui (v, vlimb);\n\t  single_flag = 2;\n\t}\n\n      if (mpf_sgn (v) == 0)\n\tcontinue;\n\n      mpf_div (w, u, v);\n      mpf_mul (x, w, v);\n      mpf_reldiff (rerr, u, x);\n      if (mpf_cmp (rerr, limit_rerr) > 0)\n\t{\n\t  printf (\"ERROR in mpf_mul or mpf_div after %d tests\\n\", i);\n\t  printf (\"   u = \"); mpf_dump (u);\n\t  printf (\"   v = \"); mpf_dump (v);\n\t  printf (\"   x = \"); mpf_dump (x);\n\t  printf (\"   w = \"); mpf_dump (w);\n\t  abort ();\n\t}\n\n      if (single_flag == 2)\n\t{\n\t  mpf_div_ui (x, u, vlimb);\n\t  mpf_reldiff (rerr, w, x);\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR in mpf_div or mpf_div_ui after %d tests\\n\", i);\n\t      printf (\"   u = \"); mpf_dump (u);\n\t      printf (\"   v = \"); mpf_dump (v);\n\t      printf (\"   x = \"); mpf_dump (x);\n\t      printf (\"   w = \"); mpf_dump (w);\n\t      abort ();\n\t    }\n\t}\n\n      if (single_flag == 1)\n\t{\n\t  mpf_ui_div (x, ulimb, v);\n\t  mpf_reldiff (rerr, w, x);\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR in mpf_div or mpf_ui_div after %d tests\\n\", i);\n\t      printf (\"   u = \"); mpf_dump (u);\n\t      printf (\"   v = \"); mpf_dump (v);\n\t      printf (\"   x = \"); mpf_dump (x);\n\t      printf (\"   w = \"); mpf_dump (w);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  mpf_clear (rerr);\n  mpf_clear (limit_rerr);\n\n  mpf_clear (u);\n  mpf_clear (v);\n  mpf_clear 
(w);\n  mpf_clear (x);\n  gmp_randclear(rands); \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-set.c",
    "content": "/* Test mpf_set.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_reuse (void)\n{\n  /* Try mpf_set(f,f) when f is bigger than prec.  In the past this had\n     resulted in an MPN_COPY with invalid operand overlap. */\n  mpf_t  f;\n  mp_size_t      limbs = 20;\n  unsigned long  bits = limbs * GMP_NUMB_BITS;\n  mpf_init2 (f, bits);\n  refmpf_fill (f, limbs, GMP_NUMB_MAX);\n  mpf_set_prec_raw (f, bits / 2);\n  mpf_set (f, f);\n  MPF_CHECK_FORMAT (f);\n  mpf_set_prec_raw (f, bits);\n  mpf_clear (f);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_reuse ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-set_q.c",
    "content": "/* Test mpf_set_q.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpf_ptr got, mpq_srcptr q)\n{\n  mpf_t  n, d;\n\n  mpf_set_q (got, q);\n\n  PTR(n) = PTR(&q->_mp_num);\n  SIZ(n) = SIZ(&q->_mp_num);\n  EXP(n) = ABSIZ(&q->_mp_num);\n\n  PTR(d) = PTR(&q->_mp_den);\n  SIZ(d) = SIZ(&q->_mp_den);\n  EXP(d) = ABSIZ(&q->_mp_den);\n\n  if (! 
refmpf_validate_division (\"mpf_set_q\", got, n, d))\n    {\n      mp_trace_base = -16;\n      mpq_trace (\"   q\", q);\n      abort ();\n    }\n}\n\nvoid\ncheck_rand (void)\n{\n  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);\n  gmp_randstate_t  rands;\n  unsigned long  prec;\n  mpf_t  got;\n  mpq_t  q;\n  int    i;\n\n  mpf_init (got);\n  mpq_init (q);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 400; i++)\n    {\n      /* result precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 20L);\n      refmpf_set_prec_limbs (got, prec);\n\n      /* num */\n      prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);\n      mpz_rrandomb (mpq_numref(q), rands, prec);\n\n      /* possibly negative num */\n      if (gmp_urandomb_ui (rands, 1L))\n        mpz_neg (mpq_numref(q), mpq_numref(q));\n\n      /* den, non-zero */\n      do {\n        prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);\n        mpz_rrandomb (mpq_denref(q), rands, prec);\n      } while (mpz_sgn (mpq_denref(q)) <= 0);\n\n      check_one (got, q);\n    }\n\n  mpf_clear (got);\n  mpq_clear (q);\n  gmp_randclear(rands);\n}\n\nvoid\ncheck_various (void)\n{\n  mpf_t got;\n  mpq_t q;\n\n  mpf_init (got);\n  mpq_init (q);\n\n  /* 1/1 == 1 */\n  mpf_set_prec (got, 20L);\n  mpq_set_ui (q, 1L, 1L);\n  mpf_set_q (got, q);\n  MPF_CHECK_FORMAT (got);\n  ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);\n\n  /* 1/(2^n+1), a case where truncating the divisor would be wrong */\n  mpf_set_prec (got, 500L);\n  mpq_set_ui (q, 1L, 1L);\n  mpz_mul_2exp (mpq_denref(q), mpq_denref(q), 800L);\n  mpz_add_ui (mpq_denref(q), mpq_denref(q), 1L);\n  check_one (got, q);\n\n  mpf_clear (got);\n  mpq_clear (q);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_various ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-set_si.c",
    "content": "/* Test mpf_set_si and mpf_init_set_si.\n\nCopyright 2000, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    mpir_si       x;\n    mp_size_t  want_size;\n    mp_limb_t  want_data[2];\n  } data[] = {\n\n    {  0L,  0 },\n    {  1L,  1, { 1 } },\n    { -1L, -1, { 1 } },\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n    { GMP_SI_MAX,  1, { GMP_SI_MAX, 0 } },\n    { -GMP_SI_MAX,  -1, { GMP_SI_MAX, 0 } },\n    { GMP_SI_MIN,  -1, { GMP_UI_HIBIT, 0 } },\n#else\n    { GMP_SI_MAX,  2, { GMP_SI_MAX & GMP_NUMB_MASK, GMP_SI_MAX >> GMP_NUMB_BITS } },\n    { -GMP_SI_MAX,  -2, { GMP_SI_MAX & GMP_NUMB_MASK, GMP_SI_MAX >> GMP_NUMB_BITS }},\n    { GMP_SI_MIN,  -2, { 0, ULONG_HIGHBIT >> GMP_NUMB_BITS } },\n#endif\n  };\n\n  mpf_t  x;\n  int    i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_init (x);\n      mpf_set_si (x, data[i].x);\n      MPF_CHECK_FORMAT (x);\n      if (x->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,\n                                   
ABS (data[i].want_size)) != 0\n          || x->_mp_exp != ABS (data[i].want_size))\n        {\n          printf (\"mpf_set_si wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpf_clear (x);\n\n      mpf_init_set_si (x, data[i].x);\n      MPF_CHECK_FORMAT (x);\n      if (x->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0\n          || x->_mp_exp != ABS (data[i].want_size))\n        {\n          printf (\"mpf_init_set_si wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpf_clear (x);\n    }\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-set_ui.c",
    "content": "/* Test mpf_set_ui and mpf_init_set_ui.\n\nCopyright 2000, 2001, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    mpir_ui  x;\n    mp_size_t      want_size;\n    mp_limb_t      want_data[2];\n  } data[] = {\n\n    {  0L,  0 },\n    {  1L,  1, { 1 } },\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n    { GMP_UI_MAX,     1, { GMP_UI_MAX, 0 } },\n    { GMP_UI_HIBIT, 1, { GMP_UI_HIBIT, 0 } },\n#else\n    { GMP_UI_MAX,     2, { GMP_UI_MAX & GMP_NUMB_MASK,\n                          GMP_UI_MAX >> GMP_NUMB_BITS } },\n    { LONG_HIGHBIT,  2, { 0,\n                          GMP_UI_HIBIT >> GMP_NUMB_BITS } },\n#endif\n  };\n\n  mpf_t  x;\n  int    i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpf_init (x);\n      mpf_set_ui (x, data[i].x);\n      MPF_CHECK_FORMAT (x);\n      if (x->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0\n          || x->_mp_exp != ABS (data[i].want_size))\n        {\n 
         printf (\"mpf_set_ui wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpf_clear (x);\n\n      mpf_init_set_ui (x, data[i].x);\n      MPF_CHECK_FORMAT (x);\n      if (x->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0\n          || x->_mp_exp != ABS (data[i].want_size))\n        {\n          printf (\"mpf_init_set_ui wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpf_clear (x);\n    }\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-sqrt.c",
    "content": "/* Test mpf_sqrt, mpf_mul.\n\nCopyright 1996, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\nvoid\ncheck_rand1 (int argc, gmp_randstate_t rands,char **argv)\n{\n  mp_size_t size;\n  mp_exp_t exp;\n  int reps = 20000;\n  int i;\n  mpf_t x, y, y2;\n  mp_size_t bprec = 100;\n  mpf_t rerr, max_rerr, limit_rerr;\n\n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n\n  mpf_init_set_ui (limit_rerr, 1);\n  mpf_div_2exp (limit_rerr, limit_rerr, bprec);\n#if VERBOSE\n  mpf_dump (limit_rerr);\n#endif\n  mpf_init (rerr);\n  mpf_init_set_ui (max_rerr, 0);\n\n  mpf_init (x);\n  mpf_init (y);\n  mpf_init (y2);\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % SIZE;\n      exp = urandom (rands) % SIZE;\n      mpf_rrandomb (x, rands, size, exp);\n\n      mpf_sqrt (y, x);\n      MPF_CHECK_FORMAT (y);\n      mpf_mul (y2, y, y);\n\n      mpf_reldiff (rerr, x, y2);\n      if (mpf_cmp (rerr, 
max_rerr) > 0)\n\t{\n\t  mpf_set (max_rerr, rerr);\n#if VERBOSE\n\t  mpf_dump (max_rerr);\n#endif\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR after %d tests\\n\", i);\n\t      printf (\"   x = \"); mpf_dump (x);\n\t      printf (\"   y = \"); mpf_dump (y);\n\t      printf (\"  y2 = \"); mpf_dump (y2);\n\t      printf (\"   rerr       = \"); mpf_dump (rerr);\n\t      printf (\"   limit_rerr = \"); mpf_dump (limit_rerr);\n              printf (\"in hex:\\n\");\n              mp_trace_base = 16;\n\t      mpf_trace (\"   x  \", x);\n\t      mpf_trace (\"   y  \", y);\n\t      mpf_trace (\"   y2 \", y2);\n\t      mpf_trace (\"   rerr      \", rerr);\n\t      mpf_trace (\"   limit_rerr\", limit_rerr);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  mpf_clear (limit_rerr);\n  mpf_clear (rerr);\n  mpf_clear (max_rerr);\n\n  mpf_clear (x);\n  mpf_clear (y);\n  mpf_clear (y2);\n}\n\nvoid\ncheck_rand2 (gmp_randstate_t rands)\n{\n  unsigned long      max_prec = 20;\n  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);\n  unsigned long      x_prec, r_prec;\n  mpf_t              x, r, s;\n  int                i;\n\n  mpf_init (x);\n  mpf_init (r);\n  mpf_init (s);\n  refmpf_set_prec_limbs (s, 2*max_prec+10);\n\n  for (i = 0; i < 500; i++)\n    {\n      /* input precision */\n      x_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;\n      refmpf_set_prec_limbs (x, x_prec);\n\n      /* result precision */\n      r_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;\n      refmpf_set_prec_limbs (r, r_prec);\n\n      mpf_rrandomb (x, rands, x_prec, 1000);\n\n      mpf_sqrt (r, x);\n      MPF_CHECK_FORMAT (r);\n\n      /* Expect to prec limbs of result.\n         In the current implementation there's no stripping of low zero\n         limbs in mpf_sqrt, so size should be exactly prec.  
*/\n      if (SIZ(r) != r_prec)\n        {\n          printf (\"mpf_sqrt wrong number of result limbs\\n\");\n          mpf_trace (\"  x\", x);\n          mpf_trace (\"  r\", r);\n          printf    (\"  r_prec=%lu\\n\", r_prec);\n          printf    (\"  SIZ(r)  %ld\\n\", (long) SIZ(r));\n          printf    (\"  PREC(r) %ld\\n\", (long) PREC(r));\n          abort ();\n        }\n\n      /* Must have r^2 <= x, since r has been truncated. */\n      mpf_mul (s, r, r);\n      if (! (mpf_cmp (s, x) <= 0))\n        {\n          printf    (\"mpf_sqrt result too big\\n\");\n          mpf_trace (\"  x\", x);\n          printf    (\"  r_prec=%lu\\n\", r_prec);\n          mpf_trace (\"  r\", r);\n          mpf_trace (\"  s\", s);\n          abort ();\n        }\n\n      /* Must have (r+ulp)^2 > x, or else r is too small. */\n      refmpf_add_ulp (r);\n      mpf_mul (s, r, r);\n      if (! (mpf_cmp (s, x) > 0))\n        {\n          printf    (\"mpf_sqrt result too small\\n\");\n          mpf_trace (\"  x\", x);\n          printf    (\"  r_prec=%lu\\n\", r_prec);\n          mpf_trace (\"  r+ulp\", r);\n          mpf_trace (\"  s\", s);\n          abort ();\n        }\n    }\n\n  mpf_clear (x);\n  mpf_clear (r);\n  mpf_clear (s);\n}\n\nint\nmain (int argc, char **argv)\n{gmp_randstate_t rands;\n  tests_start ();\n  gmp_randinit_default(rands);\n  mp_trace_base = -16;\n\n  check_rand1 (argc,rands, argv);\n  check_rand2 (rands);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-sqrt_ui.c",
    "content": "/* Test mpf_sqrt_ui.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_rand (void)\n{\n  unsigned long      max_prec = 15;\n  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);\n  gmp_randstate_t  rands;\n  unsigned long      x, prec;\n  mpf_t              r, s;\n  int                i;\n\n  mpf_init (r);\n  mpf_init (s);\n  gmp_randinit_default(rands);\n  refmpf_set_prec_limbs (s, 2*max_prec+10);\n\n  for (i = 0; i < 50; i++)\n    {\n      /* input, a random non-zero ulong, exponentially distributed */\n      do {\n        x = gmp_urandomb_ui (rands,\n                             gmp_urandomm_ui (rands, BITS_PER_ULONG) + 1);\n      } while (x == 0);\n\n      /* result precision */\n      prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;\n      refmpf_set_prec_limbs (r, prec);\n\n      mpf_sqrt_ui (r, x);\n      MPF_CHECK_FORMAT (r);\n\n      /* Expect to prec limbs of result.\n         In the current implementation there's no stripping of low zero\n         limbs in mpf_sqrt_ui, not even on perfect squares, so 
size should\n         be exactly prec.  */\n      if (SIZ(r) != prec)\n        {\n          printf (\"mpf_sqrt_ui result not enough result limbs\\n\");\n          printf    (\"  x=%lu\\n\", x);\n          printf    (\"  want prec=%lu\\n\", prec);\n          mpf_trace (\"  r\", r);\n          printf    (\"  r size %ld\\n\", (long) SIZ(r));\n          printf    (\"  r prec %ld\\n\", (long) PREC(r));\n          abort ();\n        }\n\n      /* Must have r^2 <= x, since r has been truncated. */\n      mpf_mul (s, r, r);\n      if (! (mpf_cmp_ui (s, x) <= 0))\n        {\n          printf    (\"mpf_sqrt_ui result too big\\n\");\n          printf    (\"  x=%lu\\n\", x);\n          printf    (\"  want prec=%lu\\n\", prec);\n          mpf_trace (\"  r\", r);\n          mpf_trace (\"  s\", s);\n          abort ();\n        }\n\n      /* Must have (r+ulp)^2 > x.\n         No overflow from refmpf_add_ulp since r is only prec limbs. */\n      refmpf_add_ulp (r);\n      mpf_mul (s, r, r);\n      if (! (mpf_cmp_ui (s, x) > 0))\n        {\n          printf    (\"mpf_sqrt_ui result too small\\n\");\n          printf    (\"  x=%lu\\n\", x);\n          printf    (\"  want prec=%lu\\n\", prec);\n          mpf_trace (\"  r+ulp\", r);\n          mpf_trace (\"  s\", s);\n          abort ();\n        }\n    }\n\n  mpf_clear (r);\n  mpf_clear (s);\n  gmp_randclear(rands);\n}\n\nint\nmain (int argc, char **argv)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-sub.c",
    "content": "/* Test mpf_sub.\n\nCopyright 1996, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 16\n#endif\n\nvoid\ncheck_rand (int argc, char **argv)\n{\n  mp_size_t size;\n  mp_exp_t exp;\n  int reps = 20000;\n  int i;\n  mpf_t u, v, w, wref;\n  mp_size_t bprec = 100;\n  mpf_t rerr, max_rerr, limit_rerr;\n  gmp_randstate_t rands;\n\n  if (argc > 1)\n    {\n      reps = strtol (argv[1], 0, 0);\n      if (argc > 2)\n\tbprec = strtol (argv[2], 0, 0);\n    }\n\n  mpf_set_default_prec (bprec);\n  gmp_randinit_default(rands);\n  mpf_init_set_ui (limit_rerr, 1);\n  mpf_div_2exp (limit_rerr, limit_rerr, bprec);\n#if VERBOSE\n  mpf_dump (limit_rerr);\n#endif\n  mpf_init (rerr);\n  mpf_init_set_ui (max_rerr, 0);\n\n  mpf_init (u);\n  mpf_init (v);\n  mpf_init (w);\n  mpf_init (wref);\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % (2 * SIZE) - SIZE;\n      exp = urandom (rands) % SIZE;\n      mpf_rrandomb (u, rands, size, exp);\n\n      size = urandom (rands) % (2 * SIZE) - SIZE;\n      exp = urandom (rands) % SIZE;\n 
     mpf_rrandomb (v, rands, size, exp);\n\n      if ((urandom (rands) & 1) != 0)\n\tmpf_add_ui (u, v, 1);\n      else if ((urandom (rands) & 1) != 0)\n\tmpf_sub_ui (u, v, 1);\n\n      mpf_sub (w, u, v);\n      refmpf_sub (wref, u, v);\n\n      mpf_reldiff (rerr, w, wref);\n      if (mpf_cmp (rerr, max_rerr) > 0)\n\t{\n\t  mpf_set (max_rerr, rerr);\n#if VERBOSE\n\t  mpf_dump (max_rerr);\n#endif\n\t  if (mpf_cmp (rerr, limit_rerr) > 0)\n\t    {\n\t      printf (\"ERROR after %d tests\\n\", i);\n\t      printf (\"   u = \"); mpf_dump (u);\n\t      printf (\"   v = \"); mpf_dump (v);\n\t      printf (\"wref = \"); mpf_dump (wref);\n\t      printf (\"   w = \"); mpf_dump (w);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  mpf_clear (limit_rerr);\n  mpf_clear (rerr);\n  mpf_clear (max_rerr);\n\n  mpf_clear (u);\n  mpf_clear (v);\n  mpf_clear (w);\n  mpf_clear (wref);\n  gmp_randclear(rands);\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    struct {\n      int        exp, size;\n      mp_limb_t  d[10];\n    } x, y, want;\n\n  } data[] = {\n    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },\n\n    /* f - f == 0, various sizes.\n       These exercise a past problem (gmp 4.1.3 and earlier) where the\n       result exponent was not zeroed on a zero result like this.  */\n    { { 0, 0 }, { 0, 0 }, { 0, 0 } },\n    { { 99, 1, { 1 } },             { 99, 1, { 1 } },             { 0, 0 } },\n    { { 99, 2, { 123, 456 } },      { 99, 2, { 123, 456 } },      { 0, 0 } },\n    { { 99, 3, { 123, 456, 789 } }, { 99, 3, { 123, 456, 789 } }, { 0, 0 } },\n\n    /* High limbs cancel, leaving just the low limbs of the longer operand.\n       This exercises a past problem (gmp 4.1.3 and earlier) where high zero\n       limbs on the remainder were not stripped before truncating to the\n       destination, causing loss of precision.  
*/\n    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },\n    { { 123, 3, { 8, 0, 9 } },          { 123, 1, { 9 } }, { 121, 1, { 8 } } },\n    { { 123, 4, { 8, 0, 0, 9 } },       { 123, 1, { 9 } }, { 120, 1, { 8 } } },\n    { { 123, 5, { 8, 0, 0, 0, 9 } },    { 123, 1, { 9 } }, { 119, 1, { 8 } } },\n    { { 123, 6, { 8, 0, 0, 0, 0, 9 } }, { 123, 1, { 9 } }, { 118, 1, { 8 } } },\n\n  };\n\n  mpf_t  x, y, got, want;\n  int  i, swap;\n\n  mp_trace_base = 16;\n  mpf_init (got);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (swap = 0; swap <= 1; swap++)\n        {\n          PTR(x) = (mp_ptr) data[i].x.d;\n          SIZ(x) = data[i].x.size;\n          EXP(x) = data[i].x.exp;\n          PREC(x) = numberof (data[i].x.d);\n          MPF_CHECK_FORMAT (x);\n\n          PTR(y) = (mp_ptr) data[i].y.d;\n          SIZ(y) = data[i].y.size;\n          EXP(y) = data[i].y.exp;\n          PREC(y) = numberof (data[i].y.d);\n          MPF_CHECK_FORMAT (y);\n\n          PTR(want) = (mp_ptr) data[i].want.d;\n          SIZ(want) = data[i].want.size;\n          EXP(want) = data[i].want.exp;\n          PREC(want) = numberof (data[i].want.d);\n          MPF_CHECK_FORMAT (want);\n\n          if (swap)\n            {\n              mpf_swap (x, y);\n              SIZ(want) = - SIZ(want);\n            }\n\n          mpf_sub (got, x, y);\n/*           MPF_CHECK_FORMAT (got); */\n\n          if (mpf_cmp (got, want) != 0)\n            {\n              printf (\"check_data() wrong result at data[%d] (operands%s swapped)\\n\", i, swap ? \"\" : \" not\");\n              mpf_trace (\"x   \", x);\n              mpf_trace (\"y   \", y);\n              mpf_trace (\"got \", got);\n              mpf_trace (\"want\", want);\n              abort ();\n            }\n        }\n    }\n\n  mpf_clear (got);\n}\n\n\nint\nmain (int argc, char **argv)\n{\n  tests_start ();\n\n  check_data ();\n  check_rand (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-trunc.c",
    "content": "/* Test mpf_trunc, mpf_ceil, mpf_floor.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_print (mpf_srcptr src, mpf_srcptr got, mpf_srcptr want)\n{\n  mp_trace_base = 16;\n  mpf_trace (\"src \", src);\n  mpf_trace (\"got \", got);\n  mpf_trace (\"want\", want);\n\n  printf (\"got  size=%d exp=%ld\\n\", SIZ(got), EXP(got));\n  mpn_trace (\"     limbs=\", PTR(got), (mp_size_t) ABSIZ(got));\n\n  printf (\"want size=%d exp=%ld\\n\", SIZ(want), EXP(want));\n  mpn_trace (\"     limbs=\", PTR(want), (mp_size_t) ABSIZ(want));\n}\n\nvoid\ncheck_one (mpf_srcptr src, mpf_srcptr trunc, mpf_srcptr ceil, mpf_srcptr floor)\n{\n  mpf_t  got;\n\n  mpf_init2 (got, mpf_get_prec (trunc));\n  ASSERT_ALWAYS (PREC(got) == PREC(trunc));\n  ASSERT_ALWAYS (PREC(got) == PREC(ceil));\n  ASSERT_ALWAYS (PREC(got) == PREC(floor));\n\n#define CHECK_SEP(name, fun, want)              \\\n  mpf_set_ui (got, 54321L); /* initial junk */  \\\n  fun (got, src);                               \\\n  MPF_CHECK_FORMAT (got);                       \\\n  if (mpf_cmp 
(got, want) != 0)                 \\\n    {                                           \\\n        printf (\"%s wrong\\n\", name);            \\\n        check_print (src, got, want);           \\\n        abort ();                               \\\n    }\n\n  CHECK_SEP (\"mpf_trunc\", mpf_trunc, trunc);\n  CHECK_SEP (\"mpf_ceil\",  mpf_ceil,  ceil);\n  CHECK_SEP (\"mpf_floor\", mpf_floor, floor);\n\n#define CHECK_INPLACE(name, fun, want)  \\\n  mpf_set (got, src);                   \\\n  fun (got, got);                       \\\n  MPF_CHECK_FORMAT (got);               \\\n  if (mpf_cmp (got, want) != 0)         \\\n    {                                   \\\n        printf (\"%s wrong\\n\", name);    \\\n        check_print (src, got, want);   \\\n        abort ();                       \\\n    }\n\n  CHECK_INPLACE (\"mpf_trunc\", mpf_trunc, trunc);\n\n  /* Can't do these unconditionally in case truncation by mpf_set strips\n     some low non-zero limbs which would have rounded the result.  
*/\n  if (ABSIZ(src) <= PREC(trunc)+1)\n    {\n      CHECK_INPLACE (\"mpf_ceil\",  mpf_ceil,  ceil);\n      CHECK_INPLACE (\"mpf_floor\", mpf_floor, floor);\n    }\n\n  mpf_clear (got);\n}\n\nvoid\ncheck_all (mpf_ptr src, mpf_ptr trunc, mpf_ptr ceil, mpf_ptr floor)\n{\n  /* some of these values are generated with direct field assignments */\n  MPF_CHECK_FORMAT (src);\n  MPF_CHECK_FORMAT (trunc);\n  MPF_CHECK_FORMAT (ceil);\n  MPF_CHECK_FORMAT (floor);\n\n  check_one (src, trunc, ceil, floor);\n\n  mpf_neg (src,   src);\n  mpf_neg (trunc, trunc);\n  mpf_neg (ceil,  ceil);\n  mpf_neg (floor, floor);\n  check_one (src, trunc, floor, ceil);\n}\n\nvoid\ncheck_various (void)\n{\n  mpf_t  src, trunc, ceil, floor;\n  int    n, i;\n\n  mpf_init2 (src, 512L);\n  mpf_init2 (trunc, 256L);\n  mpf_init2 (ceil,  256L);\n  mpf_init2 (floor, 256L);\n\n  /* 0 */\n  mpf_set_ui (src, 0L);\n  mpf_set_ui (trunc, 0L);\n  mpf_set_ui (ceil, 0L);\n  mpf_set_ui (floor, 0L);\n  check_all (src, trunc, ceil, floor);\n\n  /* 1 */\n  mpf_set_ui (src, 1L);\n  mpf_set_ui (trunc, 1L);\n  mpf_set_ui (ceil, 1L);\n  mpf_set_ui (floor, 1L);\n  check_all (src, trunc, ceil, floor);\n\n  /* 2^1024 */\n  mpf_set_ui (src, 1L);\n  mpf_mul_2exp (src,   src,   1024L);\n  mpf_set (trunc, src);\n  mpf_set (ceil,  src);\n  mpf_set (floor, src);\n  check_all (src, trunc, ceil, floor);\n  \n  /* 1/2^1024, fraction only */\n  mpf_set_ui (src, 1L);\n  mpf_div_2exp (src,  src, 1024L);\n  mpf_set_si (trunc, 0L);\n  mpf_set_si (ceil, 1L);\n  mpf_set_si (floor, 0L);\n  check_all (src, trunc, ceil, floor);\n\n  /* 1/2 */\n  mpf_set_ui (src, 1L);\n  mpf_div_2exp (src,  src, 1L);\n  mpf_set_si (trunc, 0L);\n  mpf_set_si (ceil, 1L);\n  mpf_set_si (floor, 0L);\n  check_all (src, trunc, ceil, floor);\n  \n  /* 123+1/2^64 */\n  mpf_set_ui (src, 1L);\n  mpf_div_2exp (src,  src, 64L);\n  mpf_add_ui (src,  src, 123L);\n  mpf_set_si (trunc, 123L);\n  mpf_set_si (ceil, 124L);\n  mpf_set_si (floor, 123L);\n  check_all (src, trunc, 
ceil, floor);\n\n  /* integer of full prec+1 limbs, unchanged */\n  n = PREC(trunc)+1;\n  ASSERT_ALWAYS (n <= PREC(src)+1);\n  EXP(src) = n;\n  SIZ(src) = n;\n  for (i = 0; i < SIZ(src); i++)\n    PTR(src)[i] = i+100;\n  mpf_set (trunc, src);\n  mpf_set (ceil, src);\n  mpf_set (floor, src);\n  check_all (src, trunc, ceil, floor);\n\n  /* full prec+1 limbs, 1 trimmed for integer */\n  n = PREC(trunc)+1;\n  ASSERT_ALWAYS (n <= PREC(src)+1);\n  EXP(src) = n-1;\n  SIZ(src) = n;\n  for (i = 0; i < SIZ(src); i++)\n    PTR(src)[i] = i+200;\n  EXP(trunc) = n-1;\n  SIZ(trunc) = n-1;\n  for (i = 0; i < SIZ(trunc); i++)\n    PTR(trunc)[i] = i+201;\n  mpf_set (floor, trunc);\n  mpf_add_ui (ceil, trunc, 1L);\n  check_all (src, trunc, ceil, floor);\n\n  /* prec+3 limbs, 2 trimmed for size */\n  n = PREC(trunc)+3;\n  ASSERT_ALWAYS (n <= PREC(src)+1);\n  EXP(src) = n;\n  SIZ(src) = n;\n  for (i = 0; i < SIZ(src); i++)\n    PTR(src)[i] = i+300;\n  EXP(trunc) = n;\n  SIZ(trunc) = n-2;\n  for (i = 0; i < SIZ(trunc); i++)\n    PTR(trunc)[i] = i+302;\n  mpf_set (floor, trunc);\n  mpf_set (ceil, trunc);\n  PTR(ceil)[0]++;\n  check_all (src, trunc, ceil, floor);\n\n  /* prec+4 limbs, 2 trimmed for size, 1 trimmed for integer */\n  n = PREC(trunc)+4;\n  ASSERT_ALWAYS (n <= PREC(src)+1);\n  EXP(src) = n-1;\n  SIZ(src) = n;\n  for (i = 0; i < SIZ(src); i++)\n    PTR(src)[i] = i+400;\n  EXP(trunc) = n-1;\n  SIZ(trunc) = n-3;\n  for (i = 0; i < SIZ(trunc); i++)\n    PTR(trunc)[i] = i+403;\n  mpf_set (floor, trunc);\n  mpf_set (ceil, trunc);\n  PTR(ceil)[0]++;\n  check_all (src, trunc, ceil, floor);\n\n  /* F.F, carry out of ceil */\n  EXP(src) = 1;\n  SIZ(src) = 2;\n  PTR(src)[0] = GMP_NUMB_MAX;\n  PTR(src)[1] = GMP_NUMB_MAX;\n  EXP(trunc) = 1;\n  SIZ(trunc) = 1;\n  PTR(trunc)[0] = GMP_NUMB_MAX;\n  mpf_set (floor, trunc);\n  EXP(ceil) = 2;\n  SIZ(ceil) = 1;\n  PTR(ceil)[0] = 1;\n  check_all (src, trunc, ceil, floor);\n\n  /* FF.F, carry out of ceil */\n  EXP(src) = 2;\n  SIZ(src) = 3;\n  
PTR(src)[0] = GMP_NUMB_MAX;\n  PTR(src)[1] = GMP_NUMB_MAX;\n  PTR(src)[2] = GMP_NUMB_MAX;\n  EXP(trunc) = 2;\n  SIZ(trunc) = 2;\n  PTR(trunc)[0] = GMP_NUMB_MAX;\n  PTR(trunc)[1] = GMP_NUMB_MAX;\n  mpf_set (floor, trunc);\n  EXP(ceil) = 3;\n  SIZ(ceil) = 1;\n  PTR(ceil)[0] = 1;\n  check_all (src, trunc, ceil, floor);\n\n  mpf_clear (src);\n  mpf_clear (trunc);\n  mpf_clear (ceil);\n  mpf_clear (floor);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_various ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpf/t-ui_div.c",
    "content": "/* Test mpf_ui_div.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (const char *desc, mpf_ptr got, unsigned long u, mpf_srcptr v)\n{\n  mpf_t      uf;\n  mp_limb_t  ulimbs[2];\n  mp_size_t  usize;\n\n  ulimbs[0] = u & GMP_NUMB_MASK;\n  usize = (u != 0);\n#if BITS_PER_UI > GMP_NUMB_BITS\n  u >>= GMP_NUMB_BITS;\n  ulimbs[1] = u;\n  usize += (u != 0);\n#endif\n  PTR(uf) = ulimbs;\n  SIZ(uf) = usize;\n  EXP(uf) = usize;\n\n  if (! 
refmpf_validate_division (\"mpf_ui_div\", got, uf, v))\n    {\n      mp_trace_base = -16;\n      printf    (\"  u 0x%lX  (%lu)\\n\", u, u);\n      mpf_trace (\"  v\", v);\n      printf    (\"  %s\\n\", desc);\n      abort ();\n    }\n}\n\nvoid\ncheck_rand (void)\n{\n  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);\n  gmp_randstate_t  rands;\n  unsigned long  prec, u;\n  mpf_t  got, v;\n  int    i;\n\n  mpf_init (got);\n  mpf_init (v);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 200; i++)\n    {\n      /* got precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (got, prec);\n\n      /* u */\n      prec = gmp_urandomm_ui (rands, BITS_PER_UI+1);\n      u = gmp_urandomb_ui (rands, prec);\n\n      /* v precision */\n      prec = min_prec + gmp_urandomm_ui (rands, 15L);\n      refmpf_set_prec_limbs (v, prec);\n\n      /* v, non-zero */\n      do {\n        mpf_rrandomb (v, rands, PREC(v), (mp_exp_t) 20);\n      } while (SIZ(v) == 0);\n\n      /* v possibly negative */\n      if (gmp_urandomb_ui (rands, 1L))\n        mpf_neg (v, v);\n\n      if ((i % 2) == 0)\n        {\n          /* src != dst */\n          mpf_ui_div (got, u, v);\n          check_one (\"separate\", got, u, v);\n        }\n      else\n        {\n          /* src == dst */\n          prec = refmpf_set_overlap (got, v);\n          mpf_ui_div (got, u, got);\n          check_one (\"overlap src==dst\", got, u, v);\n\n          mpf_set_prec_raw (got, prec);\n        }\n    }\n\n  mpf_clear (got);\n  mpf_clear (v);\n  gmp_randclear(rands);\n}\n\nvoid\ncheck_various (void)\n{\n  mpf_t got, v;\n\n  mpf_init (got);\n  mpf_init (v);\n\n  /* 100/4 == 25 */\n  mpf_set_prec (got, 20L);\n  mpf_set_ui (v, 4L);\n  mpf_ui_div (got, 100L, v);\n  MPF_CHECK_FORMAT (got);\n  ASSERT_ALWAYS (mpf_cmp_ui (got, 25L) == 0);\n\n  {\n    /* 1/(2^n+1), a case where truncating the divisor would be wrong */\n    unsigned long  u = 1L;\n    mpf_set_prec (got, 500L);\n    
mpf_set_prec (v, 900L);\n    mpf_set_ui (v, 1L);\n    mpf_mul_2exp (v, v, 800L);\n    mpf_add_ui (v, v, 1L);\n    mpf_ui_div (got, u, v);\n    check_one (\"1/2^n+1, separate\", got, u, v);\n  }\n\n  mpf_clear (got);\n  mpf_clear (v);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_various ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = t-addadd_n t-addsub_n t-aors_1 t-asmtype t-dc_bdiv_q t-dc_bdiv_q_n t-dc_bdiv_qr t-dc_bdiv_qr_n t-dc_div_q t-dc_div_qr t-dc_div_qr_n t-dc_divappr_q t-divebyff t-divebyfobm1 t-divrem_1 t-fat t-gcdext t-get_d t-hgcd t-instrument t-inv_div_q t-inv_div_qr t-inv_div_qr_n t-inv_divappr_q t-inv_divappr_q_n t-invert t-iord_u t-logic t-lorrshift1 t-matrix22 t-mp_bases t-mullow_basecase t-mullowhigh t-mulmid t-mulmod_2expm1 t-mulmod_2expp1 t-neg t-perfsqr t-redc_1 t-sb_bdiv_q t-sb_bdiv_qr t-sb_div_q t-sb_div_qr t-sb_divappr_q t-scan t-sizeinbase t-subadd_n t-tdiv_q t-tdiv_qr \n  \nif ENABLE_STATIC\nif ENABLE_SHARED\ncheck_PROGRAMS += st_fat st_instrument\nst_fat_SOURCES = t-fat.c\nst_fat_LDFLAGS = -static\nst_instrument_SOURCES = t-instrument.c\nst_instrument_LDFLAGS = -static\nendif\nendif\n\nTESTS = $(check_PROGRAMS)\n\n$(top_builddir)/tests/libtests.la:\n\tcd 
$(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/mpn/t-addadd_n.c",
    "content": "/* Test mpn_addadd_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (void)\n{\n  unsigned long n;\n  gmp_randstate_t rands;\n  int j, k, i, i1;\n  mp_limb_t sp[10000], tp[10000], xp[10000], yp[10000], zp[10000];\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 100; n++)\n\t{\n\t  for (j = 1; j < 5; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands, n);\n\t\t  mpn_randomb (yp, rands, n);\n\t\t  mpn_randomb (zp, rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t  mpn_rrandom (yp, rands, n);\n\t\t  mpn_rrandom (zp, rands, n);\n\t\t}\n\t      k = mpn_addadd_n (sp, xp, yp, zp, n);\n\t      i = mpn_add_n (tp, xp, yp, n);\n\t      i += mpn_add_n (tp, tp, zp, n);\n\t      if (k != i)\n\t\t{\n\t\t  printf (\"mpn_addadd_n ret wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (sp, tp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_addadd_n sum wrong\\n\");\n\t\t  abort 
();\n\t\t}\n\t    }\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-addsub_n.c",
    "content": "/* Test mpn_addsub_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (void)\n{\n  unsigned long n;\n  gmp_randstate_t rands;\n  int j, k, i, i1;\n  mp_limb_t sp[10000], tp[10000], xp[10000], yp[10000], zp[10000];\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 100; n++)\n\t{\n\t  for (j = 1; j < 5; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands, n);\n\t\t  mpn_randomb (yp, rands, n);\n\t\t  mpn_randomb (zp, rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t  mpn_rrandom (yp, rands, n);\n\t\t  mpn_rrandom (zp, rands, n);\n\t\t}\n\t      k = mpn_addsub_n (sp, xp, yp, zp, n);\n\t      i = mpn_add_n (tp, xp, yp, n);\n\t      i -= mpn_sub_n (tp, tp, zp, n);\n\t      if (k != i)\n\t\t{\n\t\t  printf (\"mpn_addsub_n ret wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (sp, tp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_addsub_n sum wrong\\n\");\n\t\t  abort 
();\n\t\t}\n\t    }\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-aors_1.c",
    "content": "/* Test mpn_add_1 and mpn_sub_1.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define M      GMP_NUMB_MAX\n#define ASIZE  10\n#define MAGIC  0x1234\n\n#define SETUP()                         \\\n  do {                                  \\\n    refmpn_random (got, data[i].size);  \\\n    got[data[i].size] = MAGIC;          \\\n  } while (0)\n\n#define SETUP_INPLACE()                                 \\\n  do {                                                  \\\n    refmpn_copyi (got, data[i].src, data[i].size);      \\\n    got[data[i].size] = MAGIC;                          \\\n  } while (0)\n\n#define VERIFY(name)                            \\\n  do {                                          \\\n    verify (name, i, data[i].src, data[i].n,    \\\n            got_c, data[i].want_c,              \\\n            got, data[i].want, data[i].size);   \\\n  } while (0)\n\ntypedef mp_limb_t (*mpn_aors_1_t)(mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);\nmpn_aors_1_t fudge(mpn_aors_1_t);\n\n\nvoid\nverify (const char *name, int i,\n        
mp_srcptr src, mp_limb_t n,\n        mp_limb_t got_c, mp_limb_t want_c,\n        mp_srcptr got, mp_srcptr want, mp_size_t size)\n{\n  if (got[size] != MAGIC)\n    {\n      printf (\"Overwrite at %s i=%d\\n\", name, i);\n      abort ();\n    }\n\n  if (got_c != want_c || ! refmpn_equal_anynail (got, want, size))\n    {\n      printf (\"Wrong at %s i=%d size=%ld\\n\", name, i, size);\n      mpn_trace (\"   src\", src,  size);\n      mpn_trace (\"     n\", &n,   (mp_size_t) 1);\n      mpn_trace (\"   got\", got,  size);\n      mpn_trace (\"  want\", want, size);\n      mpn_trace (\" got c\", &got_c,  (mp_size_t) 1);\n      mpn_trace (\"want c\", &want_c, (mp_size_t) 1);\n      abort ();\n    }\n}\n\n\nvoid\ncheck_add_1 (void)\n{\n  static const struct {\n    mp_size_t        size;\n    mp_limb_t        n;\n    const mp_limb_t  src[ASIZE];\n    mp_limb_t        want_c;\n    const mp_limb_t  want[ASIZE];\n  } data[] = {\n    { 1, 0, { 0 },  0, { 0 } },\n    { 1, 0, { 1 },  0, { 1 } },\n    { 1, 1, { 0 },  0, { 1 } },\n    { 1, 0, { M },  0, { M } },\n    { 1, M, { 0 },  0, { M } },\n    { 1, 1, { 123 }, 0, { 124 } },\n\n    { 1, 1, { M },  1, { 0 } },\n    { 1, M, { 1 },  1, { 0 } },\n    { 1, M, { M },  1, { M-1 } },\n\n    { 2, 0, { 0, 0 },  0, { 0, 0 } },\n    { 2, 0, { 1, 0 },  0, { 1, 0 } },\n    { 2, 1, { 0, 0 },  0, { 1, 0 } },\n    { 2, 0, { M, 0 },  0, { M, 0 } },\n    { 2, M, { 0, 0 },  0, { M, 0 } },\n    { 2, 1, { M, 0 },  0, { 0, 1 } },\n    { 2, M, { 1, 0 },  0, { 0, 1 } },\n    { 2, M, { M, 0 },  0, { M-1, 1 } },\n    { 2, M, { M, 0 },  0, { M-1, 1 } },\n\n    { 2, 1, { M, M },  1, { 0, 0 } },\n    { 2, M, { 1, M },  1, { 0, 0 } },\n    { 2, M, { M, M },  1, { M-1, 0 } },\n    { 2, M, { M, M },  1, { M-1, 0 } },\n\n    { 3, 1, { M, M, M },  1, { 0, 0, 0 } },\n    { 3, M, { 1, M, M },  1, { 0, 0, 0 } },\n    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },\n    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },\n\n    { 4, 1, { M, M, M, M },  1, { 0, 0, 0, 0 } },\n    { 
4, M, { 1, M, M, M },  1, { 0, 0, 0, 0 } },\n    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },\n    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },\n\n    { 4, M, { M, 0,   M, M },  0, { M-1, 1, M, M } },\n    { 4, M, { M, M-1, M, M },  0, { M-1, M, M, M } },\n\n    { 4, M, { M, M, 0,   M },  0, { M-1, 0, 1, M } },\n    { 4, M, { M, M, M-1, M },  0, { M-1, 0, M, M } },\n  };\n\n  mp_limb_t  got[ASIZE];\n  mp_limb_t  got_c;\n  int        i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      SETUP ();\n      got_c = mpn_add_1 (got, data[i].src, data[i].size, data[i].n);\n      VERIFY (\"check_add_1 (separate)\");\n\n      SETUP_INPLACE ();\n      got_c = mpn_add_1 (got, got, data[i].size, data[i].n);\n      VERIFY (\"check_add_1 (in-place)\");\n\n      if (data[i].n == 1)\n        {\n          SETUP ();\n          got_c = mpn_add_1 (got, data[i].src, data[i].size, CNST_LIMB(1));\n          VERIFY (\"check_add_1 (separate, const 1)\");\n\n          SETUP_INPLACE ();\n          got_c = mpn_add_1 (got, got, data[i].size, CNST_LIMB(1));\n          VERIFY (\"check_add_1 (in-place, const 1)\");\n        }\n\n      /* Same again on functions, not inlines. 
*/\n      SETUP ();\n      got_c = (*fudge(mpn_add_1)) (got, data[i].src, data[i].size, data[i].n);\n      VERIFY (\"check_add_1 (function, separate)\");\n\n      SETUP_INPLACE ();\n      got_c = (*fudge(mpn_add_1)) (got, got, data[i].size, data[i].n);\n      VERIFY (\"check_add_1 (function, in-place)\");\n    }\n}\n\nvoid\ncheck_sub_1 (void)\n{\n  static const struct {\n    mp_size_t        size;\n    mp_limb_t        n;\n    const mp_limb_t  src[ASIZE];\n    mp_limb_t        want_c;\n    const mp_limb_t  want[ASIZE];\n  } data[] = {\n    { 1, 0, { 0 },  0, { 0 } },\n    { 1, 0, { 1 },  0, { 1 } },\n    { 1, 1, { 1 },  0, { 0 } },\n    { 1, 0, { M },  0, { M } },\n    { 1, 1, { M },  0, { M-1 } },\n    { 1, 1, { 123 }, 0, { 122 } },\n\n    { 1, 1, { 0 },  1, { M } },\n    { 1, M, { 0 },  1, { 1 } },\n\n    { 2, 0, { 0, 0 },  0, { 0, 0 } },\n    { 2, 0, { 1, 0 },  0, { 1, 0 } },\n    { 2, 1, { 1, 0 },  0, { 0, 0 } },\n    { 2, 0, { M, 0 },  0, { M, 0 } },\n    { 2, 1, { M, 0 },  0, { M-1, 0 } },\n    { 2, 1, { 123, 0 }, 0, { 122, 0 } },\n\n    { 2, 1, { 0, 0 },  1, { M, M } },\n    { 2, M, { 0, 0 },  1, { 1, M } },\n\n    { 3, 0, { 0,   0, 0 },  0, { 0,   0, 0 } },\n    { 3, 0, { 123, 0, 0 },  0, { 123, 0, 0 } },\n\n    { 3, 1, { 0, 0, 0 },  1, { M, M, M } },\n    { 3, M, { 0, 0, 0 },  1, { 1, M, M } },\n\n    { 4, 1, { 0, 0, 0, 0 },  1, { M, M, M, M } },\n    { 4, M, { 0, 0, 0, 0 },  1, { 1, M, M, M } },\n\n    { 4, 1, { 0, 0, 1,   42 },  0, { M, M, 0,   42 } },\n    { 4, M, { 0, 0, 123, 24 },  0, { 1, M, 122, 24 } },\n  };\n\n  mp_limb_t  got[ASIZE];\n  mp_limb_t  got_c;\n  int        i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      SETUP ();\n      got_c = mpn_sub_1 (got, data[i].src, data[i].size, data[i].n);\n      VERIFY (\"check_sub_1 (separate)\");\n\n      SETUP_INPLACE ();\n      got_c = mpn_sub_1 (got, got, data[i].size, data[i].n);\n      VERIFY (\"check_sub_1 (in-place)\");\n\n      if (data[i].n == 1)\n        {\n          SETUP ();\n       
   got_c = mpn_sub_1 (got, data[i].src, data[i].size, CNST_LIMB(1));\n          VERIFY (\"check_sub_1 (separate, const 1)\");\n\n          SETUP_INPLACE ();\n          got_c = mpn_sub_1 (got, got, data[i].size, CNST_LIMB(1));\n          VERIFY (\"check_sub_1 (in-place, const 1)\");\n        }\n\n      /* Same again on functions, not inlines. */\n      SETUP ();\n      got_c = (*fudge(mpn_sub_1)) (got, data[i].src, data[i].size, data[i].n);\n      VERIFY (\"check_sub_1 (function, separate)\");\n\n      SETUP_INPLACE ();\n      got_c = (*fudge(mpn_sub_1)) (got, got, data[i].size, data[i].n);\n      VERIFY (\"check_sub_1 (function, in-place)\");\n    }\n}\n\n/* Try to prevent the optimizer inlining. */\nmpn_aors_1_t\nfudge (mpn_aors_1_t f)\n{\n  return f;\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_add_1 ();\n  check_sub_1 ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-asmtype.c",
    "content": "/* Test .type directives on assembler functions.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#include \"tests.h\"\n\n\n/* This apparently trivial test is designed to detect missing .type and\n   .size directives in asm code, per the problem described under\n   GMP_ASM_TYPE in acinclude.m4.\n\n   A failure can be provoked in a shared or shared+static build by making\n   TYPE and SIZE in config.m4 empty, either by editing it or by configuring\n   with\n\n       ./configure gmp_cv_asm_type= gmp_cv_asm_size=\n\n   mpn_add_n is used for the test because normally it's implemented in\n   assembler on a CPU that has any asm code.\n\n   Enhancement: As noted with GMP_ASM_TYPE, if .type is wrong but .size is\n   right then everything works, but uses code copied down to the mainline\n   data area.  Maybe we could detect that if we built a test library with an\n   object that had .size deliberately disabled.  
*/\n\nint\nmain (void)\n{\n  static const mp_limb_t x[3]    = { 1, 2, 3 };\n  static const mp_limb_t y[3]    = { 4, 5, 6 };\n  static const mp_limb_t want[3] = { 5, 7, 9 };\n  mp_limb_t  got[3];\n\n  mpn_add_n (got, x, y, (mp_size_t) 3);\n\n  if (refmpn_cmp (got, want, (mp_size_t) 3) != 0)\n    {\n      printf (\"Wrong result from mpn_add_n\\n\");\n      abort ();\n    }\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_bdiv_q.c",
    "content": "/* Test mpn_dc_bdiv_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer hensel division routine. 
*/\nvoid\ncheck_dc_bdiv_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[3*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % MAX_LIMBS) + dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n \n      mpn_dc_bdiv_q(qp, np, nn, dp, dn, dip);\n      \n      if (nn >= dn) mpn_mul(rp, qp, nn, dp, dn);\n      else mpn_mul(rp, dp, dn, qp, nn);\n\n      if (mpn_cmp(rp, np2, nn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu\\n\\n\", nn, dn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, nn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, nn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_bdiv_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_bdiv_q_n.c",
    "content": "/* Test mpn_dc_bdiv_q_n.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer hensel division routine. 
*/\nvoid\ncheck_dc_bdiv_q_n (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[3*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t wp[2];\n   mp_limb_t tp[DC_BDIV_Q_N_ITCH(MAX_LIMBS)];\n   mp_limb_t dip, cy, hi, lo;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n \n      mpn_dc_bdiv_q_n(qp, wp, np, dp, dn, dip, tp);\n      \n      if (nn >= dn) mpn_mul(rp, qp, nn, dp, dn);\n      else mpn_mul(rp, dp, dn, qp, nn);\n\n      if (mpn_cmp(rp, np2, nn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu\\n\\n\", nn, dn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, nn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, nn);\n         abort ();\n      }\n\n      MPN_ZERO(rp, 3*MAX_LIMBS);\n      hi = lo = 0;\n      for (j = 0; j < dn; j++)\n      {\n         cy = mpn_addmul_1(rp + j, qp, nn - j, dp[j]);\n         add_ssaaaa(hi, lo, hi, lo, 0, cy);\n      } \n      if ((hi != wp[1]) || (lo != wp[0]))\n      {\n         printf(\"failed: wp wrong!\\n\");\n         printf(\"wp = %lx %lx, wp2 = %lx %lx\\n\", wp[1], wp[0], hi, lo);\n         abort();\n      } \n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_bdiv_q_n ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_bdiv_qr.c",
    "content": "/* Test mpn_dc_bdiv_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer hensel division routine. 
*/\nvoid\ncheck_dc_bdiv_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[2*MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy1, cy2;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % (MAX_LIMBS - 1)) + dn + 1;\n      qn = nn - dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n      \n      cy1 = mpn_dc_bdiv_qr(qp, np, nn, dp, dn, dip);\n      \n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      } else\n         MPN_ZERO(rp, nn);\n      \n      cy2 = mpn_sub_n(rp, np2, rp, nn);\n      \n      if (mpn_cmp(rp + qn, np + qn, dn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu\\n\\n\", nn, dn, qn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, qn);\n         abort ();\n      }\n\n      if (cy1 != cy2)\n      {\n         printf(\"failed: carry wrong!\\n\");\n         printf(\"cy1 = %lx, cy2 = %lx\\n\", cy1, cy2);\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_bdiv_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_bdiv_qr_n.c",
    "content": "/* Test mpn_dc_bdiv_qr_n.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer hensel division routine. 
*/\nvoid\ncheck_dc_bdiv_qr_n (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[2*MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t tp[DC_BDIV_QR_N_ITCH(MAX_LIMBS)];\n   mp_limb_t dip, cy1, cy2;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = 2*dn;\n      qn = dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n      \n      cy1 = mpn_dc_bdiv_qr_n(qp, np, dp, dn, dip, tp);\n      \n      if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n      else mpn_mul(rp, dp, dn, qp, qn);\n      \n      cy2 = mpn_sub_n(rp, np2, rp, nn);\n      \n      if (mpn_cmp(rp + qn, np + qn, dn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu\\n\\n\", nn, dn, qn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, qn);\n         abort ();\n      }\n\n      if (cy1 != cy2)\n      {\n         printf(\"failed: carry wrong!\\n\");\n         printf(\"cy1 = %lx, cy2 = %lx\\n\", cy1, cy2);\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_bdiv_qr_n ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_div_q.c",
    "content": "/* Test mpn_dc_div_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_dc_div_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS-3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_dc_div_q(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_div_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_div_qr.c",
    "content": "/* Test mpn_dc_div_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_dc_div_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS - 3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_dc_div_qr(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_div_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_div_qr_n.c",
    "content": "/* Test mpn_dc_div_qr_n.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_dc_div_qr_n (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t tp[DC_DIVAPPR_Q_N_ITCH(MAX_LIMBS)];\n   mp_limb_t dip, cy;\n\n   mp_size_t rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      \n      mpn_rrandom (np, rands, 2*dn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, 2*dn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = dn + 1;\n         \n      qp[qn - 1] = mpn_dc_div_qr_n(qp, np, dp, dn, dip, tp);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > 2*dn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, 2*dn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, 2*dn, rp, rn);\n         rn = 2*dn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = 2*dn;\n         MPN_COPY(rp, np, 2*dn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"dn = %lu, qn = %lu, rn = %lu\\n\\n\", dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, 2*dn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, 2*dn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_div_qr_n ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-dc_divappr_q.c",
    "content": "/* Test mpn_dc_divappr_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_dc_divappr_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS - 3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_dc_divappr_q(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n\n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n\n         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);\n         if (s <= 0) \n         {\n            mpn_sub(rp, np2, nn, rp, rn);\n            rn = nn;\n            MPN_NORMALIZE(rp, rn);\n         } else \n         {\n            mpn_sub(rp, rp, rn, np2, nn);\n            MPN_NORMALIZE(rp, rn);\n         }\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_dc_divappr_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-divebyff.c",
    "content": "/* Test mpn_divexact_byff\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)  do{int __t;printf(\"%s=\",#xp);for(__t=(xn)-1;__t>=0;__t--)printf(\"%lX \",(xp)[__t]);printf(\"\\n\");}while(0)\n\nint\nmain (void)\n{\n  unsigned long n, c;\n  gmp_randstate_t rands;\n  mp_limb_t xp[10000], qp[10000], tp[10000], r1, r2;\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  /*\n     where (xp,n) = (qp,n)*(B-1) -ret*B^n and 0 <= ret < B-1      \n     B=2^GMP_NUMB_BITS. 
This assumes we use a divexact algorithm, \n     a bi-directional algorithm would give different results.\n  */\n  for (n = 1; n < 100; n++)\n    {\n      for (c = 0; c < 10; c++)\n\t{\n\t  mpn_randomb (xp, rands, n);\n\t  r1 = mpn_divexact_byff (qp, xp, n);\n\t  r2 = mpn_mul_1 (tp, qp, n, GMP_NUMB_MAX);\n\t  if (r1 != r2)\n\t    {\n\t      printf (\"mpn_divexact_byff ret error\\n\");\n\t      abort ();\n\t    }\n\t  if (mpn_cmp (xp, tp, n) != 0)\n\t    {\n\t      printf (\"mpn_divexact_byff error\\n\");\n\t      abort ();\n\t    }\n\t}\n    }\n\n  for (n = 2; n < 100; n++)\n    {\n      for (c = 0; c < 10; c++)\n\t{\n\t  mpn_randomb (xp, rands, n);\n\t  xp[n] = mpn_mul_1 (xp, xp, n - 1, GMP_NUMB_MAX);\n\t  r1 = mpn_divexact_byff (qp, xp, n);\n\t  r2 = mpn_mul_1 (tp, qp, n, GMP_NUMB_MAX);\n\t  if (r1 != r2)\n\t    {\n\t      printf (\"mpn_divexact_byff ret error\\n\");\n\t      abort ();\n\t    }\n\t  if (mpn_cmp (xp, tp, n) != 0)\n\t    {\n\t      printf (\"mpn_divexact_byff error\\n\");\n\t      abort ();\n\t    }\n\t}\n    }\n\n  for (n = 0; n < 100; n++)\n    {\n      umul_ppmm (xp[1], xp[0], GMP_NUMB_MAX, n);\n      r1 = mpn_divexact_byff (qp, xp, 2);\n      r2 = mpn_mul_1 (tp, qp, 2, GMP_NUMB_MAX);\n      if (r1 != r2)\n\t{\n\t  printf (\"mpn_divexact_byff ret error\\n\");\n\t  abort ();\n\t}\n      if (mpn_cmp (xp, tp, 2) != 0)\n\t{\n\t  printf (\"mpn_divexact_byff error\\n\");\n\t  abort ();\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-divebyfobm1.c",
    "content": "/* Test mpn_divexact_fobm1\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)  do{int __t;printf(\"%s=\",#xp);for(__t=(xn)-1;__t>=0;__t--)printf(\"%lX \",(xp)[__t]);printf(\"\\n\");}while(0)\n\nint\nmain (void)\n{\n  unsigned long n, c,j;\n  gmp_randstate_t rands;\n  mp_limb_t xp[10000], qp[10000], tp[10000], r1, r2, i, f;\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  /* \n     where (xp,n) = (qp,n)*f -ret*B^n    and 0 <= ret < f      \n     B=2^GMP_NUMB_BITS  where f divides B-1. 
This assumes \n     we use a divexact algorithm, a bi-directional algorithm \n     would give different results and so would a diveby3 type \n     div without the correction\n  */\n\n  for (i = 1; i < 10000; i += 2)\n    {\n      if (GMP_NUMB_MAX % i != 0)\n\tcontinue;\n      f = i;\n      for (j = 0; j < 2; j++)\n\t{\n\t  f = GMP_NUMB_MAX / f;\n\t  for (n = 1; n < 100; n++)\n\t    {\n\t      for (c = 0; c < 10; c++)\n\t\t{\n\t\t  mpn_randomb (xp, rands,n);\n\t\t  r1 = mpn_divexact_byfobm1 (qp, xp, n, f, GMP_NUMB_MAX / f);\n\t\t  r2 = mpn_mul_1 (tp, qp, n, f);\n\t\t  if (r1 != r2)\n\t\t    {\n\t\t      printf (\"mpn_divexact_byfobm1 ret error\\n\");\n\t\t      abort ();\n\t\t    }\n\t\t  if (mpn_cmp (xp, tp, n) != 0)\n\t\t    {\n\t\t      printf (\"mpn_divexact_byfobm1 error\\n\");\n\t\t      abort ();\n\t\t    }\n\t\t}\n\t    }\n\n\t  for (n = 2; n < 100; n++)\n\t    {\n\t      for (c = 0; c < 10; c++)\n\t\t{\n\t\t  mpn_randomb (xp, rands,n);\n\t\t  xp[n] = mpn_mul_1 (xp, xp, n - 1, f);\n\t\t  r1 = mpn_divexact_byfobm1 (qp, xp, n, f, GMP_NUMB_MAX / f);\n\t\t  r2 = mpn_mul_1 (tp, qp, n, f);\n\t\t  if (r1 != r2)\n\t\t    {\n\t\t      printf (\"mpn_divexact_byfobm1 ret error\\n\");\n\t\t      abort ();\n\t\t    }\n\t\t  if (mpn_cmp (xp, tp, n) != 0)\n\t\t    {\n\t\t      printf (\"mpn_divexact_byfobm1 error\\n\");\n\t\t      abort ();\n\t\t    }\n\t\t}\n\t    }\n\n\t  for (n = 0; n < 100; n++)\n\t    {\n\t      umul_ppmm (xp[1], xp[0], f, n);\n\t      r1 = mpn_divexact_byfobm1 (qp, xp, 2, f, GMP_NUMB_MAX / f);\n\t      r2 = mpn_mul_1 (tp, qp, 2, f);\n\t      if (r1 != r2)\n\t\t{\n\t\t  printf (\"mpn_divexact_byfobm1 ret error\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, tp, 2) != 0)\n\t\t{\n\t\t  printf (\"mpn_divexact_byfobm1 error\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-divrem_1.c",
    "content": "/* Test mpn_divrem_1 and mpn_preinv_divrem_1.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    mp_limb_t  n[1];\n    mp_size_t  nsize;\n    mp_limb_t  d;\n    mp_size_t  qxn;\n    mp_limb_t  want_q[5];\n    mp_limb_t  want_r;\n  } data[] = {\n    { { 0 }, 1, 1, 0,\n      { 0 }, 0},\n\n    { { 5 }, 1, 2, 0,\n      { 2 }, 1},\n\n#if GMP_NUMB_BITS == 32\n    { { 0x3C }, 1, 0xF2, 1,\n      { 0x3F789854, 0 }, 0x98 },\n#endif\n\n#if GMP_NUMB_BITS == 64\n    { { 0x3C }, 1, 0xF2, 1,\n      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, 0x0E },\n\n    /* This case exposed some wrong code generated by SGI cc on mips64 irix\n       6.5 with -n32 -O2, in the fractional loop for normalized divisor\n       using udiv_qrnnd_preinv.  A test \"x>al\" in one of the sub_ddmmss\n       expansions came out wrong, leading to an incorrect quotient.  
*/\n    { { CNST_LIMB(0x3C00000000000000) }, 1, CNST_LIMB(0xF200000000000000), 1,\n      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, CNST_LIMB(0x0E00000000000000) },\n#endif\n  };\n\n  mp_limb_t  dinv, got_r, got_q[numberof(data[0].want_q)];\n  mp_size_t  qsize;\n  int        i, shift;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      qsize = data[i].nsize + data[i].qxn;\n      ASSERT_ALWAYS (qsize <= numberof (got_q));\n\n      got_r = mpn_divrem_1 (got_q, data[i].qxn, data[i].n, data[i].nsize,\n                            data[i].d);\n      if (got_r != data[i].want_r\n          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)\n        {\n          printf        (\"mpn_divrem_1 wrong at data[%d]\\n\", i);\n        bad:\n          mpn_trace     (\"  n\", data[i].n, data[i].nsize);\n          printf        (\"  nsize=%ld\\n\", (long) data[i].nsize);\n          mp_limb_trace (\"  d\", data[i].d);\n          printf        (\"  qxn=%ld\\n\", (long) data[i].qxn);\n          mpn_trace     (\"  want q\", data[i].want_q, qsize);\n          mpn_trace     (\"  got  q\", got_q, qsize);\n          mp_limb_trace (\"  want r\", data[i].want_r);\n          mp_limb_trace (\"  got  r\", got_r);\n          abort ();\n        }\n\n      /* test if available */\n#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1\n      shift = refmpn_count_leading_zeros (data[i].d);\n      dinv = refmpn_invert_limb (data[i].d << shift);\n      got_r = mpn_preinv_divrem_1 (got_q, data[i].qxn,\n                                   data[i].n, data[i].nsize,\n                                   data[i].d, dinv, shift);\n      if (got_r != data[i].want_r\n          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)\n        {\n          printf        (\"mpn_preinv_divrem_1 wrong at data[%d]\\n\", i);\n          printf        (\"  shift=%d\\n\", shift);\n          mp_limb_trace (\"  dinv\", dinv);\n          goto bad;\n        }\n#endif\n    }\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  
mp_trace_base = -16;\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-fat.c",
    "content": "/* Test fat binary setups.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\n/* In this program we're aiming to pick up certain subtle problems that\n   might creep into a fat binary.\n\n   1. We want to ensure the application entry point routines like\n      __gmpn_add_n dispatch to the correct field of __gmpn_cpuvec.\n\n      Note that these routines are not exercised as a side effect of other\n      tests (eg. the mpz routines).  Internally the fields of __gmpn_cpuvec\n      are used directly, so we need to write test code explicitly calling\n      the mpn functions, like an application will have.\n\n   2. 
We want to ensure the initial __gmpn_cpuvec data has the initializer\n      function pointers in the correct fields, and that those initializer\n      functions dispatch to their correct corresponding field once\n      initialization has been done.\n\n      Only one of the initializer routines executes in a normal program,\n      since that routine sets all the pointers to actual mpn functions.  We\n      forcibly reset __gmpn_cpuvec so we can run each.\n\n   In both cases for the above, the data put through the functions is\n   nothing special, just enough to verify that for instance an add_n is\n   really doing an add_n and has not for instance mistakenly gone to sub_n\n   or something.\n\n   The loop around each test will exercise the initializer routine on the\n   first iteration, and the dispatcher routine on the second.\n\n   If the dispatcher and/or initializer routines are generated mechanically\n   via macros (eg. mpn/x86/fat/fat_entry.asm) then there shouldn't be too\n   much risk of them going wrong, provided the structure layout is correctly\n   expressed.  But if they're in C then it's good to guard against typos in\n   what is rather repetitive code.  The initializer data for __gmpn_cpuvec\n   in fat.c is always done by hand and is likewise a bit repetitive.  */\n\n\n/* dummies when not a fat binary */\n#if ! 
WANT_FAT_BINARY\nstruct cpuvec_t {\n  int  initialized;\n};\nstruct cpuvec_t __gmpn_cpuvec;\n#define ITERATE_FAT_THRESHOLDS()  do { } while (0)\n#endif\n\n/* saved from program startup */\nstruct cpuvec_t  initial_cpuvec;\n\nvoid\ncheck_functions (void)\n{\n  mp_limb_t  wp[2], wp2[2], xp[2], yp[2], r;\n  int  i;\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 123;\n      yp[0] = 456;\n      mpn_add_n (wp, xp, yp, (mp_size_t) 1);\n      ASSERT_ALWAYS (wp[0] == 579);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 123;\n      wp[0] = 456;\n      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));\n      ASSERT_ALWAYS (wp[0] == 702);\n      ASSERT_ALWAYS (r == 0);\n    }\n\n#if HAVE_NATIVE_mpn_copyd\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 123;\n      xp[1] = 456;\n      mpn_copyd (xp+1, xp, (mp_size_t) 1);\n      ASSERT_ALWAYS (xp[1] == 123);\n    }\n#endif\n\n#if HAVE_NATIVE_mpn_copyi\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 123;\n      xp[1] = 456;\n      mpn_copyi (xp, xp+1, (mp_size_t) 1);\n      ASSERT_ALWAYS (xp[0] == 456);\n    }\n#endif\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 1605;\n      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));\n      ASSERT_ALWAYS (wp[0] == 321);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 1296;\n      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));\n      ASSERT_ALWAYS (wp[0] == 432);\n      ASSERT_ALWAYS (r == 0);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 578;\n      
r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17),CNST_LIMB(-1)/CNST_LIMB(17));\n      ASSERT_ALWAYS (wp[0] == 34);\n      ASSERT_ALWAYS (r == 0);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 287;\n      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));\n      ASSERT_ALWAYS (wp[1] == 41);\n      ASSERT_ALWAYS (wp[0] == 0);\n      ASSERT_ALWAYS (r == 0);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 290;\n      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));\n      ASSERT_ALWAYS (wp[0] == 41);\n      ASSERT_ALWAYS (r == 3);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 12;\n      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));\n      ASSERT_ALWAYS (r == 3);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 0x1001;\n      mpn_lshift (wp, xp, (mp_size_t) 1, 1);\n      ASSERT_ALWAYS (wp[0] == 0x2002);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 14;\n      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));\n      ASSERT_ALWAYS (r == 2);\n    }\n\n#if (GMP_NUMB_BITS % 4) == 0\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      int  bits = (GMP_NUMB_BITS / 4) * 3;\n      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;\n      mp_limb_t  want = GMP_NUMB_MAX % mod;\n      xp[0] = GMP_NUMB_MAX;\n      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);\n      ASSERT_ALWAYS (r % mod == want);\n    }\n#endif\n\n  //   DECL_modexact_1c_odd ((*modexact_1c_odd)); \n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 
14;\n      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));\n      ASSERT_ALWAYS (wp[0] == 56);\n      ASSERT_ALWAYS (r == 0);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 5;\n      yp[0] = 7;\n      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);\n      ASSERT_ALWAYS (wp[0] == 35);\n      ASSERT_ALWAYS (wp[1] == 0);\n    }\n\n#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 0x101;\n      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,\n                               GMP_LIMB_HIGHBIT,\n                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);\n      ASSERT_ALWAYS (wp[0] == 0x202);\n      ASSERT_ALWAYS (wp[1] == 0);\n      ASSERT_ALWAYS (r == 0);\n    }\n#endif\n\n#if GMP_NAIL_BITS == 0\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = GMP_LIMB_HIGHBIT+123;\n      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,\n                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));\n      ASSERT_ALWAYS (r == 123);\n    }\n#endif\n\n\n memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n   for (i = 0; i < 2; i++)\n       {\n        xp[0] = 5;\n        modlimb_invert(r,xp[0]);\n        r=-r;\n        yp[0]=43;\n        yp[1]=75;\n        mpn_redc_1 (wp, yp, xp, (mp_size_t) 1,r);\n        ASSERT_ALWAYS (wp[0] == 78);\n       }\n\n memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n   for (i = 0; i < 2; i++)\n       {\n        xp[0]=5;\n        yp[0]=3;\n        mpn_sumdiff_n (wp, wp2,xp, yp,1);\n        ASSERT_ALWAYS (wp[0] == 8);\n        ASSERT_ALWAYS (wp2[0] == 2);\n       }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 0x8008;\n      
mpn_rshift (wp, xp, (mp_size_t) 1, 1);\n      ASSERT_ALWAYS (wp[0] == 0x4004);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 5;\n      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);\n      ASSERT_ALWAYS (wp[0] == 25);\n      ASSERT_ALWAYS (wp[1] == 0);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 999;\n      yp[0] = 666;\n      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);\n      ASSERT_ALWAYS (wp[0] == 333);\n    }\n\n  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));\n  for (i = 0; i < 2; i++)\n    {\n      xp[0] = 123;\n      wp[0] = 456;\n      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));\n      ASSERT_ALWAYS (wp[0] == 210);\n      ASSERT_ALWAYS (r == 0);\n    }\n}\n\n/* Expect the first use of a each fat threshold to invoke the necessary\n   initialization.  */\nvoid\ncheck_thresholds (void)\n{\n#define ITERATE(name,field)                                             \\\n  do {                                                                  \\\n    memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));   \\\n    ASSERT_ALWAYS (name != 0);                                          \\\n    ASSERT_ALWAYS (name == __gmpn_cpuvec.field);                        \\\n    ASSERT_ALWAYS (__gmpn_cpuvec.initialized);                          \\\n  } while (0)\n\n  ITERATE_FAT_THRESHOLDS ();\n}\n\n\nint\nmain (void)\n{\n  memcpy (&initial_cpuvec, &__gmpn_cpuvec, sizeof (__gmpn_cpuvec));\n\n  tests_start ();\n\n  check_functions ();\n  check_thresholds ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-gcdext.c",
    "content": "/* Test mpn_gcdext.\n\nCopyright 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 2000\n#define ITERS 1000\n\nvoid check_normalisation(const mpz_t G, const mpz_t S, const mpz_t U, const mpz_t V, \n\t\t\t\t\t\t                                   mpz_t T, mpz_t T1, mpz_t T2)\n{\n   /* compute T */\n   mpz_mul(T1, U, S);\n   mpz_sub(T, G, T1);\n   mpz_divexact(T, T, V);\n\n   /* T2 = V/(2G) */\n   mpz_divexact(T2, V, G);\n   mpz_cdiv_q_ui(T2, T2, 2);\n\n   /* T1 = abs(S) */\n   mpz_abs(T1, S);\n      \n   if ((mpz_cmp(T1, T2) >= 0) && (mpz_cmp_ui(S, 1) != 0))\n   {\n\t   printf(\"FAIL\\n\");\n\t   gmp_printf(\"U = %Zd\\n\", U);\n\t   gmp_printf(\"V = %Zd\\n\", V);\n\t   gmp_printf(\"G = %Zd\\n\", G);\n\t   gmp_printf(\"S = %Zd\\n\", S);\n\t   gmp_printf(\"T = %Zd\\n\", T);\n\t   abort();\n   }\n}\n\n/* Check extended gcd routine. 
*/\nvoid check_gcdext(void)\n{\n   mp_limb_t up[MAX_LIMBS + 1];\n   mp_limb_t vp[MAX_LIMBS + 1];\n   mp_limb_t up2[MAX_LIMBS];\n   mp_limb_t vp2[MAX_LIMBS];\n   mp_limb_t sp[MAX_LIMBS + 1];\n   mp_limb_t tp[MAX_LIMBS];\n   mp_limb_t gp[MAX_LIMBS + 1];\n   \n   mp_size_t un, vn, sn, tn, bits, gn;\n   mp_bitcnt_t u_bits, v_bits, g_bits;\n   long i, j;\n   mpz_t U, V, S, T, G, T1, T2;\n\n   gmp_randstate_t rands;\n   gmp_randinit_default(rands);\n\n   mpz_init(T);\n   mpz_init(T1);\n   mpz_init(T2);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      u_bits = (random()%(GMP_LIMB_BITS*MAX_LIMBS)) + 1;\n      v_bits = (random()%u_bits) + 1;\n\t  g_bits = (random()%v_bits) + 1;\n      \n\t  gn = (g_bits + GMP_LIMB_BITS - 1)/GMP_LIMB_BITS;\n\t  sn = (u_bits - g_bits + GMP_LIMB_BITS - 1)/GMP_LIMB_BITS;\n\t  tn = (v_bits - g_bits + GMP_LIMB_BITS - 1)/GMP_LIMB_BITS;\n\n\t  /* generate quasi gcd gp*/\n\t  do{\n\t     mpn_urandomb(gp, rands, g_bits);\n         gn = (g_bits + GMP_LIMB_BITS - 1)/GMP_LIMB_BITS;\n\t     MPN_NORMALIZE(gp, gn);\n\t  } while (gn == 0);\n\n\t  /* \n\t     generate random {sp, sn}\n\t     set {up, un} = {gp, gn}*{sp, sn} \n\t  */\n\t  if (u_bits > g_bits)\n\t  {\n\t\t mpn_urandomb(sp, rands, u_bits - g_bits);\n         up[0] |= (mp_limb_t) 1;\n\n\t\t if (gn >= sn)\n\t\t\tmpn_mul(up, gp, gn, sp, sn);\n\t\t else\n            mpn_mul(up, sp, sn, gp, gn);\n\t\t un = gn + sn;\n\t\t MPN_NORMALIZE(up, un);\n\t  } else\n\t  {\n\t     MPN_COPY(up, gp, gn);\n\t\t un = gn;\n\t\t MPN_NORMALIZE(up, un);\n\t  }\n      \n\t  do\n\t  {\n\t  /* \n\t     generate random odd {tp, tn}\n\t     set {vp, vn} = {gp, gn}*{tp, tn} \n\t  */\n\t  if (v_bits > g_bits)\n\t  {\n\t\t mpn_urandomb(tp, rands, v_bits - g_bits);\n\t\t tp[0] |= (mp_limb_t) 1;\n\n         if (gn >= tn)\n\t\t\tmpn_mul(vp, gp, gn, tp, tn);\n\t\t else\n            mpn_mul(vp, tp, tn, gp, gn);\n\t\t vn = gn + tn;\n\t\t MPN_NORMALIZE(vp, vn);\n\t  } else\n\t  {\n\t     MPN_COPY(vp, gp, gn);\n\t\t vn = gn;\n\t\t 
MPN_NORMALIZE(vp, vn);\n\t  }\n\t  } while ((un == vn) && (mpn_cmp(up, vp, un) < 0));\n\n\t  /* Save a copy of up and vp */\n\t  MPN_COPY(up2, up, un);\n\t  MPN_COPY(vp2, vp, vn);\n          if(un<vn)continue;\n\t  gn = mpn_gcdext(gp, sp, &sn, up, un, vp, vn);\n      \n\t  U->_mp_d = up2;\n\t  U->_mp_size = un;\n      V->_mp_d = vp2;\n\t  V->_mp_size = vn;\n      S->_mp_d = sp;\n\t  S->_mp_size = sn;\n      G->_mp_d = gp;\n\t  G->_mp_size = gn;\n\n\t  check_normalisation(G, S, U, V, T, T1, T2);\n   }\n\n   mpz_clear(T);\n   mpz_clear(T1);\n   mpz_clear(T2);\n   gmp_randclear(rands);\n}\n\nint main(void)\n{\n  tests_start();\n  check_gcdext();\n  tests_end();\n\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpn/t-get_d.c",
    "content": "/* Test mpn_get_d.\n\nCopyright 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* Note that we don't use <limits.h> for LONG_MIN, but instead our own\n   definition in gmp-impl.h.  In gcc 2.95.4 (debian 3.0) under\n   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that\n   means long is 64-bit long, but it's only 32-bits, causing fatal compile\n   errors.  */\n\n#include <setjmp.h>\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Exercise various 2^n values, with various exponents and positive and\n   negative.  
*/\nvoid\ncheck_onebit (void)\n{\n  static const int bit_table[] = {\n    0, 1, 2, 3,\n    GMP_NUMB_BITS - 2, GMP_NUMB_BITS - 1,\n    GMP_NUMB_BITS,\n    GMP_NUMB_BITS + 1, GMP_NUMB_BITS + 2,\n    2 * GMP_NUMB_BITS - 2, 2 * GMP_NUMB_BITS - 1,\n    2 * GMP_NUMB_BITS,\n    2 * GMP_NUMB_BITS + 1, 2 * GMP_NUMB_BITS + 2,\n    3 * GMP_NUMB_BITS - 2, 3 * GMP_NUMB_BITS - 1,\n    3 * GMP_NUMB_BITS,\n    3 * GMP_NUMB_BITS + 1, 3 * GMP_NUMB_BITS + 2,\n    4 * GMP_NUMB_BITS - 2, 4 * GMP_NUMB_BITS - 1,\n    4 * GMP_NUMB_BITS,\n    4 * GMP_NUMB_BITS + 1, 4 * GMP_NUMB_BITS + 2,\n    5 * GMP_NUMB_BITS - 2, 5 * GMP_NUMB_BITS - 1,\n    5 * GMP_NUMB_BITS,\n    5 * GMP_NUMB_BITS + 1, 5 * GMP_NUMB_BITS + 2,\n    6 * GMP_NUMB_BITS - 2, 6 * GMP_NUMB_BITS - 1,\n    6 * GMP_NUMB_BITS,\n    6 * GMP_NUMB_BITS + 1, 6 * GMP_NUMB_BITS + 2,\n  };\n  static const int exp_table[] = {\n    0, -100, -10, -1, 1, 10, 100,\n  };\n\n  /* FIXME: It'd be better to base this on the float format. */\n  int     limit = 511;\n\n  int        bit_i, exp_i, i;\n  double     got, want;\n  mp_size_t  nsize, sign;\n  long       bit, exp, want_bit;\n  mp_limb_t  np[20];\n\n  for (bit_i = 0; bit_i < numberof (bit_table); bit_i++)\n    {\n      bit = bit_table[bit_i];\n\n      nsize = BITS_TO_LIMBS (bit+1);\n      refmpn_zero (np, nsize);\n      np[bit/GMP_NUMB_BITS] = CNST_LIMB(1) << (bit % GMP_NUMB_BITS);\n\n      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)\n        {\n          exp = exp_table[exp_i];\n\n          want_bit = bit + exp;\n          if (want_bit > limit || want_bit < -limit)\n            continue;\n\n          want = 1.0;\n          for (i = 0; i < want_bit; i++)\n            want *= 2.0;\n          for (i = 0; i > want_bit; i--)\n            want *= 0.5;\n\n          for (sign = 0; sign >= -1; sign--, want = -want)\n            {\n              got = mpn_get_d (np, nsize, sign, exp);\n              if (got != want)\n                {\n                  printf    (\"mpn_get_d wrong on 
2^n\\n\");\n                  printf    (\"   bit      %ld\\n\", bit);\n                  printf    (\"   exp      %ld\\n\", exp);\n                  printf    (\"   want_bit %ld\\n\", want_bit);\n                  printf    (\"   sign     %ld\\n\", (long) sign);\n                  mpn_trace (\"   n        \", np, nsize);\n                  printf    (\"   nsize    %ld\\n\", (long) nsize);\n                  d_trace   (\"   want     \", want);\n                  d_trace   (\"   got      \", got);\n                  abort();\n                }\n            }\n        }\n    }\n}\n\n\n/* Exercise values 2^n+1, while such a value fits the mantissa of a double. */\nvoid\ncheck_twobit (void)\n{\n  int        i, mant_bits;\n  double     got, want;\n  mp_size_t  nsize, sign;\n  mp_ptr     np;\n\n  mant_bits = tests_dbl_mant_bits ();\n  if (mant_bits == 0)\n    return;\n\n  np = refmpn_malloc_limbs (BITS_TO_LIMBS (mant_bits));\n  want = 3.0;\n  for (i = 1; i < mant_bits; i++)\n    {\n      nsize = BITS_TO_LIMBS (i+1);\n      refmpn_zero (np, nsize);\n      np[i/GMP_NUMB_BITS] = CNST_LIMB(1) << (i % GMP_NUMB_BITS);\n      np[0] |= 1;\n\n      for (sign = 0; sign >= -1; sign--)\n        {\n          got = mpn_get_d (np, nsize, sign, 0);\n          if (got != want)\n            {\n              printf    (\"mpn_get_d wrong on 2^%d + 1\\n\", i);\n              printf    (\"   sign     %ld\\n\", (long) sign);\n              mpn_trace (\"   n        \", np, nsize);\n              printf    (\"   nsize    %ld\\n\", (long) nsize);\n              d_trace   (\"   want     \", want);\n              d_trace   (\"   got      \", got);\n              abort();\n            }\n          want = -want;\n        }\n\n      want = 2.0 * want - 1.0;\n    }\n\n  free (np);\n}\n\n\n/* Expect large negative exponents to underflow to 0.0.\n   Some systems might have hardware traps for such an underflow (though\n   usually it's not the default), so watch out for SIGFPE. 
*/\nvoid\ncheck_underflow (void)\n{\n  static const long exp_table[] = {\n    -999999L, LONG_MIN,\n  };\n  static const mp_limb_t  np[1] = { 1 };\n\n  static long exp;\n  mp_size_t  nsize, sign;\n  double     got;\n  int        exp_i;\n\n  nsize = numberof (np);\n\n  if (tests_setjmp_sigfpe() == 0)\n    {\n      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)\n        {\n          exp = exp_table[exp_i];\n\n          for (sign = 0; sign >= -1; sign--)\n            {\n              got = mpn_get_d (np, nsize, sign, exp);\n              if (got != 0.0)\n                {\n                  printf  (\"mpn_get_d wrong, didn't get 0.0 on underflow\\n\");\n                  printf  (\"  nsize    %ld\\n\", (long) nsize);\n                  printf  (\"  exp      %ld\\n\", exp);\n                  printf  (\"  sign     %ld\\n\", (long) sign);\n                  d_trace (\"  got      \", got);\n                  abort ();\n                }\n            }\n        }\n    }\n  else\n    {\n      printf (\"Warning, underflow to zero tests skipped due to SIGFPE (exp=%ld)\\n\", exp);\n    }\n  tests_sigfpe_done ();\n}\n\n\n/* Expect large values to result in +/-inf, on IEEE systems. */\nvoid\ncheck_inf (void)\n{\n  static const long exp_table[] = {\n    999999L, LONG_MAX,\n  };\n  static const mp_limb_t  np[4] = { 1, 1, 1, 1 };\n  long       exp;\n  mp_size_t  nsize, sign, got_sign;\n  double     got;\n  int        exp_i;\n\n  for (nsize = 1; nsize <= numberof (np); nsize++)\n    {\n      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)\n        {\n          exp = exp_table[exp_i];\n\n          for (sign = 0; sign >= -1; sign--)\n            {\n              got = mpn_get_d (np, nsize, sign, exp);\n              got_sign = (got >= 0 ? 0 : -1);\n              if (! 
tests_isinf (got))\n                {\n                  printf  (\"mpn_get_d wrong, didn't get infinity\\n\");\n                bad:\n                  printf  (\"  nsize    %ld\\n\", (long) nsize);\n                  printf  (\"  exp      %ld\\n\", exp);\n                  printf  (\"  sign     %ld\\n\", (long) sign);\n                  d_trace (\"  got      \", got);\n                  printf  (\"  got sign %ld\\n\", (long) got_sign);\n                  abort ();\n                }\n              if (got_sign != sign)\n                {\n                  printf  (\"mpn_get_d wrong sign on infinity\\n\");\n                  goto bad;\n                }\n            }\n        }\n    }\n}\n\n/* Check values 2^n approaching and into IEEE denorm range.\n   Some systems might not support denorms, or might have traps setup, so\n   watch out for SIGFPE.  */\nvoid\ncheck_ieee_denorm (void)\n{\n  static long exp;\n  mp_limb_t  n = 1;\n  long       i;\n  mp_size_t  sign;\n  double     want, got;\n\n  if (tests_setjmp_sigfpe() == 0)\n    {\n      exp = -1020;\n      want = 1.0;\n      for (i = 0; i > exp; i--)\n        want *= 0.5;\n\n      for ( ; exp > -1500 && want != 0.0; exp--)\n        {\n          for (sign = 0; sign >= -1; sign--)\n            {\n              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);\n              if (got != want)\n                {\n                  printf  (\"mpn_get_d wrong on denorm\\n\");\n                  printf  (\"  n=1\\n\");\n                  printf  (\"  exp   %ld\\n\", exp);\n                  printf  (\"  sign  %ld\\n\", (long) sign);\n                  d_trace (\"  got   \", got);\n                  d_trace (\"  want  \", want);\n                  abort ();\n                }\n              want = -want;\n            }\n          want *= 0.5;\n        }\n    }\n  else\n    {\n      printf (\"Warning, IEEE denorm tests skipped due to SIGFPE (exp=%ld)\\n\", exp);\n    }\n  tests_sigfpe_done ();\n}\n\n\n/* Check values 2^n 
approaching exponent overflow.\n   Some systems might trap on overflow, so watch out for SIGFPE.  */\nvoid\ncheck_ieee_overflow (void)\n{\n  static long exp;\n  mp_limb_t  n = 1;\n  long       i;\n  mp_size_t  sign;\n  double     want, got;\n\n  if (tests_setjmp_sigfpe() == 0)\n    {\n      exp = 1010;\n      want = 1.0;\n      for (i = 0; i < exp; i++)\n        want *= 2.0;\n\n      for ( ; exp < 1050; exp++)\n        {\n          for (sign = 0; sign >= -1; sign--)\n            {\n              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);\n              if (got != want)\n                {\n                  printf  (\"mpn_get_d wrong on overflow\\n\");\n                  printf  (\"  n=1\\n\");\n                  printf  (\"  exp   %ld\\n\", exp);\n                  printf  (\"  sign  %ld\\n\", (long) sign);\n                  d_trace (\"  got   \", got);\n                  d_trace (\"  want  \", want);\n                  abort ();\n                }\n              want = -want;\n            }\n          want *= 2.0;\n        }\n    }\n  else\n    {\n      printf (\"Warning, IEEE overflow tests skipped due to SIGFPE (exp=%ld)\\n\", exp);\n    }\n  tests_sigfpe_done ();\n}\n\n\n/* ARM gcc 2.95.4 was seen generating bad code for ulong->double\n   conversions, resulting in for instance 0x81c25113 incorrectly converted.\n   This test exercises that value, to see mpn_get_d has avoided the\n   problem.  
*/\nvoid\ncheck_0x81c25113 (void)\n{\n#if GMP_NUMB_BITS >= 32\n  double     want = 2176995603.0;\n  double     got;\n  mp_limb_t  np[4];\n  mp_size_t  nsize;\n  long       exp;\n\n  if (tests_dbl_mant_bits() < 32)\n    return;\n\n  for (nsize = 1; nsize <= numberof (np); nsize++)\n    {\n      refmpn_zero (np, nsize-1);\n      np[nsize-1] = CNST_LIMB(0x81c25113);\n      exp = - (nsize-1) * GMP_NUMB_BITS;\n      got = mpn_get_d (np, nsize, (mp_size_t) 0, exp);\n      if (got != want)\n        {\n          printf  (\"mpn_get_d wrong on 2176995603 (0x81c25113)\\n\");\n          printf  (\"  nsize  %ld\\n\", (long) nsize);\n          printf  (\"  exp    %ld\\n\", exp);\n          d_trace (\"  got    \", got);\n          d_trace (\"  want   \", want);\n          abort ();\n        }\n    }\n#endif\n}\n\n\nvoid\ncheck_rand (void)\n{\n  gmp_randstate_t rands;\n  int            rep, i;\n  unsigned long  mant_bits;\n  long           exp, exp_min, exp_max;\n  double         got, want, d;\n  mp_size_t      nalloc, nsize, sign;\n  mp_limb_t      nhigh_mask;\n  mp_ptr         np;\n  \n  gmp_randinit_default(rands);\n  \n  mant_bits = tests_dbl_mant_bits ();\n  if (mant_bits == 0)\n    return;\n\n  /* Allow for vax D format with exponent 127 to -128 only.\n     FIXME: Do something to probe for a valid exponent range.  
*/\n  exp_min = -100 - mant_bits;\n  exp_max =  100 - mant_bits;\n\n  /* space for mant_bits */\n  nalloc = BITS_TO_LIMBS (mant_bits);\n  np = refmpn_malloc_limbs (nalloc);\n  nhigh_mask = MP_LIMB_T_MAX\n    >> (GMP_NAIL_BITS + nalloc * GMP_NUMB_BITS - mant_bits);\n\n  for (rep = 0; rep < 200; rep++)\n    {\n      /* random exp_min to exp_max, inclusive */\n      exp = exp_min + (long) gmp_urandomm_ui (rands, exp_max - exp_min + 1);\n\n      /* mant_bits worth of random at np */\n      if (rep & 1)\n        mpn_randomb (np, rands, nalloc);\n      else\n        mpn_rrandom (np, rands,nalloc);\n      nsize = nalloc;\n      np[nsize-1] &= nhigh_mask;\n      MPN_NORMALIZE (np, nsize);\n      if (nsize == 0)\n        continue;\n\n      sign = (mp_size_t) gmp_urandomb_ui (rands, 1L) - 1;\n\n      /* want = {np,nsize}, converting one bit at a time */\n      want = 0.0;\n      for (i = 0, d = 1.0; i < mant_bits; i++, d *= 2.0)\n        if (np[i/GMP_NUMB_BITS] & (CNST_LIMB(1) << (i%GMP_NUMB_BITS)))\n          want += d;\n      if (sign < 0)\n        want = -want;\n\n      /* want = want * 2^exp */\n      for (i = 0; i < exp; i++)\n        want *= 2.0;\n      for (i = 0; i > exp; i--)\n        want *= 0.5;\n\n      got = mpn_get_d (np, nsize, sign, exp);\n\n      if (got != want)\n        {\n          printf    (\"mpn_get_d wrong on random data\\n\");\n          printf    (\"   sign     %ld\\n\", (long) sign);\n          mpn_trace (\"   n        \", np, nsize);\n          printf    (\"   nsize    %ld\\n\", (long) nsize);\n          printf    (\"   exp      %ld\\n\", exp);\n          d_trace   (\"   want     \", want);\n          d_trace   (\"   got      \", got);\n          abort();\n        }\n    }\n\n  free (np);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_onebit ();\n  check_twobit ();\n  check_inf ();\n  check_underflow ();\n  check_ieee_denorm ();\n  check_ieee_overflow ();\n  check_0x81c25113 ();\n  
check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-hgcd.c",
    "content": "/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nstatic mp_size_t one_test(mpz_t, mpz_t, int);\nstatic void debug_mp(mpz_t, int);\n\n#define MIN_OPERAND_SIZE 2\n\n/* Fixed values, for regression testing of mpn_hgcd. 
*/\nstruct value { int res; const char *a; const char *b; };\nstatic const struct value hgcd_values[] = {\n#if GMP_NUMB_BITS == 32\n  { 5,\n    \"0x1bddff867272a9296ac493c251d7f46f09a5591fe\",\n    \"0xb55930a2a68a916450a7de006031068c5ddb0e5c\" },\n  { 4,\n    \"0x2f0ece5b1ee9c15e132a01d55768dc13\",\n    \"0x1c6f4fd9873cdb24466e6d03e1cc66e7\" },\n  { 3, \"0x7FFFFC003FFFFFFFFFC5\", \"0x3FFFFE001FFFFFFFFFE3\"},\n#endif\n  { -1, NULL, NULL }\n};\n\nstruct hgcd_ref\n{\n  mpz_t m[2][2];\n};\n\nstatic void hgcd_ref_init(struct hgcd_ref *hgcd);\nstatic void hgcd_ref_clear(struct hgcd_ref *hgcd);\nstatic int hgcd_ref(struct hgcd_ref *hgcd, mpz_t a, mpz_t b);\nstatic int hgcd_ref_equal(const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2, temp1, temp2;\n  int i, j, chain_len;\n  gmp_randstate_ptr rands;\n  mpz_t bs;\n  unsigned long size_range;\n\n  tests_start ();\n  rands = RANDS;\n\n  mpz_init (bs);\n  mpz_init (op1);\n  mpz_init (op2);\n  mpz_init (temp1);\n  mpz_init (temp2);\n\n  for (i = 0; hgcd_values[i].res >= 0; i++)\n    {\n      mp_size_t res;\n\n      mpz_set_str (op1, hgcd_values[i].a, 0);\n      mpz_set_str (op2, hgcd_values[i].b, 0);\n\n      res = one_test (op1, op2, -1-i);\n      if (res != hgcd_values[i].res)\n\t{\n\t  fprintf (stderr, \"ERROR in test %d\\n\", -1-i);\n\t  fprintf (stderr, \"Bad return code from hgcd\\n\");\n\t  fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n\t  fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n\t  fprintf (stderr, \"expected: %d\\n\", hgcd_values[i].res);\n\t  fprintf (stderr, \"hgcd:     %d\\n\", (int) res);\n\t  abort ();\n\t}\n    }\n\n  for (i = 0; i < 15; i++)\n    {\n      /* Generate plain operands with unknown gcd.  These types of operands\n\t have proven to trigger certain bugs in development versions of the\n\t gcd code.  
The \"hgcd->row[3].rsize > M\" ASSERT is not triggered by\n\t the division chain code below, but that is most likely just a result\n\t of that other ASSERTs are triggered before it.  */\n\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 13 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      mpz_urandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);\n      mpz_urandomb (bs, rands, size_range);\n      mpz_urandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);\n\n      if (mpz_cmp (op1, op2) < 0)\n\tmpz_swap (op1, op2);\n\n      if (mpz_size (op1) > 0)\n\tone_test (op1, op2, i);\n\n      /* Generate a division chain backwards, allowing otherwise\n\t unlikely huge quotients.  */\n\n      mpz_set_ui (op1, 0);\n      mpz_urandomb (bs, rands, 32);\n      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);\n      mpz_rrandomb (op2, rands, mpz_get_ui (bs));\n      mpz_add_ui (op2, op2, 1);\n\n#if 0\n      chain_len = 1000000;\n#else\n      mpz_urandomb (bs, rands, 32);\n      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);\n#endif\n\n      for (j = 0; j < chain_len; j++)\n\t{\n\t  mpz_urandomb (bs, rands, 32);\n\t  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);\n\t  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);\n\t  mpz_add_ui (temp2, temp2, 1);\n\t  mpz_mul (temp1, op2, temp2);\n\t  mpz_add (op1, op1, temp1);\n\n\t  /* Don't generate overly huge operands.  */\n\t  if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)\n\t    break;\n\n\t  mpz_urandomb (bs, rands, 32);\n\t  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);\n\t  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);\n\t  mpz_add_ui (temp2, temp2, 1);\n\t  mpz_mul (temp1, op1, temp2);\n\t  mpz_add (op2, op2, temp1);\n\n\t  /* Don't generate overly huge operands.  
*/\n\t  if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)\n\t    break;\n\t}\n      if (mpz_cmp (op1, op2) < 0)\n\tmpz_swap (op1, op2);\n\n      if (mpz_size (op1) > 0)\n\tone_test (op1, op2, i);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  mpz_clear (temp1);\n  mpz_clear (temp2);\n\n  tests_end ();\n  exit (0);\n}\n\nstatic void\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n\nstatic int\nmpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize);\n\nstatic mp_size_t\none_test (mpz_t a, mpz_t b, int i)\n{\n  struct hgcd_matrix hgcd;\n  struct hgcd_ref ref;\n\n  mpz_t ref_r0;\n  mpz_t ref_r1;\n  mpz_t hgcd_r0;\n  mpz_t hgcd_r1;\n\n  mp_size_t res[2];\n  mp_size_t asize;\n  mp_size_t bsize;\n\n  mp_size_t hgcd_init_scratch;\n  mp_size_t hgcd_scratch;\n\n  mp_ptr hgcd_init_tp;\n  mp_ptr hgcd_tp;\n\n  asize = a->_mp_size;\n  bsize = b->_mp_size;\n\n  ASSERT (asize >= bsize);\n\n  hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);\n  hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch);\n  mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);\n\n  hgcd_scratch = mpn_hgcd_itch (asize);\n  hgcd_tp = refmpn_malloc_limbs (hgcd_scratch);\n\n#if 0\n  fprintf (stderr,\n\t   \"one_test: i = %d asize = %d, bsize = %d\\n\",\n\t   i, a->_mp_size, b->_mp_size);\n\n  gmp_fprintf (stderr,\n\t       \"one_test: i = %d\\n\"\n\t       \"  a = %Zx\\n\"\n\t       \"  b = %Zx\\n\",\n\t       i, a, b);\n#endif\n  hgcd_ref_init (&ref);\n\n  mpz_init_set (ref_r0, a);\n  mpz_init_set (ref_r1, b);\n  res[0] = hgcd_ref (&ref, ref_r0, ref_r1);\n\n  mpz_init_set (hgcd_r0, a);\n  mpz_init_set (hgcd_r1, b);\n  if (bsize < asize)\n    {\n      _mpz_realloc (hgcd_r1, asize);\n      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);\n    }\n  res[1] = mpn_hgcd (hgcd_r0->_mp_d,\n\t\t     hgcd_r1->_mp_d,\n\t\t     asize,\n\t\t     &hgcd, hgcd_tp);\n\n  if (res[0] != res[1])\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      
fprintf (stderr, \"Different return value from hgcd and hgcd_ref\\n\");\n      fprintf (stderr, \"op1=\");                 debug_mp (a, -16);\n      fprintf (stderr, \"op2=\");                 debug_mp (b, -16);\n      fprintf (stderr, \"hgcd_ref: %ld\\n\", (long) res[0]);\n      fprintf (stderr, \"mpn_hgcd: %ld\\n\", (long) res[1]);\n      abort ();\n    }\n  if (res[0] > 0)\n    {\n      if (!hgcd_ref_equal (&hgcd, &ref)\n\t  || !mpz_mpn_equal (ref_r0, hgcd_r0->_mp_d, res[1])\n\t  || !mpz_mpn_equal (ref_r1, hgcd_r1->_mp_d, res[1]))\n\t{\n\t  fprintf (stderr, \"ERROR in test %d\\n\", i);\n\t  fprintf (stderr, \"mpn_hgcd and hgcd_ref returned different values\\n\");\n\t  fprintf (stderr, \"op1=\");                 debug_mp (a, -16);\n\t  fprintf (stderr, \"op2=\");                 debug_mp (b, -16);\n\t  abort ();\n\t}\n    }\n\n  refmpn_free_limbs (hgcd_init_tp);\n  refmpn_free_limbs (hgcd_tp);\n  hgcd_ref_clear (&ref);\n  mpz_clear (ref_r0);\n  mpz_clear (ref_r1);\n  mpz_clear (hgcd_r0);\n  mpz_clear (hgcd_r1);\n\n  return res[0];\n}\n\nstatic void\nhgcd_ref_init (struct hgcd_ref *hgcd)\n{\n  unsigned i;\n  for (i = 0; i<2; i++)\n    {\n      unsigned j;\n      for (j = 0; j<2; j++)\n\tmpz_init (hgcd->m[i][j]);\n    }\n}\n\nstatic void\nhgcd_ref_clear (struct hgcd_ref *hgcd)\n{\n  unsigned i;\n  for (i = 0; i<2; i++)\n    {\n      unsigned j;\n      for (j = 0; j<2; j++)\n\tmpz_clear (hgcd->m[i][j]);\n    }\n}\n\n\nstatic int\nsdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)\n{\n  mpz_fdiv_qr (q, r, a, b);\n  if (mpz_size (r) <= s)\n    {\n      mpz_add (r, r, b);\n      mpz_sub_ui (q, q, 1);\n    }\n\n  return (mpz_sgn (q) > 0);\n}\n\nstatic int\nhgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)\n{\n  mp_size_t n = MAX (mpz_size (a), mpz_size (b));\n  mp_size_t s = n/2 + 1;\n  mp_size_t asize;\n  mp_size_t bsize;\n  mpz_t q;\n  int res;\n\n  if (mpz_size (a) <= s || mpz_size (b) <= s)\n    return 0;\n\n  res = mpz_cmp (a, b);\n  if (res < 
0)\n    {\n      mpz_sub (b, b, a);\n      if (mpz_size (b) <= s)\n\treturn 0;\n\n      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 0);\n      mpz_set_ui (hgcd->m[1][0], 1); mpz_set_ui (hgcd->m[1][1], 1);\n    }\n  else if (res > 0)\n    {\n      mpz_sub (a, a, b);\n      if (mpz_size (a) <= s)\n\treturn 0;\n\n      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 1);\n      mpz_set_ui (hgcd->m[1][0], 0); mpz_set_ui (hgcd->m[1][1], 1);\n    }\n  else\n    return 0;\n\n  mpz_init (q);\n\n  for (;;)\n    {\n      ASSERT (mpz_size (a) > s);\n      ASSERT (mpz_size (b) > s);\n\n      if (mpz_cmp (a, b) > 0)\n\t{\n\t  if (!sdiv_qr (q, a, s, a, b))\n\t    break;\n\t  mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);\n\t  mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);\n\t}\n      else\n\t{\n\t  if (!sdiv_qr (q, b, s, b, a))\n\t    break;\n\t  mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);\n\t  mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);\n\t}\n    }\n\n  mpz_clear (q);\n\n  asize = mpz_size (a);\n  bsize = mpz_size (b);\n  return MAX (asize, bsize);\n}\n\nstatic int\nmpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize)\n{\n  mp_srcptr ap = a->_mp_d;\n  mp_size_t asize = a->_mp_size;\n\n  MPN_NORMALIZE (bp, bsize);\n  return asize == bsize && mpn_cmp (ap, bp, asize) == 0;\n}\n\nstatic int\nhgcd_ref_equal (const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref)\n{\n  unsigned i;\n\n  for (i = 0; i<2; i++)\n    {\n      unsigned j;\n\n      for (j = 0; j<2; j++)\n\tif (!mpz_mpn_equal (ref->m[i][j], hgcd->p[i][j], hgcd->n))\n\t  return 0;\n    }\n\n  return 1;\n}\n"
  },
  {
    "path": "tests/mpn/t-instrument.c",
    "content": "/* Test assembler support for --enable-profiling=instrument.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\n#if WANT_PROFILING_INSTRUMENT\n\n/* This program exercises each mpn routine that might be implemented in\n   assembler.  It ensures the __cyg_profile_func_enter and exit calls have\n   come out right, and that in the x86 code \"ret_internal\" is correctly used\n   for PIC setups.  */\n\n\n/* Changes to enter_seen done by __cyg_profile_func_enter are essentially\n   unknown to the optimizer, so must use volatile.  */\nvolatile int  enter_seen;\n\n/* Dummy used to stop various calls going dead. 
*/\nunsigned long  notdead;\n\nconst char     *name = \"<none>\";\nint  old_ncall;\n\nstruct {\n  void  *this_fn;\n  void  *call_site;\n} call[100];\nint  ncall;\n\n\nvoid __cyg_profile_func_enter(void *this_fn, void *call_site)\n     __attribute__ ((no_instrument_function));\n\nvoid\n__cyg_profile_func_enter (void *this_fn, void *call_site)\n{\n#if 0\n  printf (\"%24s %p %p\\n\", name, this_fn, call_site);\n#endif\n  ASSERT_ALWAYS (ncall >= 0);\n  ASSERT_ALWAYS (ncall <= numberof (call));\n\n  if (ncall >= numberof (call))\n    {\n      printf (\"__cyg_profile_func_enter: oops, call stack full, from %s\\n\", name);\n      abort ();\n    }\n\n  enter_seen = 1;\n  call[ncall].this_fn = this_fn;\n  call[ncall].call_site = call_site;\n  ncall++;\n}\n\nvoid __cyg_profile_func_exit(void *this_fn, void *call_site)\n     __attribute__ ((no_instrument_function));\n\nvoid\n__cyg_profile_func_exit  (void *this_fn, void *call_site)\n{\n  ASSERT_ALWAYS (ncall >= 0);\n  ASSERT_ALWAYS (ncall <= numberof (call));\n\n  if (ncall == 0)\n    {\n      printf (\"__cyg_profile_func_exit: call stack empty, from %s\\n\", name);\n      abort ();\n    }\n\n  ncall--;\n  if (this_fn != call[ncall].this_fn || call_site != call[ncall].call_site)\n    {\n      printf (\"__cyg_profile_func_exit: unbalanced this_fn/call_site from %s\\n\", name);\n      printf (\"  this_fn got  %p\\n\", this_fn);\n      printf (\"          want %p\\n\", call[ncall].this_fn);\n      printf (\"  call_site got  %p\\n\", call_site);\n      printf (\"            want %p\\n\", call[ncall].call_site);\n      abort ();\n    }\n}\n\n\nvoid\npre (const char *str)\n{\n  name = str;\n  enter_seen = 0;\n  old_ncall = ncall;\n}\n\nvoid\npost (void)\n{\n  if (! 
enter_seen)\n    {\n      printf (\"did not reach __cyg_profile_func_enter from %s\\n\", name);\n      abort ();\n    }\n\n  if (ncall != old_ncall)\n    {\n      printf (\"unbalance enter/exit calls from %s\\n\", name);\n      printf (\"  ncall     %d\\n\", ncall);\n      printf (\"  old_ncall %d\\n\", old_ncall);\n      abort ();\n    }\n}\n\nvoid\ncheck (void)\n{\n  mp_limb_t  wp[100], xp[100], yp[100];\n  mp_size_t  size = 100;\n\n  refmpn_zero (xp, size);\n  refmpn_zero (yp, size);\n  refmpn_zero (wp, size);\n\n  pre (\"mpn_add_n\");\n  mpn_add_n (wp, xp, yp, size);\n  post ();\n\n#if HAVE_NATIVE_mpn_add_nc\n  pre (\"mpn_add_nc\");\n  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\n  pre (\"mpn_addlsh1_n\");\n  mpn_addlsh1_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_and_n\n  pre (\"mpn_and_n\");\n  mpn_and_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_andn_n\n  pre (\"mpn_andn_n\");\n  mpn_andn_n (wp, xp, yp, size);\n  post ();\n#endif\n\n  pre (\"mpn_addmul_1\");\n  mpn_addmul_1 (wp, xp, size, yp[0]);\n  post ();\n\n#if HAVE_NATIVE_mpn_addmul_1c\n  pre (\"mpn_addmul_1c\");\n  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_com_n\n  pre (\"mpn_com_n\");\n  mpn_com_n (wp, xp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_copyd\n  pre (\"mpn_copyd\");\n  mpn_copyd (wp, xp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_copyi\n  pre (\"mpn_copyi\");\n  mpn_copyi (wp, xp, size);\n  post ();\n#endif\n\n  pre (\"mpn_divexact_1\");\n  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));\n  post ();\n\n  pre (\"mpn_divexact_by3c\");\n  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));\n  post ();\n\n  pre (\"mpn_divrem_1\");\n  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));\n  post ();\n\n#if HAVE_NATIVE_mpn_divrem_1c\n  pre (\"mpn_divrem_1c\");\n  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), 
CNST_LIMB(122));\n  post ();\n#endif\n\n  pre (\"mpn_gcd_1\");\n  xp[0] |= 1;\n  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));\n  post ();\n\n  pre (\"mpn_hamdist\");\n  notdead += mpn_hamdist (xp, yp, size);\n  post ();\n\n#if HAVE_NATIVE_mpn_ior_n\n  pre (\"mpn_ior_n\");\n  mpn_ior_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_iorn_n\n  pre (\"mpn_iorn_n\");\n  mpn_iorn_n (wp, xp, yp, size);\n  post ();\n#endif\n\n  pre (\"mpn_lshift\");\n  mpn_lshift (wp, xp, size, 1);\n  post ();\n\n  pre (\"mpn_mod_1\");\n  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));\n  post ();\n\n#if HAVE_NATIVE_mpn_mod_1c\n  pre (\"mpn_mod_1c\");\n  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));\n  post ();\n#endif\n\n#if GMP_NUMB_BITS % 4 == 0\n  pre (\"mpn_mod_34lsub1\");\n  notdead += mpn_mod_34lsub1 (xp, size);\n  post ();\n#endif\n\n  pre (\"mpn_modexact_1_odd\");\n  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));\n  post ();\n\n  pre (\"mpn_modexact_1c_odd\");\n  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));\n  post ();\n\n  pre (\"mpn_mul_1\");\n  mpn_mul_1 (wp, xp, size, yp[0]);\n  post ();\n\n#if HAVE_NATIVE_mpn_mul_1c\n  pre (\"mpn_mul_1c\");\n  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_mul_2\n  pre (\"mpn_mul_2\");\n  mpn_mul_2 (wp, xp, size-1, yp);\n  post ();\n#endif\n\n  pre (\"mpn_mul_basecase\");\n  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);\n  post ();\n\n#if HAVE_NATIVE_mpn_nand_n\n  pre (\"mpn_nand_n\");\n  mpn_nand_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_nior_n\n  pre (\"mpn_nior_n\");\n  mpn_nior_n (wp, xp, yp, size);\n  post ();\n#endif\n\n  pre (\"mpn_popcount\");\n  notdead += mpn_popcount (xp, size);\n  post ();\n\n  pre (\"mpn_preinv_mod_1\");\n  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,\n                               refmpn_invert_limb (GMP_NUMB_MAX));\n  post 
();\n\n#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1\n  pre (\"mpn_preinv_divrem_1\");\n  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,\n                       refmpn_invert_limb (GMP_NUMB_MAX), 0);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_rsh1add_n\n  pre (\"mpn_rsh1add_n\");\n  mpn_rsh1add_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_rsh1sub_n\n  pre (\"mpn_rsh1sub_n\");\n  mpn_rsh1sub_n (wp, xp, yp, size);\n  post ();\n#endif\n\n  pre (\"mpn_rshift\");\n  mpn_rshift (wp, xp, size, 1);\n  post ();\n\n  pre (\"mpn_sqr_basecase\");\n  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);\n  post ();\n\n  pre (\"mpn_submul_1\");\n  mpn_submul_1 (wp, xp, size, yp[0]);\n  post ();\n\n#if HAVE_NATIVE_mpn_submul_1c\n  pre (\"mpn_submul_1c\");\n  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));\n  post ();\n#endif\n\n  pre (\"mpn_sub_n\");\n  mpn_sub_n (wp, xp, yp, size);\n  post ();\n\n#if HAVE_NATIVE_mpn_sub_nc\n  pre (\"mpn_sub_nc\");\n  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_sublsh1_n\n  pre (\"mpn_sublsh1_n\");\n  mpn_sublsh1_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_udiv_qrnnd\n  pre (\"mpn_udiv_qrnnd\");\n  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_udiv_qrnnd_r\n  pre (\"mpn_udiv_qrnnd_r\");\n  mpn_udiv_qrnnd (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_umul_ppmm\n  pre (\"mpn_umul_ppmm\");\n  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_umul_ppmm_r\n  pre (\"mpn_umul_ppmm_r\");\n  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_xor_n\n  pre (\"mpn_xor_n\");\n  mpn_xor_n (wp, xp, yp, size);\n  post ();\n#endif\n\n#if HAVE_NATIVE_mpn_xnor_n\n  pre (\"mpn_xnor_n\");\n  mpn_xnor_n (wp, xp, yp, size);\n  post ();\n#endif\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  
check ();\n\n  tests_end ();\n  exit (0);\n}\n\n\n#else /* ! WANT_PROFILING_INSTRUMENT */\n\nint\nmain (void)\n{\n  exit (0);\n}\n\n#endif\n"
  },
  {
    "path": "tests/mpn/t-inv_div_q.c",
    "content": "/* Test mpn_inv_div_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check precomputed inverse division routine. 
*/\nvoid\ncheck_inv_div_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t inv[MAX_LIMBS];\n   mp_limb_t cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS-3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpn_invert(inv, dp, dn);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_inv_div_q(qp, np, nn, dp, dn, inv);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_inv_div_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-inv_div_qr.c",
    "content": "/* Test mpn_inv_div_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_inv_div_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t inv[MAX_LIMBS];\n   mp_limb_t cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS - 3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpn_invert(inv, dp, dn);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_inv_div_qr(qp, np, nn, dp, dn, inv);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_inv_div_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-inv_div_qr_n.c",
    "content": "/* Test mpn_inv_div_qr_n.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check precomputed inverse division routine. 
*/\nvoid\ncheck_inv_div_qr_n (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t inv[MAX_LIMBS];\n   mp_limb_t cy;\n\n   mp_size_t rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      \n      mpn_rrandom (np, rands, 2*dn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, 2*dn);\n      \n      mpn_invert(inv, dp, dn);\n      \n      qn = dn + 1;\n         \n      qp[qn - 1] = mpn_inv_div_qr_n(qp, np, dp, dn, inv);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > 2*dn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, 2*dn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, 2*dn, rp, rn);\n         rn = 2*dn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = 2*dn;\n         MPN_COPY(rp, np, 2*dn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"dn = %lu, qn = %lu, rn = %lu\\n\\n\", dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, 2*dn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, 2*dn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_inv_div_qr_n ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-inv_divappr_q.c",
    "content": "/* Test mpn_inv_divappr_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 1000\n#define ITERS 1000\n   \n/* Check precomputed inverse division routine. 
*/\nvoid\ncheck_inv_divappr_q (void)\n{\n   mp_ptr np, np2, rp, dp, qp, inv;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   \n   TMP_DECL;\n   TMP_MARK;\n\n   np = TMP_ALLOC_LIMBS(2*MAX_LIMBS);\n   np2 = TMP_ALLOC_LIMBS(2*MAX_LIMBS);\n   rp = TMP_ALLOC_LIMBS(2*MAX_LIMBS);\n   dp = TMP_ALLOC_LIMBS(MAX_LIMBS);\n   qp = TMP_ALLOC_LIMBS(2*MAX_LIMBS);\n   inv = TMP_ALLOC_LIMBS(MAX_LIMBS);\n\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS - 3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpn_invert(inv, dp, dn);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_inv_divappr_q(qp, np, nn, dp, dn, inv);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n\n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n\n         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);\n         if (s <= 0) \n         {\n            mpn_sub(rp, np2, nn, rp, rn);\n            rn = nn;\n            MPN_NORMALIZE(rp, rn);\n         } else \n         {\n            mpn_sub(rp, rp, rn, np2, nn);\n            MPN_NORMALIZE(rp, rn);\n         }\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np2, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n \n   TMP_FREE;\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_inv_divappr_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-inv_divappr_q_n.c",
    "content": "/* Test mpn_inv_divappr_q_n.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 2000\n#define ITERS 500\n   \n/* Check divide and conquer division routine. 
*/\nvoid\ncheck_inv_divappr_q_n (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[MAX_LIMBS];\n   mp_limb_t dip[MAX_LIMBS];\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 6)) + 6;\n      nn = 2*dn;\n         \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpn_invert(dip, dp, dn);\n\n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_inv_divappr_q_n(qp, np, dp, dn, dip);\n      \n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n\n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n\n         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);\n         if (s <= 0) \n         {\n            mpn_sub(rp, np2, nn, rp, rn);\n            rn = nn;\n            MPN_NORMALIZE(rp, rn);\n         } else \n         {\n            mpn_sub(rp, rp, rn, np2, nn);\n            MPN_NORMALIZE(rp, rn);\n         }\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_inv_divappr_q_n ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-invert.c",
    "content": "/* Test mpn_invert.\n\nCopyright 2002, 2003, 2004 Free Software Foundation, Inc.\nCopyright 2009 Paul Zimmermann\nCopyright 2009 William Hart\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* Note that we don't use <limits.h> for LONG_MIN, but instead our own\n   definition in gmp-impl.h.  In gcc 2.95.4 (debian 3.0) under\n   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that\n   means long is 64-bit long, but it's only 32-bits, causing fatal compile\n   errors.  
*/\n\n#include <setjmp.h>\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#define ITERS 100\n\nint\ntest_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)\n{\n  int res = 1;\n  mp_size_t i;\n  mp_ptr tp, up;\n  mp_limb_t cy;\n  TMP_DECL;\n\n  TMP_MARK;\n  tp = TMP_ALLOC_LIMBS (2 * n);\n  up = TMP_ALLOC_LIMBS (2 * n);\n\n  /* first check X*A < B^(2*n) */\n  mpn_mul_n (tp, xp, ap, n);\n  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */\n  if (cy != 0)\n    return 0;\n\n  /* now check B^(2n) - X*A <= A */\n  mpn_com_n (tp, tp, 2 * n);\n  mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */\n  MPN_ZERO (up, 2 * n);\n  MPN_COPY (up, ap, n);\n  res = mpn_cmp (tp, up, 2 * n) <= 0;\n  TMP_FREE;\n  return res;\n}\n\nvoid check_rand(void)\n{\n  mp_size_t i, n;\n  mp_ptr qp, dp;\n\n  gmp_randstate_t rands;\n  gmp_randinit_default(rands);\n\n  for (n = 1; n < 3000; n++)\n  {\n     mp_limb_t bits;\n     count_leading_zeros(bits, n);  \n     bits = GMP_LIMB_BITS - bits;\n     if (n > 100) n+=2;\n     if (n > 300) n+=4;\n     if (n > 1000) n+=8;\n     if (n > 2000) n+=16;\n     qp = malloc (n * sizeof (mp_limb_t));\n     dp = malloc (n * sizeof (mp_limb_t));\n  \n     mpn_rrandom(dp, rands, n);\n     dp[n - 1] |= GMP_NUMB_HIGHBIT;\n   \n     for (i = 0; i < ITERS/bits; i++)\n     {\n        mpn_rrandom(dp, rands, n);\n        dp[n - 1] |= GMP_NUMB_HIGHBIT;\n        mpn_invert (qp, dp, n);\n        if (test_invert (qp, dp, n) == 0)\n        {\n          fprintf (stderr, \"t-invert failed at n = %lu, i=%lu\\n\", n, i);\n          gmp_printf (\"A:= %Nx\\n\", dp, n);\n          gmp_printf (\"X:=B^%lu*%Nx\\n\", n, qp, n);\n          abort();\n        }\n     }\n  }\n\n  free (qp);\n  free (dp);\n  gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  \n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-iord_u.c",
    "content": "/* Test MPN_INCR_U and MPN_DECR_U.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* The i386 MPN_INCR_U and MPN_DECR_U have special cases for \"n\" being a\n   compile-time constant 1, so that's exercised explicitly.  */\n\n\n#define M     GMP_NUMB_MAX\n#define SIZE  ((mp_size_t) 10)\n\n\nvoid\ncheck_one (const char *name, int i,\n           mp_srcptr src, mp_limb_t n,\n           mp_srcptr got, mp_srcptr want, mp_size_t size)\n{\n  if (! 
refmpn_equal_anynail (got, want, size))\n    {\n      printf (\"Wrong at %s i=%d\\n\", name, i);\n      mpn_trace (\"  src\", src,  size);\n      mpn_trace (\"    n\", &n,   (mp_size_t) 1);\n      mpn_trace (\"  got\", got,  size);\n      mpn_trace (\" want\", want, size);\n      abort ();\n    }\n}\n\n\nvoid\ncheck_incr_data (void)\n{\n  static const struct {\n    mp_limb_t        n;\n    const mp_limb_t  src[SIZE];\n    const mp_limb_t  want[SIZE];\n  } data[] = {\n    { 1, { 0 },   { 1 } },\n    { 1, { 123 }, { 124 } },\n    { 2, { 0 },   { 2 } },\n    { 2, { 123 }, { 125 } },\n    { M, { 0 },   { M } },\n\n    { 1, { M, 0 },   { 0,   1 } },\n    { 1, { M, 123 }, { 0,   124 } },\n    { 2, { M, 0 },   { 1,   1 } },\n    { 2, { M, 123 }, { 1,   124 } },\n    { M, { M, 0 },   { M-1, 1 } },\n    { M, { M, 123 }, { M-1, 124 } },\n\n    { 1, { M, M, 0 },   { 0,   0, 1 } },\n    { 1, { M, M, 123 }, { 0,   0, 124 } },\n    { 2, { M, M, 0 },   { 1,   0, 1 } },\n    { 2, { M, M, 123 }, { 1,   0, 124 } },\n    { M, { M, M, 0 },   { M-1, 0, 1 } },\n    { M, { M, M, 123 }, { M-1, 0, 124 } },\n\n    { 1, { M, M, M, 0 },   { 0,   0, 0, 1 } },\n    { 1, { M, M, M, 123 }, { 0,   0, 0, 124 } },\n    { 2, { M, M, M, 0 },   { 1,   0, 0, 1 } },\n    { 2, { M, M, M, 123 }, { 1,   0, 0, 124 } },\n    { M, { M, M, M, 0 },   { M-1, 0, 0, 1 } },\n    { M, { M, M, M, 123 }, { M-1, 0, 0, 124 } },\n\n    { 1, { M, M, M, M, 0 },   { 0,   0, 0, 0, 1 } },\n    { 1, { M, M, M, M, 123 }, { 0,   0, 0, 0, 124 } },\n    { 2, { M, M, M, M, 0 },   { 1,   0, 0, 0, 1 } },\n    { 2, { M, M, M, M, 123 }, { 1,   0, 0, 0, 124 } },\n    { M, { M, M, M, M, 0 },   { M-1, 0, 0, 0, 1 } },\n    { M, { M, M, M, M, 123 }, { M-1, 0, 0, 0, 124\n#if defined (__hpux) && ! defined (__GNUC__)\n    /* Some versions (at least HP92453-01 B.11.11.23709.GP) of the\n       HP C compilers fail to zero-fill aggregates as the ISO C standard\n       requires (cf 6.5.7 Initialization).  
Compensate here:  */\n\t\t\t\t, 0, 0, 0, 0, 0\n#endif\n    } }\n  };\n\n  mp_limb_t  got[SIZE];\n  int   i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      refmpn_copyi (got, data[i].src, SIZE);\n      MPN_INCR_U (got, SIZE, data[i].n);\n      check_one (\"check_incr (general)\", i,\n                 data[i].src, data[i].n,\n                 got, data[i].want, SIZE);\n\n      if (data[i].n == 1)\n        {\n          refmpn_copyi (got, data[i].src, SIZE);\n          MPN_INCR_U (got, SIZE, CNST_LIMB(1));\n          check_one (\"check_incr (const 1)\", i,\n                     data[i].src, data[i].n,\n                     got, data[i].want, SIZE);\n        }\n    }\n}\n\nvoid\ncheck_decr_data (void)\n{\n  static const struct {\n    mp_limb_t        n;\n    const mp_limb_t  src[SIZE];\n    const mp_limb_t  want[SIZE];\n  } data[] = {\n    { 1,   { 1 },   { 0   } },\n    { 1,   { 123 }, { 122 } },\n    { 1,   { M },   { M-1 } },\n    { 2,   { 2 },   { 0   } },\n    { 2,   { 123 }, { 121 } },\n    { M,   { M },   { 0   } },\n    { M-1, { M },   { 1   } },\n\n    { 1,   { 0,   1   }, { M,   0   } },\n    { 1,   { 0,   123 }, { M,   122 } },\n    { 1,   { 0,   M   }, { M,   M-1 } },\n    { 2,   { 0,   123 }, { M-1, 122 } },\n    { 2,   { 1,   123 }, { M,   122 } },\n    { M,   { 0,   123 }, { 1,   122 } },\n    { M,   { M-1, M   }, { M,   M-1 } },\n\n    { 1,   { 0,   0, 1   }, { M,   M, 0   } },\n    { 1,   { 0,   0, 123 }, { M,   M, 122 } },\n    { 1,   { 0,   0, M   }, { M,   M, M-1 } },\n    { 2,   { 0,   0, 123 }, { M-1, M, 122 } },\n    { 2,   { 1,   0, 123 }, { M,   M, 122 } },\n    { M,   { 0,   0, 123 }, { 1,   M, 122 } },\n    { M,   { M-1, 0, M   }, { M,   M, M-1 } },\n\n    { 1,   { 0,   0, 0, 1   }, { M,   M, M, 0   } },\n    { 1,   { 0,   0, 0, 123 }, { M,   M, M, 122 } },\n    { 1,   { 0,   0, 0, M   }, { M,   M, M, M-1 } },\n    { 2,   { 0,   0, 0, 123 }, { M-1, M, M, 122 } },\n    { 2,   { 1,   0, 0, 123 }, { M,   M, M, 122 } },\n    { M,   { 0,  
 0, 0, 123 }, { 1,   M, M, 122 } },\n    { M,   { M-1, 0, 0, M   }, { M,   M, M, M-1 } },\n\n    { 1,   { 0,   0, 0, 0, 1   }, { M,   M, M, M, 0   } },\n    { 1,   { 0,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },\n    { 1,   { 0,   0, 0, 0, M   }, { M,   M, M, M, M-1 } },\n    { 2,   { 0,   0, 0, 0, 123 }, { M-1, M, M, M, 122 } },\n    { 2,   { 1,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },\n    { M,   { 0,   0, 0, 0, 123 }, { 1,   M, M, M, 122 } },\n    { M,   { M-1, 0, 0, 0, M   }, { M,   M, M, M, M-1 } },\n\n    { 1,   { 0,   0, 0, 0, 0, 1   }, { M,   M, M, M, M, 0   } },\n    { 1,   { 0,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },\n    { 1,   { 0,   0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1 } },\n    { 2,   { 0,   0, 0, 0, 0, 123 }, { M-1, M, M, M, M, 122 } },\n    { 2,   { 1,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },\n    { M,   { 0,   0, 0, 0, 0, 123 }, { 1,   M, M, M, M, 122 } },\n    { M,   { M-1, 0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1\n#if defined (__hpux) && ! defined (__GNUC__)\n    /* For explanation of this garbage, see previous function.  */\n\t\t\t\t       , 0, 0, 0, 0\n#endif\n    } }\n  };\n\n  mp_limb_t  got[SIZE];\n  int   i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      refmpn_copyi (got, data[i].src, SIZE);\n      MPN_DECR_U (got, SIZE, data[i].n);\n      check_one (\"check_decr_data\", i,\n                 data[i].src, data[i].n,\n                 got, data[i].want, SIZE);\n\n      if (data[i].n == 1)\n        {\n          refmpn_copyi (got, data[i].src, SIZE);\n          MPN_DECR_U (got, SIZE, CNST_LIMB(1));\n          check_one (\"check_decr (const 1)\", i,\n                     data[i].src, data[i].n,\n                     got, data[i].want, SIZE);\n        }\n    }\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_incr_data ();\n  check_decr_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-logic.c",
    "content": "/* Test mpn_and, mpn_ior, mpn_xor, mpn_andn, mpn_iorn, mpn_xnor, mpn_nand, and\n   mpn_nior.\n\nCopyright 2011-2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */\n\n\n#include <stdlib.h>\n#include <stdio.h>\n\n/* Fake native prevalence of the tested operations, so that we actually test\n   the compiled functions, i.e., the ones which users will reach.  The inlined\n   variants will be tested through tests/mpz/logic.c.  
*/\n#define HAVE_NATIVE_mpn_com_n  1\n#define HAVE_NATIVE_mpn_and_n  1\n#define HAVE_NATIVE_mpn_andn_n 1\n#define HAVE_NATIVE_mpn_nand_n 1\n#define HAVE_NATIVE_mpn_ior_n  1\n#define HAVE_NATIVE_mpn_iorn_n 1\n#define HAVE_NATIVE_mpn_nior_n 1\n#define HAVE_NATIVE_mpn_xor_n  1\n#define HAVE_NATIVE_mpn_xnor_n 1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mp_srcptr refp, mp_srcptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, const char *funcname)\n{\n  if (mpn_cmp (refp, rp, n))\n    {\n      printf (\"ERROR in mpn_%s\\n\", funcname);\n      printf (\"a: \"); mpn_dump (ap, n);\n      printf (\"b: \"); mpn_dump (bp, n);\n      printf (\"r:   \"); mpn_dump (rp, n);\n      printf (\"ref: \"); mpn_dump (refp, n);\n      abort();\n    }\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t a, b;\n  mp_ptr ap, bp, rp, refp;\n  mp_size_t max_n, n, i;\n  gmp_randstate_ptr rands;\n  long test, reps = 1000;\n  TMP_DECL;\n  TMP_MARK;\n\n  tests_start ();\n  /* TESTS_REPS (reps, argv, argc); */\n\n  mpz_inits (a, b, NULL);\n\n  rands = RANDS;\t\t/* FIXME: not used */\n\n  max_n = 100;\n\n  rp = TMP_ALLOC_LIMBS (1 + max_n * 8 / GMP_LIMB_BITS);\n  refp = TMP_ALLOC_LIMBS (1 + max_n * 8 / GMP_LIMB_BITS);\n\n  for (test = 0; test < reps; test++)\n    {\n      for (i = 1; i <= max_n; i++)\n\t{\n\t  mpz_rrandomb (a, rands, i * 8);\n\t  mpz_rrandomb (b, rands, i * 8);\n\t  mpz_setbit (a, i * 8 - 1);\n\t  mpz_setbit (b, i * 8 - 1);\n\t  ap = PTR(a);\n\t  bp = PTR(b);\n\t  n = SIZ(a);\n\n\t  refmpn_and_n (refp, ap, bp, n);\n\t  mpn_and_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"and_n\");\n\n\t  refmpn_ior_n (refp, ap, bp, n);\n\t  mpn_ior_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"ior_n\");\n\n\t  refmpn_xor_n (refp, ap, bp, n);\n\t  mpn_xor_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"xor_n\");\n\n\t  refmpn_andn_n (refp, ap, bp, n);\n\t  mpn_andn_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, 
\"andn_n\");\n\n\t  refmpn_iorn_n (refp, ap, bp, n);\n\t  mpn_iorn_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"iorn_n\");\n\n\t  refmpn_nand_n (refp, ap, bp, n);\n\t  mpn_nand_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"nand_n\");\n\n\t  refmpn_nior_n (refp, ap, bp, n);\n\t  mpn_nior_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"nior_n\");\n\n\t  refmpn_xnor_n (refp, ap, bp, n);\n\t  mpn_xnor_n (rp, ap, bp, n);\n\t  check_one (refp, rp, ap, bp, n, \"xnor_n\");\n\n\t  refmpn_com_n (refp, ap, n);\n\t  mpn_com_n (rp, ap, n);\n\t  check_one (refp, rp, ap, bp, n, \"com\");\n\t}\n    }\n\n  TMP_FREE;\n  mpz_clears (a, b, NULL);\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpn/t-lorrshift1.c",
    "content": "/* Test mpn_lshift1 mpn_rshift1 mpn_lshift2 mpn_rshift2 mpn_double mpn_half\n\n  Copyright 2008 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (void)\n{\n  unsigned long n;\n  gmp_randstate_t rands;\n  int j, i1, k1, k2, k3;\n  mp_limb_t xp[10000], zp[10000], cp[10000], hp[10000];\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp,rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t}\n\t      k1 = mpn_lshift1 (zp, xp, n);\n\t      k2 = mpn_lshift (cp, xp, n, 1);\n\t      MPN_COPY(hp,xp,n);k3 = mpn_double(hp,n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_lshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_lshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (k1 != k3)\n\t\t{\n\t\t  printf (\"mpn_double return wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, hp, n) != 
0)\n\t\t{\n\t\t  printf (\"mpn_double wrong\\n\");\n\t\t  abort ();\n\t\t}\n\n\t      k1 = mpn_rshift1 (zp, xp, n);\n\t      k2 = mpn_rshift (cp, xp, n, 1);\n\t      MPN_COPY(hp,xp,n);k3 = mpn_half(hp,n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_rshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_rshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t     if (k1 != k3)\n\t\t{\n\t\t  printf (\"mpn_half return wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, hp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_half wrong\\n\");\n\t\t  abort ();\n\t\t}\n              k1 = mpn_lshift2 (zp, xp, n);\n\t      k2 = mpn_lshift (cp, xp, n, 2);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_lshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_lshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      k1 = mpn_rshift2 (zp, xp, n);\n\t      k2 = mpn_rshift (cp, xp, n, 2);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_rshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (zp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_rshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp,rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t}\n\t      k2 = mpn_lshift (cp, xp, n, 1);\n\t      k1 = mpn_lshift1 (xp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_lshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_lshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands,n);\n\t\t}\n\t      else\n\t\t{\n\t\t  
mpn_rrandom (xp, rands, n);\n\t\t}\n\t      k2 = mpn_rshift (cp, xp, n, 1);\n\t      k1 = mpn_rshift1 (xp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_rshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_rshift1 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp,rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands ,n);\n\t\t}\n\t      k2 = mpn_lshift (cp, xp, n, 2);\n\t      k1 = mpn_lshift2 (xp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_lshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_lshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands,n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t}\n\t      k2 = mpn_rshift (cp, xp, n, 2);\n\t      k1 = mpn_rshift2 (xp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_rshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_rshift2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n    \n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-matrix22.c",
    "content": "/* Tests matrix22_mul.\n\nCopyright 2008 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nstruct matrix {\n  mp_size_t alloc;\n  mp_size_t n;\n  mp_ptr e00, e01, e10, e11;\n};\n\nstatic void\nmatrix_init (struct matrix *M, mp_size_t n)\n{\n  mp_ptr p = refmpn_malloc_limbs (4*(n+1));\n  M->e00 = p; p += n+1;\n  M->e01 = p; p += n+1;\n  M->e10 = p; p += n+1;\n  M->e11 = p;\n  M->alloc = n + 1;\n  M->n = 0;\n}\n\nstatic void\nmatrix_clear (struct matrix *M)\n{\n  refmpn_free_limbs (M->e00);\n}\n\nstatic void\nmatrix_copy (struct matrix *R, const struct matrix *M)\n{\n  R->n = M->n;\n  MPN_COPY (R->e00, M->e00, M->n);\n  MPN_COPY (R->e01, M->e01, M->n);\n  MPN_COPY (R->e10, M->e10, M->n);\n  MPN_COPY (R->e11, M->e11, M->n);\n}\n\n/* Used with same size, so no need for normalization. 
*/\nstatic int\nmatrix_equal_p (const struct matrix *A, const struct matrix *B)\n{\n  return (A->n == B->n\n\t  && mpn_cmp (A->e00, B->e00, A->n) == 0\n\t  && mpn_cmp (A->e01, B->e01, A->n) == 0\n\t  && mpn_cmp (A->e10, B->e10, A->n) == 0\n\t  && mpn_cmp (A->e11, B->e11, A->n) == 0);\n}\n\nstatic void\nmatrix_random(struct matrix *M, mp_size_t n, gmp_randstate_ptr rands)\n{\n  M->n = n;\n  mpn_random (M->e00, n);\n  mpn_random (M->e01, n);\n  mpn_random (M->e10, n);\n  mpn_random (M->e11, n);\n}\n\n#define MUL(rp, ap, an, bp, bn) do { \\\n    if (an > bn)\t\t     \\\n      mpn_mul (rp, ap, an, bp, bn);  \\\n    else\t\t\t     \\\n      mpn_mul (rp, bp, bn, ap, an);  \\\n  } while(0)\n\nstatic void\nref_matrix22_mul (struct matrix *R,\n\t\t  const struct matrix *A,\n\t\t  const struct matrix *B, mp_ptr tp)\n{\n  mp_size_t an, bn, n;\n  mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11;\n\n  if (A->n >= B->n)\n    {\n      r00 = R->e00; a00 = A->e00; b00 = B->e00;\n      r01 = R->e01; a01 = A->e01; b01 = B->e01;\n      r10 = R->e10; a10 = A->e10; b10 = B->e10;\n      r11 = R->e11; a11 = A->e11; b11 = B->e11;\n      an = A->n, bn = B->n;\n    }\n  else\n    {\n      /* Transpose */\n      r00 = R->e00; a00 = B->e00; b00 = A->e00;\n      r01 = R->e10; a01 = B->e10; b01 = A->e10;\n      r10 = R->e01; a10 = B->e01; b10 = A->e01;\n      r11 = R->e11; a11 = B->e11; b11 = A->e11;\n      an = B->n, bn = A->n;\n    }\n  n = an + bn;\n  R->n = n + 1;\n\n  mpn_mul (r00, a00, an, b00, bn);\n  mpn_mul (tp, a01, an, b10, bn);\n  r00[n] = mpn_add_n (r00, r00, tp, n);\n\n  mpn_mul (r01, a00, an, b01, bn);\n  mpn_mul (tp, a01, an, b11, bn);\n  r01[n] = mpn_add_n (r01, r01, tp, n);\n\n  mpn_mul (r10, a10, an, b00, bn);\n  mpn_mul (tp, a11, an, b10, bn);\n  r10[n] = mpn_add_n (r10, r10, tp, n);\n\n  mpn_mul (r11, a10, an, b01, bn);\n  mpn_mul (tp, a11, an, b11, bn);\n  r11[n] = mpn_add_n (r11, r11, tp, n);\n}\n\nstatic void\none_test (const struct matrix *A, const 
struct matrix *B, int i)\n{\n  struct matrix R;\n  struct matrix P;\n  mp_ptr tp;\n\n  matrix_init (&R, A->n + B->n + 1);\n  matrix_init (&P, A->n + B->n + 1);\n\n  tp = refmpn_malloc_limbs (mpn_matrix22_mul_itch (A->n, B->n));\n\n  ref_matrix22_mul (&R, A, B, tp);\n  matrix_copy (&P, A);\n  mpn_matrix22_mul (P.e00, P.e01, P.e10, P.e11, A->n,\n\t\t    B->e00, B->e01, B->e10, B->e11, B->n, tp);\n  P.n = A->n + B->n + 1;\n  if (!matrix_equal_p (&R, &P))\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      gmp_fprintf (stderr, \"A = (%Nx, %Nx\\n      %Nx, %Nx)\\n\"\n\t\t   \"B = (%Nx, %Nx\\n      %Nx, %Nx)\\n\"\n\t\t   \"R = (%Nx, %Nx (expected)\\n      %Nx, %Nx)\\n\"\n\t\t   \"P = (%Nx, %Nx (incorrect)\\n      %Nx, %Nx)\\n\",\n\t\t   A->e00, A->n, A->e01, A->n, A->e10, A->n, A->e11, A->n,\n\t\t   B->e00, B->n, B->e01, B->n, B->e10, B->n, B->e11, B->n,\n\t\t   R.e00, R.n, R.e01, R.n, R.e10, R.n, R.e11, R.n,\n\t\t   P.e00, P.n, P.e01, P.n, P.e10, P.n, P.e11, P.n);\n      abort();\n    }\n  refmpn_free_limbs (tp);\n  matrix_clear (&R);\n  matrix_clear (&P);\n}\n\n#define MAX_SIZE (2+2*MATRIX22_STRASSEN_THRESHOLD)\n\nint\nmain (int argc, char **argv)\n{\n  struct matrix A;\n  struct matrix B;\n\n  gmp_randstate_ptr rands;\n  mpz_t bs;\n  int i;\n\n  tests_start ();\n  rands = RANDS;\n\n  matrix_init (&A, MAX_SIZE);\n  matrix_init (&B, MAX_SIZE);\n  mpz_init (bs);\n\n  for (i = 0; i < 1000; i++)\n    {\n      mp_size_t an, bn;\n      mpz_urandomb (bs, rands, 32);\n      an = 1 + mpz_get_ui (bs) % MAX_SIZE;\n      mpz_urandomb (bs, rands, 32);\n      bn = 1 + mpz_get_ui (bs) % MAX_SIZE;\n\n      matrix_random (&A, an, rands);\n      matrix_random (&B, bn, rands);\n\n      one_test (&A, &B, i);\n    }\n  mpz_clear (bs);\n  matrix_clear (&A);\n  matrix_clear (&B);\n\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpn/t-mp_bases.c",
    "content": "/* Check mp_bases values.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\nint\nmain (int argc, char *argv[])\n{\n  mp_limb_t  want_bb, want_bb_inv;\n  int        base, want_chars_per_limb;\n\n  want_chars_per_limb = refmpn_chars_per_limb (10);\n  if (MP_BASES_CHARS_PER_LIMB_10 != want_chars_per_limb)\n    {\n      printf (\"MP_BASES_CHARS_PER_LIMB_10 wrong\\n\");\n      abort ();\n    }\n\n  want_bb = refmpn_big_base (10);\n  if (MP_BASES_BIG_BASE_10 != want_bb)\n    {\n      printf (\"MP_BASES_BIG_BASE_10 wrong\\n\");\n      abort ();\n    }\n\n  want_bb_inv = refmpn_invert_limb\n    (want_bb << refmpn_count_leading_zeros (want_bb));\n  if (MP_BASES_BIG_BASE_INVERTED_10 != want_bb_inv)\n    {\n      printf (\"MP_BASES_BIG_BASE_INVERTED_10 wrong\\n\");\n      abort ();\n    }\n\n  if (MP_BASES_NORMALIZATION_STEPS_10\n      != refmpn_count_leading_zeros (MP_BASES_BIG_BASE_10))\n    {\n      printf (\"MP_BASES_NORMALIZATION_STEPS_10 wrong\\n\");\n      abort ();\n    }\n\n  for (base = 2; base < numberof (mp_bases); 
base++)\n    {\n      want_chars_per_limb = refmpn_chars_per_limb (base);\n      if (mp_bases[base].chars_per_limb != want_chars_per_limb)\n        {\n          printf (\"mp_bases[%d].chars_per_limb wrong\\n\", base);\n          printf (\"  got  %d\\n\", mp_bases[base].chars_per_limb);\n          printf (\"  want %d\\n\", want_chars_per_limb);\n          abort ();\n        }\n\n      if (POW2_P (base))\n        {\n          want_bb = refmpn_count_trailing_zeros ((mp_limb_t) base);\n          if (mp_bases[base].big_base != want_bb)\n            {\n              printf (\"mp_bases[%d].big_base (log2 of base) wrong\\n\", base);\n              abort ();\n            }\n        }\n      else\n        {\n          want_bb = refmpn_big_base (base);\n          if (mp_bases[base].big_base != want_bb)\n            {\n              printf (\"mp_bases[%d].big_base wrong\\n\", base);\n              abort ();\n            }\n\n#if USE_PREINV_DIVREM_1\n          want_bb_inv = refmpn_invert_limb\n            (want_bb << refmpn_count_leading_zeros (want_bb));\n          if (mp_bases[base].big_base_inverted != want_bb_inv)\n            {\n              printf (\"mp_bases[%d].big_base_inverted wrong\\n\", base);\n              abort ();\n            }\n#endif\n        }\n    }\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-mullow_basecase.c",
    "content": "/* Test mpn_mullow_basecase\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)  do{int __t;printf(\"%s=\",#xp);for(__t=(xn)-1;__t>=0;__t--)printf(\"%lX \",(xp)[__t]);printf(\"\\n\");}while(0)\n\nint\nmain (void)\n{\n  unsigned long yn, xn, n, b, zn, c;\n  mp_limb_t xp[1000], yp[1000], mp[1000], lp[1000];\n  gmp_randstate_t rands;\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  for (xn = 1; xn <= 100; xn++)\n    {\n      for (yn = 1; yn <= xn; yn++)\n\t{\n\t  for (n = xn; n <= xn + yn; n++)\n\t    {\n\t      mpn_randomb (xp, rands ,xn);\n\t      mpn_randomb (yp, rands, yn);\n\t      mpn_mul (mp, xp, xn, yp, yn);\n\t      mpn_mullow_basecase (lp, xp, xn, yp, yn, n);\n\t      if (mpn_cmp (mp, lp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_mullow_basecase error %ld\\n\", n);\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-mullowhigh.c",
    "content": "/* Test mpn_mullow_n mpn_mulhigh_n\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)  do{int __t;printf(\"%s=\",#xp);for(__t=(xn)-1;__t>=0;__t--)printf(\"%lX \",(xp)[__t]);printf(\"\\n\");}while(0)\n\nint\nmain (void)\n{\n  unsigned long bp, xn, n, b, zn, c;\n  mp_limb_t xp[1000], yp[1000], mp[1000], lp[1000], hp[1000];\n  gmp_randstate_t rands;\n  int qpn, j, k, i, l, i1, k1, j1, i2, k2, j2;\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  for (n = 1; n < 100; n++)\n    {\n      for (c = 0; c < 10; c++)\n\t{\n\t  mpn_randomb (xp, rands, n);\n\t  mpn_randomb (yp, rands, n);\n\t  mpn_mul_n (mp, xp, yp, n);\n\t  mpn_mullow_n (lp, xp, yp, n);\n\t  mpn_mulhigh_n (hp, xp, yp, n);\n\t  if (mpn_cmp (mp, lp, n) != 0)\n\t    {\n\t      printf (\"mpn_mullow_n error %ld\\n\", n);\n\t      abort ();\n\t    }\n\t  if (mpn_cmp (mp + n, hp + n, n) != 0)\n\t    {\n\t      printf (\"mpn_mulhigh_n error %ld\\n\", n);\n\t      abort ();\n\t    }\n\t}\n    }\n\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-mulmid.c",
    "content": "/* middle product test code\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n#include <math.h>\n#include <stdlib.h>\n#include <stdio.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\nint compare_ul(const void* a, const void* b)\n{\n  unsigned long aa = * (unsigned long*) a;\n  unsigned long bb = * (unsigned long*) b;\n  if (aa < bb)\n    return -1;\n  if (aa > bb)\n    return 1;\n  return 0;\n}\n\n#define SIZE 400\n#define REPEAT 500\n#define SAFETY 16\n\nint\nmain (int argc, char* argv[])\n{\n  mp_limb_t up[2*SIZE];\n  mp_limb_t 
vp[SIZE];\n  mp_limb_t buf[SIZE + 2*SAFETY + 3];\n  mp_ptr rp = buf + SAFETY;\n  mp_limb_t ref[SIZE + 3];\n  mp_limb_t scratch[10*SIZE + 100];\n  mp_size_t un, vn;\n  gmp_randstate_t rands;\n  \n  const mp_limb_t sentry = 0x012345678;\n  \n  int i, j;\n\n  tests_start();\n  gmp_randinit_default(rands);\n  \n  for (i = 0; i < REPEAT; i++)\n    {\n      vn = (random() % SIZE) + 1;\n      un = (random() % (SIZE + 1 - vn)) + vn;\n\n      mpn_rrandom (up, rands, un);\n      mpn_rrandom (vp, rands, vn);\n\n      refmpn_mulmid (ref, up, un, vp, vn);\n\n      for (j = 0; j < SAFETY; j++)\n        rp[-j - 1] = rp[un - vn + 3 + j] = sentry;\n      mpn_mulmid_basecase (rp, up, un, vp, vn);\n\n      for (j = 0; j < SAFETY; j++)\n        if (rp[-j - 1] != sentry || rp[un - vn + 3 + j] != sentry)\n\t\t{\n\t\t  printf (\"sentry overwritten\\n\");\n\t\t  abort();\n\t\t}\n      if (mpn_cmp (rp, ref, un - vn + 3) != 0)\n        {\n          printf (\"failed:\\n\");\n          printf (\"un = %lu, vn = %lu\\n\\n\", un, vn);\n          gmp_printf (\" up: %Nx\\n\\n\", up, un);\n          gmp_printf (\" vp: %Nx\\n\\n\", vp, vn);\n          gmp_printf (\" rp: %Nx\\n\\n\", rp, un - vn + 3);\n          gmp_printf (\"ref: %Nx\\n\\n\", ref, un - vn + 3);\n          abort ();\n        }\n    }\n\n  for (i = 0; i < REPEAT; i++)\n    {\n      vn = (random() % SIZE) + 1;\n      un = (random() % (SIZE + 1 - vn)) + vn;\n\n      mpn_rrandom (up, rands, un);\n      mpn_rrandom (vp, rands, vn);\n\n      refmpn_mulmid (ref, up, un, vp, vn);\n\n      for (j = 0; j < SAFETY; j++)\n        rp[-j - 1] = rp[un - vn + 3 + j] = sentry;\n      mpn_mulmid (rp, up, un, vp, vn);\n\n      for (j = 0; j < SAFETY; j++)\n        if (rp[-j - 1] != sentry || rp[un - vn + 3 + j] != sentry)\n\t\t{\n\t\t  printf (\"sentry overwritten\\n\");\n\t\t  abort();\n\t\t}\n      if (mpn_cmp (rp, ref, un - vn + 3) != 0)\n        {\n          printf (\"failed:\\n\");\n          printf (\"un = %lu, vn = %lu\\n\\n\", un, vn);\n        
  gmp_printf (\" up: %Nx\\n\\n\", up, un);\n          gmp_printf (\" vp: %Nx\\n\\n\", vp, vn);\n          gmp_printf (\" rp: %Nx\\n\\n\", rp, un - vn + 3);\n          gmp_printf (\"ref: %Nx\\n\\n\", ref, un - vn + 3);\n          abort ();\n        }\n    }\n\n  for (i = 0; i < REPEAT; i++)\n    {\n      vn = (random() % (SIZE - 4)) + 4;\n      un = 2*vn - 1;\n\n      mpn_rrandom (up, rands, un);\n      mpn_rrandom (vp, rands, vn);\n\n      refmpn_mulmid (ref, up, un, vp, vn);\n\n      for (j = 0; j < SAFETY; j++)\n        rp[-j - 1] = rp[un - vn + 3 + j] = sentry;\n      mpn_toom42_mulmid (rp, up, vp, vn, scratch);\n\n      for (j = 0; j < SAFETY; j++)\n        if (rp[-j - 1] != sentry || rp[un - vn + 3 + j] != sentry)\n\t\t{\n\t\t  printf (\"sentry overwritten\\n\");\n\t\t  abort();\n\t\t}\n      if (mpn_cmp (rp, ref, un - vn + 3) != 0)\n        {\n          printf (\"failed:\\n\");\n          printf (\"un = %lu, vn = %lu\\n\\n\", un, vn);\n          gmp_printf (\" up: %Nx\\n\\n\", up, un);\n          gmp_printf (\" vp: %Nx\\n\\n\", vp, vn);\n          gmp_printf (\" rp: %Nx\\n\\n\", rp, un - vn + 3);\n          gmp_printf (\"ref: %Nx\\n\\n\", ref, un - vn + 3);\n          abort ();\n        }\n    }\n  gmp_randclear(rands);\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpn/t-mulmod_2expm1.c",
    "content": "\n/* Test mpn_mulmod_2expm1\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)                        \\\n   do {                                    \\\n      int __t;                             \\\n      printf(\"%s=\", #xp);                  \\\n      for(__t = (xn) - 1; __t >= 0; __t--) \\\n         printf(\"%lX \", (xp)[__t]);        \\\n      printf(\"\\n\");                        \\\n   } while(0)\n\nint\nmain (void)\n{\n    unsigned long bp, xn, n, b, zn, c;\n    gmp_randstate_t rands;\n    int qpn, j, k, i, l, i1, k1, j1, i2, k2, j2;\n    mp_limb_t xp[10000], dp[10000], qp[10000], yp[10000], rp[10000], zp[10000],\n        tp[10000];\n    tests_start ();\n    gmp_randinit_default (rands);\n\n    for (b = 1; b < 600; b++)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;       /* dp is 2^b-1 */\n        for (c = 0; c < 5 * b; c++)\n        {\n            mpn_rrandom (xp, rands, xn);\n            mpn_rrandom (yp, 
rands, xn);\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            mpn_mul_n (zp, xp, yp, xn);\n            zn = xn * 2;\n            MPN_NORMALIZE (zp, zn);\n            if (zn >= xn)\n            {\n                mpn_tdiv_qr (qp, rp, 0, zp, zn, dp, xn);\n            }\n            else\n            {\n                MPN_COPY (rp, zp, xn);\n            }\n            mpn_mulmod_2expm1 (tp, xp, yp, b, qp);\n            if (mpn_cmp (tp, dp, xn) == 0)\n                MPN_ZERO (tp, xn);      /* fully reduce */\n            if (mpn_cmp (tp, rp, xn) != 0)\n            {\n                printf (\"mpn_mulmod_2expm1 error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n\n    for (b = MULMOD_2EXPM1_THRESHOLD * GMP_NUMB_BITS;\n         b < 2 * MULMOD_2EXPM1_THRESHOLD * GMP_NUMB_BITS; b += 2)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;       /* dp is 2^b-1 */\n        for (c = 0; c < 5; c++)\n        {\n            mpn_rrandom (xp, rands, xn);\n            mpn_rrandom (yp, rands, xn);\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            mpn_mul_n (zp, xp, yp, xn);\n            zn = xn * 2;\n            MPN_NORMALIZE (zp, zn);\n            if (zn >= xn)\n            {\n                mpn_tdiv_qr (qp, rp, 0, zp, zn, dp, xn);\n            }\n            else\n            {\n                MPN_COPY (rp, zp, xn);\n            }\n            mpn_mulmod_2expm1 (tp, xp, yp, b, qp);\n            if (mpn_cmp (tp, dp, xn) == 0)\n                MPN_ZERO (tp, xn);      /* fully reduce */\n            if (mpn_cmp (tp, rp, xn) != 0)\n            {\n                printf (\"mpn_mulmod_2expm1 error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n\n    gmp_randclear (rands);\n    tests_end 
();\n    exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-mulmod_2expp1.c",
    "content": "\n/* Test mpn_mulmod_2expp1_basecase\n\nCopyright 2009 Jason Moxham\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published\nby the Free Software Foundation; either version 2.1 of the License, or (at\nyour option) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write\nto the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\nBoston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define DISP(xp,xn)                         \\\n   do {                                     \\\n      int __t;                              \\\n      printf(\"%s=\", #xp);                   \\\n      for (__t = (xn) - 1; __t >= 0; __t--) \\\n         printf(\"%ld \", (xp)[__t]);         \\\n      printf(\"\\n\");                         \\\n   } while(0)\n\nint\nmain (void)\n{\n    unsigned long bp, xn, n, yn, b, zn, c, dn;\n    gmp_randstate_t rands;\n    int qpn, j, k, i, l, i1, k1, j1, i2, k2, j2, cc;\n    mp_limb_t xp[10000], dp[10000], qp[10000], yp[10000], rp[10000], zp[10000],\n        tp[10000], tb;\n    tests_start ();\n    gmp_randinit_default (rands);\n\n    b = 1;\n    tb = 1;\n    tb <<= b;\n    for (; b < 600; b++, tb *= 2)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        if (tb == 0 || tb > GMP_NUMB_MASK)\n            tb = 1;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;    
   /* dp is 2^b-1 */\n        dn = xn;\n        dp[xn] = mpn_add_1 (dp, dp, xn, 2);\n        if (dp[dn] != 0)\n            dn++;               /* dp is 2^b+1 */\n        for (c = 0; c < 20; c++)\n        {\n            mpn_rrandom (xp, rands, xn);\n            mpn_rrandom (yp, rands, xn);\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            ASSERT_MPN (xp, xn);\n            ASSERT_MPN (yp, xn);\n            ASSERT_MPN (zp, zn);\n            ASSERT_MPN (dp, dn);\n            mpn_mul_n (zp, xp, yp, xn);\n            zn = xn * 2;\n            MPN_NORMALIZE (zp, zn);\n            if (zn >= dn)\n            {\n                mpn_tdiv_qr (qp, rp, 0, zp, zn, dp, dn);\n            }\n            else\n            {\n                MPN_COPY (rp, zp, dn);\n            }\n            cc = tp[xn] = mpn_mulmod_2expp1_basecase (tp, xp, yp, 0, b, qp);\n            if (cc != 0 && dn == xn)\n            {\n                tp[xn - 1] |= tb;\n            }\n            if (mpn_cmp (tp, rp, dn) != 0)\n            {\n                printf (\"mpn_mulmod_2expp1_basecase error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n\n    b = 1;\n    tb = 1;\n    tb <<= b;\n    for (; b < 600; b++, tb *= 2)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        if (tb == 0 || tb > GMP_NUMB_MASK)\n            tb = 1;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;       /* dp is 2^b-1 */\n        dn = xn;\n        dp[xn] = mpn_add_1 (dp, dp, xn, 2);\n        if (dp[dn] != 0)\n            dn++;               /* dp is 2^b+1 */\n        for (c = 0; c < 20; c++)\n        {\n            mpn_rrandom (xp, rands, xn);\n            MPN_ZERO (yp, xn);  /* set yp to 2^b */\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yn = xn;\n            if (tb == 1)\n                yn++;\n      
      yp[yn - 1] = tb;\n            ASSERT_MPN (xp, xn);\n            ASSERT_MPN (yp, yn);\n            ASSERT_MPN (zp, zn);\n            ASSERT_MPN (dp, dn);\n            mpn_mul (zp, yp, yn, xp, xn);\n            zn = xn * 2;\n            MPN_NORMALIZE (zp, zn);\n            MPN_ZERO (yp, xn);  /* set yp to 2^b */\n            if (zn >= dn)\n            {\n                mpn_tdiv_qr (qp, rp, 0, zp, zn, dp, dn);\n            }\n            else\n            {\n                MPN_COPY (rp, zp, dn);\n            }\n            cc = tp[xn] = mpn_mulmod_2expp1_basecase (tp, xp, yp, 1, b, qp);\n            if (cc != 0 && dn == xn)\n            {\n                tp[xn - 1] |= tb;\n            }\n            if (mpn_cmp (tp, rp, dn) != 0)\n            {\n                printf (\"mpn_mulmod_2expp1_basecase error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n\n    b = 1;\n    tb = 1;\n    tb <<= b;\n    for (; b < 600; b++, tb *= 2)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        if (tb == 0 || tb > GMP_NUMB_MASK)\n            tb = 1;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;       /* dp is 2^b-1 */\n        dn = xn;\n        dp[xn] = mpn_add_1 (dp, dp, xn, 2);\n        if (dp[dn] != 0)\n            dn++;               /* dp is 2^b+1 */\n        for (c = 0; c < 20; c++)\n        {\n            mpn_rrandom (xp, rands, xn);\n            MPN_ZERO (yp, xn);  /* set yp to 2^b */\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yn = xn;\n            if (tb == 1)\n                yn++;\n            yp[yn - 1] = tb;\n            ASSERT_MPN (xp, xn);\n            ASSERT_MPN (yp, yn);\n            ASSERT_MPN (zp, zn);\n            ASSERT_MPN (dp, dn);\n            mpn_mul (zp, yp, yn, xp, xn);\n            zn = xn * 2;\n            MPN_NORMALIZE (zp, zn);\n            MPN_ZERO (yp, xn);  /* set 
yp to 2^b */\n            if (zn >= dn)\n            {\n                mpn_tdiv_qr (qp, rp, 0, zp, zn, dp, dn);\n            }\n            else\n            {\n                MPN_COPY (rp, zp, dn);\n            }\n            cc = tp[xn] = mpn_mulmod_2expp1_basecase (tp, yp, xp, 2, b, qp);\n            if (cc != 0 && dn == xn)\n            {\n                tp[xn - 1] |= tb;\n            }\n            if (mpn_cmp (tp, rp, dn) != 0)\n            {\n                printf (\"mpn_mulmod_2expp1_basecase error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n\n    rp[0] = 1;\n    MPN_ZERO (rp + 1, 1000);\n    b = 1;\n    tb = 1;\n    tb <<= b;\n    for (; b < 600; b++, tb *= 2)\n    {\n        xn = BITS_TO_LIMBS (b);\n        k = xn * GMP_NUMB_BITS - b;\n        if (tb == 0 || tb > GMP_NUMB_MASK)\n            tb = 1;\n        MPN_ZERO (dp, xn);\n        mpn_com_n (dp, dp, xn);\n        dp[xn - 1] &= GMP_NUMB_MASK >> k;       /* dp is 2^b-1 */\n        dn = xn;\n        dp[xn] = mpn_add_1 (dp, dp, xn, 2);\n        if (dp[dn] != 0)\n            dn++;               /* dp is 2^b+1 */\n        for (c = 0; c < 1; c++)\n        {\n            MPN_ZERO (xp, xn);\n            MPN_ZERO (yp, xn);  /* set xp,yp to 2^b */\n            xp[xn - 1] &= GMP_NUMB_MASK >> k;\n            yp[xn - 1] &= GMP_NUMB_MASK >> k;\n            cc = tp[xn] = mpn_mulmod_2expp1_basecase (tp, yp, xp, 3, b, qp);\n            if (cc != 0 && dn == xn)\n            {\n                tp[xn - 1] |= tb;\n            }\n            if (mpn_cmp (tp, rp, dn) != 0)\n            {\n                printf (\"mpn_mulmod_2expp1_basecase error %ld\\n\", b);\n                abort ();\n            }\n        }\n    }\n    gmp_randclear (rands);\n    tests_end ();\n    exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-neg.c",
    "content": "/* Test mpn_neg_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (void)\n{\n  unsigned long n;\n  gmp_randstate_t rands;\n  int j, k, i, l, i1, k1, j1, i2, k2, j2, d;\n  mp_limb_t xp[10000], yp[10000], zp[10000], cp[10000];\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  MPN_ZERO (zp, 10000);\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  if (mpn_neg_n (yp, zp, n) != 0)\n\t    {\n\t      printf (\"mpn_neg_n wrong\\n\");\n\t      abort ();\n\t    }\n\t  if (mpn_cmp (yp, zp, n) != 0)\n\t    {\n\t      printf (\"mpn_neg_n wrong\\n\");\n\t      abort ();\n\t    }\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp,rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands,n);\n\t\t}\n\t      k1 = mpn_sub_n (cp, zp, xp, n);\n\t      k2 = mpn_neg_n (yp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_neg_n wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (yp, cp, n) != 
0)\n\t\t{\n\t\t  printf (\"mpn_neg_n wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 1000; n++)\n\t{\n\t  MPN_COPY (yp, zp, n);\n\t  if (mpn_neg_n (yp, yp, n) != 0)\n\t    {\n\t      printf (\"mpn_neg_n wrong\\n\");\n\t      abort ();\n\t    }\n\t  if (mpn_cmp (yp, zp, n) != 0)\n\t    {\n\t      printf (\"mpn_neg_n wrong\\n\");\n\t      abort ();\n\t    }\n\t  for (j = 1; j < 10; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands,n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands,n);\n\t\t}\n\t      k1 = mpn_sub_n (cp, zp, xp, n);\n\t      k2 = mpn_neg_n (xp, xp, n);\n\t      if (k1 != k2)\n\t\t{\n\t\t  printf (\"mpn_neg_n wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (xp, cp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_neg_n wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-perfsqr.c",
    "content": "/* Test mpn_perfect_square_p data.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define PERFSQR_MOD_MASK   ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)\n\nvoid\ncheck_mod_2 (mp_limb_t d, mp_limb_t inv, mp_limb_t got_hi, mp_limb_t got_lo)\n{\n  int        want[2*GMP_LIMB_BITS], got;\n  unsigned   r, idx;\n  mp_limb_t  q;\n\n  ASSERT_ALWAYS (d <= numberof (want));\n  ASSERT_ALWAYS (((inv * d) & PERFSQR_MOD_MASK) == 1);\n  ASSERT_ALWAYS (MP_LIMB_T_MAX / d >= PERFSQR_MOD_MASK);\n\n  /* the squares mod d */\n  for (r = 0; r < d; r++)\n    want[r] = 0;\n  for (r = 0; r < d; r++)\n    want[(r*r)%d] = 1;\n\n  /* for each remainder mod d, expect the table data to correctly identify\n     it as a residue or non-residue */\n  for (r = 0; r < d; r++)\n    {\n      /* as per PERFSQR_MOD_IDX */\n      q = ((r) * (inv)) & PERFSQR_MOD_MASK;\n      idx = (q * (d)) >> PERFSQR_MOD_BITS;\n\n      if (idx >= GMP_LIMB_BITS)\n        got = (got_hi >> (idx - GMP_LIMB_BITS)) & 1;\n      else\n        got = (got_lo >> idx) & 1;\n\n      if (got != want[r])\n        
{\n          printf (\"Wrong generated data\\n\");\n          printf (\"  d=%u\\n\", (unsigned) d);\n          printf (\"  r=%u\\n\", r);\n          printf (\"  idx=%u\\n\", idx);\n          printf (\"  got  %d\\n\", got);\n          printf (\"  want %d\\n\", want[r]);\n          abort ();\n        }\n    }\n}\n\n/* Check the generated data in perfsqr.h. */\nvoid\ncheck_mod (void)\n{\n#define PERFSQR_MOD_34(r, up, usize)       { r = 0; } /* so r isn't unused */\n#define PERFSQR_MOD_PP(r, up, usize)       { r = 0; }\n#define PERFSQR_MOD_1(r, d, inv, mask)     check_mod_2 (d, inv, CNST_LIMB(0), mask)\n#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) check_mod_2 (d, inv, mhi, mlo)\n\n  PERFSQR_MOD_TEST (dummy, dummy);\n}\n\n/* Check PERFSQR_PP, if in use. */\nvoid\ncheck_pp (void)\n{\n#ifdef PERFSQR_PP\n  ASSERT_ALWAYS_LIMB (PERFSQR_PP);\n  ASSERT_ALWAYS_LIMB (PERFSQR_PP_NORM);\n  ASSERT_ALWAYS_LIMB (PERFSQR_PP_INVERTED);\n\n  /* preinv stuff only for nails==0 */\n  if (GMP_NAIL_BITS == 0)\n    {\n      ASSERT_ALWAYS (PERFSQR_PP_NORM\n                     == PERFSQR_PP << refmpn_count_leading_zeros (PERFSQR_PP));\n      ASSERT_ALWAYS (PERFSQR_PP_INVERTED\n                     == refmpn_invert_limb (PERFSQR_PP_NORM));\n    }\n#endif\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_mod ();\n  check_pp ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-redc_1.c",
    "content": "\n/* Test mpn_redc_1\n\n  Copyright 2009 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n/* Set cp[] <- tp[]/R^n mod mp[].  Clobber tp[].\n   mp[] is n limbs; tp[] is 2n limbs.  
*/\nvoid\nref_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim)\n{\n    mp_limb_t cy;\n    mp_limb_t q;\n    mp_size_t j;\n\n    ASSERT_MPN (tp, 2 * n);\n    for (j = 0; j < n; j++)\n    {\n        q = (tp[0] * Nprim) & GMP_NUMB_MASK;\n        tp[0] = mpn_addmul_1 (tp, mp, n, q);\n        tp++;\n    }\n    cy = mpn_add_n (cp, tp, tp - n, n);\n    if (cy != 0)\n        mpn_sub_n (cp, cp, mp, n);\n}\n\nint\nmain (void)\n{\n    gmp_randstate_t rands;\n    int j, n;\n    mp_limb_t cp1[1000], cp2[1000], mp[1000], tp1[1000], tp2[1000], inv;\n\n    tests_start ();\n    gmp_randinit_default (rands);\n\n    for (n = 1; n < 100; n++)\n    {\n        for (j = 1; j < 100; j++)\n        {\n            mpn_randomb (mp, rands, n);\n            mp[0] |= 1;\n            modlimb_invert (inv, mp[0]);\n            inv = -inv;\n            mpn_randomb (tp1, rands, 2 * n);\n            MPN_COPY (tp2, tp1, 2 * n);\n            ref_redc_1 (cp1, tp1, mp, n, inv);\n            mpn_redc_1 (cp2, tp2, mp, n, inv);\n            if (mpn_cmp (cp1, cp2, n) != 0)\n            {\n                printf (\"mpn_redc_1 error %d\\n\", n);\n                abort ();\n            }\n            if (n != 1 && mpn_cmp (tp1, tp2, 2 * n) != 0)\n            {\n                printf (\"mpn_redc_1 possible error\\n\");\n                abort ();\n            }\n            /* we dont require the above to be the same but it could be a useful test */\n        }\n    }\n    for (n = 1; n < 100; n++)\n    {\n        for (j = 1; j < 100; j++)\n        {\n            mpn_rrandom (mp, rands, n);\n            mp[0] |= 1;\n            modlimb_invert (inv, mp[0]);\n            inv = -inv;\n            mpn_rrandom (tp1, rands, 2 * n);\n            MPN_COPY (tp2, tp1, 2 * n);\n            ref_redc_1 (cp1, tp1, mp, n, inv);\n            mpn_redc_1 (cp2, tp2, mp, n, inv);\n            if (mpn_cmp (cp1, cp2, n) != 0)\n            {\n                printf (\"mpn_redc_1 error %d\\n\", n);\n              
  abort ();\n            }\n            if (n != 1 && mpn_cmp (tp1, tp2, 2 * n) != 0)\n            {\n                printf (\"mpn_redc_1 possible error\\n\");\n                abort ();\n            }\n            /* we dont require the above to be the same but it could be a useful test */\n        }\n    }\n\n    gmp_randclear (rands);\n    tests_end ();\n    exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sb_bdiv_q.c",
    "content": "/* Test mpn_sb_bdiv_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 40\n#define ITERS 10000\n   \n/* Check schoolboy hensel division routine. 
*/\nvoid\ncheck_sb_bdiv_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[3*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t wp[2];\n   mp_limb_t dip, cy, hi, lo;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % MAX_LIMBS) + dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n \n      mpn_sb_bdiv_q(qp, wp, np, nn, dp, dn, dip);\n      \n      if (nn >= dn) mpn_mul(rp, qp, nn, dp, dn);\n      else mpn_mul(rp, dp, dn, qp, nn);\n\n      if (mpn_cmp(rp, np2, nn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu\\n\\n\", nn, dn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, nn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, nn);\n         abort ();\n      }\n\n      MPN_ZERO(rp, 3*MAX_LIMBS);\n      hi = lo = 0;\n      for (j = 0; j < dn; j++)\n      {\n         cy = mpn_addmul_1(rp + j, qp, nn - j, dp[j]);\n         add_ssaaaa(hi, lo, hi, lo, 0, cy);\n      } \n      if ((hi != wp[1]) || (lo != wp[0]))\n      {\n         printf(\"failed: wp wrong!\\n\");\n         printf(\"wp = %lx %lx, wp2 = %lx %lx\\n\", wp[1], wp[0], hi, lo);\n         abort();\n      } \n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_sb_bdiv_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sb_bdiv_qr.c",
    "content": "/* Test mpn_sb_bdiv_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009, 2010 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 40\n#define ITERS 10000\n   \n/* Check schoolboy hensel division routine. 
*/\nvoid\ncheck_sb_bdiv_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[2*MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy1, cy2;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % (MAX_LIMBS - 1)) + dn + 1;\n      qn = nn - dn;\n\n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      dp[0] |= 1;\n     \n      MPN_COPY(np2, np, nn);\n      \n      modlimb_invert(dip, dp[0]);\n      \n      cy1 = mpn_sb_bdiv_qr(qp, np, nn, dp, dn, dip);\n      \n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      } else\n         MPN_ZERO(rp, nn);\n      \n      cy2 = mpn_sub_n(rp, np2, rp, nn);\n      \n      if (mpn_cmp(rp + qn, np + qn, dn) != 0)\n      { \n         printf(\"failed: quotient wrong!\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu\\n\\n\", nn, dn, qn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, qn);\n         abort ();\n      }\n\n      if (cy1 != cy2)\n      {\n         printf(\"failed: carry wrong!\\n\");\n         printf(\"cy1 = %lx, cy2 = %lx\\n\", cy1, cy2);\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_sb_bdiv_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sb_div_q.c",
    "content": "/* Test mpn_sb_div_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 40\n#define ITERS 10000\n   \n/* Check schoolboy division routine. 
*/\nvoid\ncheck_sb_div_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % MAX_LIMBS) + dn;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_sb_div_q(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %zd, dn = %zd, qn = %zd, rn = %zd\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_sb_div_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sb_div_qr.c",
    "content": "/* Test mpn_sb_div_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 40\n#define ITERS 10000\n   \n/* Check schoolboy division routine. 
*/\nvoid\ncheck_sb_div_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip, cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % MAX_LIMBS) + dn;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_sb_div_qr(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_sb_div_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sb_divappr_q.c",
    "content": "/* Test mpn_sb_divappr_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 40\n#define ITERS 10000\n   \n/* Check schoolboy division routine. 
*/\nvoid\ncheck_sb_divappr_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t dip;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 2)) + 3;\n      nn = (random() % MAX_LIMBS) + dn;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);\n      \n      qn = nn - dn + 1;\n         \n      qp[qn - 1] = mpn_sb_divappr_q(qp, np, nn, dp, dn, dip);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n\n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n\n         s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);\n         if (s <= 0) \n         {\n            mpn_sub(rp, np2, nn, rp, rn);\n            rn = nn;\n            MPN_NORMALIZE(rp, rn);\n         } else \n         {\n            mpn_sub(rp, rp, rn, np2, nn);\n            MPN_NORMALIZE(rp, rn);\n         }\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_sb_divappr_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-scan.c",
    "content": "/* Test mpn_scan0 and mpn_scan1.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#include \"tests.h\"\n\n\n#define SIZE  ((mp_size_t) 3)\nmp_limb_t  x[SIZE+1];\n\nvoid\ncheck (void)\n{\n  unsigned long  i, got, want;\n\n  x[SIZE] = 1;\n  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)\n    {\n      got = refmpn_scan1 (x, i);\n      want = mpn_scan1 (x, i);\n      if (got != want)\n        {\n          printf (\"mpn_scan1\\n\");\n          printf (\"  i     %lu\\n\", i);\n          printf (\"  got   %lu\\n\", got);\n          printf (\"  want  %lu\\n\", want);\n          mpn_trace (\"  x    \", x, SIZE);\n          abort ();\n        }\n    }\n\n  x[SIZE] = 0;\n  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)\n    {\n      got = refmpn_scan0 (x, i);\n      want = mpn_scan0 (x, i);\n      if (got != want)\n        {\n          printf (\"mpn_scan0\\n\");\n          printf (\"  i     %lu\\n\", i);\n          printf (\"  got   %lu\\n\", got);\n          printf (\"  want  %lu\\n\", want);\n          mpn_trace (\"  x    \", x, SIZE);\n          abort ();\n        }\n    
}\n}\n\nvoid\ncheck_twobits (void)\n{\n#define TWOBITS(a, b) \\\n  ((CNST_LIMB(1) << (a)) | (CNST_LIMB(1) << (b)))\n\n  refmpn_zero (x, SIZE);\n  x[0] = TWOBITS (1, 0);\n  check ();\n\n  refmpn_zero (x, SIZE);\n  x[0] = TWOBITS (GMP_NUMB_BITS-1, 1);\n  check ();\n\n  refmpn_zero (x, SIZE);\n  x[0] = CNST_LIMB(1);\n  x[1] = CNST_LIMB(1);\n  check ();\n\n  refmpn_zero (x, SIZE);\n  x[0] = CNST_LIMB(1) << (GMP_NUMB_BITS-1);\n  x[1] = CNST_LIMB(1);\n  check ();\n\n  refmpn_zero (x, SIZE);\n  x[1] = TWOBITS (1, 0);\n  check ();\n\n  refmpn_zero (x, SIZE);\n  x[1] = CNST_LIMB(1);\n  x[2] = CNST_LIMB(1);\n  check ();\n}\n\n/* This is unused, it takes too long, especially on 64-bit systems. */\nvoid\ncheck_twobits_exhaustive (void)\n{\n  unsigned long  i, j;\n\n  for (i = 0; i < GMP_NUMB_BITS * SIZE; i++)\n    {\n      for (j = 0; j < GMP_NUMB_BITS * SIZE; j++)\n        {\n          refmpn_zero (x, SIZE);\n          refmpn_setbit (x, i);\n          refmpn_setbit (x, j);\n          check ();\n        }\n    }\n}\n\nvoid\ncheck_rand (void)\n{\n  int  i;\n\n  for (i = 0; i < 100; i++)\n    {\n      refmpn_random2 (x, SIZE);\n      check ();\n    }\n}\n\nint\nmain (void)\n{\n  mp_trace_base = -16;\n  tests_start ();\n\n  check_twobits ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-sizeinbase.c",
    "content": "/* Test for sizeinbase function.\n\nCopyright 2014 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */\n\n\n#include <stdlib.h>\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* Exponents up to 2^SIZE_LOG */\n#ifndef SIZE_LOG\n#define SIZE_LOG 13\n#endif\n\n#ifndef COUNT\n#define COUNT 30\n#endif\n\n#define MAX_N (1<<SIZE_LOG)\n\nint\nmain (int argc, char **argv)\n{\n  mp_limb_t a;\n  mp_ptr pp, scratch;\n  mp_limb_t max_b;\n  int count = COUNT;\n  int test;\n  gmp_randstate_ptr rands;\n  TMP_DECL;\n\n  if (argc > 1)\n    {\n      char *end;\n      count = strtol (argv[1], &end, 0);\n      if (*end || count <= 0)\n\t{\n\t  fprintf (stderr, \"Invalid test count: %s.\\n\", argv[1]);\n\t  return 1;\n\t}\n    }\n\n  tests_start ();\n  TMP_MARK;\n  rands = RANDS;\n\n  pp = TMP_ALLOC_LIMBS (MAX_N);\n  scratch = TMP_ALLOC_LIMBS (MAX_N);\n  max_b = numberof (mp_bases);\n\n  ASSERT_ALWAYS (max_b > 62);\n  ASSERT_ALWAYS (max_b < GMP_NUMB_MAX);\n\n  for (a = 2; a < max_b; ++a)\n    for (test = 0; test < count; ++test)\n      {\n\tmp_size_t pn;\n\tmp_limb_t exp;\n\tmp_bitcnt_t res;\n\n\texp = gmp_urandomm_ui (rands, MAX_N);\n\n\tpn = mpn_pow_1 (pp, &a, 1, exp, scratch);\n\n\tres = mpn_sizeinbase (pp, pn, a) - 
1;\n\n\tif ((res < exp) || (res > exp + 1))\n\t  {\n\t    printf (\"ERROR in test %d, base = %d, exp = %d, res = %d\\n\",\n\t\t    test, (int) a, (int) exp, (int) res);\n\t    abort();\n\t  }\n\n\tmpn_sub_1 (pp, pp, pn, CNST_LIMB(1));\n\tpn -= pp[pn-1] == 0;\n\n\tres = mpn_sizeinbase (pp, pn, a);\n\n\tif ((res < exp) || (res - 1 > exp))\n\t  {\n\t    printf (\"ERROR in -1 test %d, base = %d, exp = %d, res = %d\\n\",\n\t\t    test, (int) a, (int) exp, (int) res);\n\t    abort();\n\t  }\n      }\n\n  TMP_FREE;\n  tests_end ();\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpn/t-subadd_n.c",
    "content": "/* Test mpn_subadd_n\n\ndnl  Copyright 2009 Jason Moxham\n\ndnl  This file is part of the MPIR Library.\n\ndnl  The MPIR Library is free software; you can redistribute it and/or modify\ndnl  it under the terms of the GNU Lesser General Public License as published\ndnl  by the Free Software Foundation; either version 2.1 of the License, or (at\ndnl  your option) any later version.\n\ndnl  The MPIR Library is distributed in the hope that it will be useful, but\ndnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\ndnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\ndnl  License for more details.\n\ndnl  You should have received a copy of the GNU Lesser General Public License\ndnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write\ndnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\ndnl  Boston, MA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (void)\n{\n  unsigned long n;\n  gmp_randstate_t rands;\n  int j, k, i, i1;\n  mp_limb_t sp[10000], tp[10000], xp[10000], yp[10000], zp[10000];\n\n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  for (i1 = 0; i1 < 2; i1++)\n    {\n      for (n = 1; n < 100; n++)\n\t{\n\t  for (j = 1; j < 5; j++)\n\t    {\n\t      if (i1 == 0)\n\t\t{\n\t\t  mpn_randomb (xp, rands, n);\n\t\t  mpn_randomb (yp, rands, n);\n\t\t  mpn_randomb (zp, rands, n);\n\t\t}\n\t      else\n\t\t{\n\t\t  mpn_rrandom (xp, rands, n);\n\t\t  mpn_rrandom (yp, rands, n);\n\t\t  mpn_rrandom (zp, rands, n);\n\t\t}\n\t      k = mpn_subadd_n (sp, xp, yp, zp, n);\n\t      i = mpn_sub_n (tp, xp, yp, n);\n\t      i += mpn_sub_n (tp, tp, zp, n);\n\t      if (k != i)\n\t\t{\n\t\t  printf (\"mpn_subadd_n ret wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (mpn_cmp (sp, tp, n) != 0)\n\t\t{\n\t\t  printf (\"mpn_subadd_n sum wrong\\n\");\n\t\t  abort 
();\n\t\t}\n\t    }\n\t}\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-tdiv_q.c",
    "content": "/* Test mpn_tdiv_q.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 200\n#define ITERS 1000\n   \n/* Check division routine. 
*/\nvoid\ncheck_tdiv_q (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t qp2[2*MAX_LIMBS];\n   mp_limb_t dip[2];\n\n   mp_size_t nn, rn, dn, qn, qn2;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 1)) + 1;\n      nn = (random() % MAX_LIMBS) + dn;\n         \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      \n      qn = nn - dn + 1;\n      qn2 = nn - dn + 1;\n      rn = dn;\n   \n      mpn_tdiv_q(qp, np, nn, dp, dn);\n      mpn_tdiv_qr(qp2, rp, 0, np, nn, dp, dn);\n      \n      MPN_NORMALIZE(qp, qn);\n      MPN_NORMALIZE(qp2, qn2);\n      MPN_NORMALIZE(rp, rn);\n\n      if ((qn != qn2) || (mpn_cmp(qp, qp2, qn) != 0))\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, qn2 = %lu, rn = %lu\\n\\n\", nn, dn, qn, qn2, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" qp2: %Nx\\n\\n\", qp2, qn2);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_tdiv_q ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpn/t-tdiv_qr.c",
    "content": "/* Test mpn_tdiv_qr.\n\nCopyright 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define random rand\n#endif\n\n#define MAX_LIMBS 400\n#define ITERS 10000\n   \n/* Check division with remainder routine. 
*/\nvoid\ncheck_tdiv_qr (void)\n{\n   mp_limb_t np[2*MAX_LIMBS];\n   mp_limb_t np2[2*MAX_LIMBS];\n   mp_limb_t rp[2*MAX_LIMBS+1];\n   mp_limb_t dp[MAX_LIMBS];\n   mp_limb_t qp[2*MAX_LIMBS];\n   mp_limb_t cy;\n\n   mp_size_t nn, rn, dn, qn;\n\n   gmp_randstate_t rands;\n\n   int i, j, s;\n   gmp_randinit_default(rands);\n  \n   for (i = 0; i < ITERS; i++)\n   {\n      dn = (random() % (MAX_LIMBS - 5)) + 6;\n      nn = (random() % (MAX_LIMBS - 3)) + dn + 3;\n      \n      mpn_rrandom (np, rands, nn);\n      mpn_rrandom (dp, rands, dn);\n      dp[dn-1] |= GMP_LIMB_HIGHBIT;\n\n      MPN_COPY(np2, np, nn);\n      \n      qn = nn - dn + 1;\n         \n      mpn_tdiv_qr(qp, np, 0, np, nn, dp, dn);\n\n      MPN_NORMALIZE(qp, qn);\n\n      if (qn)\n      {\n         if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);\n         else mpn_mul(rp, dp, dn, qp, qn);\n      \n         rn = dn + qn;\n         MPN_NORMALIZE(rp, rn);\n         \n         if (rn > nn)\n         {\n            printf(\"failed: q*d has too many limbs\\n\");\n            abort();\n         }\n         \n         if (mpn_cmp(rp, np2, nn) > 0)\n         {\n            printf(\"failed: remainder negative\\n\");\n            abort();\n         }\n         \n         mpn_sub(rp, np2, nn, rp, rn);\n         rn = nn;\n         MPN_NORMALIZE(rp, rn);\n      } else\n      {\n         rn = nn;\n         MPN_COPY(rp, np, nn);\n      }\n      \n      s = (rn < dn) ? -1 : (rn > dn) ? 
1 : mpn_cmp(rp, dp, dn);\n      if (s >= 0)\n      {\n         printf (\"failed:\\n\");\n         printf (\"nn = %lu, dn = %lu, qn = %lu, rn = %lu\\n\\n\", nn, dn, qn, rn);\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);\n         abort ();\n      }\n\n      if (mpn_cmp(rp, np, rn) != 0)\n      {\n         printf(\"failed: remainder does not match\\n\");\n         gmp_printf (\" np: %Nx\\n\\n\", np2, nn);\n         gmp_printf (\" dp: %Nx\\n\\n\", dp, dn);\n         gmp_printf (\" qp: %Nx\\n\\n\", qp, qn);\n         gmp_printf (\" rp: %Nx\\n\\n\", rp, rn);        \n         gmp_printf (\" rp2: %Nx\\n\\n\", np, rn);        \n      }\n   }\n\n   gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_tdiv_qr ();\n  \n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = t-aors t-cmp t-cmp_si t-cmp_ui t-equal t-get_d t-get_str t-inp_str t-md_2exp t-set_f t-set_str t-cmp_z\n\nTESTS = $(check_PROGRAMS)\n\n# Temporary files used by the tests.  Removed automatically if the tests\n# pass, but ensure they're cleaned if they fail.\n#\nCLEANFILES = *.tmp\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/mpq/t-aors.c",
    "content": "/* Test mpq_add and mpq_sub.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_all (mpq_ptr x, mpq_ptr y, mpq_ptr want_add, mpq_ptr want_sub)\n{\n  mpq_t  got;\n  int    neg_x, neg_y, swap;\n\n  mpq_init (got);\n\n  MPQ_CHECK_FORMAT (want_add);\n  MPQ_CHECK_FORMAT (want_sub);\n  MPQ_CHECK_FORMAT (x);\n  MPQ_CHECK_FORMAT (y);\n\n  for (swap = 0; swap <= 1; swap++)\n    {\n      for (neg_x = 0; neg_x <= 1; neg_x++)\n        {\n          for (neg_y = 0; neg_y <= 1; neg_y++)\n            {\n              mpq_add (got, x, y);\n              MPQ_CHECK_FORMAT (got);\n              if (! mpq_equal (got, want_add))\n                {\n                  printf (\"mpq_add wrong\\n\");\n                  mpq_trace (\"  x   \", x);\n                  mpq_trace (\"  y   \", y);\n                  mpq_trace (\"  got \", got);\n                  mpq_trace (\"  want\", want_add);\n                  abort ();\n                }\n\n              mpq_sub (got, x, y);\n              MPQ_CHECK_FORMAT (got);\n              if (! 
mpq_equal (got, want_sub))\n                {\n                  printf (\"mpq_sub wrong\\n\");\n                  mpq_trace (\"  x   \", x);\n                  mpq_trace (\"  y   \", y);\n                  mpq_trace (\"  got \", got);\n                  mpq_trace (\"  want\", want_sub);\n                  abort ();\n                }\n\n\n              mpq_neg (y, y);\n              mpq_swap (want_add, want_sub);\n            }\n\n          mpq_neg (x, x);\n          mpq_swap (want_add, want_sub);\n          mpq_neg (want_add, want_add);\n          mpq_neg (want_sub, want_sub);\n        }\n\n      mpq_swap (x, y);\n      mpq_neg (want_sub, want_sub);\n    }\n\n  mpq_clear (got);\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *x;\n    const char  *y;\n    const char  *want_add;\n    const char  *want_sub;\n\n  } data[] = {\n\n    { \"0\", \"0\", \"0\", \"0\" },\n    { \"1\", \"0\", \"1\", \"1\" },\n    { \"1\", \"1\", \"2\", \"0\" },\n\n    { \"1/2\", \"1/2\", \"1\", \"0\" },\n    { \"5/6\", \"14/15\", \"53/30\", \"-1/10\" },\n  };\n\n  mpq_t  x, y, want_add, want_sub;\n  int i;\n\n  mpq_init (x);\n  mpq_init (y);\n  mpq_init (want_add);\n  mpq_init (want_sub);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpq_set_str_or_abort (x, data[i].x, 0);\n      mpq_set_str_or_abort (y, data[i].y, 0);\n      mpq_set_str_or_abort (want_add, data[i].want_add, 0);\n      mpq_set_str_or_abort (want_sub, data[i].want_sub, 0);\n\n      check_all (x, y, want_add, want_sub);\n    }\n\n  mpq_clear (x);\n  mpq_clear (y);\n  mpq_clear (want_add);\n  mpq_clear (want_sub);\n}\n\n\nvoid\ncheck_rand (void)\n{\n  mpq_t  x, y, want_add, want_sub;\n  int i;\n  gmp_randstate_t  rands;\n\n  mpq_init (x);\n  mpq_init (y);\n  mpq_init (want_add);\n  mpq_init (want_sub);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 500; i++)\n    {\n      mpz_errandomb (mpq_numref(x), rands, 512L);\n      mpz_errandomb_nonzero (mpq_denref(x), rands, 512L);\n      
mpq_canonicalize (x);\n\n      mpz_errandomb (mpq_numref(y), rands, 512L);\n      mpz_errandomb_nonzero (mpq_denref(y), rands, 512L);\n      mpq_canonicalize (y);\n\n      refmpq_add (want_add, x, y);\n      refmpq_sub (want_sub, x, y);\n\n      check_all (x, y, want_add, want_sub);\n    }\n\n  mpq_clear (x);\n  mpq_clear (y);\n  mpq_clear (want_add);\n  mpq_clear (want_sub);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n  check_rand ();\n\n  tests_end ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-cmp.c",
    "content": "/* Test mpq_cmp.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define NUM(x) (&((x)->_mp_num))\n#define DEN(x) (&((x)->_mp_den))\n\n#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 
1 : 0)\n\nvoid\nmpz_intrandom2 (mpz_ptr x, gmp_randstate_t rands, mp_size_t size)\n{\n  mp_size_t abs_size;\n\n  abs_size = ABS (size);\n  if (abs_size != 0)\n    {\n      if (x->_mp_alloc < abs_size)\n\t_mpz_realloc (x, abs_size);\n\n      mpn_rrandom (x->_mp_d, rands, abs_size);\n    }\n\n  x->_mp_size = size;\n}\n\n\nint\nref_mpq_cmp (mpq_t a, mpq_t b)\n{\n  mpz_t ai, bi;\n  int cc;\n\n  mpz_init (ai);\n  mpz_init (bi);\n\n  mpz_mul (ai, NUM (a), DEN (b));\n  mpz_mul (bi, NUM (b), DEN (a));\n  cc = mpz_cmp (ai, bi);\n  mpz_clear (ai);\n  mpz_clear (bi);\n  return cc;\n}\n\n#ifndef SIZE\n#define SIZE 8\t/* increasing this lowers the probability of finding an error */\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mpq_t a, b;\n  mp_size_t size;\n  int reps = 10000;\n  int i;\n  int cc, ccref;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpq_init (a);\n  mpq_init (b);\n\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (NUM (a), rands, size);\n      do\n\t{\n\t  size = urandom (rands) % SIZE - SIZE/2;\n\t  mpz_intrandom2 (DEN (a), rands, size);\n\t}\n      while (mpz_cmp_ui (DEN (a), 0) == 0);\n\n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (NUM (b), rands, size);\n      do\n\t{\n\t  size = urandom (rands) % SIZE - SIZE/2;\n\t  mpz_intrandom2 (DEN (b), rands, size);\n\t}\n      while (mpz_cmp_ui (DEN (b), 0) == 0);\n\n      mpq_canonicalize (a);\n      mpq_canonicalize (b);\n\n      ccref = ref_mpq_cmp (a, b);\n      cc = mpq_cmp (a, b);\n\n      if (SGN (ccref) != SGN (cc))\n\tabort ();\n    }\n\n  mpq_clear (a);\n  mpq_clear (b);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-cmp_si.c",
    "content": "/* Test mpq_cmp_si.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <limits.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\n\n#define SGN(x)   ((x)<0 ? 
-1 : (x) != 0)\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char     *q;\n    mpir_si         n;\n    mpir_ui         d;\n    int            want;\n  } data[] = {\n    { \"0\", 0, 1, 0 },\n    { \"0\", 0, 123, 0 },\n    { \"0\", 0, GMP_UI_MAX, 0 },\n    { \"1\", 0, 1, 1 },\n    { \"1\", 0, 123, 1 },\n    { \"1\", 0, GMP_UI_MAX, 1 },\n    { \"-1\", 0, 1, -1 },\n    { \"-1\", 0, 123, -1 },\n    { \"-1\", 0, GMP_UI_MAX, -1 },\n\n    { \"123\", 123, 1, 0 },\n    { \"124\", 123, 1, 1 },\n    { \"122\", 123, 1, -1 },\n\n    { \"-123\", 123, 1, -1 },\n    { \"-124\", 123, 1, -1 },\n    { \"-122\", 123, 1, -1 },\n\n    { \"123\", -123, 1, 1 },\n    { \"124\", -123, 1, 1 },\n    { \"122\", -123, 1, 1 },\n\n    { \"-123\", -123, 1, 0 },\n    { \"-124\", -123, 1, -1 },\n    { \"-122\", -123, 1, 1 },\n\n    { \"5/7\", 3,4, -1 },\n    { \"5/7\", -3,4, 1 },\n    { \"-5/7\", 3,4, -1 },\n    { \"-5/7\", -3,4, 1 },\n  };\n\n  mpq_t  q;\n  int    i, got;\n\n  mpq_init (q);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpq_set_str_or_abort (q, data[i].q, 0);\n      MPQ_CHECK_FORMAT (q);\n\n      got = mpq_cmp_si (q, data[i].n, data[i].d);\n      if (SGN(got) != data[i].want)\n        {\n          printf (\"mpq_cmp_si wrong\\n\");\n        error:\n          mpq_trace (\"  q\", q);\n          printf (\"  n=%Md\\n\", data[i].n);\n          printf (\"  d=%Mu\\n\", data[i].d);\n          printf (\"  got=%d\\n\", got);\n          printf (\"  want=%Md\\n\", data[i].want);\n          abort ();\n        }\n\n      if (data[i].n == 0)\n        {\n          got = mpq_cmp_si (q, 0L, data[i].d);\n          if (SGN(got) != data[i].want)\n            {\n              printf (\"mpq_cmp_si wrong\\n\");\n              goto error;\n            }\n        }\n    }\n\n  mpq_clear (q);\n}\n\nint\nmain (int argc, char **argv)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-cmp_ui.c",
    "content": "/* Test mpq_cmp_ui.\n\nCopyright 1996, 1997, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define NUM(x) (&((x)->_mp_num))\n#define DEN(x) (&((x)->_mp_den))\n\n#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 
1 : 0)\n\nvoid\nmpz_intrandom2 (mpz_ptr x, gmp_randstate_t rands, mp_size_t size)\n{\n  mp_size_t abs_size;\n\n  abs_size = ABS (size);\n  if (abs_size != 0)\n    {\n      if (x->_mp_alloc < abs_size)\n\t_mpz_realloc (x, abs_size);\n\n      mpn_rrandom (x->_mp_d, rands ,abs_size);\n    }\n\n  x->_mp_size = size;\n}\n\n\nint\nref_mpq_cmp_ui (mpq_t a, mpir_ui bn, mpir_ui bd)\n{\n  mpz_t ai, bi;\n  int cc;\n\n  mpz_init (ai);\n  mpz_init (bi);\n\n  mpz_mul_ui (ai, NUM (a), bd);\n  mpz_mul_ui (bi, DEN (a), bn);\n  cc = mpz_cmp (ai, bi);\n  mpz_clear (ai);\n  mpz_clear (bi);\n  return cc;\n}\n\n#ifndef SIZE\n#define SIZE 8\t/* increasing this lowers the probability of finding an error */\n#endif\n\nint\nmain (int argc, char **argv)\n{\n  mpq_t a, b;\n  mp_size_t size;\n  int reps = 10000;\n  int i;\n  int cc, ccref;\n  mpir_ui bn, bd;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  \n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpq_init (a);\n  mpq_init (b);\n\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (NUM (a), rands, size);\n      do\n\t{\n\t  size = urandom (rands) % SIZE - SIZE/2;\n\t  mpz_intrandom2 (DEN (a), rands, size);\n\t}\n      while (mpz_cmp_ui (DEN (a), 0) == 0);\n\n      mpz_intrandom2 (NUM (b), rands, (mp_size_t) 1);\n      mpz_mod_ui (NUM (b), NUM (b), ~(mpir_ui) 0);\n      mpz_add_ui (NUM (b), NUM (b), 1);\n\n      mpz_intrandom2 (DEN (b), rands ,(mp_size_t) 1);\n      mpz_mod_ui (DEN (b), DEN (b), ~(mpir_ui) 0);\n      mpz_add_ui (DEN (b), DEN (b), 1);\n\n      mpq_canonicalize (a);\n      mpq_canonicalize (b);\n\n      bn = mpz_get_ui (NUM (b));\n      bd = mpz_get_ui (DEN (b));\n\n      ccref = ref_mpq_cmp_ui (a, bn, bd);\n      cc = mpq_cmp_ui (a, bn, bd);\n\n      if (SGN (ccref) != SGN (cc))\n\tabort ();\n    }\n\n  mpq_clear (a);\n  mpq_clear (b);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-cmp_z.c",
    "content": "/* Test mpq_cmp_z.\n\nCopyright 1996, 2001, 2015 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)\n\nvoid\nmpz_intrandom2 (mpz_ptr x, gmp_randstate_t rands, mp_size_t size)\n{\n  mp_size_t abs_size;\n\n  abs_size = ABS (size);\n  if (abs_size != 0)\n    {\n      if (x->_mp_alloc < abs_size)\n\t_mpz_realloc (x, abs_size);\n\n      mpn_rrandom (x->_mp_d, rands ,abs_size);\n    }\n\n  x->_mp_size = size;\n}\n\n\nint\nref_mpq_cmp_z (mpq_t a, mpz_t b)\n{\n  mpz_t bi;\n  int cc;\n\n  mpz_init (bi);\n\n  mpz_mul (bi, b, DEN (a));\n  cc = mpz_cmp (NUM (a), bi);\n  mpz_clear (bi);\n  return cc;\n}\n\n#ifndef SIZE\n#define SIZE 8\t/* increasing this lowers the probability of finding an error */\n#endif\n\n#ifndef MAXN\n#define MAXN 5\t/* increasing this impacts on total timing */\n#endif\n\nvoid\nsizes_test (int m)\n{\n  mpq_t a;\n  mpz_t b;\n  int i, j, k, s;\n  int cc, ccref;\n\n  mpq_init (a);\n  mpz_init (b);\n\n  for (i = 0; i <= MAXN ; ++i)\n    {\n      mpz_setbit (DEN (a), i*m); /* \\sum_0^i 2^(i*m) */\n      for (j = 0; j <= MAXN; ++j)\n\t{\n\t  mpz_set_ui (NUM (a), 0);\n\t  mpz_setbit 
(NUM (a), j*m); /* 2^(j*m) */\n\t  for (k = 0; k <= MAXN; ++k)\n\t    {\n\t      mpz_set_ui (b, 0);\n\t      mpz_setbit (b, k*m); /* 2^(k*m) */\n\t      if (i == 0) /* Denominator is 1, compare the two exponents */\n\t\tccref = (j>k)-(j<k);\n\t      else\n\t\tccref = j-i > k ? 1 : -1;\n\t      for (s = 1; s >= -1; s -= 2)\n\t\t{\n\t\t  cc = mpq_cmp_z (a, b);\n\n\t\t  if (ccref != SGN (cc))\n\t\t    {\n\t\t      fprintf (stderr, \"i=%i, j=%i, k=%i, m=%i, s=%i\\n; ccref= %i, cc= %i\\n\", i, j, k, m, s, ccref, cc);\n\t\t      abort ();\n\t\t    }\n\n\t\t  mpq_neg (a, a);\n\t\t  mpz_neg (b, b);\n\t\t  ccref = - ccref;\n\t\t}\n\t    }\n\t}\n    }\n\n  mpq_clear (a);\n  mpz_clear (b);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpq_t a;\n  mpz_t b;\n  mp_size_t size;\n  int reps = 10000;\n  int i;\n  int cc, ccref;\n  gmp_randstate_t rands;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpq_init (a);\n  mpz_init (b);\n\n  for (i = 0; i < reps; i++)\n    {\n      if (i % 8192 == 0)\n\tsizes_test (urandom (rands) % (i + 1) + 1);\t  \n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (NUM (a), rands, size);\n      do\n\t{\n\t  size = urandom (rands) % (SIZE/2);\n\t  mpz_intrandom2 (DEN (a), rands, size);\n\t}\n      while (mpz_cmp_ui (DEN (a), 0) == 0);\n\n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (b, rands, size);\n\n      mpq_canonicalize (a);\n\n      ccref = ref_mpq_cmp_z (a, b);\n      cc = mpq_cmp_z (a, b);\n\n      if (SGN (ccref) != SGN (cc))\n\tabort ();\n    }\n\n  mpq_clear (a);\n  mpz_clear (b);\n  gmp_randclear(rands);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-equal.c",
    "content": "/* Test mpq_equal.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpq_srcptr x, mpq_srcptr y, int want)\n{\n  int  got;\n\n  MPQ_CHECK_FORMAT (x);\n  MPQ_CHECK_FORMAT (y);\n\n  got = mpq_equal (x, y);\n  if ((got != 0) != (want != 0))\n    {\n      printf (\"mpq_equal got %d want %d\\n\", got, want);\n      mpq_trace (\"x\", x);\n      mpq_trace (\"y\", y);\n      abort ();\n    }\n}\n\n\nvoid\ncheck_all (mpq_ptr x, mpq_ptr y, int want)\n{\n  check_one (x, y, want);\n  check_one (y, x, want);\n\n  mpq_neg (x, x);\n  mpq_neg (y, y);\n\n  check_one (x, y, want);\n  check_one (y, x, want);\n}\n\n\n#define SET4Z(z, size,l3,l2,l1,l0) \\\n  SIZ(z) = size; PTR(z)[3] = l3; PTR(z)[2] = l2; PTR(z)[1] = l1; PTR(z)[0] = l0\n\n#define SET4(q, nsize,n3,n2,n1,n0, dsize,d3,d2,d1,d0)   \\\n  SET4Z (mpq_numref(q), nsize,n3,n2,n1,n0);             \\\n  SET4Z (mpq_denref(q), dsize,d3,d2,d1,d0)\n\n\n/* Exercise various combinations of same and slightly different values. 
*/\n\nvoid\ncheck_various (void)\n{\n  mpq_t  x, y;\n\n  mpq_init (x);\n  mpq_init (y);\n\n  mpz_realloc (mpq_numref(x), (mp_size_t) 20);\n  mpz_realloc (mpq_denref(x), (mp_size_t) 20);\n  mpz_realloc (mpq_numref(y), (mp_size_t) 20);\n  mpz_realloc (mpq_denref(y), (mp_size_t) 20);\n\n  /* 0 == 0 */\n  SET4 (x, 0,13,12,11,10, 1,23,22,21,1);\n  SET4 (y, 0,33,32,31,30, 1,43,42,41,1);\n  check_all (x, y, 1);\n\n  /* 83/99 == 83/99 */\n  SET4 (x, 1,13,12,11,83, 1,23,22,21,99);\n  SET4 (y, 1,33,32,31,83, 1,43,42,41,99);\n  check_all (x, y, 1);\n\n  /* 1:2:3:4/5:6:7 == 1:2:3:4/5:6:7 */\n  SET4 (x, 4,1,2,3,4, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 1);\n\n  /* various individual changes making != */\n  SET4 (x, 4,1,2,3,667, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, 4,1,2,666,4, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, 4,1,666,3,4, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n#if GMP_NUMB_BITS != 62\n  SET4 (x, 4,667,2,3,4, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n#endif\n  SET4 (x, 4,1,2,3,4, 3,88,5,6,667);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, 4,1,2,3,4, 3,88,5,667,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, 4,1,2,3,4, 3,88,666,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, -4,1,2,3,4, 3,88,5,6,7);\n  SET4 (y,  4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n  SET4 (x, 1,1,2,3,4, 3,88,5,6,7);\n  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);\n  check_all (x, y, 0);\n\n  mpq_clear (x);\n  mpq_clear (y);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_various ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-get_d.c",
    "content": "/* Test mpq_get_d and mpq_set_d\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 8\n#endif\n\n#define EPSIZE SIZE\n\n\nvoid dump(mpq_t);\n\nvoid\nmpz_intrandom2 (mpz_ptr x, gmp_randstate_t rands, mp_size_t size)\n{\n  mp_size_t abs_size;\n\n  abs_size = ABS (size);\n  if (abs_size != 0)\n    {\n      if (x->_mp_alloc < abs_size)\n\t_mpz_realloc (x, abs_size);\n\n      mpn_rrandom (x->_mp_d, rands, abs_size);\n    }\n\n  x->_mp_size = size;\n}\n\n\nvoid\ncheck_monotonic (int argc, gmp_randstate_t rands, char **argv)\n{\n  mpq_t a;\n  mp_size_t size;\n  int reps = 100;\n  int i, j;\n  double last_d, new_d;\n  mpq_t qlast_d, qnew_d;\n  mpq_t eps;\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  /* The idea here is to test the monotonousness of mpq_get_d by adding\n     numbers to the numerator and denominator.  
*/\n\n  mpq_init (a);\n  mpq_init (eps);\n  mpq_init (qlast_d);\n  mpq_init (qnew_d);\n\n  for (i = 0; i < reps; i++)\n    {\n      size = urandom (rands) % SIZE - SIZE/2;\n      mpz_intrandom2 (mpq_numref (a), rands, size);\n      do\n\t{\n\t  size = urandom (rands) % SIZE - SIZE/2;\n\t  mpz_intrandom2 (mpq_denref (a), rands, size);\n\t}\n      while (mpz_cmp_ui (mpq_denref (a), 0) == 0);\n\n      mpq_canonicalize (a);\n\n      last_d = mpq_get_d (a);\n      mpq_set_d (qlast_d, last_d);\n      for (j = 0; j < 10; j++)\n\t{\n\t  size = urandom (rands) % EPSIZE + 1;\n\t  mpz_intrandom2 (mpq_numref (eps), rands, size);\n\t  size = urandom (rands) % EPSIZE + 1;\n\t  mpz_intrandom2 (mpq_denref (eps), rands, size);\n\t  mpq_canonicalize (eps);\n\n\t  mpq_add (a, a, eps);\n\t  mpq_canonicalize (a);\n\t  new_d = mpq_get_d (a);\n\t  if (last_d > new_d)\n\t    {\n\t      printf (\"\\nERROR (test %d/%d): bad mpq_get_d results\\n\", i, j);\n\t      printf (\"last: %.16g\\n\", last_d);\n\t      printf (\" new: %.16g\\n\", new_d); dump (a);\n\t      abort ();\n\t    }\n\t  mpq_set_d (qnew_d, new_d);\n\t  MPQ_CHECK_FORMAT (qnew_d);\n\t  if (mpq_cmp (qlast_d, qnew_d) > 0)\n\t    {\n\t      printf (\"ERROR (test %d/%d): bad mpq_set_d results\\n\", i, j);\n\t      printf (\"last: %.16g\\n\", last_d); dump (qlast_d);\n\t      printf (\" new: %.16g\\n\", new_d); dump (qnew_d);\n\t      abort ();\n\t    }\n\t  last_d = new_d;\n\t  mpq_set (qlast_d, qnew_d);\n\t}\n    }\n\n  mpq_clear (a);\n  mpq_clear (eps);\n  mpq_clear (qlast_d);\n  mpq_clear (qnew_d);\n}\n\ndouble\nmy_ldexp (double d, int e)\n{\n  for (;;)\n    {\n      if (e > 0)\n\t{\n\t  if (e >= 16)\n\t    {\n\t      d *= 65536.0;\n\t      e -= 16;\n\t    }\n\t  else\n\t    {\n\t      d *= 2.0;\n\t      e -= 1;\n\t    }\n\t}\n      else if (e < 0)\n\t{\n   \n\t  if (e <= -16)\n\t    {\n\t      d /= 65536.0;\n\t      e += 16;\n\t    }\n\t  else\n\t    {\n\t      d /= 2.0;\n\t      e += 1;\n\t    }\n\t}\n      else\n\treturn d;\n 
   }\n}\n\nvoid\ncheck_random (int argc, gmp_randstate_t rands, char **argv)\n{\n  double d, d2, nd, dd;\n  mpq_t q;\n  mp_limb_t rp[LIMBS_PER_DOUBLE + 1];\n  int test, reps = 100000;\n  int i;\n\n  if (argc == 2)\n     reps = 100 * atoi (argv[1]);\n\n  mpq_init (q);\n\n  for (test = 0; test < reps; test++)\n    {\n      mpn_rrandom (rp, rands, LIMBS_PER_DOUBLE + 1);\n      d = 0.0;\n      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)\n\td = d * MP_BASE_AS_DOUBLE + rp[i];\n      d = my_ldexp (d, (int) (rp[LIMBS_PER_DOUBLE] % 1000) - 500);\n      mpq_set_d (q, d);\n      nd = mpz_get_d (mpq_numref (q));\n      dd = mpz_get_d (mpq_denref (q));\n      d2 = nd / dd;\n      if (d != d2)\n\t{\n\t  printf (\"ERROR (check_random test %d): bad mpq_set_d results\\n\", test);\n\t  printf (\"%.16g\\n\", d);\n\t  printf (\"%.16g\\n\", d2);\n\t  abort ();\n\t}\n    }\n  mpq_clear (q);\n}\n\nvoid\ndump (mpq_t x)\n{\n  mpz_out_str (stdout, 10, mpq_numref (x));\n  printf (\"/\");\n  mpz_out_str (stdout, 10, mpq_denref (x));\n  printf (\"\\n\");\n}\n\n/* Check various values 2^n and 1/2^n. 
*/\nvoid\ncheck_onebit (void)\n{\n  static const long data[] = {\n    -3*GMP_NUMB_BITS-1, -3*GMP_NUMB_BITS, -3*GMP_NUMB_BITS+1,\n    -2*GMP_NUMB_BITS-1, -2*GMP_NUMB_BITS, -2*GMP_NUMB_BITS+1,\n    -GMP_NUMB_BITS-1, -GMP_NUMB_BITS, -GMP_NUMB_BITS+1,\n    -5, -2, -1, 0, 1, 2, 5,\n    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,\n    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,\n    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,\n  };\n\n  int     i, neg;\n  long    exp, l;\n  mpq_t   q;\n  double  got, want;\n\n  mpq_init (q);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      exp = data[i];\n\n      mpq_set_ui (q, 1L, 1L);\n      if (exp >= 0)\n        mpq_mul_2exp (q, q, exp);\n      else\n        mpq_div_2exp (q, q, -exp);\n\n      want = 1.0;\n      for (l = 0; l < exp; l++)\n        want *= 2.0;\n      for (l = 0; l > exp; l--)\n        want /= 2.0;\n\n      for (neg = 0; neg <= 1; neg++)\n        {\n          if (neg)\n            {\n              mpq_neg (q, q);\n              want = -want;\n            }\n\n          got = mpq_get_d (q);\n\n          if (got != want)\n            {\n              printf    (\"mpq_get_d wrong on %s2**%ld\\n\", neg ? \"-\" : \"\", exp);\n              mpq_trace (\"   q    \", q);\n              d_trace   (\"   want \", want);\n              d_trace   (\"   got  \", got);\n              abort();\n            }\n        }\n    }\n  mpq_clear (q);\n}\n\nint\nmain (int argc, char **argv)\n{gmp_randstate_t rands;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n  check_onebit ();\n  check_monotonic (argc,rands,  argv);\n  check_random (argc,rands,  argv);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-get_str.c",
    "content": "/* Test mpq_get_str.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpq_srcptr q, int base, const char *want)\n{\n  char    *str, *ret;\n  size_t  str_alloc;\n  \n  MPQ_CHECK_FORMAT (q);\n  mp_trace_base = base;\n\n  str_alloc =\n    mpz_sizeinbase (mpq_numref(q), ABS(base)) +\n    mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3;\n  \n  str = mpq_get_str (NULL, base, q);\n  if (strlen(str)+1 > str_alloc)\n    {\n      printf (\"mpq_get_str size bigger than should be (passing NULL)\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  got  size %lu \\\"%s\\\"\\n\", strlen(str)+1, str);\n      printf (\"  want size %lu\\n\", str_alloc);\n      abort ();\n    }\n  if (strcmp (str, want) != 0)\n    {\n      printf (\"mpq_get_str wrong (passing NULL)\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  got  \\\"%s\\\"\\n\", str);\n      printf (\"  want \\\"%s\\\"\\n\", want);\n      mpq_trace (\"  q\", q);\n      abort ();\n    }\n  (*__gmp_free_func) (str, strlen (str) + 1);\n  \n 
 str = (char *) (*__gmp_allocate_func) (str_alloc);\n  \n  ret = mpq_get_str (str, base, q);\n  if (str != ret)\n    {\n      printf (\"mpq_get_str wrong return value (passing non-NULL)\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  got  %p\\n\", ret);\n      printf (\"  want %p\\n\", want);\n      abort ();\n    }\n  if (strcmp (str, want) != 0)\n    {\n      printf (\"mpq_get_str wrong (passing non-NULL)\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  got  \\\"%s\\\"\\n\", str);\n      printf (\"  want \\\"%s\\\"\\n\", want);\n      abort ();\n    }\n  (*__gmp_free_func) (str, str_alloc);\n}\n\n\nvoid\ncheck_all (mpq_srcptr q, int base, const char *want)\n{\n  char  *s;\n\n  check_one (q, base, want);\n\n  s = __gmp_allocate_strdup (want);\n  strtoupper (s);\n  check_one (q, -base, s);\n  (*__gmp_free_func) (s, strlen(s)+1);\n}\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    int         base;\n    const char  *num;\n    const char  *den;\n    const char  *want;\n  } data[] = {\n    { 10, \"0\", \"1\", \"0\" },\n    { 10, \"1\", \"1\", \"1\" },\n\n    { 16, \"ffffffff\", \"1\", \"ffffffff\" },\n    { 16, \"ffffffffffffffff\", \"1\", \"ffffffffffffffff\" },\n\n    { 16, \"1\", \"ffffffff\", \"1/ffffffff\" },\n    { 16, \"1\", \"ffffffffffffffff\", \"1/ffffffffffffffff\" },\n    { 16, \"1\", \"10000000000000003\", \"1/10000000000000003\" },\n\n    { 10, \"12345678901234567890\", \"9876543210987654323\",\n      \"12345678901234567890/9876543210987654323\" },\n  };\n\n  mpq_t  q;\n  int    i;\n\n  mpq_init (q);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (mpq_numref(q), data[i].num, data[i].base);\n      mpz_set_str_or_abort (mpq_denref(q), data[i].den, data[i].base);\n      check_all (q, data[i].base, data[i].want);\n    }\n  mpq_clear (q);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-inp_str.c",
    "content": "/* Test mpq_inp_str.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>   /* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define FILENAME  \"t-inp_str.tmp\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *inp;\n    int         base;\n    const char  *want;\n    int         want_nread;\n\n  } data[] = {\n\n    { \"0\",   10, \"0\", 1 },\n    { \"0/1\", 10, \"0\", 3 },\n\n    { \"0/\",   10, \"0\", 0 },\n    { \"/123\", 10, \"0\", 0 },\n    { \"blah\", 10, \"0\", 0 },\n    { \"123/blah\", 10, \"0\", 0 },\n    { \"5 /8\", 10, \"5\", 1 },\n    { \"5/ 8\", 10, \"0\", 0 },\n\n    {  \"ff\", 16,  \"255\", 2 },\n    { \"-ff\", 16, \"-255\", 3 },\n    {  \"FF\", 16,  \"255\", 2 },\n    { \"-FF\", 16, \"-255\", 3 },\n\n    { \"z\", 36, \"35\", 1 },\n    { \"Z\", 36, \"35\", 1 },\n\n    {  \"0x0\",    0,   \"0\", 3 },\n    {  \"0x10\",   0,  \"16\", 4 },\n    { \"-0x0\",    0,   \"0\", 4 },\n    { \"-0x10\",   0, \"-16\", 5 },\n    { \"-0x10/5\", 0, 
\"-16/5\", 7 },\n\n    {  \"00\",   0,  \"0\", 2 },\n    {  \"010\",  0,  \"8\", 3 },\n    { \"-00\",   0,  \"0\", 3 },\n    { \"-010\",  0, \"-8\", 4 },\n  };\n\n  mpq_t  got, want;\n  long   ftell_nread;\n  int    i, post, j, got_nread;\n  FILE   *fp;\n\n  mpq_init (got);\n  mpq_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (post = 0; post <= 2; post++)\n        {\n          mpq_set_str_or_abort (want, data[i].want, 0);\n          MPQ_CHECK_FORMAT (want);\n\n          fp = fopen (FILENAME, \"w+\");\n          ASSERT_ALWAYS (fp != NULL);\n          fputs (data[i].inp, fp);\n          for (j = 0; j < post; j++)\n            putc (' ', fp);\n          fflush (fp);\n          ASSERT_ALWAYS (! ferror(fp));\n\n          rewind (fp);\n          got_nread = mpq_inp_str (got, fp, data[i].base);\n\n          if (got_nread != 0)\n            {\n              ftell_nread = ftell (fp);\n              if (got_nread != ftell_nread)\n                {\n                  printf (\"mpq_inp_str nread wrong\\n\");\n                  printf (\"  inp          \\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base         %d\\n\", data[i].base);\n                  printf (\"  got_nread    %d\\n\", got_nread);\n                  printf (\"  ftell_nread  %ld\\n\", ftell_nread);\n                  abort ();\n                }\n            }\n\n          if (post == 0 && data[i].want_nread == strlen(data[i].inp))\n            {\n              int  c = getc(fp);\n              if (c != EOF)\n                {\n                  printf (\"mpq_inp_str didn't read to EOF\\n\");\n                  printf (\"  inp         \\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base        %d\\n\", data[i].base);\n                  printf (\"  c '%c' %#x\\n\", c, c);\n                  abort ();\n                }\n            }\n\n          if (got_nread != data[i].want_nread)\n            {\n              printf (\"mpq_inp_str nread wrong\\n\");\n   
           printf (\"  inp         \\\"%s\\\"\\n\", data[i].inp);\n              printf (\"  base        %d\\n\", data[i].base);\n              printf (\"  got_nread   %d\\n\", got_nread);\n              printf (\"  want_nread  %d\\n\", data[i].want_nread);\n              abort ();\n            }\n\n          MPQ_CHECK_FORMAT (got);\n      \n          if (! mpq_equal (got, want))\n            {\n              printf (\"mpq_inp_str wrong result\\n\");\n              printf (\"  inp   \\\"%s\\\"\\n\", data[i].inp);\n              printf (\"  base  %d\\n\", data[i].base);\n              mpq_trace (\"  got \",  got);\n              mpq_trace (\"  want\", want);\n              abort ();\n            }\n\n          ASSERT_ALWAYS (fclose (fp) == 0);\n        }\n    }\n\n  mpq_clear (got);\n  mpq_clear (want);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  unlink (FILENAME);\n  tests_end ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-md_2exp.c",
    "content": "/* Test mpq_mul_2exp and mpq_div_2exp.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nstruct pair_t {\n  const char     *num;\n  const char     *den;\n};\n\nint\nmain (void)\n{\n  static const struct {\n    struct pair_t  left;\n    unsigned long  n;\n    struct pair_t  right;\n\n  } data[] = {\n    { {\"0\",\"1\"}, 0, {\"0\",\"1\"} },\n    { {\"0\",\"1\"}, 1, {\"0\",\"1\"} },\n    { {\"0\",\"1\"}, 2, {\"0\",\"1\"} },\n\n    { {\"1\",\"1\"}, 0, {\"1\",\"1\"} },\n    { {\"1\",\"1\"}, 1, {\"2\",\"1\"} },\n    { {\"1\",\"1\"}, 2, {\"4\",\"1\"} },\n    { {\"1\",\"1\"}, 3, {\"8\",\"1\"} },\n\n    { {\"1\",\"1\"}, 31, {\"0x80000000\",\"1\"} },\n    { {\"1\",\"1\"}, 32, {\"0x100000000\",\"1\"} },\n    { {\"1\",\"1\"}, 33, {\"0x200000000\",\"1\"} },\n    { {\"1\",\"1\"}, 63, {\"0x8000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 64, {\"0x10000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 65, {\"0x20000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 95, {\"0x800000000000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 96, 
{\"0x1000000000000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 97, {\"0x2000000000000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 127, {\"0x80000000000000000000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 128, {\"0x100000000000000000000000000000000\",\"1\"} },\n    { {\"1\",\"1\"}, 129, {\"0x200000000000000000000000000000000\",\"1\"} },\n\n    { {\"1\",\"2\"}, 31, {\"0x40000000\",\"1\"} },\n    { {\"1\",\"2\"}, 32, {\"0x80000000\",\"1\"} },\n    { {\"1\",\"2\"}, 33, {\"0x100000000\",\"1\"} },\n    { {\"1\",\"2\"}, 63, {\"0x4000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 64, {\"0x8000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 65, {\"0x10000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 95, {\"0x400000000000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 96, {\"0x800000000000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 97, {\"0x1000000000000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 127, {\"0x40000000000000000000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 128, {\"0x80000000000000000000000000000000\",\"1\"} },\n    { {\"1\",\"2\"}, 129, {\"0x100000000000000000000000000000000\",\"1\"} },\n\n    { {\"1\",\"0x80000000\"}, 30, {\"1\",\"2\"} },\n    { {\"1\",\"0x80000000\"}, 31, {\"1\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 32, {\"2\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 33, {\"4\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 62, {\"0x80000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 63, {\"0x100000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 64, {\"0x200000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 94, {\"0x8000000000000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 95, {\"0x10000000000000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 96, {\"0x20000000000000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 126, {\"0x800000000000000000000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 127, {\"0x1000000000000000000000000\",\"1\"} },\n    { {\"1\",\"0x80000000\"}, 128, {\"0x2000000000000000000000000\",\"1\"} },\n\n    { {\"1\",\"0x100000000\"}, 1, 
{\"1\",\"0x80000000\"} },\n    { {\"1\",\"0x100000000\"}, 2, {\"1\",\"0x40000000\"} },\n    { {\"1\",\"0x100000000\"}, 3, {\"1\",\"0x20000000\"} },\n\n    { {\"1\",\"0x10000000000000000\"}, 1, {\"1\",\"0x8000000000000000\"} },\n    { {\"1\",\"0x10000000000000000\"}, 2, {\"1\",\"0x4000000000000000\"} },\n    { {\"1\",\"0x10000000000000000\"}, 3, {\"1\",\"0x2000000000000000\"} },\n  };\n\n  void (*fun)(mpq_ptr, mpq_srcptr, mpir_ui);\n  const struct pair_t  *p_start, *p_want;\n  const char  *name;\n  mpq_t    sep, got, want;\n  mpq_ptr  q;\n  int      i, muldiv, sign, overlap;\n\n  tests_start ();\n\n  mpq_init (sep);\n  mpq_init (got);\n  mpq_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (muldiv = 0; muldiv < 2; muldiv++)\n        {\n          if (muldiv == 0)\n            {\n              fun = mpq_mul_2exp;\n              name = \"mpq_mul_2exp\";\n              p_start = &data[i].left;\n              p_want = &data[i].right;\n            }\n          else\n            {\n              fun = mpq_div_2exp;\n              name = \"mpq_div_2exp\";\n              p_start = &data[i].right;\n              p_want = &data[i].left;\n            }\n\n          for (sign = 0; sign <= 1; sign++)\n            {\n              mpz_set_str_or_abort (mpq_numref(want), p_want->num, 0);\n              mpz_set_str_or_abort (mpq_denref(want), p_want->den, 0);\n              if (sign)\n                mpq_neg (want, want);\n\n              for (overlap = 0; overlap <= 1; overlap++)\n                {\n                  q = overlap ? 
got : sep;\n\n                  /* initial garbage in \"got\" */\n                  mpq_set_ui (got, 123L, 456L);\n\n                  mpz_set_str_or_abort (mpq_numref(q), p_start->num, 0);\n                  mpz_set_str_or_abort (mpq_denref(q), p_start->den, 0);\n                  if (sign)\n                    mpq_neg (q, q);\n\n                  (*fun) (got, q, data[i].n);\n                  MPQ_CHECK_FORMAT (got);\n\n                  if (! mpq_equal (got, want))\n                    {\n                      printf (\"%s wrong at data[%d], sign %d, overlap %d\\n\",\n                              name, i, sign, overlap);\n                      printf (\"   num \\\"%s\\\"\\n\", p_start->num);\n                      printf (\"   den \\\"%s\\\"\\n\", p_start->den);\n                      printf (\"   n   %lu\\n\", data[i].n);\n\n                      printf (\"   got  \");\n                      mpq_out_str (stdout, 16, got);\n                      printf (\" (hex)\\n\");\n\n                      printf (\"   want \");\n                      mpq_out_str (stdout, 16, want);\n                      printf (\" (hex)\\n\");\n\n                      abort ();\n                    }\n                }\n            }\n        }\n    }\n\n  mpq_clear (sep);\n  mpq_clear (got);\n  mpq_clear (want);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-set_f.c",
    "content": "/* Test mpq_set_f.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (int argc, char **argv)\n{\n#if GMP_NAIL_BITS == 0\n  static const struct {\n    int         f_base;\n    const char  *f;\n    int         z_base;\n    const char  *want_num;\n    const char  *want_den;\n\n  } data[] = {\n\n    { -2, \"0\",    16, \"0\", \"1\" },\n    { -2, \"1\",    16, \"1\", \"1\" },\n    { -2, \"1@1\",  16, \"2\", \"1\" },\n    { -2, \"1@2\",  16, \"4\", \"1\" },\n    { -2, \"1@3\",  16, \"8\", \"1\" },\n\n    { -2, \"1@30\", 16,  \"40000000\", \"1\" },\n    { -2, \"1@31\", 16,  \"80000000\", \"1\" },\n    { -2, \"1@32\", 16, \"100000000\", \"1\" },\n    { -2, \"1@33\", 16, \"200000000\", \"1\" },\n    { -2, \"1@34\", 16, \"400000000\", \"1\" },\n\n    { -2, \"1@62\", 16,  \"4000000000000000\", \"1\" },\n    { -2, \"1@63\", 16,  \"8000000000000000\", \"1\" },\n    { -2, \"1@64\", 16, \"10000000000000000\", \"1\" },\n    { -2, \"1@65\", 16, \"20000000000000000\", \"1\" },\n    { -2, \"1@66\", 16, \"40000000000000000\", \"1\" },\n\n    { 
-2, \"1@126\", 16,  \"40000000000000000000000000000000\", \"1\" },\n    { -2, \"1@127\", 16,  \"80000000000000000000000000000000\", \"1\" },\n    { -2, \"1@128\", 16, \"100000000000000000000000000000000\", \"1\" },\n    { -2, \"1@129\", 16, \"200000000000000000000000000000000\", \"1\" },\n    { -2, \"1@130\", 16, \"400000000000000000000000000000000\", \"1\" },\n\n    { -2, \"1@-1\",  16, \"1\", \"2\" },\n    { -2, \"1@-2\",  16, \"1\", \"4\" },\n    { -2, \"1@-3\",  16, \"1\", \"8\" },\n\n    { -2, \"1@-30\", 16, \"1\",  \"40000000\" },\n    { -2, \"1@-31\", 16, \"1\",  \"80000000\" },\n    { -2, \"1@-32\", 16, \"1\", \"100000000\" },\n    { -2, \"1@-33\", 16, \"1\", \"200000000\" },\n    { -2, \"1@-34\", 16, \"1\", \"400000000\" },\n\n    { -2, \"1@-62\", 16, \"1\",  \"4000000000000000\" },\n    { -2, \"1@-63\", 16, \"1\",  \"8000000000000000\" },\n    { -2, \"1@-64\", 16, \"1\", \"10000000000000000\" },\n    { -2, \"1@-65\", 16, \"1\", \"20000000000000000\" },\n    { -2, \"1@-66\", 16, \"1\", \"40000000000000000\" },\n\n    { -2, \"1@-126\", 16, \"1\",  \"40000000000000000000000000000000\" },\n    { -2, \"1@-127\", 16, \"1\",  \"80000000000000000000000000000000\" },\n    { -2, \"1@-128\", 16, \"1\", \"100000000000000000000000000000000\" },\n    { -2, \"1@-129\", 16, \"1\", \"200000000000000000000000000000000\" },\n    { -2, \"1@-130\", 16, \"1\", \"400000000000000000000000000000000\" },\n\n    { -2, \"1@-30\", 16, \"1\",  \"40000000\" },\n    { -2, \"1@-31\", 16, \"1\",  \"80000000\" },\n    { -2, \"1@-32\", 16, \"1\", \"100000000\" },\n    { -2, \"1@-33\", 16, \"1\", \"200000000\" },\n    { -2, \"1@-34\", 16, \"1\", \"400000000\" },\n\n    { -2, \"11@-62\", 16, \"3\",  \"4000000000000000\" },\n    { -2, \"11@-63\", 16, \"3\",  \"8000000000000000\" },\n    { -2, \"11@-64\", 16, \"3\", \"10000000000000000\" },\n    { -2, \"11@-65\", 16, \"3\", \"20000000000000000\" },\n    { -2, \"11@-66\", 16, \"3\", \"40000000000000000\" },\n\n    { 16, \"80000000.00000001\", 
16, \"8000000000000001\", \"100000000\" },\n    { 16, \"80000000.00000008\", 16, \"1000000000000001\",  \"20000000\" },\n    { 16, \"80000000.8\",        16, \"100000001\", \"2\" },\n\n  };\n\n  mpf_t  f;\n  mpq_t  got;\n  mpz_t  want_num, want_den;\n  int    i, neg;\n\n  tests_start ();\n\n  mpf_init2 (f, 1024L);\n  mpq_init (got);\n  mpz_init (want_num);\n  mpz_init (want_den);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (neg = 0; neg <= 1; neg++)\n        {\n          mpf_set_str_or_abort (f, data[i].f, data[i].f_base);\n          mpz_set_str_or_abort (want_num, data[i].want_num, data[i].z_base);\n          mpz_set_str_or_abort (want_den, data[i].want_den, data[i].z_base);\n\n          if (neg)\n            {\n              mpf_neg (f, f);\n              mpz_neg (want_num, want_num);\n            }\n\n          mpq_set_f (got, f);\n          MPQ_CHECK_FORMAT (got);\n\n          if (mpz_cmp (mpq_numref(got), want_num) != 0\n              || mpz_cmp (mpq_denref(got), want_den) != 0)\n            {\n              printf (\"wrong at data[%d]\\n\", i);\n              printf (\"   f_base %d, z_base %d\\n\",\n                      data[i].f_base, data[i].z_base);\n\n              printf (\"   f \\\"%s\\\" hex \", data[i].f);\n              mpf_out_str (stdout, 16, 0, f);\n              printf (\"\\n\");\n\n              printf (\"   want num 0x\");\n              mpz_out_str (stdout, 16, want_num);\n              printf (\"\\n\");\n              printf (\"   want den 0x\");\n              mpz_out_str (stdout, 16, want_den);\n              printf (\"\\n\");\n\n              printf (\"   got num 0x\");\n              mpz_out_str (stdout, 16, mpq_numref(got));\n              printf (\"\\n\");\n              printf (\"   got den 0x\");\n              mpz_out_str (stdout, 16, mpq_denref(got));\n              printf (\"\\n\");\n\n              abort ();\n            }\n        }\n    }\n\n  mpf_clear (f);\n  mpq_clear (got);\n  mpz_clear (want_num);\n  
mpz_clear (want_den);\n\n  tests_end ();\n#endif\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpq/t-set_str.c",
    "content": "/* Test mpq_set_str.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpq_srcptr want, int base, const char *str)\n{\n  mpq_t   got;\n  \n  MPQ_CHECK_FORMAT (want);\n  mp_trace_base = base;\n\n  mpq_init (got);\n\n  if (mpq_set_str (got, str, base) != 0)\n    {\n      printf (\"mpq_set_str unexpectedly failed\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  str  \\\"%s\\\"\\n\", str);\n      abort ();\n    }\n  MPQ_CHECK_FORMAT (got);\n\n  if (! 
mpq_equal (got, want))\n    {\n      printf (\"mpq_set_str wrong\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  str  \\\"%s\\\"\\n\", str);\n      mpq_trace (\"got \", got);\n      mpq_trace (\"want\", want);\n      abort ();\n    }\n\n  mpq_clear (got);\n}\n\nvoid\ncheck_samples (void)\n{\n  mpq_t  q;\n\n  mpq_init (q);\n\n  mpq_set_ui (q, 0L, 1L);\n  check_one (q, 10, \"0\");\n  check_one (q, 10, \"0/1\");\n  check_one (q, 10, \"0  / 1\");\n  check_one (q, 0, \"0x0/ 1\");\n  check_one (q, 0, \"0x0/ 0x1\");\n  check_one (q, 0, \"0 / 0x1\");\n\n  check_one (q, 10, \"-0\");\n  check_one (q, 10, \"-0/1\");\n  check_one (q, 10, \"-0  / 1\");\n  check_one (q, 0, \"-0x0/ 1\");\n  check_one (q, 0, \"-0x0/ 0x1\");\n  check_one (q, 0, \"-0 / 0x1\");\n\n  mpq_set_ui (q, 255L, 256L);\n  check_one (q, 10, \"255/256\");\n  check_one (q, 0,  \"0xFF/0x100\");\n  check_one (q, 16, \"FF/100\");\n\n  mpq_neg (q, q);\n  check_one (q, 10, \"-255/256\");\n  check_one (q, 0,  \"-0xFF/0x100\");\n  check_one (q, 16, \"-FF/100\");\n\n  mpq_clear (q);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_samples ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003 Free Software\n# Foundation, Inc.\n#\n# Copyright 2008 Jason Moxham\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = bit convert dive dive_ui io logic reuse t-addsub t-aorsmul t-bin t-cdiv_ui t-cmp t-cmp_d t-cmp_si t-cong t-cong_2exp t-div_2exp t-divis t-divis_2exp t-export t-fac_ui t-fdiv t-fdiv_ui t-fib_ui t-fits t-gcd t-gcd_ui t-get_d t-get_d_2exp t-get_si t-get_sx t-get_ux t-hamdist t-import t-inp_str t-io_raw t-jac t-lcm t-limbs t-likely_prime_p t-lucnum_ui t-mfac_uiui t-mul t-mul_i t-next_prime_candidate t-oddeven t-perfpow t-perfsqr t-popcount t-pow t-powm t-powm_ui t-pprime_p t-primorial_ui t-root t-scan t-set_d t-set_f t-set_si t-set_str t-set_sx t-set_ux t-sizeinbase t-sqrtrem t-tdiv t-tdiv_ui t-trial_division \n\nif ENABLE_STATIC\nif ENABLE_SHARED\ncheck_PROGRAMS += st_hamdist st_popcount\nst_hamdist_SOURCES = t-hamdist.c\nst_hamdist_LDFLAGS = -static\nst_popcount_SOURCES = t-popcount.c\nst_popcount_LDFLAGS 
= -static\nendif\nendif\n\nTESTS = $(check_PROGRAMS)\n\n# Temporary files used by the tests.  Removed automatically if the tests\n# pass, but ensure they're cleaned if they fail.\n#\nCLEANFILES = *.tmp\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n"
  },
  {
    "path": "tests/mpz/bit.c",
    "content": "/* Test mpz_setbit, mpz_clrbit, mpz_tstbit.\n\nCopyright 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef SIZE\n#define SIZE 4\n#endif\n\nvoid\nmpz_intrandom2 (mpz_ptr x, gmp_randstate_t rands, mp_size_t size)\n{\n  mp_size_t abs_size;\n\n  abs_size = ABS (size);\n  if (abs_size != 0)\n    {\n      if (x->_mp_alloc < abs_size)\n\t_mpz_realloc (x, abs_size);\n\n      mpn_rrandom (x->_mp_d, rands, abs_size);\n    }\n\n  x->_mp_size = size;\n}\n                                          \nvoid\ndebug_mp (mpz_srcptr x, int base)\n{\n  mpz_out_str (stdout, base, x); fputc ('\\n', stdout);\n}\n\n\n/* exercise the case where mpz_clrbit or mpz_combit ends up extending a\n   value like -2^(k*GMP_NUMB_BITS-1) when clearing bit k*GMP_NUMB_BITS-1.  
*/\nvoid\ncheck_clr_extend (void)\n{\n  mpz_t          got, want;\n  unsigned long  i;\n  int            f;\n\n  mpz_init (got);\n  mpz_init (want);\n\n  for (i = 1; i < 5; i++)\n    {\n      for (f = 0; f <= 1; f++)\n        {\n          /* lots of 1 bits in _mp_d */\n          mpz_set_ui (got, 1L);\n          mpz_mul_2exp (got, got, 10*GMP_NUMB_BITS);\n          mpz_sub_ui (got, got, 1L);\n\n          /* value -2^(n-1) representing ..11100..00 */\n          mpz_set_si (got, -1L);\n          mpz_mul_2exp (got, got, i*GMP_NUMB_BITS-1);\n\n          /* complement bit n, giving ..11000..00 which is -2^n */\n          if (f == 0)\n            mpz_clrbit (got, i*GMP_NUMB_BITS-1);\n          else\n            mpz_combit (got, i*GMP_NUMB_BITS-1);\n          MPZ_CHECK_FORMAT (got);\n\n          mpz_set_si (want, -1L);\n          mpz_mul_2exp (want, want, i*GMP_NUMB_BITS);\n\n          if (mpz_cmp (got, want) != 0)\n            {\n              if (f == 0)\n                printf (\"mpz_clrbit: \");\n              else\n                printf (\"mpz_combit: \");\n              printf (\"wrong after extension\\n\");\n              mpz_trace (\"got \", got);\n              mpz_trace (\"want\", want);\n              abort ();\n            }\n        }\n    }\n\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\nvoid\ncheck_com_negs (void)\n{\n  static const struct {\n    unsigned long  bit;\n    mp_size_t      inp_size;\n    mp_limb_t      inp_n[5];\n    mp_size_t      want_size;\n    mp_limb_t      want_n[5];\n  } data[] = {\n    { GMP_NUMB_BITS,   2, { 1, 1 },  1, { 1 } },\n    { GMP_NUMB_BITS+1, 2, { 1, 1 },  2, { 1, 3 } },\n\n    { GMP_NUMB_BITS,   2, { 0, 1 },  2, { 0, 2 } },\n    { GMP_NUMB_BITS+1, 2, { 0, 1 },  2, { 0, 3 } },\n  };\n  mpz_t  inp, got, want;\n  int    i;\n\n  mpz_init (got);\n  mpz_init (want);\n  mpz_init (inp);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_n (inp, data[i].inp_n, data[i].inp_size);\n      mpz_neg (inp, inp);\n\n      
mpz_set_n (want, data[i].want_n, data[i].want_size);\n      mpz_neg (want, want);\n\n      mpz_set (got, inp);\n      mpz_combit (got, data[i].bit);\n\n      if (mpz_cmp (got, want) != 0)\n        {\n          printf (\"mpz_combit: wrong on neg data[%d]\\n\", i);\n          mpz_trace (\"inp \", inp);\n          printf    (\"bit %lu\\n\", data[i].bit);\n          mpz_trace (\"got \", got);\n          mpz_trace (\"want\", want);\n          abort ();\n        }\n    }\n\n  mpz_clear (inp);\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\n/* See that mpz_tstbit matches a twos complement calculated explicitly, for\n   various low zeros.  */\nvoid\ncheck_tstbit (gmp_randstate_t rands)\n{\n#define MAX_ZEROS  3\n#define NUM_LIMBS  3\n\n  mp_limb_t      pos[1+NUM_LIMBS+MAX_ZEROS];\n  mp_limb_t      neg[1+NUM_LIMBS+MAX_ZEROS];\n  mpz_t          z;\n  unsigned long  i;\n  int            zeros, low1;\n  int            got, want;\n\n  mpz_init (z);\n  for (zeros = 0; zeros <= MAX_ZEROS; zeros++)\n    {\n      MPN_ZERO (pos, numberof(pos));\n      mpn_rrandom (pos+zeros, rands, (mp_size_t) NUM_LIMBS);\n\n      for (low1 = 0; low1 <= 1; low1++)\n        {\n          if (low1)\n            pos[0] |= 1;\n\n          refmpn_neg_n (neg, pos, (mp_size_t) numberof(neg));\n          mpz_set_n (z, neg, (mp_size_t) numberof(neg));\n          mpz_neg (z, z);\n\n          for (i = 0; i < numberof(pos)*GMP_NUMB_BITS; i++)\n            {\n              got = mpz_tstbit (z, i);\n              want = refmpn_tstbit (pos, i);\n              if (got != want)\n                {\n                  printf (\"wrong at bit %lu, with %d zeros\\n\", i, zeros);\n                  printf (\"z neg \"); debug_mp (z, -16);\n                  mpz_set_n (z, pos, (mp_size_t) numberof(pos));\n                  printf (\"pos   \"); debug_mp (z, -16);\n                  mpz_set_n (z, neg, (mp_size_t) numberof(neg));\n                  printf (\"neg   \"); debug_mp (z, -16);\n                  exit (1);\n              
  }\n            }\n        }\n    }\n  mpz_clear (z);\n}\n\n\nvoid\ncheck_single (void)\n{\n  mpz_t  x;\n  int    limb, offset, initial;\n  unsigned long  bit;\n\n  mpz_init (x);\n\n  for (limb = 0; limb < 4; limb++)\n    {\n      for (offset = (limb==0 ? 0 : -2); offset <= 2; offset++)\n        {\n          for (initial = 0; initial >= -1; initial--)\n            {\n              mpz_set_si (x, (long) initial);\n\n              bit = (unsigned long) limb*BITS_PER_MP_LIMB + offset;\n\n              mpz_clrbit (x, bit);\n              MPZ_CHECK_FORMAT (x);\n              if (mpz_tstbit (x, bit) != 0)\n                {\n                  printf (\"check_single(): expected 0\\n\");\n                  abort ();\n                }\n          \n              mpz_setbit (x, bit);\n              MPZ_CHECK_FORMAT (x);\n              if (mpz_tstbit (x, bit) != 1)\n                {\n                  printf (\"check_single(): expected 1\\n\");\n                  abort ();\n                }\n          \n              mpz_clrbit (x, bit);\n              MPZ_CHECK_FORMAT (x);\n              if (mpz_tstbit (x, bit) != 0)\n                {\n                  printf (\"check_single(): expected 0\\n\");\n                  abort ();\n                }\n\n              mpz_combit (x, bit);\n              MPZ_CHECK_FORMAT (x);\n              if (mpz_tstbit (x, bit) != 1)\n                {\n                  printf (\"check_single(): expected 1\\n\");\n                  abort ();\n                }\n\n              mpz_combit (x, bit);\n              MPZ_CHECK_FORMAT (x);\n              if (mpz_tstbit (x, bit) != 0)\n                {\n                  printf (\"check_single(): expected 0\\n\");\n                  abort ();\n                }\n            }\n        }\n    }          \n\n  mpz_clear (x);\n}\n\n\nvoid\ncheck_random (int argc, gmp_randstate_t rands, char *argv[])\n{\n  mpz_t x, s0, s1, s2, s3, m;\n  mp_size_t xsize;\n  int i;\n  int reps = 100000;\n  int bit0, 
bit1, bit2, bit3;\n  unsigned long int bitindex;\n  const char  *s = \"\";\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (x);\n  mpz_init (s0);\n  mpz_init (s1);\n  mpz_init (s2);\n  mpz_init (s3);\n  mpz_init (m);\n\n  for (i = 0; i < reps; i++)\n    {\n      xsize = urandom (rands) % (2 * SIZE) - SIZE;\n      mpz_intrandom2 (x, rands,xsize);\n      bitindex = urandom (rands) % SIZE;\n\n      mpz_set (s0, x);\n      bit0 = mpz_tstbit (x, bitindex);\n      mpz_setbit (x, bitindex);\n      MPZ_CHECK_FORMAT (x);\n\n      mpz_set (s1, x);\n      bit1 = mpz_tstbit (x, bitindex);\n      mpz_clrbit (x, bitindex);\n      MPZ_CHECK_FORMAT (x);\n\n      mpz_set (s2, x);\n      bit2 = mpz_tstbit (x, bitindex);\n      mpz_setbit (x, bitindex);\n      MPZ_CHECK_FORMAT (x);\n\n      mpz_set (s3, x);\n      bit3 = mpz_tstbit (x, bitindex);\n\n#define FAIL(str) do { s = str; goto fail; } while (0)\n\n      if (bit1 != 1)  FAIL (\"bit1 != 1\");\n      if (bit2 != 0)  FAIL (\"bit2 != 0\");\n      if (bit3 != 1)  FAIL (\"bit3 != 1\");\n\n      if (bit0 == 0)\n\t{\n\t  if (mpz_cmp (s0, s1) == 0 || mpz_cmp (s0, s2) != 0 || mpz_cmp (s0, s3) == 0)\n\t    abort ();\n\t}\n      else\n\t{\n\t  if (mpz_cmp (s0, s1) != 0 || mpz_cmp (s0, s2) == 0 || mpz_cmp (s0, s3) != 0)\n\t    abort ();\n\t}\n\n      if (mpz_cmp (s1, s2) == 0 || mpz_cmp (s1, s3) != 0)\n\tabort ();\n      if (mpz_cmp (s2, s3) == 0)\n\tabort ();\n\n      mpz_ui_pow_ui (m, 2L, bitindex);\n      MPZ_CHECK_FORMAT (m);\n      mpz_ior (x, s2, m);\n      MPZ_CHECK_FORMAT (x);\n      if (mpz_cmp (x, s3) != 0)\n\tabort ();\n\n      mpz_com (m, m);\n      MPZ_CHECK_FORMAT (m);\n      mpz_and (x, s1, m);\n      MPZ_CHECK_FORMAT (x);\n      if (mpz_cmp (x, s2) != 0)\n\tabort ();\n    }\n\n  mpz_clear (x);\n  mpz_clear (s0);\n  mpz_clear (s1);\n  mpz_clear (s2);\n  mpz_clear (s3);\n  mpz_clear (m);\n  return;\n\n\n fail:\n  printf (\"%s\\n\", s);\n  printf (\"bitindex = %lu\\n\", bitindex);\n  printf (\"x = \"); 
mpz_out_str (stdout, -16, x); printf (\" hex\\n\");\n  exit (1);\n}\n\n\n\nint\nmain (int argc, char *argv[])\n{gmp_randstate_t rands;\n  tests_start ();\ngmp_randinit_default(rands);\n  mp_trace_base = -16;\n\n  check_clr_extend ();\n  check_com_negs ();\n  check_tstbit (rands);\n  check_random (argc, rands,argv);\n  check_single ();\ngmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\n\n"
  },
  {
    "path": "tests/mpz/convert.c",
    "content": "/* Test conversion using mpz_get_str and mpz_set_str.\n\nCopyright 1993, 1994, 1996, 1999, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h> /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2;\n  mp_size_t size;\n  int i;\n  int reps = 10000;\n  char *str, *buf;\n  int base;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (op1);\n  mpz_init (op2);\n\n  for (i = 0; i < reps; i++)\n    {\n      /* 1. Generate random mpz_t and convert to a string and back to mpz_t\n\t again.  
*/\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 12 + 2;\t/* 2..13 */\n      mpz_urandomb (bs, rands, size_range);\t/* 3..8191 bits */\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (op1, rands, size);\n\n      mpz_urandomb (bs, rands, 1);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (op1, op1);\n\n      mpz_urandomb (bs, rands, 32);\n      bsi = mpz_get_ui (bs);\n      base = bsi % 62 + 1;\n      if (base == 1)\n\tbase = 0;\n\n      str = mpz_get_str ((char *) 0, base, op1);\n      mpz_set_str_or_abort (op2, str, base);\n\n      if (mpz_cmp (op1, op2))\n\t{\n\t  fprintf (stderr, \"ERROR, op1 and op2 different in test %d\\n\", i);\n\t  fprintf (stderr, \"str  = %s\\n\", str);\n\t  fprintf (stderr, \"base = %d\\n\", base);\n\t  fprintf (stderr, \"op1  = \"); debug_mp (op1, -16);\n\t  fprintf (stderr, \"op2  = \"); debug_mp (op2, -16);\n\t  abort ();\n\t}\n\n      (*__gmp_free_func) (str, strlen (str) + 1);\n\n#if 0\n      /* 2. Generate random string and convert to mpz_t and back to a string\n\t again.  
*/\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2;\t/* 2..11 */\n      mpz_urandomb (bs, rands, size_range);\t/* 3..2047 bits */\n      len = mpz_get_ui (bs);\n      buf = (*__gmp_allocate_func) (len + 1);\n      string_urandomb (buf, len, base);\n      mpz_set_str_or_abort (op1, buf, base);\n      str = mpz_get_str ((char *) 0, base, op1);\n\n      if (strcmp (str, buf) != 0)\n\t{\n\t  fprintf (stderr, \"ERROR, str and buf different\\n\");\n\t  fprintf (stderr, \"str  = %s\\n\", str);\n\t  fprintf (stderr, \"buf  = %s\\n\", buf);\n\t  fprintf (stderr, \"base = %d\\n\", base);\n\t  fprintf (stderr, \"op1  = \"); debug_mp (op1, -16);\n\t  abort ();\n\t}\n\n      (*__gmp_free_func) (buf, len + 1);\n      (*__gmp_free_func) (str, strlen (str) + 1);\n#endif\n    }\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/dive.c",
    "content": "/* Test mpz_mul, mpz_divexact.\n\nCopyright 1996, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2;\n  mpz_t prod, quot;\n  mp_size_t size;\n  int i;\n  int reps = 20000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mp_trace_base = -16;\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (op1);\n  mpz_init (op2);\n  mpz_init (prod);\n  mpz_init (quot);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */\n\n      mpz_urandomb (bs, rands, size_range);\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (op1, rands, size);\n\n      do\n\t{\n\t  mpz_urandomb (bs, rands, size_range);\n\t  size = mpz_get_ui (bs);\n\t  mpz_rrandomb (op2, rands, size);\n\t}\n      while (mpz_sgn (op2) == 0);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 
0)\n\tmpz_neg (op1, op1);\n      if ((bsi & 2) != 0)\n\tmpz_neg (op2, op2);\n\n      mpz_mul (prod, op1, op2);\n\n      mpz_divexact (quot, prod, op2);\n      MPZ_CHECK_FORMAT (quot);\n\n      if (mpz_cmp (quot, op1) != 0)\n        {\n          printf (\"Wrong results:\\n\");\n          mpz_trace (\"  got     \", quot);\n          mpz_trace (\"  want    \", op1);\n          mpz_trace (\"  dividend\", prod);\n          mpz_trace (\"  divisor \", op2);\n          abort ();\n        }\n    }\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  mpz_clear (prod);\n  mpz_clear (quot);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/dive_ui.c",
    "content": "/* Test mpz_divexact_ui.\n\nCopyright 1996, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_random (int argc, char *argv[])\n{\n  gmp_randstate_t rands;\n  int    reps = 5000;\n  mpz_t  a, q, got;\n  int    i, qneg;\n  mpir_ui  d;\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (a);\n  mpz_init (q);\n  mpz_init (got);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      d = (mpir_ui) urandom(rands);\n      mpz_erandomb (q, rands, 512);\n      mpz_mul_ui (a, q, d);\n\n      for (qneg = 0; qneg <= 1; qneg++)\n        {\n          mpz_divexact_ui (got, a, d);\n          MPZ_CHECK_FORMAT (got);\n          if (mpz_cmp (got, q) != 0)\n            {\n              printf    (\"mpz_divexact_ui wrong\\n\");\n              mpz_trace (\"    a\", a);\n              printf    (\"    d=%lu\\n\", d);\n              mpz_trace (\"    q\", q);\n              mpz_trace (\"  got\", got);\n              abort ();\n            }\n\n          mpz_neg (q, q);\n          mpz_neg (a, a);\n        }\n\n    }\n\n  mpz_clear 
(a);\n  mpz_clear (q);\n  mpz_clear (got);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char **argv)\n{\n  tests_start ();\n\n  check_random (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/io.c",
    "content": "/* Test conversion and I/O using mpz_out_str and mpz_inp_str.\n\nCopyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>\t\t/* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define FILENAME  \"io.tmp\"\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stdout, base, x); fputc ('\\n', stdout);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t  op1, op2;\n  mp_size_t size;\n  int i;\n  int reps = 10000;\n  FILE *fp;\n  int base;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n  size_t nread;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (op1);\n  mpz_init (op2);\n\n  fp = fopen (FILENAME, \"w+\"); \n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (op1, rands, size);\n      mpz_urandomb (bs, rands, 
1);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (op1, op1);\n\n      mpz_urandomb (bs, rands, 16);\n      bsi = mpz_get_ui (bs);\n      base = bsi % 36 + 1;\n      if (base == 1)\n\tbase = 0;\n\n      rewind (fp);  \n      if (mpz_out_str (fp, base, op1) == 0\n          || putc (' ', fp) == EOF\n          || fflush (fp) != 0)\n        {\n          printf (\"mpz_out_str write error\\n\");\n          abort ();\n        }\n\n      rewind (fp);  \n      nread = mpz_inp_str (op2, fp, base);\n      if (nread == 0)\n        {\n          if (ferror (fp))\n            printf (\"mpz_inp_str stream read error\\n\");\n          else\n            printf (\"mpz_inp_str data conversion error\\n\");\n\t  abort ();\n\t}\n\n      if (nread != ftell(fp))\n        {\n          printf (\"mpz_inp_str nread doesn't match ftell\\n\");\n          printf (\"  nread  %lu\\n\", nread);\n          printf (\"  ftell  %ld\\n\", ftell(fp));\n          abort ();\n        }\n\n      if (mpz_cmp (op1, op2))\n\t{\n\t  printf (\"ERROR\\n\");\n\t  printf (\"op1  = \"); debug_mp (op1, -16);\n\t  printf (\"op2  = \"); debug_mp (op2, -16);\n\t  printf (\"base = %d\\n\", base);\n\t  abort ();\n\t}\n    }\n\n  fclose (fp);\n\n  unlink (FILENAME);\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/logic.c",
    "content": "/* Test mpz_com, mpz_and, mpz_ior, and mpz_xor.\n\nCopyright 1993, 1994, 1996, 1997, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort();\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t x, y, r1, r2;\n  mpz_t t1, t2, t3;\n  mp_size_t xsize, ysize;\n  int i;\n  int reps = 40000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (x);\n  mpz_init (y);\n  mpz_init (r1);\n  mpz_init (r2);\n  mpz_init (t1);\n  mpz_init (t2);\n  mpz_init (t3);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 8 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      xsize = mpz_get_ui (bs);\n      mpz_rrandomb (x, rands, xsize);\n      mpz_urandomb (bs, rands, 1);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (x, x);\n\n      mpz_urandomb (bs, rands, size_range);\n      ysize = mpz_get_ui 
(bs);\n      mpz_rrandomb (y, rands, ysize);\n      mpz_urandomb (bs, rands, 1);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (y, y);\n\n      mpz_com (r1, x);\n      MPZ_CHECK_FORMAT (r1);\n      mpz_com (r1, r1);\n      MPZ_CHECK_FORMAT (r1);\n      if (mpz_cmp (r1, x) != 0)\n\tdump_abort ();\n\n      mpz_com (r1, y);\n      MPZ_CHECK_FORMAT (r1);\n      mpz_com (r2, r1);\n      MPZ_CHECK_FORMAT (r2);\n      if (mpz_cmp (r2, y) != 0)\n\tdump_abort ();\n\n      mpz_com (t1, x);\n      MPZ_CHECK_FORMAT (t1);\n      mpz_com (t2, y);\n      MPZ_CHECK_FORMAT (t2);\n      mpz_and (t3, t1, t2);\n      MPZ_CHECK_FORMAT (t3);\n      mpz_com (r1, t3);\n      MPZ_CHECK_FORMAT (r1);\n      mpz_ior (r2, x, y);\n      MPZ_CHECK_FORMAT (r2);\n      if (mpz_cmp (r1, r2) != 0)\n\tdump_abort ();\n\n      mpz_com (t1, x);\n      MPZ_CHECK_FORMAT (t1);\n      mpz_com (t2, y);\n      MPZ_CHECK_FORMAT (t2);\n      mpz_ior (t3, t1, t2);\n      MPZ_CHECK_FORMAT (t3);\n      mpz_com (r1, t3);\n      MPZ_CHECK_FORMAT (r1);\n      mpz_and (r2, x, y);\n      MPZ_CHECK_FORMAT (r2);\n      if (mpz_cmp (r1, r2) != 0)\n\tdump_abort ();\n\n      mpz_ior (t1, x, y);\n      MPZ_CHECK_FORMAT (t1);\n      mpz_and (t2, x, y);\n      MPZ_CHECK_FORMAT (t2);\n      mpz_com (t3, t2);\n      MPZ_CHECK_FORMAT (t3);\n      mpz_and (r1, t1, t3);\n      MPZ_CHECK_FORMAT (r1);\n      mpz_xor (r2, x, y);\n      MPZ_CHECK_FORMAT (r2);\n      if (mpz_cmp (r1, r2) != 0)\n\tdump_abort ();\n    }\n\n  mpz_clear (bs);\n  mpz_clear (x);\n  mpz_clear (y);\n  mpz_clear (r1);\n  mpz_clear (r2);\n  mpz_clear (t1);\n  mpz_clear (t2);\n  mpz_clear (t3);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort ()\n{\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/reuse.c",
    "content": "/* Test that routines allow reusing a source variable as destination.\n\n   Test all relevant functions except:\n\tmpz_bin_ui\n\tmpz_nextprime\n\tmpz_next_prime_candidate\n\tmpz_mul_si\n\tmpz_addmul_ui (should this really allow a+=a*c?)\n\nCopyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#if __GMP_LIBGMP_DLL\n\n/* FIXME: When linking to a DLL libmpir, mpz_add etc can't be used as\n   initializers for global variables because they're effectively global\n   variables (function pointers) themselves.  Perhaps calling a test\n   function successively with mpz_add etc would be better.  */\n\nint\nmain (void)\n{\n  printf (\"Test suppressed for windows DLL\\n\");\n  exit (0);\n}\n\n\n#else /* ! 
DLL_EXPORT */\n\nvoid dump(char *, mpz_t, mpz_t, mpz_t);\n\ntypedef void (*dss_func)(mpz_ptr, mpz_srcptr, mpz_srcptr);\ntypedef void (*dsi_func)(mpz_ptr, mpz_srcptr, mpir_ui);\ntypedef mpir_ui (*dsi_div_func)(mpz_ptr, mpz_srcptr, mpir_ui);\ntypedef mpir_ui (*ddsi_div_func)(mpz_ptr, mpz_ptr, mpz_srcptr, mpir_ui);\ntypedef void (*ddss_div_func)(mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);\ntypedef void (*ds_func)(mpz_ptr, mpz_srcptr);\n\n\nvoid\nmpz_xinvert (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)\n{\n  int res;\n  res = mpz_invert (r, a, b);\n  if (res == 0)\n    mpz_set_ui (r, 0);\n}\n\ndss_func dss_funcs[] =\n{\n  mpz_add, mpz_sub, mpz_mul,\n  mpz_cdiv_q, mpz_cdiv_r, mpz_fdiv_q, mpz_fdiv_r, mpz_tdiv_q, mpz_tdiv_r,\n  mpz_xinvert,\n  mpz_gcd, mpz_lcm, mpz_and, mpz_ior, mpz_xor\n};\nchar *dss_func_names[] =\n{\n  \"mpz_add\", \"mpz_sub\", \"mpz_mul\",\n  \"mpz_cdiv_q\", \"mpz_cdiv_r\", \"mpz_fdiv_q\", \"mpz_fdiv_r\", \"mpz_tdiv_q\", \"mpz_tdiv_r\",\n  \"mpz_xinvert\",\n  \"mpz_gcd\", \"mpz_lcm\", \"mpz_and\", \"mpz_ior\", \"mpz_xor\"\n};\nchar dss_func_division[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};\n\ndsi_func dsi_funcs[] =\n{\n  /* Don't change order here without changing the code in main(). 
*/\n  mpz_add_ui, mpz_mul_ui, mpz_sub_ui,\n  mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,\n  mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,\n  mpz_tdiv_q_2exp, mpz_tdiv_r_2exp,\n  mpz_mul_2exp,\n  mpz_pow_ui\n};\nchar *dsi_func_names[] =\n{\n  \"mpz_add_ui\", \"mpz_mul_ui\", \"mpz_sub_ui\",\n  \"mpz_fdiv_q_2exp\", \"mpz_fdiv_r_2exp\",\n  \"mpz_cdiv_q_2exp\", \"mpz_cdiv_r_2exp\",\n  \"mpz_tdiv_q_2exp\", \"mpz_tdiv_r_2exp\",\n  \"mpz_mul_2exp\",\n  \"mpz_pow_ui\"\n};\n\ndsi_div_func dsi_div_funcs[] =\n{\n  mpz_cdiv_q_ui, mpz_cdiv_r_ui,\n  mpz_fdiv_q_ui, mpz_fdiv_r_ui,\n  mpz_tdiv_q_ui, mpz_tdiv_r_ui\n};\nchar *dsi_div_func_names[] =\n{\n  \"mpz_cdiv_q_ui\", \"mpz_cdiv_r_ui\",\n  \"mpz_fdiv_q_ui\", \"mpz_fdiv_r_ui\",\n  \"mpz_tdiv_q_ui\", \"mpz_tdiv_r_ui\"\n};\n\nddsi_div_func ddsi_div_funcs[] =\n{\n  mpz_cdiv_qr_ui,\n  mpz_fdiv_qr_ui,\n  mpz_tdiv_qr_ui\n};\nchar *ddsi_div_func_names[] =\n{\n  \"mpz_cdiv_qr_ui\",\n  \"mpz_fdiv_qr_ui\",\n  \"mpz_tdiv_qr_ui\"\n};\n\nddss_div_func ddss_div_funcs[] =\n{\n  mpz_cdiv_qr,\n  mpz_fdiv_qr,\n  mpz_tdiv_qr\n};\nchar *ddss_div_func_names[] =\n{\n  \"mpz_cdiv_qr\",\n  \"mpz_fdiv_qr\",\n  \"mpz_tdiv_qr\"\n};\n\nds_func ds_funcs[] =\n{\n  mpz_abs, mpz_com, mpz_neg, mpz_sqrt\n};\nchar *ds_func_names[] =\n{\n  \"mpz_abs\", \"mpz_com\", \"mpz_neg\", \"mpz_sqrt\"\n};\n\n\n/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */\n#if defined (__STDC__) || defined (__cplusplus) || defined(_MSC_VER)\n#define FAIL(class,indx,op1,op2,op3) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n  class##_funcs[indx] = 0;\t\t\t\t\t\t\\\n  dump (class##_func_names[indx], op1, op2, op3);\t\t\t\\\n  failures++;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#define FAIL2(fname,op1,op2,op3) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n  dump (#fname, op1, op2, op3);\t\t\t\t\t\t\\\n  failures++;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#else\n#define FAIL(class,indx,op1,op2,op3) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n  class/**/_funcs[indx] = 0;\t\t\t\t\t\t\\\n  dump (class/**/_func_names[indx], op1, op2, 
op3);\t\t\t\\\n  failures++;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#define FAIL2(fname,op1,op2,op3) \\\n  do {\t\t\t\t\t\t\t\t\t\\\n  dump (\"fname\", op1, op2, op3);\t\t\t\t\t\\\n  failures++;\t\t\t\t\t\t\t\t\\\n  } while (0)\n#endif\n\n\nint\nmain (int argc, char **argv)\n{\n  int i;\n  int pass, reps = 100;\n  mpz_t in1, in2, in3;\n  unsigned long int in2i;\n  mp_size_t size;\n  mpz_t res1, res2, res3;\n  mpz_t ref1, ref2, ref3;\n  mpz_t t;\n  unsigned long int r1, r2;\n  long failures = 0;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (in1);\n  mpz_init (in2);\n  mpz_init (in3);\n  mpz_init (ref1);\n  mpz_init (ref2);\n  mpz_init (ref3);\n  mpz_init (res1);\n  mpz_init (res2);\n  mpz_init (res3);\n  mpz_init (t);\n\n  for (pass = 1; pass <= reps; pass++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 12 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (in1, rands, size);\n\n      mpz_urandomb (bs, rands, size_range);\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (in2, rands, size);\n\n      mpz_urandomb (bs, rands, size_range);\n      size = mpz_get_ui (bs);\n      mpz_rrandomb (in3, rands, size);\n\n      mpz_urandomb (bs, rands, 3);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (in1, in1);\n      if ((bsi & 2) != 0)\n\tmpz_neg (in2, in2);\n      if ((bsi & 4) != 0)\n\tmpz_neg (in3, in3);\n\n      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)\n\t{\n\t  if (dss_funcs[i] == 0)\n\t    continue;\n\t  if (dss_func_division[i] && mpz_sgn (in2) == 0)\n\t    continue;\n\n\t  (dss_funcs[i]) (ref1, in1, in2);\n\n\t  mpz_set (res1, in1);\n\t  (dss_funcs[i]) (res1, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL (dss, i, in1, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  
(dss_funcs[i]) (res1, in1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL (dss, i, in1, in2, NULL);\n\t}\n\n      for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)\n\t{\n\t  if (ddss_div_funcs[i] == 0)\n\t    continue;\n\t  if (mpz_sgn (in2) == 0)\n\t    continue;\n\n\t  (ddss_div_funcs[i]) (ref1, ref2, in1, in2);\n\n\t  mpz_set (res1, in1);\n\t  (ddss_div_funcs[i]) (res1, res2, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL (ddss_div, i, in1, in2, NULL);\n\n\t  mpz_set (res2, in1);\n\t  (ddss_div_funcs[i]) (res1, res2, res2, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL (ddss_div, i, in1, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  (ddss_div_funcs[i]) (res1, res2, in1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL (ddss_div, i, in1, in2, NULL);\n\n\t  mpz_set (res2, in2);\n\t  (ddss_div_funcs[i]) (res1, res2, in1, res2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL (ddss_div, i, in1, in2, NULL);\n\t}\n\n      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)\n\t{\n\t  if (ds_funcs[i] == 0)\n\t    continue;\n\t  if (strcmp (ds_func_names[i], \"mpz_sqrt\") == 0\n\t      && mpz_sgn (in1) < 0)\n\t    continue;\n\n\t  (ds_funcs[i]) (ref1, in1);\n\n\t  mpz_set (res1, in1);\n\t  (ds_funcs[i]) (res1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL (ds, i, in1, in2, NULL);\n\t}\n\n      in2i = mpz_get_ui (in2);\n\n      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)\n\t{\n\t  if (dsi_funcs[i] == 0)\n\t    continue;\n\t  if (strcmp (dsi_func_names[i], \"mpz_fdiv_q_2exp\") == 0)\n\t    /* Limit exponent to something reasonable for the division\n\t       functions.  Without this, we'd  normally shift things off\n\t       the end and just generate the trivial values 1, 0, -1.  
*/\n\t    in2i %= 0x1000;\n\t  if (strcmp (dsi_func_names[i], \"mpz_mul_2exp\") == 0)\n\t    /* Limit exponent more for mpz_mul_2exp to save time.  */\n\t    in2i %= 0x100;\n\t  if (strcmp (dsi_func_names[i], \"mpz_pow_ui\") == 0)\n\t    /* Limit exponent yet more for mpz_pow_ui to save time.  */\n\t    in2i %= 0x10;\n\n\t  (dsi_funcs[i]) (ref1, in1, in2i);\n\n\t  mpz_set (res1, in1);\n\t  (dsi_funcs[i]) (res1, res1, in2i);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL (dsi, i, in1, in2, NULL);\n\t}\n\n      if (in2i != 0)\t  /* Don't divide by 0.  */\n\t{\n\t  for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_func); i++)\n\t    {\n\t      r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);\n\n\t      mpz_set (res1, in1);\n\t      r2 = (dsi_div_funcs[i]) (res1, res1, in2i);\n\t      if (mpz_cmp (ref1, res1) != 0 || r1 != r2)\n\t\tFAIL (dsi_div, i, in1, in2, NULL);\n\t    }\n\n\t  for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_func); i++)\n\t    {\n\t      r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);\n\n\t      mpz_set (res1, in1);\n\t      r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);\n\t      if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)\n\t\tFAIL (ddsi_div, i, in1, in2, NULL);\n\n\t      mpz_set (res2, in1);\n\t      (ddsi_div_funcs[i]) (res1, res2, res2, in2i);\n\t      if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)\n\t\tFAIL (ddsi_div, i, in1, in2, NULL);\n\t    }\n\t}\n\n      if (mpz_sgn (in1) >= 0)\n\t{\n\t  mpz_sqrtrem (ref1, ref2, in1);\n\n\t  mpz_set (res1, in1);\n\t  mpz_sqrtrem (res1, res2, res1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL2 (mpz_sqrtrem, in1, NULL, NULL);\n\n\t  mpz_set (res2, in1);\n\t  mpz_sqrtrem (res1, res2, res2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL2 (mpz_sqrtrem, in1, NULL, NULL);\n\t}\n\n      if (mpz_sgn (in1) >= 0)\n\t{\n\t  mpz_root (ref1, in1, in2i % 0x1000 + 1);\n\n\t  mpz_set 
(res1, in1);\n\t  mpz_root (res1, res1, in2i % 0x1000 + 1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_root, in1, in2, NULL);\n\t}\n\n      if (mpz_sgn (in1) >= 0)\n\t{\n\t  mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);\n\n\t  mpz_set (res1, in1);\n\t  mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL2 (mpz_rootrem, in1, in2, NULL);\n\n\t  mpz_set (res2, in1);\n\t  mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)\n\t    FAIL2 (mpz_rootrem, in1, in2, NULL);\n\t}\n\n      if (pass < reps / 2)\t/* run fewer tests since gcdext lots of time */\n\t{\n\t  mpz_gcdext (ref1, ref2, ref3, in1, in2);\n\n\t  mpz_set (res1, in1);\n\t  mpz_gcdext (res1, res2, res3, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res2, in1);\n\t  mpz_gcdext (res1, res2, res3, res2, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res3, in1);\n\t  mpz_gcdext (res1, res2, res3, res3, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  mpz_gcdext (res1, res2, res3, in1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res2, in2);\n\t  mpz_gcdext (res1, res2, res3, in1, res2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res3, in2);\n\t  mpz_gcdext (res1, res2, res3, in1, res3);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 
0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res1, in1);\n\t  mpz_gcdext (res1, res2, NULL, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res2, in1);\n\t  mpz_gcdext (res1, res2, NULL, res2, in2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  mpz_gcdext (res1, res2, NULL, in1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\n\t  mpz_set (res2, in2);\n\t  mpz_gcdext (res1, res2, NULL, in1, res2);\n\t  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0\n\t      || mpz_cmp (ref3, res3) != 0)\n\t    FAIL2 (mpz_gcdext, in1, in2, NULL);\n\t}\n\n      /* Don't run mpz_powm for huge exponents or when undefined.  */\n      if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0\n\t  && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))\n\t{\n\t  mpz_powm (ref1, in1, in2, in3);\n\n\t  mpz_set (res1, in1);\n\t  mpz_powm (res1, res1, in2, in3);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_powm, in1, in2, in3);\n\n\t  mpz_set (res1, in2);\n\t  mpz_powm (res1, in1, res1, in3);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_powm, in1, in2, in3);\n\n\t  mpz_set (res1, in3);\n\t  mpz_powm (res1, in1, in2, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_powm, in1, in2, in3);\n\t}\n\n      /* Don't run mpz_powm_ui when undefined.  
*/\n      if (mpz_sgn (in3) != 0)\n\t{\n\t  mpz_powm_ui (ref1, in1, in2i, in3);\n\n\t  mpz_set (res1, in1);\n\t  mpz_powm_ui (res1, res1, in2i, in3);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_powm_ui, in1, in2, in3);\n\n\t  mpz_set (res1, in3);\n\t  mpz_powm_ui (res1, in1, in2i, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_powm_ui, in1, in2, in3);\n\t}\n\n      {\n\tr1 = mpz_gcd_ui (ref1, in1, in2i);\n\n\tmpz_set (res1, in1);\n\tr2 = mpz_gcd_ui (res1, res1, in2i);\n\tif (mpz_cmp (ref1, res1) != 0)\n\t  FAIL2 (mpz_gcd_ui, in1, in2, NULL);\n      }\n\n      if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)\n\t{\n\t  /* Test mpz_remove */\n\t  mpz_remove (ref1, in1, in2);\n\n\t  mpz_set (res1, in1);\n\t  mpz_remove (res1, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_remove, in1, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  mpz_remove (res1, in1, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_remove, in1, in2, NULL);\n\t}\n\n      if (mpz_sgn (in2) != 0)\n\t{\n\t  /* Test mpz_divexact */\n\t  mpz_mul (t, in1, in2);\n\t  mpz_divexact (ref1, t, in2);\n\n\t  mpz_set (res1, t);\n\t  mpz_divexact (res1, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_divexact, t, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  mpz_divexact (res1, t, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_divexact, t, in2, NULL);\n\t}\n\n      if (mpz_sgn (in2) > 0)\n\t{\n\t  /* Test mpz_divexact_gcd, same as mpz_divexact */\n\t  mpz_mul (t, in1, in2);\n\t  mpz_divexact_gcd (ref1, t, in2);\n\n\t  mpz_set (res1, t);\n\t  mpz_divexact_gcd (res1, res1, in2);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_divexact_gcd, t, in2, NULL);\n\n\t  mpz_set (res1, in2);\n\t  mpz_divexact_gcd (res1, t, res1);\n\t  if (mpz_cmp (ref1, res1) != 0)\n\t    FAIL2 (mpz_divexact_gcd, t, in2, NULL);\n\t}\n    }\n\n  if (failures != 0)\n    {\n      fprintf (stderr, \"mpz/reuse: %ld error%s\\n\", failures, \"s\" + (failures 
== 1));\n      exit (1);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (in1);\n  mpz_clear (in2);\n  mpz_clear (in3);\n  mpz_clear (ref1);\n  mpz_clear (ref2);\n  mpz_clear (ref3);\n  mpz_clear (res1);\n  mpz_clear (res2);\n  mpz_clear (res3);\n  mpz_clear (t);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump (char *name, mpz_t in1, mpz_t in2, mpz_t in3)\n{\n  printf (\"failure in %s (\", name);\n  mpz_out_str (stdout, -16, in1);\n  if (in2 != NULL)\n    {\n      printf (\" \");\n      mpz_out_str (stdout, -16, in2);\n    }\n  if (in3 != NULL)\n    {\n      printf (\" \");\n      mpz_out_str (stdout, -16, in3);\n    }\n  printf (\")\\n\");\n}\n\n#endif /* ! DLL_EXPORT */\n"
  },
  {
    "path": "tests/mpz/t-addsub.c",
    "content": "/* Test mpz_add, mpz_sub, mpz_add_ui, mpz_sub_ui, and mpz_ui_sub.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\nvoid debug_mp(mpz_t, int);\nvoid dump_abort(int, char *, mpz_t, mpz_t);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2, r1, r2;\n  mp_size_t op1n, op2n;\n  unsigned long int op2long;\n  int i;\n  int reps = 100000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (op1);\n  mpz_init (op2);\n  mpz_init (r1);\n  mpz_init (r2);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      op1n = mpz_get_ui (bs);\n      mpz_rrandomb (op1, rands, op1n);\n\n      mpz_urandomb (bs, rands, size_range);\n      op2n = mpz_get_ui (bs);\n      mpz_rrandomb (op2, rands, op2n);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = 
mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (op1, op1);\n      if ((bsi & 2) != 0)\n\tmpz_neg (op2, op2);\n\n      /* printf (\"%ld %ld\\n\", SIZ (multiplier), SIZ (multiplicand)); */\n\n      mpz_add (r1, op1, op2);\n      mpz_sub (r2, r1, op2);\n      if (mpz_cmp (r2, op1) != 0)\n\tdump_abort (i, \"mpz_add or mpz_sub incorrect\", op1, op2);\n\n      if (mpz_fits_ulong_p (op2))\n\t{\n\t  op2long = mpz_get_ui (op2);\n\t  mpz_add_ui (r1, op1, op2long);\n\t  mpz_sub_ui (r2, r1, op2long);\n\t  if (mpz_cmp (r2, op1) != 0)\n\t    dump_abort (i, \"mpz_add_ui or mpz_sub_ui incorrect\", op1, op2);\n\n\t  mpz_ui_sub (r1, op2long, op1);\n\t  mpz_sub_ui (r2, op1, op2long);\n\t  mpz_neg (r2, r2);\n\t  if (mpz_cmp (r1, r2) != 0)\n\t    dump_abort (i, \"mpz_add_ui or mpz_ui_sub incorrect\", op1, op2);\n\t}\n    }\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  mpz_clear (r1);\n  mpz_clear (r2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (int i, char *s, mpz_t op1, mpz_t op2)\n{\n  fprintf (stderr, \"ERROR: %s in test %d\\n\", s, i);\n  fprintf (stderr, \"op1 = \"); debug_mp (op1, -16);\n  fprintf (stderr, \"op2 = \"); debug_mp (op2, -16);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-aorsmul.c",
    "content": "/* Test mpz_addmul, mpz_addmul_ui, mpz_submul, mpz_submul_ui.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define M GMP_NUMB_MAX\n\n\nvoid\ncheck_one_inplace (mpz_srcptr w, mpz_srcptr y)\n{\n  mpz_t  want, got;\n\n  mpz_init (want);\n  mpz_init (got);\n\n  mpz_mul (want, w, y);\n  mpz_add (want, w, want);\n  mpz_set (got, w);\n  mpz_addmul (got, got, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_addmul inplace fail\\n\");\n    fail:\n      mpz_trace (\"w\", w);\n      mpz_trace (\"y\", y);\n      mpz_trace (\"want\", want);\n      mpz_trace (\"got \", got);\n      abort ();\n    }\n\n  mpz_mul (want, w, y);\n  mpz_sub (want, w, want);\n  mpz_set (got, w);\n  mpz_submul (got, got, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_submul inplace fail\\n\");\n      goto fail;\n    }\n\n  mpz_clear (want);\n  mpz_clear (got);\n}\n\nvoid\ncheck_one_ui_inplace (mpz_ptr w, unsigned long y)\n{\n  mpz_t  want, got;\n\n  
mpz_init (want);\n  mpz_init (got);\n\n  mpz_mul_ui (want, w, (unsigned long) y);\n  mpz_add (want, w, want);\n  mpz_set (got, w);\n  mpz_addmul_ui (got, got, (unsigned long) y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_addmul_ui fail\\n\");\n    fail:\n      mpz_trace (\"w\", w);\n      printf    (\"y=0x%lX   %lu\\n\", y, y);\n      mpz_trace (\"want\", want);\n      mpz_trace (\"got \", got);\n      abort ();\n    }\n\n  mpz_mul_ui (want, w, y);\n  mpz_sub (want, w, want);\n  mpz_set (got, w);\n  mpz_submul_ui (got, got, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_submul_ui fail\\n\");\n      goto fail;\n    }\n\n  mpz_clear (want);\n  mpz_clear (got);\n}\n\nvoid\ncheck_all_inplace (mpz_ptr w, mpz_ptr y)\n{\n  int  wneg, yneg;\n\n  MPZ_CHECK_FORMAT (w);\n  MPZ_CHECK_FORMAT (y);\n\n  for (wneg = 0; wneg < 2; wneg++)\n    {\n      for (yneg = 0; yneg < 2; yneg++)\n        {\n          check_one_inplace (w, y);\n\n          if (mpz_fits_ulong_p (y))\n            check_one_ui_inplace (w, mpz_get_ui (y));\n\n          mpz_neg (y, y);\n        }\n      mpz_neg (w, w);\n    }\n}\n\nvoid\ncheck_one (mpz_srcptr w, mpz_srcptr x, mpz_srcptr y)\n{\n  mpz_t  want, got;\n\n  mpz_init (want);\n  mpz_init (got);\n\n  mpz_mul (want, x, y);\n  mpz_add (want, w, want);\n  mpz_set (got, w);\n  mpz_addmul (got, x, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_addmul fail\\n\");\n    fail:\n      mpz_trace (\"w\", w);\n      mpz_trace (\"x\", x);\n      mpz_trace (\"y\", y);\n      mpz_trace (\"want\", want);\n      mpz_trace (\"got \", got);\n      abort ();\n    }\n\n  mpz_mul (want, x, y);\n  mpz_sub (want, w, want);\n  mpz_set (got, w);\n  mpz_submul (got, x, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_submul fail\\n\");\n      goto fail;\n    }\n\n  mpz_clear (want);\n  mpz_clear 
(got);\n}\n\nvoid\ncheck_one_ui (mpz_ptr w, mpz_ptr x, unsigned long y)\n{\n  mpz_t  want, got;\n\n  mpz_init (want);\n  mpz_init (got);\n\n  mpz_mul_ui (want, x, (unsigned long) y);\n  mpz_add (want, w, want);\n  mpz_set (got, w);\n  mpz_addmul_ui (got, x, (unsigned long) y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_addmul_ui fail\\n\");\n    fail:\n      mpz_trace (\"w\", w);\n      mpz_trace (\"x\", x);\n      printf    (\"y=0x%lX   %lu\\n\", y, y);\n      mpz_trace (\"want\", want);\n      mpz_trace (\"got \", got);\n      abort ();\n    }\n\n  mpz_mul_ui (want, x, y);\n  mpz_sub (want, w, want);\n  mpz_set (got, w);\n  mpz_submul_ui (got, x, y);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (want, got) != 0)\n    {\n      printf (\"mpz_submul_ui fail\\n\");\n      goto fail;\n    }\n\n  mpz_clear (want);\n  mpz_clear (got);\n}\n\n\nvoid\ncheck_all (mpz_ptr w, mpz_ptr x, mpz_ptr y)\n{\n  int    swap, wneg, xneg, yneg;\n\n  MPZ_CHECK_FORMAT (w);\n  MPZ_CHECK_FORMAT (x);\n  MPZ_CHECK_FORMAT (y);\n\n  for (swap = 0; swap < 2; swap++)\n    {\n      for (wneg = 0; wneg < 2; wneg++)\n        {\n          for (xneg = 0; xneg < 2; xneg++)\n            {\n              for (yneg = 0; yneg < 2; yneg++)\n                {\n                  check_one (w, x, y);\n\n                  if (mpz_fits_ulong_p (y))\n                    check_one_ui (w, x, mpz_get_ui (y));\n\n                  mpz_neg (y, y);\n                }\n              mpz_neg (x, x);\n            }\n          mpz_neg (w, w);\n        }\n      mpz_swap (x, y);\n    }\n}\n\nvoid\ncheck_data_inplace_ui (void)\n{\n  static const struct {\n    mp_limb_t      w[6];\n    unsigned long  y;\n\n  } data[] = {\n\n    { { 0 }, 0 },\n    { { 0 }, 1 },\n    { { 1 }, 1 },\n    { { 2 }, 1 },\n\n    { { 123 }, 1 },\n    { { 123 }, ULONG_MAX },\n    { { M }, 1 },\n    { { M }, ULONG_MAX },\n\n    { { 123, 456 }, 1 },\n    { { M, M }, 1 },\n    { { 123, 456 }, ULONG_MAX },\n    { { 
M, M }, ULONG_MAX },\n\n    { { 123, 456, 789 }, 1 },\n    { { M, M, M }, 1 },\n    { { 123, 456, 789 }, ULONG_MAX },\n    { { M, M, M }, ULONG_MAX },\n  };\n\n  mpz_t  w, y;\n  int    i;\n\n  mpz_init (w);\n  mpz_init (y);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));\n      mpz_set_ui (y, data[i].y);\n      check_all_inplace (w, y);\n    }\n\n  mpz_clear (w);\n  mpz_clear (y);\n}\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    mp_limb_t  w[6];\n    mp_limb_t  x[6];\n    mp_limb_t  y[6];\n\n  } data[] = {\n\n    /* reducing to zero */\n    { { 1 }, { 1 }, { 1 } },\n    { { 2 }, { 1 }, { 2 } },\n    { { 0,1 }, { 0,1 }, { 1 } },\n\n    /* reducing to 1 */\n    { { 0,1 },       { M },       { 1 } },\n    { { 0,0,1 },     { M,M },     { 1 } },\n    { { 0,0,0,1 },   { M,M,M },   { 1 } },\n    { { 0,0,0,0,1 }, { M,M,M,M }, { 1 } },\n\n    /* reducing to -1 */\n    { { M },       { 0,1 },       { 1 } },\n    { { M,M },     { 0,0,1 },     { 1 } },\n    { { M,M,M },   { 0,0,0,1 },   { 1 } },\n    { { M,M,M,M }, { 0,0,0,0,1 }, { 1 } },\n\n    /* carry out of addmul */\n    { { M },     { 1 }, { 1 } },\n    { { M,M },   { 1 }, { 1 } },\n    { { M,M,M }, { 1 }, { 1 } },\n\n    /* borrow from submul */\n    { { 0,1 },     { 1 }, { 1 } },\n    { { 0,0,1 },   { 1 }, { 1 } },\n    { { 0,0,0,1 }, { 1 }, { 1 } },\n\n    /* borrow from submul */\n    { { 0,0,1 },     { 0,1 }, { 1 } },\n    { { 0,0,0,1 },   { 0,1 }, { 1 } },\n    { { 0,0,0,0,1 }, { 0,1 }, { 1 } },\n\n    /* more borrow from submul */\n    { { M }, { 0,1 },       { 1 } },\n    { { M }, { 0,0,1 },     { 1 } },\n    { { M }, { 0,0,0,1 },   { 1 } },\n    { { M }, { 0,0,0,0,1 }, { 1 } },\n\n    /* big borrow from submul */\n    { { 0,0,1 },     { M,M }, { M } },\n    { { 0,0,0,1 },   { M,M }, { M } },\n    { { 0,0,0,0,1 }, { M,M }, { M } },\n\n    /* small w */\n    { { 0,1 }, { M,M },       { M } },\n    { { 0,1 }, { M,M,M },     { M } },\n 
   { { 0,1 }, { M,M,M,M },   { M } },\n    { { 0,1 }, { M,M,M,M,M }, { M } },\n  };\n\n  mpz_t  w, x, y;\n  int    i;\n\n  mpz_init (w);\n  mpz_init (x);\n  mpz_init (y);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));\n      mpz_set_n (x, data[i].x, (mp_size_t) numberof(data[i].x));\n      mpz_set_n (y, data[i].y, (mp_size_t) numberof(data[i].y));\n      check_all (w, x, y);\n    }\n\n  mpz_clear (w);\n  mpz_clear (x);\n  mpz_clear (y);\n}\n\n\nvoid\ncheck_random (int argc, char *argv[])\n{\n  gmp_randstate_t rands;\n  mpz_t  w, x, y;\n  int    i, reps = 2000;\n\n  gmp_randinit_default(rands);\n  mpz_init (w);\n  mpz_init (x);\n  mpz_init (y);\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_errandomb (w, rands, 5*BITS_PER_MP_LIMB);\n      mpz_errandomb (x, rands, 5*BITS_PER_MP_LIMB);\n      mpz_errandomb (y, rands, 5*BITS_PER_MP_LIMB);\n      check_all (w, x, y);\n      check_all_inplace (w, y);\n\n      mpz_errandomb (w, rands, 5*BITS_PER_MP_LIMB);\n      mpz_errandomb (x, rands, 5*BITS_PER_MP_LIMB);\n      mpz_errandomb (y, rands, BITS_PER_ULONG);\n      check_all (w, x, y);\n      check_all_inplace (w, y);\n    }\n\n  mpz_clear (w);\n  mpz_clear (x);\n  mpz_clear (y);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_data ();\n  check_data_inplace_ui ();\n  check_random (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-bin.c",
    "content": "/* Exercise mpz_bin_ui and mpz_bin_uiui.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ntry_mpz_bin_ui (mpz_srcptr want, mpz_srcptr n, unsigned long k)\n{\n  mpz_t  got;\n\n  mpz_init (got);\n  mpz_bin_ui (got, n, k);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (got, want) != 0)\n    {\n      printf (\"mpz_bin_ui wrong\\n\");\n      printf (\"  n=\"); mpz_out_str (stdout, 10, n); printf (\"\\n\");\n      printf (\"  k=%lu\\n\", k);\n      printf (\"  got=\"); mpz_out_str (stdout, 10, got); printf (\"\\n\");\n      printf (\"  want=\"); mpz_out_str (stdout, 10, want); printf (\"\\n\");\n      abort();                                    \n    }\n  mpz_clear (got);\n}\n\n\nvoid\ntry_mpz_bin_uiui (mpz_srcptr want, unsigned long n, unsigned long k)\n{\n  mpz_t  got;\n\n  mpz_init (got);\n  mpz_bin_uiui (got, n, k);\n  MPZ_CHECK_FORMAT (got);\n  if (mpz_cmp (got, want) != 0)\n    {\n      printf (\"mpz_bin_uiui wrong\\n\");\n      printf (\"  n=%lu\\n\", n);\n      printf (\"  k=%lu\\n\", k);\n      printf (\"  got=\"); mpz_out_str 
(stdout, 10, got); printf (\"\\n\");\n      printf (\"  want=\"); mpz_out_str (stdout, 10, want); printf (\"\\n\");\n      abort();                                    \n    }\n  mpz_clear (got);\n}\n\n\nvoid\nsamples (void)\n{\n  static const struct {\n    const char     *n;\n    unsigned long  k;\n    const char     *want;\n  } data[] = {\n\n    {   \"0\",  0, \"1\"   },\n    {   \"0\",  1, \"0\"   },\n    {   \"0\",  2, \"0\"   },\n    {   \"0\",  3, \"0\"   },\n    {   \"0\",  4, \"0\"   },\n    {   \"0\", 123456, \"0\" },\n\n    {   \"1\",  0, \"1\"   },\n    {   \"1\",  1, \"1\"   },\n    {   \"1\",  2, \"0\"   },\n    {   \"1\",  3, \"0\"   },\n    {   \"1\",  4, \"0\"   },\n    {   \"1\", 123456, \"0\" },\n\n    {   \"2\",  0, \"1\"   },\n    {   \"2\",  1, \"2\"   },\n    {   \"2\",  2, \"1\"   },\n    {   \"2\",  3, \"0\"   },\n    {   \"2\",  4, \"0\"   },\n    {   \"2\", 123456, \"0\" },\n\n    {   \"3\",  0, \"1\"   },\n    {   \"3\",  1, \"3\"   },\n    {   \"3\",  2, \"3\"   },\n    {   \"3\",  3, \"1\"   },\n    {   \"3\",  4, \"0\"   },\n    {   \"3\",  5, \"0\"   },\n    {   \"3\", 123456, \"0\" },\n\n    {   \"4\",  0, \"1\"   },\n    {   \"4\",  1, \"4\"   },\n    {   \"4\",  2, \"6\"   },\n    {   \"4\",  3, \"4\"   },\n    {   \"4\",  4, \"1\"   },\n    {   \"4\",  5, \"0\"   },\n    {   \"4\",  6, \"0\"   },\n    {   \"4\", 123456, \"0\" },\n\n    {   \"10\",  0, \"1\"   },\n    {   \"10\",  1, \"10\"  },\n    {   \"10\",  2, \"45\"  },\n    {   \"10\",  3, \"120\" },\n    {   \"10\",  4, \"210\" },\n    {   \"10\",  5, \"252\" },\n    {   \"10\",  6, \"210\" },\n    {   \"10\",  7, \"120\" },\n    {   \"10\",  8, \"45\"  },\n    {   \"10\",  9, \"10\"  },\n    {   \"10\", 10, \"1\"   },\n    {   \"10\", 11,     \"0\" },\n    {   \"10\", 12,     \"0\" },\n    {   \"10\", 123456, \"0\" },\n\n    /* negatives, using bin(-n,k)=bin(n+k-1,k) */\n    {   \"-1\",  0,  \"1\"  },\n    {   \"-1\",  1, \"-1\"  },\n    {   \"-1\",  2,  \"1\"  },\n    {   
\"-1\",  3, \"-1\"  },\n    {   \"-1\",  4,  \"1\"  },\n\n    {   \"-2\",  0,  \"1\"  },\n    {   \"-2\",  1, \"-2\"  },\n    {   \"-2\",  2,  \"3\"  },\n    {   \"-2\",  3, \"-4\"  },\n    {   \"-2\",  4,  \"5\"  },\n    {   \"-2\",  5, \"-6\"  },\n    {   \"-2\",  6,  \"7\"  },\n\n    {   \"-3\",  0,   \"1\"  },\n    {   \"-3\",  1,  \"-3\"  },\n    {   \"-3\",  2,   \"6\"  },\n    {   \"-3\",  3, \"-10\"  },\n    {   \"-3\",  4,  \"15\"  },\n    {   \"-3\",  5, \"-21\"  },\n    {   \"-3\",  6,  \"28\"  },\n\n    {   \"40\", 20,  \"137846528820\" },\n    {   \"60\", 30,  \"118264581564861424\" },\n  };\n\n  mpz_t  n, want;\n  int    i;\n\n  mpz_init (n);\n  mpz_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (n, data[i].n, 0);\n      mpz_set_str_or_abort (want, data[i].want, 0);\n\n      try_mpz_bin_ui (want, n, data[i].k);\n\n      if (mpz_fits_ulong_p (n))\n        try_mpz_bin_uiui (want, mpz_get_ui (n), data[i].k);\n    }\n\n  mpz_clear (n);\n  mpz_clear (want);\n}\n\n\n/* Test some bin(2k,k) cases.  This produces some biggish numbers to\n   exercise the limb accumulating code.  */\nvoid\ntwos (void)\n{\n  mpz_t          n, want;\n  unsigned long  k;\n\n  mpz_init (n);\n  mpz_init (want);\n\n  mpz_set_ui (want, (unsigned long) 2);\n  for (k = 1; k < 200; k++)\n    {\n      mpz_set_ui (n, 2*k);\n      try_mpz_bin_ui (want, n, k);\n\n      try_mpz_bin_uiui (want, 2*k, k);\n\n      mpz_mul_ui (want, want, 2*(2*k+1));\n      mpz_fdiv_q_ui (want, want, k+1);\n    }\n\n  mpz_clear (n);\n  mpz_clear (want);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  samples ();\n  twos ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-cdiv_ui.c",
    "content": "/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_cdiv_qr_ui, mpz_cdiv_q_ui,\n   mpz_cdiv_r_ui, , mpz_cdiv_ui, mpz_mul_ui.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(char *, mpz_t, mpir_ui);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t dividend;\n  mpz_t quotient, remainder;\n  mpz_t quotient2, remainder2;\n  mpz_t temp;\n  mp_size_t dividend_size;\n  mpir_ui divisor;\n  int i;\n  int reps = 10000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  mpir_ui bsi, size_range;\n  mpir_ui r_rq, r_q, r_r, r;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (dividend);\n  mpz_init (quotient);\n  mpz_init (remainder);\n  mpz_init (quotient2);\n  mpz_init (remainder2);\n  mpz_init (temp);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */\n\n      do\n\t{\n\t  mpz_rrandomb (bs, rands, 64);\n\t  divisor = mpz_get_ui (bs);\n\t}\n      while (divisor == 0);\n\n      mpz_urandomb (bs, rands, size_range);\n      dividend_size = mpz_get_ui (bs);\n      mpz_rrandomb (dividend, rands, dividend_size);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (dividend, dividend);\n\n      /* printf (\"%ld\\n\", SIZ (dividend)); */\n\n      r_rq = mpz_cdiv_qr_ui (quotient, remainder, dividend, divisor);\n      r_q = mpz_cdiv_q_ui (quotient2, dividend, divisor);\n      r_r = mpz_cdiv_r_ui (remainder2, dividend, divisor);\n      r = mpz_cdiv_ui (dividend, divisor);\n\n      /* First determine that the quotients and remainders computed\n\t with different functions are equal.  
*/\n      if (mpz_cmp (quotient, quotient2) != 0)\n\tdump_abort (\"quotients from mpz_cdiv_qr_ui and mpz_cdiv_q_ui differ\",\n\t\t    dividend, divisor);\n      if (mpz_cmp (remainder, remainder2) != 0)\n\tdump_abort (\"remainders from mpz_cdiv_qr_ui and mpz_cdiv_r_ui differ\",\n\t\t    dividend, divisor);\n\n      /* Check if the sign of the quotient is correct.  */\n      if (mpz_cmp_ui (quotient, 0) != 0)\n\tif ((mpz_cmp_ui (quotient, 0) < 0)\n\t    != (mpz_cmp_ui (dividend, 0) < 0))\n\tdump_abort (\"quotient sign wrong\", dividend, divisor);\n\n      /* Check if the remainder has the opposite sign as the (positive) divisor\n\t (quotient rounded towards minus infinity).  */\n      if (mpz_cmp_ui (remainder, 0) != 0)\n\tif (mpz_cmp_ui (remainder, 0) > 0)\n\t  dump_abort (\"remainder sign wrong\", dividend, divisor);\n\n      mpz_mul_ui (temp, quotient, divisor);\n      mpz_add (temp, temp, remainder);\n      if (mpz_cmp (temp, dividend) != 0)\n\tdump_abort (\"n mod d != n - [n/d]*d\", dividend, divisor);\n\n      mpz_abs (remainder, remainder);\n      if (mpz_cmp_ui (remainder, divisor) >= 0)\n\tdump_abort (\"remainder greater than divisor\", dividend, divisor);\n\n      if (mpz_cmp_ui (remainder, r_rq) != 0)\n\tdump_abort (\"remainder returned from mpz_cdiv_qr_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_q) != 0)\n\tdump_abort (\"remainder returned from mpz_cdiv_q_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_r) != 0)\n\tdump_abort (\"remainder returned from mpz_cdiv_r_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r) != 0)\n\tdump_abort (\"remainder returned from mpz_cdiv_ui is wrong\",\n\t\t    dividend, divisor);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (dividend);\n  mpz_clear (quotient);\n  mpz_clear (remainder);\n  mpz_clear (quotient2);\n  mpz_clear (remainder2);\n  mpz_clear (temp);\n  gmp_randclear(rands);\n  tests_end ();\n  exit 
(0);\n}\n\nvoid\ndump_abort (char *str, mpz_t dividend, mpir_ui divisor)\n{\n  fprintf (stderr, \"ERROR: %s\\n\", str);\n  fprintf (stderr, \"dividend = \"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = %lX\\n\", divisor);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-cmp.c",
    "content": "/* Test mpz_cmp and mpz_cmpabs.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Nothing sophisticated here, just exercise some combinations of sizes and\n   signs.  
*/\n\n\nvoid\ncheck_one (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)\n{\n  int  got;\n\n  got = mpz_cmp (x, y);\n  if ((   got <  0) != (want_cmp <  0)\n      || (got == 0) != (want_cmp == 0)\n      || (got >  0) != (want_cmp >  0))\n    {\n      printf (\"mpz_cmp got %d want %d\\n\", got, want_cmp);\n      mpz_trace (\"x\", x);\n      mpz_trace (\"y\", y);\n      abort ();\n    }\n\n  got = mpz_cmpabs (x, y);\n  if ((   got <  0) != (want_cmpabs <  0)\n      || (got == 0) != (want_cmpabs == 0)\n      || (got >  0) != (want_cmpabs >  0))\n    {\n      printf (\"mpz_cmpabs got %d want %d\\n\", got, want_cmpabs);\n      mpz_trace (\"x\", x);\n      mpz_trace (\"y\", y);\n      abort ();\n    }\n}\n\n\nvoid\ncheck_all (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)\n{\n  check_one (x, y,  want_cmp,  want_cmpabs);\n  check_one (y, x, -want_cmp, -want_cmpabs);\n\n  mpz_neg (x, x);\n  mpz_neg (y, y);\n  want_cmp = -want_cmp;\n\n  check_one (x, y,  want_cmp,  want_cmpabs);\n  check_one (y, x, -want_cmp, -want_cmpabs);\n}\n\n\n#define SET1(z,size, n) \\\n  SIZ(z) = size; PTR(z)[0] = n\n\n#define SET2(z,size, n1,n0) \\\n  SIZ(z) = size; PTR(z)[1] = n1; PTR(z)[0] = n0\n\n#define SET4(z,size, n3,n2,n1,n0) \\\n  SIZ(z) = size; PTR(z)[3] = n3; PTR(z)[2] = n2; PTR(z)[1] = n1; PTR(z)[0] = n0\n\nvoid\ncheck_various (void)\n{\n  mpz_t  x, y;\n\n  mpz_init (x);\n  mpz_init (y);\n\n  mpz_realloc (x, (mp_size_t) 20);\n  mpz_realloc (y, (mp_size_t) 20);\n\n  /* 0 cmp 0, junk in low limbs */\n  SET1 (x,0, 123);\n  SET1 (y,0, 456);\n  check_all (x, y, 0, 0);\n\n\n  /* 123 cmp 0 */\n  SET1 (x,1, 123);\n  SET1 (y,0, 456);\n  check_all (x, y, 1, 1);\n\n  /* 123:456 cmp 0 */\n  SET2 (x,2, 456,123);\n  SET1 (y,0, 9999);\n  check_all (x, y, 1, 1);\n\n\n  /* 123 cmp 123 */\n  SET1(x,1, 123);\n  SET1(y,1, 123);\n  check_all (x, y, 0, 0);\n\n  /* -123 cmp 123 */\n  SET1(x,-1, 123);\n  SET1(y,1,  123);\n  check_all (x, y, -1, 0);\n\n\n  /* 123 cmp 456 */\n  SET1(x,1, 123);\n  
SET1(y,1, 456);\n  check_all (x, y, -1, -1);\n\n  /* -123 cmp 456 */\n  SET1(x,-1, 123);\n  SET1(y,1,  456);\n  check_all (x, y, -1, -1);\n\n  /* 123 cmp -456 */\n  SET1(x,1,  123);\n  SET1(y,-1, 456);\n  check_all (x, y, 1, -1);\n\n\n  /* 1:0 cmp 1:0 */\n  SET2 (x,2, 1,0);\n  SET2 (y,2, 1,0);\n  check_all (x, y, 0, 0);\n\n  /* -1:0 cmp 1:0 */\n  SET2 (x,-2, 1,0);\n  SET2 (y,2,  1,0);\n  check_all (x, y, -1, 0);\n\n\n  /* 2:0 cmp 1:0 */\n  SET2 (x,2, 2,0);\n  SET2 (y,2, 1,0);\n  check_all (x, y, 1, 1);\n\n\n  /* 4:3:2:1 cmp 2:1 */\n  SET4 (x,4, 4,3,2,1);\n  SET2 (y,2, 2,1);\n  check_all (x, y, 1, 1);\n\n  /* -4:3:2:1 cmp 2:1 */\n  SET4 (x,-4, 4,3,2,1);\n  SET2 (y,2,  2,1);\n  check_all (x, y, -1, 1);\n\n\n  mpz_clear (x);\n  mpz_clear (y);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_various ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-cmp_d.c",
    "content": "/* Test mpz_cmp_d and mpz_cmpabs_d.\n\nCopyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* FIXME: Not sure if the tests here are exhaustive.  Ought to try to get\n   each possible exit from mpz_cmp_d (and mpz_cmpabs_d) exercised.  */\n\n\n#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? 
-1 : 0)\n\n\nvoid\ncheck_one (const char *name, mpz_srcptr x, double y, int cmp, int cmpabs)\n{\n  int   got;\n\n  got = mpz_cmp_d (x, y);\n  if (SGN(got) != cmp)\n    {\n      int i;\n      printf    (\"mpz_cmp_d wrong (from %s)\\n\", name);\n      printf    (\"  got  %d\\n\", got);\n      printf    (\"  want %d\\n\", cmp);\n    fail:\n      mpz_trace (\"  x\", x);\n      printf    (\"  y %g\\n\", y);\n      mp_trace_base=-16;\n      mpz_trace (\"  x\", x);\n      printf    (\"  y %g\\n\", y);\n      printf    (\"  y\");\n      for (i = 0; i < sizeof(y); i++)\n        printf (\" %02X\", (unsigned) ((unsigned char *) &y)[i]);\n      printf (\"\\n\");\n      abort ();\n    }\n\n  got = mpz_cmpabs_d (x, y);\n  if (SGN(got) != cmpabs)\n    {\n      printf    (\"mpz_cmpabs_d wrong\\n\");\n      printf    (\"  got  %d\\n\", got);\n      printf    (\"  want %d\\n\", cmpabs);\n      goto fail;\n    }\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *x;\n    double      y;\n    int         cmp, cmpabs;\n\n  } data[] = {\n\n    {  \"0\",  0.0,  0,  0 },\n\n    {  \"1\",  0.0,  1,  1 },\n    { \"-1\",  0.0, -1,  1 },\n\n    {  \"0\",  1.0, -1, -1 },\n    {  \"0\", -1.0,  1, -1 },\n\n    {  \"0x1000000000000000000000000000000000000000000000000\", 0.0,  1, 1 },\n    { \"-0x1000000000000000000000000000000000000000000000000\", 0.0, -1, 1 },\n\n    {  \"0\",  1e100, -1, -1 },\n    {  \"0\", -1e100,  1, -1 },\n\n    {  \"2\",  1.5,   1,  1 },\n    {  \"2\", -1.5,   1,  1 },\n    { \"-2\",  1.5,  -1,  1 },\n    { \"-2\", -1.5,  -1,  1 },\n  };\n\n  mpz_t  x;\n  int    i;\n\n  mpz_init (x);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (x, data[i].x, 0);\n      check_one (\"check_data\", x, data[i].y, data[i].cmp, data[i].cmpabs);\n    }\n\n  mpz_clear (x);\n}\n\n\n/* Equality of integers with up to 53 bits */\nvoid\ncheck_onebits (void)\n{\n  mpz_t   x, x2;\n  double  y;\n  int     i;\n\n  mpz_init_set_ui (x, 0L);\n  
mpz_init (x2);\n\n  for (i = 0; i < 512; i++)\n    {\n      mpz_mul_2exp (x, x, 1);\n      mpz_add_ui (x, x, 1L);\n\n      y = mpz_get_d (x);\n      mpz_set_d (x2, y);\n\n      /* stop if any truncation is occurring */\n      if (mpz_cmp (x, x2) != 0)\n        break;\n\n      check_one (\"check_onebits\", x, y, 0, 0);\n      check_one (\"check_onebits\", x, -y, 1, 0);\n      mpz_neg (x, x);\n      check_one (\"check_onebits\", x, y, -1, 0);\n      check_one (\"check_onebits\", x, -y, 0, 0);\n      mpz_neg (x, x);\n    }\n\n  mpz_clear (x);\n  mpz_clear (x2);\n}\n\n\n/* With the mpz differing by 1, in a limb position possibly below the double */\nvoid\ncheck_low_z_one (void)\n{\n  mpz_t          x;\n  double         y;\n  unsigned long  i;\n\n  mpz_init (x);\n\n  /* FIXME: It'd be better to base this on the float format. */\n#define LIM 512\n\n  for (i = 1; i < LIM; i++)\n    {\n      mpz_set_ui (x, 1L);\n      mpz_mul_2exp (x, x, i);\n      y = mpz_get_d (x);\n\n      check_one (\"check_low_z_one\", x, y,   0, 0);\n      check_one (\"check_low_z_one\", x, -y,  1, 0);\n      mpz_neg (x, x);\n      check_one (\"check_low_z_one\", x, y,  -1, 0);\n      check_one (\"check_low_z_one\", x, -y,  0, 0);\n      mpz_neg (x, x);\n\n      mpz_sub_ui (x, x, 1);\n\n      check_one (\"check_low_z_one\", x, y,  -1, -1);\n      check_one (\"check_low_z_one\", x, -y,  1, -1);\n      mpz_neg (x, x);\n      check_one (\"check_low_z_one\", x, y,  -1, -1);\n      check_one (\"check_low_z_one\", x, -y,  1, -1);\n      mpz_neg (x, x);\n\n      mpz_add_ui (x, x, 2);\n\n      check_one (\"check_low_z_one\", x, y,   1, 1);\n      check_one (\"check_low_z_one\", x, -y,  1, 1);\n      mpz_neg (x, x);\n      check_one (\"check_low_z_one\", x, y,  -1, 1);\n      check_one (\"check_low_z_one\", x, -y, -1, 1);\n      mpz_neg (x, x);\n    }\n\n  mpz_clear (x);\n}\n\n/* Comparing 1 and 1+2^-n.  
\"y\" is volatile to make gcc store and fetch it,\n   which forces it to a 64-bit double, whereas on x86 it would otherwise\n   remain on the float stack as an 80-bit long double.  */\nvoid\ncheck_one_2exp (void)\n{\n  double           e;\n  mpz_t            x;\n  volatile double  y;\n  int              i;\n\n  mpz_init (x);\n\n  e = 1.0;\n  for (i = 0; i < 128; i++)\n    {\n      e /= 2.0;\n      y = 1.0 + e;\n      if (y == 1.0)\n        break;\n\n      mpz_set_ui (x, 1L);\n      check_one (\"check_one_2exp\", x,  y, -1, -1);\n      check_one (\"check_one_2exp\", x, -y,  1, -1);\n\n      mpz_set_si (x, -1L);\n      check_one (\"check_one_2exp\", x,  y, -1, -1);\n      check_one (\"check_one_2exp\", x, -y,  1, -1);\n    }\n\n  mpz_clear (x);\n}\n\nvoid\ncheck_infinity (void)\n{\n  mpz_t   x;\n  double  y = tests_infinity_d ();\n  if (y == 0.0)\n    return;\n\n  mpz_init (x);\n\n  /* 0 cmp inf */\n  mpz_set_ui (x, 0L);\n  check_one (\"check_infinity\", x,  y, -1, -1);\n  check_one (\"check_infinity\", x, -y,  1, -1);\n\n  /* 123 cmp inf */\n  mpz_set_ui (x, 123L);\n  check_one (\"check_infinity\", x,  y, -1, -1);\n  check_one (\"check_infinity\", x, -y,  1, -1);\n\n  /* -123 cmp inf */\n  mpz_set_si (x, -123L);\n  check_one (\"check_infinity\", x,  y, -1, -1);\n  check_one (\"check_infinity\", x, -y,  1, -1);\n\n  /* 2^5000 cmp inf */\n  mpz_set_ui (x, 1L);\n  mpz_mul_2exp (x, x, 5000L);\n  check_one (\"check_infinity\", x,  y, -1, -1);\n  check_one (\"check_infinity\", x, -y,  1, -1);\n\n  /* -2^5000 cmp inf */\n  mpz_neg (x, x);\n  check_one (\"check_infinity\", x,  y, -1, -1);\n  check_one (\"check_infinity\", x, -y,  1, -1);\n\n  mpz_clear (x);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_data ();\n  check_onebits ();\n  check_low_z_one ();\n  check_one_2exp ();\n  check_infinity ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-cmp_si.c",
    "content": "/* Test mpz_cmp_si.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 
0 : 1)\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *a, *b;\n    int         want;\n  } data[] = {\n    { \"0\",  \"1\", -1 },\n    { \"0\",  \"0\",  0 },\n    { \"0\", \"-1\",  1 },\n\n    { \"1\",  \"1\", 0 },\n    { \"1\",  \"0\", 1 },\n    { \"1\", \"-1\", 1 },\n\n    { \"-1\",  \"1\", -1 },\n    { \"-1\",  \"0\", -1 },\n    { \"-1\", \"-1\", 0 },\n\n    {           \"0\", \"-0x80000000\",  1 },\n    {  \"0x80000000\", \"-0x80000000\",  1 },\n    {  \"0x80000001\", \"-0x80000000\",  1 },\n    { \"-0x80000000\", \"-0x80000000\",  0 },\n    { \"-0x80000001\", \"-0x80000000\", -1 },\n\n    {                   \"0\", \"-0x8000000000000000\",  1 },\n    {  \"0x8000000000000000\", \"-0x8000000000000000\",  1 },\n    {  \"0x8000000000000001\", \"-0x8000000000000000\",  1 },\n    { \"-0x8000000000000000\", \"-0x8000000000000000\",  0 },\n    { \"-0x8000000000000001\", \"-0x8000000000000000\", -1 },\n  };\n\n  mpz_t  a, bz;\n  long   b;\n  int    got;\n  int    i;\n\n  mpz_init (a);\n  mpz_init (bz);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (bz, data[i].b, 0);\n\n      if (mpz_fits_slong_p (bz))\n        {\n          b = mpz_get_si (bz);\n          got = mpz_cmp_si (a, b);\n          if (SGN (got) != data[i].want)\n            {\n              printf (\"mpz_cmp_si wrong on data[%d]\\n\", i);\n              printf (\"  a=\"); mpz_out_str (stdout, 10, a); printf (\"\\n\");\n              printf (\"  b=%ld\\n\", b);\n              printf (\"  got=%d\\n\", got);\n              printf (\"  want=%d\\n\", data[i].want);\n              abort();                                    \n            }\n        }\n    }\n\n  mpz_clear (a);\n  mpz_clear (bz);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-cong.c",
    "content": "/* test mpz_congruent_p and mpz_congruent_ui_p\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d, int want)\n{\n  int   got;\n  int   swap;\n\n  for (swap = 0; swap <= 1; swap++)\n    {\n      got = (mpz_congruent_p (a, c, d) != 0);\n      if (want != got)\n        {\n          printf (\"mpz_congruent_p wrong\\n\");\n          printf (\"   expected %d got %d\\n\", want, got);\n          mpz_trace (\"   a\", a);\n          mpz_trace (\"   c\", c);\n          mpz_trace (\"   d\", d);\n          mp_trace_base = -16;\n          mpz_trace (\"   a\", a);\n          mpz_trace (\"   c\", c);\n          mpz_trace (\"   d\", d);\n          abort ();\n        }\n\n      if (mpz_fits_ulong_p (c) && mpz_fits_ulong_p (d))\n        {\n          unsigned long  uc = mpz_get_ui (c);\n          unsigned long  ud = mpz_get_ui (d);\n          got = (mpz_congruent_ui_p (a, uc, ud) != 0);\n          if (want != got)\n            {\n              printf    (\"mpz_congruent_ui_p wrong\\n\");\n          
    printf    (\"   expected %d got %d\\n\", want, got);\n              mpz_trace (\"   a\", a);\n              printf    (\"   c=%lu\\n\", uc);\n              printf    (\"   d=%lu\\n\", ud);\n              mp_trace_base = -16;\n              mpz_trace (\"   a\", a);\n              printf    (\"   c=0x%lX\\n\", uc);\n              printf    (\"   d=0x%lX\\n\", ud);\n              abort ();\n            }\n        }\n\n      MPZ_SRCPTR_SWAP (a, c);\n    }\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char *a;\n    const char *c;\n    const char *d;\n    int        want;\n\n  } data[] = {\n\n    /* anything congruent mod 1 */\n    { \"0\", \"0\", \"1\", 1 },\n    { \"1\", \"0\", \"1\", 1 },\n    { \"0\", \"1\", \"1\", 1 },\n    { \"123\", \"456\", \"1\", 1 },\n    { \"0x123456789123456789\", \"0x987654321987654321\", \"1\", 1 },\n\n    /* csize==1, dsize==2 changing to 1 after stripping 2s */\n    { \"0x3333333333333333\",  \"0x33333333\",\n      \"0x180000000\", 1 },\n    { \"0x33333333333333333333333333333333\", \"0x3333333333333333\",\n      \"0x18000000000000000\", 1 },\n\n    /* another dsize==2 becoming 1, with opposite signs this time */\n    {  \"0x444444441\",\n      \"-0x22222221F\",\n       \"0x333333330\", 1 },\n    {  \"0x44444444444444441\",\n      \"-0x2222222222222221F\",\n       \"0x33333333333333330\", 1 },\n  };\n\n  mpz_t   a, c, d;\n  int     i;\n\n  mpz_init (a);\n  mpz_init (c);\n  mpz_init (d);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (c, data[i].c, 0);\n      mpz_set_str_or_abort (d, data[i].d, 0);\n      check_one (a, c, d, data[i].want);\n    }\n\n  mpz_clear (a);\n  mpz_clear (c);\n  mpz_clear (d);\n}\n\n\nvoid\ncheck_random (int argc, char *argv[])\n{\n  gmp_randstate_t rands;\n  mpz_t   a, c, d, ra, rc;\n  int     i;\n  int     want;\n  int     reps = 2000;\n\n  if (argc >= 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (a);\n  
mpz_init (c);\n  mpz_init (d);\n  mpz_init (ra);\n  mpz_init (rc);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_errandomb (a, rands, 8*BITS_PER_MP_LIMB);\n      MPZ_CHECK_FORMAT (a);\n      mpz_errandomb (c, rands, 8*BITS_PER_MP_LIMB);\n      MPZ_CHECK_FORMAT (c);\n      mpz_errandomb_nonzero (d, rands, 8*BITS_PER_MP_LIMB);\n\n      mpz_negrandom (a, rands);\n      MPZ_CHECK_FORMAT (a);\n      mpz_negrandom (c, rands);\n      MPZ_CHECK_FORMAT (c);\n      mpz_negrandom (d, rands);\n\n      mpz_fdiv_r (ra, a, d);\n      mpz_fdiv_r (rc, c, d);\n\n      want = (mpz_cmp (ra, rc) == 0);\n      check_one (a, c, d, want);\n\n      mpz_sub (ra, ra, rc);\n      mpz_sub (a, a, ra);\n      MPZ_CHECK_FORMAT (a);\n      check_one (a, c, d, 1);\n\n      if (! mpz_pow2abs_p (d))\n        {\n          refmpz_combit (a, urandom(rands) % (8*BITS_PER_MP_LIMB));\n          check_one (a, c, d, 0);\n        }\n    }\n\n  mpz_clear (a);\n  mpz_clear (c);\n  mpz_clear (d);\n  mpz_clear (ra);\n  mpz_clear (rc);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_data ();\n  check_random (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-cong_2exp.c",
    "content": "/* test mpz_congruent_2exp_p */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr a, mpz_srcptr c, unsigned long d, int want)\n{\n  mpz_t  diff, d2exp;\n  int    got;\n  int    swap;\n\n  for (swap = 0; swap <= 1; swap++)\n    {\n      got = (mpz_congruent_2exp_p (a, c, d) != 0);\n      if (want != got)\n        {\n          mpz_init (diff);\n          mpz_init (d2exp);\n\n          mpz_sub (diff, a, c);\n          mpz_set_ui (d2exp, 1L);\n          mpz_mul_2exp (d2exp, d2exp, d);\n\n          printf (\"mpz_congruent_2exp_p wrong\\n\");\n          printf (\"   expected %d got %d\\n\", want, got);\n          mpz_trace (\"   a\", a);\n          mpz_trace (\"   c\", c);\n          mpz_trace (\" a-c\", diff);\n          mpz_trace (\" 2^d\", d2exp);\n          printf    (\"   d=%lu\\n\", d);\n\n          mp_trace_base = -16;\n          mpz_trace (\"   a\", a);\n          mpz_trace (\"   c\", c);\n          mpz_trace (\" a-c\", diff);\n          mpz_trace (\" 2^d\", d2exp);\n          printf    (\"   d=0x%lX\\n\", 
d);\n          abort ();\n        }\n\n      MPZ_SRCPTR_SWAP (a, c);\n    }\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char     *a;\n    const char     *c;\n    unsigned long  d;\n    int            want;\n\n  } data[] = {\n\n    /* anything is congruent mod 1 */\n    { \"0\", \"0\", 0, 1 },\n    { \"1\", \"0\", 0, 1 },\n    { \"0\", \"1\", 0, 1 },\n    { \"123\", \"456\", 0, 1 },\n    { \"0x123456789123456789\", \"0x987654321987654321\", 0, 1 },\n\n  };\n\n  mpz_t   a, c;\n  int     i;\n\n  mpz_init (a);\n  mpz_init (c);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (c, data[i].c, 0);\n      check_one (a, c, data[i].d, data[i].want);\n    }\n\n  mpz_clear (a);\n  mpz_clear (c);\n}\n\n\nvoid\ncheck_random (int argc, char *argv[])\n{\n  gmp_randstate_t rands;\n  unsigned long  d;\n  mpz_t  a, c, ra, rc;\n  int    i;\n  int    want;\n  int    reps = 5000;\n\n  if (argc >= 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (a);\n  mpz_init (c);\n  mpz_init (ra);\n  mpz_init (rc);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_errandomb (a, rands, 8*BITS_PER_MP_LIMB);\n      mpz_errandomb (c, rands, 8*BITS_PER_MP_LIMB);\n      d = urandom(rands) % (8*BITS_PER_MP_LIMB);\n\n      mpz_mul_2exp (a, a, urandom(rands) % (2*BITS_PER_MP_LIMB));\n      mpz_mul_2exp (c, c, urandom(rands) % (2*BITS_PER_MP_LIMB));\n\n      mpz_negrandom (a, rands);\n      mpz_negrandom (c, rands);\n\n      mpz_fdiv_r_2exp (ra, a, d);\n      mpz_fdiv_r_2exp (rc, c, d);\n\n      want = (mpz_cmp (ra, rc) == 0);\n      check_one (a, c, d, want);\n\n      mpz_sub (ra, ra, rc);\n      mpz_sub (a, a, ra);\n      check_one (a, c, d, 1);\n    }\n\n  mpz_clear (a);\n  mpz_clear (c);\n  mpz_clear (ra);\n  mpz_clear (rc);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_data ();\n  check_random (argc, argv);\n\n  tests_end 
();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-div_2exp.c",
    "content": "/* Test mpz_[cft]div_[qr]_2exp.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* If the remainder is in the correct range and q*d+r is correct, then q\n   must have rounded correctly.  
*/\n\nvoid\ncheck_one (mpz_srcptr a, unsigned long d)\n{\n  mpz_t  q, r, p, d2exp;\n  int    inplace;\n\n  mpz_init (d2exp);\n  mpz_init (q);\n  mpz_init (r);\n  mpz_init (p);\n\n  mpz_set_ui (d2exp, 1L);\n  mpz_mul_2exp (d2exp, d2exp, d);\n\n#define INPLACE(fun,dst,src,d)  \\\n  if (inplace)                  \\\n    {                           \\\n      mpz_set (dst, src);       \\\n      fun (dst, dst, d);        \\\n    }                           \\\n  else                          \\\n    fun (dst, src, d);\n  \n  for (inplace = 0; inplace <= 1; inplace++)\n    {\n      INPLACE (mpz_fdiv_q_2exp, q, a, d);\n      INPLACE (mpz_fdiv_r_2exp, r, a, d);\n\n      mpz_mul_2exp (p, q, d);\n      mpz_add (p, p, r);\n      if (mpz_sgn (r) < 0 || mpz_cmp (r, d2exp) >= 0)\n        {\n          printf (\"mpz_fdiv_r_2exp result out of range\\n\");\n          goto error;\n        }\n      if (mpz_cmp (p, a) != 0)\n        {\n          printf (\"mpz_fdiv_[qr]_2exp doesn't multiply back\\n\");\n          goto error;\n        }\n\n\n      INPLACE (mpz_cdiv_q_2exp, q, a, d);\n      INPLACE (mpz_cdiv_r_2exp, r, a, d);\n\n      mpz_mul_2exp (p, q, d);\n      mpz_add (p, p, r);\n      if (mpz_sgn (r) > 0 || mpz_cmpabs (r, d2exp) >= 0)\n        {\n          printf (\"mpz_cdiv_r_2exp result out of range\\n\");\n          goto error;\n        }\n      if (mpz_cmp (p, a) != 0)\n        {\n          printf (\"mpz_cdiv_[qr]_2exp doesn't multiply back\\n\");\n          goto error;\n        }\n\n\n      INPLACE (mpz_tdiv_q_2exp, q, a, d);\n      INPLACE (mpz_tdiv_r_2exp, r, a, d);\n\n      mpz_mul_2exp (p, q, d);\n      mpz_add (p, p, r);\n      if (mpz_sgn (r) != 0 && mpz_sgn (r) != mpz_sgn (a))\n        {\n          printf (\"mpz_tdiv_r_2exp result wrong sign\\n\");\n          goto error;\n        }\n      if (mpz_cmpabs (r, d2exp) >= 0)\n        {\n          printf (\"mpz_tdiv_r_2exp result out of range\\n\");\n          goto error;\n        }\n      if (mpz_cmp (p, a) != 0)\n        {\n 
         printf (\"mpz_tdiv_[qr]_2exp doesn't multiply back\\n\");\n          goto error;\n        }\n    }\n\n  mpz_clear (d2exp);\n  mpz_clear (q);\n  mpz_clear (r);\n  mpz_clear (p);\n  return;\n\n\n error:\n  mpz_trace (\"a\", a);\n  printf    (\"d=%lu\\n\", d);\n  mpz_trace (\"q\", q);\n  mpz_trace (\"r\", r);\n  mpz_trace (\"p\", p);\n\n  mp_trace_base = -16;\n  mpz_trace (\"a\", a);\n  printf    (\"d=0x%lX\\n\", d);\n  mpz_trace (\"q\", q);\n  mpz_trace (\"r\", r);\n  mpz_trace (\"p\", p);\n\n  abort ();\n}\n\n\nvoid\ncheck_all (mpz_ptr a, unsigned long d)\n{\n  check_one (a, d);\n  mpz_neg (a, a);\n  check_one (a, d);\n}\n\n\nvoid\ncheck_various (void)\n{\n  static const unsigned long  table[] = {\n    0, 1, 2, 3, 4, 5,\n    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,\n    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,\n    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,\n    4*GMP_NUMB_BITS-1, 4*GMP_NUMB_BITS, 4*GMP_NUMB_BITS+1\n  };\n\n  int            i, j;\n  unsigned long  n, d;\n  mpz_t          a;\n\n  mpz_init (a);\n\n  /* a==0, and various d */\n  mpz_set_ui (a, 0L);\n  for (i = 0; i < numberof (table); i++)\n    check_one (a, table[i]);\n\n  /* a==2^n, and various d */\n  for (i = 0; i < numberof (table); i++)\n    {\n      n = table[i];\n      mpz_set_ui (a, 1L);\n      mpz_mul_2exp (a, a, n);\n\n      for (j = 0; j < numberof (table); j++)\n        {\n          d = table[j];\n          check_all (a, d);\n        }\n    }\n\n  mpz_clear (a);\n}\n\n\nvoid\ncheck_random (int argc, char *argv[])\n{\n  gmp_randstate_t  rands;\n  int            reps = 100;\n  mpz_t          a;\n  unsigned long  d;\n  int            i;\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (a);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      /* exponentially within 2 to 257 bits */\n      mpz_erandomb (a, rands, urandom (rands) % 8 + 2);\n\n      d = urandom (rands) % 256;\n\n      check_all (a, d);\n    }\n\n  
mpz_clear (a);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_various ();\n  check_random (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-divis.c",
    "content": "/* test mpz_divisible_p and mpz_divisible_ui_p */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr a, mpz_srcptr d, int want)\n{\n  int   got;\n\n  if (mpz_fits_ulong_p (d))\n    {\n      unsigned long  u = mpz_get_ui (d);\n      got = (mpz_divisible_ui_p (a, u) != 0);\n      if (want != got)\n        {\n          printf (\"mpz_divisible_ui_p wrong\\n\");\n          printf (\"   expected %d got %d\\n\", want, got);\n          mpz_trace (\"   a\", a);\n          printf (\"   d=%lu\\n\", u);\n          mp_trace_base = -16;\n          mpz_trace (\"   a\", a);\n          printf (\"   d=0x%lX\\n\", u);\n          abort ();\n        }\n    }\n\n  got = (mpz_divisible_p (a, d) != 0);\n  if (want != got)\n    {\n      printf (\"mpz_divisible_p wrong\\n\");\n      printf (\"   expected %d got %d\\n\", want, got);\n      mpz_trace (\"   a\", a);\n      mpz_trace (\"   d\", d);\n      mp_trace_base = -16;\n      mpz_trace (\"   a\", a);\n      mpz_trace (\"   d\", d);\n      abort ();\n    }\n}\n\nvoid\ncheck_data 
(void)\n{\n  static const struct {\n    const char *a;\n    const char *d;\n    int        want;\n\n  } data[] = {\n\n    { \"0\",    \"1\", 1 },\n    { \"123\",  \"1\", 1 },\n    { \"-123\", \"1\", 1 },\n\n    { \"0\",  \"2\", 1 },\n    { \"1\",  \"2\", 0 },\n    { \"2\",  \"2\", 1 },\n    { \"-2\", \"2\", 1 },\n    { \"0x100000000000000000000000000000000\", \"2\", 1 },\n    { \"0x100000000000000000000000000000001\", \"2\", 0 },\n\n    { \"0x3333333333333333\", \"3\", 1 },\n    { \"0x3333333333333332\", \"3\", 0 },\n    { \"0x33333333333333333333333333333333\", \"3\", 1 },\n    { \"0x33333333333333333333333333333332\", \"3\", 0 },\n\n    /* divisor changes from 2 to 1 limb after stripping 2s */\n    {          \"0x3333333300000000\",         \"0x180000000\",         1 },\n    {  \"0x33333333333333330000000000000000\", \"0x18000000000000000\", 1 },\n    { \"0x133333333333333330000000000000000\", \"0x18000000000000000\", 0 },\n  };\n\n  mpz_t   a, d;\n  int     i;\n\n  mpz_init (a);\n  mpz_init (d);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (d, data[i].d, 0);\n      check_one (a, d, data[i].want);\n    }\n\n  mpz_clear (a);\n  mpz_clear (d);\n}\n\nvoid\ncheck_random (int reps)\n{\n  gmp_randstate_t rands;\n  mpz_t   a, d, r;\n  int     i;\n  int     want;\n \n  gmp_randinit_default(rands);\n  mpz_init (a);\n  mpz_init (d);\n  mpz_init (r);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_erandomb (a, rands, 512);\n      mpz_erandomb_nonzero (d, rands, 512);\n\n      mpz_fdiv_r (r, a, d);\n\n      want = (mpz_sgn (r) == 0);\n      check_one (a, d, want);\n\n      mpz_sub (a, a, r);\n      check_one (a, d, 1);\n\n      if (mpz_cmpabs_ui (d, 1L) == 0)\n        continue;\n\n      mpz_add_ui (a, a, 1L);\n      check_one (a, d, 0);\n    }\n\n  mpz_clear (a);\n  mpz_clear (d);\n  mpz_clear (r);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  int  reps = 1000;\n\n  
tests_start ();\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  check_data ();\n  check_random (reps);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-divis_2exp.c",
    "content": "/* test mpz_divisible_2exp_p */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr a, unsigned long d, int want)\n{\n  int   got;\n\n  got = (mpz_divisible_2exp_p (a, d) != 0);\n  if (want != got)\n    {\n      printf (\"mpz_divisible_2exp_p wrong\\n\");\n      printf (\"   expected %d got %d\\n\", want, got);\n      mpz_trace (\"   a\", a);\n      printf    (\"   d=%lu\\n\", d);\n      mp_trace_base = -16;\n      mpz_trace (\"   a\", a);\n      printf    (\"   d=0x%lX\\n\", d);\n      abort ();\n    }\n}\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char    *a;\n    unsigned long d;\n    int           want;\n\n  } data[] = {\n\n    { \"0\", 0, 1 },\n    { \"0\", 1, 1 },\n    { \"0\", 2, 1 },\n    { \"0\", 3, 1 },\n\n    { \"1\", 0, 1 },\n    { \"1\", 1, 0 },\n    { \"1\", 2, 0 },\n    { \"1\", 3, 0 },\n    { \"1\", 10000, 0 },\n\n    { \"4\", 0, 1 },\n    { \"4\", 1, 1 },\n    { \"4\", 2, 1 },\n    { \"4\", 3, 0 },\n    { \"4\", 4, 0 },\n    { \"4\", 10000, 0 },\n\n    { \"0x80000000\", 
31, 1 },\n    { \"0x80000000\", 32, 0 },\n    { \"0x80000000\", 64, 0 },\n\n    { \"0x100000000\", 32, 1 },\n    { \"0x100000000\", 33, 0 },\n    { \"0x100000000\", 64, 0 },\n\n    { \"0x8000000000000000\", 63, 1 },\n    { \"0x8000000000000000\", 64, 0 },\n    { \"0x8000000000000000\", 128, 0 },\n\n    { \"0x10000000000000000\", 64, 1 },\n    { \"0x10000000000000000\", 65, 0 },\n    { \"0x10000000000000000\", 128, 0 },\n    { \"0x10000000000000000\", 256, 0 },\n\n    { \"0x10000000000000000100000000\", 32, 1 },\n    { \"0x10000000000000000100000000\", 33, 0 },\n    { \"0x10000000000000000100000000\", 64, 0 },\n\n    { \"0x1000000000000000010000000000000000\", 64, 1 },\n    { \"0x1000000000000000010000000000000000\", 65, 0 },\n    { \"0x1000000000000000010000000000000000\", 128, 0 },\n    { \"0x1000000000000000010000000000000000\", 256, 0 },\n    { \"0x1000000000000000010000000000000000\", 1024, 0 },\n\n  };\n\n  mpz_t   a, d;\n  int     i;\n\n  mpz_init (a);\n  mpz_init (d);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      check_one (a, data[i].d, data[i].want);\n\n      mpz_neg (a, a);\n      check_one (a, data[i].d, data[i].want);\n    }\n\n  mpz_clear (a);\n  mpz_clear (d);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-export.c",
    "content": "/* Test mpz_export.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *src;\n    size_t      want_count;\n    int         order;\n    size_t      size;\n    int         endian;\n    int         nail;\n    char        want_data[64];\n\n  } data[] = {\n\n    { \"0\", 0,1, 1,1, 0 },\n    { \"0\", 0,1, 2,1, 0 },\n    { \"0\", 0,1, 3,1, 0 },\n\n    { \"0x12345678\", 4,1,  1,1, 0, { '\\022', '\\064', '\\126', '\\170' } },\n    { \"0x12345678\", 1,1,  4,1, 0, { '\\022', '\\064', '\\126', '\\170' } },\n    { \"0x12345678\", 1,-1, 4,1, 0, { '\\022', '\\064', '\\126', '\\170' } },\n\n    { \"0x12345678\", 4,-1, 1,-1, 0, { '\\170', '\\126', '\\064', '\\022' } },\n    { \"0x12345678\", 1,1,  4,-1, 0, { '\\170', '\\126', '\\064', '\\022' } },\n    { \"0x12345678\", 1,-1, 4,-1, 0, { '\\170', '\\126', '\\064', '\\022' } },\n\n    { \"0x15\", 5,1,  1,1, 7, { '\\001', '\\000', '\\001', '\\000', '\\001' } },\n\n    { \"0x1FFFFFFFFFFF\", 3,1,  2,1,   1, {\n        
'\\177','\\377', '\\177','\\377', '\\177','\\377' } },\n    { \"0x1FFFFFFFFFFF\", 3,1,  2,-1,  1, {\n        '\\377','\\177', '\\377','\\177', '\\377','\\177' } },\n    { \"0x7\",            3,1,  2,1,  15, {\n        '\\000','\\001', '\\000','\\001', '\\000','\\001' } },\n    { \"0x7\",            3,1,  2,-1, 15, {\n        '\\001','\\000', '\\001','\\000', '\\001','\\000' } },\n\n    { \"0x24\", 3,1,  2,1,  14, { '\\000','\\002', '\\000','\\001', '\\000','\\000' }},\n    { \"0x24\", 3,1,  2,-1, 14, { '\\002','\\000', '\\001','\\000', '\\000','\\000' }},\n    { \"0x24\", 3,-1, 2,-1, 14, { '\\000','\\000', '\\001','\\000', '\\002','\\000' }},\n    { \"0x24\", 3,-1, 2,1,  14, { '\\000','\\000', '\\000','\\001', '\\000','\\002' }},\n\n    { \"0x123456789ABC\", 3,1,  2,1,  0, {\n        '\\022','\\064', '\\126','\\170', '\\232','\\274' } },\n    { \"0x123456789ABC\", 3,-1, 2,1,  0, {\n        '\\232','\\274', '\\126','\\170', '\\022','\\064' } },\n    { \"0x123456789ABC\", 3,1,  2,-1, 0, {\n        '\\064','\\022', '\\170','\\126', '\\274','\\232' } },\n    { \"0x123456789ABC\", 3,-1, 2,-1, 0, {\n        '\\274','\\232', '\\170','\\126', '\\064','\\022' } },\n\n    { \"0x112233445566778899AABBCC\", 3,1,  4,1,  0,\n      { '\\021','\\042','\\063','\\104',\n        '\\125','\\146','\\167','\\210',\n        '\\231','\\252','\\273','\\314' } },\n    { \"0x112233445566778899AABBCC\", 3,-1, 4,1,  0,\n      { '\\231','\\252','\\273','\\314',\n        '\\125','\\146','\\167','\\210',\n        '\\021','\\042','\\063','\\104' } },\n    { \"0x112233445566778899AABBCC\", 3,1,  4,-1, 0,\n      { '\\104','\\063','\\042','\\021',\n        '\\210','\\167','\\146','\\125',\n        '\\314','\\273','\\252','\\231' } },\n    { \"0x112233445566778899AABBCC\", 3,-1, 4,-1, 0,\n      { '\\314','\\273','\\252','\\231',\n        '\\210','\\167','\\146','\\125',\n        '\\104','\\063','\\042','\\021' } },\n\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,1,  8,1,  0,\n      
{ '\\020','\\001','\\040','\\002','\\060','\\003','\\100','\\004',\n        '\\120','\\005','\\140','\\006','\\160','\\007','\\200','\\010',\n        '\\220','\\011','\\240','\\012','\\260','\\013','\\300','\\014' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,-1, 8,1,  0,\n      { '\\220','\\011','\\240','\\012','\\260','\\013','\\300','\\014',\n        '\\120','\\005','\\140','\\006','\\160','\\007','\\200','\\010',\n        '\\020','\\001','\\040','\\002','\\060','\\003','\\100','\\004' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,1,  8,-1, 0,\n      { '\\004','\\100','\\003','\\060','\\002','\\040','\\001','\\020',\n        '\\010','\\200','\\007','\\160','\\006','\\140','\\005','\\120',\n        '\\014','\\300','\\013','\\260','\\012','\\240','\\011','\\220' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,-1, 8,-1, 0,\n      { '\\014','\\300','\\013','\\260','\\012','\\240','\\011','\\220',\n        '\\010','\\200','\\007','\\160','\\006','\\140','\\005','\\120',\n        '\\004','\\100','\\003','\\060','\\002','\\040','\\001','\\020' } },\n\n    { \"0x155555555555555555555555\", 3,1,  4,1,  1,\n      { '\\125','\\125','\\125','\\125',\n        '\\052','\\252','\\252','\\252',\n        '\\125','\\125','\\125','\\125' } },\n    { \"0x155555555555555555555555\", 3,-1,  4,1,  1,\n      { '\\125','\\125','\\125','\\125',\n        '\\052','\\252','\\252','\\252',\n        '\\125','\\125','\\125','\\125' } },\n    { \"0x155555555555555555555555\", 3,1,  4,-1,  1,\n      { '\\125','\\125','\\125','\\125',\n        '\\252','\\252','\\252','\\052',\n        '\\125','\\125','\\125','\\125' } },\n    { \"0x155555555555555555555555\", 3,-1,  4,-1,  1,\n      { '\\125','\\125','\\125','\\125',\n        '\\252','\\252','\\252','\\052',\n        '\\125','\\125','\\125','\\125' } },\n  };\n\n  char    buf[sizeof(data[0].src) + sizeof (mp_limb_t) + 128];\n  char    *got_data;\n  void    *ret;\n  size_t  
align, got_count, j;\n  int     i, error = 0;\n  mpz_t   src;\n\n  mpz_init (src);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (align = 0; align < sizeof (mp_limb_t); align++)\n        {\n          mpz_set_str_or_abort (src, data[i].src, 0);\n          MPZ_CHECK_FORMAT (src);\n          got_data = buf + align;\n\n          ASSERT_ALWAYS (data[i].want_count * data[i].size + align\n                         <= sizeof (buf));\n\n          memset (got_data, '\\0', data[i].want_count * data[i].size);\n          ret = mpz_export (got_data, &got_count, data[i].order,\n                            data[i].size, data[i].endian, data[i].nail, src);\n\n          if (ret != got_data)\n            {\n              printf (\"return doesn't equal given pointer\\n\");\n              error = 1;\n            }\n          if (got_count != data[i].want_count)\n            {\n              printf (\"wrong count\\n\");\n              error = 1;\n            }\n          if (memcmp (got_data, data[i].want_data, got_count * data[i].size) != 0)\n            {\n              printf (\"wrong result data\\n\");\n              error = 1;\n            }\n          if (error)\n            {\n              printf (\"    at data[%d]  align=%d\\n\", i, (int) align);\n              printf (\"    src \\\"%s\\\"\\n\", data[i].src);\n              mpz_trace (\"    src\", src);\n              printf (\"    order=%d  size=%lu endian=%d nail=%u\\n\",\n                      data[i].order,\n                      data[i].size, data[i].endian, data[i].nail);\n              printf (\"    want count %lu\\n\", data[i].want_count);\n              printf (\"    got count  %lu\\n\", got_count);\n              printf (\"    want\");\n              for (j = 0; j < data[i].want_count*data[i].size; j++)\n                printf (\" 0x%02X,\", (unsigned) (unsigned char) data[i].want_data[j]);\n              printf (\"\\n\");\n              printf (\"    got \");\n              for (j = 0; j < 
got_count*data[i].size; j++)\n                printf (\" 0x%02X,\", (unsigned) (unsigned char) got_data[j]);\n              printf (\"\\n\");\n              abort ();\n            }\n        }\n    }\n  mpz_clear (src);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  mp_trace_base = -16;\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-fac_ui.c",
    "content": "/* Exercise mpz_fac_ui and mpz_2fac_ui.\n\nCopyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Usage: t-fac_ui [x|num]\n\n   With no arguments testing goes up to the initial value of \"limit\" below.\n   With a number argument tests are carried that far, or with a literal \"x\"\n   tests are continued without limit (this being meant only for development\n   purposes).  */\n\n\nint\nmain (int argc, char *argv[])\n{\n  unsigned long  n, m;\n  unsigned long  limit = 2222;\n  mpz_t          df[2], f, r;\n\n  tests_start ();\n\n  if (argc > 1 && argv[1][0] == 'x')\n    limit = ULONG_MAX;\n  else if (argc > 1)\n    limit = atoi (argv[1]);\n\n  /* for small limb testing */\n  limit = MIN (limit, MP_LIMB_T_MAX);\n\n  mpz_init_set_ui (df[0], 1);  /* 0!! = 1 */\n  mpz_init_set_ui (df[1], 1);  /* -1!! = 1 */\n  mpz_init_set_ui (f, 1);  /* 0! 
= 1 */\n  mpz_init (r);\n\n  for (n = 0, m = 0; n < limit; n++)\n    {\n      mpz_fac_ui (r, n);\n      MPZ_CHECK_FORMAT (r);\n\n      if (mpz_cmp (f, r) != 0)\n        {\n          printf (\"mpz_fac_ui(%lu) wrong\\n\", n);\n          printf (\"  got  \"); mpz_out_str (stdout, 10, r); printf(\"\\n\");\n          printf (\"  want \"); mpz_out_str (stdout, 10, f); printf(\"\\n\");\n          abort ();\n        }\n\n      mpz_2fac_ui (r, n);\n      MPZ_CHECK_FORMAT (r);\n\n      if (mpz_cmp (df[m], r) != 0)\n        {\n          printf (\"mpz_2fac_ui(%lu) wrong\\n\", n);\n          printf (\"  got  \"); mpz_out_str (stdout, 10, r); printf(\"\\n\");\n          printf (\"  want \"); mpz_out_str (stdout, 10, df[m]); printf(\"\\n\");\n          abort ();\n        }\n\n      m ^= 1;\n      mpz_mul_ui (df[m], df[m], n+1);  /* (n+1)!! = (n-1)!! * (n+1) */\n      mpz_mul_ui (f, f, n+1);  /* (n+1)! = n! * (n+1) */\n    }\n\n  n = 1048573; /* a prime */\n  if (n > MP_LIMB_T_MAX)\n    n = 65521; /* a smaller prime :-) */\n  mpz_fac_ui (f, n - 1);\n  m = mpz_fdiv_ui (f, n);\n  if ( m != n - 1)\n    {\n      printf (\"mpz_fac_ui(%lu) wrong\\n\", n - 1);\n      printf (\"  Wilson's theorem not verified: got %lu, expected %lu.\\n\",m ,n - 1);\n      abort ();\n    }\n\n  mpz_clear (df[0]);\n  mpz_clear (df[1]);\n  mpz_clear (f);\n  mpz_clear (r);\n\n  tests_end ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-fdiv.c",
    "content": "/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr, mpz_fdiv_q,\n   mpz_fdiv_r, mpz_mul.\n\nCopyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(mpz_t, mpz_t);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t dividend, divisor;\n  mpz_t quotient, remainder;\n  mpz_t quotient2, remainder2;\n  mpz_t temp;\n  mp_size_t dividend_size, divisor_size;\n  int i;\n  int reps = 200;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (dividend);\n  mpz_init (divisor);\n  mpz_init (quotient);\n  mpz_init (remainder);\n  mpz_init (quotient2);\n  mpz_init (remainder2);\n  mpz_init (temp);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 16 + 2; /* 0..131071 bit operands */\n\n      do\n\t{\n\t  mpz_urandomb (bs, rands, size_range);\n\t  divisor_size = mpz_get_ui 
(bs);\n\t  mpz_rrandomb (divisor, rands, divisor_size);\n\t}\n      while (mpz_sgn (divisor) == 0);\n\n      mpz_urandomb (bs, rands, size_range);\n      dividend_size = mpz_get_ui (bs) + divisor_size;\n      mpz_rrandomb (dividend, rands, dividend_size);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (dividend, dividend);\n      if ((bsi & 2) != 0)\n\tmpz_neg (divisor, divisor);\n\n      /* printf (\"%ld %ld\\n\", SIZ (dividend), SIZ (divisor)); */\n\n      mpz_fdiv_qr (quotient, remainder, dividend, divisor);\n      mpz_fdiv_q (quotient2, dividend, divisor);\n      mpz_fdiv_r (remainder2, dividend, divisor);\n\n      /* First determine that the quotients and remainders computed\n\t with different functions are equal.  */\n      if (mpz_cmp (quotient, quotient2) != 0)\n\tdump_abort (dividend, divisor);\n      if (mpz_cmp (remainder, remainder2) != 0)\n\tdump_abort (dividend, divisor);\n\n      /* Check if the sign of the quotient is correct.  */\n      if (mpz_cmp_ui (quotient, 0) != 0)\n\tif ((mpz_cmp_ui (quotient, 0) < 0)\n\t    != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))\n\tdump_abort (dividend, divisor);\n\n      /* Check if the remainder has the same sign as the divisor\n\t (quotient rounded towards minus infinity).  
*/\n      if (mpz_cmp_ui (remainder, 0) != 0)\n\tif ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (divisor, 0) < 0))\n\t  dump_abort (dividend, divisor);\n\n      mpz_mul (temp, quotient, divisor);\n      mpz_add (temp, temp, remainder);\n      if (mpz_cmp (temp, dividend) != 0)\n\tdump_abort (dividend, divisor);\n\n      mpz_abs (temp, divisor);\n      mpz_abs (remainder, remainder);\n      if (mpz_cmp (remainder, temp) >= 0)\n\tdump_abort (dividend, divisor);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (dividend);\n  mpz_clear (divisor);\n  mpz_clear (quotient);\n  mpz_clear (remainder);\n  mpz_clear (quotient2);\n  mpz_clear (remainder2);\n  mpz_clear (temp);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (mpz_t dividend, mpz_t divisor)\n{\n  fprintf (stderr, \"ERROR\\n\");\n  fprintf (stderr, \"dividend = \"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = \"); debug_mp (divisor, -16);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-fdiv_ui.c",
    "content": "/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr_ui, mpz_fdiv_q_ui,\n   mpz_fdiv_r_ui, mpz_fdiv_ui, mpz_mul_ui.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(char *, mpz_t, mpir_ui);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t dividend;\n  mpz_t quotient, remainder;\n  mpz_t quotient2, remainder2;\n  mpz_t temp;\n  mp_size_t dividend_size;\n  mpir_ui divisor;\n  int i;\n  int reps = 10000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  mpir_ui bsi, size_range;\n  mpir_ui r_rq, r_q, r_r, r;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (dividend);\n  mpz_init (quotient);\n  mpz_init (remainder);\n  mpz_init (quotient2);\n  mpz_init (remainder2);\n  mpz_init (temp);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */\n\n      do\n\t{\n\t  mpz_rrandomb (bs, rands, 64);\n\t  divisor = mpz_get_ui (bs);\n\t}\n      while (divisor == 0);\n\n      mpz_urandomb (bs, rands, size_range);\n      dividend_size = mpz_get_ui (bs);\n      mpz_rrandomb (dividend, rands, dividend_size);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (dividend, dividend);\n\n      /* printf (\"%ld\\n\", SIZ (dividend)); */\n\n      r_rq = mpz_fdiv_qr_ui (quotient, remainder, dividend, divisor);\n      r_q = mpz_fdiv_q_ui (quotient2, dividend, divisor);\n      r_r = mpz_fdiv_r_ui (remainder2, dividend, divisor);\n      r = mpz_fdiv_ui (dividend, divisor);\n\n      /* First determine that the quotients and remainders computed\n\t with different functions are equal.  
*/\n      if (mpz_cmp (quotient, quotient2) != 0)\n\tdump_abort (\"quotients from mpz_fdiv_qr_ui and mpz_fdiv_q_ui differ\",\n\t\t    dividend, divisor);\n      if (mpz_cmp (remainder, remainder2) != 0)\n\tdump_abort (\"remainders from mpz_fdiv_qr_ui and mpz_fdiv_r_ui differ\",\n\t\t    dividend, divisor);\n\n      /* Check if the sign of the quotient is correct.  */\n      if (mpz_cmp_ui (quotient, 0) != 0)\n\tif ((mpz_cmp_ui (quotient, 0) < 0)\n\t    != (mpz_cmp_ui (dividend, 0) < 0))\n\tdump_abort (\"quotient sign wrong\", dividend, divisor);\n\n      /* Check if the remainder has the same sign as the (positive) divisor\n\t (quotient rounded towards minus infinity).  */\n      if (mpz_cmp_ui (remainder, 0) != 0)\n\tif (mpz_cmp_ui (remainder, 0) < 0)\n\t  dump_abort (\"remainder sign wrong\", dividend, divisor);\n\n      mpz_mul_ui (temp, quotient, divisor);\n      mpz_add (temp, temp, remainder);\n      if (mpz_cmp (temp, dividend) != 0)\n\tdump_abort (\"n mod d != n - [n/d]*d\", dividend, divisor);\n\n      mpz_abs (remainder, remainder);\n      if (mpz_cmp_ui (remainder, divisor) >= 0)\n\tdump_abort (\"remainder greater than divisor\", dividend, divisor);\n\n      if (mpz_cmp_ui (remainder, r_rq) != 0)\n\tdump_abort (\"remainder returned from mpz_fdiv_qr_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_q) != 0)\n\tdump_abort (\"remainder returned from mpz_fdiv_q_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_r) != 0)\n\tdump_abort (\"remainder returned from mpz_fdiv_r_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r) != 0)\n\tdump_abort (\"remainder returned from mpz_fdiv_ui is wrong\",\n\t\t    dividend, divisor);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (dividend);\n  mpz_clear (quotient);\n  mpz_clear (remainder);\n  mpz_clear (quotient2);\n  mpz_clear (remainder2);\n  mpz_clear (temp);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort 
(char *str, mpz_t dividend, mpir_ui divisor)\n{\n  fprintf (stderr, \"ERROR: %s\\n\", str);\n  fprintf (stderr, \"dividend = \"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = %lX\\n\", divisor);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-fib_ui.c",
    "content": "/* Test mpz_fib_ui and mpz_fib2_ui.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Usage: t-fib_ui [x|num]\n\n   Run with no arguments, tests goes up to the initial value of \"limit\"\n   below.  With a number argument tests are carried up that far, or with a\n   literal \"x\" tests are continued without limit (this being only meant for\n   development purposes).\n\n   The size tests performed are designed to partially replicate what will be\n   going on in mpz_fib_ui.  There's plenty of ASSERTs there, but of course\n   they're not normally enabled.\n\n   Misfeatures:\n\n   The tests on MPN_FIB2_SIZE are a bit useless, since that macro includes a\n   +2 for the internal purposes of mpn_fib2_ui.  It's probably better to\n   give mpn_fib2_ui a run with assertion checking enabled.  
*/\n\n\n#define MPZ_FIB_SIZE_FLOAT(n) \\\n  ((mp_size_t) ((n) * 0.6942419 / GMP_NUMB_BITS + 1))\n\n\nvoid\ncheck_fib_table (void)\n{\n  int        i;\n  mp_limb_t  want;\n\n  ASSERT_ALWAYS (FIB_TABLE(-1) == 1);\n  ASSERT_ALWAYS (FIB_TABLE(0) == 0);\n\n  for (i = 1; i <= FIB_TABLE_LIMIT; i++)\n    {\n      want = FIB_TABLE(i-1) + FIB_TABLE(i-2);\n      if (FIB_TABLE(i) != want)\n        {\n          printf (\"FIB_TABLE(%d) wrong\\n\", i);\n          gmp_printf (\"  got  %#Nx\\n\", &FIB_TABLE(i), 1);\n          gmp_printf (\"  want %#Nx\\n\", &want, 1);\n          abort ();\n        }\n    }\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  unsigned long  n;\n  unsigned long  limit = 100 * BITS_PER_MP_LIMB;\n  mpz_t          want_fn, want_fn1, got_fn, got_fn1;\n\n  tests_start ();\n  mp_trace_base = -16;\n  if (argc > 1 && argv[1][0] == 'x')\n    limit = ULONG_MAX;\n  else if (argc > 1)\n    limit = atoi (argv[1]);\n\n  check_fib_table ();\n\n  /* start at n==0 */\n  mpz_init_set_ui (want_fn1, 1);  /* F[-1] */\n  mpz_init_set_ui (want_fn,  0);  /* F[0]   */\n  mpz_init (got_fn);\n  mpz_init (got_fn1);\n\n  for (n = 0; n < limit; n++)\n    {\n      /* check our float formula seems right */\n      if (MPZ_FIB_SIZE_FLOAT (n) < SIZ(want_fn))\n        {\n          printf (\"MPZ_FIB_SIZE_FLOAT wrong at n=%lu\\n\", n);\n          printf (\"  MPZ_FIB_SIZE_FLOAT  %ld\\n\", MPZ_FIB_SIZE_FLOAT (n));\n          printf (\"  SIZ(want_fn)        %d\\n\", SIZ(want_fn));\n          abort ();\n        }\n\n      /* check MPN_FIB2_SIZE seems right, compared to actual size and\n         compared to our float formula */\n      if (MPN_FIB2_SIZE (n) < MPZ_FIB_SIZE_FLOAT (n))\n        {\n          printf (\"MPN_FIB2_SIZE wrong at n=%lu\\n\", n);\n          printf (\"  MPN_FIB2_SIZE       %ld\\n\", MPN_FIB2_SIZE (n));\n          printf (\"  MPZ_FIB_SIZE_FLOAT  %ld\\n\", MPZ_FIB_SIZE_FLOAT (n));\n          abort ();\n        }\n      if (MPN_FIB2_SIZE (n) < SIZ(want_fn))\n        {\n          
printf (\"MPN_FIB2_SIZE wrong at n=%lu\\n\", n);\n          printf (\"  MPN_FIB2_SIZE  %ld\\n\", MPN_FIB2_SIZE (n));\n          printf (\"  SIZ(want_fn)   %d\\n\", SIZ(want_fn));\n          abort ();\n        }\n\n      mpz_fib2_ui (got_fn, got_fn1, n);\n      MPZ_CHECK_FORMAT (got_fn);\n      MPZ_CHECK_FORMAT (got_fn1);\n      if (mpz_cmp (got_fn, want_fn) != 0 || mpz_cmp (got_fn1, want_fn1) != 0)\n        {\n          printf (\"mpz_fib2_ui(%lu) wrong\\n\", n);\n          mpz_trace (\"want fn \", want_fn);\n          mpz_trace (\"got  fn \",  got_fn);\n          mpz_trace (\"want fn1\", want_fn1);\n          mpz_trace (\"got  fn1\",  got_fn1);\n          abort ();\n        }\n\n      mpz_fib_ui (got_fn, n);\n      MPZ_CHECK_FORMAT (got_fn);\n      if (mpz_cmp (got_fn, want_fn) != 0)\n        {\n          printf (\"mpz_fib_ui(%lu) wrong\\n\", n);\n          mpz_trace (\"want fn\", want_fn);\n          mpz_trace (\"got  fn\", got_fn);\n          abort ();\n        }\n\n      mpz_add (want_fn1, want_fn1, want_fn);  /* F[n+1] = F[n] + F[n-1] */\n      mpz_swap (want_fn1, want_fn);\n    }\n\n  mpz_clear (want_fn);\n  mpz_clear (want_fn1);\n  mpz_clear (got_fn);\n  mpz_clear (got_fn1);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-fits.c",
    "content": "/* Test mpz_fits_*_p */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Nothing sophisticated here, just exercise mpz_fits_*_p on a small amount\n   of data. 
*/\n\n#define EXPECT_S(fun,name,answer)                                       \\\n  got = fun (z);                                                        \\\n  if (got != answer)                                                    \\\n    {                                                                   \\\n      printf (\"%s (%s) got %d want %d\\n\", name, expr, got, answer);     \\\n      printf (\" z size %d\\n\", SIZ(z));                                  \\\n      printf (\" z dec \"); mpz_out_str (stdout, 10, z); printf (\"\\n\");   \\\n      printf (\" z hex \"); mpz_out_str (stdout, 16, z); printf (\"\\n\");   \\\n      error = 1;                                                        \\\n    }\n\n#if HAVE_STRINGIZE\n#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)\n#else\n#define EXPECT(fun,answer)  EXPECT_S(fun,\"fun\",answer)\n#endif\n\nint\nmain (void)\n{\n  mpz_t       z;\n  int         got;\n  const char  *expr;\n  int         error = 0;\n\n  tests_start ();\n  mpz_init (z);\n\n  mpz_set_ui (z, 0L);\n  expr = \"0\";\n  EXPECT (mpz_fits_ulong_p, 1);\n  EXPECT (mpz_fits_uint_p, 1);\n  EXPECT (mpz_fits_ushort_p, 1);\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n  EXPECT (mpz_fits_sshort_p, 1);\n\n  mpz_set_ui (z, 1L);\n  expr = \"1\";\n  EXPECT (mpz_fits_ulong_p, 1);\n  EXPECT (mpz_fits_uint_p, 1);\n  EXPECT (mpz_fits_ushort_p, 1);\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n  EXPECT (mpz_fits_sshort_p, 1);\n\n  mpz_set_si (z, -1L);\n  expr = \"-1\";\n  EXPECT (mpz_fits_ulong_p, 0);\n  EXPECT (mpz_fits_uint_p, 0);\n  EXPECT (mpz_fits_ushort_p, 0);\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n  EXPECT (mpz_fits_sshort_p, 1);\n\n  mpz_set_ui (z, 1L);\n  mpz_mul_2exp (z, z, 5L*BITS_PER_MP_LIMB);\n  expr = \"2^(5*BPML)\";\n  EXPECT (mpz_fits_ulong_p, 0);\n  EXPECT (mpz_fits_uint_p, 0);\n  EXPECT (mpz_fits_ushort_p, 0);\n  EXPECT (mpz_fits_slong_p, 0);\n  EXPECT (mpz_fits_sint_p, 0);\n  
EXPECT (mpz_fits_sshort_p, 0);\n\n\n  mpz_set_ui (z, (unsigned long) USHRT_MAX);\n  expr = \"USHRT_MAX\";\n  EXPECT (mpz_fits_ulong_p, 1);\n  EXPECT (mpz_fits_uint_p, 1);\n  EXPECT (mpz_fits_ushort_p, 1);\n\n  mpz_set_ui (z, (unsigned long) USHRT_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"USHRT_MAX + 1\";\n  EXPECT (mpz_fits_ushort_p, 0);\n\n\n  mpz_set_ui (z, (unsigned long) UINT_MAX);\n  expr = \"UINT_MAX\";\n  EXPECT (mpz_fits_ulong_p, 1);\n  EXPECT (mpz_fits_uint_p, 1);\n\n  mpz_set_ui (z, (unsigned long) UINT_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"UINT_MAX + 1\";\n  EXPECT (mpz_fits_uint_p, 0);\n\n\n  mpz_set_ui (z, ULONG_MAX);\n  expr = \"ULONG_MAX\";\n  EXPECT (mpz_fits_ulong_p, 1);\n\n  mpz_set_ui (z, ULONG_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"ULONG_MAX + 1\";\n  EXPECT (mpz_fits_ulong_p, 0);\n\n\n  mpz_set_si (z, (long) SHRT_MAX);\n  expr = \"SHRT_MAX\";\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n  EXPECT (mpz_fits_sshort_p, 1);\n\n  mpz_set_si (z, (long) SHRT_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"SHRT_MAX + 1\";\n  EXPECT (mpz_fits_sshort_p, 0);\n\n\n  mpz_set_si (z, (long) INT_MAX);\n  expr = \"INT_MAX\";\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n\n  mpz_set_si (z, (long) INT_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"INT_MAX + 1\";\n  EXPECT (mpz_fits_sint_p, 0);\n\n\n  mpz_set_si (z, LONG_MAX);\n  expr = \"LONG_MAX\";\n  EXPECT (mpz_fits_slong_p, 1);\n\n  mpz_set_si (z, LONG_MAX);\n  mpz_add_ui (z, z, 1L);\n  expr = \"LONG_MAX + 1\";\n  EXPECT (mpz_fits_slong_p, 0);\n\n\n  mpz_set_si (z, (long) SHRT_MIN);\n  expr = \"SHRT_MIN\";\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n  EXPECT (mpz_fits_sshort_p, 1);\n\n  mpz_set_si (z, (long) SHRT_MIN);\n  mpz_sub_ui (z, z, 1L);\n  expr = \"SHRT_MIN + 1\";\n  EXPECT (mpz_fits_sshort_p, 0);\n\n\n  mpz_set_si (z, (long) INT_MIN);\n  expr = \"INT_MIN\";\n  EXPECT (mpz_fits_slong_p, 1);\n  EXPECT (mpz_fits_sint_p, 1);\n\n  
mpz_set_si (z, (long) INT_MIN);\n  mpz_sub_ui (z, z, 1L);\n  expr = \"INT_MIN + 1\";\n  EXPECT (mpz_fits_sint_p, 0);\n\n\n  mpz_set_si (z, LONG_MIN);\n  expr = \"LONG_MIN\";\n  EXPECT (mpz_fits_slong_p, 1);\n\n  mpz_set_si (z, LONG_MIN);\n  mpz_sub_ui (z, z, 1L);\n  expr = \"LONG_MIN + 1\";\n  EXPECT (mpz_fits_slong_p, 0);\n\n\n  if (error)\n    abort ();\n\n  mpz_clear (z);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-gcd.c",
    "content": "/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,\n2008, 2009, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid one_test (mpz_t, mpz_t, mpz_t, int);\nvoid debug_mp (mpz_t, int);\n\nstatic int gcdext_valid_p (const mpz_t, const mpz_t, const mpz_t, const mpz_t);\n\n/* Keep one_test's variables global, so that we don't need\n   to reinitialize them for each test.  */\nmpz_t gcd1, gcd2, s, temp1, temp2, temp3;\n\n#define MAX_SCHOENHAGE_THRESHOLD HGCD_REDUCE_THRESHOLD\n\n/* Define this to make all operands be large enough for Schoenhage gcd\n   to be used.  */\n#ifndef WHACK_SCHOENHAGE\n#define WHACK_SCHOENHAGE 0\n#endif\n\n#if WHACK_SCHOENHAGE\n#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)\n#else\n#define MIN_OPERAND_BITSIZE 1\n#endif\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char *a;\n    const char *b;\n    const char *want;\n  } data[] = {\n    /* This tickled a bug in gmp 4.1.2 mpn/x86/k6/gcd_finda.asm. 
*/\n    { \"0x3FFC000007FFFFFFFFFF00000000003F83FFFFFFFFFFFFFFF80000000000000001\",\n      \"0x1FFE0007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000000000000000000000001\",\n      \"5\" }\n  };\n\n  mpz_t  a, b, got, want;\n  int    i;\n\n  mpz_inits (a, b, got, want, NULL);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (b, data[i].b, 0);\n      mpz_set_str_or_abort (want, data[i].want, 0);\n      mpz_gcd (got, a, b);\n      MPZ_CHECK_FORMAT (got);\n      if (mpz_cmp (got, want) != 0)\n\t{\n\t  printf    (\"mpz_gcd wrong on data[%d]\\n\", i);\n\t  printf    (\" a  %s\\n\", data[i].a);\n\t  printf    (\" b  %s\\n\", data[i].b);\n\t  mpz_trace (\" a\", a);\n\t  mpz_trace (\" b\", b);\n\t  mpz_trace (\" want\", want);\n\t  mpz_trace (\" got \", got);\n\t  abort ();\n\t}\n    }\n\n  mpz_clears (a, b, got, want, NULL);\n}\n\nvoid\nmake_chain_operands (mpz_t ref, mpz_t a, mpz_t b, gmp_randstate_t rs, int nb1, int nb2, int chain_len)\n{\n  mpz_t bs, temp1, temp2;\n  int j;\n\n  mpz_inits (bs, temp1, temp2, NULL);\n\n  /* Generate a division chain backwards, allowing otherwise unlikely huge\n     quotients.  
*/\n\n  mpz_set_ui (a, 0);\n  mpz_urandomb (bs, rs, 32);\n  mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb1 + 1);\n  mpz_rrandomb (b, rs, mpz_get_ui (bs));\n  mpz_add_ui (b, b, 1);\n  mpz_set (ref, b);\n\n  for (j = 0; j < chain_len; j++)\n    {\n      mpz_urandomb (bs, rs, 32);\n      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);\n      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);\n      mpz_add_ui (temp2, temp2, 1);\n      mpz_mul (temp1, b, temp2);\n      mpz_add (a, a, temp1);\n\n      mpz_urandomb (bs, rs, 32);\n      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);\n      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);\n      mpz_add_ui (temp2, temp2, 1);\n      mpz_mul (temp1, a, temp2);\n      mpz_add (b, b, temp1);\n    }\n\n  mpz_clears (bs, temp1, temp2, NULL);\n}\n\n/* Test operands from a table of seed data.  This variant creates the operands\n   using plain ol' mpz_rrandomb.  This is a hack for better coverage of the gcd\n   code, which depends on that the random number generators give the exact\n   numbers we expect.  
*/\nvoid\ncheck_kolmo1 (void)\n{\n  static const struct {\n    unsigned int seed;\n    int nb;\n    const char *want;\n  } data[] = {\n    { 59618, 38208, \"5\"},\n    { 76521, 49024, \"3\"},\n    { 85869, 54976, \"1\"},\n    { 99449, 63680, \"1\"},\n    {112453, 72000, \"1\"}\n  };\n\n  gmp_randstate_t rs;\n  mpz_t  bs, a, b, want;\n  int    i, unb, vnb, nb;\n\n  gmp_randinit_default (rs);\n\n  mpz_inits (bs, a, b, want, NULL);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      nb = data[i].nb;\n\n      gmp_randseed_ui (rs, data[i].seed);\n\n      mpz_urandomb (bs, rs, 32);\n      unb = mpz_get_ui (bs) % nb;\n      mpz_urandomb (bs, rs, 32);\n      vnb = mpz_get_ui (bs) % nb;\n\n      mpz_rrandomb (a, rs, unb);\n      mpz_rrandomb (b, rs, vnb);\n\n      mpz_set_str_or_abort (want, data[i].want, 0);\n\n      one_test (a, b, want, -1);\n    }\n\n  mpz_clears (bs, a, b, want, NULL);\n  gmp_randclear (rs);\n}\n\n/* Test operands from a table of seed data.  This variant creates the operands\n   using a division chain.  This is a hack for better coverage of the gcd\n   code, which depends on that the random number generators give the exact\n   numbers we expect.  
*/\nvoid\ncheck_kolmo2 (void)\n{\n  static const struct {\n    unsigned int seed;\n    int nb, chain_len;\n  } data[] = {\n    {  917, 15, 5 },\n    { 1032, 18, 6 },\n    { 1167, 18, 6 },\n    { 1174, 18, 6 },\n    { 1192, 18, 6 },\n  };\n\n  gmp_randstate_t rs;\n  mpz_t  bs, a, b, want;\n  int    i;\n\n  gmp_randinit_default (rs);\n\n  mpz_inits (bs, a, b, want, NULL);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      gmp_randseed_ui (rs, data[i].seed);\n      make_chain_operands (want, a, b, rs, data[i].nb, data[i].nb, data[i].chain_len);\n      one_test (a, b, want, -1);\n    }\n\n  mpz_clears (bs, a, b, want, NULL);\n  gmp_randclear (rs);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2, ref;\n  int i, chain_len;\n  gmp_randstate_ptr rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n  long int reps = 200;\n\n  tests_start ();\n\n  rands = RANDS;\n\n  mpz_inits (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);\n\n  check_data ();\n  check_kolmo1 ();\n  check_kolmo2 ();\n\n  /* Testcase to exercise the u0 == u1 case in mpn_gcdext_lehmer_n. */\n  mpz_set_ui (op2, GMP_NUMB_MAX); /* FIXME: Huge limb doesn't always fit */\n  mpz_mul_2exp (op1, op2, 100);\n  mpz_add (op1, op1, op2);\n  mpz_mul_ui (op2, op2, 2);\n  one_test (op1, op2, NULL, -1);\n\n  for (i = 0; i < reps; i++)\n    {\n      /* Generate plain operands with unknown gcd.  These types of operands\n\t have proven to trigger certain bugs in development versions of the\n\t gcd code.  The \"hgcd->row[3].rsize > M\" ASSERT is not triggered by\n\t the division chain code below, but that is most likely just a result\n\t of that other ASSERTs are triggered before it.  
*/\n\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 17 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);\n      mpz_urandomb (bs, rands, size_range);\n      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);\n\n      mpz_urandomb (bs, rands, 8);\n      bsi = mpz_get_ui (bs);\n\n      if ((bsi & 0x3c) == 4)\n\tmpz_mul (op1, op1, op2);\t/* make op1 a multiple of op2 */\n      else if ((bsi & 0x3c) == 8)\n\tmpz_mul (op2, op1, op2);\t/* make op2 a multiple of op1 */\n\n      if ((bsi & 1) != 0)\n\tmpz_neg (op1, op1);\n      if ((bsi & 2) != 0)\n\tmpz_neg (op2, op2);\n\n      one_test (op1, op2, NULL, i);\n\n      /* Generate a division chain backwards, allowing otherwise unlikely huge\n\t quotients.  */\n\n      mpz_urandomb (bs, rands, 32);\n      chain_len = mpz_get_ui (bs) % LOG2C (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD);\n      mpz_urandomb (bs, rands, 32);\n      chain_len = mpz_get_ui (bs) % (1 << chain_len) / 32;\n\n      make_chain_operands (ref, op1, op2, rands, 16, 12, chain_len);\n\n      one_test (op1, op2, ref, i);\n    }\n\n  mpz_clears (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);\n\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n\nvoid\none_test (mpz_t op1, mpz_t op2, mpz_t ref, int i)\n{\n  /*\n  printf (\"%d %d %d\\n\", SIZ (op1), SIZ (op2), ref != NULL ? 
SIZ (ref) : 0);\n  fflush (stdout);\n  */\n\n  /*\n  fprintf (stderr, \"op1=\");  debug_mp (op1, -16);\n  fprintf (stderr, \"op2=\");  debug_mp (op2, -16);\n  */\n\n  mpz_gcdext (gcd1, s, NULL, op1, op2);\n  MPZ_CHECK_FORMAT (gcd1);\n  MPZ_CHECK_FORMAT (s);\n\n  if (ref && mpz_cmp (ref, gcd1) != 0)\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      fprintf (stderr, \"mpz_gcdext returned incorrect result\\n\");\n      fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n      fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n      fprintf (stderr, \"expected result:\\n\");   debug_mp (ref, -16);\n      fprintf (stderr, \"mpz_gcdext returns:\\n\");debug_mp (gcd1, -16);\n      abort ();\n    }\n\n  if (!gcdext_valid_p(op1, op2, gcd1, s))\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      fprintf (stderr, \"mpz_gcdext returned invalid result\\n\");\n      fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n      fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n      fprintf (stderr, \"mpz_gcdext returns:\\n\");debug_mp (gcd1, -16);\n      fprintf (stderr, \"s=\");                   debug_mp (s, -16);\n      abort ();\n    }\n\n  mpz_gcd (gcd2, op1, op2);\n  MPZ_CHECK_FORMAT (gcd2);\n\n  if (mpz_cmp (gcd2, gcd1) != 0)\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      fprintf (stderr, \"mpz_gcd returned incorrect result\\n\");\n      fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n      fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n      fprintf (stderr, \"expected result:\\n\");   debug_mp (gcd1, -16);\n      fprintf (stderr, \"mpz_gcd returns:\\n\");   debug_mp (gcd2, -16);\n      abort ();\n    }\n\n  /* This should probably move to t-gcd_ui.c */\n  if (mpz_fits_ulong_p (op1) || mpz_fits_ulong_p (op2))\n    {\n      if (mpz_fits_ulong_p (op1))\n\tmpz_gcd_ui (gcd2, op2, mpz_get_ui (op1));\n      else\n\tmpz_gcd_ui (gcd2, 
op1, mpz_get_ui (op2));\n      if (mpz_cmp (gcd2, gcd1))\n\t{\n\t  fprintf (stderr, \"ERROR in test %d\\n\", i);\n\t  fprintf (stderr, \"mpz_gcd_ui returned incorrect result\\n\");\n\t  fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n\t  fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n\t  fprintf (stderr, \"expected result:\\n\");   debug_mp (gcd1, -16);\n\t  fprintf (stderr, \"mpz_gcd_ui returns:\\n\");   debug_mp (gcd2, -16);\n\t  abort ();\n\t}\n    }\n\n  mpz_gcdext (gcd2, temp1, temp2, op1, op2);\n  MPZ_CHECK_FORMAT (gcd2);\n  MPZ_CHECK_FORMAT (temp1);\n  MPZ_CHECK_FORMAT (temp2);\n\n  mpz_mul (temp1, temp1, op1);\n  mpz_mul (temp2, temp2, op2);\n  mpz_add (temp1, temp1, temp2);\n\n  if (mpz_cmp (gcd1, gcd2) != 0\n      || mpz_cmp (gcd2, temp1) != 0)\n    {\n      fprintf (stderr, \"ERROR in test %d\\n\", i);\n      fprintf (stderr, \"mpz_gcdext returned incorrect result\\n\");\n      fprintf (stderr, \"op1=\");                 debug_mp (op1, -16);\n      fprintf (stderr, \"op2=\");                 debug_mp (op2, -16);\n      fprintf (stderr, \"expected result:\\n\");   debug_mp (gcd1, -16);\n      fprintf (stderr, \"mpz_gcdext returns:\\n\");debug_mp (gcd2, -16);\n      abort ();\n    }\n}\n\n/* Called when g is supposed to be gcd(a,b), and g = s a + t b, for some t.\n   Uses temp1, temp2 and temp3. */\nstatic int\ngcdext_valid_p (const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s)\n{\n  /* It's not clear that gcd(0,0) is well defined, but we allow it and require that\n     gcd(0,0) = 0. */\n  if (mpz_sgn (g) < 0)\n    return 0;\n\n  if (mpz_sgn (a) == 0)\n    {\n      /* Must have g == abs (b). Any value for s is in some sense \"correct\",\n\t but it makes sense to require that s == 0. 
*/\n      return mpz_cmpabs (g, b) == 0 && mpz_sgn (s) == 0;\n    }\n  else if (mpz_sgn (b) == 0)\n    {\n      /* Must have g == abs (a), s == sign (a) */\n      return mpz_cmpabs (g, a) == 0 && mpz_cmp_si (s, mpz_sgn (a)) == 0;\n    }\n\n  if (mpz_sgn (g) <= 0)\n    return 0;\n\n  mpz_tdiv_qr (temp1, temp3, a, g);\n  if (mpz_sgn (temp3) != 0)\n    return 0;\n\n  mpz_tdiv_qr (temp2, temp3, b, g);\n  if (mpz_sgn (temp3) != 0)\n    return 0;\n\n  /* Require that 2 |s| < |b/g|, or |s| == 1. */\n  if (mpz_cmpabs_ui (s, 1) > 0)\n    {\n      mpz_mul_2exp (temp3, s, 1);\n      if (mpz_cmpabs (temp3, temp2) >= 0)\n\treturn 0;\n    }\n\n  /* Compute the other cofactor. */\n  mpz_mul(temp2, s, a);\n  mpz_sub(temp2, g, temp2);\n  mpz_tdiv_qr(temp2, temp3, temp2, b);\n\n  if (mpz_sgn (temp3) != 0)\n    return 0;\n\n  /* Require that 2 |t| < |a/g| or |t| == 1*/\n  if (mpz_cmpabs_ui (temp2, 1) > 0)\n    {\n      mpz_mul_2exp (temp2, temp2, 1);\n      if (mpz_cmpabs (temp2, temp1) >= 0)\n\treturn 0;\n    }\n  return 1;\n}\n"
  },
  {
    "path": "tests/mpz/t-gcd_ui.c",
    "content": "/* Test mpz_gcd_ui.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\n\n/* Check mpz_gcd_ui doesn't try to return a value out of range.\n   This was wrong in gmp 4.1.2 with a long long limb.  */\nstatic void\ncheck_ui_range (void)\n{\n  mpir_ui got;\n  mpz_t  x;\n  int  i;\n\n  mpz_init_set_ui (x, GMP_UI_MAX);\n\n  for (i = 0; i < 20; i++)\n    {\n      mpz_mul_2exp (x, x, 1L);\n      got = mpz_gcd_ui (NULL, x, 0L);\n      if (got != 0)\n        {\n          printf (\"mpz_gcd_ui (GMP_UI_MAX*2^%d, 0)\\n\", i);\n          printf (\"   return %#Mx\\n\", got);\n          printf (\"   should be 0\\n\");\n          abort ();\n        }\n    }\n\n  mpz_clear (x);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_ui_range ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-get_d.c",
    "content": "/* Test mpz_get_d.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_onebit (void)\n{\n  int     i;\n  mpz_t   z;\n  double  got, want;\n  /* FIXME: It'd be better to base this on the float format. */\n  int     limit = 512;\n\n  mpz_init (z);\n\n  mpz_set_ui (z, 1L);\n  want = 1.0;\n\n  for (i = 0; i < limit; i++)\n    {\n      got = mpz_get_d (z);\n\n      if (got != want)\n        {\n          printf    (\"mpz_get_d wrong on 2**%d\\n\", i);\n          mpz_trace (\"   z    \", z);\n          printf    (\"   want  %.20g\\n\", want);\n          printf    (\"   got   %.20g\\n\", got);\n          abort();\n        }\n\n      mpz_mul_2exp (z, z, 1L);\n      want *= 2.0;\n    }\n  mpz_clear (z);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_onebit ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-get_d_2exp.c",
    "content": "/* Test mpz_get_d_2exp.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nstatic void\ncheck_onebit (void)\n{\n  static const unsigned long data[] = {\n    1, 32, 52, 53, 54, 63, 64, 65, 128, 256, 511, 512, 513\n  };\n  mpz_t   z;\n  double  got, want;\n  signed long got_exp, want_exp;\n  int     i;\n\n  mpz_init (z);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_ui (z, 1L);\n      mpz_mul_2exp (z, z, data[i]);\n      want = 0.5;\n      want_exp = data[i] + 1;\n      got = mpz_get_d_2exp (&got_exp, z);\n      if (got != want || got_exp != want_exp)\n        {\n          printf    (\"mpz_get_d_2exp wrong on 2**%ld\\n\", data[i]);\n          mpz_trace (\"   z    \", z);\n          d_trace   (\"   want \", want);\n          d_trace   (\"   got  \", got);\n          printf    (\"   want exp %ld\\n\", want_exp);\n          printf    (\"   got exp  %ld\\n\", got_exp);\n          abort();\n        }\n\n      mpz_set_si (z, -1L);\n      mpz_mul_2exp (z, z, data[i]);\n      want = -0.5;\n      want_exp = data[i] + 1;\n      
got = mpz_get_d_2exp (&got_exp, z);\n      if (got != want || got_exp != want_exp)\n        {\n          printf    (\"mpz_get_d_2exp wrong on -2**%ld\\n\", data[i]);\n          mpz_trace (\"   z    \", z);\n          d_trace   (\"   want \", want);\n          d_trace   (\"   got  \", got);\n          printf    (\"   want exp %ld\\n\", want_exp);\n          printf    (\"   got exp  %ld\\n\", got_exp);\n          abort();\n        }\n    }\n  mpz_clear (z);\n}\n\n/* Check that hardware rounding doesn't make mpz_get_d_2exp return a value\n   outside its defined range. */\nstatic void\ncheck_round (void)\n{\n  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };\n  mpz_t   z;\n  double  got;\n  mpir_si got_exp;\n  int     i, rnd_mode, old_rnd_mode;\n\n  mpz_init (z);\n  old_rnd_mode = tests_hardware_getround ();\n\n  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)\n    {\n      tests_hardware_setround (rnd_mode);\n\n      for (i = 0; i < numberof (data); i++)\n        {\n          mpz_set_ui (z, 1L);\n          mpz_mul_2exp (z, z, data[i]);\n          mpz_sub_ui (z, z, 1L);\n\n          got = mpz_get_d_2exp (&got_exp, z);\n          if (got < 0.5 || got >= 1.0)\n            {\n              printf    (\"mpz_get_d_2exp wrong on 2**%lu-1\\n\", data[i]);\n              printf    (\"result out of range, expect 0.5 <= got < 1.0\\n\");\n              printf    (\"   rnd_mode = %d\\n\", rnd_mode);\n              printf    (\"   data[i]  = %lu\\n\", data[i]);\n              mpz_trace (\"   z    \", z);\n              d_trace   (\"   got  \", got);\n              printf    (\"   got exp  %ld\\n\", got_exp);\n              abort();\n            }\n\n          mpz_neg (z, z);\n          got = mpz_get_d_2exp (&got_exp, z);\n          if (got <= -1.0 || got > -0.5)\n            {\n              printf    (\"mpz_get_d_2exp wrong on -2**%lu-1\\n\", data[i]);\n              printf    (\"result out of range, expect -1.0 < got <= -0.5\\n\");\n              printf    
(\"   rnd_mode = %d\\n\", rnd_mode);\n              printf    (\"   data[i]  = %lu\\n\", data[i]);\n              mpz_trace (\"   z    \", z);\n              d_trace   (\"   got  \", got);\n              printf    (\"   got exp  %ld\\n\", got_exp);\n              abort();\n            }\n        }\n    }\n\n  mpz_clear (z);\n  tests_hardware_setround (old_rnd_mode);\n}\n\nstatic void\ncheck_rand (void)\n{\n  gmp_randstate_t rands;\n  int     i;\n  mpz_t   z;\n  double  got;\n  mpir_si    got_exp;\n  unsigned long  bits;\n\n  mpz_init (z);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 200; i++)\n    {\n      bits = gmp_urandomm_ui (rands, 512L);\n      mpz_urandomb (z, rands, bits);\n\n      got = mpz_get_d_2exp (&got_exp, z);\n      if (mpz_sgn (z) == 0)\n        continue;\n      bits = mpz_sizeinbase (z, 2);\n\n      if (got < 0.5 || got >= 1.0)\n        {\n          printf    (\"mpz_get_d_2exp out of range, expect 0.5 <= got < 1.0\\n\");\n          mpz_trace (\"   z    \", z);\n          d_trace   (\"   got  \", got);\n          printf    (\"   got exp  %ld\\n\", got_exp);\n          abort();\n        }\n\n      /* FIXME: If mpz_get_d_2exp rounds upwards we might have got_exp ==\n         bits+1, so leave this test disabled until we decide if that's what\n         should happen, or not.  */\n#if 0\n      if (got_exp != bits)\n        {\n          printf    (\"mpz_get_d_2exp wrong exponent\\n\", i);\n          mpz_trace (\"   z    \", z);\n          d_trace   (\"   bits \", bits);\n          d_trace   (\"   got  \", got);\n          printf    (\"   got exp  %ld\\n\", got_exp);\n          abort();\n        }\n#endif\n    }\n  mpz_clear (z);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_onebit ();\n  check_round ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-get_si.c",
    "content": "/* Exercise mpz_get_si.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *n;\n    mpir_si        want;\n  } data[] = {\n    { \"0\",      0L },\n    { \"1\",      1L },\n    { \"-1\",     -1L },\n    { \"2\",      2L },\n    { \"-2\",     -2L },\n    { \"12345\",  12345L },\n    { \"-12345\", -12345L },\n  };\n\n  int    i;\n  mpz_t  n;\n  mpir_si   got;\n\n  mpz_init (n);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (n, data[i].n, 0);\n\n      got = mpz_get_si (n);\n      if (got != data[i].want)\n        {\n          printf (\"mpz_get_si wrong at data[%d]\\n\", i); \n          printf (\"   n     \\\"%s\\\" (\", data[i].n);\n          mpz_out_str (stdout, 10, n); printf (\", hex \");\n          mpz_out_str (stdout, 16, n); printf (\")\\n\");\n          printf (\"   got   %ld (0x%lX)\\n\", got, got);\n          printf (\"   want  %ld (0x%lX)\\n\", data[i].want, data[i].want);\n          abort();                                    \n    
    }\n    }\n  mpz_clear (n);\n}\n\n\nvoid\ncheck_max (void)\n{\n  mpz_t  n;\n  long   want;\n  long   got;\n\n  mpz_init (n);\n\n#define CHECK_MAX(name)                                 \\\n  if (got != want)                                      \\\n    {                                                   \\\n      printf (\"mpz_get_si wrong on %s\\n\", name);        \\\n      printf (\"   n    \");                              \\\n      mpz_out_str (stdout, 10, n); printf (\", hex \");   \\\n      mpz_out_str (stdout, 16, n); printf (\"\\n\");       \\\n      printf (\"   got  %ld, hex %lX\\n\", got, got);      \\\n      printf (\"   want %ld, hex %lX\\n\", want, want);    \\\n      abort();                                          \\\n    }\n\n  want = LONG_MAX;\n  mpz_set_si (n, want);\n  got = mpz_get_si (n);\n  CHECK_MAX (\"LONG_MAX\");\n\n  want = LONG_MIN;\n  mpz_set_si (n, want);\n  got = mpz_get_si (n);\n  CHECK_MAX (\"LONG_MIN\");\n\n  /* The following checks that -0x100000000 gives -0x80000000.  This doesn't\n     actually fit in a long and the result from mpz_get_si() is undefined,\n     but -0x80000000 is what comes out currently, and it should be that\n     value irrespective of the mp_limb_t size (long or long long).  */\n\n  want = LONG_MIN;\n  mpz_mul_2exp (n, n, 1);\n  CHECK_MAX (\"-0x100...00\");\n\n  mpz_clear (n);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n  check_max ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-get_sx.c",
    "content": "/* \n\nTest mpz_get_sx\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#if defined( _MSC_VER )\n#  if _MSC_VER < 1600\n#    define SKIP_TEST\n#  else\n#    include <stdint.h>\n#  endif\n#else\n#  include \"config.h\"\n#  ifdef HAVE_STDINT_H\n#    include <stdint.h>\n#  else\n#    define SKIP_TEST\n#  endif\n#endif\n\n#ifdef SKIP_TEST\n\nint\nmain (void)\n{\n  printf (\"(u)intmax_t not available - test skipped\\n\");\n  exit (0);\n}\n\n#else\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *n;\n    intmax_t     want;\n  } data[] = {\n    { \"0\",      0L },\n    { \"1\",      1L },\n    { \"-1\",     -1L },\n    { \"2\",      2L },\n    { \"-2\",     -2L },\n    { \"12345\",  12345L },\n    { \"-12345\", -12345L },\n  };\n\n  int    i;\n  mpz_t  n;\n  intmax_t got;\n\n  mpz_init (n);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (n, data[i].n, 0);\n\n      got = mpz_get_sx (n);\n      if (got != data[i].want)\n        {\n          printf (\"mpz_get_sx wrong at data[%d]\\n\", i); \n 
         printf (\"   n     \\\"%s\\\" (\", data[i].n);\n          mpz_out_str (stdout, 10, n); printf (\", hex \");\n          mpz_out_str (stdout, 16, n); printf (\")\\n\");\n          printf (\"   got   %ld (0x%lX)\\n\", got, got);\n          printf (\"   want  %ld (0x%lX)\\n\", data[i].want, data[i].want);\n          abort();                                    \n        }\n    }\n  mpz_clear (n);\n}\n\n\nvoid\ncheck_max (void)\n{\n  mpz_t  n;\n  intmax_t want;\n  intmax_t   got;\n\n  mpz_init (n);\n\n#define CHECK_MAX(name)                                 \\\n  if (got != want)                                      \\\n    {                                                   \\\n      printf (\"mpz_get_sx wrong on %s\\n\", name);        \\\n      printf (\"   n    \");                              \\\n      mpz_out_str (stdout, 10, n); printf (\", hex \");   \\\n      mpz_out_str (stdout, 16, n); printf (\"\\n\");       \\\n      printf (\"   got  %ld, hex %lx\\n\", got, got);      \\\n      printf (\"   want %ld, hex %lx\\n\", want, want);    \\\n    }\n\n  want = INTMAX_MAX;\n  mpz_set_sx (n, want);\n  got = mpz_get_sx (n);\n  CHECK_MAX (\"INTMAX_MAX\");\n\n  want = INTMAX_MIN;\n  mpz_set_sx (n, want);\n  got = mpz_get_sx (n);\n  CHECK_MAX (\"INTMAX_MIN\");\n\n  /* The following checks that -0x100000000 gives -0x80000000.  This doesn't\n     actually fit in a long and the result from mpz_get_sx() is undefined,\n     but -0x80000000 is what comes out currently, and it should be that\n     value irrespective of the mp_limb_t size (long or long long).  */\n\n  want = INTMAX_MIN;\n  mpz_mul_2exp (n, n, 1);\n  CHECK_MAX (\"-0x100...00\");\n\n  mpz_clear (n);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n  check_max ();\n\n  tests_end ();\n  exit (0);\n}\n\n#endif\n"
  },
  {
    "path": "tests/mpz/t-get_ux.c",
    "content": "/* \nTest mpz_get_ux (not much use but perhaps better than nothing)\n\nCopyright 2011, Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#if defined( _MSC_VER )\n#  if _MSC_VER < 1600\n#    define SKIP_TEST\n#  else\n#    include <stdint.h>\n#  endif\n#else\n#  include \"config.h\"\n#  ifdef HAVE_STDINT_H\n#    include <stdint.h>\n#  else\n#    define SKIP_TEST\n#  endif\n#endif\n\n#ifdef SKIP_TEST\n\nint\nmain (void)\n{\n  printf (\"(u)intmax_t not available - test skipped\\n\");\n  exit (0);\n}\n\n#else\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nstatic const uintmax_t val[] =\n{\n    0, 1, 0xff, \n#if SIZEOF_UINTMAX_T >= 2\n    0x0100, 0x0101, 0xffff, \n#endif\n#if SIZEOF_UINTMAX_T >= 4     \n    0x10000, 0x10001, 0xffffffff, \n#endif\n#if SIZEOF_UINTMAX_T >= 8\n    0x100000000, 0x100000001, 0xffffffffffffffff,\n#endif\n    UINTMAX_MAX\n};\n\nvoid\ncheck_data (void)\n{   unsigned int i;\n    mpz_t   z;\n\n    mpz_init(z);\n\n    for( i = 0 ; i < sizeof(val) / sizeof(uintmax_t) ; ++i )\n    {\n        uintmax_t k = 0, n;\n\n        
mpz_set_ux(z, val[i]);\n        n = mpz_get_ux(z);\n#if NLIMBS == 1\n        if(n && n != z->_mp_d[k++])\n        {\n            printf(\"mpz_get_ux() failed for data on item %d\\n\", i);\n            abort();\n        }\n#else\n        while(n)\n        {\n            if((n & GMP_NUMB_MASK) != z->_mp_d[k++])\n            {\n                printf(\"mpz_get_ux() failed for data on item %d\\n\", i);\n                abort();\n            }\n            n >>= GMP_NUMB_BITS;\n        }\n#endif\n        if(z->_mp_size != k)\n        {\n            printf(\"mpz_get_ux() failed for length on item %u (mpz size: %d, size: %lu)\\n\", i, z->_mp_size, k);\n            abort();\n        }\n        if(z->_mp_alloc < k)\n        {\n            printf(\"mpz_get_ux() failed for allocation on item %u (mpz alloc: %d, size: %lu)\\n\", i, z->_mp_alloc, k);\n            abort();\n        }\n    }\n    mpz_clear(z);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n\n#endif\n"
  },
  {
    "path": "tests/mpz/t-hamdist.c",
    "content": "/* Test mpz_hamdist.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_twobits (void)\n{\n  unsigned long  i, j, got, want;\n  mpz_t  x, y;\n\n  mpz_init (x);\n  mpz_init (y);\n  for (i = 0; i < 5 * GMP_NUMB_BITS; i++)\n    {\n      for (j = 0; j < 5 * GMP_NUMB_BITS; j++)\n        {\n          mpz_set_ui (x, 0L);\n          mpz_setbit (x, i);\n          mpz_set_ui (y, 0L);\n          mpz_setbit (y, j);\n\n          want = 2 * (i != j);\n          got = mpz_hamdist (x, y);\n          if (got != want)\n            {\n              printf    (\"mpz_hamdist wrong on 2 bits pos/pos\\n\");\n            wrong:\n              printf    (\"  i    %lu\\n\", i);\n              printf    (\"  j    %lu\\n\", j);\n              printf    (\"  got  %lu\\n\", got);\n              printf    (\"  want %lu\\n\", want);\n              mpz_trace (\"  x   \", x);\n              mpz_trace (\"  y   \", y);\n              abort();\n            }\n\n          mpz_neg (x, x);\n          mpz_neg (y, y);\n          want = ABS ((long) (i-j));\n         
 got = mpz_hamdist (x, y);\n          if (got != want)\n            {\n              printf    (\"mpz_hamdist wrong on 2 bits neg/neg\\n\");\n              goto wrong;\n            }\n        }\n\n    }\n  mpz_clear (x);\n  mpz_clear (y);\n}\n\n\nvoid\ncheck_rand (void)\n{\n  gmp_randstate_t  rands;\n  unsigned long  got, want;\n  int    i;\n  mpz_t  x, y;\n\n  mpz_init (x);\n  mpz_init (y);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 2000; i++)\n    {\n      mpz_erandomb (x, rands, 6 * GMP_NUMB_BITS);\n      mpz_negrandom (x, rands);\n      mpz_mul_2exp (x, x, urandom(rands) % (4 * GMP_NUMB_BITS));\n\n      mpz_erandomb (y, rands, 6 * GMP_NUMB_BITS);\n      mpz_negrandom (y, rands);\n      mpz_mul_2exp (y, y, urandom(rands) % (4 * GMP_NUMB_BITS));\n\n      want = refmpz_hamdist (x, y);\n      got = mpz_hamdist (x, y);\n      if (got != want)\n        {\n          printf    (\"mpz_hamdist wrong on random\\n\");\n          printf    (\"  got  %lu\\n\", got);\n          printf    (\"  want %lu\\n\", want);\n          mpz_trace (\"  x   \", x);\n          mpz_trace (\"  y   \", y);\n          abort();\n        }\n    }\n  mpz_clear (x);\n  mpz_clear (y);\n  gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_twobits ();\n  check_rand ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-import.c",
    "content": "/* Test mpz_import.\n\nCopyright 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *want;\n    size_t      count;\n    int         order;\n    size_t      size;\n    int         endian;\n    int         nail;\n    char        src[64];\n\n  } data[] = {\n\n    { \"0\", 0,1, 1,1, 0 },\n    { \"0\", 1,1, 0,1, 0 },\n\n    { \"0x12345678\", 4,1,  1,1, 0, { '\\22', '\\64', '\\126', '\\170' } },\n    { \"0x12345678\", 1,1,  4,1, 0, { '\\22', '\\64', '\\126', '\\170' } },\n    { \"0x12345678\", 1,-1, 4,1, 0, { '\\22', '\\64', '\\126', '\\170' } },\n\n    { \"0x12345678\", 4,-1, 1,-1, 0, { '\\170', '\\126', '\\064', '\\22' } },\n    { \"0x12345678\", 1,1,  4,-1, 0, { '\\170', '\\126', '\\064', '\\22' } },\n    { \"0x12345678\", 1,-1, 4,-1, 0, { '\\170', '\\126', '\\064', '\\22' } },\n\n    { \"0\",    5,1,  1,1, 7, { '\\376', '\\376', '\\376', '\\376', '\\376' } },\n    { \"0\",    5,-1, 1,1, 7, { '\\376', '\\376', '\\376', '\\376', '\\376' } },\n    { \"0x15\", 5,1,  
1,1, 7, { '\\377', '\\376', '\\377', '\\376', '\\377' } },\n\n    { \"0\",    3,1,  2,1,   1, { '\\200','\\000', '\\200','\\000', '\\200','\\000' }},\n    { \"0\",    3,1,  2,-1,  1, { '\\000','\\200', '\\000','\\200', '\\000','\\200' }},\n    { \"0\",    3,1,  2,1,  15, { '\\377','\\376', '\\377','\\376', '\\377','\\376' }},\n\n    { \"0x2A\", 3,1,  2,1, 14, { '\\377','\\376', '\\377','\\376', '\\377','\\376' } },\n    { \"0x06\", 3,1,  2,1, 14, { '\\377','\\374', '\\377','\\375', '\\377','\\376' } },\n    { \"0x24\", 3,-1, 2,1, 14, { '\\377','\\374', '\\377','\\375', '\\377','\\376' } },\n\n    { \"0x123456789ABC\", 3,1,  2,1,  0, {\n        '\\022','\\064', '\\126','\\170', '\\232','\\274' } },\n    { \"0x123456789ABC\", 3,-1, 2,1,  0, {\n        '\\232','\\274', '\\126','\\170', '\\022','\\064' } },\n    { \"0x123456789ABC\", 3,1,  2,-1, 0, {\n        '\\064','\\022', '\\170','\\126', '\\274','\\232' } },\n    { \"0x123456789ABC\", 3,-1, 2,-1, 0, {\n        '\\274','\\232', '\\170','\\126', '\\064','\\022' } },\n\n    { \"0x112233445566778899AABBCC\", 3,1,  4,1,  0,\n      { '\\021','\\042','\\063','\\104',\n        '\\125','\\146','\\167','\\210',\n        '\\231','\\252','\\273','\\314' } },\n    { \"0x112233445566778899AABBCC\", 3,-1, 4,1,  0,\n      { '\\231','\\252','\\273','\\314',\n        '\\125','\\146','\\167','\\210',\n        '\\021','\\042','\\063','\\104' } },\n    { \"0x112233445566778899AABBCC\", 3,1,  4,-1, 0,\n      { '\\104','\\063','\\042','\\021',\n        '\\210','\\167','\\146','\\125',\n        '\\314','\\273','\\252','\\231' } },\n    { \"0x112233445566778899AABBCC\", 3,-1, 4,-1, 0,\n      { '\\314','\\273','\\252','\\231',\n        '\\210','\\167','\\146','\\125',\n        '\\104','\\063','\\042','\\021' } },\n\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,1,  8,1,  0,\n      { '\\020','\\001','\\040','\\002','\\060','\\003','\\100','\\004',\n        '\\120','\\005','\\140','\\006','\\160','\\007','\\200','\\010',\n  
      '\\220','\\011','\\240','\\012','\\260','\\013','\\300','\\014' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,-1, 8,1,  0,\n      { '\\220','\\011','\\240','\\012','\\260','\\013','\\300','\\014',\n        '\\120','\\005','\\140','\\006','\\160','\\007','\\200','\\010',\n        '\\020','\\001','\\040','\\002','\\060','\\003','\\100','\\004' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,1,  8,-1, 0,\n      { '\\004','\\100','\\003','\\060','\\002','\\040','\\001','\\020',\n        '\\010','\\200','\\007','\\160','\\006','\\140','\\005','\\120',\n        '\\014','\\300','\\013','\\260','\\012','\\240','\\011','\\220' } },\n    { \"0x100120023003400450056006700780089009A00AB00BC00C\", 3,-1, 8,-1, 0,\n      { '\\014','\\300','\\013','\\260','\\012','\\240','\\011','\\220',\n        '\\010','\\200','\\007','\\160','\\006','\\140','\\005','\\120',\n        '\\004','\\100','\\003','\\060','\\002','\\040','\\001','\\020' } },\n\n    { \"0x155555555555555555555555\", 3,1,  4,1,  1,\n      { '\\325','\\125','\\125','\\125',\n        '\\252','\\252','\\252','\\252',\n        '\\325','\\125','\\125','\\125' } },\n    { \"0x155555555555555555555555\", 3,-1,  4,1,  1,\n      { '\\325','\\125','\\125','\\125',\n        '\\252','\\252','\\252','\\252',\n        '\\325','\\125','\\125','\\125' } },\n    { \"0x155555555555555555555555\", 3,1,  4,-1,  1,\n      { '\\125','\\125','\\125','\\325',\n        '\\252','\\252','\\252','\\252',\n        '\\125','\\125','\\125','\\325' } },\n    { \"0x155555555555555555555555\", 3,-1,  4,-1,  1,\n      { '\\125','\\125','\\125','\\325',\n        '\\252','\\252','\\252','\\252',\n        '\\125','\\125','\\125','\\325' } },\n  };\n\n  char    buf[sizeof(data[0].src) + sizeof (mp_limb_t)];\n  char    *src;\n  size_t  align;\n  int     i;\n  mpz_t   got, want;\n\n  mpz_init (got);\n  mpz_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (align = 0; align < sizeof 
(mp_limb_t); align++)\n        {\n          mpz_set_str_or_abort (want, data[i].want, 0);\n          src = buf + align;\n          memcpy (src, data[i].src, data[i].count * data[i].size);\n\n          mpz_set_ui (got, 0L);\n          mpz_import (got, data[i].count, data[i].order,\n                      data[i].size, data[i].endian, data[i].nail, src);\n\n          MPZ_CHECK_FORMAT (got);\n          if (mpz_cmp (got, want) != 0)\n            {\n              printf (\"wrong at data[%d]\\n\", i);\n              printf (\"    count=%lu order=%d  size=%lu endian=%d nail=%d  align=%lu\\n\",\n                      data[i].count, data[i].order,\n                      data[i].size, data[i].endian, data[i].nail,\n                      align);\n              mpz_trace (\"    got \", got);\n              mpz_trace (\"    want\", want);\n              abort ();\n            }\n        }\n    }\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  mp_trace_base = -16;\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-inp_str.c",
    "content": "/* Test mpz_inp_str.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>\t\t/* for unlink */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#define FILENAME  \"t-inp_str.tmp\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *inp;\n    int         base;\n    const char  *want;\n    int         want_nread;\n\n  } data[] = {\n\n    { \"0\",   10, \"0\", 1 },\n\n    { \"abc\", 10, \"0\", 0 },\n    { \"ghi\", 16, \"0\", 0 },\n\n    {  \"ff\", 16,  \"255\", 2 },\n    { \"-ff\", 16, \"-255\", 3 },\n    {  \"FF\", 16,  \"255\", 2 },\n    { \"-FF\", 16, \"-255\", 3 },\n\n    { \"z\", 36, \"35\", 1 },\n    { \"Z\", 36, \"35\", 1 },\n\n    {  \"0x0\",    0,   \"0\", 3 },\n    {  \"0x10\",   0,  \"16\", 4 },\n    { \"-0x0\",    0,   \"0\", 4 },\n    { \"-0x10\",   0, \"-16\", 5 },\n\n    {  \"00\",   0,  \"0\", 2 },\n    {  \"010\",  0,  \"8\", 3 },\n    { \"-00\",   0,  \"0\", 3 },\n    { \"-010\",  0, \"-8\", 4 },\n\n    {  \"0x\",     0,   \"0\", 2 },\n    {  \"0\",      
0,   \"0\", 1 },\n  };\n\n  mpz_t  got, want;\n  long   ftell_nread;\n  int    i, pre, post, j, got_nread, want_nread;\n  FILE   *fp;\n\n  mpz_init (got);\n  mpz_init (want);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      for (pre = 0; pre <= 3; pre++)\n        {\n          for (post = 0; post <= 2; post++)\n            {\n              mpz_set_str_or_abort (want, data[i].want, 0);\n              MPZ_CHECK_FORMAT (want);\n\n              /* create the file new each time to ensure its length is what\n                 we want */\n              fp = fopen (FILENAME, \"w+\");\n              ASSERT_ALWAYS (fp != NULL);\n              for (j = 0; j < pre; j++)\n                putc (' ', fp);\n              fputs (data[i].inp, fp);\n              for (j = 0; j < post; j++)\n                putc (' ', fp);\n              fflush (fp);\n              ASSERT_ALWAYS (! ferror(fp));\n\n              rewind (fp);\n              got_nread = mpz_inp_str (got, fp, data[i].base);\n\n              if (got_nread != 0)\n                {\n                  ftell_nread = ftell (fp);\n                  if (got_nread != ftell_nread)\n                    {\n                      printf (\"mpz_inp_str nread wrong\\n\");\n                      printf (\"  inp          \\\"%s\\\"\\n\", data[i].inp);\n                      printf (\"  base         %d\\n\", data[i].base);\n                      printf (\"  pre          %d\\n\", pre);\n                      printf (\"  post         %d\\n\", post);\n                      printf (\"  got_nread    %d\\n\", got_nread);\n                      printf (\"  ftell_nread  %ld\\n\", ftell_nread);\n                      abort ();\n                    }\n                }\n\n              /* if data[i].inp is a whole string to read and there's no post\n                 whitespace then expect to have EOF */\n              if (post == 0 && data[i].want_nread == strlen(data[i].inp))\n                {\n                  int  c = getc(fp);\n           
       if (c != EOF)\n                    {\n                      printf (\"mpz_inp_str didn't read to EOF\\n\");\n                      printf (\"  inp   \\\"%s\\\"\\n\", data[i].inp);\n                      printf (\"  base  %d\\n\", data[i].base);\n                      printf (\"  pre   %d\\n\", pre);\n                      printf (\"  post  %d\\n\", post);\n                      printf (\"  c     '%c' %#x\\n\", c, c);\n                      abort ();\n                    }\n                }\n\n              /* only expect \"pre\" included in the count when non-zero */\n              want_nread = data[i].want_nread;\n              if (want_nread != 0)\n                want_nread += pre;\n\n              if (got_nread != want_nread)\n                {\n                  printf (\"mpz_inp_str nread wrong\\n\");\n                  printf (\"  inp         \\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base        %d\\n\", data[i].base);\n                  printf (\"  pre         %d\\n\", pre);\n                  printf (\"  post        %d\\n\", post);\n                  printf (\"  got_nread   %d\\n\", got_nread);\n                  printf (\"  want_nread  %d\\n\", want_nread);\n                  abort ();\n                }\n\n              MPZ_CHECK_FORMAT (got);\n      \n              if (mpz_cmp (got, want) != 0)\n                {\n                  printf (\"mpz_inp_str wrong result\\n\");\n                  printf (\"  inp   \\\"%s\\\"\\n\", data[i].inp);\n                  printf (\"  base  %d\\n\", data[i].base);\n                  mpz_trace (\"  got \",  got);\n                  mpz_trace (\"  want\", want);\n                  abort ();\n                }\n\n              ASSERT_ALWAYS (fclose (fp) == 0);\n            }\n        }\n    }\n\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  unlink (FILENAME);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-io_raw.c",
    "content": "/* Test mpz_inp_raw and mpz_out_raw.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define FILENAME  \"t-io_raw.tmp\"\n\n\n/* In the fopen, \"b\" selects binary mode on DOS systems, meaning no\n   conversion of '\\n' to and from CRLF.  It's believed systems without such\n   nonsense will simply ignore the \"b\", but in case that's not so a plain\n   \"w+\" is attempted if \"w+b\" fails.  
*/\n\nFILE *\nfopen_wplusb_or_die (const char *filename)\n{\n  FILE  *fp;\n  fp = fopen (filename, \"w+b\");\n  if (fp == NULL)\n    fp = fopen (filename, \"w+\");\n\n  if (fp == NULL)\n    {\n      printf (\"Cannot create file %s\\n\", filename);\n      abort ();\n    }\n  return fp;\n}\n\n/* use 0x80 to check nothing bad happens with sign extension etc */\n#define BYTEVAL(i)  (((i) + 1) | 0x80)\n\nvoid\ncheck_in (void)\n{\n  int        i, j, zeros, neg, error = 0;\n  mpz_t      want, got;\n  size_t     want_ret, got_ret;\n  mp_size_t  size;\n  FILE       *fp;\n\n  mpz_init (want);\n  mpz_init (got);\n\n  for (i = 0; i < 32; i++)\n    {\n      for (zeros = 0; zeros < 8; zeros++)\n        {\n          for (neg = 0; neg <= 1; neg++)\n            {\n              want_ret = i + zeros + 4;\n\n              /* need this to get the twos complement right */\n              ASSERT_ALWAYS (sizeof (size) >= 4);\n\n              size = i + zeros;\n              if (neg)\n                size = -size;\n\n              fp = fopen_wplusb_or_die (FILENAME);\n              for (j = 3; j >= 0; j--)\n                ASSERT_ALWAYS (putc ((size >> (j*8)) & 0xFF, fp) != EOF);\n              for (j = 0; j < zeros; j++)\n                ASSERT_ALWAYS (putc ('\\0', fp) != EOF);\n              for (j = 0; j < i; j++)\n                ASSERT_ALWAYS (putc (BYTEVAL (j), fp) != EOF);\n              /* and some trailing garbage */\n              ASSERT_ALWAYS (putc ('x', fp) != EOF);\n              ASSERT_ALWAYS (putc ('y', fp) != EOF);\n              ASSERT_ALWAYS (putc ('z', fp) != EOF);\n              ASSERT_ALWAYS (fflush (fp) == 0);\n              rewind (fp);\n\n              got_ret = mpz_inp_raw (got, fp);\n              ASSERT_ALWAYS (! 
ferror(fp));\n              ASSERT_ALWAYS (fclose (fp) == 0);\n\n              MPZ_CHECK_FORMAT (got);\n\n              if (got_ret != want_ret)\n                {\n                  printf (\"check_in: return value wrong\\n\");\n                  error = 1;\n                }\n              if (mpz_cmp (got, want) != 0)\n                {\n                  printf (\"check_in: result wrong\\n\");\n                  error = 1;\n                }\n              if (error)\n                {\n                  printf    (\"  i=%d zeros=%d neg=%d\\n\", i, zeros, neg);\n                  printf    (\"  got_ret  %lu\\n\", got_ret);\n                  printf    (\"  want_ret %lu\\n\", want_ret);\n                  mpz_trace (\"  got      \", got);\n                  mpz_trace (\"  want     \", want);\n                  abort ();\n                }\n\n              mpz_neg (want, want);\n            }\n        }\n      mpz_mul_2exp (want, want, 8);\n      mpz_add_ui (want, want, (unsigned long) BYTEVAL (i));\n    }\n\n  mpz_clear (want);\n  mpz_clear (got);\n}\n\n\nvoid\ncheck_out (void)\n{\n  int        i, j, neg, error = 0;\n  mpz_t      z;\n  char       want[256], got[256], *p;\n  size_t     want_len, got_ret, got_read;\n  mp_size_t  size;\n  FILE       *fp;\n\n  mpz_init (z);\n\n  for (i = 0; i < 32; i++)\n    {\n      for (neg = 0; neg <= 1; neg++)\n        {\n          want_len = i + 4;\n\n          /* need this to get the twos complement right */\n          ASSERT_ALWAYS (sizeof (size) >= 4);\n\n          size = i;\n          if (neg)\n            size = -size;\n\n          p = want;\n          for (j = 3; j >= 0; j--)\n            *p++ = size >> (j*8);\n          for (j = 0; j < i; j++)\n            *p++ = BYTEVAL (j);\n          ASSERT_ALWAYS (p <= want + sizeof (want));\n\n          fp = fopen_wplusb_or_die (FILENAME);\n          got_ret = mpz_out_raw (fp, z);\n          ASSERT_ALWAYS (fflush (fp) == 0);\n          rewind (fp);\n          got_read = fread (got, 
1, sizeof(got), fp);\n          ASSERT_ALWAYS (! ferror(fp));\n          ASSERT_ALWAYS (fclose (fp) == 0);\n\n          if (got_ret != want_len)\n            {\n              printf (\"check_out: wrong return value\\n\");\n              error = 1;\n            }\n          if (got_read != want_len)\n            {\n              printf (\"check_out: wrong number of bytes read back\\n\");\n              error = 1;\n            }\n          if (memcmp (want, got, want_len) != 0)\n            {\n              printf (\"check_out: wrong data\\n\");\n              error = 1;\n            }\n          if (error)\n            {\n              printf    (\"  i=%d neg=%d\\n\", i, neg);\n              mpz_trace (\"  z\", z);\n              printf    (\"  got_ret  %lu\\n\", got_ret);\n              printf    (\"  got_read %lu\\n\", got_read);\n              printf    (\"  want_len %lu\\n\", want_len);\n              printf    (\"  want\");\n              for (j = 0; j < want_len; j++)\n                printf (\" %02X\", (unsigned) (unsigned char) want[j]);\n              printf    (\"\\n\");\n              printf    (\"  got \");\n              for (j = 0; j < want_len; j++)\n                printf (\" %02X\", (unsigned) (unsigned char) got[j]);\n              printf    (\"\\n\");\n              abort ();\n            }\n\n          mpz_neg (z, z);\n        }\n      mpz_mul_2exp (z, z, 8);\n      mpz_add_ui (z, z, (unsigned long) BYTEVAL (i));\n    }\n\n  mpz_clear (z);\n}\n\n\nvoid\ncheck_rand (void)\n{\n  gmp_randstate_t  rands;\n  int        i, error = 0;\n  mpz_t      got, want;\n  size_t     inp_ret, out_ret;\n  FILE       *fp;\n\n  mpz_init (want);\n  mpz_init (got);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 500; i++)\n    {\n      mpz_erandomb (want, rands, 10*BITS_PER_MP_LIMB);\n      mpz_negrandom (want, rands);\n\n      fp = fopen_wplusb_or_die (FILENAME);\n      out_ret = mpz_out_raw (fp, want);\n      ASSERT_ALWAYS (fflush (fp) == 0);\n      rewind 
(fp);\n      inp_ret = mpz_inp_raw (got, fp);\n      ASSERT_ALWAYS (fclose (fp) == 0);\n\n      MPZ_CHECK_FORMAT (got);\n\n      if (inp_ret != out_ret)\n        {\n          printf (\"check_rand: different inp/out return values\\n\");\n          error = 1;\n        }\n      if (mpz_cmp (got, want) != 0)\n        {\n          printf (\"check_rand: wrong result\\n\");\n          error = 1;\n        }\n      if (error)\n        {\n          printf    (\"  out_ret %lu\\n\", out_ret);\n          printf    (\"  inp_ret %lu\\n\", inp_ret);\n          mpz_trace (\"  want\", want);\n          mpz_trace (\"  got \", got);\n          abort ();\n        }\n    }\n\n  mpz_clear (got);\n  mpz_clear (want);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_in ();\n  check_out ();\n  check_rand ();\n\n  unlink (FILENAME);\n  tests_end ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-jac.c",
    "content": "/* Exercise mpz_*_kronecker_*() and mpz_jacobi() functions.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* With no arguments the various Kronecker/Jacobi symbol routines are\n   checked against some test data and a lot of derived data.\n\n   To check the test data against PARI-GP, run\n\n           t-jac -p | gp -q\n\n   It takes a while because the output from \"t-jac -p\" is big.\n\n\n   Enhancements:\n\n   More big test cases than those given by check_squares_zi would be good.  
*/\n\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#ifdef _LONG_LONG_LIMB\n#define LL(l,ll)  ll\n#else\n#define LL(l,ll)  l\n#endif\n\n\nint option_pari = 0;\n\n\nunsigned long\nmpz_mod4 (mpz_srcptr z)\n{\n  mpz_t          m;\n  unsigned long  ret;\n\n  mpz_init (m);\n  mpz_fdiv_r_2exp (m, z, 2);\n  ret = mpz_get_ui (m);\n  mpz_clear (m);\n  return ret;\n}\n\nint\nmpz_fits_ulimb_p (mpz_srcptr z)\n{\n  return (SIZ(z) == 1 || SIZ(z) == 0);\n}\n\nmp_limb_t\nmpz_get_ulimb (mpz_srcptr z)\n{\n  if (SIZ(z) == 0)\n    return 0;\n  else\n    return PTR(z)[0];\n}\n\n\nvoid\ntry_base (mp_limb_t a, mp_limb_t b, int answer)\n{\n  int  got;\n\n  if ((b & 1) == 0 || b == 1 || a > b)\n    return;\n\n  got = mpn_jacobi_base (a, b, 0);\n  if (got != answer)\n    {\n      printf (LL(\"mpn_jacobi_base (%lu, %lu) is %d should be %d\\n\",\n                 \"mpn_jacobi_base (%llu, %llu) is %d should be %d\\n\"),\n              a, b, got, answer);\n      abort ();\n    }\n}\n\n\nvoid\ntry_zi_ui (mpz_srcptr a, unsigned long b, int answer)\n{\n  int  got;\n\n  got = mpz_kronecker_ui (a, b);\n  if (got != answer)\n    {\n      printf (\"mpz_kronecker_ui (\");\n      mpz_out_str (stdout, 10, a);\n      printf (\", %lu) is %d should be %d\\n\", b, got, answer);\n      abort ();\n    }\n}\n\n\nvoid\ntry_zi_si (mpz_srcptr a, long b, int answer)\n{\n  int  got;\n\n  got = mpz_kronecker_si (a, b);\n  if (got != answer)\n    {\n      printf (\"mpz_kronecker_si (\");\n      mpz_out_str (stdout, 10, a);\n      printf (\", %ld) is %d should be %d\\n\", b, got, answer);\n      abort ();\n    }\n}\n\n\nvoid\ntry_ui_zi (unsigned long a, mpz_srcptr b, int answer)\n{\n  int  got;\n\n  got = mpz_ui_kronecker (a, b);\n  if (got != answer)\n    {\n      printf (\"mpz_ui_kronecker (%lu, \", a);\n      mpz_out_str (stdout, 10, b);\n      printf (\") is %d should be %d\\n\", got, answer);\n      abort ();\n    
}\n}\n\n\nvoid\ntry_si_zi (long a, mpz_srcptr b, int answer)\n{\n  int  got;\n\n  got = mpz_si_kronecker (a, b);\n  if (got != answer)\n    {\n      printf (\"mpz_si_kronecker (%ld, \", a);\n      mpz_out_str (stdout, 10, b);\n      printf (\") is %d should be %d\\n\", got, answer);\n      abort ();\n    }\n}\n\n\n/* Don't bother checking mpz_jacobi, since it only differs for b even, and\n   we don't have an actual expected answer for it.  tests/devel/try.c does\n   some checks though.  */\nvoid\ntry_zi_zi (mpz_srcptr a, mpz_srcptr b, int answer)\n{\n  int  got;\n\n  got = mpz_kronecker (a, b);\n  if (got != answer)\n    {\n      printf (\"mpz_kronecker (\");\n      mpz_out_str (stdout, 10, a); \n      printf (\", \");\n      mpz_out_str (stdout, 10, b);\n      printf (\") is %d should be %d\\n\", got, answer);\n      abort ();\n    }\n}\n\n\nvoid\ntry_pari (mpz_srcptr a, mpz_srcptr b, int answer)\n{\n  printf (\"try(\");\n  mpz_out_str (stdout, 10, a); \n  printf (\",\");\n  mpz_out_str (stdout, 10, b); \n  printf (\",%d)\\n\", answer);\n}\n\n\nvoid\ntry_each (mpz_srcptr a, mpz_srcptr b, int answer)\n{\n  if (option_pari)\n    {\n      try_pari (a, b, answer);\n      return;\n    }\n\n  if (mpz_fits_ulimb_p (a) && mpz_fits_ulimb_p (b))\n    try_base (mpz_get_ulimb (a), mpz_get_ulimb (b), answer);\n\n  if (mpz_fits_ulong_p (b))\n    try_zi_ui (a, mpz_get_ui (b), answer);\n\n  if (mpz_fits_slong_p (b))\n    try_zi_si (a, mpz_get_si (b), answer);\n\n  if (mpz_fits_ulong_p (a))\n    try_ui_zi (mpz_get_ui (a), b, answer);\n\n  if (mpz_fits_sint_p (a))\n    try_si_zi (mpz_get_si (a), b, answer);\n\n  try_zi_zi (a, b, answer);\n}        \n\n\n/* Try (a/b) and (a/-b). 
*/\nvoid\ntry_pn (mpz_srcptr a, mpz_srcptr b_orig, int answer)\n{\n  mpz_t  b;\n\n  mpz_init_set (b, b_orig);\n  try_each (a, b, answer);\n\n  mpz_neg (b, b);\n  if (mpz_sgn (a) < 0)\n    answer = -answer;\n\n  try_each (a, b, answer);\n\n  mpz_clear (b);\n}\n\n\n/* Try (a+k*p/b) for various k, using the fact (a/b) is periodic in a with\n   period p.  For b>0, p=b if b!=2mod4 or p=4*b if b==2mod4. */\n\nvoid\ntry_periodic_num (mpz_srcptr a_orig, mpz_srcptr b, int answer)\n{\n  mpz_t  a, a_period;\n  int    i;\n\n  if (mpz_sgn (b) <= 0)\n    return;\n\n  mpz_init_set (a, a_orig);\n  mpz_init_set (a_period, b);\n  if (mpz_mod4 (b) == 2)\n    mpz_mul_ui (a_period, a_period, 4);\n\n  /* don't bother with these tests if they're only going to produce\n     even/even */\n  if (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (a_period))\n    goto done;\n\n  for (i = 0; i < 6; i++)\n    {\n      mpz_add (a, a, a_period);\n      try_pn (a, b, answer);\n    }\n\n  mpz_set (a, a_orig);\n  for (i = 0; i < 6; i++)\n    {\n      mpz_sub (a, a, a_period);\n      try_pn (a, b, answer);\n    }\n\n done:\n  mpz_clear (a);\n  mpz_clear (a_period);\n}\n\n\n/* Try (a/b+k*p) for various k, using the fact (a/b) is periodic in b of\n   period p.\n\n                               period p\n           a==0,1mod4             a\n           a==2mod4              4*a\n           a==3mod4 and b odd    4*a\n           a==3mod4 and b even   8*a\n\n   In Henri Cohen's book the period is given as 4*a for all a==2,3mod4, but\n   a counterexample would seem to be (3/2)=-1 which with (3/14)=+1 doesn't\n   have period 4*a (but rather 8*a with (3/26)=-1).  Maybe the plain 4*a is\n   to be read as applying to a plain Jacobi symbol with b odd, rather than\n   the Kronecker extension to b even. 
*/\n\nvoid\ntry_periodic_den (mpz_srcptr a, mpz_srcptr b_orig, int answer)\n{\n  mpz_t  b, b_period;\n  int    i;\n\n  if (mpz_sgn (a) == 0 || mpz_sgn (b_orig) == 0)\n    return;\n\n  mpz_init_set (b, b_orig);\n\n  mpz_init_set (b_period, a);\n  if (mpz_mod4 (a) == 3 && mpz_even_p (b))\n    mpz_mul_ui (b_period, b_period, 8L);\n  else if (mpz_mod4 (a) >= 2)\n    mpz_mul_ui (b_period, b_period, 4L);\n\n  /* don't bother with these tests if they're only going to produce\n     even/even */\n  if (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (b_period))\n    goto done;\n\n  for (i = 0; i < 6; i++)\n    {\n      mpz_add (b, b, b_period);\n      try_pn (a, b, answer);\n    }\n\n  mpz_set (b, b_orig);\n  for (i = 0; i < 6; i++)\n    {\n      mpz_sub (b, b, b_period);\n      try_pn (a, b, answer);\n    }\n\n done:\n  mpz_clear (b);\n  mpz_clear (b_period);\n}\n\n\nstatic const unsigned long  ktable[] = {\n  0, 1, 2, 3, 4, 5, 6, 7,\n  GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,\n  2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,\n  3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1\n};\n\n\n/* Try (a/b*2^k) for various k. */\nvoid\ntry_2den (mpz_srcptr a, mpz_srcptr b_orig, int answer)\n{\n  mpz_t  b;\n  int    kindex;\n  int    answer_a2, answer_k;\n  unsigned long k;\n\n  /* don't bother when b==0 */\n  if (mpz_sgn (b_orig) == 0)\n    return;\n\n  mpz_init_set (b, b_orig);\n\n  /* (a/2) is 0 if a even, 1 if a==1 or 7 mod 8, -1 if a==3 or 5 mod 8 */\n  answer_a2 = (mpz_even_p (a) ? 0\n               : (((SIZ(a) >= 0 ? PTR(a)[0] : -PTR(a)[0]) + 2) & 7) < 4 ? 1\n               : -1);\n\n  for (kindex = 0; kindex < numberof (ktable); kindex++)\n    {\n      k = ktable[kindex];\n\n      /* answer_k = answer*(answer_a2^k) */\n      answer_k = (answer_a2 == 0 && k != 0 ? 0\n                  : (k & 1) == 1 && answer_a2 == -1 ? 
-answer\n                  : answer);\n\n      mpz_mul_2exp (b, b_orig, k);\n      try_pn (a, b, answer_k);\n    }\n\n  mpz_clear (b);\n}\n\n\n/* Try (a*2^k/b) for various k.  If it happens mpz_ui_kronecker() gets (2/b)\n   wrong it will show up as wrong answers demanded. */\nvoid\ntry_2num (mpz_srcptr a_orig, mpz_srcptr b, int answer)\n{\n  mpz_t  a;\n  int    kindex;\n  int    answer_2b, answer_k;\n  unsigned long  k;\n\n  /* don't bother when a==0 */\n  if (mpz_sgn (a_orig) == 0)\n    return;\n\n  mpz_init (a);\n\n  /* (2/b) is 0 if b even, 1 if b==1 or 7 mod 8, -1 if b==3 or 5 mod 8 */\n  answer_2b = (mpz_even_p (b) ? 0\n               : (((SIZ(b) >= 0 ? PTR(b)[0] : -PTR(b)[0]) + 2) & 7) < 4 ? 1\n               : -1);\n\n  for (kindex = 0; kindex < numberof (ktable); kindex++)\n    {\n      k = ktable[kindex];\n\n      /* answer_k = answer*(answer_2b^k) */\n      answer_k = (answer_2b == 0 && k != 0 ? 0\n                  : (k & 1) == 1 && answer_2b == -1 ? -answer\n                  : answer);\n\n        mpz_mul_2exp (a, a_orig, k);\n      try_pn (a, b, answer_k);\n    }\n\n  mpz_clear (a);\n}\n\n\n/* The try_2num() and try_2den() routines don't in turn call\n   try_periodic_num() and try_periodic_den() because it hugely increases the\n   number of tests performed, without obviously increasing coverage.\n\n   Useful extra derived cases can be added here. */\n\nvoid\ntry_all (mpz_t a, mpz_t b, int answer)\n{\n  try_pn (a, b, answer);\n  try_periodic_num (a, b, answer);\n  try_periodic_den (a, b, answer);\n  try_2num (a, b, answer);\n  try_2den (a, b, answer);\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *a;\n    const char  *b;\n    int         answer;\n\n  } data[] = {\n\n    /* Note that the various derived checks in try_all() reduce the cases\n       that need to be given here.  
*/\n\n    /* some zeros */\n    {  \"0\",  \"0\", 0 },\n    {  \"0\",  \"2\", 0 },\n    {  \"0\",  \"6\", 0 },\n    {  \"5\",  \"0\", 0 },\n    { \"24\", \"60\", 0 },\n\n    /* (a/1) = 1, any a \n       In particular note (0/1)=1 so that (a/b)=(a mod b/b). */\n    { \"0\", \"1\", 1 },\n    { \"1\", \"1\", 1 },\n    { \"2\", \"1\", 1 },\n    { \"3\", \"1\", 1 },\n    { \"4\", \"1\", 1 },\n    { \"5\", \"1\", 1 },\n\n    /* (0/b) = 0, b != 1 */\n    { \"0\",  \"3\", 0 },\n    { \"0\",  \"5\", 0 },\n    { \"0\",  \"7\", 0 },\n    { \"0\",  \"9\", 0 },\n    { \"0\", \"11\", 0 },\n    { \"0\", \"13\", 0 },\n    { \"0\", \"15\", 0 },\n\n    /* (1/b) = 1 */\n    { \"1\",  \"1\", 1 },\n    { \"1\",  \"3\", 1 },\n    { \"1\",  \"5\", 1 },\n    { \"1\",  \"7\", 1 },\n    { \"1\",  \"9\", 1 },\n    { \"1\", \"11\", 1 },\n\n    /* (-1/b) = (-1)^((b-1)/2) which is -1 for b==3 mod 4 */\n    { \"-1\",  \"1\",  1 },\n    { \"-1\",  \"3\", -1 },\n    { \"-1\",  \"5\",  1 },\n    { \"-1\",  \"7\", -1 },\n    { \"-1\",  \"9\",  1 },\n    { \"-1\", \"11\", -1 },\n    { \"-1\", \"13\",  1 },\n    { \"-1\", \"15\", -1 },\n    { \"-1\", \"17\",  1 },\n    { \"-1\", \"19\", -1 },\n\n    /* (2/b) = (-1)^((b^2-1)/8) which is -1 for b==3,5 mod 8.\n       try_2num() will exercise multiple powers of 2 in the numerator.  */\n    { \"2\",  \"1\",  1 },\n    { \"2\",  \"3\", -1 },\n    { \"2\",  \"5\", -1 },\n    { \"2\",  \"7\",  1 },\n    { \"2\",  \"9\",  1 },\n    { \"2\", \"11\", -1 },\n    { \"2\", \"13\", -1 },\n    { \"2\", \"15\",  1 },\n    { \"2\", \"17\",  1 },\n\n    /* (-2/b) = (-1)^((b^2-1)/8)*(-1)^((b-1)/2) which is -1 for b==5,7mod8.\n       try_2num() will exercise multiple powers of 2 in the numerator, which\n       will test that the shift in mpz_si_kronecker() uses unsigned not\n       signed.  
*/\n    { \"-2\",  \"1\",  1 },\n    { \"-2\",  \"3\",  1 },\n    { \"-2\",  \"5\", -1 },\n    { \"-2\",  \"7\", -1 },\n    { \"-2\",  \"9\",  1 },\n    { \"-2\", \"11\",  1 },\n    { \"-2\", \"13\", -1 },\n    { \"-2\", \"15\", -1 },\n    { \"-2\", \"17\",  1 },\n\n    /* (a/2)=(2/a).\n       try_2den() will exercise multiple powers of 2 in the denominator. */\n    {  \"3\",  \"2\", -1 },\n    {  \"5\",  \"2\", -1 },\n    {  \"7\",  \"2\",  1 },\n    {  \"9\",  \"2\",  1 },\n    {  \"11\", \"2\", -1 },\n\n    /* Harriet Griffin, \"Elementary Theory of Numbers\", page 155, various\n       examples.  */\n    {   \"2\", \"135\",  1 },\n    { \"135\",  \"19\", -1 },\n    {   \"2\",  \"19\", -1 },\n    {  \"19\", \"135\",  1 },\n    { \"173\", \"135\",  1 },\n    {  \"38\", \"135\",  1 },\n    { \"135\", \"173\",  1 },\n    { \"173\",   \"5\", -1 },\n    {   \"3\",   \"5\", -1 },\n    {   \"5\", \"173\", -1 },\n    { \"173\",   \"3\", -1 },\n    {   \"2\",   \"3\", -1 },\n    {   \"3\", \"173\", -1 },\n    { \"253\",  \"21\",  1 },\n    {   \"1\",  \"21\",  1 },\n    {  \"21\", \"253\",  1 },\n    {  \"21\",  \"11\", -1 },\n    {  \"-1\",  \"11\", -1 },\n\n    /* Griffin page 147 */\n    {  \"-1\",  \"17\",  1 },\n    {   \"2\",  \"17\",  1 },\n    {  \"-2\",  \"17\",  1 },\n    {  \"-1\",  \"89\",  1 },\n    {   \"2\",  \"89\",  1 },\n\n    /* Griffin page 148 */\n    {  \"89\",  \"11\",  1 },\n    {   \"1\",  \"11\",  1 },\n    {  \"89\",   \"3\", -1 },\n    {   \"2\",   \"3\", -1 },\n    {   \"3\",  \"89\", -1 },\n    {  \"11\",  \"89\",  1 },\n    {  \"33\",  \"89\", -1 },\n\n    /* H. Davenport, \"The Higher Arithmetic\", page 65, the quadratic\n       residues and non-residues mod 19.  
*/\n    {  \"1\", \"19\",  1 },\n    {  \"4\", \"19\",  1 },\n    {  \"5\", \"19\",  1 },\n    {  \"6\", \"19\",  1 },\n    {  \"7\", \"19\",  1 },\n    {  \"9\", \"19\",  1 },\n    { \"11\", \"19\",  1 },\n    { \"16\", \"19\",  1 },\n    { \"17\", \"19\",  1 },\n    {  \"2\", \"19\", -1 },\n    {  \"3\", \"19\", -1 },\n    {  \"8\", \"19\", -1 },\n    { \"10\", \"19\", -1 },\n    { \"12\", \"19\", -1 },\n    { \"13\", \"19\", -1 },\n    { \"14\", \"19\", -1 },\n    { \"15\", \"19\", -1 },\n    { \"18\", \"19\", -1 },\n\n    /* Residues and non-residues mod 13 */\n    {  \"0\",  \"13\",  0 },\n    {  \"1\",  \"13\",  1 },\n    {  \"2\",  \"13\", -1 },\n    {  \"3\",  \"13\",  1 },\n    {  \"4\",  \"13\",  1 },\n    {  \"5\",  \"13\", -1 },\n    {  \"6\",  \"13\", -1 },\n    {  \"7\",  \"13\", -1 },\n    {  \"8\",  \"13\", -1 },\n    {  \"9\",  \"13\",  1 },\n    { \"10\",  \"13\",  1 },\n    { \"11\",  \"13\", -1 },\n    { \"12\",  \"13\",  1 },\n\n    /* various */\n    {  \"5\",   \"7\", -1 },\n    { \"15\",  \"17\",  1 },\n    { \"67\",  \"89\",  1 },\n\n    /* special values inducing a==b==1 at the end of jac_or_kron() */\n    { \"0x10000000000000000000000000000000000000000000000001\",\n      \"0x10000000000000000000000000000000000000000000000003\", 1 },\n  };\n\n  int    i;\n  mpz_t  a, b;\n\n  mpz_init (a);\n  mpz_init (b);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (a, data[i].a, 0);\n      mpz_set_str_or_abort (b, data[i].b, 0);\n      try_all (a, b, data[i].answer);\n    }\n\n  mpz_clear (a);\n  mpz_clear (b);\n}\n\n\n/* (a^2/b)=1 if gcd(a,b)=1, or (a^2/b)=0 if gcd(a,b)!=1.\n   This includes when a=0 or b=0. 
*/\nvoid\ncheck_squares_zi (void)\n{\n  gmp_randstate_t rands;\n  mpz_t  a, b, g;\n  int    i, answer;\n  mp_size_t size_range, an, bn;\n  mpz_t bs;\n\n  mpz_init (bs);\n  mpz_init (a);\n  mpz_init (b);\n  mpz_init (g);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 50; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2;\n\n      mpz_urandomb (bs, rands, size_range);\n      an = mpz_get_ui (bs);\n      mpz_rrandomb (a, rands, an);\n\n      mpz_urandomb (bs, rands, size_range);\n      bn = mpz_get_ui (bs);\n      mpz_rrandomb (b, rands, bn);\n\n      mpz_gcd (g, a, b);\n      if (mpz_cmp_ui (g, 1L) == 0)\n\tanswer = 1;\n      else\n\tanswer = 0;\n\n      mpz_mul (a, a, a);\n\n      try_all (a, b, answer);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (a);\n  mpz_clear (b);\n  mpz_clear (g);\n  gmp_randclear(rands);\n}\n\n\n/* Check the handling of asize==0, make sure it isn't affected by the low\n   limb. */\nvoid\ncheck_a_zero (void)\n{\n  mpz_t  a, b;\n\n  mpz_init_set_ui (a, 0);\n  mpz_init (b);\n\n  mpz_set_ui (b, 1L);\n  PTR(a)[0] = 0;\n  try_all (a, b, 1);   /* (0/1)=1 */\n  PTR(a)[0] = 1;\n  try_all (a, b, 1);   /* (0/1)=1 */\n\n  mpz_set_si (b, -1L);\n  PTR(a)[0] = 0;\n  try_all (a, b, 1);   /* (0/-1)=1 */\n  PTR(a)[0] = 1;\n  try_all (a, b, 1);   /* (0/-1)=1 */\n\n  mpz_set_ui (b, 0);\n  PTR(a)[0] = 0;\n  try_all (a, b, 0);   /* (0/0)=0 */\n  PTR(a)[0] = 1;\n  try_all (a, b, 0);   /* (0/0)=0 */\n\n  mpz_set_ui (b, 2);\n  PTR(a)[0] = 0;\n  try_all (a, b, 0);   /* (0/2)=0 */\n  PTR(a)[0] = 1;\n  try_all (a, b, 0);   /* (0/2)=0 */\n\n  mpz_clear (a);\n  mpz_clear (b);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  if (argc >= 2 && strcmp (argv[1], \"-p\") == 0)\n    {\n      option_pari = 1;\n      \n      printf (\"\\\ntry(a,b,answer) =\\n\\\n{\\n\\\n  if (kronecker(a,b) != answer,\\n\\\n    print(\\\"wrong at \\\", a, \\\",\\\", b,\\n\\\n      \\\" expected \\\", answer,\\n\\\n      \\\" pari 
says \\\", kronecker(a,b)))\\n\\\n}\\n\");\n    }\n\n  check_data ();\n  check_squares_zi ();\n  check_a_zero ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-lcm.c",
    "content": "/* Test mpz_lcm and mpz_lcm_ui.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_all (mpz_ptr want, mpz_srcptr x_orig, mpz_srcptr y_orig)\n{\n  mpz_t  got, x, y;\n  int    negx, negy, swap, inplace;\n\n  mpz_init (got);\n  mpz_init_set (x, x_orig);\n  mpz_init_set (y, y_orig);\n\n  for (swap = 0; swap < 2; swap++)\n    {\n      mpz_swap (x, y);\n\n      for (negx = 0; negx < 2; negx++)\n        {\n          mpz_neg (x, x);\n\n          for (negy = 0; negy < 2; negy++)\n            {\n              mpz_neg (y, y);\n\n              for (inplace = 0; inplace <= 1; inplace++)\n                {\n                  if (inplace)\n                    { mpz_set (got, x); mpz_lcm (got, got, y); }\n                  else\n                    mpz_lcm (got, x, y);\n                  MPZ_CHECK_FORMAT (got);\n\n                  if (mpz_cmp (got, want) != 0)\n                    {\n                      printf (\"mpz_lcm wrong, inplace=%d\\n\", inplace);\n                    fail:\n                      mpz_trace 
(\"x\", x);\n                      mpz_trace (\"y\", y);\n                      mpz_trace (\"got\", got);\n                      mpz_trace (\"want\", want);\n                      abort ();\n                    }\n\n                  if (mpz_fits_ulong_p (y))\n                    {\n                      unsigned long  yu = mpz_get_ui (y);\n                      if (inplace)\n                        { mpz_set (got, x); mpz_lcm_ui (got, got, yu); }\n                      else\n                        mpz_lcm_ui (got, x, yu);\n          \n                      if (mpz_cmp (got, want) != 0)\n                        {\n                          printf (\"mpz_lcm_ui wrong, inplace=%d\\n\", inplace);\n                          printf    (\"yu=%lu\\n\", yu);\n                          goto fail;\n                        }\n                    }\n                }\n            }\n        }\n    }\n\n  mpz_clear (got);  \n  mpz_clear (x);\n  mpz_clear (y);\n}\n\n\nvoid\ncheck_primes (void)\n{\n  static unsigned long  prime[] = {\n    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,\n    101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,\n    191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,\n    281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,\n    389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,\n  };\n  mpz_t  want, x, y;\n  int    i;\n\n  mpz_init (want);\n  mpz_init (x);\n  mpz_init (y);\n\n  /* New prime each time. */\n  mpz_set_ui (want, 1L);\n  for (i = 0; i < numberof (prime); i++)\n    {\n      mpz_set (x, want);\n      mpz_set_ui (y, prime[i]);\n      mpz_mul_ui (want, want, prime[i]);\n      check_all (want, x, y);\n    }\n\n  /* Old prime each time. */\n  mpz_set (x, want);\n  for (i = 0; i < numberof (prime); i++)\n    {\n      mpz_set_ui (y, prime[i]);\n      check_all (want, x, y);\n    }\n\n  /* One old, one new each time. 
*/\n  mpz_set_ui (want, prime[0]);\n  for (i = 1; i < numberof (prime); i++)\n    {\n      mpz_set (x, want);\n      mpz_set_ui (y, prime[i] * prime[i-1]);\n      mpz_mul_ui (want, want, prime[i]);\n      check_all (want, x, y);\n    }\n\n  /* Triplets with A,B in x and B,C in y. */\n  mpz_set_ui (want, 1L);\n  mpz_set_ui (x, 1L);\n  mpz_set_ui (y, 1L);\n  for (i = 0; i+2 < numberof (prime); i += 3)\n    {\n      mpz_mul_ui (want, want, prime[i]);\n      mpz_mul_ui (want, want, prime[i+1]);\n      mpz_mul_ui (want, want, prime[i+2]);\n\n      mpz_mul_ui (x, x, prime[i]);\n      mpz_mul_ui (x, x, prime[i+1]);\n\n      mpz_mul_ui (y, y, prime[i+1]);\n      mpz_mul_ui (y, y, prime[i+2]);\n\n      check_all (want, x, y);\n    }\n\n\n  mpz_clear (want);\n  mpz_clear (x);\n  mpz_clear (y);\n}\n\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_primes ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-likely_prime_p.c",
    "content": "/* Test is_likely_prime_1.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\nCopyright 2009 William Hart\n\nThis file is part of the MPIR Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the MPIR Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_sqrt (void)\n{\n  gmp_randstate_t  rands;\n  int    i;\n  mpz_t  x;\n  unsigned long bits;\n  mp_limb_t p, m, m2, s;\n\n  mpz_init (x);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 200000; i++)\n    {\n      do\n      {\n         mpz_set_ui(x, GMP_NUMB_BITS/2 - 1);\n         mpz_urandomm(x, rands, x);\n         bits = mpz_get_ui(x) + 2;\n         mpz_rrandomb(x, rands, bits);\n         p = mpz_getlimbn(x, 0);\n         m = p*p;\n         mpz_set_ui(x, MIN(p, 1000));\n         mpz_urandomm(x, rands, x);\n         m2 = m + mpz_get_ui(x) - MIN(p, 1000)/2;\n      } while ((m2 == m) || ((mp_limb_signed_t) (m2 ^ m) < (mp_limb_signed_t) 0) || (m2 == 0) || (m2 == 1));\n\n      s = mpir_sqrt(m2);\n      if (((m2 < m) && (s != p - 1)) || ((m2 > m) && (s != p)))\n      {\n          printf (\"mpz_likely_prime_p\\n\");\n          printf (\"mpir_sqrt is broken\\n\");\n#if defined( _MSC_VER ) && defined( _WIN64 )\n          printf 
(\"%llu is returned as mpir_sqrt(%llu)\\n\", s, m2);\n#else\n          printf (\"%lu is returned as mpir_sqrt(%lu)\\n\", s, m2);\n#endif\n          abort();\n      }\n    }\n\n  for (i = 0; i < 200000; i++)\n    {\n      mpz_set_ui(x, GMP_NUMB_BITS/2 - 1);\n      mpz_urandomm(x, rands, x);\n      bits = mpz_get_ui(x) + 2;\n      mpz_rrandomb(x, rands, bits);\n      p = mpz_getlimbn(x, 0);\n      m = p*p;\n\n      s = mpir_sqrt(m);\n      if (s != p)\n      {\n          printf (\"mpz_likely_prime_p\\n\");\n          printf (\"mpir_sqrt is broken\\n\");\n#if defined( _MSC_VER ) && defined( _WIN64 )\n          printf (\"%llu is returned as mpir_sqrt(%llu)\\n\", s, m);\n#else\n          printf (\"%lu is returned as mpir_sqrt(%lu)\\n\", s, m);\n#endif\n          abort();\n      }\n    }\n\n  mpz_clear (x);\n  gmp_randclear(rands);\n}\n\nvoid\ncheck_rand (void)\n{\n  gmp_randstate_t  rands;\n  int    i;\n  mpz_t  x;\n  unsigned long bits;\n  mp_limb_t p;\n\n  mpz_init (x);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 2000; i++)\n    {\n      do\n      {\n         mpz_set_ui(x, GMP_NUMB_BITS);\n         mpz_urandomm(x, rands, x);\n         bits = mpz_get_ui(x) + 1;\n         mpz_rrandomb(x, rands, bits);\n         p = mpz_getlimbn(x, 0);\n      } while (mpir_is_likely_prime_BPSW(p));\n\n      if (mpz_probab_prime_p(x, 100))\n      {\n          printf (\"mpz_likely_prime_p\\n\");\n#if defined( _MSC_VER ) && defined( _WIN64 )\n          printf (\"%llu is declared composite\\n\", p);\n#else\n          printf (\"%lu is declared composite\\n\", p);\n#endif\n          abort();\n      }\n    }\n  mpz_clear (x);\n  gmp_randclear(rands);\n}\n\n/* return 1 if prime, 0 if composite */\nint\nisprime (long n)\n{\n  long  i;\n\n  n = ABS(n);\n\n  if (n < 2)\n    return 0;\n  if (n == 2)\n    return 1;\n  if ((n & 1) == 0)\n    return 0;\n\n  for (i = 3; i < n; i++)\n    if ((n % i) == 0)\n      return 0;\n\n  return 1;\n}\n\nvoid\ncheck_one (mpz_srcptr n, int want, 
gmp_randstate_t rands)\n{\n  int  got;\n\n  got = mpz_likely_prime_p (n, rands, 0);\n\n  /* \"definitely prime\" is fine if we only wanted \"probably prime\" */\n  if (got == 2 && want == 1)\n    want = 2;\n\n  if (got != want)\n    {\n      printf (\"mpz_likely_prime_p\\n\");\n      mpz_trace (\"  n    \", n);\n      printf    (\"  got =%d\", got);\n      printf    (\"  want=%d\", want);\n      abort ();\n    }\n}\n\nvoid\ncheck_pn (mpz_ptr n, int want, gmp_randstate_t rands)\n{\n  check_one (n, want, rands);\n  mpz_neg (n, n);\n  check_one (n, want, rands);\n}\n\n/* expect certainty for small n */\nvoid\ncheck_small (void)\n{\n  mpz_t  n;\n  long   i;\n  gmp_randstate_t  rands;\n  \n  mpz_init (n);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 3000; i++)\n    {\n      mpz_set_si (n, i);\n      check_pn (n, isprime (i), rands);\n    }\n\n  gmp_randclear(rands);\n  mpz_clear (n);\n}\n\nvoid\ncheck_large (void)\n{\n  gmp_randstate_t  rands;\n  int    i, count, r1, r2;\n  mpz_t  x;\n  unsigned long bits;\n  mp_limb_t p;\n\n  mpz_init (x);\n  gmp_randinit_default(rands);\n\n  count = 0;\n  for (i = 0; i < 100000; i++)\n    {\n      mpz_set_ui(x, 300);\n      mpz_urandomm(x, rands, x);\n      bits = mpz_get_ui(x) + 1;\n      mpz_rrandomb(x, rands, bits);\n\n      r1 = mpz_probab_prime_p(x, 100);\n      r2 = mpz_likely_prime_p(x, rands, 0);\n\n      if (r1 == 1 && r2 == 0)\n      {\n          printf (\"mpz_likely_prime_p\\n\");\n          gmp_printf (\"%Zd is declared composite\\n\", x);\n          abort();\n      } else if (r1 == 0 && r2 == 1)\n         count++;\n    }\n\n  if (count > 1)\n  {\n     printf (\"mpz_likely_prime_p\\n\");\n     printf (\"%d composite(s) declared prime\\n\", count);\n     abort();\n  }\n\n  mpz_clear (x);\n  gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  \n  check_sqrt ();\n  check_rand ();\n  check_small ();\n  check_large ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-limbs.c",
    "content": "/* Test mpz_limbs_* functions\n\nCopyright 2013 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */\n\n#include <stdlib.h>\n#include <stdio.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define COUNT 100\n#define BITSIZE 500\n\n/* Like mpz_add. For simplicity, support positive inputs only. 
*/\nstatic void\nalt_add (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)\n{\n  mp_size_t an = mpz_size (a);\n  mp_size_t bn = mpz_size (b);\n  mp_ptr rp;\n\n  ASSERT (an > 0);\n  ASSERT (bn > 0);\n  if (an < bn)\n    {\n      MP_SIZE_T_SWAP (an, bn);\n      MPZ_SRCPTR_SWAP (a, b);\n    }\n  rp = mpz_limbs_modify (r, an + 1);\n  rp[an] = mpn_add (rp, mpz_limbs_read (a), an, mpz_limbs_read (b), bn);\n  mpz_limbs_finish (r, an + 1);\n}\n\nstatic void\ncheck_funcs (const char *name,\n\t     void (*f)(mpz_ptr, mpz_srcptr, mpz_srcptr),\n\t     void (*ref_f)(mpz_ptr, mpz_srcptr, mpz_srcptr),\n\t     mpz_srcptr a, mpz_srcptr b)\n{\n  mpz_t r, ref;\n  mpz_inits (r, ref, NULL);\n\n  ref_f (ref, a, b);\n  MPZ_CHECK_FORMAT (ref);\n  f (r, a, b);\n  MPZ_CHECK_FORMAT (r);\n\n  if (mpz_cmp (r, ref) != 0)\n    {\n      printf (\"%s failed, abits %u, bbits %u\\n\",\n\t      name,\n\t      (unsigned) mpz_sizeinbase (a, 2),\n\t      (unsigned) mpz_sizeinbase (b, 2));\n      gmp_printf (\"a = %Zx\\n\", a);\n      gmp_printf (\"b = %Zx\\n\", b);\n      gmp_printf (\"r = %Zx (bad)\\n\", r);\n      gmp_printf (\"ref = %Zx\\n\", ref);\n      abort ();\n    }\n  mpz_clears (r, ref, NULL);\n}\n\nstatic void\ncheck_add (void)\n{\n  gmp_randstate_ptr rands = RANDS;\n  mpz_t bs, a, b;\n  unsigned i;\n  mpz_inits (bs, a, b, NULL);\n  for (i = 0; i < COUNT; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (a, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (b, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n\n      check_funcs (\"add\", alt_add, mpz_add, a, b);\n    }\n  mpz_clears (bs, a, b, NULL);\n}\n\nstatic void\nalt_mul (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)\n{\n  mp_size_t an = mpz_size (a);\n  mp_size_t bn = mpz_size (b);\n  mp_srcptr ap, bp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  ASSERT (an > 0);\n  ASSERT (bn > 0);\n  if (an < bn)\n    {\n      MP_SIZE_T_SWAP (an, bn);\n      MPZ_SRCPTR_SWAP (a, b);\n    }\n  /* NOTE: This copying seems 
unnecessary; better to allocate new\n     result area, and free the old area when done. */\n  if (r == a)\n    {\n      mp_ptr tp =  TMP_ALLOC_LIMBS (an);\n      MPN_COPY (tp, mpz_limbs_read (a), an);\n      ap = tp;\n      bp = (a == b) ? ap : mpz_limbs_read (b);\n    }\n  else if (r == b)\n    {\n      mp_ptr tp = TMP_ALLOC_LIMBS (bn);\n      MPN_COPY (tp, mpz_limbs_read (b), bn);\n      bp = tp;\n      ap = mpz_limbs_read (a);\n    }\n  else\n    {\n      ap = mpz_limbs_read (a);\n      bp = mpz_limbs_read (b);\n    }\n  mpn_mul (mpz_limbs_write (r, an + bn),\n\t   ap, an, bp, bn);\n\n  mpz_limbs_finish (r, an + bn);\n}\n\nvoid\ncheck_mul (void)\n{\n  gmp_randstate_ptr rands = RANDS;\n  mpz_t bs, a, b;\n  unsigned i;\n  mpz_inits (bs, a, b, NULL);\n  for (i = 0; i < COUNT; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (a, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (b, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n\n      check_funcs (\"mul\", alt_mul, mpz_mul, a, b);\n    }\n  mpz_clears (bs, a, b, NULL);\n}\n\n#define MAX_SIZE 100\n\nstatic void\ncheck_roinit (void)\n{\n  gmp_randstate_ptr rands = RANDS;\n  mpz_t bs, a, b, r, ref;\n  unsigned i;\n\n  mpz_inits (bs, a, b, r, ref, NULL);\n\n  for (i = 0; i < COUNT; i++)\n    {\n      mp_srcptr ap, bp;\n      mp_size_t an, bn;\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (a, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n      mpz_urandomb (bs, rands, 32);\n      mpz_rrandomb (b, rands, 1 + mpz_get_ui (bs) % BITSIZE);\n\n      an = mpz_size (a);\n      ap = mpz_limbs_read (a);\n      bn = mpz_size (b);\n      bp = mpz_limbs_read (b);\n\n      mpz_add (ref, a, b);\n      {\n\tmpz_t a1, b1;\n#if __STDC_VERSION__ >= 199901\n\tconst mpz_t a2 = MPZ_ROINIT_N ( (mp_ptr) ap, an);\n\tconst mpz_t b2 = MPZ_ROINIT_N ( (mp_ptr) bp, bn);\n\n\tmpz_set_ui (r, 0);\n\tmpz_add (r, a2, b2);\n\tif (mpz_cmp (r, ref) != 0)\n\t  {\n\t    printf (\"MPZ_ROINIT_N 
failed\\n\");\n\t    gmp_printf (\"a = %Zx\\n\", a);\n\t    gmp_printf (\"b = %Zx\\n\", b);\n\t    gmp_printf (\"r = %Zx (bad)\\n\", r);\n\t    gmp_printf (\"ref = %Zx\\n\", ref);\n\t    abort ();\n\t  }\n#endif\n\tmpz_set_ui (r, 0);\n\tmpz_add (r, mpz_roinit_n (a1, ap, an), mpz_roinit_n (b1, bp, bn));\n\tif (mpz_cmp (r, ref) != 0)\n\t  {\n\t    printf (\"mpz_roinit_n failed\\n\");\n\t    gmp_printf (\"a = %Zx\\n\", a);\n\t    gmp_printf (\"b = %Zx\\n\", b);\n\t    gmp_printf (\"r = %Zx (bad)\\n\", r);\n\t    gmp_printf (\"ref = %Zx\\n\", ref);\n\t    abort ();\n\t  }\n      }\n    }\n  mpz_clears (bs, a, b, r, ref, NULL);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n  tests_end ();\n\n  check_add ();\n  check_mul ();\n  check_roinit ();\n\n  return 0;\n\n}\n"
  },
  {
    "path": "tests/mpz/t-lucnum_ui.c",
    "content": "/* Test mpz_lucnum_ui and mpz_lucnum2_ui.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Usage: t-lucnum_ui [n]\n\n   Test up to L[n], or if n is omitted then the default limit below.  A\n   literal \"x\" for the limit means continue forever, this being meant only\n   for development.  
*/\n\n\nvoid\ncheck_sequence (int argc, char *argv[])\n{\n  unsigned long  n;\n  unsigned long  limit = 100 * BITS_PER_MP_LIMB;\n  mpz_t          want_ln, want_ln1, got_ln, got_ln1;\n\n  if (argc > 1 && argv[1][0] == 'x')\n    limit = ULONG_MAX;\n  else if (argc > 1)\n    limit = atoi (argv[1]);\n\n  /* start at n==0 */\n  mpz_init_set_si (want_ln1, -1); /* L[-1] */\n  mpz_init_set_ui (want_ln,  2);  /* L[0]   */\n  mpz_init (got_ln);\n  mpz_init (got_ln1);\n\n  for (n = 0; n < limit; n++)\n    {\n      mpz_lucnum2_ui (got_ln, got_ln1, n);\n      MPZ_CHECK_FORMAT (got_ln);\n      MPZ_CHECK_FORMAT (got_ln1);\n      if (mpz_cmp (got_ln, want_ln) != 0 || mpz_cmp (got_ln1, want_ln1) != 0)\n        {\n          printf (\"mpz_lucnum2_ui(%lu) wrong\\n\", n);\n          mpz_trace (\"want ln \", want_ln);\n          mpz_trace (\"got  ln \",  got_ln);\n          mpz_trace (\"want ln1\", want_ln1);\n          mpz_trace (\"got  ln1\",  got_ln1);\n          abort ();\n        }\n\n      mpz_lucnum_ui (got_ln, n);\n      MPZ_CHECK_FORMAT (got_ln);\n      if (mpz_cmp (got_ln, want_ln) != 0)\n        {\n          printf (\"mpz_lucnum_ui(%lu) wrong\\n\", n);\n          mpz_trace (\"want ln\", want_ln);\n          mpz_trace (\"got  ln\", got_ln);\n          abort ();\n        }\n\n      mpz_add (want_ln1, want_ln1, want_ln);  /* L[n+1] = L[n] + L[n-1] */\n      mpz_swap (want_ln1, want_ln);\n    }\n\n  mpz_clear (want_ln);\n  mpz_clear (want_ln1);\n  mpz_clear (got_ln);\n  mpz_clear (got_ln1);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_sequence (argc, argv);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-mfac_uiui.c",
    "content": "/* Exercise mpz_mfac_uiui.\n\nCopyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Usage: t-mfac_uiui [x|num]\n\n   With no arguments testing goes up to the initial value of \"limit\" below.\n   With a number argument tests are carried that far, or with a literal \"x\"\n   tests are continued without limit (this being meant only for development\n   purposes).  
*/\n\n#define MULTIFAC_WHEEL (2*3*11)\n#define MULTIFAC_WHEEL2 (5*13)\n\nint\nmain (int argc, char *argv[])\n{\n  mpz_t ref[MULTIFAC_WHEEL], ref2[MULTIFAC_WHEEL2], res;\n  unsigned long n, j, m, m2;\n  unsigned long limit = 2222, step = 1;\n\n  tests_start ();\n\n  if (argc > 1 && argv[1][0] == 'x')\n    limit = ULONG_MAX;\n  else if (argc > 1)\n    limit = atoi (argv[1]);\n\n  /* for small limb testing */\n  limit = MIN (limit, MP_LIMB_T_MAX);\n\n  for (m = 0; m < MULTIFAC_WHEEL; m++)\n    mpz_init_set_ui(ref [m],1);\n  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)\n    mpz_init_set_ui(ref2 [m2],1);\n\n  mpz_init (res);\n\n  m = 0;\n  m2 = 0;\n  for (n = 0; n <= limit;)\n    {\n      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL);\n      MPZ_CHECK_FORMAT (res);\n      if (mpz_cmp (ref[m], res) != 0)\n        {\n          printf (\"mpz_mfac_uiui(%lu,%d) wrong\\n\", n, MULTIFAC_WHEEL);\n          printf (\"  got  \"); mpz_out_str (stdout, 10, res); printf(\"\\n\");\n          printf (\"  want \"); mpz_out_str (stdout, 10, ref[m]); printf(\"\\n\");\n          abort ();\n        }\n      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL2);\n      MPZ_CHECK_FORMAT (res);\n      if (mpz_cmp (ref2[m2], res) != 0)\n        {\n          printf (\"mpz_mfac_uiui(%lu,%d) wrong\\n\", n, MULTIFAC_WHEEL2);\n          printf (\"  got  \"); mpz_out_str (stdout, 10, res); printf(\"\\n\");\n          printf (\"  want \"); mpz_out_str (stdout, 10, ref2[m2]); printf(\"\\n\");\n          abort ();\n        }\n      if (n + step <= limit)\n\tfor (j = 0; j < step; j++) {\n\t  n++; m++; m2++;\n\t  if (m >= MULTIFAC_WHEEL) m -= MULTIFAC_WHEEL;\n\t  if (m2 >= MULTIFAC_WHEEL2) m2 -= MULTIFAC_WHEEL2;\n\t  mpz_mul_ui (ref[m], ref[m], n); /* Compute a reference, with current library */\n\t  mpz_mul_ui (ref2[m2], ref2[m2], n); /* Compute a reference, with current library */\n\t}\n      else n += step;\n    }\n  mpz_fac_ui (ref[0], n);\n  mpz_mfac_uiui (res, n, 1);\n  MPZ_CHECK_FORMAT (res);\n  if (mpz_cmp (ref[0], res) 
!= 0)\n    {\n      printf (\"mpz_mfac_uiui(%lu,1) wrong\\n\", n);\n      printf (\"  got  \"); mpz_out_str (stdout, 10, res); printf(\"\\n\");\n      printf (\"  want \"); mpz_out_str (stdout, 10, ref[0]); printf(\"\\n\");\n      abort ();\n    }\n\n  mpz_2fac_ui (ref[0], n);\n  mpz_mfac_uiui (res, n, 2);\n  MPZ_CHECK_FORMAT (res);\n  if (mpz_cmp (ref[0], res) != 0)\n    {\n      printf (\"mpz_mfac_uiui(%lu,1) wrong\\n\", n);\n      printf (\"  got  \"); mpz_out_str (stdout, 10, res); printf(\"\\n\");\n      printf (\"  want \"); mpz_out_str (stdout, 10, ref[0]); printf(\"\\n\");\n      abort ();\n    }\n\n  n++;\n  mpz_2fac_ui (ref[0], n);\n  mpz_mfac_uiui (res, n, 2);\n  MPZ_CHECK_FORMAT (res);\n  if (mpz_cmp (ref[0], res) != 0)\n    {\n      printf (\"mpz_mfac_uiui(%lu,2) wrong\\n\", n);\n      printf (\"  got  \"); mpz_out_str (stdout, 10, res); printf(\"\\n\");\n      printf (\"  want \"); mpz_out_str (stdout, 10, ref[0]); printf(\"\\n\");\n      abort ();\n    }\n\n  for (m = 0; m < MULTIFAC_WHEEL; m++)\n    mpz_clear (ref[m]);\n  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)\n    mpz_clear (ref2[m2]);\n  mpz_clear (res);\n\n  tests_end ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-mul.c",
    "content": "/* Test mpz_cmp, mpz_mul.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\nvoid debug_mp(mpz_t);\nstatic void ref_mpn_mul(mp_ptr,mp_srcptr,mp_size_t,mp_srcptr,mp_size_t);\nstatic void ref_mpz_mul(mpz_t, const mpz_t, const mpz_t);\nvoid dump_abort(int, char *, mpz_t, mpz_t, mpz_t, mpz_t);\n\n#define FFT_MIN_BITSIZE 100000\n\nchar *extra_fft;\n\nvoid\none (int i, mpz_t multiplicand, mpz_t multiplier)\n{\n  mpz_t product, ref_product;\n  mpz_t quotient;\n\n  mpz_init (product);\n  mpz_init (ref_product);\n  mpz_init (quotient);\n\n  /* Test plain multiplication comparing results against reference code.  
*/\n  mpz_mul (product, multiplier, multiplicand);\n  ref_mpz_mul (ref_product, multiplier, multiplicand);\n  if (mpz_cmp (product, ref_product))\n    dump_abort (i, \"incorrect plain product\",\n\t\tmultiplier, multiplicand, product, ref_product);\n\n  /* Test squaring, comparing results against plain multiplication  */\n  mpz_mul (product, multiplier, multiplier);\n  mpz_set (multiplicand, multiplier);\n  mpz_mul (ref_product, multiplier, multiplicand);\n  if (mpz_cmp (product, ref_product))\n    dump_abort (i, \"incorrect square product\",\n\t\tmultiplier, multiplier, product, ref_product);\n\n  mpz_clear (product);\n  mpz_clear (ref_product);\n  mpz_clear (quotient);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t op1, op2;\n  int i;\n\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range, fsize_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  extra_fft = getenv (\"GMP_CHECK_FFT\");\n\n  mpz_init (bs);\n  mpz_init (op1);\n  mpz_init (op2);\n\n  fsize_range = 4 << 8;\t\t/* a fraction 1/256 of size_range */\n  for (i = 0; fsize_range >> 8 < (extra_fft ? 
27 : 22); i++)\n    {\n      size_range = fsize_range >> 8;\n      fsize_range = fsize_range * 33 / 32;\n\n      mpz_urandomb (bs, rands, size_range);\n      mpz_rrandomb (op1, rands, mpz_get_ui (bs));\n      mpz_urandomb (bs, rands, size_range);\n      mpz_rrandomb (op2, rands, mpz_get_ui (bs));\n\n      mpz_urandomb (bs, rands, 4);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 0x3) == 0)\n\tmpz_neg (op1, op1);\n      if ((bsi & 0xC) == 0)\n\tmpz_neg (op2, op2);\n\n      /* printf (\"%d %d\\n\", SIZ (op1), SIZ (op2)); */\n      one (i, op2, op1);\n    }\n\n  if (extra_fft)\n    for (i = -50; i < 0; i++)\n      {\n\tmpz_urandomb (bs, rands, 32);\n\tsize_range = mpz_get_ui (bs) % 27;\n\n\tmpz_urandomb (bs, rands, size_range);\n\tmpz_rrandomb (op1, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);\n\tmpz_urandomb (bs, rands, size_range);\n\tmpz_rrandomb (op2, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);\n\n\t/* printf (\"%d: %d %d\\n\", i, SIZ (op1), SIZ (op2)); */\n\tfflush (stdout);\n\tone (-1, op2, op1);\n      }\n\n  mpz_clear (bs);\n  mpz_clear (op1);\n  mpz_clear (op2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nstatic void\nref_mpz_mul (mpz_t w, const mpz_t u, const mpz_t v)\n{\n  mp_size_t usize = u->_mp_size;\n  mp_size_t vsize = v->_mp_size;\n  mp_size_t wsize;\n  mp_size_t sign_product;\n  mp_ptr up, vp;\n  mp_ptr wp;\n  mp_size_t talloc;\n\n  sign_product = usize ^ vsize;\n  usize = ABS (usize);\n  vsize = ABS (vsize);\n\n  if (usize == 0 || vsize == 0)\n    {\n      SIZ (w) = 0;\n      return;\n    }\n\n  talloc = usize + vsize;\n\n  up = u->_mp_d;\n  vp = v->_mp_d;\n\n  wp = __GMP_ALLOCATE_FUNC_LIMBS (talloc);\n\n  if (usize > vsize)\n    ref_mpn_mul (wp, up, usize, vp, vsize);\n  else\n    ref_mpn_mul (wp, vp, vsize, up, usize);\n  wsize = usize + vsize;\n  wsize -= wp[wsize - 1] == 0;\n  MPZ_REALLOC (w, wsize);\n  MPN_COPY (PTR(w), wp, wsize);\n\n  SIZ(w) = sign_product < 0 ? 
-wsize : wsize;\n  __GMP_FREE_FUNC_LIMBS (wp, talloc);\n}\n\nstatic void mul_basecase(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n\n#define TOOM3_THRESHOLD (MAX (MUL_TOOM3_THRESHOLD, SQR_TOOM3_THRESHOLD))\n#define FFT_THRESHOLD (MAX (MUL_FFT_FULL_THRESHOLD, SQR_FFT_FULL_THRESHOLD))\n\nstatic void\nref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)\n{\n  mp_ptr tp;\n  mp_size_t tn;\n  mp_limb_t cy;\n\n  if (vn < TOOM3_THRESHOLD)\n    {\n      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own\n\t mul_basecase.  */\n      if (vn != 0)\n\tmul_basecase (wp, up, un, vp, vn);\n      else\n\tMPN_ZERO (wp, un);\n      return;\n    }\n\n  if (vn < FFT_THRESHOLD)\n    {\n      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use mpn_kara_mul_n.  */\n      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);\n      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);\n      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);\n    }\n  else\n    {\n      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */\n      /* The \"- 63 + 255\" tweaks the allocation to allow for huge operands.\n\t See the definition of this macro in gmp-impl.h to understand this.  
*/\n      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;\n      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);\n      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);\n    }\n\n  if (un != vn)\n    {\n      if (un - vn < vn)\n\tref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);\n      else\n\tref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);\n\n      MPN_COPY (wp, tp, vn);\n      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);\n      mpn_incr_u (wp + 2 * vn, cy);\n    }\n  else\n    {\n      MPN_COPY (wp, tp, 2 * vn);\n    }\n\n  __GMP_FREE_FUNC_LIMBS (tp, tn);\n}\n\nstatic void\nmul_basecase (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)\n{\n  mp_size_t i, j;\n  mp_limb_t prod_low, prod_high;\n  mp_limb_t cy_dig;\n  mp_limb_t v_limb;\n\n  /* Multiply by the first limb in V separately, as the result can\n     be stored (not added) to PROD.  We also avoid a loop for zeroing.  */\n  v_limb = vp[0];\n  cy_dig = 0;\n  for (j = un; j > 0; j--)\n    {\n      mp_limb_t u_limb, w_limb;\n      u_limb = *up++;\n      umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);\n      add_ssaaaa (cy_dig, w_limb, prod_high, prod_low, 0, cy_dig << GMP_NAIL_BITS);\n      *wp++ = w_limb >> GMP_NAIL_BITS;\n    }\n\n  *wp++ = cy_dig;\n  wp -= un;\n  up -= un;\n\n  /* For each iteration in the outer loop, multiply one limb from\n     U with one limb from V, and add it to PROD.  
*/\n  for (i = 1; i < vn; i++)\n    {\n      v_limb = vp[i];\n      cy_dig = 0;\n\n      for (j = un; j > 0; j--)\n\t{\n\t  mp_limb_t u_limb, w_limb;\n\t  u_limb = *up++;\n\t  umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);\n\t  w_limb = *wp;\n\t  add_ssaaaa (prod_high, prod_low, prod_high, prod_low, 0, w_limb << GMP_NAIL_BITS);\n\t  prod_low >>= GMP_NAIL_BITS;\n\t  prod_low += cy_dig;\n#if GMP_NAIL_BITS == 0\n\t  cy_dig = prod_high + (prod_low < cy_dig);\n#else\n\t  cy_dig = prod_high;\n\t  cy_dig += prod_low >> GMP_NUMB_BITS;\n#endif\n\t  *wp++ = prod_low & GMP_NUMB_MASK;\n\t}\n\n      *wp++ = cy_dig;\n      wp -= un;\n      up -= un;\n    }\n}\n\nvoid\ndump_abort (int i, char *s,\n            mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)\n{\n  fprintf (stderr, \"ERROR: %s in test %d\\n\", s, i);\n  fprintf (stderr, \"op1          = \"); debug_mp (op1);\n  fprintf (stderr, \"op2          = \"); debug_mp (op2);\n  fprintf (stderr, \"    product  = \"); debug_mp (product);\n  fprintf (stderr, \"ref_product  = \"); debug_mp (ref_product);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x)\n{\n  size_t siz = mpz_sizeinbase (x, 16);\n\n  if (siz > 65)\n    {\n      mpz_t q;\n      mpz_init (q);\n      mpz_tdiv_q_2exp (q, x, 4 * (mpz_sizeinbase (x, 16) - 25));\n      gmp_fprintf (stderr, \"%ZX...\", q);\n      mpz_tdiv_r_2exp (q, x, 4 * 25);\n      gmp_fprintf (stderr, \"%025ZX [%d]\\n\", q, (int) siz);\n      mpz_clear (q);\n    }\n  else\n    {\n      gmp_fprintf (stderr, \"%ZX\\n\", x);\n    }\n}\n"
  },
  {
    "path": "tests/mpz/t-mul_i.c",
    "content": "/* Test mpz_mul_ui and mpz_mul_si.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nmpz_t got, want, x;\n\nvoid\ncompare_si (long y)\n{\n  if (mpz_cmp (got, want) != 0)\n    {\n      printf    (\"mpz_mul_si wrong\\n\");\n      mpz_trace (\"  x\", x);\n      printf    (\"  y=%ld (0x%lX)\\n\", y, y);\n      mpz_trace (\"  got \", got);\n      mpz_trace (\"  want\", want);\n      abort ();\n    }\n}\n\nvoid\ncompare_ui (unsigned long y)\n{\n  if (mpz_cmp (got, want) != 0)\n    {\n      printf    (\"mpz_mul_ui wrong\\n\");\n      mpz_trace (\"  x\", x);\n      printf    (\"  y=%lu (0x%lX)\\n\", y, y);\n      mpz_trace (\"  got \", got);\n      mpz_trace (\"  want\", want);\n      abort ();\n    }\n}\n\nvoid\ncheck_samples (void)\n{\n  {\n    long  y;\n\n    mpz_set_ui (x, 1L);\n    y = 0;\n    mpz_mul_si (got, x, y);\n    mpz_set_si (want, y);\n    compare_si (y);\n\n    mpz_set_ui (x, 1L);\n    y = 1;\n    mpz_mul_si (got, x, y);\n    mpz_set_si (want, y);\n    compare_si (y);\n\n    mpz_set_ui (x, 1L);\n    y = -1;\n    mpz_mul_si 
(got, x, y);\n    mpz_set_si (want, y);\n    compare_si (y);\n\n    mpz_set_ui (x, 1L);\n    y = LONG_MIN;\n    mpz_mul_si (got, x, y);\n    mpz_set_si (want, y);\n    compare_si (y);\n\n    mpz_set_ui (x, 1L);\n    y = LONG_MAX;\n    mpz_mul_si (got, x, y);\n    mpz_set_si (want, y);\n    compare_si (y);\n  }\n\n  {\n    unsigned long y;\n  \n    mpz_set_ui (x, 1L);\n    y = 0;\n    mpz_mul_ui (got, x, y);\n    mpz_set_ui (want, y);\n    compare_ui (y);\n\n    mpz_set_ui (x, 1L);\n    y = 1;\n    mpz_mul_ui (got, x, y);\n    mpz_set_ui (want, y);\n    compare_ui (y);\n\n    mpz_set_ui (x, 1L);\n    y = ULONG_MAX;\n    mpz_mul_ui (got, x, y);\n    mpz_set_ui (want, y);\n    compare_ui (y);\n  }\n}\n\nint\nmain (int argc, char **argv)\n{\n  tests_start ();\n\n  mpz_init (x);\n  mpz_init (got);\n  mpz_init (want);\n\n  check_samples ();\n\n  mpz_clear (x);\n  mpz_clear (got);\n  mpz_clear (want);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-next_prime_candidate.c",
    "content": "/* Test mpz_mul_ui and mpz_mul_si.\n\nCopyright 1999, 2000, 2001 Free Software Foundation, Inc.\nCopyright 2011 Brian Gladman \n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"tests.h\"\n\n/* nextprime(10^pow10) = 10^pow10 + np_off  */\n\nstruct\n{ \n    int pow10, np_off;\n} tests1[] =\n{  { 1, 1 }, { 2, 1 }, {3, 9}, {4, 7}, {5, 3}, {6, 3}, {10, 19}, {20, 39} };\n\n/* count of primes 10^pow10 < prime[i] < 10^pow10 + 1000 and the sum of the\n   squares of (prime[i] - 10^pow10) in this range */\n\nstruct\n{ \n    int pow10, count, sumsq;\n} tests2[] =\n{  {4, 106, 34598882}, {5, 81, 26944273}, {6, 75, 24031603}, {10, 44, 16383852}, {20, 24, 8189464} };\n\nint \nmain (int argc, char **argv)\n{\n    char str[1000];\n    gmp_randstate_t rnd; \n    mpz_t x, y, z;\n    int i, j, k, s;\n\n    tests_start ();\n    gmp_randinit_default (rnd);\n    mpz_init(x);\n    mpz_init(y);\n    mpz_init(z);\n    for( i = 0 ; i < sizeof(tests1) / sizeof(tests1[0]) ; ++i )\n    {\n      mpz_ui_pow_ui(x, 10, tests1[i].pow10);\n      mpz_next_prime_candidate(y, x, rnd);\n      mpz_sub(y, y, x);\n      j = mpz_get_ui(y);\n      if(j != 
tests1[i].np_off)\n      {\n          printf(\"\\nnext_likely_prime(10^%d) - 10^%d: expected %d but got %d\", \n              tests1[i].pow10, tests1[i].pow10, tests1[i].np_off, j);\n          abort();\n      }\n    }\n\n    for( i = 0 ; i < sizeof(tests2) / sizeof(tests2[0]) ; ++i )\n    {\n      mpz_ui_pow_ui(x, 10, tests2[i].pow10);\n      mpz_set(y, x);\n      s = j = 0;\n      for( ; ; )\n      {\n          mpz_next_prime_candidate(y, y, rnd);\n          mpz_sub(z, y, x);\n          k = mpz_get_si(z);\n          if(k >= 1000)\n              break;\n          j++;\n          s += k * k;\n      }\n      if(j != tests2[i].count || s != tests2[i].sumsq)\n      {\n          printf(\"\\nnext_likely_prime failed test2[%d], expected %d and %d but got %d and %d\", \n              i, tests2[i].count, tests2[i].sumsq, j, s);\n          abort();\n      }\n    }\n      \n    gmp_randclear (rnd);\n    mpz_clear(z);\n    mpz_clear(y);\n    mpz_clear(x);\n    tests_end ();\n    exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-oddeven.c",
    "content": "/* Test mpz_odd_p and mpz_even_p.\n\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char  *n;\n    int          odd, even;\n  } data[] = {\n    {   \"0\", 0, 1 },\n    {   \"1\", 1, 0 },\n    {   \"2\", 0, 1 },\n    {   \"3\", 1, 0 },\n    {   \"4\", 0, 1 },\n\n    {  \"-4\", 0, 1 },\n    {  \"-3\", 1, 0 },\n    {  \"-2\", 0, 1 },\n    {  \"-1\", 1, 0 },\n\n    {  \"0x1000000000000000000000000000000000000000000000000000\", 0, 1 },\n    {  \"0x1000000000000000000000000000000000000000000000000001\", 1, 0 },\n    {  \"0x1000000000000000000000000000000000000000000000000002\", 0, 1 },\n    {  \"0x1000000000000000000000000000000000000000000000000003\", 1, 0 },\n\n    { \"-0x1000000000000000000000000000000000000000000000000004\", 0, 1 },\n    { \"-0x1000000000000000000000000000000000000000000000000003\", 1, 0 },\n    { \"-0x1000000000000000000000000000000000000000000000000002\", 0, 1 },\n    { \"-0x1000000000000000000000000000000000000000000000000001\", 1, 0 },\n  };\n\n  mpz_t  
n;\n  int    i;\n\n  mpz_init (n);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (n, data[i].n, 0);\n\n      if ((mpz_odd_p (n) != 0) != data[i].odd)\n        {\n          printf (\"mpz_odd_p wrong on data[%d]\\n\", i); \n          abort();                                    \n        }\n\n      if ((mpz_even_p (n) != 0) != data[i].even)\n        {\n          printf (\"mpz_even_p wrong on data[%d]\\n\", i); \n          abort();                                    \n        }\n    }\n\n  mpz_clear (n);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-perfpow.c",
    "content": "/* Exercise mpz_perfect_power_p\n\n  Copyright 2008 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nint\nperfpow_ref (mpz_t x)\n{\n  mpz_t y, q, r;\n  unsigned long i;\n  int ret = 0;\n\n  if (mpz_cmp_ui (x, 0) == 0 || mpz_cmp_ui (x, 1) == 0\n      || mpz_cmp_si (x, -1) == 0)\n    return 1;\n  mpz_init_set (y, x);\n  mpz_init (q);\n  mpz_init (r);\n  mpz_abs (y, y);\n  for (i = 2;; i++)\n    {\n      mpz_root (q, y, i);\n      mpz_pow_ui (r, q, i);\n      if (mpz_cmp (r, y) == 0 && (i % 2 == 1 || SIZ (x) > 0))\n\t{\n\t  ret = 1;\n\t  break;\n\t}\n      if (mpz_cmp_ui (q, 1) <= 0)\n\tbreak;\n    }\n  mpz_clear (y);\n  mpz_clear (q);\n  mpz_clear (r);\n  return ret;\n}\n\nint\nmain (void)\n{\n  int i, r1, r2, j, a, b, c, d;\n  mpz_t x, m;\n  tests_start ();\n\n  mpz_init (x);\n  mpz_init (m);\n  /* check small values \n    -8000 picked up an error\n    -729 picked up an error\n  */\n  for (i = -10000; i < 10000; i++)\n    {\n      mpz_set_si (x, i);\n      r1 = mpz_perfect_power_p (x);\n      r2 = perfpow_ref (x);\n      if (r1 != r2)\n\t{\n\t  printf 
(\"mpz_perfect_power_p wrong on %d got %d want %d\\n\", i, r1,\n\t\t  r2);\n\t  abort ();\n\t}\n    }\n  /* check (-1)^i.2^a.3^b.5^c.x^d where x is big */\n  /* 1.0.0.0.5 picked up one error\n     1.10.6.0.0 picked up another error     \n     1.0.0.15.5 picked up another error\n     want a good selection of powers to try all possibilitys\n   */\n  for (i = 1; i <= 2; i++)\n    {\n      for (d = 0; d < 10; d++)\n\t{\n\t  for (a = 0; a < 11; a++)\n\t    {\n\t      for (b = 0; b < 11; b++)\n\t\t{\n\t\t  for (c = 0; c < 20; c++)\n\t\t    {\n\t\t      mpz_set_ui (x, 1);\n\t\t      mpz_set_si (m, -1);\n\t\t      for (j = 0; j < i; j++)\n\t\t\tmpz_mul (x, x, m);\n\t\t      mpz_set_ui (m, 2);\n\t\t      for (j = 0; j < a; j++)\n\t\t\tmpz_mul (x, x, m);\n\t\t      mpz_set_ui (m, 3);\n\t\t      for (j = 0; j < b; j++)\n\t\t\tmpz_mul (x, x, m);\n\t\t      mpz_set_ui (m, 5);\n\t\t      for (j = 0; j < c; j++)\n\t\t\tmpz_mul (x, x, m);\n\t\t      mpz_set_ui (m, 117039007);\n\t\t      for (j = 0; j < d; j++)\n\t\t\tmpz_mul (x, x, m);\n\t\t      r1 = mpz_perfect_power_p (x);\n\t\t      r2 = perfpow_ref (x);\n\t\t      if (r1 != r2)\n\t\t\t{\n\t\t\t  printf\n\t\t\t    (\"mpz_perfect_power_p wrong on %d.%d.%d.%d.%d got %d want %d\\n\",\n\t\t\t     i, a, b, c, d, r1, r2);\n\t\t\t  abort ();\n\t\t\t}\n\t\t    }\n\t\t}\n\t    }\n\t}\n    }\n\n  mpz_clear (x);\n  mpz_clear (m);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-perfsqr.c",
    "content": "/* Test mpz_perfect_square_p.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* check_modulo() exercises mpz_perfect_square_p on squares which cover each\n   possible quadratic residue to each divisor used within\n   mpn_perfect_square_p, ensuring those residues aren't incorrectly claimed\n   to be non-residues.\n\n   Each divisor is taken separately.  It's arranged that n is congruent to 0\n   modulo the other divisors, 0 of course being a quadratic residue to any\n   modulus.\n\n   The values \"(j*others)^2\" cover all quadratic residues mod divisor[i],\n   but in no particular order.  j is run from 1<=j<=divisor[i] so that zero\n   is excluded.  A literal n==0 doesn't reach the residue tests.  
*/\n\nvoid\ncheck_modulo (void)\n{\n  static const unsigned long  divisor[] = PERFSQR_DIVISORS;\n  unsigned long  i, j;\n\n  mpz_t  alldiv, others, n;\n\n  mpz_init (alldiv);\n  mpz_init (others);\n  mpz_init (n);\n\n  /* product of all divisors */\n  mpz_set_ui (alldiv, 1L);\n  for (i = 0; i < numberof (divisor); i++)\n    mpz_mul_ui (alldiv, alldiv, divisor[i]);\n\n  for (i = 0; i < numberof (divisor); i++)\n    {\n      /* product of all divisors except i */\n      mpz_set_ui (others, 1L);\n      for (j = 0; j < numberof (divisor); j++)\n        if (i != j)\n          mpz_mul_ui (others, others, divisor[j]);\n\n      for (j = 1; j <= divisor[i]; j++)\n        {\n          /* square */\n          mpz_mul_ui (n, others, j);\n          mpz_mul (n, n, n);\n          if (! mpz_perfect_square_p (n))\n            {\n              printf (\"mpz_perfect_square_p got 0, want 1\\n\");\n              mpz_trace (\"  n\", n);\n              abort ();\n            }\n        }\n    }\n\n  mpz_clear (alldiv);\n  mpz_clear (others);\n  mpz_clear (n);\n}\n\n\n/* Exercise mpz_perfect_square_p compared to what mpz_sqrt says. 
*/\nvoid\ncheck_sqrt (int reps)\n{\n  mpz_t x2, x2t, x;\n  mp_size_t x2n;\n  int res;\n  int i;\n  /* int cnt = 0; */\n  gmp_randstate_t rands;\n  mpz_t bs;\n\n  mpz_init (bs);\n\n  mpz_init (x2);\n  mpz_init (x);\n  mpz_init (x2t);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 9);\n      x2n = mpz_get_ui (bs);\n      mpz_rrandomb (x2, rands, x2n);\n      /* mpz_out_str (stdout, -16, x2); puts (\"\"); */\n\n      res = mpz_perfect_square_p (x2);\n      mpz_sqrt (x, x2);\n      mpz_mul (x2t, x, x);\n\n      if (res != (mpz_cmp (x2, x2t) == 0))\n        {\n          printf    (\"mpz_perfect_square_p and mpz_sqrt differ\\n\");\n          mpz_trace (\"   x  \", x);\n          mpz_trace (\"   x2 \", x2);\n          mpz_trace (\"   x2t\", x2t);\n          printf    (\"   mpz_perfect_square_p %d\\n\", res);\n          printf    (\"   mpz_sqrt             %d\\n\", mpz_cmp (x2, x2t) == 0);\n          abort ();\n        }\n\n      /* cnt += res != 0; */\n    }\n  /* printf (\"%d/%d perfect squares\\n\", cnt, reps); */\n\n  mpz_clear (bs);\n  mpz_clear (x2);\n  mpz_clear (x);\n  mpz_clear (x2t);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char **argv)\n{\n  int reps = 100000;\n\n  tests_start ();\n  mp_trace_base = -16;\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  check_modulo ();\n  check_sqrt (reps);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-popcount.c",
    "content": "/* Test mpz_popcount.\n\nCopyright 2001, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n\nvoid\ncheck_onebit (void)\n{\n  mpz_t          n;\n  unsigned long  i, got;\n\n  mpz_init (n);\n  for (i = 0; i < 5 * BITS_PER_MP_LIMB; i++)\n    {\n      mpz_setbit (n, i);\n      got = mpz_popcount (n);\n      if (got != 1)\n        {\n          printf (\"mpz_popcount wrong on single bit at %lu\\n\", i); \n          printf (\"   got %lu, want 1\\n\", got);\n          abort();                                    \n        }\n      mpz_clrbit (n, i);\n    }\n  mpz_clear (n);\n}\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    const char     *n;\n    unsigned long  want;\n  } data[] = {\n    { \"-1\", ~ (unsigned long) 0 },\n    { \"-12345678\", ~ (unsigned long) 0 },\n    { \"0\", 0 },\n    { \"1\", 1 },\n    { \"3\", 2 },\n    { \"5\", 2 },\n    { \"0xFFFF\", 16 },\n    { \"0xFFFFFFFF\", 32 },\n    { \"0xFFFFFFFFFFFFFFFF\", 64 },\n    { \"0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\", 128 },\n  };\n\n  unsigned long   got;\n  int    i;\n  mpz_t  n;\n\n  
mpz_init (n);\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (n, data[i].n, 0);\n      got = mpz_popcount (n);\n      if (got != data[i].want)\n        {\n          printf (\"mpz_popcount wrong at data[%d]\\n\", i); \n          printf (\"   n     \\\"%s\\\"\\n\", data[i].n);\n          printf (\"         \");   mpz_out_str (stdout, 10, n); printf (\"\\n\");\n          printf (\"         0x\"); mpz_out_str (stdout, 16, n); printf (\"\\n\");\n          printf (\"   got   %lu\\n\", got);\n          printf (\"   want  %lu\\n\", data[i].want);\n          abort();                                    \n        }\n    }\n  mpz_clear (n);\n}\n\nunsigned long\nrefmpz_popcount (mpz_t arg)\n{\n  mp_size_t n, i;\n  unsigned long cnt;\n  mp_limb_t x;\n\n  n = SIZ(arg);\n  if (n < 0)\n    return ~(unsigned long) 0;\n\n  cnt = 0;\n  for (i = 0; i < n; i++)\n    {\n      x = PTR(arg)[i];\n      while (x != 0)\n\t{\n\t  cnt += (x & 1);\n\t  x >>= 1;\n\t}\n    }\n  return cnt;\n}\n\nvoid\ncheck_random (void)\n{\n  gmp_randstate_t rands;\n  mpz_t bs;\n  mpz_t arg;\n  unsigned long arg_size, size_range;\n  unsigned long got, ref;\n  int i;\n\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n  mpz_init (arg);\n\n  for (i = 0; i < 10000; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 11 + 2; /* 0..4096 bit operands */\n\n      mpz_urandomb (bs, rands, size_range);\n      arg_size = mpz_get_ui (bs);\n      mpz_rrandomb (arg, rands, arg_size);\n\n      got = mpz_popcount (arg);\n      ref = refmpz_popcount (arg);\n      if (got != ref)\n\t{\n          printf (\"mpz_popcount wrong on random\\n\"); \n          printf (\"         \");   mpz_out_str (stdout, 10, arg); printf (\"\\n\");\n          printf (\"         0x\"); mpz_out_str (stdout, 16, arg); printf (\"\\n\");\n          printf (\"   got   %lu\\n\", got);\n          printf (\"   want  %lu\\n\", ref);\n          abort();                                    \n\t  
\n\t}\n    }\n  mpz_clear (arg);\n  mpz_clear (bs);\n  gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_onebit ();\n  check_data ();\n  check_random ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-pow.c",
    "content": "/* Test mpz_pow_ui and mpz_ui_pow_ui.\n\nCopyright 1997, 1999, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr want, mpz_srcptr base, unsigned long exp)\n{\n  mpz_t  got;\n\n  mpz_init (got);\n\n  mpz_pow_ui (got, base, exp);\n  if (mpz_cmp (got, want))\n    {\n      printf (\"mpz_pow_ui wrong\\n\");\n      mpz_trace (\"  base\", base);\n      printf    (\"  exp = %lu (0x%lX)\\n\", exp, exp);\n      mpz_trace (\"  got \", got);\n      mpz_trace (\"  want\", want);\n      abort ();\n    }\n\n  mpz_set (got, base);\n  mpz_pow_ui (got, got, exp);\n  if (mpz_cmp (got, want))\n    {\n      printf (\"mpz_pow_ui wrong\\n\");\n      mpz_trace (\"  base\", base);\n      printf    (\"  exp = %lu (0x%lX)\\n\", exp, exp);\n      mpz_trace (\"  got \", got);\n      mpz_trace (\"  want\", want);\n      abort ();\n    }\n\n  if (mpz_fits_ulong_p (base))\n    {\n      unsigned long  base_u = mpz_get_ui (base);\n      mpz_ui_pow_ui (got, base_u, exp);\n      if (mpz_cmp (got, want))\n        {\n          printf    (\"mpz_ui_pow_ui 
wrong\\n\");\n          printf    (\"  base=%lu (0x%lX)\\n\", base_u, base_u);\n          printf    (\"  exp = %lu (0x%lX)\\n\", exp, exp);\n          mpz_trace (\"  got \", got);\n          mpz_trace (\"  want\", want);\n          abort ();\n        }\n    }\n\n  mpz_clear (got);\n}\n\nvoid\ncheck_base (mpz_srcptr base)\n{\n  unsigned long  exp;\n  mpz_t          want;\n\n  mpz_init (want);\n  mpz_set_ui (want, 1L);\n\n  for (exp = 0; exp < 20; exp++)\n    {\n      check_one (want, base, exp);\n      mpz_mul (want, want, base);\n    }\n\n  mpz_clear (want);\n}\n\nvoid\ncheck_various (void)\n{\n  static const struct {\n    const char *base;\n  } data[] = {\n    { \"0\" },\n    { \"1\" },\n    { \"2\" },\n    { \"3\" },\n    { \"4\" },\n    { \"5\" },\n    { \"6\" },\n    { \"10\" },\n    { \"15\" },\n    { \"16\" },\n\n    { \"0x1F\" },\n    { \"0xFF\" },\n    { \"0x1001\" },\n    { \"0xFFFF\" },\n    { \"0x10000001\" },\n    { \"0x1000000000000001\" },\n\n    /* actual size closest to estimate */\n    { \"0xFFFFFFFF\" },\n    { \"0xFFFFFFFFFFFFFFFF\" },\n\n    /* same after rshift */\n    { \"0xFFFFFFFF0\" },\n    { \"0xFFFFFFFF00\" },\n    { \"0xFFFFFFFFFFFFFFFF0\" },\n    { \"0xFFFFFFFFFFFFFFFF00\" },\n\n    /* change from 2 limbs to 1 after rshift */\n    { \"0x180000000\" },\n    { \"0x18000000000000000\" },\n\n    /* change from 3 limbs to 2 after rshift */\n    { \"0x18000000100000000\" },\n    { \"0x180000000000000010000000000000000\" },\n\n    /* handling of absolute value */\n    { \"-0x80000000\" },\n    { \"-0x8000000000000000\" },\n\n    /* low zero limb, and size>2, checking argument overlap detection */\n    { \"0x3000000000000000300000000000000030000000000000000\" },\n  };\n\n  mpz_t  base;\n  int    i;\n\n  mpz_init (base);\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_set_str_or_abort (base, data[i].base, 0);\n      check_base (base);\n    }\n\n  mpz_clear (base);\n}\n\nvoid\ncheck_random (int reps)\n{\n  mpz_t              base, 
want;\n  mp_size_t          base_size;\n  int                i;\n  unsigned long      size_range, exp;\n  gmp_randstate_t  rands;\n  \n  mpz_init (base);\n  mpz_init (want);\n  gmp_randinit_default(rands);\n\n  for (i = 0; i < reps; i++)\n    {\n      /* exponentially random 0 to 2^13 bits for base */\n      mpz_urandomb (want, rands, 32);\n      size_range = mpz_get_ui (want) % 12 + 2;\n      mpz_urandomb (want, rands, size_range);\n      base_size = mpz_get_ui (want);\n      mpz_rrandomb (base, rands, base_size);\n\n      /* randomly signed base */\n      mpz_urandomb (want, rands, 2);\n      if ((mpz_get_ui (want) & 1) != 0)\n\tmpz_neg (base, base);\n\n      /* random 5 bits for exponent */\n      mpz_urandomb (want, rands, 5L);\n      exp = mpz_get_ui (want);\n\n      refmpz_pow_ui (want, base, exp);\n      check_one (want, base, exp);\n    }\n\n  mpz_clear (base);\n  mpz_clear (want);\n  gmp_randclear(rands);\n}\n\nint\nmain (int argc, char **argv)\n{\n  int reps = 500;\n\n  /* dummy call to drag in refmpn.o for testing mpz/n_pow_ui.c with\n     refmpn_mul_2 */\n  refmpn_zero_p (NULL, (mp_size_t) 0);\n\n  tests_start ();\n  mp_trace_base = -16;\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  check_various ();\n  check_random (reps);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-powm.c",
    "content": "/* Test mpz_powm, mpz_mul. mpz_mod, mpz_mod_ui, mpz_fdiv_q_ui.\n\nCopyright 1991, 1993, 1994, 1996, 1999, 2000, 2001 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t base, exp, mod;\n  mpz_t r1, r2, t1, exp2, base2;\n  mp_size_t base_size, exp_size, mod_size;\n  int i;\n  int reps = 100;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (base);\n  mpz_init (exp);\n  mpz_init (mod);\n  mpz_init (r1);\n  mpz_init (r2);\n  mpz_init (t1);\n  mpz_init (exp2);\n  mpz_init (base2);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 13 + 2;\n\n      do  /* Loop until mathematically well-defined.  
*/\n\t{\n\t  mpz_urandomb (bs, rands, size_range);\n\t  base_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (base, rands, base_size);\n\n\t  mpz_urandomb (bs, rands, 7L);\n\t  exp_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (exp, rands, exp_size);\n\t}\n      while (mpz_cmp_ui (base, 0) == 0 && mpz_cmp_ui (exp, 0) == 0);\n\n      do\n        {\n\t  mpz_urandomb (bs, rands, size_range);\n\t  mod_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (mod, rands, mod_size);\n\t}\n      while (mpz_cmp_ui (mod, 0) == 0);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (base, base);\n\n      /* printf (\"%ld %ld %ld\\n\", SIZ (base), SIZ (exp), SIZ (mod)); */\n\n      mpz_powm (r1, base, exp, mod);\n\n      mpz_set_ui (r2, 1);\n      mpz_set (base2, base);\n      mpz_set (exp2, exp);\n\n      mpz_mod (r2, r2, mod);\t/* needed when exp==0 and mod==1 */\n      while (mpz_cmp_ui (exp2, 0) != 0)\n\t{\n\t  mpz_mod_ui (t1, exp2, 2);\n\t  if (mpz_cmp_ui (t1, 0) != 0)\n\t    {\n\t      mpz_mul (r2, r2, base2);\n\t      mpz_mod (r2, r2, mod);\n\t    }\n\t  mpz_mul (base2, base2, base2);\n\t  mpz_mod (base2, base2, mod);\n\t  mpz_fdiv_q_ui (exp2, exp2, 2);\n\t}\n\n      if (mpz_cmp (r1, r2) != 0)\n\t{\n\t  fprintf (stderr, \"\\nIncorrect results for operands:\\n\");\n\t  debug_mp (base, -16);\n\t  debug_mp (exp, -16);\n\t  debug_mp (mod, -16);\n\t  fprintf (stderr, \"mpz_powm result:\\n\");\n\t  debug_mp (r1, -16);\n\t  fprintf (stderr, \"reference result:\\n\");\n\t  debug_mp (r2, -16);\n\t  abort ();\n\t}\n    }\n\n  mpz_clear (bs);\n  mpz_clear (base);\n  mpz_clear (exp);\n  mpz_clear (mod);\n  mpz_clear (r1);\n  mpz_clear (r2);\n  mpz_clear (t1);\n  mpz_clear (exp2);\n  mpz_clear (base2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-powm_ui.c",
    "content": "/* Test mpz_powm_ui, mpz_mul. mpz_mod.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software\nFoundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(mpz_t, mpz_t);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t base, exp, mod;\n  mpz_t r1, r2, base2;\n  mp_size_t base_size, exp_size, mod_size;\n  unsigned long int exp2;\n  int i;\n  int reps = 200;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (base);\n  mpz_init (exp);\n  mpz_init (mod);\n  mpz_init (r1);\n  mpz_init (r2);\n  mpz_init (base2);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 13 + 2;\n\n      do  /* Loop until mathematically well-defined.  
*/\n\t{\n\t  mpz_urandomb (bs, rands, size_range);\n\t  base_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (base, rands, base_size);\n\n\t  mpz_urandomb (bs, rands, 6L);\n\t  exp_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (exp, rands, exp_size);\n\t  exp2 = mpz_getlimbn (exp, (mp_size_t) 0);\n\t}\n      while (mpz_cmp_ui (base, 0) == 0 && exp2 == 0);\n\n      do\n        {\n\t  mpz_urandomb (bs, rands, size_range);\n\t  mod_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (mod, rands, mod_size);\n\t}\n      while (mpz_cmp_ui (mod, 0) == 0);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (base, base);\n\n      /* printf (\"%ld %ld\\n\", SIZ (base), SIZ (mod)); */\n\n#if 0\n      putc ('\\n', stderr);\n      debug_mp (base, -16);\n      debug_mp (mod, -16);\n#endif\n\n      mpz_powm_ui (r1, base, exp2, mod);\n\n      mpz_set_ui (r2, 1);\n      mpz_set (base2, base);\n\n      mpz_mod (r2, r2, mod);\t/* needed when exp==0 and mod==1 */\n      while (exp2 != 0)\n\t{\n\t  if (exp2 % 2 != 0)\n\t    {\n\t      mpz_mul (r2, r2, base2);\n\t      mpz_mod (r2, r2, mod);\n\t    }\n\t  mpz_mul (base2, base2, base2);\n\t  mpz_mod (base2, base2, mod);\n\t  exp2 = exp2 / 2;\n\t}\n\n#if 0\n      debug_mp (r1, -16);\n      debug_mp (r2, -16);\n#endif\n\n      if (mpz_cmp (r1, r2) != 0)\n\t{\n\t  fprintf (stderr, \"\\ntest %d: Incorrect results for operands:\\n\", i);\n\t  debug_mp (base, -16);\n\t  debug_mp (exp, -16);\n\t  debug_mp (mod, -16);\n\t  fprintf (stderr, \"mpz_powm_ui result:\\n\");\n\t  debug_mp (r1, -16);\n\t  fprintf (stderr, \"reference result:\\n\");\n\t  debug_mp (r2, -16);\n\t  abort ();\n\t}\n    }\n\n  mpz_clear (bs);\n  mpz_clear (base);\n  mpz_clear (exp);\n  mpz_clear (mod);\n  mpz_clear (r1);\n  mpz_clear (r2);\n  mpz_clear (base2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (mpz_t dividend, mpz_t divisor)\n{\n  fprintf (stderr, \"ERROR\\n\");\n  fprintf (stderr, \"dividend = 
\"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = \"); debug_mp (divisor, -16);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-pprime_p.c",
    "content": "/* Exercise mpz_probab_prime_p.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Enhancements:\n\n   - Test some big primes don't come back claimed to be composite.\n   - Test some big composites don't come back claimed to be certainly prime.\n   - Test some big composites with small factors are identified as certainly\n     composite.  
*/\n\n\n/* return 1 if prime, 0 if composite */\nint\nisprime (long n)\n{\n  long  i;\n\n  n = ABS(n);\n\n  if (n < 2)\n    return 0;\n  if (n == 2)\n    return 1;\n  if ((n & 1) == 0)\n    return 0;\n\n  for (i = 3; i < n; i++)\n    if ((n % i) == 0)\n      return 0;\n\n  return 1;\n}\n\nvoid\ncheck_one (mpz_srcptr n, int want)\n{\n  int  got;\n\n  got = mpz_probab_prime_p (n, 25);\n\n  /* \"definitely prime\" is fine if we only wanted \"probably prime\" */\n  if (got == 2 && want == 1)\n    want = 2;\n\n  if (got != want)\n    {\n      printf (\"mpz_probab_prime_p\\n\");\n      mpz_trace (\"  n    \", n);\n      printf    (\"  got =%d\", got);\n      printf    (\"  want=%d\", want);\n      abort ();\n    }\n}\n\nvoid\ncheck_pn (mpz_ptr n, int want)\n{\n  check_one (n, want);\n  mpz_neg (n, n);\n  check_one (n, want);\n}\n\n/* expect certainty for small n */\nvoid\ncheck_small (void)\n{\n  mpz_t  n;\n  long   i;\n\n  mpz_init (n);\n\n  for (i = 0; i < 300; i++)\n    {\n      mpz_set_si (n, i);\n      check_pn (n, isprime (i));\n    }\n\n  mpz_clear (n);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_small ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-primorial_ui.c",
    "content": "/* Exercise mpz_primorial_ui.\n\nCopyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library test suite.\n\nThe GNU MP Library test suite is free software; you can redistribute it\nand/or modify it under the terms of the GNU General Public License as\npublished by the Free Software Foundation; either version 3 of the License,\nor (at your option) any later version.\n\nThe GNU MP Library test suite is distributed in the hope that it will be\nuseful, but WITHOUT ANY WARRANTY; without even the implied warranty of\nMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\nPublic License for more details.\n\nYou should have received a copy of the GNU General Public License along with\nthe GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* Usage: t-primorial_ui [x|num]\n\n   With no arguments testing goes up to the initial value of \"limit\" below.\n   With a number argument tests are carried that far, or with a literal \"x\"\n   tests are continued without limit (this being meant only for development\n   purposes).  
*/\n\nstatic int isprime (unsigned long int t);\n\nint\nmain (int argc, char *argv[])\n{\n  unsigned long  n;\n  unsigned long  limit = 2222;\n  mpz_t          f, r;\n\n  tests_start ();\n\n  if (argc > 1 && argv[1][0] == 'x')\n    limit = ULONG_MAX;\n  else if (argc > 1)\n    limit = atoi (argv[1]);\n\n  /* for small limb testing */\n  limit = MIN (limit, MP_LIMB_T_MAX);\n\n  mpz_init_set_ui (f, 1);  /* 0# = 1 */\n  mpz_init (r);\n\n  for (n = 0; n < limit; n++)\n    {\n      mpz_primorial_ui (r, n);\n      MPZ_CHECK_FORMAT (r);\n\n      if (mpz_cmp (f, r) != 0)\n        {\n          printf (\"mpz_primorial_ui(%lu) wrong\\n\", n);\n          printf (\"  got  \"); mpz_out_str (stdout, 10, r); printf(\"\\n\");\n          printf (\"  want \"); mpz_out_str (stdout, 10, f); printf(\"\\n\");\n          abort ();\n        }\n\n      if (isprime (n+1))\n\tmpz_mul_ui (f, f, n+1);  /* p# = (p-1)# * (p) */\n    }\n\n  mpz_clear (f);\n  mpz_clear (r);\n\n  tests_end ();\n\n  exit (0);\n}\n\nstatic int\nisprime (unsigned long int t)\n{\n  unsigned long int q, r, d;\n\n  if (t < 3 || (t & 1) == 0)\n    return t == 2;\n\n  for (d = 3, r = 1; r != 0; d += 2)\n    {\n      q = t / d;\n      r = t - q * d;\n      if (q < d)\n\treturn 1;\n    }\n  return 0;\n}\n"
  },
  {
    "path": "tests/mpz/t-root.c",
    "content": "/* Test mpz_add, mpz_add_ui, mpz_cmp, mpz_cmp, mpz_mul, mpz_sqrtrem.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid debug_mp(mpz_t, int);\n\n\nvoid\nmpz_add_si (mpz_ptr x, mpz_srcptr y, signed long z)\n{\n  if (z >= 0)\n    {\n      mpz_add_ui (x, y, z);\n      return;\n    }\n  mpz_sub_ui (x, y, -z);\n  return;\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t x2;\n  mpz_t root1, root2, root3, rem2;\n  mpz_t temp, temp2;\n  mp_size_t x2_size;\n  int i, r1, j, k;\n  int reps = 1000;\n  unsigned long nth;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (x2);\n  mpz_init (root1);\n  mpz_init (root2);\n  mpz_init (root3);\n  mpz_init (rem2);\n  mpz_init (temp);\n  mpz_init (temp2);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 12 + 2;\n\n      mpz_urandomb (bs, rands, 
size_range);\n      x2_size = mpz_get_ui (bs) + 10;\n      mpz_rrandomb (x2, rands, x2_size);\n\n      mpz_urandomb (bs, rands, 15);\n      nth = mpz_getlimbn (bs, 0) % mpz_sizeinbase (x2, 2) + 2;\n\n      mpz_root (root1, x2, nth);\n      mpz_nthroot (root3, x2, nth);\n      if (mpz_cmp (root1, root3) != 0)\n      {\n         printf (\"nthroot to root mismatch\\n\");\n         abort ();\n      }\n\n      mpz_urandomb (bs, rands, 4);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\t{\n\t  /* With 50% probability, set x2 near a perfect power.  */\n\t  mpz_pow_ui (x2, root1, nth);\n\t  if ((bsi & 2) != 0)\n\t    {\n\t      mpz_sub_ui (x2, x2, bsi >> 2);\n\t      mpz_abs (x2, x2);\n\t    }\n\t  else\n\t    mpz_add_ui (x2, x2, bsi >> 2);\n\t  mpz_root (root1, x2, nth);\n      mpz_nthroot (root3, x2, nth);\n      if (mpz_cmp (root1, root3) != 0)\n      {\n         printf (\"nthroot to root mismatch\\n\");\n         abort ();\n      }\n   }\n\n      /* printf (\"%ld %lu\\n\", SIZ (x2), nth); */\n\n      mpz_rootrem (root2, rem2, x2, nth);\n      mpz_pow_ui (temp, root1, nth);\n      mpz_add (temp2, temp, rem2);\n\n      /* Is power of result > argument?  */\n      if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0\n\t  || mpz_cmp (temp, x2) > 0)\n\t{\n\t  fprintf (stderr, \"ERROR after test %d\\n\", i);\n\t  debug_mp (x2, 10);\n\t  debug_mp (root1, 10);\n\t  debug_mp (root2, 10);\n\t  fprintf (stderr, \"nth: %lu\\n\", nth);\n\t  abort ();\n\t}\n\n      if (nth > 1 && mpz_cmp_ui (temp, 1L) > 0 && !mpz_perfect_power_p (temp))\n\t{\n\t  fprintf (stderr, \"ERROR in mpz_perfect_power_p after test %d\\n\", i);\n\t  debug_mp (temp, 10);\n\t  debug_mp (root1, 10);\n\t  fprintf (stderr, \"nth: %lu\\n\", nth);\n\t  abort ();\n\t}\n\n      if (nth > 10000)\n\tcontinue;\t\t/* skip too expensive test */\n\n      mpz_add_ui (temp2, root1, 1L);\n      mpz_pow_ui (temp2, temp2, nth);\n\n      /* Is square of (result + 1) <= argument?  
*/\n      if (mpz_cmp (temp2, x2) <= 0)\n\t{\n\t  fprintf (stderr, \"ERROR after test %d\\n\", i);\n\t  debug_mp (x2, 10);\n\t  debug_mp (root1, 10);\n\t  fprintf (stderr, \"nth: %lu\\n\", nth);\n\t  abort ();\n\t}\n    }\n  for (j = 0; j < reps; j++)\n    {\t\t\t\t// pick a number bs and a power nth\n      mpz_urandomb (bs, rands, GMP_LIMB_BITS);\n      nth = mpz_get_ui (bs) % 30 + 1;\n      mpz_urandomb (bs, rands, GMP_LIMB_BITS);\n      x2_size = mpz_get_ui (bs) % 500;\n      mpz_urandomb (bs, rands, x2_size);\n      mpz_pow_ui (temp, bs, nth);\n      for (k = 0; k < 2; k++)\n\t{\n\t  if (k != 0)\n\t    mpz_neg (temp, temp);\n\t  for (i = -10; i < 10; i++)\n\t    {\n\t      mpz_add_si (temp2, temp, i);\n\t      if (nth % 2 == 0 && mpz_cmp_ui (temp2, 0) < 0)\n\t\tcontinue;\n\t      //printf(\"i is %d nth is %d\\n\",i,nth);\n          mpz_nthroot (root1, temp2, nth);\n          r1 = mpz_root (root2, temp2, nth);\n\t      mpz_rootrem (root3, rem2, temp2, nth);\n          if (mpz_cmp (root1, root2) != 0)\n          {\n             printf (\"root12 mismatch\\n\");\n             abort ();\n          }\n          if (mpz_cmp (root2, root3) != 0)\n\t\t{\n\t\t  printf (\"root23 mismatch\\n\");\n\t\t  gmp_fprintf (stderr, \"root2 = %ZX\\n\", root2);\n\t\t  gmp_fprintf (stderr, \"root3 = %ZX\\n\", root3);\n\t\t  abort ();\n\t\t}\n\t      mpz_pow_ui (root3, root2, nth);\n\t      mpz_sub (root3, temp2, root3);\n\t      //printf(\"bs is \");mpz_out_str(stdout,10,bs);printf(\"\\n\");\n\t      //printf(\"temp2 is \");mpz_out_str(stdout,10,temp2);printf(\"\\n\");\n\t      //printf(\"root is \");mpz_out_str(stdout,10,root1);printf(\"\\n\");\n\t      //printf(\"rem is \");mpz_out_str(stdout,10,rem2);printf(\"\\n\");\n\t      if (mpz_cmp (root3, rem2) != 0)\n\t\t{\n\t\t  printf (\"rootrem mismatch\\n\");\n\t\t  gmp_fprintf (stderr, \"val = %ZX ^ (1 / %ld)\\n\", temp2, nth);\n\t\t  gmp_fprintf (stderr, \"root = %ZX\\n\", root2);\n\t\t  gmp_fprintf (stderr, \"rem2 = %ZX\\n\", 
root3);\n\t\t  gmp_fprintf (stderr, \"rem3 = %ZX\\n\", rem2);\n\t\t  abort ();\n\t\t}\n\t      if (r1 && mpz_cmp_ui (rem2, 0) != 0)\n\t\t{\n\t\t  printf (\"rootexact1 mismatch %d\\n\", r1);\n\t\t  abort ();\n\t\t}\n\t      if (!r1 && mpz_cmp_ui (rem2, 0) == 0)\n\t\t{\n\t\t  printf (\"rootexact2 mismatch\\n\");\n\t\t  abort ();\n\t\t}\n\t      mpz_add_ui (root2, root2, 1);\n\t      mpz_pow_ui (root2, root2, nth);\n\t      if (mpz_cmp (root2, temp2) <= 0)\n\t\t{\n\t\t  printf (\"root wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t      if (!r1 && i == 0)\n\t\t{\n\t\t  printf (\"rootexact2 wrong\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n  // check small values and stupid stuff\n  for (k = 0; k < 2; k++)\n    {\n      for (nth = 1; nth < 3 * reps; nth++)\n\t{\n\t  if (k % 2 != 0 && nth % 2 == 0)\n\t    continue;\n\t  for (i = 0; i < 2 * reps; i++)\n\t    {\n\t      mpz_set_ui (temp, i);\n\t      if (k % 2 != 0)\n\t\tmpz_neg (temp, temp);\n\t      mpz_rootrem (root1, rem2, temp, nth);\n\t      mpz_pow_ui (temp2, root1, nth);\n\t      mpz_add (temp2, temp2, rem2);\n\t      if (mpz_cmp (temp2, temp) != 0)\n\t\t{\n\t\t  printf (\"root small mismatch\\n\");\n\t\t  abort ();\n\t\t}\n\t    }\n\t}\n    }\n\n  /* test case reported by Alex Dyachenko */\n    {\n  mpz_t a, cube;\n  \n  mpz_init_set_str(a, \"8984948281360922385394772450147012613851354303\", 10);\n  \n  mpz_init(cube);\n  mpz_mul(cube, a, a);\n  mpz_mul(cube, cube, a);\n\n  mpz_set(root1, cube);\n  mpz_nthroot(root1, root1, 3);\n  \n  mpz_nthroot(root2, cube, 3);\n\n  if (mpz_cmp(root1, root2) != 0)\n    {\n  printf(\"nthroot aliasing failed\\n\");\n  abort();\n    }\n\n  mpz_clear(cube);\n  mpz_clear(a);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (x2);\n  mpz_clear (root1);\n  mpz_clear (root2);\n  mpz_clear (root3);\n  mpz_clear (rem2);\n  mpz_clear (temp);\n  mpz_clear (temp2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x);\n 
 fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-scan.c",
    "content": "/* Tests of mpz_scan0 and mpz_scan1.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nunsigned long\nrefmpz_scan (mpz_srcptr z, unsigned long i, int sought)\n{\n  unsigned long  z_bits = (unsigned long) ABSIZ(z) * GMP_NUMB_BITS;\n\n  do\n    {\n      if (mpz_tstbit (z, i) == sought)\n        return i;\n      i++;\n    }\n  while (i <= z_bits);\n\n  return ULONG_MAX;\n}\n\nunsigned long\nrefmpz_scan0 (mpz_srcptr z, unsigned long starting_bit)\n{\n  return refmpz_scan (z, starting_bit, 0);\n}\n\nunsigned long\nrefmpz_scan1 (mpz_srcptr z, unsigned long starting_bit)\n{\n  return refmpz_scan (z, starting_bit, 1);\n}\n\n\nvoid\ncheck_ref (void)\n{\n  static const int offset[] = {\n    -2, -1, 0, 1, 2, 3\n  };\n\n  mpz_t          z;\n  int            test, neg, sought, oindex, o;\n  mp_size_t      size, isize;\n  unsigned long  start, got, want;\n  gmp_randstate_t rands;\n\n  mpz_init (z);\n  gmp_randinit_default(rands);\n  for (test = 0; test < 5; test++)\n    {\n      for (size = 0; size < 5; size++)\n        {\n          
mpz_urandomb (z, rands, (unsigned long) (ABS (size) * GMP_NUMB_BITS));\n          if (size < 0)SIZ(z) = -SIZ(z);               \n\n          for (neg = 0; neg <= 1; neg++)\n            {\n              if (neg)\n                mpz_neg (z, z);\n\n              for (isize = 0; isize <= size; isize++)\n                {\n                  for (oindex = 0; oindex < numberof (offset); oindex++)\n                    {\n                      o = offset[oindex];\n                      if ((int) isize*GMP_NUMB_BITS < -o)\n                        continue;  /* start would be negative */\n\n                      start = isize*GMP_NUMB_BITS + o;\n\n                      for (sought = 0; sought <= 1; sought++)\n                        {\n                          if (sought == 0)\n                            {\n                              got = mpz_scan0 (z, start);\n                              want = refmpz_scan0 (z, start);\n                            }\n                          else\n                            {\n                              got = mpz_scan1 (z, start);\n                              want = refmpz_scan1 (z, start);\n                            }\n\n                          if (got != want)\n                            {\n                              printf (\"wrong at test=%d, size=%ld, neg=%d, start=%lu, sought=%d\\n\",\n                                      test, size, neg, start, sought);\n                              printf (\"   z 0x\");\n                              mpz_out_str (stdout, -16, z);\n                              printf (\"\\n\");\n                              printf (\"   got=%lu, want=%lu\\n\", got, want);\n                              exit (1);\n                            }\n                        }\n                    }\n                }\n            }\n        }\n    }\n  mpz_clear (z);gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  check_ref ();\n\n  tests_end ();\n  exit 
(0);\n}\n"
  },
  {
    "path": "tests/mpz/t-set_d.c",
    "content": "/* Test mpz_set_d and mpz_init_set_d.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    double     d;\n    mp_size_t  want_size;\n    mp_limb_t  want_data[2];\n  } data[] = {\n\n    {  0.0,  0 },\n    {  1.0,  1, { 1 } },\n    { -1.0, -1, { 1 } },\n\n    {  123.0,  1, { 123 } },\n    { -123.0, -1, { 123 } },\n  };\n\n  mpz_t  z;\n  int    i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_init (z);\n      mpz_set_d (z, data[i].d);\n      MPZ_CHECK_FORMAT (z);\n      if (z->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_set_d wrong on data[%d]\\n\", i);\n        bad:\n          d_trace   (\"  d  \", data[i].d);\n          printf    (\"  got  size %ld\\n\", (long) z->_mp_size);\n          printf    (\"  want size %ld\\n\", (long) data[i].want_size);\n          mpn_trace (\"  got  z\", z->_mp_d, 
z->_mp_size);\n          mpn_trace (\"  want z\", data[i].want_data, data[i].want_size);\n          abort();\n        }\n      mpz_clear (z);\n\n      mpz_init_set_d (z, data[i].d);\n      MPZ_CHECK_FORMAT (z);\n      if (z->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_init_set_d wrong on data[%d]\\n\", i);\n          goto bad;\n        }\n      mpz_clear (z);\n    }\n}\n\n/* Try mpz_set_d on values 2^i+1, while such a value fits a double. */\nvoid\ncheck_2n_plus_1 (void)\n{\n  volatile double  p, d, diff;\n  mpz_t  want, got;\n  int    i;\n\n  mpz_init (want);\n  mpz_init (got);\n\n  p = 1.0;\n  mpz_set_ui (want, 2L);  /* gives 3 on first step */\n\n  for (i = 1; i < 500; i++)\n    {\n      mpz_mul_2exp (want, want, 1L);\n      mpz_sub_ui (want, want, 1L);   /* want = 2^i+1 */\n\n      p *= 2.0;  /* p = 2^i */\n      d = p + 1.0;\n      diff = d - p;\n      if (diff != 1.0)\n        break;   /* rounding occurred, stop now */\n\n      mpz_set_d (got, d);\n      MPZ_CHECK_FORMAT (got);\n      if (mpz_cmp (got, want) != 0)\n        {\n          printf (\"mpz_set_d wrong on 2^%d+1\\n\", i);\n          d_trace   (\"  d \", d);\n          mpz_trace (\"  got  \", got);\n          mpz_trace (\"  want \", want);\n          abort ();\n        }\n    }\n\n  mpz_clear (want);\n  mpz_clear (got);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n  check_2n_plus_1 ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-set_f.c",
    "content": "/* Test mpz_set_f.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr z)\n{\n  static const int shift[] = {\n    0, 1, BITS_PER_MP_LIMB, 2*BITS_PER_MP_LIMB, 5*BITS_PER_MP_LIMB\n  };\n\n  int    sh, shneg, neg;\n  mpf_t  f;\n  mpz_t  got, want;\n\n  mpf_init2 (f, mpz_sizeinbase(z,2));\n  mpz_init (got);\n  mpz_init (want);\n\n  for (sh = 0; sh < numberof(shift); sh++)\n    {\n      for (shneg = 0; shneg <= 1; shneg++)\n        {\n          for (neg = 0; neg <= 1; neg++)\n            {\n              mpf_set_z (f, z);\n              mpz_set (want, z);\n            \n              if (neg)\n                {\n                  mpf_neg (f, f);\n                  mpz_neg (want, want);\n                }\n\n              if (shneg)\n                {\n                  mpz_tdiv_q_2exp (want, want, shift[sh]);\n                  mpf_div_2exp (f, f, shift[sh]);\n                }\n              else\n                {\n                  mpz_mul_2exp (want, want, shift[sh]);\n                  mpf_mul_2exp (f, f, 
shift[sh]);\n                }\n\n              mpz_set_f (got, f);\n              MPZ_CHECK_FORMAT (got);\n\n              if (mpz_cmp (got, want) != 0)\n                {\n                  printf (\"wrong result\\n\");\n                  printf (\"  shift  %d\\n\", shneg ? -shift[sh] : shift[sh]);\n                  printf (\"  neg    %d\\n\", neg);\n                  mpf_trace (\"     f\", f);\n                  mpz_trace (\"   got\", got);\n                  mpz_trace (\"  want\", want);\n                  abort ();\n                }\n            }\n        }\n    }\n\n  mpf_clear (f);\n  mpz_clear (got);\n  mpz_clear (want);\n}\n\n\nvoid\ncheck_various (void)\n{gmp_randstate_t rands;\n  mpz_t  z;\n\n  mpz_init (z);\n  gmp_randinit_default(rands);\n\n  mpz_set_ui (z, 0L);\n  check_one (z);\n\n  mpz_set_si (z, 123L);\n  check_one (z);\n\n  mpz_rrandomb (z, rands, 2*BITS_PER_MP_LIMB);\n  check_one (z);\n\n  mpz_rrandomb (z, rands, 5*BITS_PER_MP_LIMB);\n  check_one (z);\n\n  mpz_clear (z);\n  gmp_randclear(rands);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n#if GMP_NAIL_BITS == 0\n  tests_start ();\n  mp_trace_base = 16;\n\n  check_various ();\n\n  tests_end ();\n#endif\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-set_si.c",
    "content": "/* Test mpz_set_si and mpz_init_set_si.\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n#if GMP_NUMB_BITS <= BITS_PER_UI\n#define ENTRY(n)   { n, { n, 0 } }\n#else\n#define ENTRY(n)   { n, { (n) & GMP_NUMB_MASK, (n) >> GMP_NUMB_BITS } }\n#endif\n\n  static const struct {\n    mpir_si       n;\n    mp_size_t  want_size;\n    mp_limb_t  want_data[2];\n  } data[] = {\n\n    {  0L,  0 },\n    {  1L,  1, { 1 } },\n    { -1L, -1, { 1 } },\n\n#if GMP_NUMB_BITS >= BITS_PER_UI - 1\n    { GMP_SI_MAX,  1, { GMP_SI_MAX, 0 } },\n    { -GMP_SI_MAX,  -1, { GMP_SI_MAX, 0 } },\n#else\n    { GMP_SI_MAX,  2, { GMP_SI_MAX & GMP_NUMB_MASK, GMP_SI_MAX >> GMP_NUMB_BITS } },\n    { -GMP_SI_MAX,  -2, { GMP_SI_MAX & GMP_NUMB_MASK, GMP_SI_MAX >> GMP_NUMB_BITS } },\n#endif\n\n#if GMP_NUMB_BITS >= BITS_PER_UI\n    { GMP_SI_MIN,  -1, { GMP_UI_HIBIT, 0 } },\n#else\n    { GMP_SI_MIN,  -2, { 0, GMP_UI_HIBIT >> GMP_NUMB_BITS } },\n#endif\n  };\n\n  mpz_t  n;\n  int    i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      
mpz_init (n);\n      mpz_set_si (n, data[i].n);\n      MPZ_CHECK_FORMAT (n);\n      if (n->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_set_si wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpz_clear (n);\n\n      mpz_init_set_si (n, data[i].n);\n      MPZ_CHECK_FORMAT (n);\n      if (n->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_init_set_si wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpz_clear (n);\n    }\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-set_str.c",
    "content": "/* Test mpz_set_str.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_one (mpz_srcptr want, int base, const char *str)\n{\n  mpz_t   got;\n  \n  MPZ_CHECK_FORMAT (want);\n  mp_trace_base = (base == 0 ? 
16 : base);\n\n  mpz_init (got);\n\n  if (mpz_set_str (got, str, base) != 0)\n    {\n      printf (\"mpz_set_str unexpectedly failed\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  str  \\\"%s\\\"\\n\", str);\n      abort ();\n    }\n  MPZ_CHECK_FORMAT (got);\n\n  if (mpz_cmp (got, want) != 0)\n    {\n      printf (\"mpz_set_str wrong\\n\");\n      printf (\"  base %d\\n\", base);\n      printf (\"  str  \\\"%s\\\"\\n\", str);\n      mpz_trace (\"got \", got);\n      mpz_trace (\"want\", want);\n      abort ();\n    }\n\n  mpz_clear (got);\n}\n\nvoid\ncheck_samples (void)\n{\n  mpz_t  z;\n\n  mpz_init (z);\n\n  mpz_set_ui (z, 0L);\n  check_one (z, 0, \"0 \");\n  check_one (z, 0, \"0    \");\n  check_one (z, 10, \"0 \");\n  check_one (z, 10, \"0    \");\n  check_one (z, 10, \"0000000    \");\n\n  mpz_set_ui (z, 123L);\n  check_one (z, 0, \"123 \");\n  check_one (z, 0, \"123    \");\n  check_one (z, 10, \"123 \");\n  check_one (z, 10, \"123    \");\n  check_one (z, 0, \" 123 \");\n  check_one (z, 0, \"  123    \");\n  check_one (z, 10, \"  0000123 \");\n  check_one (z, 10, \"  123    \");\n\n  mpz_clear (z);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_samples ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-set_sx.c",
    "content": "/* \nTest mpz_set_sx and mpz_init_set_sx\n\nCopyright 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#if defined( _MSC_VER )\n#  if _MSC_VER < 1600\n#    define SKIP_TEST\n#  else\n#    include <stdint.h>\n#  endif\n#else\n#  include \"config.h\"\n#  ifdef HAVE_STDINT_H\n#    include <stdint.h>\n#  else\n#    define SKIP_TEST\n#  endif\n#endif\n\n#ifdef SKIP_TEST\n\nint\nmain (void)\n{\n  printf (\"(u)intmax_t not available - test skipped\\n\");\n  exit (0);\n}\n\n#else\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_data (void)\n{\n  static const struct {\n    intmax_t n;\n    mp_size_t  want_size;\n    mp_limb_t  want_data[2];\n  } data[] = {\n\n    {  0L,  0 },\n    {  1L,  1, { 1 } },\n    { -1L, -1, { 1 } },\n\n#if GMP_NUMB_BITS >= BITS_PER_UINTMAX\n    {  INTMAX_MAX,   1, { INTMAX_MAX, 0 } },\n    { -INTMAX_MAX,  -1, { INTMAX_MAX, 0 } },\n#else\n    {  INTMAX_MAX,   2, { INTMAX_MAX & GMP_NUMB_MASK, INTMAX_MAX >> GMP_NUMB_BITS } },\n    { -INTMAX_MAX,  -2, { INTMAX_MAX & GMP_NUMB_MASK, INTMAX_MAX >> GMP_NUMB_BITS } },\n#endif\n\n#if GMP_NUMB_BITS >= 
BITS_PER_UINTMAX\n    { INTMAX_MIN, -1, { INTMAX_MIN, 0 } },\n#else\n    { INTMAX_MIN,  -2, { INTMAX_MIN & GMP_NUMB_MASK, INTMAX_MIN >> GMP_NUMB_BITS } },\n#endif\n  };\n\n  mpz_t  n;\n  int    i;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      mpz_init (n);\n      mpz_set_sx (n, data[i].n);\n      MPZ_CHECK_FORMAT (n);\n      if (n->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_set_sx wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpz_clear (n);\n\n      mpz_init_set_sx (n, data[i].n);\n      MPZ_CHECK_FORMAT (n);\n      if (n->_mp_size != data[i].want_size\n          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,\n                                   ABS (data[i].want_size)) != 0)\n        {\n          printf (\"mpz_init_set_sx wrong on data[%d]\\n\", i);\n          abort();\n        }\n      mpz_clear (n);\n    }\n}\n\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n  tests_end ();\n  exit (0);\n}\n\n#endif\n"
  },
  {
    "path": "tests/mpz/t-set_ux.c",
    "content": "/* \nTest mpz_set_ux and mpz_init_set_ux (not much use but perhaps better than nothing)\n\nCopyright 2011, Brian Gladman\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#if defined( _MSC_VER )\n#  if _MSC_VER < 1600\n#    define SKIP_TEST\n#  else\n#    include <stdint.h>\n#  endif\n#else\n#  include \"config.h\"\n#  ifdef HAVE_STDINT_H\n#    include <stdint.h>\n#  else\n#    define SKIP_TEST\n#  endif\n#endif\n\n#ifdef SKIP_TEST\n\nint\nmain (void)\n{\n  printf (\"(u)intmax_t not available - test skipped\\n\");\n  exit (0);\n}\n\n#else\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define NLIMBS ((8 * SIZEOF_UINTMAX_T + GMP_NUMB_BITS  - 1) / GMP_NUMB_BITS)\n\nstatic const uintmax_t val[] =\n{\n    0, 1, 0xff, \n#if SIZEOF_UINTMAX_T >= 2\n    0x0100, 0x0101, 0xffff, \n#endif\n#if SIZEOF_UINTMAX_T >= 4     \n    0x10000, 0x10001, 0xffffffff, \n#endif\n#if SIZEOF_UINTMAX_T >= 8\n    0x100000000, 0x100000001, 0xffffffffffffffff,\n#endif\n    UINTMAX_MAX\n};\n\nvoid\ncheck_data (void)\n{   unsigned int i;\n    mpz_t   z;\n\n    mpz_init(z);\n\n    for( i = 0 ; i < sizeof(val) / sizeof(uintmax_t) ; ++i )\n    {\n        uintmax_t k = 
0, n = val[i];\n        mpz_set_ux(z, val[i]);\n#if NLIMBS == 1\n        if(n && n != z->_mp_d[k++])\n        {\n            printf(\"mpz_set_ux() failed for data on item %d\\n\", i);\n            abort();\n        }\n#else\n        while(n)\n        {\n            if((n & GMP_NUMB_MASK) != z->_mp_d[k++])\n            {\n                printf(\"mpz_set_ux() failed for data on item %d\\n\", i);\n                abort();\n            }\n            n >>= GMP_NUMB_BITS;\n        }\n#endif\n        if(z->_mp_size != k)\n        {\n            printf(\"mpz_set_ux() failed for length on item %u (mpz size: %d, size: %lu)\\n\", i, z->_mp_size, k);\n            abort();\n        }\n        if(z->_mp_alloc < k)\n        {\n            printf(\"mpz_set_ux() failed for allocation on item %u (mpz alloc: %d, size: %lu)\\n\", i, z->_mp_alloc, k);\n            abort();\n        }\n    }\n    mpz_clear(z);\n\n    for( i = 0 ; i < sizeof(val) / sizeof(uintmax_t) ; ++i )\n    {\n        uintmax_t k = 0, n = val[i];\n        mpz_init_set_ux(z, val[i]);\n#if NLIMBS == 1\n        if(n && n != z->_mp_d[k++])\n        {\n            printf(\"mpz_init_set_ux() failed for data on item %d\\n\", i);\n            abort();\n        }\n#else\n        while(n)\n        {\n            if((n & GMP_NUMB_MASK) != z->_mp_d[k++])\n            {\n                printf(\"mpz_init_set_ux() failed for data on item %d\\n\", i);\n                abort();\n            }\n            n >>= GMP_NUMB_BITS;\n        }\n#endif\n        if(z->_mp_size != k)\n        {\n            printf(\"mpz_init_set_ux() failed for length on item %u (mpz size: %d, size: %lu)\\n\", i, z->_mp_size, k);\n            abort();\n        }\n        if(z->_mp_alloc < k)\n        {\n            printf(\"mpz_init_set_ux() failed for allocation on item %u (mpz alloc: %d, size: %lu)\\n\", i, z->_mp_alloc, k);\n            abort();\n        }\n        mpz_clear(z);\n    }\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_data ();\n\n 
 tests_end ();\n  exit (0);\n}\n\n#endif\n"
  },
  {
    "path": "tests/mpz/t-sizeinbase.c",
    "content": "/* Test mpz_sizeinbase.\n\nCopyright 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n#if 0\n  /* Disabled due to the bogosity of trying to fake an _mp_d pointer to\n     below an object.  Has been seen to fail on a hppa system and on ia64.  */\n\n\n/* Create a fake mpz consisting of just a single 1 bit, with totbits being\n   the total number of bits, inclusive of that 1 bit.  
*/\nvoid\nmpz_fake_bits (mpz_ptr z, unsigned long totbits)\n{\n  static mp_limb_t  n;\n  unsigned long     zero_bits, zero_limbs;\n\n  zero_bits = totbits - 1;\n  zero_limbs = zero_bits / GMP_NUMB_BITS;\n  zero_bits %= GMP_NUMB_BITS;\n\n  SIZ(z) = zero_limbs + 1;\n  PTR(z) = (&n) - (SIZ(z) - 1);\n  n = CNST_LIMB(1) << zero_bits;\n\n  ASSERT_ALWAYS (mpz_sizeinbase (z, 2) == totbits);\n}\n\n\n/* This was seen to fail on a GNU/Linux powerpc32 with gcc 2.95.2,\n   apparently due to a doubtful value of mp_bases[10].chars_per_bit_exactly\n   (0X1.34413509F79FDP-2 whereas 0X1.34413509F79FFP-2 is believed correct).\n   Presumably this is a glibc problem when gcc converts the decimal string\n   in mp_bases.c, or maybe it's only a function of the rounding mode during\n   compilation.  */\nvoid\ncheck_sample (void)\n{\n  unsigned long  totbits = 198096465;\n  int        base = 10;\n  size_t     want = 59632979;\n  size_t     got;\n  mpz_t      z;\n\n  mpz_fake_bits (z, totbits);\n  got = mpz_sizeinbase (z, base);\n  if (got != want)\n    {\n      printf (\"mpz_sizeinbase\\n\");\n      printf (\"  base    %d\\n\",  base);\n      printf (\"  totbits %lu\\n\", totbits);\n      printf (\"  got     %u\\n\",  got);\n      printf (\"  want    %u\\n\",  want);\n      abort ();\n    }\n}\n#endif\n\nint\nmain (void)\n{\n  tests_start ();\n\n  /* check_sample (); */\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/mpz/t-sqrtrem.c",
    "content": "/* Test mpz_add, mpz_add_ui, mpz_cmp, mpz_cmp, mpz_mul, mpz_sqrtrem.\n\nCopyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(mpz_t, mpz_t, mpz_t);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t x2;\n  mpz_t x, rem;\n  mpz_t temp, temp2;\n  mp_size_t x2_size;\n  int i;\n  int reps = 20000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long size_range;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n    reps = atoi (argv[1]);\n\n  mpz_init (x2);\n  mpz_init (x);\n  mpz_init (rem);\n  mpz_init (temp);\n  mpz_init (temp2);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 12 + 2; /* 0..8191 bit operands */\n\n      mpz_urandomb (bs, rands, size_range);\n      x2_size = mpz_get_ui (bs);\n      mpz_rrandomb (x2, rands, x2_size);\n\n      /* printf (\"%ld\\n\", SIZ (x2)); */\n\n      mpz_sqrtrem (x, rem, x2);\n      mpz_mul (temp, x, x);\n\n      /* Is square 
of result > argument?  */\n      if (mpz_cmp (temp, x2) > 0)\n\tdump_abort (x2, x, rem);\n\n      mpz_add_ui (temp2, x, 1);\n      mpz_mul (temp2, temp2, temp2);\n\n      /* Is square of (result + 1) <= argument?  */\n      if (mpz_cmp (temp2, x2) <= 0)\n\tdump_abort (x2, x, rem);\n\n      mpz_add (temp2, temp, rem);\n\n      /* Is the remainder wrong?  */\n      if (mpz_cmp (x2, temp2) != 0)\n\tdump_abort (x2, x, rem);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (x2);\n  mpz_clear (x);\n  mpz_clear (rem);\n  mpz_clear (temp);\n  mpz_clear (temp2);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (mpz_t x2, mpz_t x, mpz_t rem)\n{\n  fprintf (stderr, \"ERROR\\n\");\n  fprintf (stderr, \"x2        = \"); debug_mp (x2, -16);\n  fprintf (stderr, \"x         = \"); debug_mp (x, -16);\n  fprintf (stderr, \"remainder = \"); debug_mp (rem, -16);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-tdiv.c",
    "content": "/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr, mpz_tdiv_q,\n   mpz_tdiv_r, mpz_mul.\n\nCopyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(mpz_t, mpz_t);\nvoid debug_mp(mpz_t, int);\n\nvoid special_tests(void)\n{\n   mpz_t n, d, q, q2, r;\n   \n   mpz_init(n); mpz_init(d);\n   mpz_init(q); mpz_init(q2); mpz_init(r);\n   \n   gmp_sscanf(\"7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000\", \"%Zx\", n);\n   gmp_sscanf(\"20000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\", \"%Zx\", d);\n\n   mpz_tdiv_q(q, n, d);\n   mpz_tdiv_qr(q2, r, n, d);\n   \n   if (mpz_cmp(q, q2) != 0)\n   {\n      fprintf (stderr, \"ERROR\\n\");\n      fprintf (stderr, \"dividend = \"); debug_mp (n, -16);\n      fprintf (stderr, \"divisor  = \"); debug_mp (d, -16);\n      fprintf (stderr, \"q1 = \"); debug_mp (q, -16);\n      fprintf (stderr, \"q2  = \"); debug_mp 
(q2, -16);\n      abort();\n   }\n   \n   gmp_sscanf(\"3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\", \"%Zx\", n);\n   
gmp_sscanf(\"400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF800000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\", \"%Zx\", d);\n   \n   mpz_tdiv_q(q, n, d);\n   mpz_tdiv_qr(q2, r, n, d);\n   \n   if (mpz_cmp(q, q2) != 0)\n   {\n      fprintf (stderr, \"ERROR\\n\");\n      fprintf (stderr, \"dividend = \"); debug_mp (n, -16);\n      fprintf (stderr, \"divisor  = \"); debug_mp (d, -16);\n      fprintf (stderr, \"q1 = \"); debug_mp (q, -16);\n      fprintf (stderr, \"q2  = \"); debug_mp (q2, -16);\n      abort();\n   }\n   \n   mpz_clear(n); mpz_clear(d);\n   mpz_clear(q); mpz_clear(q2); mpz_clear(r);\n}\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t dividend, divisor;\n  mpz_t quotient, remainder;\n  mpz_t quotient2, remainder2;\n  mpz_t temp;\n  mp_size_t dividend_size, divisor_size;\n  int i;\n  int reps = 1000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  unsigned long bsi, size_range;\n\n  tests_start ();\n\n  special_tests();\n\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (dividend);\n  mpz_init (divisor);\n  mpz_init (quotient);\n  mpz_init (remainder);\n  mpz_init (quotient2);\n  mpz_init (remainder2);\n  mpz_init (temp);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 16 + 2; /* 0..131071 bit operands */\n\n      do\n\t{\n\t  mpz_urandomb (bs, rands, size_range);\n\t  divisor_size = mpz_get_ui (bs);\n\t  mpz_rrandomb (divisor, rands, divisor_size);\n\t}\n      while (mpz_sgn (divisor) == 0);\n\n      mpz_urandomb (bs, rands, size_range);\n      dividend_size = mpz_get_ui (bs) + divisor_size;\n      mpz_rrandomb (dividend, rands, dividend_size);\n\n      
mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (dividend, dividend);\n      if ((bsi & 2) != 0)\n\tmpz_neg (divisor, divisor);\n\n      /* printf (\"%ld %ld\\n\", SIZ (dividend), SIZ (divisor)); */\n\n      mpz_tdiv_qr (quotient, remainder, dividend, divisor);\n      mpz_tdiv_q (quotient2, dividend, divisor);\n      mpz_tdiv_r (remainder2, dividend, divisor);\n\n      /* First determine that the quotients and remainders computed\n\t with different functions are equal.  */\n      if (mpz_cmp (quotient, quotient2) != 0)\n   dump_abort (dividend, divisor);\n      if (mpz_cmp (remainder, remainder2) != 0)\n\tdump_abort (dividend, divisor);\n\n      /* Check if the sign of the quotient is correct.  */\n      if (mpz_cmp_ui (quotient, 0) != 0)\n\tif ((mpz_cmp_ui (quotient, 0) < 0)\n\t    != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))\n\tdump_abort (dividend, divisor);\n\n      /* Check if the remainder has the same sign as the dividend\n\t (quotient rounded towards 0).  
*/\n      if (mpz_cmp_ui (remainder, 0) != 0)\n\tif ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))\n\t  dump_abort (dividend, divisor);\n\n      mpz_mul (temp, quotient, divisor);\n      mpz_add (temp, temp, remainder);\n      if (mpz_cmp (temp, dividend) != 0)\n\tdump_abort (dividend, divisor);\n\n      mpz_abs (temp, divisor);\n      mpz_abs (remainder, remainder);\n      if (mpz_cmp (remainder, temp) >= 0)\n\tdump_abort (dividend, divisor);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (dividend);\n  mpz_clear (divisor);\n  mpz_clear (quotient);\n  mpz_clear (remainder);\n  mpz_clear (quotient2);\n  mpz_clear (remainder2);\n  mpz_clear (temp);\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n\nvoid\ndump_abort (mpz_t dividend, mpz_t divisor)\n{\n  fprintf (stderr, \"ERROR\\n\");\n  printf(\"nn = %ld, dn = %ld\\n\", mpz_size(dividend), mpz_size(divisor));\n  fprintf (stderr, \"dividend = \"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = \"); debug_mp (divisor, -16);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-tdiv_ui.c",
    "content": "/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr_ui, mpz_tdiv_q_ui,\n   mpz_tdiv_r_ui, mpz_tdiv_ui, mpz_mul_ui.\n\nCopyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. 
*/\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid dump_abort(char *, mpz_t, mpir_ui);\nvoid debug_mp(mpz_t, int);\n\nint\nmain (int argc, char **argv)\n{\n  mpz_t dividend;\n  mpz_t quotient, remainder;\n  mpz_t quotient2, remainder2;\n  mpz_t temp;\n  mp_size_t dividend_size;\n  mpir_ui divisor;\n  int i;\n  int reps = 10000;\n  gmp_randstate_t rands;\n  mpz_t bs;\n  mpir_ui bsi, size_range;\n  mpir_ui r_rq, r_q, r_r, r;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n\n  mpz_init (bs);\n\n  if (argc == 2)\n     reps = atoi (argv[1]);\n\n  mpz_init (dividend);\n  mpz_init (quotient);\n  mpz_init (remainder);\n  mpz_init (quotient2);\n  mpz_init (remainder2);\n  mpz_init (temp);\n\n  for (i = 0; i < reps; i++)\n    {\n      mpz_urandomb (bs, rands, 32);\n      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */\n\n      do\n\t{\n\t  mpz_rrandomb (bs, rands, 64);\n\t  divisor = mpz_get_ui (bs);\n\t}\n      while (divisor == 0);\n\n      mpz_urandomb (bs, rands, size_range);\n      dividend_size = mpz_get_ui (bs);\n      mpz_rrandomb (dividend, rands, dividend_size);\n\n      mpz_urandomb (bs, rands, 2);\n      bsi = mpz_get_ui (bs);\n      if ((bsi & 1) != 0)\n\tmpz_neg (dividend, dividend);\n\n      /* printf (\"%ld\\n\", SIZ (dividend)); */\n\n      r_rq = mpz_tdiv_qr_ui (quotient, remainder, dividend, divisor);\n      r_q = mpz_tdiv_q_ui (quotient2, dividend, divisor);\n      r_r = mpz_tdiv_r_ui (remainder2, dividend, divisor);\n      r = mpz_tdiv_ui (dividend, divisor);\n\n      /* First determine that the quotients and remainders computed\n\t with different functions are equal.  
*/\n      if (mpz_cmp (quotient, quotient2) != 0)\n\tdump_abort (\"quotients from mpz_tdiv_qr_ui and mpz_tdiv_q_ui differ\",\n\t\t    dividend, divisor);\n      if (mpz_cmp (remainder, remainder2) != 0)\n\tdump_abort (\"remainders from mpz_tdiv_qr_ui and mpz_tdiv_r_ui differ\",\n\t\t    dividend, divisor);\n\n      /* Check if the sign of the quotient is correct.  */\n      if (mpz_cmp_ui (quotient, 0) != 0)\n\tif ((mpz_cmp_ui (quotient, 0) < 0)\n\t    != (mpz_cmp_ui (dividend, 0) < 0))\n\tdump_abort (\"quotient sign wrong\", dividend, divisor);\n\n      /* Check if the remainder has the same sign as the dividend\n\t (quotient rounded towards 0).  */\n      if (mpz_cmp_ui (remainder, 0) != 0)\n\tif ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))\n\t  dump_abort (\"remainder sign wrong\", dividend, divisor);\n\n      mpz_mul_ui (temp, quotient, divisor);\n      mpz_add (temp, temp, remainder);\n      if (mpz_cmp (temp, dividend) != 0)\n\tdump_abort (\"n mod d != n - [n/d]*d\", dividend, divisor);\n\n      mpz_abs (remainder, remainder);\n      if (mpz_cmp_ui (remainder, divisor) >= 0)\n\tdump_abort (\"remainder greater than divisor\", dividend, divisor);\n\n      if (mpz_cmp_ui (remainder, r_rq) != 0)\n\tdump_abort (\"remainder returned from mpz_tdiv_qr_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_q) != 0)\n\tdump_abort (\"remainder returned from mpz_tdiv_q_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r_r) != 0)\n\tdump_abort (\"remainder returned from mpz_tdiv_r_ui is wrong\",\n\t\t    dividend, divisor);\n      if (mpz_cmp_ui (remainder, r) != 0)\n\tdump_abort (\"remainder returned from mpz_tdiv_ui is wrong\",\n\t\t    dividend, divisor);\n    }\n\n  mpz_clear (bs);\n  mpz_clear (dividend);\n  mpz_clear (quotient);\n  mpz_clear (remainder);\n  mpz_clear (quotient2);\n  mpz_clear (remainder2);\n  mpz_clear (temp);\n  gmp_randclear(rands);\n  tests_end ();\n  exit 
(0);\n}\n\nvoid\ndump_abort (char *str, mpz_t dividend, mpir_ui divisor)\n{\n  fprintf (stderr, \"ERROR: %s\\n\", str);\n  fprintf (stderr, \"dividend = \"); debug_mp (dividend, -16);\n  fprintf (stderr, \"divisor  = %lX\\n\", divisor);\n  abort();\n}\n\nvoid\ndebug_mp (mpz_t x, int base)\n{\n  mpz_out_str (stderr, base, x); fputc ('\\n', stderr);\n}\n"
  },
  {
    "path": "tests/mpz/t-trial_division.c",
    "content": "\n/* Exercise mpz_trial_division\n\n  Copyright 2009 Jason Moxham\n\n  This file is part of the MPIR Library.\n\n  The MPIR Library is free software; you can redistribute it and/or modify\n  it under the terms of the GNU Lesser General Public License as published\n  by the Free Software Foundation; either version 2.1 of the License, or (at\n  your option) any later version.\n\n  The MPIR Library is distributed in the hope that it will be useful, but\n  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n  License for more details.\n\n  You should have received a copy of the GNU Lesser General Public License\n  along with the MPIR Library; see the file COPYING.LIB.  If not, write\n  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,\n  Boston, MA 02110-1301, USA.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nunsigned long\ntrial_divref (mpz_t n, unsigned long start, unsigned long stop)\n{\n    unsigned long i;\n\n    if (start <= 1)\n        start = 2;\n    for (i = start; i < stop; i++)\n        if (mpz_divisible_ui_p (n, i))\n            return i;\n    return 0;\n}\n\nint\nmain (void)\n{\n    long i, d1, d2, start, stop;\n    int correct;\n    mpz_t n;\n\n    tests_start ();\n\n    mpz_init (n);\n    for (i = 2; i < 300; i++)\n    {\n        mpz_set_ui (n, i);\n        for (start = 0; start < i + 10; start++)\n        {\n            correct = 1;\n            if (trial_divref (n, 0, start) != 0)\n                correct = 0;    /* n has divisor < start so answer is not fully correct */\n            for (stop = 0; stop < i + 10; stop++)\n            {\n                d1 = mpz_trial_division (n, start, stop);\n                if (correct)\n                {\n                    d2 = trial_divref (n, start, stop);\n                    if (d1 != d2)\n            
        {\n                        printf\n                            (\"trial div mismatch %lu %lu %lu %lu %ld\\n\",\n                             d1, d2, start, stop, i);\n                        abort ();\n                    }\n                }\n                else\n                {\n                    if (d1 != 0 && !mpz_divisible_ui_p (n, d1))\n                    {\n                        printf\n                            (\"trial div divisor does not divide %lu %lu %lu %ld\\n\",\n                             d1, start, stop, i);\n                        abort ();\n                    }\n                }\n            }\n        }\n    }\n\n    for (i = 2; i < 300; i++)\n    {\n        mpz_set_si (n, -i);\n        for (start = 0; start < i + 10; start++)\n        {\n            correct = 1;\n            if (trial_divref (n, 0, start) != 0)\n                correct = 0;    /* n has divisor <start so answer is not fully correct */\n            for (stop = 0; stop < i + 10; stop++)\n            {\n                d1 = mpz_trial_division (n, start, stop);\n                if (correct)\n                {\n                    d2 = trial_divref (n, start, stop);\n                    if (d1 != d2)\n                    {\n                        printf\n                            (\"trial div mismatch %lu %lu %lu %lu %ld\\n\",\n                             d1, d2, start, stop, i);\n                        abort ();\n                    }\n                }\n                else\n                {\n                    if (d1 != 0 && !mpz_divisible_ui_p (n, d1))\n                    {\n                        printf\n                            (\"trial div divisor does not divide %lu %lu %lu %ld\\n\",\n                             d1, start, stop, i);\n                        abort ();\n                    }\n                }\n            }\n        }\n    }\n\n    mpz_clear (n);\n    tests_end ();\n    exit (0);\n}\n"
  },
  {
    "path": "tests/rand/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\nLDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\n\ncheck_PROGRAMS = t-iset t-lc2exp t-mt t-rand t-urbui t-urmui t-urndmm \nTESTS = $(check_PROGRAMS)\n\nEXTRA_PROGRAMS = findlc gen gen.static spect stat\ngen_static_SOURCES = gen.c\ngen_static_LDFLAGS = -static\nfindlc_LDADD = libstat.la\nspect_LDADD = libstat.la\nstat_LDADD = libstat.la\n\nEXTRA_LTLIBRARIES = libstat.la\nlibstat_la_SOURCES = gmpstat.h statlib.c zdiv_round.c\nlibstat_la_LIBADD = $(top_builddir)/libmpir.la $(LIBM)\n\nCLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES)\n\nallprogs: $(EXTRA_PROGRAMS)\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n\nmanual-test: gen$(EXEEXT) stat$(EXEEXT)\n\t@(echo -n '16i: '; ./gen -f mpz_urandomb -z 16 1000 \\\n\t\t| ./stat -i 0xffff | grep '^[0-9]')\n\t@(echo -n '32i: '; ./gen -f mpz_urandomb -z 32 1000 \\\n\t\t| ./stat -i 0xffffffff  | grep '^[0-9]')\n\t@(echo -n '33i: 
'; ./gen -f mpz_urandomb -z 33 1000 \\\n\t\t| ./stat -i 0x1ffffffff  | grep '^[0-9]')\n\t@(echo -n '64i: '; ./gen -f mpz_urandomb -z 64 1000 \\\n\t\t| ./stat -i 0xffffffffffffffff  | grep '^[0-9]')\n\t@(echo -n '128i: '; ./gen -f mpz_urandomb -z 128 1000 \\\n\t\t| ./stat -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^[0-9]')\n\n\t@(echo -n '16f: '; ./gen -f mpf_urandomb -z 16 1000 \\\n\t\t| ./stat | grep '^[0-9]')\n\t@(echo -n '32f: '; ./gen -f mpf_urandomb -z 32 1000 \\\n\t\t| ./stat | grep '^[0-9]')\n\t@(echo -n '33f: '; ./gen -f mpf_urandomb -z 33 1000 \\\n\t\t| ./stat | grep '^[0-9]')\n\t@(echo -n '64f: '; ./gen -f mpf_urandomb -z 64 1000 \\\n\t\t| ./stat | grep '^[0-9]')\n\t@(echo -n '128f: '; ./gen -f mpf_urandomb -z 128 1000 \\\n\t\t| ./stat | grep '^[0-9]')\n\nmanual-bigtest: gen$(EXEEXT) stat$(EXEEXT)\n\t@(echo '16i: '; ./gen -f mpz_urandomb -z 16 50000 \\\n\t\t| ./stat -2 1000 -i 0xffff | grep '^K[mp]')\n\t@(echo '32i: '; ./gen -f mpz_urandomb -z 32 50000 \\\n\t\t| ./stat -2 1000 -i 0xffffffff | grep '^K[mp]')\n\t@(echo '33i: '; ./gen -f mpz_urandomb -z 33 50000 \\\n\t\t| ./stat -2 1000 -i 0x1ffffffff | grep '^K[mp]')\n\t@(echo '64i: '; ./gen -f mpz_urandomb -z 64 50000 \\\n\t\t| ./stat -2 1000 -i 0xffffffffffffffff  | grep '^K[mp]')\n\t@(echo '128i: '; ./gen -f mpz_urandomb -z 128 50000 \\\n\t\t| ./stat -2 1000 -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^K[mp]')\n\n\t@(echo '16f: '; ./gen -f mpf_urandomb -z 16 50000 \\\n\t\t| ./stat -2 1000 | grep '^K[mp]')\n\t@(echo '32f: '; ./gen -f mpf_urandomb -z 32 50000 \\\n\t\t| ./stat -2 1000 | grep '^K[mp]')\n\t@(echo '33f: '; ./gen -f mpf_urandomb -z 33 50000 \\\n\t\t| ./stat -2 1000 | grep '^K[mp]')\n\t@(echo '64f: '; ./gen -f mpf_urandomb -z 64 50000 \\\n\t\t| ./stat -2 1000 | grep '^K[mp]')\n\t@(echo '128f: '; ./gen -f mpf_urandomb -z 128 50000 \\\n\t\t| ./stat -2 1000 | grep '^K[mp]')\n"
  },
  {
    "path": "tests/rand/findlc.c",
    "content": "/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <unistd.h>\n#include <signal.h>\n#include <math.h>\n#include \"mpir.h\"\n#include \"gmpstat.h\"\n\n#define RCSID(msg) \\\nstatic /**/const char *const rcsid[] = { (char *)rcsid, \"\\100(#)\" msg }\n\nRCSID(\"$Id: findlc.c,v 1.1.1.1 2006/03/14 15:57:54 tege Exp $\");\n\nint g_debug = 0;\n\nstatic mpz_t a;\n\nstatic void\nsh_status (int sig)\n{\n  printf (\"sh_status: signal %d caught. dumping status.\\n\", sig);\n\n  printf (\"  a = \");\n  mpz_out_str (stdout, 10, a);\n  printf (\"\\n\");\n  fflush (stdout);\n\n  if (SIGSEGV == sig)\t\t/* remove SEGV handler */\n    signal (SIGSEGV, SIG_DFL);\n}\n\n/* Input is a modulus (m).  We shall find multiplyer (a) and adder (c)\n   conforming to the rules found in the first comment block in file\n   mpz/urandom.c.\n\n   Then run a spectral test on the generator and discard any\n   multipliers not passing.  */\n\n/* TODO:\n\n   . 
find a better algorithm than a+=8; bigger jumps perhaps?\n\n*/\n\nvoid\nmpz_true_random (mpz_t s, unsigned long int nbits)\n{\n#if __FreeBSD__\n  FILE *fs;\n  char c[1];\n  int i;\n\n  mpz_set_ui (s, 0);\n  for (i = 0; i < nbits; i += 8)\n    {\n      for (;;)\n\t{\n\t  int nread;\n\t  fs = fopen (\"/dev/random\", \"r\");\n\t  nread = fread (c, 1, 1, fs);\n\t  fclose (fs);\n\t  if (nread != 0)\n\t    break;\n\t  sleep (1);\n\t}\n      mpz_mul_2exp (s, s, 8);\n      mpz_add_ui (s, s, ((unsigned long int) c[0]) & 0xff);\n      printf (\"%d random bits\\n\", i + 8);\n    }\n  if (nbits % 8 != 0)\n    mpz_mod_2exp (s, s, nbits);\n#endif\n}\n\nint\nmain (int argc, char *argv[])\n{\n  const char usage[] = \"usage: findlc [-dv] m2exp [low_merit [high_merit]]\\n\";\n  int f;\n  int v_lose, m_lose, v_best, m_best;\n  int c;\n  int debug = 1;\n  int cnt_high_merit;\n  mpz_t m;\n  unsigned long int m2exp;\n#define DIMS 6\t\t\t/* dimensions run in spectral test */\n  mpf_t v[DIMS-1];\t\t/* spectral test result (there's no v\n                                   for 1st dimension */\n  mpf_t f_merit, low_merit, high_merit;\n  mpz_t acc, minus8;\n  mpz_t min, max;\n  mpz_t s;\n\n\n  mpz_init (m);\n  mpz_init (a);\n  for (f = 0; f < DIMS-1; f++)\n    mpf_init (v[f]);\n  mpf_init (f_merit);\n  mpf_init_set_d (low_merit, .1);\n  mpf_init_set_d (high_merit, .1);\n\n  while ((c = getopt (argc, argv, \"a:di:hv\")) != -1)\n    switch (c)\n      {\n      case 'd':\t\t\t/* debug */\n\tg_debug++;\n\tbreak;\n\n      case 'v':\t\t\t/* print version */\n\tputs (rcsid[1]);\n\texit (0);\n\n      case 'h':\n      case '?':\n      default:\n\tfputs (usage, stderr);\n\texit (1);\n      }\n\n  argc -= optind;\n  argv += optind;\n\n  if (argc < 1)\n    {\n      fputs (usage, stderr);\n      exit (1);\n    }\n\n  /* Install signal handler. 
*/\n  if (SIG_ERR == signal (SIGSEGV, sh_status))\n    {\n      perror (\"signal (SIGSEGV)\");\n      exit (1);\n    }\n  if (SIG_ERR == signal (SIGHUP, sh_status))\n    {\n      perror (\"signal (SIGHUP)\");\n      exit (1);\n    }\n\n  printf (\"findlc: version: %s\\n\", rcsid[1]);\n  m2exp = atol (argv[0]);\n  mpz_init_set_ui (m, 1);\n  mpz_mul_2exp (m, m, m2exp);\n  printf (\"m = 0x\");\n  mpz_out_str (stdout, 16, m);\n  puts (\"\");\n\n  if (argc > 1)\t\t\t/* have low_merit */\n    mpf_set_str (low_merit, argv[1], 0);\n  if (argc > 2)\t\t\t/* have high_merit */\n    mpf_set_str (high_merit, argv[2], 0);\n\n  if (debug)\n    {\n      fprintf (stderr, \"low_merit = \");\n      mpf_out_str (stderr, 10, 2, low_merit);\n      fprintf (stderr, \"; high_merit = \");\n      mpf_out_str (stderr, 10, 2, high_merit);\n      fputs (\"\\n\", stderr);\n    }\n\n  mpz_init (minus8);\n  mpz_set_si (minus8, -8L);\n  mpz_init_set_ui (acc, 0);\n  mpz_init (s);\n  mpz_init_set_d (min, 0.01 * pow (2.0, (double) m2exp));\n  mpz_init_set_d (max, 0.99 * pow (2.0, (double) m2exp));\n\n  mpz_true_random (s, m2exp);\t/* Start.  */\n  mpz_setbit (s, 0);\t\t/* Make it odd.  
*/\n\n  v_best = m_best = 2*(DIMS-1);\n  for (;;) \n    {\n      mpz_add (acc, acc, s);\n      mpz_mod_2exp (acc, acc, m2exp);\n#if later\n      mpz_and_si (a, acc, -8L);\n#else\n      mpz_and (a, acc, minus8);\n#endif\n      mpz_add_ui (a, a, 5);\n      if (mpz_cmp (a, min) <= 0 || mpz_cmp (a, max) >= 0)\n\tcontinue;\n\n      spectral_test (v, DIMS, a, m);\n      for (f = 0, v_lose = m_lose = 0, cnt_high_merit = DIMS-1;\n\t   f < DIMS-1; f++)\n\t{\n\t  merit (f_merit, f + 2, v[f], m);\n\n\t  if (mpf_cmp_ui (v[f], 1 << (30 / (f + 2) + (f == 2))) < 0)\n\t    v_lose++;\n\t    \n\t  if (mpf_cmp (f_merit, low_merit) < 0)\n\t    m_lose++;\n\n\t  if (mpf_cmp (f_merit, high_merit) >= 0)\n\t    cnt_high_merit--;\n\t}\n\n      if (0 == v_lose && 0 == m_lose)\n\t{\n\t  mpz_out_str (stdout, 10, a); puts (\"\"); fflush (stdout);\n\t  if (0 == cnt_high_merit)\n\t    break;\t\t/* leave loop */\n\t}\n      if (v_lose < v_best)\n\t{\n\t  v_best = v_lose;\n\t  printf (\"best (v_lose=%d; m_lose=%d): \", v_lose, m_lose);\n\t  mpz_out_str (stdout, 10, a); puts (\"\"); fflush (stdout);\n\t}\n      if (m_lose < m_best)\n\t{\n\t  m_best = m_lose;\n\t  printf (\"best (v_lose=%d; m_lose=%d): \", v_lose, m_lose);\n\t  mpz_out_str (stdout, 10, a); puts (\"\"); fflush (stdout);\n\t}\n    }\n\n  mpz_clear (m);\n  mpz_clear (a);\n  for (f = 0; f < DIMS-1; f++)\n    mpf_clear (v[f]);\n  mpf_clear (f_merit);\n  mpf_clear (low_merit);\n  mpf_clear (high_merit);\n\n  printf (\"done.\\n\");\n  return 0;\n}\n"
  },
  {
    "path": "tests/rand/gen.c",
    "content": "/* gen.c -- Generate pseudorandom numbers.\n\nCopyright 1999, 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* Examples:\n\n  $ gen 10\n10 integers 0 <= X < 2^32 generated by mpz_urandomb()\n\n  $ gen -f mpf_urandomb 10\n10 real numbers 0 <= X < 1\n\n  $ gen -z 127 10\n10 integers 0 <= X < 2^127\n\n  $ gen -f mpf_urandomb -x .9,1 10\n10 real numbers 0 <= X < .9\n\n  $ gen -s 1 10\n10 integers, sequence seeded with 1\n\n*/\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <unistd.h>\n#include <limits.h>\n#include <errno.h>\n#include <time.h>\n#include <string.h>\n\n#if !HAVE_DECL_OPTARG\nextern char *optarg;\nextern int optind, opterr;\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nint main (argc, argv)\n     int argc;\n     char *argv[];\n{\n  const char usage[] =\n    \"usage: gen [-bhpq] [-a n] [-c a,c,m2exp] [-C a,c,m] [-f func] [-g alg] [-m n] [-s n] \" \\\n    \"[-x f,t] [-z n] [n]\\n\" \\\n    \"  n        number of random numbers to generate\\n\" \\\n    \"  -a n     ASCII output in radix n (default, with n=10)\\n\" \\\n    \"  -b       binary output\\n\" \\\n    \"  -c a,c,m2exp use supplied LC scheme\\n\" 
\\\n    \"  -f func  random function, one of\\n\" \\\n    \"           mpz_urandomb (default), mpz_urandomm, mpf_urandomb, rand, random\\n\" \\\n    \"  -g alg   algorithm, one of mt (default), lc\\n\" \\\n    \"  -h       print this text and exit\\n\" \\\n    \"  -m n     maximum size of generated number plus 1 (0<= X < n) for mpz_urandomm\\n\" \\\n    \"  -p       print used seed on stderr\\n\" \\\n    \"  -q       quiet, no output\\n\" \\\n    \"  -s n     initial seed (default: output from time(3))\\n\" \\\n    \"  -x f,t   exclude all numbers f <= x <= t\\n\" \\\n    \"  -z n     size in bits of generated numbers (0<= X <2^n) (default 32)\\n\" \\\n    \"\";\n\n  unsigned long int f;\n  unsigned long int n = 0;\n  unsigned long int seed;\n  unsigned long int m2exp = 0;\n  unsigned int size = 32;\n  int seed_from_user = 0;\n  int ascout = 1, binout = 0, printseed = 0;\n  int output_radix = 10;\n  int lc_scheme_from_user = 0;\n  int quiet_flag = 0;\n  mpz_t z_seed;\n  mpz_t z1;\n  mpf_t f1;\n  gmp_randstate_t rstate;\n  int c, i;\n  double drand;\n  long lrand;\n  int do_exclude = 0;\n  mpf_t f_xf, f_xt;\t\t/* numbers to exclude from sequence */\n  char *str_xf, *str_xt;\t/* numbers to exclude from sequence */\n  char *str_a, *str_adder, *str_m;\n  mpz_t z_a, z_m, z_mmax;\n  unsigned long int ul_adder;\n\n  enum\n  {\n    RFUNC_mpz_urandomb = 0,\n    RFUNC_mpz_urandomm,\n    RFUNC_mpf_urandomb,\n    RFUNC_rand,\n    RFUNC_random,\n  } rfunc = RFUNC_mpz_urandomb;\n  char *rfunc_str[] =  { \"mpz_urandomb\", \"mpz_urandomm\", \"mpf_urandomb\",\n\t\t\t \"rand\", \"random\" };\n  enum\n  {\n    RNG_MT = 0,\n    RNG_LC\n  };\n  gmp_randalg_t ralg = RNG_MT;\n  /* Texts for the algorithms.  The index of each must match the\n     corresponding algorithm in the enum above.  
*/\n  char *ralg_str[] = { \"mt\", \"lc\" };\n\n  mpf_init (f_xf);\n  mpf_init (f_xt);\n  mpf_init (f1);\n  mpz_init (z1);\n  mpz_init (z_seed);\n  mpz_init_set_ui (z_mmax, 0);\n\n\n  while ((c = getopt (argc, argv, \"a:bc:f:g:hm:n:pqs:z:x:\")) != -1)\n    switch (c)\n      {\n      case 'a':\n\tascout = 1;\n\tbinout = 0;\n\toutput_radix = atoi (optarg);\n\tbreak;\n\n      case 'b':\n\tascout = 0;\n\tbinout = 1;\n\tbreak;\n\n      case 'c':\t\t\t/* User supplied LC scheme: a,c,m2exp */\n\tif (NULL == (str_a = strtok (optarg, \",\"))\n\t    || NULL == (str_adder = strtok (NULL, \",\"))\n\t    || NULL == (str_m = strtok (NULL, \",\")))\n\t  {\n\t    fprintf (stderr, \"gen: bad LC scheme parameters: %s\\n\", optarg);\n\t    exit (1);\n\t  }\n#ifdef HAVE_STRTOUL\n\tul_adder = strtoul (str_adder, NULL, 0);\n#elif HAVE_STRTOL\n\tul_adder = (unsigned long int) strtol (str_adder, NULL, 0);\n#else\n\tul_adder = (unsigned long int) atoi (str_adder);\n#endif\n\t\n\tif (mpz_init_set_str (z_a, str_a, 0))\n\t  {\n\t    fprintf (stderr, \"gen: bad LC scheme parameter `a': %s\\n\", str_a);\n\t    exit (1);\n\t  }\n\tif (ULONG_MAX == ul_adder)\n\t  {\n\t    fprintf (stderr, \"gen: bad LC scheme parameter `c': %s\\n\",\n\t\t     str_adder);\n\t    exit (1);\n\t  }\n\tm2exp = atol (str_m);\n\n\tlc_scheme_from_user = 1;\n\tbreak;\n\n\n      case 'f':\n\trfunc = -1;\n\tfor (f = 0; f < sizeof (rfunc_str) / sizeof (*rfunc_str); f++)\n\t    if (!strcmp (optarg, rfunc_str[f]))\n\t      {\n\t\trfunc = f;\n\t\tbreak;\n\t      }\n\tif (rfunc == -1)\n\t  {\n\t    fputs (usage, stderr);\n\t    exit (1);\n\t  }\n\tbreak;\n\n      case 'g':\t\t\t/* algorithm */\n\tralg = -1;\n\tfor (f = 0; f < sizeof (ralg_str) / sizeof (*ralg_str); f++)\n\t    if (!strcmp (optarg, ralg_str[f]))\n\t      {\n\t\tralg = f;\n\t\tbreak;\n\t      }\n\tif (ralg == -1)\n\t  {\n\t    fputs (usage, stderr);\n\t    exit (1);\n\t  }\n\tbreak;\n\n      case 'm':\t\t\t/* max for mpz_urandomm() */\n\tif (mpz_set_str (z_mmax, 
optarg, 0))\n\t  {\n\t    fprintf (stderr, \"gen: bad max value: %s\\n\", optarg);\n\t    exit (1);\n\t  }\n\tbreak;\n\n      case 'p':\t\t\t/* print seed on stderr */\n\tprintseed = 1;\n\tbreak;\n\n      case 'q':\t\t\t/* quiet */\n\tquiet_flag = 1;\n\tbreak;\n\n      case 's':\t\t\t/* user provided seed */\n\tif (mpz_set_str (z_seed, optarg, 0))\n\t  {\n\t    fprintf (stderr, \"gen: bad seed argument %s\\n\", optarg);\n\t    exit (1);\n\t  }\n\tseed_from_user = 1;\n\tbreak;\n\n      case 'z':\n\tsize = atoi (optarg);\n\tif (size < 1)\n\t  {\n\t    fprintf (stderr, \"gen: bad size argument (-z %u)\\n\", size);\n\t    exit (1);\n\t  }\n\tbreak;\n\n      case 'x':\t\t\t/* Exclude. from,to */\n\tstr_xf = optarg;\n\tstr_xt = strchr (optarg, ',');\n\tif (NULL == str_xt)\n\t  {\n\t    fprintf (stderr, \"gen: bad exclusion parameters: %s\\n\", optarg);\n\t    exit (1);\n\t  }\n\t*str_xt++ = '\\0';\n\tdo_exclude = 1;\n\tbreak;\n\n      case 'h':\n      case '?':\n      default:\n\tfputs (usage, stderr);\n\texit (1);\n      }\n  argc -= optind;\n  argv += optind;\n\n  if (! seed_from_user)\n    mpz_set_ui (z_seed, (unsigned long int) time (NULL));\n  seed = mpz_get_ui (z_seed);\n  if (printseed)\n    {\n      fprintf (stderr, \"gen: seed used: \");\n      mpz_out_str (stderr, output_radix, z_seed);\n      fprintf (stderr, \"\\n\");\n    }\n\n  mpf_set_prec (f1, size);\n\n  /* init random state and plant seed */\n  switch (rfunc)\n    {\n    case RFUNC_mpf_urandomb:\n#if 0\n      /* Don't init a too small generator.  */\n      size = PREC (f1) * BITS_PER_MP_LIMB;\n      /* Fall through.  */\n#endif\n    case RFUNC_mpz_urandomb:\n    case RFUNC_mpz_urandomm:\n      switch (ralg)\n\t{\n\tcase RNG_MT:\n\t  gmp_randinit_mt (rstate);\n\t  break;\n\n\tcase RNG_LC:\n\t  if (! 
lc_scheme_from_user)\n\t    gmp_randinit_lc_2exp_size (rstate, MIN (128, size));\n\t  else\n\t    gmp_randinit_lc_2exp (rstate, z_a, ul_adder, m2exp);\n\t  break;\n\n\tdefault:\n\t  fprintf (stderr, \"gen: unsupported algorithm\\n\");\n\t  exit (1);\n\t}\n\n      gmp_randseed (rstate, z_seed);\n      break;\n\n    case RFUNC_rand:\n      srand (seed);\n      break;\n\n    case RFUNC_random:\n#ifdef __FreeBSD__\t\t/* FIXME */\n      if (seed_from_user)\n\tsrandom (seed);\n      else\n\tsrandomdev ();\n#else\n      fprintf (stderr, \"gen: unsupported algorithm\\n\");\n#endif\n      break;\n\n    default:\n      fprintf (stderr, \"gen: random function not implemented\\n\");\n      exit (1);\n    }\n\n  /* set up excludes */\n  if (do_exclude)\n    switch (rfunc)\n      {\n      case RFUNC_mpf_urandomb:\n\n\tif (mpf_set_str (f_xf, str_xf, 10) ||\n\t    mpf_set_str (f_xt, str_xt, 10))\n\t  {\n\t    fprintf (stderr, \"gen: bad exclusion-from (\\\"%s\\\") \" \\\n\t\t     \"or exclusion-to (\\\"%s\\\") string.  no exclusion done.\\n\",\n\t\t     str_xf, str_xt);\n\t    do_exclude = 0;\n\t  }\n\tbreak;\n\n      default:\n\tfprintf (stderr, \"gen: exclusion not implemented for chosen \" \\\n\t\t \"randomization function.  
all numbers included in sequence.\\n\");\n      }\n\n  /* generate and print */\n  if (argc > 0)\n    {\n#if HAVE_STRTOUL\n      n = strtoul (argv[0], (char **) NULL, 10);\n#elif HAVE_STRTOL\n      n = (unsigned long int) strtol (argv[0], (char **) NULL, 10);\n#else\n      n = (unsigned long int) atoi (argv[0]);\n#endif\n    }\n\n  for (f = 0; n == 0 || f < n; f++)\n    {\n      switch (rfunc)\n\t{\n\tcase RFUNC_mpz_urandomb:\n\t  mpz_urandomb (z1, rstate, size);\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    {\n\t      /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/\n\t      fprintf (stderr, \"gen: binary output for mpz_urandom* is broken\\n\");\n\t      exit (1);\n\t    }\n\t  else\n\t    {\n\t      mpz_out_str (stdout, output_radix, z1);\n\t      puts (\"\");\n\t    }\n\t  break;\n\n\tcase RFUNC_mpz_urandomm:\n\t  mpz_urandomm (z1, rstate, z_mmax);\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    {\n\t      /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/\n\t      fprintf (stderr, \"gen: binary output for mpz_urandom* is broken\\n\");\n\t      exit (1);\n\t    }\n\t  else\n\t    {\n\t      mpz_out_str (stdout, output_radix, z1);\n\t      puts (\"\");\n\t    }\n\t  break;\n\n\tcase RFUNC_mpf_urandomb:\n\t  mpf_urandomb (f1, rstate, size);\n\t  if (do_exclude)\n\t    if (mpf_cmp (f1, f_xf) >= 0 && mpf_cmp (f1, f_xt) <= 0)\n\t\tbreak;\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    {\n\t      fprintf (stderr, \"gen: binary output for floating point numbers \"\\\n\t\t       \"not implemented\\n\");\n\t      exit (1);\n\t    }\n\t  else\n\t    {\n\t      mpf_out_str (stdout, output_radix, 0, f1);\n\t      puts (\"\");\n\t    }\n\t  break;\n\n\tcase RFUNC_rand:\n\t  i = rand ();\n#ifdef FLOAT_OUTPUT\n\t  if (i)\n\t    drand = (double) i / (double) RAND_MAX;\n\t  else\n\t    drand = 0.0;\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    fwrite (&drand, sizeof (drand), 1, stdout);\n\t  else\n\t    printf (\"%e\\n\", 
drand);\n#else\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    fwrite (&i, sizeof (i), 1, stdout);\n\t  else\n\t    printf (\"%d\\n\", i);\n#endif\n\t  break;\n\n\tcase RFUNC_random:\n\t  lrand = random ();\n\t  if (lrand)\n\t    drand = (double) lrand / (double) 0x7fffffff;\n\t  else\n\t    drand = 0;\n\t  if (quiet_flag)\n\t    break;\n\t  if (binout)\n\t    fwrite (&drand, sizeof (drand), 1, stdout);\n\t  else\n\t    printf (\"%e\\n\", drand);\n\t  break;\n\n\tdefault:\n\t  fprintf (stderr, \"gen: random function not implemented\\n\");\n\t  exit (1);\n\t}\n\n    }\n\n  /* clean up */\n  switch (rfunc)\n    {\n    case RFUNC_mpz_urandomb:\n    case RFUNC_mpf_urandomb:\n      gmp_randclear (rstate);\n      break;\n    default:\n      break;\n    }\n  mpf_clear (f1);\n  mpf_clear (f_xf); \n  mpf_clear (f_xt);\n  mpz_clear (z1);\n  mpz_clear (z_seed);\n\n  return 0;\n}\n\nstatic void *debug_dummyz = mpz_dump;\nstatic void *debug_dummyf = mpf_dump;\n"
  },
  {
    "path": "tests/rand/gmpstat.h",
    "content": "/* gmpstat.h */\n\n/*\nCopyright 1999 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n/* This file requires the following header files: mpir.h */\n\n#ifndef\t__GMPSTAT_H__\n#define\t__GMPSTAT_H__\n\n/* Global debug flag.  FIXME: Remove. */\nextern int g_debug;\n#define DEBUG_1 0\n#define DEBUG_2 1\n\n/* Max number of dimensions in spectral test.  FIXME: Makw dynamic. */\n#define GMP_SPECT_MAXT 10\n\nvoid\nmpf_freqt (mpf_t Kp,\n\t   mpf_t Km,\n\t   mpf_t X[],\n\t   const unsigned long int n);\nunsigned long int\nmpz_freqt (mpf_t V,\n\t   mpz_t X[],\n\t   unsigned int imax,\n\t   const unsigned long int n);\n\n/* Low level functions. 
*/\nvoid\nks (mpf_t Kp,\n    mpf_t Km,\n    mpf_t X[],\n    void (P) (mpf_t, mpf_t),\n    const unsigned long int n);\n\nvoid\nks_table (mpf_t p, mpf_t val, const unsigned int n);\n\nvoid\nx2_table (double t[],\n\t  unsigned int v);\n\nvoid\nspectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m);\nvoid\nvz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n);\nvoid\nf_floor (mpf_t rop, mpf_t op);\n\nvoid\nmerit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m);\ndouble\nmerit_u (unsigned int t, mpf_t v, mpz_t m);\n\n/* From separate source files: */\nvoid zdiv_round (mpz_t rop, mpz_t n, mpz_t d);\n\n#endif /* !__GMPSTAT_H__ */\n"
  },
  {
    "path": "tests/rand/spect.c",
    "content": "/* spect.c -- the spectral test */\n\n/*\nCopyright 1999 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n/* T is upper dimension.  Z_A is the LC multiplier, which is\n   relatively prime to Z_M, the LC modulus.  The result is put in\n   rop[] with v[t] in rop[t-2]. */\n\n/* BUGS: Due to lazy allocation scheme, maximum T is hard coded to MAXT. 
*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <unistd.h>\n#include <math.h>\n#include \"mpir.h\"\n\n#include \"gmpstat.h\"\n\nint g_debug = 0;\n\nint\nmain (int argc, char *argv[])\n{\n  const char usage[] = \"usage: spect [-d] a m n\\n\";\n  int c;\n  unsigned int n;\n  mpz_t a, m;\n  mpf_t res[GMP_SPECT_MAXT], res_min[GMP_SPECT_MAXT], f_tmp;\n  register int f;\n\n\n  mpz_init (a);\n  mpz_init (m);\n  for (f = 0; f < GMP_SPECT_MAXT; f++)\n    {\n      mpf_init (res[f]);\n      mpf_init (res_min[f]);\n    }\n  mpf_init (f_tmp);\n  mpf_set_ui (res_min[0], 32768); /* 2^15 */\n  mpf_set_ui (res_min[1], 1024); /* 2^10 */\n  mpf_set_ui (res_min[2], 256); /* 2^8 */\n  mpf_set_ui (res_min[3], 64); /* 2^6 */\n  mpf_set_ui (res_min[4], 32); /* 2^5 */\n\n  while ((c = getopt (argc, argv, \"dh\")) != -1)\n    switch (c)\n      {\n      case 'd':\t\t\t/* debug */\n\tg_debug++;\n\tbreak;\n      case 'h':\n      default:\n\tfputs (usage, stderr);\n\texit (1);\n      }\n  argc -= optind;\n  argv += optind;\n\n  if (argc < 3)\n    {\n      fputs (usage, stderr);\n      exit (1);\n    }\n\n  mpz_set_str (a, argv[0], 0);\n  mpz_set_str (m, argv[1], 0);\n  n = (unsigned int) atoi (argv[2]);\n  if (n + 1 > GMP_SPECT_MAXT)\n    n = GMP_SPECT_MAXT + 1;\n\n  spectral_test (res, n, a, m);\n\n  for (f = 0; f < n - 1; f++)\n    {\n      /* print v */\n      printf (\"%d: v = \", f + 2);\n      mpf_out_str (stdout, 10, 4, res[f]);\n\n#ifdef PRINT_RAISED_BY_TWO_AS_WELL\n      printf (\" (^2 = \");\n      mpf_mul (f_tmp, res[f], res[f]);\n      mpf_out_str (stdout, 10, 4, f_tmp);\n      printf (\")\");\n#endif /* PRINT_RAISED_BY_TWO_AS_WELL */\n\n      /* print merit */\n      printf (\" m = \");\n      merit (f_tmp, f + 2, res[f], m);\n      mpf_out_str (stdout, 10, 4, f_tmp);\n\n      if (mpf_cmp (res[f], res_min[f]) < 0)\n\tprintf (\"\\t*** v too low ***\");\n      if (mpf_get_d (f_tmp) < .1)\n\tprintf (\"\\t*** merit too low ***\");\n\n      puts (\"\");\n    }\n\n  mpz_clear 
(a);\n  mpz_clear (m);\n  for (f = 0; f < GMP_SPECT_MAXT; f++)\n    {\n      mpf_clear (res[f]);\n      mpf_clear (res_min[f]);\n    }\n  mpf_clear (f_tmp);\n\n  return 0;\n}\n\n\nvoid \ndebug_foo()\n{\n  if (0)\n    {\n      mpz_dump (0);\n      mpf_dump (0);\n    }\n}\n"
  },
  {
    "path": "tests/rand/stat.c",
    "content": "/* stat.c -- statistical tests of random number sequences. */\n\n/*\nCopyright 1999, 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n/* Examples:\n\n  $ gen 1000 | stat\nTest 1000 real numbers.\n\n  $ gen 30000 | stat -2 1000\nTest 1000 real numbers 30 times and then test the 30 results in a \n``second level''.\n\n  $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff\nTest 1000 integers 0 <= X <= 2^32-1.\n\n  $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff\nTest 1000 integers 0 <= X <= 2^34-1.\n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <unistd.h>\n#include <math.h>\n#include \"mpir.h\"\n#include \"gmpstat.h\"\n\n#if !HAVE_DECL_OPTARG\nextern char *optarg;\nextern int optind, opterr;\n#endif\n\n#define FVECSIZ (100000L)\n\nint g_debug = 0;\n\nstatic void\nprint_ks_results (mpf_t f_p, mpf_t f_p_prob,\n\t\t  mpf_t f_m, mpf_t f_m_prob,\n\t\t  FILE *fp)\n{\n  double p, pp, m, mp;\n\n  p = mpf_get_d (f_p);\n  m = mpf_get_d (f_m);\n  pp = mpf_get_d (f_p_prob);\n  mp = mpf_get_d (f_m_prob);\n  \n  fprintf (fp, \"%.4f (%.0f%%)\\t\", p, pp * 100.0);\n  fprintf (fp, \"%.4f (%.0f%%)\\n\", m, mp * 100.0);\n}\n\nstatic void\nprint_x2_table (unsigned int v, 
FILE *fp)\n{\n  double t[7];\n  int f;\n\n\n  fprintf (fp, \"Chi-square table for v=%u\\n\", v);\n  fprintf (fp, \"1%%\\t5%%\\t25%%\\t50%%\\t75%%\\t95%%\\t99%%\\n\");\n  x2_table (t, v);\n  for (f = 0; f < 7; f++)\n    fprintf (fp, \"%.2f\\t\", t[f]);\n  fputs (\"\\n\", fp);\n}\n\n\n\n/* Pks () -- Distribution function for KS results with a big n (like 1000\n   or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */\n/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */\n\nstatic void\nPks (mpf_t p, mpf_t x)\n{\n  double dt;\t\t\t/* temp double */\n\n  mpf_set (p, x);\n  mpf_mul (p, p, p);\t\t/* p = x^2 */\n  mpf_mul_ui (p, p, 2);\t\t/* p = 2*x^2 */\n  mpf_neg (p, p);\t\t/* p = -2*x^2 */\n  /* No pow() in gmp.  Use doubles. */\n  /* FIXME: Use exp()? */\n  dt = pow (M_E, mpf_get_d (p));\n  mpf_set_d (p, dt);\n  mpf_ui_sub (p, 1, p);\n}\n\n/* f_freq() -- frequency test on real numbers 0<=f<1*/\nstatic void\nf_freq (const unsigned l1runs, const unsigned l2runs,\n\tmpf_t fvec[], const unsigned long n)\n{\n  unsigned f;\n  mpf_t f_p, f_p_prob;\n  mpf_t f_m, f_m_prob;\n  mpf_t *l1res;\t\t\t/* level 1 result array */\n\n  mpf_init (f_p);  mpf_init (f_m);\n  mpf_init (f_p_prob);  mpf_init (f_m_prob);\n\n  \n  /* Allocate space for 1st level results. */\n  l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));\n  if (NULL == l1res)\n    {\n      fprintf (stderr, \"stat: malloc failure\\n\");\n      exit (1);\n    }\n  \n  printf (\"\\nEquidistribution/Frequency test on real numbers (0<=X<1):\\n\");\n  printf (\"\\tKp\\t\\tKm\\n\");\n\n  for (f = 0; f < l2runs; f++)\n    {\n      /*  f_printvec (fvec, n); */\n      mpf_freqt (f_p, f_m, fvec + f * n, n);\n\n      /* what's the probability of getting these results? 
*/\n      ks_table (f_p_prob, f_p, n);\n      ks_table (f_m_prob, f_m, n);\n\n      if (l1runs == 0)\n\t{\n\t  /*printf (\"%u:\\t\", f + 1);*/\n\t  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);\n\t}\n      else\n\t{\n\t  /* save result */\n\t  mpf_init_set (l1res[f], f_p);\n\t  mpf_init_set (l1res[f + l2runs], f_m);\n\t}\n    }\n\n  /* Now, apply the KS test on the results from the 1st level rounds\n     with the distribution\n     F(x) = 1 - pow(e, -2*x^2)\t[Knuth, vol 2, p.51] */\n\n  if (l1runs != 0)\n    {\n      /*printf (\"-------------------------------------\\n\");*/\n\n      /* The Kp's. */\n      ks (f_p, f_m, l1res, Pks, l2runs);\n      ks_table (f_p_prob, f_p, l2runs);\n      ks_table (f_m_prob, f_m, l2runs);\n      printf (\"Kp:\\t\");\n      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);\n\n      /* The Km's. */\n      ks (f_p, f_m, l1res + l2runs, Pks, l2runs);\n      ks_table (f_p_prob, f_p, l2runs);\n      ks_table (f_m_prob, f_m, l2runs);\n      printf (\"Km:\\t\");\n      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);\n    }\n\n  mpf_clear (f_p);  mpf_clear (f_m);\n  mpf_clear (f_p_prob);  mpf_clear (f_m_prob);\n  free (l1res);\n}  \n\n/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers\n   0<=z<=MAX */\nstatic void\nz_freq (const unsigned l1runs,\n\tconst unsigned l2runs,\n\tmpz_t zvec[],\n\tconst unsigned long n,\n\tunsigned int max)\n{\n  mpf_t V;\t\t\t/* result */\n  double d_V;\t\t\t/* result as a double */\n\n  mpf_init (V);\n\n\n  printf (\"\\nEquidistribution/Frequency test on integers (0<=X<=%u):\\n\", max);\n  print_x2_table (max, stdout);\n\n  mpz_freqt (V, zvec, max, n);\n\n  d_V = mpf_get_d (V);\n  printf (\"V = %.2f (n = %lu)\\n\", d_V, n);\n  \n  mpf_clear (V);\n}\n\nunsigned int stat_debug = 0;\n\nint \nmain (argc, argv)\n     int argc;\n     char *argv[];\n{\n  const char usage[] =\n    \"usage: stat [-d] [-2 runs] [-i max | -r max] [file]\\n\" \\\n    \"       file     
filename\\n\" \\\n    \"       -2 runs  perform 2-level test with RUNS runs on 1st level\\n\" \\\n    \"       -d       increase debugging level\\n\" \\\n    \"       -i max   input is integers 0 <= Z <= MAX\\n\" \\\n    \"       -r max   input is real numbers 0 <= R < 1 and use MAX as\\n\" \\\n    \"                maximum value when converting real numbers to integers\\n\" \\\n    \"\";\n  \n  static mpf_t fvec[FVECSIZ];\n  static mpz_t zvec[FVECSIZ];\n  unsigned long int f, n, vecentries;\n  char *filen;\n  FILE *fp;\n  int c;\n  int omitoutput = 0;\n  int realinput = -1;\t\t/* 1: input is real numbers 0<=R<1;\n\t\t\t\t   0: input is integers 0 <= Z <= MAX. */\n  long l1runs = 0,\t\t/* 1st level runs */\n    l2runs = 1;\t\t\t/* 2nd level runs */\n  mpf_t f_temp;\n  mpz_t z_imax;\t\t\t/* max value when converting between\n                                   real number and integer. */\n  mpf_t f_imax_plus1;\t\t/* f_imax + 1 stored in an mpf_t for\n                                   convenience */\n  mpf_t f_imax_minus1;\t\t/* f_imax - 1 stored in an mpf_t for\n                                   convenience */\n\n\n  mpf_init (f_temp);\n  mpz_init_set_ui (z_imax, 0x7fffffff);\n  mpf_init (f_imax_plus1);\n  mpf_init (f_imax_minus1);\n\n  while ((c = getopt (argc, argv, \"d2:i:r:\")) != -1)\n    switch (c)\n      {\n      case '2':\n\tl1runs = atol (optarg);\n\tl2runs = -1;\t\t/* set later on */\n\tbreak;\n      case 'd':\t\t\t/* increase debug level */\n\tstat_debug++;\n\tbreak;\n      case 'i':\n\tif (1 == realinput)\n\t  {\n\t    fputs (\"stat: options -i and -r are mutually exclusive\\n\", stderr);\n\t    exit (1);\n\t  }\n\tif (mpz_set_str (z_imax, optarg, 0))\n\t  {\n\t    fprintf (stderr, \"stat: bad max value %s\\n\", optarg);\n\t    exit (1);\n\t  }\n\trealinput = 0;\n\tbreak;\n      case 'r':\n\tif (0 == realinput)\n\t  {\n\t    fputs (\"stat: options -i and -r are mutually exclusive\\n\", stderr);\n\t    exit (1);\n\t  }\n\tif (mpz_set_str (z_imax, optarg, 
0))\n\t  {\n\t    fprintf (stderr, \"stat: bad max value %s\\n\", optarg);\n\t    exit (1);\n\t  }\n\trealinput = 1;\n\tbreak;\n      case 'o':\n\tomitoutput = atoi (optarg);\n\tbreak;\n      case '?':\n      default:\n\tfputs (usage, stderr);\n\texit (1);\n      }\n  argc -= optind;\n  argv += optind;\n\n  if (argc < 1)\n    fp = stdin;\n  else\n    filen = argv[0]; \n\n  if (fp != stdin)\n    if (NULL == (fp = fopen (filen, \"r\")))\n      {\n\tperror (filen);\n\texit (1);\n      }\n\n  if (-1 == realinput)\n    realinput = 1;\t\t/* default is real numbers */\n\n  /* read file and fill appropriate vec */\n  if (1 == realinput)\t\t/* real input */\n    {\n      for (f = 0; f < FVECSIZ ; f++)\n\t{\n\t  mpf_init (fvec[f]);\n\t  if (!mpf_inp_str (fvec[f], fp, 10))\n\t    break;\n\t}\n    }\n  else\t\t\t\t/* integer input */\n    {\n      for (f = 0; f < FVECSIZ ; f++)\n\t{\n\t  mpz_init (zvec[f]);\n\t  if (!mpz_inp_str (zvec[f], fp, 10))\n\t    break;\n\t}\n    }    \n  vecentries = n = f;\t\t/* number of entries read */\n  fclose (fp);\n\n  if (FVECSIZ == f)\n    fprintf (stderr, \"stat: warning: discarding input due to lazy allocation \"\\\n\t     \"of only %ld entries.  
sorry.\\n\", FVECSIZ);\n\n  printf (\"Got %lu numbers.\\n\", n);\n\n  /* convert and fill the other vec */\n  /* since fvec[] contains 0<=f<1 and we want ivec[] to contain\n     0<=z<=imax and we are truncating all fractions when\n     converting float to int, we have to add 1 to imax.*/\n  mpf_set_z (f_imax_plus1, z_imax);\n  mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);\n  if (1 == realinput)\t\t/* fill zvec[] */\n    {\n      for (f = 0; f < n; f++)\n\t{\n\t  mpf_mul (f_temp, fvec[f], f_imax_plus1);\n\t  mpz_init (zvec[f]);\n\t  mpz_set_f (zvec[f], f_temp); /* truncating fraction */\n\t  if (stat_debug > 1)\n\t    {\n\t      mpz_out_str (stderr, 10, zvec[f]);\n\t      fputs (\"\\n\", stderr);\n\t    }\n\t}\n    }\n  else\t\t\t\t/* integer input; fill fvec[] */\n    {\n      /*    mpf_set_z (f_imax_minus1, z_imax); \n\t    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/\n      for (f = 0; f < n; f++)\n\t{\n\t  mpf_init (fvec[f]);\n\t  mpf_set_z (fvec[f], zvec[f]);\n\t  mpf_div (fvec[f], fvec[f], f_imax_plus1);\n\t  if (stat_debug > 1)\n\t    {\n\t      mpf_out_str (stderr, 10, 0, fvec[f]);\n\t      fputs (\"\\n\", stderr);\n\t    }\n\t}\n    }\n  \n  /* 2 levels? */\n  if (1 != l2runs)\n    {\n      l2runs = n / l1runs;\n      printf (\"Doing %ld second level rounds \"\\\n\t      \"with %ld entries in each round\", l2runs, l1runs);\n      if (n % l1runs)\n\tprintf (\" (discarding %ld entr%s)\", n % l1runs,\n\t\tn % l1runs == 1 ? \"y\" : \"ies\");\n      puts (\".\");\n      n = l1runs;\n    }\n\n#ifndef DONT_FFREQ\n  f_freq (l1runs, l2runs, fvec, n);\n#endif\n#ifdef DO_ZFREQ\n  z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));\n#endif\n\n  mpf_clear (f_temp); mpz_clear (z_imax); \n  mpf_clear (f_imax_plus1);\n  mpf_clear (f_imax_minus1);\n  for (f = 0; f < vecentries; f++)\n    {\n      mpf_clear (fvec[f]);\n      mpz_clear (zvec[f]);\n    }\n\n  return 0;\n}\n\n\n\n\n\n"
  },
  {
    "path": "tests/rand/statlib.c",
    "content": "/* statlib.c -- Statistical functions for testing the randomness of\n   number sequences. */\n\n/*\nCopyright 1999, 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n/* The theories for these functions are taken from D. Knuth's \"The Art\nof Computer Programming: Volume 2, Seminumerical Algorithms\", Third\nEdition, Addison Wesley, 1998. */\n\n/* Implementation notes.\n\nThe Kolmogorov-Smirnov test.\n\nEq. (13) in Knuth, p. 
50, says that if X1, X2, ..., Xn are independent\nobservations arranged into ascending order\n\n\tKp = sqr(n) * max(j/n - F(Xj))\t\tfor all 1<=j<=n\n\tKm = sqr(n) * max(F(Xj) - (j-1)/n))\tfor all 1<=j<=n\n\nwhere F(x) = Pr(X <= x) = probability that (X <= x), which for a\nuniformly distributed random real number between zero and one is\nexactly the number itself (x).\n\n\nThe answer to exercise 23 gives the following implementation, which\ndoesn't need the observations to be sorted in ascending order:\n\nfor (k = 0; k < m; k++)\n\ta[k] = 1.0\n\tb[k] = 0.0\n\tc[k] = 0\n\nfor (each observation Xj)\n\tY = F(Xj)\n\tk = floor (m * Y)\n\ta[k] = min (a[k], Y)\n\tb[k] = max (b[k], Y)\n\tc[k] += 1\n\n\tj = 0\n\trp = rm = 0\n\tfor (k = 0; k < m; k++)\n\t\tif (c[k] > 0)\n\t\t\trm = max (rm, a[k] - j/n)\n\t\t\tj += c[k]\n\t\t\trp = max (rp, j/n - b[k])\n\nKp = sqr (n) * rp\nKm = sqr (n) * rm \n\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <math.h>\n\n#include \"mpir.h\"\n#include \"gmpstat.h\"\n\n/* ks (Kp, Km, X, P, n) -- Perform a Kolmogorov-Smirnov test on the N\n   real numbers between zero and one in vector X.  P is the\n   distribution function, called for each entry in X, which should\n   calculate the probability of X being greater than or equal to any\n   number in the sequence.  (For a uniformly distributed sequence of\n   real numbers between zero and one, this is simply equal to X.)  The\n   result is put in Kp and Km.  */\n\nvoid\nks (mpf_t Kp,\n    mpf_t Km,\n    mpf_t X[],\n    void (P) (mpf_t, mpf_t),\n    unsigned long int n)\n{\n  mpf_t Kt;\t\t\t/* temp */\n  mpf_t f_x;\n  mpf_t f_j;\t\t\t/* j */\n  mpf_t f_jnq;\t\t\t/* j/n or (j-1)/n */\n  unsigned long int j;\n\n  /* Sort the vector in ascending order. */  \n  qsort (X, n, sizeof (__mpf_struct), mpf_cmp);\n\n  /* K-S test. 
*/\n  /* \tKp = sqr(n) * max(j/n - F(Xj))\t\tfor all 1<=j<=n\n\tKm = sqr(n) * max(F(Xj) - (j-1)/n))\tfor all 1<=j<=n\n  */\n\n  mpf_init (Kt); mpf_init (f_x); mpf_init (f_j); mpf_init (f_jnq); \n  mpf_set_ui (Kp, 0);  mpf_set_ui (Km, 0);\n  for (j = 1; j <= n; j++)\n    {\n      P (f_x, X[j-1]);\n      mpf_set_ui (f_j, j);\n\n      mpf_div_ui (f_jnq, f_j, n);\n      mpf_sub (Kt, f_jnq, f_x);\n      if (mpf_cmp (Kt, Kp) > 0)\n\tmpf_set (Kp, Kt);\n      if (g_debug > DEBUG_2)\n\t{\n\t  printf (\"j=%lu \", j);\n\t  printf (\"P()=\"); mpf_out_str (stdout, 10, 2, f_x); printf (\"\\t\");\n\n\t  printf (\"jnq=\"); mpf_out_str (stdout, 10, 2, f_jnq); printf (\" \");\n\t  printf (\"diff=\"); mpf_out_str (stdout, 10, 2, Kt); printf (\" \");\n\t  printf (\"Kp=\"); mpf_out_str (stdout, 10, 2, Kp); printf (\"\\t\");\n\t}\n      mpf_sub_ui (f_j, f_j, 1);\n      mpf_div_ui (f_jnq, f_j, n);\n      mpf_sub (Kt, f_x, f_jnq);\n      if (mpf_cmp (Kt, Km) > 0)\n\tmpf_set (Km, Kt);\n\n      if (g_debug > DEBUG_2)\n\t{\n\t  printf (\"jnq=\"); mpf_out_str (stdout, 10, 2, f_jnq); printf (\" \");\n\t  printf (\"diff=\"); mpf_out_str (stdout, 10, 2, Kt); printf (\" \");\n\t  printf (\"Km=\"); mpf_out_str (stdout, 10, 2, Km); printf (\" \");\n\t  printf (\"\\n\");\n\t}\n    }\n  mpf_sqrt_ui (Kt, n);\n  mpf_mul (Kp, Kp, Kt);\n  mpf_mul (Km, Km, Kt);\n\n  mpf_clear (Kt); mpf_clear (f_x); mpf_clear (f_j); mpf_clear (f_jnq); \n}\n\n/* ks_table(val, n) -- calculate probability for Kp/Km less than or\n   equal to VAL with N observations.  See [Knuth section 3.3.1] */\n\nvoid\nks_table (mpf_t p, mpf_t val, const unsigned int n)\n{\n  /* We use Eq. (27), Knuth p.58, skipping O(1/n) for simplicity.\n     This shortcut will result in too high probabilities, especially\n     when n is small.\n\n     Pr(Kp(n) <= s) = 1 - pow(e, -2*s^2) * (1 - 2/3*s/sqrt(n) + O(1/n)) */\n\n  /* We have 's' in variable VAL and store the result in P. 
*/\n\n  mpf_t t1, t2;\n\n  mpf_init (t1); mpf_init (t2);\n\n  /* t1 = 1 - 2/3 * s/sqrt(n) */\n  mpf_sqrt_ui (t1, n);\n  mpf_div (t1, val, t1);\n  mpf_mul_ui (t1, t1, 2);\n  mpf_div_ui (t1, t1, 3);\n  mpf_ui_sub (t1, 1, t1);\n\n  /* t2 = pow(e, -2*s^2) */\n#ifndef OLDGMP\n  mpf_pow_ui (t2, val, 2);\t/* t2 = s^2 */\n  mpf_set_d (t2, exp (-(2.0 * mpf_get_d (t2))));\n#else\n  /* hmmm, gmp doesn't have pow() for floats.  use doubles. */\n  mpf_set_d (t2, pow (M_E, -(2 * pow (mpf_get_d (val), 2))));\n#endif  \n\n  /* p = 1 - t1 * t2 */\n  mpf_mul (t1, t1, t2);\n  mpf_ui_sub (p, 1, t1);\n\n  mpf_clear (t1); mpf_clear (t2);\n}\n\nstatic double x2_table_X[][7] = {\n  { -2.33, -1.64, -.674, 0.0, 0.674, 1.64, 2.33 }, /* x */\n  { 5.4289, 2.6896, .454276, 0.0, .454276, 2.6896, 5.4289} /* x^2 */\n};\n\n#define _2D3 ((double) .6666666666)\n\n/* x2_table (t, v, n) -- return chi-square table row for V in T[]. */\nvoid\nx2_table (double t[],\n\t  unsigned int v)\n{\n  int f;\n\n\n  /* FIXME: Do a table lookup for v <= 30 since the following formula\n     [Knuth, vol 2, 3.3.1] is only good for v > 30. */\n\n  /* value = v + sqrt(2*v) * X[p] + (2/3) * X[p]^2 - 2/3 + O(1/sqrt(t) */\n  /* NOTE: The O() term is ignored for simplicity. */\n  \n  for (f = 0; f < 7; f++)\n      t[f] =\n\tv +\n\tsqrt (2 * v) * x2_table_X[0][f] +\n\t_2D3 * x2_table_X[1][f] - _2D3;\n}\n\n\n/* P(p, x) -- Distribution function.  Calculate the probability of X\nbeing greater than or equal to any number in the sequence.  For a\nrandom real number between zero and one given by a uniformly\ndistributed random number generator, this is simply equal to X. */\n\nstatic void \nP (mpf_t p, mpf_t x)\n{\n  mpf_set (p, x);\n}\n\n/* mpf_freqt() -- Frequency test using KS on N real numbers between zero\n   and one.  See [Knuth vol 2, p.61]. */\nvoid\nmpf_freqt (mpf_t Kp,\n\t   mpf_t Km,\n\t   mpf_t X[],\n\t   const unsigned long int n)\n{\n  ks (Kp, Km, X, P, n);\n}\n\n\n/* The Chi-square test.  Eq. (8) in Knuth vol. 
2 says that if Y[]\n   holds the observations and p[] is the probability for.. (to be\n   continued!)\n\n   V = 1/n * sum((s=1 to k) Y[s]^2 / p[s]) - n */\n\nvoid\nx2 (mpf_t V,\t\t\t/* result */\n    unsigned long int X[],\t/* data */\n    unsigned int k,\t\t/* #of categories */\n    void (P) (mpf_t, unsigned long int, void *), /* probability func */\n    void *x,\t\t\t/* extra user data passed to P() */\n    unsigned long int n)\t/* #of samples */\n{\n  unsigned int f;\n  mpf_t f_t, f_t2;\t\t/* temp floats */\n\n  mpf_init (f_t); mpf_init (f_t2);\n\n\n  mpf_set_ui (V, 0);\n  for (f = 0; f < k; f++)\n    {\n      if (g_debug > DEBUG_2)\n\tfprintf (stderr, \"%u: P()=\", f);\n      mpf_set_ui (f_t, X[f]);\n      mpf_mul (f_t, f_t, f_t);\t/* f_t = X[f]^2 */\n      P (f_t2, f, x);\t\t/* f_t2 = Pr(f) */\n      if (g_debug > DEBUG_2)\n\tmpf_out_str (stderr, 10, 2, f_t2);\n      mpf_div (f_t, f_t, f_t2);\n      mpf_add (V, V, f_t);\n      if (g_debug > DEBUG_2)\n\t{\n\t  fprintf (stderr, \"\\tV=\");\n\t  mpf_out_str (stderr, 10, 2, V);\n\t  fprintf (stderr, \"\\t\");\n\t}\n    }\n  if (g_debug > DEBUG_2)\n    fprintf (stderr, \"\\n\");\n  mpf_div_ui (V, V, n);\n  mpf_sub_ui (V, V, n);\n  \n  mpf_clear (f_t); mpf_clear (f_t2);\n}\n\n/* Pzf(p, s, x) -- Probability for category S in mpz_freqt().  It's\n   1/d for all S.  X is a pointer to an unsigned int holding 'd'. */\nstatic void\nPzf (mpf_t p, unsigned long int s, void *x)\n{\n  mpf_set_ui (p, 1);\n  mpf_div_ui (p, p, *((unsigned int *) x));\n}\n\n/* mpz_freqt(V, X, imax, n) -- Frequency test on integers.  [Knuth,\n   vol 2, 3.3.2].  Keep IMAX low on this one, since we loop from 0 to\n   IMAX.  
128 or 256 could be nice.\n\n   X[] must not contain numbers outside the range 0 <= X <= IMAX.\n\n   Return value is number of observations actally used, after\n   discarding entries out of range.\n\n   Since X[] contains integers between zero and IMAX, inclusive, we\n   have IMAX+1 categories.\n\n   Note that N should be at least 5*IMAX.  Result is put in V and can\n   be compared to output from x2_table (v=IMAX). */\n\nunsigned long int\nmpz_freqt (mpf_t V,\n\t   mpz_t X[],\n\t   unsigned int imax,\n\t   const unsigned long int n)\n{\n  unsigned long int *v;\t\t/* result */\n  unsigned int f;\n  unsigned int d;\t\t/* number of categories = imax+1 */\n  unsigned int uitemp;\n  unsigned long int usedn;\n\n\n  d = imax + 1;\n\n  v = (unsigned long int *) calloc (imax + 1, sizeof (unsigned long int));\n  if (NULL == v)\n    {\n      fprintf (stderr, \"mpz_freqt(): out of memory\\n\");\n      exit (1);\n    }\n\n  /* count */\n  usedn = n;\t\t\t/* actual number of observations */\n  for (f = 0; f < n; f++)\n    {\n      uitemp = mpz_get_ui(X[f]);\n      if (uitemp > imax)\t/* sanity check */\n\t{\n\t  if (g_debug)\n\t    fprintf (stderr, \"mpz_freqt(): warning: input insanity: %u, \"\\\n\t\t     \"ignored.\\n\", uitemp);\n\t  usedn--;\n\t  continue;\n\t}\n      v[uitemp]++;\n    }\n\n  if (g_debug > DEBUG_2)\n    {\n      fprintf (stderr, \"counts:\\n\");\n      for (f = 0; f <= imax; f++)\n\tfprintf (stderr, \"%u:\\t%lu\\n\", f, v[f]);\n    }\n\n  /* chi-square with k=imax+1 and P(x)=1/(imax+1) for all x.*/\n  x2 (V, v, d, Pzf, (void *) &d, usedn);\n\n  free (v);\n  return (usedn);\n}\n\n/* debug dummy to drag in dump funcs */\nvoid\nfoo_debug () \n{\n  if (0)\n    {\n      mpf_dump (0); \n#ifndef OLDGMP\n      mpz_dump (0);\n#endif      \n    }\n}\n\n/* merit (rop, t, v, m) -- calculate merit for spectral test result in\n   dimension T, see Knuth p. 105.  BUGS: Only valid for 2 <= T <=\n   6. 
*/\nvoid\nmerit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m)\n{\n  int f;\n  mpf_t f_m, f_const, f_pi;\n\n  mpf_init (f_m);\n  mpf_set_z (f_m, m);\n  mpf_init_set_d (f_const, M_PI);\n  mpf_init_set_d (f_pi, M_PI);\n  \n  switch (t)\n    {\n    case 2:\t\t\t/* PI */\n      break;\n    case 3:\t\t\t/* PI * 4/3 */\n      mpf_mul_ui (f_const, f_const, 4);\n      mpf_div_ui (f_const, f_const, 3);\n      break;\n    case 4:\t\t\t/* PI^2 * 1/2 */\n      mpf_mul (f_const, f_const, f_pi);\n      mpf_div_ui (f_const, f_const, 2);\n      break;\n    case 5:\t\t\t/* PI^2 * 8/15 */\n      mpf_mul (f_const, f_const, f_pi);\n      mpf_mul_ui (f_const, f_const, 8);\n      mpf_div_ui (f_const, f_const, 15);\n      break;\n    case 6:\t\t\t/* PI^3 * 1/6 */\n      mpf_mul (f_const, f_const, f_pi);\n      mpf_mul (f_const, f_const, f_pi);\n      mpf_div_ui (f_const, f_const, 6);\n      break;\n    default:\n      fprintf (stderr,\n\t       \"spect (merit): can't calculate merit for dimensions > 6\\n\");\n      mpf_set_ui (f_const, 0);\n      break;\n    }\n\n  /* rop = v^t */\n  mpf_set (rop, v);\n  for (f = 1; f < t; f++)\n    mpf_mul (rop, rop, v);\n  mpf_mul (rop, rop, f_const);\n  mpf_div (rop, rop, f_m);\n\n  mpf_clear (f_m);\n  mpf_clear (f_const);\n  mpf_clear (f_pi);\n}\n\ndouble\nmerit_u (unsigned int t, mpf_t v, mpz_t m)\n{\n  mpf_t rop;\n  double res;\n  \n  mpf_init (rop);\n  merit (rop, t, v, m);\n  res = mpf_get_d (rop);\n  mpf_clear (rop);\n  return res;\n}\n\n/* f_floor (rop, op) -- Set rop = floor (op). */\nvoid\nf_floor (mpf_t rop, mpf_t op)\n{\n  mpz_t z;\n\n  mpz_init (z);\n\n  /* No mpf_floor().  Convert to mpz and back. */\n  mpz_set_f (z, op);\n  mpf_set_z (rop, z);\n\n  mpz_clear (z);\n}\n\n\n/* vz_dot (rop, v1, v2, nelem) -- compute dot product of z-vectors V1,\n   V2.  N is number of elements in vectors V1 and V2. 
*/\n\nvoid\nvz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n)\n{\n  mpz_t t;\n\n  mpz_init (t);\n  mpz_set_ui (rop, 0);\n  while (n--)\n    {\n      mpz_mul (t, V1[n], V2[n]);\n      mpz_add (rop, rop, t);\n    }\n\n  mpz_clear (t);\n}\n\nvoid\nspectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m)\n{\n  /* Knuth \"Seminumerical Algorithms, Third Edition\", section 3.3.4\n     (pp. 101-103). */\n\n  /* v[t] = min { sqrt (x[1]^2 + ... + x[t]^2) |\n     x[1] + a*x[2] + ... + pow (a, t-1) * x[t] is congruent to 0 (mod m) } */\n\n\n  /* Variables. */\n  unsigned int ui_t;\n  unsigned int ui_i, ui_j, ui_k, ui_l;\n  mpf_t f_tmp1, f_tmp2;\n  mpz_t tmp1, tmp2, tmp3;\n  mpz_t U[GMP_SPECT_MAXT][GMP_SPECT_MAXT],\n    V[GMP_SPECT_MAXT][GMP_SPECT_MAXT],\n    X[GMP_SPECT_MAXT],\n    Y[GMP_SPECT_MAXT],\n    Z[GMP_SPECT_MAXT];\n  mpz_t h, hp, r, s, p, pp, q, u, v;\n\n  /* GMP inits. */\n  mpf_init (f_tmp1);\n  mpf_init (f_tmp2);\n  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)\n    {\n      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)\n\t{\n\t  mpz_init_set_ui (U[ui_i][ui_j], 0);\n\t  mpz_init_set_ui (V[ui_i][ui_j], 0);\n\t}\n      mpz_init_set_ui (X[ui_i], 0);\n      mpz_init_set_ui (Y[ui_i], 0);\n      mpz_init (Z[ui_i]);\n    }\n  mpz_init (tmp1);\n  mpz_init (tmp2);\n  mpz_init (tmp3);\n  mpz_init (h);\n  mpz_init (hp);\n  mpz_init (r);\n  mpz_init (s);\n  mpz_init (p);\n  mpz_init (pp);\n  mpz_init (q);\n  mpz_init (u);\n  mpz_init (v);\n\n  /* Implementation inits. */\n  if (T > GMP_SPECT_MAXT)\n    T = GMP_SPECT_MAXT;\t\t\t/* FIXME: Lazy. */\n\n  /* S1 [Initialize.] */\n  ui_t = 2 - 1;\t\t\t/* NOTE: `t' in description == ui_t + 1\n                                   for easy indexing */\n  mpz_set (h, a);\n  mpz_set (hp, m);\n  mpz_set_ui (p, 1);\n  mpz_set_ui (pp, 0);\n  mpz_set (r, a);\n  mpz_pow_ui (s, a, 2);\n  mpz_add_ui (s, s, 1);\t\t/* s = 1 + a^2 */\n\n  /* S2 [Euclidean step.] 
*/\n  while (1)\n    {\n      if (g_debug > DEBUG_1)\n\t{\n\t  mpz_mul (tmp1, h, pp);\n\t  mpz_mul (tmp2, hp, p);\n\t  mpz_sub (tmp1, tmp1, tmp2);\n\t  if (mpz_cmpabs (m, tmp1))\n\t    {\n\t      printf (\"***BUG***: h*pp - hp*p = \");\n\t      mpz_out_str (stdout, 10, tmp1);\n\t      printf (\"\\n\");\n\t    }\n\t}\n      if (g_debug > DEBUG_2)\n\t{\n\t  printf (\"hp = \");\n\t  mpz_out_str (stdout, 10, hp);\n\t  printf (\"\\nh = \");\n\t  mpz_out_str (stdout, 10, h);\n\t  printf (\"\\n\");\n\t  fflush (stdout);\n\t}\n\n      if (mpz_sgn (h))\n\tmpz_tdiv_q (q, hp, h);\t/* q = floor(hp/h) */\n      else\n\tmpz_set_ui (q, 1);\n\n      if (g_debug > DEBUG_2)\n\t{\n\t  printf (\"q = \");\n\t  mpz_out_str (stdout, 10, q);\n\t  printf (\"\\n\");\n\t  fflush (stdout);\n\t}\n\n      mpz_mul (tmp1, q, h);\n      mpz_sub (u, hp, tmp1);\t/* u = hp - q*h */\n\n      mpz_mul (tmp1, q, p);\n      mpz_sub (v, pp, tmp1);\t/* v = pp - q*p */\n  \n      mpz_pow_ui (tmp1, u, 2);\n      mpz_pow_ui (tmp2, v, 2);\n      mpz_add (tmp1, tmp1, tmp2);\n      if (mpz_cmp (tmp1, s) < 0)\n\t{\n\t  mpz_set (s, tmp1);\t/* s = u^2 + v^2 */\n\t  mpz_set (hp, h);\t/* hp = h */\n\t  mpz_set (h, u);\t/* h = u */\n\t  mpz_set (pp, p);\t/* pp = p */\n\t  mpz_set (p, v);\t/* p = v */\n\t}\n      else\n\tbreak;\n    }\n\n  /* S3 [Compute v2.] */\n  mpz_sub (u, u, h);\n  mpz_sub (v, v, p);\n\n  mpz_pow_ui (tmp1, u, 2);\n  mpz_pow_ui (tmp2, v, 2);\n  mpz_add (tmp1, tmp1, tmp2);\n  if (mpz_cmp (tmp1, s) < 0)\n    {\n      mpz_set (s, tmp1);\t/* s = u^2 + v^2 */\n      mpz_set (hp, u);\n      mpz_set (pp, v);\n    }\n  mpf_set_z (f_tmp1, s);\n  mpf_sqrt (rop[ui_t - 1], f_tmp1);\n      \n  /* S4 [Advance t.] 
*/\n  mpz_neg (U[0][0], h);\n  mpz_set (U[0][1], p);\n  mpz_neg (U[1][0], hp);\n  mpz_set (U[1][1], pp);\n  \n  mpz_set (V[0][0], pp);\n  mpz_set (V[0][1], hp);\n  mpz_neg (V[1][0], p);\n  mpz_neg (V[1][1], h);\n  if (mpz_cmp_ui (pp, 0) > 0)\n    {\n      mpz_neg (V[0][0], V[0][0]);\n      mpz_neg (V[0][1], V[0][1]);\n      mpz_neg (V[1][0], V[1][0]);\n      mpz_neg (V[1][1], V[1][1]);\n    }\n\n  while (ui_t + 1 != T)\t\t/* S4 loop */\n    {\n      ui_t++;\n      mpz_mul (r, a, r);\n      mpz_mod (r, r, m);\n\n      /* Add new row and column to U and V.  They are initialized with\n\t all elements set to zero, so clearing is not necessary. */\n\n      mpz_neg (U[ui_t][0], r); /* U: First col in new row. */\n      mpz_set_ui (U[ui_t][ui_t], 1); /* U: Last col in new row. */\n\n      mpz_set (V[ui_t][ui_t], m); /* V: Last col in new row. */\n      \n      /* \"Finally, for 1 <= i < t,\n\t   set q = round (vi1 * r / m),\n\t   vit = vi1*r - q*m,\n\t   and Ut=Ut+q*Ui */\n\n      for (ui_i = 0; ui_i < ui_t; ui_i++)\n\t{\n\t  mpz_mul (tmp1, V[ui_i][0], r); /* tmp1=vi1*r */\n\t  zdiv_round (q, tmp1, m); /* q=round(vi1*r/m) */\n\t  mpz_mul (tmp2, q, m);\t/* tmp2=q*m */\n\t  mpz_sub (V[ui_i][ui_t], tmp1, tmp2);\n\n\t  for (ui_j = 0; ui_j <= ui_t; ui_j++) /* U[t] = U[t] + q*U[i] */\n\t    {\n\t      mpz_mul (tmp1, q, U[ui_i][ui_j]);\t/* tmp=q*uij */\n\t      mpz_add (U[ui_t][ui_j], U[ui_t][ui_j], tmp1); /* utj = utj + q*uij */\n\t    }\n\t}\n\n      /* s = min (s, zdot (U[t], U[t]) */\n      vz_dot (tmp1, U[ui_t], U[ui_t], ui_t + 1);\n      if (mpz_cmp (tmp1, s) < 0)\n\tmpz_set (s, tmp1);\n\n      ui_k = ui_t;\n      ui_j = 0;\t\t\t/* WARNING: ui_j no longer a temp. */\n\n      /* S5 [Transform.] 
*/\n      if (g_debug > DEBUG_2)\n\tprintf (\"(t, k, j, q1, q2, ...)\\n\");\n      do \n\t{\n\t  if (g_debug > DEBUG_2)\n\t    printf (\"(%u, %u, %u\", ui_t + 1, ui_k + 1, ui_j + 1);\n\n\t  for (ui_i = 0; ui_i <= ui_t; ui_i++)\n\t    {\n\t      if (ui_i != ui_j)\n\t\t{\n\t\t  vz_dot (tmp1, V[ui_i], V[ui_j], ui_t + 1); /* tmp1=dot(Vi,Vj). */\n\t\t  mpz_abs (tmp2, tmp1);\n\t\t  mpz_mul_ui (tmp2, tmp2, 2); /* tmp2 = 2*abs(dot(Vi,Vj) */\n\t\t  vz_dot (tmp3, V[ui_j], V[ui_j], ui_t + 1); /* tmp3=dot(Vj,Vj). */\n\n\t\t  if (mpz_cmp (tmp2, tmp3) > 0)\n\t\t    {\n\t\t      zdiv_round (q, tmp1, tmp3); /* q=round(Vi.Vj/Vj.Vj) */\n\t\t      if (g_debug > DEBUG_2)\n\t\t\t{\n\t\t\t  printf (\", \");\n\t\t\t  mpz_out_str (stdout, 10, q);\n\t\t\t}\n\n\t\t      for (ui_l = 0; ui_l <= ui_t; ui_l++)\n\t\t\t{\n\t\t\t  mpz_mul (tmp1, q, V[ui_j][ui_l]);\n\t\t\t  mpz_sub (V[ui_i][ui_l], V[ui_i][ui_l], tmp1); /* Vi=Vi-q*Vj */\n\t\t\t  mpz_mul (tmp1, q, U[ui_i][ui_l]);\n\t\t\t  mpz_add (U[ui_j][ui_l], U[ui_j][ui_l], tmp1); /* Uj=Uj+q*Ui */\n\t\t\t}\n\t\t      \n\t\t      vz_dot (tmp1, U[ui_j], U[ui_j], ui_t + 1); /* tmp1=dot(Uj,Uj) */\n\t\t      if (mpz_cmp (tmp1, s) < 0) /* s = min(s,dot(Uj,Uj)) */\n\t\t\tmpz_set (s, tmp1);\n\t\t      ui_k = ui_j;\n\t\t    }\n\t\t  else if (g_debug > DEBUG_2)\n\t\t    printf (\", #\"); /* 2|Vi.Vj| <= Vj.Vj */\n\t\t}\n\t      else if (g_debug > DEBUG_2)\n\t\tprintf (\", *\");\t/* i == j */\n\t    }\n\n\t  if (g_debug > DEBUG_2)\n\t    printf (\")\\n\");\n\n\t  /* S6 [Advance j.] */\n\t  if (ui_j == ui_t)\n\t    ui_j = 0;\n\t  else\n\t    ui_j++;\n\t}\n      while (ui_j != ui_k);\t/* S5 */\n\n      /* From Knuth p. 104: \"The exhaustive search in steps S8-S10\n\t reduces the value of s only rarely.\" */\n#ifdef DO_SEARCH\n      /* S7 [Prepare for search.] 
*/\n      /* Find minimum in (x[1], ..., x[t]) satisfying condition\n\t x[k]^2 <= f(y[1], ...,y[t]) * dot(V[k],V[k]) */\n\n      ui_k = ui_t;\n      if (g_debug > DEBUG_2)\n\t{\n\t  printf (\"searching...\");\n\t  /*for (f = 0; f < ui_t*/\n\t  fflush (stdout);\n\t}\n\n      /* Z[i] = floor (sqrt (floor (dot(V[i],V[i]) * s / m^2))); */\n      mpz_pow_ui (tmp1, m, 2);\n      mpf_set_z (f_tmp1, tmp1);\n      mpf_set_z (f_tmp2, s);\n      mpf_div (f_tmp1, f_tmp2, f_tmp1);\t/* f_tmp1 = s/m^2 */\n      for (ui_i = 0; ui_i <= ui_t; ui_i++)\n\t{\n\t  vz_dot (tmp1, V[ui_i], V[ui_i], ui_t + 1);\n\t  mpf_set_z (f_tmp2, tmp1);\n\t  mpf_mul (f_tmp2, f_tmp2, f_tmp1);\n\t  f_floor (f_tmp2, f_tmp2);\n\t  mpf_sqrt (f_tmp2, f_tmp2);\n\t  mpz_set_f (Z[ui_i], f_tmp2);\n\t}\n\n      /* S8 [Advance X[k].] */\n      do \n\t{\n\t  if (g_debug > DEBUG_2)\n\t    {\n\t      printf (\"X[%u] = \", ui_k);\n\t      mpz_out_str (stdout, 10, X[ui_k]);\n\t      printf (\"\\tZ[%u] = \", ui_k);\n\t      mpz_out_str (stdout, 10, Z[ui_k]);\n\t      printf (\"\\n\");\n\t      fflush (stdout);\n\t    }\n\t      \n\t  if (mpz_cmp (X[ui_k], Z[ui_k]))\n\t    {\n\t      mpz_add_ui (X[ui_k], X[ui_k], 1);\n\t      for (ui_i = 0; ui_i <= ui_t; ui_i++)\n\t\tmpz_add (Y[ui_i], Y[ui_i], U[ui_k][ui_i]);\n\n\t      /* S9 [Advance k.] */\n\t      while (++ui_k <= ui_t)\n\t\t{\n\t\t  mpz_neg (X[ui_k], Z[ui_k]);\n\t\t  mpz_mul_ui (tmp1, Z[ui_k], 2);\n\t\t  for (ui_i = 0; ui_i <= ui_t; ui_i++)\n\t\t    {\n\t\t      mpz_mul (tmp2, tmp1, U[ui_k][ui_i]);\n\t\t      mpz_sub (Y[ui_i], Y[ui_i], tmp2);\n\t\t    }\n\t\t}\n\t      vz_dot (tmp1, Y, Y, ui_t + 1);\n\t      if (mpz_cmp (tmp1, s) < 0)\n\t\tmpz_set (s, tmp1);\n\t    }\n\t}\n      while (--ui_k);\n#endif /* DO_SEARCH */\n      mpf_set_z (f_tmp1, s);\n      mpf_sqrt (rop[ui_t - 1], f_tmp1);\n#ifdef DO_SEARCH\n      if (g_debug > DEBUG_2)\n\tprintf (\"done.\\n\");\n#endif /* DO_SEARCH */\n    } /* S4 loop */\n\n  /* Clear GMP variables. 
*/\n\n  mpf_clear (f_tmp1);\n  mpf_clear (f_tmp2);\n  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)\n    {\n      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)\n\t{\n\t  mpz_clear (U[ui_i][ui_j]);\n\t  mpz_clear (V[ui_i][ui_j]);\n\t}\n      mpz_clear (X[ui_i]);\n      mpz_clear (Y[ui_i]);\n      mpz_clear (Z[ui_i]);\n    }\n  mpz_clear (tmp1);\n  mpz_clear (tmp2);\n  mpz_clear (tmp3);\n  mpz_clear (h);\n  mpz_clear (hp);\n  mpz_clear (r);\n  mpz_clear (s);\n  mpz_clear (p);\n  mpz_clear (pp);\n  mpz_clear (q);\n  mpz_clear (u);\n  mpz_clear (v);\n\n  return;\n}\n\n"
  },
  {
    "path": "tests/rand/t-iset.c",
    "content": "/* Test gmp_randinit_set.\n\nCopyright 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* expect after a gmp_randinit_set that the new and old generators will\n   produce the same sequence of numbers */\nvoid\ncheck_one (const char *name, gmp_randstate_ptr src)\n{\n  gmp_randstate_t dst;\n  mpz_t  sz, dz;\n  int    i;\n\n  gmp_randinit_set (dst, src);\n  mpz_init (sz);\n  mpz_init (dz);\n\n  for (i = 0; i < 20; i++)\n    {\n      mpz_urandomb (sz, src, 123);\n      mpz_urandomb (dz, dst, 123);\n\n      if (mpz_cmp (sz, dz) != 0)\n        {\n          printf     (\"gmp_randinit_set didn't duplicate randstate\\n\");\n          printf     (\"  algorithm: %s\\n\", name);\n          gmp_printf (\"  from src:  %#Zx\\n\", sz);\n          gmp_printf (\"  from dst:  %#Zx\\n\", dz);\n          abort ();\n        }\n    }\n\n  mpz_clear (sz);\n  mpz_clear (dz);\n  gmp_randclear (dst);\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  call_rand_algs (check_one);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/rand/t-lc2exp.c",
    "content": "/* Exercise the lc2exp random functions.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* a=0 and c=0 produces zero results always. 
*/\nvoid\ncheck_zero (mpir_ui m2exp)\n{\n  gmp_randstate_t  r;\n  mpz_t            a;\n  mpir_ui           c;\n  int              i;\n\n  mpz_init_set_ui (a, 0L);\n  c = 0L;\n\n  gmp_randinit_lc_2exp (r, a, c, m2exp);\n  gmp_randseed_ui (r, 0L);\n\n  for (i = 0; i < 5; i++)\n    {\n      mpz_urandomb (a, r, 123L);\n      if (mpz_sgn (a) != 0)\n        {\n          printf (\"check_zero m2exp=%lu: didn't get zero\\n\", m2exp);\n          gmp_printf (\"  rand=%#Zx\\n\", a);\n          abort ();\n        }\n    }\n\n  mpz_clear (a);\n  gmp_randclear (r);\n}\n\n/* negative a */\nvoid\ncheck_nega (void)\n{\n  gmp_randstate_t  r;\n  mpz_t            a;\n  mpir_ui    c, m2exp;\n  int              i;\n\n  mpz_init (a);\n  mpz_setbit (a, 1000L);\n  mpz_neg (a, a);\n  c = 0L;\n  m2exp = 45L;\n\n  gmp_randinit_lc_2exp (r, a, c, m2exp);\n  gmp_randseed_ui (r, 0L);\n\n  for (i = 0; i < 5; i++)\n    {\n      mpz_urandomb (a, r, 123L);\n      if (mpz_sgn (a) != 0)\n        printf (\"check_nega m2exp=%lu: didn't get zero\\n\", m2exp);\n    }\n\n  mpz_clear (a);\n  gmp_randclear (r);\n}\n\nvoid\ncheck_bigc (void)\n{\n  gmp_randstate_t  r;\n  mpz_t            a;\n  mpir_ui    c, m2exp, bits;\n  int              i;\n\n  mpz_init_set_ui (a, 0L);\n  c = ULONG_MAX;\n  m2exp = 8;\n\n  gmp_randinit_lc_2exp (r, a, c, m2exp);\n  gmp_randseed_ui (r, 0L);\n\n  for (i = 0; i < 20; i++)\n    {\n      bits = 123L;\n      mpz_urandomb (a, r, bits);\n      if (mpz_sgn (a) < 0 || mpz_sizeinbase (a, 2) > bits)\n        {\n          printf     (\"check_bigc: mpz_urandomb out of range\\n\");\n          printf     (\"   m2exp=%lu\\n\", m2exp);\n          gmp_printf (\"   rand=%#ZX\\n\", a);\n          gmp_printf (\"   sizeinbase2=%u\\n\", mpz_sizeinbase (a, 2));\n        }\n    }\n\n  mpz_clear (a);\n  gmp_randclear (r);\n}\n\nvoid\ncheck_bigc1 (void)\n{\n  gmp_randstate_t  r;\n  mpz_t            a;\n  mpir_ui    c, m2exp;\n  int              i;\n\n  mpz_init_set_ui (a, 0L);\n  c = ULONG_MAX;\n  m2exp = 
2;\n\n  gmp_randinit_lc_2exp (r, a, c, m2exp);\n  gmp_randseed_ui (r, 0L);\n\n  for (i = 0; i < 20; i++)\n    {\n      mpz_urandomb (a, r, 1L);\n      if (mpz_cmp_ui (a, 1L) != 0)\n        {\n          printf     (\"check_bigc1: mpz_urandomb didn't give 1\\n\");\n          printf     (\"   m2exp=%lu\\n\", m2exp);\n          gmp_printf (\"   got rand=%#ZX\\n\", a);\n          abort ();\n        }\n    }\n\n  mpz_clear (a);\n  gmp_randclear (r);\n}\n\n/* Checks parameters which triggered an assertion failure in the past.\n   Happened when limbs(a)+limbs(c) < bits_to_limbs(m2exp).  */\nvoid\ncheck_bigm (void)\n{\n  gmp_randstate_t rstate;\n  mpz_t a;\n\n  mpz_init_set_ui (a, 5L);\n  gmp_randinit_lc_2exp (rstate, a, 1L, 384L);\n\n  mpz_urandomb (a, rstate, 20L);\n\n  gmp_randclear (rstate);\n  mpz_clear (a);\n}\n\n/* Checks for seeds bigger than the modulus.  */\nvoid\ncheck_bigs (void)\n{\n  gmp_randstate_t rstate;\n  mpz_t sd, a;\n  int i;\n\n  mpz_init (sd);\n  mpz_setbit (sd, 300L);\n  mpz_sub_ui (sd, sd, 1L);\n  mpz_clrbit (sd, 13L);\n  mpz_init_set_ui (a, 123456789L);\n\n  gmp_randinit_lc_2exp (rstate, a, 5L, 64L);\n\n  for (i = 0; i < 20; i++)\n    {\n      mpz_neg (sd, sd);\n      gmp_randseed (rstate, sd);\n      mpz_mul_ui (sd, sd, 7L);\n\n      mpz_urandomb (a, rstate, 80L);\n    }\n\n  gmp_randclear (rstate);\n  mpz_clear (a);\n  mpz_clear (sd);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n\n  check_zero (2L);\n  check_zero (7L);\n  check_zero (32L);\n  check_zero (64L);\n  check_zero (1000L);\n\n  check_nega ();\n  check_bigc ();\n  check_bigc1 ();\n\n  check_bigm ();\n  check_bigs ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/rand/t-mt.c",
    "content": "/* Test the Mersenne Twister random number generator.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef TRUE\n#define TRUE (1)\n#endif\n#ifndef FALSE\n#define FALSE (0)\n#endif\n\n/* Test that the sequence without seeding equals the sequence with the\n   default seed.  */\nint\nchk_default_seed (void)\n{\n  gmp_randstate_t r1, r2;\n  mpz_t a, b;\n  int i;\n  int ok = TRUE;\n\n  mpz_init2 (a, 19936L);\n  mpz_init2 (b, 19936L);\n\n  gmp_randinit_mt (r1);\n  gmp_randinit_mt (r2);\n  gmp_randseed_ui (r2, 5489L); /* Must match DEFAULT_SEED in randmt.c */\n  for (i = 0; i < 3; i++)\n    {\n      /* Extract one whole buffer per iteration.  
*/\n      mpz_urandomb (a, r1, 19936L);\n      mpz_urandomb (b, r2, 19936L);\n      if (mpz_cmp (a, b) != 0)\n\t{\n\t  ok = FALSE;\n\t  printf (\"Default seed fails in iteration %d\\n\", i);\n\t  break;\n\t}\n    }\n  gmp_randclear (r1);\n  gmp_randclear (r2);\n\n  mpz_clear (a);\n  mpz_clear (b);\n  return ok;\n}\n\nint\nmain (int argc, char *argv[])\n{\n  int ok;\n\n  tests_start ();\n\n  ok = chk_default_seed ();\n\n  tests_end ();\n\n  if (ok)\n    return 0; /* pass */\n  else\n    return 1; /* fail */\n}\n"
  },
  {
    "path": "tests/rand/t-rand.c",
    "content": "/* t-rand -- Test random number generators.  */\n\n/*\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"tests.h\"\n\n#define SEED 1\n#define BASE 16\n#define ENTS 10\t\t\t/* Number of entries in array when\n                                   printing.  */\n\n/* These were generated by this very program.  Do not edit!  */\n/* Integers.  
*/\nchar *z1[ENTS] = {\"0\", \"1\", \"1\", \"1\", \"1\", \"0\", \"1\", \"1\", \"1\", \"1\"};\nchar *z2[ENTS] = {\"0\", \"3\", \"1\", \"3\", \"3\", \"0\", \"3\", \"3\", \"3\", \"1\"};\nchar *z3[ENTS] = {\"4\", \"3\", \"1\", \"7\", \"3\", \"0\", \"3\", \"3\", \"3\", \"1\"};\nchar *z4[ENTS] = {\"c\", \"3\", \"1\", \"f\", \"b\", \"8\", \"3\", \"3\", \"3\", \"1\"};\nchar *z5[ENTS] = {\"1c\", \"13\", \"11\", \"1f\", \"b\", \"18\", \"3\", \"13\", \"3\", \"1\"};\n\nchar *z10[ENTS] = {\"29c\", \"213\", \"f1\", \"17f\", \"12b\", \"178\", \"383\", \"d3\", \"3a3\", \"281\"};\n\nchar *z15[ENTS] = {\"29c\", \"1a13\", \"74f1\", \"257f\", \"592b\", \"4978\", \"4783\", \"7cd3\", \"5ba3\", \"4681\"};\nchar *z16[ENTS] = {\"29c\", \"9a13\", \"74f1\", \"a57f\", \"d92b\", \"4978\", \"c783\", \"fcd3\", \"5ba3\", \"c681\"};\nchar *z17[ENTS] = {\"51e\", \"f17a\", \"54ff\", \"1a335\", \"cf65\", \"5d6f\", \"583f\", \"618f\", \"1bc6\", \"98ff\"};\n\nchar *z31[ENTS] = {\"3aecd515\", \"13ae8ec6\", \"518c8090\", \"81ca077\", \"70b7134\", \"7ee78d71\", \"323a7636\", \"2122cb1a\", \"19811941\", \"41fd605\"};\nchar *z32[ENTS] = {\"baecd515\", \"13ae8ec6\", \"518c8090\", \"881ca077\", \"870b7134\", \"7ee78d71\", \"323a7636\", \"a122cb1a\", \"99811941\", \"841fd605\"};\nchar *z33[ENTS] = {\"1faf4cca\", \"15d6ef83b\", \"9095fe72\", \"1b6a3dff6\", \"b17cbddd\", \"16e5209d4\", \"6f65b12c\", \"493bbbc6\", \"abf2a5d5\", \"6d491a3c\"};\n\nchar *z63[ENTS] = {\"48a74f367fa7b5c8\", \"3ba9e9dc1b263076\", \"1e0ac84e7678e0fb\", \"11416581728b3e35\", \"36ab610523f0f1f7\", \"3e540e8e95c0eb4b\", \"439ae16057dbc9d3\", \"734fb260db243950\", \"7d3a317effc289bf\", \"1d80301fb3d1a0d1\"};\nchar *z64[ENTS] = {\"48a74f367fa7b5c8\", \"bba9e9dc1b263076\", \"9e0ac84e7678e0fb\", \"11416581728b3e35\", \"b6ab610523f0f1f7\", \"be540e8e95c0eb4b\", \"439ae16057dbc9d3\", \"f34fb260db243950\", \"fd3a317effc289bf\", \"1d80301fb3d1a0d1\"};\nchar *z65[ENTS] = {\"1ff77710d846d49f0\", \"1b1411701d709ee10\", \"31ffa81a208b6af4\", 
\"446638d431d3c681\", \"df5c569d5baa8b55\", \"197d99ea9bf28e5a0\", \"191ade09edd94cfae\", \"194acefa6dde5e18d\", \"1afc1167c56272d92\", \"d092994da72f206f\"};\n\nchar *z127[ENTS] = {\"2f66ba932aaf58a071fd8f0742a99a0c\", \"73cfa3c664c9c1753507ca60ec6b8425\", \"53ea074ca131dec12cd68b8aa8e20278\", \"3cf5ac8c343532f8a53cc0eb47581f73\", \"50c11d5869e208aa1b9aa317b8c2d0a9\", \"b23163c892876472b1ef19642eace09\", \"489f4c03d41f87509c8d6c90ce674f95\", \"2ab8748c96aa6762ea1932b44c9d7164\", \"98cb5591fc05ad31afbbc1d67b90edd\", \"77848bb991fd0be331adcf1457fbc672\"};\nchar *z128[ENTS] = {\"af66ba932aaf58a071fd8f0742a99a0c\", \"73cfa3c664c9c1753507ca60ec6b8425\", \"53ea074ca131dec12cd68b8aa8e20278\", \"3cf5ac8c343532f8a53cc0eb47581f73\", \"50c11d5869e208aa1b9aa317b8c2d0a9\", \"8b23163c892876472b1ef19642eace09\", \"489f4c03d41f87509c8d6c90ce674f95\", \"aab8748c96aa6762ea1932b44c9d7164\", \"98cb5591fc05ad31afbbc1d67b90edd\", \"f7848bb991fd0be331adcf1457fbc672\"};\n\n/* Floats.  */\nchar *f1[ENTS] = {\"0.@0\", \"0.8@0\", \"0.8@0\", \"0.8@0\", \"0.8@0\", \"0.@0\", \"0.8@0\", \"0.8@0\", \"0.8@0\", \"0.8@0\"};\nchar *f2[ENTS] = {\"0.@0\", \"0.c@0\", \"0.4@0\", \"0.c@0\", \"0.c@0\", \"0.@0\", \"0.c@0\", \"0.c@0\", \"0.c@0\", \"0.4@0\"};\nchar *f3[ENTS] = {\"0.8@0\", \"0.6@0\", \"0.2@0\", \"0.e@0\", \"0.6@0\", \"0.@0\", \"0.6@0\", \"0.6@0\", \"0.6@0\", \"0.2@0\"};\nchar *f4[ENTS] = {\"0.c@0\", \"0.3@0\", \"0.1@0\", \"0.f@0\", \"0.b@0\", \"0.8@0\", \"0.3@0\", \"0.3@0\", \"0.3@0\", \"0.1@0\"};\nchar *f5[ENTS] = {\"0.e@0\", \"0.98@0\", \"0.88@0\", \"0.f8@0\", \"0.58@0\", \"0.c@0\", \"0.18@0\", \"0.98@0\", \"0.18@0\", \"0.8@-1\"};\n\nchar *f10[ENTS] = {\"0.a7@0\", \"0.84c@0\", \"0.3c4@0\", \"0.5fc@0\", \"0.4ac@0\", \"0.5e@0\", \"0.e0c@0\", \"0.34c@0\", \"0.e8c@0\", \"0.a04@0\"};\n\nchar *f15[ENTS] = {\"0.538@-1\", \"0.3426@0\", \"0.e9e2@0\", \"0.4afe@0\", \"0.b256@0\", \"0.92f@0\", \"0.8f06@0\", \"0.f9a6@0\", \"0.b746@0\", \"0.8d02@0\"};\nchar *f16[ENTS] = {\"0.29c@-1\", \"0.9a13@0\", 
\"0.74f1@0\", \"0.a57f@0\", \"0.d92b@0\", \"0.4978@0\", \"0.c783@0\", \"0.fcd3@0\", \"0.5ba3@0\", \"0.c681@0\"};\nchar *f17[ENTS] = {\"0.28f@-1\", \"0.78bd@0\", \"0.2a7f8@0\", \"0.d19a8@0\", \"0.67b28@0\", \"0.2eb78@0\", \"0.2c1f8@0\", \"0.30c78@0\", \"0.de3@-1\", \"0.4c7f8@0\"};\n\nchar *f31[ENTS] = {\"0.75d9aa2a@0\", \"0.275d1d8c@0\", \"0.a319012@0\", \"0.103940ee@0\", \"0.e16e268@-1\", \"0.fdcf1ae2@0\", \"0.6474ec6c@0\", \"0.42459634@0\", \"0.33023282@0\", \"0.83fac0a@-1\"};\nchar *f32[ENTS] = {\"0.baecd515@0\", \"0.13ae8ec6@0\", \"0.518c809@0\", \"0.881ca077@0\", \"0.870b7134@0\", \"0.7ee78d71@0\", \"0.323a7636@0\", \"0.a122cb1a@0\", \"0.99811941@0\", \"0.841fd605@0\"};\nchar *f33[ENTS] = {\"0.fd7a665@-1\", \"0.aeb77c1d8@0\", \"0.484aff39@0\", \"0.db51effb@0\", \"0.58be5eee8@0\", \"0.b72904ea@0\", \"0.37b2d896@0\", \"0.249ddde3@0\", \"0.55f952ea8@0\", \"0.36a48d1e@0\"};\n\nchar *f63[ENTS] = {\"0.914e9e6cff4f6b9@0\", \"0.7753d3b8364c60ec@0\", \"0.3c15909cecf1c1f6@0\", \"0.2282cb02e5167c6a@0\", \"0.6d56c20a47e1e3ee@0\", \"0.7ca81d1d2b81d696@0\", \"0.8735c2c0afb793a6@0\", \"0.e69f64c1b64872a@0\", \"0.fa7462fdff85137e@0\", \"0.3b00603f67a341a2@0\"};\nchar *f64[ENTS] = {\"0.48a74f367fa7b5c8@0\", \"0.bba9e9dc1b263076@0\", \"0.9e0ac84e7678e0fb@0\", \"0.11416581728b3e35@0\", \"0.b6ab610523f0f1f7@0\", \"0.be540e8e95c0eb4b@0\", \"0.439ae16057dbc9d3@0\", \"0.f34fb260db24395@0\", \"0.fd3a317effc289bf@0\", \"0.1d80301fb3d1a0d1@0\"};\nchar *f65[ENTS] = {\"0.ffbbb886c236a4f8@0\", \"0.d8a08b80eb84f708@0\", \"0.18ffd40d1045b57a@0\", \"0.22331c6a18e9e3408@0\", \"0.6fae2b4eadd545aa8@0\", \"0.cbeccf54df9472d@0\", \"0.c8d6f04f6eca67d7@0\", \"0.ca5677d36ef2f0c68@0\", \"0.d7e08b3e2b1396c9@0\", \"0.68494ca6d39790378@0\"};\n\nchar *f127[ENTS] = {\"0.5ecd7526555eb140e3fb1e0e85533418@0\", \"0.e79f478cc99382ea6a0f94c1d8d7084a@0\", \"0.a7d40e994263bd8259ad171551c404f@0\", \"0.79eb5918686a65f14a7981d68eb03ee6@0\", \"0.a1823ab0d3c411543735462f7185a152@0\", 
\"0.16462c791250ec8e563de32c85d59c12@0\", \"0.913e9807a83f0ea1391ad9219cce9f2a@0\", \"0.5570e9192d54cec5d4326568993ae2c8@0\", \"0.13196ab23f80b5a635f7783acf721dba@0\", \"0.ef09177323fa17c6635b9e28aff78ce4@0\"};\nchar *f128[ENTS] = {\"0.af66ba932aaf58a071fd8f0742a99a0c@0\", \"0.73cfa3c664c9c1753507ca60ec6b8425@0\", \"0.53ea074ca131dec12cd68b8aa8e20278@0\", \"0.3cf5ac8c343532f8a53cc0eb47581f73@0\", \"0.50c11d5869e208aa1b9aa317b8c2d0a9@0\", \"0.8b23163c892876472b1ef19642eace09@0\", \"0.489f4c03d41f87509c8d6c90ce674f95@0\", \"0.aab8748c96aa6762ea1932b44c9d7164@0\", \"0.98cb5591fc05ad31afbbc1d67b90edd@-1\", \"0.f7848bb991fd0be331adcf1457fbc672@0\"};\n\n\nstruct rt\n{\n  char **s;\n  int nbits;\n};\n\nstatic struct rt zarr[] =\n{\n  {z1, 1},\n  {z2, 2},\n  {z3, 3},\n  {z4, 4},\n  {z5, 5},\n  {z10, 10},\n  {z15, 15},\n  {z16, 16},\n  {z17, 17},\n  {z31, 31},\n  {z32, 32},\n  {z33, 33},\n  {z63, 63},\n  {z64, 64},\n  {z65, 65},\n  {z127, 127},\n  {z128, 128},\n  {NULL, 0}\n};\n\nstatic struct rt farr[] =\n{\n  {f1, 1},\n  {f2, 2},\n  {f3, 3},\n  {f4, 4},\n  {f5, 5},\n  {f10, 10},\n  {f15, 15},\n  {f16, 16},\n  {f17, 17},\n  {f31, 31},\n  {f32, 32},\n  {f33, 33},\n  {f63, 63},\n  {f64, 64},\n  {f65, 65},\n  {f127, 127},\n  {f128, 128},\n  {NULL, 0}\n};\n  \n\nint \n#if __STDC__\nmain (int argc, char *argv[])\n#else\nmain (argc, argv)\n     int argc;\n     char *argv[];\n#endif\n{\n  static char usage[] = \"\\\nusage: t-rand [function nbits]\\n\\\n  function is one of z, f\\n\\\n  nbits is number of bits\\n\\\n\";\n  gmp_randstate_t rstate;\n  mpz_t z, rz;\n  mpf_t f, rf;\n  enum { Z, F } func = Z;\n  int nbits = 1;\n  int verify_mode_flag = 1;\n  register int i;\n  struct rt *a;\n\n  tests_start();\n\n  if (argc > 1)\n    {\n      if (argc < 3)\n\t{\n\t  fputs (usage, stderr);\n\t  exit (1);\n\t}\n      verify_mode_flag = 0;\n      if (*argv[1] == 'z')\n\tfunc = Z;\n      if (*argv[1] == 'f')\n\tfunc = F;\n      nbits = atoi (argv[2]);\n    }\n\n  mpz_init (rz);\n  \n  if 
(verify_mode_flag)\n    {\n#ifdef VERBOSE\n      printf (\"%s: verifying random numbers: \", argv[0]);\n#endif\n\n      /* Test z.  */\n      mpz_init (z);\n      for (a = zarr; a->s != NULL; a++)\n\t{\n\t  gmp_randinit_lc_2exp_size (rstate, a->nbits);\n\t  gmp_randseed_ui (rstate, SEED);\n\n\t  for (i = 0; i < ENTS; i++)\n\t    {\n\t      mpz_urandomb (rz, rstate, a->nbits);\n\t      mpz_set_str (z, a->s[i], BASE);\n\t      if (mpz_cmp (z, rz) != 0)\n\t\t{\n\t\t  printf (\"z%d: \", a->nbits); \n\t\t  mpz_out_str (stdout, BASE, rz);\n\t\t  printf (\" should be \");\n\t\t  mpz_out_str (stdout, BASE, z);\n\t\t  puts (\"\");\n\t\t  exit (1);\n\t\t}\n\t    }\n#ifdef VERBOSE\n\t  printf (\"z%d \", a->nbits);\n#endif\n\t  gmp_randclear (rstate);\n\t}\n      mpz_clear (z);\n\n\n      /* Test f.  */\n      for (a = farr; a->s != NULL; a++)\n\t{\n\t  gmp_randinit_lc_2exp_size (rstate, a->nbits);\n\t  gmp_randseed_ui (rstate, SEED);\n\n\t  mpf_init2 (f, a->nbits);\n\t  mpf_init2 (rf, a->nbits);\n\t  for (i = 0; i < ENTS; i++)\n\t    {\n\t      mpf_urandomb (rf, rstate, a->nbits);\n\t      mpf_set_str (f, a->s[i], BASE);\n\t      if (mpf_cmp (f, rf) != 0)\n\t\t{\n\t\t  printf (\"f%d: \", a->nbits); \n\t\t  mpf_out_str (stdout, BASE, a->nbits, rf);\n\t\t  printf (\" should be \");\n\t\t  mpf_out_str (stdout, BASE, a->nbits, f);\n\t\t  puts (\"\");\n\t\t  exit (1);\n\t\t}\n\t    }\n#ifdef VERBOSE\n\t  printf (\"f%d \", a->nbits);\n#endif\n\t  gmp_randclear (rstate);\n\t  mpf_clear (f);\n\t  mpf_clear (rf);\n\t}\n\n#ifdef VERBOSE\n      puts (\"\");\n#endif\n    }\n  else\t\t\t\t/* Print mode.  
*/\n    {\n      gmp_randinit_lc_2exp_size (rstate, nbits);\n      gmp_randseed_ui (rstate, SEED);\n\n      switch (func)\n\t{\n\tcase Z:\n\t  printf (\"char *z%d[ENTS] = {\", nbits);\n\t  for (i = 0; i < ENTS; i++)\n\t    {\n\t      mpz_urandomb (rz, rstate, nbits);\n\t      printf (\"\\\"\");\n\t      mpz_out_str (stdout, BASE, rz);\n\t      printf (\"\\\"\");\n\t      if (i != ENTS - 1)\n\t\tprintf (\", \");\n\t    }\n\t  printf (\"};\\n\");\n\t  printf (\"  {z%d, %d},\\n\", nbits, nbits);\n\t  break;\n\n\tcase F:\n\t  printf (\"char *f%d[ENTS] = {\", nbits);\n\t  mpf_init2 (rf, nbits);\n\t  for (i = 0; i < ENTS; i++)\n\t    {\n\t      mpf_urandomb (rf, rstate, nbits);\n\t      printf (\"\\\"\");\n\t      mpf_out_str (stdout, BASE, nbits, rf);\n\t      printf (\"\\\"\");\n\t      if (i != ENTS - 1)\n\t\tprintf (\", \");\n\t    }\n\t  printf (\"};\\n\");\n\t  printf (\"  {f%d, %d},\\n\", nbits, nbits);\n\t  mpf_clear (rf);\n\t  break;\n\n\tdefault:\n\t  exit (1);\n\t}\n\n      gmp_randclear (rstate);\n    }\n  \n  mpz_clear (rz);\n  tests_end();\n  return 0;\n}\n"
  },
  {
    "path": "tests/rand/t-urbui.c",
    "content": "/* Test gmp_urandomb_ui.\n\nCopyright 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n/* Expect numbers generated by rstate to obey the number of bits requested.\n   No point testing bits==BITS_PER_ULONG, since any return is acceptable in\n   that case.  
*/\nvoid\ncheck_one (const char *name, gmp_randstate_ptr rstate)\n{\n  mpir_ui  bits, limit, got;\n  int    i;\n\n  for (bits = 0; bits < BITS_PER_UI; bits++)\n    {\n      /* will demand got < limit */\n      limit = (((mpir_ui)1) << bits);\n\n      for (i = 0; i < 5; i++)\n        {\n          got = gmp_urandomb_ui (rstate, bits);\n          if (got >= limit)\n            {\n              printf (\"Return value out of range:\\n\");\n              printf (\"  algorithm: %s\\n\", name);\n              printf (\"  bits:  %lu\\n\", bits);\n              printf (\"  limit: %#lx\\n\", limit);\n              printf (\"  got:   %#lx\\n\", got);\n              abort ();\n            }\n        }\n    }\n}\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  call_rand_algs (check_one);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/rand/t-urmui.c",
    "content": "/* Test gmp_urandomm_ui.\n\nCopyright 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\n\n/* Expect numbers generated by rstate to obey the limit requested. 
*/\nvoid\ncheck_one (const char *name, gmp_randstate_ptr rstate)\n{\n  static const mpir_ui  n_table[] = {\n    1, 2, 3, 4, 5, 6, 7, 8,\n    123, 456, 789,\n\n    255, 256, 257,\n    1023, 1024, 1025,\n    32767, 32768, 32769,\n\n    GMP_UI_MAX/2-2, GMP_UI_MAX/2-1, GMP_UI_MAX/2, GMP_UI_MAX/2+1, GMP_UI_MAX/2+2,\n\n    GMP_UI_MAX-2, GMP_UI_MAX-1, GMP_UI_MAX,\n  };\n\n  mpir_ui  got, n;\n  int    i, j;\n\n  for (i = 0; i < numberof (n_table); i++)\n    {\n      n = n_table[i];\n\n      for (j = 0; j < 5; j++)\n        {\n          got = gmp_urandomm_ui (rstate, n);\n          if (got >= n)\n            {\n              printf (\"Return value out of range:\\n\");\n              printf (\"  algorithm: %s\\n\", name);\n              printf (\"  n:     %#Mx\\n\", n);\n              printf (\"  got:   %#Mx\\n\", got);\n              abort ();\n            }\n        }\n    }\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  call_rand_algs (check_one);\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/rand/t-urndmm.c",
    "content": "/* Test mpz_urandomm.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n#ifndef TRUE\n#define TRUE (1)\n#endif\n#ifndef FALSE\n#define FALSE (0)\n#endif\n\nint\ncheck_params (void)\n{\n  gmp_randstate_t r1, r2;\n  mpz_t a, b, m;\n  int i;\n  int result;\n\n  result = TRUE;\n\n  mpz_init (a);\n  mpz_init (b);\n  mpz_init (m);\n\n  if (result)\n    {\n      /* Test the consistency between urandomm and urandomb. */\n      gmp_randinit_default (r1);\n      gmp_randinit_default (r2);\n      gmp_randseed_ui (r1, 85L);\n      gmp_randseed_ui (r2, 85L);\n      mpz_set_ui (m, 0L);\n      mpz_setbit (m, 80L);\n      for (i = 0; i < 100; i++)\n\t{\n\t  mpz_urandomm (a, r1, m);\n\t  mpz_urandomb (b, r2, 80L);\n\t  if (mpz_cmp (a, b) != 0)\n\t    {\n\t      result = FALSE;\n\t      printf (\"mpz_urandomm != mpz_urandomb\\n\");\n\t      break;\n\t    }\n\t}\n      gmp_randclear (r1);\n      gmp_randclear (r2);\n    }\n\n  if (0)\n    {\n      /* Test that mpz_urandomm returns the correct result with a\n\t broken LC.  
*/\n      mpz_set_ui (a, 0L);\n      gmp_randinit_lc_2exp (r1, a, 0xffL, 8L);\n      mpz_set_ui (m, 5L);\n      /* Warning: This code hangs in gmp 4.1 and below */\n      for (i = 0; i < 100; i++)\n\t{\n\t  mpz_urandomm (a, r1, m);\n\t  if (mpz_cmp_ui (a, 2L) != 0)\n\t    {\n\t      result = FALSE;\n\t      gmp_printf (\"mpz_urandomm returns %Zd instead of 2\\n\", a);\n\t      break;\n\t    }\n\t}\n      gmp_randclear (r1);\n    }\n\n  if (result)\n    {\n      /* Test that the results are always in range for either\n         positive or negative values of m.  */\n      gmp_randinit_default (r1);\n      mpz_set_ui (m, 5L);\n      mpz_set_si (b, -5L);\n      for (i = 0; i < 100; i++)\n\t{\n\t  mpz_urandomm (a, r1, m);\n\t  if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)\n\t    {\n\t      result = FALSE;\n\t      gmp_printf (\"Out-of-range or non-positive value: %Zd\\n\", a);\n\t      break;\n\t    }\n\t  mpz_urandomm (a, r1, b);\n\t  if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)\n\t    {\n\t      result = FALSE;\n\t      gmp_printf (\"Out-of-range or non-positive value (from negative modulus): %Zd\\n\", a);\n\t      break;\n\t    }\n\t}\n      gmp_randclear (r1);\n    }\n\n  if (result)\n    {\n      /* Test that m=1 forces always result=0.  */\n      gmp_randinit_default (r1);\n      mpz_set_ui (m, 1L);\n      for (i = 0; i < 100; i++)\n\t{\n\t  mpz_urandomm (a, r1, m);\n\t  if (mpz_sgn (a) != 0)\n\t    {\n\t      result = FALSE;\n\t      gmp_printf (\"mpz_urandomm fails with m=1 (result=%Zd)\\n\", a);\n\t      break;\n\t    }\n\t}\n      gmp_randclear (r1);\n    }\n\n  mpz_clear (a);\n  mpz_clear (b);\n  mpz_clear (m);\n  return result;\n}\n\nint\nmain (int argc, char *argv[])\n{\n  int result = TRUE;\n\n  tests_start ();\n\n  if (result)\n    if (!check_params ())\n      result = FALSE;\n\n  tests_end ();\n\n  if (result)\n    return 0; /* pass */\n  else\n    return 1; /* fail */\n}\n"
  },
  {
    "path": "tests/rand/zdiv_round.c",
    "content": "/* zdiv_round() -- divide integers, round to nearest */\n\n/*\nCopyright 1999 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n\nvoid\nzdiv_round (mpz_t rop, mpz_t n, mpz_t d)\n{\n  mpf_t f_n, f_d;\n\n  mpf_init (f_n);\n  mpf_init (f_d);\n\n  mpf_set_z (f_d, d);\n  mpf_set_z (f_n, n);\n\n  mpf_div (f_n, f_n, f_d);\n  mpf_set_d (f_d, .5);\n  if (mpf_sgn (f_n) < 0)\n    mpf_neg (f_d, f_d);\n  mpf_add (f_n, f_n, f_d);\n  mpz_set_f (rop, f_n);\n\n  mpf_clear (f_n);\n  mpf_clear (f_d);\n  return;\n}\n"
  },
  {
    "path": "tests/refmpf.c",
    "content": "/* Reference floating point routines.\n\nCopyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\nrefmpf_add (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_size_t hi, lo, size;\n  mp_ptr ut, vt, wt;\n  int neg;\n  mp_exp_t exp;\n  mp_limb_t cy;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  if (SIZ (u) == 0)\n    {\n      size = ABSIZ (v);\n      wt = (mp_ptr) TMP_ALLOC ((size+1) * BYTES_PER_MP_LIMB);\n      MPN_COPY (wt, PTR (v), size);\n      exp = EXP (v);\n      neg = SIZ (v) < 0;\n      goto done;\n    }\n  if (SIZ (v) == 0)\n    {\n      size = ABSIZ (u);\n      wt = (mp_ptr) TMP_ALLOC ((size+1) * BYTES_PER_MP_LIMB);\n      MPN_COPY (wt, PTR (u), size);\n      exp = EXP (u);\n      neg = SIZ (u) < 0;\n      goto done;\n    }\n  if ((SIZ (u) ^ SIZ (v)) < 0)\n    {\n      mpf_t tmp;\n      SIZ (tmp) = -SIZ (v);\n      EXP (tmp) = EXP (v);\n      PTR (tmp) = PTR (v);\n      refmpf_sub (w, u, tmp);\n      return;\n    }\n  neg = SIZ (u) < 0;\n\n  /* Compute the significance of the hi and lo end of the result.  
*/\n  hi = MAX (EXP (u), EXP (v));\n  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));\n  size = hi - lo;\n  ut = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  vt = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  wt = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  MPN_ZERO (ut, size);\n  MPN_ZERO (vt, size);\n  {int off;\n  off = size + (EXP (u) - hi) - ABSIZ (u);\n  MPN_COPY (ut + off, PTR (u), ABSIZ (u));\n  off = size + (EXP (v) - hi) - ABSIZ (v);\n  MPN_COPY (vt + off, PTR (v), ABSIZ (v));\n  }\n\n  cy = mpn_add_n (wt, ut, vt, size);\n  wt[size] = cy;\n  size += cy;\n  exp = hi + cy;\n\ndone:\n  if (size > PREC (w))\n    {\n      wt += size - PREC (w);\n      size = PREC (w);\n    }\n  MPN_COPY (PTR (w), wt, size);\n  SIZ (w) = neg == 0 ? size : -size;\n  EXP (w) = exp;\n  TMP_FREE;\n}\n\n\n/* Add 1 \"unit in last place\" (ie. in the least significant limb) to f.\n   f cannot be zero, since that has no well-defined \"last place\".\n\n   This routine is designed for use in cases where we pay close attention to\n   the size of the data value and are using that (and the exponent) to\n   indicate the accurate part of a result, or similar.  For this reason, if\n   there's a carry out we don't store 1 and adjust the exponent, we just\n   leave 100..00.  We don't even adjust if there's a carry out of prec+1\n   limbs, but instead give up in that case (which we intend shouldn't arise\n   in normal circumstances).  
*/\n\nvoid\nrefmpf_add_ulp (mpf_ptr f)\n{\n  mp_ptr     fp = PTR(f);\n  mp_size_t  fsize = SIZ(f);\n  mp_size_t  abs_fsize = ABSIZ(f);\n  mp_limb_t  c;\n\n  if (fsize == 0)\n    {\n      printf (\"Oops, refmpf_add_ulp called with f==0\\n\");\n      abort ();\n    }\n\n  c = refmpn_add_1 (fp, fp, abs_fsize, CNST_LIMB(1));\n  if (c != 0)\n    {\n      if (abs_fsize >= PREC(f) + 1)\n        {\n          printf (\"Oops, refmpf_add_ulp carried out of prec+1 limbs\\n\");\n          abort ();\n        }\n\n      fp[abs_fsize] = c;\n      abs_fsize++;\n      SIZ(f) = (fsize > 0 ? abs_fsize : - abs_fsize);\n      EXP(f)++;\n    }\n}\n\n/* Fill f with size limbs of the given value, setup as an integer. */\nvoid\nrefmpf_fill (mpf_ptr f, mp_size_t size, mp_limb_t value)\n{\n  ASSERT (size >= 0);\n  size = MIN (PREC(f) + 1, size);\n  SIZ(f) = size;\n  EXP(f) = size;\n  refmpn_fill (PTR(f), size, value);\n}\n\n/* Strip high zero limbs from the f data, adjusting exponent accordingly. */\nvoid\nrefmpf_normalize (mpf_ptr f)\n{\n  while (SIZ(f) != 0 && PTR(f)[ABSIZ(f)-1] == 0)\n    {\n      SIZ(f) = (SIZ(f) >= 0 ? SIZ(f)-1 : SIZ(f)+1);\n      EXP(f) --;\n    }\n  if (SIZ(f) == 0)\n    EXP(f) = 0;\n}\n\n/* refmpf_set_overlap sets up dst as a copy of src, but with PREC(dst)\n   unchanged, in preparation for an overlap test.\n\n   The full value of src is copied, and the space at PTR(dst) is extended as\n   necessary.  The way PREC(dst) is unchanged is as per an mpf_set_prec_raw.\n   The return value is the new PTR(dst) space precision, in bits, ready for\n   a restoring mpf_set_prec_raw before mpf_clear.  
*/\n\nunsigned long\nrefmpf_set_overlap (mpf_ptr dst, mpf_srcptr src)\n{\n  mp_size_t  dprec = PREC(dst);\n  mp_size_t  ssize = ABSIZ(src);\n  unsigned long  ret;\n\n  refmpf_set_prec_limbs (dst, (unsigned long) MAX (dprec, ssize));\n  mpf_set (dst, src);\n\n  ret = mpf_get_prec (dst);\n  PREC(dst) = dprec;\n  return ret;\n}\n\n/* Like mpf_set_prec, but taking a precision in limbs.\n   PREC(f) ends up as the given \"prec\" value.  */\nvoid\nrefmpf_set_prec_limbs (mpf_ptr f, unsigned long prec)\n{\n  mpf_set_prec (f, __GMPF_PREC_TO_BITS (prec));\n}\n\n\nvoid\nrefmpf_sub (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)\n{\n  mp_size_t hi, lo, size;\n  mp_ptr ut, vt, wt;\n  int neg;\n  mp_exp_t exp;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  if (SIZ (u) == 0)\n    {\n      size = ABSIZ (v);\n      wt = (mp_ptr) TMP_ALLOC ((size+1) * BYTES_PER_MP_LIMB);\n      MPN_COPY (wt, PTR (v), size);\n      exp = EXP (v);\n      neg = SIZ (v) > 0;\n      goto done;\n    }\n  if (SIZ (v) == 0)\n    {\n      size = ABSIZ (u);\n      wt = (mp_ptr) TMP_ALLOC ((size+1) * BYTES_PER_MP_LIMB);\n      MPN_COPY (wt, PTR (u), size);\n      exp = EXP (u);\n      neg = SIZ (u) < 0;\n      goto done;\n    }\n  if ((SIZ (u) ^ SIZ (v)) < 0)\n    {\n      mpf_t tmp;\n      SIZ (tmp) = -SIZ (v);\n      EXP (tmp) = EXP (v);\n      PTR (tmp) = PTR (v);\n      refmpf_add (w, u, tmp);\n      if (SIZ (u) < 0)\n\tmpf_neg (w, w);\n      return;\n    }\n  neg = SIZ (u) < 0;\n\n  /* Compute the significance of the hi and lo end of the result.  
*/\n  hi = MAX (EXP (u), EXP (v));\n  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));\n  size = hi - lo;\n  ut = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  vt = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  wt = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);\n  MPN_ZERO (ut, size);\n  MPN_ZERO (vt, size);\n  {int off;\n  off = size + (EXP (u) - hi) - ABSIZ (u);\n  MPN_COPY (ut + off, PTR (u), ABSIZ (u));\n  off = size + (EXP (v) - hi) - ABSIZ (v);\n  MPN_COPY (vt + off, PTR (v), ABSIZ (v));\n  }\n\n  if (mpn_cmp (ut, vt, size) >= 0)\n    mpn_sub_n (wt, ut, vt, size);\n  else\n    {\n      mpn_sub_n (wt, vt, ut, size);\n      neg ^= 1;\n    }\n  exp = hi;\n  while (size != 0 && wt[size - 1] == 0)\n    {\n      size--;\n      exp--;\n    }\n\ndone:\n  if (size > PREC (w))\n    {\n      wt += size - PREC (w);\n      size = PREC (w);\n    }\n  MPN_COPY (PTR (w), wt, size);\n  SIZ (w) = neg == 0 ? size : -size;\n  EXP (w) = exp;\n  TMP_FREE;\n}\n\n\n/* Validate got by comparing to want.  Return 1 if good, 0 if bad.\n\n   The data in got is compared to that in want, up to either PREC(got) limbs\n   or the size of got, whichever is bigger.  Clearly we always demand\n   PREC(got) of accuracy, but we go further and say that if got is bigger\n   then any extra must be correct too.\n\n   want needs to have enough data to allow this comparison.  The size in\n   want doesn't have to be that big though, if it's smaller then further low\n   limbs are taken to be zero.\n\n   This validation approach is designed to allow some flexibility in exactly\n   how much data is generated by an mpf function, ie. either prec or prec+1\n   limbs.  We don't try to make a reference function that emulates that same\n   size decision, instead the idea is for a validation function to generate\n   at least as much data as the real function, then compare.  
*/\n\nint\nrefmpf_validate (const char *name, mpf_srcptr got, mpf_srcptr want)\n{\n  int  bad = 0;\n  mp_size_t  gsize, wsize, cmpsize, i;\n  mp_srcptr  gp, wp;\n  mp_limb_t  glimb, wlimb;\n\n  MPF_CHECK_FORMAT (got);\n\n  if (EXP (got) != EXP (want))\n    {\n      printf (\"%s: wrong exponent\\n\", name);\n      bad = 1;\n    }\n\n  gsize = SIZ (got);\n  wsize = SIZ (want);\n  if ((gsize < 0 && wsize > 0) || (gsize > 0 && wsize < 0))\n    {\n      printf (\"%s: wrong sign\\n\", name);\n      bad = 1;\n    }\n\n  gsize = ABS (gsize);\n  wsize = ABS (wsize);\n\n  /* most significant limb of respective data */\n  gp = PTR (got) + gsize - 1;\n  wp = PTR (want) + wsize - 1;\n\n  /* compare limb data */\n  cmpsize = MAX (PREC (got), gsize);\n  for (i = 0; i < cmpsize; i++)\n    {\n      glimb = (i < gsize ? gp[-i] : 0);\n      wlimb = (i < wsize ? wp[-i] : 0);\n\n      if (glimb != wlimb)\n        {\n          printf (\"%s: wrong data starting at index %ld from top\\n\",\n                  name, (long) i);\n          bad = 1;\n          break;\n        }\n    }\n\n  if (bad)\n    {\n      printf (\"  prec       %d\\n\", PREC(got));\n      printf (\"  exp got    %ld\\n\", (long) EXP(got));\n      printf (\"  exp want   %ld\\n\", (long) EXP(want));\n      printf (\"  size got   %d\\n\", SIZ(got));\n      printf (\"  size want  %d\\n\", SIZ(want));\n      printf (\"  limbs (high to low)\\n\");\n      printf (\"   got  \");\n      for (i = ABSIZ(got)-1; i >= 0; i--)\n        {\n          gmp_printf (\"%MX\", PTR(got)[i]);\n          if (i != 0)\n            printf (\",\");\n        }\n      printf (\"\\n\");\n      printf (\"   want \");\n      for (i = ABSIZ(want)-1; i >= 0; i--)\n        {\n          gmp_printf (\"%MX\", PTR(want)[i]);\n          if (i != 0)\n            printf (\",\");\n        }\n      printf (\"\\n\");\n      return 0;\n    }\n\n  return 1;\n}\n\n\nint\nrefmpf_validate_division (const char *name, mpf_srcptr got,\n                          mpf_srcptr n, 
mpf_srcptr d)\n{\n  mp_size_t  nsize, dsize, sign, prec, qsize, tsize;\n  mp_srcptr  np, dp;\n  mp_ptr     tp, qp, rp;\n  mpf_t      want;\n  int        ret;\n\n  nsize = SIZ (n);\n  dsize = SIZ (d);\n  ASSERT_ALWAYS (dsize != 0);\n\n  sign = nsize ^ dsize;\n  nsize = ABS (nsize);\n  dsize = ABS (dsize);\n\n  np = PTR (n);\n  dp = PTR (d);\n  prec = PREC (got);\n\n  EXP (want) = EXP (n) - EXP (d) + 1;\n\n  qsize = prec + 2;            /* at least prec+1 limbs, after high zero */\n  tsize = qsize + dsize - 1;   /* dividend size to give desired qsize */\n\n  /* dividend n, extended or truncated */\n  tp = refmpn_malloc_limbs (tsize);\n  refmpn_copy_extend (tp, tsize, np, nsize);\n\n  qp = refmpn_malloc_limbs (qsize);\n  rp = refmpn_malloc_limbs (dsize);  /* remainder, unused */\n\n  ASSERT_ALWAYS (qsize == tsize - dsize + 1);\n  refmpn_tdiv_qr (qp, rp, (mp_size_t) 0, tp, tsize, dp, dsize);\n\n  PTR (want) = qp;\n  SIZ (want) = (sign >= 0 ? qsize : -qsize);\n  refmpf_normalize (want);\n\n  ret = refmpf_validate (name, got, want);\n\n  free (tp);\n  free (qp);\n  free (rp);\n\n  return ret;\n}\n"
  },
  {
    "path": "tests/refmpn.c",
    "content": "/* Reference mpn functions, designed to be simple, portable and independent\n   of the normal gmp code.  Speed isn't a consideration.\n\nCopyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free\nSoftware Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* The following license applies to refmpn_mulmid_basecase -- middle product *only*\n\nCopyright (C) 2009, David Harvey\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n*/\n\n/* Most routines have assertions representing what the mpn routines are\n   supposed to accept.  Many of these reference routines do sensible things\n   outside these ranges (eg. for size==0), but the assertions are present to\n   pick up bad parameters passed here that are about to be passed the same\n   to a real mpn routine being compared.  */\n\n/* always do assertion checking */\n#define WANT_ASSERT  1\n\n#include <stdio.h>  /* for NULL */\n#include <stdlib.h> /* for malloc */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"tests.h\"\n\n\n\n/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes\n   in bytes. */\nint\nbyte_overlap_p (const void *v_xp, mp_size_t xsize,\n                const void *v_yp, mp_size_t ysize)\n{\n  const char *xp = v_xp;\n  const char *yp = v_yp;\n\n  ASSERT (xsize >= 0);\n  ASSERT (ysize >= 0);\n\n  /* no wraparounds */\n  ASSERT (xp+xsize >= xp);\n  ASSERT (yp+ysize >= yp);\n\n  if (xp + xsize <= yp)\n    return 0;\n\n  if (yp + ysize <= xp)\n    return 0;\n\n  return 1;\n}\n\n/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */\nint\nrefmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)\n{\n  return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,\n                         yp, ysize * BYTES_PER_MP_LIMB);\n}\n\n/* Check overlap for a routine defined to work low to high. 
*/\nint\nrefmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)\n{\n  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));\n}\n\n/* Check overlap for a routine defined to work high to low. */\nint\nrefmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)\n{\n  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));\n}\n\n/* Check overlap for a standard routine requiring equal or separate. */\nint\nrefmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)\n{\n  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));\n}\nint\nrefmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,\n                               mp_size_t size)\n{\n  return (refmpn_overlap_fullonly_p (dst, src1, size)\n          && refmpn_overlap_fullonly_p (dst, src2, size));\n}\n\n\nmp_ptr\nrefmpn_malloc_limbs (mp_size_t size)\n{\n  mp_ptr  p;\n  ASSERT (size >= 0);\n  if (size == 0)\n    size = 1;\n  p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB));\n  ASSERT (p != NULL);\n  return p;\n}\n\n/* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free\n * memory allocated by refmpn_malloc_limbs_aligned. 
*/\nvoid\nrefmpn_free_limbs (mp_ptr p)\n{\n  free (p);\n}\n\nmp_ptr\nrefmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)\n{\n  mp_ptr  p;\n  p = refmpn_malloc_limbs (size);\n  refmpn_copyi (p, ptr, size);\n  return p;\n}\n\n/* malloc n limbs on a multiple of m bytes boundary */\nmp_ptr\nrefmpn_malloc_limbs_aligned (mp_size_t n, size_t m)\n{\n  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);\n}\n\n\nvoid\nrefmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)\n{\n  mp_size_t  i;\n  ASSERT (size >= 0);\n  for (i = 0; i < size; i++)\n    ptr[i] = value;\n}\n\nvoid\nrefmpn_zero (mp_ptr ptr, mp_size_t size)\n{\n  refmpn_fill (ptr, size, CNST_LIMB(0));\n}\n\nvoid\nrefmpn_store (mp_ptr ptr, mp_size_t size,mp_limb_t val)\n{\n  refmpn_fill (ptr, size, val);\n}\n\nvoid\nrefmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)\n{\n  ASSERT (newsize >= oldsize);\n  refmpn_zero (ptr+oldsize, newsize-oldsize);\n}\n\nint\nrefmpn_zero_p (mp_srcptr ptr, mp_size_t size)\n{\n  mp_size_t  i;\n  for (i = 0; i < size; i++)\n    if (ptr[i] != 0)\n      return 0;\n  return 1;\n}\n\nmp_size_t\nrefmpn_normalize (mp_srcptr ptr, mp_size_t size)\n{\n  ASSERT (size >= 0);\n  while (size > 0 && ptr[size-1] == 0)\n    size--;\n  return size;\n}\n\n/* the highest one bit in x */\nmp_limb_t\nrefmpn_msbone (mp_limb_t x)\n{\n  mp_limb_t  n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);\n\n  while (n != 0)\n    {\n      if (x & n)\n        break;\n      n >>= 1;\n    }\n  return n;\n}\n\n/* a mask of the highest one bit plus and all bits below */\nmp_limb_t\nrefmpn_msbone_mask (mp_limb_t x)\n{\n  if (x == 0)\n    return 0;\n\n  return (refmpn_msbone (x) << 1) - 1;\n}\n\n/* How many digits in the given base will fit in a limb.\n   Notice that the product b is allowed to be equal to the limit\n   2^GMP_NUMB_BITS, this ensures the result for base==2 will be\n   GMP_NUMB_BITS (and similarly other powers of 2).  
*/\nint\nrefmpn_chars_per_limb (int base)\n{\n  mp_limb_t  limit[2], b[2];\n  int        chars_per_limb;\n\n  ASSERT (base >= 2);\n\n  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */\n  limit[1] = 1;\n  b[0] = 1;      /* b = 1 */\n  b[1] = 0;\n\n  chars_per_limb = 0;\n  for (;;)\n    {\n      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))\n        break;\n      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)\n        break;\n      chars_per_limb++;\n    }\n  return chars_per_limb;\n}\n\n/* The biggest value base**n which fits in GMP_NUMB_BITS. */\nmp_limb_t\nrefmpn_big_base (int base)\n{\n  int        chars_per_limb = refmpn_chars_per_limb (base);\n  int        i;\n  mp_limb_t  bb;\n\n  ASSERT (base >= 2);\n  bb = 1;\n  for (i = 0; i < chars_per_limb; i++)\n    bb *= base;\n  return bb;\n}\n\n\nvoid\nrefmpn_setbit (mp_ptr ptr, unsigned long bit)\n{\n  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);\n}\n\nvoid\nrefmpn_clrbit (mp_ptr ptr, unsigned long bit)\n{\n  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));\n}\n\n#define REFMPN_TSTBIT(ptr,bit) \\\n  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)\n\nint\nrefmpn_tstbit (mp_srcptr ptr, unsigned long bit)\n{\n  return REFMPN_TSTBIT (ptr, bit);\n}\n\nunsigned long\nrefmpn_scan0 (mp_srcptr ptr, unsigned long bit)\n{\n  while (REFMPN_TSTBIT (ptr, bit) != 0)\n    bit++;\n  return bit;\n}\n\nunsigned long\nrefmpn_scan1 (mp_srcptr ptr, unsigned long bit)\n{\n  while (REFMPN_TSTBIT (ptr, bit) == 0)\n    bit++;\n  return bit;\n}\n\nvoid\nrefmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));\n  refmpn_copyi (rp, sp, size);\n}\n\nvoid\nrefmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  mp_size_t i;\n\n  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));\n  ASSERT (size >= 0);\n\n  for (i = 0; i < size; i++)\n    rp[i] = sp[i];\n}\n\nvoid\nrefmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t 
size)\n{\n  mp_size_t i;\n\n  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));\n  ASSERT (size >= 0);\n\n  for (i = size-1; i >= 0; i--)\n    rp[i] = sp[i];\n}\n\n/* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low\n   zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */\nvoid\nrefmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)\n{\n  ASSERT (wsize >= 0);\n  ASSERT (xsize >= 0);\n\n  /* high part of x if x bigger than w */\n  if (xsize > wsize)\n    {\n      xp += xsize - wsize;\n      xsize = wsize;\n    }\n\n  refmpn_copy (wp + wsize-xsize, xp, xsize);\n  refmpn_zero (wp, wsize-xsize);\n}\n\nvoid\nrefmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  mp_size_t i;\n\n  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT_MPN (sp, size);\n\n  for (i = 0; i < size; i++)\n    rp[i] = sp[i] ^ GMP_NUMB_MASK;\n}\n\nint\nrefmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  mp_size_t  i;\n\n  ASSERT (size >= 1);\n  ASSERT_MPN (xp, size);\n  ASSERT_MPN (yp, size);\n\n  for (i = size-1; i >= 0; i--)\n    {\n      if (xp[i] > yp[i])  return 1;\n      if (xp[i] < yp[i])  return -1;\n    }\n  return 0;\n}\n\nint\nrefmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  if (size == 0)\n    return 0;\n  else\n    return refmpn_cmp (xp, yp, size);\n}\n\nint\nrefmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,\n                     mp_srcptr yp, mp_size_t ysize)\n{\n  int  opp, cmp;\n\n  ASSERT_MPN (xp, xsize);\n  ASSERT_MPN (yp, ysize);\n\n  opp = (xsize < ysize);\n  if (opp)\n    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);\n\n  if (! refmpn_zero_p (xp+ysize, xsize-ysize))\n    cmp = 1;\n  else\n    cmp = refmpn_cmp (xp, yp, ysize);\n\n  return (opp ? 
-cmp : cmp);\n}\n\nint\nrefmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  mp_size_t  i;\n  ASSERT (size >= 0);\n\n  for (i = 0; i < size; i++)\n      if (xp[i] != yp[i])\n        return 0;\n  return 1;\n}\n\n\n#define LOGOPS(operation)                                               \\\n  {                                                                     \\\n    mp_size_t  i;                                                       \\\n                                                                        \\\n    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \\\n    ASSERT (size >= 1);                                                 \\\n    ASSERT_MPN (s1p, size);                                             \\\n    ASSERT_MPN (s2p, size);                                             \\\n                                                                        \\\n    for (i = 0; i < size; i++)                                          \\\n      rp[i] = operation;                                                \\\n  }\n\nvoid\nrefmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS (s1p[i] & s2p[i]);\n}\nvoid\nrefmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS (s1p[i] & ~s2p[i]);\n}\nvoid\nrefmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);\n}\nvoid\nrefmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS (s1p[i] | s2p[i]);\n}\nvoid\nrefmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));\n}\nvoid\nrefmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);\n}\nvoid\nrefmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  LOGOPS (s1p[i] ^ s2p[i]);\n}\nvoid\nrefmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t 
size)\n{\n  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);\n}\n\n\n/* set *dh,*dl to mh:ml - sh:sl, in full limbs */\nvoid\nrefmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,\n                   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)\n{\n  *dl = ml - sl;\n  *dh = mh - sh - (ml < sl);\n}\n\n\n/* set *w to x+y, return 0 or 1 carry */\nmp_limb_t\nref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)\n{\n  mp_limb_t  sum, cy;\n\n  ASSERT_LIMB (x);\n  ASSERT_LIMB (y);\n\n  sum = x + y;\n#if GMP_NAIL_BITS == 0\n  *w = sum;\n  cy = (sum < x);\n#else\n  *w = sum & GMP_NUMB_MASK;\n  cy = (sum >> GMP_NUMB_BITS);\n#endif\n  return cy;\n}\n\n/* set *w to x-y, return 0 or 1 borrow */\nmp_limb_t\nref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)\n{\n  mp_limb_t  diff, cy;\n\n  ASSERT_LIMB (x);\n  ASSERT_LIMB (y);\n\n  diff = x - y;\n#if GMP_NAIL_BITS == 0\n  *w = diff;\n  cy = (diff > x);\n#else\n  *w = diff & GMP_NUMB_MASK;\n  cy = (diff >> GMP_NUMB_BITS) & 1;\n#endif\n  return cy;\n}\n\n/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */\nmp_limb_t\nadc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)\n{\n  mp_limb_t  r;\n\n  ASSERT_LIMB (x);\n  ASSERT_LIMB (y);\n  ASSERT (c == 0 || c == 1);\n\n  r = ref_addc_limb (w, x, y);\n  return r + ref_addc_limb (w, *w, c);\n}\n\n/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */\nmp_limb_t\nsbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)\n{\n  mp_limb_t  r;\n\n  ASSERT_LIMB (x);\n  ASSERT_LIMB (y);\n  ASSERT (c == 0 || c == 1);\n\n  r = ref_subc_limb (w, x, y);\n  return r + ref_subc_limb (w, *w, c);\n}\n\n\n#define AORS_1(operation)                               \\\n  {                                                     \\\n    mp_limb_t  i;                                       \\\n                                                        \\\n    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \\\n    ASSERT (size >= 1);                                 \\\n    ASSERT_MPN 
(sp, size);                              \\\n    ASSERT_LIMB (n);                                    \\\n                                                        \\\n    for (i = 0; i < size; i++)                          \\\n      n = operation (&rp[i], sp[i], n);                 \\\n    return n;                                           \\\n  }\n\nmp_limb_t\nrefmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)\n{\n  AORS_1 (ref_addc_limb);\n}\nmp_limb_t\nrefmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)\n{\n  AORS_1 (ref_subc_limb);\n}\n\n#define AORS_NC(operation)                                              \\\n  {                                                                     \\\n    mp_size_t  i;                                                       \\\n                                                                        \\\n    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \\\n    ASSERT (carry == 0 || carry == 1);                                  \\\n    ASSERT (size >= 1);                                                 \\\n    ASSERT_MPN (s1p, size);                                             \\\n    ASSERT_MPN (s2p, size);                                             \\\n                                                                        \\\n    for (i = 0; i < size; i++)                                          \\\n      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \\\n    return carry;                                                       \\\n  }\n\nmp_limb_t\nrefmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,\n               mp_limb_t carry)\n{\n  AORS_NC (adc);\n}\nmp_limb_t\nrefmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,\n               mp_limb_t carry)\n{\n  AORS_NC (sbb);\n}\n\n\nmp_limb_t\nrefmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  return refmpn_add_nc (rp, s1p, s2p, size, 
CNST_LIMB(0));\n}\n\nmp_limb_t\nrefmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));\n}\n\nmp_limb_t \nrefmpn_addadd_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_srcptr zp, mp_size_t n)\n{\n  mp_limb_t r;\n  mp_limb_t * tp = refmpn_malloc_limbs (n);\n  r = refmpn_add_n (tp, yp, zp, n);\n  r += refmpn_add_n (rp, tp, xp, n);\n  free(tp);\n  return r;\n}\n\nint \nrefmpn_addsub_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_srcptr zp, mp_size_t n)\n{\n  mp_limb_t r;\n  mp_limb_t * tp = refmpn_malloc_limbs (n);\n  r = - refmpn_sub_n (tp, yp, zp, n);\n  r += refmpn_add_n (rp, tp, xp, n);\n  free(tp);\n  return r;\n}\n\nmp_limb_t \nrefmpn_subadd_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_srcptr zp, mp_size_t n)\n{\n  mp_limb_t r;\n  mp_limb_t * tp = refmpn_malloc_limbs (n);\n  r =  refmpn_sub_n (tp, xp, zp, n);\n  r += refmpn_sub_n (rp, tp, yp, n);\n  free(tp);\n  return r;\n}\n\nmp_limb_t\nrefmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, vp, n, 1);\n  cy += refmpn_add_n (rp, up, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, vp, n, 1);\n  cy += refmpn_sub_n (rp, up, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,unsigned int c)\n{return refmpn_addlsh_nc(rp,up,vp,n,c,0);}\n\nmp_limb_t\nrefmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,unsigned int c)\n{return 
refmpn_sublsh_nc(rp,up,vp,n,c,0);}\n\nmp_limb_t\nrefmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,unsigned int c,mp_limb_t cin)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n  ASSERT(c>0);ASSERT(c<GMP_NUMB_BITS);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, vp, n, c);\n  tp[0] |= cin >> (GMP_NUMB_BITS-c);\n  cy += refmpn_add_n (rp, up, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,unsigned int c,mp_limb_t cin)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n  ASSERT(c>0);ASSERT(c<GMP_NUMB_BITS);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, vp, n, c);\n  tp[0] |= cin >> (GMP_NUMB_BITS-c);\n  cy += refmpn_sub_n (rp, up, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_inclsh_n (mp_ptr rp, mp_srcptr up, mp_size_t n,unsigned int c)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_p (rp, up, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (rp, n);\n  ASSERT(c>0);ASSERT(c<GMP_NUMB_BITS);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, up, n, c);\n  cy += refmpn_add_n (rp, rp, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_declsh_n (mp_ptr rp, mp_srcptr up, mp_size_t n,unsigned int c)\n{\n  mp_limb_t cy;\n  mp_ptr tp;\n\n  ASSERT (refmpn_overlap_fullonly_p (rp, up, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (rp, n);\n  ASSERT(c>0);ASSERT(c<GMP_NUMB_BITS);\n\n  tp = refmpn_malloc_limbs (n);\n  cy  = refmpn_lshift (tp, up, n, c);\n  cy += refmpn_sub_n (rp, rp, tp, n);\n  free (tp);\n  return cy;\n}\n\nmp_limb_t\nrefmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t cya, cys;\n\n  ASSERT 
(refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n\n  cya = refmpn_add_n (rp, up, vp, n);\n  cys = refmpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);\n  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);\n  return cys;\n}\nmp_limb_t\nrefmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  mp_limb_t cya, cys;\n\n  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));\n  ASSERT (n >= 1);\n  ASSERT_MPN (up, n);\n  ASSERT_MPN (vp, n);\n\n  cya = refmpn_sub_n (rp, up, vp, n);\n  cys = refmpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);\n  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);\n  return cys;\n}\n\nmp_limb_t refmpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,mp_size_t n, mp_limb_t cy)\n{mp_limb_t  h=0,l=0;\n mp_size_t i,j=n-1;\n \n ASSERT(n>=1);\n ASSERT(refmpn_overlap_fullonly_p(rp,up,n));\n ASSERT(refmpn_overlap_fullonly_p(rp,vp,n));\n ASSERT(!refmpn_overlap_p(rp,n,ep,2));\n ASSERT(!refmpn_overlap_p(rp,n,yp,n));\n ASSERT(!refmpn_overlap_p(up,n,ep,2));\n ASSERT(!refmpn_overlap_p(vp,n,ep,2));\n ASSERT(!refmpn_overlap_p(yp,n,ep,2));\nfor(i=0;i<n;i++,j--)\n   {cy=adc(&rp[i],up[i],vp[i],cy);\n    if(cy!=0)\n      {h+=adc(&l,l,yp[j],0);\n      }\n   }\nep[0]=l;ep[1]=h;\nreturn cy;}\n\nmp_limb_t refmpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,mp_size_t n, mp_limb_t cy)\n{mp_limb_t  h=0,l=0;\n mp_size_t i,j=n-1;\n \n ASSERT(n>=1);\n ASSERT(refmpn_overlap_fullonly_p(rp,up,n));\n ASSERT(refmpn_overlap_fullonly_p(rp,vp,n));\n ASSERT(!refmpn_overlap_p(rp,n,ep,2));\n ASSERT(!refmpn_overlap_p(rp,n,yp,n));\n ASSERT(!refmpn_overlap_p(up,n,ep,2));\n ASSERT(!refmpn_overlap_p(vp,n,ep,2));\n ASSERT(!refmpn_overlap_p(yp,n,ep,2));\nfor(i=0;i<n;i++,j--)\n   {cy=sbb(&rp[i],up[i],vp[i],cy);\n    if(cy!=0)\n      {h+=adc(&l,l,yp[j],0);\n      }\n   }\nep[0]=l;ep[1]=h;\nreturn cy;}\n\n\nmp_limb_t refmpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr 
vp, mp_ptr ep, mp_srcptr yp1,mp_srcptr yp2,mp_size_t n, mp_limb_t cy)\n{mp_limb_t  h1=0,l1=0,h2=0,l2=0;\n mp_size_t i,j=n-1;\n \n ASSERT(n>=1);\n ASSERT(refmpn_overlap_fullonly_p(rp,up,n));\n ASSERT(refmpn_overlap_fullonly_p(rp,vp,n));\n ASSERT(!refmpn_overlap_p(rp,n,ep,4));\n ASSERT(!refmpn_overlap_p(rp,n,yp1,n));\n ASSERT(!refmpn_overlap_p(rp,n,yp2,n));\n ASSERT(!refmpn_overlap_p(up,n,ep,4));\n ASSERT(!refmpn_overlap_p(vp,n,ep,4));\n ASSERT(!refmpn_overlap_p(yp1,n,ep,4));\n ASSERT(!refmpn_overlap_p(yp2,n,ep,4));\nfor(i=0;i<n;i++,j--)\n   {cy=adc(&rp[i],up[i],vp[i],cy);\n    if(cy!=0)\n      {h1+=adc(&l1,l1,yp1[j],0);\n       h2+=adc(&l2,l2,yp2[j],0);\n      }\n   }\nep[0]=l1;ep[1]=h1;ep[2]=l2;ep[3]=h2;\nreturn cy;}\n\nmp_limb_t refmpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp1,mp_srcptr yp2,mp_size_t n, mp_limb_t cy)\n{mp_limb_t  h1=0,l1=0,h2=0,l2=0;\n mp_size_t i,j=n-1;\n \n ASSERT(n>=1);\n ASSERT(refmpn_overlap_fullonly_p(rp,up,n));\n ASSERT(refmpn_overlap_fullonly_p(rp,vp,n));\n ASSERT(!refmpn_overlap_p(rp,n,ep,4));\n ASSERT(!refmpn_overlap_p(rp,n,yp1,n));\n ASSERT(!refmpn_overlap_p(rp,n,yp2,n));\n ASSERT(!refmpn_overlap_p(up,n,ep,4));\n ASSERT(!refmpn_overlap_p(vp,n,ep,4));\n ASSERT(!refmpn_overlap_p(yp1,n,ep,4));\n ASSERT(!refmpn_overlap_p(yp2,n,ep,4));\nfor(i=0;i<n;i++,j--)\n   {cy=sbb(&rp[i],up[i],vp[i],cy);\n    if(cy!=0)\n      {h1+=adc(&l1,l1,yp1[j],0);\n       h2+=adc(&l2,l2,yp2[j],0);\n      }\n   }\nep[0]=l1;ep[1]=h1;ep[2]=l2;ep[3]=h2;\nreturn cy;}\n\n/* Twos complement, return borrow. 
*/\nmp_limb_t\nrefmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size)\n{\n  mp_ptr     zeros;\n  mp_limb_t  ret;\n\n  ASSERT (size >= 1);\n\n  zeros = refmpn_malloc_limbs (size);\n  refmpn_fill (zeros, size, CNST_LIMB(0));\n  ret = refmpn_sub_n (dst, zeros, src, size);\n  free (zeros);\n  return ret;\n}\n\n\n#define AORS(aors_n, aors_1)                                    \\\n  {                                                             \\\n    mp_limb_t  c;                                               \\\n    ASSERT (s1size >= s2size);                                  \\\n    ASSERT (s2size >= 1);                                       \\\n    c = aors_n (rp, s1p, s2p, s2size);                          \\\n    if (s1size-s2size != 0)                                     \\\n      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \\\n    return c;                                                   \\\n  }\nmp_limb_t\nrefmpn_add (mp_ptr rp,\n            mp_srcptr s1p, mp_size_t s1size,\n            mp_srcptr s2p, mp_size_t s2size)\n{\n  AORS (refmpn_add_n, refmpn_add_1);\n}\nmp_limb_t\nrefmpn_sub (mp_ptr rp,\n            mp_srcptr s1p, mp_size_t s1size,\n            mp_srcptr s2p, mp_size_t s2size)\n{\n  AORS (refmpn_sub_n, refmpn_sub_1);\n}\n\n\n#define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)\n#define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)\n\n#define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)\n#define HIGHMASK  SHIFTHIGH(LOWMASK)\n\n#define LOWPART(x)   ((x) & LOWMASK)\n#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)\n\n/* Set return:*lo to x*y, using full limbs not nails. 
*/\nmp_limb_t\nrefmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)\n{\n  mp_limb_t  hi, s;\n\n  *lo = LOWPART(x) * LOWPART(y);\n  hi = HIGHPART(x) * HIGHPART(y);\n\n  s = LOWPART(x) * HIGHPART(y);\n  hi += HIGHPART(s);\n  s = SHIFTHIGH(LOWPART(s));\n  *lo += s;\n  hi += (*lo < s);\n\n  s = HIGHPART(x) * LOWPART(y);\n  hi += HIGHPART(s);\n  s = SHIFTHIGH(LOWPART(s));\n  *lo += s;\n  hi += (*lo < s);\n\n  return hi;\n}\n\nmp_limb_t\nrefmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)\n{\n  return refmpn_umul_ppmm (lo, x, y);\n}\n\nmp_limb_t\nrefmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,\n               mp_limb_t carry)\n{\n  mp_size_t  i;\n  mp_limb_t  hi, lo;\n\n  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT_MPN (sp, size);\n  ASSERT_LIMB (multiplier);\n  ASSERT_LIMB (carry);\n\n  multiplier <<= GMP_NAIL_BITS;\n  for (i = 0; i < size; i++)\n    {\n      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);\n      lo >>= GMP_NAIL_BITS;\n      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));\n      rp[i] = lo;\n      carry = hi;\n    }\n  return carry;\n}\n\nmp_limb_t\nrefmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)\n{\n  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));\n}\n\n\nmp_limb_t\nrefmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult)\n{\n  mp_ptr     src_copy;\n  mp_limb_t  c;\n\n  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));\n  ASSERT (! 
refmpn_overlap_p (dst, size+1, mult, (mp_size_t) 2));\n  ASSERT (size >= 2);\n  ASSERT_MPN (mult, 2);\n\n  /* in case dst==src */\n  src_copy = refmpn_malloc_limbs (size);\n  refmpn_copyi (src_copy, src, size);\n  src = src_copy;\n\n  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);\n  c = refmpn_addmul_1 (dst+1, src, size, mult[1]);\n  free (src_copy);\n  return c;\n}\n\n\n#define AORSMUL_1C(operation_n)                                 \\\n  {                                                             \\\n    mp_ptr     p;                                               \\\n    mp_limb_t  ret;                                             \\\n                                                                \\\n    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \\\n                                                                \\\n    p = refmpn_malloc_limbs (size);                             \\\n    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \\\n    ret += operation_n (rp, rp, p, size);                       \\\n                                                                \\\n    free (p);                                                   \\\n    return ret;                                                 \\\n  }\n\nmp_limb_t\nrefmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                  mp_limb_t multiplier, mp_limb_t carry)\n{\n  AORSMUL_1C (refmpn_add_n);\n}\nmp_limb_t\nrefmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                  mp_limb_t multiplier, mp_limb_t carry)\n{\n  AORSMUL_1C (refmpn_sub_n);\n}\n\n\nmp_limb_t\nrefmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)\n{\n  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));\n}\nmp_limb_t\nrefmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)\n{\n  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));\n}\n\n\nmp_limb_t\nrefmpn_addmul_N (mp_ptr dst, mp_srcptr src, 
mp_size_t size,\n                 mp_srcptr mult, mp_size_t msize)\n{\n  mp_ptr     src_copy;\n  mp_limb_t  ret;\n  mp_size_t  i;\n\n  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));\n  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));\n  ASSERT (size >= msize);\n  ASSERT_MPN (mult, msize);\n\n  /* in case dst==src */\n  src_copy = refmpn_malloc_limbs (size);\n  refmpn_copyi (src_copy, src, size);\n  src = src_copy;\n\n  for (i = 0; i < msize-1; i++)\n    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);\n  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);\n\n  free (src_copy);\n  return ret;\n}\n\nmp_limb_t\nrefmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);\n}\nmp_limb_t\nrefmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);\n}\nmp_limb_t\nrefmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);\n}\nmp_limb_t\nrefmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);\n}\nmp_limb_t\nrefmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);\n}\nmp_limb_t\nrefmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);\n}\nmp_limb_t\nrefmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)\n{\n  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);\n}\n\nmp_limb_t\nrefmpn_sumdiff_nc (mp_ptr r1p, mp_ptr r2p,\n                  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,\n                  mp_limb_t carry)\n{\n  mp_ptr p;\n  mp_limb_t acy, scy;\n\n  /* Destinations can't overlap. */\n  ASSERT (! 
refmpn_overlap_p (r1p, size, r2p, size));\n  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));\n  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));\n  ASSERT (size >= 1);\n\n  /* in case r1p==s1p or r1p==s2p */\n  p = refmpn_malloc_limbs (size);\n\n  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);\n  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);\n  refmpn_copyi (r1p, p, size);\n\n  free (p);\n  return 2 * acy + scy;\n}\n\nmp_limb_t\nrefmpn_nsumdiff_n (mp_ptr r1p, mp_ptr r2p,\n                  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  mp_ptr p;\n  mp_limb_t acy, ncy, scy;\n\n  /* Destinations can't overlap. */\n  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));\n  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));\n  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));\n  ASSERT (size >= 1);\n\n  /* in case r1p==s1p or r1p==s2p */\n  p = refmpn_malloc_limbs (size);\n\n  acy = refmpn_add_n (p, s1p, s2p, size);\n  ncy = refmpn_neg_n (p, p, size);\n  scy = refmpn_sub_n (r2p, s1p, s2p, size);\n  refmpn_copyi (r1p, p, size);\n\n  free (p);\n  return 2 * (acy + ncy) + scy;\n}\n\nmp_limb_t\nrefmpn_sumdiff_n (mp_ptr r1p, mp_ptr r2p,\n\t\t mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  return refmpn_sumdiff_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));\n}\n\n\n/* Right shift hi,lo and return the low limb of the result.\n   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */\nmp_limb_t\nrshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)\n{\n  ASSERT (shift < GMP_NUMB_BITS);\n  if (shift == 0)\n    return lo;\n  else\n    return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;\n}\n\n/* Left shift hi,lo and return the high limb of the result.\n   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). 
*/\nmp_limb_t\nlshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)\n{\n  ASSERT (shift < GMP_NUMB_BITS);\n  if (shift == 0)\n    return hi;\n  else\n    return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;\n}\n\n\nmp_limb_t\nrefmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)\n{\n  mp_limb_t  ret;\n  mp_size_t  i;\n\n  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);\n  ASSERT_MPN (sp, size);\n\n  ret = rshift_make (sp[0], CNST_LIMB(0), shift);\n\n  for (i = 0; i < size-1; i++)\n    rp[i] = rshift_make (sp[i+1], sp[i], shift);\n\n  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);\n  return ret;\n}\n\nmp_limb_t\nrefmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)\n{\n  mp_limb_t  ret;\n  mp_size_t  i;\n\n  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);\n  ASSERT_MPN (sp, size);\n\n  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);\n\n  for (i = size-2; i >= 0; i--)\n    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);\n\n  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);\n  return ret;\n}\n\nmp_limb_t\nrefmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)\n{\n  mp_limb_t  ret;\n\n  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);\n  ASSERT_MPN (sp, size);\n\n  ret=refmpn_lshift(rp,sp,size,shift);\n  refmpn_com_n(rp,rp,size);\n  return ret;\n}\n\nvoid \nrefmpn_not(mp_ptr rp, mp_size_t n)\n{\n\trefmpn_com_n (rp,rp, n);\n}\n\nmp_limb_t \nrefmpn_lshift1(mp_ptr rp, mp_srcptr xp, mp_size_t n)\n{\n\treturn refmpn_lshift (rp, xp, n, 1);\n}\n\nmp_limb_t \nrefmpn_rshift1(mp_ptr rp, mp_srcptr xp, mp_size_t n)\n{\n\treturn refmpn_rshift (rp, xp, n, 1);\n}\n\nmp_limb_t \nrefmpn_double(mp_ptr rp, mp_size_t n)\n{\n\treturn refmpn_lshift (rp, rp, n, 
1);\n}\n\nmp_limb_t \nrefmpn_half(mp_ptr rp, mp_size_t n)\n{\n\treturn refmpn_rshift (rp, rp, n, 1);\n}\n\nmp_limb_t \nrefmpn_lshift2(mp_ptr rp, mp_srcptr xp, mp_size_t n)\n{\n\treturn refmpn_lshift (rp, xp, n, 2);\n}\n\nmp_limb_t \nrefmpn_rshift2(mp_ptr rp, mp_srcptr xp, mp_size_t n)\n{\n\treturn refmpn_rshift (rp, xp, n, 2);\n}\n\n/* accepting shift==0 and doing a plain copyi or copyd in that case */\nmp_limb_t\nrefmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)\n{\n  if (shift == 0)\n    {\n      refmpn_copyi (rp, sp, size);\n      return 0;\n    }\n  else\n    {\n      return refmpn_rshift (rp, sp, size, shift);\n    }\n}\nmp_limb_t\nrefmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)\n{\n  if (shift == 0)\n    {\n      refmpn_copyd (rp, sp, size);\n      return 0;\n    }\n  else\n    {\n      return refmpn_lshift (rp, sp, size, shift);\n    }\n}\n\n/* accepting size==0 too */\nmp_limb_t\nrefmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                           unsigned shift)\n{\n  return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));\n}\nmp_limb_t\nrefmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                           unsigned shift)\n{\n  return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));\n}\n\n/* Divide h,l by d, return quotient, store remainder to *rp.\n   Operates on full limbs, not nails.\n   Must have h < d.\n   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. 
*/\nmp_limb_t\nrefmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)\n{\n  mp_limb_t  q, r;\n  int  n;\n\n  ASSERT (d != 0);\n  ASSERT (h < d);\n\n#if 0\n  udiv_qrnnd (q, r, h, l, d);\n  *rp = r;\n  return q;\n#endif\n\n  n = refmpn_count_leading_zeros (d);\n  d <<= n;\n\n  if (n != 0)\n    {\n      h = (h << n) | (l >> (GMP_LIMB_BITS - n));\n      l <<= n;\n    }\n\n  __udiv_qrnnd_c (q, r, h, l, d);\n  r >>= n;\n  *rp = r;\n  return q;\n}\n\nmp_limb_t\nrefmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)\n{\n  return refmpn_udiv_qrnnd (rp, h, l, d);\n}\n\n/* This little subroutine avoids some bad code generation from i386 gcc 3.0\n   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */\nstatic mp_limb_t\nrefmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                             mp_limb_t divisor, mp_limb_t carry)\n{\n  mp_size_t  i;\n  mp_limb_t rem[1];\n  for (i = size-1; i >= 0; i--)\n    {\n      rp[i] = refmpn_udiv_qrnnd (rem, carry,\n                                 sp[i] << GMP_NAIL_BITS,\n                                 divisor << GMP_NAIL_BITS);\n      carry = *rem >> GMP_NAIL_BITS;\n    }\n  return carry;\n}\n\nmp_limb_t\nrefmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,\n                  mp_limb_t divisor, mp_limb_t carry)\n{\n  mp_ptr     sp_orig;\n  mp_ptr     prod;\n  mp_limb_t  carry_orig;\n\n  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));\n  ASSERT (size >= 0);\n  ASSERT (carry < divisor);\n  ASSERT_MPN (sp, size);\n  ASSERT_LIMB (divisor);\n  ASSERT_LIMB (carry);\n\n  if (size == 0)\n    return carry;\n\n  sp_orig = refmpn_memdup_limbs (sp, size);\n  prod = refmpn_malloc_limbs (size);\n  carry_orig = carry;\n\n  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);\n\n  /* check by multiplying back */\n#if 0\n  printf (\"size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\\n\",\n          size, divisor, carry_orig, carry);\n  
mpn_trace(\"s\",sp_copy,size);\n  mpn_trace(\"r\",rp,size);\n  printf (\"mul_1c %lX\\n\", refmpn_mul_1c (prod, rp, size, divisor, carry));\n  mpn_trace(\"p\",prod,size);\n#endif\n  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);\n  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);\n  free (sp_orig);\n  free (prod);\n\n  return carry;\n}\n\nmp_limb_t\nrefmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{\n  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));\n}\n\nmp_limb_t\nrefmpn_divrem_euclidean_qr_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{\n  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));\n}\n\nmp_limb_t refmpn_divrem_hensel_qr_1(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)\n{mp_size_t j;mp_limb_t c,h,q,dummy,h1,t,m,r;mp_ptr tp,tqp;\n\nASSERT(n>0);ASSERT_MPN(xp,n);ASSERT(refmpn_overlap_fullonly_p(qp,xp,n));\nASSERT(d%2==1);modlimb_invert(m,d);\nc=0;h=0;t=0;tqp=refmpn_malloc_limbs(n);\nfor(j=0;j<=n-1;j++)\n   {h1=xp[j];\n    t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}\n    q=h1*m;\n    tqp[j]=q;\n    h=refmpn_umul_ppmm(&dummy,q,d);\n    ASSERT(dummy==h1);}\nr=h+c;   //  (xp,n) = (tqp,n)*d -ret*B^n    and 0 <= ret < d\ntp=refmpn_malloc_limbs(n);\nASSERT(r==refmpn_mul_1(tp,tqp,n,d));\nASSERT(r<d);\nASSERT(refmpn_cmp(tp,xp,n)==0);\nrefmpn_copy(qp,tqp,n);\nfree(tp);\nfree(tqp);\nreturn r;}\n\nmp_limb_t\nrefmpn_divrem_hensel_rsh_qr_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor,int shift)\n{\nASSERT(divisor%2==1);ASSERT(shift>=0);// do we want shift ==0 ??\nASSERT(size>0);ASSERT_MPN(sp,size);\nASSERT(refmpn_overlap_fullonly_p(rp,sp,size));\nrefmpn_rshift(rp,sp,size,shift);\nreturn refmpn_divrem_hensel_qr_1(rp,rp,size,divisor);\n}\n\nmp_limb_t\nrefmpn_rsh_divrem_hensel_qr_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor,int shift,mp_limb_t cin)\n{mp_limb_t r;mp_ptr tp;\nASSERT(divisor%2==1);ASSERT(shift>=0);// do we want shift ==0 
??\nASSERT(size>0);ASSERT_MPN(sp,size);\nASSERT(refmpn_overlap_fullonly_p(rp,sp,size));\ntp=refmpn_malloc_limbs(size);\ncin=refmpn_sub_1(tp,sp,size,cin);\nr=refmpn_divrem_hensel_qr_1(rp,tp,size,divisor);\nrefmpn_rshift(rp,rp,size,shift);\nfree(tp);\nreturn r+cin;}\n\nmp_limb_t\nrefmpn_divrem_hensel_r_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{mp_limb_t r;mp_ptr rp;\nASSERT(divisor%2==1);\nASSERT(size>0);ASSERT_MPN(sp,size);\nrp=refmpn_malloc_limbs(size);\nr=refmpn_divrem_hensel_qr_1(rp,sp,size,divisor);\nfree(rp);\nreturn r;}\n\nmp_limb_t\nrefmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,\n               mp_limb_t carry)\n{\n  mp_ptr  p = refmpn_malloc_limbs (size);\n  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);\n  free (p);\n  return carry;\n}\n\nmp_limb_t\nrefmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{\n  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));\n}\n\nmp_limb_t\nrefmpn_divrem_euclidean_r_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{\n  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));\n}\n\nmp_limb_t\nrefmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,\n                     mp_limb_t inverse)\n{\n  ASSERT (divisor & GMP_NUMB_HIGHBIT);\n  ASSERT (inverse == refmpn_invert_limb (divisor));\n  return refmpn_mod_1 (sp, size, divisor);\n}\n\n/* This implementation will be rather slow, but has the advantage of being\n   in a different style than the libmpir versions.  
*/\nmp_limb_t\nrefmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)\n{\n  ASSERT ((GMP_NUMB_BITS % 4) == 0);\n  return refmpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);\n}\n\n\nmp_limb_t\nrefmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,\n                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,\n                  mp_limb_t carry)\n{\n  mp_ptr  z;\n\n  z = refmpn_malloc_limbs (xsize);\n  refmpn_fill (z, xsize, CNST_LIMB(0));\n\n  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);\n  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);\n\n  free (z);\n  return carry;\n}\n\nmp_limb_t\nrefmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,\n                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)\n{\n  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));\n}\n\nmp_limb_t \nrefmpn_divexact_byff(mp_ptr rp, mp_srcptr xp, mp_size_t n)\n{\n  mp_limb_t r, cy;\n  mp_limb_t * tp;\n  mp_size_t i;\n  \n  tp = refmpn_malloc_limbs (n);\n\n  /* x = q1*(B - 1) + r1 */\n  r = refmpn_divrem_1 (rp, 0, xp, n, ~CNST_LIMB(0));\n  if (!r) return r; // r1 == 0\n  /* 0 < r1 < B - 1 \n     rp[n - 1] <= 1\n\t  if n > 1 and rp[n - 1] == 1 then rp[n - 2] <= 1, etc  \n\t  if all limbs are 1 then r == 0, already dealt with */\n\n  /* q = q1 + 1, -r = (B - 1) - r1, x = q*(B - 1) - r*/ \n  refmpn_add_1(rp, rp, n, CNST_LIMB(1));\n  r = -(r + 1); \n  /* rp[n - 1] <= 1 \n     if rp[n - 1] == 1 then rp[n - 1] <= 1, etc (***)*/\n  \n  /* x = q*(B - 1) + (B^n - 1)*r - (B^n - 1)*r - r\n       = (B - 1)*(q + r + r*B + r*B^2 + ... 
+ r*B^(n-1)) - B^n*r */\n  for (i = 0; i < n; i++) tp[i] = r;\n  refmpn_add_n(rp, rp, tp, n); /* cannot have carry out of top limb due to (***) */\n  \n  free(tp);\n\n  return r;\n}\n\nmp_limb_t \nrefmpn_divexact_byfobm1(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t f,mp_limb_t Bm1of)\n{mp_size_t j;mp_limb_t c,acc,ax,dx,*tp,*tp1;\n\nASSERT(n>0);\nASSERT(qp==xp || !refmpn_overlap_p(xp,n,qp,n));\nASSERT(Bm1of*f+1==0);\nacc=0*Bm1of;\ntp1 = refmpn_malloc_limbs (n);refmpn_copy(tp1,xp,n);\nfor(j=0;j<=n-1;j++)\n   {dx=refmpn_umul_ppmm(&ax,xp[j],Bm1of);\n    c=ref_subc_limb(&acc,acc,ax);\n    qp[j]=acc;\n    acc-=dx+c;}\n//return next quotient*-f\nacc=acc*-f;\nASSERT(acc<f);\ntp = refmpn_malloc_limbs (n);\nc=refmpn_mul_1(tp,qp,n,f);\nASSERT(c==acc);\nASSERT(refmpn_cmp(tp1,tp,n)==0);\nfree(tp);free(tp1);\nreturn acc;}   // so  (xp,n) = (qp,n)*f -ret*B^n    and 0 <= ret < f \n\nmp_limb_t\nrefmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,\n                        mp_srcptr sp, mp_size_t size,\n                        mp_limb_t divisor, mp_limb_t inverse, unsigned shift)\n{\n  ASSERT (size >= 0);\n  ASSERT (shift == refmpn_count_leading_zeros (divisor));\n  ASSERT (inverse == refmpn_invert_limb (divisor << shift));\n\n  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);\n}\n\n\n/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers\n   paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB.  Note that -d-1 < d\n   since d has the high bit set. 
*/\n\nmp_limb_t\nrefmpn_invert_limb (mp_limb_t d)\n{\n  mp_limb_t r;\n  ASSERT (d & GMP_LIMB_HIGHBIT);\n  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);\n}\n\n\n/* The aim is to produce a dst quotient and return a remainder c, satisfying\n   c*b^n + src-i == 3*dst, where i is the incoming carry.\n\n   Some value c==0, c==1 or c==2 will satisfy, so just try each.\n\n   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero\n   remainder from the first division attempt determines the correct\n   remainder (3-c), but don't bother with that, since we can't guarantee\n   anything about GMP_NUMB_BITS when using nails.\n\n   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos\n   complement negative, ie. b^n+a-i, and the calculation produces c1\n   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This\n   means it's enough to just add any borrow back at the end.\n\n   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in\n   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0\n   or 1 respectively, so with 1 added the final return value is still in the\n   prescribed range 0 to 2. 
*/\n\nmp_limb_t\nrefmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)\n{\n  mp_ptr     spcopy;\n  mp_limb_t  c, cs;\n\n  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));\n  ASSERT (size >= 1);\n  ASSERT (carry <= 2);\n  ASSERT_MPN (sp, size);\n\n  spcopy = refmpn_malloc_limbs (size);\n  cs = refmpn_sub_1 (spcopy, sp, size, carry);\n\n  for (c = 0; c <= 2; c++)\n    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)\n      goto done;\n  ASSERT_FAIL (no value of c satisfies);\n\n done:\n  c += cs;\n  ASSERT (c <= 2);\n\n  free (spcopy);\n  return c;\n}\n\nmp_limb_t\nrefmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)\n{\n  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));\n}\n\n\n/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */\nvoid\nrefmpn_mul_basecase (mp_ptr prodp,\n                     mp_srcptr up, mp_size_t usize,\n                     mp_srcptr vp, mp_size_t vsize)\n{\n  mp_size_t i;\n\n  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));\n  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));\n  ASSERT (usize >= vsize);\n  ASSERT (vsize >= 1);\n  ASSERT_MPN (up, usize);\n  ASSERT_MPN (vp, vsize);\n\n  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);\n  for (i = 1; i < vsize; i++)\n    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);\n}\n\nvoid\nrefmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)\n{\n  refmpn_mul_basecase (prodp, up, size, vp, size);\n}\n\nvoid\nrefmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)\n{\n  refmpn_mul_basecase (dst, src, size, src, size);\n}\n\n/* Allowing usize<vsize, usize==0 or vsize==0. */\nvoid\nrefmpn_mul_any (mp_ptr prodp,\n                     mp_srcptr up, mp_size_t usize,\n                     mp_srcptr vp, mp_size_t vsize)\n{\n  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));\n  ASSERT (! 
refmpn_overlap_p (prodp, usize+vsize, vp, vsize));\n  ASSERT (usize >= 0);\n  ASSERT (vsize >= 0);\n  ASSERT_MPN (up, usize);\n  ASSERT_MPN (vp, vsize);\n\n  if (usize == 0)\n    {\n      refmpn_fill (prodp, vsize, CNST_LIMB(0));\n      return;\n    }\n\n  if (vsize == 0)\n    {\n      refmpn_fill (prodp, usize, CNST_LIMB(0));\n      return;\n    }\n\n  if (usize >= vsize)\n    refmpn_mul_basecase (prodp, up, usize, vp, vsize);\n  else\n    refmpn_mul_basecase (prodp, vp, vsize, up, usize);\n}\n\n\n/* Reference implementation of mpn_mulmid for testing.\n\n   Algorithm: use mpn_mul and then subtract off O(n) cross-terms.\n   Must have un >= vn >= 1. */\nvoid\nrefmpn_mulmid_basecase (mp_ptr rp,\n               mp_srcptr up, mp_size_t un,\n               mp_srcptr vp, mp_size_t vn)\n{\n  mp_ptr temp;\n  mp_limb_t diag[4];\n  mp_limb_t hi;\n  mp_size_t i, k;\n\n  ASSERT (un >= vn);\n  ASSERT (vn >= 1);\n\n  /* special case */\n  if (vn == 1)\n    {\n      rp[un] = refmpn_mul_1 (rp, up, un, vp[0]);\n      rp[un+1] = 0;\n      return;\n    }\n\n  /* compute plain product */\n  temp = refmpn_malloc_limbs (un + vn);\n  ASSERT (temp != NULL);\n  refmpn_mul_any (temp, up, un, vp, vn);\n\n  /* remove the cross-terms that interfere with the result we want, i.e. 
in\n     the following diagram, we want only contributions from O's, but mpn_mul\n     has given us all of O, A and X, and we will remove the A's.\n\n     OOOOAAXX\n     AOOOOAAX\n     AAOOOOAA\n     XAAOOOOA\n     XXAAOOOO\n  */\n\n  /* bottom-left diagonal */\n  diag[0] = diag[3] = 0;\n  diag[2] = mpn_mul_1 (diag + 1, up, 1, vp[vn-2]);\n  for (i = 0; i < vn - 2; i++)\n    {\n      hi = refmpn_addmul_1 (diag, up + vn - i - 3, 2, vp[i]);\n      refmpn_add_1 (diag + 2, diag + 2, 2, hi);\n    }\n  k = (vn == 2);\n  refmpn_sub (temp + vn - 3 + k, temp + vn - 3 + k, un - vn + 5 - k,\n           diag + k, 4 - k);\n\n  /* top-right diagonal */\n  diag[1] = refmpn_mul_1 (diag, up + un - 1, 1, vp[1]);\n  for (i = 2; i < vn; i++)\n    refmpn_addmul_1 (diag, up + un - i, 2, vp[i]);\n  refmpn_sub (temp + un, temp + un, 2, diag, 2);\n\n  /* copy result to rp */\n  refmpn_copy (rp, temp + vn - 1, un - vn + 3);\n  refmpn_free_limbs (temp);\n}\n\nvoid\nrefmpn_mulmid (mp_ptr rp,\n               mp_srcptr up, mp_size_t un,\n               mp_srcptr vp, mp_size_t vn)\n{\n  refmpn_mulmid_basecase(rp, up, un, vp, vn);\n}\n\nvoid\nrefmpn_mulmid_n (mp_ptr rp,\n               mp_srcptr up, mp_srcptr vp, mp_size_t n)\n{\n  refmpn_mulmid(rp, up, 2*n-1, vp, n);\n}\n\n\nmp_limb_t\nrefmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)\n{\n  mp_limb_t  x;\n  int  twos;\n\n  ASSERT (y != 0);\n  ASSERT (! refmpn_zero_p (xp, xsize));\n  ASSERT_MPN (xp, xsize);\n  ASSERT_LIMB (y);\n\n  x = refmpn_mod_1 (xp, xsize, y);\n  if (x == 0)\n    return y;\n\n  twos = 0;\n  while ((x & 1) == 0 && (y & 1) == 0)\n    {\n      x >>= 1;\n      y >>= 1;\n      twos++;\n    }\n\n  for (;;)\n    {\n      while ((x & 1) == 0)  x >>= 1;\n      while ((y & 1) == 0)  y >>= 1;\n\n      if (x < y)\n        MP_LIMB_T_SWAP (x, y);\n\n      x -= y;\n      if (x == 0)\n        break;\n    }\n\n  return y << twos;\n}\n\n\n/* Based on the full limb x, not nails. 
*/\nunsigned\nrefmpn_count_leading_zeros (mp_limb_t x)\n{\n  unsigned  n = 0;\n\n  ASSERT (x != 0);\n\n  while ((x & GMP_LIMB_HIGHBIT) == 0)\n    {\n      x <<= 1;\n      n++;\n    }\n  return n;\n}\n\n/* Full limbs allowed, not limited to nails. */\nunsigned\nrefmpn_count_trailing_zeros (mp_limb_t x)\n{\n  unsigned  n = 0;\n\n  ASSERT (x != 0);\n  ASSERT_LIMB (x);\n\n  while ((x & 1) == 0)\n    {\n      x >>= 1;\n      n++;\n    }\n  return n;\n}\n\n/* Strip factors of two (low zero bits) from {p,size} by right shifting.\n   The return value is the number of twos stripped.  */\nmp_size_t\nrefmpn_strip_twos (mp_ptr p, mp_size_t size)\n{\n  mp_size_t  limbs;\n  unsigned   shift;\n\n  ASSERT (size >= 1);\n  ASSERT (! refmpn_zero_p (p, size));\n  ASSERT_MPN (p, size);\n\n  for (limbs = 0; p[0] == 0; limbs++)\n    {\n      refmpn_copyi (p, p+1, size-1);\n      p[size-1] = 0;\n    }\n\n  shift = refmpn_count_trailing_zeros (p[0]);\n  if (shift)\n    refmpn_rshift (p, p, size, shift);\n\n  return limbs*GMP_NUMB_BITS + shift;\n}\n\nmp_limb_t\nrefmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)\n{\n  int       cmp;\n\n  ASSERT (ysize >= 1);\n  ASSERT (xsize >= ysize);\n  ASSERT ((xp[0] & 1) != 0);\n  ASSERT ((yp[0] & 1) != 0);\n  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */\n  ASSERT (yp[ysize-1] != 0);\n  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));\n  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));\n  ASSERT (! 
refmpn_overlap_p (xp, xsize, yp, ysize));\n  if (xsize == ysize)\n    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));\n  ASSERT_MPN (xp, xsize);\n  ASSERT_MPN (yp, ysize);\n\n  refmpn_strip_twos (xp, xsize);\n  MPN_NORMALIZE (xp, xsize);\n  MPN_NORMALIZE (yp, ysize);\n\n  for (;;)\n    {\n      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);\n      if (cmp == 0)\n        break;\n      if (cmp < 0)\n        MPN_PTR_SWAP (xp,xsize, yp,ysize);\n\n      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));\n\n      refmpn_strip_twos (xp, xsize);\n      MPN_NORMALIZE (xp, xsize);\n    }\n\n  refmpn_copyi (gp, xp, xsize);\n  return xsize;\n}\n\nunsigned long\nref_popc_limb (mp_limb_t src)\n{\n  unsigned long  count;\n  int  i;\n\n  count = 0;\n  for (i = 0; i < GMP_LIMB_BITS; i++)\n    {\n      count += (src & 1);\n      src >>= 1;\n    }\n  return count;\n}\n\nunsigned long\nrefmpn_popcount (mp_srcptr sp, mp_size_t size)\n{\n  unsigned long  count = 0;\n  mp_size_t  i;\n\n  ASSERT (size >= 0);\n  ASSERT_MPN (sp, size);\n\n  for (i = 0; i < size; i++)\n    count += ref_popc_limb (sp[i]);\n  return count;\n}\n\nunsigned long\nrefmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)\n{\n  mp_ptr  d;\n  unsigned long  count;\n\n  ASSERT (size >= 0);\n  ASSERT_MPN (s1p, size);\n  ASSERT_MPN (s2p, size);\n\n  if (size == 0)\n    return 0;\n\n  d = refmpn_malloc_limbs (size);\n  refmpn_xor_n (d, s1p, s2p, size);\n  count = refmpn_popcount (d, size);\n  free (d);\n  return count;\n}\n\n\n/* set r to a%d */\nvoid\nrefmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])\n{\n  mp_limb_t  D[2];\n  int        n;\n\n  ASSERT (! 
refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));\n  ASSERT_MPN (a, 2);\n  ASSERT_MPN (d, 2);\n\n  D[1] = d[1], D[0] = d[0];\n  r[1] = a[1], r[0] = a[0];\n  n = 0;\n\n  for (;;)\n    {\n      if (D[1] & GMP_NUMB_HIGHBIT)\n        break;\n      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)\n        break;\n      refmpn_lshift (D, D, (mp_size_t) 2, 1);\n      n++;\n      ASSERT (n <= GMP_NUMB_BITS);\n    }\n\n  while (n >= 0)\n    {\n      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)\n        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));\n      refmpn_rshift (D, D, (mp_size_t) 2, 1);\n      n--;\n    }\n\n  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);\n}\n\n\n/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in\n   particular the trial quotient is allowed to be 2 too big. */\nmp_limb_t\nrefmpn_sb_divrem_mn (mp_ptr qp,\n                     mp_ptr np, mp_size_t nsize,\n                     mp_srcptr dp, mp_size_t dsize)\n{\n  mp_limb_t  retval = 0;\n  mp_size_t  i;\n  mp_limb_t  d1 = dp[dsize-1];\n  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);\n\n  ASSERT (nsize >= dsize);\n  /* ASSERT (dsize > 2); */\n  ASSERT (dsize >= 2);\n  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);\n  ASSERT (! 
refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);\n  ASSERT_MPN (np, nsize);\n  ASSERT_MPN (dp, dsize);\n\n  i = nsize-dsize;\n  if (refmpn_cmp (np+i, dp, dsize) >= 0)\n    {\n      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));\n      retval = 1;\n    }\n\n  for (i--; i >= 0; i--)\n    {\n      mp_limb_t  n0 = np[i+dsize];\n      mp_limb_t  n1 = np[i+dsize-1];\n      mp_limb_t  q, dummy_r;\n\n      ASSERT (n0 <= d1);\n      if (n0 == d1)\n        q = GMP_NUMB_MAX;\n      else\n        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,\n                               d1 << GMP_NAIL_BITS);\n\n      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);\n      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);\n      if (n0)\n        {\n          q--;\n          if (! refmpn_add_n (np+i, np+i, dp, dsize))\n            {\n              q--;\n              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize) != 0);\n            }\n        }\n      np[i+dsize] = 0;\n\n      qp[i] = q;\n    }\n\n  /* remainder < divisor */\n  ASSERT (refmpn_cmp (np, dp, dsize) < 0);\n\n  /* multiply back to original */\n  {\n    mp_ptr  mp = refmpn_malloc_limbs (nsize);\n\n    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);\n    if (retval)\n      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));\n    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));\n    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);\n\n    free (mp);\n  }\n\n  free (np_orig);\n  return retval;\n}\n\n/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in\n   particular the trial quotient is allowed to be 2 too big. 
*/\nvoid\nrefmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,\n                mp_ptr np, mp_size_t nsize,\n                mp_srcptr dp, mp_size_t dsize)\n{\n  ASSERT (qxn == 0);\n  ASSERT_MPN (np, nsize);\n  ASSERT_MPN (dp, dsize);\n  ASSERT (dsize > 0);\n  ASSERT (dp[dsize-1] != 0);\n\n  if (dsize == 1)\n    {\n      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);\n      return;\n    }\n  else\n    {\n      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);\n      mp_ptr  d2p = refmpn_malloc_limbs (dsize);\n      int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;\n\n      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);\n      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));\n\n      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);\n      refmpn_rshift_or_copy (rp, n2p, dsize, norm);\n\n      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */\n      free (n2p);\n      free (d2p);\n    }\n}\n\nvoid\nrefmpn_tdiv_q (mp_ptr qp, mp_ptr np, mp_size_t nsize,\n                mp_srcptr dp, mp_size_t dsize)\n{\n  ASSERT_MPN (np, nsize);\n  ASSERT_MPN (dp, dsize);\n  ASSERT (dsize > 0);\n  ASSERT (dp[dsize-1] != 0);\n\n  if (dsize == 1)\n    {\n      refmpn_divmod_1 (qp, np, nsize, dp[0]);\n      return;\n    }\n  else\n    {\n      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);\n      mp_ptr  d2p = refmpn_malloc_limbs (dsize);\n      int     norm = refmpn_count_leading_zeros (dp[dsize-1]);\n\n      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);\n      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));\n\n      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);\n      \n      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */\n      free (n2p);\n      free (d2p);\n    }\n}\n\n\nsize_t\nrefmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)\n{\n  unsigned char  *d;\n  size_t  dsize;\n\n  ASSERT (size >= 0);\n  ASSERT (base >= 2);\n  ASSERT (base < numberof (__mp_bases));\n  ASSERT 
(size == 0 || src[size-1] != 0);\n  ASSERT_MPN (src, size);\n\n  MPN_SIZEINBASE (dsize, src, size, base);\n  ASSERT (dsize >= 1);\n  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));\n\n  if (size == 0)\n    {\n      dst[0] = 0;\n      return 1;\n    }\n\n  /* don't clobber input for power of 2 bases */\n  if (POW2_P (base))\n    src = refmpn_memdup_limbs (src, size);\n\n  d = dst + dsize;\n  do\n    {\n      d--;\n      ASSERT (d >= dst);\n      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);\n      size -= (src[size-1] == 0);\n    }\n  while (size != 0);\n\n  /* Move result back and decrement dsize if we didn't generate\n     the maximum possible digits.  */\n  if (d != dst)\n    {\n      size_t i;\n      dsize -= d - dst;\n      for (i = 0; i < dsize; i++)\n\tdst[i] = d[i];\n    }\n\n  if (POW2_P (base))\n    free (src);\n\n  return dsize;\n}\n\n\nmp_limb_t\nref_bswap_limb (mp_limb_t src)\n{\n  mp_limb_t  dst;\n  int        i;\n\n  dst = 0;\n  for (i = 0; i < BYTES_PER_MP_LIMB; i++)\n    {\n      dst = (dst << 8) + (src & 0xFF);\n      src >>= 8;\n    }\n  return dst;\n}\n\n\n/* These random functions are mostly for transitional purposes while adding\n   nail support, since they're independent of the normal mpn routines.  They\n   can probably be removed when those normal routines are reliable, though\n   perhaps something independent would still be useful at times.  
*/\n\n#if BITS_PER_MP_LIMB == 32\n#define RAND_A  CNST_LIMB(0x29CF535)\n#endif\n#if BITS_PER_MP_LIMB == 64\n#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)\n#endif\n\nmp_limb_t  refmpn_random_seed;\n\nmp_limb_t\nrefmpn_random_half (void)\n{\n  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;\n  return (refmpn_random_seed >> BITS_PER_MP_LIMB/2);\n}\n\nmp_limb_t\nrefmpn_random_limb (void)\n{\n  return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2))\n           | refmpn_random_half ()) & GMP_NUMB_MASK;\n}\n\nvoid\nrefmpn_random (mp_ptr ptr, mp_size_t size)\n{\n  mp_size_t  i;\n  if (GMP_NAIL_BITS == 0)\n    {\n      mpn_random (ptr, size);\n      return;\n    }\n\n  for (i = 0; i < size; i++)\n    ptr[i] = refmpn_random_limb ();\n}\n\nvoid\nrefmpn_random2 (mp_ptr ptr, mp_size_t size)\n{\n  mp_size_t  i;\n  mp_limb_t  bit, mask, limb;\n  int        run;\n  \n  if (GMP_NAIL_BITS == 0)\n    {\n      mpn_random2 (ptr, size);\n      return;\n    }\n\n#define RUN_MODULUS  32\n\n  /* start with ones at a random pos in the high limb */\n  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);\n  mask = 0;\n  run = 0;\n\n  for (i = size-1; i >= 0; i--)\n    {\n      limb = 0;\n      do\n        {\n          if (run == 0)\n            {\n              run = (refmpn_random_half () % RUN_MODULUS) + 1;\n              mask = ~mask;\n            }\n\n          limb |= (bit & mask);\n          bit >>= 1;\n          run--;\n        }\n      while (bit != 0);\n\n      ptr[i] = limb;\n      bit = GMP_NUMB_HIGHBIT;\n    }\n}\n\n/* This is a simple bitwise algorithm working high to low across \"s\" and\n   testing each time whether setting the bit would make s^2 exceed n.  */\nmp_size_t\nrefmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)\n{\n  mp_ptr     tp, dp;\n  mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;\n  unsigned   ibit;\n  long       i;\n  mp_limb_t  c;\n\n  ASSERT (nsize >= 0);\n\n  /* If n==0, then s=0 and r=0.  
*/\n  if (nsize == 0)\n    return 0;\n\n  ASSERT (np[nsize - 1] != 0);\n  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));\n  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));\n  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));\n\n  /* root */\n  ssize = (nsize+1)/2;\n  refmpn_zero (sp, ssize);\n\n  /* the remainder so far */\n  dp = refmpn_memdup_limbs (np, nsize);\n  dsize = nsize;\n\n  /* temporary */\n  talloc = 2*ssize + 1;\n  tp = refmpn_malloc_limbs (talloc);\n\n  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)\n    {\n      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i\n         is added to it */\n\n      ilimbs = (i+1) / GMP_NUMB_BITS;\n      ibit = (i+1) % GMP_NUMB_BITS;\n      refmpn_zero (tp, ilimbs);\n      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);\n      tsize = ilimbs + ssize;\n      tp[tsize] = c;\n      tsize += (c != 0);\n\n      ilimbs = (2*i) / GMP_NUMB_BITS;\n      ibit = (2*i) % GMP_NUMB_BITS;\n      if (ilimbs + 1 > tsize)\n        {\n          refmpn_zero_extend (tp, tsize, ilimbs + 1);\n          tsize = ilimbs + 1;\n        }\n      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,\n                        CNST_LIMB(1) << ibit);\n      ASSERT (tsize < talloc);\n      tp[tsize] = c;\n      tsize += (c != 0);\n\n      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)\n        {\n          /* set this bit in s and subtract from the remainder */\n          refmpn_setbit (sp, i);\n\n          ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));\n          dsize = refmpn_normalize (dp, dsize);\n        }\n    }\n\n  if (rp == NULL)\n    {\n      ret = ! 
refmpn_zero_p (dp, dsize);\n    }\n  else\n    {\n      ASSERT (dsize == 0 || dp[dsize-1] != 0);\n      refmpn_copy (rp, dp, dsize);\n      ret = dsize;\n    }\n\n  free (dp);\n  free (tp);\n  return ret;\n}\n\nvoid \nrefmpn_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nd)\n{\n  mp_limb_t cy, q;\n  mp_size_t j;\n      \n  ASSERT_MPN (tp, 2*n);\n        \n  for (j = 0; j < n; j++)\n  {\n    q = (tp[0] * Nd) & GMP_NUMB_MASK;\n    tp[0] = refmpn_addmul_1 (tp, mp, n, q);\n    tp++;\n  }\n   \n  cy = refmpn_add_n (cp, tp, tp - n, n);\n  \n  if (cy) refmpn_sub_n (cp, cp, mp, n);\n}\n"
  },
  {
    "path": "tests/refmpq.c",
    "content": "/* Reference rational routines.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nvoid\nrefmpq_add (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)\n{\n  mpz_mul    (mpq_numref(w), mpq_numref(x), mpq_denref(y));\n  mpz_addmul (mpq_numref(w), mpq_denref(x), mpq_numref(y));\n  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));\n  mpq_canonicalize (w);\n}\n\nvoid\nrefmpq_sub (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)\n{\n  mpz_mul    (mpq_numref(w), mpq_numref(x), mpq_denref(y));\n  mpz_submul (mpq_numref(w), mpq_denref(x), mpq_numref(y));\n  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));\n  mpq_canonicalize (w);\n}\n"
  },
  {
    "path": "tests/refmpz.c",
    "content": "/* Reference mpz functions.\n\nCopyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n/* always do assertion checking */\n#define WANT_ASSERT  1\n\n#include <stdio.h>\n#include <stdlib.h> /* for free */\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\n/* Change this to \"#define TRACE(x) x\" for some traces. */\n#define TRACE(x) \n\n\n/* FIXME: Shouldn't use plain mpz functions in a reference routine. 
*/\nvoid\nrefmpz_combit (mpz_ptr r, unsigned long bit)\n{\n  if (mpz_tstbit (r, bit))\n    mpz_clrbit (r, bit);\n  else\n    mpz_setbit (r, bit);\n}\n\n\nunsigned long\nrefmpz_hamdist (mpz_srcptr x, mpz_srcptr y)\n{\n  mp_size_t      xsize, ysize, tsize;\n  mp_ptr         xp, yp;\n  unsigned long  ret;\n\n  if ((SIZ(x) < 0 && SIZ(y) >= 0)\n      || (SIZ(y) < 0 && SIZ(x) >= 0))\n    return ULONG_MAX;\n\n  xsize = ABSIZ(x);\n  ysize = ABSIZ(y);\n  tsize = MAX (xsize, ysize);\n\n  xp = refmpn_malloc_limbs (tsize);\n  refmpn_zero (xp, tsize);\n  refmpn_copy (xp, PTR(x), xsize);\n\n  yp = refmpn_malloc_limbs (tsize);\n  refmpn_zero (yp, tsize);\n  refmpn_copy (yp, PTR(y), ysize);\n\n  if (SIZ(x) < 0)\n    refmpn_neg_n (xp, xp, tsize);\n\n  if (SIZ(x) < 0)\n    refmpn_neg_n (yp, yp, tsize);\n\n  ret = refmpn_hamdist (xp, yp, tsize);\n\n  free (xp);\n  free (yp);\n  return ret;\n}\n\n\n/* (0/b), with mpz b; is 1 if b=+/-1, 0 otherwise */\n#define JACOBI_0Z(b)  JACOBI_0LS (PTR(b)[0], SIZ(b))\n\n/* (a/b) effect due to sign of b: mpz/mpz */\n#define JACOBI_BSGN_ZZ_BIT1(a, b)   JACOBI_BSGN_SS_BIT1 (SIZ(a), SIZ(b))\n\n/* (a/b) effect due to sign of a: mpz/unsigned-mpz, b odd;\n   is (-1/b) if a<0, or +1 if a>=0 */\n#define JACOBI_ASGN_ZZU_BIT1(a, b)  JACOBI_ASGN_SU_BIT1 (SIZ(a), PTR(b)[0])\n\nint\nrefmpz_kronecker (mpz_srcptr a_orig, mpz_srcptr b_orig)\n{\n  unsigned long  twos;\n  mpz_t  a, b;\n  int    result_bit1 = 0;\n\n  if (mpz_sgn (b_orig) == 0)\n    return JACOBI_Z0 (a_orig);  /* (a/0) */\n\n  if (mpz_sgn (a_orig) == 0)\n    return JACOBI_0Z (b_orig);  /* (0/b) */\n\n  if (mpz_even_p (a_orig) && mpz_even_p (b_orig))\n    return 0;\n\n  if (mpz_cmp_ui (b_orig, 1) == 0)\n    return 1;\n\n  mpz_init_set (a, a_orig);\n  mpz_init_set (b, b_orig);\n\n  if (mpz_sgn (b) < 0)\n    {\n      result_bit1 ^= JACOBI_BSGN_ZZ_BIT1 (a, b);\n      mpz_neg (b, b);\n    }\n  if (mpz_even_p (b))\n    {\n      twos = mpz_scan1 (b, 0L);\n      mpz_tdiv_q_2exp (b, b, twos);\n      result_bit1 
^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]);\n    }\n\n  if (mpz_sgn (a) < 0)\n    {\n      result_bit1 ^= JACOBI_N1B_BIT1 (PTR(b)[0]);\n      mpz_neg (a, a);\n    }\n  if (mpz_even_p (a))\n    {\n      twos = mpz_scan1 (a, 0L);\n      mpz_tdiv_q_2exp (a, a, twos);\n      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);\n    }\n\n  for (;;)\n    {\n      ASSERT (mpz_odd_p (a));\n      ASSERT (mpz_odd_p (b));\n      ASSERT (mpz_sgn (a) > 0);\n      ASSERT (mpz_sgn (b) > 0);\n\n      TRACE (printf (\"top\\n\");\n             mpz_trace (\" a\", a);\n             mpz_trace (\" b\", b));\n\n      if (mpz_cmp (a, b) < 0)\n        {\n          TRACE (printf (\"swap\\n\"));\n          mpz_swap (a, b);\n          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (PTR(a)[0], PTR(b)[0]);\n        }\n\n      if (mpz_cmp_ui (b, 1) == 0)\n        break;\n\n      mpz_sub (a, a, b);\n      TRACE (printf (\"sub\\n\");\n             mpz_trace (\" a\", a));\n      if (mpz_sgn (a) == 0)\n        goto zero;\n\n      twos = mpz_scan1 (a, 0L);\n      mpz_fdiv_q_2exp (a, a, twos);\n      TRACE (printf (\"twos %lu\\n\", twos);\n             mpz_trace (\" a\", a));\n      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);\n    }\n\n  mpz_clear (a);\n  mpz_clear (b);\n  return JACOBI_BIT1_TO_PN (result_bit1);\n\n zero:\n  mpz_clear (a);\n  mpz_clear (b);\n  return 0;\n}\n\n/* Same as mpz_kronecker, but ignoring factors of 2 on b */\nint\nrefmpz_jacobi (mpz_srcptr a, mpz_srcptr b)\n{\n  mpz_t  b_odd;\n  mpz_init_set (b_odd, b);\n  if (mpz_sgn (b_odd) != 0)\n    mpz_fdiv_q_2exp (b_odd, b_odd, mpz_scan1 (b_odd, 0L));\n  return refmpz_kronecker (a, b_odd);\n}\n\nint\nrefmpz_legendre (mpz_srcptr a, mpz_srcptr b)\n{\n  return refmpz_jacobi (a, b);\n}\n\n\nint\nrefmpz_kronecker_ui (mpz_srcptr a, unsigned long b)\n{\n  mpz_t  bz;\n  int    ret;\n  mpz_init_set_ui (bz, b);\n  ret = refmpz_kronecker (a, bz);\n  mpz_clear (bz);\n  return ret;\n}\n\nint\nrefmpz_kronecker_si (mpz_srcptr a, long b)\n{\n  mpz_t  bz;\n 
 int    ret;\n  mpz_init_set_si (bz, b);\n  ret = refmpz_kronecker (a, bz);\n  mpz_clear (bz);\n  return ret;\n}\n\nint\nrefmpz_ui_kronecker (unsigned long a, mpz_srcptr b)\n{\n  mpz_t  az;\n  int    ret;\n  mpz_init_set_ui (az, a);\n  ret = refmpz_kronecker (az, b);\n  mpz_clear (az);\n  return ret;\n}\n\nint\nrefmpz_si_kronecker (long a, mpz_srcptr b)\n{\n  mpz_t  az;\n  int    ret;\n  mpz_init_set_si (az, a);\n  ret = refmpz_kronecker (az, b);\n  mpz_clear (az);\n  return ret;\n}\n\n\nvoid\nrefmpz_pow_ui (mpz_ptr w, mpz_srcptr b, unsigned long e)\n{\n  mpz_t          s, t;\n  unsigned long  i;\n\n  mpz_init_set_ui (t, 1L);\n  mpz_init_set (s, b);\n\n  if ((e & 1) != 0)\n    mpz_mul (t, t, s);\n\n  for (i = 2; i <= e; i <<= 1)\n    {\n      mpz_mul (s, s, s);\n      if ((i & e) != 0)\n\tmpz_mul (t, t, s);\n    }\n\n  mpz_set (w, t);\n\n  mpz_clear (s);\n  mpz_clear (t);\n}\n"
  },
  {
    "path": "tests/spinner.c",
    "content": "/* A stupid little spinning wheel designed to make it look like useful work\n   is being done.\n\nCopyright 1999, 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>     /* for isatty */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#include \"tests.h\"\n\n\n/* \"alarm\" is not available on mingw32, and the SIGALRM constant is not\n   defined.  Don't bother with a spinner in this case.  */\n#if ! HAVE_ALARM || ! defined (SIGALRM)\n#define alarm(n)          abort()\n#define signal(sig,func)  SIG_ERR\n#endif\n\n\n/* An application can update this to get a count printed with the spinner.\n   If left at 0, no count is printed. 
*/\n\nunsigned long  spinner_count = 0;\n\n\nint  spinner_wanted = -1;  /* -1 uninitialized, 1 wanted, 0 not */\nint  spinner_tick = 1;     /* 1 ready to print, 0 not */\n\n\n/*ARGSUSED*/\nvoid\nspinner_signal (int signum)\n{\n  spinner_tick = 1;\n\n  if (signal (SIGALRM, spinner_signal) == SIG_ERR)\n    {\n      printf (\"spinner_signal(): Oops, cannot reinstall SIGALRM\\n\");\n      abort ();\n    }\n  alarm (1);\n}\n\n\n/* Initialize the spinner.\n\n   This is done the first time spinner() is called, so an application\n   doesn't need to call this directly.\n\n   The spinner is only wanted if the output is a tty.  */\n\n#define SPINNER_WANTED_INIT() \\\n  if (spinner_wanted < 0) spinner_init ()\n\nvoid\nspinner_init (void)\n{\n  spinner_wanted = isatty (fileno (stdout));\n  if (spinner_wanted == -1)\n    abort ();\n\n  if (!spinner_wanted)\n    return;\n\n  if (signal (SIGALRM, spinner_signal) == SIG_ERR)\n    {\n      printf (\"(no spinner)\\r\");\n      spinner_tick = 0;\n      return;\n    }\n  alarm (1);\n\n  /* unbufferred output so the spinner will show up */\n  setbuf (stdout, NULL);\n}\n\n\nvoid\nspinner (void)\n{\n  static const char  data[] = { '|', '/', '-', '\\\\' };\n  static int         pos = 0;\n\n  char  buf[128];\n\n  SPINNER_WANTED_INIT ();\n\n  if (spinner_tick)\n    {\n      buf[0] = data[pos];\n      pos = (pos + 1) % numberof (data);\n      spinner_tick = 0;\n\n      if (spinner_count != 0)\n        {\n          sprintf (buf+1, \" %lu\\r\", spinner_count);\n        }\n      else\n        {\n          buf[1] = '\\r';\n          buf[2] = '\\0';\n        }\n      fputs (buf, stdout);\n    }\n}\n"
  },
  {
    "path": "tests/t-bswap.c",
    "content": "/* Test BSWAP_LIMB.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nint\nmain (void)\n{\n  mp_limb_t  src, want, got;\n  int        i;\n  gmp_randstate_t rands;\n  \n  tests_start ();\n  gmp_randinit_default(rands);\n  mp_trace_base = -16;\n\n  for (i = 0; i < 1000; i++)\n    {\n      mpn_randomb (&src, rands, (mp_size_t) 1);\n\n      want = ref_bswap_limb (src);\n\n      BSWAP_LIMB (got, src);\n      if (got != want)\n        {\n          printf (\"BSWAP_LIMB wrong result\\n\");\n        error:\n          mpn_trace (\"  src \", &src,  (mp_size_t) 1);\n          mpn_trace (\"  want\", &want, (mp_size_t) 1);\n          mpn_trace (\"  got \", &got,  (mp_size_t) 1);\n          abort ();\n        }\n\n      BSWAP_LIMB_FETCH (got, &src);\n      if (got != want)\n        {\n          printf (\"BSWAP_LIMB_FETCH wrong result\\n\");\n          goto error;\n        }\n\n      BSWAP_LIMB_STORE (&got, src);\n      if (got != want)\n        {\n          printf (\"BSWAP_LIMB_STORE wrong result\\n\");\n          goto error;\n        }\n    }\n  
gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-constants.c",
    "content": "/* Check the values of some constants.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"tests.h\"\n\n#define printf gmp_printf\n\n#ifdef ULLONG_MAX\nchar *ullong_max_def = \"defined\";\n#else\nchar *ullong_max_def = \"not defined\";\n#endif\n#ifdef LLONG_MAX\nchar *llong_max_def = \"defined\";\n#else\nchar *llong_max_def = \"not defined\";\n#endif\n\n#ifdef ULONG_MAX\nchar *ulong_max_def = \"defined\";\n#else\nchar *ulong_max_def = \"not defined\";\n#endif\n#ifdef LONG_MAX\nchar *long_max_def = \"defined\";\n#else\nchar *long_max_def = \"not defined\";\n#endif\n\n#ifdef UINT_MAX\nchar *uint_max_def = \"defined\";\n#else\nchar *uint_max_def = \"not defined\";\n#endif\n#ifdef INT_MAX\nchar *int_max_def = \"defined\";\n#else\nchar *int_max_def = \"not defined\";\n#endif\n\n#ifdef USHRT_MAX\nchar *ushrt_max_def = \"defined\";\n#else\nchar *ushrt_max_def = \"not defined\";\n#endif\n#ifdef SHRT_MAX\nchar *shrt_max_def = \"defined\";\n#else\nchar *shrt_max_def = \"not defined\";\n#endif\n\n#include \"gmp-impl.h\"\n#include 
\"longlong.h\"\n\n\n#ifdef _LONG_LONG_LIMB\n#define LL(l,ll)  ll\n#else\n#define LL(l,ll)  l\n#endif\n\n#if __GMP_MP_SIZE_T_INT\n#define SS(i,l)   i\n#else\n#define SS(i,l)   l\n#endif\n\n\n#define CHECK_LIMB_S(x, xname, y, yname)                \\\n  do {                                                  \\\n    if ((x) != (y))                                     \\\n      {                                                 \\\n        printf (LL(\"%s == %lx, but %s == %lx\\n\",        \\\n                   \"%s == %llx, but %s == %llx\\n\"),     \\\n                xname, x, yname, y);                    \\\n        error = 1;                                      \\\n      }                                                 \\\n  } while (0)\n\n#define CHECK_INT_S(x, xname, y, yname)                                 \\\n  do {                                                                  \\\n    if ((x) != (y))                                                     \\\n      {                                                                 \\\n        printf (\"%s == %d, but %s == %d\\n\", xname, x, yname, y);        \\\n        error = 1;                                                      \\\n      }                                                                 \\\n  } while (0)\n\n\n\n#define CHECK_CONDITION_S(x, xname)             \\\n  do {                                          \\\n    if (!(x))                                   \\\n      {                                         \\\n        printf (\"%s is false\\n\", xname);        \\\n        error = 1;                              \\\n      }                                         \\\n  } while (0)\n\n\n/* How many bits seem to work in the given type. 
*/\n#define CALC_BITS(result, type) \\\n  do {                          \\\n    type  n = 1;                \\\n    result = 0;                 \\\n    while (n != 0)              \\\n      {                         \\\n        n <<= 1;                \\\n        result++;               \\\n      }                         \\\n  } while (0)\n\n#define CHECK_BITS_S(constant, constant_name, type)     \\\n  do {                                                  \\\n    int   calculated;                                   \\\n    CALC_BITS (calculated, type);                       \\\n    if (calculated != constant)                         \\\n      {                                                 \\\n        printf (\"%s == %d, but calculated %d\\n\",        \\\n                constant_name, constant, calculated);   \\\n        error = 1;                                      \\\n      }                                                 \\\n  } while (0)\n\n\n#define CHECK_HIGHBIT_S(value, value_name, type, format)        \\\n  do {                                                          \\\n    type  n = value;                                            \\\n    if (n == 0)                                                 \\\n      {                                                         \\\n        printf (\"%s == 0\\n\", value_name);                       \\\n        error = 1;                                              \\\n      }                                                         \\\n    n <<= 1;                                                    \\\n    if (n != 0)                                                 \\\n      {                                                         \\\n        printf (\"%s << 1 = \", value_name);                      \\\n        printf (format, n);                                     \\\n        printf (\" != 0\\n\");                                     \\\n        error = 1;                                              \\\n     
 }                                                         \\\n  } while (0)\n\n\n#define CHECK_MAX_S(max_val, max_name, min_val, min_name, type, format) \\\n  do {                                                                  \\\n    type  maxval = max_val;                                             \\\n    type  minval = min_val;                                             \\\n    type  n = maxval;                                                   \\\n    n++;                                                                \\\n    if (n != minval)                                                    \\\n      {                                                                 \\\n        printf (\"%s + 1 = \", max_name);                                 \\\n        printf (format, n);                                             \\\n        printf (\" != %s = \", min_name);                                 \\\n        printf (format, minval);                                        \\\n        printf (\"\\n\");                                                  \\\n        error = 1;                                                      \\\n      }                                                                 \\\n    if (maxval <= minval)                                               \\\n      {                                                                 \\\n        printf (\"%s = \", max_name);                                     \\\n        printf (format, maxval);                                        \\\n        printf (\" <= %s = \", min_name);                                 \\\n        printf (format, minval);                                        \\\n        printf (\"\\n\");                                                  \\\n        error = 1;                                                      \\\n      }                                                                 \\\n  } while (0)\n\n\n#if HAVE_STRINGIZE\n#define CHECK_LIMB(x,y)      
CHECK_LIMB_S (x, #x, y, #y)\n#define CHECK_INT(x,y)       CHECK_INT_S (x, #x, y, #y)\n#define CHECK_CONDITION(x)   CHECK_CONDITION_S (x, #x)\n#define CHECK_BITS(c,t)      CHECK_BITS_S (c, #c, t)\n#define CHECK_MAX(m,n,t,f)   CHECK_MAX_S (m, #m, n, #n, t, f)\n#define CHECK_HIGHBIT(n,t,f) CHECK_HIGHBIT_S (n, #n, t, f)\n#else\n#define CHECK_LIMB(x,y)      CHECK_LIMB_S (x, \"x\", y, \"y\")\n#define CHECK_INT(x,y)       CHECK_INT_S (x, \"x\", y, \"y\")\n#define CHECK_CONDITION(x)   CHECK_CONDITION_S (x, \"x\")\n#define CHECK_BITS(c,t)      CHECK_BITS_S (c, \"c\", t)\n#define CHECK_MAX(m,n,t,f)   CHECK_MAX_S (m, \"m\", n, \"n\", t, f)\n#define CHECK_HIGHBIT(n,t,f) CHECK_HIGHBIT_S (n, \"n\", t, f)\n#endif\n\n\n/* The tests below marked \"Bad!\" fail on Cray T90 systems, where int, short\n   and mp_size_t are 48 bits or some such but don't wraparound in a plain\n   twos complement fashion.  In particular,\n\n       INT_HIGHBIT << 1 = 0xFFFFC00000000000 != 0\n       INT_MAX + 1 = 35184372088832 != INT_MIN = -35184372088832\n\n   This is a bit bizarre, but doesn't matter because GMP doesn't rely on any\n   particular overflow behaviour for int or short, only for mp_limb_t.  */\n\nint\nmain (int argc, char *argv[])\n{\n  int  error = 0;\n\n  CHECK_INT (BYTES_PER_MP_LIMB, sizeof(mp_limb_t));\n  CHECK_INT (mp_bits_per_limb, BITS_PER_MP_LIMB);\n  CHECK_INT (GMP_LIMB_BITS, BITS_PER_MP_LIMB);\n\n  CHECK_BITS (BITS_PER_MP_LIMB, mp_limb_t);\n  CHECK_BITS (BITS_PER_ULONG, unsigned long);\n\n  CHECK_HIGHBIT (GMP_LIMB_HIGHBIT, mp_limb_t,      LL(\"0x%lX\",\"0x%llX\"));\n  CHECK_HIGHBIT (ULONG_HIGHBIT,     unsigned long,  \"0x%lX\");\n  CHECK_HIGHBIT (UINT_HIGHBIT,      unsigned int,   \"0x%X\");\n  CHECK_HIGHBIT (USHRT_HIGHBIT,     unsigned short, \"0x%hX\");\n  CHECK_HIGHBIT (LONG_HIGHBIT,      long,           \"0x%lX\");\n  CHECK_HIGHBIT (GMP_UI_HIBIT,      mpir_ui,         \"%#Mx\");\n\n#if 0 /* Bad! 
*/\n  CHECK_HIGHBIT (INT_HIGHBIT,       int,            \"0x%X\");\n  CHECK_HIGHBIT (SHRT_HIGHBIT,      short,          \"0x%hX\");\n#endif\n\n#if 0 /* Bad! */\n  CHECK_MAX (LONG_MAX,      LONG_MIN,      long,           \"%ld\");\n  CHECK_MAX (INT_MAX,       INT_MIN,       int,            \"%d\");\n  CHECK_MAX (SHRT_MAX,      SHRT_MIN,      short,          \"%hd\");\n#endif\n  CHECK_MAX (ULONG_MAX,     0,             unsigned long,  \"%lu\");\n  CHECK_MAX (UINT_MAX,      0,             unsigned int,    \"%u\");\n  CHECK_MAX (GMP_UI_MAX,    0,             mpir_ui,          \"%M\");\n  CHECK_MAX (USHRT_MAX,     0,             unsigned short, \"%hu\");\n#if 0 /* Bad! */\n  CHECK_MAX (MP_SIZE_T_MAX, MP_SIZE_T_MIN, mp_size_t,      SS(\"%d\",\"%ld\"));\n#endif\n\n  /* UHWtype should have at least enough bits for half a UWtype */\n  {\n    int  bits_per_UWtype, bits_per_UHWtype;\n    CALC_BITS (bits_per_UWtype,  UWtype);\n    CALC_BITS (bits_per_UHWtype, UHWtype);\n    CHECK_CONDITION (2*bits_per_UHWtype >= bits_per_UWtype);\n  }\n\n  ASSERT_ALWAYS_LIMB (MODLIMB_INVERSE_3);\n  {\n    mp_limb_t  modlimb_inverse_3_calc;\n    modlimb_invert (modlimb_inverse_3_calc, CNST_LIMB(3));\n    ASSERT_ALWAYS_LIMB (modlimb_inverse_3_calc);\n    CHECK_LIMB (MODLIMB_INVERSE_3, modlimb_inverse_3_calc);\n  }\n  {\n    mp_limb_t  MODLIMB_INVERSE_3_times_3\n      = (MODLIMB_INVERSE_3 * CNST_LIMB(3)) & GMP_NUMB_MASK;\n    CHECK_LIMB (MODLIMB_INVERSE_3_times_3, CNST_LIMB(1));\n  }\n\n  {\n    mp_limb_t  hi, lo;\n    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3-1,\n                           CNST_LIMB(3) << GMP_NAIL_BITS);\n    if (! (hi < 1))\n      {\n        printf (\"GMP_NUMB_CEIL_MAX_DIV3 too big\\n\");\n        error = 1;\n      }\n    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3,\n                           CNST_LIMB(3) << GMP_NAIL_BITS);\n    if (! 
(hi >= 1))\n      {\n        printf (\"GMP_NUMB_CEIL_MAX_DIV3 too small\\n\");\n        error = 1;\n      }\n  }\n\n  {\n    mp_limb_t  hi, lo;\n    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3-1,\n                           CNST_LIMB(3) << GMP_NAIL_BITS);\n    if (! (hi < 2))\n      {\n        printf (\"GMP_NUMB_CEIL_2MAX_DIV3 too big\\n\");\n        error = 1;\n      }\n    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3,\n                           CNST_LIMB(3) << GMP_NAIL_BITS);\n    if (! (hi >= 2))\n      {\n        printf (\"GMP_NUMB_CEIL_2MAX_DIV3 too small\\n\");\n        error = 1;\n      }\n  }\n\n#ifdef PP_INVERTED\n  {\n    mp_limb_t  pp_inverted_calc;\n    invert_limb (pp_inverted_calc, PP);\n    CHECK_LIMB (PP_INVERTED, pp_inverted_calc);\n  }\n#endif\n\n  if (argc >= 2 || error)\n    {\n      int  bits;\n\n      printf (\"\\n\");\n      printf (\"After mpir.h,\\n\");\n      printf (\"  ULLONG_MAX  %s\\n\", ullong_max_def);\n      printf (\"  LLONG_MAX   %s\\n\", llong_max_def);\n      printf (\"  ULONG_MAX  %s\\n\", ulong_max_def);\n      printf (\"  LONG_MAX   %s\\n\", long_max_def);\n      printf (\"  UINT_MAX   %s\\n\", uint_max_def);\n      printf (\"  INT_MAX    %s\\n\", int_max_def);\n      printf (\"  USHRT_MAX  %s\\n\", ushrt_max_def);\n      printf (\"  SHRT_MAX   %s\\n\", shrt_max_def);\n      printf (\"\\n\");\n\n#if HAVE_LONG_LONG && defined(ULLONG_MAX)\n      printf (\"ULLONG_MAX     %llX\\n\", ULLONG_MAX);\n      printf (\"LLONG_MAX      %llX\\n\", LLONG_MAX);\n      printf (\"LLONG_MIN      %llX\\n\", LLONG_MIN);\n#endif\n#if defined(ULLONG_MAX)\n      printf (\"GMP_UI_MAX     %Mx\\n\", ULLONG_MAX);\n      printf (\"GMP_SI_MAX     %Mx\\n\", LLONG_MAX);\n      printf (\"GMP_SI_MIN     %Mx\\n\", LLONG_MIN);\n#endif\n\n      printf (\"ULONG_MAX      %lX\\n\", ULONG_MAX);\n      printf (\"ULONG_HIGHBIT  %lX\\n\", ULONG_HIGHBIT);\n      printf (\"LONG_MAX       %lX\\n\", LONG_MAX);\n      printf (\"LONG_MIN       %lX\\n\", 
LONG_MIN);\n\n      printf (\"UINT_MAX       %#X\\n\", UINT_MAX);\n      printf (\"UINT_HIGHBIT   %X\\n\", UINT_HIGHBIT);\n      printf (\"INT_MAX        %X\\n\", INT_MAX);\n      printf (\"INT_MIN        %X\\n\", INT_MIN);\n\n      printf (\"USHRT_MAX      %hX\\n\", USHRT_MAX);\n      printf (\"USHRT_HIGHBIT  %hX\\n\", USHRT_HIGHBIT);\n      printf (\"SHRT_MAX       %hX\\n\", SHRT_MAX);\n      printf (\"SHRT_MIN       %hX\\n\", SHRT_MIN);\n\n      printf (\"\\n\");\n      printf (\"Bits\\n\");\n      CALC_BITS (bits, long long);           printf (\"  long long           %d\\n\", bits);\n      CALC_BITS (bits, long);                printf (\"  long                %d\\n\", bits);\n      CALC_BITS (bits, int);                 printf (\"  int                 %d\\n\", bits);\n      CALC_BITS (bits, short);               printf (\"  short               %d\\n\", bits);\n      CALC_BITS (bits, mpir_si);              printf (\"  GMP signed int      %d\\n\", bits);\n      CALC_BITS (bits, unsigned long long);  printf (\"  unsigned long long  %d\\n\", bits);\n      CALC_BITS (bits, unsigned long);       printf (\"  unsigned long       %d\\n\", bits);\n      CALC_BITS (bits, unsigned int);        printf (\"  unsigned int        %d\\n\", bits);\n      CALC_BITS (bits, unsigned short);      printf (\"  unsigned short      %d\\n\", bits);\n      CALC_BITS (bits, mp_size_t);           printf (\"  mp_size_t           %d\\n\", bits);\n      CALC_BITS (bits, mpir_ui);              printf (\"  GMP unsigned int    %d\\n\", bits);\n    }\n\n  if (error)\n    abort ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-count_zeros.c",
    "content": "/* Test count_leading_zeros and count_trailing_zeros.\n\nCopyright 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\nvoid\ncheck_clz (int want, mp_limb_t n)\n{\n  int  got;\n  count_leading_zeros (got, n);\n  if (got != want)\n    {\n      printf        (\"count_leading_zeros wrong\\n\");\n      mp_limb_trace (\"  n    \", n);\n      printf        (\"  want %d\\n\", want);\n      printf        (\"  got  %d\\n\", got);\n      abort ();\n    }\n}\n\nvoid\ncheck_ctz (int want, mp_limb_t n)\n{\n  int  got;\n  count_trailing_zeros (got, n);\n  if (got != want)\n    {\n      printf (\"count_trailing_zeros wrong\\n\");\n      mpn_trace (\"  n    \", &n, (mp_size_t) 1);\n      printf    (\"  want %d\\n\", want);\n      printf    (\"  got  %d\\n\", got);\n      abort ();\n    }\n}\n\nvoid\ncheck_various (void)\n{\n  int        i;\n\n#ifdef COUNT_LEADING_ZEROS_0\n  check_clz (COUNT_LEADING_ZEROS_0, CNST_LIMB(0));\n#endif\n\n  for (i=0; i < BITS_PER_MP_LIMB; i++)\n    {\n      check_clz (i, CNST_LIMB(1) << 
(BITS_PER_MP_LIMB-1-i));\n      check_ctz (i, CNST_LIMB(1) << i);\n\n      check_ctz (i, MP_LIMB_T_MAX << i);\n      check_clz (i, MP_LIMB_T_MAX >> i);\n    }\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n  mp_trace_base = 16;\n\n  check_various ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-gmpmax.c",
    "content": "/* Check the values of __GMP_UINT_MAX etc.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <limits.h>\n#include \"mpir.h\"\n\n\n/* __GMP_UINT_MAX etc are generated with expressions in mpir.h since we don't\n   want to demand <limits.h> or forcibly include it.  Check the expressions\n   come out the same as <limits.h>.  */\n\nint\nmain (int argc, char *argv[])\n{\n  int  error = 0;\n\n#ifdef UINT_MAX\n  if (__GMP_UINT_MAX != UINT_MAX)\n    {\n      printf (\"__GMP_UINT_MAX incorrect\\n\");\n      printf (\"  __GMP_UINT_MAX  %u  0x%X\\n\", __GMP_UINT_MAX, __GMP_UINT_MAX);\n      printf (\"  UINT_MAX        %u  0x%X\\n\", UINT_MAX, UINT_MAX);\n      error = 1;\n    }\n#endif\n\n  /* gcc 2.95.2 limits.h on solaris 2.5.1 incorrectly selects a 64-bit\n     LONG_MAX, leading to some integer overflow in ULONG_MAX and a spurious\n     __GMP_ULONG_MAX != ULONG_MAX.  Casting ULONG_MAX to unsigned long is a\n     workaround.  
*/\n#ifdef ULONG_MAX\n  if (__GMP_ULONG_MAX != (unsigned long) ULONG_MAX)\n    {\n      printf (\"__GMP_ULONG_MAX incorrect\\n\");\n      printf (\"  __GMP_ULONG_MAX  %lu  0x%lX\\n\", __GMP_ULONG_MAX, __GMP_ULONG_MAX);\n      printf (\"  ULONG_MAX        %lu  0x%lX\\n\", ULONG_MAX, ULONG_MAX);\n      error = 1;\n    }\n#endif\n\n#ifdef USHRT_MAX\n  if (__GMP_USHRT_MAX != USHRT_MAX)\n    {\n      printf (\"__GMP_USHRT_MAX incorrect\\n\");\n      printf (\"  __GMP_USHRT_MAX  %hu  0x%hX\\n\", __GMP_USHRT_MAX, __GMP_USHRT_MAX);\n      printf (\"  USHRT_MAX        %hu  0x%hX\\n\", USHRT_MAX, USHRT_MAX);\n      error = 1;\n    }\n#endif\n\n  if (error)\n    abort ();\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-hightomask.c",
    "content": "/* Test LIMB_HIGHBIT_TO_MASK.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* There's very little to these tests, but it's nice to have them since if\n   something has gone wrong with the arithmetic right shift business in\n   LIMB_HIGHBIT_TO_MASK then the only symptom is likely to be failures in\n   udiv_qrnnd_preinv, which would not be easy to diagnose.  */\n\nint\nmain (void)\n{\n  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (0) == 0);\n  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT) == MP_LIMB_T_MAX);\n  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX) == MP_LIMB_T_MAX);\n  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT >> 1) == 0);\n  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX >> 1) == 0);\n\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-modlinv.c",
    "content": "/* Test modlimb_invert.\n\nCopyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\nvoid\none (mp_limb_t n)\n{\n  mp_limb_t  inv, prod;\n\n  modlimb_invert (inv, n);\n  prod = (inv * n) & GMP_NUMB_MASK;\n  if (prod != 1)\n    {\n      printf (\"modlimb_invert wrong\\n\");\n      mp_limb_trace (\"  n       \", n);\n      mp_limb_trace (\"  got     \", inv);\n      mp_limb_trace (\"  product \", prod);\n      abort ();\n    }\n}\n\nvoid\nsome (void)\n{\n  int  i;\n  for (i = 0; i < 10000; i++)\n    one (refmpn_random_limb () | 1);\n}\n\nvoid\nall (void)\n{\n  mp_limb_t  n;\n\n  n = 1;\n  do {\n    one (n);\n    n += 2;\n  } while (n != 1);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n\n  if (argc >= 2 && strcmp (argv[1], \"-a\") == 0)\n    {\n      /* it's feasible to run all values on a 32-bit limb, but not a 64-bit */\n      all ();\n    }\n  else\n    {\n      some ();\n    }\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-parity.c",
    "content": "/* Test ULONG_PARITY.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\nvoid\ncheck_one (int want, unsigned long n)\n{\n  int  got;\n  ULONG_PARITY (got, n);\n  if (got != want)\n    {\n      printf (\"ULONG_PARITY wrong\\n\");\n      printf (\"  n    %lX\\n\", n);\n      printf (\"  want %d\\n\", want);\n      printf (\"  got  %d\\n\", got);\n      abort ();\n    }\n}\n\nvoid\ncheck_various (void)\n{\n  int  i;\n\n  check_one (0, 0L);\n  check_one (BITS_PER_ULONG & 1, ULONG_MAX);\n  check_one (0, 0x11L);\n  check_one (1, 0x111L);\n  check_one (1, 0x3111L);\n\n  for (i = 0; i < BITS_PER_ULONG; i++)\n    check_one (1, 1L << i);\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  tests_start ();\n  mp_trace_base = 16;\n\n  check_various ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-popc.c",
    "content": "/* Test popc_limb.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\nint\nmain (void)\n{\n  mp_limb_t  src, want, got;\n  int        i;\n  gmp_randstate_t rands;\n\n  tests_start ();\n  gmp_randinit_default(rands);\n  mp_trace_base = -16;\n\n  for (i = 0; i < GMP_LIMB_BITS; i++)\n    {\n      src = CNST_LIMB(1) << i;\n      want = 1;\n\n      popc_limb (got, src);\n      if (got != want)\n        {\n        error:\n          printf (\"popc_limb wrong result\\n\");\n          mpn_trace (\"  src \", &src,  (mp_size_t) 1);\n          mpn_trace (\"  want\", &want, (mp_size_t) 1);\n          mpn_trace (\"  got \", &got,  (mp_size_t) 1);\n          abort ();\n        }\n    }\n\n  for (i = 0; i < 100; i++)\n    {\n      mpn_rrandom (&src, rands, (mp_size_t) 1);\n      want = ref_popc_limb (src);\n\n      popc_limb (got, src);\n      if (got != want)\n        goto error;\n    }\n  gmp_randclear(rands);\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/t-sub.c",
    "content": "/* Test sub_ddmmss.\n\nCopyright 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"tests.h\"\n\n\nvoid\ncheck_data (void)\n{\n#define M  MP_LIMB_T_MAX\n\n  static const struct {\n    mp_limb_t  want_dh,want_dl, mh,ml, sh,sl;\n  } data[] = {\n    { 0,0,  0,0,  0,0 },\n    { 0,0,  0,1,  0,1 },\n    { 0,0,  1,2,  1,2 },\n\n    { 0,1,  0,2,  0,1 },\n    { 0,M,  1,0,  0,1 },\n    { M,M,  0,0,  0,1 },\n\n    { M,M,  0,M-1,  0,M },\n    { 0,0,  0,M-1,  0,M-1 },\n    { 0,1,  0,M-1,  0,M-2 },\n  };\n  int  i;\n  mp_limb_t  got_dh, got_dl;\n\n  for (i = 0; i < numberof (data); i++)\n    {\n      sub_ddmmss (got_dh,got_dl, data[i].mh,data[i].ml, data[i].sh,data[i].sl);\n      if (got_dh != data[i].want_dh || got_dl != data[i].want_dl)\n        {\n          printf (\"check_data wrong at data[%d]\\n\", i);\n          mp_limb_trace (\"  mh\", data[i].mh);\n          mp_limb_trace (\"  ml\", data[i].ml);\n          mp_limb_trace (\"  sh\", data[i].sh);\n          mp_limb_trace (\"  sl\", data[i].sl);\n          mp_limb_trace (\"  want dh\", 
data[i].want_dh);\n          mp_limb_trace (\"  want dl\", data[i].want_dl);\n          mp_limb_trace (\"  got dh \", got_dh);\n          mp_limb_trace (\"  got dl \", got_dl);\n          abort ();\n        }\n    }\n}\n\nvoid\ncheck_random (void)\n{\n  mp_limb_t  want_dh,want_dl, got_dh,got_dl, mh,ml, sh,sl;\n  int  i;\n  gmp_randstate_t rands;\n  \n  gmp_randinit_default(rands);\n\n  for (i = 0; i < 20; i++)\n    {\n      mh = urandom (rands);\n      ml = urandom (rands);\n      sh = urandom (rands);\n      sl = urandom (rands);\n\n      refmpn_sub_ddmmss (&want_dh,&want_dl, mh,ml, sh,sl);\n\n      sub_ddmmss (got_dh,got_dl, mh,ml, sh,sl);\n\n      if (got_dh != want_dh || got_dl != want_dl)\n        {\n          printf (\"check_data wrong at data[%d]\\n\", i);\n          mp_limb_trace (\"  mh\", mh);\n          mp_limb_trace (\"  ml\", ml);\n          mp_limb_trace (\"  sh\", sh);\n          mp_limb_trace (\"  sl\", sl);\n          mp_limb_trace (\"  want dh\", want_dh);\n          mp_limb_trace (\"  want dl\", want_dl);\n          mp_limb_trace (\"  got dh \", got_dh);\n          mp_limb_trace (\"  got dl \", got_dl);\n          abort ();\n        }\n    }\n    gmp_randclear(rands);\n}\n\nint\nmain (void)\n{\n  tests_start ();\n  mp_trace_base = -16;\n\n  check_data ();\n  check_random ();\n\n  tests_end ();\n  exit (0);\n}\n"
  },
  {
    "path": "tests/tests.h",
    "content": "/* Tests support prototypes etc.\n\nCopyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n#ifndef __TESTS_H__\n#define __TESTS_H__\n\n#include \"config.h\"\n\n#include <setjmp.h>  /* for jmp_buf */\n\n#if defined (__cplusplus)\nextern \"C\" {\n#endif\n\n\n#ifdef __cplusplus\n#define ANYARGS  ...\n#else\n#define ANYARGS\n#endif\n\n\nvoid tests_start(void);\nvoid tests_end(void);\n\nvoid tests_memory_start(void);\nvoid tests_memory_end(void);\nvoid *tests_allocate(size_t size);\nvoid *tests_reallocate(void *ptr, size_t old_size, size_t new_size);\nvoid tests_free(void *ptr, size_t size);\nvoid tests_free_nosize(void *ptr);\nint tests_memory_valid(void *ptr);\n\nvoid tests_rand_start(void);\nvoid tests_rand_end(void);\n\ndouble tests_infinity_d();\nint tests_hardware_getround(void);\nint tests_hardware_setround(int);\nint tests_isinf(double);\nint tests_dbl_mant_bits(void);\n\n/* tests_setjmp_sigfpe is like a setjmp, establishing a trap for SIGFPE.\n   The initial return is 0, if SIGFPE is trapped execution goes back there\n   with return value 1.\n\n   tests_sigfpe_done puts SIGFPE back to SIG_DFL, which should be used once\n   the 
setjmp point is out of scope, so a later SIGFPE won't try to go back\n   there.  */\n\n#define tests_setjmp_sigfpe()                   \\\n  (signal (SIGFPE, tests_sigfpe_handler),       \\\n   setjmp (tests_sigfpe_target))\n\nvoid tests_sigfpe_handler(int);\nvoid tests_sigfpe_done(void);\nextern jmp_buf  tests_sigfpe_target;\n\n\n#if HAVE_CALLING_CONVENTIONS\nextern mp_limb_t (*calling_conventions_function)(ANYARGS);\nmp_limb_t calling_conventions(ANYARGS);\nint calling_conventions_check(void);\n#define CALLING_CONVENTIONS(function) \\\n  (calling_conventions_function = (function), calling_conventions)\n#define CALLING_CONVENTIONS_CHECK()    (calling_conventions_check())\n#else\n#define CALLING_CONVENTIONS(function)  (function)\n#define CALLING_CONVENTIONS_CHECK()    1 /* always ok */\n#endif\n\n\nextern int mp_trace_base;\nvoid mp_limb_trace(const char *, mp_limb_t);\nvoid mpn_trace(const char *name, mp_srcptr ptr, mp_size_t size);\nvoid mpn_tracea(const char *name, const mp_ptr *a, int count, mp_size_t size);\nvoid mpn_tracen(const char *name, int num, mp_srcptr ptr, mp_size_t size);\nvoid mpn_trace_file(const char *filename, mp_srcptr ptr, mp_size_t size);\nvoid mpn_tracea_file(const char *filename, const mp_ptr *a, int count, mp_size_t size);\nvoid mpf_trace(const char *name, mpf_srcptr z);\nvoid mpq_trace(const char *name, mpq_srcptr q);\nvoid mpz_trace(const char *name, mpz_srcptr z);\nvoid mpz_tracen(const char *name, int num, mpz_srcptr z);\nvoid byte_trace(const char *, const void *, mp_size_t);\nvoid byte_tracen(const char *, int, const void *, mp_size_t);\nvoid d_trace(const char *, double);\n\n\nvoid spinner(void);\nextern unsigned long  spinner_count;\nextern int  spinner_wanted;\nextern int  spinner_tick;\n\n\nvoid *align_pointer(void *p, size_t align);\nvoid *__gmp_allocate_func_aligned(size_t bytes, size_t align);\nvoid *__gmp_allocate_or_reallocate(void *ptr, size_t oldsize, size_t newsize);\nchar *__gmp_allocate_strdup(const char *s);\nchar 
*strtoupper(char *s_orig);\nmp_limb_t urandom(gmp_randstate_t);\nvoid call_rand_algs(void (*func) (const char *, gmp_randstate_t));\n\n\nvoid mpf_set_str_or_abort(mpf_ptr f, const char *str, int base);\n\n\nvoid mpq_set_str_or_abort(mpq_ptr q, const char *str, int base);\n\n\nvoid mpz_erandomb(mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits);\nvoid mpz_erandomb_nonzero(mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits);\nvoid mpz_errandomb(mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits);\nvoid mpz_errandomb_nonzero(mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits);\nvoid mpz_init_set_n(mpz_ptr z, mp_srcptr p, mp_size_t size);\nvoid mpz_negrandom(mpz_ptr rop, gmp_randstate_t rstate);\nint mpz_pow2abs_p(mpz_srcptr z) __GMP_ATTRIBUTE_PURE;\nvoid mpz_set_n(mpz_ptr z, mp_srcptr p, mp_size_t size);\nvoid mpz_set_str_or_abort(mpz_ptr z, const char *str, int base);\n\nmp_size_t mpn_diff_highest(mp_srcptr p1, mp_srcptr p2, mp_size_t n) __GMP_ATTRIBUTE_PURE;\nmp_size_t mpn_diff_lowest(mp_srcptr p1, mp_srcptr p2, mp_size_t n) __GMP_ATTRIBUTE_PURE;\nmp_size_t byte_diff_highest(const void *p1, const void *p2, mp_size_t size) __GMP_ATTRIBUTE_PURE;\nmp_size_t byte_diff_lowest(const void *p1, const void *p2, mp_size_t size) __GMP_ATTRIBUTE_PURE;\n\n\nmp_limb_t ref_addc_limb(mp_limb_t *, mp_limb_t, mp_limb_t);\nmp_limb_t ref_bswap_limb(mp_limb_t src);\nunsigned long ref_popc_limb(mp_limb_t src);\nmp_limb_t ref_subc_limb(mp_limb_t *, mp_limb_t, mp_limb_t);\n\n\nvoid refmpf_add(mpf_ptr, mpf_srcptr, mpf_srcptr);\nvoid refmpf_add_ulp(mpf_ptr f);\nvoid refmpf_fill(mpf_ptr f, mp_size_t size, mp_limb_t value);\nvoid refmpf_normalize(mpf_ptr f);\nvoid refmpf_set_prec_limbs(mpf_ptr f, unsigned long prec);\nunsigned long refmpf_set_overlap(mpf_ptr dst, mpf_srcptr src);\nvoid refmpf_sub(mpf_ptr, mpf_srcptr, mpf_srcptr);\nint refmpf_validate(const char *name, mpf_srcptr got, mpf_srcptr want);\nint refmpf_validate_division(const char *name, mpf_srcptr got, 
mpf_srcptr n, mpf_srcptr d);\n\n\nmp_limb_t refmpn_add(mp_ptr rp, mp_srcptr s1p, mp_size_t s1size, mp_srcptr s2p, mp_size_t s2size);\nmp_limb_t refmpn_add_1(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n);\nmp_limb_t refmpn_add_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_add_nc(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size, mp_limb_t carry);\nmp_limb_t refmpn_addadd_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp,mp_srcptr zp, mp_size_t n);\n\nmp_limb_t refmpn_subadd_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp,mp_srcptr zp, mp_size_t n);\n\nint refmpn_addsub_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_srcptr zp, mp_size_t n);\n\nmp_limb_t refmpn_addlsh1_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_addlsh_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size,unsigned int);\nmp_limb_t refmpn_sublsh_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size,unsigned int);\nmp_limb_t refmpn_addlsh_nc(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size,unsigned int,mp_limb_t);\nmp_limb_t refmpn_sublsh_nc(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size,unsigned int,mp_limb_t);\nmp_limb_t refmpn_inclsh_n(mp_ptr wp, mp_srcptr xp, mp_size_t size,unsigned int);\nmp_limb_t refmpn_declsh_n(mp_ptr wp, mp_srcptr xp, mp_size_t size,unsigned int);\nmp_limb_t refmpn_addmul_1(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier);\nmp_limb_t refmpn_addmul_1c(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry);\nmp_limb_t refmpn_addmul_2(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_3(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_4(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_5(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_6(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_7(mp_ptr dst, 
mp_srcptr src, mp_size_t size, mp_srcptr mult);\nmp_limb_t refmpn_addmul_8(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\n\nmp_limb_t refmpn_add_err1_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,mp_size_t n, mp_limb_t cy);\nmp_limb_t refmpn_sub_err1_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_ptr ep, mp_srcptr yp,mp_size_t n, mp_limb_t cy);\n\nmp_limb_t refmpn_add_err2_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp,mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,mp_size_t n, mp_limb_t cy);\nmp_limb_t refmpn_sub_err2_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp,mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,mp_size_t n, mp_limb_t cy);\n\nmp_limb_t refmpn_sumdiff_n(mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size);\nmp_limb_t refmpn_nsumdiff_n(mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size);\nmp_limb_t refmpn_sumdiff_nc(mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t carry);\n\nvoid refmpn_and_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nvoid refmpn_andn_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\n\nmp_limb_t refmpn_big_base(int);\n\nint refmpn_chars_per_limb(int);\nvoid refmpn_clrbit(mp_ptr, unsigned long);\nint refmpn_cmp(mp_srcptr s1p, mp_srcptr s2p, mp_size_t size);\nint refmpn_cmp_allowzero(mp_srcptr, mp_srcptr, mp_size_t);\nint refmpn_cmp_twosizes(mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize);\n\nvoid refmpn_com_n(mp_ptr rp, mp_srcptr sp, mp_size_t size);\nvoid refmpn_not(mp_ptr rp, mp_size_t size);\nvoid refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size);\nvoid refmpn_copyi(mp_ptr rp, mp_srcptr sp, mp_size_t size);\nvoid refmpn_copyd(mp_ptr rp, mp_srcptr sp, mp_size_t size);\nvoid refmpn_copy_extend(mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize);\n\nunsigned refmpn_count_leading_zeros(mp_limb_t x);\nunsigned refmpn_count_trailing_zeros(mp_limb_t x);\n\nmp_limb_t refmpn_divexact_by3(mp_ptr rp, mp_srcptr sp, mp_size_t 
size);\nmp_limb_t refmpn_divexact_by3c(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry);\nmp_limb_t refmpn_divexact_byff(mp_ptr rp, mp_srcptr xp, mp_size_t n);\n\nmp_limb_t refmpn_divexact_byfobm1(mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_limb_t,mp_limb_t);\n\nmp_limb_t refmpn_divmod_1(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor);\nmp_limb_t refmpn_divrem_euclidean_qr_1(mp_ptr rp, mp_srcptr sp, mp_size_t size,mp_limb_t divisor);\nmp_limb_t refmpn_divrem_euclidean_r_1(mp_srcptr sp, mp_size_t size,mp_limb_t divisor);\n\nmp_limb_t refmpn_divrem_hensel_qr_1(mp_ptr rp, mp_srcptr sp, mp_size_t size,mp_limb_t divisor);\nmp_limb_t refmpn_divrem_hensel_rsh_qr_1(mp_ptr rp, mp_srcptr sp, mp_size_t size,mp_limb_t divisor,int);\nmp_limb_t refmpn_rsh_divrem_hensel_qr_1(mp_ptr rp, mp_srcptr sp, mp_size_t size,mp_limb_t divisor,int,mp_limb_t);\nmp_limb_t refmpn_divrem_hensel_r_1(mp_srcptr sp, mp_size_t size,mp_limb_t divisor);\n\nmp_limb_t refmpn_divmod_1c(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry);\nmp_limb_t refmpn_divrem_1(mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor);\nmp_limb_t refmpn_divrem_1c(mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry);\n\nint refmpn_equal_anynail(mp_srcptr, mp_srcptr, mp_size_t);\n\nvoid refmpn_fill(mp_ptr p, mp_size_t s, mp_limb_t v);\n\nmp_limb_t refmpn_gcd_1(mp_srcptr xp, mp_size_t xsize, mp_limb_t y);\nmp_limb_t refmpn_gcd(mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize);\n\nsize_t refmpn_get_str(unsigned char *, int, mp_ptr, mp_size_t);\n\nunsigned long refmpn_hamdist(mp_srcptr s1p, mp_srcptr s2p, mp_size_t size);\n\nmp_limb_t refmpn_invert_limb(mp_limb_t d);\nvoid refmpn_ior_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nvoid refmpn_iorn_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\n\nmp_limb_t refmpn_lshift(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned 
shift);\nmp_limb_t refmpn_lshiftc(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_lshift_or_copy(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_lshift_or_copy_any(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_lshift1(mp_ptr rp, mp_srcptr xp, mp_size_t n);\nmp_limb_t refmpn_lshift2(mp_ptr rp, mp_srcptr xp, mp_size_t n);\n\nmp_limb_t refmpn_double(mp_ptr rp, mp_size_t n);\nmp_limb_t refmpn_half(mp_ptr rp,  mp_size_t n);\n\nmp_ptr refmpn_malloc_limbs(mp_size_t size);\nmp_ptr refmpn_malloc_limbs_aligned(mp_size_t n, size_t m);\nvoid refmpn_free_limbs(mp_ptr p);\nmp_limb_t refmpn_msbone(mp_limb_t x);\nmp_limb_t refmpn_msbone_mask(mp_limb_t x);\nmp_ptr refmpn_memdup_limbs(mp_srcptr ptr, mp_size_t size);\n\nmp_limb_t refmpn_mod_1(mp_srcptr sp, mp_size_t size, mp_limb_t divisor);\nmp_limb_t refmpn_mod_1c(mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t carry);\nmp_limb_t refmpn_mod_34lsub1(mp_srcptr p, mp_size_t n);\n\nmp_limb_t refmpn_mul_1(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier);\nmp_limb_t refmpn_mul_1c(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry);\nmp_limb_t refmpn_mul_2(mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult);\n\nvoid refmpn_mul_basecase(mp_ptr prodp, mp_srcptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize);\nvoid refmpn_mul_any(mp_ptr prodp, mp_srcptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize);\nvoid refmpn_mul_n(mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size);\nvoid refmpn_mulmid_basecase(mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn);\nvoid refmpn_mulmid(mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn);\nvoid refmpn_mulmid_n(mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n);\nvoid refmpn_nand_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nvoid refmpn_nior_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t 
size);\nmp_limb_t refmpn_neg_n(mp_ptr dst, mp_srcptr src, mp_size_t size);\nmp_size_t refmpn_normalize(mp_srcptr, mp_size_t);\n\nunsigned long refmpn_popcount(mp_srcptr sp, mp_size_t size);\nmp_limb_t refmpn_preinv_divrem_1(mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift);\nmp_limb_t refmpn_preinv_mod_1(mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t divisor_inverse);\n\nvoid refmpn_random(mp_ptr, mp_size_t);\nvoid refmpn_random2(mp_ptr, mp_size_t);\nmp_limb_t refmpn_random_limb(void);\n\nmp_limb_t refmpn_rsh1add_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_rsh1sub_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_rshift(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_rshift_or_copy(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_rshift_or_copy_any(mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift);\nmp_limb_t refmpn_rshift1(mp_ptr rp, mp_srcptr xp, mp_size_t n);\nmp_limb_t refmpn_rshift2(mp_ptr rp, mp_srcptr xp, mp_size_t n);\n\nmp_limb_t refmpn_sb_divrem_mn(mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize);\nunsigned long refmpn_scan0(mp_srcptr, unsigned long);\nunsigned long refmpn_scan1(mp_srcptr, unsigned long);\nvoid refmpn_setbit(mp_ptr, unsigned long);\nvoid refmpn_sqr(mp_ptr dst, mp_srcptr src, mp_size_t size);\nmp_size_t refmpn_sqrtrem(mp_ptr, mp_ptr, mp_srcptr, mp_size_t);\n\nvoid refmpn_sub_ddmmss(mp_limb_t *, mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);\nmp_limb_t refmpn_sub(mp_ptr rp, mp_srcptr s1p, mp_size_t s1size, mp_srcptr s2p, mp_size_t s2size);\nmp_limb_t refmpn_sub_1(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n);\nmp_limb_t refmpn_sub_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_sub_nc(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size, mp_limb_t carry);\nmp_limb_t 
refmpn_sublsh1_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nmp_limb_t refmpn_submul_1(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier);\nmp_limb_t refmpn_submul_1c(mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t multiplier, mp_limb_t carry);\n\nvoid refmpn_tdiv_qr(mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize);\nvoid refmpn_tdiv_q(mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize);\nint refmpn_tstbit(mp_srcptr, unsigned long);\n\nmp_limb_t refmpn_udiv_qrnnd(mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t);\nmp_limb_t refmpn_udiv_qrnnd_r(mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *);\nmp_limb_t refmpn_umul_ppmm(mp_limb_t *, mp_limb_t, mp_limb_t);\nmp_limb_t refmpn_umul_ppmm_r(mp_limb_t, mp_limb_t, mp_limb_t *);\n\nvoid refmpn_xnor_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nvoid refmpn_xor_n(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\n\nvoid refmpn_zero(mp_ptr p, mp_size_t s);\nvoid refmpn_zero_extend(mp_ptr, mp_size_t, mp_size_t);\nint refmpn_zero_p(mp_srcptr ptr, mp_size_t size);\nvoid refmpn_store(mp_ptr p, mp_size_t s,mp_limb_t);\n\nvoid refmpq_add(mpq_ptr w, mpq_srcptr x, mpq_srcptr y);\nvoid refmpq_sub(mpq_ptr w, mpq_srcptr x, mpq_srcptr y);\n\n\nvoid refmpz_combit(mpz_ptr r, unsigned long bit);\nunsigned long refmpz_hamdist(mpz_srcptr x, mpz_srcptr y);\nint refmpz_kronecker(mpz_srcptr a_orig, mpz_srcptr b_orig);\nint refmpz_jacobi(mpz_srcptr a_orig, mpz_srcptr b_orig);\nint refmpz_legendre(mpz_srcptr a_orig, mpz_srcptr b_orig);\nint refmpz_kronecker_si(mpz_srcptr, long);\nint refmpz_kronecker_ui(mpz_srcptr, unsigned long);\nint refmpz_si_kronecker(long, mpz_srcptr);\nint refmpz_ui_kronecker(unsigned long, mpz_srcptr);\n\nvoid refmpz_pow_ui(mpz_ptr w, mpz_srcptr b, unsigned long e);\n\nvoid refmpn_redc_1(mp_ptr cp, mp_ptr tp,mp_srcptr mp, mp_size_t n, mp_limb_t Nd);\n\n#if defined (__cplusplus)\n}\n#endif\n\n\n/* Establish ostringstream 
and istringstream.  Do this here so as to hide\n   the conditionals, rather than putting stuff in each test program.\n\n   Oldish versions of g++, like 2.95.2, don't have <sstream>, only\n   <strstream>.  Fake up ostringstream and istringstream classes, but not a\n   full implementation, just enough for our purposes.  */\n\n#ifdef __cplusplus\n#if HAVE_SSTREAM\n#include <sstream>\n#else /* ! HAVE_SSTREAM */\n#include <string>\n#include <strstream>\nclass\nostringstream : public std::ostrstream {\n public:\n  string str() {\n    int  pcount = ostrstream::pcount ();\n    char *s = (char *) (*__gmp_allocate_func) (pcount + 1);\n    memcpy (s, ostrstream::str(), pcount);\n    s[pcount] = '\\0';\n    string ret = string(s);\n    (*__gmp_free_func) (s, pcount + 1);\n    return ret; }\n};\nclass\nistringstream : public std::istrstream {\n public:\n  istringstream (const char *s) : istrstream (s) { };\n};\n#endif /* ! HAVE_SSTREAM */\n#endif /* __cplusplus */\n\n\n#endif /* __TESTS_H__ */\n"
  },
  {
    "path": "tests/trace.c",
    "content": "/* Support for diagnostic traces.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,\nInc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Future: Would like commas printed between limbs in hex or binary, but\n   perhaps not always since it might upset cutting and pasting into bc or\n   whatever.  */\n\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h> /* for strlen */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#include \"tests.h\"\n\n\n/* Number base for the various trace printing routines.\n   Set this in main() or with the debugger.\n   If hexadecimal is going to be fed into GNU bc, remember to use -16\n   because bc requires upper case.  
*/\n\nint  mp_trace_base = 10;\n\n\nvoid\nmp_trace_start (const char *name)\n{\n  if (name != NULL && name[0] != '\\0')\n    printf (\"%s=\", name);\n\n  switch (ABS (mp_trace_base)) {\n  case  2: printf (\"bin:\");                         break;\n  case  8: printf (\"oct:\");                         break;\n  case 10:                                          break;\n  case 16: printf (\"0x\");                           break;\n  default: printf (\"base%d:\", ABS (mp_trace_base)); break;\n  }\n}\n\n/* Print \"name=value\\n\" to stdout for an mpq_t value.  */\nvoid\nmpq_trace (const char *name, mpq_srcptr q)\n{\n  mp_trace_start (name);\n  if (q == NULL)\n    {\n      printf (\"NULL\\n\");\n      return;\n    }\n\n  mpq_out_str (stdout, mp_trace_base, q);\n  printf (\"\\n\");\n}\n\n\n/* Print \"name=value\\n\" to stdout for an mpz_t value.  */\nvoid\nmpz_trace (const char *name, mpz_srcptr z)\n{\n  mpq_t      q;\n  mp_limb_t  one;\n\n  if (z == NULL)\n    {\n      mpq_trace (name, NULL);\n      return;\n    }\n\n  q->_mp_num._mp_alloc = ALLOC(z);\n  q->_mp_num._mp_size = SIZ(z);\n  q->_mp_num._mp_d = PTR(z);\n\n  one = 1;\n  q->_mp_den._mp_alloc = 1;\n  q->_mp_den._mp_size = 1;\n  q->_mp_den._mp_d = &one;\n\n  mpq_trace(name, q);\n}\n\n\n/* Print \"name=value\\n\" to stdout for an mpf_t value. */\nvoid\nmpf_trace (const char *name, mpf_srcptr f)\n{\n  mp_trace_start (name);\n  if (f == NULL)\n    {\n      printf (\"NULL\\n\");\n      return;\n    }\n\n  mpf_out_str (stdout, ABS (mp_trace_base), 0, f);\n  printf (\"\\n\");\n}\n\n\n/* Print \"namenum=value\\n\" to stdout for an mpz_t value.\n   \"name\" should have a \"%d\" to get the number. */\nvoid\nmpz_tracen (const char *name, int num, mpz_srcptr z)\n{\n  if (name != NULL && name[0] != '\\0')\n    {\n      printf (name, num);\n      putchar ('=');\n    }\n  mpz_trace (NULL, z);\n}\n\n\n/* Print \"name=value\\n\" to stdout for an mpn style ptr,size. 
*/\nvoid\nmpn_trace (const char *name, mp_srcptr ptr, mp_size_t size)\n{\n  mpz_t  z;\n  if (ptr == NULL)\n    {\n      mpz_trace (name, NULL);\n      return;\n    }\n  MPN_NORMALIZE (ptr, size);\n  PTR(z) = (mp_ptr) ptr;\n  SIZ(z) = size;\n  ALLOC(z) = size;\n  mpz_trace (name, z);\n}\n\n/* Print \"name=value\\n\" to stdout for a limb, nail doesn't have to be zero. */\nvoid\nmp_limb_trace (const char *name, mp_limb_t n)\n{\n#if GMP_NAIL_BITS != 0\n  mp_limb_t  a[2];\n  a[0] = n & GMP_NUMB_MASK;\n  a[1] = n >> GMP_NUMB_BITS;\n  mpn_trace (name, a, (mp_size_t) 2);\n#else\n  mpn_trace (name, &n, (mp_size_t) 1);\n#endif\n}\n\n\n/* Print \"namenum=value\\n\" to stdout for an mpn style ptr,size.\n   \"name\" should have a \"%d\" to get the number.  */\nvoid\nmpn_tracen (const char *name, int num, mp_srcptr ptr, mp_size_t size)\n{\n  if (name != NULL && name[0] != '\\0')\n    {\n      printf (name, num);\n      putchar ('=');\n    }\n  mpn_trace (NULL, ptr, size);\n}\n\n\n/* Print \"namenum=value\\n\" to stdout for an array of mpn style ptr,size.\n\n   \"a\" is an array of pointers, each a[i] is a pointer to \"size\" many limbs.\n   The formal parameter isn't mp_srcptr because that causes compiler\n   warnings, but the values aren't modified.\n\n   \"name\" should have a printf style \"%d\" to get the array index.  */\n\nvoid\nmpn_tracea (const char *name, const mp_ptr *a, int count, mp_size_t size)\n{\n  int i;\n  for (i = 0; i < count; i++)\n    mpn_tracen (name, i, a[i], size);\n}\n\n\n/* Print \"value\\n\" to a file for an mpz_t value.  Any previous contents of\n   the file are overwritten, so you need different file names each time this\n   is called.\n\n   Overwriting the file is a feature, it means you get old data replaced\n   when you run a test program repeatedly.  
*/\n\nvoid\nmpn_trace_file (const char *filename, mp_srcptr ptr, mp_size_t size)\n{\n  FILE   *fp;\n  mpz_t  z;\n\n  fp = fopen (filename, \"w\");\n  if (fp == NULL)\n    {\n      perror (\"fopen\");\n      abort();\n    }\n\n  MPN_NORMALIZE (ptr, size);\n  PTR(z) = (mp_ptr) ptr;\n  SIZ(z) = (int) size;\n\n  mpz_out_str (fp, mp_trace_base, z);\n  fprintf (fp, \"\\n\");\n\n  if (ferror (fp) || fclose (fp) != 0)\n    {\n      printf (\"error writing %s\\n\", filename);\n      abort();\n    }\n}\n\n\n/* Print \"value\\n\" to a set of files, one file for each element of the given\n   array of mpn style ptr,size.  Any previous contents of the files are\n   overwritten, so you need different file names each time this is called.\n   Each file is \"filenameN\" where N is 0 to count-1.\n\n   \"a\" is an array of pointers, each a[i] is a pointer to \"size\" many limbs.\n   The formal parameter isn't mp_srcptr because that causes compiler\n   warnings, but the values aren't modified.\n\n   Overwriting the files is a feature, it means you get old data replaced\n   when you run a test program repeatedly.  The output style isn't\n   particularly pretty, but at least it gets something out, and you can cat\n   the files into bc, or whatever. 
*/\n\nvoid\nmpn_tracea_file (const char *filename,\n                 const mp_ptr *a, int count, mp_size_t size)\n{\n  char  *s;\n  int   i;\n  TMP_DECL;\n\n  TMP_MARK;\n  s = (char *) TMP_ALLOC (strlen (filename) + 50);\n\n  for (i = 0; i < count; i++)\n    {\n      sprintf (s, \"%s%d\", filename, i);\n      mpn_trace_file (s, a[i], size);\n    }\n\n  TMP_FREE;\n}\n\n\nvoid\nbyte_trace (const char *name, const void *ptr, mp_size_t size)\n{\n  char       *fmt;\n  mp_size_t  i;\n\n  mp_trace_start (name);\n\n  switch (mp_trace_base) {\n  case   8: fmt = \" %o\"; break;\n  case  10: fmt = \" %d\"; break;\n  case  16: fmt = \" %x\"; break;\n  case -16: fmt = \" %X\"; break;\n  default: printf (\"Oops, unsupported base in byte_trace\\n\"); abort (); break;\n  }\n\n  for (i = 0; i < size; i++)\n    printf (fmt, (int) ((unsigned char *) ptr)[i]);\n  printf (\"\\n\");\n}\n\nvoid\nbyte_tracen (const char *name, int num, const void *ptr, mp_size_t size)\n{\n  if (name != NULL && name[0] != '\\0')\n    {\n      printf (name, num);\n      putchar ('=');\n    }\n  byte_trace (NULL, ptr, size);\n}\n\n\nvoid\nd_trace (const char *name, double d)\n{\n  union {\n    double         d;\n    unsigned char  b[sizeof(double)];\n  } u;\n  int  i;\n\n  if (name != NULL && name[0] != '\\0')\n    printf (\"%s=\", name);\n\n  u.d = d;\n  printf (\"[\");\n  for (i = 0; i < sizeof (u.b); i++)\n    {\n      if (i != 0)\n        printf (\" \");\n      printf (\"%02X\", (int) u.b[i]);\n    }\n  printf (\"] %.20g\\n\", d);\n}\n"
  },
  {
    "path": "tests/x86_64call.as",
    "content": ";  AMD64 and Core 2 calling conventions checking.\n\n;  Copyright 2000, 2003, 2004 Free Software Foundation, Inc.\n;\n;  Copyright 2008 Jason Worth-Martin\n; \n;  This file is part of the MPIR Library.\n; \n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n; \n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n; \n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\n%include 'yasm_mac.inc'\n\n%ifdef GSYM_PREFIX\n%define G_EX(a) _ %+ a\n%else\n%define G_EX(a) a\n%endif\n\nG_EXTERN calling_conventions_save_rbx\nG_EXTERN calling_conventions_save_rbp\nG_EXTERN calling_conventions_save_r12\nG_EXTERN calling_conventions_save_r13\nG_EXTERN calling_conventions_save_r14\nG_EXTERN calling_conventions_save_r15\nG_EXTERN calling_conventions_function\nG_EXTERN calling_conventions_rbx\nG_EXTERN calling_conventions_rbp\nG_EXTERN calling_conventions_r12\nG_EXTERN calling_conventions_r13\nG_EXTERN calling_conventions_r14\nG_EXTERN calling_conventions_r15\nG_EXTERN calling_conventions_rflags\nG_EXTERN calling_conventions_fenv\nG_EXTERN calling_conventions_retaddr\n\n\n     BITS 64\n\n\n; int calling_conventions (...);\n; \n; The global variable \"calling_conventions_function\" is the function to\n; call, with the arguments as passed here.\n; \n; Perhaps the finit should be done only if the tags word isn't clear, but\n; nothing uses the 
rounding mode or anything at the moment.\n\n\n     default rel\n     ALIGN\t32\nGLOBAL_FUNC calling_conventions\n     mov\trax, [rsp]\n     mov\t[G_EX(calling_conventions_retaddr)], rax\n\n     lea\trax, [return]\n     mov\t[rsp], rax\n\n     mov\t[G_EX(calling_conventions_save_rbx)], rbx\n     mov\t[G_EX(calling_conventions_save_rbp)], rbp\n     mov\t[G_EX(calling_conventions_save_r12)], r12\n     mov\t[G_EX(calling_conventions_save_r13)], r13\n     mov\t[G_EX(calling_conventions_save_r14)], r14\n     mov\t[G_EX(calling_conventions_save_r15)], r15\n\n     mov\trbx, [G_EX(calling_conventions_save_rbx)]\n     mov\trbp, [G_EX(calling_conventions_save_rbp)]\n     mov\tr12, [G_EX(calling_conventions_save_r12)]\n     mov\tr13, [G_EX(calling_conventions_save_r13)]\n     mov\tr14, [G_EX(calling_conventions_save_r14)]\n     mov\tr15, [G_EX(calling_conventions_save_r15)]\n\n; values we expect to see unchanged, as per amd64check.c\n\n     mov\trbx, 0x1234567887654321\n     mov\trbp, 0x89ABCDEFFEDCBA98\n     mov\tr12, 0xDEADBEEFBADECAFE\n     mov\tr13, 0xFFEEDDCCBBAA9988\n     mov\tr14, 0x0011223344556677\n     mov\tr15, 0x1234432156788765\n\n; Try to provoke a problem by starting with junk in the registers,\n; especially %rax which will be the return value.\n;\n; ENHANCE-ME: If we knew how many of the parameter registers were\n; actually being used we could put junk in the rest.  
Maybe we could\n; get try.c to communicate this to us.\n; \n\n     mov\trax, 0xFEEDABBACAAFBEED\n     mov\tr10, 0xAB78DE89FF5125BB\n     mov\tr11, 0x1238901890189031\n\n     jmp\t[G_EX(calling_conventions_function)]\n\nreturn:\n     mov\t[G_EX(calling_conventions_rbx)], rbx\n     mov\t[G_EX(calling_conventions_rbp)], rbp\n     mov\t[G_EX(calling_conventions_r12)], r12\n     mov\t[G_EX(calling_conventions_r13)], r13\n     mov\t[G_EX(calling_conventions_r14)], r14\n     mov\t[G_EX(calling_conventions_r15)], r15\n\n     pushf\n     pop\trbx\n     mov\t[G_EX(calling_conventions_rflags)], rbx\n\n     fstenv\t[G_EX(calling_conventions_fenv)]\n     finit\n\n     mov\trbx, [G_EX(calling_conventions_save_rbx)]\n     mov\trbp, [G_EX(calling_conventions_save_rbp)]\n     mov\tr12, [G_EX(calling_conventions_save_r12)]\n     mov\tr13, [G_EX(calling_conventions_save_r13)]\n     mov\tr14, [G_EX(calling_conventions_save_r14)]\n     mov\tr15, [G_EX(calling_conventions_save_r15)]\n\n     jmp\t[G_EX(calling_conventions_retaddr)]\n"
  },
  {
    "path": "tests/x86_64check.c",
    "content": "/* AMD64 and Core 2 calling conventions checking.\n\nCopyright 2000, 2001, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* temporaries */\nlong  calling_conventions_save_rbx;\nlong  calling_conventions_save_rbp;\nlong  calling_conventions_save_r12;\nlong  calling_conventions_save_r13;\nlong  calling_conventions_save_r14;\nlong  calling_conventions_save_r15;\nlong  calling_conventions_retaddr;\nlong  calling_conventions_retval;\n\n/* values to check */\nstruct {\n  int  control;\n  int  status;\n  int  tag;\n  int  other[4];\n} calling_conventions_fenv;\nmpir_si  calling_conventions_rbx;\nmpir_si  calling_conventions_rbp;\nmpir_si  calling_conventions_r12;\nmpir_si  calling_conventions_r13;\nmpir_si  calling_conventions_r14;\nmpir_si  calling_conventions_r15;\nmpir_si  calling_conventions_rflags;\n\n/* expected values, as per x86_64call.asm */\nconst mpir_si  calling_conventions_want_rbx = CNST_LIMB(0x1234567887654321);\nconst mpir_si  calling_conventions_want_rbp = CNST_LIMB(0x89ABCDEFFEDCBA98);\nconst mpir_si  calling_conventions_want_r12 = 
CNST_LIMB(0xDEADBEEFBADECAFE);\nconst mpir_si  calling_conventions_want_r13 = CNST_LIMB(0xFFEEDDCCBBAA9988);\nconst mpir_si  calling_conventions_want_r14 = CNST_LIMB(0x0011223344556677);\nconst mpir_si  calling_conventions_want_r15 = CNST_LIMB(0x1234432156788765);\n\n#define DIR_BIT(rflags)   (((rflags) & (1<<10)) != 0)\n\n\n/* Return 1 if ok, 0 if not */\n\nint\ncalling_conventions_check (void)\n{\n  const char  *header = \"Violated calling conventions:\\n\";\n  int  ret = 1;\n\n#define CHECK(callreg, regstr, value)                   \\\n  if (callreg != value)                                 \\\n    {                                                   \\\n      printf (\"%s   %s  got 0x%016lX want 0x%016lX\\n\",  \\\n              header, regstr, callreg, value);          \\\n      header = \"\";                                      \\\n      ret = 0;                                          \\\n    }\n\n  CHECK (calling_conventions_rbx, \"rbx\", calling_conventions_want_rbx);\n  CHECK (calling_conventions_rbp, \"rbp\", calling_conventions_want_rbp);\n  CHECK (calling_conventions_r12, \"r12\", calling_conventions_want_r12);\n  CHECK (calling_conventions_r13, \"r13\", calling_conventions_want_r13);\n  CHECK (calling_conventions_r14, \"r14\", calling_conventions_want_r14);\n  CHECK (calling_conventions_r15, \"r15\", calling_conventions_want_r15);\n\n  if (DIR_BIT (calling_conventions_rflags) != 0)\n    {\n      printf (\"%s   rflags dir bit  got %d want 0\\n\",\n              header, DIR_BIT (calling_conventions_rflags));\n      header = \"\";\n      ret = 0;\n    }\n\n  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)\n    {\n      printf (\"%s   fpu tags  got %x want 0xFFFF\\n\",\n              header, (unsigned int) calling_conventions_fenv.tag & 0xFFFF);\n      header = \"\";\n      ret = 0;\n    }\n\n  return ret;\n}\n"
  },
  {
    "path": "tests/x86call.asm",
    "content": "dnl  x86 calling conventions checking.\n\ndnl  Copyright 2000, 2003 Free Software Foundation, Inc.\ndnl \ndnl  This file is part of the GNU MP Library.\ndnl \ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl \ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl \ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\n\ndnl  Instrumented profiling doesn't come out quite right below, since we\ndnl  don't do an actual \"ret\".  
There's only a few instructions here, so\ndnl  there's no great need to get them separately accounted, just let them\ndnl  get attributed to the caller.\n\nifelse(WANT_PROFILING,instrument,\n`define(`WANT_PROFILING',no)')\n\n\nC int calling_conventions (...);\nC\nC The global variable \"calling_conventions_function\" is the function to\nC call, with the arguments as passed here.\nC\nC Perhaps the finit should be done only if the tags word isn't clear, but\nC nothing uses the rounding mode or anything at the moment.\n\ndefine(G,\nm4_assert_numargs(1)\n`GSYM_PREFIX`'$1')\n\n\t.text\n\tALIGN(8)\nPROLOGUE(calling_conventions)\n\tmovl\t(%esp), %eax\n\tmovl\t%eax, G(calling_conventions_retaddr)\n\n\tmovl\t$L(return), (%esp)\n\n\tmovl\t%ebx, G(calling_conventions_save_ebx)\n\tmovl\t%esi, G(calling_conventions_save_esi)\n\tmovl\t%edi, G(calling_conventions_save_edi)\n\tmovl\t%ebp, G(calling_conventions_save_ebp)\n\n\tmovl\t$0x01234567, %ebx\n\tmovl\t$0x89ABCDEF, %esi\n\tmovl\t$0xFEDCBA98, %edi\n\tmovl\t$0x76543210, %ebp\n\n\tC try to provoke a problem by starting with junk in the registers,\n\tC especially in %eax and %edx which will be return values\n\tmovl\t$0x70246135, %eax\n\tmovl\t$0x8ACE9BDF, %ecx\n\tmovl\t$0xFDB97531, %edx\n\n\tjmp\t*G(calling_conventions_function)\n\nL(return):\n\tmovl\t%ebx, G(calling_conventions_ebx)\n\tmovl\t%esi, G(calling_conventions_esi)\n\tmovl\t%edi, G(calling_conventions_edi)\n\tmovl\t%ebp, G(calling_conventions_ebp)\n\n\tpushf\n\tpopl\t%ebx\n\tmovl\t%ebx, G(calling_conventions_eflags)\n\n\tfstenv\tG(calling_conventions_fenv)\n\tfinit\n\n\tmovl\tG(calling_conventions_save_ebx), %ebx\n\tmovl\tG(calling_conventions_save_esi), %esi\n\tmovl\tG(calling_conventions_save_edi), %edi\n\tmovl\tG(calling_conventions_save_ebp), %ebp\n\n\tjmp\t*G(calling_conventions_retaddr)\n\nEPILOGUE()\n\nC void gmp_x86check_workaround_apple_ld_bug() \nC \nC Apple ld has an annoying bug that causes it to only load members from \nC static archives that satisfy text 
symbol dependencies.  This procedure \nC creates a bogus dependency on a text symbol in x86check.o (in libtests.a) \nC to ensure that ld loads it, also making all of the needed non-text \nC symbols available. \nPROLOGUE(gmp_x86check_workaround_apple_ld_bug) \n       jmp     *G(calling_conventions_check) \nEPILOGUE() \n"
  },
  {
    "path": "tests/x86check.c",
    "content": "/* x86 calling conventions checking. */\n\n/*\nCopyright 2000, 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"tests.h\"\n\n\n/* temporaries */\nint  calling_conventions_save_ebx;\nint  calling_conventions_save_esi;\nint  calling_conventions_save_edi;\nint  calling_conventions_save_ebp;\nint  calling_conventions_retaddr;\nint  calling_conventions_retval;\n\n/* values to check */\nstruct {\n  unsigned  control;\n  unsigned  status;\n  unsigned  tag;\n  unsigned  other[4];\n} calling_conventions_fenv;\nint  calling_conventions_ebx;\nint  calling_conventions_esi;\nint  calling_conventions_edi;\nint  calling_conventions_ebp;\nint  calling_conventions_eflags;\n\n/* expected values, as per x86call.asm */\n#define VALUE_EBX   0x01234567\n#define VALUE_ESI   0x89ABCDEF\n#define VALUE_EDI   0xFEDCBA98\n#define VALUE_EBP   0x76543210\n\n#define DIR_BIT(eflags)   (((eflags) & (1<<10)) != 0)\n\n\n/* Return 1 if ok, 0 if not */\n\nint\ncalling_conventions_check (void)\n{\n  const char  *header = \"Violated calling conventions:\\n\";\n  int  ret = 1;\n\n#define CHECK(callreg, regstr, value)          
         \\\n  if (callreg != value)                                 \\\n    {                                                   \\\n      printf (\"%s   %s  got 0x%08X want 0x%08X\\n\",      \\\n              header, regstr, callreg, value);          \\\n      header = \"\";                                      \\\n      ret = 0;                                          \\\n    }\n\n  CHECK (calling_conventions_ebx, \"ebx\", VALUE_EBX);\n  CHECK (calling_conventions_esi, \"esi\", VALUE_ESI);\n  CHECK (calling_conventions_edi, \"edi\", VALUE_EDI);\n  CHECK (calling_conventions_ebp, \"ebp\", VALUE_EBP);\n\n  if (DIR_BIT (calling_conventions_eflags) != 0)\n    {\n      printf (\"%s   eflags dir bit  got %d want 0\\n\",\n              header, DIR_BIT (calling_conventions_eflags));\n      header = \"\";\n      ret = 0;\n    }\n\n  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)\n    {\n      printf (\"%s   fpu tags  got 0x%X want 0xFFFF\\n\",\n              header, calling_conventions_fenv.tag & 0xFFFF);\n      header = \"\";\n      ret = 0;\n    }\n\n  return ret;\n}\n"
  },
  {
    "path": "tune/Makefile.am",
    "content": "## Process this file with automake to generate Makefile.in\n\n# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,\n# 2010, 2011 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 3 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.\n\n\nAM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests\n\nEXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \\\n  ia64.asm powerpc.asm powerpc64.asm x86_64.as many.pl\nnoinst_HEADERS = speed.h\n\n# Prefer -static on the speed and tune programs, since that can avoid\n# overheads of shared library linkages on some systems.  Libtool tends to\n# botch -static if configured with --disable-static, perhaps reasonably\n# enough.  
In any event under --disable-static the only choice is a dynamic\n# link so there's no point in -static.\n#\nif ENABLE_STATIC\nSTATIC = -static\nelse\nSTATIC =\nendif\n\n\nEXTRA_LTLIBRARIES = libspeed.la\n\nlibspeed_la_SOURCES =\t\t\t\t\t\t\t\\\n  common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c\t\t\\\n  freq.c \t\t\t\t\\\n  gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c\t\t\t\\\n  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c      \\\n  jacbase1.c jacbase2.c jacbase3.c jacbase4.c\t\t\t\t\\\n  mod_1_div.c mod_1_inv.c modlinv.c\t\t\t\t\t\\\n  noop.c powm_mod.c powm_redc.c preinv_divrem_1.c\t\t\t\t\\\n  fac_ui_large.c fac_ui_small.c\t\t\t\t\t\t\\\n  set_strb.c set_strs.c set_strp.c time.c\n\nlibspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \\\n  $(top_builddir)/tests/libtests.la $(top_builddir)/libmpir.la\nlibspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)\nlibspeed_la_LDFLAGS = $(STATIC)\n\n$(top_builddir)/tests/libtests.la:\n\tcd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la\n\n\n# The library code is faster static than shared on some systems, so do\n# tuning and measuring with static, since users who care about maximizing\n# speed will be using that.  speed-dynamic exists to show the difference.\n#\n# On Solaris 8, gcc 2.95.2 -static is somehow broken (it creates executables\n# that immediately seg fault), so -all-static is not used.  The only thing\n# -all-static does is make libc static linked as well as libmpir, and that\n# makes a difference only when measuring malloc and friends in the speed\n# program.  
This can always be forced with \"make speed_LDFLAGS=-all-static\n# ...\" if desired, see tune/README.\n\nEXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup\n\nDEPENDENCIES = libspeed.la\nLDADD = $(DEPENDENCIES)\n\nspeed_SOURCES = speed.c\nspeed_LDFLAGS = $(STATIC)\n\nspeed_dynamic_SOURCES = speed.c\n\nspeed_ext_SOURCES = speed-ext.c\nspeed_ext_LDFLAGS = $(STATIC)\n\ntuneup_SOURCES = tuneup.c\nnodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)\ntuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la\ntuneup_LDADD = $(tuneup_DEPENDENCIES)\ntuneup_LDFLAGS = $(STATIC)\n\n\ntune:\n\t$(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)\n\t./tuneup\n\nallprogs: $(EXTRA_PROGRAMS)\n\n# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl\nCLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \\\n\t$(TUNE_MPN_SRCS) sqr_asm.asm \\\n\tstg.gnuplot stg.data \\\n\tmtg.gnuplot mtg.data \\\n\tfibg.gnuplot fibg.data \\\n\tgraph.gnuplot graph.data \\\n\t$(MANY_CLEAN)\nDISTCLEANFILES = sqr_basecase.c fac_ui.c $(MANY_DISTCLEAN)\n\n\n# Generating these little files at build time seems better than including\n# them in the distribution, since the list can be changed more easily.\n#\n# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1\n# and 2 limb divisors, which are never used during tuning, so it doesn't\n# matter whether it picks up a tuned or untuned version of those.\n#\n# divrem_1 and mod_1 are recompiled renamed to \"_tune\" to avoid a linking\n# problem.  If a native divrem_1 provides an mpn_divrem_1c entrypoint then\n# common.c will want that, but the generic divrem_1 doesn't provide it,\n# likewise for mod_1.  
The simplest way around this is to have the tune\n# build versions renamed suitably.\n#\n# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the\n# recompiled object will be rebuilt if that file changes.\n\nTUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) $(TUNE_FFT_SRCS_BASIC) divrem_1.c mod_1.c\nTUNE_MPN_SRCS_BASIC = divrem_2.c gcd.c gcdext.c get_str.c set_str.c \\\n  matrix22_mul.c hgcd.c hgcd_appr.c hgcd_reduce.c \\\n  mul_n.c mullow_n.c mulhigh_n.c mul.c tdiv_qr.c \\\n  mulmid.c mulmid_n.c toom42_mulmid.c \\\n  toom4_mul_n.c toom4_mul.c toom3_mul.c toom3_mul_n.c \\\n  toom8h_mul.c toom8_sqr_n.c mulmod_2expm1.c mulmod_2expp1_basecase.c \\\n  rootrem.c divrem_euclidean_r_1.c \\\n  divrem_hensel_qr_1.c rsh_divrem_hensel_qr_1.c sb_divappr_q.c sb_div_qr.c \\\n  dc_divappr_q.c dc_div_qr.c dc_div_qr_n.c \\\n  inv_divappr_q.c inv_div_qr.c tdiv_q.c dc_bdiv_qr.c dc_bdiv_qr_n.c dc_bdiv_q.c \nTUNE_FFT_SRCS_BASIC = split_bits.c revbin.c normmod_2expp1.c mulmod_2expp1.c \\\n  mul_trunc_sqrt2.c mul_mfa_trunc_sqrt2.c mul_fft_main.c \\\n  mul_2expmod_2expp1.c ifft_trunc_sqrt2.c ifft_trunc.c ifft_radix2.c \\\n  ifft_negacyclic.c fft_trunc.c fft_radix2.c fft_negacyclic.c \\\n  fft_mfa_trunc_sqrt2.c fft_mfa_trunc_sqrt2_inner.c fermat_to_mpz.c \\\n  div_2expmod_2expp1.c combine_bits.c butterfly_rshB.c butterfly_lshB.c \\\n  adjust_sqrt2.c adjust.c\n\n$(TUNE_MPN_SRCS_BASIC):\n\tfor i in $(TUNE_MPN_SRCS_BASIC); do \\\n\t  echo \"#define TUNE_PROGRAM_BUILD 1\" >$$i; \\\n\t  echo \"#include \\\"mpn/generic/$$i\\\"\" >>$$i; \\\n\tdone\n\n$(TUNE_FFT_SRCS_BASIC):\n\tfor i in $(TUNE_FFT_SRCS_BASIC); do \\\n\t  echo \"#define TUNE_PROGRAM_BUILD 1\" >$$i; \\\n\t  echo \"#include \\\"fft/$$i\\\"\" >>$$i; \\\n\tdone\n\ndivrem_1.c:\n\techo \"#define TUNE_PROGRAM_BUILD 1\"                >divrem_1.c\n\techo \"#define __gmpn_divrem_1  mpn_divrem_1_tune\" >>divrem_1.c\n\techo \"#include \\\"mpn/generic/divrem_1.c\\\"\"        >>divrem_1.c\n\nmod_1.c:\n\techo \"#define TUNE_PROGRAM_BUILD 
1\"          >mod_1.c\n\techo \"#define __gmpn_mod_1  mpn_mod_1_tune\" >>mod_1.c\n\techo \"#include \\\"mpn/generic/mod_1.c\\\"\"     >>mod_1.c\n\nsqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm\n\techo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm\n\techo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm\n\n# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?\nfac_ui.c: $(top_builddir)/mpz/fac_ui.c\n\techo \"#define TUNE_PROGRAM_BUILD 1\"          >fac_ui.c\n\techo \"#define __gmpz_fac_ui mpz_fac_ui_tune\" >>fac_ui.c\n\techo \"#define __gmpz_oddfac_1 mpz_oddfac_1_tune\" >>fac_ui.c\n\techo \"#include \\\"mpz/oddfac_1.c\\\"\"           >>fac_ui.c\n\techo \"#include \\\"mpz/fac_ui.c\\\"\"             >>fac_ui.c\n\ninclude ../mpn/Makeasm.am\n"
  },
  {
    "path": "tune/README",
    "content": "Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n02110-1301, USA.\n\n\n\n\n\n               GMP SPEED MEASURING AND PARAMETER TUNING\n\n\nThe programs in this directory are for knowledgeable users who want to\nmeasure GMP routines on their machine, and perhaps tweak some settings or\nidentify things that can be improved.\n\nThe programs here are tools, not ready to run solutions.  Nothing is built\nin a normal \"make all\", but various Makefile targets described below exist.\n\nRelatively few systems and CPUs have been tested, so be sure to verify that\nresults are sensible before relying on them.\n\n\n\n\nMISCELLANEOUS NOTES\n\n--enable-assert\n\n    Don't configure with --enable-assert, since the extra code added by\n    assertion checking may influence measurements.\n\nDirect mapped caches\n\n    Some effort has been made to accommodate CPUs with direct mapped caches,\n    by putting data blocks more or less contiguously on the stack.  
But this\n    will depend on TMP_ALLOC using alloca, and even then it may or may not\n    be enough.\n\nFreeBSD 4.2 i486 getrusage\n\n    This getrusage seems to be a bit doubtful, it looks like it's\n    microsecond accurate, but sometimes ru_utime remains unchanged after a\n    time of many microseconds has elapsed.  It'd be good to detect this in\n    the time.c initializations, but for now the suggestion is to pretend it\n    doesn't exist.\n\n        ./configure ac_cv_func_getrusage=no\n\nNetBSD 1.4.1 m68k macintosh time base\n\n    On this system it's been found getrusage often goes backwards, making it\n    unusable (time.c getrusage_backwards_p detects this).  gettimeofday\n    sometimes doesn't update atomically when it crosses a 1 second boundary.\n    Not sure what to do about this.  Expect possible intermittent failures.\n\nSCO OpenUNIX 8 /etc/hw\n\n    /etc/hw takes about a second to return the cpu frequency, which suggests\n    perhaps it's measuring each time it runs.  If this is annoying when\n    running the speed program repeatedly then set a GMP_CPU_FREQUENCY\n    environment variable (see TIME BASE section below).\n\nLow resolution timebase\n\n    Parameter tuning can be very time consuming if the only timebase\n    available is a 10 millisecond clock tick, to the point of being\n    unusable.  This is currently the case on ARM systems.\n\n\n\n\nPARAMETER TUNING\n\nThe \"tuneup\" program runs some tests designed to find the best settings for\nvarious thresholds, like MUL_KARATSUBA_THRESHOLD.  Its output can be put\ninto gmp-mparam.h.  The program is built and run with\n\n        make tune\n\nIf the thresholds indicated are grossly different from the values in the\nselected gmp-mparam.h then there may be a performance boost in applicable\nsize ranges by changing gmp-mparam.h accordingly.\n\nBe sure to do a full reconfigure and rebuild to get any newly set thresholds\nto take effect.  
A partial rebuild is enough sometimes, but a fresh\nconfigure and make is certain to be correct.\n\nIf a CPU has specific tuned parameters coming from a gmp-mparam.h in one of\nthe mpn subdirectories then the values from \"make tune\" should be similar.\nBut check that the configured CPU is right and there are no machine specific\neffects causing a difference.\n\nIt's hoped the compiler and options used won't have too much effect on\nthresholds, since for most CPUs they ultimately come down to comparisons\nbetween assembler subroutines.  Missing out on the longlong.h macros by not\nusing gcc will probably have an effect.\n\nSome thresholds produced by the tune program are merely single values chosen\nfrom what's a range of sizes where two algorithms are pretty much the same\nspeed.  When this happens the program is likely to give somewhat different\nvalues on successive runs.  This is noticeable on the toom3 thresholds for\ninstance.\n\n\n\n\nSPEED PROGRAM\n\nThe \"speed\" program can be used for measuring and comparing various\nroutines, and producing tables of data or gnuplot graphs.  Compile it with\n\n\tmake speed\n\n(Or on DOS systems \"make speed.exe\".)\n\nHere are some examples of how to use it.  Check the code for all the\noptions.\n\nDraw a graph of mpn_mul_n, stepping through sizes by 10 or a factor of 1.05\n(whichever is greater).\n\n        ./speed -s 10-5000 -t 10 -f 1.05 -P foo mpn_mul_n\n\tgnuplot foo.gnuplot\n\nCompare mpn_add_n and an mpn_lshift by 1, showing times in cycles and\nshowing under mpn_lshift the difference between it and mpn_add_n.\n\n\t./speed -s 1-40 -c -d mpn_add_n mpn_lshift.1\n\nUsing option -c for times in cycles is interesting but normally only\nnecessary when looking carefully at assembler subroutines.  
You might think\nit would always give an integer value, but this doesn't happen in practice,\nprobably due to overheads in the time measurements.\n\nIn the free-form output the \"#\" symbol against a measurement means the\ncorresponding routine is fastest at that size.  This is a convenient visual\ncue when comparing different routines.  The graph data files <name>.data\ndon't get this since it would upset gnuplot or other data viewers.\n\n\n\n\nTIME BASE\n\nThe time measuring method is determined in time.c, based on what the\nconfigured host has available.  A cycle counter is preferred, possibly\nsupplemented by another method if the counter has a limited range.  A\nmicrosecond accurate getrusage() or gettimeofday() will work quite well too.\n\nThe cycle counters (except possibly on alpha) and gettimeofday() will depend\non the machine being otherwise idle, or rather on other jobs not stealing\nCPU time from the measuring program.  Short routines (those that complete\nwithin a timeslice) should work even on a busy machine.\n\nSome trouble is taken by speed_measure() in common.c to avoid ill effects\nfrom sporadic interrupts, or other intermittent things (like cron waking up\nevery minute).  But generally an idle machine will be necessary to be\ncertain of consistent results.\n\nThe CPU frequency is needed to convert between cycles and seconds, or for\nwhen a cycle counter is supplemented by getrusage() etc.  The speed program\nwill convert as necessary according to the output format requested.  The\ntune program will work with either cycles or seconds.\n\nfreq.c knows how to get the frequency on some systems, or can measure a\ncycle counter against gettimeofday() or getrusage(), but when that fails, or\nneeds to be overridden, an environment variable GMP_CPU_FREQUENCY can be\nused (in Hertz).  
For example in \"bash\" on a 650 MHz machine,\n\n\texport GMP_CPU_FREQUENCY=650e6\n\nA high precision time base makes it possible to get accurate measurements in\na shorter time.\n\n\n\n\nEXAMPLE COMPARISONS - VARIOUS\n\nHere are some ideas for things that can be done with the speed program.\n\nThere's always going to be a certain amount of overhead in the time\nmeasurements, due to reading the time base, and in the loop that runs a\nroutine enough times to get a reading of the desired precision.  Noop\nfunctions taking various arguments are available to measure this.  The\n\"overhead\" printed by the speed program each time in its intro is the \"noop\"\nroutine, but note that this is just for information, it isn't deducted from\nthe times printed or anything.\n\n\t./speed -s 1 noop noop_wxs noop_wxys\n\nTo see how many cycles per limb a routine is taking, look at the time\nincrease when the size increments, using option -D.  This avoids fixed\noverheads in the measuring.  Also, remember many of the assembler routines\nhave unrolled loops, so it might be necessary to compare times at, say, 16,\n32, 48, 64 etc to see what the unrolled part is taking, as opposed to any\nfinishing off.\n\n        ./speed -s 16-64 -t 16 -C -D mpn_add_n\n\nThe -C option on its own gives cycles per limb, but is really only useful at\nbig sizes where fixed overheads are small compared to the code doing the\nreal work.  Remember of course memory caching and/or page swapping will\naffect results at large sizes.\n\n        ./speed -s 500000 -C mpn_add_n\n\nOnce a calculation stops fitting in the CPU data cache, it's going to start\ntaking longer.  Exactly where this happens depends on the cache priming in\nthe measuring routines, and on what sort of \"least recently used\" the\nhardware does.  
Here's an example for a CPU with a 16kbyte L1 data cache and\n32-bit limb, showing a suddenly steeper curve for mpn_add_n at about 2000\nlimbs.\n\n        ./speed -s 1-4000 -t 5 -f 1.02 -P foo mpn_add_n\n\tgnuplot foo.gnuplot\n\nWhen a routine has an unrolled loop for, say, multiples of 8 limbs and then\nan ordinary loop for the remainder, it can happen that it's actually faster\nto do an operation on, say, 8 limbs than it is on 7 limbs.  The following\ndraws a graph of mpn_sub_n, to see whether times smoothly increase with\nsize.\n\n        ./speed -s 1-100 -c -P foo mpn_sub_n\n\tgnuplot foo.gnuplot\n\nIf mpn_lshift and mpn_rshift have special case code for shifts by 1, it\nought to be faster (or at least not slower) than shifting by, say, 2 bits.\n\n        ./speed -s 1-200 -c mpn_rshift.1 mpn_rshift.2\n\nAn mpn_lshift by 1 can be done by mpn_add_n adding a number to itself, and\nif the lshift isn't faster there's an obvious improvement that's possible.\n\n        ./speed -s 1-200 -c mpn_lshift.1 mpn_add_n_self\n\nOn some CPUs (AMD K6 for example) an \"in-place\" mpn_add_n where the\ndestination is one of the sources is faster than a separate destination.\nHere's an example to see this.  \".1\" selects dst==src1 for mpn_add_n (and\nmpn_sub_n), for other values see speed.h SPEED_ROUTINE_MPN_BINARY_N_CALL.\n\n        ./speed -s 1-200 -c mpn_add_n mpn_add_n.1\n\nThe gmp manual points out that divisions by powers of two should be done\nusing a right shift because it'll be significantly faster than an actual\ndivision.  The following shows by what factor mpn_rshift is faster than\nmpn_divrem_1, using division by 32 as an example.\n\n        ./speed -s 10-20 -r mpn_rshift.5 mpn_divrem_1.32\n\n\n\n\nEXAMPLE COMPARISONS - MULTIPLICATION\n\nmul_basecase takes a \".<r>\" parameter which is the first (larger) size\nparameter.  
For example to show speeds for 20x1 up to 20x15 in cycles,\n\n        ./speed -s 1-15 -c mpn_mul_basecase.20\n\nmul_basecase with no parameter does an NxN multiply, so for example to show\nspeeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles,\n\n        ./speed -s 1-20 -c mpn_mul_basecase\n\nsqr_basecase is implemented by a \"triangular\" method on most CPUs, making it\nup to twice as fast as mul_basecase.  In practice loop overheads and the\nproducts on the diagonal mean it falls short of this.  Here's an example\nrunning the two and showing by what factor an NxN mul_basecase is slower\nthan an NxN sqr_basecase.  (Some versions of sqr_basecase only allow sizes\nbelow SQR_KARATSUBA_THRESHOLD, so if it crashes at that point don't worry.)\n\n        ./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase\n\nThe technique described above with -CD for showing the time difference in\ncycles per limb between two size operations can be done on an NxN\nmul_basecase using -E to change the basis for the size increment to N*N.\nFor instance a 20x20 operation is taken to be doing 400 limbs, and a 16x16\ndoing 256 limbs.  The following therefore shows the per crossproduct speed\nof mul_basecase and sqr_basecase at around 20x20 limbs.\n\n        ./speed -s 16-20 -t 4 -CDE mpn_mul_basecase mpn_sqr_basecase\n\nOf course sqr_basecase isn't really doing NxN crossproducts, but it can be\ninteresting to compare it to mul_basecase as if it was.  For sqr_basecase\nthe -F option can be used to base the deltas on N*(N+1)/2 operations, which\nis the triangular products sqr_basecase does.  For example,\n\n        ./speed -s 16-20 -t 4 -CDF mpn_sqr_basecase\n\nBoth -E and -F are preliminary and might change.  A consistent approach to\nusing them when claiming certain per crossproduct or per triangularproduct\nspeeds hasn't really been established, but the increment between speeds in\nthe range karatsuba will call seems sensible, that being k to k/2.  
For\ninstance, if the karatsuba threshold was 20 for the multiply and 30 for the\nsquare,\n\n        ./speed -s 10-20 -t 10 -CDE mpn_mul_basecase\n        ./speed -s 15-30 -t 15 -CDF mpn_sqr_basecase\n\n\n\nEXAMPLE COMPARISONS - MALLOC\n\nThe gmp manual recommends application programs avoid excessive initializing\nand clearing of mpz_t variables (and mpq_t and mpf_t too).  Every new\nvariable will at a minimum go through an init, a realloc for its first\nstore, and finally a clear.  Quite how long that takes depends on the C\nlibrary.  The following compares an mpz_init/realloc/clear to a 10 limb\nmpz_add.  Don't be surprised if the mallocing is quite slow.\n\n        ./speed -s 10 -c mpz_init_realloc_clear mpz_add\n\nOn some systems malloc and free are much slower when dynamic linked.  The\nspeed-dynamic program can be used to see this.  For example the following\nmeasures malloc/free, first static then dynamic.\n\n        ./speed -s 10 -c malloc_free\n        ./speed-dynamic -s 10 -c malloc_free\n\nOf course a real world program has big problems if it's doing so many\nmallocs and frees that it gets slowed down by a dynamic linked malloc.\n\n\n\n\n\nEXAMPLE COMPARISONS - STRING CONVERSIONS\n\nmpn_get_str does a binary to string conversion.  The base is specified with\na \".<r>\" parameter, or decimal by default.  Power of 2 bases are much faster\nthan general bases.  The following compares decimal and hex for instance.\n\n        ./speed -s 1-20 -c mpn_get_str mpn_get_str.16\n\nSmaller bases need more divisions to split a given size number, and so are\nslower.  The following compares base 3 and base 9.  On small operands 9 will\nbe nearly twice as fast, though at bigger sizes this reduces since in the\ncurrent implementation both divide repeatedly by 3^20 (or 3^40 for 64 bit\nlimbs) and those divisions come to dominate.\n\n        ./speed -s 1-20 -cr mpn_get_str.3 mpn_get_str.9\n\nmpn_set_str does a string to binary conversion.  
The base is specified with\na \".<r>\" parameter, or decimal by default.  Power of 2 bases are faster than\ngeneral bases on large conversions.\n\n\t./speed -s 1-512 -f 2 -c mpn_set_str.8 mpn_set_str.10\n\nmpn_set_str also has some special case code for decimal which is a bit\nfaster than the general case, basically by giving the compiler a chance to\noptimize some multiplications by 10.\n\n\t./speed -s 20-40 -c mpn_set_str.9 mpn_set_str.10 mpn_set_str.11\n\n\n\n\nEXAMPLE COMPARISONS - GCDs\n\nmpn_gcd_1 has a threshold for when to reduce using an initial x%y when both\nx and y are single limbs.  This isn't tuned currently, but a value can be\nestablished by a measurement like\n\n\t./speed -s 10-32 mpn_gcd_1.10\n\nThis runs src[0] from 10 to 32 bits, and y fixed at 10 bits.  If the div\nthreshold is high, say 31 so it's effectively disabled then a 32x10 bit gcd\nis done by nibbling away at the 32-bit operands bit-by-bit.  When the\nthreshold is small, say 1 bit, then an initial x%y is done to reduce it to a\n10x10 bit operation.\n\nThe threshold in mpn/generic/gcd_1.c or the various assembler\nimplementations can be tweaked up or down until there's no more speedups on\ninteresting combinations of sizes.  Note that this affects only a 1x1 limb\noperation and so isn't very important.  (An Nx1 limb operation always does\nan initial modular reduction, using mpn_mod_1 or mpn_modexact_1_odd.)\n\n\n\n\nSPEED PROGRAM EXTENSIONS\n\nPotentially lots of things could be made available in the program, but it's\nbeen left at only the things that have actually been wanted and are likely\nto be reasonably useful in the future.\n\nExtensions should be fairly easy to make though.  speed-ext.c is an example,\nin a style that should suit one-off tests, or new code fragments under\ndevelopment.\n\nmany.pl is a script for generating a new speed program supplemented with\nalternate versions of the standard routines.  
It can be used for measuring\nexperimental code, or for comparing different implementations that exist\nwithin a CPU family.\n\n\n\n\nTHRESHOLD EXAMINING\n\nThe speed program can be used to examine the speeds of different algorithms\nto check the tune program has done the right thing.  For example to examine\nthe karatsuba multiply threshold,\n\n\t./speed -s 5-40 mpn_mul_basecase mpn_kara_mul_n\n\nWhen examining the toom3 threshold, remember it depends on the karatsuba\nthreshold, so the right karatsuba threshold needs to be compiled into the\nlibrary first.  The tune program uses specially recompiled versions of\nmpn/mul_n.c etc for this reason, but the speed program simply uses the\nnormal libmpir.la.\n\nNote further that the various routines may recurse into themselves on sizes\nfar enough above applicable thresholds.  For example, mpn_kara_mul_n will\nrecurse into itself on sizes greater than twice the compiled-in\nMUL_KARATSUBA_THRESHOLD.\n\nWhen doing the above comparison between mul_basecase and kara_mul_n what's\nprobably of interest is mul_basecase versus a kara_mul_n that does one level\nof Karatsuba then calls to mul_basecase, but this only happens on sizes less\nthan twice the compiled MUL_KARATSUBA_THRESHOLD.  A larger value for that\nsetting can be compiled-in to avoid the problem if necessary.  The same\napplies to toom3 and DC, though in a trickier fashion.\n\nThere are some upper limits on some of the thresholds, arising from arrays\ndimensioned according to a threshold (mpn_mul_n), or asm code with certain\nsized displacements (some x86 versions of sqr_basecase).  So putting huge\nvalues for the thresholds, even just for testing, may fail.\n\n\n\n\nFUTURE\n\nMake a program to check the time base is working properly, for small and\nlarge measurements.  
Make it able to test each available method, including\nperhaps the apparent resolution of each.\n\nMake a general mechanism for specifying operand overlap, and a syntax like\nmaybe \"mpn_add_n.dst=src2\" to select it.  Some measuring routines do this\nsort of thing with the \"r\" parameter currently.\n\n\n\n----------------\nLocal variables:\nmode: text\nfill-column: 76\nEnd:\n"
  },
  {
    "path": "tune/aligntest",
    "content": "#!/bin/bash\nif [ $# -ne 3 ] ; then\n\techo \"$0 mpn_fn max_time size\"\n\texit 1\nfi\nfor x in 0 1 2 3 4 5 6 7\ndo\nfor y in 0 1 2 3 4 5 6 7\ndo\nfor w in 0 1 2 3 4 5 6 7\ndo\nfor W in 0 1 2 3 4 5 6 7\ndo\nwhile true\ndo\nc=$(./speed -x $x -y $y -w $w -W $W -c -s $3 $1 | tail -n 1 | tr -s \" \" | cut -d \" \" -f 2 | cut -d . -f 1)\nif [ $c -lt $2 ] ; then break ; fi\necho \"$x $y $w $W $c\"\ndone\n\ndone\ndone\ndone\ndone\n"
  },
  {
    "path": "tune/alpha.asm",
    "content": "dnl  Alpha time stamp counter access routine.\n\ndnl  Copyright 2000, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC void speed_cyclecounter (unsigned int p[2]);\nC\n\nC The rpcc instruction returns a 64-bit value split into two 32-bit fields.\nC The lower 32 bits are set by the hardware, and the upper 32 bits are set\nC by the operating system.  The real per-process cycle count is the sum of\nC these halves.\n\nC Unfortunately, some operating systems don't get this right.  NetBSD 1.3 is\nC known to sometimes put garbage in the upper half.  Whether newer NetBSD\nC versions get it right, is unknown to us.\n\nC rpcc measures cycles elapsed in the user program and hence should be very\nC accurate even on a busy system.  Losing cache contents due to task\nC switching may have an effect though.\n\nASM_START()\nPROLOGUE(speed_cyclecounter)\n\trpcc\tr0\n\tsrl\tr0,32,r1\n\taddq\tr1,r0,r0\n\tstl\tr0,0(r16)\n\tstl\tr31,4(r16)\t\tC zero upper return word\n\tret\tr31,(r26),1\nEPILOGUE(speed_cyclecounter)\nASM_END()\n"
  },
  {
    "path": "tune/benchmpn",
    "content": "#!/bin/bash\necho -ne \"cpu                  \\t\"\n../config.guess | cut -d - -f 1\nTMPFILE=$(mktemp mpir.XXXXXX)\nfor fn in mpn_add_n mpn_sub_n mpn_mul_1.333 mpn_addmul_1.333 mpn_submul_1.333 mpn_mul_2 mpn_addmul_2 mpn_submul_2 \\\n\tmpn_addadd_n mpn_addsub_n mpn_subadd_n mpn_lshift.3 mpn_rshift.3 mpn_lshift2 mpn_rshift2 mpn_lshift1 mpn_rshift1 mpn_double mpn_half \\\n\tmpn_addlsh1_n mpn_sublsh1_n mpn_addlsh_n.3 mpn_sublsh_n.3 mpn_inclsh_n.3 mpn_declsh_n.3 \\\n\tmpn_rsh1add_n mpn_rsh1sub_n mpn_sumdiff_n mpn_store mpn_copyi mpn_copyd \\\n\tmpn_rsblsh1_n mpn_addlsh2_n mpn_rsblsh2_n mpn_karaadd mpn_karasub \\\n\tmpn_popcount mpn_hamdist mpn_com mpn_not mpn_and_n mpn_xor_n mpn_ior_n mpn_nand_n mpn_nior_n mpn_xnor_n mpn_andn_n \\\n\tmpn_iorn_n mpn_lshiftc.3 mpn_divexact_byff mpn_divexact_byfobm1.3 mpn_divexact_by3 mpn_divexact_1.3333 mpn_modexact_1c_odd.334333 \\\n\tmpn_add_err1_n mpn_sub_err1_n \\\n\tmpn_divrem_euclidean_qr_1.77777 mpn_divrem_euclidean_qr_2 mpn_divrem_euclidean_r_1.77777 mpn_divrem_1.77777 mpn_divrem_2 \\\n\tmpn_divrem_hensel_qr_1.3333 mpn_divrem_hensel_qr_1_1.3333 mpn_divrem_hensel_qr_1_2.3333  \\\n\tmpn_divrem_hensel_r_1.3333 mpn_divrem_hensel_rsh_qr_1.3333 mpn_rsh_divrem_hensel_qr_1.3333 \\\n\tmpn_rsh_divrem_hensel_qr_1_1.3333 mpn_rsh_divrem_hensel_qr_1_2.3333 \\\n\tmpn_mod_1_1 mpn_mod_1_2 mpn_mod_1_3 mpn_mod_1_4 mpn_mod_34lsub1\ndo\n\tfnc=$(echo $fn | cut -d _ -f 2- | cut -d . -f 1)\n\twhile [ $(echo \"$fnc\" | wc -c) -le 24 ]\n\tdo \n\t\tfnc=$(echo \" $fnc\")\n\tdone\n\techo -ne \"$fnc\\t\"\n\t./speed -c -s 1000 $fn > $TMPFILE 2>&1\n\tif [ $? -eq 0 ] ; then\n\t\tcat $TMPFILE | tail -n 1 | tr -s \" \" | cut -f 2 -d \" \" | cut -d . 
-f 1\t\t\n\telse\n\t\t#try alternate name\n\t\tif [ $fn == mpn_com ] ; then fn=mpn_com_n ; fi\n\t\tif [ $fn == mpn_lshift2 ] ; then fn=mpn_lshift.2 ; fi\n\t\tif [ $fn == mpn_rshift2 ] ; then fn=mpn_rshift.2 ; fi\n\t\tif [ $fn == mpn_rshift1 ] ; then fn=mpn_rshift.1 ; fi\n\t\tif [ $fn == mpn_lshift1 ] ; then fn=mpn_lshift.1 ; fi\n\t\tif [ $fn == mpn_half ] ; then fn=mpn_rshift.1 ; fi\n\t\tif [ $fn == mpn_double ] ; then fn=mpn_lshift.1 ; fi\n\t\tif [ $fn == mpn_store ] ; then fn=MPN_ZERO ; fi\n\t\tif [ $fn == mpn_copyi ] ; then fn=MPN_COPY_INCR ; fi\n\t\tif [ $fn == mpn_copyd ] ; then fn=MPN_COPY_DECR ; fi\n\t\tif [ $fn == mpn_divexact_byfobm1.3 ] ; then fn=mpn_bdiv_dbm1c ; fi\n\t\tif [ $fn == mpn_mod_1_2 ] ; then fn=mpn_mod_1s_2 ; fi\n\t\tif [ $fn == mpn_mod_1_3 ] ; then fn=mpn_mod_1s_3 ; fi\n\t\tif [ $fn == mpn_mod_1_4 ] ; then fn=mpn_mod_1s_4 ; fi\n\t\tif [ $fn == mpn_not ] ; then fn=mpn_com ; fi\n\t\t./speed -c -s 1000 $fn > $TMPFILE 2>&1\n\t\tif [ $? -eq 0 ] ; then\n\t\t\tcat $TMPFILE | tail -n 1 | tr -s \" \" | cut -f 2 -d \" \" | cut -d . -f 1\t\t\n\t\telse\t\t\n\t\t\techo\n\t\tfi\n\tfi\ndone\nrm -f $TMPFILE\n"
  },
  {
    "path": "tune/common.c",
    "content": "/* Shared speed subroutines.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010,\n2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  \n\n*/\n\n#define __GMP_NO_ATTRIBUTE_CONST_PURE\n\n#include <errno.h>\n#include <fcntl.h>\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h> /* for qsort */\n#include <string.h>\n#include <unistd.h>\n#if 0\n#include <sys/ioctl.h>\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"tests.h\"\n#include \"speed.h\"\n\n\nint   speed_option_addrs = 0;\nint   speed_option_verbose = 0;\n\n\n/* Provide __clz_tab even if it's not required, for the benefit of new code\n   being tested with many.pl. 
*/\n#ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#include \"mp_clz_tab.c\"\n#undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB\n#endif\n\n\nvoid\npentium_wbinvd(void)\n{\n#if 0\n  {\n    static int  fd = -2;\n\n    if (fd == -2)\n      {\n        fd = open (\"/dev/wbinvd\", O_RDWR);\n        if (fd == -1)\n          perror (\"open /dev/wbinvd\");\n      }\n\n    if (fd != -1)\n      ioctl (fd, 0, 0);\n  }\n#endif\n\n#if 0\n#define WBINVDSIZE  1024*1024*2\n  {\n    static char  *p = NULL;\n    int   i, sum;\n\n    if (p == NULL)\n      p = malloc (WBINVDSIZE);\n\n#if 0\n    for (i = 0; i < WBINVDSIZE; i++)\n      p[i] = i & 0xFF;\n#endif\n\n    sum = 0;\n    for (i = 0; i < WBINVDSIZE; i++)\n      sum += p[i];\n\n    mpn_cache_fill_dummy (sum);\n  }\n#endif\n}\n\n\nint\ndouble_cmp_ptr (const double *p, const double *q)\n{\n  if (*p > *q)  return 1;\n  if (*p < *q)  return -1;\n  return 0;\n}\n\n\n/* Measure the speed of a given routine.\n\n   The routine is run with enough repetitions to make it take at least\n   speed_precision * speed_unittime.  This aims to minimize the effects of a\n   limited accuracy time base and the overhead of the measuring itself.\n\n   Measurements are made looking for 4 results within TOLERANCE of each\n   other (or 3 for routines taking longer than 2 seconds).  This aims to get\n   an accurate reading even if some runs are bloated by interrupts or task\n   switches or whatever.\n\n   The given (*fun)() is expected to run its function \"s->reps\" many times\n   and return the total elapsed time measured using speed_starttime() and\n   speed_endtime().  If the function doesn't support the given s->size or\n   s->r, -1.0 should be returned.  See the various base routines below.  
*/\n\ndouble\nspeed_measure (double (*fun)(struct speed_params *s),\n               struct speed_params *s)\n{\n#define TOLERANCE    1.05  /* 0.5% */\n  const int max_zeros = 40;\n\n  struct speed_params  s_dummy;\n  int     i, j, e;\n  double  t[100];\n  double  t_unsorted[100];\n  double  reps_d;\n  int     zeros = 0;\n\n  /* Use dummy parameters if caller doesn't provide any.  Only a few special\n     \"fun\"s will cope with this, speed_noop() is one.  */\n  if (s == NULL)\n    {\n      memset (&s_dummy, '\\0', sizeof (s_dummy));\n      s = &s_dummy;\n    }\n\n  s->reps = 1;\n  s->time_divisor = 1.0;\n  for (i = 0; i < numberof (t); i++)\n    {\n      for (;;)\n        {\n          s->src_num = 0;\n          s->dst_num = 0;\n\n          t[i] = (*fun) (s);\n\n          if (speed_option_verbose >= 3)\n            gmp_printf(\"size=%ld reps=%u r=%Md attempt=%d  %.9f\\n\",\n                       (long) s->size, s->reps, s->r, i, t[i]);\n\n          if (t[i] == 0.0)\n            {\n              zeros++;\n              if (zeros > max_zeros)\n                {\n                  fprintf (stderr, \"Fatal error: too many (%d) failed measurements (0.0)\\n\", zeros);\n                  abort ();\n                }\n              continue;\n            }\n\n          if (t[i] == -1.0)\n            return -1.0;\n\n          if (t[i] >= speed_unittime * speed_precision)\n            break;\n\n          /* go to a value of reps to make t[i] >= precision */\n          reps_d = ceil (1.1 * s->reps\n                         * speed_unittime * speed_precision\n                         / MAX (t[i], speed_unittime));\n          if (reps_d > 2e9 || reps_d < 1.0)\n            {\n              fprintf (stderr, \"Fatal error: new reps bad: %.2f\\n\", reps_d);\n              fprintf (stderr, \"  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\\n\",\n                       s->reps, speed_unittime, speed_precision, t[i]);\n              abort ();\n            }\n          s->reps 
= (unsigned) reps_d;\n        }\n      t[i] /= s->reps;\n      t_unsorted[i] = t[i];\n\n      if (speed_precision == 0)\n        return t[i];\n\n      /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */\n      if (t[0] >= 2.0)\n        e = 3;\n      else\n        e = 4;\n\n      /* Look for e many t[]'s within TOLERANCE of each other to consider a\n         valid measurement.  Return smallest among them.  */\n      if (i >= e)\n        {\n          qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);\n          for (j = e-1; j < i; j++)\n            if (t[j] <= t[j-e+1] * TOLERANCE)\n              return t[j-e+1] / s->time_divisor;\n        }\n    }\n\n  fprintf (stderr, \"speed_measure() could not get %d results within %.1f%%\\n\",\n           e, (TOLERANCE-1.0)*100.0);\n  fprintf (stderr, \"    unsorted         sorted\\n\");\n  fprintf (stderr, \"  %.12f    %.12f    is about 0.5%%\\n\",\n           t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));\n  for (i = 0; i < numberof (t); i++)\n    fprintf (stderr, \"  %.09f       %.09f\\n\", t_unsorted[i], t[i]);\n\n  return -1.0;\n}\n\n\n/* Read all of ptr,size to get it into the CPU memory cache.\n\n   A call to mpn_cache_fill_dummy() is used to make sure the compiler\n   doesn't optimize away the whole loop.  Using \"volatile mp_limb_t sum\"\n   would work too, but the function call means we don't rely on every\n   compiler actually implementing volatile properly.\n\n   mpn_cache_fill_dummy() is in a separate source file to stop gcc thinking\n   it can inline it.  
*/\n\nvoid\nmpn_cache_fill (mp_srcptr ptr, mp_size_t size)\n{\n  mp_limb_t  sum = 0;\n  mp_size_t  i;\n\n  for (i = 0; i < size; i++)\n    sum += ptr[i];\n\n  mpn_cache_fill_dummy(sum);\n}\n\n\nvoid\nmpn_cache_fill_write (mp_ptr ptr, mp_size_t size)\n{\n  mpn_cache_fill (ptr, size);\n\n#if 0\n  mpn_random (ptr, size);\n#endif\n\n#if 0\n  mp_size_t  i;\n\n  for (i = 0; i < size; i++)\n    ptr[i] = i;\n#endif\n}\n\n\nvoid\nspeed_operand_src (struct speed_params *s, mp_ptr ptr, mp_size_t size)\n{\n  if (s->src_num >= numberof (s->src))\n    {\n      fprintf (stderr, \"speed_operand_src: no room left in s->src[]\\n\");\n      abort ();\n    }\n  s->src[s->src_num].ptr = ptr;\n  s->src[s->src_num].size = size;\n  s->src_num++;\n}\n\n\nvoid\nspeed_operand_dst (struct speed_params *s, mp_ptr ptr, mp_size_t size)\n{\n  if (s->dst_num >= numberof (s->dst))\n    {\n      fprintf (stderr, \"speed_operand_dst: no room left in s->dst[]\\n\");\n      abort ();\n    }\n  s->dst[s->dst_num].ptr = ptr;\n  s->dst[s->dst_num].size = size;\n  s->dst_num++;\n}\n\n\nvoid\nspeed_cache_fill (struct speed_params *s)\n{\n  static struct speed_params  prev;\n  int  i;\n\n  /* FIXME: need a better way to get the format string for a pointer */\n\n  if (speed_option_addrs)\n    {\n      int  different;\n\n      different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num);\n      for (i = 0; i < s->dst_num; i++)\n        different |= (s->dst[i].ptr != prev.dst[i].ptr);\n      for (i = 0; i < s->src_num; i++)\n        different |= (s->src[i].ptr != prev.src[i].ptr);\n\n      if (different)\n        {\n          if (s->dst_num != 0)\n            {\n              printf (\"dst\");\n              for (i = 0; i < s->dst_num; i++)\n                printf (\" %08lX\", (unsigned long) s->dst[i].ptr);\n              printf (\" \");\n            }\n\n          if (s->src_num != 0)\n            {\n              printf (\"src\");\n              for (i = 0; i < s->src_num; i++)\n                
printf (\" %08lX\", (unsigned long) s->src[i].ptr);\n              printf (\" \");\n            }\n          printf (\"  (cf sp approx %08lX)\\n\", (unsigned long) &different);\n\n        }\n\n      memcpy (&prev, s, sizeof(prev));\n    }\n\n  switch (s->cache) {\n  case 0:\n    for (i = 0; i < s->dst_num; i++)\n      mpn_cache_fill_write (s->dst[i].ptr, s->dst[i].size);\n    for (i = 0; i < s->src_num; i++)\n      mpn_cache_fill (s->src[i].ptr, s->src[i].size);\n    break;\n  case 1:\n    pentium_wbinvd();\n    break;\n  }\n}\n\n\n/* Miscellanous options accepted by tune and speed programs under -o. */\n\nvoid\nspeed_option_set (const char *s)\n{\n  int  n;\n\n  if (strcmp (s, \"addrs\") == 0)\n    {\n      speed_option_addrs = 1;\n    }\n  else if (strcmp (s, \"verbose\") == 0)\n    {\n      speed_option_verbose++;\n    }\n  else if (sscanf (s, \"verbose=%d\", &n) == 1)\n    {\n      speed_option_verbose = n;\n    }\n  else\n    {\n      printf (\"Unrecognised -o option: %s\\n\", s);\n      exit (1);\n    }\n}\n\n\n/* The following are basic speed running routines for various gmp functions.\n   Many are very similar and use speed.h macros.\n\n   Each routine allocates it's own destination space for the result of the\n   function, because only it can know what the function needs.\n\n   speed_starttime() and speed_endtime() are put tight around the code to be\n   measured.  Any setups are done outside the timed portion.\n\n   Each routine is responsible for its own cache priming.\n   speed_cache_fill() is a good way to do this, see examples in speed.h.\n   One cache priming possibility, for CPUs with write-allocate cache, and\n   functions that don't take too long, is to do one dummy call before timing\n   so as to cache everything that gets used.  But speed_measure() runs a\n   routine at least twice and will take the smaller time, so this might not\n   be necessary.\n\n   Data alignment will be important, for source, destination and temporary\n   workspace.  
A routine can align its destination and workspace.  Programs\n   using the routines will ensure s->xp and s->yp are aligned.  Aligning\n   onto a CACHE_LINE_SIZE boundary is suggested.  s->align_wp and\n   s->align_wp2 should be respected where it makes sense to do so.\n   SPEED_TMP_ALLOC_LIMBS is a good way to do this.\n\n   A loop of the following form can be expected to turn into good assembler\n   code on most CPUs, thereby minimizing overhead in the measurement.  It\n   can always be assumed s->reps >= 1.\n\n          i = s->reps\n          do\n            foo();\n          while (--i != 0);\n\n   Additional parameters might be added to \"struct speed_params\" in the\n   future.  Routines should ignore anything they don't use.\n\n   s->size can be used creatively, and s->xp and s->yp can be ignored.  For\n   example, speed_mpz_fac_ui() uses s->size as n for the factorial.  s->r is\n   just a user-supplied parameter.  speed_mpn_lshift() uses it as a shift,\n   speed_mpn_mul_1() uses it as a multiplier.  
*/\n\n\n/* MPN_COPY etc can be macros, so the _CALL forms are necessary */\ndouble\nspeed_MPN_COPY (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (MPN_COPY);\n}\ndouble\nspeed_MPN_COPY_INCR (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);\n}\ndouble\nspeed_MPN_COPY_DECR (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);\n}\n#if HAVE_NATIVE_mpn_copyi\ndouble\nspeed_mpn_copyi (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (mpn_copyi);\n}\n#endif\n#if HAVE_NATIVE_mpn_copyd\ndouble\nspeed_mpn_copyd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (mpn_copyd);\n}\n#endif\ndouble\nspeed_memcpy (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);\n}\ndouble\nspeed_mpn_com_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (mpn_com_n);\n}\n\ndouble\nspeed_mpn_add_err1_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_ADD_ERR1_N (mpn_add_err1_n);\n}\n\ndouble\nspeed_mpn_sub_err1_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_ADD_ERR1_N (mpn_sub_err1_n);\n}\n\ndouble\nspeed_mpn_addmul_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_addmul_1);\n}\ndouble\nspeed_mpn_submul_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_submul_1);\n}\n\n#if HAVE_NATIVE_mpn_addmul_2\ndouble\nspeed_mpn_addmul_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_2 (mpn_addmul_2);\n}\n#endif\n#if HAVE_NATIVE_mpn_addmul_3\ndouble\nspeed_mpn_addmul_3 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_3 (mpn_addmul_3);\n}\n#endif\n#if HAVE_NATIVE_mpn_addmul_4\ndouble\nspeed_mpn_addmul_4 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_4 (mpn_addmul_4);\n}\n#endif\n#if HAVE_NATIVE_mpn_addmul_5\ndouble\nspeed_mpn_addmul_5 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_5 (mpn_addmul_5);\n}\n#endif\n#if HAVE_NATIVE_mpn_addmul_6\ndouble\nspeed_mpn_addmul_6 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_6 (mpn_addmul_6);\n}\n#endif\n#if 
HAVE_NATIVE_mpn_addmul_7\ndouble\nspeed_mpn_addmul_7 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_7 (mpn_addmul_7);\n}\n#endif\n#if HAVE_NATIVE_mpn_addmul_8\ndouble\nspeed_mpn_addmul_8 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_8 (mpn_addmul_8);\n}\n#endif\n\ndouble\nspeed_mpn_mul_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);\n}\ndouble\nspeed_mpn_mul_1_inplace (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);\n}\n\n#if HAVE_NATIVE_mpn_mul_2\ndouble\nspeed_mpn_mul_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_2 (mpn_mul_2);\n}\n#endif\n\n#if HAVE_NATIVE_mpn_karaadd\ndouble\nspeed_mpn_karaadd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_KARA (mpn_karaadd);\n}\n#endif\n\n#if HAVE_NATIVE_mpn_karasub\ndouble\nspeed_mpn_karasub (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_KARA (mpn_karasub);\n}\n#endif\n\ndouble\nspeed_mpn_lshift (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);\n}\ndouble\nspeed_mpn_rshift (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);\n}\n\n#if HAVE_NATIVE_mpn_lshiftc\ndouble\nspeed_mpn_lshiftc (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);\n}\n#endif\n\ndouble\nspeed_mpn_not (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_POPCOUNT (mpn_not);\n}\ndouble\nspeed_mpn_double (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_POPCOUNT (mpn_double);\n}\ndouble\nspeed_mpn_half (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_POPCOUNT (mpn_half);\n}\ndouble\nspeed_mpn_lshift1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SHIFTX (mpn_lshift1);\n}\ndouble\nspeed_mpn_rshift1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SHIFTX (mpn_rshift1);\n}\n\ndouble\nspeed_mpn_lshift2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SHIFTX (mpn_lshift2);\n}\ndouble\nspeed_mpn_rshift2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SHIFTX (mpn_rshift2);\n}\n\n\ndouble\nspeed_mpn_divrem_euclidean_qr_1 
(struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_euclidean_qr_1);\n}\n\ndouble\nspeed_mpn_divrem_hensel_qr_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_divrem_hensel_qr_1);\n}\ndouble\nspeed_mpn_divrem_hensel_qr_1_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_divrem_hensel_qr_1_1);\n}\ndouble\nspeed_mpn_divrem_hensel_qr_1_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1 (mpn_divrem_hensel_qr_1_2);\n}\n\ndouble\nspeed_mpn_divrem_hensel_rsh_qr_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_divrem_hensel_rsh_qr_1(wp,s->xp,s->size,s->r,19));\n}\n\ndouble\nspeed_mpn_rsh_divrem_hensel_qr_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_rsh_divrem_hensel_qr_1(wp,s->xp,s->size,s->r,19,0));\n}\n\ndouble\nspeed_mpn_rsh_divrem_hensel_qr_1_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_rsh_divrem_hensel_qr_1_1(wp,s->xp,s->size,s->r,19,0));\n}\n\ndouble\nspeed_mpn_rsh_divrem_hensel_qr_1_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_rsh_divrem_hensel_qr_1_2(wp,s->xp,s->size,s->r,19,0));\n}\n\ndouble\nspeed_mpn_divrem_euclidean_qr_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREME_2 (mpn_divrem_euclidean_qr_2);\n}\n\ndouble\nspeed_mpn_divrem_euclidean_r_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 (mpn_divrem_euclidean_r_1);\n}\n\ndouble\nspeed_mpn_divrem_hensel_r_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 (mpn_divrem_hensel_r_1);\n}\n\n/* The carry-in variants (if available) are good for measuring because they\n   won't skip a division if high<divisor.  Alternately, use -1 as a divisor\n   with the plain _1 forms. 
*/\ndouble\nspeed_mpn_divrem_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1);\n}\n\ndouble\nspeed_mpn_divrem_1f (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1);\n}\n#if HAVE_NATIVE_mpn_divrem_1c\ndouble\nspeed_mpn_divrem_1c (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1C (mpn_divrem_1c);\n}\ndouble\nspeed_mpn_divrem_1cf (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1CF (mpn_divrem_1c);\n}\n#endif\n\ndouble\nspeed_mpn_divrem_1_div (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);\n}\ndouble\nspeed_mpn_divrem_1f_div (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);\n}\ndouble\nspeed_mpn_divrem_1_inv (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);\n}\ndouble\nspeed_mpn_divrem_1f_inv (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);\n}\ndouble\nspeed_mpn_mod_1_div (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);\n}\ndouble\nspeed_mpn_mod_1_inv (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);\n}\n\ndouble\nspeed_mpn_preinv_divrem_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);\n}\ndouble\nspeed_mpn_preinv_divrem_1f (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);\n}\n\n#if GMP_NUMB_BITS % 4 == 0\ndouble\nspeed_mpn_mod_34lsub1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);\n}\n#endif\n\ndouble\nspeed_mpn_divrem_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);\n}\ndouble\nspeed_mpn_divrem_2_div (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);\n}\ndouble\nspeed_mpn_divrem_2_inv (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);\n}\n\ndouble\nspeed_mpn_mod_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 
(mpn_mod_1);\n}\n\ndouble\nspeed_mpn_mod_1_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_mod_1_1(wp,xp,s->size,yp));\n}\n\ndouble\nspeed_mpn_mod_1_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_mod_1_2(wp,xp,s->size,yp));\n}\n\ndouble\nspeed_mpn_mod_1_3 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_mod_1_3(wp,xp,s->size,yp));\n}\n\ndouble\nspeed_mpn_mod_1_k (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1_K (mpn_mod_1_k);\n}\n\n#if HAVE_NATIVE_mpn_mod_1c\ndouble\nspeed_mpn_mod_1c (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);\n}\n#endif\ndouble\nspeed_mpn_preinv_mod_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);\n}\n\ndouble\nspeed_mpn_divexact_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);\n}\n\ndouble\nspeed_mpn_divexact_by3 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);\n}\n\ndouble\nspeed_mpn_divexact_byff (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_COPY (mpn_divexact_byff);\n}\n\ndouble\nspeed_mpn_divexact_byfobm1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVEXACT_BYFOBM1 (mpn_divexact_byfobm1);\n}\n\n#if HAVE_NATIVE_mpn_modexact_1_odd\ndouble\nspeed_mpn_modexact_1_odd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);\n}\n#endif\n\ndouble\nspeed_mpn_modexact_1c_odd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);\n}\n\n\ndouble\nspeed_mpn_dc_tdiv_qr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr);\n}\ndouble\nspeed_mpn_dc_div_qr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DC_DIV_N_TSIZE (mpn_dc_div_qr_n, DC_DIVAPPR_Q_N_ITCH(s->size));\n}\ndouble\nspeed_mpn_sb_divappr_q (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SB_DIV_SMALL_Q (mpn_sb_divappr_q);\n}\ndouble\nspeed_mpn_sb_div_qr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SB_DIV_SMALL_Q 
(mpn_sb_div_qr);\n}\ndouble\nspeed_mpn_dc_divappr_q (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DC_DIV_SMALL_Q (mpn_dc_divappr_q);\n}\ndouble\nspeed_mpn_inv_div_qr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_INV_DIV (mpn_inv_div_qr);\n}\ndouble\nspeed_mpn_inv_divappr_q (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_INV_DIV_SMALL_Q (mpn_inv_divappr_q);\n}\ndouble\nspeed_mpn_tdiv_q (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TDIV_Q (mpn_tdiv_q);\n}\ndouble\nspeed_mpn_tdiv_q1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TDIV_Q1 (mpn_tdiv_q);\n}\ndouble\nspeed_mpn_tdiv_q2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TDIV_SMALL_Q (mpn_tdiv_q);\n}\ndouble\nspeed_mpn_dc_bdiv_qr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DC_BDIV_N_TSIZE (mpn_dc_bdiv_qr_n, DC_BDIV_QR_N_ITCH(2*s->size));\n}\ndouble\nspeed_mpn_dc_bdiv_q (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DC_BDIV_Q (mpn_dc_bdiv_q);\n}\n\ndouble\nspeed_mpz_mod (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_MOD (mpz_mod);\n}\n\n\ndouble\nspeed_mpn_popcount (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);\n}\ndouble\nspeed_mpn_hamdist (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HAMDIST (mpn_hamdist);\n}\n\n\ndouble\nspeed_mpn_add_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N (mpn_add_n);\n}\ndouble\nspeed_mpn_sub_n (struct speed_params *s)\n{\nSPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);\n}\n\ndouble\nspeed_mpn_addadd_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TRINARY_N (mpn_addadd_n);\n}\n\ndouble\nspeed_mpn_subadd_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TRINARY_N (mpn_subadd_n);\n}\n  \ndouble\nspeed_mpn_addsub_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TRINARY_N (mpn_addsub_n);\n}\n\n#if HAVE_NATIVE_mpn_sumdiff_n\ndouble\nspeed_mpn_sumdiff_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SUMDIFF_N_CALL (mpn_sumdiff_n (ap, sp, s->xp, s->yp, s->size));\n}\n#endif\n\n#if 
HAVE_NATIVE_mpn_nsumdiff_n\ndouble\nspeed_mpn_nsumdiff_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SUMDIFF_N_CALL (mpn_nsumdiff_n (ap, sp, s->xp, s->yp, s->size));\n}\n#endif\n\n#if HAVE_NATIVE_mpn_addlsh1_n\ndouble\nspeed_mpn_addlsh1_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh1_n(wp,xp,yp,s->size));\n}\n#endif\n#if HAVE_NATIVE_mpn_sublsh1_n\ndouble\nspeed_mpn_sublsh1_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh1_n(wp,xp,yp,s->size));\n}\n#endif\n#if HAVE_NATIVE_mpn_addlsh_n\ndouble\nspeed_mpn_addlsh_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n(wp, xp, yp, s->size, s->r));\n}\n#endif\n#if HAVE_NATIVE_mpn_sublsh_n\ndouble\nspeed_mpn_sublsh_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n(wp, xp, yp, s->size, s->r));\n}\n#endif\n#if HAVE_NATIVE_mpn_inclsh_n\ndouble\nspeed_mpn_inclsh_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_inclsh_n(wp,s->xp,s->size,s->r));\n}\n#endif\n#if HAVE_NATIVE_mpn_declsh_n\ndouble\nspeed_mpn_declsh_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_declsh_n(wp,s->xp,s->size,s->r));\n}\n#endif\n#if HAVE_NATIVE_mpn_rsh1add_n\ndouble\nspeed_mpn_rsh1add_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1add_n);\n}\n#endif\n#if HAVE_NATIVE_mpn_rsh1sub_n\ndouble\nspeed_mpn_rsh1sub_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1sub_n);\n}\n#endif\n\n/* mpn_and_n etc can be macros and so have to be handled with\n   SPEED_ROUTINE_MPN_BINARY_N_CALL forms */\ndouble\nspeed_mpn_and_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_andn_n (struct speed_params *s)\n{\nSPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_nand_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, 
s->size));\n}\ndouble\nspeed_mpn_ior_n (struct speed_params *s)\n{\nSPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_iorn_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_nior_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_xor_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));\n}\ndouble\nspeed_mpn_xnor_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));\n}\n\n\ndouble\nspeed_mpn_mul_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);\n}\ndouble\nspeed_mpn_sqr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SQR (mpn_sqr);\n}\ndouble\nspeed_mpn_mul_n_sqr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_n (wp, s->xp, s->xp, s->size));\n}\n\ndouble\nspeed_mpn_mul_basecase (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_BASECASE(mpn_mul_basecase);\n}\ndouble\nspeed_mpn_sqr_basecase (struct speed_params *s)\n{\n  /* FIXME: size restrictions on some versions of sqr_basecase */\n  SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);\n}\n\n#if HAVE_NATIVE_mpn_sqr_diagonal\ndouble\nspeed_mpn_sqr_diagonal (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);\n}\n#endif\n\ndouble\nspeed_mpn_kara_mul_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n);\n}\ndouble\nspeed_mpn_kara_sqr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_KARA_SQR_N (mpn_kara_sqr_n);\n}\n\ndouble\nspeed_mpn_toom3_mul_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n);\n}\ndouble\nspeed_mpn_toom4_mul_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM4_MUL_N (mpn_toom4_mul_n);\n}\ndouble\nspeed_mpn_toom8h_mul (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM8H_MUL 
(mpn_toom8h_mul);\n}\ndouble\nspeed_mpn_toom3_sqr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n);\n}\ndouble\nspeed_mpn_toom4_sqr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM4_SQR_N (mpn_toom4_sqr_n);\n}\ndouble\nspeed_mpn_toom8_sqr_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM8_SQR_N (mpn_toom8_sqr_n);\n}\n\ndouble\nspeed_mpn_mul_fft_main (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_N_CALL\n    (mpn_mul_fft_main (wp, s->xp, s->size, s->yp, s->size));\n}\ndouble\nspeed_mpn_sqr_fft_main (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SQR_CALL\n    (mpn_mul_fft_main (wp, s->xp, s->size, s->xp, s->size));\n}\n\n\n/* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's\n   used as k, otherwise the best k for the size is used.  If s->size isn't a\n   multiple of 2^k it's rounded up to make the effective operation size.  */\n\n#define SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr)       \\\n  {                                                     \\\n    mp_ptr     wp;                                      \\\n    mp_size_t  pl;                                      \\\n    unsigned   i;                                       \\\n    double     t;                                       \\\n    TMP_DECL;                                           \\\n                                                        \\\n    SPEED_RESTRICT_COND (s->size >= 1);                 \\\n                                                        \\\n                                                        \\\n    TMP_MARK;                                           \\\n    pl = mpir_fft_adjust_limbs (s->size);                \\\n    SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp);      \\\n                                                        \\\n    speed_operand_src (s, s->xp, s->size);              \\\n    if (!sqr)                                           \\\n      speed_operand_src (s, s->yp, s->size);            \\\n    
speed_operand_dst (s, wp, pl+1);                    \\\n    speed_cache_fill (s);                               \\\n                                                        \\\n    speed_starttime ();                                 \\\n    i = s->reps;                                        \\\n    do                                                  \\\n      call;                                             \\\n    while (--i != 0);                                   \\\n    t = speed_endtime ();                               \\\n                                                        \\\n    TMP_FREE;                                           \\\n    return t;                                           \\\n  }\n\ndouble\nspeed_mpn_mullow_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_N (mpn_mullow_n);\n}\n\ndouble\nspeed_mpn_mulhigh_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_N (mpn_mulhigh_n);\n}\n\ndouble\nspeed_mpn_mulmod_2expm1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MULMOD_2EXPM1 (mpn_mulmod_2expm1);\n}\n\ndouble\nspeed_mpn_mulmod_2expp1_basecase (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE (mpn_mulmod_2expp1_basecase);\n}\n\ndouble\nspeed_mpn_mullow_n_basecase (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MUL_N (mpn_mullow_n_basecase);\n}\n\ndouble\nspeed_mpn_mulmid_basecase (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);\n}\n\ndouble\nspeed_mpn_mulmid (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);\n}\n\ndouble\nspeed_mpn_mulmid_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);\n}\n\ndouble\nspeed_mpn_toom42_mulmid (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);\n}\n\ndouble\nspeed_mpn_matrix22_mul (struct speed_params *s)\n{\n  /* Speed params only includes 2 inputs, so we have to invent the\n     other 6. 
*/\n\n  mp_ptr a;\n  mp_ptr r;\n  mp_ptr b;\n  mp_ptr tp;\n  mp_size_t itch;\n  unsigned i;\n  double t;\n  TMP_DECL;\n\n  TMP_MARK;\n  SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp);\n  SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp);\n  SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp);\n\n  MPN_COPY (a, s->xp, s->size);\n  mpn_random (a + s->size, 3 * s->size);\n  MPN_COPY (b, s->yp, s->size);\n  mpn_random (b + s->size, 3 * s->size);\n\n  itch = mpn_matrix22_mul_itch (s->size, s->size);\n  SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);\n\n  speed_operand_src (s, a, 4 * s->size);\n  speed_operand_src (s, b, 4 * s->size);\n  speed_operand_dst (s, r, 8 * s->size + 4);\n  speed_operand_dst (s, tp, itch);\n  speed_cache_fill (s);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      mp_size_t sz = s->size;\n      MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz);\n      MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz);\n      MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz);\n      MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz);\n      mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz,\n\t\t\tb, b + 1 * sz,     b + 2 * sz,     b + 3 * sz,     sz,\n\t\t\ttp);\n    }\n  while (--i != 0);\n  t = speed_endtime();\n  TMP_FREE;\n  return t;\n}\n\ndouble\nspeed_mpn_hgcd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);\n}\n\ndouble\nspeed_mpn_hgcd_lehmer (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);\n}\n\ndouble\nspeed_mpn_hgcd_appr (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);\n}\n\ndouble\nspeed_mpn_hgcd_appr_lehmer (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);\n}\n\ndouble\nspeed_mpn_hgcd_reduce (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);\n}\ndouble\nspeed_mpn_hgcd_reduce_1 (struct 
speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);\n}\ndouble\nspeed_mpn_hgcd_reduce_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);\n}\n\ndouble\nspeed_mpn_gcd (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCD (mpn_gcd);\n}\n\ndouble\nspeed_mpn_gcdext (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);\n}\ndouble\nspeed_mpn_gcdext_single (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single);\n}\ndouble\nspeed_mpn_gcdext_double (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double);\n}\ndouble\nspeed_mpn_gcdext_one_single (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single);\n}\ndouble\nspeed_mpn_gcdext_one_double (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double);\n}\ndouble\nspeed_mpn_gcd_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);\n}\ndouble\nspeed_mpn_gcd_1N (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);\n}\n\n\ndouble\nspeed_mpz_jacobi (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi);\n}\ndouble\nspeed_mpn_jacobi_base (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);\n}\ndouble\nspeed_mpn_jacobi_base_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1);\n}\ndouble\nspeed_mpn_jacobi_base_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2);\n}\ndouble\nspeed_mpn_jacobi_base_3 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);\n}\ndouble\nspeed_mpn_jacobi_base_4 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);\n}\n\n\ndouble\nspeed_mpn_sqrtrem (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SQRTREM (mpn_sqrtrem);\n}\n\ndouble\nspeed_mpn_rootrem (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_ROOTREM 
(mpn_rootrem);\n}\n\n\ndouble\nspeed_mpz_fac_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui);\n}\ndouble\nspeed_mpz_fac_ui_small (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_small);\n}\n\ndouble\nspeed_mpz_fac_ui_large (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_large);\n}\n\n\ndouble\nspeed_mpn_fib2_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui);\n}\ndouble\nspeed_mpz_fib_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui);\n}\ndouble\nspeed_mpz_fib2_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui);\n}\ndouble\nspeed_mpz_lucnum_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui);\n}\ndouble\nspeed_mpz_lucnum2_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui);\n}\n\n\ndouble\nspeed_mpz_powm (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_POWM (mpz_powm);\n}\ndouble\nspeed_mpz_powm_mod (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod);\n}\n\ndouble\nspeed_mpz_powm_redc (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);\n}\ndouble\nspeed_mpz_powm_ui (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);\n}\n\ndouble\nspeed_mpn_binvert (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);\n}\n\ndouble\nspeed_mpn_redc_1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_REDC_1 (mpn_redc_1);\n}\ndouble\nspeed_mpn_redc_2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_REDC_2 (mpn_redc_2);\n}\ndouble\nspeed_mpn_redc_n (struct speed_params *s)\n{\n  SPEED_ROUTINE_REDC_N (mpn_redc_n);\n}\n\n\ndouble\nspeed_modlimb_invert (struct speed_params *s)\n{\n  SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert);\n}\n\n\ndouble\nspeed_noop (struct speed_params *s)\n{\n  unsigned  i;\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    noop ();\n  while (--i != 0);\n  return speed_endtime 
();\n}\n\ndouble\nspeed_noop_wxs (struct speed_params *s)\n{\n  mp_ptr   wp;\n  unsigned i;\n  double   t;\n  TMP_DECL;\n\n  TMP_MARK;\n  wp = TMP_ALLOC_LIMBS (1);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    noop_wxs (wp, s->xp, s->size);\n  while (--i != 0);\n  t = speed_endtime ();\n\n  TMP_FREE;\n  return t;\n}\n\ndouble\nspeed_noop_wxys (struct speed_params *s)\n{\n  mp_ptr   wp;\n  unsigned i;\n  double   t;\n  TMP_DECL;\n\n  TMP_MARK;\n  wp = TMP_ALLOC_LIMBS (1);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    noop_wxys (wp, s->xp, s->yp, s->size);\n  while (--i != 0);\n  t = speed_endtime ();\n\n  TMP_FREE;\n  return t;\n}\n\n\n#define SPEED_ROUTINE_ALLOC_FREE(variables, calls)      \\\n  {                                                     \\\n    unsigned  i;                                        \\\n    variables;                                          \\\n                                                        \\\n    speed_starttime ();                                 \\\n    i = s->reps;                                        \\\n    do                                                  \\\n      {                                                 \\\n        calls;                                          \\\n      }                                                 \\\n    while (--i != 0);                                   \\\n    return speed_endtime ();                            \\\n  }\n\n\n/* Compare these to see how much malloc/free costs and then how much\n   __gmp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or\n   mpq_init/clear will be doing a 1 limb allocate, so use that as the size\n   when including them in comparisons.  
*/\n\ndouble\nspeed_malloc_free (struct speed_params *s)\n{\n  size_t  bytes = s->size * BYTES_PER_MP_LIMB;\n  SPEED_ROUTINE_ALLOC_FREE (void *p,\n                            p = malloc (bytes);\n                            free (p));\n}\n\ndouble\nspeed_malloc_realloc_free (struct speed_params *s)\n{\n  size_t  bytes = s->size * BYTES_PER_MP_LIMB;\n  SPEED_ROUTINE_ALLOC_FREE (void *p,\n                            p = malloc (BYTES_PER_MP_LIMB);\n                            p = realloc (p, bytes);\n                            free (p));\n}\n\ndouble\nspeed_gmp_allocate_free (struct speed_params *s)\n{\n  size_t  bytes = s->size * BYTES_PER_MP_LIMB;\n  SPEED_ROUTINE_ALLOC_FREE (void *p,\n                            p = (*__gmp_allocate_func) (bytes);\n                            (*__gmp_free_func) (p, bytes));\n}\n\ndouble\nspeed_gmp_allocate_reallocate_free (struct speed_params *s)\n{\n  size_t  bytes = s->size * BYTES_PER_MP_LIMB;\n  SPEED_ROUTINE_ALLOC_FREE\n    (void *p,\n     p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);\n     p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB);\n     (*__gmp_free_func) (p, bytes));\n}\n\ndouble\nspeed_mpz_init_clear (struct speed_params *s)\n{\n  SPEED_ROUTINE_ALLOC_FREE (mpz_t z,\n                            mpz_init (z);\n                            mpz_clear (z));\n}\n\ndouble\nspeed_mpz_init_realloc_clear (struct speed_params *s)\n{\n  SPEED_ROUTINE_ALLOC_FREE (mpz_t z,\n                            mpz_init (z);\n                            _mpz_realloc (z, s->size);\n                            mpz_clear (z));\n}\n\ndouble\nspeed_mpq_init_clear (struct speed_params *s)\n{\n  SPEED_ROUTINE_ALLOC_FREE (mpq_t q,\n                            mpq_init (q);\n                            mpq_clear (q));\n}\n\ndouble\nspeed_mpf_init_clear (struct speed_params *s)\n{\n  SPEED_ROUTINE_ALLOC_FREE (mpf_t f,\n                            mpf_init (f);\n                            mpf_clear (f));\n}\n\n\n/* Compare this to 
mpn_add_n to see how much overhead mpz_add adds.  Note\n   that repeatedly calling mpz_add with the same data gives branch predition\n   in it an advantage.  */\n\ndouble\nspeed_mpz_add (struct speed_params *s)\n{\n  mpz_t     w, x, y;\n  unsigned  i;\n  double    t;\n\n  mpz_init (w);\n  mpz_init (x);\n  mpz_init (y);\n\n  mpz_set_n (x, s->xp, s->size);\n  mpz_set_n (y, s->yp, s->size);\n  mpz_add (w, x, y);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      mpz_add (w, x, y);\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  mpz_clear (w);\n  mpz_clear (x);\n  mpz_clear (y);\n  return t;\n}\n\n\n/* If r==0, calculate (size,size/2),\n   otherwise calculate (size,r). */\n\ndouble\nspeed_mpz_bin_uiui (struct speed_params *s)\n{\n  mpz_t          w;\n  unsigned long  k;\n  unsigned  i;\n  double    t;\n\n  mpz_init (w);\n  if (s->r != 0)\n    k = s->r;\n  else\n    k = s->size/2;\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      mpz_bin_uiui (w, s->size, k);\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  mpz_clear (w);\n  return t;\n}\n\n\n/* The multiplies are successively dependent so the latency is measured, not\n   the issue rate.  There's only 10 per loop so the code doesn't get too big\n   since umul_ppmm is several instructions on some cpus.\n\n   Putting the arguments as \"h,l,l,h\" gets slightly better code from gcc\n   2.95.2 on x86, it puts only one mov between each mul, not two.  That mov\n   though will probably show up as a bogus extra cycle though.\n\n   The measuring function macros are into three parts to avoid overflowing\n   preprocessor expansion space if umul_ppmm is big.\n\n   Limitations:\n\n   Don't blindly use this to set UMUL_TIME in gmp-mparam.h, check the code\n   generated first, especially on CPUs with low latency multipliers.\n\n   The default umul_ppmm doing h*l will be getting increasing numbers of\n   high zero bits in the calculation.  
CPUs with data-dependent multipliers\n   will want to use umul_ppmm.1 to get some randomization into the\n   calculation.  The extra xors and fetches will be a slowdown of course.  */\n\n#define SPEED_MACRO_UMUL_PPMM_A \\\n  {                             \\\n    mp_limb_t  h, l;            \\\n    unsigned   i;               \\\n    double     t;               \\\n                                \\\n    s->time_divisor = 10;       \\\n                                \\\n    h = s->xp[0];               \\\n    l = s->yp[0];               \\\n                                \\\n    if (s->r == 1)              \\\n      {                         \\\n        speed_starttime ();     \\\n        i = s->reps;            \\\n        do                      \\\n          {\n\n#define SPEED_MACRO_UMUL_PPMM_B \\\n          }                     \\\n        while (--i != 0);       \\\n        t = speed_endtime ();   \\\n      }                         \\\n    else                        \\\n      {                         \\\n        speed_starttime ();     \\\n        i = s->reps;            \\\n        do                      \\\n          {\n\n#define SPEED_MACRO_UMUL_PPMM_C                                         \\\n          }                                                             \\\n        while (--i != 0);                                               \\\n        t = speed_endtime ();                                           \\\n      }                                                                 \\\n                                                                        \\\n    /* stop the compiler optimizing away the whole calculation! 
*/      \\\n    noop_1 (h);                                                         \\\n    noop_1 (l);                                                         \\\n                                                                        \\\n    return t;                                                           \\\n  }\n\n\ndouble\nspeed_umul_ppmm (struct speed_params *s)\n{\n  SPEED_MACRO_UMUL_PPMM_A;\n  {\n    umul_ppmm (h, l, l, h);  h ^= s->xp_block[0]; l ^= s->yp_block[0];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[1]; l ^= s->yp_block[1];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[2]; l ^= s->yp_block[2];\n    umul_ppmm (h, l, l, h);  h ^= s->xp_block[3]; l ^= s->yp_block[3];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[4]; l ^= s->yp_block[4];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[5]; l ^= s->yp_block[5];\n    umul_ppmm (h, l, l, h);  h ^= s->xp_block[6]; l ^= s->yp_block[6];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[7]; l ^= s->yp_block[7];\n     umul_ppmm (h, l, l, h); h ^= s->xp_block[8]; l ^= s->yp_block[8];\n    umul_ppmm (h, l, l, h);  h ^= s->xp_block[9]; l ^= s->yp_block[9];\n  }\n  SPEED_MACRO_UMUL_PPMM_B;\n  {\n    umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n    umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n    umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n     umul_ppmm (h, l, l, h);\n    umul_ppmm (h, l, l, h);\n  }\n  SPEED_MACRO_UMUL_PPMM_C;\n}\n\n\n#if HAVE_NATIVE_mpn_umul_ppmm\ndouble\nspeed_mpn_umul_ppmm (struct speed_params *s)\n{\n  SPEED_MACRO_UMUL_PPMM_A;\n  {\n    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[1]; l ^= s->yp_block[1];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[2]; l ^= s->yp_block[2];\n    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[4]; l ^= 
s->yp_block[4];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[5]; l ^= s->yp_block[5];\n    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[7]; l ^= s->yp_block[7];\n     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[8]; l ^= s->yp_block[8];\n    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];\n  }\n  SPEED_MACRO_UMUL_PPMM_B;\n  {\n    h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n    h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n    h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n     h = mpn_umul_ppmm (&l, h, l);\n    h = mpn_umul_ppmm (&l, h, l);\n  }\n  SPEED_MACRO_UMUL_PPMM_C;\n}\n#endif\n\n#if HAVE_NATIVE_mpn_umul_ppmm_r\ndouble\nspeed_mpn_umul_ppmm_r (struct speed_params *s)\n{\n  SPEED_MACRO_UMUL_PPMM_A;\n  {\n    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[1]; l ^= s->yp_block[1];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[2]; l ^= s->yp_block[2];\n    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[4]; l ^= s->yp_block[4];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[5]; l ^= s->yp_block[5];\n    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[7]; l ^= s->yp_block[7];\n     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[8]; l ^= s->yp_block[8];\n    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];\n  }\n  SPEED_MACRO_UMUL_PPMM_B;\n  {\n    h = mpn_umul_ppmm_r (h, l, &l);\n     h = mpn_umul_ppmm_r (h, l, &l);\n     h = mpn_umul_ppmm_r (h, l, &l);\n    h = mpn_umul_ppmm_r (h, l, &l);\n     h = mpn_umul_ppmm_r (h, l, &l);\n  
   h = mpn_umul_ppmm_r (h, l, &l);\n    h = mpn_umul_ppmm_r (h, l, &l);\n     h = mpn_umul_ppmm_r (h, l, &l);\n     h = mpn_umul_ppmm_r (h, l, &l);\n    h = mpn_umul_ppmm_r (h, l, &l);\n  }\n  SPEED_MACRO_UMUL_PPMM_C;\n}\n#endif\n\n\n/* The divisions are successively dependent so latency is measured, not\n   issue rate.  There's only 10 per loop so the code doesn't get too big,\n   especially for udiv_qrnnd_preinv and preinv2norm, which are several\n   instructions each.\n\n   Note that it's only the division which is measured here, there's no data\n   fetching and no shifting if the divisor gets normalized.\n\n   In speed_udiv_qrnnd with gcc 2.95.2 on x86 the parameters \"q,r,r,q,d\"\n   generate x86 div instructions with nothing in between.\n\n   The measuring function macros are in two parts to avoid overflowing\n   preprocessor expansion space if udiv_qrnnd etc are big.\n\n   Limitations:\n\n   Don't blindly use this to set UDIV_TIME in gmp-mparam.h, check the code\n   generated first.\n\n   CPUs with data-dependent divisions may want more attention paid to the\n   randomness of the data used.  Probably the measurement wanted is over\n   uniformly distributed numbers, but what's here might not be giving that.  
*/\n\n#define SPEED_ROUTINE_UDIV_QRNND_A(normalize)           \\\n  {                                                     \\\n    double     t;                                       \\\n    unsigned   i;                                       \\\n    mp_limb_t  q, r, d;                                 \\\n    mp_limb_t  dinv;                                    \\\n                                                        \\\n    s->time_divisor = 10;                               \\\n                                                        \\\n    /* divisor from \"r\" parameter, or a default */      \\\n    d = s->r;                                           \\\n    if (d == 0)                                         \\\n      d = __mp_bases[10].big_base;                      \\\n                                                        \\\n    if (normalize)                                      \\\n      {                                                 \\\n        unsigned  norm;                                 \\\n        count_leading_zeros (norm, d);                  \\\n        d <<= norm;                                     \\\n        invert_limb (dinv, d);                          \\\n      }                                                 \\\n                                                        \\\n    q = s->xp[0];                                       \\\n    r = s->yp[0] % d;                                   \\\n                                                        \\\n    speed_starttime ();                                 \\\n    i = s->reps;                                        \\\n    do                                                  \\\n      {\n\n#define SPEED_ROUTINE_UDIV_QRNND_B                                      \\\n      }                                                                 \\\n    while (--i != 0);                                                   \\\n    t = speed_endtime ();                                               
\\\n                                                                        \\\n    /* stop the compiler optimizing away the whole calculation! */      \\\n    noop_1 (q);                                                         \\\n    noop_1 (r);                                                         \\\n                                                                        \\\n    return t;                                                           \\\n  }\n\ndouble\nspeed_udiv_qrnnd (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (UDIV_NEEDS_NORMALIZATION);\n  {\n    udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n    udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n    udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n     udiv_qrnnd (q, r, r, q, d);\n    udiv_qrnnd (q, r, r, q, d);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n\ndouble\nspeed_udiv_qrnnd_preinv1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (1);\n  {\n    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n\ndouble\nspeed_udiv_qrnnd_preinv2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (1);\n  {\n    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv2 (q, r, 
r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n\ndouble\nspeed_udiv_qrnnd_c (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (1);\n  {\n    __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n    __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n    __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n     __udiv_qrnnd_c (q, r, r, q, d);\n    __udiv_qrnnd_c (q, r, r, q, d);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n\n#if HAVE_NATIVE_mpn_udiv_qrnnd\ndouble\nspeed_mpn_udiv_qrnnd (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (1);\n  {\n    q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n    q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n    q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n     q = mpn_udiv_qrnnd (&r, r, q, d);\n    q = mpn_udiv_qrnnd (&r, r, q, d);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n#endif\n\n#if HAVE_NATIVE_mpn_udiv_qrnnd_r\ndouble\nspeed_mpn_udiv_qrnnd_r (struct speed_params *s)\n{\n  SPEED_ROUTINE_UDIV_QRNND_A (1);\n  {\n    q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n    q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n    q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n     q = mpn_udiv_qrnnd_r (r, q, d, &r);\n    q = mpn_udiv_qrnnd_r (r, q, d, &r);\n  }\n  SPEED_ROUTINE_UDIV_QRNND_B;\n}\n#endif\n\n\ndouble\nspeed_invert_limb (struct speed_params *s)\n{\n  SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, 
d));\n}\n\n\n/* xp[0] might not be particularly random, but should give an indication how\n   \"/\" runs.  Same for speed_operator_mod below.  */\ndouble\nspeed_operator_div (struct speed_params *s)\n{\n  double     t;\n  unsigned   i;\n  mp_limb_t  x, q, d;\n\n  s->time_divisor = 10;\n\n  /* divisor from \"r\" parameter, or a default */\n  d = s->r;\n  if (d == 0)\n    d = __mp_bases[10].big_base;\n\n  x = s->xp[0];\n  q = 0;\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      q ^= x; q /= d;\n       q ^= x; q /= d;\n       q ^= x; q /= d;\n      q ^= x; q /= d;\n       q ^= x; q /= d;\n       q ^= x; q /= d;\n      q ^= x; q /= d;\n       q ^= x; q /= d;\n       q ^= x; q /= d;\n      q ^= x; q /= d;\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  /* stop the compiler optimizing away the whole calculation! */\n  noop_1 (q);\n\n  return t;\n}\n\ndouble\nspeed_operator_mod (struct speed_params *s)\n{\n  double     t;\n  unsigned   i;\n  mp_limb_t  x, r, d;\n\n  s->time_divisor = 10;\n\n  /* divisor from \"r\" parameter, or a default */\n  d = s->r;\n  if (d == 0)\n    d = __mp_bases[10].big_base;\n\n  x = s->xp[0];\n  r = 0;\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      r ^= x; r %= d;\n       r ^= x; r %= d;\n       r ^= x; r %= d;\n      r ^= x; r %= d;\n       r ^= x; r %= d;\n       r ^= x; r %= d;\n      r ^= x; r %= d;\n       r ^= x; r %= d;\n       r ^= x; r %= d;\n      r ^= x; r %= d;\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  /* stop the compiler optimizing away the whole calculation! */\n  noop_1 (r);\n\n  return t;\n}\n\n\n/* r==0 measures on data with the values uniformly distributed.  This will\n   be typical for count_trailing_zeros in a GCD etc.\n\n   r==1 measures on data with the resultant count uniformly distributed\n   between 0 and BITS_PER_MP_LIMB-1.  This is probably sensible for\n   count_leading_zeros on the high limbs of divisors.  
*/\n\nint\nspeed_routine_count_zeros_setup (struct speed_params *s,\n                                 mp_ptr xp, int leading, int zero)\n{\n  int        i, c;\n  mp_limb_t  n;\n\n  if (s->r == 0)\n    {\n      /* Make uniformly distributed data.  If zero isn't allowed then change\n         it to 1 for leading, or 0x800..00 for trailing.  */\n      MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);\n      if (! zero)\n        for (i = 0; i < SPEED_BLOCK_SIZE; i++)\n          if (xp[i] == 0)\n            xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;\n    }\n  else if (s->r == 1)\n    {\n      /* Make counts uniformly distributed.  A randomly chosen bit is set, and\n         for leading the rest above it are cleared, or for trailing then the\n         rest below.  */\n      for (i = 0; i < SPEED_BLOCK_SIZE; i++)\n        {\n          mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB);\n          mp_limb_t  keep_below = set-1;\n          mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;\n          mp_limb_t  keep = (leading ? keep_below : keep_above);\n          xp[i] = (s->xp_block[i] & keep) | set;\n        }\n    }\n  else\n    {\n      return 0;\n    }\n\n  /* Account for the effect of n^=c. 
*/\n  c = 0;\n  for (i = 0; i < SPEED_BLOCK_SIZE; i++)\n    {\n      n = xp[i];\n      xp[i] ^= c;\n\n      if (leading)\n        count_leading_zeros (c, n);\n      else\n        count_trailing_zeros (c, n);\n    }\n\n  return 1;\n}\n\ndouble\nspeed_count_leading_zeros (struct speed_params *s)\n{\n#ifdef COUNT_LEADING_ZEROS_0\n#define COUNT_LEADING_ZEROS_0_ALLOWED   1\n#else\n#define COUNT_LEADING_ZEROS_0_ALLOWED   0\n#endif\n\n  SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);\n  count_leading_zeros (c, n);\n  SPEED_ROUTINE_COUNT_ZEROS_B ();\n}\ndouble\nspeed_count_trailing_zeros (struct speed_params *s)\n{\n  SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);\n  count_trailing_zeros (c, n);\n  SPEED_ROUTINE_COUNT_ZEROS_B ();\n}\n\n\ndouble\nspeed_mpn_get_str (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_GET_STR (mpn_get_str);\n}\n\ndouble\nspeed_mpn_set_str (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SET_STR (mpn_set_str);\n}\ndouble\nspeed_mpn_set_str_basecase (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_basecase);\n}\ndouble\nspeed_mpn_set_str_subquad (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_subquad);\n}\n\n\ndouble\nspeed_MPN_ZERO (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size));\n}\n\ndouble\nspeed_mpn_store (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_ZERO_CALL (mpn_store (wp, s->size,7654));\n}\n\nint\nspeed_randinit (struct speed_params *s, gmp_randstate_ptr rstate)\n{\n  if (s->r == 0)\n    gmp_randinit_default (rstate);\n  else if (s->r == 1)\n    gmp_randinit_mt (rstate);\n  else\n    {\n      return gmp_randinit_lc_2exp_size (rstate, s->r);\n    }\n  return 1;\n}\n\ndouble\nspeed_gmp_randseed (struct speed_params *s)\n{\n  gmp_randstate_t  rstate;\n  unsigned  i;\n  double    t;\n  mpz_t     x;\n\n  SPEED_RESTRICT_COND (s->size >= 1);\n  SPEED_RESTRICT_COND (speed_randinit (s, rstate));\n\n  /* s->size bits of seed */\n  mpz_init_set_n (x, s->xp, 
s->size);\n  mpz_fdiv_r_2exp (x, x, (unsigned long) s->size);\n\n  /* cache priming */\n  gmp_randseed (rstate, x);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    gmp_randseed (rstate, x);\n  while (--i != 0);\n  t = speed_endtime ();\n\n  gmp_randclear (rstate);\n  mpz_clear (x);\n  return t;\n}\n\ndouble\nspeed_gmp_randseed_ui (struct speed_params *s)\n{\n  gmp_randstate_t  rstate;\n  unsigned  i, j;\n  double    t;\n\n  SPEED_RESTRICT_COND (speed_randinit (s, rstate));\n\n  /* cache priming */\n  gmp_randseed_ui (rstate, 123L);\n\n  speed_starttime ();\n  i = s->reps;\n  j = 0;\n  do\n    {\n      gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);\n      j++;\n      if (j >= SPEED_BLOCK_SIZE)\n        j = 0;\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  gmp_randclear (rstate);\n  return t;\n}\n\ndouble\nspeed_mpz_urandomb (struct speed_params *s)\n{\n  gmp_randstate_t  rstate;\n  mpz_t     z;\n  unsigned  i;\n  double    t;\n\n  SPEED_RESTRICT_COND (s->size >= 0);\n  SPEED_RESTRICT_COND (speed_randinit (s, rstate));\n\n  mpz_init (z);\n\n  /* cache priming */\n  mpz_urandomb (z, rstate, (unsigned long) s->size);\n  mpz_urandomb (z, rstate, (unsigned long) s->size);\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    mpz_urandomb (z, rstate, (unsigned long) s->size);\n  while (--i != 0);\n  t = speed_endtime ();\n\n  mpz_clear (z);\n  gmp_randclear (rstate);\n  return t;\n}\n"
  },
  {
    "path": "tune/divrem1div.c",
    "content": "/* mpn/generic/divrem_1.c forced to use plain udiv_qrnnd.\n\nCopyright 2000, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define OPERATION_divrem_1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef DIVREM_1_NORM_THRESHOLD\n#undef DIVREM_1_UNNORM_THRESHOLD\n#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX\n#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX\n#define __gmpn_divrem_1  mpn_divrem_1_div\n\n#include \"mpn/generic/divrem_1.c\"\n"
  },
  {
    "path": "tune/divrem1inv.c",
    "content": "/* mpn/generic/divrem_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.\n\nCopyright 2000, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define OPERATION_divrem_1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef DIVREM_1_NORM_THRESHOLD\n#undef DIVREM_1_UNNORM_THRESHOLD\n#define DIVREM_1_NORM_THRESHOLD    0\n#define DIVREM_1_UNNORM_THRESHOLD  0\n#define __gmpn_divrem_1  mpn_divrem_1_inv\n\n#include \"mpn/generic/divrem_1.c\"\n"
  },
  {
    "path": "tune/divrem2div.c",
    "content": "/* mpn/generic/divrem_2.c forced to use plain udiv_qrnnd. */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef DIVREM_2_THRESHOLD\n#undef DIVREM_2_THRESHOLD\n#endif\n#define DIVREM_2_THRESHOLD  MP_SIZE_T_MAX\n#define __gmpn_divrem_2     mpn_divrem_2_div\n\n#include \"mpn/generic/divrem_2.c\"\n"
  },
  {
    "path": "tune/divrem2inv.c",
    "content": "/* mpn/generic/divrem_2.c forced to use udiv_qrnnd_preinv. */\n\n/*\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifdef DIVREM_2_THRESHOLD\n#undef DIVREM_2_THRESHOLD\n#endif\n#define DIVREM_2_THRESHOLD  0\n#define __gmpn_divrem_2     mpn_divrem_2_inv\n\n#include \"mpn/generic/divrem_2.c\"\n"
  },
  {
    "path": "tune/fac_ui_large.c",
    "content": "/* mpz/fac_ui.c forced to use large algorithm . */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef FAC_UI_THRESHOLD\n#define FAC_UI_THRESHOLD  1\n#define __gmpz_fac_ui  mpz_fac_ui_large\n\n#include \"../mpz/fac_ui.c\"\n"
  },
  {
    "path": "tune/fac_ui_small.c",
    "content": "/* mpz/fac_ui.c forced to use small algorithm */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef FAC_UI_THRESHOLD\n#define FAC_UI_THRESHOLD    MP_SIZE_T_MAX\n#define __gmpz_fac_ui  mpz_fac_ui_small\n\n#include \"../mpz/fac_ui.c\"\n"
  },
  {
    "path": "tune/freq.c",
    "content": "/* CPU frequency determination.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Currently we don't get a CPU frequency on the following systems,\n\n   m68040-unknown-netbsd1.4.1\n       Not sure if the system even knows the cpu frequency.  There's no\n       cycle counter to measure, though we could perhaps make a loop taking\n       a known number of cycles and measure that.\n\n   power-ibm-aix4.2.1.0\n   power2-ibm-aix4.3.1.0\n   powerpc604-ibm-aix4.3.1.0\n   powerpc604-ibm-aix4.3.3.0\n   powerpc630-ibm-aix4.3.3.0\n   powerpc-unknown-netbsd1.6\n       Don't know where any info hides on these.  
mftb is not related to the\n       cpu frequency so doesn't help.\n\n   sparc-unknown-linux-gnu [maybe]\n       Don't know where any info hides on this.\n\n*/\n\n#include \"config.h\"\n\n#include <stdio.h>\n#include <stdlib.h> /* for getenv, qsort */\n#include <string.h> /* for memcmp */\n\n#if HAVE_UNISTD_H\n#include <unistd.h> /* for sysconf */\n#endif\n\n#include <sys/types.h>\n\n#if HAVE_SYS_PARAM_H     /* for constants needed by NetBSD <sys/sysctl.h> */\n#include <sys/param.h>   /* and needed by HPUX <sys/pstat.h> */\n#endif\n\n#if HAVE_SYS_PSTAT_H\n#include <sys/pstat.h>   /* for HPUX pstat_getprocessor() */\n#endif\n\n#if HAVE_SYS_SYSCTL_H\n#include <sys/sysctl.h>  /* for sysctlbyname() */\n#endif\n\n#if TIME_WITH_SYS_TIME\n# include <sys/time.h>  /* for struct timeval */\n# include <time.h>\n#else\n# if HAVE_SYS_TIME_H\n#  include <sys/time.h>\n# else\n#  include <time.h>\n# endif\n#endif\n\n#if HAVE_SYS_RESOURCE_H\n#include <sys/resource.h>  /* for struct rusage */\n#endif\n\n#if HAVE_SYS_PROCESSOR_H\n#include <sys/processor.h>  /* for solaris processor_info_t */\n#endif\n\n/* On AIX 5.1 with gcc 2.9-aix51-020209 in -maix64 mode, <sys/sysinfo.h>\n   gets an error about \"fill\" in \"struct cpuinfo\" having a negative size,\n   apparently due to __64BIT_KERNEL not being defined because _KERNEL is not\n   defined.  Avoid this file if we don't actually need it, which we don't on\n   AIX since there's no getsysinfo there.  */\n#if HAVE_SYS_SYSINFO_H && HAVE_GETSYSINFO\n#include <sys/sysinfo.h>  /* for OSF getsysinfo */\n#endif\n\n/* Remove definitions from NetBSD <sys/param.h>, to avoid conflicts with\n   gmp-impl.h. 
*/\n#ifdef MIN\n#undef MIN\n#endif\n#ifdef MAX\n#undef MAX\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if defined( _MSC_VER)\n#define HAVE_GETRUSAGE      1\n#define HAVE_GETTIMEOFDAY   1\n#include \"getrusage.h\"\n#include \"gettimeofday.h\"\n#endif\n\n#include \"speed.h\"\n\n\n#define HELP(str)                       \\\n  if (help)                             \\\n    {                                   \\\n      printf (\"    - %s\\n\", str);       \\\n      return 0;                         \\\n    }\n\n\n/* GMP_CPU_FREQUENCY environment variable.  Should be in Hertz and can be\n   floating point, for example \"450e6\". */\nstatic int\nfreq_environment (int help)\n{\n  char  *e;\n\n  HELP (\"environment variable GMP_CPU_FREQUENCY (in Hertz)\");\n\n  e = getenv (\"GMP_CPU_FREQUENCY\");\n  if (e == NULL)\n    return 0;\n\n  speed_cycletime = 1.0 / atof (e);\n\n  if (speed_option_verbose)\n    printf (\"Using GMP_CPU_FREQUENCY %.2f for cycle time %.3g\\n\",\n            atof (e), speed_cycletime);\n\n  return 1;\n}\n\n\n/* getsysinfo is available on OSF, or 4.0 and up at least.\n   The man page (on 4.0) suggests a 0 return indicates information not\n   available, but that seems to be the normal return for GSI_CPU_INFO.  */\nstatic int\nfreq_getsysinfo (int help)\n{\n#if HAVE_GETSYSINFO\n  struct cpu_info  c;\n  int              start;\n\n  HELP (\"getsysinfo() GSI_CPU_INFO\");\n\n  start = 0;\n  if (getsysinfo (GSI_CPU_INFO, (caddr_t) &c, sizeof (c),\n                  &start, NULL, NULL) != -1)\n    {\n      speed_cycletime = 1e-6 / (double) c.mhz;\n      if (speed_option_verbose)\n        printf (\"Using getsysinfo() GSI_CPU_INFO %u for cycle time %.3g\\n\",\n                c.mhz, speed_cycletime);\n      return 1;\n    }\n#endif\n  return 0;\n}\n\n\n/* In HPUX 10 and up, pstat_getprocessor() psp_iticksperclktick is the\n   number of CPU cycles (ie. the CR16 register) per CLK_TCK.  
HPUX 9 doesn't\n   have that field in pst_processor though, and has no apparent\n   equivalent.  */\n\nstatic int\nfreq_pstat_getprocessor (int help)\n{\n#if HAVE_PSTAT_GETPROCESSOR && HAVE_PSP_ITICKSPERCLKTICK\n  struct pst_processor  p;\n\n  HELP (\"pstat_getprocessor() psp_iticksperclktick\");\n\n  if (pstat_getprocessor (&p, sizeof(p), 1, 0) != -1)\n    {\n      long  c = clk_tck();\n      speed_cycletime = 1.0 / (c * p.psp_iticksperclktick);\n      if (speed_option_verbose)\n        printf (\"Using pstat_getprocessor() psp_iticksperclktick %lu and clk_tck %ld for cycle time %.3g\\n\",\n                (unsigned long) p.psp_iticksperclktick, c,\n                speed_cycletime);\n      return 1;\n    }\n#endif\n  return 0;\n}\n\n\n/* i386 FreeBSD 2.2.8 sysctlbyname machdep.i586_freq is in Hertz.\n   There's no obvious defines available to get this from plain sysctl.  */\nstatic int\nfreq_sysctlbyname_i586_freq (int help)\n{\n#if HAVE_SYSCTLBYNAME\n  unsigned  val;\n  size_t    size;\n\n  HELP (\"sysctlbyname() machdep.i586_freq\");\n\n  size = sizeof(val);\n  if (sysctlbyname (\"machdep.i586_freq\", &val, &size, NULL, 0) == 0\n      && size == sizeof(val))\n    {\n      speed_cycletime = 1.0 / (double) val;\n      if (speed_option_verbose)\n        printf (\"Using sysctlbyname() machdep.i586_freq %u for cycle time %.3g\\n\",\n                val, speed_cycletime);\n      return 1;\n    }\n#endif\n  return 0;\n}\n\n\n/* i386 FreeBSD 3.3 sysctlbyname machdep.tsc_freq is in Hertz.\n   There's no obvious defines to get this from plain sysctl.  
*/\n\nstatic int\nfreq_sysctlbyname_tsc_freq (int help)\n{\n#if HAVE_SYSCTLBYNAME\n  unsigned  val;\n  size_t    size;\n\n  HELP (\"sysctlbyname() machdep.tsc_freq\");\n\n  size = sizeof(val);\n  if (sysctlbyname (\"machdep.tsc_freq\", &val, &size, NULL, 0) == 0\n      && size == sizeof(val))\n    {\n      speed_cycletime = 1.0 / (double) val;\n      if (speed_option_verbose)\n        printf (\"Using sysctlbyname() machdep.tsc_freq %u for cycle time %.3g\\n\",\n                val, speed_cycletime);\n      return 1;\n    }\n#endif\n  return 0;\n}\n\n\n/* Apple powerpc Darwin 1.3 sysctl hw.cpufrequency is in hertz.  For some\n   reason only seems to be available from sysctl(), not sysctlbyname().  */\n\nstatic int\nfreq_sysctl_hw_cpufrequency (int help)\n{\n#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_CPU_FREQ)\n  int       mib[2];\n  unsigned  val;\n  size_t    size;\n\n  HELP (\"sysctl() hw.cpufrequency\");\n\n  mib[0] = CTL_HW;\n  mib[1] = HW_CPU_FREQ;\n  size = sizeof(val);\n  if (sysctl (mib, 2, &val, &size, NULL, 0) == 0)\n    {\n      speed_cycletime = 1.0 / (double) val;\n      if (speed_option_verbose)\n        printf (\"Using sysctl() hw.cpufrequency %u for cycle time %.3g\\n\",\n                val, speed_cycletime);\n      return 1;\n    }\n#endif\n  return 0;\n}\n\n\n/* The following sysctl hw.model strings have been observed,\n\n       Alpha FreeBSD 4.1:   Digital AlphaPC 164LX 599 MHz\n       NetBSD 1.4:          Digital AlphaPC 164LX 599 MHz\n       NetBSD 1.6.1:        CY7C601 @ 40 MHz, TMS390C602A FPU\n\n   NetBSD 1.4 doesn't seem to have sysctlbyname, so sysctl() is used.  
*/\n\nstatic int\nfreq_sysctl_hw_model (int help)\n{\n#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_MODEL)\n  int       mib[2];\n  char      str[128];\n  unsigned  val;\n  size_t    size;\n  char      *p;\n  int       end;\n\n  HELP (\"sysctl() hw.model\");\n\n  mib[0] = CTL_HW;\n  mib[1] = HW_MODEL;\n  size = sizeof(str);\n  if (sysctl (mib, 2, str, &size, NULL, 0) == 0)\n    {\n      for (p = str; *p != '\\0'; p++)\n        {\n          end = 0;\n          if (sscanf (p, \"%u MHz%n\", &val, &end) == 1 && end != 0)\n            {\n              speed_cycletime = 1e-6 / (double) val;\n              if (speed_option_verbose)\n                printf (\"Using sysctl() hw.model %u for cycle time %.3g\\n\",\n                        val, speed_cycletime);\n              return 1;\n            }\n        }\n    }\n#endif\n  return 0;\n}\n\n\n/* /proc/cpuinfo for linux kernel.\n\n   Linux doesn't seem to have any system call to get the CPU frequency, at\n   least not in 2.0.x or 2.2.x, so it's necessary to read /proc/cpuinfo.\n\n   i386 2.0.36 - \"bogomips\" is the CPU frequency.\n\n   i386 2.2.13 - has both \"cpu MHz\" and \"bogomips\", and it's \"cpu MHz\" which\n                 is the frequency.\n\n   alpha 2.2.5 - \"cycle frequency [Hz]\" seems to be right, \"BogoMIPS\" is\n                 very slightly different.\n\n   alpha 2.2.18pre21 - \"cycle frequency [Hz]\" is 0 on at least one system,\n                 \"BogoMIPS\" seems near enough.\n\n   powerpc 2.2.19 - \"clock\" is the frequency, bogomips is something weird\n  */\n\nstatic int\nfreq_proc_cpuinfo (int help)\n{\n  FILE    *fp;\n  char    buf[128];\n  double  val;\n  int     ret = 0;\n  int     end;\n\n  HELP (\"linux kernel /proc/cpuinfo file, cpu MHz or bogomips\");\n\n  if ((fp = fopen (\"/proc/cpuinfo\", \"r\")) != NULL)\n    {\n      while (fgets (buf, sizeof (buf), fp) != NULL)\n        {\n          if (sscanf (buf, \"cycle frequency [Hz]    : %lf\", &val) == 1\n              && val != 0.0)\n      
      {\n              speed_cycletime = 1.0 / val;\n              if (speed_option_verbose)\n                printf (\"Using /proc/cpuinfo \\\"cycle frequency\\\" %.2f for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n          if (sscanf (buf, \"cpu MHz : %lf\\n\", &val) == 1)\n            {\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using /proc/cpuinfo \\\"cpu MHz\\\" %.2f for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n          end = 0;\n          if (sscanf (buf, \"clock : %lfMHz\\n%n\", &val, &end) == 1 && end != 0)\n            {\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using /proc/cpuinfo \\\"clock\\\" %.2f for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n          if (sscanf (buf, \"bogomips : %lf\\n\", &val) == 1\n              || sscanf (buf, \"BogoMIPS : %lf\\n\", &val) == 1)\n            {\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using /proc/cpuinfo \\\"bogomips\\\" %.2f for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n        }\n      fclose (fp);\n    }\n  return ret;\n}\n\n\n/* /bin/sysinfo for SunOS 4.\n   Prints a line like: cpu0 is a \"75 MHz TI,TMS390Z55\" CPU */\nstatic int\nfreq_sunos_sysinfo (int help)\n{\n  int     ret = 0;\n#if HAVE_POPEN\n  FILE    *fp;\n  char    buf[128];\n  double  val;\n  int     end;\n\n  HELP (\"SunOS /bin/sysinfo program output, cpu0\");\n\n  /* Error messages are sent to /dev/null in case /bin/sysinfo doesn't\n     exist.  The brackets are necessary for some shells. 
*/\n  if ((fp = popen (\"(/bin/sysinfo) 2>/dev/null\", \"r\")) != NULL)\n    {\n      while (fgets (buf, sizeof (buf), fp) != NULL)\n        {\n          end = 0;\n          if (sscanf (buf, \" cpu0 is a \\\"%lf MHz%n\", &val, &end) == 1\n              && end != 0)\n            {\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using /bin/sysinfo \\\"cpu0 MHz\\\" %.2f for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n        }\n      pclose (fp);\n    }\n#endif\n  return ret;\n}\n\n\n/* \"/etc/hw -r cpu\" for SCO OpenUnix 8, printing a line like\n\tThe speed of the CPU is approximately 450Mhz\n */\nstatic int\nfreq_sco_etchw (int help)\n{\n  int     ret = 0;\n#if HAVE_POPEN\n  FILE    *fp;\n  char    buf[128];\n  double  val;\n  int     end;\n\n  HELP (\"SCO /etc/hw program output\");\n\n  /* Error messages are sent to /dev/null in case /etc/hw doesn't exist.\n     The brackets are necessary for some shells. */\n  if ((fp = popen (\"(/etc/hw -r cpu) 2>/dev/null\", \"r\")) != NULL)\n    {\n      while (fgets (buf, sizeof (buf), fp) != NULL)\n        {\n          end = 0;\n          if (sscanf (buf, \" The speed of the CPU is approximately %lfMhz%n\",\n                      &val, &end) == 1 && end != 0)\n            {\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using /etc/hw %.2f MHz, for cycle time %.3g\\n\",\n                        val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n        }\n      pclose (fp);\n    }\n#endif\n  return ret;\n}\n\n\n/* attr_get(\"/hw/cpunum/0\",INFO_LBL_DETAIL_INVENT) ic_cpu_info.cpufq for\n   IRIX 6.5.  
Past versions don't have INFO_LBL_DETAIL_INVENT,\n   invent_cpuinfo_t, or /hw/cpunum/0.\n\n   The same information is available from the \"hinv -c processor\" command,\n   but it seems better to make a system call where possible. */\n\nstatic int\nfreq_attr_get_invent (int help)\n{\n  int     ret = 0;\n#if HAVE_ATTR_GET && HAVE_INVENT_H && defined (INFO_LBL_DETAIL_INVENT)\n  invent_cpuinfo_t  inv;\n  int               len, val;\n\n  HELP (\"attr_get(\\\"/hw/cpunum/0\\\") ic_cpu_info.cpufq\");\n\n  len = sizeof (inv);\n  if (attr_get (\"/hw/cpunum/0\", INFO_LBL_DETAIL_INVENT,\n                (char *) &inv, &len, 0) == 0\n      && len == sizeof (inv)\n      && inv.ic_gen.ig_invclass == INV_PROCESSOR)\n    {\n      val = inv.ic_cpu_info.cpufq;\n      speed_cycletime = 1e-6 / val;\n      if (speed_option_verbose)\n        printf (\"Using attr_get(\\\"/hw/cpunum/0\\\") ic_cpu_info.cpufq %d MHz for cycle time %.3g\\n\", val, speed_cycletime);\n      ret = 1;\n    }\n#endif\n  return ret;\n}\n\n\n/* FreeBSD on i386 gives a line like the following at bootup, and which can\n   be read back from /var/run/dmesg.boot.\n\n       CPU: AMD Athlon(tm) Processor (755.29-MHz 686-class CPU)\n       CPU: Pentium 4 (1707.56-MHz 686-class CPU)\n       CPU: i486 DX4 (486-class CPU)\n\n   This is useful on FreeBSD 4.x, where there's no sysctl machdep.tsc_freq\n   or machdep.i586_freq.\n\n   It's better to use /var/run/dmesg.boot than to run /sbin/dmesg, since the\n   latter prints the current system message buffer, which is a limited size\n   and can wrap around if the system is up for a long time.  
*/\n\nstatic int\nfreq_bsd_dmesg (int help)\n{\n  FILE    *fp;\n  char    buf[256], *p;\n  double  val;\n  int     ret = 0;\n  int     end;\n\n  HELP (\"BSD /var/run/dmesg.boot file\");\n\n  if ((fp = fopen (\"/var/run/dmesg.boot\", \"r\")) != NULL)\n    {\n      while (fgets (buf, sizeof (buf), fp) != NULL)\n        {\n          if (memcmp (buf, \"CPU:\", 4) == 0)\n            {\n              for (p = buf; *p != '\\0'; p++)\n                {\n                  end = 0;\n                  if (sscanf (p, \"(%lf-MHz%n\", &val, &end) == 1 && end != 0)\n                    {\n                      speed_cycletime = 1e-6 / val;\n                      if (speed_option_verbose)\n                        printf (\"Using /var/run/dmesg.boot CPU: %.2f MHz for cycle time %.3g\\n\", val, speed_cycletime);\n                      ret = 1;\n                      break;\n                    }\n                }\n            }\n        }\n      fclose (fp);\n    }\n  return ret;\n}\n\n\n/* \"hinv -c processor\" for IRIX.  The following lines have been seen,\n\n              1 150 MHZ IP20 Processor\n              2 195 MHZ IP27 Processors\n              Processor 0: 500 MHZ IP35\n\n   This information is available from attr_get() on IRIX 6.5 (see above),\n   but on IRIX 6.2 it's not clear where to look, so fall back on\n   parsing.  */\n\nstatic int\nfreq_irix_hinv (int help)\n{\n  int     ret = 0;\n#if HAVE_POPEN\n  FILE    *fp;\n  char    buf[128];\n  double  val;\n  int     nproc, end;\n\n  HELP (\"IRIX \\\"hinv -c processor\\\" output\");\n\n  /* Error messages are sent to /dev/null in case hinv doesn't exist.  The\n     brackets are necessary for some shells. 
*/\n  if ((fp = popen (\"(hinv -c processor) 2>/dev/null\", \"r\")) != NULL)\n    {\n      while (fgets (buf, sizeof (buf), fp) != NULL)\n        {\n          end = 0;\n          if (sscanf (buf, \"Processor 0: %lf MHZ%n\", &val, &end) == 1\n              && end != 0)\n            {\n            found:\n              speed_cycletime = 1e-6 / val;\n              if (speed_option_verbose)\n                printf (\"Using hinv -c processor \\\"%.2f MHZ\\\" for cycle time %.3g\\n\", val, speed_cycletime);\n              ret = 1;\n              break;\n            }\n          end = 0;\n          if (sscanf (buf, \"%d %lf MHZ%n\", &nproc, &val, &end) == 2\n              && end != 0)\n            goto found;\n        }\n      pclose (fp);\n    }\n#endif\n  return ret;\n}\n\n\n/* processor_info() for Solaris.  \"psrinfo\" is the command-line interface to\n   this.  \"prtconf -vp\" gives similar information.\n\n   Apple Darwin has a processor_info, but in an incompatible style.  It\n   doesn't have <sys/processor.h>, so test for that.  
*/\n\nstatic int\nfreq_processor_info (int help)\n{\n#if HAVE_PROCESSOR_INFO && HAVE_SYS_PROCESSOR_H\n  processor_info_t  p;\n  int  i, n, mhz = 0;\n\n  HELP (\"processor_info() pi_clock\");\n\n  n = sysconf (_SC_NPROCESSORS_CONF);\n  for (i = 0; i < n; i++)\n    {\n      if (processor_info (i, &p) != 0)\n        continue;\n      if (p.pi_state != P_ONLINE)\n        continue;\n\n      if (mhz != 0 && p.pi_clock != mhz)\n        {\n          fprintf (stderr,\n                   \"freq_processor_info(): There's more than one CPU and they have different clock speeds\\n\");\n          return 0;\n        }\n\n      mhz = p.pi_clock;\n    }\n\n  speed_cycletime = 1.0e-6 / (double) mhz;\n\n  if (speed_option_verbose)\n    printf (\"Using processor_info() %d mhz for cycle time %.3g\\n\",\n            mhz, speed_cycletime);\n  return 1;\n\n#else\n  return 0;\n#endif\n}\n\n\n#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY\nstatic double\nfreq_measure_gettimeofday_one (void)\n{\n#define call_gettimeofday(t)   gettimeofday (&(t), NULL)\n#define timeval_tv_sec(t)      ((t).tv_sec)\n#define timeval_tv_usec(t)     ((t).tv_usec)\n  FREQ_MEASURE_ONE (\"gettimeofday\", struct timeval,\n                    call_gettimeofday, speed_cyclecounter,\n                    timeval_tv_sec, timeval_tv_usec);\n}\n#endif\n\n#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE\nstatic double\nfreq_measure_getrusage_one (void)\n{\n#define call_getrusage(t)   getrusage (0, &(t))\n#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)\n#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)\n  FREQ_MEASURE_ONE (\"getrusage\", struct rusage,\n                    call_getrusage, speed_cyclecounter,\n                    rusage_tv_sec, rusage_tv_usec);\n}\n#endif\n\n\n/* MEASURE_MATCH is how many readings within MEASURE_TOLERANCE of each other\n   are required.  This must be at least 2.  
*/\n#define MEASURE_MAX_ATTEMPTS   20\n#define MEASURE_TOLERANCE      1.005  /* 0.5% */\n#define MEASURE_MATCH          3\n\ndouble\nfreq_measure (const char *name, double (*one) (void))\n{\n  double  t[MEASURE_MAX_ATTEMPTS];\n  int     i, j;\n\n  for (i = 0; i < numberof (t); i++)\n    {\n      t[i] = (*one) ();\n\n      qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);\n      if (speed_option_verbose >= 3)\n        for (j = 0; j <= i; j++)\n          printf (\"   t[%d] is %.6g\\n\", j, t[j]);\n\n      for (j = 0; j+MEASURE_MATCH-1 <= i; j++)\n        {\n          if (t[j+MEASURE_MATCH-1] <= t[j] * MEASURE_TOLERANCE)\n            {\n              /* use the average of the range found */\n                return (t[j+MEASURE_MATCH-1] + t[j]) / 2.0;\n            }\n        }\n    }\n  return -1.0;\n}\n\nstatic int\nfreq_measure_getrusage (int help)\n{\n#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE\n  double  cycletime;\n\n  if (! getrusage_microseconds_p ())\n    return 0;\n  if (! cycles_works_p ())\n    return 0;\n\n  HELP (\"cycle counter measured with microsecond getrusage()\");\n\n  cycletime = freq_measure (\"getrusage\", freq_measure_getrusage_one);\n  if (cycletime == -1.0)\n    return 0;\n\n  speed_cycletime = cycletime;\n  if (speed_option_verbose)\n    printf (\"Using getrusage() measured cycle counter %.4g (%.2f MHz)\\n\",\n            speed_cycletime, 1e-6/speed_cycletime);\n  return 1;\n\n#else\n  return 0;\n#endif\n}\n\nstatic int\nfreq_measure_gettimeofday (int help)\n{\n#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY\n  double  cycletime;\n\n  if (! gettimeofday_microseconds_p ())\n    return 0;\n  if (! 
cycles_works_p ())\n    return 0;\n\n  HELP (\"cycle counter measured with microsecond gettimeofday()\");\n\n  cycletime = freq_measure (\"gettimeofday\", freq_measure_gettimeofday_one);\n  if (cycletime == -1.0)\n    return 0;\n\n  speed_cycletime = cycletime;\n  if (speed_option_verbose)\n    printf (\"Using gettimeofday() measured cycle counter %.4g (%.2f MHz)\\n\",\n            speed_cycletime, 1e-6/speed_cycletime);\n  return 1;\n#else\n  return 0;\n#endif\n}\n\n\n/* Each function returns 1 if it succeeds in setting speed_cycletime, or 0\n   if not.\n\n   In general system call tests are first since they're fast, then file\n   tests, then tests running programs.  Necessary exceptions to this rule\n   are noted.  The measuring is last since it's time consuming, and rather\n   wasteful of cpu.  */\n\nstatic int\nfreq_all (int help)\n{\n#ifdef _MSC_VER\n    return freq_environment (help) || freq_measure_gettimeofday (help) || freq_measure_getrusage (help);\n#else\n  return\n    /* This should be first, so an environment variable can override\n       anything the system gives. */\n    freq_environment (help)\n\n    || freq_attr_get_invent (help)\n    || freq_getsysinfo (help)\n    || freq_pstat_getprocessor (help)\n    || freq_sysctl_hw_model (help)\n    || freq_sysctl_hw_cpufrequency (help)\n    || freq_sysctlbyname_i586_freq (help)\n    || freq_sysctlbyname_tsc_freq (help)\n\n    /* SCO openunix 8 puts a dummy pi_clock==16 in processor_info, so be\n       sure to check /etc/hw before that function. 
*/\n    || freq_sco_etchw (help)\n\n    || freq_processor_info (help)\n    || freq_proc_cpuinfo (help)\n    || freq_bsd_dmesg (help)\n    || freq_irix_hinv (help)\n    || freq_sunos_sysinfo (help)\n    || freq_measure_getrusage (help)\n    || freq_measure_gettimeofday (help);\n#endif\n}\n\n\nvoid\nspeed_cycletime_init (void)\n{\n  static int  attempted = 0;\n\n  if (attempted)\n    return;\n  attempted = 1;\n\n  if (freq_all (0))\n    return;\n\n  if (speed_option_verbose)\n    printf (\"CPU frequency couldn't be determined\\n\");\n}\n\n\nvoid\nspeed_cycletime_fail (const char *str)\n{\n  fprintf (stderr, \"Measuring with: %s\\n\", speed_time_string);\n  fprintf (stderr, \"%s,\\n\", str);\n  fprintf (stderr, \"but none of the following are available,\\n\");\n  freq_all (1);\n  abort ();\n}\n\n/* speed_time_init leaves speed_cycletime set to either 0.0 or 1.0 when the\n   CPU frequency is unknown.  0.0 is when the time base is in seconds, so\n   that's no good if cycles are wanted.  1.0 is when the time base is in\n   cycles, which conversely is no good if seconds are wanted.  */\nvoid\nspeed_cycletime_need_cycles (void)\n{\n  speed_time_init ();\n  if (speed_cycletime == 0.0)\n    speed_cycletime_fail\n      (\"Need to know CPU frequency to give times in cycles\");\n}\nvoid\nspeed_cycletime_need_seconds (void)\n{\n  speed_time_init ();\n  if (speed_cycletime == 1.0)\n    speed_cycletime_fail\n      (\"Need to know CPU frequency to convert cycles to seconds\");\n}\n"
  },
  {
    "path": "tune/gcdext_double.c",
    "content": "/* mpn/generic/gcdext.c forced to use double limb calculations. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef GCDEXT_THRESHOLD\n#define GCDEXT_THRESHOLD  0\n#define __gmpn_gcdext  mpn_gcdext_double\n\n#include \"../mpn/generic/gcdext.c\"\n"
  },
  {
    "path": "tune/gcdext_single.c",
    "content": "/* mpn/generic/gcdext.c forced to use single limb calculations. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef GCDEXT_THRESHOLD\n#define GCDEXT_THRESHOLD  MP_SIZE_T_MAX\n#define __gmpn_gcdext  mpn_gcdext_single\n\n#include \"../mpn/generic/gcdext.c\"\n"
  },
  {
    "path": "tune/gcdextod.c",
    "content": "/* mpn/generic/gcdext.c forced to one double limb step. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef GCDEXT_THRESHOLD\n#define GCDEXT_THRESHOLD  0\n#define WANT_GCDEXT_ONE_STEP 1\n#define __gmpn_gcdext  mpn_gcdext_one_double\n\n#include \"../mpn/generic/gcdext.c\"\n"
  },
  {
    "path": "tune/gcdextos.c",
    "content": "/* mpn/generic/gcdext.c forced to one single limb step. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef GCDEXT_THRESHOLD\n#define GCDEXT_THRESHOLD  MP_SIZE_T_MAX\n#define WANT_GCDEXT_ONE_STEP 1\n#define __gmpn_gcdext  mpn_gcdext_one_single\n\n#include \"../mpn/generic/gcdext.c\"\n"
  },
  {
    "path": "tune/hgcd_appr_lehmer.c",
    "content": "/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */\n\n/*\nCopyright 2010, 2011 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef  HGCD_APPR_THRESHOLD\n#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX\n#define __gmpn_hgcd_appr  mpn_hgcd_appr_lehmer\n#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch\n\n#include \"../mpn/generic/hgcd_appr.c\"\n"
  },
  {
    "path": "tune/hgcd_lehmer.c",
    "content": "/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */\n\n/*\nCopyright 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef  HGCD_THRESHOLD\n#define HGCD_THRESHOLD MP_SIZE_T_MAX\n#define __gmpn_hgcd  mpn_hgcd_lehmer\n#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch\n\n#include \"../mpn/generic/hgcd.c\"\n"
  },
  {
    "path": "tune/hgcd_reduce_1.c",
    "content": "/* mpn/generic/hgcd_reduce.c forced to use hgcd. */\n\n/*\nCopyright 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef  HGCD_REDUCE_THRESHOLD\n#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX\n#define __gmpn_hgcd_reduce  mpn_hgcd_reduce_1\n#define __gmpn_hgcd_reduce_itch  mpn_hgcd_reduce_1_itch\n\n\n#include \"../mpn/generic/hgcd_reduce.c\"\n"
  },
  {
    "path": "tune/hgcd_reduce_2.c",
    "content": "/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */\n\n/*\nCopyright 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef  HGCD_REDUCE_THRESHOLD\n#define HGCD_REDUCE_THRESHOLD 0\n#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2\n#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch\n\n#include \"../mpn/generic/hgcd_reduce.c\"\n"
  },
  {
    "path": "tune/hppa.asm",
    "content": "dnl  HPPA 32-bit time stamp counter access routine.\n\ndnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndnl void speed_cyclecounter (unsigned p[2]);\ndnl\ndnl Get the HPPA interval timer.\n\nPROLOGUE(speed_cyclecounter)\n\tmfctl\t%cr16,%r28\n\tstw\t%r28,0(0,%r26)\n\tbv\t0(%r2)\n\tstw\t%r0,4(0,%r26)\nEPILOGUE(speed_cyclecounter)\n"
  },
  {
    "path": "tune/hppa2.asm",
    "content": "dnl  HPPA 64-bit time stamp counter access routine.\n\ndnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndnl void speed_cyclecounter (unsigned p[2]);\ndnl\ndnl Get the HPPA interval timer.\n\n\t.level 2.0\nPROLOGUE(speed_cyclecounter)\n\tmfctl\t%cr16,%r28\n\tstw\t%r28,0(0,%r26)\t\t; low word\n\textrd,u\t%r28,31,32,%r28\n\tbve\t(%r2)\n\tstw\t%r28,4(0,%r26)\t\t; high word\nEPILOGUE(speed_cyclecounter)\n"
  },
  {
    "path": "tune/hppa2w.asm",
    "content": "dnl  HPPA 64-bit time stamp counter access routine.\n\ndnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\ndnl void speed_cyclecounter (unsigned p[2]);\ndnl\ndnl Get the HPPA interval timer.\n\n\t.level 2.0w\nPROLOGUE(speed_cyclecounter)\n\tmfctl\t%cr16,%r28\n\tstw\t%r28,0(0,%r26)\t\t; low word\n\textrd,u\t%r28,31,32,%r28\n\tbve\t(%r2)\n\tstw\t%r28,4(0,%r26)\t\t; high word\nEPILOGUE(speed_cyclecounter)\n"
  },
  {
    "path": "tune/ia64.asm",
    "content": "dnl  IA-64 time stamp counter access routine.\n\ndnl  Copyright 2000, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC void speed_cyclecounter (unsigned int p[2]);\nC\n\nASM_START()\nPROLOGUE(speed_cyclecounter)\n\tmov\tr14 = ar.itc\n\t;;\n\tst4\t[r32] = r14, 4\n\tshr.u\tr14 = r14, 32\n\t;;\n\tst4\t[r32] = r14\n\tbr.ret.sptk.many b0\nEPILOGUE(speed_cyclecounter)\nASM_END()\n"
  },
  {
    "path": "tune/jacbase1.c",
    "content": "/* mpn/generic/jacbase.c method 1.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef JACOBI_BASE_METHOD\n#define JACOBI_BASE_METHOD 1\n#define __gmpn_jacobi_base mpn_jacobi_base_1\n\n#include \"mpn/generic/jacobi_base.c\"\n"
  },
  {
    "path": "tune/jacbase2.c",
    "content": "/* mpn/generic/jacbase.c method 2.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef JACOBI_BASE_METHOD\n#define JACOBI_BASE_METHOD 2\n#define __gmpn_jacobi_base mpn_jacobi_base_2\n\n#include \"mpn/generic/jacobi_base.c\"\n"
  },
  {
    "path": "tune/jacbase3.c",
    "content": "/* mpn/generic/jacbase.c method 3.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef JACOBI_BASE_METHOD\n#define JACOBI_BASE_METHOD 3\n#define __gmpn_jacobi_base mpn_jacobi_base_3\n\n#include \"mpn/generic/jacobi_base.c\"\n"
  },
  {
    "path": "tune/jacbase4.c",
    "content": "/* mpn/generic/jacbase.c method 4.\n\nCopyright 2002, 2010 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef JACOBI_BASE_METHOD\n#define JACOBI_BASE_METHOD 4\n#define __gmpn_jacobi_base mpn_jacobi_base_4\n\n#include \"mpn/generic/jacobi_base.c\"\n"
  },
  {
    "path": "tune/many.pl",
    "content": "#! /usr/bin/perl -w\n\n# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.\n#\n# This file is part of the GNU MP Library.\n#\n# The GNU MP Library is free software; you can redistribute it and/or modify\n# it under the terms of the GNU Lesser General Public License as published by\n# the Free Software Foundation; either version 2.1 of the License, or (at your\n# option) any later version.\n#\n# The GNU MP Library is distributed in the hope that it will be useful, but\n# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n# License for more details.\n#\n# You should have received a copy of the GNU Lesser General Public License\n# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to\n# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\n# MA 02110-1301, USA.\n\n\n# Usage:  cd $builddir/tune\n#\t  perl $srcdir/tune/many.pl [-t] <files/dirs>...\n#\n# Output: speed-many.c\n#         try-many.c\n#         Makefile.many\n#\n# Make alternate versions of various mpn routines available for measuring\n# and testing.\n#\n# The $srcdir and $builddir in the invocation above just means the script\n# lives in the tune source directory, but should be run in the tune build\n# directory.  
When not using a separate object directory this just becomes\n#\n#\tcd tune\n#\tperl many.pl [-t] <files/dirs>...\n#\n#\n# SINGLE FILES\n#\n# Suppose $HOME/newcode/mul_1_experiment.asm is a new implementation of\n# mpn_mul_1, then\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl $HOME/newcode/mul_1_experiment.asm\n#\n# will produce rules and renaming so that a speed program incorporating it\n# can be built,\n#\n#\tmake -f Makefile.many speed-many\n#\n# then for example it can be compared to the standard mul_1,\n#\n#\t./speed-many -s 1-30 mpn_mul_1 mpn_mul_1_experiment\n#\n# An expanded try program can be used to check correctness,\n#\n#\tmake -f Makefile.many try-many\n#\n# and run\n#\n#\t./try-many mpn_mul_1_experiment\n#\n# Files can be \".c\", \".S\" or \".asm\".  \".s\" files can't be used because they\n# don't get any preprocessing so there's no way to do renaming of their\n# functions.\n#\n#\n# WHOLE DIRECTORIES\n#\n# If a directory is given, then all files in it will be made available.\n# For example,\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl $HOME/newcode\n#\n# Each file should have a suffix, like \"_experiment\" above.\n#\n#\n# MPN DIRECTORIES\n#\n# mpn directories from the GMP source tree can be included, and this is a\n# convenient way to compare multiple implementations suiting different chips\n# in a CPU family.  For example the following would make all x86 routines\n# available,\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl `find $srcdir/mpn/x86 -type d`\n#\n# On a new x86 chip a comparison could then be made to see how existing code\n# runs.  
For example,\n#\n#\tmake -f Makefile.many speed-many\n#\t./speed-many -s 1-30 -c \\\n#\t\tmpn_add_n_x86 mpn_add_n_pentium mpn_add_n_k6 mpn_add_n_k7\n#\n# Files in \"mpn\" subdirectories don't need the \"_experiment\" style suffix\n# described above, instead a suffix is constructed from the subdirectory.\n# For example \"mpn/x86/k7/mmx/mod_1.asm\" will generate a function\n# mpn_mod_1_k7_mmx.  The rule is to take the last directory name after the\n# \"mpn\", or the last two if there's three or more.  (Check the generated\n# speed-many.c if in doubt.)\n#\n#\n# GENERIC C\n#\n# The mpn/generic directory can be included too, just like any processor\n# specific directory.  This is a good way to compare assembler and generic C\n# implementations.  For example,\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl $srcdir/mpn/generic\n#\n# or if just a few routines are of interest, then for example\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl \\\n#\t\t$srcdir/mpn/generic/lshift.c \\\n#\t\t$srcdir/mpn/generic/mod_1.c \\\n#\t\t$srcdir/mpn/generic/aorsmul_1.c\n#\n# giving mpn_lshift_generic etc.\n#\n#\n# TESTS/DEVEL PROGRAMS\n#\n# Makefile.many also has rules to build the tests/devel programs with suitable\n# renaming, and with some parameters for correctness or speed.  This is less\n# convenient than the speed and try programs, but provides an independent\n# check.  For example,\n#\n#\tmake -f Makefile.many tests_mul_1_experimental\n#\t./tests_mul_1_experimental\n#\n# and for speed\n#\n#\tmake -f Makefile.many tests_mul_1_experimental_sp\n#\t./tests_mul_1_experimental_sp\n#\n# Not all the programs support speed measuring, in which case only the\n# correctness test will be useful.\n#\n# The parameters for repetitions and host clock speed are -D defines.  Some\n# defaults are provided at the end of Makefile.many, but probably these will\n# want to be overridden.  
For example,\n#\n#\trm tests_mul_1_experimental.o\n#\tmake -f Makefile.many \\\n#\t   CFLAGS_TESTS=\"-DSIZE=50 -DTIMES=1000 -DRANDOM -DCLOCK=175000000\" \\\n#\t   tests_mul_1_experimental\n#\t./tests_mul_1_experimental\n#\n#\n# OTHER NOTES\n#\n# The mappings of file names to functions, and the macros to then use for\n# speed measuring etc are driven by @table below.  The scheme isn't\n# completely general, it's only got as many variations as have been needed\n# so far.\n#\n# Some functions are only made available in speed-many, or others only in\n# try-many.  An @table entry speed=>none means no speed measuring is\n# available, or try=>none no try program testing.  These can be removed\n# if/when the respective programs get the necessary support.\n#\n# If a file has \"1c\" or \"nc\" carry-in entrypoints, they're renamed and made\n# available too.  These are recognised from PROLOGUE or MULFUNC_PROLOGUE in\n# .S and .asm files, or from a line starting with \"mpn_foo_1c\" in a .c file\n# (possibly via a #define), and on that basis are entirely optional.  This\n# entrypoint matching is done for the standard entrypoints too, but it would\n# be very unusual to have for instance a mul_1c without a mul_1.\n#\n# Some mpz files are recognized.  For example an experimental copy of\n# mpz/powm.c could be included as powm_new.c and would be called\n# mpz_powm_new.  So far only speed measuring is available for these.\n#\n# For the \".S\" and \".asm\" files, both PIC and non-PIC objects are built.\n# The PIC functions have a \"_pic\" suffix, for example \"mpn_mod_1_k7_mmx_pic\".\n# This can be ignored for routines that don't differ for PIC, or for CPUs\n# where everything is PIC anyway.\n#\n# The \"-t\" option can be used to print a trace of the files found and what's\n# done with them.  A great deal of obscure output is produced, but it can\n# indicate where or why some files aren't being recognised etc.  
For\n# example,\n#\n#\tcd $builddir/tune\n#\tperl $srcdir/tune/many.pl -t $HOME/newcode/add_n_weird.asm\n#\n# In general, when including new code, all that's really necessary is that\n# it will compile or assemble under the current configuration.  It's fine if\n# some code doesn't actually run due to bugs, or to needing a newer CPU or\n# whatever, simply don't ask for the offending routines when invoking\n# speed-many or try-many, or don't try to run them on sizes they don't yet\n# support, or whatever.\n#\n#\n# CPU SPECIFICS\n#\n# x86 - All the x86 code will assemble on any system, but code for newer\n#       chips might not run on older chips.  Expect SIGILLs from new\n#       instructions on old chips.\n#\n#       A few \"new\" instructions, like cmov for instance, are done as macros\n#       and will generate some equivalent plain i386 code when HAVE_HOST_CPU\n#       in config.m4 indicates an old CPU.  It won't run fast, but it does\n#       make it possible to test correctness.\n#\n#\n# INTERNALS\n#\n# The nonsense involving $ENV is some hooks used during development to add\n# additional functions temporarily.\n#\n#\n# FUTURE\n#\n# Maybe the C files should be compiled pic and non-pic too.  Wait until\n# there's a difference that might be of interest.\n#\n# Warn if a file provides no functions.\n#\n# Allow mpz and mpn files of the same name.  Currently the mpn fib2_ui\n# matching hides the mpz version of that.  Will need to check the file\n# contents to see which it is.  Would be worth allowing an \"mpz_\" or \"mpn_\"\n# prefix on the filenames to have working versions of both in one directory.\n#\n#\n# LIMITATIONS\n#\n# Some of the command lines can become very long when a lot of files are\n# included.  
If this is a problem on a given system the only suggestion is\n# to run many.pl for just those that are actually wanted at a particular\n# time.\n#\n# DOS 8.3 or SysV 14 char filesystems won't work, since the long filenames\n# generated will almost certainly fail to be unique.\n\n\nuse strict;\nuse File::Basename;\nuse Getopt::Std;\n\nmy %opt;\ngetopts('t', \\%opt);\n\nmy @DIRECTORIES = @ARGV;\nif (defined $ENV{directories}) { push @DIRECTORIES, @{$ENV{directories}} }\n\n\n# regexp - matched against the start of the filename.  If a grouping \"(...)\"\n#          is present then only the first such part is used.\n#\n# mulfunc - filenames to be generated from a multi-function file.\n#\n# funs - functions provided by the file, defaulting to the filename with mpn\n#          (or mpX).\n#\n# mpX - prefix like \"mpz\", defaulting to \"mpn\".\n#\n# ret - return value type.\n#\n# args, args_<fun> - arguments for the given function.  If an args_<fun> is\n#          set then it's used, otherwise plain args is used.  \"mp_limb_t\n#          carry\" is appended for carry-in variants.\n#\n# try - try.c TYPE_ to use, defaulting to TYPE_fun with the function name\n#          in upper case.  \"C\" is appended for carry-in variants.  
Can be\n#          'none' for no try program entry.\n#\n# speed - SPEED_ROUTINE_ to use, handled like \"try\".\n#\n# speed_flags - SPEED_ROUTINE_ to use, handled like \"try\".\n\n\nmy @table =\n    (\n     {\n       'regexp'=> 'add_n|sub_n|addlsh1_n|sublsh1_n|rsh1add_n|rsh1sub_n',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',\n       'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n     },\n     {\n       'regexp'=> 'addmul_1|submul_1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       'regexp'=> 'addmul_2|submul_2',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 2,\n     },\n     {\n       'regexp'=> 'addmul_3|submul_3',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_3',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 3,\n     },\n     {\n       'regexp'=> 'addmul_4|submul_4',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_4',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 4,\n     },\n     {\n       'regexp'=> 'addmul_5|submul_5',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_5',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 5,\n     },\n     {\n       'regexp'=> 'addmul_6|submul_6',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr 
xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_6',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 6,\n     },\n     {\n       'regexp'=> 'addmul_7|submul_7',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_7',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 7,\n     },\n     {\n       'regexp'=> 'addmul_8|submul_8',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_8',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try-minsize' => 8,\n     },\n\n     {\n       'regexp'=> 'sumdiff_n',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n     },\n\n     {\n       'regexp'=> 'bdivmod',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr qp, mp_ptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize, unsigned long int d',\n       'carrys'=> [''],\n       'try'   => 'none',\n       'speed' => 'none',\n     },\n\n     {\n       'regexp'=> 'com_n|copyi|copyd',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',\n       'speed' => 'SPEED_ROUTINE_MPN_COPY',\n     },\n\n     {\n       'regexp'=> 'divexact_1',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       'regexp'=> 'divexact_by3c',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size',\n       'carrys'=> [''],\n       'speed' => 'SPEED_ROUTINE_MPN_COPY',\n     },\n\n     # mpn_preinv_divrem_1 is an optional extra entrypoint\n     {\n       'regexp'=> 'divrem_1',\n       'funs'  => ['divrem_1', 'preinv_divrem_1'],\n       'ret'   => 
'mp_limb_t',\n       'args_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor',\n       'args_preinv_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift',\n       'speed_flags'=> 'FLAG_R',\n       'speed_suffixes' => ['f'],\n     },\n     {\n       'regexp'=> 'preinv_divrem_1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr ap, mp_size_t asize, mp_limb_t divisor, mp_limb_t inverse, int shift',\n       'speed_flags' => 'FLAG_R',\n     },\n\n     {\n       'regexp'=> 'divrem_2',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr np, mp_size_t nsize, mp_srcptr dp',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'tdiv_qr',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',\n       'speed' => 'none',\n     },\n\n     {\n       'regexp'=> 'get_str',\n       'ret'   => 'size_t',\n       'args'  => 'unsigned char *str, int base, mp_ptr mptr, mp_size_t msize',\n       'speed_flags' => 'FLAG_R_OPTIONAL',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'set_str',\n       'ret'   => 'mp_size_t',\n       'args'  => 'mp_ptr xp, const unsigned char *str, size_t str_len, int base',\n       'speed_flags' => 'FLAG_R_OPTIONAL',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'fac_ui',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr r, unsigned long n',\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'fib2_ui',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr fp, mp_ptr f1p, unsigned long n',\n       'rename'=> ['__gmp_fib_table'],\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'fib_ui',\n       'mpX'   => 'mpz',\n       
'ret'   => 'void',\n       'args'  => 'mpz_ptr fn, unsigned long n',\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'fib2_ui',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr fn, mpz_ptr fnsub1, unsigned long n',\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'lucnum_ui',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr ln, unsigned long n',\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'lucnum2_ui',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr ln, mpz_ptr lnsub1, unsigned long n',\n       'speed_flags' => 'FLAG_NODATA',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'gcd_1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr xp, mp_size_t xsize, mp_limb_t y',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'speed_suffixes' => ['N'],\n     },\n     {\n       'regexp'=> '(gcd)(?!(_1|ext|_finda))',\n       'ret'   => 'mp_size_t',\n       'args'  => 'mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize',\n     },\n\n     {\n       'regexp'=> 'jacobi',\n       'funs'  => ['jacobi', 'legendre', 'kronecker'],\n       'mpX'   => 'mpz',\n       'ret'   => 'int',\n       'args'  => 'mpz_srcptr a, mpz_srcptr b',\n       'try-legendre' => 'TYPE_MPZ_JACOBI',\n     },\n     {\n       'regexp'=> 'jacobi_base',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_limb_t a, mp_limb_t b, int bit1',\n       'speed' => 'SPEED_ROUTINE_MPN_JACBASE',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> '[lr]shift',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',\n       'speed_flags'=> 'FLAG_R',\n     },\n\n     # mpn_preinv_mod_1 is an optional extra 
entrypoint\n     {\n       'regexp'=> '(mod_1)(?!_rs)',\n       'funs'  => ['mod_1','preinv_mod_1'],\n       'ret'   => 'mp_limb_t',\n       'args_mod_1'       => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor',\n       'args_preinv_mod_1'=> 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       'regexp'=> 'preinv_mod_1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       'regexp'=> 'mod_34lsub1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_srcptr src, mp_size_t len',\n     },\n     {\n       'regexp'=> 'invert_limb',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_limb_t divisor',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try'   => 'none',\n     },\n\n     {\n       # not for use with hppa reversed argument versions of mpn_umul_ppmm\n       'regexp'=> 'udiv',\n       'funs'  => ['udiv_qrnnd','udiv_qrnnd_r'],\n       'ret'   => 'mp_limb_t',\n       'args_udiv_qrnnd'   => 'mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t',\n       'args_udiv_qrnnd_r' => 'mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *',\n       'speed' => 'none',\n       'try-minsize' => 2,\n     },\n\n     {\n       'regexp'=> 'mode1o',\n       'funs'  => ['modexact_1_odd'],\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_srcptr src, mp_size_t size, mp_limb_t divisor',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       'regexp'=> 'modlinv',\n       'funs'  => ['modlimb_invert'],\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_limb_t v',\n       'carrys'=> [''],\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'mul_1',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',\n       'speed_flags'=> 'FLAG_R',\n     },\n     {\n       
'regexp'=> 'mul_2',\n       'ret'   => 'mp_limb_t',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr mult',\n       'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',\n       'speed_flags'=> 'FLAG_R',\n     },\n     \n     {\n       'regexp'=> 'mul_basecase',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize',\n       'speed_flags' => 'FLAG_R_OPTIONAL | FLAG_RSIZE',\n     },\n     {\n       'regexp'=> '(mul_n)[_.]',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',\n       'rename'=> ['kara_mul_n','kara_sqr_n','toom3_mul_n','toom3_sqr_n'],\n     },\n     {\n       'regexp'=> 'umul',\n       'funs'  => ['umul_ppmm','umul_ppmm_r'],\n       'ret'   => 'mp_limb_t',\n       'args_umul_ppmm'   => 'mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2',\n       'args_umul_ppmm_r' => 'mp_limb_t m1, mp_limb_t m2, mp_limb_t *lowptr',\n       'speed' => 'none',\n       'try-minsize' => 3,\n     },\n\n     {\n       'regexp'=> 'popcount',\n       'ret'   => 'unsigned long',\n       'args'  => 'mp_srcptr xp, mp_size_t size',\n     },\n     {\n       'regexp'=> 'hamdist',\n       'ret'   => 'unsigned long',\n       'args'  => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',\n       # extra renaming to support sharing a data table with mpn_popcount\n       'rename'=> ['popcount'],\n     },\n\n     {\n       'regexp'=> 'sqr_basecase',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',\n       'speed' => 'SPEED_ROUTINE_MPN_SQR',\n       'try'   => 'TYPE_SQR',\n     },\n     {\n       'regexp'=> 'sqr_diagonal',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'sqrtrem',\n       'ret'   => 'mp_size_t',\n       'args'  => 'mp_ptr root, mp_ptr rem, mp_srcptr src, mp_size_t size',\n       'try'   => 'none',\n     },\n\n     
{\n       'regexp'=> 'cntlz',\n       'funs'  => ['count_leading_zeros'],\n       'ret'   => 'unsigned',\n       'args'  => 'mp_limb_t',\n       'macro-before' => \"#undef COUNT_LEADING_ZEROS_0\",\n       'macro-speed'  =>\n'#ifdef COUNT_LEADING_ZEROS_0\n#define COUNT_LEADING_ZEROS_0_ALLOWED   1\n#else\n#define COUNT_LEADING_ZEROS_0_ALLOWED   0\n#endif\n  SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);\n  $fun (c, n);\n  SPEED_ROUTINE_COUNT_ZEROS_B ()',\n       'speed_flags'=> 'FLAG_R_OPTIONAL',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'cnttz',\n       'funs'  => ['count_trailing_zeros'],\n       'ret'   => 'unsigned',\n       'args'  => 'mp_limb_t',\n       'macro-speed' => '\n  SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);\n  $fun (c, n);\n  SPEED_ROUTINE_COUNT_ZEROS_B ()',\n       'speed_flags' => 'FLAG_R_OPTIONAL',\n       'try'   => 'none',\n     },\n\n     {\n       'regexp'=> 'zero',\n       'ret'   => 'void',\n       'args'  => 'mp_ptr ptr, mp_size_t size',\n     },\n\n     {\n       'regexp'=> '(powm)(?!_ui)',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m',\n       'try'   => 'none',\n     },\n     {\n       'regexp'=> 'powm_ui',\n       'mpX'   => 'mpz',\n       'ret'   => 'void',\n       'args'  => 'mpz_ptr r, mpz_srcptr b, unsigned long e, mpz_srcptr m',\n       'try'   => 'none',\n     },\n\n     # special for use during development\n     {\n       'regexp'=> 'back',\n       'funs'  => ['back_to_back'],\n       'ret'   => 'void',\n       'args'  => 'void',\n       'pic'   => 'no',\n       'try'   => 'none',\n       'speed_flags'=> 'FLAG_NODATA',\n     },\n     );\n\nif (defined $ENV{table2}) {\n  my @newtable = @{$ENV{table2}};\n  push @newtable, @table;\n  @table = @newtable;\n}\n\n\nmy %pictable = \n    (\n     'yes' => {\n       'suffix' =>  '_pic',\n       'asmflags'=> '$(ASMFLAGS_PIC)',\n       'cflags' =>  '$(CFLAGS_PIC)',\n     },\n     
'no' => {\n       'suffix' =>  '',\n       'asmflags'=> '',\n       'cflags' =>  '',\n     },\n     );\n\n\nmy $builddir = $ENV{builddir};\n$builddir = \".\" if (! defined $builddir);\n\nmy $top_builddir = \"${builddir}/..\";\n\n\nopen(MAKEFILE, \"<${builddir}/Makefile\")\n  or die \"Cannot open ${builddir}/Makefile: $!\\n\"\n       . \"Is this a tune build directory?\";\nmy ($srcdir, $top_srcdir);\nwhile (<MAKEFILE>) {\n  if (/^srcdir = (.*)/) {     $srcdir = $1;     }\n  if (/^top_srcdir = (.*)/) { $top_srcdir = $1; }\n}\ndie \"Cannot find \\$srcdir in Makefile\\n\" if (! defined $srcdir);\ndie \"Cannot find \\$top_srcdir in Makefile\\n\" if (! defined $top_srcdir);\nprint \"srcdir $srcdir\\n\" if $opt{'t'};\nprint \"top_srcdir $top_srcdir\\n\" if $opt{'t'};\nclose(MAKEFILE);\n\n\nopen(SPEED, \">speed-many.c\") or die;\nprint SPEED\n\"/* speed-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n\n\";\nmy $SPEED_EXTRA_ROUTINES = \"#define SPEED_EXTRA_ROUTINES \\\\\\n\";\nmy $SPEED_EXTRA_PROTOS = \"#define SPEED_EXTRA_PROTOS \\\\\\n\";\nmy $SPEED_CODE = \"\";\n\nopen(TRY, \">try-many.c\") or die;\nprint TRY\n    \"/* try-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\\n\" .\n    \"\\n\";\nmy $TRY_EXTRA_ROUTINES = \"#define EXTRA_ROUTINES \\\\\\n\";\nmy $TRY_EXTRA_PROTOS = \"#define EXTRA_PROTOS \\\\\\n\";\n\nopen(FD,\"<${top_builddir}/libtool\") or die \"Cannot open \\\"${top_builddir}/libtool\\\": $!\\n\";\nmy $pic_flag;\nwhile (<FD>) {\n  if (/^pic_flag=\"?([^\"]*)\"?$/) {\n    $pic_flag=$1;\n    last;\n  }\n}\nclose FD;\nif (! defined $pic_flag) {\n  die \"Cannot find pic_flag in ${top_builddir}/libtool\";\n}\n\nmy $CFLAGS_PIC = $pic_flag;\n\nmy $ASMFLAGS_PIC = \"\";\nforeach (split /[ \\t]/, $pic_flag) {\n  if (/^-D/) {\n    $ASMFLAGS_PIC .= \" \" . 
$_;\n  }\n}\n\nopen(MAKEFILE, \">Makefile.many\") or die;\nprint MAKEFILE\n    \"# Makefile.many generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST\\n\" .\n    \"\\n\" .\n    \"all: speed-many try-many\\n\" .\n    \"\\n\" .\n    \"#--------- begin included copy of basic Makefile ----------\\n\" .\n    \"\\n\";\nopen(FD,\"<${builddir}/Makefile\") or die \"Cannot open \\\"${builddir}/Makefile\\\": $!\\n\";\nprint MAKEFILE <FD>;\nclose FD;\nprint MAKEFILE\n    \"\\n\" .\n    \"#--------- end included copy of basic Makefile ----------\\n\" .\n    \"\\n\" .\n    \"CFLAGS_PIC = $CFLAGS_PIC\\n\" .\n    \"ASMFLAGS_PIC = $ASMFLAGS_PIC\\n\" .\n    \"\\n\";\n\nmy $CLEAN=\"\";\nmy $MANY_OBJS=\"\";\n\n\nsub print_ansi2knr {\n  my ($base,$file,$includes) = @_;\n  if (! defined $file)     { $file = \"$base.c\"; }\n  if (! defined $includes) { $includes = \"\"; }\n\n  print MAKEFILE <<EOF;\n${base}_.c: $file\n\t\\$(CPP) \\$(DEFS) \\$(INCLUDES) $includes \\$(AM_CPPFLAGS) \\$(CPPFLAGS) $file | sed 's/^# \\([0-9]\\)/#line \\\\1/' >${base}_.c\n\nEOF\n}\n\n\n# Spawning a glob is a touch slow when there's lots of files.\nmy @files = ();\nforeach my $dir (@DIRECTORIES) {\n  print \"dir $dir\\n\" if $opt{'t'};\n  if (-f $dir) {\n    push @files,$dir;\n  } else {\n    if (! opendir DD,$dir) {\n      print \"Cannot open $dir: $!\\n\";\n    } else {\n      push @files, map {$_=\"$dir/$_\"} grep /\\.(c|asm|S|h)$/, readdir DD;\n      closedir DD;\n    }\n  }\n}\n@files = sort @files;\nprint \"@files \",join(\" \",@files),\"\\n\" if $opt{'t'};\n\nmy $count_files = 0;\nmy $count_functions = 0;\nmy %seen_obj;\nmy %seen_file;\n\nforeach my $file_full (@files) {\n  if (! 
-f $file_full) {\n    print \"Not a file: $file_full\\n\";\n    next;\n  }\n  if (defined $seen_file{$file_full}) {\n    print \"Skipping duplicate file: $file_full\\n\";\n    next;\n  }\n  $seen_file{$file_full} = 1;\n\n  my ($FILE,$path,$lang) = fileparse($file_full,\"\\.[a-zA-Z]+\");\n  $path =~ s/\\/$//;\n  print \"file $FILE path $path lang $lang\\n\" if $opt{'t'};\n\n  my @pic_choices;\n  if ($lang eq '.asm')  { @pic_choices=('no','yes'); }\n  elsif ($lang eq '.c') { @pic_choices=('no'); }\n  elsif ($lang eq '.S') { @pic_choices=('no','yes'); }\n  elsif ($lang eq '.h') { @pic_choices=('no'); }\n  else { next };\n  \n  my ($t, $file_match);\n  foreach my $p (@table) {\n    # print \" \",$p->{'regexp'},\"\\n\" if $opt{'t'};\n    if ($FILE =~ \"^($p->{'regexp'})\") {\n      $t = $p;\n      $file_match = $1;\n      $file_match = $2 if defined $2;\n      last;\n    }\n  }\n  next if ! defined $t;\n  print \"match $t->{'regexp'} $FILE ($file_full)\\n\" if $opt{'t'};\n\n  if (! open FD,\"<$file_full\") { print \"Can't open $file_full: $!\\n\"; next }\n  my @file_contents = <FD>;\n  close FD;\n\n  my $objs;\n  if (defined $t->{'mulfunc'}) { $objs = $t->{'mulfunc'}; }\n  else                         { $objs = [$file_match]; }\n  print \"objs @$objs\\n\" if $opt{'t'};\n\n  my $ret = $t->{'ret'};\n  if (! defined $ret && $lang eq '.h') { $ret = ''; }\n  if (! defined $ret) { die \"$FILE return type not defined\\n\" };\n  print \"ret $ret\\n\" if $opt{'t'};\n\n  my $mpX = $t->{'mpX'};\n  if (! defined $mpX) { $mpX = ($lang eq '.h' ? 
'' : 'mpn'); }\n  $mpX = \"${mpX}_\" if $mpX ne '';\n  print \"mpX $mpX\\n\" if $opt{'t'};\n\n  my $carrys;\n  if (defined $t->{'carrys'}) { $carrys = $t->{'carrys'}; }\n  else                        { $carrys = ['','c'];       }\n  print \"carrys $carrys @$carrys\\n\" if $opt{'t'};\n  \n  # some restriction functions are implemented, but they're not very useful\n  my $restriction='';\n\n  my $suffix;\n  if ($FILE =~ (\"${file_match}_(.+)\")) {\n    $suffix = $1;\n  } elsif ($path =~ /\\/mp[zn]\\/(.*)$/) {\n    # derive the suffix from the path\n    $suffix = $1;\n    $suffix =~ s/\\//_/g;\n    # use last directory name, or if there's 3 or more then the last two\n    if ($suffix =~ /([^_]*_)+([^_]+_[^_]+)$/) {\n      $suffix = $2;\n    } elsif ($suffix =~ /([^_]*_)*([^_]+)$/) {\n      $suffix = $2;\n    }\n  } else {\n    die \"Can't determine suffix for: $file_full (path $path)\\n\";\n  }\n  print \"suffix $suffix\\n\" if $opt{'t'};\n  \n  $count_files++;\n  \n  foreach my $obj (@{$objs}) {\n    print \"obj $obj\\n\" if $opt{'t'};\n\n    my $obj_with_suffix = \"${obj}_$suffix\";\n    if (defined $seen_obj{$obj_with_suffix}) {\n      print \"Skipping duplicate object: $obj_with_suffix\\n\";\n      print \"   first from: $seen_obj{$obj_with_suffix}\\n\";\n      print \"   now from:   $file_full\\n\";\n      next;\n    }\n    $seen_obj{$obj_with_suffix} = $file_full;\n\n    my $funs = $t->{'funs'};\n    $funs = [$obj] if ! 
defined $funs;\n    print \"funs @$funs\\n\" if $opt{'t'};\n\n    if (defined $t->{'pic'}) { @pic_choices = ('no'); }\n\n    foreach my $pic (map {$pictable{$_}} @pic_choices) {\n      print \"pic $pic->{'suffix'}\\n\" if $opt{'t'};\n      \n      my $objbase = \"${obj}_$suffix$pic->{'suffix'}\";\n      print \"objbase $objbase\\n\" if $opt{'t'};\n\n      if ($path !~ \".\" && -f \"${objbase}.c\") {\n\tdie \"Already have ${objbase}.c\";\n      }\n\n      my $tmp_file = \"tmp-$objbase.c\";\n      \n      my $renaming;\n      foreach my $fun (@{$funs}) {\n        if ($mpX eq 'mpn_' && $lang eq '.c') {\n          $renaming .= \"\\t\\t-DHAVE_NATIVE_mpn_$fun=1 \\\\\\n\";\n        }\n\n        # The carry-in variant is with a \"c\" appended, unless there's a \"_1\"\n        # somewhere, eg. \"modexact_1_odd\", in which case that becomes \"_1c\".\n\tmy $fun_carry = $fun;\n\tif (! ($fun_carry =~ s/_1/_1c/)) { $fun_carry = \"${fun}c\"; }\n\n\t$renaming .=\n\t    \"\\t\\t-D__g$mpX$fun=$mpX${fun}_$suffix$pic->{'suffix'} \\\\\\n\" .\n\t    \"\\t\\t-D__g$mpX$fun_carry=$mpX${fun_carry}_$suffix$pic->{'suffix'} \\\\\\n\";\n      }\n      foreach my $r (@{$t->{'rename'}}) {\n\tif ($r =~ /^__gmp/) {\n\t  $renaming .= \"\\\\\\n\" .\n\t      \"\\t\\t-D$r=${r}_$suffix$pic->{'suffix'}\";\n\t} else {\n\t  $renaming .= \"\\\\\\n\" .\n\t      \"\\t\\t-D__g$mpX$r=$mpX${r}_$suffix$pic->{'suffix'}\";\n\t}\n      }\n      print \"renaming $renaming\\n\" if $opt{'t'};\n\n      print MAKEFILE \"\\n\";\n      if ($lang eq '.asm') {\n\tprint MAKEFILE\n\t    \"$objbase.o: $file_full \\$(ASM_HEADERS)\\n\" .\n\t    \"\t\\$(M4) \\$(M4FLAGS) -DOPERATION_$obj $pic->{'asmflags'} \\\\\\n\" .\n  \t    \"$renaming\" .\n\t    \"\t\t$file_full >tmp-$objbase.s\\n\" .\n            \"\t\\$(CCAS) \\$(COMPILE_FLAGS) $pic->{'cflags'} tmp-$objbase.s -o $objbase.o\\n\" .\n            \"\t\\$(RM_TMP) tmp-$objbase.s\\n\";\n\t$MANY_OBJS .= \" $objbase.o\";\n\n      } elsif ($lang eq '.c') {\n\tprint MAKEFILE\n\t    
\"$objbase.o: $file_full\\n\" .\n\t    \"\t\\$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\\\\n\" .\n  \t    \"$renaming\" .\n\t    \"\t\t-c $file_full -o $objbase.o\\n\";\n\tprint_ansi2knr($objbase,\n\t\t       $file_full,\n\t\t       \" -DOPERATION_$obj\\\\\\n$renaming\\t\\t\");\n\t$MANY_OBJS .= \" $objbase\\$U.o\";\n\n      } elsif ($lang eq '.S') {\n\tprint MAKEFILE\n\t    \"$objbase.o: $file_full\\n\" .\n            \"\t\\$(COMPILE) -g $pic->{'asmflags'} \\\\\\n\" .\n  \t    \"$renaming\" .\n            \"\t-c $file_full -o $objbase.o\\n\";\n\t$MANY_OBJS .= \" $objbase.o\";\n\n      } elsif ($lang eq '.h') {\n\tprint MAKEFILE\n\t    \"$objbase.o: tmp-$objbase.c $file_full\\n\" .\n\t    \"\t\\$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\\\\n\" .\n  \t    \"$renaming\" .\n\t    \"\t\t-c tmp-$objbase.c -o $objbase.o\\n\";\n\tprint_ansi2knr($objbase,\n\t\t       \"tmp-$objbase.c\",\n\t\t       \" -DOPERATION_$obj\\\\\\n$renaming\\t\\t\");\n\t$MANY_OBJS .= \" $objbase\\$U.o\";\n\n        $CLEAN .= \" tmp-$objbase.c\";\n\topen(TMP_C,\">tmp-$objbase.c\")\n\t    or die \"Can't create tmp-$objbase.c: $!\\n\";\n\tprint TMP_C\n\"/* tmp-$objbase.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n\n#include \\\"mpir.h\\\"\n#include \\\"gmp-impl.h\\\"\n#include \\\"longlong.h\\\"\n#include \\\"speed.h\\\"\n\n\";\n      }\n\n      my $tests_program = \"$top_srcdir/tests/devel/$obj.c\";\n      if (-f $tests_program) {\n\t$tests_program = \"\\$(top_srcdir)/tests/devel/$obj.c\";\n\tprint_ansi2knr(\"tests_${objbase}\",\n\t\t       $tests_program,\n\t\t       \"\\\\\\n$renaming\\t\\t\\$(CFLAGS_TESTS_SP)\");\n\tprint_ansi2knr(\"tests_${objbase}_sp\",\n\t\t       $tests_program,\n\t\t       \"\\\\\\n$renaming\\t\\t\\$(CFLAGS_TESTS_SP)\");\n\n\tprint MAKEFILE <<EOF;\ntests_$objbase.o: $tests_program\n\t\\$(COMPILE) \\$(CFLAGS_TESTS) \\\\\n$renaming\t\t-c $tests_program -o tests_$objbase.o\n\ntests_$objbase: $objbase\\$U.o tests_$objbase\\$U.o 
../libmpir.la\n\t\\$(LINK) tests_$objbase\\$U.o $objbase\\$U.o ../libmpir.la -o tests_$objbase\n\ntests_${objbase}_sp.o: $tests_program\n\t\\$(COMPILE) \\$(CFLAGS_TESTS_SP) \\\\\n$renaming\t\t-c $tests_program -o tests_${objbase}_sp.o\n\ntests_${objbase}_sp: $objbase\\$U.o tests_${objbase}_sp\\$U.o ../libmpir.la\n\t\\$(LINK) tests_${objbase}_sp\\$U.o $objbase\\$U.o ../libmpir.la -o tests_${objbase}_sp\n\nEOF\n        $CLEAN .= \" tests_$objbase tests_${objbase}_sp\";\n      }\n\n      foreach my $fun (@{$funs}) {\n\tprint \"fun $fun\\n\" if $opt{'t'};\n\n\tif ($lang eq '.h') {\n          my $macro_before = $t->{'macro_before'};\n          $macro_before = \"\" if ! defined $macro_before;\n\t  print TMP_C\n\"$macro_before\n#undef $fun\n#include \\\"$file_full\\\"\n\n\";\n\t}\n\n\tmy $args = $t->{\"args_$fun\"};\n\tif (! defined $args) { $args = $t->{'args'}; }\n\tif (! defined $args) { die \"Need args for $fun\\n\"; }\n\tprint \"args $args\\n\" if $opt{'t'};\n      \n\tforeach my $carry (@$carrys) {\n\t  print \"carry $carry\\n\" if $opt{'t'};\n\n\t  my $fun_carry = $fun;\n\t  if (! ($fun_carry =~ s/_1/_1$carry/)) { $fun_carry = \"$fun$carry\"; }\n          print \"fun_carry $fun_carry\\n\" if $opt{'t'};\n\t\t    \n\t  if ($lang =~ /\\.(asm|S)/\n\t      && ! grep(m\"PROLOGUE\\((.* )?$mpX$fun_carry[ ,)]\",@file_contents)) {\n\t    print \"no PROLOGUE $mpX$fun_carry\\n\" if $opt{'t'};\n\t    next;\n\t  }\n\t  if ($lang eq '.c'\n\t      && ! grep(m\"^(#define FUNCTION\\s+)?$mpX$fun_carry\\W\", @file_contents)) {\n\t    print \"no mention of $mpX$fun_carry\\n\" if $opt{'t'};\n\t    next;\n\t  }\n\t  if ($lang eq '.h'\n\t      && ! 
grep(m\"^#define $fun_carry\\W\", @file_contents)) {\n\t    print \"no mention of #define $fun_carry\\n\" if $opt{'t'};\n\t    next;\n\t  }\n\t  \n\t  $count_functions++;\n\n\t  my $carryarg;\n\t  if (defined $t->{'carryarg'}) { $carryarg = $t->{'carryarg'}; }\n\t  if ($carry eq '')             { $carryarg = ''; }\n\t  else                          { $carryarg = ', mp_limb_t carry'; }\n\t  print \"carryarg $carryarg\\n\" if $opt{'t'};\n\t  \n\t  my $funfull=\"$mpX${fun_carry}_$suffix$pic->{'suffix'}\";\n\t  print \"funfull $funfull\\n\" if $opt{'t'};\n\n\t  if ($lang ne '.h') {\n\t    my $proto = \"$t->{'ret'} $funfull _PROTO (($args$carryarg)); \\\\\\n\";\n\t    $SPEED_EXTRA_PROTOS .= $proto;\n\t    $TRY_EXTRA_PROTOS .= $proto;\n\t  }\n\t  \n\t  my $try_type = $t->{\"try-$fun\"};\n\t  $try_type = $t->{'try'} if ! defined $try_type;\n\t  if (! defined $try_type) {\n\t    if ($mpX eq 'mpn_') {\n\t      $try_type = \"TYPE_\\U$fun_carry\";\n\t    } else {\n\t      $try_type = \"TYPE_\\U$mpX\\U$fun_carry\";\n\t    }\n\t  }\n\t  print \"try_type $try_type\\n\" if $opt{'t'};\n\t  \n\t  my $try_minsize = $t->{'try-minsize'};\n\t  if (defined $try_minsize) {\n\t    $try_minsize = \", \" . $try_minsize;\n\t  } else {\n\t    $try_minsize = \"\";\n\t  }\n\t  print \"try_minsize $try_minsize\\n\" if $opt{'t'};\n\t  \n\t  if ($try_type ne 'none') {\n\t    $TRY_EXTRA_ROUTINES .=\n\t\t\"  { TRY($mpX${fun_carry}_$suffix$pic->{'suffix'}), $try_type$try_minsize }, \\\\\\n\";\n\t  }\n\n\t  my $speed_flags = $t->{'speed_flags'};\n\t  $speed_flags = '0' if ! defined $speed_flags;\n\t  print \"speed_flags $speed_flags\\n\" if $opt{'t'};\n\t  \n\t  my $speed_routine = $t->{'speed'};\n\t  $speed_routine = \"SPEED_ROUTINE_\\U$mpX\\U$fun\"\n\t      if !defined $speed_routine;\n\t  if (! 
($speed_routine =~ s/_1/_1\\U$carry/)) {\n\t    $speed_routine = \"$speed_routine\\U$carry\";\n\t  }\n\t  print \"speed_routine $speed_routine\\n\" if $opt{'t'};\n\t  \n\t  my @speed_suffixes = ();\n\t  push (@speed_suffixes, '') if $speed_routine ne 'none';\n\t  push (@speed_suffixes, @{$t->{'speed_suffixes'}})\n\t      if defined $t->{'speed_suffixes'};\n\t  \n          my $macro_speed = $t->{'macro-speed'};\n          $macro_speed = \"$speed_routine ($fun_carry)\" if ! defined $macro_speed;\n          $macro_speed =~ s/\\$fun/$fun_carry/g;\n\n\t  foreach my $S (@speed_suffixes) {\n\t    my $Sfunfull=\"$mpX${fun_carry}${S}_$suffix$pic->{'suffix'}\";\n\t    \n\t    $SPEED_EXTRA_PROTOS .=\n\t      \"double speed_$Sfunfull _PROTO ((struct speed_params *s)); \\\\\\n\";\n\t    $SPEED_EXTRA_ROUTINES .=\n\t      \"  { \\\"$Sfunfull\\\", speed_$Sfunfull, $speed_flags }, \\\\\\n\";\n\t    if ($lang eq '.h') {\n              print TMP_C\n\"double\nspeed_$Sfunfull (struct speed_params *s)\n{\n$macro_speed\n}\n\n\";\n            } else {\n\t      $SPEED_CODE .=\n\t        \"double\\n\" .\n\t        \"speed_$Sfunfull (struct speed_params *s)\\n\" .\n                \"{\\n\" .\n                \"$restriction\" .\n\t        \"  $speed_routine\\U$S\\E ($funfull)\\n\" .\n                \"}\\n\";\n            }\n\t  }\n\t}\n      }\n    }\n  }\n}\n\n\nprint SPEED $SPEED_EXTRA_PROTOS . \"\\n\";\nprint SPEED $SPEED_EXTRA_ROUTINES . \"\\n\";\nif (defined $ENV{speedinc}) { print SPEED $ENV{speedinc} . \"\\n\"; }\nprint SPEED\n    \"#include \\\"speed.c\\\"\\n\" .\n    \"\\n\";\nprint SPEED $SPEED_CODE;\n\nprint TRY $TRY_EXTRA_ROUTINES . \"\\n\";\nprint TRY $TRY_EXTRA_PROTOS . 
\"\\n\";\nmy $tryinc = \"\";\nif (defined $ENV{tryinc}) {\n  $tryinc = $ENV{tryinc};\n  print TRY \"#include \\\"$tryinc\\\"\\n\";\n}\nprint \"tryinc $tryinc\\n\" if $opt{'t'};\nprint TRY\n    \"#include \\\"try.c\\\"\\n\" .\n    \"\\n\";\n\nmy $extra_libraries = \"\";\nif (defined $ENV{extra_libraries}) { $extra_libraries = $ENV{extra_libraries};}\n\nmy $trydeps = \"\";\nif (defined $ENV{trydeps}) { $trydeps = $ENV{trydeps}; }\n$trydeps .= \" $tryinc\";\nprint \"trydeps $trydeps\\n\" if $opt{'t'};\n\nprint MAKEFILE <<EOF;\n\nMANY_OBJS = $MANY_OBJS\nMANY_CLEAN = \\$(MANY_OBJS) \\\\\n\tspeed-many.c speed-many\\$U.o speed-many\\$(EXEEXT) \\\\\n\ttry-many.c try-many\\$U.o try-many \\\\\n\t$CLEAN\nMANY_DISTCLEAN = Makefile.many\n\nspeed-many: \\$(MANY_OBJS) speed-many\\$U.o libspeed.la $extra_libraries\n\t\\$(LINK) \\$(LDFLAGS) speed-many\\$U.o \\$(MANY_OBJS) \\$(LDADD) \\$(LIBS) $extra_libraries\n\ntry-many: \\$(MANY_OBJS) try-many\\$U.o libspeed.la $extra_libraries\n\t\\$(LINK) \\$(LDFLAGS) try-many\\$U.o \\$(MANY_OBJS)  \\$(LDADD) \\$(LIBS) $extra_libraries\n\ntry-many.o: try-many.c \\$(top_srcdir)/tests/devel/try.c $trydeps\n\t\\$(COMPILE) -I\\$(top_srcdir)/tests/devel -c try-many.c\n\nEOF\n\nprint_ansi2knr(\"speed-many\");\nprint_ansi2knr(\"try-many\",\n\t       \"\\$(top_srcdir)/tests/devel/try.c\",\n\t       \"-I\\$(top_srcdir)/tests/devel\");\n\nprint MAKEFILE <<EOF;\nRM_TMP = rm -f\nCFLAGS_TESTS = -DSIZE=50 -DTIMES=1 -DRANDOM -DCLOCK=333000000\nCFLAGS_TESTS_SP = -DSIZE=1024 -DNOCHECK -DOPS=200000000 -DCLOCK=333000000\nEOF\n\nclose MAKEFILE or die;\n\nprint \"Total $count_files files, $count_functions functions\\n\";    \n\n\n\n# Local variables:\n# perl-indent-level: 2\n# End:\n"
  },
  {
    "path": "tune/mod_1_div.c",
    "content": "/* mpn/generic/mod_1.c forced to use plain udiv_qrnnd.\n\nCopyright 2000, 2003 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define OPERATION_mod_1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef MOD_1_NORM_THRESHOLD\n#undef MOD_1_UNNORM_THRESHOLD\n#define MOD_1_NORM_THRESHOLD    MP_SIZE_T_MAX\n#define MOD_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX\n#define __gmpn_mod_1  mpn_mod_1_div\n\n#include \"mpn/generic/mod_1.c\"\n"
  },
  {
    "path": "tune/mod_1_inv.c",
    "content": "/* mpn/generic/mod_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.\n\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#define OPERATION_mod_1\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef MOD_1_NORM_THRESHOLD\n#undef MOD_1_UNNORM_THRESHOLD\n#define MOD_1_NORM_THRESHOLD    0\n#define MOD_1_UNNORM_THRESHOLD  0\n#define __gmpn_mod_1  mpn_mod_1_inv\n\n#include \"mpn/generic/mod_1.c\"\n"
  },
  {
    "path": "tune/modlinv.c",
    "content": "/* Alternate implementations of modlimb_invert to compare speeds. */\n\n/*\nCopyright 2000, 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include <stdio.h>\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n#include \"speed.h\"\n\n\n/* Like the standard version in gmp-impl.h, but with the expressions using a\n   \"1-\" form.  This has the same number of steps, but \"1-\" is on the\n   dependent chain, whereas the \"2*\" in the standard version isn't.\n   Depending on the CPU this should be the same or a touch slower.  
*/\n\n#if BITS_PER_MP_LIMB <= 32\n#define modlimb_invert_mul1(inv,n)                              \\\n  do {                                                          \\\n    mp_limb_t  __n = (n);                                       \\\n    mp_limb_t  __inv;                                           \\\n    ASSERT ((__n & 1) == 1);                                    \\\n    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \\\n    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \\\n    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \\\n    ASSERT (__inv * __n == 1);                                  \\\n    (inv) = __inv;                                              \\\n  } while (0)\n#endif\n\n#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64\n#define modlimb_invert_mul1(inv,n)                              \\\n  do {                                                          \\\n    mp_limb_t  __n = (n);                                       \\\n    mp_limb_t  __inv;                                           \\\n    ASSERT ((__n & 1) == 1);                                    \\\n    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \\\n    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \\\n    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \\\n    __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \\\n    ASSERT (__inv * __n == 1);                                  \\\n    (inv) = __inv;                                              \\\n  } while (0)\n#endif\n\n\n/* The loop based version used in GMP 3.0 and earlier.  Usually slower than\n   multiplying, due to the number of steps that must be performed.  Much\n   slower when the processor has a good multiply.  
*/\n\n#define modlimb_invert_loop(inv,n)              \\\n  do {                                          \\\n    mp_limb_t  __v = (n);                       \\\n    mp_limb_t  __v_orig = __v;                  \\\n    mp_limb_t  __make_zero = 1;                 \\\n    mp_limb_t  __two_i = 1;                     \\\n    mp_limb_t  __v_inv = 0;                     \\\n                                                \\\n    ASSERT ((__v & 1) == 1);                    \\\n                                                \\\n    do                                          \\\n      {                                         \\\n        while ((__two_i & __make_zero) == 0)    \\\n          __two_i <<= 1, __v <<= 1;             \\\n        __v_inv += __two_i;                     \\\n        __make_zero -= __v;                     \\\n      }                                         \\\n    while (__make_zero);                        \\\n                                                \\\n    ASSERT (__v_orig * __v_inv == 1);           \\\n    (inv) = __v_inv;                            \\\n  } while (0)\n\n\n/* Another loop based version with conditionals, but doing a fixed number of\n   steps. 
*/\n\n#define modlimb_invert_cond(inv,n)              \\\n  do {                                          \\\n    mp_limb_t  __n = (n);                       \\\n    mp_limb_t  __rem = (1 - __n) >> 1;          \\\n    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;       \\\n    int        __count;                         \\\n                                                \\\n    ASSERT ((__n & 1) == 1);                    \\\n                                                \\\n    __count = BITS_PER_MP_LIMB-1;               \\\n    do                                          \\\n      {                                         \\\n        __inv >>= 1;                            \\\n        if (__rem & 1)                          \\\n          {                                     \\\n            __inv |= GMP_LIMB_HIGHBIT;         \\\n            __rem -= __n;                       \\\n          }                                     \\\n        __rem >>= 1;                            \\\n      }                                         \\\n    while (-- __count);                         \\\n                                                \\\n    ASSERT (__inv * __n == 1);                  \\\n    (inv) = __inv;                              \\\n  } while (0)\n\n\n/* Another loop based bitwise version, but purely arithmetic, no\n   conditionals. 
*/\n\n#define modlimb_invert_arith(inv,n)                                     \\\n  do {                                                                  \\\n    mp_limb_t  __n = (n);                                               \\\n    mp_limb_t  __rem = (1 - __n) >> 1;                                  \\\n    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                               \\\n    mp_limb_t  __lowbit;                                                \\\n    int        __count;                                                 \\\n                                                                        \\\n    ASSERT ((__n & 1) == 1);                                            \\\n                                                                        \\\n    __count = BITS_PER_MP_LIMB-1;                                       \\\n    do                                                                  \\\n      {                                                                 \\\n        __lowbit = __rem & 1;                                           \\\n        __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1));      \\\n        __rem = (__rem - (__n & -__lowbit)) >> 1;                       \\\n      }                                                                 \\\n    while (-- __count);                                                 \\\n                                                                        \\\n    ASSERT (__inv * __n == 1);                                          \\\n    (inv) = __inv;                                                      \\\n  } while (0)\n\n\ndouble\nspeed_modlimb_invert_mul1 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_mul1);\n}\ndouble\nspeed_modlimb_invert_loop (struct speed_params *s)\n{\n  SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_loop);\n}\ndouble\nspeed_modlimb_invert_cond (struct speed_params *s)\n{\n  SPEED_ROUTINE_MODLIMB_INVERT 
(modlimb_invert_cond);\n}\ndouble\nspeed_modlimb_invert_arith (struct speed_params *s)\n{\n  SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_arith);\n}\n"
  },
  {
    "path": "tune/noop.c",
    "content": "/* Noop routines.\n\n   These are in a separate file to stop gcc recognising do-nothing functions\n   and optimizing away calls to them.  */\n\n/*\nCopyright 1999, 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#include \"speed.h\"\n\n\nvoid\nnoop (void)\n{\n}\n\n/*ARGSUSED*/\nvoid\nnoop_1 (mp_limb_t n)\n{\n}\n\n/*ARGSUSED*/\nvoid\nnoop_wxs (mp_ptr wp, mp_srcptr xp, mp_size_t size)\n{\n}\n\n/*ARGSUSED*/\nvoid\nnoop_wxys (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n}\n\n/*ARGSUSED*/\nvoid \nmpn_cache_fill_dummy (mp_limb_t n)\n{\n}\n"
  },
  {
    "path": "tune/pentium.asm",
    "content": "dnl  x86 pentium time stamp counter access routine.\n\ndnl  Copyright 1999, 2000, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\ninclude(`../config.m4')\n\n\nC void speed_cyclecounter (unsigned p[2]);\nC\nC Get the pentium rdtsc cycle counter, storing the least significant word in\nC p[0] and the most significant in p[1].\nC\nC cpuid is used to serialize execution.  On big measurements this won't be\nC significant but it may help make small single measurements more accurate.\n\n\t.text\n\tALIGN(8)\n\ndefframe(PARAM_P,4)\n\nPROLOGUE(speed_cyclecounter)\ndeflit(`FRAME',0)\n\tpushl\t%ebx\nFRAME_pushl()\n\txorl\t%eax, %eax\n\tcpuid\n\trdtsc\n\tmovl\tPARAM_P, %ebx\n\tmovl\t%eax, (%ebx)\n\tmovl\t%edx, 4(%ebx)\n\tpopl\t%ebx\n\tret\nEPILOGUE()\n"
  },
  {
    "path": "tune/powerpc.asm",
    "content": "dnl  PowerPC mftb_function -- read time base registers.\n\ndnl  Copyright 2002 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundationn; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC void mftb_function (unsigned a[2]);\nC\n\nASM_START()\nPROLOGUE(mftb_function)\n\n\tC r3\ta\n\nL(again):\n\tmftbu\tr4\n\tmftb\tr5\n\tmftbu\tr6\n\tcmp\tcr0, r4, r6\n\tbne\tL(again)\n\n\tstw\tr5, 0(r3)\n\tstw\tr4, 4(r3)\n\tblr\n\nEPILOGUE()\n"
  },
  {
    "path": "tune/powerpc64.asm",
    "content": "dnl  PowerPC mftb_function -- read time base registers, 64-bit integer.\n\ndnl  Copyright 2002, 2003, 2004 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundationn; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC void mftb_function (unsigned a[2]);\nC\n\nASM_START()\nPROLOGUE(mftb_function)\n\n\tC r3\ta\n\n\tmftb\tr5\n\n\tsrdi\tr4, r5, 32\n\tstw\tr5, 0(r3)\n\tstw\tr4, 4(r3)\n\tblr\n\nEPILOGUE()\n"
  },
  {
    "path": "tune/powm_mod.c",
    "content": "/* mpz/powm.c forced to use division. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef POWM_THRESHOLD\n#define POWM_THRESHOLD  1\n#define __gmpz_powm  mpz_powm_mod\n\n#include \"../mpz/powm.c\"\n"
  },
  {
    "path": "tune/powm_redc.c",
    "content": "/* mpz/powm.c forced to use REDC. */\n\n/*\nCopyright 2000 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA.\n*/\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n/* WANT_GLOBAL_REDC makes redc() available for speed and tune program use. */\n#undef POWM_THRESHOLD\n#define POWM_THRESHOLD    MP_SIZE_T_MAX\n#define WANT_REDC_GLOBAL  1\n#define __gmpz_powm  mpz_powm_redc\n\n#include \"../mpz/powm.c\"\n"
  },
  {
    "path": "tune/preinv_divrem_1.c",
    "content": "/* mpn_preinv_divrem_1 -- if not already in libmpir.\n\nCopyright 2001 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if ! USE_PREINV_DIVREM_1\n\n#undef USE_PREINV_DIVREM_1\n#define USE_PREINV_DIVREM_1 1\n\n#include \"mpn/generic/preinv_divrem_1.c\"\n\n#endif\n"
  },
  {
    "path": "tune/set_strb.c",
    "content": "/* mpn_set_str_basecase -- mpn_set_str forced to its basecase.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#define __gmpn_set_str mpn_set_str_basecase\n#define __gmpn_bc_set_str mpn_bc_set_str_basecase\n#define __gmpn_dc_set_str mpn_dc_set_str_basecase\n#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_basecase\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#ifndef SIZE_T_MAX\n#define SIZE_T_MAX  ((size_t) ULONG_MAX)\n#endif\n\n#undef SET_STR_DC_THRESHOLD\n#define SET_STR_DC_THRESHOLD           SIZE_T_MAX /* always */\n#undef SET_STR_PRECOMPUTE_THRESHOLD\n#define SET_STR_PRECOMPUTE_THRESHOLD   SIZE_T_MAX /* always */\n\n#include \"mpn/generic/set_str.c\"\n"
  },
  {
    "path": "tune/set_strp.c",
    "content": "/* mpn_set_str_subquad -- mpn_set_str forced to the sub-quadratic case.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nvoid\nmpn_pre_set_str (mp_ptr wp, unsigned char *str, size_t str_len, powers_t *powtab, mp_ptr tp)\n{\n  if (BELOW_THRESHOLD (str_len, set_str_dc_threshold))\n    mpn_bc_set_str (wp, str, str_len, powtab->base);\n  else\n    mpn_dc_set_str (wp, str, str_len, powtab, tp);\n}\n"
  },
  {
    "path": "tune/set_strs.c",
    "content": "/* mpn_set_str_subquad -- mpn_set_str forced to the sub-quadratic case.\n\nCopyright 2002 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */\n\n#define __gmpn_set_str mpn_set_str_subquad\n#define __gmpn_bc_set_str mpn_bc_set_str_subquad\n#define __gmpn_dc_set_str mpn_dc_set_str_subquad\n#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_subquad\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#undef SET_STR_DC_THRESHOLD\n#define SET_STR_DC_THRESHOLD  2 /* never */\n#undef SET_STR_PRECOMPUTE_THRESHOLD\n#define SET_STR_PRECOMPUTE_THRESHOLD  2 /* never */\n\n#include \"mpn/generic/set_str.c\"\n"
  },
  {
    "path": "tune/sparcv9.asm",
    "content": "dnl  Sparc v9 32-bit time stamp counter access routine.\n\ndnl  Copyright 2000, 2005 Free Software Foundation, Inc.\ndnl\ndnl  This file is part of the GNU MP Library.\ndnl\ndnl  The GNU MP Library is free software; you can redistribute it and/or\ndnl  modify it under the terms of the GNU Lesser General Public License as\ndnl  published by the Free Software Foundation; either version 2.1 of the\ndnl  License, or (at your option) any later version.\ndnl\ndnl  The GNU MP Library is distributed in the hope that it will be useful,\ndnl  but WITHOUT ANY WARRANTY; without even the implied warranty of\ndnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\ndnl  Lesser General Public License for more details.\ndnl\ndnl  You should have received a copy of the GNU Lesser General Public\ndnl  License along with the GNU MP Library; see the file COPYING.LIB.  If\ndnl  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\ndnl  Fifth Floor, Boston, MA 02110-1301, USA.\n\ninclude(`../config.m4')\n\n\nC void speed_cyclecounter (unsigned p[2]);\nC\nC Get the sparc v9 tick counter.\n\nASM_START()\nPROLOGUE(speed_cyclecounter)\n\trd\t%tick,%g1\n\tst\t%g1,[%o0]\t\tC low 32 bits\n\tsrlx\t%g1,32,%g4\n\tretl\n\tst\t%g4,[%o0+4]\t\tC high 32 bits\nEPILOGUE(speed_cyclecounter)\n"
  },
  {
    "path": "tune/speed-ext.c",
    "content": "/* An example of extending the speed program to measure routines not in GMP.\n\nCopyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* The extension here is three versions of an mpn arithmetic mean.  These\n   aren't meant to be particularly useful, just examples.\n\n   You can run something like the following to compare their speeds.\n\n           ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2\n\n   On RISC chips, mean_open() might be fastest if the compiler is doing a\n   good job.  On the register starved x86s, mean_calls will be fastest.\n\n\n   Notes:\n\n   SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded\n   by speed.c in useful places.  
SPEED_EXTRA_PROTOS goes after the header\n   files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.\n\n   The advantage of this #include \"speed.c\" scheme is that there's no\n   editing of a copy of that file, and new features in new versions of it\n   will be immediately available.\n\n   In a real program the routines mean_calls() etc would probably be in\n   separate C or assembler source files, and just the measuring\n   speed_mean_calls() etc would be here.  Linking against other libraries\n   for things to measure is perfectly possible too.\n\n   When attempting to compare two versions of the same named routine, say\n   like the generic and assembler versions of mpn_add_n(), creative use of\n   cc -D or #define is suggested, so one or both can be renamed and linked\n   into the same program.  It'll be much easier to compare them side by side\n   than with separate programs for each.\n\n   common.c has notes on writing speed measuring routines.\n\n   Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if\n   not using libtool) to get common.o and other objects needed by speed.c.  */\n\n\n#define SPEED_EXTRA_PROTOS                                              \\\n  double speed_mean_calls(struct speed_params *s);       \\\n  double speed_mean_open(struct speed_params *s);       \\\n  double speed_mean_open2(struct speed_params *s);\n\n#define SPEED_EXTRA_ROUTINES            \\\n  { \"mean_calls\",  speed_mean_calls  }, \\\n  { \"mean_open\",   speed_mean_open   }, \\\n  { \"mean_open2\",  speed_mean_open2  },\n\n#include \"speed.c\"\n\n\n/* A straightforward implementation calling mpn subroutines.\n\n   wp,size is set to (xp,size + yp,size) / 2.  The return value is the\n   remainder from the division.  The other versions are the same.  
*/\n\nmp_limb_t\nmean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  mp_limb_t  c, ret;\n\n  ASSERT (size >= 1);\n\n  c = mpn_add_n (wp, xp, yp, size);\n  ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);\n  wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));\n  return ret;\n}\n\n\n/* An open-coded version, making one pass over the data.  The right shift is\n   done as the added limbs are produced.  The addition code follows\n   mpn/generic/add_n.c. */\n\nmp_limb_t\nmean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  mp_limb_t  w, wprev, x, y, c, ret;\n  mp_size_t  i;\n\n  ASSERT (size >= 1);\n\n  x = xp[0];\n  y = yp[0];\n\n  wprev = x + y;\n  c = (wprev < x);\n  ret = (wprev & 1);\n\n#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))\n\n  for (i = 1; i < size; i++)\n    {\n      x = xp[i];\n      y = yp[i];\n\n      w = x + c;\n      c = (w < x);\n      w += y;\n      c += (w < y);\n\n      wp[i-1] = RSHIFT (w, wprev);\n      wprev = w;\n    }\n\n  wp[i-1] = RSHIFT (c, wprev);\n\n  return ret;\n}\n\n\n/* Another one-pass version, but right shifting the source limbs rather than\n   the result limbs.  There's not much chance of this being better than the\n   above, but it's an alternative at least. 
*/\n\nmp_limb_t\nmean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)\n{\n  mp_limb_t  w, x, y, xnext, ynext, c, ret;\n  mp_size_t  i;\n\n  ASSERT (size >= 1);\n\n  x = xp[0];\n  y = yp[0];\n\n  /* ret is the low bit of x+y, c is the carry out of that low bit add */\n  ret = (x ^ y) & 1;\n  c   = (x & y) & 1;\n\n  for (i = 0; i < size-1; i++)\n    {\n      xnext = xp[i+1];\n      ynext = yp[i+1];\n      x = RSHIFT (xnext, x);\n      y = RSHIFT (ynext, y);\n\n      w = x + c;\n      c = (w < x);\n      w += y;\n      c += (w < y);\n      wp[i] = w;\n\n      x = xnext;\n      y = ynext;\n    }\n\n  wp[i] = (x >> 1) + (y >> 1) + c;\n\n  return ret;\n}\n\n\n/* The speed measuring routines are the same apart from which function they\n   run, so a macro is used.  Actually this macro is the same as\n   SPEED_ROUTINE_MPN_BINARY_N.  */\n\n#define SPEED_ROUTINE_MEAN(mean_fun)                    \\\n  {                                                     \\\n    unsigned  i;                                        \\\n    mp_ptr    wp;                                       \\\n    double    t;                                        \\\n    TMP_DECL;                                  \\\n                                                        \\\n    SPEED_RESTRICT_COND (s->size >= 1);                 \\\n                                                        \\\n    TMP_MARK;                                  \\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \\\n                                                        \\\n    speed_operand_src (s, s->xp, s->size);              \\\n    speed_operand_src (s, s->yp, s->size);              \\\n    speed_operand_dst (s, wp, s->size);                 \\\n    speed_cache_fill (s);                               \\\n                                                        \\\n    speed_starttime ();                                 \\\n    i = s->reps;                                        \\\n    do                
                                  \\\n      mean_fun (wp, s->xp, s->yp, s->size);             \\\n    while (--i != 0);                                   \\\n    t = speed_endtime ();                               \\\n                                                        \\\n    TMP_FREE;                                  \\\n    return t;                                           \\\n  }\n\ndouble\nspeed_mean_calls (struct speed_params *s)\n{\n  SPEED_ROUTINE_MEAN (mean_calls);\n}\n\ndouble\nspeed_mean_open (struct speed_params *s)\n{\n  SPEED_ROUTINE_MEAN (mean_open);\n}\n\ndouble\nspeed_mean_open2 (struct speed_params *s)\n{\n  SPEED_ROUTINE_MEAN (mean_open2);\n}\n"
  },
  {
    "path": "tune/speed.c",
    "content": "/* Speed measuring program.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,\n2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  \n\n*/\n\n/* Usage message is in the code below, run with no arguments to print it.\n   See README for interesting applications.\n\n   To add a new routine foo(), create a speed_foo() function in the style of\n   the existing ones and add an entry in the routine[] array.  Put FLAG_R if\n   speed_foo() wants an \"r\" parameter.\n\n   The routines don't have help messages or descriptions, but most have\n   suggestive names.  
See the source code for full details.\n\n*/\n\n#include \"config.h\"\n\n#include <limits.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#if HAVE_UNISTD_H || _MSC_VER\n#include <unistd.h>  /* for getpid, R_OK */\n#endif\n\n#if TIME_WITH_SYS_TIME\n# include <sys/time.h>  /* for struct timeval */\n# include <time.h>\n#else\n# if HAVE_SYS_TIME_H\n#  include <sys/time.h>\n# else\n#  include <time.h>\n# endif\n#endif\n\n#if HAVE_SYS_RESOURCE_H\n#include <sys/resource.h>  /* for getrusage() */\n#endif\n\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"  /* for the benefit of speed-many.c */\n#include \"tests.h\"\n#include \"speed.h\"\n\n#if 1 && defined( _MSC_VER)\n#include \"win_timing.h\"\n#endif\n\n#if !HAVE_DECL_OPTARG\nextern char *optarg;\nextern int optind, opterr;\n#endif\n\n#if !HAVE_STRTOUL\n#define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)\n#endif\n\n#ifdef SPEED_EXTRA_PROTOS\nSPEED_EXTRA_PROTOS\n#endif\n#ifdef SPEED_EXTRA_PROTOS2\nSPEED_EXTRA_PROTOS2\n#endif\n\n\n#define MPN_FILL(ptr, size, n)          \\\n  do {                                  \\\n    mp_size_t __i;                      \\\n    ASSERT ((size) >= 0);               \\\n    for (__i = 0; __i < (size); __i++)  \\\n      (ptr)[__i] = (n);                 \\\n  } while (0)\n\n\n#if BITS_PER_MP_LIMB == 32\n#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)\n#endif\n#if BITS_PER_MP_LIMB == 64\n#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)\n#endif\n\n\n#define CMP_ABSOLUTE     1\n#define CMP_RATIO        2\n#define CMP_DIFFERENCE   3\n#define CMP_DIFFPREV     4\nint  option_cmp = CMP_ABSOLUTE;\nint option_cmp_pos = 0;\n\n#define UNIT_SECONDS        1\n#define UNIT_CYCLES         2\n#define UNIT_CYCLESPERLIMB  3\nint  option_unit = UNIT_SECONDS;\n\n#define DATA_RANDOM   1\n#define DATA_RANDOM2  2\n#define DATA_ZEROS    3\n#define DATA_AAS      4\n#define DATA_FFS      5\n#define DATA_2FD      6\nint  option_data = 
DATA_RANDOM;\n\nint        option_square = 0;\ndouble     option_factor = 0.0;\nmp_size_t  option_step = 1;\nint        option_gnuplot = 0;\nchar      *option_gnuplot_basename;\nstruct size_array_t {\n  mp_size_t start, end, inc;\n} *size_array = NULL;\nmp_size_t  size_num = 0;\nmp_size_t  size_allocnum = 0;\nint        option_resource_usage = 0;\nlong       option_seed = 123456789;\n\n#define XCOLMAX\t\t16\nint\txcoln,xcol[XCOLMAX];\n\nstruct speed_params  sp;\n\n#define COLUMN_WIDTH  13  /* for the free-form output */\n\n#define FLAG_R            (1<<0)  /* require \".r\" */\n#define FLAG_R_OPTIONAL   (1<<1)  /* optional \".r\" */\n#define FLAG_RSIZE        (1<<2)\n#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */\n\nconst struct routine_t {\n  /* constants */\n  const char        *name;\n  speed_function_t  fun;\n  int               flag;\n  \n} routine[] = {\n\n  { \"noop\",              speed_noop                 },\n  { \"noop_wxs\",          speed_noop_wxs             },\n  { \"noop_wxys\",         speed_noop_wxys            },\n\n  { \"mpn_add_n\",         speed_mpn_add_n,     FLAG_R_OPTIONAL },\n  { \"mpn_sub_n\",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },\n\n#if HAVE_NATIVE_mpn_sumdiff_n\n  { \"mpn_sumdiff_n\",      speed_mpn_sumdiff_n,     FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_nsumdiff_n\n  { \"mpn_nsumdiff_n\",    speed_mpn_nsumdiff_n,     FLAG_R_OPTIONAL },\n#endif\n  { \"mpn_addadd_n\",      speed_mpn_addadd_n,  },\n  { \"mpn_subadd_n\",      speed_mpn_subadd_n,  },\n  { \"mpn_addsub_n\",      speed_mpn_addsub_n,  },\n#if HAVE_NATIVE_mpn_karaadd\n  { \"mpn_karaadd\",      speed_mpn_karaadd,  },\n#endif\n#if HAVE_NATIVE_mpn_karasub\n  { \"mpn_karasub\",      speed_mpn_karasub,  },\n#endif\n\n  { \"mpn_addmul_1\",      speed_mpn_addmul_1,  FLAG_R },\n  { \"mpn_submul_1\",      speed_mpn_submul_1,  FLAG_R },\n#if HAVE_NATIVE_mpn_addmul_2\n  { \"mpn_addmul_2\",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },\n#endif\n#if 
HAVE_NATIVE_mpn_addmul_3\n  { \"mpn_addmul_3\",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_addmul_4\n  { \"mpn_addmul_4\",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_addmul_5\n  { \"mpn_addmul_5\",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_addmul_6\n  { \"mpn_addmul_6\",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_addmul_7\n  { \"mpn_addmul_7\",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_addmul_8\n  { \"mpn_addmul_8\",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },\n#endif\n  { \"mpn_mul_1\",         speed_mpn_mul_1,     FLAG_R },\n  { \"mpn_mul_1_inplace\", speed_mpn_mul_1_inplace, FLAG_R },\n#if HAVE_NATIVE_mpn_mul_2\n  { \"mpn_mul_2\",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },\n#endif\n  { \"mpn_divrem_euclidean_qr_1\",      speed_mpn_divrem_euclidean_qr_1,  FLAG_R },\n  { \"mpn_divrem_euclidean_qr_2\",      speed_mpn_divrem_euclidean_qr_2, },\n  { \"mpn_divrem_euclidean_r_1\",       speed_mpn_divrem_euclidean_r_1,  FLAG_R },\n  { \"mpn_divrem_hensel_qr_1\",         speed_mpn_divrem_hensel_qr_1, FLAG_R},\n  { \"mpn_divrem_hensel_qr_1_1\",       speed_mpn_divrem_hensel_qr_1_1, FLAG_R},\n  { \"mpn_divrem_hensel_qr_1_2\",       speed_mpn_divrem_hensel_qr_1_2, FLAG_R},\n  { \"mpn_divrem_hensel_r_1\",          speed_mpn_divrem_hensel_r_1,  FLAG_R },\n  { \"mpn_rsh_divrem_hensel_qr_1\",     speed_mpn_rsh_divrem_hensel_qr_1, FLAG_R},\n  { \"mpn_rsh_divrem_hensel_qr_1_1\",     speed_mpn_rsh_divrem_hensel_qr_1_1, FLAG_R},\n  { \"mpn_rsh_divrem_hensel_qr_1_2\",     speed_mpn_rsh_divrem_hensel_qr_1_2, FLAG_R},\n  \n\n  { \"mpn_divrem_hensel_rsh_qr_1\",     speed_mpn_divrem_hensel_rsh_qr_1, FLAG_R},\n  \n  { \"mpn_divrem_1\",      speed_mpn_divrem_1,  FLAG_R },\n  { \"mpn_divrem_1f\",     speed_mpn_divrem_1f, FLAG_R },\n#if HAVE_NATIVE_mpn_divrem_1c\n  { \"mpn_divrem_1c\",     speed_mpn_divrem_1c, FLAG_R },\n  { 
\"mpn_divrem_1cf\",    speed_mpn_divrem_1cf,FLAG_R },\n#endif\n  { \"mpn_mod_1\",         speed_mpn_mod_1,     FLAG_R },\n  { \"mpn_mod_1_1\",       speed_mpn_mod_1_1,},\n  { \"mpn_mod_1_2\",       speed_mpn_mod_1_2,},\n  { \"mpn_mod_1_3\",       speed_mpn_mod_1_3,},\n  { \"mpn_mod_1_k\",       speed_mpn_mod_1_k,     FLAG_R },\n#if HAVE_NATIVE_mpn_mod_1c\n  { \"mpn_mod_1c\",        speed_mpn_mod_1c,    FLAG_R },\n#endif\n  { \"mpn_preinv_divrem_1\",  speed_mpn_preinv_divrem_1,  FLAG_R },\n  { \"mpn_preinv_divrem_1f\", speed_mpn_preinv_divrem_1f, FLAG_R },\n  { \"mpn_preinv_mod_1\",  speed_mpn_preinv_mod_1, FLAG_R },\n\n  { \"mpn_add_err1_n\",  speed_mpn_add_err1_n, },\n  { \"mpn_sub_err1_n\",  speed_mpn_sub_err1_n, },\n\n  { \"mpn_inv_divappr_q\",  speed_mpn_inv_divappr_q, },\n  { \"mpn_inv_div_qr\",  speed_mpn_inv_div_qr,  },\n  { \"mpn_tdiv_q\",  speed_mpn_tdiv_q,  FLAG_R_OPTIONAL},\n  { \"mpn_sb_divappr_q\",  speed_mpn_sb_divappr_q},\n  { \"mpn_sb_div_qr\",  speed_mpn_sb_div_qr},\n  { \"mpn_dc_divappr_q\",  speed_mpn_dc_divappr_q,  },\n  { \"mpn_dc_div_qr_n\",  speed_mpn_dc_div_qr_n,  },\n   \n  { \"mpn_divrem_1_inv\",  speed_mpn_divrem_1_inv,  FLAG_R },\n  { \"mpn_divrem_1f_div\", speed_mpn_divrem_1f_div, FLAG_R },\n  { \"mpn_divrem_1f_inv\", speed_mpn_divrem_1f_inv, FLAG_R },\n  { \"mpn_mod_1_div\",     speed_mpn_mod_1_div,     FLAG_R },\n  { \"mpn_mod_1_inv\",     speed_mpn_mod_1_inv,     FLAG_R },\n\n  { \"mpn_divrem_2\",      speed_mpn_divrem_2,        },\n  { \"mpn_divrem_2_div\",  speed_mpn_divrem_2_div,    },\n  { \"mpn_divrem_2_inv\",  speed_mpn_divrem_2_inv,    },\n\n  { \"mpn_divexact_1\",    speed_mpn_divexact_1,    FLAG_R },\n  { \"mpn_divexact_by3\",  speed_mpn_divexact_by3          },\n  { \"mpn_divexact_byff\", speed_mpn_divexact_byff         },\n  { \"mpn_divexact_byfobm1\", speed_mpn_divexact_byfobm1, FLAG_R    },\n\n#if HAVE_NATIVE_mpn_modexact_1_odd\n  { \"mpn_modexact_1_odd\",  speed_mpn_modexact_1_odd,  FLAG_R },\n#endif\n  { 
\"mpn_modexact_1c_odd\", speed_mpn_modexact_1c_odd, FLAG_R },\n\n#if GMP_NUMB_BITS % 4 == 0\n  { \"mpn_mod_34lsub1\",   speed_mpn_mod_34lsub1 },\n#endif\n\n  { \"mpn_dc_tdiv_qr\",       speed_mpn_dc_tdiv_qr       },\n\n  { \"mpn_lshift\",        speed_mpn_lshift, FLAG_R   },\n  { \"mpn_rshift\",        speed_mpn_rshift, FLAG_R   },\n#if HAVE_NATIVE_mpn_lshiftc\n  { \"mpn_lshiftc\",       speed_mpn_lshiftc, FLAG_R   },\n#endif\n  { \"mpn_lshift1\",       speed_mpn_lshift1,   },\n  { \"mpn_rshift1\",       speed_mpn_rshift1,   },\n  { \"mpn_double\",       speed_mpn_double,   },\n  { \"mpn_half\",       speed_mpn_half,   },\n  { \"mpn_lshift2\",       speed_mpn_lshift2,   },\n  { \"mpn_rshift2\",       speed_mpn_rshift2,   },\n\n  { \"mpn_and_n\",         speed_mpn_and_n,  FLAG_R_OPTIONAL },\n  { \"mpn_andn_n\",        speed_mpn_andn_n, FLAG_R_OPTIONAL },\n  { \"mpn_nand_n\",        speed_mpn_nand_n, FLAG_R_OPTIONAL },\n  { \"mpn_ior_n\",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },\n  { \"mpn_iorn_n\",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },\n  { \"mpn_nior_n\",        speed_mpn_nior_n, FLAG_R_OPTIONAL },\n  { \"mpn_xor_n\",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },\n  { \"mpn_xnor_n\",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },\n  { \"mpn_com_n\",         speed_mpn_com_n            },\n  { \"mpn_not\",           speed_mpn_not            },\n\n  { \"mpn_popcount\",      speed_mpn_popcount         },\n  { \"mpn_hamdist\",       speed_mpn_hamdist          },\n\n  { \"mpn_matrix22_mul\",  speed_mpn_matrix22_mul     },\n\n  { \"mpn_hgcd\",          speed_mpn_hgcd             },\n  { \"mpn_hgcd_lehmer\",   speed_mpn_hgcd_lehmer      },\n  { \"mpn_hgcd_appr\",     speed_mpn_hgcd_appr        },\n  { \"mpn_hgcd_appr_lehmer\", speed_mpn_hgcd_appr_lehmer },\n\n  { \"mpn_hgcd_reduce\",   speed_mpn_hgcd_reduce      },\n  { \"mpn_hgcd_reduce_1\", speed_mpn_hgcd_reduce_1    },\n  { \"mpn_hgcd_reduce_2\", speed_mpn_hgcd_reduce_2    },\n\n  { \"mpn_gcd_1\",         
speed_mpn_gcd_1,  FLAG_R_OPTIONAL },\n  { \"mpn_gcd\",           speed_mpn_gcd                    },\n\n  { \"mpn_gcdext\",            speed_mpn_gcdext            },\n  { \"mpn_gcdext_single\",     speed_mpn_gcdext_single     },\n  { \"mpn_gcdext_double\",     speed_mpn_gcdext_double     },\n  { \"mpn_gcdext_one_single\", speed_mpn_gcdext_one_single },\n  { \"mpn_gcdext_one_double\", speed_mpn_gcdext_one_double },\n\n  { \"mpz_jacobi\",        speed_mpz_jacobi           },\n  { \"mpn_jacobi_base\",   speed_mpn_jacobi_base      },\n  { \"mpn_jacobi_base_1\", speed_mpn_jacobi_base_1    },\n  { \"mpn_jacobi_base_2\", speed_mpn_jacobi_base_2    },\n  { \"mpn_jacobi_base_3\", speed_mpn_jacobi_base_3    },\n  { \"mpn_jacobi_base_4\", speed_mpn_jacobi_base_4    },\n\n  { \"mpn_mul_basecase\",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },\n  { \"mpn_sqr_basecase\",  speed_mpn_sqr_basecase     },\n#if HAVE_NATIVE_mpn_sqr_diagonal\n  { \"mpn_sqr_diagonal\",  speed_mpn_sqr_diagonal     },\n#endif\n\n  { \"mpn_mul_n\",         speed_mpn_mul_n            },\n  { \"mpn_sqr\",         speed_mpn_sqr            },\n\n  { \"mpn_kara_mul_n\",    speed_mpn_kara_mul_n       },\n  { \"mpn_kara_sqr_n\",    speed_mpn_kara_sqr_n       },\n  { \"mpn_toom3_mul_n\",   speed_mpn_toom3_mul_n      },\n  { \"mpn_toom4_mul_n\",   speed_mpn_toom4_mul_n      },\n  { \"mpn_toom8h_mul\",    speed_mpn_toom8h_mul       },\n  { \"mpn_toom3_sqr_n\",   speed_mpn_toom3_sqr_n      },\n  { \"mpn_toom4_sqr_n\",   speed_mpn_toom4_sqr_n      },\n  \n  { \"mpn_mul_fft_main\",   speed_mpn_mul_fft_main, FLAG_R_OPTIONAL },\n  { \"mpn_sqr_fft_main\",   speed_mpn_sqr_fft_main, FLAG_R_OPTIONAL },\n\n  { \"mpn_mullow_n\",      speed_mpn_mullow_n         },\n  { \"mpn_mullow_n_basecase\", speed_mpn_mullow_n_basecase},\n  { \"mpn_mulmid_basecase\",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },\n  { \"mpn_toom42_mulmid\",    speed_mpn_toom42_mulmid },\n  { \"mpn_mulmid_n\",         speed_mpn_mulmid_n },\n  { \"mpn_mulmid\", 
          speed_mpn_mulmid, FLAG_R_OPTIONAL },\n  { \"mpn_mulhigh_n\",      speed_mpn_mulhigh_n         },\n  { \"mpn_mulmod_2expm1\",  speed_mpn_mulmod_2expm1     },\n  { \"mpn_mulmod_2expp1_basecase\",  speed_mpn_mulmod_2expp1_basecase     },\n\n  { \"mpn_get_str\",       speed_mpn_get_str,  FLAG_R_OPTIONAL },\n\n  { \"mpn_set_str\",          speed_mpn_set_str,          FLAG_R_OPTIONAL },\n  { \"mpn_set_str_basecase\", speed_mpn_set_str_basecase, FLAG_R_OPTIONAL },\n  { \"mpn_set_str_subquad\",  speed_mpn_set_str_subquad,  FLAG_R_OPTIONAL },\n\n  { \"mpn_sqrtrem\",       speed_mpn_sqrtrem          },\n  { \"mpn_rootrem\",       speed_mpn_rootrem, FLAG_R  },\n\n  { \"mpn_fib2_ui\",       speed_mpn_fib2_ui,    FLAG_NODATA },\n  { \"mpz_fib_ui\",        speed_mpz_fib_ui,     FLAG_NODATA },\n  { \"mpz_fib2_ui\",       speed_mpz_fib2_ui,    FLAG_NODATA },\n  { \"mpz_lucnum_ui\",     speed_mpz_lucnum_ui,  FLAG_NODATA },\n  { \"mpz_lucnum2_ui\",    speed_mpz_lucnum2_ui, FLAG_NODATA },\n\n  { \"mpz_add\",           speed_mpz_add              },\n  { \"mpz_bin_uiui\",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },\n  { \"mpz_fac_ui\",        speed_mpz_fac_ui,   FLAG_NODATA   },\n  { \"mpz_fac_ui_small\",  speed_mpz_fac_ui_small,   FLAG_NODATA   },\n  { \"mpz_fac_ui_large\",  speed_mpz_fac_ui_large,   FLAG_NODATA   },\n  { \"mpz_powm\",          speed_mpz_powm             },\n  { \"mpz_powm_mod\",      speed_mpz_powm_mod         },\n  { \"mpz_powm_redc\",     speed_mpz_powm_redc        },\n  { \"mpz_powm_ui\",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },\n\n  { \"mpz_mod\",           speed_mpz_mod              },\n  { \"mpn_store\",         speed_mpn_store },\n\n  { \"MPN_COPY\",          speed_MPN_COPY             },\n  { \"MPN_COPY_INCR\",     speed_MPN_COPY_INCR        },\n  { \"MPN_COPY_DECR\",     speed_MPN_COPY_DECR        },\n  { \"memcpy\",            speed_memcpy               },\n#if HAVE_NATIVE_mpn_copyi\n  { \"mpn_copyi\",         speed_mpn_copyi     
       },\n#endif\n#if HAVE_NATIVE_mpn_copyd\n  { \"mpn_copyd\",         speed_mpn_copyd            },\n#endif\n#if HAVE_NATIVE_mpn_addlsh1_n\n  { \"mpn_addlsh1_n\",     speed_mpn_addlsh1_n        },\n#endif\n#if HAVE_NATIVE_mpn_sublsh1_n\n  { \"mpn_sublsh1_n\",     speed_mpn_sublsh1_n        },\n#endif\n#if HAVE_NATIVE_mpn_addlsh_n\n  { \"mpn_addlsh_n\",     speed_mpn_addlsh_n,FLAG_R        },\n#endif\n#if HAVE_NATIVE_mpn_sublsh_n\n  { \"mpn_sublsh_n\",     speed_mpn_sublsh_n,FLAG_R        },\n#endif\n#if HAVE_NATIVE_mpn_inclsh_n\n  { \"mpn_inclsh_n\",     speed_mpn_inclsh_n,FLAG_R        },\n#endif\n#if HAVE_NATIVE_mpn_declsh_n\n  { \"mpn_declsh_n\",     speed_mpn_declsh_n,FLAG_R        },\n#endif\n#if HAVE_NATIVE_mpn_rsh1add_n\n  { \"mpn_rsh1add_n\",     speed_mpn_rsh1add_n        },\n#endif\n#if HAVE_NATIVE_mpn_rsh1sub_n\n  { \"mpn_rsh1sub_n\",     speed_mpn_rsh1sub_n        },\n#endif\n\n  { \"MPN_ZERO\",          speed_MPN_ZERO             },\n\n  { \"modlimb_invert\",       speed_modlimb_invert,       FLAG_NODATA },\n  { \"modlimb_invert_mul1\",  speed_modlimb_invert_mul1,  FLAG_NODATA },\n  { \"modlimb_invert_loop\",  speed_modlimb_invert_loop,  FLAG_NODATA },\n  { \"modlimb_invert_cond\",  speed_modlimb_invert_cond,  FLAG_NODATA },\n  { \"modlimb_invert_arith\", speed_modlimb_invert_arith, FLAG_NODATA },\n\n  { \"malloc_free\",                  speed_malloc_free                  },\n  { \"malloc_realloc_free\",          speed_malloc_realloc_free          },\n  { \"gmp_allocate_free\",            speed_gmp_allocate_free            },\n  { \"gmp_allocate_reallocate_free\", speed_gmp_allocate_reallocate_free },\n  { \"mpz_init_clear\",               speed_mpz_init_clear               },\n  { \"mpq_init_clear\",               speed_mpq_init_clear               },\n  { \"mpf_init_clear\",               speed_mpf_init_clear               },\n  { \"mpz_init_realloc_clear\",       speed_mpz_init_realloc_clear       },\n\n  { \"umul_ppmm\",         speed_umul_ppmm, 
    FLAG_R_OPTIONAL },\n#if HAVE_NATIVE_mpn_umul_ppmm\n  { \"mpn_umul_ppmm\",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_umul_ppmm_r\n  { \"mpn_umul_ppmm_r\",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },\n#endif\n\n  { \"count_leading_zeros\",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },\n  { \"count_trailing_zeros\", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },\n\n  { \"udiv_qrnnd\",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },\n  { \"udiv_qrnnd_preinv1\",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },\n  { \"udiv_qrnnd_preinv2\",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },\n  { \"udiv_qrnnd_c\",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },\n#if HAVE_NATIVE_mpn_udiv_qrnnd\n  { \"mpn_udiv_qrnnd\",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },\n#endif\n#if HAVE_NATIVE_mpn_udiv_qrnnd_r\n  { \"mpn_udiv_qrnnd_r\",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },\n#endif\n  { \"invert_limb\",            speed_invert_limb,            FLAG_R_OPTIONAL },\n\n  { \"operator_div\",           speed_operator_div,           FLAG_R_OPTIONAL },\n  { \"operator_mod\",           speed_operator_mod,           FLAG_R_OPTIONAL },\n\n  { \"gmp_randseed\",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },\n  { \"gmp_randseed_ui\", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },\n  { \"mpz_urandomb\",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },\n\n#ifdef SPEED_EXTRA_ROUTINES\n  SPEED_EXTRA_ROUTINES\n#endif\n#ifdef SPEED_EXTRA_ROUTINES2\n  SPEED_EXTRA_ROUTINES2\n#endif\n};\n\n#define SUMMAX\t4\n\nstruct choice_t {\n  const struct routine_t  *p;\n  mp_limb_t               r;\n  double                  scale;\n  double                  time;\n  int                     no_time;\n  double                  prev_time;\n  const char              *name;\n  char\t\t  \t  *filename;\n  int\t\t\t  colfile;\n  int \t\t\t  nsum;\n  int\t\t\t  
sum[SUMMAX];\n};\nstruct choice_t  *choice;\nint  num_choices = 0;\n\n\nvoid\ndata_fill (mp_ptr ptr, mp_size_t size)\n{\n  switch (option_data) {\n  case DATA_RANDOM:\n    mpn_random (ptr, size);\n    break;\n  case DATA_RANDOM2:\n    mpn_random2 (ptr, size);\n    break;\n  case DATA_ZEROS:\n    MPN_ZERO (ptr, size);\n    break;\n  case DATA_AAS:\n    MPN_FILL (ptr, size, GMP_NUMB_0xAA);\n    break;\n  case DATA_FFS:\n    MPN_FILL (ptr, size, GMP_NUMB_MAX);\n    break;\n  case DATA_2FD:\n    MPN_FILL (ptr, size, GMP_NUMB_MAX);\n    ptr[0] -= 2;\n    break;\n  default:\n    abort();\n    /*NOTREACHED*/\n  }\n}\n\n/* The code here handling the various combinations of output options isn't\n   too attractive, but it works and is fairly clean.  */\n\n#define SIZE_TO_DIVISOR(n)              \\\n  (option_square == 1 ? (n)*(n)         \\\n  : option_square == 2 ? (n)*((n)+1)/2  \\\n  : (n))\n\nvoid\nrun_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)\n{\n  const char  *first_open_fastest, *first_open_notfastest, *first_close;\n  int         j,i, fastest, want_data;\n  double      fastest_time;\n  TMP_DECL;\n\n  TMP_MARK;\n\n  /* allocate data, unless all routines are NODATA */\n  want_data = 0;\n  for (i = 0; i < num_choices; i++)\n    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);\n\n  if (want_data)\n    {\n      SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);\n      SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);\n\n      data_fill (s->xp, s->size);\n      data_fill (s->yp, s->size);\n    }\n  else\n    {\n      sp.xp = NULL;\n      sp.yp = NULL;\n    }\n\n  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)\n    {\n      first_open_fastest = \"(#\";\n      first_open_notfastest = \" (\";\n      first_close = \")\";\n    }\n  else\n    {\n      first_open_fastest = \"#\";\n      first_open_notfastest = \" \";\n      first_close = \"\";\n    }\n\n  fastest = -1;\n  fastest_time = -1.0;\n  for (i = 0; i < num_choices; i++)\n    {\n      if( 
choice[i].nsum!=0)continue;\n      s->r = choice[i].r;\n      if( choice[i].colfile==-1)\n        {choice[i].time = speed_measure (choice[i].p->fun, s);}\n      else\n        {FILE *fp;char buf[1024],buf2[1024],*p;int got=0;\n         choice[i].time=-1.0;\n         fp=fopen(choice[i].filename,\"rt\");\n         if(fp==0){printf(\"Cant open %s\\n\",choice[i].filename);exit(1);}\n         while(fgets(buf,1024,fp)!=0)\n              if(atoi(buf)==s->size)\n                {p=buf;\n                 for(j=0;j<=choice[i].colfile;j++)\n                    {if(sscanf(p,\" %s\",buf2)!=1)break;                \n                     p=strstr(p,buf2)+strlen(buf2);}\n                 if(j==choice[i].colfile+1)\n                   {while((p=strstr(buf2,\"#\"))!=0)*p=' ';// exclude #\n                    choice[i].time=atof(buf2);                  \n                   }         \n                 break;}\n         fclose(fp);\n         }\n      choice[i].no_time = (choice[i].time == -1.0);\n      if (! choice[i].no_time)\n        choice[i].time *= choice[i].scale;\n\n      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time\n         is before any differences.  
*/\n      if(choice[i].colfile==-1)   \n      {\n        double     t;\n        t = choice[i].time;\n        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)\n          {\n            if (choice[i].prev_time == -1.0)\n              choice[i].no_time = 1;\n            else\n              choice[i].time = choice[i].time - choice[i].prev_time;\n          }\n        choice[i].prev_time = t;\n      }\n\n    }\n  for (i = 0; i < num_choices; i++)\n     {if(choice[i].nsum==0)continue;\n      choice[i].time=0;choice[i].no_time=0;\n      for(j=0;j<choice[i].nsum;j++)\n         {choice[i].time+=choice[choice[i].sum[j]].time;\n          if(choice[choice[i].sum[j]].no_time)choice[i].no_time=1;\n         }\n     }\n\n  for (i = 0; i < num_choices; i++)\n    {\n      if (choice[i].no_time || choice[i].colfile!=-1)\n        continue;\n      if (option_cmp == CMP_DIFFPREV)\n        {\n          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */\n          if (option_unit == UNIT_CYCLES)\n            choice[i].time /= speed_cycletime;\n          else if (option_unit == UNIT_CYCLESPERLIMB)\n            {\n              if (prev_size == -1)\n                choice[i].time /= speed_cycletime;\n              else\n                choice[i].time /=  (speed_cycletime\n                                    * (SIZE_TO_DIVISOR(s->size)\n                                       - SIZE_TO_DIVISOR(prev_size)));\n            }\n        }\n      else\n        {\n          if (option_unit == UNIT_CYCLES)\n            choice[i].time /= speed_cycletime;\n          else if (option_unit == UNIT_CYCLESPERLIMB)\n            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));\n        }\n    }    \n  for (i = 0; i < num_choices; i++)\n    {\n      if (choice[i].no_time)\n        continue;\n\n      /* Look for the fastest after CMP_DIFFPREV has been applied, but\n         before CMP_RATIO or CMP_DIFFERENCE.  
There's only a fastest shown\n         if there's more than one routine.  */\n      for(j=0;j<xcoln;j++)if(xcol[j]==i)break;   // excluded from fastest choice\n      if (j==xcoln && num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))\n        {\n          fastest = i;\n          fastest_time = choice[i].time;\n        }\n    }\n\n  for (i = 0; i < num_choices; i++)\n    {\n      if (choice[i].no_time )\n        continue;\n\n      if (option_cmp != CMP_DIFFPREV)\n        {\n        \n          if (option_cmp == CMP_RATIO && i != option_cmp_pos)\n            {\n              /* A ratio isn't affected by the units chosen. */\n              if (choice[option_cmp_pos].no_time || choice[option_cmp_pos].time == 0.0)\n                choice[i].no_time = 1;\n              else\n                choice[i].time /= choice[option_cmp_pos].time;\n            }\n          else if (option_cmp == CMP_DIFFERENCE && i != option_cmp_pos)\n            {\n              if (choice[option_cmp_pos].no_time)\n                {\n                  choice[i].no_time = 1;\n                  continue;\n                }\n              choice[i].time -= choice[option_cmp_pos].time;\n            }\n        }\n    }\n\n  if (option_gnuplot)\n    {\n      /* In CMP_DIFFPREV, don't print anything for the first size, start\n         with the second where an actual difference is available.\n\n         In CMP_RATIO, print the \"first\" ie option_cmp_pos column as 1.0.\n\n         The 9 decimals printed is much more than the expected precision of\n         the measurements actually. */\n\n      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))\n        {\n          fprintf (fp, \"%-6ld \", s->size);\n          for (i = 0; i < num_choices; i++)\n            fprintf (fp, \"  %.9e\",\n                     choice[i].no_time ? 0.0\n                     : (option_cmp == CMP_RATIO && i == option_cmp_pos) ? 
1.0\n                     : choice[i].time);\n          fprintf (fp, \"\\n\");\n        }\n    }\n  else\n    {\n      fprintf (fp, \"%-6ld \", s->size);\n      for (i = 0; i < num_choices; i++)\n        {\n          char  buf[128];\n          int   decimals;\n\n          if (choice[i].no_time)\n            {\n              fprintf (fp, \" %*s\", COLUMN_WIDTH, \"n/a\");\n            }\n          else\n            {if (option_unit == UNIT_CYCLESPERLIMB\n                 || (option_cmp == CMP_RATIO && i != option_cmp_pos))\n                decimals = 4;\n              else if (option_unit == UNIT_CYCLES)\n                decimals = 2;\n              else\n                decimals = 9;\n\n              sprintf (buf, \"%s%.*f%s\",\n                       i == fastest ? first_open_fastest : first_open_notfastest,\n                       decimals, choice[i].time, first_close);\n              fprintf (fp, \" %*s\", COLUMN_WIDTH, buf);\n            }\n        }\n      fprintf (fp, \"\\n\");\n    }\n\n  TMP_FREE;\n}\n\nvoid\nrun_all (FILE *fp)\n{\n  mp_size_t  prev_size;\n  int        i;\n  TMP_DECL;\n\n  TMP_MARK;\n  SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);\n  SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);\n\n  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);\n  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);\n\n  for (i = 0; i < size_num; i++)\n    {\n      sp.size = size_array[i].start;\n      prev_size = -1;\n      for (;;)\n        {\n          mp_size_t  step;\n\n          if (option_data == DATA_2FD && sp.size >= 2)\n            sp.xp[sp.size-1] = 2;\n\n          run_one (fp, &sp, prev_size);\n          prev_size = sp.size;\n\n          if (option_data == DATA_2FD && sp.size >= 2)\n            sp.xp[sp.size-1] = MP_LIMB_T_MAX;\n\n          if (option_factor != 0.0)\n            {\n              step = (mp_size_t) (sp.size * option_factor - sp.size);\n              if (step < 1)\n                step = 1;\n            }\n          
else if(size_array[i].inc > 0)\n              step = size_array[i].inc;\n          else\n            step = 1;\n\n          if (step < option_step)\n            step = option_step;\n\n          sp.size += step;\n          if (sp.size > size_array[i].end)\n            break;\n        }\n    }\n\n  TMP_FREE;\n}\n\n\nFILE *\nfopen_for_write (const char *filename)\n{\n  FILE  *fp;\n  if ((fp = fopen (filename, \"w\")) == NULL)\n    {\n      fprintf (stderr, \"Cannot create %s\\n\", filename);\n      exit(1);\n    }\n  return fp;\n}\n      \nvoid\nfclose_written (FILE *fp, const char *filename)\n{\n  int  err;\n\n  err = ferror (fp);\n  err |= fclose (fp);\n\n  if (err)\n    {\n      fprintf (stderr, \"Error writing %s\\n\", filename);\n      exit(1);\n    }\n}\n\n\nvoid\nrun_gnuplot (int argc, char *argv[])\n{\n  char  *plot_filename;\n  char  *data_filename;\n  FILE  *fp;\n  int   i;\n     \n  plot_filename = (char *) (*__gmp_allocate_func)\n    (strlen (option_gnuplot_basename) + 20);\n  data_filename = (char *) (*__gmp_allocate_func)\n    (strlen (option_gnuplot_basename) + 20);\n      \n  sprintf (plot_filename, \"%s.gnuplot\", option_gnuplot_basename);\n  sprintf (data_filename, \"%s.data\",    option_gnuplot_basename);\n\n  fp = fopen_for_write (plot_filename);\n\n  fprintf (fp, \"# Generated with:\\n\");\n  fprintf (fp, \"#\");\n  for (i = 0; i < argc; i++)\n    fprintf (fp, \" %s\", argv[i]);\n  fprintf (fp, \"\\n\");\n  fprintf (fp, \"\\n\");\n\n  fprintf (fp, \"reset\\n\");\n\n  /* Putting the key at the top left is usually good, and you can change it\n     interactively if it's not. 
*/\n  fprintf (fp, \"set key left\\n\");\n\n  /* designed to make it possible to see crossovers easily */\n  fprintf (fp, \"set data style lines\\n\");\n\n  fprintf (fp, \"plot \");\n  for (i = 0; i < num_choices; i++)\n    {\n      fprintf (fp, \" \\\"%s\\\" using 1:%d\", data_filename, i+2);\n      fprintf (fp, \" title \\\"%s\\\"\", choice[i].name);\n\n      if (i != num_choices-1)\n        fprintf (fp, \", \\\\\");\n      fprintf (fp, \"\\n\");\n    }\n\n  fprintf (fp, \"load \\\"-\\\"\\n\");\n  fclose_written (fp, plot_filename);\n\n  fp = fopen_for_write (data_filename);\n\n  /* Unbuffered so you can see where the program was up to if it crashes or\n     you kill it. */\n  setbuf (fp, NULL);\n\n  run_all (fp);\n  fclose_written (fp, data_filename);\n}\n\n\n/* Return a limb with n many one bits (starting from the least significant) */\n\n#define LIMB_ONES(n) \\\n  ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX      \\\n    : (n) == 0 ? CNST_LIMB(0)                   \\\n    : (CNST_LIMB(1) << (n)) - 1)\n\nmp_limb_t\nr_string (const char *s)\n{\n  const char  *s_orig = s;\n  long        n;\n\n  if (strcmp (s, \"aas\") == 0)\n    return GMP_NUMB_0xAA;\n\n  {\n    mpz_t      z;\n    mp_limb_t  l;\n    int        set, siz;\n\n    mpz_init (z);\n    set = mpz_set_str (z, s, 0);\n    siz = SIZ(z);\n    l = (siz == 0 ? 0 : siz > 0 ? 
PTR(z)[0] : -PTR(z)[0]);\n    mpz_clear (z);\n    if (set == 0)\n      {\n        if (siz > 1 || siz < -1)\n          printf (\"Warning, r parameter %s truncated to %d bits\\n\",\n                  s_orig, BITS_PER_MP_LIMB);\n        return l;\n      }\n  }\n\n  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))\n    n = strtoul (s+2, (char **) &s, 16);\n  else\n    n = strtol (s, (char **) &s, 10);\n\n  if (strcmp (s, \"bits\") == 0)\n    {\n      mp_limb_t  l;\n      if (n > BITS_PER_MP_LIMB)\n        {\n          fprintf (stderr, \"%ld bit parameter invalid (max %d bits)\\n\", \n                   n, BITS_PER_MP_LIMB);\n          exit (1);\n        }\n      mpn_random (&l, 1);\n      return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);\n    }\n  else  if (strcmp (s, \"ones\") == 0)\n    {\n      if (n > BITS_PER_MP_LIMB)\n        {\n          fprintf (stderr, \"%ld bit parameter invalid (max %d bits)\\n\", \n                   n, BITS_PER_MP_LIMB);\n          exit (1);\n        }\n      return LIMB_ONES (n);\n    }\n  else if (*s != '\\0')\n    {\n      fprintf (stderr, \"invalid r parameter: %s\\n\", s_orig);\n      exit (1);\n    }\n\n  return n;\n}\n\n\nvoid\nroutine_find (struct choice_t *c, const char *s_orig)\n{\n  const char  *s;\n  int     i;\n  size_t  nlen;\n\n  c->name = s_orig;\n  c->nsum=0;\n  c->colfile=-1;\n  s = strchr (s_orig, '*');\n  if (s != NULL)\n    {\n      c->scale = atof(s_orig);\n      s++;\n    }\n  else\n    {\n      c->scale = 1.0;\n      s = s_orig;\n    }\n\n  for (i = 0; i < numberof (routine); i++)\n    {\n      nlen = strlen (routine[i].name);\n      if (memcmp (s, routine[i].name, nlen) != 0)\n        continue;\n\n      if (s[nlen] == '.')\n        {\n          /* match, with a .r parameter */\n\n          if (! 
(routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))\n            {\n              fprintf (stderr,\n                       \"Choice %s bad: doesn't take a \\\".<r>\\\" parameter\\n\",\n                       s_orig);\n              exit (1);\n            }\n\n          c->p = &routine[i];\n          c->r = r_string (s + nlen + 1);\n          return;\n        }\n\n      if (s[nlen] == '\\0')\n        {\n          /* match, with no parameter */\n\n          if (routine[i].flag & FLAG_R)\n            {\n              fprintf (stderr,\n                       \"Choice %s bad: needs a \\\".<r>\\\" parameter\\n\",\n                       s_orig);\n              exit (1);\n            }\n\n          c->p = &routine[i];\n          c->r = 0;\n          return;\n        }\n    }\n  #if SUMMAX > 4\n  #error Change line below as summax > 4  \n  #endif\n  c->nsum=sscanf(s,\"colsum=%d+%d+%d+%d\",&c->sum[0],&c->sum[1],&c->sum[2],&c->sum[3]);\n  for(i=0;i<c->nsum;i++)c->sum[i]--;\n  for(i=0;i<c->nsum;i++)\n     {xcol[xcoln++]=c->sum[i];\n      if(xcoln>XCOLMAX){fprintf(stderr,\"XCOLMAX not big enough\\n\");exit(1);}}\n  if(c->nsum!=0)return;\n  \n  c->filename=malloc(1024);// bit of a over kill , but remember what if we have two colfile's , these pointer can get lost\n  if(sscanf(s,\"colfile=%d,%s\",&i,c->filename)==2)\n    {c->colfile=i;return;}\n  fprintf (stderr, \"Choice %s unrecognised\\n\", s_orig);\n  exit (1);\n}\n\n\nvoid\nusage (void)\n{\n  int  i;\n  \n  speed_time_init ();\n\n  printf (\"Usage: speed [-options] -s size <routine>...\\n\");\n  printf (\"Measure the speed of some routines.\\n\");\n  printf (\"Times are in seconds, accuracy is shown.\\n\");\n  printf (\"\\n\");\n  printf (\"   -p num       set precision as number of time units each routine must run\\n\");\n  printf (\"   -s x [,x]... 
sizes to measure: x = <size> | <start-end> | <start(step)end>\\n\");\n  printf (\"                single sizes or ranges, sep with comma or use multiple -s\\n\");\n  printf (\"   -t step      step through sizes by given amount\\n\");\n  printf (\"   -f factor    step through sizes by given factor (eg. 1.05)\\n\");\n#if _GNU_SOURCE\n  printf (\"   -r [col]     show times as ratios of the routine in column col(default column 1)\\n\");\n  printf (\"   -d [col]     show times as difference from the routine in column col(default column 1)\\n\");\n#else\n  printf (\"   -r col       show times as ratios of the routine in column col\\n\");\n  printf (\"   -d col       show times as difference from the routine in column col\\n\");\n#endif\n  printf (\"   -D           show times as difference from previous size shown\\n\");\n  printf (\"   -c           show times in CPU cycles\\n\");\n  printf (\"   -C           show times in cycles per limb\\n\");\n  printf (\"   -u           print resource usage (memory) at end\\n\");\n  printf (\"   -P name      output plot files \\\"name.gnuplot\\\" and \\\"name.data\\\"\\n\");\n  printf (\"   -a <type>    use given data: random(default), random2, zeros, aas, ffs, 2fd\\n\");\n  printf (\"   -x, -y, -w, -W <align>  specify data alignments, sources and dests\\n\");\n  printf (\"   -o addrs     print addresses of data blocks\\n\");\n  printf (\"   colsum=A+B+...+Z   Sums the columns A,B,.. 
upto a max of %d columns\\n\",SUMMAX);\n  printf (\"   colfile=col,filename Use the data in column col from file filename\\n\");\n  printf (\"\\n\");\n  printf (\"float*routine prefix can be used to multiply a column by a scale factor float.\\n\\n\");\n  printf (\"If both -t and -f are used, it means step by the factor or the step, whichever\\n\");\n  printf (\"is greater.\\n\");\n  printf (\"If both -C and -D are used, it means cycles per however many limbs between a\\n\");\n  printf (\"size and the previous size.\\n\");\n  printf (\"\\n\");\n  printf (\"After running with -P, plots can be viewed with Gnuplot or Quickplot.\\n\");\n  printf (\"\\\"gnuplot name.gnuplot\\\" (use \\\"set logscale xy; replot\\\" at the prompt for\\n\");\n  printf (\"a log/log plot).\\n\");\n  printf (\"\\\"quickplot -s name.data\\\" (has interactive zooming, and note -s is important\\n\");\n  printf (\"when viewing more than one routine, it means same axis scales for all data).\\n\");\n  printf (\"\\n\");\n  printf (\"The available routines are as follows.\\n\");\n  printf (\"\\n\");\n\n  for (i = 0; i < numberof (routine); i++)\n    {\n      if (routine[i].flag & FLAG_R) \n        printf (\"\\t%s.r\\n\", routine[i].name); \n      else if (routine[i].flag & FLAG_R_OPTIONAL) \n        printf (\"\\t%s (optional .r)\\n\", routine[i].name); \n      else\n        printf (\"\\t%s\\n\", routine[i].name); \n    }\n  printf (\"\\n\");\n  printf (\"Routines with a \\\".r\\\" need an extra parameter, for example mpn_lshift.6\\n\");\n  printf (\"r should be in decimal, or use 0xN for hexadecimal.\\n\");\n  printf (\"\\n\");\n  printf (\"Special forms for r are \\\"<N>bits\\\" for a random N bit number, \\\"<N>ones\\\" for\\n\");\n  printf (\"N one bits, or \\\"aas\\\" for 0xAA..AA.\\n\");\n  printf (\"\\n\");\n  printf (\"Times for sizes out of the range accepted by a routine are shown as 0.\\n\");\n  printf (\"The fastest routine at each size is marked with a # (free form output only)\\n\");\n  
printf (\"but columns that are summed are excluded.\\n\");\n  printf (\"\\n\");\n  printf (\"%s\", speed_time_string);\n  printf (\"\\n\");\n  printf (\"Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\\n\");\n  printf (\"Quickplot home page http://www.kachinatech.com/~quickplot\\n\");\n}\n\nvoid\ncheck_align_option (const char *name, mp_size_t align)\n{\n  if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)\n    {\n      fprintf (stderr, \"Alignment request out of range: %s %ld\\n\",\n               name, (long) align);\n      fprintf (stderr, \"  should be 0 to %d (limbs), inclusive\\n\",\n               SPEED_TMP_ALLOC_ADJUST_MASK);\n      exit (1);\n    }\n}\n\n#ifdef _WIN64\n#  define s2_format \"%lld-%lld\"\n#  define s3_format \"%lld(%lld)%lld\"\n#else\n#  define s2_format \"%ld-%ld\"\n#  define s3_format \"%ld(%ld)%ld\"\n#endif\n\nint\nmain (int argc, char *argv[])\n{\n  int  i;\n  int  opt;\n\n  /* Unbuffered so output goes straight out when directed to a pipe or file\n     and isn't lost on killing the program half way.  
*/\n  setbuf (stdout, NULL);\n\n  for (;;)\n    {\n      #if _GNU_SOURCE\n      opt = getopt(argc, argv, \"a:CcDd::EFf:o:p:P:r::Rs:t:ux:y:w:W:z\");\n      #else\n      opt = getopt(argc, argv, \"a:CcDd:EFf:o:p:P:r:Rs:t:ux:y:w:W:z\");\n      #endif\n      if (opt == EOF)\n        break;\n\n      switch (opt) {\n      case 'a':\n        if (strcmp (optarg, \"random\") == 0)       option_data = DATA_RANDOM;\n        else if (strcmp (optarg, \"random2\") == 0) option_data = DATA_RANDOM2;\n        else if (strcmp (optarg, \"zeros\") == 0)   option_data = DATA_ZEROS;\n        else if (strcmp (optarg, \"aas\") == 0)     option_data = DATA_AAS;\n        else if (strcmp (optarg, \"ffs\") == 0)     option_data = DATA_FFS;\n        else if (strcmp (optarg, \"2fd\") == 0)     option_data = DATA_2FD;\n        else\n          {\n            fprintf (stderr, \"unrecognised data option: %s\\n\", optarg);\n            exit (1);\n          }\n        break;\n      case 'C':\n        if (option_unit  != UNIT_SECONDS) goto bad_unit;\n        option_unit = UNIT_CYCLESPERLIMB;\n        break;\n      case 'c':\n        if (option_unit != UNIT_SECONDS)\n          {\n          bad_unit:\n            fprintf (stderr, \"cannot use more than one of -c, -C\\n\");\n            exit (1);\n          }\n        option_unit = UNIT_CYCLES;\n        break;\n      case 'D':\n        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;\n        option_cmp = CMP_DIFFPREV;\n        break;\n      case 'd':\n        if (option_cmp != CMP_ABSOLUTE)\n          {\n          bad_cmp:\n            fprintf (stderr, \"cannot use more than one of -d, -D, -r\\n\");\n            exit (1);\n          }\n        option_cmp = CMP_DIFFERENCE;\n        option_cmp_pos=0;\n        if(optarg!=0)option_cmp_pos=atoi(optarg)-1;        \n        break;\n      case 'E':\n        option_square = 1;\n        break;\n      case 'F':\n        option_square = 2;\n        break;\n      case 'f':\n        option_factor = atof (optarg);\n      
  if (option_factor <= 1.0)\n          {\n            fprintf (stderr, \"-f factor must be > 1.0\\n\");\n            exit (1);\n          }\n        break;\n      case 'o':\n        speed_option_set (optarg);\n        break;\n      case 'P':\n        option_gnuplot = 1;\n        option_gnuplot_basename = optarg;\n        break;\n      case 'p':\n        speed_precision = atoi (optarg);\n        break;\n      case 'R':\n        option_seed = time (NULL);\n        break;\n      case 'r':\n        if (option_cmp != CMP_ABSOLUTE)\n          goto bad_cmp;\n        option_cmp = CMP_RATIO;\n        option_cmp_pos=0;\n        if(optarg!=0)option_cmp_pos = atoi(optarg)-1;\n        break;\n      case 's':\n        {\n          char  *s;\n          for (s = strtok (optarg, \",\"); s != NULL; s = strtok (NULL, \",\"))\n            {\n              if (size_num == size_allocnum)\n                {\n                  size_array = (struct size_array_t *)\n                    __gmp_allocate_or_reallocate\n                    (size_array,\n                     size_allocnum * sizeof(size_array[0]),\n                     (size_allocnum+10) * sizeof(size_array[0]));\n                  size_allocnum += 10;\n                }\n              size_array[size_num].inc = 0;\n              if (sscanf (s, s3_format,\n                          &size_array[size_num].start,\n                          &size_array[size_num].inc,\n                          &size_array[size_num].end) != 3)\n                {\n\n                  if (sscanf (s, s2_format,\n                              &size_array[size_num].start,\n                              &size_array[size_num].end) != 2)\n                    {\n                      size_array[size_num].start = size_array[size_num].end\n                        = atol (s);\n                    }\n               }\n\n              if (size_array[size_num].start < 0\n                  || size_array[size_num].end < 0\n                  || 
size_array[size_num].start > size_array[size_num].end)\n                {\n                  fprintf (stderr, \"invalid size parameter: %s\\n\", s);\n                  exit (1);\n                }\n\n              size_num++;\n            }\n        }\n        break;\n      case 't':\n        option_step = atol (optarg);\n        if (option_step < 1)\n          {\n            fprintf (stderr, \"-t step must be >= 1\\n\");\n            exit (1);\n          }\n        break;\n      case 'u':\n        option_resource_usage = 1;\n        break;\n      case 'z':\n        sp.cache = 1;\n        break;\n      case 'x':\n        sp.align_xp = atol (optarg);\n        check_align_option (\"-x\", sp.align_xp);\n        break;\n      case 'y':\n        sp.align_yp = atol (optarg);\n        check_align_option (\"-y\", sp.align_yp);\n        break;\n      case 'w':\n        sp.align_wp = atol (optarg);\n        check_align_option (\"-w\", sp.align_wp);\n        break;\n      case 'W':\n        sp.align_wp2 = atol (optarg);\n        check_align_option (\"-W\", sp.align_wp2);\n        break;\n      case '?':\n        exit(1);\n      }\n    }\n\n  if (optind >= argc)\n    {\n      usage ();\n      exit (1);\n    }\n\n  if (size_num == 0)\n    {\n      fprintf (stderr, \"-s <size> must be specified\\n\");\n      exit (1);\n    }\n\n  gmp_randinit_default (__gmp_rands);\n  __gmp_rands_initialized = 1;\n  gmp_randseed_ui (__gmp_rands, option_seed);\n\n  choice = (struct choice_t *) (*__gmp_allocate_func)\n    ((argc - optind) * sizeof(choice[0]));\n  for ( ; optind < argc; optind++)\n    {\n      struct choice_t  c;\n      routine_find (&c, argv[optind]);\n      choice[num_choices] = c;\n      num_choices++;\n    }\n  \n  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&\n      num_choices < 2)\n    {\n      fprintf (stderr, \"WARNING, -d or -r does nothing when only one routine requested\\n\");\n    }\n\n  speed_time_init ();\n  if (option_unit == UNIT_CYCLES || 
option_unit == UNIT_CYCLESPERLIMB)\n    speed_cycletime_need_cycles ();\n  else\n    speed_cycletime_need_seconds ();\n\n  if (option_gnuplot)\n    {\n      run_gnuplot (argc, argv);\n    }\n  else\n    {\n      if (option_unit == UNIT_SECONDS)\n        printf (\"overhead %.9f secs\", speed_measure (speed_noop, NULL));\n      else\n        printf (\"overhead %.2f cycles\",\n                speed_measure (speed_noop, NULL) / speed_cycletime);\n      printf (\", precision %d units of %.2e secs\",\n              speed_precision, speed_unittime);\n      \n      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)\n        printf (\", CPU freq unknown\\n\");\n      else\n        printf (\", CPU freq %.2f MHz\\n\", 1e-6/speed_cycletime);\n\n      printf (\"       \");\n      for (i = 0; i < num_choices; i++)\n        printf (\" %*s\", COLUMN_WIDTH, choice[i].name); \n      printf (\"\\n\");\n\n      run_all (stdout);\n    }\n\n  if (option_resource_usage)\n    {\n#if HAVE_GETRUSAGE\n      {\n        /* This doesn't give data sizes on linux 2.0.x, only utime. */\n        struct rusage  r;\n        if (getrusage (RUSAGE_SELF, &r) != 0)\n          perror (\"getrusage\");\n        else\n          printf (\"getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\\n\",\n                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,\n                  r.ru_idrss, r.ru_isrss, r.ru_ixrss); \n      }\n#else\n      printf (\"getrusage() not available\\n\");\n#endif\n\n      /* Linux kernel. */\n      {\n        char  buf[128];\n        sprintf (buf, \"/proc/%d/status\", getpid());\n        if (access (buf, R_OK) == 0)\n          {\n            sprintf (buf, \"cat /proc/%d/status\", getpid());\n            system (buf);\n          }\n\n      }\n    }\n\n  return 0;\n}\n"
  },
  {
    "path": "tune/speed.h",
    "content": "/* Header for speed and threshold things.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011,\n2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  \n\n*/\n\n#ifndef __SPEED_H__\n#define __SPEED_H__\n\n\n/* Pad ptr,oldsize with zero limbs (at the most significant end) to make it\n   newsize long. */\n#define MPN_ZERO_EXTEND(ptr, oldsize, newsize)\t\t\\\n  do {\t\t\t\t\t\t\t\\\n    ASSERT ((newsize) >= (oldsize));\t\t\t\\\n    MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize));\t\\\n  } while (0)\n\n/* A mask of the least significant n bits.  Note 1<<32 doesn't give zero on\n   x86 family CPUs, hence the separate case for BITS_PER_MP_LIMB. */\n#define MP_LIMB_T_LOWBITMASK(n)\t\\\n  ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)\n\n\n/* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */\n\n#define TMP_ALLOC_ALIGNED(bytes, align)\t\\\n  align_pointer (TMP_ALLOC ((bytes) + (align)-1), (align))\n#define TMP_ALLOC_LIMBS_ALIGNED(limbs, align)\t\\\n  ((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*sizeof(mp_limb_t), align))\n\n/* CACHE_LINE_SIZE is our default alignment for speed operands, and the\n   limit on what s->align_xp etc and then request for off-alignment.  
Maybe\n   this should be an option of some sort, but in any case here are some line\n   sizes,\n\n       bytes\n\t 32   pentium\n\t 64   athlon\n\t 64   itanium-2 L1\n\t128   itanium-2 L2\n*/\n#define CACHE_LINE_SIZE   64 /* bytes */\n\n#define SPEED_TMP_ALLOC_ADJUST_MASK  (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1)\n\n/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb\n   alignment.  */\n#define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align)\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     __ptr;\t\t\t\t\t\t\t\\\n    mp_size_t  __ptr_align, __ptr_add;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0);\t\t\\\n    __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK);\t\\\n    __ptr_align = (__ptr - (mp_ptr) NULL);\t\t\t\t\\\n    __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK;\t\\\n    (ptr) = __ptr + __ptr_add;\t\t\t\t\t\t\\\n  } while (0)\n\n\n/* This is the size for s->xp_block and s->yp_block, used in certain\n   routines that want to run across many different data values and use\n   s->size for a different purpose, eg. SPEED_ROUTINE_MPN_GCD_1.\n\n   512 means 2kbytes of data for each of xp_block and yp_block, making 4k\n   total, which should fit easily in any L1 data cache. 
*/\n\n#define SPEED_BLOCK_SIZE   512 /* limbs */\n\n#ifdef _MSC_VER\n#include \"win_timing.h\"\n#else\nextern double  speed_unittime;\nextern double  speed_cycletime;\nextern int     speed_precision;\nextern char    speed_time_string[];\nvoid speed_time_init(void);\nvoid speed_cycletime_fail(const char *str);\nvoid speed_cycletime_init(void);\nvoid speed_cycletime_need_cycles(void);\nvoid speed_cycletime_need_seconds(void);\nvoid speed_starttime(void);\ndouble speed_endtime(void);\n#endif\n\nstruct speed_params {\n  unsigned   reps;\t/* how many times to run the routine */\n  mp_ptr     xp;\t/* first argument */\n  mp_ptr     yp;\t/* second argument */\n  mp_size_t  size;\t/* size of both arguments */\n  mp_limb_t  r;\t\t/* user supplied parameter */\n  mp_size_t  align_xp;\t/* alignment of xp */\n  mp_size_t  align_yp;\t/* alignment of yp */\n  mp_size_t  align_wp;\t/* intended alignment of wp */\n  mp_size_t  align_wp2; /* intended alignment of wp2 */\n  mp_ptr     xp_block;\t/* first special SPEED_BLOCK_SIZE block */\n  mp_ptr     yp_block;\t/* second special SPEED_BLOCK_SIZE block */\n\n  double     time_divisor; /* optionally set by the speed routine */\n\n  /* used by the cache priming things */\n  int\t     cache;\n  unsigned   src_num, dst_num;\n  struct {\n    mp_ptr    ptr;\n    mp_size_t size;\n  } src[5], dst[5];\n};\n\ntypedef double (*speed_function_t)(struct speed_params *s);\n\ndouble speed_measure(speed_function_t fun, struct speed_params *s);\n\n/* Prototypes for speed measuring routines */\n\ndouble speed_back_to_back(struct speed_params *s);\ndouble speed_count_leading_zeros(struct speed_params *s);\ndouble speed_count_trailing_zeros(struct speed_params *s);\ndouble speed_gmp_allocate_free(struct speed_params *s);\ndouble speed_gmp_allocate_reallocate_free(struct speed_params *s);\ndouble speed_invert_limb(struct speed_params *s);\ndouble speed_malloc_free(struct speed_params *s);\ndouble speed_malloc_realloc_free(struct speed_params 
*s);\ndouble speed_memcpy(struct speed_params *s);\ndouble speed_modlimb_invert(struct speed_params *s);\ndouble speed_modlimb_invert_mul1(struct speed_params *s);\ndouble speed_modlimb_invert_loop(struct speed_params *s);\ndouble speed_modlimb_invert_cond(struct speed_params *s);\ndouble speed_modlimb_invert_arith(struct speed_params *s);\n\ndouble speed_mpf_init_clear(struct speed_params *s);\ndouble speed_mpn_add_err1_n(struct speed_params *s);\ndouble speed_mpn_sub_err1_n(struct speed_params *s);\n\ndouble speed_mpn_add_n(struct speed_params *s);\ndouble speed_mpn_addadd_n(struct speed_params *s);\ndouble speed_mpn_subadd_n(struct speed_params *s);\ndouble speed_mpn_addlsh1_n(struct speed_params *s);\ndouble speed_mpn_addlsh_n(struct speed_params *s);\ndouble speed_mpn_sublsh_n(struct speed_params *s);\ndouble speed_mpn_declsh_n(struct speed_params *s);\ndouble speed_mpn_inclsh_n(struct speed_params *s);\ndouble speed_mpn_sumdiff_n(struct speed_params *s);\ndouble speed_mpn_nsumdiff_n(struct speed_params *s);\ndouble speed_mpn_and_n(struct speed_params *s);\ndouble speed_mpn_andn_n(struct speed_params *s);\ndouble speed_mpn_addmul_1(struct speed_params *s);\ndouble speed_mpn_addmul_2(struct speed_params *s);\ndouble speed_mpn_addmul_3(struct speed_params *s);\ndouble speed_mpn_addmul_4(struct speed_params *s);\ndouble speed_mpn_addmul_5(struct speed_params *s);\ndouble speed_mpn_addmul_6(struct speed_params *s);\ndouble speed_mpn_addmul_7(struct speed_params *s);\ndouble speed_mpn_addmul_8(struct speed_params *s);\ndouble speed_mpn_addsub_n(struct speed_params *s);\ndouble speed_mpn_com_n(struct speed_params *s);\ndouble speed_mpn_not(struct speed_params *s);\ndouble speed_mpn_copyd(struct speed_params *s);\ndouble speed_mpn_copyi(struct speed_params *s);\ndouble speed_mpn_dc_tdiv_qr(struct speed_params *s);\ndouble speed_mpn_dc_div_qr_n(struct speed_params *s);\ndouble speed_mpn_tdiv_q(struct speed_params *s);\ndouble speed_mpn_tdiv_q1(struct speed_params 
*s);\ndouble speed_mpn_sb_divappr_q(struct speed_params *s);\ndouble speed_mpn_sb_div_qr(struct speed_params *s);\ndouble speed_mpn_dc_divappr_q(struct speed_params *s);\ndouble speed_mpn_dc_bdiv_q(struct speed_params *s);\ndouble speed_mpn_dc_bdiv_qr_n(struct speed_params *s);\ndouble speed_mpn_inv_div_qr(struct speed_params *s);\ndouble speed_mpn_inv_divappr_q(struct speed_params *s);\ndouble speed_mpn_tdiv_q(struct speed_params *s);\ndouble speed_mpn_tdiv_q2(struct speed_params *s);\ndouble speed_MPN_COPY(struct speed_params *s);\ndouble speed_MPN_COPY_DECR(struct speed_params *s);\ndouble speed_MPN_COPY_INCR(struct speed_params *s);\ndouble speed_mpn_divexact_1(struct speed_params *s);\ndouble speed_mpn_divexact_by3(struct speed_params *s);\ndouble speed_mpn_divexact_byff(struct speed_params *s);\ndouble speed_mpn_divexact_byfobm1(struct speed_params *s);\ndouble speed_mpn_divrem_euclidean_qr_1(struct speed_params *s);\ndouble speed_mpn_divrem_euclidean_qr_2(struct speed_params *s);\ndouble speed_mpn_divrem_euclidean_r_1(struct speed_params *s);\ndouble speed_mpn_divrem_hensel_qr_1(struct speed_params *s);\ndouble speed_mpn_divrem_hensel_qr_1_1(struct speed_params *s);\ndouble speed_mpn_divrem_hensel_qr_1_2(struct speed_params *s);\ndouble speed_mpn_divrem_hensel_r_1(struct speed_params *s);\ndouble speed_mpn_rsh_divrem_hensel_qr_1(struct speed_params *s);\ndouble speed_mpn_rsh_divrem_hensel_qr_1_1(struct speed_params *s);\ndouble speed_mpn_rsh_divrem_hensel_qr_1_2(struct speed_params *s);\ndouble speed_mpn_divrem_hensel_rsh_qr_1(struct speed_params *s);\ndouble speed_mpn_divrem_1(struct speed_params *s);\ndouble speed_mpn_divrem_1f(struct speed_params *s);\ndouble speed_mpn_divrem_1c(struct speed_params *s);\ndouble speed_mpn_divrem_1cf(struct speed_params *s);\ndouble speed_mpn_divrem_1_div(struct speed_params *s);\ndouble speed_mpn_divrem_1f_div(struct speed_params *s);\ndouble speed_mpn_divrem_1_inv(struct speed_params *s);\ndouble 
speed_mpn_divrem_1f_inv(struct speed_params *s);\ndouble speed_mpn_divrem_2(struct speed_params *s);\ndouble speed_mpn_divrem_2_div(struct speed_params *s);\ndouble speed_mpn_divrem_2_inv(struct speed_params *s);\ndouble speed_mpn_double(struct speed_params *s);\ndouble speed_mpn_half(struct speed_params *s);\ndouble speed_mpn_fib2_ui(struct speed_params *s);\ndouble speed_mpn_matrix22_mul (struct speed_params *);\ndouble speed_mpn_hgcd (struct speed_params *);\ndouble speed_mpn_hgcd_lehmer (struct speed_params *);\ndouble speed_mpn_hgcd_appr (struct speed_params *);\ndouble speed_mpn_hgcd_appr_lehmer (struct speed_params *);\ndouble speed_mpn_hgcd_reduce (struct speed_params *);\ndouble speed_mpn_hgcd_reduce_1 (struct speed_params *);\ndouble speed_mpn_hgcd_reduce_2 (struct speed_params *);\ndouble speed_mpn_gcd (struct speed_params *);\ndouble speed_mpn_gcd_1 (struct speed_params *);\ndouble speed_mpn_gcd_1N (struct speed_params *);\ndouble speed_mpn_gcdext (struct speed_params *);\ndouble speed_mpn_gcdext_double (struct speed_params *);\ndouble speed_mpn_gcdext_one_double (struct speed_params *);\ndouble speed_mpn_gcdext_one_single (struct speed_params *);\ndouble speed_mpn_gcdext_single (struct speed_params *);\ndouble speed_mpn_get_str(struct speed_params *s);\ndouble speed_mpn_hamdist(struct speed_params *s);\ndouble speed_mpn_ior_n(struct speed_params *s);\ndouble speed_mpn_iorn_n(struct speed_params *s);\ndouble speed_mpn_jacobi_base(struct speed_params *s);\ndouble speed_mpn_jacobi_base_1(struct speed_params *s);\ndouble speed_mpn_jacobi_base_2(struct speed_params *s);\ndouble speed_mpn_jacobi_base_3(struct speed_params *s);\ndouble speed_mpn_jacobi_base_4(struct speed_params *s);\ndouble speed_mpn_kara_mul_n(struct speed_params *s);\ndouble speed_mpn_kara_sqr_n(struct speed_params *s);\ndouble speed_mpn_karaadd(struct speed_params *s);\ndouble speed_mpn_karasub(struct speed_params *s);\ndouble speed_mpn_lshift(struct speed_params *s);\ndouble 
speed_mpn_lshift1(struct speed_params *s);\ndouble speed_mpn_lshift2(struct speed_params *s);\ndouble speed_mpn_lshiftc(struct speed_params *s);\ndouble speed_mpn_mod_1(struct speed_params *s);\ndouble speed_mpn_mod_1_1(struct speed_params *s);\ndouble speed_mpn_mod_1_2(struct speed_params *s);\ndouble speed_mpn_mod_1_3(struct speed_params *s);\ndouble speed_mpn_mod_1_k(struct speed_params *s);\ndouble speed_mpn_mod_1c(struct speed_params *s);\ndouble speed_mpn_mod_1_div(struct speed_params *s);\ndouble speed_mpn_mod_1_inv(struct speed_params *s);\ndouble speed_mpn_mod_34lsub1(struct speed_params *s);\ndouble speed_mpn_modexact_1_odd(struct speed_params *s);\ndouble speed_mpn_modexact_1c_odd(struct speed_params *s);\ndouble speed_mpn_mul_1(struct speed_params *s);\ndouble speed_mpn_mul_1_inplace(struct speed_params *s);\ndouble speed_mpn_mul_2(struct speed_params *s);\ndouble speed_mpn_mul_basecase(struct speed_params *s);\ndouble speed_mpn_mul_fft_main(struct speed_params *s);\ndouble speed_mpn_sqr_fft_main(struct speed_params *s);\ndouble speed_mpn_mul_n(struct speed_params *s);\ndouble speed_mpn_mul_n_sqr(struct speed_params *s);\ndouble speed_mpn_mullow_n(struct speed_params *s);\ndouble speed_mpn_mulhigh_n(struct speed_params *s);\ndouble speed_mpn_mulmod_2expm1(struct speed_params *s);\ndouble speed_mpn_mulmod_2expp1_basecase(struct speed_params *s);\ndouble speed_mpn_mullow_n_basecase(struct speed_params *s);\ndouble speed_mpn_mulmid(struct speed_params *s);\ndouble speed_mpn_mulmid_basecase(struct speed_params *s);\ndouble speed_mpn_mulmid_n(struct speed_params *s);\ndouble speed_mpn_toom42_mulmid(struct speed_params *s);\ndouble speed_mpn_nand_n(struct speed_params *s);\ndouble speed_mpn_nior_n(struct speed_params *s);\ndouble speed_mpn_popcount(struct speed_params *s);\ndouble speed_mpn_preinv_divrem_1(struct speed_params *s);\ndouble speed_mpn_preinv_divrem_1f(struct speed_params *s);\ndouble speed_mpn_preinv_mod_1(struct speed_params *s);\ndouble 
speed_mpn_binvert (struct speed_params *);\ndouble speed_mpn_redc_1 (struct speed_params *);\ndouble speed_mpn_redc_2 (struct speed_params *);\ndouble speed_mpn_redc_n (struct speed_params *);\ndouble speed_mpn_rsh1add_n(struct speed_params *s);\ndouble speed_mpn_rsh1sub_n(struct speed_params *s);\ndouble speed_mpn_rshift(struct speed_params *s);\ndouble speed_mpn_rshift1(struct speed_params *s);\ndouble speed_mpn_rshift2(struct speed_params *s);\ndouble speed_mpn_set_str(struct speed_params *s);\ndouble speed_mpn_set_str_basecase(struct speed_params *s);\ndouble speed_mpn_set_str_subquad(struct speed_params *s);\ndouble speed_mpn_sqr_basecase(struct speed_params *s);\ndouble speed_mpn_sqr_diagonal(struct speed_params *s);\ndouble speed_mpn_sqr(struct speed_params *s);\ndouble speed_mpn_sqrtrem(struct speed_params *s);\ndouble speed_mpn_rootrem(struct speed_params *s);\ndouble speed_mpn_sub_n(struct speed_params *s);\ndouble speed_mpn_sublsh1_n(struct speed_params *s);\ndouble speed_mpn_submul_1(struct speed_params *s);\ndouble speed_mpn_toom3_mul_n(struct speed_params *s);\ndouble speed_mpn_toom4_mul_n(struct speed_params *s);\ndouble speed_mpn_toom8h_mul(struct speed_params *s);\ndouble speed_mpn_toom3_sqr_n(struct speed_params *s);\ndouble speed_mpn_toom4_sqr_n(struct speed_params *s);\ndouble speed_mpn_toom8_sqr_n(struct speed_params *s);\ndouble speed_mpn_udiv_qrnnd(struct speed_params *s);\ndouble speed_mpn_udiv_qrnnd_r(struct speed_params *s);\ndouble speed_mpn_umul_ppmm(struct speed_params *s);\ndouble speed_mpn_umul_ppmm_r(struct speed_params *s);\ndouble speed_mpn_xnor_n(struct speed_params *s);\ndouble speed_mpn_xor_n(struct speed_params *s);\ndouble speed_MPN_ZERO(struct speed_params *s);\ndouble speed_mpn_store(struct speed_params *s);\n\ndouble speed_mpq_init_clear(struct speed_params *s);\n\ndouble speed_mpz_add(struct speed_params *s);\ndouble speed_mpz_bin_uiui(struct speed_params *s);\ndouble speed_mpz_fac_ui(struct speed_params *s);\ndouble 
speed_mpz_fac_ui_small(struct speed_params *s);\ndouble speed_mpz_fac_ui_large(struct speed_params *s);\ndouble speed_mpz_fib_ui(struct speed_params *s);\ndouble speed_mpz_fib2_ui(struct speed_params *s);\ndouble speed_mpz_init_clear(struct speed_params *s);\ndouble speed_mpz_init_realloc_clear(struct speed_params *s);\ndouble speed_mpz_jacobi(struct speed_params *s);\ndouble speed_mpz_lucnum_ui(struct speed_params *s);\ndouble speed_mpz_lucnum2_ui(struct speed_params *s);\ndouble speed_mpz_mod(struct speed_params *s);\ndouble speed_mpz_powm(struct speed_params *s);\ndouble speed_mpz_powm_mod(struct speed_params *s);\ndouble speed_mpz_powm_redc(struct speed_params *s);\ndouble speed_mpz_powm_ui(struct speed_params *s);\ndouble speed_mpz_urandomb(struct speed_params *s);\n\ndouble speed_gmp_randseed(struct speed_params *s);\ndouble speed_gmp_randseed_ui(struct speed_params *s);\n\ndouble speed_noop(struct speed_params *s);\ndouble speed_noop_wxs(struct speed_params *s);\ndouble speed_noop_wxys(struct speed_params *s);\n\ndouble speed_operator_div(struct speed_params *s);\ndouble speed_operator_mod(struct speed_params *s);\n\ndouble speed_udiv_qrnnd(struct speed_params *s);\ndouble speed_udiv_qrnnd_preinv1(struct speed_params *s);\ndouble speed_udiv_qrnnd_preinv2(struct speed_params *s);\ndouble speed_udiv_qrnnd_c(struct speed_params *s);\ndouble speed_umul_ppmm(struct speed_params *s);\n\n\n/* Prototypes for other routines */\n\n/* low 32-bits in p[0], high 32-bits in p[1] */\nvoid speed_cyclecounter(unsigned p[2]);\n\nvoid mftb_function(unsigned p[2]);\n\n/* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy\n   output or a clobber for the cpuid, hence an explicit save and restore.  
A\n   clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use\n   the dummy output style in non-PIC, so there's an error if somehow -fPIC\n   is used without a -DPIC to tell us about it.\t */\n#if defined(__GNUC__) && (defined (__i386__) || defined (__i486__))\n#ifdef PIC\n#define speed_cyclecounter(p)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    int\t __speed_cyclecounter__save_ebx;\t\t\t\t\\\n    int\t __speed_cyclecounter__dummy;\t\t\t\t\t\\\n    __asm__ __volatile__ (\"movl %%ebx, %1\\n\"\t\t\t\t\\\n\t\t\t  \"cpuid\\n\"\t\t\t\t\t\\\n\t\t\t  \"movl %1, %%ebx\\n\"\t\t\t\t\\\n\t\t\t  \"rdtsc\"\t\t\t\t\t\\\n\t\t\t  : \"=a\"   ((p)[0]),\t\t\t\t\\\n\t\t\t    \"=&rm\" (__speed_cyclecounter__save_ebx),\t\\\n\t\t\t    \"=c\"   (__speed_cyclecounter__dummy),\t\\\n\t\t\t    \"=d\"   ((p)[1]));\t\t\t\t\\\n  } while (0)\n#else\n#define speed_cyclecounter(p)\t\t\t\t\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    int\t __speed_cyclecounter__dummy1;\t\t\t\t\t\\\n    int\t __speed_cyclecounter__dummy2;\t\t\t\t\t\\\n    __asm__ __volatile__ (\"cpuid\\n\"\t\t\t\t\t\\\n\t\t\t  \"rdtsc\"\t\t\t\t\t\\\n\t\t\t  : \"=a\" ((p)[0]),\t\t\t\t\\\n\t\t\t    \"=b\" (__speed_cyclecounter__dummy1),\t\\\n\t\t\t    \"=c\" (__speed_cyclecounter__dummy2),\t\\\n\t\t\t    \"=d\" ((p)[1]));\t\t\t\t\\\n  } while (0)\n#endif\n#elif defined( _MSC_VER )\n#include <intrin.h>\n#pragma intrinsic(__rdtsc)\n__inline void speed_cyclecounter(unsigned long p[2]) { *(unsigned long long*)p = __rdtsc(); }\n#endif\n\ndouble speed_cyclecounter_diff(const unsigned end[2], const unsigned start[2]);\nint gettimeofday_microseconds_p(void);\nint getrusage_microseconds_p(void);\nint cycles_works_p(void);\nlong clk_tck(void);\ndouble freq_measure(const char *, double (*)(void));\n\nint double_cmp_ptr(const double *p, const double *q);\nvoid pentium_wbinvd(void);\ntypedef int (*qsort_function_t)(const void *, const void *);\n\nvoid noop(void);\nvoid noop_1(mp_limb_t n);\nvoid noop_wxs(mp_ptr wp, mp_srcptr xp, 
mp_size_t size);\nvoid noop_wxys(mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);\nvoid mpn_cache_fill(mp_srcptr ptr, mp_size_t size);\nvoid mpn_cache_fill_dummy(mp_limb_t n);\nvoid speed_cache_fill(struct speed_params *s);\nvoid speed_operand_src(struct speed_params *s, mp_ptr ptr, mp_size_t size);\nvoid speed_operand_dst(struct speed_params *s, mp_ptr ptr, mp_size_t size);\n\nextern int  speed_option_addrs;\nextern int  speed_option_verbose;\nvoid speed_option_set(const char *s);\n\nmp_limb_t mpn_divrem_1_div(mp_ptr qp, mp_size_t xsize, mp_srcptr ap, mp_size_t size, mp_limb_t d);\nmp_limb_t mpn_divrem_1_inv(mp_ptr qp, mp_size_t xsize, mp_srcptr ap, mp_size_t size, mp_limb_t d);\nmp_limb_t mpn_divrem_2_div(mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp);\nmp_limb_t mpn_divrem_2_inv(mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp);\n\nint mpn_jacobi_base_1(mp_limb_t a, mp_limb_t b, int result_bit1);\nint mpn_jacobi_base_2(mp_limb_t a, mp_limb_t b, int result_bit1);\nint mpn_jacobi_base_3(mp_limb_t a, mp_limb_t b, int result_bit1);\nint mpn_jacobi_base_4(mp_limb_t a, mp_limb_t b, int result_bit1);\n\nmp_limb_t mpn_mod_1_div(mp_srcptr ap, mp_size_t size, mp_limb_t d);\nmp_limb_t mpn_mod_1_inv(mp_srcptr ap, mp_size_t size, mp_limb_t d);\n\nmp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);\nmp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);\nmp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);\nmp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);\nmp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);\nmp_size_t mpn_hgcd_lehmer_itch (mp_size_t);\n\nmp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);\nmp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);\n\nmp_size_t 
mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);\nmp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);\n\nmp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);\nmp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);\n\nmp_size_t mpn_set_str_basecase(mp_ptr, const unsigned char *, size_t, int);\nmp_size_t mpn_set_str_subquad(mp_ptr, const unsigned char *, size_t, int);\n\nvoid mpn_toom3_mul_n_open(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom4_mul_n_open(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\nvoid mpn_toom8h_mul_open(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n\nvoid mpn_toom3_sqr_n_open(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom4_sqr_n_open(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom8_sqr_n_open(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n\nvoid mpn_toom3_mul_n_mpn(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom4_mul_n_mpn(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);\nvoid mpn_toom8h_mul_mpn(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);\n\nvoid mpn_toom3_sqr_n_mpn(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom4_sqr_n_mpn(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\nvoid mpn_toom8_sqr_n_mpn(mp_ptr, mp_srcptr, mp_size_t, mp_ptr);\n\nvoid mpz_powm_mod(mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod);\nvoid mpz_powm_redc(mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod);\nvoid redc(mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim);\nvoid mpz_fac_ui_small(mpz_ptr,unsigned long);\nvoid mpz_fac_ui_large(mpz_ptr,unsigned long);\n\nint speed_routine_count_zeros_setup(struct speed_params *s, mp_ptr xp, int leading, int zero);\n\n\n/* \"get\" is called repeatedly until it ticks over, just in case on a fast\n   processor it takes less than a microsecond, though this is probably\n   unlikely if it's a system call.\n\n   speed_cyclecounter is called on the same side of the \"get\" 
for the start\n   and end measurements.  It doesn't matter how long it takes from the \"get\"\n   sample to the cycles sample, since that period will cancel out in the\n   difference calculation (assuming it's the same each time).\n\n   Letting the test run for more than a process time slice is probably only\n   going to reduce accuracy, especially for getrusage when the cycle counter\n   is real time, or for gettimeofday if the cycle counter is in fact process\n   time.  Use CLK_TCK/2 as a reasonable stop.\n\n   It'd be desirable to be quite accurate here.  The default speed_precision\n   for a cycle counter is 10000 cycles, so to mix that with getrusage or\n   gettimeofday the frequency should be at least that accurate.  But running\n   measurements for 10000 microseconds (or more) is too long.  Be satisfied\n   with just a half clock tick (5000 microseconds usually).  */\n\n#define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec)\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    type      st1, st, et1, et;\t\t\t\t\t\t\\\n    unsigned  sc[2], ec[2];\t\t\t\t\t\t\\\n    long      dt, half_tick;\t\t\t\t\t\t\\\n    double    dc, cyc;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    half_tick = (1000000L / clk_tck()) / 2;\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    get (st1);\t\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      get (st);\t\t\t\t\t\t\t\t\\\n    } while (usec(st) == usec(st1) && sec(st) == sec(st1));\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    getc (sc);\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    for (;;)\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tget (et1);\t\t\t\t\t\t\t\\\n\tdo {\t\t\t\t\t\t\t\t\\\n\t  get (et);\t\t\t\t\t\t\t\\\n\t} while (usec(et) == usec(et1) && sec(et) == sec(et1));\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\tgetc (ec);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\tdc = speed_cyclecounter_diff (ec, sc);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t/* allow secs to cancel before multiplying */\t\t\t\\\n\tdt = sec(et) - sec(st);\t\t\t\t\t\t\\\n\tdt = dt * 1000000L + (usec(et) - 
usec(st));\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\tif (dt >= half_tick)\t\t\t\t\t\t\\\n\t  break;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    cyc = dt * 1e-6 / dc;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (speed_option_verbose >= 2)\t\t\t\t\t\\\n      printf (\"freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6g\\n\",\t\\\n\t      name, dc, dt, cyc);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return dt * 1e-6 / dc;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n  } while (0)\n\n\n\n\n/* The measuring routines use these big macros to save duplication for\n   similar forms.  They also get used for some automatically generated\n   measuring of new implementations of functions.\n\n   Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a\n   function pointer is considered undesirable since it's not the way a\n   normal application will be calling, and some processors might do\n   different things with an indirect call, like not branch predicting, or\n   doing a full pipe flush.  At least some of the \"functions\" measured are\n   actually macros too.\n\n   The net effect is to bloat the object code, possibly in a big way, but\n   only what's being measured is being run, so that doesn't matter.\n\n   The loop forms don't try to cope with __GMP_ATTRIBUTE_PURE or\n   ATTRIBUTE_CONST on the called functions.  Adding a cast to a non-pure\n   function pointer doesn't work in gcc 3.2.  Using an actual non-pure\n   function pointer variable works, but stands a real risk of a\n   non-optimizing compiler generating unnecessary overheads in the call.\n   Currently the best idea is not to use those attributes for a timing\n   program build.  __GMP_NO_ATTRIBUTE_CONST_PURE will tell mpir.h and\n   gmp-impl.h to omit them from routines there.  */\n\n#define SPEED_RESTRICT_COND(cond)   if (!(cond)) return -1.0;\n\n/* For mpn_copy or similar. 
*/\n#define SPEED_ROUTINE_MPN_COPY(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_COPYC(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, s->size, 0);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* s->size is still in limbs, and it's limbs which are copied, but\n   \"function\" takes a size in bytes not limbs.\t*/\n#define 
SPEED_ROUTINE_MPN_COPY_BYTES(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, s->size * BYTES_PER_MP_LIMB);\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n/* For mpn_add_n, mpn_sub_n, or similar. */\n#define SPEED_ROUTINE_MPN_BINARY_N_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     wp;\t\t\t\t\t\t\t\\\n    mp_ptr     xp, yp;\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    xp = s->xp;\t\t\t\t\t\t\t\t\\\n    yp = s->yp;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (s->r == 0)\t;\t\t\t\t\t\t\\\n    else if (s->r == 1) { xp = wp;\t    }\t\t\t\t\\\n    else if (s->r == 2) {\t   yp = wp; }\t\t\t\t\\\n    else if (s->r == 3) { xp = wp; yp = wp; }\t\t\t\t\\\n    else if (s->r == 4) {     yp = xp;\t    }\t\t\t\t\\\n    else\t\t{\t\t\t\t\t\t\\\n      TMP_FREE;\t\t\t\t\t\t\t\t\\\n      return -1.0;\t\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* initialize wp if operand overlap 
*/\t\t\t\t\\\n    if (xp == wp || yp == wp)\t\t\t\t\t\t\\\n      MPN_COPY (wp, s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* For mpn_add_n, mpn_sub_n, or similar. */\n#define SPEED_ROUTINE_MPN_SUMDIFF_N_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     ap, sp;\t\t\t\t\t\t\t\\\n    mp_ptr     xp, yp;\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    xp = s->xp;\t\t\t\t\t\t\t\t\\\n    yp = s->yp;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if ((s->r & 1) != 0) { xp = ap; }\t\t\t\t\t\\\n    if ((s->r & 2) != 0) { yp = ap; }\t\t\t\t\t\\\n    if ((s->r & 4) != 0) { xp = sp; }\t\t\t\t\t\\\n    if ((s->r & 8) != 0) { yp = sp; }\t\t\t\t\t\\\n    if ((s->r & 3) == 3  ||  (s->r & 12) == 12)\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tTMP_FREE;\t\t\t\t\t\t\t\\\n\treturn -1.0;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* initialize ap if operand overlap */\t\t\t\t\\\n    if (xp == ap || yp == ap)\t\t\t\t\t\t\\\n      MPN_COPY (ap, s->xp, s->size);\t\t\t\t\t\\\n    /* initialize sp if operand overlap */\t\t\t\t\\\n    if (xp == sp || yp == sp)\t\t\t\t\t\t\\\n     
 MPN_COPY (sp, s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, ap, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, sp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n// for addadd or addsub or subadd \n#define SPEED_ROUTINE_MPN_TRINARY_N(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     ap, sp;\t\t\t\t\t\t\t\\\n    mp_ptr     xp, yp;\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp2);\t\t\t\\\n    xp = s->xp;\t\t\t\t\t\t\t\t\\\n    yp = s->yp;\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, sp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, ap, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call(ap,sp,xp,yp,s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_ADD_ERR1_N(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     ap, sp;\t\t\t\t\t\t\t\\\n    mp_ptr     xp, yp;\t\t\t\t\t\t\t\\\n    mp_limb_t  ep[2];\t\t\t\t\t\t\t\\\n    unsigned   
i;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp2);\t\t\t\\\n    xp = s->xp;\t\t\t\t\t\t\t\t\\\n    yp = s->yp;\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, sp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, ap, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call(ap,sp,xp,ep,yp,s->size,0);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPN_BINARY_N(function)\t\t\t\t\\\n   SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size))\n\n#define SPEED_ROUTINE_MPN_BINARY_NC(function)\t\t\t\t\\\n   SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size, 0))\n\n\n/* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar. 
*/\n#define SPEED_ROUTINE_MPN_UNARY_1_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_SHIFTX(call)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call(wp,s->xp,s->size);\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_UNARY_1(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))\n\n#define 
SPEED_ROUTINE_MPN_UNARY_1C(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))\n\n/* FIXME: wp is uninitialized here, should start it off from xp */\n#define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function)\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, wp, s->size, s->r))\n\n#define SPEED_ROUTINE_MPN_DIVEXACT_1(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))\n\n#define SPEED_ROUTINE_MPN_DIVEXACT_BYFOBM1(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    mp_limb_t inv=MP_LIMB_T_MAX/s->r;\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      (*function) (wp, s->xp, s->size, s->r,inv);\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DIVREM_1(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r))\n\n#define SPEED_ROUTINE_MPN_DIVREM_1C(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r, 0))\n\n#define SPEED_ROUTINE_MPN_DIVREM_1F(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r))\n\n#define SPEED_ROUTINE_MPN_DIVREM_1CF(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 
0, s->r, 0))


/* Precompute the shifted divisor's inverse, then defer to UNARY_1_CALL.
   Requires s->r nonzero.
   Fix: the closing "}" previously carried a trailing line-continuation
   backslash, so the macro silently absorbed the following (blank) line;
   deleting that blank line would have merged the next #define into this
   macro.  The stray backslash is removed.  */
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call) \
  { \
    unsigned shift; \
    mp_limb_t dinv; \
    SPEED_RESTRICT_COND (s->size >= 0); \
    SPEED_RESTRICT_COND (s->r != 0); \
    count_leading_zeros (shift, s->r); \
    invert_limb (dinv, s->r << shift); \
    SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
  }

#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function) \
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
  ((*function) (wp, 0, s->xp, s->size, s->r, dinv, shift))

/* s->size limbs worth of fraction part */
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function) \
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
  ((*function) (wp, s->size, s->xp, 0, s->r, dinv, shift))


/* s->r is duplicated to form the multiplier, defaulting to
   MP_BASES_BIG_BASE_10.  Not sure if that's particularly useful, but at
   least it provides some control.  */
#define SPEED_ROUTINE_MPN_UNARY_N(function,N) \
  { \
    mp_ptr wp; \
    mp_size_t wn; \
    unsigned i; \
    double t; \
    mp_limb_t yp[N]; \
    TMP_DECL; \
    SPEED_RESTRICT_COND (s->size >= N); \
    TMP_MARK; \
    wn = s->size + N-1; \
    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
    for (i = 0; i < N; i++) \
      yp[i] = (s->r != 0 ? 
s->r : MP_BASES_BIG_BASE_10);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, yp, (mp_size_t) N);\t\t\t\t\\\n    speed_operand_dst (s, wp, wn);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, s->size, yp);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_UNARY_2(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 2)\n#define SPEED_ROUTINE_MPN_UNARY_3(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 3)\n#define SPEED_ROUTINE_MPN_UNARY_4(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 4)\n#define SPEED_ROUTINE_MPN_UNARY_5(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 5)\n#define SPEED_ROUTINE_MPN_UNARY_6(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 6)\n#define SPEED_ROUTINE_MPN_UNARY_7(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 7)\n#define SPEED_ROUTINE_MPN_UNARY_8(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_UNARY_N (function, 8)\n\n\n/* For mpn_mul_basecase, xsize=r, ysize=s->size. */\n#define SPEED_ROUTINE_MPN_MUL_BASECASE(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    mp_size_t size1;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = (s->r == 0 ? 
s->size : s->r);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (size1 >= s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, size1);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, size1 + s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, size1, s->yp, s->size);\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_MUL_CALL(call, minsize)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    mp_size_t size1;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = (s->r == 0 ? 
s->size : s->r);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= minsize);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (size1 >= s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, size1);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, size1 + s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_MUL_N_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, 2*s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_MUL_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size));\n\n#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize)\t\t\\\n  
{ \
    mp_ptr wp, tspace; \
    unsigned i; \
    double t; \
    TMP_DECL; \
    SPEED_RESTRICT_COND (s->size >= minsize); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, 2*s->size); \
    speed_operand_dst (s, tspace, tsize); \
    speed_cache_fill (s); \
    speed_starttime (); \
    i = s->reps; \
    do \
      call; \
    while (--i != 0); \
    t = speed_endtime (); \
    TMP_FREE; \
    return t; \
  }

/* As MUL_N_CALL but with a minimum-size restriction and no scratch.  */
#define SPEED_ROUTINE_MPN_MUL_N_SIZE(call, minsize) \
  { \
    mp_ptr wp; \
    unsigned i; \
    double t; \
    TMP_DECL; \
    SPEED_RESTRICT_COND (s->size >= minsize); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, 2*s->size); \
    speed_cache_fill (s); \
    speed_starttime (); \
    i = s->reps; \
    do \
      call; \
    while (--i != 0); \
    t = speed_endtime (); \
    TMP_FREE; \
    return 
t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */\n#define SPEED_ROUTINE_MPN_MULMID(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, xp;\t\t\t\t\t\t\t\\\n    mp_size_t size1;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = (s->r == 0 ? (2 * s->size - 1) : s->r);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (size1 >= s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, size1);\t\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, size1 - s->size + 3);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, xp, size1, s->yp, s->size);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_MULMID_N(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, xp;\t\t\t\t\t\t\t\\\n    mp_size_t size1;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = 2 * s->size - 1;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, 
size1);\t\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, size1 - s->size + 3);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, xp, s->yp, s->size);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, xp, scratch;\t\t\t\t\t\t\\\n    mp_size_t size1, scratch_size;\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = 2 * s->size - 1;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);\t\t\t\\\n    scratch_size = mpn_toom42_mulmid_itch (s->size);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, size1);\t\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, size1 - s->size + 3);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, xp, s->yp, s->size, scratch);\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* For mpn_mulmod_2expm1 , xsize=r, ysize=s->size. 
*/\n#define SPEED_ROUTINE_MPN_MULMOD_2EXPM1(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp,temps;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (temps,5*s->size+64, s->align_wp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, s->xp, s->yp, GMP_NUMB_BITS*s->size,temps);\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* For mpn_mulmod_2expp1_basecase , xsize=r, ysize=s->size. 
*/
#define SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE(function) \
  { \
    mp_ptr wp,temps; \
    unsigned i; \
    double t; \
    TMP_DECL; \
    SPEED_RESTRICT_COND (s->size >= 1); \
    TMP_MARK; \
    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
    SPEED_TMP_ALLOC_LIMBS (temps,2*s->size, s->align_wp); \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, s->size); \
    speed_cache_fill (s); \
    speed_starttime (); \
    i = s->reps; \
    do \
      function (wp, s->xp, s->yp,0, GMP_NUMB_BITS*s->size,temps); \
    while (--i != 0); \
    t = speed_endtime (); \
    TMP_FREE; \
    return t; \
  }

/* NOTE(review): this passes s->xp as BOTH multiplicands, unlike
   TOOM3_MUL_N below which uses s->xp and s->yp -- confirm intentional.  */
#define SPEED_ROUTINE_MPN_KARA_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE \
    (function (wp, s->xp, s->xp, s->size, tspace), \
     MPN_KARA_MUL_N_TSIZE (s->size), \
     MPN_KARA_MUL_N_MINSIZE)

#define SPEED_ROUTINE_MPN_TOOM3_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_TSPACE \
    (function (wp, s->xp, s->yp, s->size, tspace), \
     MPN_TOOM3_MUL_N_TSIZE (s->size), \
     MPN_TOOM3_MUL_N_MINSIZE)

#define SPEED_ROUTINE_MPN_TOOM4_MUL_N(function) \
  SPEED_ROUTINE_MPN_MUL_N_SIZE \
    (function (wp, s->xp, s->yp, s->size), \
     MPN_TOOM4_MUL_N_MINSIZE)

#define SPEED_ROUTINE_MPN_TOOM8H_MUL(function) \
  SPEED_ROUTINE_MPN_MUL_CALL \
    (function (wp, s->xp, size1, s->yp, s->size), \
     
MPN_TOOM8H_MUL_MINSIZE)\n\n\n#define SPEED_ROUTINE_MPN_SQR_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, 2*s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_SQR(function)\t\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))\n\n#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function)\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR (function)\n\n\n#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize)\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, tspace;\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= minsize);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, 2*s->size);\t\t\t\t\\\n    speed_operand_dst (s, tspace, tsize);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n   
   call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_SQR_N_SIZE(call, minsize)\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= minsize);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, 2*s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_KARA_SQR_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),\t\\\n\t\t\t\tMPN_KARA_SQR_N_TSIZE (s->size),\t\t\\\n\t\t\t\tMPN_KARA_SQR_N_MINSIZE)\n\n#define SPEED_ROUTINE_MPN_TOOM3_SQR_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),\t\\\n\t\t\t\tMPN_TOOM3_SQR_N_TSIZE (s->size),\t\\\n\t\t\t\tMPN_TOOM3_SQR_N_MINSIZE)\n\n#define SPEED_ROUTINE_MPN_TOOM4_SQR_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR_N_SIZE (function (wp, s->xp, s->size),\t\\\n\t\t\t\tMPN_TOOM4_SQR_N_MINSIZE)\n\n#define SPEED_ROUTINE_MPN_TOOM8_SQR_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SQR_N_SIZE (function (wp, s->xp, s->size),\t\\\n\t\t\t\tMPN_TOOM8_SQR_N_MINSIZE)\n\n#define SPEED_ROUTINE_MPN_MOD_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    
SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_MOD_1_K(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      mpn_mod_1_k(s->xp,s->size,2333,s->r);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPN_MOD_1(function)\t\t\t\t\\\n   SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size, s->r))\n\n#define SPEED_ROUTINE_MPN_MOD_1C(function)\t\t\t\t\\\n   SPEED_ROUTINE_MPN_MOD_CALL ((*function)(s->xp, s->size, s->r, CNST_LIMB(0)))\n\n#define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function)\t\t\t\\\n  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r));\n\n#define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function)\t\t\t\\\n  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0)));\n\n#define SPEED_ROUTINE_MPN_MOD_34LSUB1(function)\t\t\t\t\\\n   SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size))\n\n#define SPEED_ROUTINE_MPN_PREINV_MOD_1(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_limb_t  inv;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    
invert_limb (inv, s->r);\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      (*function) (s->xp, s->size, s->r, inv);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n\n/* A division of 2*s->size by s->size limbs */\n\n#define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q, r;\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (r, s->size,   s->align_wp2);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[2*s->size-1] = d[s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 2*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, s->size+1);\t\t\t\t\\\n    speed_operand_dst (s, r, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    
TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DC_DIV_N_TSIZE(function, tsize)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q, tmp;\t\t\t\t\t\t\\\n    mp_limb_t inv;                                             \\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 4*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, 2*s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, 2*s->size+1, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tmp, 2*tsize,   s->align_wp2);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+3*s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n    MPN_COPY (d+s->size, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[2*s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[4*s->size-1] = d[2*s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 4*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, 2*s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, 2*s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    mpir_invert_pi1(inv, d[2*s->size-1], d[2*s->size-2]);                        \\\n\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+3*s->size, s->xp, 
s->size);\t\t\t\t\\\n      function(q, a, d, 2*s->size, inv, tmp);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n#define SPEED_ROUTINE_MPN_TDIV_Q1(function)          \\\n{                                 \\\n    unsigned   i;                           \\\n    mp_ptr     dp, ap, tp, qp;                  \\\n    double     t;                           \\\n    mp_size_t itch, size1;                          \\\n    TMP_DECL;                               \\\n                                            \\\n    size1 = 3 * s->size;               \\\n                                        \\\n    TMP_MARK;                               \\\n    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);           \\\n    SPEED_TMP_ALLOC_LIMBS (qp, 2*s->size, s->align_wp);           \\\n    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);     \\\n    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp);     \\\n                                                         \\\n    MPN_COPY (ap,         s->xp, s->size);              \\\n    MPN_COPY (ap+s->size, s->xp, s->size);                \\\n    MPN_COPY (ap+2*s->size, s->xp, s->size);                \\\n                                                            \\\n    /* normalize the data */                        \\\n    dp[s->size-1] |= GMP_NUMB_HIGHBIT;                  \\\n    ap[size1-1] = dp[s->size-1] - 1;                \\\n                                                    \\\n    speed_operand_dst (s, qp, 2*s->size);                 \\\n    speed_operand_src (s, tp, size1);               \\\n    speed_operand_src (s, dp, s->size);                 \\\n    speed_cache_fill (s);                       \\\n                                                  \\\n    speed_starttime ();                         \\\n    i = s->reps;                            \\\n    do {                    
            \\\n        MPN_COPY(tp, ap, size1); \\\n        function (qp, tp, size1, dp, s->size);        \\\n    } while (--i != 0);                         \\\n    t = speed_endtime ();                       \\\n                                             \\\n    TMP_FREE;                               \\\n    return t;                               \\\n}\n#define SPEED_ROUTINE_MPN_TDIV_Q(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     dp, ap, tp, qp;\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    mp_size_t itch, size1;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = (s->r == 0 ? 2 * s->size : s->r);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp);\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (ap,         s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (ap+size1-s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    dp[s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    ap[size1-1] = dp[s->size-1] - 1;\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_dst (s, qp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, tp, size1);\t\t\t\t\\\n    speed_operand_src (s, dp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      MPN_COPY(tp, ap, size1); \\\n      function (qp, tp, size1, dp, s->size);\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n#define SPEED_ROUTINE_MPN_DC_DIV_SMALL_Q(function)\t\t\t\t\\\n  
{\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, t, q;\t\t\t\t\t\t\\\n    mp_limb_t inv;                                                \\\n    mp_size_t size1;   \\\n    double    td;       \t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = (s->r == 0 ? 2 * s->size : s->r);\t\t\t\t\\\n\t SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t SPEED_RESTRICT_COND (size1 >= s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, size1, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (t, size1, s->align_wp2);                 \\\n    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, size1 - s->size + 1, s->align_wp);\t\t\t\\\n    \t\t\t\t\t\t         \t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+size1-s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[size1-1] = d[s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, size1);\t\t\t\t\\\n    speed_operand_dst (s, t, size1);                                \\\n    speed_operand_src (s, d, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, size1-s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                                \\\n    mpir_invert_pi1(inv, d[s->size-1], d[s->size-2]);             \\\n\t\t\t\t\t\t\t\t        \\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (t, a, size1);\t\t\t\t\t\\\n      function(q, t, size1, d, s->size, inv);\t\t        \\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    td = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return td;\t\t\t\t\t\t\t\t\\\n  
}\n#define SPEED_ROUTINE_MPN_SB_DIV_SMALL_Q(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, t, q;\t\t\t\t\t\t\\\n    mp_limb_t inv;                                                \\\n    mp_size_t size1;   \\\n    double    td;       \t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    size1 = 2 * s->size;\t\t\t\t\\\n\t SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t SPEED_RESTRICT_COND (size1 >= s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, size1, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (t, size1, s->align_wp2);                 \\\n    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, size1 - s->size + 1, s->align_wp);\t\t\t\\\n    \t\t\t\t\t\t         \t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+size1-s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[size1-1] = d[s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, size1);\t\t\t\t\\\n    speed_operand_dst (s, t, size1);                                \\\n    speed_operand_src (s, d, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, size1-s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                                \\\n    mpir_invert_pi1(inv, d[s->size-1], d[s->size-2]);             \\\n\t\t\t\t\t\t\t\t        \\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (t, a, size1);\t\t\t\t\t\\\n      function(q, t, size1, d, s->size, inv);\t\t        \\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    td = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    
TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return td;\t\t\t\t\t\t\t\t\\\n  }\n#define SPEED_ROUTINE_MPN_INV_DIV(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q, inv;\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (inv, s->size, s->align_wp2);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[2*s->size-1] = d[s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 2*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    mpn_invert(inv, d, s->size);                        \\\n\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n      function(q, a, 2*s->size, d, s->size, inv);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_INV_DIV_SMALL_Q(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, 
q, inv;\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 3*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, 2*s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (inv, 2*s->size, s->align_wp2);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n    MPN_COPY (d+s->size, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[2*s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[3*s->size-1] = d[2*s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 3*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, 2*s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    mpn_invert(inv, d, 2*s->size);                        \\\n\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n      function(q, a, 3*s->size, d, 2*s->size, inv);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_TDIV_SMALL_Q(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q;\t\t\t\t\t\t\\\n    
double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 3*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, 2*s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n    MPN_COPY (d+s->size, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[2*s->size-1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\\\n    a[3*s->size-1] = d[2*s->size-1] - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 3*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, 2*s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n      function(q, a, 3*s->size, d, 2*s->size);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DC_DIVREM_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_DC_DIVREM_CALL((*function) (q, a, d, s->size))\n\n#define SPEED_ROUTINE_MPN_DC_DIVREM_SB(function)\t\t\t\\\n  SPEED_ROUTINE_MPN_DC_DIVREM_CALL\t\t\t\t\t\\\n    ((*function) (q, a, 2*s->size, d, s->size))\n\n#define SPEED_ROUTINE_MPN_DC_TDIV_QR(function)\t\t\t\t\\\n 
 SPEED_ROUTINE_MPN_DC_DIVREM_CALL\t\t\t\t\t\\\n    ((*function) (q, r, 0, a, 2*s->size, d, s->size))\n\n#define SPEED_ROUTINE_MPN_DC_BDIV_N_TSIZE(function, tsize)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q, tmp;\t\t\t\t\t\t\\\n    mp_limb_t inv;                                             \\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t                                      \\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, 4*s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, 2*s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, 2*s->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tmp, tsize,   s->align_wp2);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (a+3*s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n    MPN_COPY (d+s->size, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[0] |= 1;\t\t\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, 4*s->size);\t\t\t\t\\\n    speed_operand_src (s, d, 2*s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, 2*s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    modlimb_invert(inv, d[0]);                        \\\n\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      MPN_COPY (a+s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+2*s->size, s->xp, s->size);\t\t\t\t\\\n      MPN_COPY (a+3*s->size, s->xp, s->size);\t\t\t\t\\\n      function(q, a, d, 2*s->size, inv, 
tmp);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DC_BDIV_Q(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_ptr    a, d, q;\t\t\t\t\t\t\\\n    mp_limb_t inv;                                             \\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, s->size, s->align_wp);\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (d, s->yp, s->size);\t\t\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    d[0] |= 1;\t\t\t\t\t\\\n    \t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, a, s->size);\t\t\t\t\\\n    speed_operand_src (s, d, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, q, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t                                                         \\\n    modlimb_invert(inv, d[0]);                        \\\n\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t{\t\t\t\t\t\t\t\t\\\n      MPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n      function(q, a, s->size, d, s->size, inv);\t\t\t\t\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* A division of s->size by 3 limbs */\n\n#define SPEED_ROUTINE_MPN_SB_DIVREM_M3(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     a, d, q;\t\t\t\t\t\t\t\\\n    mp_size_t  qsize;\t\t\t\t\t\t\t\\\n    
double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 3);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (d, 3, s->align_yp);\t\t\t\t\\\n    MPN_COPY (d, s->yp, 3);\t\t\t\t\t\t\\\n    d[2] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    qsize = s->size - 3;\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (q, qsize, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_dst (s, a, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, d, 3);\t\t\t\t\t\\\n    speed_operand_dst (s, q, qsize);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY (a, s->xp, s->size);\t\t\t\t\t\\\n\tfunction (q, a, s->size, d, 3);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n/* A remainder 2*s->size by s->size limbs */\n\n#define SPEED_ROUTINE_MPZ_MOD(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mpz_t      a, d, r;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init_set_n (d, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* high part less than d, low part a duplicate copied in */\t\t\\\n    mpz_init_set_n (a, s->xp, s->size);\t\t\t\t\t\\\n    mpz_mod (a, a, d);\t\t\t\t\t\t\t\\\n    mpz_mul_2exp (a, a, BITS_PER_MP_LIMB * s->size);\t\t\t\\\n    MPN_COPY (PTR(a), s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init (r);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, PTR(a), SIZ(a));\t\t\t\t\\\n    
speed_operand_src (s, PTR(d), SIZ(d));\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (r, a, d);\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_KARA(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     rp, tp;\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 8);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (rp, 2*s->size, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, s->size+1,     s->align_yp);\t\t\\\n        \t\t\t\t\t\t\t\t\\\n    MPN_COPY (rp,         s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (rp+s->size, s->yp, s->size);\t\t\t\t\\\n    MPN_COPY (tp , s->yp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_dst (s, rp, 2*s->size);\t\t\t\t\\\n    speed_operand_src (s, tp, s->size+1);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      function (rp, tp, s->size);\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    long  i;\t\t\t\t\t\t\t\t\\\n    mp_ptr    up, tp, ip;\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (up, s->size,   
s->align_yp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (up, s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* normalize the data */\t\t\t\t\t\t\\\n    up[0] |= 1;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, up, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, tp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, ip, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (ip, up, s->size, tp);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_REDC_1(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     cp, mp, tp, ap;\t\t\t\t\t\t\\\n    mp_limb_t  inv;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (ap,         s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (ap+s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* modulus must be odd */\t\t\t\t\t\t\\\n    MPN_COPY (mp, s->yp, s->size);\t\t\t\t\t\\\n    mp[0] |= 1;\t\t\t\t\t\t\t\t\\\n    modlimb_invert (inv, mp[0]);\t\t\t\t\t\t\\\n    inv = -inv;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, ap, 2*s->size+1);\t\t\t\t\\\n    speed_operand_dst (s, tp, 2*s->size+1);\t\t\t\t\\\n    
speed_operand_src (s, mp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, cp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      MPN_COPY (tp, ap, 2*s->size);\t\t\t\t\t\\\n      function (cp, tp, mp, s->size, inv);\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_REDC_2(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     cp, mp, tp, ap;\t\t\t\t\t\t\\\n    mp_limb_t  invp[2];\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (ap,         s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (ap+s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* modulus must be odd */\t\t\t\t\t\t\\\n    MPN_COPY (mp, s->yp, s->size);\t\t\t\t\t\\\n    mp[0] |= 1;\t\t\t\t\t\t\t\t\\\n    mpn_binvert (invp, mp, 2, tp);\t\t\t\t\t\\\n    invp[0] = -invp[0]; invp[1] = ~invp[1];\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, ap, 2*s->size+1);\t\t\t\t\\\n    speed_operand_dst (s, tp, 2*s->size+1);\t\t\t\t\\\n    speed_operand_src (s, mp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, cp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      
MPN_COPY (tp, ap, 2*s->size);\t\t\t\t\t\\\n      function (cp, tp, mp, s->size, invp);\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n#define SPEED_ROUTINE_REDC_N(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    mp_ptr     cp, mp, tp, ap, invp;\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size > 8);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (invp, s->size,   s->align_wp2); /* align? */\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (ap,         s->xp, s->size);\t\t\t\t\\\n    MPN_COPY (ap+s->size, s->xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* modulus must be odd */\t\t\t\t\t\t\\\n    MPN_COPY (mp, s->yp, s->size);\t\t\t\t\t\\\n    mp[0] |= 1;\t\t\t\t\t\t\t\t\\\n    mpn_binvert (invp, mp, s->size, tp);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, ap, 2*s->size+1);\t\t\t\t\\\n    speed_operand_dst (s, tp, 2*s->size+1);\t\t\t\t\\\n    speed_operand_src (s, mp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, cp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      MPN_COPY (tp, ap, 2*s->size);\t\t\t\t\t\\\n      function (cp, tp, mp, s->size, invp);\t\t\t\t\\\n    } while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define 
SPEED_ROUTINE_MPN_POPCOUNT(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned i;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (s->xp, s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_HAMDIST(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned i;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (s->xp, s->yp, s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPZ_UI(function)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mpz_t     z;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init (z);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (z, s->size);\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_clear (z);\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPZ_FAC_UI(function)    SPEED_ROUTINE_MPZ_UI(function)\n#define 
SPEED_ROUTINE_MPZ_FIB_UI(function)    SPEED_ROUTINE_MPZ_UI(function)\n#define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function)\n\n\n#define SPEED_ROUTINE_MPZ_2_UI(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mpz_t     z, z2;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init (z);\t\t\t\t\t\t\t\\\n    mpz_init (z2);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (z, z2, s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_clear (z);\t\t\t\t\t\t\t\\\n    mpz_clear (z2);\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPZ_FIB2_UI(function)    SPEED_ROUTINE_MPZ_2_UI(function)\n#define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)\n\n\n#define SPEED_ROUTINE_MPN_FIB2_UI(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     fp, f1p;\t\t\t\t\t\t\t\\\n    mp_size_t  alloc;\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    alloc = MPN_FIB2_SIZE (s->size);\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (fp,\talloc, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (f1p, alloc, s->align_yp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (fp, f1p, s->size);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n\n/* Calculate b^e mod m 
for random b and m of s->size limbs and random e of 6\n   limbs.  m is forced to odd so that redc can be used.  e is limited in\n   size so the calculation doesn't take too long. */\n#define SPEED_ROUTINE_MPZ_POWM(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mpz_t     r, b, e, m;\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init (r);\t\t\t\t\t\t\t\\\n    mpz_init_set_n (b, s->xp, s->size);\t\t\t\t\t\\\n    mpz_init_set_n (m, s->yp, s->size);\t\t\t\t\t\\\n    mpz_setbit (m, 0);\t/* force m to odd */\t\t\t\t\\\n    mpz_init_set_n (e, s->xp_block, 6);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (r, b, e, m);\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_clear (r);\t\t\t\t\t\t\t\\\n    mpz_clear (b);\t\t\t\t\t\t\t\\\n    mpz_clear (e);\t\t\t\t\t\t\t\\\n    mpz_clear (m);\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* (m-2)^0xAAAAAAAA mod m */\n#define SPEED_ROUTINE_MPZ_POWM_UI(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mpz_t     r, b, m;\t\t\t\t\t\t\t\\\n    unsigned  long  e = (~ (unsigned long) 0) / 3;\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init (r);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* force m to odd */\t\t\t\t\t\t\\\n    mpz_init (m);\t\t\t\t\t\t\t\\\n    mpz_set_n (m, s->xp, s->size);\t\t\t\t\t\\\n    PTR(m)[0] |= 1;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_init_set (b, m);\t\t\t\t\t\t\\\n    mpz_sub_ui (b, b, 2);\t\t\t\t\t\t\\\n/* printf (\"%X\\n\", mpz_get_ui(m)); */\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    
do\t\t\t\t\t\t\t\t\t\\\n      function (r, b, e, m);\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    mpz_clear (r);\t\t\t\t\t\t\t\\\n    mpz_clear (b);\t\t\t\t\t\t\t\\\n    mpz_clear (m);\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPN_SUMDIFF_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, wp2, xp, yp;\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp,\ts->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);\t\t\t\\\n    xp = s->xp;\t\t\t\t\t\t\t\t\\\n    yp = s->yp;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (s->r == 0)\t;\t\t\t\t\t\t\\\n    else if (s->r == 1) { xp = wp;\t      }\t\t\t\t\\\n    else if (s->r == 2) {\t    yp = wp2; }\t\t\t\t\\\n    else if (s->r == 3) { xp = wp;  yp = wp2; }\t\t\t\t\\\n    else if (s->r == 4) { xp = wp2; yp = wp;  }\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      TMP_FREE;\t\t\t\t\t\t\t\t\\\n      return -1.0;\t\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    if (xp != s->xp) MPN_COPY (xp, s->xp, s->size);\t\t\t\\\n    if (yp != s->yp) MPN_COPY (yp, s->yp, s->size);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, wp2, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return 
t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_SUMDIFF_N(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SUMDIFF_CALL\t\t\t\t\t\t\\\n    (function (wp, wp2, xp, yp, s->size));\n\n#define SPEED_ROUTINE_MPN_SUMDIFF_NC(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_SUMDIFF_CALL\t\t\t\t\t\t\\\n    (function (wp, wp2, xp, yp, s->size, 0));\n\n\n/* Doing an Nx1 gcd with the given r. */\n#define SPEED_ROUTINE_MPN_GCD_1N(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    xp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->r != 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);\t\t\t\\\n    MPN_COPY (xp, s->xp, s->size);\t\t\t\t\t\\\n    xp[0] |= refmpn_zero_p (xp, s->size);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (xp, s->size, s->r);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n/* SPEED_BLOCK_SIZE many one GCDs of s->size bits each. 
*/\n\n#define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i, j;\t\t\t\t\t\t\t\\\n    mp_ptr    px, py;\t\t\t\t\t\t\t\\\n    mp_limb_t x_mask, y_mask;\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp);\t\t\\\n    MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE);\t\t\t\\\n    MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    x_mask = MP_LIMB_T_LOWBITMASK (s->size);\t\t\t\t\\\n    y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size);\t\t\\\n    for (i = 0; i < SPEED_BLOCK_SIZE; i++)\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tpx[i] &= x_mask; px[i] += (px[i] == 0);\t\t\t\t\\\n\tpy[i] &= y_mask; py[i] += (py[i] == 0);\t\t\t\t\\\n\tsetup;\t\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, px, SPEED_BLOCK_SIZE);\t\t\t\\\n    speed_operand_src (s, py, SPEED_BLOCK_SIZE);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tj = SPEED_BLOCK_SIZE;\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    call;\t\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = SPEED_BLOCK_SIZE;\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_GCD_1(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_GCD_1_CALL( 
, function (&px[j-1], 1, py[j-1]))\n\n#define SPEED_ROUTINE_MPN_JACBASE(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_GCD_1_CALL\t\t\t\t\t\t\\\n    ({\t\t\t\t\t\t\t\t\t\\\n       /* require x<y, y odd, y!=1 */\t\t\t\t\t\\\n       px[i] %= py[i];\t\t\t\t\t\t\t\\\n       px[i] |= 1;\t\t\t\t\t\t\t\\\n       py[i] |= 1;\t\t\t\t\t\t\t\\\n       if (py[i]==1) py[i]=3;\t\t\t\t\t\t\\\n     },\t\t\t\t\t\t\t\t\t\\\n     function (px[j-1], py[j-1], 0))\n\n\n#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_size_t hgcd_init_itch, hgcd_itch;\t\t\t\t\\\n    mp_ptr ap, bp, wp, tmp1;\t\t\t\t\t\t\\\n    struct hgcd_matrix hgcd;\t\t\t\t\t\t\\\n    int res;\t\t\t\t\t\t\t\t\\\n    unsigned i;\t\t\t\t\t\t\t\t\\\n    double t;\t\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (s->size < 2)\t\t\t\t\t\t\t\\\n      return -1;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->xp[s->size - 1] |= 1;\t\t\t\t\t\t\\\n    s->yp[s->size - 1] |= 1;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);\t\t\\\n    hgcd_itch = itchfunc (s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, ap, s->size + 1);\t\t\t\t\\\n    speed_operand_dst (s, bp, s->size + 1);\t\t\t\t\\\n    speed_operand_dst (s, wp, hgcd_itch);\t\t\t\t\\\n    speed_operand_dst (s, tmp1, hgcd_init_itch);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n  
  do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY (ap, s->xp, s->size);\t\t\t\t\t\\\n\tMPN_COPY (bp, s->yp, s->size);\t\t\t\t\t\\\n\tmpn_hgcd_matrix_init (&hgcd, s->size, tmp1);\t\t\t\\\n\tres = func (ap, bp, s->size, &hgcd, wp);\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_size_t hgcd_init_itch, hgcd_step_itch;\t\t\t\t\\\n    mp_ptr ap, bp, wp, tmp1;\t\t\t\t\t\t\\\n    struct hgcd_matrix hgcd;\t\t\t\t\t\t\\\n    mp_size_t p = s->size/2;\t\t\t\t\t\t\\\n    int res;\t\t\t\t\t\t\t\t\\\n    unsigned i;\t\t\t\t\t\t\t\t\\\n    double t;\t\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (s->size < 2)\t\t\t\t\t\t\t\\\n      return -1;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->xp[s->size - 1] |= 1;\t\t\t\t\t\t\\\n    s->yp[s->size - 1] |= 1;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);\t\t\\\n    hgcd_step_itch = itchfunc (s->size, p);\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_src (s, s->yp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, ap, s->size + 1);\t\t\t\t\\\n    speed_operand_dst (s, bp, s->size + 1);\t\t\t\t\\\n    speed_operand_dst (s, wp, hgcd_step_itch);\t\t\t\t\\\n    speed_operand_dst (s, tmp1, hgcd_init_itch);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    
speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY (ap, s->xp, s->size);\t\t\t\t\t\\\n\tMPN_COPY (bp, s->yp, s->size);\t\t\t\t\t\\\n\tmpn_hgcd_matrix_init (&hgcd, s->size, tmp1);\t\t\t\\\n\tres = func (&hgcd, ap, bp, s->size, p, wp);\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* Run some GCDs of s->size limbs each.  The number of different data values\n   is decreased as s->size**2, since GCD is a quadratic algorithm.\n   SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT\n   though, because the plain gcd is about twice as fast as gcdext.  */\n\n#define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_size_t j, pieces, psize;\t\t\t\t\t\t\\\n    mp_ptr    wp, wp2, xtmp, ytmp, px, py;\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp,   s->size+1, s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp2,  s->size+1, s->align_wp2);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    pieces = SPEED_BLOCK_SIZE * datafactor / s->size / s->size;\t\t\\\n    pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size);\t\t\t\\\n    pieces = MAX (pieces, 1);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    psize = pieces * s->size;\t\t\t\t\t\t\\\n    px = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    py = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);\t\t\\\n    MPN_COPY (py, pieces==1 ? 
s->yp : s->yp_block, psize);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* Requirements: x >= y, y must be odd, high limbs != 0.\t\t\\\n       No need to ensure random numbers are really great.  */\t\t\\\n    for (j = 0; j < pieces; j++)\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tmp_ptr\tx = px + j * s->size;\t\t\t\t\t\\\n\tmp_ptr\ty = py + j * s->size;\t\t\t\t\t\\\n\tif (x[s->size - 1] == 0) x[s->size - 1] = 1;\t\t\t\\\n\tif (y[s->size - 1] == 0) y[s->size - 1] = 1;\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\tif (x[s->size - 1] < y[s->size - 1])\t\t\t\t\\\n\t  MP_LIMB_T_SWAP (x[s->size - 1], y[s->size - 1]);\t\t\\\n\telse if (x[s->size - 1] == y[s->size - 1])\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    x[s->size - 1] = 2;\t\t\t\t\t\t\\\n\t    y[s->size - 1] = 1;\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\ty[0] |= 1;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, px, psize);\t\t\t\t\t\\\n    speed_operand_src (s, py, psize);\t\t\t\t\t\\\n    speed_operand_dst (s, xtmp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, ytmp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tj = pieces;\t\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    MPN_COPY (xtmp, px+(j - 1)*s->size, s->size);\t\t\\\n\t    MPN_COPY (ytmp, py+(j - 1)*s->size, s->size);\t\t\\\n\t    call;\t\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = pieces;\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_GCD(function)\t\\\n  SPEED_ROUTINE_MPN_GCD_CALL (8, function (wp, xtmp, s->size, ytmp, 
s->size))\n\n#define SPEED_ROUTINE_MPN_GCDEXT(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_GCD_CALL\t\t\t\t\t\t\\\n    (4, { mp_size_t  wp2size;\t\t\t\t\t\t\\\n\t  function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })\n\n\n\n#define SPEED_ROUTINE_MPN_GCDEXT_ONE(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_size_t j, pieces, psize, wp2size;\t\t\t\t\\\n    mp_ptr    wp, wp2, xtmp, ytmp, px, py;\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);\t\t\\\n    MPN_COPY (xtmp, s->xp, s->size);\t\t\t\t\t\\\n    MPN_COPY (ytmp, s->yp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp,\ts->size+1, s->align_wp);\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    pieces = SPEED_BLOCK_SIZE / 3;\t\t\t\t\t\\\n    psize = 3 * pieces;\t\t\t\t\t\t\t\\\n    px = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    py = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    MPN_COPY (px, s->xp_block, psize);\t\t\t\t\t\\\n    MPN_COPY (py, s->yp_block, psize);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* x must have at least as many bits as y,\t\t\t\t\\\n       high limbs must be non-zero */\t\t\t\t\t\\\n    for (j = 0; j < pieces; j++)\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tmp_ptr\tx = px+3*j;\t\t\t\t\t\t\\\n\tmp_ptr\ty = py+3*j;\t\t\t\t\t\t\\\n\tx[2] += (x[2] == 0);\t\t\t\t\t\t\\\n\ty[2] += (y[2] == 0);\t\t\t\t\t\t\\\n\tif (x[2] < y[2])\t\t\t\t\t\t\\\n\t  MP_LIMB_T_SWAP (x[2], y[2]);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, px, psize);\t\t\t\t\t\\\n    speed_operand_src (s, py, psize);\t\t\t\t\t\\\n    speed_operand_dst (s, xtmp, 
s->size);\t\t\t\t\\\n    speed_operand_dst (s, ytmp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tmp_ptr\tx = px;\t\t\t\t\t\t\t\\\n\tmp_ptr\ty = py;\t\t\t\t\t\t\t\\\n\tmp_ptr\txth = &xtmp[s->size-3];\t\t\t\t\t\\\n\tmp_ptr\tyth = &ytmp[s->size-3];\t\t\t\t\t\\\n\tj = pieces;\t\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    xth[0] = x[0], xth[1] = x[1], xth[2] = x[2];\t\t\\\n\t    yth[0] = y[0], yth[1] = y[1], yth[2] = y[2];\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t    ytmp[0] |= 1; /* y must be odd, */\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t    function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size);\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t    x += 3;\t\t\t\t\t\t\t\\\n\t    y += 3;\t\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = pieces;\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPZ_JACOBI(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mpz_t     a, b;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    mp_size_t j, pieces, psize;\t\t\t\t\t\t\\\n    mp_ptr    px, py;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1);\t\t\t\\\n    pieces = MAX (pieces, 1);\t\t\t\t\t\t\\\n    s->time_divisor = pieces;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    psize = pieces * s->size;\t\t\t\t\t\t\\\n    px = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    py = TMP_ALLOC_LIMBS (psize);\t\t\t\t\t\\\n    MPN_COPY (px, pieces==1 ? 
s->xp : s->xp_block, psize);\t\t\\\n    MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    for (j = 0; j < pieces; j++)\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tmp_ptr\tx = px+j*s->size;\t\t\t\t\t\\\n\tmp_ptr\ty = py+j*s->size;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t/* y odd */\t\t\t\t\t\t\t\\\n\ty[0] |= 1;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t/* high limbs non-zero */\t\t\t\t\t\\\n\tif (x[s->size-1] == 0) x[s->size-1] = 1;\t\t\t\\\n\tif (y[s->size-1] == 0) y[s->size-1] = 1;\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SIZ(a) = s->size;\t\t\t\t\t\t\t\\\n    SIZ(b) = s->size;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, px, psize);\t\t\t\t\t\\\n    speed_operand_src (s, py, psize);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tj = pieces;\t\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    PTR(a) = px+(j-1)*s->size;\t\t\t\t\t\\\n\t    PTR(b) = py+(j-1)*s->size;\t\t\t\t\t\\\n\t    function (a, b);\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DIVREM_2(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, xp;\t\t\t\t\t\t\t\\\n    mp_limb_t yp[2];\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* 
source is destroyed */\t\t\t\t\t\t\\\n    MPN_COPY (xp, s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* divisor must be normalized */\t\t\t\t\t\\\n    MPN_COPY (yp, s->yp_block, 2);\t\t\t\t\t\\\n    yp[1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, 2);\t\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, 0, xp, s->size, yp);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_DIVREME_2(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, xp;\t\t\t\t\t\t\t\\\n    mp_limb_t yp[2];\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 2);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* source is destroyed */\t\t\t\t\t\t\\\n    MPN_COPY (xp, s->xp, s->size);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* divisor must be normalized */\t\t\t\t\t\\\n    MPN_COPY (yp, s->yp_block, 2);\t\t\t\t\t\\\n    yp[1] |= GMP_NUMB_HIGHBIT;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_src (s, yp, 2);\t\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, xp, s->size, 
yp);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MODLIMB_INVERT(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i, j;\t\t\t\t\t\t\t\\\n    mp_ptr     xp;\t\t\t\t\t\t\t\\\n    mp_limb_t  n = 1;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    xp = s->xp_block-1;\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE);\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tj = SPEED_BLOCK_SIZE;\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    /* randomized but successively dependent */\t\t\t\\\n\t    n += (xp[j] << 1);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n\t    function (n, n);\t\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* make sure the compiler won't optimize away n */\t\t\t\\\n    noop_1 (n);\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = SPEED_BLOCK_SIZE;\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPN_SQRTREM(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, wp2;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp,\ts->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    
speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, wp2, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, wp2, s->xp, s->size);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_ROOTREM(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp, wp2;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp,\ts->size, s->align_wp);\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, wp2, s->size);\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, wp2, s->xp, s->size, s->r);\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n/* s->size controls the number of limbs in the input, s->r is the base, or\n   decimal by default. 
*/\n#define SPEED_ROUTINE_MPN_GET_STR(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned char *wp;\t\t\t\t\t\t\t\\\n    mp_size_t wn;\t\t\t\t\t\t\t\\\n    mp_ptr xp;\t\t\t\t\t\t\t\t\\\n    int base;\t\t\t\t\t\t\t\t\\\n    unsigned i;\t\t\t\t\t\t\t\t\\\n    double t;\t\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    base = s->r == 0 ? 10 : s->r;\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (base >= 2 && base <= 256);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, s->size + 1, s->align_xp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    MPN_SIZEINBASE (wn, s->xp, s->size, base);\t\t\t\t\\\n    wp = TMP_ALLOC (wn);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* use this during development to guard against overflowing wp */\t\\\n    /*\t\t\t\t\t\t\t\t\t\\\n    MPN_COPY (xp, s->xp, s->size);\t\t\t\t\t\\\n    ASSERT_ALWAYS (mpn_get_str (wp, base, xp, s->size) <= wn);\t\t\\\n    */\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, s->xp, s->size);\t\t\t\t\\\n    speed_operand_dst (s, xp, s->size);\t\t\t\t\t\\\n    speed_operand_dst (s, (mp_ptr) wp, wn/BYTES_PER_MP_LIMB);\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tMPN_COPY (xp, s->xp, s->size);\t\t\t\t\t\\\n\tfunction (wp, base, xp, s->size);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n/* s->size controls the number of digits in the input, s->r is the base, or\n   decimal by default. 
*/\n#define SPEED_ROUTINE_MPN_SET_STR(function)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned char *xp;\t\t\t\t\t\t\t\\\n    mp_ptr     wp;\t\t\t\t\t\t\t\\\n    mp_size_t  wn;\t\t\t\t\t\t\t\\\n    unsigned   i;\t\t\t\t\t\t\t\\\n    int        base;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 1);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    base = s->r == 0 ? 10 : s->r;\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (base >= 2 && base <= 256);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    xp = TMP_ALLOC (s->size);\t\t\t\t\t\t\\\n    for (i = 0; i < s->size; i++)\t\t\t\t\t\\\n      xp[i] = s->xp[i] % base;\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly)) \\\n      / BITS_PER_MP_LIMB + 2;\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* use this during development to check wn is big enough */\t\t\\\n    /*\t\t\t\t\t\t\t\t\t\\\n    ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn);\t\t\\\n    */\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB);\t\\\n    speed_operand_dst (s, wp, wn);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function (wp, xp, s->size, base);\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n\n/* \"call\" should do \"count_foo_zeros(c,n)\".\n   Give leading=1 if foo is leading zeros, leading=0 for trailing.\n   Give zero=1 if n=0 is allowed in the call, zero=0 if not.  
*/\n\n#define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr     xp;\t\t\t\t\t\t\t\\\n    int        i, c;\t\t\t\t\t\t\t\\\n    unsigned   j;\t\t\t\t\t\t\t\\\n    mp_limb_t  n;\t\t\t\t\t\t\t\\\n    double     t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp);\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    if (! speed_routine_count_zeros_setup (s, xp, leading, zero))\t\\\n      return -1.0;\t\t\t\t\t\t\t\\\n    speed_operand_src (s, xp, SPEED_BLOCK_SIZE);\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    c = 0;\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    j = s->reps;\t\t\t\t\t\t\t\\\n    do {\t\t\t\t\t\t\t\t\\\n      for (i = 0; i < SPEED_BLOCK_SIZE; i++)\t\t\t\t\\\n\t{\t\t\t\t\t\t\t\t\\\n\t  n = xp[i];\t\t\t\t\t\t\t\\\n\t  n ^= c;\t\t\t\t\t\t\t\\\n\n#define SPEED_ROUTINE_COUNT_ZEROS_B()\t\t\t\t\t\\\n\t}\t\t\t\t\t\t\t\t\\\n    } while (--j != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* don't let c go dead */\t\t\t\t\t\t\\\n    noop_1 (c);\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = SPEED_BLOCK_SIZE;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n\n#define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero)\t\t\\\n  do {\t\t\t\t\t\t\t\t\t\\\n    SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero);\t\t\t\\\n    call;\t\t\t\t\t\t\t\t\\\n    SPEED_ROUTINE_COUNT_ZEROS_B ();\t\t\t\t\t\\\n  } while (0)\t\t\t\t\t\t\t\t\\\n\n#define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero)\t\t\t\\\n  SPEED_ROUTINE_COUNT_ZEROS_C (call, 1, zero)\n#define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun)\t\t\t\t\\\n  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1, 0)\n\n#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero)\t\t\t\\\n  SPEED_ROUTINE_COUNT_ZEROS_C (call, 0, 
zero)\n#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(call)\t\t\t\\\n  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0, 0)\n\n\n#define SPEED_ROUTINE_INVERT_LIMB_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned   i, j;\t\t\t\t\t\t\t\\\n    mp_limb_t  d, dinv=0;\t\t\t\t\t\t\\\n    mp_ptr     xp = s->xp_block - 1;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    s->time_divisor = SPEED_BLOCK_SIZE;\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      {\t\t\t\t\t\t\t\t\t\\\n\tj = SPEED_BLOCK_SIZE;\t\t\t\t\t\t\\\n\tdo\t\t\t\t\t\t\t\t\\\n\t  {\t\t\t\t\t\t\t\t\\\n\t    d = dinv ^ xp[j];\t\t\t\t\t\t\\\n\t    d |= GMP_LIMB_HIGHBIT;\t\t\t\t\t\\\n\t    do { call; } while (0);\t\t\t\t\t\\\n\t  }\t\t\t\t\t\t\t\t\\\n\twhile (--j != 0);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    /* don't let the compiler optimize everything away */\t\t\\\n    noop_1 (dinv);\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    return speed_endtime();\t\t\t\t\t\t\\\n  }\n\n\n#endif\n\n\n#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    speed_starttime ();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      function ();\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    return speed_endtime ();\t\t\t\t\t\t\\\n  }\n\n\n#define SPEED_ROUTINE_MPN_ZERO_CALL(call)\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    mp_ptr    wp;\t\t\t\t\t\t\t\\\n    unsigned  i;\t\t\t\t\t\t\t\\\n    double    t;\t\t\t\t\t\t\t\\\n    TMP_DECL;\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    SPEED_RESTRICT_COND (s->size >= 0);\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_MARK;\t\t\t\t\t\t\t\t\\\n    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);\t\t\t\\\n    speed_operand_dst (s, wp, s->size);\t\t\t\t\t\\\n    speed_cache_fill (s);\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    speed_starttime 
();\t\t\t\t\t\t\t\\\n    i = s->reps;\t\t\t\t\t\t\t\\\n    do\t\t\t\t\t\t\t\t\t\\\n      call;\t\t\t\t\t\t\t\t\\\n    while (--i != 0);\t\t\t\t\t\t\t\\\n    t = speed_endtime ();\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n    TMP_FREE;\t\t\t\t\t\t\t\t\\\n    return t;\t\t\t\t\t\t\t\t\\\n  }\n\n#define SPEED_ROUTINE_MPN_ZERO(function)\t\t\t\t\\\n  SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))\n"
  },
  {
    "path": "tune/time.c",
    "content": "/* Time routines for speed measurments.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n\n/* Usage:\n\n   The code in this file implements the lowest level of time measuring,\n   simple one-time measuring of time between two points.\n\n   void speed_starttime (void)\n   double speed_endtime (void)\n       Call speed_starttime to start measuring, and then call speed_endtime\n       when done.\n\n       speed_endtime returns the time taken, in seconds.  Or if the timebase\n       is in CPU cycles and the CPU frequency is unknown then speed_endtime\n       returns cycles.  Applications can identify the cycles return by\n       checking for speed_cycletime (described below) equal to 1.0.\n\n       If some sort of temporary glitch occurs then speed_endtime returns\n       0.0.  Currently this is for various cases where a negative time has\n       occurred.  This unfortunately occurs with getrusage on some systems,\n       and with the hppa cycle counter on hpux.\n\n   double speed_cycletime\n       The time in seconds for each CPU cycle.  
For example on a 100 MHz CPU\n       this would be 1.0e-8.\n\n       If the CPU frequency is unknown, then speed_cycletime is either 0.0\n       or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's\n       1.0 when speed_endtime is returning cycles.\n\n       It may be noted that \"speed_endtime() / speed_cycletime\" gives a\n       measured time in cycles, irrespective of whether speed_endtime is\n       returning cycles or seconds.  (Assuming cycles can be had, ie. it's\n       either cycles already or the cpu frequency is known.  See also\n       speed_cycletime_need_cycles below.)\n\n   double speed_unittime\n       The unit of time measurement accuracy for the timing method in use.\n       This is in seconds or cycles, as per speed_endtime.\n\n   char speed_time_string[]\n       A null-terminated string describing the time method in use.\n\n   void speed_time_init (void)\n       Initialize time measuring.  speed_starttime() does this\n       automatically, so it's only needed if an application wants to inspect\n       the above global variables before making a measurement.\n\n   int speed_precision\n       The intended accuracy of time measurements.  speed_measure() in\n       common.c for instance runs target routines with enough repetitions so\n       it takes at least \"speed_unittime * speed_precision\" (this expression\n       works for both cycles or seconds from speed_endtime).\n\n       A program can provide an option so the user to set speed_precision.\n       If speed_precision is zero when speed_time_init or speed_starttime\n       first run then it gets a default based on the measuring method\n       chosen.  (More precision for higher accuracy methods.)\n\n   void speed_cycletime_need_seconds (void)\n       Call this to demand that speed_endtime will return seconds, and not\n       cycles.  
If only cycles are available then an error is printed and\n       the program exits.\n\n   void speed_cycletime_need_cycles (void)\n       Call this to demand that speed_cycletime is non-zero, so that\n       \"speed_endtime() / speed_cycletime\" will give times in cycles.\n\n\n\n   Notes:\n\n   Various combinations of cycle counter, read_real_time(), getrusage(),\n   gettimeofday() and times() can arise, according to which are available\n   and their precision.\n\n\n   Allowing speed_endtime() to return either seconds or cycles is only a\n   slight complication and makes it possible for the speed program to do\n   some sensible things without demanding the CPU frequency.  If seconds are\n   being measured then it can always print seconds, and if cycles are being\n   measured then it can always print them without needing to know how long\n   they are.  Also the tune program doesn't care at all what the units are.\n\n   GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c\n   fail.  This will be needed if times in seconds are wanted but a cycle\n   counter is being used, or if times in cycles are wanted but getrusage or\n   another seconds based timer is in use.\n\n   If the measuring method uses a cycle counter but supplements it with\n   getrusage or the like, then knowing the CPU frequency is mandatory since\n   the code compares values from the two.\n\n\n   Not done:\n\n   Solaris gethrtime() seems no more than a slow way to access the Sparc V9\n   cycle counter.  gethrvtime() seems to be relevant only to light weight\n   processes, it doesn't for instance give nanosecond virtual time.  
So\n   neither of these are used.\n\n\n   Bugs:\n\n   getrusage_microseconds_p is fundamentally flawed, getrusage and\n   gettimeofday can have resolutions other than clock ticks or microseconds,\n   for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.\n\n\n   Enhancements:\n\n   The SGI hardware counter has 64 bits on some machines, which could be\n   used when available.  But perhaps 32 bits is enough range, and then rely\n   on the getrusage supplement.\n\n   Maybe getrusage (or times) should be used as a supplement for any\n   wall-clock measuring method.  Currently a wall clock with a good range\n   (eg. a 64-bit cycle counter) is used without a supplement.\n\n   On PowerPC the timebase registers could be used, but would have to do\n   something to find out the speed.  On 6xx chips it's normally 1/4 bus\n   speed, on 4xx chips it's either that or an external clock.  Measuring\n   against gettimeofday might be ok.  */\n\n\n#include \"config.h\"\n\n#include <errno.h>\n#include <setjmp.h>\n#include <signal.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <string.h>\n#include <stdlib.h> /* for getenv() */\n\n#if HAVE_FCNTL_H\n#include <fcntl.h>  /* for open() */\n#endif\n\n#if HAVE_STDINT_H\n#include <stdint.h> /* for uint64_t */\n#endif\n\n#if HAVE_UNISTD_H\n#include <unistd.h> /* for sysconf() */\n#endif\n\n#include <sys/types.h>\n\n#if TIME_WITH_SYS_TIME\n# include <sys/time.h>  /* for struct timeval */\n# include <time.h>\n#else\n# if HAVE_SYS_TIME_H\n#  include <sys/time.h>\n# else\n#  include <time.h>\n# endif\n#endif\n\n#if HAVE_SYS_MMAN_H\n#include <sys/mman.h>      /* for mmap() */\n#endif\n\n#if HAVE_SYS_RESOURCE_H\n#include <sys/resource.h>  /* for struct rusage */\n#endif\n\n#if HAVE_SYS_SYSTEMCFG_H\n#include <sys/systemcfg.h> /* for RTC_POWER on AIX */\n#endif\n\n#if HAVE_SYS_TIMES_H\n#include <sys/times.h>  /* for times() and struct tms */\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\n#if 1 && defined( _MSC_VER)\n#define 
HAVE_GETRUSAGE      1\n#define HAVE_GETTIMEOFDAY   1\n#include \"getrusage.h\"\n#include \"gettimeofday.h\"\n#endif\n\n#include \"speed.h\"\n\n\n/* strerror is only used for some stuff on newish systems, no need to have a\n   proper replacement */\n#if ! HAVE_STRERROR\n#define strerror(n)  \"<strerror not available>\"\n#endif\n\n\nchar    speed_time_string[256];\nint     speed_precision = 0;\ndouble  speed_unittime;\ndouble  speed_cycletime = 0.0;\n\n\n/* don't rely on \"unsigned\" to \"double\" conversion, it's broken in SunOS 4\n   native cc */\n#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)\n\n#define M_2POW32  4294967296.0\n#define M_2POW64  (M_2POW32 * M_2POW32)\n\n\n/* Conditionals for the time functions available are done with normal C\n   code, which is a lot easier than wildly nested preprocessor directives.\n\n   The choice of what to use is partly made at run-time, according to\n   whether the cycle counter works and the measured accuracy of getrusage\n   and gettimeofday.\n\n   A routine that's not available won't be getting called, but is an abort()\n   to be sure it isn't called mistakenly.\n\n   It can be assumed that if a function exists then its data type will, but\n   if the function doesn't then the data type might or might not exist, so\n   the type can't be used unconditionally.  The \"struct_rusage\" etc macros\n   provide dummies when the respective function doesn't exist. */\n\n\n#if HAVE_SPEED_CYCLECOUNTER\nstatic const int have_cycles = HAVE_SPEED_CYCLECOUNTER;\n#else\nstatic const int have_cycles = 0;\n#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)\n#endif\n\n/* \"stck\" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12\n   microseconds.  Same #ifdefs here as in longlong.h.  
*/\n#if defined (__GNUC__) && (defined (__i370__) || defined (__s390__) || defined (__mvs__))\nstatic const int  have_stck = 1;\nstatic const int  use_stck = 1;  /* always use when available */\ntypedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */\n#define STCK(timestamp)                 \\\n  do {                                  \\\n    asm (\"stck %0\" : \"=m\" (timestamp)); \\\n  } while (0)\n#else\nstatic const int  have_stck = 0;\nstatic const int  use_stck = 0;\ntypedef unsigned long  stck_t;   /* dummy */\n#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)\n#endif\n#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */\n\n/* mftb\n   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu\n   and a loop (see powerpc64.asm).  */\n#if HAVE_HOST_CPU_FAMILY_powerpc\nstatic const int  have_mftb = 1;\n#if defined (__GNUC__)\n#define MFTB(a)                         \\\n  do {                                  \\\n    unsigned  __h1, __l, __h2;          \\\n    do {                                \\\n      asm volatile (\"mftbu %0\\n\"        \\\n                    \"mftb  %1\\n\"        \\\n                    \"mftbu %2\"          \\\n                    : \"=r\" (__h1),      \\\n                      \"=r\" (__l),       \\\n                      \"=r\" (__h2));     \\\n    } while (__h1 != __h2);             \\\n    a[0] = __l;                         \\\n    a[1] = __h1;                        \\\n  } while (0)\n#else\n#define MFTB(a)   mftb_function (a)\n#endif\n#else /* ! powerpc */\nstatic const int  have_mftb = 0;\n#define MFTB(a)                         \\\n  do {                                  \\\n    a[0] = 0;                           \\\n    a[1] = 0;                           \\\n    ASSERT_FAIL (mftb not available);   \\\n  } while (0)\n#endif\n\n/* Unicos 10.X has syssgi(), but not mmap(). 
*/\n#if HAVE_SYSSGI && HAVE_MMAP\nstatic const int  have_sgi = 1;\n#else\nstatic const int  have_sgi = 0;\n#endif\n\n#if HAVE_READ_REAL_TIME\nstatic const int have_rrt = 1;\n#else\nstatic const int have_rrt = 0;\n#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)\n#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)\n#define RTC_POWER     1\n#define RTC_POWER_PC  2\n#define timebasestruct_t   struct timebasestruct_dummy\nstruct timebasestruct_dummy {\n  int             flag;\n  unsigned int    tb_high;\n  unsigned int    tb_low;\n};\n#endif\n\n#if HAVE_CLOCK_GETTIME\nstatic const int have_cgt = 1;\n#define struct_timespec  struct timespec\n#else\nstatic const int have_cgt = 0;\n#define struct_timespec       struct timespec_dummy\n#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)\n#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)\n#endif\n\n#if HAVE_GETRUSAGE\nstatic const int have_grus = 1;\n#define struct_rusage   struct rusage\n#else\nstatic const int have_grus = 0;\n#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)\n#define struct_rusage    struct rusage_dummy\n#endif\n\n#if HAVE_GETTIMEOFDAY\nstatic const int have_gtod = 1;\n#define struct_timeval   struct timeval\n#else\nstatic const int have_gtod = 0;\n#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)\n#define struct_timeval   struct timeval_dummy\n#endif\n\n#if HAVE_TIMES\nstatic const int have_times = 1;\n#define struct_tms   struct tms\n#else\nstatic const int have_times = 0;\n#define times(tms)   ASSERT_FAIL (times not available)\n#define struct_tms   struct tms_dummy\n#endif\n\nstruct tms_dummy {\n  long  tms_utime;\n};\nstruct timeval_dummy {\n  long  tv_sec;\n  long  tv_usec;\n};\nstruct rusage_dummy {\n  struct_timeval ru_utime;\n};\nstruct timespec_dummy {\n  long  tv_sec;\n  long  tv_nsec;\n};\n\nstatic int  use_cycles;\nstatic int  use_mftb;\nstatic int  
use_sgi;\nstatic int  use_rrt;\nstatic int  use_cgt;\nstatic int  use_gtod;\nstatic int  use_grus;\nstatic int  use_times;\nstatic int  use_tick_boundary;\n\nstatic unsigned         start_cycles[2];\nstatic stck_t           start_stck;\nstatic unsigned         start_mftb[2];\nstatic unsigned         start_sgi;\nstatic timebasestruct_t start_rrt;\nstatic struct_timespec  start_cgt;\nstatic struct_rusage    start_grus;\nstatic struct_timeval   start_gtod;\nstatic struct_tms       start_times;\n\nstatic double  cycles_limit = 1e100;\nstatic double  mftb_unittime;\nstatic double  sgi_unittime;\nstatic double  cgt_unittime;\nstatic double  grus_unittime;\nstatic double  gtod_unittime;\nstatic double  times_unittime;\n\n/* for RTC_POWER format, ie. seconds and nanoseconds */\n#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)\n\n\n/* Return a string representing a time in seconds, nicely formatted.\n   Eg. \"10.25ms\".  */\nchar *\nunittime_string (double t)\n{\n  static char  buf[128];\n\n  const char  *unit;\n  int         prec;\n\n  /* choose units and scale */\n  if (t < 1e-6)\n    t *= 1e9, unit = \"ns\";\n  else if (t < 1e-3)\n    t *= 1e6, unit = \"us\";\n  else if (t < 1.0)\n    t *= 1e3, unit = \"ms\";\n  else\n    unit = \"s\";\n\n  /* want 4 significant figures */\n  if (t < 1.0)\n    prec = 4;\n  else if (t < 10.0)\n    prec = 3;\n  else if (t < 100.0)\n    prec = 2;\n  else\n    prec = 1;\n\n  sprintf (buf, \"%.*f%s\", prec, t, unit);\n  return buf;\n}\n\n\nstatic jmp_buf  cycles_works_buf;\n\nstatic void\ncycles_works_handler (int sig)\n{\n  longjmp (cycles_works_buf, 1);\n}\n\nint\ncycles_works_p (void)\n{\n  static int  result = -1;\n\n  if (result != -1)\n    goto done;\n\n#ifdef SIGILL\n  {\n    void (*old_handler)(int);\n    unsigned  cycles[2];\n\n    old_handler = signal (SIGILL, cycles_works_handler);\n    if (old_handler == SIG_ERR)\n      {\n        if (speed_option_verbose)\n          printf (\"cycles_works_p(): SIGILL not 
supported, assuming speed_cyclecounter() works\\n\");\n        goto yes;\n      }\n    if (setjmp (cycles_works_buf))\n      {\n        if (speed_option_verbose)\n          printf (\"cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\\n\");\n        result = 0;\n        goto done;\n      }\n    speed_cyclecounter (cycles);\n    signal (SIGILL, old_handler);\n    if (speed_option_verbose)\n      printf (\"cycles_works_p(): speed_cyclecounter() works\\n\");\n  }\n#else\n\n  if (speed_option_verbose)\n    printf (\"cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\\n\");\n  goto yes;\n#endif\n\n yes:\n  result = 1;\n\n done:\n  return result;\n}\n\n\n/* The number of clock ticks per second, but looking at sysconf rather than\n   just CLK_TCK, where possible.  */\nlong\nclk_tck (void)\n{\n  static long  result = -1L;\n  if (result != -1L)\n    return result;\n\n#if HAVE_SYSCONF\n  result = sysconf (_SC_CLK_TCK);\n  if (result != -1L)\n    {\n      if (speed_option_verbose)\n        printf (\"sysconf(_SC_CLK_TCK) is %ld per second\\n\", result);\n      return result;\n    }\n\n  fprintf (stderr,\n           \"sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\\n\");\n#endif\n\n#ifdef CLK_TCK\n  result = CLK_TCK;\n  if (speed_option_verbose)\n    printf (\"CLK_TCK is %ld per second\\n\", result);\n  return result;\n#else\n  fprintf (stderr, \"CLK_TCK not defined, cannot continue\\n\");\n  abort ();\n#endif\n}\n\n\n/* If two times can be observed less than half a clock tick apart, then\n   assume \"get\" is microsecond accurate.\n\n   Two times only 1 microsecond apart are not believed, since some kernels\n   take it upon themselves to ensure gettimeofday doesn't return the same\n   value twice, for the benefit of applications using it for a timestamp.\n   This is obviously very stupid given the speed of CPUs these days.\n\n   Making \"reps\" many calls to noop_1() is designed to waste some CPU, with\n   a view to getting 
measurements 2 microseconds (or more) apart.  \"reps\" is\n   increased progressively until such a period is seen.\n\n   The outer loop \"attempts\" are just to allow for any random nonsense or\n   system load upsetting the measurements (ie. making two successive calls\n   to \"get\" come out as a longer interval than normal).\n\n   Bugs:\n\n   The assumption that any interval less than a half tick implies\n   microsecond resolution is obviously fairly rash, the true resolution\n   could be anything between a microsecond and that half tick.  Perhaps\n   something special would have to be done on a system where this is the\n   case, since there's no obvious reliable way to detect it\n   automatically.  */\n\n#define MICROSECONDS_P(name, type, get, sec, usec)                      \\\n  {                                                                     \\\n    static int  result = -1;                                            \\\n    type      st, et;                                                   \\\n    long      dt, half_tick;                                            \\\n    unsigned  attempt, reps, i, j;                                      \\\n                                                                        \\\n    if (result != -1)                                                   \\\n      return result;                                                    \\\n                                                                        \\\n    result = 0;                                                         \\\n    half_tick = (1000000L / clk_tck ()) / 2;                            \\\n                                                                        \\\n    for (attempt = 0; attempt < 5; attempt++)                           \\\n      {                                                                 \\\n        reps = 0;                                                       \\\n        for (;;)                                                   
     \\\n          {                                                             \\\n            get (st);                                                   \\\n            for (i = 0; i < reps; i++)                                  \\\n              for (j = 0; j < 100; j++)                                 \\\n                noop_1 (CNST_LIMB(0));                                  \\\n            get (et);                                                   \\\n                                                                        \\\n            dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \\\n                                                                        \\\n            if (speed_option_verbose >= 2)                              \\\n              printf (\"%s attempt=%u, reps=%u, dt=%ld\\n\",               \\\n                      name, attempt, reps, dt);                         \\\n                                                                        \\\n            if (dt >= 2)                                                \\\n              break;                                                    \\\n                                                                        \\\n            reps = (reps == 0 ? 
1 : 2*reps);                            \\\n            if (reps == 0)                                              \\\n              break;  /* uint overflow, not normal */                   \\\n          }                                                             \\\n                                                                        \\\n        if (dt < half_tick)                                             \\\n          {                                                             \\\n            result = 1;                                                 \\\n            break;                                                      \\\n          }                                                             \\\n      }                                                                 \\\n                                                                        \\\n    if (speed_option_verbose)                                           \\\n      {                                                                 \\\n        if (result)                                                     \\\n          printf (\"%s is microsecond accurate\\n\", name);                \\\n        else                                                            \\\n          printf (\"%s is only %s clock tick accurate\\n\",                \\\n                  name, unittime_string (1.0/clk_tck()));               \\\n      }                                                                 \\\n    return result;                                                      \\\n  }\n\n\nint\ngettimeofday_microseconds_p (void)\n{\n#define call_gettimeofday(t)   gettimeofday (&(t), NULL)\n#define timeval_tv_sec(t)      ((t).tv_sec)\n#define timeval_tv_usec(t)     ((t).tv_usec)\n  MICROSECONDS_P (\"gettimeofday\", struct_timeval,\n                  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);\n}\n\nint\ngetrusage_microseconds_p (void)\n{\n#define call_getrusage(t)   getrusage (0, &(t))\n#define 
rusage_tv_sec(t)    ((t).ru_utime.tv_sec)\n#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)\n  MICROSECONDS_P (\"getrusage\", struct_rusage,\n                  call_getrusage, rusage_tv_sec, rusage_tv_usec);\n}\n\n/* Test whether getrusage goes backwards, return non-zero if it does\n   (suggesting it's flawed).\n\n   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's\n   microsecond accurate, but has been seen remaining unchanged after many\n   microseconds have elapsed.  It also regularly goes backwards by 1000 to\n   5000 usecs, this has been seen after between 500 and 4000 attempts taking\n   perhaps 0.03 seconds.  We consider this too broken for good measuring.\n   We used to have configure pretend getrusage didn't exist on this system,\n   but a runtime test should be more reliable, since we imagine the problem\n   is not confined to just this exact system tuple.  */\n\nint\ngetrusage_backwards_p (void)\n{\n  static int result = -1;\n  struct_rusage  start, prev, next;\n  long  d;\n  int   i;\n\n  if (result != -1)\n    return result;\n\n  getrusage (0, &start);\n  memcpy (&next, &start, sizeof (next));\n\n  result = 0;\n  i = 0;\n  for (;;)\n    {\n      memcpy (&prev, &next, sizeof (prev));\n      getrusage (0, &next);\n\n      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec\n          || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec\n              && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))\n        {\n          if (speed_option_verbose)\n            printf (\"getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\\n\",\n                    i,\n                    prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,\n                    next.ru_utime.tv_sec, next.ru_utime.tv_usec);\n          result = 1;\n          break;\n        }\n\n      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000\n         attempts, whichever comes first */\n      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)\n        + 
(next.ru_utime.tv_usec - start.ru_utime.tv_usec);\n      i++;\n      if (i > 50000 || (i > 1000 && d > 100000))\n        break;\n    }\n\n  return result;\n}\n\n/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version\n   of glibc (some time post 2.2).\n\n   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes\n   defined, but returning -1 for an error).  */\n\n#ifdef CLOCK_PROCESS_CPUTIME_ID\n# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID\n#else\n# ifdef CLOCK_VIRTUAL\n#  define CGT_ID       CLOCK_VIRTUAL\n# endif\n#endif\n#ifdef CGT_ID\nconst int  have_cgt_id = 1;\n#else\nconst int  have_cgt_id = 0;\n# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)\n#endif\n\nint\ncgt_works_p (void)\n{\n  static int  result = -1;\n  struct_timespec  unit;\n\n  if (! have_cgt)\n    return 0;\n\n  if (! have_cgt_id)\n    {\n      if (speed_option_verbose)\n        printf (\"clock_gettime don't know what ID to use\\n\");\n      result = 0;\n      return result;\n    }\n\n  if (result != -1)\n    return result;\n\n  /* trial run to see if it works */\n  if (clock_gettime (CGT_ID, &unit) != 0)\n    {\n      if (speed_option_verbose)\n        printf (\"clock_gettime id=%d error: %s\\n\", CGT_ID, strerror (errno));\n      result = 0;\n      return result;\n    }\n\n  /* get the resolution */\n  if (clock_getres (CGT_ID, &unit) != 0)\n    {\n      if (speed_option_verbose)\n        printf (\"clock_getres id=%d error: %s\\n\", CGT_ID, strerror (errno));\n      result = 0;\n      return result;\n    }\n\n  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;\n  printf (\"clock_gettime is %s accurate\\n\",\n          unittime_string (cgt_unittime));\n  result = 1;\n  return result;\n}\n\n\nstatic double\nfreq_measure_mftb_one (void)\n{\n#define call_gettimeofday(t)   gettimeofday (&(t), NULL)\n#define timeval_tv_sec(t)      ((t).tv_sec)\n#define timeval_tv_usec(t)     ((t).tv_usec)\n  FREQ_MEASURE_ONE (\"mftb\", struct_timeval,\n          
          call_gettimeofday, MFTB,\n                    timeval_tv_sec, timeval_tv_usec);\n}\n\n\nstatic jmp_buf  mftb_works_buf;\n\nstatic void\nmftb_works_handler (int sig)\n{\n  longjmp (mftb_works_buf, 1);\n}\n\nint\nmftb_works_p (void)\n{\n  unsigned   a[2];\n  void (*old_handler)(int);\n  double     cycletime;\n\n  /* suppress a warning about a[] unused */\n  a[0] = 0;\n\n  if (! have_mftb)\n    return 0;\n\n#ifdef SIGILL\n  old_handler = signal (SIGILL, mftb_works_handler);\n  if (old_handler == SIG_ERR)\n    {\n      if (speed_option_verbose)\n        printf (\"mftb_works_p(): SIGILL not supported, assuming mftb works\\n\");\n      return 1;\n    }\n  if (setjmp (mftb_works_buf))\n    {\n      if (speed_option_verbose)\n        printf (\"mftb_works_p(): SIGILL during mftb, so doesn't work\\n\");\n      return 0;\n    }\n  MFTB (a);\n  signal (SIGILL, old_handler);\n  if (speed_option_verbose)\n    printf (\"mftb_works_p(): mftb works\\n\");\n#else\n\n  if (speed_option_verbose)\n    printf (\"mftb_works_p(): SIGILL not defined, assuming mftb works\\n\");\n#endif\n\n#if ! HAVE_GETTIMEOFDAY\n  if (speed_option_verbose)\n    printf (\"mftb_works_p(): no gettimeofday available to measure mftb\\n\");\n  return 0;\n#endif\n\n  /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on\n     other chips it can be driven from an external clock. 
*/\n  cycletime = freq_measure (\"mftb\", freq_measure_mftb_one);\n  if (cycletime == -1.0)\n    {\n      if (speed_option_verbose)\n        printf (\"mftb_works_p(): cannot measure mftb period\\n\");\n      return 0;\n    }\n\n  mftb_unittime = cycletime;\n  return 1;\n}\n\n\nvolatile unsigned  *sgi_addr;\n\nint\nsgi_works_p (void)\n{\n#if HAVE_SYSSGI && HAVE_MMAP\n  static int  result = -1;\n\n  size_t          pagesize, offset;\n  __psunsigned_t  phys, physpage;\n  void            *virtpage;\n  unsigned        period_picoseconds;\n  int             size, fd;\n\n  if (result != -1)\n    return result;\n\n  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);\n  if (phys == (__psunsigned_t) -1)\n    {\n      /* ENODEV is the error when a counter is not available */\n      if (speed_option_verbose)\n        printf (\"syssgi SGI_QUERY_CYCLECNTR error: %s\\n\", strerror (errno));\n      result = 0;\n      return result;\n    }\n  sgi_unittime = period_picoseconds * 1e-12;\n\n  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.\n     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no\n     obvious way to identify that without SGI_CYCLECNTR_SIZE.  
*/\n#ifdef SGI_CYCLECNTR_SIZE\n  size = syssgi (SGI_CYCLECNTR_SIZE);\n  if (size == -1)\n    {\n      if (speed_option_verbose)\n        {\n          printf (\"syssgi SGI_CYCLECNTR_SIZE error: %s\\n\", strerror (errno));\n          printf (\"    will assume size==4\\n\");\n        }\n      size = 32;\n    }\n#else\n  size = 32;\n#endif\n\n  if (size < 32)\n    {\n      printf (\"syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\\n\", size);\n      result = 0;\n      return result;\n    }\n\n  pagesize = getpagesize();\n  offset = (size_t) phys & (pagesize-1);\n  physpage = phys - offset;\n\n  /* shouldn't cross over a page boundary */\n  ASSERT_ALWAYS (offset + size/8 <= pagesize);\n\n  fd = open(\"/dev/mmem\", O_RDONLY);\n  if (fd == -1)\n    {\n      if (speed_option_verbose)\n        printf (\"open /dev/mmem: %s\\n\", strerror (errno));\n      result = 0;\n      return result;\n    }\n\n  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);\n  if (virtpage == (void *) -1)\n    {\n      if (speed_option_verbose)\n        printf (\"mmap /dev/mmem: %s\\n\", strerror (errno));\n      result = 0;\n      return result;\n    }\n\n  /* address of least significant 4 bytes, knowing mips is big endian */\n  sgi_addr = (unsigned *) ((char *) virtpage + offset\n                           + size/8 - sizeof(unsigned));\n  result = 1;\n  return result;\n\n#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */\n  return 0;\n#endif\n}\n\n\n#define DEFAULT(var,n)  \\\n  do {                  \\\n    if (! 
(var))        \\\n      (var) = (n);      \\\n  } while (0)\n\nvoid\nspeed_time_init (void)\n{\n  double supplement_unittime = 0.0;\n\n  static int  speed_time_initialized = 0;\n  if (speed_time_initialized)\n    return;\n  speed_time_initialized = 1;\n\n  speed_cycletime_init ();\n\n  if (have_cycles && cycles_works_p ())\n    {\n      use_cycles = 1;\n      DEFAULT (speed_cycletime, 1.0);\n      speed_unittime = speed_cycletime;\n      DEFAULT (speed_precision, 1000000);\n      strcpy (speed_time_string, \"CPU cycle counter\");\n\n      /* only used if a supplementary method is chosen below */\n      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0\n        * speed_cycletime;\n\n      if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())\n        {\n          /* this is a good combination */\n          use_grus = 1;\n          supplement_unittime = grus_unittime = 1.0e-6;\n          strcpy (speed_time_string, \"CPU cycle counter, supplemented by microsecond getrusage()\");\n        }\n      else if (have_cycles == 1)\n        {\n          /* When speed_cyclecounter has a limited range, look for something\n             to supplement it. 
*/\n          if (have_gtod && gettimeofday_microseconds_p())\n            {\n              use_gtod = 1;\n              supplement_unittime = gtod_unittime = 1.0e-6;\n              strcpy (speed_time_string, \"CPU cycle counter, supplemented by microsecond gettimeofday()\");\n            }\n          else if (have_grus)\n            {\n              use_grus = 1;\n              supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();\n              sprintf (speed_time_string, \"CPU cycle counter, supplemented by %s clock tick getrusage()\", unittime_string (supplement_unittime));\n            }\n          else if (have_times)\n            {\n              use_times = 1;\n              supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();\n              sprintf (speed_time_string, \"CPU cycle counter, supplemented by %s clock tick times()\", unittime_string (supplement_unittime));\n            }\n          else if (have_gtod)\n            {\n              use_gtod = 1;\n              supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();\n              sprintf (speed_time_string, \"CPU cycle counter, supplemented by %s clock tick gettimeofday()\", unittime_string (supplement_unittime));\n            }\n          else\n            {\n              fprintf (stderr, \"WARNING: cycle counter is 32 bits and there's no other functions.\\n\");\n              fprintf (stderr, \"    Wraparounds may produce bad results on long measurements.\\n\");\n            }\n        }\n\n      if (use_grus || use_times || use_gtod)\n        {\n          /* must know cycle period to compare cycles to other measuring\n             (via cycles_limit) */\n          speed_cycletime_need_seconds ();\n\n          if (speed_precision * supplement_unittime > cycles_limit)\n            {\n              fprintf (stderr, \"WARNING: requested precision can't always be achieved due to limited range\\n\");\n              fprintf (stderr, \"    cycle counter and limited 
precision supplemental method\\n\");\n              fprintf (stderr, \"    (%s)\\n\", speed_time_string);\n            }\n        }\n    }\n  else if (have_stck)\n    {\n      strcpy (speed_time_string, \"STCK timestamp\");\n      /* stck is in units of 2^-12 microseconds, which is very likely higher\n         resolution than a cpu cycle */\n      if (speed_cycletime == 0.0)\n        speed_cycletime_fail\n          (\"Need to know CPU frequency for effective stck unit\");\n      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);\n      DEFAULT (speed_precision, 10000);\n    }\n  else if (have_mftb && mftb_works_p ())\n    {\n      use_mftb = 1;\n      DEFAULT (speed_precision, 10000);\n      speed_unittime = mftb_unittime;\n      sprintf (speed_time_string, \"mftb counter (%s)\",\n               unittime_string (speed_unittime));\n    }\n  else if (have_sgi && sgi_works_p ())\n    {\n      use_sgi = 1;\n      DEFAULT (speed_precision, 10000);\n      speed_unittime = sgi_unittime;\n      sprintf (speed_time_string, \"syssgi() mmap counter (%s), supplemented by millisecond getrusage()\",\n               unittime_string (speed_unittime));\n      /* supplemented with getrusage, which we assume to have 1ms resolution */\n      use_grus = 1;\n      supplement_unittime = 1e-3;\n    }\n  else if (have_rrt)\n    {\n      timebasestruct_t  t;\n      use_rrt = 1;\n      DEFAULT (speed_precision, 10000);\n      read_real_time (&t, sizeof(t));\n      switch (t.flag) {\n      case RTC_POWER:\n        /* FIXME: What's the actual RTC resolution? 
*/\n        speed_unittime = 1e-7;\n        strcpy (speed_time_string, \"read_real_time() power nanoseconds\");\n        break;\n      case RTC_POWER_PC:\n        t.tb_high = 1;\n        t.tb_low = 0;\n        time_base_to_time (&t, sizeof(t));\n        speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;\n        sprintf (speed_time_string, \"%s read_real_time() powerpc ticks\",\n                 unittime_string (speed_unittime));\n        break;\n      default:\n        fprintf (stderr, \"ERROR: Unrecognised timebasestruct_t flag=%d\\n\",\n                 t.flag);\n        abort ();\n      }\n    }\n  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)\n    {\n      /* use clock_gettime if microsecond or better resolution */\n    choose_cgt:\n      use_cgt = 1;\n      speed_unittime = cgt_unittime;\n      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));\n      strcpy (speed_time_string, \"microsecond accurate getrusage()\");\n    }\n  else if (have_times && clk_tck() > 1000000)\n    {\n      /* Cray vector systems have times() which is clock cycle resolution\n         (eg. 450 MHz).  */\n      DEFAULT (speed_precision, 10000);\n      goto choose_times;\n    }\n  else if (have_grus && getrusage_microseconds_p() && ! 
getrusage_backwards_p())\n    {\n      use_grus = 1;\n      speed_unittime = grus_unittime = 1.0e-6;\n      DEFAULT (speed_precision, 1000);\n      strcpy (speed_time_string, \"microsecond accurate getrusage()\");\n    }\n  else if (have_gtod && gettimeofday_microseconds_p())\n    {\n      use_gtod = 1;\n      speed_unittime = gtod_unittime = 1.0e-6;\n      DEFAULT (speed_precision, 1000);\n      strcpy (speed_time_string, \"microsecond accurate gettimeofday()\");\n    }\n  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())\n    {\n      /* use clock_gettime if 1 tick or better resolution */\n      goto choose_cgt;\n    }\n  else if (have_times)\n    {\n      use_tick_boundary = 1;\n      DEFAULT (speed_precision, 200);\n    choose_times:\n      use_times = 1;\n      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();\n      sprintf (speed_time_string, \"%s clock tick times()\",\n               unittime_string (speed_unittime));\n    }\n  else if (have_grus)\n    {\n      use_grus = 1;\n      use_tick_boundary = 1;\n      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();\n      DEFAULT (speed_precision, 200);\n      sprintf (speed_time_string, \"%s clock tick getrusage()\\n\",\n               unittime_string (speed_unittime));\n    }\n  else if (have_gtod)\n    {\n      use_gtod = 1;\n      use_tick_boundary = 1;\n      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();\n      DEFAULT (speed_precision, 200);\n      sprintf (speed_time_string, \"%s clock tick gettimeofday()\",\n               unittime_string (speed_unittime));\n    }\n  else\n    {\n      fprintf (stderr, \"No time measuring method available\\n\");\n      fprintf (stderr, \"None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\\n\");\n      abort ();\n    }\n\n  if (speed_option_verbose)\n    {\n      printf (\"speed_time_init: %s\\n\", speed_time_string);\n      printf (\"    speed_precision     %d\\n\", speed_precision);\n      
printf (\"    speed_unittime      %.2g\\n\", speed_unittime);\n      if (supplement_unittime)\n        printf (\"    supplement_unittime %.2g\\n\", supplement_unittime);\n      printf (\"    use_tick_boundary   %d\\n\", use_tick_boundary);\n      if (have_cycles)\n        printf (\"    cycles_limit        %.2g seconds\\n\", cycles_limit);\n    }\n}\n\n\n\n/* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the\n   corresponding \"start_foo\" appropriately too. */\n\nvoid\ngrus_tick_boundary (void)\n{\n  struct_rusage  prev;\n  getrusage (0, &prev);\n  do {\n    getrusage (0, &start_grus);\n  } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);\n}\n\nvoid\ngtod_tick_boundary (void)\n{\n  struct_timeval  prev;\n  gettimeofday (&prev, NULL);\n  do {\n    gettimeofday (&start_gtod, NULL);\n  } while (start_gtod.tv_usec == prev.tv_usec);\n}\n\nvoid\ntimes_tick_boundary (void)\n{\n  struct_tms  prev;\n  times (&prev);\n  do\n    times (&start_times);\n  while (start_times.tms_utime == prev.tms_utime);\n}\n\n\n/* \"have_\" values are tested to let unused code go dead.  
*/\n\nvoid\nspeed_starttime (void)\n{\n  speed_time_init ();\n\n  if (have_grus && use_grus)\n    {\n      if (use_tick_boundary)\n        grus_tick_boundary ();\n      else\n        getrusage (0, &start_grus);\n    }\n\n  if (have_gtod && use_gtod)\n    {\n      if (use_tick_boundary)\n        gtod_tick_boundary ();\n      else\n        gettimeofday (&start_gtod, NULL);\n    }\n\n  if (have_times && use_times)\n    {\n      if (use_tick_boundary)\n        times_tick_boundary ();\n      else\n        times (&start_times);\n    }\n\n  if (have_cgt && use_cgt)\n    clock_gettime (CGT_ID, &start_cgt);\n\n  if (have_rrt && use_rrt)\n    read_real_time (&start_rrt, sizeof(start_rrt));\n\n  if (have_sgi && use_sgi)\n    start_sgi = *sgi_addr;\n\n  if (have_mftb && use_mftb)\n    MFTB (start_mftb);\n\n  if (have_stck && use_stck)\n    STCK (start_stck);\n\n  /* Cycles sampled last for maximum accuracy. */\n  if (have_cycles && use_cycles)\n    speed_cyclecounter (start_cycles);\n}\n\n\n/* Calculate the difference between two cycle counter samples, as a \"double\"\n   counter of cycles.\n\n   The start and end values are allowed to cancel in integers in case the\n   counter values are bigger than the 53 bits that normally fit in a double.\n\n   This works even if speed_cyclecounter() puts a value bigger than 32-bits\n   in the low word (the high word always gets a 2**32 multiplier though). */\n\ndouble\nspeed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])\n{\n  unsigned  d;\n  double    t;\n\n  if (have_cycles == 1)\n    {\n      t = (end[0] - start[0]);\n    }\n  else\n    {\n      d = end[0] - start[0];\n      t = d - (d > end[0] ? M_2POWU : 0.0);\n      t += (end[1] - start[1]) * M_2POW32;\n    }\n  return t;\n}\n\n\ndouble\nspeed_mftb_diff (const unsigned end[2], const unsigned start[2])\n{\n  unsigned  d;\n  double    t;\n\n  d = end[0] - start[0];\n  t = (double) d - (d > end[0] ? 
M_2POW32 : 0.0);\n  t += (end[1] - start[1]) * M_2POW32;\n  return t;\n}\n\n\n/* Calculate the difference between \"start\" and \"end\" using fields \"sec\" and\n   \"psec\", where each \"psec\" is a \"punit\" of a second.\n\n   The seconds parts are allowed to cancel before being combined with the\n   psec parts, in case a simple \"sec+psec*punit\" exceeds the precision of a\n   double.\n\n   Total time is only calculated in a \"double\" since an integer count of\n   psecs might overflow.  2^32 microseconds is only a bit over an hour, or\n   2^32 nanoseconds only about 4 seconds.\n\n   The casts to \"long\" are for the beneifit of timebasestruct_t, where the\n   fields are only \"unsigned int\", but we want a signed difference.  */\n\n#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \\\n  {                                                             \\\n    long  sec_diff, psec_diff;                                  \\\n    sec_diff = (long) end->sec - (long) start->sec;             \\\n    psec_diff = (long) end->psec - (long) start->psec;          \\\n    return (double) sec_diff + punit * (double) psec_diff;      \\\n  }\n\ndouble\ntimeval_diff_secs (const struct_timeval *end, const struct_timeval *start)\n{\n  DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);\n}\n\ndouble\nrusage_diff_secs (const struct_rusage *end, const struct_rusage *start)\n{\n  DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);\n}\n\ndouble\ntimespec_diff_secs (const struct_timespec *end, const struct_timespec *start)\n{\n  DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);\n}\n\n/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. 
*/\ndouble\ntimebasestruct_diff_secs (const timebasestruct_t *end,\n                          const timebasestruct_t *start)\n{\n  DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);\n}\n\n\ndouble\nspeed_endtime (void)\n{\n#define END_USE(name,value)                             \\\n  do {                                                  \\\n    if (speed_option_verbose >= 3)                      \\\n      printf (\"speed_endtime(): used %s\\n\", name);      \\\n    result = value;                                     \\\n    goto done;                                          \\\n  } while (0)\n\n#define END_ENOUGH(name,value)                                          \\\n  do {                                                                  \\\n    if (speed_option_verbose >= 3)                                      \\\n      printf (\"speed_endtime(): %s gives enough precision\\n\", name);    \\\n    result = value;                                                     \\\n    goto done;                                                          \\\n  } while (0)\n\n#define END_EXCEED(name,value)                                            \\\n  do {                                                                    \\\n    if (speed_option_verbose >= 3)                                        \\\n      printf (\"speed_endtime(): cycle counter limit exceeded, used %s\\n\", \\\n              name);                                                      \\\n    result = value;                                                       \\\n    goto done;                                                            \\\n  } while (0)\n\n  unsigned          end_cycles[2];\n  stck_t            end_stck;\n  unsigned          end_mftb[2];\n  unsigned          end_sgi;\n  timebasestruct_t  end_rrt;\n  struct_timespec   end_cgt;\n  struct_timeval    end_gtod;\n  struct_rusage     end_grus;\n  struct_tms        end_times;\n  double            t_gtod, t_grus, t_times, t_cgt;\n  double            
t_rrt, t_sgi, t_mftb, t_stck, t_cycles;\n  double            result;\n\n  /* Cycles sampled first for maximum accuracy.\n     \"have_\" values tested to let unused code go dead.  */\n\n  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);\n  if (have_stck   && use_stck)    STCK (end_stck);\n  if (have_mftb   && use_mftb)    MFTB (end_mftb);\n  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;\n  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));\n  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);\n  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);\n  if (have_grus   && use_grus)    getrusage (0, &end_grus);\n  if (have_times  && use_times)   times (&end_times);\n\n  result = -1.0;\n\n  if (speed_option_verbose >= 4)\n    {\n      printf (\"speed_endtime():\\n\");\n      if (use_cycles)\n        printf (\"   cycles  0x%X,0x%X -> 0x%X,0x%X\\n\",\n                start_cycles[1], start_cycles[0],\n                end_cycles[1], end_cycles[0]);\n\n      if (use_stck)\n        printf (\"   stck  0x%lX -> 0x%lX\\n\", start_stck, end_stck);\n\n      if (use_mftb)\n        printf (\"   mftb  0x%X,%08X -> 0x%X,%08X\\n\",\n                start_mftb[1], start_mftb[0],\n                end_mftb[1], end_mftb[0]);\n\n      if (use_sgi)\n        printf (\"   sgi  0x%X -> 0x%X\\n\", start_sgi, end_sgi);\n\n      if (use_rrt)\n        printf (\"   read_real_time  (%d)%u,%u -> (%d)%u,%u\\n\",\n                start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,\n                end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);\n\n      if (use_cgt)\n        printf (\"   clock_gettime  %ld.%09ld -> %ld.%09ld\\n\",\n                start_cgt.tv_sec, start_cgt.tv_nsec,\n                end_cgt.tv_sec, end_cgt.tv_nsec);\n\n      if (use_gtod)\n        printf (\"   gettimeofday  %ld.%06ld -> %ld.%06ld\\n\",\n                start_gtod.tv_sec, start_gtod.tv_usec,\n                end_gtod.tv_sec, 
end_gtod.tv_usec);\n\n      if (use_grus)\n        printf (\"   getrusage  %ld.%06ld -> %ld.%06ld\\n\",\n                start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,\n                end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);\n\n      if (use_times)\n        printf (\"   times  %ld -> %ld\\n\",\n                start_times.tms_utime, end_times.tms_utime);\n    }\n\n  if (use_rrt)\n    {\n      time_base_to_time (&start_rrt, sizeof(start_rrt));\n      time_base_to_time (&end_rrt, sizeof(end_rrt));\n      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);\n      END_USE (\"read_real_time()\", t_rrt);\n    }\n\n  if (use_cgt)\n    {\n      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);\n      END_USE (\"clock_gettime()\", t_cgt);\n    }\n\n  if (use_grus)\n    {\n      t_grus = rusage_diff_secs (&end_grus, &start_grus);\n\n      /* Use getrusage() if the cycle counter limit would be exceeded, or if\n         it provides enough accuracy already. */\n      if (use_cycles)\n        {\n          if (t_grus >= speed_precision*grus_unittime)\n            END_ENOUGH (\"getrusage()\", t_grus);\n          if (t_grus >= cycles_limit)\n            END_EXCEED (\"getrusage()\", t_grus);\n        }\n    }\n\n  if (use_times)\n    {\n      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;\n\n      /* Use times() if the cycle counter limit would be exceeded, or if\n         it provides enough accuracy already. */\n      if (use_cycles)\n        {\n          if (t_times >= speed_precision*times_unittime)\n            END_ENOUGH (\"times()\", t_times);\n          if (t_times >= cycles_limit)\n            END_EXCEED (\"times()\", t_times);\n        }\n    }\n\n  if (use_gtod)\n    {\n      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);\n\n      /* Use gettimeofday() if it measured a value bigger than the cycle\n         counter can handle.  
*/\n      if (use_cycles)\n        {\n          if (t_gtod >= cycles_limit)\n            END_EXCEED (\"gettimeofday()\", t_gtod);\n        }\n    }\n\n  if (use_mftb)\n    {\n      t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;\n      END_USE (\"mftb\", t_mftb);\n    }\n\n  if (use_stck)\n    {\n      t_stck = (end_stck - start_stck) * STCK_PERIOD;\n      END_USE (\"stck\", t_stck);\n    }\n\n  if (use_sgi)\n    {\n      t_sgi = (end_sgi - start_sgi) * sgi_unittime;\n      END_USE (\"SGI hardware counter\", t_sgi);\n    }\n\n  if (use_cycles)\n    {\n      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)\n        * speed_cycletime;\n      END_USE (\"cycle counter\", t_cycles);\n    }\n\n  if (use_grus && getrusage_microseconds_p())\n    END_USE (\"getrusage()\", t_grus);\n\n  if (use_gtod && gettimeofday_microseconds_p())\n    END_USE (\"gettimeofday()\", t_gtod);\n\n  if (use_times)  END_USE (\"times()\",        t_times);\n  if (use_grus)   END_USE (\"getrusage()\",    t_grus);\n  if (use_gtod)   END_USE (\"gettimeofday()\", t_gtod);\n\n  fprintf (stderr, \"speed_endtime(): oops, no time method available\\n\");\n  abort ();\n\n done:\n  if (result < 0.0)\n    {\n      if (speed_option_verbose >= 2)\n        fprintf (stderr, \"speed_endtime(): warning, treating negative time as zero: %.9f\\n\", result);\n      result = 0.0;\n    }\n  return result;\n}\n"
  },
  {
    "path": "tune/tuneup.c",
    "content": "/* Create tuned thresholds for various algorithms.\n\nCopyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,\n2011, 2012 Free Software Foundation, Inc.\n\nThis file is part of the GNU MP Library.\n\nThe GNU MP Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 3 of the License, or (at your\noption) any later version.\n\nThe GNU MP Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  \n\n*/\n\n\n/* Usage: tuneup [-t] [-t] [-p precision]\n\n   -t turns on some diagnostic traces, a second -t turns on more traces.\n\n   Algorithm:\n\n   The thresholds are determined as follows: two algorithms A and B are \n   compared over a range of sizes. At each point, we define \"badness\" to\n   be the percentage time lost if algorithm B is chosen over algorithm A.\n   Then total badness is the sum of this over all sizes measured.  The \n   threshold is set to minimize total badness.\n\n   In practice the thresholds tend to be chosen to bring on algorithm B \n   fairly quickly.\n\n   Implementation:\n\n   In a normal library build the thresholds are constants.  To tune them\n   selected objects are recompiled with the thresholds as global variables\n   instead.  #define TUNE_PROGRAM_BUILD does this, with help from code at\n   the end of gmp-impl.h, and rules in tune/Makefile.am.\n\n   MUL_KARATSUBA_THRESHOLD for example uses a recompiled mpn_mul_n.  
The\n   threshold is set to \"size+1\" to avoid karatsuba, or to \"size\" to use one\n   level, but recurse into the basecase.\n\n   MUL_TOOM3_THRESHOLD makes use of the tuned MUL_KARATSUBA_THRESHOLD value.\n   Other routines in turn will make use of both of those.  Naturally the\n   dependants must be tuned first.\n\n   In a couple of cases, like DIVEXACT_1_THRESHOLD, there's no recompiling,\n   just a threshold based on comparing two routines (mpn_divrem_1 and\n   mpn_divexact_1), and no further use of the value determined.\n\n   Flags like USE_PREINV_MOD_1 or JACOBI_BASE_METHOD are even simpler, being\n   just comparisons between certain routines on representative data.\n\n   Shortcuts are applied when native (assembler) versions of routines exist.\n   For instance a native mpn_sqr_basecase is assumed to be always faster\n   than mpn_mul_basecase, with no measuring.\n\n   No attempt is made to tune within assembler routines, for instance\n   DIVREM_1_NORM_THRESHOLD.  An assembler mpn_divrem_1 is expected to be\n   written and tuned all by hand.  Assembler routines that might have hard\n   limits are recompiled though, to make them accept a bigger range of sizes\n   than normal, eg. mpn_sqr_basecase to compare against mpn_kara_sqr_n.\n\n   Code: \n     - main : checks for various command line options and calls all()\n     - all : prints the tuneup message, date and compiler, then calls\n             each of the individual tuning functions in turn, e.g. \n             tune_mul()\n     - tune_blah() : tunes function of type blah, e.g. tune_mul() tunes the\n             karatsuba and toom cutoffs. 
It sets up a param struct with the\n             following parameters:\n               a) name : the name of the threshold being tuned, e.g.\n                  MUL_TOOM3_THRESHOLD\n               b) function : the first function being compared (this must be\n                  of the form speed_blah and the function speed_blah will \n                  exist in speed.h and speed.c\n               c) function2 : the second function being compared (if set to\n                  NULL, this is automatically set to equal function\n               d) step_factor : the size of the step between sizes, \n                  set to 0.01 by default, i.e. 1% increments\n               e) function_fudge : multiplier for the speed of function, used\n                  to adjust for overheads, by default set to 1.0\n               f) stop_since_change is a stop condition. If the threshold\n                  has not changed for this many iterations, then stop. This\n                  is set to 80 iterations by default.\n               g) stop_factor : this is another stop factor. If method B \n                  becomes faster by at least this factor, then stop. By\n                  default this is set to 1.2, i.e. 20% faster.\n               h) min_size : the minimum size to start comparing from.\n               i) min_is_always : if this is set to 1, then if the threshold\n                  just ends up being min_size, then the threshold is actually\n                  set to 0, i.e. algorithm B is always used.\n               j) max_size : the maximum size to compare up to. By default this\n                  is set to DEFAULT_MAX_SIZE which is 1000 limbs.\n               h) check_size : if set, will check that the given starting size\n                  is valid for both algorithms and that algorithm B is at least\n                  4% slower than algorithm A at that point.\n               i) size_extra : this is a bias added to each size when doing \n                  measurements. 
It is subtracted off after each measurement. \n                  It is basically used for shifting a threshold from the\n                  measured value.\n               j) data_high : if set to 1, the high limb of xp and yp are set to\n                  be less than s->r, if set to 2, the high limb of xp and yp are\n                  set to be greater than or equal to s->r\n               k) noprint : if set, the threshold is computed but not printed.\n             \n             After setting all the appropriate parameters, the function one() is\n             called. It takes a reference to a parameter, e.g. mul_toom3_threshold\n             which is defined in a table below. That threshold will have been given\n             some initial value (usually MP_SIZE_T_MAX) in the table. It also takes\n             a reference to the param struct.\n        - one() : does repeated timings over the given range of sizes always setting\n            the threshold to size+1 for function and size for function2.\n        \n        N.B: the functions that need to be rebuilt to use variable thresholds must be\n        added to the Makefile.am file (and automake run) before tune can work.\n\n*/\n\n#define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */\n\n#include \"config.h\"\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n#if HAVE_UNISTD_H\n#include <unistd.h>\n#endif\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n#include \"longlong.h\"\n\n#include \"tests.h\"\n#include \"speed.h\"\n\n#if !HAVE_DECL_OPTARG\nextern char *optarg;\nextern int optind, opterr;\n#endif\n\n\n#define DEFAULT_MAX_SIZE   1000  /* limbs */\n\n#if WANT_FFT\nmp_size_t  option_fft_max_size = 50000;  /* limbs */\n#else\nmp_size_t  option_fft_max_size = 0;\n#endif\nint        option_trace = 0;\nint        option_fft_trace = 0;\nstruct speed_params  s;\n\nstruct dat_t {\n  mp_size_t  size;\n  double     d;\n} *dat = NULL;\nint  ndat = 0;\nint  allocdat = 0;\n\n/* This is not 
defined if mpn_sqr_basecase doesn't declare a limit.  In that\n   case use zero here, which for params.max_size means no limit.  */\n#ifndef TUNE_SQR_KARATSUBA_MAX\n#define TUNE_SQR_KARATSUBA_MAX  0\n#endif\n\nmp_size_t  mul_karatsuba_threshold      = MP_SIZE_T_MAX;\nmp_size_t  mul_toom3_threshold          = MUL_TOOM3_THRESHOLD_LIMIT;\nmp_size_t  mul_toom4_threshold          = MUL_TOOM4_THRESHOLD_LIMIT;\nmp_size_t  mul_toom8h_threshold         = MUL_TOOM8H_THRESHOLD_LIMIT;\nmp_size_t  mul_fft_full_threshold       = MP_SIZE_T_MAX;\nmp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;\nmp_size_t  sqr_karatsuba_threshold\n  = (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX);\nmp_size_t  sqr_toom3_threshold          = SQR_TOOM3_THRESHOLD_LIMIT;\nmp_size_t  sqr_toom4_threshold          = SQR_TOOM4_THRESHOLD_LIMIT;\nmp_size_t  sqr_toom8_threshold          = SQR_TOOM8_THRESHOLD_LIMIT;\nmp_size_t  sqr_fft_full_threshold       = MP_SIZE_T_MAX;\nmp_size_t  mulmod_2expm1_threshold\t= MP_SIZE_T_MAX;\nmp_size_t  mullow_basecase_threshold    = MP_SIZE_T_MAX;\nmp_size_t  mullow_dc_threshold          = MP_SIZE_T_MAX;\nmp_size_t  mullow_mul_threshold         = MP_SIZE_T_MAX;\nmp_size_t  mulmid_toom42_threshold      = MP_SIZE_T_MAX;\nmp_size_t  mulhigh_basecase_threshold   = MP_SIZE_T_MAX;\nmp_size_t  mulhigh_dc_threshold         = MP_SIZE_T_MAX;\nmp_size_t  mulhigh_mul_threshold        = MP_SIZE_T_MAX;\nmp_size_t  div_sb_preinv_threshold      = MP_SIZE_T_MAX;\nmp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;\nmp_size_t  inv_div_qr_threshold         = MP_SIZE_T_MAX;\nmp_size_t  inv_divappr_q_n_threshold    = MP_SIZE_T_MAX;\nmp_size_t  dc_div_q_threshold           = MP_SIZE_T_MAX;\nmp_size_t  inv_div_q_threshold          = MP_SIZE_T_MAX;\nmp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;\nmp_size_t  inv_divappr_q_threshold      = MP_SIZE_T_MAX;\nmp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;\nmp_size_t  dc_bdiv_q_threshold          = 
MP_SIZE_T_MAX;\nmp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;\nmp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;\nmp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;\nmp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;\nmp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;\nmp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;\nmp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;\nmp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;\nmp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;\nmp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;\nmp_size_t  divrem_1_norm_threshold      = MP_SIZE_T_MAX;\nmp_size_t  divrem_1_unnorm_threshold    = MP_SIZE_T_MAX;\nmp_size_t  mod_1_norm_threshold         = MP_SIZE_T_MAX;\nmp_size_t  mod_1_1_threshold            = MP_SIZE_T_MAX;\nmp_size_t  mod_1_2_threshold            = MP_SIZE_T_MAX;\nmp_size_t  mod_1_3_threshold            = MP_SIZE_T_MAX;\nmp_size_t  mod_1_unnorm_threshold       = MP_SIZE_T_MAX;\nmp_size_t  divrem_2_threshold           = MP_SIZE_T_MAX;\nmp_size_t  get_str_dc_threshold         = MP_SIZE_T_MAX;\nmp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;\nmp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;\nmp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;\nmp_size_t  rootrem_threshold            = MP_SIZE_T_MAX;\nmp_size_t  divrem_hensel_qr_1_threshold = MP_SIZE_T_MAX;\nmp_size_t  rsh_divrem_hensel_qr_1_threshold = MP_SIZE_T_MAX;\nmp_size_t  divrem_euclid_hensel_threshold = MP_SIZE_T_MAX;\nmp_size_t  fac_odd_threshold            = 0;\nmp_size_t  fac_dsc_threshold            = FAC_DSC_THRESHOLD_LIMIT;\n\nstruct param_t {\n  const char        *name;\n  speed_function_t  function;\n  speed_function_t  function2;\n  double            step_factor;    /* how much to step sizes (rounded down) */\n  double            function_fudge; /* multiplier for \"function\" speeds */\n  int               stop_since_change;\n  double            stop_factor;\n  mp_size_t 
        min_size;\n  int               min_is_always;\n  mp_size_t         max_size;\n  mp_size_t         check_size;\n  mp_size_t         size_extra;\n\n#define DATA_HIGH_LT_R  1\n#define DATA_HIGH_GE_R  2\n  int               data_high;\n\n  int               noprint;\n};\n\n\n/* These are normally undefined when false, which suits \"#if\" fine.\n   But give them zero values so they can be used in plain C \"if\"s.  */\n#ifndef UDIV_PREINV_ALWAYS\n#define UDIV_PREINV_ALWAYS 0\n#endif\n#ifndef HAVE_NATIVE_mpn_divexact_1\n#define HAVE_NATIVE_mpn_divexact_1 0\n#endif\n#ifndef HAVE_NATIVE_mpn_divrem_1\n#define HAVE_NATIVE_mpn_divrem_1 0\n#endif\n#ifndef HAVE_NATIVE_mpn_divrem_2\n#define HAVE_NATIVE_mpn_divrem_2 0\n#endif\n#ifndef HAVE_NATIVE_mpn_mod_1\n#define HAVE_NATIVE_mpn_mod_1 0\n#endif\n#ifndef HAVE_NATIVE_mpn_modexact_1_odd\n#define HAVE_NATIVE_mpn_modexact_1_odd 0\n#endif\n#ifndef HAVE_NATIVE_mpn_preinv_divrem_1\n#define HAVE_NATIVE_mpn_preinv_divrem_1 0\n#endif\n#ifndef HAVE_NATIVE_mpn_preinv_mod_1\n#define HAVE_NATIVE_mpn_preinv_mod_1 0\n#endif\n#ifndef HAVE_NATIVE_mpn_sqr_basecase\n#define HAVE_NATIVE_mpn_sqr_basecase 0\n#endif\n\n\n#define MAX3(a,b,c)  MAX (MAX (a, b), c)\n\nmp_limb_t\nrandlimb_norm (gmp_randstate_t rands)\n{\n  mp_limb_t  n;\n  mpn_randomb (&n,rands, 1);\n  n |= GMP_NUMB_HIGHBIT;\n  return n;\n}\n\n#define GMP_NUMB_HALFMASK  ((CNST_LIMB(1) << (GMP_NUMB_BITS/2)) - 1)\n\nmp_limb_t\nrandlimb_half (gmp_randstate_t rands)\n{\n  mp_limb_t  n;\n  mpn_randomb (&n, rands,1);\n  n &= GMP_NUMB_HALFMASK;\n  n += (n==0);\n  return n;\n}\n\n\n/* Add an entry to the end of the dat[] array, reallocing to make it bigger\n   if necessary.  
*/\nvoid\nadd_dat (mp_size_t size, double d)\n{\n#define ALLOCDAT_STEP  500\n\n  ASSERT_ALWAYS (ndat <= allocdat);\n\n  if (ndat == allocdat)\n    {\n      dat = (struct dat_t *) __gmp_allocate_or_reallocate\n        (dat, allocdat * sizeof(dat[0]),\n         (allocdat+ALLOCDAT_STEP) * sizeof(dat[0]));\n      allocdat += ALLOCDAT_STEP;\n    }\n\n  dat[ndat].size = size;\n  dat[ndat].d = d;\n  ndat++;\n}\n\n\n/* Return the threshold size based on the data accumulated. */\nmp_size_t\nanalyze_dat (int final)\n{\n  double  x, min_x;\n  int     j, min_j;\n\n  /* If the threshold is set at dat[0].size, any positive values are bad. */\n  x = 0.0;\n  for (j = 0; j < ndat; j++)\n    if (dat[j].d > 0.0)\n      x += dat[j].d;\n\n  if (option_trace >= 2 && final)\n    {\n      printf (\"\\n\");\n      printf (\"x is the sum of the badness from setting thresh at given size\\n\");\n      printf (\"  (minimum x is sought)\\n\");\n      printf (\"size=%ld  first x=%.4f\\n\", (long) dat[j].size, x);\n    }\n\n  min_x = x;\n  min_j = 0;\n\n\n  /* When stepping to the next dat[j].size, positive values are no longer\n     bad (so subtracted), negative values become bad (so add the absolute\n     value, meaning subtract). 
*/\n  for (j = 0; j < ndat; x -= dat[j].d, j++)\n    {\n      if (option_trace >= 2 && final)\n        printf (\"size=%ld  x=%.4f\\n\", (long) dat[j].size, x);\n\n      if (x < min_x)\n        {\n          min_x = x;\n          min_j = j;\n        }\n    }\n\n  return min_j;\n}\n\n\n/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */\n\nmp_limb_t mpn_divrem_1_tune(mp_ptr qp, mp_size_t xsize, mp_srcptr ap, mp_size_t size, mp_limb_t d);\nmp_limb_t mpn_mod_1_tune(mp_srcptr ap, mp_size_t size, mp_limb_t d);\n\nvoid mpz_fac_ui_tune(mpz_ptr, mpir_ui);\n\ndouble\nspeed_mpn_mod_1_tune (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_tune);\n}\ndouble\nspeed_mpn_divrem_1_tune (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);\n}\n\ndouble\nspeed_mpz_fac_ui_tune (struct speed_params *s)\n{\n  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);\n}\n\ndouble\ntuneup_measure (speed_function_t fun,gmp_randstate_t rands,\n                const struct param_t *param,\n                struct speed_params *s)\n{\n  static struct param_t  dummy;\n  double   t;\n  TMP_DECL;\n\n  if (! 
param)\n    param = &dummy;\n\n  s->size += param->size_extra;\n\n  TMP_MARK;\n  SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, 0);\n  SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, 0);\n\n  mpn_randomb (s->xp, rands, s->size);\n  mpn_randomb (s->yp, rands, s->size);\n\n  switch (param->data_high) {\n  case DATA_HIGH_LT_R:\n    s->xp[s->size-1] %= s->r;\n    s->yp[s->size-1] %= s->r;\n    break;\n  case DATA_HIGH_GE_R:\n    s->xp[s->size-1] |= s->r;\n    s->yp[s->size-1] |= s->r;\n    break;\n  }\n\n  t = speed_measure (fun, s);\n\n  s->size -= param->size_extra;\n\n  TMP_FREE;\n  return t;\n}\n\n\n#define PRINT_WIDTH  28\n\nvoid\nprint_define_start (const char *name)\n{\n  printf (\"#define %-*s  \", PRINT_WIDTH, name);\n  if (option_trace)\n    printf (\"...\\n\");\n}\n\nvoid\nprint_define_end_remark (const char *name, mp_size_t value, const char *remark)\n{\n  if (option_trace)\n    printf (\"#define %-*s  \", PRINT_WIDTH, name);\n\n  if (value == MP_SIZE_T_MAX)\n    printf (\"MP_SIZE_T_MAX\");\n  else\n    printf (\"%5ld\", (long) value);\n\n  if (remark != NULL)\n    printf (\"  /* %s */\", remark);\n  printf (\"\\n\");\n}\n\nvoid\nprint_define_end (const char *name, mp_size_t value)\n{\n  const char  *remark;\n  if (value == MP_SIZE_T_MAX)\n    remark = \"never\";\n  else if (value == 0)\n    remark = \"always\";\n  else\n    remark = NULL;\n  print_define_end_remark (name, value, remark);\n}\n\nvoid\nprint_define (const char *name, mp_size_t value)\n{\n  print_define_start (name);\n  print_define_end (name, value);\n}\n\nvoid\nprint_define_remark (const char *name, mp_size_t value, const char *remark)\n{\n  print_define_start (name);\n  print_define_end_remark (name, value, remark);\n}\n\n\nvoid\none (mp_size_t *threshold, gmp_randstate_t rands,struct param_t *param)\n{\n  int  since_positive, since_thresh_change;\n  int  thresh_idx, new_thresh_idx;\n\n#define DEFAULT(x,n)  do { if (! 
(x))  (x) = (n); } while (0)\n\n  DEFAULT (param->function_fudge, 1.0);\n  DEFAULT (param->function2, param->function);\n  DEFAULT (param->step_factor, 0.01);  /* small steps by default */\n  DEFAULT (param->stop_since_change, 80);\n  DEFAULT (param->stop_factor, 1.2);\n  DEFAULT (param->min_size, 10);\n  DEFAULT (param->max_size, DEFAULT_MAX_SIZE);\n\n  if (param->check_size != 0)\n    {\n      double   t1, t2;\n      s.size = param->check_size;\n\n      *threshold = s.size+1;\n      t1 = tuneup_measure (param->function, rands,param, &s);\n\n      *threshold = s.size;\n      t2 = tuneup_measure (param->function2, rands, param, &s);\n      if (t1 == -1.0 || t2 == -1.0)\n        {\n          printf (\"Oops, can't run both functions at size %ld\\n\",\n                  (long) s.size);\n          abort ();\n        }\n      t1 *= param->function_fudge;\n\n      /* ask that t2 is at least 4% below t1 */\n      if (t1 < t2*1.04)\n        {\n          if (option_trace)\n            printf (\"function2 never enough faster: t1=%.9f t2=%.9f\\n\", t1, t2);\n          *threshold = MP_SIZE_T_MAX;\n          if (! param->noprint)\n            print_define (param->name, *threshold);\n          return;\n        }\n\n      if (option_trace >= 2)\n        printf (\"function2 enough faster at size=%ld: t1=%.9f t2=%.9f\\n\",\n                (long) s.size, t1, t2);\n    }\n\n  if (! 
param->noprint || option_trace)\n    print_define_start (param->name);\n\n  ndat = 0;\n  since_positive = 0;\n  since_thresh_change = 0;\n  thresh_idx = 0;\n\n  if (option_trace >= 2)\n    {\n      printf (\"             algorithm-A  algorithm-B   ratio  possible\\n\");\n      printf (\"              (seconds)    (seconds)    diff    thresh\\n\");\n    }\n\n  for (s.size = param->min_size;\n       s.size < param->max_size;\n       s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), 1))\n    {\n      double   ti, tiplus1, d;\n\n      /* If there's a size limit and it's reached then it should still\n         be sensible to analyze the data since we want the threshold put\n         either at or near the limit.  */\n      if (s.size >= param->max_size)\n        {\n          if (option_trace)\n            printf (\"Reached maximum size (%ld) without otherwise stopping\\n\",\n                    (long) param->max_size);\n          break;\n        }\n\n      /*\n        FIXME: check minimum size requirements are met, possibly by just\n        checking for the -1 returns from the speed functions.\n      */\n\n      /* using method A at this size */\n      *threshold = s.size+1;\n      ti = tuneup_measure (param->function, rands,param, &s);\n      if (ti == -1.0)\n        abort ();\n      ti *= param->function_fudge;\n\n      /* using method B at this size */\n      *threshold = s.size;\n      tiplus1 = tuneup_measure (param->function2, rands, param, &s);\n      if (tiplus1 == -1.0)\n        abort ();\n\n      /* Calculate the fraction by which the one or the other routine is\n         slower.  
*/\n      if (tiplus1 >= ti)\n        d = (tiplus1 - ti) / tiplus1;  /* negative */\n      else\n        d = (tiplus1 - ti) / ti;       /* positive */\n\n      add_dat (s.size, d);\n\n      new_thresh_idx = analyze_dat (0);\n\n      if (option_trace >= 2)\n        printf (\"size=%ld  %.9f  %.9f  % .4f %c  %ld\\n\",\n                (long) s.size, ti, tiplus1, d,\n                ti > tiplus1 ? '#' : ' ',\n                (long) dat[new_thresh_idx].size);\n\n      /* Stop if the last time method i was faster was more than a\n         certain number of measurements ago.  */\n#define STOP_SINCE_POSITIVE  200\n      if (d >= 0)\n        since_positive = 0;\n      else\n        if (++since_positive > STOP_SINCE_POSITIVE)\n          {\n            if (option_trace >= 1)\n              printf (\"stopped due to since_positive (%d)\\n\",\n                      STOP_SINCE_POSITIVE);\n            break;\n          }\n\n      /* Stop if method A has become slower by a certain factor. */\n      if (ti >= tiplus1 * param->stop_factor)\n        {\n          if (option_trace >= 1)\n            printf (\"stopped due to ti >= tiplus1 * factor (%.1f)\\n\",\n                    param->stop_factor);\n          break;\n        }\n\n      /* Stop if the threshold implied hasn't changed in a certain\n         number of measurements.  (It's this condition that usually\n         stops the loop.) */\n      if (thresh_idx != new_thresh_idx)\n        since_thresh_change = 0, thresh_idx = new_thresh_idx;\n      else\n        if (++since_thresh_change > param->stop_since_change)\n          {\n            if (option_trace >= 1)\n              printf (\"stopped due to since_thresh_change (%d)\\n\",\n                      param->stop_since_change);\n            break;\n          }\n\n      /* Stop if the threshold implied is more than a certain number of\n         measurements ago.  
*/\n#define STOP_SINCE_AFTER   500\n      if (ndat - thresh_idx > STOP_SINCE_AFTER)\n        {\n          if (option_trace >= 1)\n            printf (\"stopped due to ndat - thresh_idx > amount (%d)\\n\",\n                    STOP_SINCE_AFTER);\n          break;\n        }\n\n      /* Stop when the size limit is reached before the end of the\n         crossover, but only show this as an error for >= the default max\n         size.  FIXME: Maybe should make it a param choice whether this is\n         an error.  */\n      if (s.size >= param->max_size && param->max_size >= DEFAULT_MAX_SIZE)\n        {\n          fprintf (stderr, \"%s\\n\", param->name);\n          fprintf (stderr, \"sizes %ld to %ld total %d measurements\\n\",\n                   (long) dat[0].size, (long) dat[ndat-1].size, ndat);\n          fprintf (stderr, \"    max size reached before end of crossover\\n\");\n          break;\n        }\n    }\n\n  if (option_trace >= 1)\n    printf (\"sizes %ld to %ld total %d measurements\\n\",\n            (long) dat[0].size, (long) dat[ndat-1].size, ndat);\n\n  *threshold = dat[analyze_dat (1)].size;\n\n  if (param->min_is_always)\n    {\n      if (*threshold == param->min_size)\n        *threshold = 0;\n    }\n\n  if (! param->noprint || option_trace)\n    print_define_end (param->name, *threshold);\n}\n\n\n/* Special probing for the fft thresholds.  The size restrictions on the\n   FFTs mean the graph of time vs size has a step effect.  See this for\n   example using\n\n       ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9\n       gnuplot foo.gnuplot\n\n   The current approach is to compare routines at the midpoint of relevant\n   steps.  Arguably a more sophisticated system of threshold data is wanted\n   if this step effect remains. 
*/\n\nstruct fft_param_t {\n  const char        *threshold_name;\n  mp_size_t         *p_threshold;\n  mp_size_t         first_size;\n  mp_size_t         max_size;\n  speed_function_t  function;\n  speed_function_t  mul_function;\n  mp_size_t         sqr;\n};\n\nmp_size_t\nfft_step_size (int size)\n{\n  mp_size_t  step;\n  \n  step = mpir_fft_adjust_limbs(size + 1) - size;\n\n  if (step <= 0)\n    {\n      printf (\"Can't handle size=%d\\n\", size);\n      abort ();\n    }\n\n  return step;\n}\n\nvoid\nfft (struct fft_param_t *p,gmp_randstate_t rands)\n{\n  mp_size_t  size;\n  int        i, k;\n\n  *p->p_threshold = MP_SIZE_T_MAX;\n  \n  option_trace = MAX (option_trace, option_fft_trace);\n\n  size = p->first_size;\n\n  /* Declare an FFT faster than a plain toom3 etc multiplication found as\n     soon as one faster measurement obtained.  A multiplication in the\n     middle of the FFT step is tested.  */\n  for (;;)\n    {\n      double  tk, tm;\n\n      size = mpir_fft_adjust_limbs (size+1);\n      \n      if (size >= p->max_size)\n        break;\n\n      s.size = size + fft_step_size (size) / 2;\n      \n      tk = tuneup_measure (p->function, rands, NULL, &s);\n      if (tk == -1.0)\n        abort ();\n\n      tm = tuneup_measure (p->mul_function, rands, NULL, &s);\n      if (tm == -1.0)\n        abort ();\n\n      if (option_trace >= 2)\n        printf (\"at %ld   size=%ld   %.9f   size=%ld %s mul %.9f\\n\",\n                (long) size,\n                (long) size + fft_step_size (size) / 2, tk,\n                (long) s.size, \"full\", tm);\n\n      if (tk < tm)\n        {\n            *p->p_threshold = s.size;\n            print_define (p->threshold_name,      *p->p_threshold);\n            break;\n        }\n    }\n\n}\n\n/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,\n   giving wrong results.  
*/\nvoid\ntune_mul (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpn_mul_n;\n\n  param.name = \"MUL_KARATSUBA_THRESHOLD\";\n  param.min_size = MAX (4, MPN_KARA_MUL_N_MINSIZE);\n  param.max_size = MUL_KARATSUBA_THRESHOLD_LIMIT-1;\n  one (&mul_karatsuba_threshold, rands,&param);\n\n  param.name = \"MUL_TOOM3_THRESHOLD\";\n  param.min_size = MAX (mul_karatsuba_threshold, MPN_TOOM3_MUL_N_MINSIZE);\n  param.max_size = MUL_TOOM3_THRESHOLD_LIMIT-1;\n  one (&mul_toom3_threshold, rands, &param);\n\n  param.name = \"MUL_TOOM4_THRESHOLD\";\n  param.min_size = MAX (mul_toom3_threshold, MPN_TOOM4_MUL_N_MINSIZE);\n  param.max_size = MUL_TOOM4_THRESHOLD_LIMIT-1;\n  one (&mul_toom4_threshold, rands, &param);\n\n  param.name = \"MUL_TOOM8H_THRESHOLD\";\n  param.min_size = MAX (mul_toom4_threshold, MPN_TOOM8H_MUL_MINSIZE);\n  param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;\n  one (&mul_toom8h_threshold, rands, &param);\n\n  /* disabled until tuned */\n  MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;\n}\n\n\n/* This was written by the tuneup challenged tege.  Kevin, please delete\n   this comment when you've reviewed/rewritten this.  :-) */\nvoid\ntune_mullow (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpn_mullow_n;\n\n  param.name = \"MULLOW_BASECASE_THRESHOLD\";\n  param.min_size = 3;\n  param.min_is_always = 1;\n  //param.max_size = MULLOW_BASECASE_THRESHOLD_LIMIT-1;\n  one (&mullow_basecase_threshold, rands, &param);\n\n  param.min_is_always = 0;\t/* ??? 
*/\n\n  param.name = \"MULLOW_DC_THRESHOLD\";\n  param.min_size = mullow_basecase_threshold;\n  param.max_size = 1000;\n  one (&mullow_dc_threshold, rands, &param);\n\n  param.name = \"MULLOW_MUL_THRESHOLD\";\n  param.min_size = mullow_dc_threshold;\n  param.max_size = 10000;\n  one (&mullow_mul_threshold, rands, &param);\n\n  /* disabled until tuned */\n  MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;\n}\n\nvoid\ntune_mulmid (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.name = \"MULMID_TOOM42_THRESHOLD\";\n  param.function = speed_mpn_mulmid_n;\n  param.min_size = 4;\n  param.max_size = 100;\n  one (&mulmid_toom42_threshold, rands, &param);\n\n  /* disabled until tuned */\n  MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;\n}\n\nvoid\ntune_mulmod_2expm1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.function = speed_mpn_mulmod_2expm1;\n  param.name = \"MULMOD_2EXPM1_THRESHOLD\";\n  param.min_size = 1;\n  //param.max_size =  ?? ;\n  one (&mulmod_2expm1_threshold, rands, &param);\n  /* disabled until tuned */\n  MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;    // ??????????????\n}\n\nvoid\ntune_mulhigh (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpn_mulhigh_n;\n\n  param.name = \"MULHIGH_BASECASE_THRESHOLD\";\n  param.min_size = 3;\n  param.min_is_always = 3;\n  //param.max_size = MULHIGH_BASECASE_THRESHOLD_LIMIT-1;\n  one (&mulhigh_basecase_threshold, rands, &param);\n\n  param.min_is_always = 0;\t/* ??? 
*/\n\n  param.name = \"MULHIGH_DC_THRESHOLD\";\n  param.min_size = MAX(mulhigh_basecase_threshold,4);\n  param.max_size = 1000;\n  one (&mulhigh_dc_threshold, rands, &param);\n\n  param.name = \"MULHIGH_MUL_THRESHOLD\";\n  param.min_size = mulhigh_dc_threshold;\n  param.max_size = 10000;\n  one (&mulhigh_mul_threshold, rands, &param);\n\n  /* disabled until tuned */\n  MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;\n}\n\nvoid\ntune_rootrem (gmp_randstate_t rands)\n{\n\n  static struct param_t  param;\n  s.r=5;   // tune for 5th root\n  param.function = speed_mpn_rootrem;\n  param.name = \"ROOTREM_THRESHOLD\";\n  param.min_size = 1;\n  one (&rootrem_threshold, rands, &param);\n}\n\nvoid\ntune_divrem_hensel_qr_1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.function = speed_mpn_divrem_hensel_qr_1;\n  param.name = \"DIVREM_HENSEL_QR_1_THRESHOLD\";\n  param.min_size = 2;\n  one (&divrem_hensel_qr_1_threshold, rands, &param);\n}\n\nvoid\ntune_rsh_divrem_hensel_qr_1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.function = speed_mpn_rsh_divrem_hensel_qr_1;\n  param.name = \"RSH_DIVREM_HENSEL_QR_1_THRESHOLD\";\n  param.min_size = 3;\n  one (&rsh_divrem_hensel_qr_1_threshold, rands, &param);\n}\n\nvoid\ntune_divrem_euclid_hensel (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.function = speed_mpn_divrem_1;\n  param.name = \"DIVREM_EUCLID_HENSEL_THRESHOLD\";\n  param.min_size = 8;\n  s.r=0x81234567;// tune for this divisor\n  one (&divrem_euclid_hensel_threshold, rands, &param);\n}\n\n// for tuning  we dont care if the divisors go out of range as it doesn't affect the runtime\nvoid tune_mod_1_k (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpn_divrem_euclidean_r_1;\n\n  param.name = \"MOD_1_1_THRESHOLD\";\n  param.min_size = 3;\n  one (&mod_1_1_threshold, rands, &param);\n\n  param.name = \"MOD_1_2_THRESHOLD\";\n  param.min_size = MAX(mod_1_1_threshold,4);\n  
//param.max_size = 1000;\n  one (&mod_1_2_threshold, rands, &param);\n\n  param.name = \"MOD_1_3_THRESHOLD\";\n  param.min_size = MAX(mod_1_2_threshold,5);\n  //param.max_size = 10000;\n  one (&mod_1_3_threshold, rands, &param);\n\n}\n\n\n\n/* Start the basecase from 3, since 1 is a special case, and if mul_basecase\n   is faster only at size==2 then we don't want to bother with extra code\n   just for that.  Start karatsuba from 4 same as MUL above.  */\n\nvoid\ntune_sqr (gmp_randstate_t rands)\n{       \n  /* disabled until tuned */\n  SQR_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;\n\n  if (HAVE_NATIVE_mpn_sqr_basecase)\n    {\n      print_define_remark (\"SQR_BASECASE_THRESHOLD\", 0, \"always (native)\");\n      sqr_basecase_threshold = 0;\n    }\n  else\n    {\n      static struct param_t  param;\n      param.name = \"SQR_BASECASE_THRESHOLD\";\n      param.function = speed_mpn_sqr;\n      param.min_size = 3;\n      param.min_is_always = 1;\n      param.max_size = TUNE_SQR_KARATSUBA_MAX;\n      param.noprint = 1;\n      one (&sqr_basecase_threshold, rands, &param);\n    }\n\n  {\n    static struct param_t  param;\n    param.name = \"SQR_KARATSUBA_THRESHOLD\";\n    param.function = speed_mpn_sqr;\n    param.min_size = MAX (4, MPN_KARA_SQR_N_MINSIZE);\n    param.max_size = TUNE_SQR_KARATSUBA_MAX;\n    param.noprint = 1;\n    one (&sqr_karatsuba_threshold, rands, &param);\n\n    if (! HAVE_NATIVE_mpn_sqr_basecase\n        && sqr_karatsuba_threshold < sqr_basecase_threshold)\n      {\n        /* Karatsuba becomes faster than mul_basecase before\n           sqr_basecase does.  Arrange for the expression\n           \"BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))\" which\n           selects mpn_sqr_basecase in mpn_sqr to be false, by setting\n           SQR_KARATSUBA_THRESHOLD to zero, making\n           SQR_BASECASE_THRESHOLD the karatsuba threshold.  
*/\n\n        sqr_basecase_threshold = SQR_KARATSUBA_THRESHOLD;\n        SQR_KARATSUBA_THRESHOLD = 0;\n\n        print_define_remark (\"SQR_BASECASE_THRESHOLD\", sqr_basecase_threshold,\n                             \"karatsuba\");\n        print_define_remark (\"SQR_KARATSUBA_THRESHOLD\",SQR_KARATSUBA_THRESHOLD,\n                             \"never sqr_basecase\");\n      }\n    else\n      {\n        if (! HAVE_NATIVE_mpn_sqr_basecase)\n          print_define (\"SQR_BASECASE_THRESHOLD\", sqr_basecase_threshold);\n        print_define (\"SQR_KARATSUBA_THRESHOLD\", SQR_KARATSUBA_THRESHOLD);\n      }\n  }\n\n  {\n    static struct param_t  param;\n    param.function = speed_mpn_sqr;\n    \n  {\n    param.name = \"SQR_TOOM3_THRESHOLD\";\n    param.min_size = MAX3 (MPN_TOOM3_SQR_N_MINSIZE,\n                           SQR_KARATSUBA_THRESHOLD, SQR_BASECASE_THRESHOLD);\n    param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;\n    one (&sqr_toom3_threshold, rands, &param);\n  }\n\n  {\n    param.name = \"SQR_TOOM4_THRESHOLD\";\n    param.min_size = MAX (MPN_TOOM4_SQR_N_MINSIZE, sqr_toom3_threshold);\n    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;\n    one (&sqr_toom4_threshold, rands, &param);\n  }\n\n  {\n    param.name = \"SQR_TOOM8_THRESHOLD\";\n    param.min_size = MAX (MPN_TOOM8_SQR_N_MINSIZE, sqr_toom4_threshold);\n    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;\n    one (&sqr_toom8_threshold, rands, &param);\n  }\n  }\n}\n\nvoid\ntune_dc_div (gmp_randstate_t rands)\n{\n  {\n  static struct param_t  param;\n  param.name = \"DC_DIV_QR_THRESHOLD\";\n  param.function = speed_mpn_dc_div_qr_n;\n  param.step_factor = 0.02;\n  one (&dc_div_qr_threshold, rands, &param);\n  }\n\n  {\n  static struct param_t  param;\n  param.name = \"INV_DIV_QR_THRESHOLD\";\n  param.max_size = 10000;\n  param.function = speed_mpn_inv_div_qr;\n  param.min_size = dc_div_qr_threshold;\n  param.step_factor = 0.02;\n  one (&inv_div_qr_threshold, rands, &param);\n  }\n  \n  {\n  static struct 
param_t  param;\n  param.name = \"INV_DIVAPPR_Q_N_THRESHOLD\";\n  param.function = speed_mpn_inv_divappr_q;\n  param.max_size = 10000;\n  param.min_size = dc_divappr_q_threshold;\n  param.step_factor = 0.1;\n  param.stop_factor = 0.2;\n  one (&inv_divappr_q_n_threshold, rands, &param);\n  }\n}\n\nvoid\ntune_tdiv_q (gmp_randstate_t rands)\n{\n  {\n  static struct param_t  param;\n  param.name = \"DC_DIV_Q_THRESHOLD\";\n  param.function = speed_mpn_tdiv_q1;\n  param.step_factor = 0.02;\n  one (&dc_div_q_threshold, rands, &param);\n  }\n\n  {\n  static struct param_t  param;\n  param.name = \"INV_DIV_Q_THRESHOLD\";\n  param.function = speed_mpn_tdiv_q1;\n  param.max_size = 10000;\n  param.min_size = dc_div_q_threshold;\n  param.step_factor = 0.02;\n  one (&inv_div_q_threshold, rands, &param);\n  }\n\n  {\n  static struct param_t  param;\n  param.name = \"DC_DIVAPPR_Q_THRESHOLD\";\n  param.function = speed_mpn_tdiv_q2;\n  param.step_factor = 0.02;\n  one (&dc_divappr_q_threshold, rands, &param);\n  }\n\n  {\n  static struct param_t  param;\n  param.name = \"INV_DIVAPPR_Q_THRESHOLD\";\n  param.function = speed_mpn_tdiv_q2;\n  param.max_size = 20000;\n  param.min_size = dc_divappr_q_threshold;\n  param.step_factor = 0.1;\n  one (&inv_divappr_q_threshold, rands, &param);\n  }\n}\n\nvoid\ntune_dc_bdiv (gmp_randstate_t rands)\n{\n  {\n  static struct param_t  param;\n  param.name = \"DC_BDIV_QR_THRESHOLD\";\n  param.function = speed_mpn_dc_bdiv_qr_n;\n  param.step_factor = 0.02;\n  one (&dc_bdiv_qr_threshold, rands, &param);\n  }\n\n  {\n  static struct param_t  param;\n  param.name = \"DC_BDIV_Q_THRESHOLD\";\n  param.function = speed_mpn_dc_bdiv_q;\n  param.step_factor = 0.02;\n  one (&dc_bdiv_q_threshold, rands, &param);\n  }\n}\n\nvoid\ntune_binvert (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpn_binvert;\n  param.name = \"BINV_NEWTON_THRESHOLD\";\n  param.min_size = 8;\t\t/* pointless with smaller operands */\n  one 
(&binv_newton_threshold, rands, &param);\n}\n\nvoid\ntune_redc (gmp_randstate_t rands)\n{\n#define TUNE_REDC_2_MAX 100\n#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2\n#define WANT_REDC_2 1\n#endif\n\n#if WANT_REDC_2\n  {\n    static struct param_t  param;\n    param.name = \"REDC_1_TO_REDC_2_THRESHOLD\";\n    param.function = speed_mpn_redc_1;\n    param.function2 = speed_mpn_redc_2;\n    param.min_size = 1;\n    param.min_is_always = 1;\n    param.max_size = TUNE_REDC_2_MAX;\n    param.noprint = 1;\n    param.stop_factor = 1.5;\n    one (&redc_1_to_redc_2_threshold, rands, &param);\n  }\n  {\n    static struct param_t  param;\n    param.name = \"REDC_2_TO_REDC_N_THRESHOLD\";\n    param.function = speed_mpn_redc_2;\n    param.function2 = speed_mpn_redc_n;\n    param.min_size = 16;\n    param.noprint = 1;\n    one (&redc_2_to_redc_n_threshold, rands, &param);\n  }\n  if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)\n    {\n      redc_2_to_redc_n_threshold = 0;\t/* disable redc_2 */\n\n      /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as\n\t REDC_1_TO_REDC_2_THRESHOLD.  
*/\n      {\n\tstatic struct param_t  param;\n\tparam.name = \"REDC_1_TO_REDC_2_THRESHOLD\";\n\tparam.function = speed_mpn_redc_1;\n\tparam.function2 = speed_mpn_redc_n;\n\tparam.min_size = 16;\n\tparam.noprint = 1;\n\tone (&redc_1_to_redc_2_threshold, rands, &param);\n      }\n    }\n  print_define (\"REDC_1_TO_REDC_2_THRESHOLD\", REDC_1_TO_REDC_2_THRESHOLD);\n  print_define (\"REDC_2_TO_REDC_N_THRESHOLD\", REDC_2_TO_REDC_N_THRESHOLD);\n#else\n  {\n    static struct param_t  param;\n    param.name = \"REDC_1_TO_REDC_N_THRESHOLD\";\n    param.function = speed_mpn_redc_1;\n    param.function2 = speed_mpn_redc_n;\n    param.min_size = 16;\n    one (&redc_1_to_redc_n_threshold, rands, &param);\n  }\n#endif\n}\n\nvoid\ntune_matrix22_mul (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = \"MATRIX22_STRASSEN_THRESHOLD\";\n  param.function = speed_mpn_matrix22_mul;\n  param.min_size = 2;\n  one (&matrix22_strassen_threshold, rands, &param);\n}\n\nvoid\ntune_hgcd (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = \"HGCD_THRESHOLD\";\n  param.function = speed_mpn_hgcd;\n  /* We seem to get strange results for small sizes */\n  param.min_size = 30;\n  one (&hgcd_threshold, rands, &param);\n}\n\nvoid\ntune_hgcd_appr (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = \"HGCD_APPR_THRESHOLD\";\n  param.function = speed_mpn_hgcd_appr;\n  /* We seem to get strange results for small sizes */\n  param.min_size = 50;\n  param.stop_since_change = 150;\n  one (&hgcd_appr_threshold, rands, &param);\n}\n\nvoid\ntune_hgcd_reduce (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = \"HGCD_REDUCE_THRESHOLD\";\n  param.function = speed_mpn_hgcd_reduce;\n  param.min_size = 30;\n  param.max_size = 7000;\n  param.step_factor = 0.04;\n  one (&hgcd_reduce_threshold, rands, &param);\n}\n\nvoid\ntune_gcd_dc (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = 
\"GCD_DC_THRESHOLD\";\n  param.function = speed_mpn_gcd;\n  param.min_size = hgcd_threshold;\n  param.max_size = 3000;\n  param.step_factor = 0.02;\n  one (&gcd_dc_threshold, rands, &param);\n}\n\nvoid\ntune_gcdext_dc (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  param.name = \"GCDEXT_DC_THRESHOLD\";\n  param.function = speed_mpn_gcdext;\n  param.min_size = hgcd_threshold;\n  param.max_size = 3000;\n  param.step_factor = 0.02;\n  one (&gcdext_dc_threshold, rands, &param);\n}\n\n\n\n/* size_extra==1 reflects the fact that with high<divisor one division is\n   always skipped.  Forcing high<divisor while testing ensures consistency\n   while stepping through sizes, ie. that size-1 divides will be done each\n   time.\n\n   min_size==2 and min_is_always are used so that if plain division is only\n   better at size==1 then don't bother including that code just for that\n   case, instead go with preinv always and get a size saving.  */\n\n#define DIV_1_PARAMS                    \\\n  param.check_size = 256;               \\\n  param.min_size = 2;                   \\\n  param.min_is_always = 1;              \\\n  param.data_high = DATA_HIGH_LT_R;     \\\n  param.size_extra = 1;                 \\\n  param.stop_factor = 2.0;\n\n\ndouble (*tuned_speed_mpn_divrem_1)(struct speed_params *s);\n\nvoid\ntune_divrem_1 (gmp_randstate_t rands)\n{\n  /* plain version by default */\n  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1;\n\n  /* No support for tuning native assembler code, do that by hand and put\n     the results in the .asm file, there's no need for such thresholds to\n     appear in gmp-mparam.h.  
*/\n  if (HAVE_NATIVE_mpn_divrem_1)\n    return;\n\n  if (GMP_NAIL_BITS != 0)\n    {\n      print_define_remark (\"DIVREM_1_NORM_THRESHOLD\", MP_SIZE_T_MAX,\n                           \"no preinv with nails\");\n      print_define_remark (\"DIVREM_1_UNNORM_THRESHOLD\", MP_SIZE_T_MAX,\n                           \"no preinv with nails\");\n      return;\n    }\n\n  if (UDIV_PREINV_ALWAYS)\n    {\n      print_define_remark (\"DIVREM_1_NORM_THRESHOLD\", 0L, \"preinv always\");\n      print_define (\"DIVREM_1_UNNORM_THRESHOLD\", 0L);\n      return;\n    }\n\n  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1_tune;\n\n  /* Tune for the integer part of mpn_divrem_1.  This will very possibly be\n     a bit out for the fractional part, but that's too bad, the integer part\n     is more important. */\n  {\n    static struct param_t  param;\n    param.name = \"DIVREM_1_NORM_THRESHOLD\";\n    DIV_1_PARAMS;\n    s.r = randlimb_norm (rands);\n    param.function = speed_mpn_divrem_1_tune;\n    one (&divrem_1_norm_threshold, rands, &param);\n  }\n  {\n    static struct param_t  param;\n    param.name = \"DIVREM_1_UNNORM_THRESHOLD\";\n    DIV_1_PARAMS;\n    s.r = randlimb_half (rands);\n    param.function = speed_mpn_divrem_1_tune;\n    one (&divrem_1_unnorm_threshold, rands, &param);\n  }\n}\n\n\ndouble (*tuned_speed_mpn_mod_1)(struct speed_params *s);\n\nvoid\ntune_mod_1 (gmp_randstate_t rands)\n{\n  /* plain version by default */\n  tuned_speed_mpn_mod_1 = speed_mpn_mod_1;\n\n  /* No support for tuning native assembler code, do that by hand and put\n     the results in the .asm file, there's no need for such thresholds to\n     appear in gmp-mparam.h.  
*/\n  if (HAVE_NATIVE_mpn_mod_1)\n    return;\n\n  if (GMP_NAIL_BITS != 0)\n    {\n      print_define_remark (\"MOD_1_NORM_THRESHOLD\", MP_SIZE_T_MAX,\n                           \"no preinv with nails\");\n      print_define_remark (\"MOD_1_UNNORM_THRESHOLD\", MP_SIZE_T_MAX,\n                           \"no preinv with nails\");\n      return;\n    }\n\n  if (UDIV_PREINV_ALWAYS)\n    {\n      print_define (\"MOD_1_NORM_THRESHOLD\", 0L);\n      print_define (\"MOD_1_UNNORM_THRESHOLD\", 0L);\n      return;\n    }\n\n  tuned_speed_mpn_mod_1 = speed_mpn_mod_1_tune;\n\n  {\n    static struct param_t  param;\n    param.name = \"MOD_1_NORM_THRESHOLD\";\n    DIV_1_PARAMS;\n    s.r = randlimb_norm (rands);\n    param.function = speed_mpn_mod_1_tune;\n    one (&mod_1_norm_threshold, rands, &param);\n  }\n  {\n    static struct param_t  param;\n    param.name = \"MOD_1_UNNORM_THRESHOLD\";\n    DIV_1_PARAMS;\n    s.r = randlimb_half (rands);\n    param.function = speed_mpn_mod_1_tune;\n    one (&mod_1_unnorm_threshold, rands, &param);\n  }\n}\n\n\n/* A non-zero DIVREM_1_UNNORM_THRESHOLD (or DIVREM_1_NORM_THRESHOLD) would\n   imply that udiv_qrnnd_preinv is worth using, but it seems most\n   straightforward to compare mpn_preinv_divrem_1 and mpn_divrem_1_div\n   directly.  */\n\nvoid\ntune_preinv_divrem_1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  speed_function_t  divrem_1;\n  const char        *divrem_1_name;\n  double            t1, t2;\n\n  if (GMP_NAIL_BITS != 0)\n    {\n      print_define_remark (\"USE_PREINV_DIVREM_1\", 0, \"no preinv with nails\");\n      return;\n    }\n\n  /* Any native version of mpn_preinv_divrem_1 is assumed to exist because\n     it's faster than mpn_divrem_1.  */\n  if (HAVE_NATIVE_mpn_preinv_divrem_1)\n    {\n      print_define_remark (\"USE_PREINV_DIVREM_1\", 1, \"native\");\n      return;\n    }\n\n  /* If udiv_qrnnd_preinv is the only division method then of course\n     mpn_preinv_divrem_1 should be used.  
*/\n  if (UDIV_PREINV_ALWAYS)\n    {\n      print_define_remark (\"USE_PREINV_DIVREM_1\", 1, \"preinv always\");\n      return;\n    }\n\n  /* If we've got an assembler version of mpn_divrem_1, then compare against\n     that, not the mpn_divrem_1_div generic C.  */\n  if (HAVE_NATIVE_mpn_divrem_1)\n    {\n      divrem_1 = speed_mpn_divrem_1;\n      divrem_1_name = \"mpn_divrem_1\";\n    }\n  else\n    {\n      divrem_1 = speed_mpn_divrem_1_div;\n      divrem_1_name = \"mpn_divrem_1_div\";\n    }\n\n  param.data_high = DATA_HIGH_LT_R; /* allow skip one division */\n  s.size = 200;                     /* generous but not too big */\n  /* Divisor, nonzero.  Unnormalized so as to exercise the shift!=0 case,\n     since in general that's probably most common, though in fact for a\n     64-bit limb mp_bases[10].big_base is normalized.  */\n  s.r = urandom(rands) & (GMP_NUMB_MASK >> 4);\n  if (s.r == 0) s.r = 123;\n\n  t1 = tuneup_measure (speed_mpn_preinv_divrem_1, rands, &param, &s);\n  t2 = tuneup_measure (divrem_1, rands, &param, &s);\n  if (t1 == -1.0 || t2 == -1.0)\n    {\n      printf (\"Oops, can't measure mpn_preinv_divrem_1 and %s at %ld\\n\",\n              divrem_1_name, (long) s.size);\n      abort ();\n    }\n  if (option_trace >= 1)\n    printf (\"size=%ld, mpn_preinv_divrem_1 %.9f, %s %.9f\\n\",\n            (long) s.size, t1, divrem_1_name, t2);\n\n  print_define_remark (\"USE_PREINV_DIVREM_1\", (mp_size_t) (t1 < t2), NULL);\n}\n\n\n/* A non-zero MOD_1_UNNORM_THRESHOLD (or MOD_1_NORM_THRESHOLD) would imply\n   that udiv_qrnnd_preinv is worth using, but it seems most straightforward\n   to compare mpn_preinv_mod_1 and mpn_mod_1_div directly.  */\n\nvoid\ntune_preinv_mod_1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  speed_function_t  mod_1;\n  const char        *mod_1_name;\n  double            t1, t2;\n\n  /* Any native version of mpn_preinv_mod_1 is assumed to exist because it's\n     faster than mpn_mod_1.  
*/\n  if (HAVE_NATIVE_mpn_preinv_mod_1)\n    {\n      print_define_remark (\"USE_PREINV_MOD_1\", 1, \"native\");\n      return;\n    }\n\n  if (GMP_NAIL_BITS != 0)\n    {\n      print_define_remark (\"USE_PREINV_MOD_1\", 0, \"no preinv with nails\");\n      return;\n    }\n\n  /* If udiv_qrnnd_preinv is the only division method then of course\n     mpn_preinv_mod_1 should be used.  */\n  if (UDIV_PREINV_ALWAYS)\n    {\n      print_define_remark (\"USE_PREINV_MOD_1\", 1, \"preinv always\");\n      return;\n    }\n\n  /* If we've got an assembler version of mpn_mod_1, then compare against\n     that, not the mpn_mod_1_div generic C.  */\n  if (HAVE_NATIVE_mpn_mod_1)\n    {\n      mod_1 = speed_mpn_mod_1;\n      mod_1_name = \"mpn_mod_1\";\n    }\n  else\n    {\n      mod_1 = speed_mpn_mod_1_div;\n      mod_1_name = \"mpn_mod_1_div\";\n    }\n\n  param.data_high = DATA_HIGH_LT_R; /* let mpn_mod_1 skip one division */\n  s.size = 200;                     /* generous but not too big */\n  s.r = randlimb_norm(rands);            /* divisor */\n\n  t1 = tuneup_measure (speed_mpn_preinv_mod_1, rands, &param, &s);\n  t2 = tuneup_measure (mod_1, rands, &param, &s);\n  if (t1 == -1.0 || t2 == -1.0)\n    {\n      printf (\"Oops, can't measure mpn_preinv_mod_1 and %s at %ld\\n\",\n              mod_1_name, (long) s.size);\n      abort ();\n    }\n  if (option_trace >= 1)\n    printf (\"size=%ld, mpn_preinv_mod_1 %.9f, %s %.9f\\n\",\n            (long) s.size, t1, mod_1_name, t2);\n\n  print_define_remark (\"USE_PREINV_MOD_1\", (mp_size_t) (t1 < t2), NULL);\n}\n\n\nvoid\ntune_divrem_2 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  /* No support for tuning native assembler code, do that by hand and put\n     the results in the .asm file, and there's no need for such thresholds\n     to appear in gmp-mparam.h.  
*/\n  if (HAVE_NATIVE_mpn_divrem_2)\n    return;\n\n  if (GMP_NAIL_BITS != 0)\n    {\n      print_define_remark (\"DIVREM_2_THRESHOLD\", MP_SIZE_T_MAX,\n                           \"no preinv with nails\");\n      return;\n    }\n\n  if (UDIV_PREINV_ALWAYS)\n    {\n      print_define_remark (\"DIVREM_2_THRESHOLD\", 0L, \"preinv always\");\n      return;\n    }\n\n  /* Tune for the integer part of mpn_divrem_2.  This will very possibly be\n     a bit out for the fractional part, but that's too bad, the integer part\n     is more important.\n\n     min_size must be >=2 since nsize>=2 is required, but is set to 4 to save\n     code space if plain division is better only at size==2 or size==3. */\n  param.name = \"DIVREM_2_THRESHOLD\";\n  param.check_size = 256;\n  param.min_size = 4;\n  param.min_is_always = 1;\n  param.size_extra = 2;      /* does qsize==nsize-2 divisions */\n  param.stop_factor = 2.0;\n\n  s.r = randlimb_norm (rands);\n  param.function = speed_mpn_divrem_2;\n  one (&divrem_2_threshold, rands, &param);\n}\n\n\n/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so\n   tune for that.  Its speed can differ on odd or even divisor, so take an\n   average threshold for the two.\n\n   mpn_divrem_1 can vary with high<divisor or not, whereas mpn_divexact_1\n   might not vary that way, but don't test this since high<divisor isn't\n   expected to occur often with small divisors.  */\n\nvoid\ntune_divexact_1 (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  mp_size_t  thresh[2], average;\n  int        low, i;\n\n  /* Any native mpn_divexact_1 is assumed to incorporate all the speed of a\n     full mpn_divrem_1.  
*/\n  if (HAVE_NATIVE_mpn_divexact_1)\n    {\n      print_define_remark (\"DIVEXACT_1_THRESHOLD\", 0, \"always (native)\");\n      return;\n    }\n\n  ASSERT_ALWAYS (tuned_speed_mpn_divrem_1 != NULL);\n\n  param.name = \"DIVEXACT_1_THRESHOLD\";\n  param.data_high = DATA_HIGH_GE_R;\n  param.check_size = 256;\n  param.min_size = 2;\n  param.stop_factor = 1.5;\n  param.function  = tuned_speed_mpn_divrem_1;\n  param.function2 = speed_mpn_divexact_1;\n  param.noprint = 1;\n\n  print_define_start (param.name);\n\n  for (low = 0; low <= 1; low++)\n    {\n      s.r = randlimb_half(rands);\n      if (low == 0)\n        s.r |= 1;\n      else\n        s.r &= ~CNST_LIMB(7);\n\n      one (&thresh[low], rands, &param);\n      if (option_trace)\n        printf (\"low=%d thresh %ld\\n\", low, (long) thresh[low]);\n\n      if (thresh[low] == MP_SIZE_T_MAX)\n        {\n          average = MP_SIZE_T_MAX;\n          goto divexact_1_done;\n        }\n    }\n\n  if (option_trace)\n    {\n      printf (\"average of:\");\n      for (i = 0; i < numberof(thresh); i++)\n        printf (\" %ld\", (long) thresh[i]);\n      printf (\"\\n\");\n    }\n\n  average = 0;\n  for (i = 0; i < numberof(thresh); i++)\n    average += thresh[i];\n  average /= numberof(thresh);\n\n  /* If divexact turns out to be better as early as 3 limbs, then use it\n     always, so as to reduce code size and conditional jumps.  */\n  if (average <= 3)\n    average = 0;\n\n divexact_1_done:\n  print_define_end (param.name, average);\n}\n\n\n/* The generic mpn_modexact_1_odd skips a divide step if high<divisor, the\n   same as mpn_mod_1, but this might not be true of an assembler\n   implementation.  The threshold used is an average based on data where a\n   divide can be skipped and where it can't.\n\n   If modexact turns out to be better as early as 3 limbs, then use it\n   always, so as to reduce code size and conditional jumps.  
*/\n\nvoid\ntune_modexact_1_odd (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  mp_size_t  thresh_lt, thresh_ge, average;\n\n  /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed\n     of a full mpn_mod_1.  */\n  if (HAVE_NATIVE_mpn_modexact_1_odd)\n    {\n      print_define_remark (\"MODEXACT_1_ODD_THRESHOLD\", 0, \"always (native)\");\n      return;\n    }\n\n  ASSERT_ALWAYS (tuned_speed_mpn_mod_1 != NULL);\n\n  param.name = \"MODEXACT_1_ODD_THRESHOLD\";\n  param.check_size = 256;\n  param.min_size = 2;\n  param.stop_factor = 1.5;\n  param.function  = tuned_speed_mpn_mod_1;\n  param.function2 = speed_mpn_modexact_1c_odd;\n  param.noprint = 1;\n  s.r = randlimb_half (rands) | 1;\n\n  print_define_start (param.name);\n\n  param.data_high = DATA_HIGH_LT_R;\n  one (&thresh_lt, rands, &param);\n  if (option_trace)\n    printf (\"lt thresh %ld\\n\", (long) thresh_lt);\n\n  average = thresh_lt;\n  if (thresh_lt != MP_SIZE_T_MAX)\n    {\n      param.data_high = DATA_HIGH_GE_R;\n      one (&thresh_ge, rands, &param);\n      if (option_trace)\n        printf (\"ge thresh %ld\\n\", (long) thresh_ge);\n\n      if (thresh_ge != MP_SIZE_T_MAX)\n        {\n          average = (thresh_ge + thresh_lt) / 2;\n          if (thresh_ge <= 3)\n            average = 0;\n        }\n    }\n\n  print_define_end (param.name, average);\n}\n\n\nvoid\ntune_jacobi_base (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n  double   t1, t2, t3, t4;\n  int      method;\n\n  s.size = GMP_LIMB_BITS * 3 / 4;\n\n  t1 = tuneup_measure (speed_mpn_jacobi_base_1, rands, &param, &s);\n  if (option_trace >= 1)\n    printf (\"size=%ld, mpn_jacobi_base_1 %.9f\\n\", (long) s.size, t1);\n\n  t2 = tuneup_measure (speed_mpn_jacobi_base_2, rands, &param, &s);\n  if (option_trace >= 1)\n    printf (\"size=%ld, mpn_jacobi_base_2 %.9f\\n\", (long) s.size, t2);\n\n  t3 = tuneup_measure (speed_mpn_jacobi_base_3, rands, &param, &s);\n  if (option_trace >= 1)\n    printf 
(\"size=%ld, mpn_jacobi_base_3 %.9f\\n\", (long) s.size, t3);\n\n  t4 = tuneup_measure (speed_mpn_jacobi_base_4, rands, &param, &s);\n  if (option_trace >= 1)\n    printf (\"size=%ld, mpn_jacobi_base_4 %.9f\\n\", (long) s.size, t4);\n\n  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)\n    {\n      printf (\"Oops, can't measure all mpn_jacobi_base methods at %ld\\n\",\n              (long) s.size);\n      abort ();\n    }\n\n  if (t1 < t2 && t1 < t3 && t1 < t4)\n    method = 1;\n  else if (t2 < t3 && t2 < t4)\n    method = 2;\n  else if (t3 < t4)\n    method = 3;\n  else\n    method = 4;\n\n  print_define (\"JACOBI_BASE_METHOD\", method);\n}\n\n\n\nvoid\ntune_get_str (gmp_randstate_t rands)\n{\n  /* Tune for decimal, it being most common.  Some rough testing suggests\n     other bases are different, but not by very much.  */\n  s.r = 10;\n  {\n    static struct param_t  param;\n    GET_STR_PRECOMPUTE_THRESHOLD = 0;\n    param.name = \"GET_STR_DC_THRESHOLD\";\n    param.function = speed_mpn_get_str;\n    param.min_size = 4;\n    param.max_size = GET_STR_THRESHOLD_LIMIT;\n    one (&get_str_dc_threshold, rands, &param);\n  }\n  {\n    static struct param_t  param;\n    param.name = \"GET_STR_PRECOMPUTE_THRESHOLD\";\n    param.function = speed_mpn_get_str;\n    param.min_size = GET_STR_DC_THRESHOLD;\n    param.max_size = GET_STR_THRESHOLD_LIMIT;\n    one (&get_str_precompute_threshold, rands, &param);\n  }\n}\n\ndouble\nspeed_mpn_pre_set_str (struct speed_params *s)\n{\n  unsigned char *str;\n  mp_ptr     wp;\n  mp_size_t  wn;\n  unsigned   i;\n  int        base;\n  double     t;\n  mp_ptr powtab_mem, tp;\n  powers_t powtab[GMP_LIMB_BITS];\n  mp_size_t un;\n  int chars_per_limb;\n  TMP_DECL;\n\n  SPEED_RESTRICT_COND (s->size >= 1);\n\n  base = s->r == 0 ? 
10 : s->r;\n  SPEED_RESTRICT_COND (base >= 2 && base <= 256);\n\n  TMP_MARK;\n\n  str = TMP_ALLOC (s->size);\n  for (i = 0; i < s->size; i++)\n    str[i] = s->xp[i] % base;\n\n  wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly))\n    / GMP_LIMB_BITS + 2;\n  SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);\n\n  /* use this during development to check wn is big enough */\n  /*\n  ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);\n  */\n\n  speed_operand_src (s, (mp_ptr) str, s->size/BYTES_PER_MP_LIMB);\n  speed_operand_dst (s, wp, wn);\n  speed_cache_fill (s);\n\n  chars_per_limb = mp_bases[base].chars_per_limb;\n  un = s->size / chars_per_limb + 1;\n  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));\n  mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);\n  tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));\n\n  speed_starttime ();\n  i = s->reps;\n  do\n    {\n      mpn_pre_set_str (wp, str, s->size, powtab, tp);\n    }\n  while (--i != 0);\n  t = speed_endtime ();\n\n  TMP_FREE;\n  return t;\n}\n\nvoid\ntune_set_str (gmp_randstate_t rands)\n{\n  s.r = 10;  /* decimal */\n  {\n    static struct param_t  param;\n    SET_STR_PRECOMPUTE_THRESHOLD = 0;\n    param.step_factor = 0.01;\n    param.name = \"SET_STR_DC_THRESHOLD\";\n    param.function = speed_mpn_pre_set_str;\n    param.min_size = 100;\n    param.max_size = 50000;\n    one (&set_str_dc_threshold, rands, &param);\n  }\n  {\n    static struct param_t  param;\n    param.step_factor = 0.02;\n    param.name = \"SET_STR_PRECOMPUTE_THRESHOLD\";\n    param.function = speed_mpn_set_str;\n    param.min_size = SET_STR_DC_THRESHOLD;\n    param.max_size = 100000;\n    one (&set_str_precompute_threshold, rands, &param);\n  }\n}\n\nvoid \ntune_fft(gmp_randstate_t state)\n{\n    mp_bitcnt_t depth, w, depth1, w1;\n    clock_t start, end;\n    double elapsed;\n    double best = 0.0;\n    mp_size_t best_off, off, best_d, best_w, num_twos, num_printed;\n\n    if (option_fft_max_size 
== 0)\n       return;\n\n    printf(\"/* fft_tuning -- autogenerated by tune-fft */\\n\\n\");\n    printf(\"#define FFT_TAB \\\\\\n\");\n    fflush(stdout);\n\n    printf(\"   { \"); fflush(stdout);\n    for (depth = 6; depth <= 10; depth++)\n    {\n        printf(\"{ \"); fflush(stdout);\n        for (w = 1; w <= 2; w++)\n        {\n            int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i;\n            \n            mp_size_t n = ((mp_limb_t)1<<depth);\n            mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; \n            mp_size_t len1 = 2*n;\n            mp_size_t len2 = 2*n;\n\n            mp_bitcnt_t b1 = len1*bits1, b2 = len2*bits1;\n            mp_size_t n1, n2;\n            mp_size_t j;\n            mp_limb_t * i1, *i2, *r1;\n   \n            n1 = (b1 - 1)/GMP_LIMB_BITS + 1;\n            n2 = (b2 - 1)/GMP_LIMB_BITS + 1;\n                    \n            i1 = malloc(2*(n1 + n2)*sizeof(mp_limb_t));\n            i2 = i1 + n1;\n            r1 = i2 + n2;\n   \n            mpn_urandomb(i1, state, b1);\n            mpn_urandomb(i2, state, b2);\n  \n            best_off = -1;\n            \n            for (off = 0; off <= 4; off++)\n            {\n               start = clock();\n               for (i = 0; i < iters; i++)\n                  mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2)));\n               end = clock();\n               \n               elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;\n               \n               if (elapsed < best || best_off == -1)\n               {\n                  best_off = off;\n                  best = elapsed;\n               }\n            }\n           \n            printf(\"%ld\", best_off); \n            if (w != 2) printf(\",\");\n            printf(\" \"); fflush(stdout);\n\n            free(i1);\n        }\n        printf(\"}\");\n        if (depth != 10) printf(\",\");\n        printf(\" \"); fflush(stdout);\n    }\n\n    printf(\"}\\n\\n\");\n    \n    best_d = 
12;\n    best_w = 1;\n    best_off = -1;\n    num_printed = 0;\n    num_twos = 0;\n\n    printf(\"#define MULMOD_TAB \\\\\\n\");\n    fflush(stdout);\n    printf(\"   { \"); fflush(stdout);\n    for (depth = 12; best_off != 1 && !(num_printed >= 25 && best_off == 2 && num_twos >= 5) ; depth++)\n    {\n        for (w = 1; w <= 2; w++)\n        {\n            int iters = 100*((mp_size_t) 1 << (3*(18 - depth)/2)), i;\n            mp_size_t n = ((mp_limb_t)1<<depth);\n            mp_bitcnt_t bits = n*w;\n            mp_size_t int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;\n            mp_size_t j;\n            mp_limb_t c, * i1, * i2, * r1, * tt;\n        \n            if (depth <= 21) iters = 32*((mp_size_t) 1 << (21 - depth));\n            else iters = MAX(32/((mp_size_t) 1 << (depth - 21)), 1);\n\n            i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t));\n            i2 = i1 + int_limbs + 1;\n            r1 = i2 + int_limbs + 1;\n            tt = r1 + 2*(int_limbs + 1);\n                \n            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);\n            mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);\n            i1[int_limbs] = 0;\n            i2[int_limbs] = 0;\n\n            depth1 = 1;\n            while ((((mp_limb_t)1)<<depth1) < bits) depth1++;\n            depth1 = depth1/2;\n\n            w1 = bits/(((mp_limb_t)1)<<(2*depth1));\n\n            best_off = -1;\n            \n            for (off = 0; off <= 4; off++)\n            {\n               start = clock();\n               for (i = 0; i < iters; i++)\n                  mpir_fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2)));\n               end = clock();\n               \n               elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;\n               \n               if (best_off == -1 || elapsed < best)\n               {\n                  best_off = off;\n                  best = elapsed;\n               }\n            }\n           \n            start = 
clock();\n            for (i = 0; i < iters; i++)\n                mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);\n            end = clock();\n               \n            elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;\n            if (elapsed < best)\n            {\n                best_d = depth + (w == 2);\n                best_w = w + 1 - 2*(w == 2);\n            }\n\n            printf(\"%ld\", best_off); \n            if (best_off == 2)\n               num_twos++;\n            else\n               num_twos = 0;\n            num_printed++;\n            if (w != 2) printf(\", \"); fflush(stdout);\n\n            free(i1);\n        }\n        printf(\", \"); fflush(stdout);\n    }\n    if (best_off == 2)\n    {\n       printf(\"2, 2, 2, 2, 2, 1, 1 }\\n\\n\");\n       num_printed += 6;\n    } else\n       printf(\"1 }\\n\\n\");\n    \n    printf(\"#define FFT_N_NUM %ld\\n\\n\", num_printed + 1);\n    \n    printf(\"#define FFT_MULMOD_2EXPP1_CUTOFF %ld\\n\\n\", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS));\n}\n\nvoid\ntune_fac_ui (gmp_randstate_t rands)\n{\n  static struct param_t  param;\n\n  param.function = speed_mpz_fac_ui_tune;\n\n  param.name = \"FAC_DSC_THRESHOLD\";\n  param.min_size = 70;\n  param.max_size = FAC_DSC_THRESHOLD_LIMIT;\n  one (&fac_dsc_threshold, rands, &param);\n\n  param.name = \"FAC_ODD_THRESHOLD\";\n  param.min_size = 22;\n  param.stop_factor = 1.7;\n  param.min_is_always = 1;\n  one (&fac_odd_threshold, rands, &param);\n}\n\nvoid\ntune_fft_mul (gmp_randstate_t rands)\n{\n  static struct fft_param_t  param;\n\n  if (option_fft_max_size == 0)\n    return;\n\n  param.threshold_name      = \"MUL_FFT_FULL_THRESHOLD\";\n  param.p_threshold         = &mul_fft_full_threshold;\n  param.first_size          = MUL_TOOM8H_THRESHOLD / 2;\n  param.max_size            = option_fft_max_size;\n  param.function            = speed_mpn_mul_fft_main;\n  param.mul_function        = speed_mpn_mul_n;\n  param.sqr = 0;\n  fft 
(&param,rands);\n}\n\n\nvoid\ntune_fft_sqr (gmp_randstate_t rands)\n{\n  static struct fft_param_t  param;\n\n  if (option_fft_max_size == 0)\n    return;\n\n  param.threshold_name      = \"SQR_FFT_FULL_THRESHOLD\";\n  param.p_threshold         = &sqr_fft_full_threshold;\n  param.first_size          = SQR_TOOM8_THRESHOLD / 2;\n  param.max_size            = option_fft_max_size;\n  param.function            = speed_mpn_sqr_fft_main;\n  param.mul_function        = speed_mpn_sqr;\n  param.sqr = 0;\n  fft (&param,rands);\n}\n\n#ifdef _MSC_VER\n#define GMP_MPARAM_H_SUGGEST \"vc_gmp_mparam.h\"\n#endif\n\nvoid\nall (gmp_randstate_t rands)\n{\n  time_t  start_time, end_time;\n  TMP_DECL;\n\n  TMP_MARK;\n  SPEED_TMP_ALLOC_LIMBS (s.xp_block, SPEED_BLOCK_SIZE, 0);\n  SPEED_TMP_ALLOC_LIMBS (s.yp_block, SPEED_BLOCK_SIZE, 0);\n\n  mpn_randomb (s.xp_block, rands, SPEED_BLOCK_SIZE);\n  mpn_randomb (s.yp_block, rands, SPEED_BLOCK_SIZE);\n\n  fprintf (stderr, \"Parameters for %s\\n\", GMP_MPARAM_H_SUGGEST);\n\n  speed_time_init ();\n  fprintf (stderr, \"Using: %s\\n\", speed_time_string);\n\n  fprintf (stderr, \"speed_precision %d\", speed_precision);\n  if (speed_unittime == 1.0)\n    fprintf (stderr, \", speed_unittime 1 cycle\");\n  else\n    fprintf (stderr, \", speed_unittime %.2e secs\", speed_unittime);\n  if (speed_cycletime == 1.0 || speed_cycletime == 0.0)\n    fprintf (stderr, \", CPU freq unknown\\n\");\n  else\n    fprintf (stderr, \", CPU freq %.2f MHz\\n\", 1e-6/speed_cycletime);\n\n  fprintf (stderr, \"DEFAULT_MAX_SIZE %d, fft_max_size %ld\\n\",\n           DEFAULT_MAX_SIZE, (long) option_fft_max_size);\n  fprintf (stderr, \"\\n\");\n\n  time (&start_time);\n  {\n    struct tm  *tp;\n    tp = localtime (&start_time);\n    printf (\"/* Generated by tuneup.c, %d-%02d-%02d, \",\n            tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);\n\n#ifdef __GNUC__\n    /* gcc sub-minor version doesn't seem to come through as a define */\n    printf (\"gcc %d.%d */\\n\", __GNUC__, 
__GNUC_MINOR__);\n#define PRINTED_COMPILER\n#endif\n#if defined (__SUNPRO_C)\n    printf (\"Sun C %d.%d */\\n\", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);\n#define PRINTED_COMPILER\n#endif\n#if ! defined (__GNUC__) && defined (__sgi) && defined (_COMPILER_VERSION)\n    /* gcc defines __sgi and _COMPILER_VERSION on irix 6, avoid that */\n    printf (\"MIPSpro C %d.%d.%d */\\n\",\n\t    _COMPILER_VERSION / 100,\n\t    _COMPILER_VERSION / 10 % 10,\n\t    _COMPILER_VERSION % 10);\n#define PRINTED_COMPILER\n#endif\n#if defined (__DECC) && defined (__DECC_VER)\n    printf (\"DEC C %d */\\n\", __DECC_VER);\n#define PRINTED_COMPILER\n#endif\n#if ! defined (PRINTED_COMPILER)\n    printf (\"system compiler */\\n\");\n#endif\n  }\n  printf (\"\\n\");\n\n  tune_mul (rands);\n  printf(\"\\n\");\n\n  tune_sqr (rands);\n  printf(\"\\n\");\n\n  tune_divrem_1 (rands);\n  tune_mod_1 (rands);\n  tune_preinv_divrem_1 (rands);\n  tune_preinv_mod_1 (rands);\n  tune_divrem_2 (rands);\n  tune_divexact_1 (rands);\n  tune_modexact_1_odd (rands);\n  tune_mod_1_k(rands);\n  tune_divrem_hensel_qr_1(rands);\n  tune_rsh_divrem_hensel_qr_1(rands);\n  tune_divrem_euclid_hensel(rands);\n  printf(\"\\n\");\n\n  tune_fft_mul (rands);\n  printf(\"\\n\");\n\n  tune_fft_sqr (rands);\n  printf (\"\\n\");\n\n  tune_mullow (rands);\n  printf(\"\\n\");\n  tune_mulmid (rands);\n  printf(\"\\n\");\n  tune_mulhigh (rands);\n  printf(\"\\n\");\n\n  tune_mulmod_2expm1(rands);\n  printf(\"\\n\");\n\n  /* dc_div_qr_n, dc_divappr_q, inv_div_qr, inv_divappr_q */\n  tune_dc_div (rands);\n  \n  /* mpn_tdiv_q : balanced */\n  tune_tdiv_q (rands);\n  \n  /* dc_bdiv_qr_n, dc_bdiv_q */\n  tune_dc_bdiv (rands);  \n  printf(\"\\n\");\n\n  tune_binvert (rands);\n  tune_redc (rands);\n  printf(\"\\n\");\n\n  tune_rootrem(rands);\n  printf(\"\\n\");\n\n  tune_matrix22_mul (rands);\n  tune_hgcd (rands);\n  tune_hgcd_appr (rands);\n  tune_hgcd_reduce(rands);\n  tune_gcd_dc (rands);\n  tune_gcdext_dc (rands);\n  tune_jacobi_base 
(rands);\n  printf(\"\\n\");\n\n  tune_get_str (rands);\n  tune_set_str (rands);\n  printf(\"\\n\");\n\n  tune_fac_ui (rands);\n  printf(\"\\n\");\n\n  tune_fft (rands);\n  printf(\"\\n\");\n\n  time (&end_time);\n  printf (\"/* Tuneup completed successfully, took %ld seconds */\\n\",\n          end_time - start_time);\n\n  TMP_FREE;\n}\n\n\nint\nmain (int argc, char *argv[])\n{\n  int  opt;\n  gmp_randstate_t rands;\n\n  gmp_randinit_default(rands);\n  /* Unbuffered so if output is redirected to a file it isn't lost if the\n     program is killed part way through.  */\n  setbuf (stdout, NULL);\n  setbuf (stderr, NULL);\n\n  while ((opt = getopt(argc, argv, \"f:o:p:t\")) != EOF)\n    {\n      switch (opt) {\n      case 'f':\n        if (optarg[0] == 't')\n          option_fft_trace = 2;\n        else\n          option_fft_max_size = atol (optarg);\n        break;\n      case 'o':\n        speed_option_set (optarg);\n        break;\n      case 'p':\n        speed_precision = atoi (optarg);\n        break;\n      case 't':\n        option_trace++;\n        break;\n      case '?':\n        exit(1);\n      }\n    }\n\n  all (rands);\n  exit (0);\n}\n"
  },
  {
    "path": "tune/x86_64.as",
    "content": ";  x86 pentium time stamp counter access routine.\n;\n;  Copyright 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc.\n;\n;  Copyright 2008 William Hart\n;\n;  This file is part of the MPIR Library.\n;\n;  The MPIR Library is free software; you can redistribute it and/or\n;  modify it under the terms of the GNU Lesser General Public License as\n;  published by the Free Software Foundation; either version 2.1 of the\n;  License, or (at your option) any later version.\n;\n;  The MPIR Library is distributed in the hope that it will be useful,\n;  but WITHOUT ANY WARRANTY; without even the implied warranty of\n;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n;  Lesser General Public License for more details.\n;\n;  You should have received a copy of the GNU Lesser General Public\n;  License along with the MPIR Library; see the file COPYING.LIB.  If\n;  not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n;  Fifth Floor, Boston, MA 02110-1301, USA.\n\n\n; void speed_cyclecounter (unsigned p[2]);\n;\n; Get the pentium rdtsc cycle counter, storing the least significant word in\n; p[0] and the most significant in p[1].\n;\n; cpuid is used to serialize execution.  On big measurements this won't be\n; significant but it may help make small single measurements more accurate.\n\n%include 'yasm_mac.inc'\n\nbits 64\n    section .text\n\n    G_EXPORT speed_cyclecounter\n\nG_LABEL speed_cyclecounter\n\n\tmov     r10, rbx\n\txor     eax, eax\n\tcpuid\n\trdtsc\n\tmov\t    [rdi], eax \n\tmov     [rdi+4], edx\n\tmov     rbx, r10\n\tret\n\n    end\n"
  },
  {
    "path": "version.c",
    "content": "/* gmp_version and mpir_version -- version numbers compiled into the library.\n\nCopyright 1996, 1999, 2000, 2001 Free Software Foundation, Inc.\n\nCopyright 2008 William Hart.\n\nThis file is part of the MPIR Library.\n\nThe MPIR Library is free software; you can redistribute it and/or modify\nit under the terms of the GNU Lesser General Public License as published by\nthe Free Software Foundation; either version 2.1 of the License, or (at your\noption) any later version.\n\nThe MPIR Library is distributed in the hope that it will be useful, but\nWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\nor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\nLicense for more details.\n\nYou should have received a copy of the GNU Lesser General Public License\nalong with the MPIR Library; see the file COPYING.LIB.  If not, write to\nthe Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,\nMA 02110-1301, USA. */\n\n#include \"mpir.h\"\n#include \"gmp-impl.h\"\n\nconst char * const gmp_version = GMP_VERSION;\n#if defined( _MSC_VER )\nconst char * const mpir_version = _MSC_MPIR_VERSION;\n#else\nconst char * const mpir_version = VERSION;\n#endif\n"
  },
  {
    "path": "yasm_mac.inc.fat",
    "content": "; Copyright 2008, William Hart, Jason Worth-Martin\n;\n; This file is part of the MPIR Library.\n;\n; The MPIR Library is free software; you can redistribute it and/or modify\n; it under the terms of the GNU Lesser General Public License as published by\n; the Free Software Foundation; either version 2.1 of the License, or (at your\n; option) any later version.\n;\n; The MPIR Library is distributed in the hope that it will be useful, but\n; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n; License for more details.\n;\n; You should have received a copy of the GNU Lesser General Public License\n; along with the MPIR Library; see the file COPYING.LIB.  If not, write to\n; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n; 02110-1301, USA.\n\n%macro G_LABEL 1\n%ifdef GSYM_PREFIX\n_%1:\n%else\n%1:\n%endif\n%endmacro\n\n%macro G_EXPORT 1\n%ifdef GSYM_PREFIX\n    global _%1:function\n%else\n    global %1:function\n%endif\n%endmacro\n\n%macro G_EXTERN 1\n%ifdef GSYM_PREFIX\n    extern _%1\n%else\n    extern %1\n%endif\n%endmacro\n\n%macro G_FUNC 2 \nG_EXPORT __g%1_%2\nG_EXPORT %1_%2\nG_LABEL __g%1_%2\nG_LABEL %1_%2\n%endmacro\n\n%macro GLOBAL_FUNC 1\nG_FUNC %1, suffix\n%endmacro\n"
  },
  {
    "path": "yasm_mac.inc.nofat",
    "content": "; Copyright 2008, William Hart, Jason Worth-Martin\n;\n; This file is part of the MPIR Library.\n;\n; The MPIR Library is free software; you can redistribute it and/or modify\n; it under the terms of the GNU Lesser General Public License as published by\n; the Free Software Foundation; either version 2.1 of the License, or (at your\n; option) any later version.\n;\n; The MPIR Library is distributed in the hope that it will be useful, but\n; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n; License for more details.\n;\n; You should have received a copy of the GNU Lesser General Public License\n; along with the MPIR Library; see the file COPYING.LIB.  If not, write to\n; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n; 02110-1301, USA.\n\n%macro G_LABEL 1\n%ifdef GSYM_PREFIX\n_%1:\n%else\n%1:\n%endif\n%endmacro\n\n%macro G_EXPORT 1\n%ifdef GSYM_PREFIX\n    global _%1:function\n%else\n    global %1:function\n%endif\n%endmacro\n\n%macro G_EXTERN 1\n%ifdef GSYM_PREFIX\n    extern _%1\n%else\n    extern %1\n%endif\n%endmacro\n\n%macro GLOBAL_FUNC 1\nG_EXPORT __g%1\nG_EXPORT %1\nG_LABEL __g%1\nG_LABEL %1\n%endmacro\n"
  },
  {
    "path": "yasm_macwin.inc.fat",
    "content": ";  Copyright (C) 2009, 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n\n; Standardised register numbering scheme\n\n%define r0  rax\n%define r1  rdx\n%define r2  rcx\n%define r3  rbx\n%define r4  rsi\n%define r5  rdi\n%define r6  rbp\n%define r7  rsp\n\n%define r0d eax\n%define r1d edx\n%define r2d ecx\n%define r3d ebx\n%define r4d esi\n%define r5d edi\n%define r6d ebp\n%define r7d esp\n\n%define r0w ax\n%define r1w dx\n%define r2w cx\n%define r3w bx\n%define r4w si\n%define r5w di\n%define r6w bp\n%define r7w sp\n\n%define r0b al\n%define r1b dl\n%define r2b cl\n%define r3b bl\n%define r4b sil\n%define r5b dil\n%define r6b bpl\n%define r7b 
spl\n\n%define asm_sym(x) __g %+ x\n\n; Standard macro for alignment (used to allow easy subsititution of\n; alternative padding schemes)\n\n%macro xalign 1\n    align %1\n%endmacro\n\n; YASM macros for handling Windows Prologues and Epilogues\n;\n; Copyright 2008, 2009 Brian Gladman\n;\n; Windows x64 prologue macro:\n;\n;     FRAME_PROC name, var_slots, register_save_list \n;\n; Windows x64 epilogue macro:\n;\n;     EXIT_PROC register_save_list\n;\n; Windows x64 epilogue and end procedure macro\n;\n;     END_PROC register_save_list\n;\n; where:\n\n;   name:                routine name\n;   var_slots:           stack space needed in 8 byte units\n;   register_save_list:  comma separated list of registers to\n;                        save and restore: a list of general\n;                        purpose registers followed, if XMM\n;                        registers are present, by XMM, and\n;                        then a list of the numbers of the \n;                        XMM registers to save and restore\n;                        e.g.  rsi, rdi, rbp, XMM, 6, 7, 8\n\n; On return the macro variable 'stack_use' gives the total number \n; of bytes used on the stack. 
This allows the function parameters\n; to be accessed at [rsp + stack_use + 8 * n] where n starts at 1\n; (for n = 1..4 this is shadow space for a register parameter)  \n\n%macro FRAME_PROC 2-*\n\n    global  asm_sym(%1)\n    \n%ifdef DLL\n    export  asm_sym(%1)\n%endif\n\n    PROC_FRAME asm_sym(%1)\n    %rotate 1\n      \n    %if %1 < 0\n        %error Negative stack allocation not allowed\n    %else\n\t    %assign var_slots %1\n    %endif\n    %rotate 1\n\n\t%assign reg_slots 0\n\t%assign xmm_seen 0\n    %if %0 > 2\n        %rep %0 - 2\n\t\t    %ifnum %1\n\t\t\t    %if xmm_seen == 0\n\t\t\t\t    %error Not an XMM register\n\t\t\t\t%else\n\t\t\t\t    alloc_stack 16\n\t\t\t\t\tsave_xmm128 XMM%1, 0\n\t\t\t\t\t%assign reg_slots reg_slots + 2\n\t\t\t\t%endif\n\t\t    %elifid %1\n\t\t        %ifidni XMM, %1 \n\t\t\t\t\t%if reg_slots & 1 == 0\n\t\t\t\t\t    alloc_stack 8\n  \t\t\t\t\t    %assign reg_slots reg_slots + 1\n    \t\t\t\t    %assign xmm_seen 1 \n\t\t\t\t\t%else\n\t\t\t\t        %assign xmm_seen 2\t\t\t\t\t \n\t\t\t\t\t%endif\n\t\t\t\t%elif xmm_seen == 0\n\t\t\t\t\tpush_reg  %1\n  \t\t\t\t\t%assign reg_slots reg_slots + 1\n\t\t\t\t%else\n\t\t\t\t    %error XMM registers must be last in the save list\n\t\t\t\t%endif \n\t\t\t%else\n\t\t\t    %error Bad parameter list\n\t\t\t%endif\n            %rotate 1\n        %endrep\n    %endif\n\n\t%if (reg_slots & 1) == (var_slots & 1)\n\t    %assign var_slots var_slots + 1\n\t%endif\n\n    %if var_slots > 0\n        alloc_stack 8 * var_slots\n\t%endif\n\t%assign stack_use 8 * (reg_slots + var_slots)\n    END_PROLOGUE\n\n%endmacro\n\n%macro EXIT_PROC 0-*\n\n    add rsp, 8 * var_slots\n    %if %0 > 0\n        %rep %0\n            %rotate -1\n\t\t\t%ifnum %1\n\t\t\t    movdqa XMM%1, [rsp]\n\t\t\t\tadd rsp, 16\n\t\t\t%elifidni %1, XMM\n\t\t\t    %if xmm_seen == 1 \n\t\t\t\t    add rsp, 8\n\t\t\t\t%endif\n\t\t\t%else \n                pop  %1\n\t\t\t%endif  \n        %endrep\n    %endif\n    ret\n\n%endmacro\n\n%macro 
END_PROC 0-*\n\n    %if var_slots\n\t    add rsp, 8 * var_slots\n    %endif\n    %if %0 > 0\n        %rep %0\n            %rotate -1\n\t\t\t%ifnum %1\n\t\t\t    movdqa XMM%1, [rsp]\n\t\t\t\tadd rsp, 16\n\t\t\t%elifidni %1, XMM\n\t\t\t    %if xmm_seen == 1 \n\t\t\t\t    add rsp, 8\n\t\t\t\t%endif\n\t\t\t%else \n                pop  %1\n\t\t\t%endif  \n        %endrep\n    %endif\n\tret\n    ENDPROC_FRAME\n\n%endmacro\n\n%macro LEAF_PROC 1\n    \n    global  asm_sym(%1)\n    \n%ifdef DLL\n    export  asm_sym(%1)\n%endif\n\nasm_sym(%1):\n\n%endmacro\n\n; Macros for using assembler code using the GCC Calling\n; Conventions in Windows.  These macros move the first\n; six integer parameters from Microsoft ABI calling\n; calling conventions to those used by GCC: \n;\n;   function(    MSVC --> GCC\n;       p1,       rcx     rdi\n;       p2,       rdx     rsi\n;       p3,        r8     rdx\n;       p4,        r9     rcx\n;       p5,  [rsp+40]      r8\n;       p6,  [rsp+48]      r9\n;\n;   WIN64_GCC_PROC name, n_parms, (frame | leaf)\n;\n;   WIN64_GCC_EXIT frame | leaf\n;\n;   WIN64_GCC_END  frame | leaf\n; \n;     name    subroutine name\n;     n_parms number of parameters (default 6)\n;     type    frame or leaf function (default frame)\n;\n; These defines are also used and must be set before the\n; macros are used:\n;\n;     reg_save_list   list of registers to be saved\n;                     and restored\n;     var_slots       number of 8 byte slots needed\n;                     on the stack (excluding the \n;                     register save/restore space)\n\n%macro WIN64_GCC_PROC 1-3 6, frame\n\n    %ifidn %3, frame\n\n        %ifndef reg_save_list\n            %define reg_save_list rsi, rdi\n        %endif\n\n        %ifndef var_slots\n            %define var_slots 0\n        %endif\n\n        FRAME_PROC %1, var_slots, reg_save_list\n        \n    %elifidn %3, leaf\n\n        LEAF_PROC %1\n\n    %else\n\n        %error no (or wrong) function type defined \n\n    
%endif\n\n        %if %2 > 0\n            mov     rdi, rcx\n        %endif\n        %if %2 > 1\n            mov     rsi, rdx\n        %endif\n        %if %2 > 2\n            mov     rdx, r8\n        %endif\n        %if %2 > 3\n            mov     rcx, r9\n        %endif\n        %if %2 > 4\n            mov     r8, [rsp + stack_use + 40]\n        %endif\n        %if %2 > 5\n            mov     r9, [rsp + stack_use + 48]\n        %endif\n        \n%endmacro\n\n%macro WIN64_GCC_EXIT 0-1 frame\n\n    %ifidn %1, frame\n        EXIT_PROC reg_save_list\n    %elifidn %1, leaf\n        ret\n    %else\n        %error no (or wrong) function type defined \n    %endif\n\n%endmacro\n\n%macro WIN64_GCC_END 0-1 frame\n\n    %ifidn %1, frame\n        END_PROC reg_save_list\n    %elifidn %1, leaf\n        ret\n    %else\n        %error no (or wrong) function type defined \n    %endif\n\n%endmacro\n; Copyright 2008, William Hart, Jason Worth-Martin\n;\n; This file is part of the MPIR Library.\n;\n; The MPIR Library is free software; you can redistribute it and/or modify\n; it under the terms of the GNU Lesser General Public License as published by\n; the Free Software Foundation; either version 2.1 of the License, or (at your\n; option) any later version.\n;\n; The MPIR Library is distributed in the hope that it will be useful, but\n; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n; License for more details.\n;\n; You should have received a copy of the GNU Lesser General Public License\n; along with the MPIR Library; see the file COPYING.LIB.  
If not, write to\n; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n; 02110-1301, USA.\n\n%macro G_LABEL 1\n%ifdef GSYM_PREFIX\n_%1:\n%else\n%1:\n%endif\n%endmacro\n\n%macro G_EXPORT 1\n%ifdef GSYM_PREFIX\n    global _%1:function\n%else\n    global %1:function\n%endif\n%endmacro\n\n%macro G_EXTERN 1\n%ifdef GSYM_PREFIX\n    extern _%1\n%else\n    extern %1\n%endif\n%endmacro\n\n%macro G_FUNC 2 \nG_EXPORT __g%1_%2\nG_EXPORT %1_%2\nG_LABEL __g%1_%2\nG_LABEL %1_%2\n%endmacro\n\n%macro GLOBAL_FUNC 1\nG_FUNC %1, suffix\n%endmacro\n\n%define EXCLUDE_PREINV 1\n"
  },
  {
    "path": "yasm_macwin.inc.nofat",
    "content": ";  Copyright (C) 2009, 2008 Brian Gladman\n;\n;  All rights reserved.\n;\n;  Redistribution and use in source and binary forms, with or without\n;  modification, are permitted provided that the following conditions are\n;  met:\n;  1. Redistributions of source code must retain the above copyright notice,\n;  this list of conditions and the following disclaimer.\n;\n;  2. Redistributions in binary form must reproduce the above copyright\n;  notice, this list of conditions and the following disclaimer in the\n;  documentation and/or other materials provided with the distribution.\n;\n;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\n;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n;\n\n; Standardised register numbering scheme\n\n%define r0  rax\n%define r1  rdx\n%define r2  rcx\n%define r3  rbx\n%define r4  rsi\n%define r5  rdi\n%define r6  rbp\n%define r7  rsp\n\n%define r0d eax\n%define r1d edx\n%define r2d ecx\n%define r3d ebx\n%define r4d esi\n%define r5d edi\n%define r6d ebp\n%define r7d esp\n\n%define r0w ax\n%define r1w dx\n%define r2w cx\n%define r3w bx\n%define r4w si\n%define r5w di\n%define r6w bp\n%define r7w sp\n\n%define r0b al\n%define r1b dl\n%define r2b cl\n%define r3b bl\n%define r4b sil\n%define r5b dil\n%define r6b bpl\n%define r7b 
spl\n\n%define asm_sym(x) __g %+ x\n\n; Standard macro for alignment (used to allow easy subsititution of\n; alternative padding schemes)\n\n%macro xalign 1\n    align %1\n%endmacro\n\n; YASM macros for handling Windows Prologues and Epilogues\n;\n; Copyright 2008, 2009 Brian Gladman\n;\n; Windows x64 prologue macro:\n;\n;     FRAME_PROC name, var_slots, register_save_list \n;\n; Windows x64 epilogue macro:\n;\n;     EXIT_PROC register_save_list\n;\n; Windows x64 epilogue and end procedure macro\n;\n;     END_PROC register_save_list\n;\n; where:\n\n;   name:                routine name\n;   var_slots:           stack space needed in 8 byte units\n;   register_save_list:  comma separated list of registers to\n;                        save and restore: a list of general\n;                        purpose registers followed, if XMM\n;                        registers are present, by XMM, and\n;                        then a list of the numbers of the \n;                        XMM registers to save and restore\n;                        e.g.  rsi, rdi, rbp, XMM, 6, 7, 8\n\n; On return the macro variable 'stack_use' gives the total number \n; of bytes used on the stack. 
This allows the function parameters\n; to be accessed at [rsp + stack_use + 8 * n] where n starts at 1\n; (for n = 1..4 this is shadow space for a register parameter)  \n\n%macro FRAME_PROC 2-*\n\n    global  asm_sym(%1)\n    \n%ifdef DLL\n    export  asm_sym(%1)\n%endif\n\n    PROC_FRAME asm_sym(%1)\n    %rotate 1\n      \n    %if %1 < 0\n        %error Negative stack allocation not allowed\n    %else\n\t    %assign var_slots %1\n    %endif\n    %rotate 1\n\n\t%assign reg_slots 0\n\t%assign xmm_seen 0\n    %if %0 > 2\n        %rep %0 - 2\n\t\t    %ifnum %1\n\t\t\t    %if xmm_seen == 0\n\t\t\t\t    %error Not an XMM register\n\t\t\t\t%else\n\t\t\t\t    alloc_stack 16\n\t\t\t\t\tsave_xmm128 XMM%1, 0\n\t\t\t\t\t%assign reg_slots reg_slots + 2\n\t\t\t\t%endif\n\t\t    %elifid %1\n\t\t        %ifidni XMM, %1 \n\t\t\t\t\t%if reg_slots & 1 == 0\n\t\t\t\t\t    alloc_stack 8\n  \t\t\t\t\t    %assign reg_slots reg_slots + 1\n    \t\t\t\t    %assign xmm_seen 1 \n\t\t\t\t\t%else\n\t\t\t\t        %assign xmm_seen 2\t\t\t\t\t \n\t\t\t\t\t%endif\n\t\t\t\t%elif xmm_seen == 0\n\t\t\t\t\tpush_reg  %1\n  \t\t\t\t\t%assign reg_slots reg_slots + 1\n\t\t\t\t%else\n\t\t\t\t    %error XMM registers must be last in the save list\n\t\t\t\t%endif \n\t\t\t%else\n\t\t\t    %error Bad parameter list\n\t\t\t%endif\n            %rotate 1\n        %endrep\n    %endif\n\n\t%if (reg_slots & 1) == (var_slots & 1)\n\t    %assign var_slots var_slots + 1\n\t%endif\n\n    %if var_slots > 0\n        alloc_stack 8 * var_slots\n\t%endif\n\t%assign stack_use 8 * (reg_slots + var_slots)\n    END_PROLOGUE\n\n%endmacro\n\n%macro EXIT_PROC 0-*\n\n    add rsp, 8 * var_slots\n    %if %0 > 0\n        %rep %0\n            %rotate -1\n\t\t\t%ifnum %1\n\t\t\t    movdqa XMM%1, [rsp]\n\t\t\t\tadd rsp, 16\n\t\t\t%elifidni %1, XMM\n\t\t\t    %if xmm_seen == 1 \n\t\t\t\t    add rsp, 8\n\t\t\t\t%endif\n\t\t\t%else \n                pop  %1\n\t\t\t%endif  \n        %endrep\n    %endif\n    ret\n\n%endmacro\n\n%macro 
END_PROC 0-*\n\n    %if var_slots\n\t    add rsp, 8 * var_slots\n    %endif\n    %if %0 > 0\n        %rep %0\n            %rotate -1\n\t\t\t%ifnum %1\n\t\t\t    movdqa XMM%1, [rsp]\n\t\t\t\tadd rsp, 16\n\t\t\t%elifidni %1, XMM\n\t\t\t    %if xmm_seen == 1 \n\t\t\t\t    add rsp, 8\n\t\t\t\t%endif\n\t\t\t%else \n                pop  %1\n\t\t\t%endif  \n        %endrep\n    %endif\n\tret\n    ENDPROC_FRAME\n\n%endmacro\n\n%macro LEAF_PROC 1\n    \n    global  asm_sym(%1)\n    \n%ifdef DLL\n    export  asm_sym(%1)\n%endif\n\nasm_sym(%1):\n\n%endmacro\n\n; Macros for using assembler code using the GCC Calling\n; Conventions in Windows.  These macros move the first\n; six integer parameters from Microsoft ABI calling\n; calling conventions to those used by GCC: \n;\n;   function(    MSVC --> GCC\n;       p1,       rcx     rdi\n;       p2,       rdx     rsi\n;       p3,        r8     rdx\n;       p4,        r9     rcx\n;       p5,  [rsp+40]      r8\n;       p6,  [rsp+48]      r9\n;\n;   WIN64_GCC_PROC name, n_parms, (frame | leaf)\n;\n;   WIN64_GCC_EXIT frame | leaf\n;\n;   WIN64_GCC_END  frame | leaf\n; \n;     name    subroutine name\n;     n_parms number of parameters (default 6)\n;     type    frame or leaf function (default frame)\n;\n; These defines are also used and must be set before the\n; macros are used:\n;\n;     reg_save_list   list of registers to be saved\n;                     and restored\n;     var_slots       number of 8 byte slots needed\n;                     on the stack (excluding the \n;                     register save/restore space)\n\n%macro WIN64_GCC_PROC 1-3 6, frame\n\n    %ifidn %3, frame\n\n        %ifndef reg_save_list\n            %define reg_save_list rsi, rdi\n        %endif\n\n        %ifndef var_slots\n            %define var_slots 0\n        %endif\n\n        FRAME_PROC %1, var_slots, reg_save_list\n        \n    %elifidn %3, leaf\n\n        LEAF_PROC %1\n\n    %else\n\n        %error no (or wrong) function type defined \n\n    
%endif\n\n        %if %2 > 0\n            mov     rdi, rcx\n        %endif\n        %if %2 > 1\n            mov     rsi, rdx\n        %endif\n        %if %2 > 2\n            mov     rdx, r8\n        %endif\n        %if %2 > 3\n            mov     rcx, r9\n        %endif\n        %if %2 > 4\n            mov     r8, [rsp + stack_use + 40]\n        %endif\n        %if %2 > 5\n            mov     r9, [rsp + stack_use + 48]\n        %endif\n        \n%endmacro\n\n%macro WIN64_GCC_EXIT 0-1 frame\n\n    %ifidn %1, frame\n        EXIT_PROC reg_save_list\n    %elifidn %1, leaf\n        ret\n    %else\n        %error no (or wrong) function type defined \n    %endif\n\n%endmacro\n\n%macro WIN64_GCC_END 0-1 frame\n\n    %ifidn %1, frame\n        END_PROC reg_save_list\n    %elifidn %1, leaf\n        ret\n    %else\n        %error no (or wrong) function type defined \n    %endif\n\n%endmacro\n; Copyright 2008, William Hart, Jason Worth-Martin\n;\n; This file is part of the MPIR Library.\n;\n; The MPIR Library is free software; you can redistribute it and/or modify\n; it under the terms of the GNU Lesser General Public License as published by\n; the Free Software Foundation; either version 2.1 of the License, or (at your\n; option) any later version.\n;\n; The MPIR Library is distributed in the hope that it will be useful, but\n; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public\n; License for more details.\n;\n; You should have received a copy of the GNU Lesser General Public License\n; along with the MPIR Library; see the file COPYING.LIB.  
If not, write to\n; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n; 02110-1301, USA.\n\n%macro G_LABEL 1\n%ifdef GSYM_PREFIX\n_%1:\n%else\n%1:\n%endif\n%endmacro\n\n%macro G_EXPORT 1\n%ifdef GSYM_PREFIX\n    global _%1:function\n%else\n    global %1:function\n%endif\n%endmacro\n\n%macro G_EXTERN 1\n%ifdef GSYM_PREFIX\n    extern _%1\n%else\n    extern %1\n%endif\n%endmacro\n\n%macro GLOBAL_FUNC 1\nG_EXPORT __g%1\nG_EXPORT %1\nG_LABEL __g%1\nG_LABEL %1\n%endmacro\n\n%define EXCLUDE_PREINV 1\n"
  },
  {
    "path": "ylwrap",
    "content": "#! /bin/sh\n# ylwrap - wrapper for lex/yacc invocations.\n\nscriptversion=2007-11-22.22\n\n# Copyright (C) 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005,\n# 2007  Free Software Foundation, Inc.\n#\n# Written by Tom Tromey <tromey@cygnus.com>.\n#\n# This program is free software; you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation; either version 2, or (at your option)\n# any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program; if not, write to the Free Software\n# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n# 02110-1301, USA.\n\n# As a special exception to the GNU General Public License, if you\n# distribute this file as part of a program that contains a\n# configuration script generated by Autoconf, you may include it under\n# the same distribution terms that you use for the rest of that program.\n\n# This file is maintained in Automake, please report\n# bugs to <bug-automake@gnu.org> or send patches to\n# <automake-patches@gnu.org>.\n\ncase \"$1\" in\n  '')\n    echo \"$0: No files given.  Try \\`$0 --help' for more information.\" 1>&2\n    exit 1\n    ;;\n  --basedir)\n    basedir=$2\n    shift 2\n    ;;\n  -h|--h*)\n    cat <<\\EOF\nUsage: ylwrap [--help|--version] INPUT [OUTPUT DESIRED]... -- PROGRAM [ARGS]...\n\nWrapper for lex/yacc invocations, renaming files as desired.\n\n  INPUT is the input file\n  OUTPUT is one file PROG generates\n  DESIRED is the file we actually want instead of OUTPUT\n  PROGRAM is program to run\n  ARGS are passed to PROG\n\nAny number of OUTPUT,DESIRED pairs may be used.\n\nReport bugs to <bug-automake@gnu.org>.\nEOF\n    exit $?\n    ;;\n  -v|--v*)\n    echo \"ylwrap $scriptversion\"\n    exit $?\n    ;;\nesac\n\n\n# The input.\ninput=\"$1\"\nshift\ncase \"$input\" in\n  [\\\\/]* | ?:[\\\\/]*)\n    # Absolute path; do nothing.\n    ;;\n  *)\n    # Relative path.  Make it absolute.\n    input=\"`pwd`/$input\"\n    ;;\nesac\n\npairlist=\nwhile test \"$#\" -ne 0; do\n  if test \"$1\" = \"--\"; then\n    shift\n    break\n  fi\n  pairlist=\"$pairlist $1\"\n  shift\ndone\n\n# The program to run.\nprog=\"$1\"\nshift\n# Make any relative path in $prog absolute.\ncase \"$prog\" in\n  [\\\\/]* | ?:[\\\\/]*) ;;\n  *[\\\\/]*) prog=\"`pwd`/$prog\" ;;\nesac\n\n# FIXME: add hostname here for parallel makes that run commands on\n# other machines.  But that might take us over the 14-char limit.\ndirname=ylwrap$$\ntrap \"cd '`pwd`'; rm -rf $dirname > /dev/null 2>&1\" 1 2 3 15\nmkdir $dirname || exit 1\n\ncd $dirname\n\ncase $# in\n  0) \"$prog\" \"$input\" ;;\n  *) \"$prog\" \"$@\" \"$input\" ;;\nesac\nret=$?\n\nif test $ret -eq 0; then\n  set X $pairlist\n  shift\n  first=yes\n  # Since DOS filename conventions don't allow two dots,\n  # the DOS version of Bison writes out y_tab.c instead of y.tab.c\n  # and y_tab.h instead of y.tab.h. Test to see if this is the case.\n  y_tab_nodot=\"no\"\n  if test -f y_tab.c || test -f y_tab.h; then\n    y_tab_nodot=\"yes\"\n  fi\n\n  # The directory holding the input.\n  input_dir=`echo \"$input\" | sed -e 's,\\([\\\\/]\\)[^\\\\/]*$,\\1,'`\n  # Quote $INPUT_DIR so we can use it in a regexp.\n  # FIXME: really we should care about more than `.' and `\\'.\n  input_rx=`echo \"$input_dir\" | sed 's,\\\\\\\\,\\\\\\\\\\\\\\\\,g;s,\\\\.,\\\\\\\\.,g'`\n\n  while test \"$#\" -ne 0; do\n    from=\"$1\"\n    # Handle y_tab.c and y_tab.h output by DOS\n    if test $y_tab_nodot = \"yes\"; then\n      if test $from = \"y.tab.c\"; then\n    \tfrom=\"y_tab.c\"\n      else\n    \tif test $from = \"y.tab.h\"; then\n    \t  from=\"y_tab.h\"\n    \tfi\n      fi\n    fi\n    if test -f \"$from\"; then\n      # If $2 is an absolute path name, then just use that,\n      # otherwise prepend `../'.\n      case \"$2\" in\n    \t[\\\\/]* | ?:[\\\\/]*) target=\"$2\";;\n    \t*) target=\"../$2\";;\n      esac\n\n      # We do not want to overwrite a header file if it hasn't\n      # changed.  This avoid useless recompilations.  However the\n      # parser itself (the first file) should always be updated,\n      # because it is the destination of the .y.c rule in the\n      # Makefile.  Divert the output of all other files to a temporary\n      # file so we can compare them to existing versions.\n      if test $first = no; then\n\trealtarget=\"$target\"\n\ttarget=\"tmp-`echo $target | sed s/.*[\\\\/]//g`\"\n      fi\n      # Edit out `#line' or `#' directives.\n      #\n      # We don't want the resulting debug information to point at\n      # an absolute srcdir; it is better for it to just mention the\n      # .y file with no path.\n      #\n      # We want to use the real output file name, not yy.lex.c for\n      # instance.\n      #\n      # We want the include guards to be adjusted too.\n      FROM=`echo \"$from\" | sed \\\n            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\\\n            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`\n      TARGET=`echo \"$2\" | sed \\\n            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\\\n            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`\n\n      sed -e \"/^#/!b\" -e \"s,$input_rx,,\" -e \"s,$from,$2,\" \\\n          -e \"s,$FROM,$TARGET,\" \"$from\" >\"$target\" || ret=$?\n\n      # Check whether header files must be updated.\n      if test $first = no; then\n\tif test -f \"$realtarget\" && cmp -s \"$realtarget\" \"$target\"; then\n\t  echo \"$2\" is unchanged\n\t  rm -f \"$target\"\n\telse\n          echo updating \"$2\"\n          mv -f \"$target\" \"$realtarget\"\n        fi\n      fi\n    else\n      # A missing file is only an error for the first file.  This\n      # is a blatant hack to let us support using \"yacc -d\".  If -d\n      # is not specified, we don't want an error when the header\n      # file is \"missing\".\n      if test $first = yes; then\n        ret=1\n      fi\n    fi\n    shift\n    shift\n    first=no\n  done\nelse\n  ret=$?\nfi\n\n# Remove the directory.\ncd ..\nrm -rf $dirname\n\nexit $ret\n\n# Local Variables:\n# mode: shell-script\n# sh-indentation: 2\n# eval: (add-hook 'write-file-hooks 'time-stamp)\n# time-stamp-start: \"scriptversion=\"\n# time-stamp-format: \"%:y-%02m-%02d.%02H\"\n# time-stamp-end: \"$\"\n# End:\n"
  }
]